Source code for magicctapipe.reco.estimators

"""
Utilities for training
"""
import logging

import joblib
import numpy as np
import pandas as pd
import sklearn.ensemble

__all__ = ["EnergyRegressor", "DispRegressor", "EventClassifier"]

logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)
TEL_NAMES = {
    1: "LST-1",
    2: "MAGIC-I",
    3: "MAGIC-II",
}  # TODO: REMOVE WHEN SWITCHING TO THE NEW RFs IMPLEMENTTATION (1 RF PER TELESCOPE)


[docs] class EnergyRegressor: """ RF regressors to reconstruct the energies of primary particles. Parameters ---------- settings : dict Settings of RF regressors features : list Parameters for training RFs use_unsigned_features : bool If `True`, it trains RFs with unsigned features """ def __init__(self, settings={}, features=[], use_unsigned_features=None): """ Constructor of the class. Parameters ---------- settings : dict Settings of RF regressors features : list Parameters for training RFs use_unsigned_features : bool If `True`, it trains RFs with unsigned features """ self.settings = settings self.features = features self.use_unsigned_features = use_unsigned_features self.telescope_rfs = {}
[docs] def fit(self, event_data): """ Train a RF for every telescope. Parameters ---------- event_data : pandas.core.frame.DataFrame Data frame of shower events """ self.telescope_rfs.clear() # Loop over every telescope tel_ids = np.unique(event_data["tel_id"]) for tel_id in tel_ids: df_events = event_data.query(f"tel_id == {tel_id}").copy() df_events.dropna(subset=self.features, inplace=True) x_train = df_events[self.features].to_numpy() if self.use_unsigned_features: x_train = np.abs(x_train) # Use logarithmic energy for the target values y_train = np.log10(df_events["true_energy"].to_numpy()) # Configure the RF regressor regressor = sklearn.ensemble.RandomForestRegressor(**self.settings) # Train a telescope RF logger.info(f"Training a {TEL_NAMES[tel_id]} RF...") regressor.fit(x_train, y_train) self.telescope_rfs[tel_id] = regressor
[docs] def predict(self, event_data): """ Reconstructs the energies of primary particles with trained RFs. Parameters ---------- event_data : pandas.core.frame.DataFrame Data frame of shower events Returns ------- pandas.core.frame.DataFrame Data frame of the shower events with reconstructed energies """ reco_params = pd.DataFrame( data={"reco_energy": [], "reco_energy_var": []}, index=pd.MultiIndex.from_tuples([], names=event_data.index.names), ) # Loop over every telescope for tel_id, telescope_rf in self.telescope_rfs.items(): df_events = event_data.query(f"tel_id == {tel_id}").copy() df_events.dropna(subset=self.features, inplace=True) if df_events.empty: continue x_predict = df_events[self.features].to_numpy() if self.use_unsigned_features: x_predict = np.abs(x_predict) # Apply the trained RF. Since the predicted values are in # logarithmic scale, here we convert them to the normal one. reco_energy = 10 ** telescope_rf.predict(x_predict) responses_per_estimator = [] for estimator in telescope_rf.estimators_: responses_per_estimator.append(estimator.predict(x_predict)) reco_energy_var = np.var(responses_per_estimator, axis=0) df_reco_energy = pd.DataFrame( data={"reco_energy": reco_energy, "reco_energy_var": reco_energy_var}, index=df_events.index, ) reco_params = pd.concat([reco_params, df_reco_energy]) reco_params.sort_index(inplace=True) return reco_params
[docs] def save(self, output_file): """ Saves trained RFs in a joblib file. Parameters ---------- output_file : str Path to an output joblib file """ output_data = { "settings": self.settings, "features": self.features, "use_unsigned_features": self.use_unsigned_features, "telescope_rfs": self.telescope_rfs, } joblib.dump(output_data, output_file)
[docs] def load(self, input_file): """ Loads trained RFs from a joblib file. Parameters ---------- input_file : str Path to an input joblib file """ input_data = joblib.load(input_file) self.settings = input_data["settings"] self.features = input_data["features"] self.use_unsigned_features = input_data["use_unsigned_features"] self.telescope_rfs = input_data["telescope_rfs"]
[docs] class DispRegressor: """ RF regressors to reconstruct the DISP parameter. Parameters ---------- settings : dict Settings of RF regressors features : list Parameters for training RFs use_unsigned_features : bool If `True`, it trains RFs with unsigned features """ def __init__(self, settings={}, features=[], use_unsigned_features=None): """ Constructor of the class. Parameters ---------- settings : dict Settings of RF regressors features : list Parameters for training RFs use_unsigned_features : bool If `True`, it trains RFs with unsigned features """ self.settings = settings self.features = features self.use_unsigned_features = use_unsigned_features self.telescope_rfs = {}
[docs] def fit(self, event_data): """ Trains a RF for every telescope. Parameters ---------- event_data : pandas.core.frame.DataFrame Data frame of shower events """ self.telescope_rfs.clear() # Loop over every telescope tel_ids = np.unique(event_data["tel_id"]) for tel_id in tel_ids: df_events = event_data.query(f"tel_id == {tel_id}").copy() df_events.dropna(subset=self.features, inplace=True) x_train = df_events[self.features].to_numpy() if self.use_unsigned_features: x_train = np.abs(x_train) y_train = df_events["true_disp"].to_numpy() # Configure the RF regressor regressor = sklearn.ensemble.RandomForestRegressor(**self.settings) # Train a telescope RF logger.info(f"Training a {TEL_NAMES[tel_id]} RF...") regressor.fit(x_train, y_train) self.telescope_rfs[tel_id] = regressor
[docs] def predict(self, event_data): """ Reconstructs the DISP parameter with trained RFs. Parameters ---------- event_data : pandas.core.frame.DataFrame Data frame of shower events Returns ------- pandas.core.frame.DataFrame Data frame of the shower events with the DISP parameter """ reco_params = pd.DataFrame( data={"reco_disp": [], "reco_disp_var": []}, index=pd.MultiIndex.from_tuples([], names=event_data.index.names), ) # Loop over every telescope for tel_id, telescope_rf in self.telescope_rfs.items(): df_events = event_data.query(f"tel_id == {tel_id}").copy() df_events.dropna(subset=self.features, inplace=True) if df_events.empty: continue x_predict = df_events[self.features].to_numpy() if self.use_unsigned_features: x_predict = np.abs(x_predict) # Apply the trained RF reco_disp = telescope_rf.predict(x_predict) responses_per_estimator = [] for estimator in telescope_rf.estimators_: responses_per_estimator.append(estimator.predict(x_predict)) reco_disp_var = np.var(responses_per_estimator, axis=0) df_reco_disp = pd.DataFrame( data={"reco_disp": reco_disp, "reco_disp_var": reco_disp_var}, index=df_events.index, ) reco_params = pd.concat([reco_params, df_reco_disp]) reco_params.sort_index(inplace=True) return reco_params
[docs] def save(self, output_file): """ Saves trained RFs to a joblib file. Parameters ---------- output_file : str Path to an output joblib file """ output_data = { "settings": self.settings, "features": self.features, "use_unsigned_features": self.use_unsigned_features, "telescope_rfs": self.telescope_rfs, } joblib.dump(output_data, output_file)
[docs] def load(self, input_file): """ Loads trained RFs from a joblib file. Parameters ---------- input_file : str Path to an input joblib file """ input_data = joblib.load(input_file) self.settings = input_data["settings"] self.features = input_data["features"] self.use_unsigned_features = input_data["use_unsigned_features"] self.telescope_rfs = input_data["telescope_rfs"]
[docs] class EventClassifier: """ RF classifiers to reconstruct the gammaness. Parameters ---------- settings : dict Settings of RF classifiers features : list Parameters for training RFs use_unsigned_features : bool If `True`, it trains RFs with unsigned features """ def __init__(self, settings={}, features=[], use_unsigned_features=None): """ Constructor of the class. Parameters ---------- settings : dict Settings of RF classifiers features : list Parameters for training RFs use_unsigned_features : bool If `True`, it trains RFs with unsigned features """ self.settings = settings self.features = features self.use_unsigned_features = use_unsigned_features self.telescope_rfs = {}
[docs] def fit(self, event_data): """ Trains a RF for every telescope. Parameters ---------- event_data : pandas.core.frame.DataFrame Data frame of shower events """ self.telescope_rfs.clear() # Loop over every telescope tel_ids = np.unique(event_data["tel_id"]) for tel_id in tel_ids: df_events = event_data.query(f"tel_id == {tel_id}").copy() df_events.dropna(subset=self.features, inplace=True) x_train = df_events[self.features].to_numpy() if self.use_unsigned_features: x_train = np.abs(x_train) y_train = df_events["true_event_class"].to_numpy() # Configure the RF classifier classifier = sklearn.ensemble.RandomForestClassifier(**self.settings) # Train a telescope RF logger.info(f"Training a {TEL_NAMES[tel_id]} RF...") classifier.fit(x_train, y_train) self.telescope_rfs[tel_id] = classifier
[docs] def predict(self, event_data): """ Reconstructs the gammaness. Parameters ---------- event_data : pandas.core.frame.DataFrame Data frame of shower events Returns ------- pandas.core.frame.DataFrame Data frame of the shower events with the gammaness """ reco_params = pd.DataFrame( data={"gammaness": [], "gammaness_var": []}, index=pd.MultiIndex.from_tuples([], names=event_data.index.names), ) # Loop over every telescope for tel_id, telescope_rf in self.telescope_rfs.items(): df_events = event_data.query(f"tel_id == {tel_id}").copy() df_events.dropna(subset=self.features, inplace=True) if df_events.empty: continue x_predict = df_events[self.features].to_numpy() if self.use_unsigned_features: x_predict = np.abs(x_predict) # Apply the trained RF gammaness = telescope_rf.predict_proba(x_predict)[:, 0] # Calculate the variance of the binomial distribution gammaness_var = gammaness * (1 - gammaness) # Set the artificial finite value in case the variance is 0 # to avoid that the inverse value, which may be used for the # weights of averaging telescope-wise values, is infinite gammaness_var[gammaness == 1] = 0.99 * (1 - 0.99) gammaness_var[gammaness == 0] = 0.01 * (1 - 0.01) df_gammaness = pd.DataFrame( data={"gammaness": gammaness, "gammaness_var": gammaness_var}, index=df_events.index, ) reco_params = pd.concat([reco_params, df_gammaness]) reco_params.sort_index(inplace=True) return reco_params
[docs] def save(self, output_file): """ Saves trained RFs in a joblib file. Parameters ---------- output_file : str Path to an output joblib file """ output_data = { "settings": self.settings, "features": self.features, "use_unsigned_features": self.use_unsigned_features, "telescope_rfs": self.telescope_rfs, } joblib.dump(output_data, output_file)
[docs] def load(self, input_file): """ Loads trained RFs from a joblib file. Parameters ---------- input_file : str Path to an input joblib file """ input_data = joblib.load(input_file) self.settings = input_data["settings"] self.features = input_data["features"] self.use_unsigned_features = input_data["use_unsigned_features"] self.telescope_rfs = input_data["telescope_rfs"]