diff --git a/Calo/CaloTools/CMakeLists.txt b/Calo/CaloTools/CMakeLists.txt index 4b4d50b8104b14fa4e5496b7e91d4581de85baa4..9913fb03c3667dc03e5c0638707731157b8bb652 100644 --- a/Calo/CaloTools/CMakeLists.txt +++ b/Calo/CaloTools/CMakeLists.txt @@ -17,10 +17,10 @@ gaudi_depends_on_subdirs(Calo/CaloInterfaces find_package(Boost) find_package(ROOT) -include_directories(SYSTEM ${Boost_INCLUDE_DIRS} ${ROOT_INCLUDE_DIRS}) +find_package(XGBoost REQUIRED) +include_directories(SYSTEM ${ROOT_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}) gaudi_add_module(CaloTools src/*.cpp - INCLUDE_DIRS Tr/TrackInterfaces - LINK_LIBRARIES CaloUtils CaloDetLib LinkerEvent RecEvent TrackEvent GaudiAlgLib LHCbKernel LHCbMathLib RelationsLib) - + INCLUDE_DIRS Tr/TrackInterfaces XGBoost + LINK_LIBRARIES CaloUtils CaloDetLib LinkerEvent RecEvent TrackEvent GaudiAlgLib LHCbKernel LHCbMathLib RelationsLib XGBoost) diff --git a/Calo/CaloTools/src/CaloHypoEstimator.cpp b/Calo/CaloTools/src/CaloHypoEstimator.cpp index 81d4c0919963f9d9a28c3aa1f55cd064b1c5f99c..690bb50957b4533ee7852a5809486ab23fb81215 100644 --- a/Calo/CaloTools/src/CaloHypoEstimator.cpp +++ b/Calo/CaloTools/src/CaloHypoEstimator.cpp @@ -57,6 +57,7 @@ StatusCode CaloHypoEstimator::initialize() { m_electron = tool<ICaloElectron>("CaloElectron","CaloElectron",this); m_GammaPi0 = tool<IGammaPi0SeparationTool>("GammaPi0SeparationTool" , "GammaPi0SeparationTool", this); + m_GammaPi0XGB = tool<IGammaPi0SeparationTool>("GammaPi0XGBoostTool" , "GammaPi0XGBoostTool"); m_neutralID = tool<INeutralIDTool>("NeutralIDTool" , "NeutralIDTool", this); m_ecal = getDet<DeCalorimeter>( DeCalorimeterLocation::Ecal ); @@ -292,7 +293,8 @@ bool CaloHypoEstimator::estimator(const LHCb::CaloHypo* hypo){ } // gamma/pi0 separation - m_data[isPhoton]= m_GammaPi0->isPhoton( hypo ); + m_data[CaloDataType::DataType::isPhoton]= m_GammaPi0->isPhoton( hypo ); + m_data[CaloDataType::DataType::isPhotonXGB] = m_GammaPi0XGB->isPhoton( hypo ); // 1- Ecal variables : diff --git a/Calo/CaloTools/src/CaloHypoEstimator.h b/Calo/CaloTools/src/CaloHypoEstimator.h index d0f599a174f1d9ae3caa38fd5c192995f1875c41..5f2599462bf4f5c632d277fffa81bec2323ebf9b 100644 --- a/Calo/CaloTools/src/CaloHypoEstimator.h +++ b/Calo/CaloTools/src/CaloHypoEstimator.h @@ -95,6 +95,7 @@ private: bool m_status = true; ICaloElectron * m_electron = nullptr; IGammaPi0SeparationTool* m_GammaPi0 = nullptr; + IGammaPi0SeparationTool* m_GammaPi0XGB = nullptr; INeutralIDTool* m_neutralID = nullptr; ICaloRelationsGetter* m_tables = nullptr; DeCalorimeter* m_ecal = nullptr; diff --git a/Calo/CaloTools/src/GammaPi0XGBoostTool.cpp b/Calo/CaloTools/src/GammaPi0XGBoostTool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8856ed815c328a80bfeb9aa60e524124236c6183 --- /dev/null +++ b/Calo/CaloTools/src/GammaPi0XGBoostTool.cpp @@ -0,0 +1,216 @@ +// Include files + +#include "CaloUtils/CaloMomentum.h" +#include "CaloUtils/CaloAlgUtils.h" +// from Event +#include "Event/RecHeader.h" +#include "Event/CaloHypo.h" +#include "Event/CaloCluster.h" +#include <unordered_map> +#include <iostream> +#include <iomanip> +#include <string> +#include <cstdlib> + +// local +#include "GammaPi0XGBoostTool.h" +#include <fstream> + +using namespace LHCb; +using namespace Gaudi::Units; +//----------------------------------------------------------------------------- +// Implementation file for class : GammaPi0XGBoostTool +// +// 2018-03-24 : author @sayankotor +//----------------------------------------------------------------------------- + +//Declare nessesary help functionality +namespace { + +size_t getClusterType (const CaloCellID& id) { + + const int CaloNCol[4] = {64, 32, 16, 16}; + const int CaloNRow[4] = {52, 20, 12, 12}; + const unsigned Granularity[3] = {1, 2, 3}; + + const unsigned ClusterSize = 5; + int type (id.area()); + int xOffsetOut = std::min (int(id.col() - (32 - CaloNCol[type]*Granularity[type]/2)), // left edge + int(31 + CaloNCol[type]*Granularity[type]/2 - id.col())); // right edge + int yOffsetOut = std::min (int(id.row() - (32 - CaloNRow[type]*Granularity[type]/2)), + int(31 + CaloNRow[type]*Granularity[type]/2 - id.row())); + int innerWidth = CaloNCol[type+1] * (type != 2 ? Granularity[type] : 1); // process inner hole specially + int innerHeight = CaloNRow[type+1] * (type != 2 ? Granularity[type] : 1); // process inner hole specially + + int xOffsetIn = std::min (int(id.col() - (31 - innerWidth/2)), + int(32 + innerWidth/2 - id.col())); + int yOffsetIn = std::min (int(id.row() - (31 - innerHeight/2)), + int(32 + innerHeight/2 - id.row())); + const int margin = (ClusterSize-1)/2; + bool outerBorder = xOffsetOut < margin || yOffsetOut < margin; + bool innerBorder = xOffsetIn > -margin && yOffsetIn > -margin; + if (innerBorder) return type+3; + else if (outerBorder) return type+6; + return type; +} + +} + +// Declaration of the Tool Factory +DECLARE_COMPONENT( GammaPi0XGBoostTool ) + +//============================================================================= +// Initialization +//============================================================================= +StatusCode GammaPi0XGBoostTool::initialize() { + + StatusCode sc = base_class::initialize(); // must be executed first + if ( sc.isFailure() ) return sc; // error printed already by GaudiAlgorithm + + if( UNLIKELY( msgLevel(MSG::DEBUG) ) ) debug() << "==> Initialize" << endmsg; + + /// Retrieve geometry of detector + m_ecal = getDet<DeCalorimeter>( DeCalorimeterLocation::Ecal ); + + + const std::string paramEnv = "PARAMFILESROOT"; + std::string paramRoot = std::string(getenv(paramEnv.c_str())); + + m_xgb0 = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_simpl0.model"); + m_xgb1 = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_simpl1.model"); + m_xgb2 = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_simpl2.model"); + m_xgb03_b = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_bound03.model"); + m_xgb06_b = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_bound06.model"); + m_xgb14_b = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_bound14.model"); + m_xgb17_b = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_bound17.model"); + m_xgb25_b = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_bound25.model"); + m_xgb28_b = std::make_unique<XGBClassifierPhPi0>(paramRoot + "/data/GammaPi0XgbTool_bound28.model"); + + return StatusCode::SUCCESS; +} + +//============================================================================= +// Finalize +//============================================================================= +StatusCode GammaPi0XGBoostTool::finalize() { + + if( UNLIKELY( msgLevel(MSG::DEBUG) ) ) debug() << "==> Finalize" << endmsg; + + m_xgb0.reset(); + m_xgb1.reset(); + m_xgb2.reset(); + m_xgb03_b.reset(); + m_xgb06_b.reset(); + m_xgb14_b.reset(); + m_xgb17_b.reset(); + m_xgb25_b.reset(); + m_xgb28_b.reset(); + + return base_class::finalize(); // must be executed last +} + +//============================================================================= +// Main execution +//============================================================================= + + +double GammaPi0XGBoostTool::isPhoton(const LHCb::CaloHypo* hypo){ + + if ( !m_ecal ) return m_def; + if ( LHCb::CaloMomentum(hypo).pt() < m_minPt) return m_def; + const LHCb::CaloCluster* cluster = LHCb::CaloAlgUtils::ClusterFromHypo( hypo ); + if (!cluster) return m_def; + + std::vector<double> rawEnergyVector(25, 0.0); + + int cluster_type = getClusterType(cluster->seed()); + bool rawEnergy = GetRawEnergy(hypo, cluster_type, rawEnergyVector); + + if(!rawEnergy) return m_def; + + double prediction_xgb = XGBDiscriminant(cluster_type, rawEnergyVector); + return prediction_xgb; +} + + +double GammaPi0XGBoostTool::XGBDiscriminant(int cluster_type, std::vector<double>& row_energies) +{ + + double value = -1e10; + switch (cluster_type) { + case 0: value = m_xgb0->getClassifierValues(row_energies); + break; + case 1: value = m_xgb1->getClassifierValues(row_energies); + break; + case 2: value = m_xgb2->getClassifierValues(row_energies); + break; + case 3: value = m_xgb03_b->getClassifierValues(row_energies); + break; + case 4: value = m_xgb14_b->getClassifierValues(row_energies); + break; + case 5: value = m_xgb25_b->getClassifierValues(row_energies); + break; + case 6: value = m_xgb06_b->getClassifierValues(row_energies); + break; + case 7: value = m_xgb17_b->getClassifierValues(row_energies); + break; + case 8: value = m_xgb28_b->getClassifierValues(row_energies); + break; + default: warning() << "GammaPi0XGBoostTool: Unsupperted cluster type" << endmsg; + + } + return value; +} + +bool GammaPi0XGBoostTool::GetRawEnergy(const LHCb::CaloHypo* hypo, int& cluster_type, std::vector<double>& rowEnergy) const{ + if( nullptr == hypo)return false; + + const LHCb::CaloDigits * digits_full = getIfExists<LHCb::CaloDigits>(LHCb::CaloDigitLocation::Ecal); + const LHCb::CaloCluster* cluster = LHCb::CaloAlgUtils::ClusterFromHypo( hypo ); // OD 2014/05 - change to Split Or Main cluster + + if( nullptr == digits_full || nullptr == cluster) return false; + + LHCb::CaloCellID centerID = cluster->seed(); + + std::vector<std::vector<double>> vector_cells (5, std::vector<double>(5, 0.0)); + + std::vector<int> col_numbers = {(int)centerID.col() - 2, (int)centerID.col() - 1, (int)centerID.col(), (int)centerID.col() + 1, (int)centerID.col() + 2}; + std::vector<int> row_numbers = {(int)centerID.row() - 2, (int)centerID.row() - 1, (int)centerID.row(), (int)centerID.row() + 1, (int)centerID.row() + 2}; + + if (cluster_type > 2) + { + for (auto& col_number: col_numbers){ + for (auto& row_number: row_numbers){ + const auto id_ = LHCb::CaloCellID(centerID.calo(), centerID.area(), row_number, col_number); + auto * test = digits_full->object(id_); + if (test) { + vector_cells[col_number - (int)centerID.col() + 2][row_number - (int)centerID.row() + 2] = test->e(); + } else { + continue; + } + } + } + } + + else + { + for (auto& col_number: col_numbers){ + for (auto& row_number: row_numbers){ + const auto id_ = LHCb::CaloCellID(centerID.calo(), centerID.area(), row_number, col_number); + auto * test = digits_full->object(id_); + if (test) { + vector_cells[col_number - (int)centerID.col() + 2][row_number - (int)centerID.row() + 2] = test->e(); + } else { + continue; + } + } + } + } + + for (int i = 0; i < 5; i++){ + for (int j = 0; j < 5; j++){ + rowEnergy[i*5 + j] = vector_cells[i][j]; + } + } + return true; +} diff --git a/Calo/CaloTools/src/GammaPi0XGBoostTool.h b/Calo/CaloTools/src/GammaPi0XGBoostTool.h new file mode 100644 index 0000000000000000000000000000000000000000..72e43a88d3eebdc02afa82e73a00b554a7d309d4 --- /dev/null +++ b/Calo/CaloTools/src/GammaPi0XGBoostTool.h @@ -0,0 +1,87 @@ +#ifndef GammaPi0XGBoostTool_H +#define GammaPi0XGBoostTool_H + +// Include files +// from Gaudi +#include "GaudiAlg/GaudiTool.h" +#include "CaloDet/DeCalorimeter.h" +#include "CaloInterfaces/IGammaPi0SeparationTool.h" + +// Math +#include "GaudiKernel/Point3DTypes.h" +#include "GaudiKernel/Vector3DTypes.h" +#include "LHCbMath/Line.h" +#include "LHCbMath/GeomFun.h" + +//using xgb in c++ + +#include "XGBClassifierPhPi0.h" + + +/** @class GammaPi0XGBoostTool GammaPi0XGBoostTool.h + * + * + * @author @sayankotor + * @date 2018-03-24 + */ + + +class GammaPi0XGBoostTool : public extends<GaudiTool, IGammaPi0SeparationTool>{ + + +public: + + GammaPi0XGBoostTool( const std::string& type, + const std::string& name, + const IInterface* parent):base_class ( type, name , parent ){}; + StatusCode initialize() override; + StatusCode finalize() override; + + //double isPhoton(const LHCb::Particle* gamma); + double isPhoton(const LHCb::CaloHypo* hypo) override; + + bool GetRawEnergy(const LHCb::CaloHypo* hypo, int& cluster_type, std::vector<double>& rowEnergy) const; + std::vector<std::vector<double>> GetCluster(const LHCb::CaloCellID & centerID, const LHCb::CaloDigits * digits_full) const; + + double inputData(std::string) override { //@TODO: const-ify + /* // try ecal data */ + /* auto it = m_data.find(data); */ + /* if( it != m_data.end() )return it->second; */ + /* // else try prs data : */ + /* auto itp = m_prsdata.find(data); */ + /* if( itp != m_prsdata.end() )return itp->second; */ + /* // yapa */ + return 0.; + } + + std::map<std::string,double> inputDataMap() override {return std::map<std::string,double>();} + std::map<std::string,double> inputPrsDataMap() override {return std::map<std::string,double>();} + bool ClusterVariables(const LHCb::CaloHypo*, double&, double&, double&, double&, double&, + double&, double&, int&) override {return true;} + bool PrsVariables(const LHCb::CaloHypo*, double&, double&, double&, double&, + double&, double&, double&, double&, double&, + int&, int&, int&, int&) override {return true;} + double isPhoton(const double*) override {return 0.0;}; + +private: + + Gaudi::Property<float> m_minPt {this, "MinPt", 2000.}; + + std::unique_ptr<XGBClassifierPhPi0> m_xgb0; + std::unique_ptr<XGBClassifierPhPi0> m_xgb1; + std::unique_ptr<XGBClassifierPhPi0> m_xgb2; + std::unique_ptr<XGBClassifierPhPi0> m_xgb03_b; + std::unique_ptr<XGBClassifierPhPi0> m_xgb14_b; + std::unique_ptr<XGBClassifierPhPi0> m_xgb25_b; + std::unique_ptr<XGBClassifierPhPi0> m_xgb06_b; + std::unique_ptr<XGBClassifierPhPi0> m_xgb17_b; + std::unique_ptr<XGBClassifierPhPi0> m_xgb28_b; + + const DeCalorimeter* m_ecal = nullptr; + + + double XGBDiscriminant(int cluster_type, std::vector<double>& row_energies); + + const double m_def = -1.e+06; +}; +#endif // GammaPi0XGBoostTool_H \ No newline at end of file diff --git a/Calo/CaloTools/src/XGBClassifierPhPi0.cpp b/Calo/CaloTools/src/XGBClassifierPhPi0.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa2c502cbe45bf932212289b18ea88c948604d74 --- /dev/null +++ b/Calo/CaloTools/src/XGBClassifierPhPi0.cpp @@ -0,0 +1,76 @@ +#include "XGBClassifierPhPi0.h" +#include <iostream> +#include <exception> + + +XGBClassifierPhPi0::XGBClassifierPhPi0() +{ + XGDMatrixCreateFromMat( + &m_predictionsCache.at(0), + 1, + 2, + 0.5, + &m_cache_matrix); +} + +XGBClassifierPhPi0::XGBClassifierPhPi0(const std::string& path) +{ + xgb_path = path; + //std::cout<<"PATH str: "<< xgb_path << std::endl; + XGDMatrixCreateFromMat( + &m_predictionsCache.at(0), + 1, + 2, + 0.5, + &m_cache_matrix); + XGBoosterCreate(&m_cache_matrix, 1, &m_booster); + int err_code = XGBoosterLoadModel(m_booster, xgb_path.c_str()); + if (err_code == -1){ + log << MSG::WARNING << "Do not load XGBModel " << endmsg; + } +} + +void XGBClassifierPhPi0::setPath(const std::string& path) +{ + XGBoosterCreate(&m_cache_matrix, 1, &m_booster); + try{ + XGBoosterLoadModel(m_booster, path.c_str()); + } + catch(const std::exception& e){ + log << MSG::WARNING << "Standard exception " << endmsg; + } +} + +double XGBClassifierPhPi0::getClassifierValues(const std::vector<double>& featureValues) +{ + // currently XGBoost only supports float + std::vector<float> features_f(featureValues.begin(), featureValues.end()); + + // fill the feature vector into a XGBoost DMatrix + XGDMatrixCreateFromMat(&features_f.at(0), + 1, + features_f.size(), + 0, + &m_feature_matrix); + + // XGBoost returns the predictions into a arrayish object and return + // its size + unsigned long predictions_length; + const float *predictions; + try{ + XGBoosterPredict(m_booster, + m_feature_matrix, + 0, + 0, + &predictions_length, + &predictions); + } + catch(...){ + log << MSG::WARNING << "some exeption " << endmsg; + } + std::vector<double> vec_predictions; + for (unsigned int i =0; i<predictions_length; i++){ + vec_predictions.push_back(predictions[i]); + } + return vec_predictions[0]; +} \ No newline at end of file diff --git a/Calo/CaloTools/src/XGBClassifierPhPi0.h b/Calo/CaloTools/src/XGBClassifierPhPi0.h new file mode 100644 index 0000000000000000000000000000000000000000..f6f1016efb1117a5184660a68a5a2a4998a127bf --- /dev/null +++ b/Calo/CaloTools/src/XGBClassifierPhPi0.h @@ -0,0 +1,43 @@ +#ifndef XGBCLASSIFIER_H +#define XGBCLASSIFIER_H + +#include <string> +#include <vector> +#include "xgboost/c_api.h" +#include "GaudiKernel/ServiceHandle.h" +#include "GaudiKernel/IMessageSvc.h" +#include "GaudiKernel/MsgStream.h" +//#include "/afs/cern.ch/user/v/vchekali/public/xgboost/include/xgboost/c_api.h" + +class IClassifier +{ +public: + + /** * @brief Main classification method * + * Takes a vector of values of features and returns the corresponding MVA + * output. + * * @param[in] featureValues A vector of feature values + * * @return MVA classifier value */ + virtual double getClassifierValues(const std::vector<double>& featureValues) = 0; +}; + +class XGBClassifierPhPi0 : public IClassifier { + public: + XGBClassifierPhPi0(); + XGBClassifierPhPi0(const std::string& path); + double getClassifierValues(const std::vector<double>& featureValues) override; + void setPath(const std::string& path); + ServiceHandle<IMessageSvc> msgh = ServiceHandle<IMessageSvc>("MessageSvc","XGBLog"); + MsgStream log = MsgStream(&(*msgh), "XGBLog"); + + private: + std::string xgb_path ="def_path"; + std::vector<float> m_predictionsCache = {0, 0}; + DMatrixHandle m_cache_matrix, + m_feature_matrix; + BoosterHandle m_booster; + +}; + + +#endif // XGBCLASSIFIER_H diff --git a/cmake/FindXGBoost.cmake b/cmake/FindXGBoost.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e4ca31035ae222b9d6f8b71654d84ca0cde7c812 --- /dev/null +++ b/cmake/FindXGBoost.cmake @@ -0,0 +1,32 @@ +# - Locate XGBoost library +# Defines: +# +# XGBoost_FOUND +# XGBoost_INCLUDE_DIR +# XGBoost_INCLUDE_DIRS (not cached) +# XGBoost_LIBRARY +# XGBoost_LIBRARIES (not cached) +# XGBoost_LIBRARY_DIRS (not cached) +# XGBoost_EXTRA_INCLUDE_DIR + + +find_path(XGBoost_INCLUDE_DIR xgboost/c_api.h + HINTS $ENV{XGBoost_ROOT_DIR}/include ${XGBoost_ROOT_DIR}/include) + +find_path(XGBoost_EXTRA_INCLUDE_DIR rabit/c_api.h + HINTS ${XGBoost_INCLUDE_DIR}/../rabit/include + $ENV{XGBoost_ROOT_DIR}/rabit/include ${XGBoost_ROOT_DIR}/rabit/include) + +find_library(XGBoost_LIBRARY NAMES xgboost + HINTS $ENV{XGBoost_ROOT_DIR}/lib ${XGBoost_ROOT_DIR}/lib) + +# handle the QUIETLY and REQUIRED arguments and set XGBoost_FOUND to TRUE if +# all listed variables are TRUE +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(XGBoost DEFAULT_MSG XGBoost_INCLUDE_DIR XGBoost_LIBRARY XGBoost_EXTRA_INCLUDE_DIR) + +mark_as_advanced(XGBoost_FOUND XGBoost_INCLUDE_DIR XGBoost_LIBRARY XGBoost_EXTRA_INCLUDE_DIR) + +set(XGBoost_INCLUDE_DIRS ${XGBoost_INCLUDE_DIR} ${XGBoost_EXTRA_INCLUDE_DIR}) +set(XGBoost_LIBRARIES ${XGBoost_LIBRARY}) +get_filename_component(XGBoost_LIBRARY_DIRS ${XGBoost_LIBRARY} PATH)