diff --git a/Reconstruction/MVAUtils/MVAUtils/BDT.h b/Reconstruction/MVAUtils/MVAUtils/BDT.h index 11ba8e036b6dea75207549e555a9616c11a5a96a..1a9e3b4d84fa6e7e077f10dd8c9f79174b8591b9 100644 --- a/Reconstruction/MVAUtils/MVAUtils/BDT.h +++ b/Reconstruction/MVAUtils/MVAUtils/BDT.h @@ -106,33 +106,7 @@ namespace MVAUtils std::unique_ptr<IForest> m_forest; //!< the implementation of the forest, doing the hard work std::vector<float*> m_pointers; //!< where vars to cut on can be set (but can also be passed) }; - - - inline float BDT::GetResponse() const { - return (!m_pointers.empty() ? GetResponse(m_pointers) : -9999.); - } - - inline float BDT::GetClassification() const { - return (!m_pointers.empty() ? GetClassification(m_pointers) : -9999.); - } - - inline std::vector<float> BDT::GetMultiResponse(unsigned int numClasses) const { - return (!m_pointers.empty() ? GetMultiResponse(m_pointers, numClasses) : std::vector<float>()); - } - - inline std::vector<float> BDT::GetValues() const { - std::vector<float> result; - for (float* ptr : m_pointers) - { - assert (ptr); - result.push_back(*ptr); - } - return result; - } - - inline const std::vector<float*>& BDT::GetPointers() const { return m_pointers; } - inline void BDT::SetPointers(const std::vector<float*>& pointers) { m_pointers = pointers; } - } +#include "MVAUtils/BDT.icc" #endif diff --git a/Reconstruction/MVAUtils/MVAUtils/BDT.icc b/Reconstruction/MVAUtils/MVAUtils/BDT.icc new file mode 100644 index 0000000000000000000000000000000000000000..607d5ec67545e2a8cc121e0f946a6ed87f986b66 --- /dev/null +++ b/Reconstruction/MVAUtils/MVAUtils/BDT.icc @@ -0,0 +1,132 @@ +/* + Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration +*/ +namespace MVAUtils { + +inline float +BDT::GetResponse() const +{ + return (!m_pointers.empty() ? GetResponse(m_pointers) : -9999.); +} + +inline float +BDT::GetClassification() const +{ + return (!m_pointers.empty() ? GetClassification(m_pointers) : -9999.); +} + +inline std::vector<float> +BDT::GetMultiResponse(unsigned int numClasses) const +{ + return (!m_pointers.empty() ? GetMultiResponse(m_pointers, numClasses) + : std::vector<float>()); +} + +inline std::vector<float> +BDT::GetValues() const +{ + std::vector<float> result; + for (float* ptr : m_pointers) { + assert(ptr); + result.push_back(*ptr); + } + return result; +} + +inline const std::vector<float*>& +BDT::GetPointers() const +{ + return m_pointers; +} +inline void +BDT::SetPointers(const std::vector<float*>& pointers) +{ + m_pointers = pointers; +} +inline unsigned int +BDT::GetNTrees() const +{ + return m_forest->GetNTrees(); +} +inline int +BDT::GetNVars() const +{ + return m_forest->GetNVars(); +} +inline float +BDT::GetOffset() const +{ + return m_forest->GetOffset(); +} + +/** Return offset + the sum of the response of each tree **/ +inline float +BDT::GetResponse(const std::vector<float>& values) const +{ + return m_forest->GetResponse(values); +} + +/** Return offset + the sum of the response of each tree **/ +inline float +BDT::GetResponse(const std::vector<float*>& pointers) const +{ + return m_forest->GetResponse(pointers); +} + +inline float +BDT::GetClassification(const std::vector<float>& values) const +{ + return m_forest->GetClassification(values); +} + +inline float +BDT::GetClassification(const std::vector<float*>& pointers) const +{ + return m_forest->GetClassification(pointers); +} + +inline float +BDT::GetGradBoostMVA(const std::vector<float>& values) const +{ + const float sum = m_forest->GetRawResponse(values); // ignores the offset + return 2. / (1 + std::exp(-2 * sum)) - + 1; // output shaping for gradient boosted decision tree (-1,1) +} + +inline float +BDT::GetGradBoostMVA(const std::vector<float*>& pointers) const +{ + const float sum = m_forest->GetRawResponse(pointers); // ignores the offset + // output shaping for gradient boosted decision tree (-1,1) + return 2. / (1 + std::exp(-2 * sum)) - 1; +} + +inline std::vector<float> +BDT::GetMultiResponse(const std::vector<float>& values, + unsigned int numClasses) const +{ + return m_forest->GetMultiResponse(values, numClasses); +} + +inline std::vector<float> +BDT::GetMultiResponse(const std::vector<float*>& pointers, + unsigned int numClasses) const +{ + return m_forest->GetMultiResponse(pointers, numClasses); +} + +inline float +BDT::GetTreeResponse(const std::vector<float>& values, + MVAUtils::index_t index) const +{ + return m_forest->GetTreeResponse(values, index); +} + +inline float +BDT::GetTreeResponse(const std::vector<float*>& pointers, + MVAUtils::index_t index) const +{ + return m_forest->GetTreeResponse(pointers, index); +} + +} diff --git a/Reconstruction/MVAUtils/MVAUtils/Forest.h b/Reconstruction/MVAUtils/MVAUtils/Forest.h index 2e2a10f20ccbb401d90f6efd1456fce3ddf5a01c..7ac71050157f540751320dbf3db3ff53165c7861 100644 --- a/Reconstruction/MVAUtils/MVAUtils/Forest.h +++ b/Reconstruction/MVAUtils/MVAUtils/Forest.h @@ -53,43 +53,55 @@ namespace MVAUtils class Forest : public IForest { public: - - virtual float GetTreeResponse(const std::vector<float>& values, unsigned int itree) const override; - virtual float GetTreeResponse(const std::vector<float*>& pointers, unsigned int itree) const override; - - /** Return the offset of the forest. Since by default there is no offset, return 0 */ - virtual float GetOffset() const override { return 0.; } - - /** Return the response of the whole Forest. Raw is just the sum of all the trees **/ - // The method is not `final`, but it is very unlikely the derived class - // will redefine this (since it is "raw") - virtual float GetRawResponse(const std::vector<float>& values) const override; - virtual float GetRawResponse(const std::vector<float*>& pointers) const override; - - /** Compute the prediction for regression **/ - // In this class it is equal to the raw-reponse. Derived class should - // override this. - virtual float GetResponse(const std::vector<float>& values) const override; - virtual float GetResponse(const std::vector<float*>& pointers) const override; - - /** Compute the prediction for multiclassification (a score for each class). - * In addition to the input values need to pass the number of classes - **/ - // Since TMVA and lgbm are identical the common implementation is here: - // Return the softmax of the sub-forest raw-response - virtual std::vector<float> GetMultiResponse(const std::vector<float>& values, - unsigned int numClasses) const override; - virtual std::vector<float> GetMultiResponse(const std::vector<float*>& pointers, - unsigned int numClasses) const override; - - virtual unsigned int GetNTrees() const final { return m_forest.size(); } - - virtual void PrintForest() const override; - - virtual void PrintTree(unsigned int itree) const override; - - /** Return the vector of nodes for the tree itree **/ - virtual std::vector<Node_t> GetTree(unsigned int itree) const final; + virtual float GetTreeResponse(const std::vector<float>& values, + unsigned int itree) const override final; + virtual float GetTreeResponse(const std::vector<float*>& pointers, + unsigned int itree) const override final; + + /** Return the offset of the forest. Since by default there is no offset, + * return 0 */ + virtual float GetOffset() const override { return 0.; } + + /** Return the response of the whole Forest. Raw is just the sum of all + * the trees **/ + virtual float GetRawResponse( + const std::vector<float>& values) const override final; + virtual float GetRawResponse( + const std::vector<float*>& pointers) const override final; + + /** Compute the prediction for regression **/ + // In this class it is equal to the raw-reponse. Derived class should + // override this. + virtual float GetResponse( + const std::vector<float>& values) const override; + virtual float GetResponse( + const std::vector<float*>& pointers) const override; + + /** Compute the prediction for multiclassification (a score for each + *class). In addition to the input values need to pass the number of + *classes + **/ + // Since TMVA and lgbm are identical the common implementation is here: + // Return the softmax of the sub-forest raw-response + virtual std::vector<float> GetMultiResponse( + const std::vector<float>& values, + unsigned int numClasses) const override; + + virtual std::vector<float> GetMultiResponse( + const std::vector<float*>& pointers, + unsigned int numClasses) const override; + + virtual unsigned int GetNTrees() const override final + { + return m_forest.size(); + } + + virtual void PrintForest() const override; + + virtual void PrintTree(unsigned int itree) const override; + + /** Return the vector of nodes for the tree itree **/ + std::vector<Node_t> GetTree(unsigned int itree) const; protected: /** Get the response of a tree. Instead of specifying the index of the tree diff --git a/Reconstruction/MVAUtils/MVAUtils/ForestLGBM.h b/Reconstruction/MVAUtils/MVAUtils/ForestLGBM.h index e344246a2953f91d036acd0a6af7e5712fa3e5fe..51e1856169f9dca3c388cca6d2bc7f152a7e28da 100644 --- a/Reconstruction/MVAUtils/MVAUtils/ForestLGBM.h +++ b/Reconstruction/MVAUtils/MVAUtils/ForestLGBM.h @@ -49,7 +49,6 @@ namespace MVAUtils ForestLGBMSimple (ForestLGBMSimple&&) = default; ForestLGBMSimple& operator=(ForestLGBMSimple&&) = default; ~ForestLGBMSimple()=default; - virtual TTree* WriteTree(TString name) const override; virtual void PrintForest() const override; virtual int GetNVars() const override { return m_max_var + 1; } @@ -69,7 +68,6 @@ namespace MVAUtils ForestLGBM (ForestLGBM&&) = default; ForestLGBM& operator=(ForestLGBM&&) = default; ~ForestLGBM()=default; - virtual TTree* WriteTree(TString name) const override; virtual void PrintForest() const override; virtual int GetNVars() const override { return m_max_var + 1; } diff --git a/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.h b/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.h index 084054066de3a6fb061b9a566012ff267a98a9f9..1dc7dd428a1a2cb2ca30a4a71dc57a57b5b89aa7 100644 --- a/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.h +++ b/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.h @@ -25,22 +25,16 @@ namespace MVAUtils public: ForestWeighted() : m_sumWeights(0.) { } - float GetTreeResponseWeighted(const std::vector<float>& values, unsigned int itree) const - { - return Forest<Node_t>::GetTreeResponse(values, itree) * m_weights[itree]; - } - - float GetTreeResponseWeighted(const std::vector<float*>& pointers, unsigned int itree) const - { - return Forest<Node_t>::GetTreeResponse(pointers, itree) * m_weights[itree]; - } - - using Forest<Node_t>::GetNTrees; // lookup is deferred until template paramers are known, force it + using Forest<Node_t>::GetNTrees; using Forest<Node_t>::newTree; + float GetTreeResponseWeighted(const std::vector<float>& values, unsigned int itree) const; + float GetTreeResponseWeighted(const std::vector<float*>& pointers, unsigned int itree) const; + float GetWeightedResponse(const std::vector<float>& values) const; float GetWeightedResponse(const std::vector<float*>& pointers) const; - virtual void newTree(const std::vector<Node_t>& nodes, float weight); + + void newTree(const std::vector<Node_t>& nodes, float weight); float GetTreeWeight(unsigned int itree) const { return m_weights[itree]; } float GetSumWeights() const { return m_sumWeights; } @@ -57,33 +51,6 @@ namespace MVAUtils }; - template<typename Node_t> - float ForestWeighted<Node_t>::GetWeightedResponse(const std::vector<float>& values) const { - float result = 0.; - for (unsigned int itree = 0; itree != GetNTrees(); ++itree) - { - result += GetTreeResponseWeighted(values, itree); - } - return result; - } - - template<typename Node_t> - float ForestWeighted<Node_t>::GetWeightedResponse(const std::vector<float*>& pointers) const { - float result = 0.; - for (unsigned int itree = 0; itree != GetNTrees(); ++itree) - { - result += GetTreeResponseWeighted(pointers, itree); - } - return result; - } - - template<typename Node_t> - void ForestWeighted<Node_t>::newTree(const std::vector<Node_t>& nodes, float weight) { - newTree(nodes); - m_weights.push_back(weight); - m_sumWeights += weight; - } - /* * Support TMVA processing * @@ -118,5 +85,5 @@ namespace MVAUtils }; } - +#include "MVAUtils/ForestTMVA.icc" #endif diff --git a/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.icc b/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.icc new file mode 100644 index 0000000000000000000000000000000000000000..e6bcac410504dc37bf0e54039c42217b671444d6 --- /dev/null +++ b/Reconstruction/MVAUtils/MVAUtils/ForestTMVA.icc @@ -0,0 +1,81 @@ +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ +namespace MVAUtils { + +template<typename Node_t> +float +ForestWeighted<Node_t>::GetTreeResponseWeighted( + const std::vector<float>& values, + unsigned int itree) const +{ + return Forest<Node_t>::GetTreeResponse(values, itree) * m_weights[itree]; +} +template<typename Node_t> +float +ForestWeighted<Node_t>::GetTreeResponseWeighted( + const std::vector<float*>& pointers, + unsigned int itree) const +{ + return Forest<Node_t>::GetTreeResponse(pointers, itree) * m_weights[itree]; +} + +template<typename Node_t> +float +ForestWeighted<Node_t>::GetWeightedResponse( + const std::vector<float>& values) const +{ + float result = 0.; + for (unsigned int itree = 0; itree != GetNTrees(); ++itree) { + result += GetTreeResponseWeighted(values, itree); + } + return result; +} + +template<typename Node_t> +float +ForestWeighted<Node_t>::GetWeightedResponse( + const std::vector<float*>& pointers) const +{ + float result = 0.; + for (unsigned int itree = 0; itree != GetNTrees(); ++itree) { + result += GetTreeResponseWeighted(pointers, itree); + } + return result; +} + +template<typename Node_t> +void +ForestWeighted<Node_t>::newTree(const std::vector<Node_t>& nodes, float weight) +{ + newTree(nodes); + m_weights.push_back(weight); + m_sumWeights += weight; +} + +inline float +ForestTMVA::GetResponse(const std::vector<float>& values) const +{ + return GetRawResponse(values) + GetOffset(); +} + +inline float +ForestTMVA::GetResponse(const std::vector<float*>& pointers) const +{ + return GetRawResponse(pointers) + GetOffset(); +} + +inline float +ForestTMVA::GetClassification(const std::vector<float>& values) const +{ + float result = GetWeightedResponse(values); + return result / GetSumWeights(); +} + +inline float +ForestTMVA::GetClassification(const std::vector<float*>& pointers) const +{ + float result = GetWeightedResponse(pointers); + return result / GetSumWeights(); +} +} diff --git a/Reconstruction/MVAUtils/Root/BDT.cxx b/Reconstruction/MVAUtils/Root/BDT.cxx index c7d76f08843467ae08547e41f87f3152cd19d28d..46ed101c72292508ec5dc942f6c5c85002c70258 100644 --- a/Reconstruction/MVAUtils/Root/BDT.cxx +++ b/Reconstruction/MVAUtils/Root/BDT.cxx @@ -30,7 +30,7 @@ std::string get_default_string_map(const std::map <std::string, std::string> & m { std::map<std::string, std::string>::const_iterator it = m.find(key); if (it == m.end()) { return defval; } - return it->second; + return it->second; } std::map<std::string, std::string> parseOptions(const std::string& raw_options) @@ -44,7 +44,8 @@ std::map<std::string, std::string> parseOptions(const std::string& raw_options) const auto left = item.substr(0, pos); if (!options.insert(std::make_pair(left, right)).second) { - throw std::runtime_error(std::string("option ") + left + " duplicated in title of TTree used as input"); + throw std::runtime_error(std::string("option ") + left + + " duplicated in title of TTree used as input"); } } @@ -64,95 +65,24 @@ BDT::BDT(::TTree *tree) std::string node_type = get_default_string_map (options, std::string("node_type")); if (node_type == "lgbm") { m_forest = std::make_unique<ForestLGBM>(tree); + } else if (node_type == "lgbm_simple") { + m_forest = std::make_unique<ForestLGBMSimple>( + tree); // this do not support nan as inputs + } else { + throw std::runtime_error( + "the title of the input tree is misformatted: cannot understand which " + "BDT implementation to use"); } - else if (node_type == "lgbm_simple") { - m_forest = std::make_unique<ForestLGBMSimple>(tree); // this do not support nan as inputs - } - else - { - throw std::runtime_error("the title of the input tree is misformatted: cannot understand which BDT implementation to use"); - } - } - else if (creator == "xgboost") - { - //this do support nan as inputs + } else if (creator == "xgboost") { + // this do support nan as inputs m_forest = std::make_unique<ForestXGBoost>(tree); - } - else { + } else { // default for compatibility: old TTree (based on TMVA) don't have a special title m_forest = std::make_unique<ForestTMVA>(tree); } } -unsigned int BDT::GetNTrees() const { return m_forest->GetNTrees(); } -int BDT::GetNVars() const { return m_forest->GetNVars(); } -float BDT::GetOffset() const { return m_forest->GetOffset(); } - -/** Return offset + the sum of the response of each tree **/ -float BDT::GetResponse(const std::vector<float>& values) const -{ - return m_forest->GetResponse(values); -} - - -/** Return offset + the sum of the response of each tree **/ -float BDT::GetResponse(const std::vector<float*>& pointers) const -{ - return m_forest->GetResponse(pointers); -} - - -float BDT::GetClassification(const std::vector<float>& values) const -{ - return m_forest->GetClassification(values); -} - - -float BDT::GetClassification(const std::vector<float*>& pointers) const -{ - return m_forest->GetClassification(pointers); -} - -float BDT::GetGradBoostMVA(const std::vector<float>& values) const -{ - const float sum = m_forest->GetRawResponse(values); // ignores the offset - return 2. / (1 + std::exp(-2 * sum)) - 1; //output shaping for gradient boosted decision tree (-1,1) -} - -float BDT::GetGradBoostMVA(const std::vector<float*>& pointers) const -{ - const float sum = m_forest->GetRawResponse(pointers); // ignores the offset - return 2. / (1 + std::exp(-2 * sum)) - 1; //output shaping for gradient boosted decision tree (-1,1) -} - - -std::vector<float> BDT::GetMultiResponse(const std::vector<float>& values, - unsigned int numClasses) const -{ - return m_forest->GetMultiResponse(values, numClasses); -} - - -std::vector<float> BDT::GetMultiResponse(const std::vector<float*>& pointers, - unsigned int numClasses) const -{ - return m_forest->GetMultiResponse(pointers, numClasses); -} - - -float BDT::GetTreeResponse(const std::vector<float>& values, MVAUtils::index_t index) const -{ - return m_forest->GetTreeResponse(values, index); -} - - -float BDT::GetTreeResponse(const std::vector<float*>& pointers, MVAUtils::index_t index) const -{ - return m_forest->GetTreeResponse(pointers, index); -} - TTree* BDT::WriteTree(TString name) const { return m_forest->WriteTree(std::move(name)); } - void BDT::PrintForest() const { m_forest->PrintForest(); } void BDT::PrintTree(unsigned int itree) const { m_forest->PrintTree(itree); } diff --git a/Reconstruction/MVAUtils/Root/ForestTMVA.cxx b/Reconstruction/MVAUtils/Root/ForestTMVA.cxx index dd199d0e32da3f6cd926371c0802acfd6adc0a03..4c30deb571869bd2b07889d3bbc8833492a87b9f 100644 --- a/Reconstruction/MVAUtils/Root/ForestTMVA.cxx +++ b/Reconstruction/MVAUtils/Root/ForestTMVA.cxx @@ -74,26 +74,6 @@ TTree* ForestTMVA::WriteTree(TString name) const return tree; } -float ForestTMVA::GetResponse(const std::vector<float>& values) const { - return GetRawResponse(values) + GetOffset(); -} - -float ForestTMVA::GetResponse(const std::vector<float*>& pointers) const { - return GetRawResponse(pointers) + GetOffset(); -} - -float ForestTMVA::GetClassification(const std::vector<float>& values) const -{ - float result = GetWeightedResponse(values); - return result / GetSumWeights(); -} - -float ForestTMVA::GetClassification(const std::vector<float*>& pointers) const -{ - float result = GetWeightedResponse(pointers); - return result / GetSumWeights(); -} - void ForestTMVA::PrintForest() const { std::cout << "***BDT TMVA: Printing entire forest***" << std::endl;