Commit ed6247d4 authored by Jovan Mitrevski's avatar Jovan Mitrevski Committed by Graeme Stewart
Browse files

updates for tau reconstruction (MVAUtils-00-00-07)

	* Incorporating changes from Justin Griffith (griffith@cern.ch) for tau
	* Tag as MVAUtils-00-00-07

2016-09-03 Jovan Mitrevski <Jovan.Mitrevski@cern.ch>
	* Add support for classifications (in addition to regressions):
	* Add an isRegression flag to BDT constructor with TMVA BDT
	*   (default true for unchanged behavior); make newTree private
	* Tag as MVAUtils-00-00-06

2016-09-02 Jovan Mitrevski <Jovan.Mitrevski@cern.ch>
	* Root/BDT.cxx: fix reading in TMVA::MethodBDT input
	* Tag as MVAUtils-00-00-05
parent b4261fdb
...@@ -40,13 +40,7 @@ namespace MVAUtils ...@@ -40,13 +40,7 @@ namespace MVAUtils
{ {
public: public:
BDT(TTree *tree); BDT(TTree *tree);
BDT(TMVA::MethodBDT *bdt); BDT(TMVA::MethodBDT *bdt, bool isRegression = true, bool useYesNoLeaf = false);
// create new tree from root file
void newTree(const std::vector<int>& vars, const std::vector<float>& values);
// create new tree from decision tree
void newTree(const TMVA::DecisionTreeNode *node);
/** return the number of trees in the forest */ /** return the number of trees in the forest */
unsigned int GetNTrees() const { return m_forest.size(); } unsigned int GetNTrees() const { return m_forest.size(); }
...@@ -63,6 +57,14 @@ namespace MVAUtils ...@@ -63,6 +57,14 @@ namespace MVAUtils
return (m_pointers.size() ? GetResponse(m_pointers) : -9999.); return (m_pointers.size() ? GetResponse(m_pointers) : -9999.);
} }
/** these return Sum( purity_i*weight_i )/Sum weight_i */
float GetClassification() const {
return (m_pointers.size() ? GetClassification(m_pointers) : -9999.);
}
/** these return Sum( purity_i*weight_i )/Sum weight_i */
float GetClassification(const std::vector<float*>& pointers) const;
// these return 2.0/(1.0+exp(-2.0*sum))-1, with no offset. // these return 2.0/(1.0+exp(-2.0*sum))-1, with no offset.
float GetGradBoostMVA(const std::vector<float>& values) const; float GetGradBoostMVA(const std::vector<float>& values) const;
float GetGradBoostMVA(const std::vector<float*>& pointers) const; float GetGradBoostMVA(const std::vector<float*>& pointers) const;
...@@ -89,11 +91,20 @@ namespace MVAUtils ...@@ -89,11 +91,20 @@ namespace MVAUtils
private: private:
// create new tree from root file
void newTree(const std::vector<int>& vars, const std::vector<float>& values);
// create new tree from decision tree
void newTree(const TMVA::DecisionTreeNode *node, bool isRegression, bool useYesNoLeaf);
float GetTreeResponse(const std::vector<float>& values, Node::index_t index) const; float GetTreeResponse(const std::vector<float>& values, Node::index_t index) const;
float GetTreeResponse(const std::vector<float*>& pointers, Node::index_t index) const; float GetTreeResponse(const std::vector<float*>& pointers, Node::index_t index) const;
float m_offset; //!< the offset to add in the GetResponse functions float m_offset; //!< the offset to add in the GetResponse functions
float m_sumWeights; //!< the sumOfBoostWeights--no need to recompute each call
std::vector<Node::index_t> m_forest; //!< indices of the top-level nodes of each tree std::vector<Node::index_t> m_forest; //!< indices of the top-level nodes of each tree
std::vector<float> m_weights; //!< boost weights
std::vector<float*> m_pointers; //!< where vars to cut on can be set (but can also be passed) std::vector<float*> m_pointers; //!< where vars to cut on can be set (but can also be passed)
std::vector<Node> m_nodes; //!< where the nodes of the forest are stored std::vector<Node> m_nodes; //!< where the nodes of the forest are stored
......
...@@ -17,6 +17,7 @@ using namespace MVAUtils; ...@@ -17,6 +17,7 @@ using namespace MVAUtils;
/** c-tor from TTree **/ /** c-tor from TTree **/
BDT::BDT(TTree *tree) BDT::BDT(TTree *tree)
: m_sumWeights(0)
{ {
std::vector<int> *vars = 0; std::vector<int> *vars = 0;
std::vector<float> *values = 0; std::vector<float> *values = 0;
...@@ -32,8 +33,12 @@ BDT::BDT(TTree *tree) ...@@ -32,8 +33,12 @@ BDT::BDT(TTree *tree)
assert (values); assert (values);
m_forest.push_back(m_nodes.size()); m_forest.push_back(m_nodes.size());
newTree(*vars, *values); newTree(*vars, *values);
m_weights.push_back(m_offset);
m_sumWeights+=m_offset;
} }
m_offset = m_weights[0];//original use of m_offset
delete vars; delete vars;
delete values; delete values;
...@@ -44,14 +49,21 @@ BDT::BDT(TTree *tree) ...@@ -44,14 +49,21 @@ BDT::BDT(TTree *tree)
} }
/** c-tor from TMVA::MethodBDT **/ /** c-tor from TMVA::MethodBDT **/
BDT::BDT(TMVA::MethodBDT* bdt) BDT::BDT(TMVA::MethodBDT* bdt, bool isRegression, bool useYesNoLeaf)
: m_sumWeights(0)
{ {
assert(bdt); assert(bdt);
m_offset = bdt->GetBoostWeights().size() ? bdt->GetBoostWeights()[0] : 0.; m_offset = bdt->GetBoostWeights().size() ? bdt->GetBoostWeights()[0] : 0.;
std::vector<TMVA::DecisionTree*>::const_iterator it; std::vector<TMVA::DecisionTree*>::const_iterator it;
for(it = bdt->GetForest().begin(); it != bdt->GetForest().end(); ++it) { for(it = bdt->GetForest().begin(); it != bdt->GetForest().end(); ++it) {
m_forest.push_back(m_nodes.size()); m_forest.push_back(m_nodes.size());
newTree((*it)->GetRoot()); uint index=it - bdt->GetForest().begin();
if( bdt->GetBoostWeights().size() > index ) {
m_weights.push_back( bdt->GetBoostWeights()[index]);
m_sumWeights+=m_weights.back();
}
else m_weights.push_back(0);
newTree((*it)->GetRoot(), isRegression, useYesNoLeaf);
} }
// // For Debug // // For Debug
// std::cout << "Constructed from a MethodBDT" << std::endl; // std::cout << "Constructed from a MethodBDT" << std::endl;
...@@ -93,7 +105,7 @@ void BDT::newTree(const std::vector<int>& vars, const std::vector<float>& values ...@@ -93,7 +105,7 @@ void BDT::newTree(const std::vector<int>& vars, const std::vector<float>& values
/** /**
* Creates the full tree structure from TMVA::DecisionTree node. * Creates the full tree structure from TMVA::DecisionTree node.
**/ **/
void BDT::newTree(const TMVA::DecisionTreeNode *node) void BDT::newTree(const TMVA::DecisionTreeNode *node, bool isRegression, bool useYesNoLeaf)
{ {
// index is relative to the current node // index is relative to the current node
...@@ -120,6 +132,8 @@ void BDT::newTree(const TMVA::DecisionTreeNode *node) ...@@ -120,6 +132,8 @@ void BDT::newTree(const TMVA::DecisionTreeNode *node)
if (currParentIndex >= 0) { if (currParentIndex >= 0) {
right[currParentIndex] = i + 1 - currParentIndex; right[currParentIndex] = i + 1 - currParentIndex;
currNode = currParent->GetCutType() ? currParent->GetLeft() : currParent->GetRight(); currNode = currParent->GetCutType() ? currParent->GetLeft() : currParent->GetRight();
} else {
currNode = nullptr;
} }
parent.pop(); parent.pop();
parentIndex.pop(); parentIndex.pop();
...@@ -142,11 +156,16 @@ void BDT::newTree(const TMVA::DecisionTreeNode *node) ...@@ -142,11 +156,16 @@ void BDT::newTree(const TMVA::DecisionTreeNode *node)
++i; ++i;
if (!currNode->GetLeft()){ if (!currNode->GetLeft()){
// a leaf // a leaf
m_nodes.emplace_back(-1, currNode->GetResponse(), right[i]); m_nodes.emplace_back(-1,
isRegression ?
currNode->GetResponse() : useYesNoLeaf ? currNode->GetNodeType() : currNode->GetPurity(),
right[i]);
auto currParent = parent.top(); auto currParent = parent.top();
// if right has not been visited, next will be right // if right has not been visited, next will be right
if (currParent) { if (currParent) {
currNode = currParent->GetCutType() ? currParent->GetLeft() : currParent->GetRight(); currNode = currParent->GetCutType() ? currParent->GetLeft() : currParent->GetRight();
} else {
currNode = nullptr;
} }
parent.pop(); parent.pop();
} else { } else {
...@@ -178,6 +197,16 @@ float BDT::GetResponse(const std::vector<float*>& pointers) const ...@@ -178,6 +197,16 @@ float BDT::GetResponse(const std::vector<float*>& pointers) const
return result; return result;
} }
float BDT::GetClassification(const std::vector<float*>& pointers) const
{
float result = 0;
for (auto it = m_forest.begin(); it != m_forest.end(); ++it){
uint index = it-m_forest.begin();
result += GetTreeResponse(pointers, *it) * m_weights[index];
}
return result/m_sumWeights;
}
/** Return 2.0/(1.0+exp(-2.0*sum))-1, with no offset **/ /** Return 2.0/(1.0+exp(-2.0*sum))-1, with no offset **/
float BDT::GetGradBoostMVA(const std::vector<float>& values) const float BDT::GetGradBoostMVA(const std::vector<float>& values) const
{ {
...@@ -299,6 +328,7 @@ TTree* BDT::WriteTree(TString name) ...@@ -299,6 +328,7 @@ TTree* BDT::WriteTree(TString name)
vars.push_back(m_nodes[j].GetVar()); vars.push_back(m_nodes[j].GetVar());
values.push_back(m_nodes[j].GetVal()); values.push_back(m_nodes[j].GetVal());
} }
m_offset = m_weights[i];
tree->Fill(); tree->Fill();
} }
return tree; return tree;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment