Commit d9c67c16 authored by Jovan Mitrevski's avatar Jovan Mitrevski Committed by Graeme Stewart
Browse files

updates for tau reconstruction (MVAUtils-00-00-07)

	* Incorporating changes from Justin Griffith (griffith@cern.ch) for tau
	* Tag as MVAUtils-00-00-07

2016-09-03 Jovan Mitrevski <Jovan.Mitrevski@cern.ch>
	* Add support for classifications (in addition to regressions):
	* Add a isRegression flag to BDT constructor with TMVA BDT
	*   (default true for unchanged behavior); make newTree private
	* Tag as MVAUtils-00-00-06

2016-09-02 Jovan Mitrevski <Jovan.Mitrevski@cern.ch>
	* Root/BDT.cxx: fix reading in TMVA::MethodBDT input
	* Tag as MVAUtils-00-00-05


Former-commit-id: ed6247d4
parent da1e852b
......@@ -40,13 +40,7 @@ namespace MVAUtils
{
public:
BDT(TTree *tree);
BDT(TMVA::MethodBDT *bdt);
// create new tree from root file
void newTree(const std::vector<int>& vars, const std::vector<float>& values);
// create new tree from decision tree
void newTree(const TMVA::DecisionTreeNode *node);
BDT(TMVA::MethodBDT *bdt, bool isRegression = true, bool useYesNoLeaf = false);
/** return the number of trees in the forest */
unsigned int GetNTrees() const { return m_forest.size(); }
......@@ -63,6 +57,14 @@ namespace MVAUtils
return (m_pointers.size() ? GetResponse(m_pointers) : -9999.);
}
/** these return Sum( purity_i*weight_i )/Sum weight_i */
float GetClassification() const {
return (m_pointers.size() ? GetClassification(m_pointers) : -9999.);
}
/** these return Sum( purity_i*weight_i )/Sum weight_i */
float GetClassification(const std::vector<float*>& pointers) const;
// these return 2.0/(1.0+exp(-2.0*sum))-1, with no offset.
float GetGradBoostMVA(const std::vector<float>& values) const;
float GetGradBoostMVA(const std::vector<float*>& pointers) const;
......@@ -89,11 +91,20 @@ namespace MVAUtils
private:
// create new tree from root file
void newTree(const std::vector<int>& vars, const std::vector<float>& values);
// create new tree from decision tree
void newTree(const TMVA::DecisionTreeNode *node, bool isRegression, bool useYesNoLeaf);
float GetTreeResponse(const std::vector<float>& values, Node::index_t index) const;
float GetTreeResponse(const std::vector<float*>& pointers, Node::index_t index) const;
float m_offset; //!< the offset to add in the GetResponse functions
float m_sumWeights; //!< the sumOfBoostWeights--no need to recompute each call
std::vector<Node::index_t> m_forest; //!< indices of the top-level nodes of each tree
std::vector<float> m_weights; //!< boost weights
std::vector<float*> m_pointers; //!< where vars to cut on can be set (but can also be passed)
std::vector<Node> m_nodes; //!< where the nodes of the forest are stored
......
......@@ -17,6 +17,7 @@ using namespace MVAUtils;
/** c-tor from TTree **/
BDT::BDT(TTree *tree)
: m_sumWeights(0)
{
std::vector<int> *vars = 0;
std::vector<float> *values = 0;
......@@ -32,8 +33,12 @@ BDT::BDT(TTree *tree)
assert (values);
m_forest.push_back(m_nodes.size());
newTree(*vars, *values);
m_weights.push_back(m_offset);
m_sumWeights+=m_offset;
}
m_offset = m_weights[0];//original use of m_offset
delete vars;
delete values;
......@@ -44,14 +49,21 @@ BDT::BDT(TTree *tree)
}
/** c-tor from TMVA::MethodBDT **/
BDT::BDT(TMVA::MethodBDT* bdt)
BDT::BDT(TMVA::MethodBDT* bdt, bool isRegression, bool useYesNoLeaf)
: m_sumWeights(0)
{
assert(bdt);
m_offset = bdt->GetBoostWeights().size() ? bdt->GetBoostWeights()[0] : 0.;
std::vector<TMVA::DecisionTree*>::const_iterator it;
for(it = bdt->GetForest().begin(); it != bdt->GetForest().end(); ++it) {
m_forest.push_back(m_nodes.size());
newTree((*it)->GetRoot());
uint index=it - bdt->GetForest().begin();
if( bdt->GetBoostWeights().size() > index ) {
m_weights.push_back( bdt->GetBoostWeights()[index]);
m_sumWeights+=m_weights.back();
}
else m_weights.push_back(0);
newTree((*it)->GetRoot(), isRegression, useYesNoLeaf);
}
// // For Debug
// std::cout << "Constructed from a MethodBDT" << std::endl;
......@@ -93,7 +105,7 @@ void BDT::newTree(const std::vector<int>& vars, const std::vector<float>& values
/**
* Creates the full tree structure from TMVA::DecisionTree node.
**/
void BDT::newTree(const TMVA::DecisionTreeNode *node)
void BDT::newTree(const TMVA::DecisionTreeNode *node, bool isRegression, bool useYesNoLeaf)
{
// index is relative to the current node
......@@ -120,6 +132,8 @@ void BDT::newTree(const TMVA::DecisionTreeNode *node)
if (currParentIndex >= 0) {
right[currParentIndex] = i + 1 - currParentIndex;
currNode = currParent->GetCutType() ? currParent->GetLeft() : currParent->GetRight();
} else {
currNode = nullptr;
}
parent.pop();
parentIndex.pop();
......@@ -142,11 +156,16 @@ void BDT::newTree(const TMVA::DecisionTreeNode *node)
++i;
if (!currNode->GetLeft()){
// a leaf
m_nodes.emplace_back(-1, currNode->GetResponse(), right[i]);
m_nodes.emplace_back(-1,
isRegression ?
currNode->GetResponse() : useYesNoLeaf ? currNode->GetNodeType() : currNode->GetPurity(),
right[i]);
auto currParent = parent.top();
// if right has not been visited, next will be right
if (currParent) {
currNode = currParent->GetCutType() ? currParent->GetLeft() : currParent->GetRight();
} else {
currNode = nullptr;
}
parent.pop();
} else {
......@@ -178,6 +197,16 @@ float BDT::GetResponse(const std::vector<float*>& pointers) const
return result;
}
float BDT::GetClassification(const std::vector<float*>& pointers) const
{
float result = 0;
for (auto it = m_forest.begin(); it != m_forest.end(); ++it){
uint index = it-m_forest.begin();
result += GetTreeResponse(pointers, *it) * m_weights[index];
}
return result/m_sumWeights;
}
/** Return 2.0/(1.0+exp(-2.0*sum))-1, with no offset **/
float BDT::GetGradBoostMVA(const std::vector<float>& values) const
{
......@@ -299,6 +328,7 @@ TTree* BDT::WriteTree(TString name)
vars.push_back(m_nodes[j].GetVar());
values.push_back(m_nodes[j].GetVal());
}
m_offset = m_weights[i];
tree->Fill();
}
return tree;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment