Skip to content
Snippets Groups Projects
Commit 5260233a authored by Jana Schaarschmidt's avatar Jana Schaarschmidt
Browse files

Removing standalone FCS energy parametrization code from Athena

Former-commit-id: 8ef9ae1ff3b149d348dffdbfafeb57a6653d030d
parent 7d1a1fad
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 1901 deletions
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#include "TFCS1DRegression.h"
#include "ISF_FastCaloSimEvent/TFCS1DFunction.h"
#include "TMVA/Config.h"
#include "TMVA/Tools.h"
#include "TMVA/Reader.h"
#include "TMVA/Factory.h"
#include "TRandom1.h"
#include "TFile.h"
#include "TString.h"
#include "TMath.h"
#include "TMVA/DataLoader.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodMLP.h"
using namespace std;
void TFCS1DRegression::storeRegression(string weightfilename, vector<vector<double> > &fWeightMatrix0to1, vector<vector<double> > &fWeightMatrix1to2)
{
get_weights(weightfilename, fWeightMatrix0to1, fWeightMatrix1to2);
//for testing:
//validate(10,weightfilename);
}
void TFCS1DRegression::validate(int Ntoys,string weightfilename)
{
TRandom1* myRandom=new TRandom1(); myRandom->SetSeed(0);
//calculate regression from the weights and compare to TMVA value:
cout<<endl;
cout<<"--- Validating the regression value:"<<endl;
for(int i=0;i<Ntoys;i++)
{
double random=myRandom->Uniform(1);
//double myval=regression_value(random);
double myval=-1;
double tmvaval=tmvaregression_application(random,weightfilename);
cout<<"myvalue "<<myval<<" TMVA value "<<tmvaval<<endl;
}
}
TH1* TFCS1DRegression::transform(TH1* h_input, float &rangeval, float& startval)
{
bool do_transform=false;
float xmin=h_input->GetXaxis()->GetXmin();
float xmax=h_input->GetXaxis()->GetXmax();
if(xmin<0 || xmax>1) do_transform=true;
TH1D* h_out;
if(do_transform)
{
int nbins=h_input->GetNbinsX();
double min=0;
double max=1;
h_out=new TH1D("h_out","h_out",nbins,min,max);
for(int b=1;b<=nbins;b++)
h_out->SetBinContent(b,h_input->GetBinContent(b));
//store the inital range
rangeval=xmax-xmin;
startval=xmin;
}
if(!do_transform)
{
rangeval=-1;
h_out=(TH1D*)h_input->Clone("h_out");
}
return h_out;
}
double TFCS1DRegression::get_range_low(TH1* hist)
{
double range_low=0.0;
int bin_start=-1;
for(int b=1;b<=hist->GetNbinsX();b++)
{
if(hist->GetBinContent(b)>0 && bin_start<0)
{
bin_start=b;
range_low=hist->GetBinContent(b);
b=hist->GetNbinsX()+1;
}
}
return range_low;
}
TH1* TFCS1DRegression::get_cumul(TH1* hist)
{
TH1D* h_cumul=(TH1D*)hist->Clone("h_cumul");
double sum=0;
for(int b=1;b<=h_cumul->GetNbinsX();b++)
{
sum+=hist->GetBinContent(b);
h_cumul->SetBinContent(b,sum);
}
return h_cumul;
}
int TFCS1DRegression::testHisto(TH1* hist, std::string weightfilename, float &rangeval, float &startval, std::string outfilename, int neurons_start, int neurons_end, double cut_maxdev, int ntoys)
{
//int debug=1;
//transform the histogram
TH1* h_transf=transform(hist, rangeval, startval); h_transf->SetName("h_transf");
//new! map the y-axis to 0-1:
//Turn the histogram into a tree:
std::vector<double> contents;
std::vector<double> centers;
for(int b=1;b<=h_transf->GetNbinsX();b++)
{
contents.push_back(h_transf->GetBinContent(b));
centers.push_back(h_transf->GetBinCenter(b));
}
TTree* tree=new TTree("tree","tree");
Float_t x,y;
tree->Branch("x",&x,"x/F");
tree->Branch("y",&y,"y/F");
for(unsigned int i=0;i<centers.size();i++)
{
y=(Float_t)(contents[i]); //xvals are the BinContents
x=(Float_t)(centers[i]); //yvals are the BinCenters
tree->Fill();
}
double range_low=get_range_low(h_transf);
TRandom1* myRandom=new TRandom1(); myRandom->SetSeed(0);
int do_range=1;
double maxdev=1000;
int neurons=neurons_start;
std::vector<TH1*> histos;
while(maxdev>cut_maxdev && neurons<=neurons_end)
{
int pass_training=0;
try
{
TFCS1DRegression::tmvaregression_training(neurons, tree, weightfilename, outfilename, pass_training);
}
catch(std::runtime_error &e)
{
std::cout<<"An exception occured: "<<e.what()<<std::endl;
std::cout<<"Continuing anyway :P"<<std::endl;
pass_training=0;
}
if(pass_training)
{
std::cout<<"Testing the regression with "<<ntoys<<" toys"<<std::endl;
TH1* h_output=(TH1*)h_transf->Clone("h_output");
h_output->Reset();
for(int i=0;i<ntoys;i++)
{
double random=myRandom->Uniform(1);
if(do_range && random<range_low) random=range_low;
double value=TFCS1DRegression::tmvaregression_application(random,weightfilename);
h_output->Fill(value);
}
TH1* h_cumul=get_cumul(h_output);
h_cumul->SetName(Form("h_cumul_neurons%i",neurons));
histos.push_back(h_cumul);
maxdev=TFCS1DFunction::get_maxdev(h_transf,h_cumul);
std::cout<<"---> Neurons="<<neurons<<" MAXDEV="<<maxdev<<"%"<<std::endl;
}
neurons++;
}
//TH1* histclone=(TH1*)hist->Clone("histclone");
/*
TFile* out_iteration=new TFile(Form("output/iteration_%s.root",label.c_str()),"RECREATE");
for(int h=0;h<histos.size();h++)
{
out_iteration->Add(histos[h]);
}
out_iteration->Add(histclone);
out_iteration->Write();
out_iteration->Close();
*/
int regression_success=1;
if(maxdev>cut_maxdev) regression_success=0;
int status=0;
if(regression_success)
{
std::cout<<"Regression successful. Weights are stored."<<std::endl;
if(rangeval<0) status=1;
if(rangeval>=0) status=2;
}
if(!regression_success)
{
std::cout<<"Regression failed. Histogram is stored."<<std::endl;
status=3;
} //!success
return status;
}
double TFCS1DRegression::tmvaregression_application(double uniform, std::string weightfile)
{
using namespace TMVA;
TString myMethodList = "" ;
TMVA::Tools::Instance();
std::map<std::string,int> Use;
Use["PDERS"] = 0; Use["PDEFoam"] = 0; Use["KNN"] = 0;
Use["LD"] = 0; Use["FDA_GA"] = 0; Use["FDA_MC"] = 0;
Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["MLP"] = 1;
Use["SVM"] = 0; Use["BDT"] = 0; Use["BDTG"] = 0;
// Select methods (don't look at this code - not of interest)
if(myMethodList != "")
{
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
for (UInt_t i=0; i<mlist.size(); i++)
{
std::string regMethod(mlist[i]);
if (Use.find(regMethod) == Use.end())
{
std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
for(std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
std::cout << std::endl;
return 0;
}
Use[regMethod] = 1;
}
}
// --------------------------------------------------------------------------------------------------
TMVA::Reader *reader = new TMVA::Reader( "!Color:Silent");
Float_t y=uniform;
reader->AddVariable( "y", &y );
TString dir = Form("dl/%s/",weightfile.c_str());
TString prefix = "TMVARegression";
// Book method(s)
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++)
{
if (it->second)
{
TString methodName = it->first + " method";
TString weightfilename = dir + prefix + "_" + TString(it->first) + ".weights.xml";
reader->BookMVA( methodName, weightfilename );
}
}
Float_t val = (reader->EvaluateRegression("MLP method"))[0];
delete reader;
return val;
return 0;
}
void TFCS1DRegression::tmvaregression_training(int neurons, TTree *regTree, std::string weightfile, std::string outfilename, int& pass_training)
{
using namespace TMVA;
TString myMethodList = "" ;
TMVA::Tools::Instance();
std::map<std::string,int> Use;
Use["PDERS"] = 0; Use["PDEFoam"] = 0; Use["KNN"] = 0; Use["LD"] = 0; Use["FDA_GA"] = 0; Use["FDA_MC"] = 0;
Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["MLP"] = 1; Use["SVM"] = 0; Use["BDT"] = 0; Use["BDTG"] = 0;
std::cout << std::endl; std::cout << "==> Start TMVARegression with "<<neurons<<" Neurons "<<std::endl;
if(myMethodList != "")
{
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
for (UInt_t i=0; i<mlist.size(); i++)
{
std::string regMethod(mlist[i]);
if (Use.find(regMethod) == Use.end())
{
std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
std::cout << std::endl;
return;
}
Use[regMethod] = 1;
}
}
TFile* outputFile = TFile::Open( outfilename.c_str(), "RECREATE" );
TMVA::DataLoader *dl=new TMVA::DataLoader("dl");
TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" );
TString dirname=Form("%s/",weightfile.c_str());
(TMVA::gConfig().GetIONames()).fWeightFileDir = dirname;
dl->AddVariable( "y", "y", 'F' );
dl->AddTarget( "x" );
Double_t regWeight = 1.0;
dl->AddRegressionTree( regTree, regWeight );
TCut mycut = "";
//dl->PrepareTrainingAndTestTree( mycut,"nTrain_Regression=0:nTest_Regression=1:SplitMode=Block:NormMode=NumEvents:!V" );
dl->PrepareTrainingAndTestTree( mycut,"nTrain_Regression=0:nTest_Regression=0:SplitMode=Alternate:NormMode=NumEvents:!V" );
factory->BookMethod(dl, TMVA::Types::kMLP, "MLP",
Form("!H:!V:VerbosityLevel=Info:NeuronType=sigmoid:NCycles=20000:HiddenLayers=%i:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator",neurons));
// Train MVAs using the set of training events
factory->TrainAllMethods();
// ---- Evaluate all MVAs using the set of test events
//factory->TestAllMethods();
// ----- Evaluate and compare performance of all configured MVAs
//factory->EvaluateAllMethods();
// Save the output
outputFile->Close();
std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
std::cout << "==> TMVARegression is done!" << std::endl;
delete factory;
delete dl;
pass_training=1;
}
void TFCS1DRegression::get_weights(string weightfile, vector<vector<double> > &fWeightMatrix0to1, vector<vector<double> > &fWeightMatrix1to2)
{
using namespace TMVA;
int debug=1;
TString myMethodList = "" ;
TMVA::Tools::Instance();
std::map<std::string,int> Use;
// --- Mutidimensional likelihood and Nearest-Neighbour methods
Use["PDERS"] = 0; Use["PDEFoam"] = 0; Use["KNN"] = 0;
Use["LD"] = 0; Use["FDA_GA"] = 0; Use["FDA_MC"] = 0;
Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["MLP"] = 1;
Use["SVM"] = 0; Use["BDT"] = 0; Use["BDTG"] = 0;
// ---------------------------------------------------------------
// Select methods (don't look at this code - not of interest)
if (myMethodList != "")
{
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
for (UInt_t i=0; i<mlist.size(); i++)
{
std::string regMethod(mlist[i]);
if (Use.find(regMethod) == Use.end())
{
std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
for(std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
std::cout << std::endl;
}
Use[regMethod] = 1;
}
}
// --------------------------------------------------------------------------------------------------
TMVA::Reader *reader = new TMVA::Reader( "!Color:Silent");
Float_t y=0.5; //just a dummy
reader->AddVariable( "y", &y );
TString dir = Form("dl/%s/",weightfile.c_str());
TString prefix = "TMVARegression";
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++)
{
if (it->second)
{
TString methodName = it->first + " method";
TString weightfilename = dir + prefix + "_" + TString(it->first) + ".weights.xml";
reader->BookMVA( methodName, weightfilename );
}
}
TMVA::IMethod* m=reader->FindMVA("MLP method");
TMVA::MethodMLP *mlp = dynamic_cast<TMVA::MethodMLP*>(m);
TObjArray* Network=mlp->fNetwork;
int num_neurons_input=((TObjArray*)Network->At(1))->GetEntriesFast()-1;
if(debug) cout<<"num_neurons_input "<<num_neurons_input<<endl;
// mlp->MakeClass(Form("mlpcode_neurons%i.C",num_neurons_input));
int fLayers = Network->GetEntriesFast();
for(int a=0;a<((TObjArray*)Network->At(1))->GetEntriesFast();a++)
{
vector<double> thisvector;
for(int b=0;b<((TObjArray*)Network->At(0))->GetEntriesFast();b++)
thisvector.push_back(0);
fWeightMatrix0to1.push_back(thisvector);
}
for(int a=0;a<((TObjArray*)Network->At(2))->GetEntriesFast();a++)
{
vector<double> thisvector;
for(int b=0;b<((TObjArray*)Network->At(1))->GetEntriesFast();b++)
thisvector.push_back(0);
fWeightMatrix1to2.push_back(thisvector);
}
TObjArray* curLayer= (TObjArray*)Network->At(0);
Int_t numNeurons = curLayer->GetEntriesFast();
for (Int_t n = 0; n < numNeurons; n++)
{
TMVA::TNeuron* neuron = (TMVA::TNeuron*)curLayer->At(n);
int numSynapses = neuron->NumPostLinks();
for (int s = 0; s < numSynapses; s++)
{
TMVA::TSynapse* synapse = neuron->PostLinkAt(s);
fWeightMatrix0to1[s][n]=synapse->GetWeight();
if(debug) cout<<"fWeightMatrix0to1["<<s<<"]["<<n<<"] "<<synapse->GetWeight()<<endl;
}
}
curLayer= (TObjArray*)Network->At(1);
numNeurons = curLayer->GetEntriesFast();
for (Int_t n = 0; n < numNeurons; n++)
{
TMVA::TNeuron* neuron = (TMVA::TNeuron*)curLayer->At(n);
int numSynapses = neuron->NumPostLinks();
for (int s = 0; s < numSynapses; s++)
{
TMVA::TSynapse* synapse = neuron->PostLinkAt(s);
fWeightMatrix1to2[s][n]=synapse->GetWeight();
if(debug) cout<<"fWeightMatrix1to2["<<s<<"]["<<n<<"] "<<synapse->GetWeight()<<endl;
}
}
delete reader;
}
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#ifndef TFCS1DRegression_h
#define TFCS1DRegression_h
// STL includes
#include <string>
#include "TH1.h"
#include "TTree.h"
class TFCS1DRegression
{
public:
TFCS1DRegression() {};
~TFCS1DRegression() {};
static int testHisto(TH1* hist, std::string, float&, float&, std::string, int, int, double, int);
static void storeRegression(string, vector<vector<double> > &fWeightMatrix0to1, vector<vector<double> > &fWeightMatrix1to2);
static void get_weights(string weightfile, vector<vector<double> > &fWeightMatrix0to1, vector<vector<double> > &fWeightMatrix1to2);
static TH1* transform(TH1* h_input, float &rangeval, float& startval);
static double get_range_low(TH1* hist);
static void tmvaregression_training(int, TTree *regTree, std::string, std::string, int&);
static TH1* get_cumul(TH1* hist);
static double tmvaregression_application(double, std::string);
static void validate(int,string);
};
#if defined(__MAKECINT__)
#pragma link C++ class TFCS1DRegression+;
#endif
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#include "ISF_FastCaloSimEvent/TFCS1DFunctionRegression.h"
#include "ISF_FastCaloSimEvent/TFCS1DFunctionRegressionTF.h"
#include "ISF_FastCaloSimEvent/TFCS1DFunctionHistogram.h"
#include "ISF_FastCaloSimEvent/TFCS1DFunction.h"
#include "TFCSFunction.h"
#include "TFCS1DRegression.h"
#include "TFile.h"
#include "TRandom1.h"
#include <iostream>
//#include <string>
#include <sstream>
using namespace std;
//=============================================
//======= TFCSFunction =========
//=============================================
TFCS1DFunction* TFCSFunction::Create(TH1* hist,int skip_regression,int neurons_start, int neurons_end, double maxdev_regression, double maxdev_smartrebin, int ntoys)
{
int verbose_level=1;
// This function is called by the user when he wants a histogram to be transformed into a space efficient variant for the parametrization.
// All code that decides whether a histogram should be transformed into a TFCS1DFunctionRegression or TFCS1DFunctionHistogram
// should go here.
TRandom1* random=new TRandom1();
random->SetSeed(0);
int myrand=floor(random->Uniform()*1000000);
stringstream ss;
ss << myrand;
string myrandstr = ss.str();
string xmlweightfilename="regressionweights"+myrandstr;
string outfilename="TMVAReg"+myrandstr+".root";
float rangeval, startval;
TFCS1DFunctionRegression* freg=0;
TFCS1DFunctionRegressionTF* fregTF=0;
TFCS1DFunctionHistogram* fhis=0;
int status=3;
if(!skip_regression)
status=TFCS1DRegression::testHisto(hist,xmlweightfilename,rangeval,startval,outfilename,neurons_start,neurons_end,maxdev_regression,ntoys);
if(verbose_level==1) cout<<"--- testHisto status="<<status<<endl;
if(status==1)
{
if(verbose_level==1) cout<<"Regression"<<endl;
freg=new TFCS1DFunctionRegression();
vector<vector<double> > fWeightMatrix0to1;
vector<vector<double> > fWeightMatrix1to2;
TFCS1DRegression::storeRegression(xmlweightfilename, fWeightMatrix0to1, fWeightMatrix1to2);
freg->set_weights(fWeightMatrix0to1, fWeightMatrix1to2);
remove(outfilename.c_str());
remove(Form("dl/%s/TMVARegression_MLP.weights.xml",xmlweightfilename.c_str()));
remove(Form("dl/%s",xmlweightfilename.c_str()));
return freg;
}
if(status==2)
{
if(verbose_level==1) cout<<"Regression and Transformation"<<endl;
fregTF=new TFCS1DFunctionRegressionTF(rangeval,startval);
vector<vector<double> > fWeightMatrix0to1;
vector<vector<double> > fWeightMatrix1to2;
TFCS1DRegression::storeRegression(xmlweightfilename, fWeightMatrix0to1, fWeightMatrix1to2);
fregTF->set_weights(fWeightMatrix0to1, fWeightMatrix1to2);
remove(outfilename.c_str());
remove(Form("dl/%s/TMVARegression_MLP.weights.xml",xmlweightfilename.c_str()));
remove(Form("dl/%s",xmlweightfilename.c_str()));
return fregTF;
}
if(status==3)
{
cout<<"xmlweightfilename: "<<xmlweightfilename<<endl;
remove(outfilename.c_str());
remove(Form("dl/%s/TMVARegression_MLP.weights.xml",xmlweightfilename.c_str()));
remove(Form("dl/%s",xmlweightfilename.c_str()));
if(verbose_level==1) cout<<"Histogram"<<endl;
fhis=new TFCS1DFunctionHistogram(hist, verbose_level,maxdev_smartrebin);
return fhis;
}
if(status==0)
{
cout<<"something went wrong :("<<endl;
return 0;
}
return 0;
}
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#ifndef TFCSFunction_h
#define TFCSFunction_h
#include "ISF_FastCaloSimEvent/TFCS1DFunction.h"
class TFCSFunction
{
public:
TFCSFunction() {};
//virtual ~TFCSFunction() {};
~TFCSFunction() {};
static TFCS1DFunction* Create(TH1* hist,int,int,int,double,double,int);
private:
ClassDef(TFCSFunction,1)
};
#if defined(__MAKECINT__)
#pragma link C++ class TFCSFunction+;
#endif
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
using namespace std;
#include "TMatrixD.h"
#include "TVectorD.h"
#include "TH1D.h"
#include "TFile.h"
#include "TROOT.h"
#include "TSystem.h"
#include "TApplication.h"
#include "TTree.h"
#include "TSystem.h"
#include "TH2D.h"
#include "TPrincipal.h"
#include "TMath.h"
#include "TBrowser.h"
#include "firstPCA.h"
#include "ISF_FastCaloSimParametrization/TreeReader.h"
#include "TLorentzVector.h"
#include "TChain.h"
#include <iostream>
firstPCA::firstPCA()
{
//default parameters:
m_numberfinebins=5000;
m_edepositcut=0.001;
m_cut_eta_low=-100;
m_cut_eta_high=100;
m_nbins1=5;
m_nbins2=1;
m_debuglevel=0;
m_apply_etacut=1;
m_outfilename="";
m_chain = 0;
}
firstPCA::firstPCA(TChain* chain,string outfilename)
{
//default parameters:
m_numberfinebins=5000;
m_edepositcut=0.001;
m_cut_eta_low=-100;
m_cut_eta_high=100;
m_nbins1=5;
m_nbins2=1;
m_debuglevel=0;
m_outfilename=outfilename;
m_chain=chain;
}
void firstPCA::apply_etacut(int flag)
{
m_apply_etacut=flag;
}
void firstPCA::set_cumulativehistobins(int bins)
{
m_numberfinebins=bins;
}
void firstPCA::set_edepositcut(double cut)
{
m_edepositcut=cut;
}
void firstPCA::set_etacut(double cut_low, double cut_high)
{
m_cut_eta_low=cut_low;
m_cut_eta_high=cut_high;
}
void firstPCA::set_pcabinning(int bin1,int bin2)
{
m_nbins1=bin1;
m_nbins2=bin2;
}
void firstPCA::run()
{
cout<<endl;
cout<<"****************"<<endl;
cout<<" 1st PCA"<<endl;
cout<<"****************"<<endl;
cout<<endl;
cout<<"Now running firstPCA with the following parameters:"<<endl;
cout<<" Energy deposit cut: "<<m_edepositcut<<endl;
cout<<" PCA binning: 1st compo "<<m_nbins1<<", 2nd compo "<<m_nbins2<<endl;
cout<<" Number of bins in the cumulative histograms "<<m_numberfinebins<<endl;
cout<<" Eta cut: "<<m_cut_eta_low<<" "<<m_cut_eta_high<<endl;
cout<<" Apply eta cut: "<<m_apply_etacut<<endl;
cout<<endl;
if(m_debuglevel) cout<<"initialize input tree reader"<<endl;
TreeReader* read_inputTree = new TreeReader();
read_inputTree->SetTree(m_chain);
vector<int> layerNr;
cout<<"--- Get relevant layers and Input Histograms"<<endl;
vector<TH1D*> histos_data=get_relevantlayers_inputs(layerNr,read_inputTree);
vector<string> layer;
for(unsigned int l=0;l<layerNr.size();l++)
layer.push_back(Form("layer%i",layerNr[l]));
layer.push_back("totalE");
vector<TH1D*> cumul_data =get_cumul_histos(layer, histos_data);
cout<<"--- Now define the TPrincipal"<<endl;
TPrincipal* principal = new TPrincipal(layer.size(),"ND"); //ND means normalize cov matrix and store data
TTree *T_Gauss = new TTree("T_Gauss","T_Gauss");
double* data_Gauss = new double[layer.size()];
int eventNumber;
for(unsigned int l=0;l<layer.size();l++)
{
T_Gauss->Branch(Form("data_Gauss_%s",layer[l].c_str()),&data_Gauss[l],Form("data_Gauss_%s/D",layer[l].c_str()));
T_Gauss->Branch("eventNumber",&eventNumber,"eventNumber/I");
}
cout<<"--- Uniformization/Gaussianization"<<endl;
for(int event=0;event<read_inputTree->GetEntries();event++)
{
read_inputTree->GetEntry(event);
eventNumber=event;
double E = read_inputTree->GetVariable("TruthE");
double px = read_inputTree->GetVariable("TruthPx");
double py = read_inputTree->GetVariable("TruthPy");
double pz = read_inputTree->GetVariable("TruthPz");
TLorentzVector tlv; tlv.SetPxPyPzE(px,py,pz,E);
bool pass_eta=0;
if(!m_apply_etacut) pass_eta=1;
if(m_apply_etacut) pass_eta=(fabs(tlv.Eta())>m_cut_eta_low && fabs(tlv.Eta())<m_cut_eta_high);
if(pass_eta)
{
double total_e=read_inputTree->GetVariable("total_cell_energy");
for(unsigned int l=0;l<layer.size();l++)
{
double data = 0.;
//double data_uniform;
if(l==layer.size()-1)
data = total_e;
else
data = read_inputTree->GetVariable(Form("cell_energy[%d]",layerNr[l]))/total_e;
//Uniformization
double cumulant = get_cumulant(data,cumul_data[l]);
cumulant = TMath::Min(cumulant,1.-10e-10);
cumulant = TMath::Max(cumulant,0.+10e-10);
//data_uniform = cumulant;
//Gaussianization
double maxErfInvArgRange = 0.99999999;
double arg = 2.0*cumulant - 1.0;
arg = TMath::Min(+maxErfInvArgRange,arg);
arg = TMath::Max(-maxErfInvArgRange,arg);
data_Gauss[l] = 1.414213562*TMath::ErfInverse(arg);
} //for layers
//Add this datapoint to the PCA
principal->AddRow(data_Gauss);
T_Gauss->Fill();
} //pass eta
if(event%2000==0) cout<<event<<" from "<<read_inputTree->GetEntries()<<" done "<<endl;
} //event loop
cout<<"--- MakePrincipals()"<<endl;
principal->MakePrincipals();
cout<<"--- PCA Results"<<endl;
principal->Print("MSE");
cout<<"--- PCA application and binning of first principal component"<<endl;
TreeReader* tree_Gauss = new TreeReader();
tree_Gauss->SetTree(T_Gauss);
int Bin_1stPC1=0;
int Bin_1stPC2=0;
TH1D* hPCA_first_component = new TH1D("hPCA_first_component","hPCA_first_component",10000,-20,20);
for(int event=0;event<tree_Gauss->GetEntries();event++)
{
tree_Gauss->GetEntry(event);
double* data_PCA = new double[layer.size()];
double* input_data = new double[layer.size()];
for (unsigned int l=0;l<layer.size(); l++)
input_data[l] = tree_Gauss->GetVariable(Form("data_Gauss_%s",layer[l].c_str()));
principal->X2P(input_data,data_PCA);
hPCA_first_component->Fill(data_PCA[0]);
}
cout<<"--- Binning 1st Principal Component" <<endl;
double xq[m_nbins1]; double yq[m_nbins1];
//2D binning:
quantiles( hPCA_first_component, m_nbins1, xq , yq );
for (int m = 0; m < m_nbins1 ; m++)
{
int a=1;
if(m>0) a=hPCA_first_component->FindBin(yq[m-1]);
int b=hPCA_first_component->FindBin(yq[m]);
cout<<"Quantiles "<<m+1<<" "<<xq[m]<<" "<<yq[m]<<" -> Events "<<hPCA_first_component->Integral(a,b-1)<<endl;
}
std::vector<TH1D*> h_compo1;
for(int m=0;m<m_nbins1;m++)
h_compo1.push_back(new TH1D(Form("h_compo1_bin%i",m),Form("h_compo1_bin%i",m),20000,-20,20));
cout<<"--- PCA application and binning 2nd principal component"<<endl;
for(int event=0;event<tree_Gauss->GetEntries();event++)
{
tree_Gauss->GetEntry(event);
double* data_PCA = new double[layer.size()];
double* input_data = new double[layer.size()];
for (unsigned int l=0;l<layer.size();l++)
input_data[l] = tree_Gauss->GetVariable(Form("data_Gauss_%s",layer[l].c_str()));
principal->X2P(input_data,data_PCA);
int firstbin=-42;
//Binning 1st PC
for(int m = 0 ; m < m_nbins1 ; m++)
{
if(m==0 && data_PCA[0] <= yq[0])
firstbin = 0;
if(m > 0 && data_PCA[0] > yq[m-1] && data_PCA[0] <= yq[m])
firstbin = m;
}
if (firstbin >= 0) h_compo1[firstbin]->Fill(data_PCA[1]);
}
// non-standard
//double yq2d[m_nbins1][m_nbins2];
std::vector<std::vector<double> > yq2d (m_nbins1, std::vector<double>(m_nbins2));
for(int m=0;m<m_nbins1;m++)
{
if(m_debuglevel) cout<<"now do m "<<m<<endl;
double xq2[m_nbins2]; double yq2[m_nbins2];
quantiles( h_compo1[m], m_nbins2, xq2 , yq2);
if(m_debuglevel) {
cout<<"1stPCA bin# "<<m<<" Events "<<h_compo1[m]->Integral()<<endl;
}
for (int u = 0; u < m_nbins2 ; u++)
{
int a=0;
if(u>0) a=h_compo1[m]->FindBin(yq2[u-1]);
int b=h_compo1[m]->FindBin(yq2[u]);
cout<<"Quantile # "<<u<<" "<<xq2[u]<<" "<<yq2[u]<<" -> Events "<<h_compo1[m]->Integral(a,b-1)<<endl;
}
for(int u=0;u<m_nbins2;u++)
yq2d[m][u]=yq2[u];
}
// cleanup
for (auto it = h_compo1.begin(); it != h_compo1.end(); ++it)
delete *it;
h_compo1.clear();
cout<<"--- Fill a tree that has the bin information"<<endl;
int firstPCAbin;
double* data = new double[layer.size()];
TTree* tree_1stPCA=new TTree(Form("tree_1stPCA"),Form("tree_1stPCA"));
tree_1stPCA->Branch("firstPCAbin",&firstPCAbin,"firstPCAbin/I");
for(unsigned int l=0;l<layer.size();l++)
tree_1stPCA->Branch(Form("energy_%s",layer[l].c_str()),&data[l],Form("energy_%s/D",layer[l].c_str()));
for(int event=0;event<tree_Gauss->GetEntries();event++)
{
tree_Gauss->GetEntry(event);
double* data_PCA = new double[layer.size()];
double* input_data = new double[layer.size()];
int eventNumber=tree_Gauss->GetVariable("eventNumber");
for(unsigned int l=0; l<layer.size();l++)
input_data[l] = tree_Gauss->GetVariable(Form("data_Gauss_%s",layer[l].c_str()));
//PCA Application
principal->X2P(input_data,data_PCA);
//Binning 1st and 2nd PC
for(int m = 0 ; m < m_nbins1 ; m++)
{
if(m==0 && data_PCA[0]<=yq[m])
{
Bin_1stPC1 = 0;
for(int u=0;u<m_nbins2;u++)
{
if(u==0 && data_PCA[1]<=yq2d[0][u])
Bin_1stPC2 = 0;
if(u>0 && data_PCA[1]>yq2d[0][u-1] && data_PCA[1]<=yq2d[0][u])
Bin_1stPC2 = u;
}
}
if(m>0 && data_PCA[0]>yq[m-1] && data_PCA[0]<=yq[m])
{
Bin_1stPC1 = m;
for(int u=0;u<m_nbins2;u++)
{
if(u==0 && data_PCA[1]<=yq2d[m][u])
Bin_1stPC2 = 0;
if(u>0 && data_PCA[1]>yq2d[m][u-1] && data_PCA[1]<=yq2d[m][u])
Bin_1stPC2 = u;
}
}
}
firstPCAbin=Bin_1stPC1+m_nbins1*Bin_1stPC2+1;
//find the energy fractions and total energy for that given event
read_inputTree->GetEntry(eventNumber);
for(unsigned int l=0;l<layer.size();l++)
{
if(l==layer.size()-1)
data[l] = read_inputTree->GetVariable("total_cell_energy");
else
data[l] = read_inputTree->GetVariable(Form("cell_energy[%d]",layerNr[l]))/read_inputTree->GetVariable("total_cell_energy");
}
tree_1stPCA->Fill();
} //for events in gauss
//add a histogram that holds the relevant layer:
int totalbins=m_nbins1*m_nbins2;
TH2I* h_layer=new TH2I("h_layer","h_layer",totalbins,0.5,totalbins+0.5,25,-0.5,24.5);
h_layer->GetXaxis()->SetTitle("PCA bin");
h_layer->GetYaxis()->SetTitle("Layer");
for(int b=0;b<totalbins;b++)
{
for(int l=0;l<25;l++)
{
int is_relevant=0;
for(unsigned int i=0;i<layerNr.size();i++)
{
if(l==layerNr[i]) is_relevant=1;
}
h_layer->SetBinContent(b+1,l+1,is_relevant);
}
}
TFile* output=new TFile(m_outfilename.c_str(),"RECREATE");
output->Add(h_layer);
output->Add(tree_1stPCA);
output->Write();
cout<<"1st PCA is done. Output file: "<<m_outfilename<<endl;
// cleanup
delete read_inputTree;
delete tree_Gauss;
} // run
vector<TH1D*> firstPCA::get_cumul_histos(vector<string> layer, vector<TH1D*> histos)
{
vector<TH1D*> cumul;
for(unsigned int i=0;i<histos.size();i++)
{
TH1D* h_cumul=(TH1D*)histos[i]->Clone(Form("h_cumul_%s",layer[i].c_str()));
for (int b=1; b<=h_cumul->GetNbinsX(); b++)
{
h_cumul->SetBinContent(b,histos[i]->Integral(1,b));
h_cumul->SetBinError(b,0);
}
cumul.push_back(h_cumul);
}
return cumul;
}
vector<TH1D*> firstPCA::get_relevantlayers_inputs(vector<int> &layerNr, TreeReader* read_inputTree)
{
int NLAYER=25;
vector<TH1D*> data;
vector<double> MaxInputs;
vector<double> MinInputs;
double max_e=0.0;
double min_e=100000000;
vector<double> sum_efraction;
for(int l=0;l<NLAYER;l++)
{
sum_efraction.push_back(0.0);
MaxInputs.push_back(0.0);
MinInputs.push_back(1000000.0);
}
int N_pass_eta=0;
for(int event=0;event<read_inputTree->GetEntries();event++ )
{
read_inputTree->GetEntry(event);
double E = read_inputTree->GetVariable("TruthE");
double px = read_inputTree->GetVariable("TruthPx");
double py = read_inputTree->GetVariable("TruthPy");
double pz = read_inputTree->GetVariable("TruthPz");
TLorentzVector tlv; tlv.SetPxPyPzE(px,py,pz,E);
bool pass_eta=0;
if(!m_apply_etacut) pass_eta=1;
if(m_apply_etacut) pass_eta=(fabs(tlv.Eta())>m_cut_eta_low && fabs(tlv.Eta())<m_cut_eta_high);
if(pass_eta)
{
N_pass_eta++;
double total_e=read_inputTree->GetVariable("total_cell_energy");
for(int l=0;l<NLAYER;l++)
{
double efraction = read_inputTree->GetVariable(Form("cell_energy[%d]",l))/total_e;
sum_efraction[l] += efraction;
if(efraction>MaxInputs[l]) MaxInputs[l]=efraction;
if(efraction<MinInputs[l]) MinInputs[l]=efraction;
if(total_e>max_e) max_e=total_e;
if(total_e<min_e) min_e=total_e;
}
}
if(event%2000==0) cout<<event<<" from "<<read_inputTree->GetEntries()<<" done"<<endl;
}
cout<<"rel. layer"<<endl;
for(int l=0;l<NLAYER;l++)
{
if(N_pass_eta>0)
{
if(sum_efraction[l]/N_pass_eta>=m_edepositcut)
{
layerNr.push_back(l);
cout<<"Layer " <<l <<" is relevant! sum_efraction= "<<sum_efraction[l]<<" sum/entries= "<<sum_efraction[l]/N_pass_eta<<endl;
}
}
}
for(unsigned int k=0;k<layerNr.size();k++)
cout<<"Relevant "<<layerNr[k]<<endl;
cout<<"init data histos"<<endl;
for(int l=0;l<NLAYER;l++)
{
int is_rel=0;
for(unsigned int k=0;k<layerNr.size();k++)
{
if(l==layerNr[k])
is_rel=1;
}
if(is_rel)
{
TH1D* h_data=new TH1D(Form("h_data_layer%i",l),Form("h_data_layer%i",l),m_numberfinebins,MinInputs[l],MaxInputs[l]);
h_data->Sumw2();
data.push_back(h_data);
}
}
TH1D* h_data_total=new TH1D("h_data_totalE","h_data_totalE",m_numberfinebins,min_e,max_e);
data.push_back(h_data_total);
cout<<"fill data histos"<<endl;
for(int event=0;event<read_inputTree->GetEntries();event++)
{
read_inputTree->GetEntry(event);
TLorentzVector tlv; tlv.SetPxPyPzE(read_inputTree->GetVariable("TruthPx"),read_inputTree->GetVariable("TruthPy"),read_inputTree->GetVariable("TruthPz"),read_inputTree->GetVariable("TruthE"));
bool pass_eta=0;
if(!m_apply_etacut) pass_eta=1;
if(m_apply_etacut) pass_eta=(fabs(tlv.Eta())>m_cut_eta_low && fabs(tlv.Eta())<m_cut_eta_high);
if(pass_eta)
{
double total_e=read_inputTree->GetVariable("total_cell_energy");
((TH1D*)data[data.size()-1])->Fill(total_e);
for(unsigned int l=0;l<layerNr.size();l++)
{
((TH1D*)data[l])->Fill(read_inputTree->GetVariable(Form("cell_energy[%d]",layerNr[l]))/total_e);
}
}
if(event%2000==0) cout<<event<<" from "<<read_inputTree->GetEntries()<<" done"<<endl;
}
for(unsigned int l=0;l<data.size();l++)
{
((TH1D*)data[l])->Scale(1.0/data[l]->Integral());
}
return data;
}
double firstPCA::get_cumulant(double x, TH1D* h)
{
//Cumulant " la TMVA"
int nbin = h->GetNbinsX();
int bin = h->FindBin(x);
bin = TMath::Max(bin,1);
bin = TMath::Min(bin,h->GetNbinsX());
double AvNuEvPerBin;
double Tampon = 0 ;
for (int i=1; i<=nbin; i++) {
Tampon += h->GetBinContent(i);
}
AvNuEvPerBin = Tampon/nbin;
double cumulant;
double x0, x1, y0, y1;
double total = h->GetNbinsX()*AvNuEvPerBin;
double supmin = 0.5/total;
x0 = h->GetBinLowEdge(TMath::Max(bin,1));
x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
//Zero bin
if (bin == 0) {
y0 = supmin;
y1 = supmin;
}
if (bin == 1) {
y0 = supmin;
}
if (bin > h->GetNbinsX()) {
y0 = 1.-supmin;
y1 = 1.-supmin;
}
if (bin == h->GetNbinsX()) {
y1 = 1.-supmin;
}
////////////////////////
if (x0 == x1) {
cumulant = y1;
} else {
cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
}
if (x <= h->GetBinLowEdge(1)){
cumulant = supmin;
}
if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
cumulant = 1-supmin;
}
return cumulant;
}
void firstPCA::quantiles(TH1D* h, int nq, double* xq, double* yq)
{
//Function for quantiles
// h Input histo
// nq number of quantiles
// xq position where to compute the quantiles in [0,1]
// yq array to contain the quantiles
for (int i=0;i<nq;i++)
{
xq[i] = double(i+1)/nq;
h->GetQuantiles(nq,yq,xq);
}
}
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#ifndef firstPCA_h
#define firstPCA_h
#include "TChain.h"
#include "ISF_FastCaloSimParametrization/TreeReader.h"
using namespace std;
class firstPCA
{
public:
firstPCA(TChain*, string);
firstPCA();
virtual ~firstPCA() {}
void run();
vector<TH1D*> get_relevantlayers_inputs(vector<int> &, TreeReader*);
vector<TH1D*> get_cumul_histos(vector<string> layer, vector<TH1D*>);
static double get_cumulant(double x, TH1D* h);
void quantiles(TH1D* h, int nq, double* xq, double* yq);
void set_cumulativehistobins(int);
void set_edepositcut(double);
void set_etacut(double,double);
void apply_etacut(int);
void set_pcabinning(int,int);
private:
int m_debuglevel;
double m_cut_eta_low;
double m_cut_eta_high;
int m_apply_etacut;
int m_nbins1;
int m_nbins2;
int m_numberfinebins;
double m_edepositcut;
string m_outfilename;
TChain* m_chain;
ClassDef(firstPCA,1);
};
#if defined(__MAKECINT__)
#pragma link C++ class firstPCA+;
#endif
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#include "TMatrixF.h"
#include "TMatrixD.h"
#include "TMatrixDSym.h"
#include "TMatrixDSymEigen.h"
#include "TVectorF.h"
#include "TH1D.h"
#include "TFile.h"
#include "TROOT.h"
#include "TSystem.h"
#include "TApplication.h"
#include "TTree.h"
#include "TSystem.h"
#include "TH2D.h"
#include "TPrincipal.h"
#include "TMath.h"
#include "TBrowser.h"
#include "secondPCA.h"
#include "firstPCA.h"
#include "TFCSFunction.h"
#include "ISF_FastCaloSimEvent/TFCS1DFunction.h"
#include "ISF_FastCaloSimParametrization/TreeReader.h"
#include "ISF_FastCaloSimEvent/IntArray.h"
#include <iostream>
#include <sstream>
using namespace std;
secondPCA::secondPCA(string firstpcafilename, string outfilename)
{
m_firstpcafilename=firstpcafilename;
m_outfilename=outfilename;
m_numberfinebins =5000;
m_storeDetails =0;
m_PCAbin =-1; //-1 means all bins
m_skip_regression =0;
m_neurons_start =2;
m_neurons_end =8;
m_ntoys =1000;
m_maxdev_regression=5;
m_maxdev_smartrebin=5;
}
void secondPCA::set_cut_maxdeviation_regression(double val)
{
m_maxdev_regression=val;
}
void secondPCA::set_cut_maxdeviation_smartrebin(double val)
{
m_maxdev_smartrebin=val;
}
void secondPCA::set_Ntoys(int val)
{
m_ntoys=val;
}
void secondPCA::set_neurons_iteration(int start,int end)
{
m_neurons_start=start;
m_neurons_end=end;
}
void secondPCA::set_storeDetails(int flag)
{
m_storeDetails=flag;
}
void secondPCA::set_cumulativehistobins(int bins)
{
m_numberfinebins=bins;
}
void secondPCA::set_PCAbin(int bin)
{
m_PCAbin=bin;
}
void secondPCA::set_skip_regression(int flag)
{
m_skip_regression=flag;
}
void secondPCA::run()
{
//Open inputfile:
TFile* inputfile=TFile::Open(m_firstpcafilename.c_str(),"READ");
if(!inputfile) {
cout<<"ERROR: Inputfile could not be opened"<<endl;
// I don't think we can continue...
return;
}
int nbins; int nbins0=1;
vector<int> layerNr=getLayerBins(inputfile, nbins);
//if a specific PCA bin was set,check if this is available, and set nbins to that
if(m_PCAbin>0)
{
if(m_PCAbin<=nbins)
{
nbins =m_PCAbin;
nbins0=m_PCAbin;
string binlabel=Form("_bin%i",m_PCAbin);
m_outfilename=m_outfilename+binlabel;
}
else cout<<"ERROR: The PCA bin you set is not available"<<endl;
}
vector<string> layer;
for(unsigned int l=0;l<layerNr.size();l++)
layer.push_back(Form("layer%i",layerNr[l]));
layer.push_back("totalE");
int* samplings=new int[layerNr.size()];
for(unsigned int i=0;i<layerNr.size();i++)
samplings[i]=layerNr[i];
cout<<endl;
cout<<"****************"<<endl;
cout<<" 2nd PCA"<<endl;
cout<<"****************"<<endl;
cout<<endl;
cout<<"Now running 2nd PCA with the following parameters:"<<endl;
cout<<" Input file (1st PCA): "<<m_firstpcafilename<<endl;
cout<<" Number of bins of the cumulative histos: "<<m_numberfinebins<<endl;
cout<<" storeDetails: "<<m_storeDetails<<endl;
cout<<" PCA bin number(s): ";
for(int b=nbins0;b<=nbins;b++) cout<<b<<" "; cout<<endl;
cout<<" skip regression: "<<m_skip_regression<<endl;
cout<<" Regression test toys:" <<m_ntoys<<endl;
cout<<" Neurons in the regression iteration:" <<m_neurons_start<<" - "<<m_neurons_end<<endl;
cout<<" Maximal deviation of approximated histogram (regression): "<<m_maxdev_regression<<"%"<<endl;
cout<<" Maximal deviation of approximated histogram (smart-rebin): "<<m_maxdev_smartrebin<<"%"<<endl;
cout<<endl;
cout<<"--- Init the TreeReader"<<endl;
TTree* InputTree = (TTree*)inputfile->Get("tree_1stPCA");
TreeReader* read_inputTree = new TreeReader();
read_inputTree->SetTree(InputTree);
TFile* output=new TFile(m_outfilename.c_str(),"RECREATE");
for(int b=nbins0;b<=nbins;b++)
{
output->mkdir(Form("bin%i",b));
output->mkdir(Form("bin%i/pca",b));
for(unsigned int l=0;l<layer.size();l++)
output->mkdir(Form("bin%i/%s",b,layer[l].c_str()));
}
output->Write();
output->Close();
for(int b=nbins0;b<=nbins;b++)
{
cout<<"--- now performing 2nd PCA in bin "<<b<<endl;
do_pca(layer, b, read_inputTree, samplings);
}
cout<<"2nd PCA is done. Output: "<<m_outfilename<<endl;
// cleanup
delete read_inputTree;
delete[] samplings;
}
void secondPCA::do_pca(vector<string> layer, int bin, TreeReader* read_inputTree, int* samplings)
{
//make a tree that holds only the events for that
TTree* bintree=new TTree("bintree","bintree");
double* data=new double[layer.size()];
for(unsigned int l=0;l<layer.size();l++)
bintree->Branch(Form("energy_%s",layer[l].c_str()),&data[l],Form("energy_%s/D",layer[l].c_str()));
for(int event=0;event<read_inputTree->GetEntries();event++)
{
read_inputTree->GetEntry(event);
int firstPCAbin=read_inputTree->GetVariable("firstPCAbin");
if(firstPCAbin==bin)
{
for(unsigned int l=0;l<layer.size();l++)
data[l]=read_inputTree->GetVariable(Form("energy_%s",layer[l].c_str()));
bintree->Fill();
}
}
//initialize the reader for this bintree
TreeReader* read_bintree = new TreeReader();
read_bintree->SetTree(bintree);
vector<TH1D*> histos_data=get_histos_data(layer, read_bintree);
vector<TH1D*> cumul_data =get_cumul_histos(layer, histos_data);
TPrincipal* principal = new TPrincipal(layer.size(),"ND"); //ND means normalize cov matrix and store data
TTree* T_Gauss=new TTree("T_Gauss","T_Gauss");
double* data_Gauss=new double[layer.size()];
for(unsigned int l=0;l<layer.size();l++)
T_Gauss->Branch(Form("energy_gauss_%s",layer[l].c_str()),&data_Gauss[l],Form("energy_gauss_%s/D",layer[l].c_str()));
for(int event=0;event<read_bintree->GetEntries();event++)
{
read_bintree->GetEntry(event);
for(unsigned int l=0;l<layer.size();l++)
{
double data=read_bintree->GetVariable(Form("energy_%s",layer[l].c_str()));
double cumulant = get_cumulant(data,cumul_data[l]);
cumulant = TMath::Min(cumulant,1.-10e-10);
cumulant = TMath::Max(cumulant,0.+10e-10);
//Gaussianization
double maxErfInvArgRange = 0.99999999;
double arg = 2.0*cumulant - 1.0;
arg = TMath::Min(+maxErfInvArgRange,arg);
arg = TMath::Max(-maxErfInvArgRange,arg);
data_Gauss[l] = 1.414213562*TMath::ErfInverse(arg);
}
principal->AddRow(data_Gauss);
T_Gauss->Fill();
} //event loop
principal->MakePrincipals();
cout<<std::endl <<"- Principal Component Analysis Results in bin "<<bin<<endl;
principal->Print("MSE");
cout<<"--- Application to get Mean and RMS of the PCA transformed data"<<endl;
TreeReader* reader_treeGauss = new TreeReader();
cout<<"check1"<<endl;
reader_treeGauss->SetTree(T_Gauss);
vector<double> data_PCA_min; vector<double> data_PCA_max;
for(unsigned int l=0;l<layer.size();l++)
{
data_PCA_min.push_back(100000.0);
data_PCA_max.push_back(-100000.0);
}
for(int event=0;event<reader_treeGauss->GetEntries();event++)
{
reader_treeGauss->GetEntry(event);
double input_data[layer.size()];
double data_PCA[layer.size()];
for(unsigned int l=0;l<layer.size();l++)
input_data[l]=reader_treeGauss->GetVariable(Form("energy_gauss_%s",layer[l].c_str()));
principal->X2P(input_data,data_PCA);
for(unsigned int l=0;l<layer.size();l++)
{
if(data_PCA[l]>data_PCA_max[l]) data_PCA_max[l]=data_PCA[l];
if(data_PCA[l]<data_PCA_min[l]) data_PCA_min[l]=data_PCA[l];
}
}
//fill histograms
std::vector<TH1D*> h_data_PCA;
for(unsigned int l=0;l<layer.size();l++)
{
h_data_PCA.push_back(new TH1D(Form("h_data_PCA_%s",layer[l].c_str()),Form("h_data_PCA_%s",layer[l].c_str()),1000,data_PCA_min[l],data_PCA_max[l]));
}
for(int event=0;event<reader_treeGauss->GetEntries();event++)
{
reader_treeGauss->GetEntry(event);
double input_data[layer.size()];
double data_PCA[layer.size()];
for(unsigned int l=0;l<layer.size();l++)
input_data[l]=reader_treeGauss->GetVariable(Form("energy_gauss_%s",layer[l].c_str()));
principal->X2P(input_data,data_PCA);
for(unsigned int l=0;l<layer.size();l++)
h_data_PCA[l]->Fill(data_PCA[l]);
}
double* gauss_means=new double[layer.size()];
double* gauss_rms=new double[layer.size()];
for(unsigned int l=0;l<layer.size();l++)
{
gauss_means[l]=h_data_PCA[l]->GetMean();
gauss_rms[l]=h_data_PCA[l]->GetRMS();
}
if(m_storeDetails)
{
TFile* output=TFile::Open(m_outfilename.c_str(),"UPDATE");
output->cd(Form("bin%i/",bin));
for(unsigned int l=0;l<layer.size();l++)
{
h_data_PCA[l]->Write(Form("h_PCA_component%i",l));
histos_data[l]->Write(Form("h_input_%s",layer[l].c_str()));
cumul_data[l]->Write(Form("h_cumul_%s",layer[l].c_str()));
}
output->Write();
output->Close();
}
// cleanup
delete bintree;
delete read_bintree;
delete reader_treeGauss;
delete T_Gauss;
delete[] data;
delete[] data_Gauss;
for (auto it = h_data_PCA.begin(); it != h_data_PCA.end(); ++it)
delete *it;
h_data_PCA.clear();
//get the lower ranges and store them:
double* lowerBound=new double[layer.size()];
for(unsigned int l=0;l<layer.size();l++)
{
lowerBound[l]=get_lowerBound(cumul_data[l]);
}
//Save EigenValues/EigenVectors/CovarianceMatrix in the output file
IntArray* myArray=new IntArray((int)(layer.size()-1));
myArray->Set(layer.size()-1,samplings);
TMatrixD* EigenVectors =(TMatrixD*)principal->GetEigenVectors();
TMatrixD* CovarianceMatrix =(TMatrixD*)principal->GetCovarianceMatrix();
TMatrixDSym *symCov=new TMatrixDSym();
symCov->Use(CovarianceMatrix->GetNrows(),CovarianceMatrix->GetMatrixArray()); //symCov to be stored!
TVectorD* MeanValues =(TVectorD*)principal->GetMeanValues();
TVectorD* SigmaValues =(TVectorD*)principal->GetSigmas();
TVectorD* Gauss_means =new TVectorD((int)(layer.size()),gauss_means);
TVectorD* Gauss_rms =new TVectorD((int)(layer.size()),gauss_rms);
TVectorD* LowerBounds =new TVectorD((int)(layer.size()),lowerBound);
TFile* output=TFile::Open(m_outfilename.c_str(),"UPDATE");
output->cd(Form("bin%i/pca",bin));
symCov->Write("symCov");
//EigenVectors->Write("EigenVectors");
MeanValues ->Write("MeanValues");
SigmaValues ->Write("SigmaValues");
Gauss_means ->Write("Gauss_means");
Gauss_rms ->Write("Gauss_rms");
myArray ->Write("RelevantLayers");
LowerBounds ->Write("LowerBounds");
output->Write();
output->Close();
//call the TFCS1DFunction to decide whether or not to use regression:
for(unsigned int l=0;l<layer.size();l++)
{
cout<<endl;
cout<<"====> Now create the fct object for "<<layer[l]<<" <===="<<endl;
cout<<endl;
stringstream ss;
ss << bin;
string binstring = ss.str();
TFCS1DFunction* fct=TFCSFunction::Create(cumul_data[l],m_skip_regression,m_neurons_start,m_neurons_end,m_maxdev_regression,m_maxdev_smartrebin,m_ntoys);
//Store it:
TFile* output=TFile::Open(m_outfilename.c_str(),"UPDATE");
output->cd(Form("bin%i/%s/",bin,layer[l].c_str()));
fct->Write();
output->Write();
output->Close();
}
} //do_pca
double secondPCA::get_lowerBound(TH1D* h_cumulative)
{
/*
double range_low=0;
int bin_start,bin_end;
bin_start=bin_end=-1;
for(int b=1;b<=h_cumulative->GetNbinsX();b++)
{
if(h_cumulative->GetBinContent(b)>0 && bin_start<0)
{
bin_start=b;
range_low=(double)(h_cumulative->GetBinContent(b));
}
if(h_cumulative->GetBinContent(b)>0.9999999 && bin_end<0)
{
bin_end=b;
}
}
return range_low;
*/
return h_cumulative->GetBinContent(1);
}
vector<TH1D*> secondPCA::get_histos_data(vector<string> layer, TreeReader* read_bintree)
{
vector<TH1D*> data;
//get the maxima per layer:
vector<double> MaxInputs;
for(unsigned int l=0;l<layer.size();l++) MaxInputs.push_back(0.0);
vector<double> MinInputs;
for(unsigned int l=0;l<layer.size();l++) MinInputs.push_back(1000000.0);
for(int event=0;event<read_bintree->GetEntries();event++)
{
read_bintree->GetEntry(event);
for(unsigned int l=0;l<layer.size();l++)
{
double val = read_bintree->GetVariable(Form("energy_%s",layer[l].c_str()));
if(val>MaxInputs[l])
MaxInputs[l]=val;
if(val<MinInputs[l])
MinInputs[l]=val;
}
}
for(unsigned int l=0; l<layer.size(); l++)
{
TH1D* h_data;
h_data = new TH1D(Form("h_data_%s",layer[l].c_str()),Form("h_data_%s",layer[l].c_str()),m_numberfinebins,MinInputs[l],MaxInputs[l]);
for(int event=0;event<read_bintree->GetEntries();event++)
{
read_bintree->GetEntry(event);
h_data->Fill(read_bintree->GetVariable(Form("energy_%s",layer[l].c_str())));
}
h_data->Sumw2();
h_data->Scale(1.0/h_data->Integral());
data.push_back(h_data);
} //for layer
return data;
}
vector<int> secondPCA::getLayerBins(TFile* file, int &bins)
{
vector<int> layer;
TH2I* h_layer=(TH2I*)file->Get("h_layer");
//the layers are stored in the y axis
for(int i=1;i<=h_layer->GetNbinsY();i++)
{
if(h_layer->GetBinContent(1,i)==1)
layer.push_back(h_layer->GetYaxis()->GetBinCenter(i));
}
bins=h_layer->GetNbinsX();
return layer;
}
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#ifndef secondPCA_h
#define secondPCA_h
#include "ISF_FastCaloSimParametrization/TreeReader.h"
#include "firstPCA.h"
using namespace std;
class secondPCA: public firstPCA
{
public:
secondPCA(string,string);
//virtual ~secondPCA();
void run();
vector<int> getLayerBins(TFile* file, int &bins);
void do_pca(vector<string>, int, TreeReader*, int*);
vector<TH1D*> get_histos_data(vector<string> layer, TreeReader*);
double get_lowerBound(TH1D* h_cumulative);
void set_cumulativehistobins(int);
void set_storeDetails(int);
void set_skip_regression(int);
void set_PCAbin(int);
void set_cut_maxdeviation_regression(double val);
void set_cut_maxdeviation_smartrebin(double val);
void set_Ntoys(int val);
void set_neurons_iteration(int start,int end);
private:
int m_numberfinebins;
int m_storeDetails;
int m_PCAbin;
int m_skip_regression;
string m_outfilename,m_firstpcafilename;
int m_neurons_start,m_neurons_end,m_ntoys;
double m_maxdev_regression,m_maxdev_smartrebin;
ClassDef(secondPCA,2);
};
#if defined(__MAKECINT__)
#pragma link C++ class secondPCA+;
#endif
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
//This linkdef file gets used by ALCliC only
//see https://root.cern.ch/root/html/guides/users-guide/Cling.html#dictionary-generation
#if defined(__ROOTCLING__)
#pragma link C++ class IntArray+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCS1DFunctionHistogram+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCS1DFunctionRegressionTF+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCS1DFunctionRegression+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCS1DFunction+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSEnergyParametrization+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSExtrapolationState+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSPCAEnergyParametrization+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSParametrizationBase+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSParametrization+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSSimulationState+;
#endif
/*
Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
*/
#if defined(__ROOTCLING__)
#pragma link C++ class TFCSTruthState+;
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment