diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..7a97f194913c9533b9a7b68ad40a7e83f5341e71 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,25 @@ +FROM placeholder-will-be-replaced-with-CI-variable + +ARG ASETUP_VERSION="placeholder-will-be-replaced-with-CI-variable" +ARG DEBIAN_FRONTEND=noninteractive + +COPY . /tmp/source +COPY ./python /opt/fastframes/python +COPY ./python_wrapper /opt/fastframes/python_wrapper + +RUN mkdir -p /opt/fastframes /tmp/build && \ + export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase && \ + source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh --quiet && \ + asetup $ASETUP_VERSION && \ + cmake -S /tmp/source -B /tmp/build -DCMAKE_INSTALL_PREFIX=/opt/fastframes && \ + cd /tmp/build && \ + make -j 4 && \ + make install && \ + cd && \ + rm -rf /tmp && \ + ldd /opt/fastframes/lib/libFastFrames.so + +ENV CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH:+$CMAKE_PREFIX_PATH:}/opt/fastframes" +ENV PATH="/opt/fastframes/bin:/opt/fastframes/python:${PATH}" +ENV LD_LIBRARY_PATH="/opt/fastframes/lib:${LD_LIBRARY_PATH}" +ENV ROOT_INCLUDE_PATH="${ROOT_INCLUDE_PATH:+$ROOT_INCLUDE_PATH:}/opt/fastframes/include" diff --git a/FastFrames/ConfigSetting.h b/FastFrames/ConfigSetting.h index 7e206b01a4dc75c03a9cff920fc9bb5e6c1e65ea..d963117bec8486b045707e54ac06ccf3d0057290 100644 --- a/FastFrames/ConfigSetting.h +++ b/FastFrames/ConfigSetting.h @@ -153,17 +153,17 @@ public: const std::map<std::string, float>& luminosityMap() const {return m_luminosity_map;} /** - * @brief Add x-section file + * @brief Add x-section files for a given set of campaigns * */ - void addXsectionFile(const std::string& xSectionFile) { m_xSectionFiles.push_back(xSectionFile); }; + void addXsectionFiles(const std::vector<std::string>& xsectionFiles, const std::vector<std::string> &campaigns); /** * @brief Get x-section files * - * @return const std::vector<std::string>& + * @return const std::map<std::vector<std::string>,std::vector<std::string>> & */ - const std::vector<std::string>& xSectionFiles() const {return m_xSectionFiles;}; + const std::map<std::vector<std::string>, std::vector<std::string>>& xSectionFiles() const {return m_mapCampaignsToxSectionFiles;}; /** * @brief Add TlorentzVector to create @@ -497,7 +497,7 @@ private: long long int m_maxEvent = -1; std::map<std::string, float> m_luminosity_map; - std::vector<std::string> m_xSectionFiles ; + std::map<std::vector<std::string>, std::vector<std::string>> m_mapCampaignsToxSectionFiles; std::vector<std::string> m_tLorentzVectors; bool m_useRVec = false; diff --git a/FastFrames/HistoContainer.h b/FastFrames/HistoContainer.h index 89b93fe72e4703efffd6ec8a7f59bcdeadb35b42..c087f01917381d26cf42edd074974c79f504f892 100644 --- a/FastFrames/HistoContainer.h +++ b/FastFrames/HistoContainer.h @@ -9,6 +9,7 @@ #include "TH1D.h" #include "TH2D.h" #include "TH3D.h" +#include "TProfile.h" #include "ROOT/RResultPtr.hxx" @@ -92,8 +93,8 @@ public: /** * @brief Get the unique ptr histogram - * - * @return const std::unique_ptr<TH1D>& + * + * @return const std::unique_ptr<TH1D>& */ inline const std::unique_ptr<TH1D>& histoUniquePtr() const {return m_histoUniquePtr;} @@ -105,10 +106,10 @@ void mergeHisto(ROOT::RDF::RResultPtr<TH1D> h); /** - * @brief Copy the RResultsPtr to the unique ptr - * - * @param h - * @return * void + * @brief Copy the RResultsPtr to the unique ptr + * + * @param h + * @return * void */ void copyHisto(ROOT::RDF::RResultPtr<TH1D> h);
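As a usage illustration of the campaign-aware cross-section interface introduced above, a configuration could register different cross-section files per set of campaigns (a minimal sketch; the file paths and campaign labels are hypothetical, and the campaign list is sorted internally, so its order does not matter):

    ConfigSetting config;
    // mc20a and mc20d share one cross-section database
    config.addXsectionFiles({"data/PMGxsecDB_mc16.txt"}, {"mc20d", "mc20a"});
    // a Run 3 campaign with its own database
    config.addXsectionFiles({"data/PMGxsecDB_mc23.txt"}, {"mc23a"});

@@ -158,7 +159,7 @@ * @brief Deleted 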
assignment operator * * @param other - * @return VariableHisto& + * @return VariableHisto2D& */ VariableHisto2D& operator =(const VariableHisto2D& other) = delete; @@ -166,7 +167,7 @@ public: * @brief Default forwarding operator * * @param other - * @return VariableHisto& + * @return VariableHisto2D& */ VariableHisto2D& operator =(VariableHisto2D&& other) = default; @@ -189,14 +190,14 @@ public: /** * @brief Get the histogram * - * @return ROOT::RDF::RResultPtr<TH1D> + * @return ROOT::RDF::RResultPtr<TH2D> */ inline ROOT::RDF::RResultPtr<TH2D> histo() const {return m_histo;} /** * @brief Get the unique ptr histogram - * - * @return const std::unique_ptr<TH2D>& + * + * @return const std::unique_ptr<TH2D>& */ inline const std::unique_ptr<TH2D>& histoUniquePtr() const {return m_histoUniquePtr;} @@ -208,9 +209,9 @@ public: void mergeHisto(ROOT::RDF::RResultPtr<TH2D> h); /** - * @brief Copy the RResultsPtr to the unique ptr - * - * @param h + * @brief Copy the RResultsPtr to the unique ptr + * + * @param h */ void copyHisto(ROOT::RDF::RResultPtr<TH2D> h); @@ -220,6 +221,107 @@ private: std::unique_ptr<TH2D> m_histoUniquePtr; }; +/** + * @brief Class to store the TProfile histograms + * + */ +class VariableHistoProfile { +public: + + /** + * @brief Construct a new Variable Histo TProfile object + * + * @param name Name of the combination + */ + explicit VariableHistoProfile(const std::string& name) : + m_name(name) {} + + /** + * @brief Destroy the Variable Histo TProfile object + * + */ + ~VariableHistoProfile() = default; + + /** + * @brief Deleted copy constructor + * + * @param other + */ + VariableHistoProfile(const VariableHistoProfile& other) = delete; + + /** + * @brief Move constructor + * + * @param other + */ + VariableHistoProfile(VariableHistoProfile&& other) = default; + + /** + * @brief Deleted assignment operator + * + * @param other + * @return VariableHistoProfile& + */ + VariableHistoProfile& operator =(const VariableHistoProfile& other) = delete; + + /** + * @brief Default forwarding operator + * + * @param other + * @return VariableHistoProfile& + */ + VariableHistoProfile& operator =(VariableHistoProfile&& other) = default; + + /** + * @brief Get name of the Variable + * + * @return const std::string& + */ + inline const std::string& name() const {return m_name;} + + /** + * @brief Set the histogram from the RDataFrame results object + * This triggers the event loop! 
+ * Need to make a copy as RDF owns the pointer + * + * @param h + */ + void setHisto(ROOT::RDF::RResultPtr<TProfile>& h) {m_histo = std::move(h);} + + /** + * @brief Get the histogram + * + * @return ROOT::RDF::RResultPtr<TProfile> + */ + inline ROOT::RDF::RResultPtr<TProfile> histo() const {return m_histo;} + + /** + * @brief Get the unique ptr histogram + * + * @return const std::unique_ptr<TProfile>& + */ + inline const std::unique_ptr<TProfile>& histoUniquePtr() const {return m_histoUniquePtr;} + + /** + * @brief Merge histograms (add them) + * + * @param h Other histogram + */ + void mergeHisto(ROOT::RDF::RResultPtr<TProfile> h); + + /** + * @brief Copy the RResultsPtr to the unique ptr + * + * @param h + */ + void copyHisto(ROOT::RDF::RResultPtr<TProfile> h); + +private: + std::string m_name; + ROOT::RDF::RResultPtr<TProfile> m_histo; + std::unique_ptr<TProfile> m_histoUniquePtr; +}; + /** * @brief Class that stores 2D histograms for each Variable * @@ -296,8 +398,8 @@ public: /** * @brief Get the unique ptr histogram - * - * @return const std::unique_ptr<TH2D>& + * + * @return const std::unique_ptr<TH3D>& */ inline const std::unique_ptr<TH3D>& histoUniquePtr() const {return m_histoUniquePtr;} @@ -309,12 +411,12 @@ void mergeHisto(ROOT::RDF::RResultPtr<TH3D> h); /** - * @brief Copy the RResultsPtr to the unique ptr - * - * @param h + * @brief Copy the RResultsPtr to the unique ptr + * + * @param h */ void copyHisto(ROOT::RDF::RResultPtr<TH3D> h); private: std::string m_name; ROOT::RDF::RResultPtr<TH3D> m_histo; @@ -396,6 +512,13 @@ public: */ inline void addVariableHisto3D(VariableHisto3D&& vh) {m_variables3D.emplace_back(std::move(vh));} + /** + * @brief Add TProfile Variable for this region + * + * @param vh + */ + inline void addVariableHistoProfile(VariableHistoProfile&& vh) {m_variablesProfile.emplace_back(std::move(vh));} + /** * @brief Get all variableHisto (const) * @@ -438,11 +561,26 @@ */ inline std::vector<VariableHisto3D>& variableHistos3D() {return m_variables3D;} + /** + * @brief Get all variableHistoProfile (const) + * + * @return const std::vector<VariableHistoProfile>& + */ + inline const std::vector<VariableHistoProfile>& variableHistosProfile() const {return m_variablesProfile;} + + /** + * @brief Get all variableHistoProfile + * + * @return std::vector<VariableHistoProfile>& + */ + inline std::vector<VariableHistoProfile>& variableHistosProfile() {return m_variablesProfile;} + private: std::string m_name; std::vector<VariableHisto> m_variables; std::vector<VariableHisto2D> m_variables2D; std::vector<VariableHisto3D> m_variables3D; + std::vector<VariableHistoProfile> m_variablesProfile; }; @@ -523,9 +661,9 @@ public: void merge(const SystematicHisto& histo); /** - * @brief Copy the histos without the RResultsPts (allows to free it) - * - * @return SystematicHisto + * @brief Copy the histos without the RResultPtrs (allows freeing them) + * + * @return SystematicHisto */ SystematicHisto copy() const;
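The new VariableHistoProfile container mirrors the existing 1D/2D/3D ones: booking is lazy, and copyHisto()/mergeHisto() materialise the result into the owned unique_ptr. A hypothetical lifecycle, with assumed column names and binning, could look like:

    VariableHistoProfile vh("profile_jet_eta_vs_jet_pt_NOSYS");
    ROOT::RDF::TProfile1DModel model("", "", 10, -2.5, 2.5, 0., 500.);
    // booking is lazy; no event loop runs yet
    ROOT::RDF::RResultPtr<TProfile> p =
        node.Profile1D(model, "jet_eta_NOSYS", "jet_pt_NOSYS", "weight_total_NOSYS");
    vh.setHisto(p);            // store the RResultPtr
    vh.copyHisto(vh.histo());  // trigger the event loop and clone into the unique_ptr

diff --git a/FastFrames/MainFrame.h b/FastFrames/MainFrame.h index 42b886b6dba9a4f9c7bafe2808bd56b6a4667111..183d866e3141dbc16292bcce0c5a08f54194ffb5 100644 --- a/FastFrames/MainFrame.h +++ b/FastFrames/MainFrame.h @@ -176,6 +176,7 @@ public: return node; } + /** * @brief A helper method 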
that make systematic copies of a provided nominal column * Name of the new variable has to contain _NOSYS @@ -349,6 +350,15 @@ const std::string& newName, const std::string& formula); +protected: + + /** + * @brief Add custom TH1 to be stored in the output. Uses object->GetName() as the output name + * + * @param hist + */ + void addCustomHistogramsToOutput(const TH1& hist); + private: /** @@ -540,6 +550,21 @@ const std::shared_ptr<Region>& region, const std::shared_ptr<Systematic>& systematic) const; + /** + * @brief Define TProfile histograms with variables and systematics + * + * @param regionHisto RegionHisto to be filled + * @param node Filtered node + * @param sample Sample + * @param region Region + * @param systematic Systematic + */ + void processHistogramsProfile(RegionHisto* regionHisto, + const ROOT::RDF::RNode& node, + const std::shared_ptr<Sample>& sample, + const std::shared_ptr<Region>& region, + const std::shared_ptr<Systematic>& systematic) const; + /** * @brief Define 2D histograms with variables and systematics for unfolding * @@ -550,7 +575,7 @@ * @param systematic Systematic */ void processRecoVsTruthHistograms2D(RegionHisto* regionHisto, - ROOT::RDF::RNode& node, + ROOT::RDF::RNode node, const std::shared_ptr<Sample>& sample, const std::shared_ptr<Region>& region, const std::shared_ptr<Systematic>& systematic); @@ -615,7 +640,7 @@ std::vector<std::string> automaticSystematicNames(const std::vector<std::string>& filePath) const; /** - * @brief Connect truth trees to the reco tree + * @brief Connect truth trees to the reco tree and add the truth branch names to the SystematicReplacer tracking * * @param chain The reco chain * @param sample Current Sample * @... */ std::vector<std::pair<std::unique_ptr<TChain>, std::unique_ptr<TTreeIndex> > > connectTruthTrees(std::unique_ptr<TChain>& chain, const std::shared_ptr<Sample>& sample, - const std::vector<std::string>& filePaths) const; + const std::vector<std::string>& filePaths); /** * @brief Process truth histograms @@ -706,6 +731,20 @@ const Variable& variable2, const std::shared_ptr<Systematic>& systematic) const; + /** + * @brief Book TProfile histograms using the JIT compiler + * + * @param node + * @param variable1 + * @param variable2 + * @param systematic + * @return ROOT::RDF::RResultPtr<TProfile> + */ + ROOT::RDF::RResultPtr<TProfile> bookProfilehisto(ROOT::RDF::RNode node, + const Variable& variable1, + const Variable& variable2, + const std::shared_ptr<Systematic>& systematic) const; + /** * @brief Book 3D histograms using the JIT compiler * @@ -833,6 +872,13 @@ */ void prepareONNXwrapper(); + /** + * @brief Write the custom histograms to the TFile + * + * @param file + */ + void writeCustomHistograms(TFile* file) const; + protected: /** @@ -865,6 +911,12 @@ */ std::map<std::string, std::map<std::string, std::string> > m_variablesWithFormulaTruth; + /** + * @brief List of custom histograms to be stored when running over ntuples + * + */ + std::vector<std::unique_ptr<TH1> > m_customHistograms; + /** * @brief Needed for ROOT to generate the dictionary *
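Since addCustomHistogramsToOutput() is protected, it is meant to be called from a user class deriving from MainFrame; the histogram is cloned into m_customHistograms and later written out by writeCustomHistograms(). A minimal sketch (the class and method names here are hypothetical):

    class MyCustomFrame : public MainFrame {
    public:
        void defineCustomHistograms() {   // hypothetical user hook
            TH1D cutflow("cutflow", "cutflow", 3, 0., 3.);
            cutflow.GetXaxis()->SetBinLabel(1, "all");
            // the object is cloned internally, so a local histogram is fine;
            // GetName() ("cutflow") becomes the key in the output file
            this->addCustomHistogramsToOutput(cutflow);
        }
    };

diff --git a/FastFrames/MetadataManager.h b/FastFrames/MetadataManager.h index 5ffb0614ee596ad7c091b53f99de960d77f9357b..ca27ecf6290945b5ae08e5b4a262ad3a7af205b8 100644 --- a/FastFrames/MetadataManager.h +++ b/FastFrames/MetadataManager.h @@ -60,7 +60,7 @@ public: * @param xSectionFiles list of the paths to the cross-section files * @param usedDSIDs list of the DSIDs defined in 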
the config */ - void readXSectionFiles(const std::vector<std::string>& xSectionFiles, const std::vector<int>& usedDSIDs); + void readXSectionFiles(const std::map<std::vector<std::string>, std::vector<std::string>>& xSectionFiles, const std::vector<int>& usedDSIDs); /** * @brief Adds luminosity value for a given campaign diff --git a/FastFrames/Region.h b/FastFrames/Region.h index 851b2cada5d94e6ff0df83339e77358a19b6b2b1..071c436d16d3189d76101bfd68535ae9ee08df4f 100644 --- a/FastFrames/Region.h +++ b/FastFrames/Region.h @@ -90,6 +90,14 @@ public: */ void addVariableCombination3D(const std::string& v1, const std::string& v2, const std::string& v3) {m_variableCombinations3D.emplace_back(std::tuple<std::string, std::string, std::string>(v1, v2, v3));} + /** + * @brief Add variables for TProfile + * + * @param v1 + * @param v2 + */ + void addVariableForProfile(const std::string& v1, const std::string& v2) {m_variableForProfile.emplace_back(std::make_pair(v1, v2));} + /** * @brief Get variable combinations * @@ -104,6 +112,12 @@ */ inline const std::vector<std::tuple<std::string, std::string, std::string>>& variableCombinations3D() const {return m_variableCombinations3D;} + /** + * @brief Get the names of the variables for TProfile + * + * @return const std::vector<std::pair<std::string, std::string > > + */ + inline const std::vector<std::pair<std::string, std::string > >& variablesForProfile() const {return m_variableForProfile;} /** * @brief Retrieve variable by its name @@ -120,4 +134,5 @@ private: std::vector<Variable> m_variables; std::vector<std::pair<std::string, std::string> > m_variableCombinations; std::vector<std::tuple<std::string, std::string, std::string>> m_variableCombinations3D; + std::vector<std::pair<std::string, std::string> > m_variableForProfile; }; diff --git a/FastFrames/StringOperations.h b/FastFrames/StringOperations.h index 5b3b7c7a1943f5efe07d1b8e597ab27302060e00..d16910663ffa9a21db4eeeadfcb9b89f13b2bad2 100644 --- a/FastFrames/StringOperations.h +++ b/FastFrames/StringOperations.h @@ -329,4 +329,12 @@ namespace StringOperations { */ std::string joinStrings(const std::string &separator, const std::vector<std::string> &strings); + + /** + * @brief Get valid C++ variable names from a string + * + * @param input - the input string + * @return std::vector<std::pair<std::string,int>> - vector of all valid C++ variable names in the input string and their start positions + */ + std::vector<std::pair<std::string, int>> getValidCxxVariableNames(const std::string &input); } \ No newline at end of file
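On the configuration side, each (v1, v2) pair registered with Region::addVariableForProfile books one profile named profile_<v1>_vs_<v2> (see MainFrame::processHistogramsProfile below), i.e. the mean of v2 in bins of v1. A sketch, with hypothetical region and variable names:

    auto region = std::make_shared<Region>("SR");
    // both variables must be defined in the region and enabled for the sample;
    // this books "profile_jet_eta_vs_jet_pt": mean jet pT in bins of jet eta
    region->addVariableForProfile("jet_eta", "jet_pt");

diff --git a/FastFrames/SystematicReplacer.h b/FastFrames/SystematicReplacer.h index 8e0868db376d1651935f63b960be5dcb3b84f10a..3855729ac784cc1c504ada6166029e261aca7c8f 100644 --- a/FastFrames/SystematicReplacer.h +++ b/FastFrames/SystematicReplacer.h @@ -149,6 +149,22 @@ public: */ void updateVariableAndEffectiveSystematics(const std::string& variable, const std::vector<std::string>& systematics); + /** + * @brief Add a single systematic to a variable. If the variable is not tracked in the maps, it is added. + * If the systematic is already present, nothing is done. 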
+ * + * @param variable + * @param systematic + */ + void addSingleSystematic(const std::string& variable, const std::string& systematic); + + /** + * @brief Add branches from the truth tree to the maps if they contain NOSYS + * + * @param truthTreeName + * @param branches + */ + void addTruthBranchesNominal(const std::string& truthTreeName, const std::vector<std::string>& branches); /** * @brief Print the contents of the maps for systematic matching diff --git a/FastFrames/Utils.h b/FastFrames/Utils.h index f0d1d4b4b7b814b536bdda60fb63317702535c83..d02558a64ec671b2f5717a4352b2cc1e0c78c647 100644 --- a/FastFrames/Utils.h +++ b/FastFrames/Utils.h @@ -50,6 +50,15 @@ namespace Utils { */ ROOT::RDF::TH2DModel histoModel2D(const Variable& v1, const Variable& v2); + /** + * @brief Get profile histo model (TProfile) from variables + * + * @param v1 + * @param v2 + * @return ROOT::RDF::TProfile1DModel + */ + ROOT::RDF::TProfile1DModel histoModelProfile(const Variable& v1, const Variable& v2); + /** * @brief Get 3D histo model (TH3D) from variables * @@ -115,6 +124,28 @@ const std::string& variableName); + /** + * @brief Get the variables that form a 2D histogram + * + * @param sample + * @param regionName + * @param variableName + * @return std::pair<Variable, Variable> + */ + std::pair<Variable, Variable> get2DVariablesByName(const std::shared_ptr<Sample>& sample, + const std::string& regionName, + const std::string& variableName); + + /** + * @brief Get the Variable By Name from a truth object + * + * @param sample + * @param variableName + * @return const Variable& + */ + const Variable& getVariableByNameTruth(const std::shared_ptr<Sample>& sample, + const std::string& variableName); + /** * @brief Compare 2 doubles with a given relative precision * @@ -137,6 +168,16 @@ const std::vector<std::string>& selected, const std::vector<std::string>& excluded); + /** + * @brief Provide the list of elements that were requested but not matched (with regex) + * + * @param all + * @param requested + * @return std::vector<std::string> + */ + std::vector<std::string> requestedNotPresentElements(const std::vector<std::string>& all, + const std::vector<std::string>& requested); + /** * @brief Get the list of defined variables that use a formula and not the column name directly * @@ -172,4 +213,33 @@ std::vector<std::string> matchingBranchesFromChains(const std::unique_ptr<TChain>& reco, const std::unique_ptr<TChain>& truth, const std::vector<std::string>& toCheck); + + /** + * @brief Split a formula definition into the individual columns + * + * @param formula + * @param truthTreeName + * @param node + * @return std::vector<std::string> + */ + std::vector<std::string> getColumnsFromString(const std::string& formula, + const std::string& truthTreeName, + ROOT::RDF::RNode& node); + + /** + * @brief Add under/overflow to the first/last bin if requested + * + * @param histo + * @param type + */ + void MergeUnderOverFlow(TH1D* histo, const UnderOverFlowType& type); + + /** + * @brief Add under/overflow to the first/last bin if requested + * + * @param histo + * @param type1 + * @param type2 + */ + void MergeUnderOverFlow2D(TH2D* histo, const UnderOverFlowType& type1, const UnderOverFlowType& type2); }
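The MergeUnderOverFlow helpers fold the underflow/overflow contents into the first/last visible bin. A sketch of the expected underflow-side behaviour, assuming bin errors are combined in quadrature (the implementation in Root/Utils.cc is truncated at the end of this diff):

    // fold bin 0 (underflow) into bin 1, then clear the underflow
    const double content = histo->GetBinContent(1) + histo->GetBinContent(0);
    const double error = std::hypot(histo->GetBinError(1), histo->GetBinError(0));
    histo->SetBinContent(1, content);
    histo->SetBinError(1, error);
    histo->SetBinContent(0, 0.);
    histo->SetBinError(0, 0.);

diff --git a/FastFrames/Variable.h b/FastFrames/Variable.h index b830a855eaca114fba311c5fe224f5dbc5d61645..695326d7ee3a191629b2cc73c6f85877d3410195 100644 --- a/FastFrames/Variable.h +++ b/FastFrames/Variable.h @@ -14,7 +14,7 @@ /** * @brief Type of varaible 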
needed to avoid JITing - * + * */ enum class VariableType { UNDEFINED = 0, @@ -49,11 +49,23 @@ RVEC_DOUBLE, }; +/** + * @brief Enum for deciding whether the under/overflow events should be added to the first/last bin + * + */ +enum class UnderOverFlowType { + NO_UNDER_OVER_FLOW_MERGE, + MERGE_UNDERFLOW, + MERGE_OVERFLOW, + MERGE_BOTH, +}; + /** * @brief Class responsible for the varaible definition * */ class Variable { + public: /** @@ -190,18 +202,32 @@ /** * @brief Set the Type object - * - * @param type + * + * @param type */ inline void setType(const VariableType type) {m_type = type;} /** * @brief variable type - * - * @return VariableType + * + * @return VariableType */ inline VariableType type() const {return m_type;} + /** + * @brief Set the Under/Over Flow Type + * + * @param type + */ + inline void setUnderOverFlowType(const UnderOverFlowType& type) {m_underOverFlowType = type;} + + /** + * @brief Get Under/Overflow type + * + * @return const UnderOverFlowType& + */ + inline const UnderOverFlowType& underOverFlowType() const {return m_underOverFlowType;} + private: std::string m_name; std::string m_definition; @@ -209,4 +235,5 @@ Binning m_binning; bool m_isNominalOnly; VariableType m_type; + UnderOverFlowType m_underOverFlowType; }; diff --git a/README.md b/README.md index 0cc1ee4efa4b1b16e159e217f2d0a5a705c8879e..fad5568f7f7db3959fe081b9e70606dbb96632ae 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -# FastFrames +# FastFrames [](https://gitlab.cern.ch/atlas-amglab/fastframes/commits/main) [](https://doi.org/10.5281/zenodo.14773274) + FastFrames is a package aimed at processing ntuples produced by [TopCPToolkit](https://topcptoolkit.docs.cern.ch/), or a similar CP algorithm based framework, into histograms or ntuples. FastFrames rely on ROOT's [RDataFrame](https://root.cern/doc/master/classROOT_1_1RDataFrame.html) to do the event loop processing. @@ -10,4 +11,11 @@ The package documentation, including detailed instruction how to compile and run Doxygen documentation for the code can be found [here](https://atlas-project-topreconstruction.web.cern.ch/fastframesdoxygen/). ## Support -Support mattermost channel is available. Please, first join the Top Analysis team: [link](https://mattermost.web.cern.ch/signup_user_complete/?id=95983da3f25882a52b0e389f0b042150&md=link&sbr=su) and then join the `Fast Frames support channel`. \ No newline at end of file +Support mattermost channel is available. Please, first join the Top Analysis team: [link](https://mattermost.web.cern.ch/signup_user_complete/?id=95983da3f25882a52b0e389f0b042150&md=link&sbr=su) and then join the `Fast Frames support channel`. + +## TRExFitter +FastFrames provides a convenient interface to [TRExFitter](https://gitlab.cern.ch/TRExStats/TRExFitter), a software package that can be used for plotting and for running binned-likelihood fits. + +## Citation +A [Zenodo record](https://zenodo.org/records/14774464) of FastFrames is available. This record is automatically updated with every version. 
+We recommend citing the overall identifier: `DOI 10.5281/zenodo.14773274.` \ No newline at end of file diff --git a/Root/ConfigSetting.cc b/Root/ConfigSetting.cc index f0dd4305bde457c370182ff0de3bc491fdb1bc43..6736a40170d8c7769e1f19f9490bd2d0be03b899 100644 --- a/Root/ConfigSetting.cc +++ b/Root/ConfigSetting.cc @@ -55,7 +55,7 @@ void ConfigSetting::setTestingValues() { sample->addRegion(reg); m_samples.emplace_back(sample); - addXsectionFile("test/data/PMGxsecDB_mc16.txt"); + addXsectionFiles({"test/data/PMGxsecDB_mc16.txt"}, {"mc20e"}); }; void ConfigSetting::addLuminosityInformation(const std::string& campaign, const float luminosity, const bool force) { @@ -76,6 +76,11 @@ float ConfigSetting::getLuminosity(const std::string& campaign) const { return m_luminosity_map.at(campaign); }; +void ConfigSetting::addXsectionFiles(const std::vector<std::string>& xsectionFiles, const std::vector<std::string> &campaigns) { + std::vector<std::string> campaignsSorted = campaigns; + std::sort(campaignsSorted.begin(), campaignsSorted.end()); + m_mapCampaignsToxSectionFiles[campaignsSorted] = xsectionFiles; +}; void ConfigSetting::addRegion(const std::shared_ptr<Region>& region) { LOG(INFO) << "Adding region " << region->name() << "\n"; diff --git a/Root/HistoContainer.cc b/Root/HistoContainer.cc index 7de0b4c1af4e96de00a1ad04398517308ac98066..71ba197c6771c5b067e075d6cece969c377e79e7 100644 --- a/Root/HistoContainer.cc +++ b/Root/HistoContainer.cc @@ -21,7 +21,11 @@ void VariableHisto2D::mergeHisto(ROOT::RDF::RResultPtr<TH2D> h) { void VariableHisto3D::mergeHisto(ROOT::RDF::RResultPtr<TH3D> h) { m_histoUniquePtr->Add(h.GetPtr()); } - + +void VariableHistoProfile::mergeHisto(ROOT::RDF::RResultPtr<TProfile> h) { + m_histoUniquePtr->Add(h.GetPtr()); +} + void VariableHisto::copyHisto(ROOT::RDF::RResultPtr<TH1D> h) { m_histoUniquePtr.reset(static_cast<TH1D*>(h->Clone())); } @@ -34,6 +38,10 @@ void VariableHisto3D::copyHisto(ROOT::RDF::RResultPtr<TH3D> h) { m_histoUniquePtr.reset(static_cast<TH3D*>(h->Clone())); } +void VariableHistoProfile::copyHisto(ROOT::RDF::RResultPtr<TProfile> h) { + m_histoUniquePtr.reset(static_cast<TProfile*>(h->Clone())); +} + void SystematicHisto::merge(const SystematicHisto& other) { if (m_name != other.name()) { LOG(ERROR) << "Something went wrong with the merging of the histograms\n"; @@ -54,6 +62,14 @@ void SystematicHisto::merge(const SystematicHisto& other) { LOG(ERROR) << "Sizes of the 2D variables do not match!\n"; throw std::runtime_error(""); } + if (m_regions.at(ireg).variableHistos3D().size() != other.m_regions.at(ireg).variableHistos3D().size()) { + LOG(ERROR) << "Sizes of the 3D variables do not match!\n"; + throw std::runtime_error(""); + } + if (m_regions.at(ireg).variableHistosProfile().size() != other.m_regions.at(ireg).variableHistosProfile().size()) { + LOG(ERROR) << "Sizes of the TProfile variables do not match!\n"; + throw std::runtime_error(""); + } // merge 1D histos for (std::size_t ivariable = 0; ivariable < m_regions.at(ireg).variableHistos().size(); ++ivariable) { @@ -72,6 +88,12 @@ void SystematicHisto::merge(const SystematicHisto& other) { m_regions.at(ireg).variableHistos3D().at(ivariable3D) .mergeHisto(other.regionHistos().at(ireg).variableHistos3D().at(ivariable3D).histo()); } + + // merge TProfile histos + for (std::size_t ivariableProfile = 0; ivariableProfile < m_regions.at(ireg).variableHistosProfile().size(); ++ivariableProfile) { + m_regions.at(ireg).variableHistosProfile().at(ivariableProfile) + 
.mergeHisto(other.regionHistos().at(ireg).variableHistosProfile().at(ivariableProfile).histo()); + } } } @@ -91,6 +113,10 @@ SystematicHisto SystematicHisto::copy() const { result.m_regions.back().variableHistos3D().emplace_back(ivariable.name()); result.m_regions.back().variableHistos3D().back().copyHisto(ivariable.histo()); } + for (const auto& ivariable : ireg.variableHistosProfile()) { + result.m_regions.back().variableHistosProfile().emplace_back(ivariable.name()); + result.m_regions.back().variableHistosProfile().back().copyHisto(ivariable.histo()); + } } return result; diff --git a/Root/MainFrame.cc b/Root/MainFrame.cc index 07e4e3266755b305bc22381643697bd041461ff8..1a86b7164a17a4ac3c20c220cc213893ac097d4c 100644 --- a/Root/MainFrame.cc +++ b/Root/MainFrame.cc @@ -66,6 +66,7 @@ void MainFrame::init() { } this->prepareONNXwrapper(); + } void MainFrame::executeHistograms() { @@ -208,6 +209,14 @@ void MainFrame::executeNtuples() { for (const auto& isample : m_config->ntuple()->samples()) { LOG(INFO) << "\n"; LOG(INFO) << "Processing sample: " << sampleN << " out of " << m_config->samples().size() << " samples\n"; + if (!m_config->ntuple()->copyTrees().empty()) { + for (const auto& icopy : m_config->ntuple()->copyTrees()) { + auto itr = std::find_if(isample->truths().begin(), isample->truths().end(), [&icopy](const auto& element){return icopy == element->truthTreeName();}); + if (itr != isample->truths().end()) { + LOG(WARNING) << "You are asking the code to copy a truth tree: " << icopy << " that is also being processed. This might overwrite the results!\n"; + } + } + } std::size_t uniqueSampleN(1); for (const auto& iUniqueSampleID : isample->uniqueSampleIDs()) { LOG(INFO) << "\n"; @@ -245,6 +254,9 @@ std::tuple<std::vector<SystematicHisto>, return std::make_tuple(std::vector<SystematicHisto>{}, std::vector<VariableHisto>{}, std::vector<CutflowContainer>{}, tmp, nullptr, std::move(truthChains)); } + // we could use any file from the list, use the first one + m_systReplacer.readSystematicMapFromFile(selectedFilePaths.at(0), sample->recoTreeName(), sample->systematics()); + std::vector<VariableHisto> truthHistos; if (sample->hasTruth()) { @@ -260,9 +272,6 @@ std::tuple<std::vector<SystematicHisto>, truthChains = this->connectTruthTrees(recoChain, sample, selectedFilePaths); } - // we could use any file from the list, use the first one - m_systReplacer.readSystematicMapFromFile(selectedFilePaths.at(0), sample->recoTreeName(), sample->systematics()); - ROOT::RDataFrame df(*recoChain); ROOT::RDF::RNode mainNode = df; @@ -293,9 +302,6 @@ std::tuple<std::vector<SystematicHisto>, mainNode = this->addCustomDefinesFromConfig(mainNode, sample); } - mainNode = this->prepareWeightMetadata(mainNode, sample, uniqueSampleID); - mainNode = this->addWeightColumns(mainNode, sample, uniqueSampleID); - // add truth variables if matching reco and truth mainNode = this->addTruthVariablesToReco(mainNode, sample, uniqueSampleID); @@ -309,6 +315,9 @@ std::tuple<std::vector<SystematicHisto>, mainNode = this->addVariablesWithFormulaReco(mainNode, sample, {}); } + mainNode = this->prepareWeightMetadata(mainNode, sample, uniqueSampleID); + mainNode = this->addWeightColumns(mainNode, sample, uniqueSampleID); + LOG(DEBUG) << "Finished adding all columns to the reco tree\n"; m_systReplacer.printMaps(); @@ -361,15 +370,15 @@ std::tuple<std::vector<SystematicHisto>, mainNode = this->addCustomDefinesFromConfig(mainNode, sample); } - mainNode = this->prepareWeightMetadataAllUniqueSamples(mainNode, sample); - mainNode = 
this->addWeightColumns(mainNode, sample, dummy); - // run models from simple_onnx_inference block mainNode = this->scheduleSimpleONNXInference(mainNode); // add columns for variables defined with a formula mainNode = this->addVariablesWithFormulaReco(mainNode, sample, {}); + mainNode = this->prepareWeightMetadataAllUniqueSamples(mainNode, sample); + mainNode = this->addWeightColumns(mainNode, sample, dummy); + LOG(DEBUG) << "Finished adding all columns to the reco tree\n"; m_systReplacer.printMaps(); @@ -400,6 +409,9 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, return; } + // we could use any file from the list, use the first one + m_systReplacer.readSystematicMapFromFile(selectedFilePaths.at(0), sample->recoTreeName(), sample->systematics()); + // we still need to get the list of all systematics to be able to identify the nominal branches if (sample->nominalOnly()) { const auto systematics = this->automaticSystematicNames(selectedFilePaths); @@ -412,8 +424,6 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, if (sample->hasTruth()) { truthChains = this->connectTruthTrees(chain, sample, selectedFilePaths); } - // we could use any file from the list, use the first one - m_systReplacer.readSystematicMapFromFile(selectedFilePaths.at(0), sample->recoTreeName(), sample->systematics()); const bool hasZeroEvents = chain->GetEntries() == 0; if (hasZeroEvents) LOG(WARNING) << "UniqueSampleID: " << id << ", has no events, skipping it\n"; @@ -421,6 +431,7 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, ROOT::RDataFrame df(*chain); ROOT::RDF::RNode mainNode = df; + ROOT::RDF::RNode filteredNode = mainNode; #if ROOT_VERSION_CODE > ROOT_VERSION(6,29,0) ROOT::RDF::Experimental::AddProgressBar(mainNode); #endif @@ -454,9 +465,12 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, // apply filter if (!m_config->ntuple()->selection().empty()) { mainNode = mainNode.Filter(this->systematicOrFilter(sample)); + filteredNode = mainNode; } } + auto filterNodeCount = filteredNode.Count(); + //store the file const std::string folder = m_config->outputPathNtuples().empty() ? "" : m_config->outputPathNtuples() + "/"; std::string suffix(""); @@ -466,6 +480,17 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, const std::string fileName = folder + sample->name() + "_" + std::to_string(id.dsid())+"_" + id.campaign() + "_"+id.simulation() + suffix + ".root"; const bool nominalOnly = sample->nominalOnly(); const std::vector<std::string> allBranches = mainNode.GetColumnNames(); + + std::vector<std::string> notSelected = Utils::requestedNotPresentElements(nominalOnly ? m_systReplacer.nominalBranches(allBranches) : allBranches, + m_config->ntuple()->branches()); + + if (!notSelected.empty()) { + LOG(WARNING) << "Some branches were requested for storing into the ntuples but were not found:\n"; + for (const auto& i : notSelected) { + LOG(WARNING) << "\tRequested and not found branch (regex): " << i << "\n"; + } + } + const std::vector<std::string> selectedBranches = m_config->ntuple()->listOfSelectedBranches(nominalOnly ? 
m_systReplacer.nominalBranches(allBranches) : allBranches); LOG(VERBOSE) << "List of selected branches:\n"; for (const auto& iselected : selectedBranches) { @@ -478,12 +503,12 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, opts.fCompressionLevel = m_config->ntupleCompressionLevel(); opts.fVector2RVec = m_config->convertVectorToRVec(); mainNode.Snapshot(sample->recoTreeName(), fileName, selectedBranches, opts); - LOG(DEBUG) << "Number of event loops: " << mainNode.GetNRuns() << ". For an optimal run, this number should be 1\n"; - auto nEntriesAfterCuts = mainNode.Count().GetValue(); + auto nEntriesAfterCuts = filterNodeCount.GetValue(); if (nEntriesAfterCuts==0) { LOG(WARNING) << "UniqueSampleID: " << id << ", has no events after cuts, generating an empty reco TTree\n"; } + LOG(DEBUG) << "Number of event loops: " << mainNode.GetNRuns() << ". For an optimal run, this number should be 1\n"; // IF run in multi-threaded mode, we need to manually add an empty tree: // See ROOT::DataFrame::Snapshot for more details if (m_config->numCPU()!=1 && nEntriesAfterCuts==0){ @@ -507,8 +532,17 @@ void MainFrame::processUniqueSampleNtuple(const std::shared_ptr<Sample>& sample, LOG(INFO) << "Copying metadata from the original files\n"; copier.copyObjectsTo(fileName); LOG(INFO) << "Finished copying metadata from the original files\n"; + if (!m_customHistograms.empty()) { + std::unique_ptr<TFile> out(TFile::Open(fileName.c_str(), "UPDATE")); + if (!out) { + LOG(ERROR) << "Cannot open file: " << fileName << "\n"; + throw std::runtime_error{""}; + } + this->writeCustomHistograms(out.get()); + out->Close(); + } - if (!truthChains.size()) { + if (!truthChains.empty()) { LOG(DEBUG) << "Deleting truth chains\n"; } } @@ -532,6 +566,10 @@ std::string MainFrame::systematicOrFilter(const std::shared_ptr<Sample>& sample) const std::string& nominalSelection = m_config->ntuple()->selection(); std::string result = "(" + nominalSelection + ")"; + if (!sample->selectionSuffix().empty()) { + result = "(" + result; + result += "&&("+sample->selectionSuffix()+"))"; + } if (!sample->nominalOnly()) { for (const auto& isyst : sample->systematics()) { const std::string systSelection = m_systReplacer.replaceString(nominalSelection, isyst); @@ -638,6 +676,8 @@ ROOT::RDF::RNode MainFrame::addSingleWeightColumn(ROOT::RDF::RNode mainNode, // add it to the list of branches m_systReplacer.addBranch(systName); + m_systReplacer.addSingleSystematic("weight_total_NOSYS", systematic->name()); + auto node = mainNode.Define(systName, formula); return node; } @@ -721,12 +761,14 @@ std::vector<SystematicHisto> MainFrame::processHistograms(std::vector<std::vecto } RegionHisto regionHisto(ireg->name()); - ROOT::RDF::RNode& node = filters.at(systIndex).at(regIndex); + ROOT::RDF::RNode node = filters.at(systIndex).at(regIndex); this->processHistograms1D(®ionHisto, node, sample, ireg, isyst); this->processHistograms2D(®ionHisto, node, sample, ireg, isyst); + this->processHistogramsProfile(®ionHisto, node, sample, ireg, isyst); + this->processRecoVsTruthHistograms2D(®ionHisto, node, sample, ireg, isyst); this->processHistograms3D(®ionHisto, node, sample, ireg, isyst); @@ -798,25 +840,44 @@ void MainFrame::writeHistosToFile(const std::vector<SystematicHisto>& histos, } else { out->cd(isystHist.name().c_str()); } + + const Variable& var = Utils::getVariableByName(sample->regions(), iregionHist.name(), ivariableHist.name()); + if (allUniqueSamples) { - ivariableHist.histo()->Write(histoName.c_str()); + auto histo = 
ivariableHist.histo(); + Utils::MergeUnderOverFlow(histo.GetPtr(), var.underOverFlowType()); + histo->Write(histoName.c_str()); } else { - ivariableHist.histoUniquePtr()->Write(histoName.c_str()); + std::unique_ptr<TH1D> histo(static_cast<TH1D*>(ivariableHist.histoUniquePtr()->Clone())); + Utils::MergeUnderOverFlow(histo.get(), var.underOverFlowType()); + + histo->Write(histoName.c_str()); } } // 2D histograms for (const auto& ivariableHist2D : iregionHist.variableHistos2D()) { const std::string histo2DName = StringOperations::replaceString(ivariableHist2D.name(), "_NOSYS", "") + "_" + iregionHist.name(); + + const std::pair<Variable, Variable> variables = Utils::get2DVariablesByName(sample, iregionHist.name(), histo2DName); + if (m_config->useRegionSubfolders()) { out->cd(subRegionName.c_str()); } else { out->cd(isystHist.name().c_str()); } if (allUniqueSamples) { - ivariableHist2D.histo()->Write(histo2DName.c_str()); + auto histo = ivariableHist2D.histo(); + Utils::MergeUnderOverFlow2D(histo.GetPtr(), variables.first.underOverFlowType(), variables.second.underOverFlowType()); + histo->GetXaxis()->SetTitle(variables.first.name().c_str()); + histo->GetYaxis()->SetTitle(variables.second.name().c_str()); + histo->Write(histo2DName.c_str()); } else { - ivariableHist2D.histoUniquePtr()->Write(histo2DName.c_str()); + std::unique_ptr<TH2D> histo(static_cast<TH2D*>(ivariableHist2D.histoUniquePtr()->Clone())); + Utils::MergeUnderOverFlow2D(histo.get(), variables.first.underOverFlowType(), variables.second.underOverFlowType()); + histo->GetXaxis()->SetTitle(variables.first.name().c_str()); + histo->GetYaxis()->SetTitle(variables.second.name().c_str()); + histo->Write(histo2DName.c_str()); } } @@ -834,6 +895,21 @@ void MainFrame::writeHistosToFile(const std::vector<SystematicHisto>& histos, ivariableHist3D.histoUniquePtr()->Write(histo3DName.c_str()); } } + + // TProfile histograms + for (const auto& ivariableHistProfile : iregionHist.variableHistosProfile()) { + const std::string histoProfileName = StringOperations::replaceString(ivariableHistProfile.name(), "_NOSYS", "") + "_" + iregionHist.name(); + if (m_config->useRegionSubfolders()) { + out->cd(subRegionName.c_str()); + } else { + out->cd(isystHist.name().c_str()); + } + if (allUniqueSamples) { + ivariableHistProfile.histo()->Write(histoProfileName.c_str()); + } else { + ivariableHistProfile.histoUniquePtr()->Write(histoProfileName.c_str()); + } + } } } @@ -858,8 +934,11 @@ void MainFrame::writeHistosToFile(const std::vector<SystematicHisto>& histos, // Write truth histograms for (const auto& itruthHist : truthHistos) { const std::string truthHistoName = StringOperations::replaceString(itruthHist.name(), "_NOSYS", ""); + const auto& var = Utils::getVariableByNameTruth(sample, itruthHist.name()); out->cd(); - itruthHist.histoUniquePtr()->Write(truthHistoName.c_str()); + std::unique_ptr<TH1D> histo(static_cast<TH1D*>(itruthHist.histoUniquePtr()->Clone())); + Utils::MergeUnderOverFlow(histo.get(), var.underOverFlowType()); + histo->Write(truthHistoName.c_str()); } if (m_config->totalJobSplits() > 0 && sample->hasUnfolding()) { @@ -868,6 +947,10 @@ void MainFrame::writeHistosToFile(const std::vector<SystematicHisto>& histos, this->writeUnfoldingHistos(out.get(), histos, truthHistos, sample); } + if (!m_customHistograms.empty()) { + this->writeCustomHistograms(out.get()); + } + out->Close(); } @@ -1033,8 +1116,44 @@ void MainFrame::processHistograms2D(RegionHisto* regionHisto, } } +void MainFrame::processHistogramsProfile(RegionHisto* regionHisto, 
+ const ROOT::RDF::RNode& node, + const std::shared_ptr<Sample>& sample, + const std::shared_ptr<Region>& region, + const std::shared_ptr<Systematic>& systematic) const { + + for (const auto& combinations : region->variablesForProfile()) { + const Variable& v1 = region->variableByName(combinations.first); + const Variable& v2 = region->variableByName(combinations.second); + const std::string name = "profile_" + v1.name() + "_vs_" + v2.name(); + if ((v1.isNominalOnly() || v2.isNominalOnly()) && systematic->name() != "NOSYS") continue; + + const std::vector<std::string>& variables = sample->variables(); + auto itrVar1 = std::find(variables.begin(), variables.end(), v1.name()); + auto itrVar2 = std::find(variables.begin(), variables.end(), v2.name()); + if (itrVar1 == variables.end() || itrVar2 == variables.end()) { + LOG(VERBOSE) << "Skipping variable (TProfile): " << name << " for sample: " << sample->name() << ", systematic: " << systematic->name() << "\n"; + continue; + } + + VariableHistoProfile variableHistoProfile(name); + ROOT::RDF::RResultPtr<TProfile> histogramProfile = this->bookProfilehisto(node, v1, v2, systematic); + + if (!histogramProfile) { + LOG(ERROR) << "Histogram for sample: " << sample->name() << ", systematic: " + << systematic->name() << ", region: " << region->name() << " and variable combination: " << v1.name() << " & " << v2.name() << " is empty!\n"; + throw std::runtime_error(""); + + } + + variableHistoProfile.setHisto(histogramProfile); + + regionHisto->addVariableHistoProfile(std::move(variableHistoProfile)); + } +} + void MainFrame::processRecoVsTruthHistograms2D(RegionHisto* regionHisto, - ROOT::RDF::RNode& node, + ROOT::RDF::RNode node, const std::shared_ptr<Sample>& sample, const std::shared_ptr<Region>& region, const std::shared_ptr<Systematic>& systematic) { @@ -1044,7 +1163,16 @@ ROOT::RDF::RNode passedNode = node; if (!itruth->selection().empty()) { - passedNode = node.Filter(itruth->selection()); + + // the variables in the selection need to be protected + // to make sure they can be read when the corresponding truth event does not exist + const std::vector<std::string> columns = Utils::getColumnsFromString(itruth->selection(), itruth->truthTreeName(), passedNode); + for (const auto& icolumn : columns) { + LOG(DEBUG) << "Setting FilterAvailable() " << icolumn << " for truth: " << itruth->name() << "\n"; + passedNode = passedNode.FilterAvailable(icolumn); + } + + passedNode = passedNode.Filter(itruth->selection()); } for (const auto& imatch : itruth->matchedVariables()) { const Variable& recoVariable = region->variableByName(imatch.first); @@ -1060,7 +1188,7 @@ } } - const std::string name = recoVariable.name() + "_vs_" + itruth->name() + "_" + truthVariable.name(); + const std::string name = itruth->name() + "_" + truthVariable.name() + "_vs_" + recoVariable.name(); VariableHisto2D variableHistoPassed(name); passedNode = passedNode.FilterAvailable(truthVariable.definition());
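In effect, every truth column used in the selection is first required to exist for the event before the selection string is evaluated, so reco events without a matched truth entry are dropped instead of causing read errors. Schematically, with hypothetical tree and branch names:

    // selection "particle.ttbar_m > 0" on the matched truth tree "particle"
    for (const auto& column : Utils::getColumnsFromString("particle.ttbar_m > 0", "particle", passedNode)) {
        passedNode = passedNode.FilterAvailable(column);  // require a valid truth match
    }
    passedNode = passedNode.Filter("particle.ttbar_m > 0");

@@ -1120,7 +1248,7 @@ std::vector<std::pair<std::unique_ptr<TChain>, std::unique_ptr<TTreeIndex> > > MainFrame::connectTruthTrees(std::unique_ptr<TChain>& chain, const std::shared_ptr<Sample>& sample, - const std::vector<std::string>& filePaths) const { + const std::vector<std::string>& filePaths) { std::vector<std::pair<std::unique_ptr<TChain>, std::unique_ptr<TTreeIndex> > > 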
result; @@ -1149,6 +1277,15 @@ std::vector<std::pair<std::unique_ptr<TChain>, std::unique_ptr<TTreeIndex> > > M LOG(WARNING) << "This can cause issues when doing the reco and truth tree matching!\n"; } + // add the branches to the systematicReplacer + std::vector<std::string> listOfTruthBranches; + const TObjArray* const truthBranchList = truthChain->GetListOfBranches(); + for (int ibranch = 0; ibranch < truthBranchList->GetSize(); ++ibranch) { + const std::string name = truthBranchList->At(ibranch)->GetName(); + listOfTruthBranches.emplace_back(name); + } + m_systReplacer.addTruthBranchesNominal(itruth, listOfTruthBranches); + std::unique_ptr<TTreeIndex> t(nullptr); if (indexNames.size() == 1 ) { LOG(INFO) << "Building reco truth index with: " << indexNames.at(0) << "\n"; @@ -1224,9 +1361,13 @@ void MainFrame::writeUnfoldingHistos(TFile* outputFile, for (const auto& imatch : itruth->matchedVariables()) { const std::string& truthName = itruth->name() + "_" + imatch.second; const std::string& recoName = imatch.first; - const std::string& migrationName = recoName + "_vs_" + truthName; + const std::string& migrationName = truthName + "_vs_" + recoName; std::unique_ptr<TH1D> truth = Utils::copyHistoFromVariableHistos(truthHistos, truthName); + const auto& truthVar = Utils::getVariableByNameTruth(sample, truthName); + + Utils::MergeUnderOverFlow(truth.get(), truthVar.underOverFlowType()); + for (const auto& isystHist : histos) { if (isystHist.regionHistos().empty()) { LOG(WARNING) << "No histograms available for sample: " << sample->name() << ", systematic: " << isystHist.name() << "\n"; @@ -1253,6 +1394,11 @@ void MainFrame::writeUnfoldingHistos(TFile* outputFile, std::unique_ptr<TH1D> reco = Utils::copyHistoFromVariableHistos(iregionHist.variableHistos(), recoName); std::unique_ptr<TH2D> migration = Utils::copyHistoFromVariableHistos2D(iregionHist.variableHistos2D(), migrationName); + std::pair<Variable, Variable> vars = Utils::get2DVariablesByName(sample, iregionHist.name(), migrationName+"_"+iregionHist.name()); + + Utils::MergeUnderOverFlow(reco.get(), vars.first.underOverFlowType()); + Utils::MergeUnderOverFlow2D(migration.get(), vars.first.underOverFlowType(), vars.second.underOverFlowType()); + std::unique_ptr<TH1D> selectionEff(migration->ProjectionX("")); selectionEff->Divide(truth.get()); selectionEff->SetDirectory(nullptr); @@ -1510,6 +1656,17 @@ ROOT::RDF::RResultPtr<TH2D> MainFrame::book2Dhisto(ROOT::RDF::RNode node, this->systematicWeight(systematic)); } +ROOT::RDF::RResultPtr<TProfile> MainFrame::bookProfilehisto(ROOT::RDF::RNode node, + const Variable& variable1, + const Variable& variable2, + const std::shared_ptr<Systematic>& systematic) const { + + return node.Profile1D(Utils::histoModelProfile(variable1, variable2), + this->systematicVariable(variable1, systematic), + this->systematicVariable(variable2, systematic), + this->systematicWeight(systematic)); +} + ROOT::RDF::RResultPtr<TH3D> MainFrame::book3Dhisto(ROOT::RDF::RNode node, const Variable& variable1, const Variable& variable2, @@ -1749,6 +1906,16 @@ void MainFrame::processSingleTruthTreeNtuple(const std::shared_ptr<Truth>& truth mainNode = mainNode.Filter(truth->selection()); } + std::vector<std::string> notSelected = Utils::requestedNotPresentElements(mainNode.GetColumnNames(), + truth->branches()); + + if (!notSelected.empty()) { + LOG(WARNING) << "Some branches were requested for storing into the ntuples but were not found:\n"; + for (const auto& i : notSelected) { + LOG(WARNING) << "\tRequested and not found 
branch (regex): " << i << "\n"; + } + } + const std::vector<std::string> branches = Utils::selectedNotExcludedElements(mainNode.GetColumnNames(), truth->branches(), truth->excludedBranches()); @@ -1758,6 +1925,11 @@ void MainFrame::processSingleTruthTreeNtuple(const std::shared_ptr<Truth>& truth LOG(VERBOSE) << "branch: " << ibranch << "\n"; } + if (branches.empty()) { + LOG(INFO) << "No selected branches for truth tree: " << truth->truthTreeName() << " - will not produce the truth tree\n"; + return; + } + ROOT::RDF::RSnapshotOptions opts; opts.fMode = "UPDATE"; opts.fAutoFlush = m_config->ntupleAutoFlush(); @@ -1804,3 +1976,29 @@ void MainFrame::prepareONNXwrapper() { infer->initializeModels(); } } + +void MainFrame::writeCustomHistograms(TFile* file) const { + + if (!file) { + LOG(ERROR) << "Passed file is nullptr\n"; + return; + } + + file->cd(); + + for (auto& i : m_customHistograms) { + if (!i) { + LOG(WARNING) << "Passed nullptr custom histogram\n"; + continue; + } + + i->SetDirectory(nullptr); + const std::string name = i->GetName(); + LOG(DEBUG) << "Storing object: " << name << "\n"; + i->Write(name.c_str()); + } +} + +void MainFrame::addCustomHistogramsToOutput(const TH1& hist) { + m_customHistograms.emplace_back(std::unique_ptr<TH1>(static_cast<TH1*>(hist.Clone()))); +} \ No newline at end of file diff --git a/Root/MetadataManager.cc b/Root/MetadataManager.cc index 76a71162ef7449fa5649a83c4c7b989f4f147141..722e5473aa097553a7d21d3a22e20fdeb7e5f928 100644 --- a/Root/MetadataManager.cc +++ b/Root/MetadataManager.cc @@ -14,6 +14,7 @@ #include <fstream> #include <exception> +#include <memory> MetadataManager::MetadataManager() noexcept { @@ -78,16 +79,31 @@ void MetadataManager::readSumWeights(const std::string& path) { } -void MetadataManager::readXSectionFiles(const std::vector<std::string>& xSectionFiles, const std::vector<int>& usedDSIDs) { - XSectionManager xSectionManger(xSectionFiles, usedDSIDs); +void MetadataManager::readXSectionFiles(const std::map<std::vector<std::string>, std::vector<std::string>>& xSectionFiles, const std::vector<int>& usedDSIDs) { + std::map<std::string, std::shared_ptr<const XSectionManager>> xSectionManagers; + for (const auto& [campaigns, xsection_files] : xSectionFiles) { + std::shared_ptr<const XSectionManager> xSectionManager = std::make_shared<XSectionManager>(xsection_files, usedDSIDs); + for (const std::string& campaign : campaigns) { + xSectionManagers.insert({campaign, xSectionManager}); + } + } - for (auto &m_mapPair : m_metadata) { - const UniqueSampleID &uniqueSampleId = m_mapPair.first; + for (auto &mapPair : m_metadata) { + const UniqueSampleID &uniqueSampleId = mapPair.first; const int dsid = uniqueSampleId.dsid(); if (!usedDSIDs.empty() && std::find(usedDSIDs.begin(), usedDSIDs.end(), dsid) == usedDSIDs.end()) continue; if (uniqueSampleId.isData()) continue; - Metadata &metadata = m_mapPair.second; - const double xSec = xSectionManger.xSection(uniqueSampleId.dsid()); + + const std::string campaign = uniqueSampleId.campaign(); + auto itr = xSectionManagers.find(campaign); + if (itr == xSectionManagers.end()) { + LOG(ERROR) << "Cannot find the campaign " + campaign + " in the xSectionManagers map\n"; + throw std::invalid_argument(""); + } + const std::shared_ptr<const XSectionManager> &xSectionManager = itr->second; + + Metadata &metadata = mapPair.second; + const double xSec = xSectionManager->xSection(uniqueSampleId.dsid()); LOG(DEBUG) << "Cross-section for UniqueSample: " << uniqueSampleId << " is: " << xSec << "\n"; if (xSec <= 0) { 
LOG(WARNING) << "Cross-section for UniqueSample: " << uniqueSampleId << " is <= 0. Please check!"; @@ -242,7 +258,7 @@ ROOT::RDF::Experimental::RSample MetadataManager::singleSampleInfo(const std::sh ROOT::RDF::Experimental::RMetaData meta = this->sampleMetadata(sample, id); - ROOT::RDF::Experimental::RSample result(sample->name(), sample->recoTreeName(), this->filePaths(id), meta); + ROOT::RDF::Experimental::RSample result(sample->name(), sample->recoTreeName(), paths, meta); return result; } @@ -255,6 +271,12 @@ ROOT::RDF::Experimental::RMetaData MetadataManager::sampleMetadata(const std::sh meta.Add("luminosity", this->luminosity(id.campaign())); meta.Add("xSection", this->crossSection(id)); + + // these are not directly needed by FastFrames, but they might be useful for the users + meta.Add("unique_sample_id", id.dsid()); + meta.Add("unique_sample_campaign", id.campaign()); + meta.Add("unique_sample_simulation", id.simulation()); + LOG(DEBUG) << "UniqueSample: " << id << " adding luminosity: " << this->luminosity(id.campaign()) << ", xSection: " << this->crossSection(id) << "\n"; bool addedNominal(false); diff --git a/Root/ONNXWrapper.cc b/Root/ONNXWrapper.cc index b83d8260ca4996291bb693e69f8799780e8a15f5..d2c7f37fdd984598d54a3d47545575ac43483171 100644 --- a/Root/ONNXWrapper.cc +++ b/Root/ONNXWrapper.cc @@ -25,6 +25,7 @@ ONNXWrapper::ONNXWrapper( // any session options are set via this object // use single thread (single CPU core) for the model evaluation m_session_options->SetIntraOpNumThreads(1); + m_session_options->SetInterOpNumThreads(1); // ONNX can perform optimizations of the model graph to improve performance (ORT_ENABLE_EXTENDED) m_session_options->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); diff --git a/Root/StringOperations.cc b/Root/StringOperations.cc index 2b3492692aade330c8e5868780a3f4d11664c200..60f5a50db988f6742d47215c671a399509716ec8 100644 --- a/Root/StringOperations.cc +++ b/Root/StringOperations.cc @@ -146,4 +146,31 @@ std::string StringOperations::joinStrings(const std::string &separator, const st } if (result.size() > 0) result.erase(result.size()-separator.size()); return result; +}; + +std::vector<std::pair<std::string, int>> StringOperations::getValidCxxVariableNames(const std::string &input) { + std::vector<std::pair<std::string, int>> result; + std::string currentWord; + auto addWordIfValid = [&result](const std::string &word, int startPos) -> void { + if (word.empty()) return; + if (isdigit(word[0])) return; + + result.emplace_back(word, startPos); + }; + + int startPos = -1; + for (size_t iPos = 0; iPos < input.length(); ++iPos) { + const char c = input[iPos]; + if (isalnum(c) || c == '_') { + if (startPos < 0) startPos = iPos; + currentWord += c; + } + else { + addWordIfValid(currentWord, startPos); + startPos = -1; + currentWord.clear(); + } + } + addWordIfValid(currentWord, startPos); + return result; }; \ No newline at end of file
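The scanner collects maximal runs of [A-Za-z0-9_] characters together with their start positions and discards tokens beginning with a digit, so numeric literals are never treated as identifiers. Expected behaviour, as a usage sketch:

    const auto tokens = StringOperations::getValidCxxVariableNames("jet_pt_NOSYS > 25e3 && nJets >= 2");
    // tokens == { {"jet_pt_NOSYS", 0}, {"nJets", 23} }
    // "25e3" and "2" start with a digit and are skipped

This is what allows replaceString below to substitute whole identifiers at known positions instead of doing blind substring replacement.

diff --git a/Root/SystematicReplacer.cc b/Root/SystematicReplacer.cc index b92197e50e04ca699d698cf6ffcddb757febafbc..9d1f0aaca8617dd0025a370e71bec126167c75a9 100644 --- a/Root/SystematicReplacer.cc +++ b/Root/SystematicReplacer.cc @@ -96,13 +96,21 @@ std::string SystematicReplacer::replaceString(const std::string& original, const LOG(ERROR) << "Cannot find systematic: " << systematicName << " in the systematic map. 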
Please, fix!\n"; throw std::invalid_argument(""); } + const auto &affectedBranches = itr->second; std::string result(original); + std::vector<std::pair<std::string,int>> validCxxVariables = StringOperations::getValidCxxVariableNames(original); + std::reverse(validCxxVariables.begin(), validCxxVariables.end()); + for (const std::pair<std::string,int> &validCxxVariable : validCxxVariables) { + const std::string &branchName = validCxxVariable.first; + const int startPos = validCxxVariable.second; + + if (std::find(affectedBranches.begin(), affectedBranches.end(), branchName) == affectedBranches.end()) { + continue; + } - // loop over all affected branches and replace all of them - for (const std::string& ibranch : itr->second) { - const std::string replacer = StringOperations::replaceString(ibranch, "NOSYS", systematicName); - result = StringOperations::replaceString(result, ibranch, replacer); + const std::string replacer = StringOperations::replaceString(branchName, "NOSYS", systematicName); + result.replace(startPos, branchName.length(), replacer); } return result; @@ -180,6 +188,68 @@ void SystematicReplacer::addVariableAndEffectiveSystematics(const std::string& v } } +void SystematicReplacer::addSingleSystematic(const std::string& variable, const std::string& systematic) { + if (variable.find("NOSYS") == std::string::npos) { + LOG(ERROR) << "Variable " << variable << " does not contain \"NOSYS\"\n"; + throw std::invalid_argument(""); + } + + // check if the systematics is already added or not + auto itr = m_branchesAffectedBySyst.find(variable); + if (itr == m_branchesAffectedBySyst.end()) { + m_branchesAffectedBySyst.insert({variable, {systematic}}); + } else { + auto itrBranch = std::find(itr->second.begin(), itr->second.end(), systematic); + if (itrBranch == itr->second.end()) { + itr->second.emplace_back(systematic); + } + } + + auto itrSyst = m_systImpactsBranches.find(systematic); + if (itrSyst == m_systImpactsBranches.end()) { + m_systImpactsBranches.insert({systematic, {variable}}); + } else { + auto vec = std::find(itrSyst->second.begin(), itrSyst->second.end(), variable); + if (vec == itrSyst->second.end()) { + itrSyst->second.emplace_back(variable); + } + } + + if (!this->branchExists(variable)) { + m_allBranches.emplace_back(variable); + } +} + +void SystematicReplacer::addTruthBranchesNominal(const std::string& truthTreeName, const std::vector<std::string>& branches) { + for (const auto& ibranch : branches) { + if (ibranch.find("NOSYS") == std::string::npos) continue; + + const std::string branch = truthTreeName+"."+ibranch; + + bool isAlreadyAdded(false); + if (this->branchExists(ibranch)) { + // was added in the reco tree + isAlreadyAdded = true; + } + + // add it with the tree.branch name + m_branchesAffectedBySyst.insert({branch, {"NOSYS"}}); + auto itrSyst = m_systImpactsBranches.find("NOSYS"); + if (itrSyst == m_systImpactsBranches.end()) { + LOG(ERROR) << "Cannot find NOSYS systematic!\n"; + throw std::runtime_error(""); + } + + itrSyst->second.emplace_back(branch); + + if (!isAlreadyAdded) { + // if it is unique, add it also without the "." 
+ m_branchesAffectedBySyst.insert({ibranch, {"NOSYS"}}); + itrSyst->second.emplace_back(ibranch); + } + } +} + void SystematicReplacer::updateVariableAndEffectiveSystematics(const std::string& variable, const std::vector<std::string>& systematics) { if (!this->branchExists(variable)) { LOG(DEBUG) << "Variable " << variable << " does not exist, adding it\n"; diff --git a/Root/Utils.cc b/Root/Utils.cc index a01be9778623cf6f0eae76b4ec9e48cda6c4cc99..144923e2ddff28845887da34ceaff7ac167473b1 100644 --- a/Root/Utils.cc +++ b/Root/Utils.cc @@ -62,6 +62,15 @@ ROOT::RDF::TH2DModel Utils::histoModel2D(const Variable& v1, const Variable& v2) return ROOT::RDF::TH2DModel("", "", binEdges1.size() -1, binEdges1.data(), binEdges2.size() - 1, binEdges2.data()); } +ROOT::RDF::TProfile1DModel Utils::histoModelProfile(const Variable& v1, const Variable& v2) { + if (v1.hasRegularBinning()) { + return ROOT::RDF::TProfile1DModel("", "", v1.axisNbins(), v1.axisMin(), v1.axisMax(), v2.axisMin(), v2.axisMax()); + } else { + const std::vector<double>& binEdges1 = v1.binEdges(); + return ROOT::RDF::TProfile1DModel("", "", binEdges1.size() -1, binEdges1.data(), v2.axisMin(), v2.axisMax()); + } +} + ROOT::RDF::TH3DModel Utils::histoModel3D(const Variable& v1, const Variable& v2, const Variable& v3) { const std::vector<double> binEdges1 = v1.hasRegularBinning() ? fromRegularToEdges(v1) : v1.binEdges(); const std::vector<double> binEdges2 = v2.hasRegularBinning() ? fromRegularToEdges(v2) : v2.binEdges(); @@ -157,6 +166,86 @@ const Variable& Utils::getVariableByName(const std::vector<std::shared_ptr<Regio return *itrVar; } +const Variable& Utils::getVariableByNameTruth(const std::shared_ptr<Sample>& sample, + const std::string& variableName) { + + for (const auto& itruth : sample->truths()) { + const auto vars = itruth->variables(); + auto itr = std::find_if(vars.begin(), vars.end(), [&itruth,&variableName](const auto& element){return variableName == (itruth->name() + "_" + element.name());}); + + if (itr != vars.end()) { + return *itr; + } + } + + // this should not happen + LOG(ERROR) << "Cannot find the corresponding truth variable for: " << variableName << "\n"; + throw std::runtime_error(""); +} + + +std::pair<Variable, Variable> Utils::get2DVariablesByName(const std::shared_ptr<Sample>& sample, + const std::string& regionName, + const std::string& variableName) { + + // first find the region + auto itrReg = std::find_if(sample->regions().begin(), sample->regions().end(), [&regionName](const auto& element){return element->name() == regionName;}); + if (itrReg == sample->regions().end()) { + LOG(ERROR) << "Cannot find region: " << regionName << "\n"; + throw std::runtime_error(""); + } + + const auto vars = (*itrReg)->variables(); + + auto itrVar1 = std::find_if(vars.begin(), vars.end(), + [&variableName](const auto& element){return StringOperations::stringStartsWith(variableName, (element.name()+"_vs"));}); + + std::unique_ptr<Variable> var1(nullptr); + + if (itrVar1 == vars.end()) { + // check truth + for (const auto& itruth : sample->truths()) { + const auto& truthVars = itruth->variables(); + auto itrTruth1 = std::find_if(truthVars.begin(), truthVars.end(), + [&variableName, &itruth](const auto& element){return StringOperations::stringStartsWith(variableName, (itruth->name() + "_" + element.name()+"_vs"));}); + + if (itrTruth1 != truthVars.end()) { + var1 = std::make_unique<Variable>(*itrTruth1); + } + } + } else { + var1 = std::make_unique<Variable>(*itrVar1); + } + + auto itrVar2 = std::find_if(vars.begin(),
vars.end(), + [&variableName, &regionName](const auto& element){return StringOperations::stringEndsWith(variableName, ("vs_"+element.name()+"_" + regionName));}); + + std::unique_ptr<Variable> var2(nullptr); + + if (itrVar2 == vars.end()) { + // check truth + for (const auto& itruth : sample->truths()) { + const auto& truthVars = itruth->variables(); + auto itrTruth2 = std::find_if(truthVars.begin(), truthVars.end(), + [&variableName, &itruth, &regionName](const auto& element){return StringOperations::stringEndsWith(variableName, ("vs_" + itruth->name() + "_" + element.name()+"_" + regionName));}); + + if (itrTruth2 != truthVars.end()) { + var2 = std::make_unique<Variable>(*itrTruth2); + } + } + } else { + var2 = std::make_unique<Variable>(*itrVar2); + } + + if (!var1 || !var2) { + LOG(ERROR) << "Cannot split 2D histogram into variables: " << variableName << "\n"; + throw std::runtime_error(""); + } + + return std::make_pair(std::move(*var1), std::move(*var2)); +} + + bool Utils::compareDoubles(const double a, const double b, const double relative_precision) { return std::abs(a - b) < relative_precision * std::max(std::abs(a), std::abs(b)); }; @@ -201,6 +290,24 @@ std::vector<std::string> Utils::selectedNotExcludedElements(const std::vector<st return result; } +std::vector<std::string> Utils::requestedNotPresentElements(const std::vector<std::string>& all, + const std::vector<std::string>& requested) { + + std::vector<std::string> result; + + for (const auto& irequested : requested) { + std::regex match(irequested); + + auto itr = std::find_if(all.begin(), all.end(), [&match](const auto& element){return std::regex_match(element, match);}); + + if (itr == all.end()) { + result.emplace_back(irequested); + } + } + + return result; +} + std::map<std::string, std::string> Utils::variablesWithFormulaReco(ROOT::RDF::RNode node, const std::shared_ptr<Sample>& sample, const std::vector<std::string>& truthTrees) { @@ -270,6 +377,9 @@ std::map<std::string, std::string>Utils::variablesWithFormulaTruth(ROOT::RDF::RN auto itrColumns = std::find(columns.begin(), columns.end(), definition); if (itrColumns != columns.end()) continue; + itrColumns = std::find(columns.begin(), columns.end(), treeName+"."+definition); + if (itrColumns != columns.end()) continue; + // skip if it is already added auto itrMap = result.find(definition); if (itrMap != result.end()) continue; @@ -325,4 +435,120 @@ std::vector<std::string> Utils::matchingBranchesFromChains(const std::unique_ptr } return matches; -} \ No newline at end of file +} + +std::vector<std::string> Utils::getColumnsFromString(const std::string& formula, + const std::string& truthTreeName, + ROOT::RDF::RNode& node) { + + const std::vector<std::string> allColumns = node.GetColumnNames(); + + std::vector<std::string> result; + + for (const auto& icolumn : allColumns) { + std::string toCheck(""); + const auto strip = StringOperations::splitAndStripString(icolumn, "."); + if ((strip.size() == 2) && (strip.at(0) == truthTreeName)) { + toCheck = strip.at(1); + } + if (!toCheck.empty() && formula.find(toCheck) != std::string::npos) { + result.emplace_back(toCheck); + } + } + + return result; +} + +void Utils::MergeUnderOverFlow(TH1D* histo, const UnderOverFlowType& type) { + if (type == UnderOverFlowType::NO_UNDER_OVER_FLOW_MERGE) return; + + const int bins = histo->GetNbinsX(); + + if (type == UnderOverFlowType::MERGE_BOTH || type == UnderOverFlowType::MERGE_UNDERFLOW) { + const double valueUnder = histo->GetBinContent(0); + const double errorUnder =
histo->GetBinError(0); + const double valueFirst = histo->GetBinContent(1); + const double errorFirst = histo->GetBinError(1); + + histo->SetBinContent(1, valueUnder + valueFirst); + histo->SetBinError(1, std::hypot(errorUnder, errorFirst)); + + histo->SetBinContent(0, 0); + histo->SetBinError(0, 0); + } + if (type == UnderOverFlowType::MERGE_BOTH || type == UnderOverFlowType::MERGE_OVERFLOW) { + + const double valueOver = histo->GetBinContent(bins+1); + const double errorOver = histo->GetBinError(bins+1); + const double valueLast = histo->GetBinContent(bins); + const double errorLast = histo->GetBinError(bins); + + histo->SetBinContent(bins, valueOver + valueLast); + histo->SetBinError(bins, std::hypot(errorOver, errorLast)); + + histo->SetBinContent(bins+1, 0); + histo->SetBinError(bins+1, 0); + } +} + +void Utils::MergeUnderOverFlow2D(TH2D* histo, const UnderOverFlowType& type1, const UnderOverFlowType& type2) { + if (type1 == UnderOverFlowType::NO_UNDER_OVER_FLOW_MERGE && type2 == UnderOverFlowType::NO_UNDER_OVER_FLOW_MERGE) return; + + const int binsX = histo->GetNbinsX(); + const int binsY = histo->GetNbinsY(); + + for (int ibinX = 1; ibinX <= binsX; ++ibinX) { + if (type2 == UnderOverFlowType::MERGE_BOTH || type2 == UnderOverFlowType::MERGE_UNDERFLOW) { + const double valueUnder = histo->GetBinContent(ibinX, 0); + const double errorUnder = histo->GetBinError(ibinX, 0); + const double valueFirst = histo->GetBinContent(ibinX, 1); + const double errorFirst = histo->GetBinError(ibinX, 1); + + histo->SetBinContent(ibinX, 1, valueUnder + valueFirst); + histo->SetBinError(ibinX, 1, std::hypot(errorUnder, errorFirst)); + + histo->SetBinContent(ibinX, 0, 0); + histo->SetBinError(ibinX, 0, 0); + } + if (type2 == UnderOverFlowType::MERGE_BOTH || type2 == UnderOverFlowType::MERGE_OVERFLOW) { + + const double valueOver = histo->GetBinContent(ibinX, binsY+1); + const double errorOver = histo->GetBinError(ibinX, binsY+1); + const double valueLast = histo->GetBinContent(ibinX, binsY); + const double errorLast = histo->GetBinError(ibinX, binsY); + + histo->SetBinContent(ibinX, binsY, valueOver + valueLast); + histo->SetBinError(ibinX, binsY, std::hypot(errorOver, errorLast)); + + histo->SetBinContent(ibinX, binsY+1, 0); + histo->SetBinError(ibinX, binsY+1, 0); + } + } + for (int ibinY = 1; ibinY <= binsY; ++ibinY) { + if (type1 == UnderOverFlowType::MERGE_BOTH || type1 == UnderOverFlowType::MERGE_UNDERFLOW) { + const double valueUnder = histo->GetBinContent(0, ibinY); + const double errorUnder = histo->GetBinError(0, ibinY); + const double valueFirst = histo->GetBinContent(1, ibinY); + const double errorFirst = histo->GetBinError(1, ibinY); + + histo->SetBinContent(1, ibinY, valueUnder + valueFirst); + histo->SetBinError(1, ibinY, std::hypot(errorUnder, errorFirst)); + + histo->SetBinContent(0, ibinY, 0); + histo->SetBinError(0, ibinY, 0); + } + if (type1 == UnderOverFlowType::MERGE_BOTH || type1 == UnderOverFlowType::MERGE_OVERFLOW) { + + const double valueOver = histo->GetBinContent(binsX+1, ibinY); + const double errorOver = histo->GetBinError(binsX+1, ibinY); + const double valueLast = histo->GetBinContent(binsX, ibinY); + const double errorLast = histo->GetBinError(binsX, ibinY); + + histo->SetBinContent(binsX, ibinY, valueOver + valueLast); + histo->SetBinError(binsX, ibinY, std::hypot(errorOver, errorLast)); + + histo->SetBinContent(binsX+1, ibinY, 0); + histo->SetBinError(binsX+1, ibinY, 0); + } + } +} diff --git a/Root/Variable.cc b/Root/Variable.cc index 
fe039a6094a8160915e15afa84dd19a386818a13..902f28d3f182a3eeefcb7186a52fd325bbead70d 100644 --- a/Root/Variable.cc +++ b/Root/Variable.cc @@ -12,7 +12,8 @@ Variable::Variable(const std::string& name) noexcept : m_title("title"), m_binning(Binning()), m_isNominalOnly(false), - m_type(VariableType::UNDEFINED) + m_type(VariableType::UNDEFINED), + m_underOverFlowType(UnderOverFlowType::NO_UNDER_OVER_FLOW_MERGE) { } diff --git a/docs/changelog/index.md b/docs/changelog/index.md index 2923cb60ae5407d2ad9532b3f764593eda5569f1..06d64ff3b4c9fbb5d37fe3ee7c5cdeeb1668bb0c 100644 --- a/docs/changelog/index.md +++ b/docs/changelog/index.md @@ -1,14 +1,58 @@ # Changelog ### Upcoming release -- [issue #94](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/92): Fixing bug with regex support for regions in sample block. + +- [issue #127](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/127): Fix in ```merge_jobs.py``` script, so that it will produce also unfolding corrections. + +### 5.1.0 <small>March 24, 2025</small> + +- [issue #126](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/126): Fix batch submission script which crashes due to new x-sec files structure. +- [issue #125](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/125): Add protection for copying the reco tree when processing ntuples. +- [issue #124](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/124): Store per-UniqueSample metadata in RSampleInfo. +- [issue #123](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/123): Ensuring that running FF with an ONNX wrapper will not result in excess CPU threads being spun up. +- [issue #122](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/122): Fixing issue with using the four-momentum class in the truth selection when doing reco and truth matching causing the code to throw an exception. +- [issue #121](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/121): Fixing bug which was causing ```selected_samples``` option to be ignored in trex-fitter settings file. + +### 5.0.0 <small>March 11, 2025</small> + +- [issue #119](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/119): Add support for the same DSID with different cross-section values. **This breaks interface** for the config setting. +- [issue #117](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/117): Add interface for adding custom histograms to the output file(s). +- [issue #118](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/118): Do not run the event loop twice when running ntupling. +- [issue #116](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/116): Add a warning for requested branches in ntupling if these are not found. +- [issue #114](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/114): Add event weights variables as the last step of adding variables. + +### 4.3.0 <small>February 20, 2025</small> + +- [issue #113](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/113): Respect selection suffix for Samples when running ntupling. +- [issue #111](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/111): Add support for TProfile histograms. +- [issue #112](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/112): Add option for timestamped batch submission directories for config/class flexibility. +- [issue #110](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/110): Adding an option to apply under/overflow to the first/last bin for 1D and 2D histograms. 
+- [issue #109](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/109): Fixing issues with systematic replace if there are 2 branches and one is a substring of the other. +- [issue #108](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/108): Adding a script for automated updates of reference ROOT files (for FF developers). +- [issue #107](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/107): Updating input ROOT files for CI tests. Using Gaussian-smeared MC and data. + +### 4.2.0 <small>January 27, 2025</small> + +- [issue #105](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/105): Do not produce the truth tree during ntupling if no branches are selected. +- [issue #104](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/104): Protect against non-matched events when doing the truth selection. +- [issue #103](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/103): Fix typo in samples from CustomBlocks. +- [issue #102](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/102): Addresses an issue with cling warnings appearing in the FastFrames Docker image. + +### 4.1.0 <small>January 20, 2025</small> + +- [issue #101](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/101): Adding a docker image configuration file and an image build step to the CI. This `latest` image gets created for every push to the main branch, tagged images for every git tag. +- [issue #99](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/99): Fixed split in N jobs and added this feature to the batch submission script. +- [issue #95](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/95) and [issue #100](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/100) : Properly propagate systematics on the "weight_total_NOSYS" variable in custom code. Add support for matched truth branches with "NOSYS" in the name. +- [issue #97](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/97): Adding the possibility to specify trees to check when merging empty input files. +- [issue #96](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/96): Allowing the user to override/use any keyword for sample, region and fit blocks in trex-fitter settings config. +- [issue #94](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/94): Fixing bug with regex support for regions in sample block. - [issue #92](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/92): Adding CI test for testing histogram step on the ntuple output. ### 4.0.0 <small>December 4, 2024</small> -- [issue #91](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/91): Fix mysterious crash when pairing mroe than 1 truth tree to the reco tree. +- [issue #91](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/91): Fix mysterious crash when pairing more than 1 truth tree to the reco tree. - [issue #90](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/90): Fix memory leaks due to unreleased tchains. -- [issue #89](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/89): Allow support for treeName. prefit for the truth tree in case of identical branches in the truth and reco trees. +- [issue #89](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/89): Allow support for treeName. prefix for the truth tree in case of identical branches in the truth and reco trees. - [issue #88](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/88): Add error if the user requests reco vs. truth histograms, but disables truth vs. reco matching.
- [issue #43](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/43): Removed the workaround for the bug in ROOT for unmatched truth events. Using FilterAvailable() method of RDataFrame, see: https://root.cern/doc/v634/release-notes.html#rdataframe. This **breaks interface** as the `_IsMatched` variable used in the workaround is removed. - [issue #87](https://gitlab.cern.ch/atlas-amglab/fastframes/-/issues/87): Add an option to control the std::vector to RVec conversion for ntupling. This **breaks interface** for ntupling, see the related issue. diff --git a/docs/config.md b/docs/config.md index c74aa386413e40cf58004c1ecee5bb84ecaff12e..44dd477010028c88026278951780d001d504edb5 100644 --- a/docs/config.md +++ b/docs/config.md @@ -15,7 +15,7 @@ The options are case sensitive. Example config files can be found in ```test/con | default_sumweights | string | Default sum of weights to be used (can be overridden from Sample block). Default value is ```NOSYS```. | | default_event_weights | string | Default weight of events - i.e. product of mc_weights and all scale factors. The term corresponding to luminosity, sum of weights and x-section will be added automatically. For data, by default this is set to `1.` | | default_reco_tree_name | string |Default name of the reco-level tree. It can be overwritten in Sample block for a given sample. | -| xsection_files | list of strings | List of x-section files to use. Both TopDataPreparation and PMG formats are supported. In case of PMG file, the x-section is defined mulltiple times, it will take a value for the latest e-tag. The same DSIDs defined in TopDataPreparation cannot be defined in PMG file, nor multiple times in TopDataPreparation files (except for the case when their values are the same). You can find the PMG x-section files here: ```/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/``` | +| xsection_files | list of dicts | List of x-section files to use, for each campaign, or group of campaigns, in the form of ``` - files: ["file1", "file2", ...] campaigns: ["mc23a", "mc23d"]```. Both TopDataPreparation and PMG formats are supported. In case of a PMG file where the x-section is defined multiple times, it will take the value for the latest e-tag. The same DSIDs defined in TopDataPreparation cannot be defined in a PMG file, nor multiple times in TopDataPreparation files (except for the case when their values are the same). You can find the PMG x-section files here: ```/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/```. | | create_tlorentz_vectors_for | list of strings | List of objects (i.e. "jet", "el", "mu" ...) for which TLorentzVectors will be created for each systematic variation. Default is empty list. | | use_rvec | bool | Whether vector-like variables defined in FastFrames should be made as ```ROOT::VecOps::RVec``` (if set ```True```), or using ```std::vector``` (default is ```False```, i.e. use ```std::vector```) | | reco_to_truth_pairing_indices | list of strings | List of branches which should be used to pair reco-level to truth-level trees. Default is ``` ["runNumber", "eventNumber"]```| @@ -38,6 +38,8 @@ The options are case sensitive. Example config files can be found in ```test/con | ntuple_auto_flush | int | Corresponding option from ```RDF::RSnapshotOptions``` used to produce ntuples. Default value is 0. | split_processing_per_unique_samples | bool | Flag that controls if RDataFrame call should be done for each UniqueSampleID or the whole Sample is processed in one go.
The default is `False`, meaning the whole Sample will be processed in one go. Note that in case a given Sample has truth block configured, the processing for that sample will be done for each UniqueSampleID separately. | convert_vector_to_rvec | bool | Should std::vector branches be converted to ROOT's RVec during the ntupling step? Default is ```False``` | +| merge_underflow_overflow | str | Defines what to do with underflow and overflow bins, whether they should be merged to the first/last visible bins or not. Valid only for 1D and 2D histograms. The available options are: ```none```, ```underflow```, ```overflow```, ```both```. The default option is ```none```. The options are not case sensitive. The user can override the option for a particular variable in the corresponding block. + ## `ntuples` block settings @@ -79,6 +81,7 @@ This block is optional. For a more detailed explanation on how to use this block | variables | list of dicts | List of variables defined for the region | | histograms_2d | list of dicts | List of 2D histograms between 2 reco-level variables to produce. The dict must have 2 keys: ```x``` and ```y``` for variables on x and y axes. ```numbering_sequence``` block is supported (see details below). | | histograms_3d | list of dicts | List of 3D histograms between 3 reco-level variables to produce. The dict must have 3 keys: ```x```, ```y``` and ```z``` for variables on x, y and z axes. | +| profile | list of dicts | List of TProfile histograms between 2 reco-level variables to produce. The dict must have 2 keys: ```x``` and ```y``` for variables on x and y (the one for the mean calculation) axes. ```numbering_sequence``` block is supported (see details below) | #### `variable` block inside of the `region` block | **Option** | **Value type** | **Function** | @@ -89,7 +92,8 @@ This block is optional. For a more detailed explanation on how to use this block | binning | dict | Binning of the variable | | is_nominal_only | bool | If set to true, only histogram for NOSYS will be produced. Default is ```False``` | | type | string | Allows to tell the code to define the c++ template arguments for the histograms. This prevents JITing thus saving some CPU time and memory. Allowed options are "char", "unsigned char", "bool", "int", "long long int","unsigned int", "unsigned long", "unsigned long long int", "float", "double". The vector version of all of these types (except for "vector\<bool>") are also supported - one example is "vector\<float>". ROOT::RVec<type> are also supported, see [here](https://root.cern/doc/master/classROOT_1_1VecOps_1_1RVec.html). If not provided the JITed version will be used. | -| numbering_sequence| list of dicts | It can be used to automatically add more variables in one block, if they differ by a single value (for example index). More information can be found bellow in ```numbering_sequence``` block description. +| numbering_sequence| list of dicts | It can be used to automatically add more variables in one block, if they differ by a single value (for example index). More information can be found below in ```numbering_sequence``` block description. | +| merge_underflow_overflow | str | Defines what to do with underflow and overflow bins, whether they should be merged to the first/last visible bins or not. The available options are: ```none```, ```underflow```, ```overflow```, ```both```. The default option is ```none```. The options are not case sensitive.
Overrides the value from the ```General``` block.| #### `binning` block inside of the `variable` block The user has 2 options for defining the binning. Either specify bin edges for irregular binning, or specify number of bins and range of the x-axis for regular binning. diff --git a/docs/index.md b/docs/index.md index e484c43e9926a9524c75cf8e37300581482afd00..6e39c6bf2bbc97b7a7c4adf316aed8e795978082 100644 --- a/docs/index.md +++ b/docs/index.md @@ -98,6 +98,52 @@ asetup StatAnalysis,0.5.0 This will set up an appropriate version of ROOT (you can check the ROOT version with `root --version`) + + +#### Docker Image and Continuous Integration (CI) + +FastFrames provides pre-built Docker images to streamline the setup and usage of the framework. These images are based on the [ATLASOS](https://gitlab.cern.ch/atlas-sit/docker/) Docker images and rely on dependencies available through the CVMFS file system. As a result, the same conditions for running on lxplus-like machines apply when using these images. + +These Docker images are particularly useful for automated CI testing of downstream software, as they ensure a consistent and reproducible environment aligned with lxplus-like setups. + +##### Image Creation and Availability: + +- Main Branch Updates: Any update to the main branch triggers the creation of a new Docker image tagged as latest. This tag is ideal for testing ongoing development. +- Tagged Releases: Each tagged release in the repository results in a Docker image tagged with the corresponding version number (e.g., v1.2.3). These tags represent stable releases for production or specific use cases. + +##### Pulling the Docker Images + +To pull the latest image: + +```sh +docker pull gitlab.cern.ch:7999/atlas-amglab/fastframes:latest +``` + +To pull an image for a specific release (replace <version> with the desired tag): + +```sh +docker pull gitlab.cern.ch:7999/atlas-amglab/fastframes:<version> +``` + +##### Using the Docker Images + +These images are pre-configured to run FastFrames with dependencies set up via CVMFS. So, the host system must meet the following conditions for the images to work: + +- Access to the CVMFS file system. +- A configuration compatible with lxplus-like environments. + +Provided these conditions are met, you can easily use these images interactively for development and testing by mounting a working directory into the container (the `>` indicates that the commands are executed within the docker container): + +```sh +docker run -it -v /path/to/your/workdir gitlab.cern.ch:7999/atlas-amglab/fastframes:latest /bin/bash +> export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase # Sets up the ATLAS commands +> alias setupATLAS='source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh' # Creates usual setupATLAS alias +> setupATLAS +> asetup StatAnalysis,0.5.0 +``` + + + ## How to run the code: @@ -129,6 +175,12 @@ If one needs more detailed logging from this step, the following command line op --log_level <LOG LEVEL, FOR EXAMPLE "DEBUG"> ``` +The script will by default check ```reco```, ```truth``` and ```particleLevel``` trees (if some of them are missing, that is fine, they will be ignored). If you want to use any other list of trees, you can use the following argument to specify it.
+ +``` +--trees_to_check <comma separated list of trees> +``` + --- **TopCPToolkit < v2.12.0 users** @@ -261,7 +313,7 @@ In order to run only on a given set of campaigns, you can use: #### Submitting jobs to the HTCondor/Slurm batch service -FastFrames provides a useful script to submit jobs to the [CERN HTCondor batch service](https://batchdocs.web.cern.ch/concepts/index.html) or a local Slurm batch system. After loging into an Lxplus node go the directory you have FastFrames under: +FastFrames provides a useful script to submit jobs to the [CERN HTCondor batch service](https://batchdocs.web.cern.ch/concepts/index.html) or a local Slurm batch system. After logging into an Lxplus node go to the directory you have FastFrames under: ``` cd <your_fastframes_path> @@ -279,16 +331,17 @@ the following description will appear: ``` python3 batch_submit.py --help -usage: batch_submit.py [-h] [-c CONFIG] [--samples SAMPLES] [--step {h,n}] [--system {condor,slurm}] - [--custom-class-path CUSTOM_CLASS_PATH] [--flavour FLAVOUR] [--slurm_time SLURM_TIME] - [--memory MEMORY] [--dry-run] [--chicago] [--local-data] [--metadata-path METADATA_PATH] - [--remote-eos-access] [--kerberos-path KERBEROS_PATH] +usage: batch_submit.py [-h] [-c CONFIG] [--samples SAMPLES] [--split-n-jobs SPLIT_N_JOBS] [--step {h,n}] [--system {condor,slurm}] [--custom-class-path CUSTOM_CLASS_PATH] [--flavour FLAVOUR] + [--slurm_time SLURM_TIME] [--memory MEMORY] [--dry-run] [--chicago] [--local-data] [--metadata-path METADATA_PATH] [--remote-eos-access] [--kerberos-path KERBEROS_PATH] + [--timestamp] + options: -h, --help show this help message and exit -c CONFIG, --config CONFIG Path to the yml config file. - --samples SAMPLES A comma separated list of samples to run. One job is created per listed sample. Default: all - samples listed in the yml config. + --samples SAMPLES A comma separated list of samples to run. One job is created per listed sample. Default: all samples listed in the yml config. + --split-n-jobs SPLIT_N_JOBS + Number of jobs to split each sample into. Default: 1 (No splitting) --step {h,n} Step to run: 'n' (ntuples) or 'h' (histograms). Default: 'h' --system {condor,slurm} Batch system, either condor or slurm. Default: 'condor' @@ -297,18 +350,18 @@ options: --flavour FLAVOUR Job flavour which controls the max time of a job, CONDOR ONLY. Default: microcentury = 1h --slurm_time SLURM_TIME Max time job can run before it is killed. Default: 1:00:00 = 1h - --memory MEMORY The amount of RAM to be request in GB. This option is only valid if you are running in Chicago or - on Slurm. Memory in Lxplus scales as 2GB/CPU. Default: None - --dry-run Creates the execution and submission environment without sending the jobs to HTCondor. Useful for - debugging. + --memory MEMORY The amount of RAM to be request in GB. This option is only valid if you are running in Chicago or on Slurm. Memory in Lxplus scales as 2GB/CPU. Default: None + --dry-run Creates the execution and submission environment without sending the jobs to HTCondor. Useful for debugging. --chicago Use this flag if you are running the jobs in the Chicago Analysis Facility. - --local-data Use this flag if you want to copy the data to the scratch directory where jobs run before running - the jobs. + --local-data Use this flag if you want to copy the data to the scratch directory where jobs run before running the jobs. --metadata-path METADATA_PATH Path to directory containing the metadata of the input files.
--remote-eos-access Use this flag to run FastFrames on a remote machine while accessing files stored in eos. --kerberos-path KERBEROS_PATH Add the path to your kerberos key on your remote machine e.g. '$HOME/krb5cc_12345' + --timestamp Add timestamp extension to batch submission folders to make unique. Also copies config and CustomClass (if specified) into unique batch submission folder. Allows for unique + config/class/build for a given batch submission. + ``` Let's run an example containing a custom fastframes class - more details about the custom class in the next section. Here the custom class is located two levels up from the submission directory, i.e., at the same level as the fastframes source code. ``` @@ -366,6 +419,8 @@ Total for dbaronmo: 8 jobs; 0 completed, 0 removed, 8 idle, 0 running, 0 held, 0 Total for all users: 22412 jobs; 2240 completed, 15 removed, 15542 idle, 4540 running, 75 held, 0 suspended ``` +Note that when running `batch_submit`, you have the option to use the `--timestamp` option. If used, this will create a unique `BatchSubmission` directory for your given submission, and places a copy of the `BatchConfig.yml` file being used for the submission, allowing you to submit multiple batches with the flexibility for them to use different `BatchConfig.yml` files. A copy of the custom class build, if specified, is also copied into a unique `BatchSubmission` directory (but two directories back, in the directory containing `log`/`error`/`output` files for `HTCondor`), allowing the flexibility for each submission to use different versions of the Custom class build, if desired. + --- **NOTES (for HTCondor)** @@ -373,11 +428,11 @@ Total for all users: 22412 jobs; 2240 completed, 15 removed, 15542 idle, 4540 ru * Be careful with how many CPU cores you request per job. The more resources you request, the later your jobs will start executing. * Make sure you compile the code to include new changes before submitting the jobs. * If you are working in the Chicago AF, you need to provide the appropriate `--chicago` flag to produce the correct configuration for this cluster. -* You can specify with the `--flavour` argument an upper bound for how long individual jobs can last (afterwhich they may be killed.) Default is "microcentury" which is 1h. You can see other flavours [here](https://batchdocs.web.cern.ch/local/submit.html#job-flavours). +* You can specify with the `--flavour` argument an upper bound for how long individual jobs can last (after which they may be killed). Default is "microcentury" which is 1h. You can see other flavours [here](https://batchdocs.web.cern.ch/local/submit.html#job-flavours). --- -If using a local Slurm batch system (not on Lxplus!), you just need to pass the `--system slurm` argumet to the `batch_submit.py` script. Note the slurms jobs are submitted via an array. If using slurm you need to use the `--slurm_time` argument to pass an upper limit for how long jobs can take instead of the `--flavour` argument. +If using a local Slurm batch system (not on Lxplus!), you just need to pass the `--system slurm` argument to the `batch_submit.py` script. Note the Slurm jobs are submitted via an array. If using Slurm you need to use the `--slurm_time` argument to pass an upper limit for how long jobs can take instead of the `--flavour` argument.
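As a minimal illustration of combining the new submission options (the config path and sample name below are hypothetical placeholders), a run that splits each listed sample into four jobs and keeps its own timestamped submission directory might look like:

```
python3 batch_submit.py -c your_config.yml --samples ttbar_FS --split-n-jobs 4 --timestamp
```

Each sub-job then processes its share of the sample, and the timestamped directory keeps the copied config and custom class build isolated from any later submissions.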
## Adding custom class for custom Define() call diff --git a/docs/trex_config.md b/docs/trex_config.md index cff1e6f7c33b149477cdadbcfb9fb975775e523c..a1e2e7e60f2ece7b445215d5f3d9cecef7987fde 100644 --- a/docs/trex_config.md +++ b/docs/trex_config.md @@ -191,7 +191,7 @@ NormFactors: #### CustomBlocks block: In some cases it might be useful to define some additional blocks in TRExFitter config, for example ghost samples. One can use ```CustomBlocks``` block to achieve this. -In order to add custom samples, just add ```Samples:``` under ```CustomBlocks``` in the trex-fitter settings file and then define list of custom samples there. Each sample must have ```name``` which will be +In order to add custom samples, just add ```Sample:``` under ```CustomBlocks``` in the trex-fitter settings file and then define a list of custom samples there. Each sample must have ```name``` which will be used in TRExFitter config. The other options will just be copied to TRExFitter config, without checking if that option is implemented. It is up to the user to provide reasonable keys and values. #### Morphing block: diff --git a/docs/tutorial.md b/docs/tutorial.md index 626f027cc53a861d09bb87cb12746e4b5f8254d8..6e973a27e99ff6bca31102e903b1988fefb720fd 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -39,10 +39,10 @@ Now, clone the [repository](https://gitlab.cern.ch/atlas-amglab/fastframes) usin git clone ssh://git@gitlab.cern.ch:7999/atlas-amglab/fastframes.git FastFrames ``` -And switch to release v4.0.0 using: +And switch to release v4.2.0 using: ``` cd FastFrames -git checkout v4.0.0 +git checkout v4.2.0 cd ../ ``` @@ -435,7 +435,11 @@ general: default_reco_tree_name: "reco" # path to the file with cross-sections - xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + xsection_files: + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt"] + campaigns: ["mc20a", "mc20d", "mc20e"] + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + campaigns: ["mc23a", "mc23c", "mc23e"] # name of the custom class custom_frame_name: "TutorialClass" @@ -511,9 +515,13 @@ The above block sets the weights for normalisation. `default_sumweights` tell th ```yaml # path to the file with cross-sections - xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + xsection_files: + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt"] + campaigns: ["mc20a", "mc20d", "mc20e"] + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + campaigns: ["mc23a", "mc23c", "mc23e"] ``` -The above block tells the code where to look for the file that contains the cross-section for input MC samples. +The above block tells the code where to look for the files that contain the cross-sections for input MC samples, split by campaign. These point to the standard PMG files, feel free to copy them and update locally! ```yaml @@ -821,6 +829,35 @@ The cutflow selection will follow the order of the selections in the config file You can provide multiple cutflows in the config file. Note that you need to create at least one "standard" histogram if you want to run the cutflows.
+## UniqueSample-based decision in the custom class +Sometimes, it is important to define a variable (or decide to add it) based on the sample type as defined in the config file, or on the UniqueSample (the unique combination of DSID, campaign and simulation type) of a given sample. +All the relevant methods in the custom class take the pointer to the `Sample` object as an argument, so one can make decisions based on the sample name/type, e.g.: +```c++ +if (sample->isData()) { + ... +} +``` + +If one wants to make a decision based on the UniqueSample, this is also possible. +However, there are two ways to do it depending on whether all the UniqueSamples are processed in one run (the default setting) or the processing proceeds per UniqueSample (done for samples where matching between reco and truth trees is requested or when `split_processing_per_unique_samples` is set to `True`). +For the default setting, RDF `DefinePerSample` can be used to create a new variable that will automatically update when the processing changes the UniqueSample internally. +E.g. +```c++ +mainNode = mainNode.DefinePerSample("sample_dsid", [](unsigned int slot, const RSampleInfo& id){return id.GetI("unique_sample_id");}); +``` + +The arguments of the lambda function for `DefinePerSample` are always the same. +The following metadata information is available: `unique_sample_id`, `unique_sample_campaign` and `unique_sample_simulation`. +`unique_sample_id` is an integer (accessed with `GetI()`) while the other two are strings (accessed with `GetS()`). +The new column can then be used as any other column in the custom class/config. + +For the case of per-UniqueSample processing, the user needs to use the `const UniqueSample& id` object that is passed to the relevant methods in the custom class as an argument and make the decision based on this object, i.e.: +```c++ +if (id.dsid() == XXX) { + ... +} +``` + ## Distributed computing RDataFrame supports multi-threading when processing the input files (both when the output is an ntuple or a set of histograms). The number of threads used is controlled by the `number_of_cpus` parameter.
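To make the metadata access described in the tutorial section above concrete, here is a minimal sketch of a campaign-based decision (the column name `sample_is_mc23` and the `mc23` prefix check are illustrative assumptions, not part of the framework):

```c++
// Illustrative sketch: flag entries from mc23 campaigns using the
// "unique_sample_campaign" metadata string described above.
mainNode = mainNode.DefinePerSample(
    "sample_is_mc23",
    [](unsigned int /*slot*/, const ROOT::RDF::RSampleInfo& id) {
        // GetS() returns the string metadata stored under the given key
        return id.GetS("unique_sample_campaign").rfind("mc23", 0) == 0;
    });
```

The resulting boolean column updates automatically whenever the processing internally moves to a different UniqueSample, so it can be used like any other column in selections or definitions.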
diff --git a/examples/README.md b/examples/README.md index 7a11550ef7c78b3c2770a139b4f217806e5895ee..b0be59aa77ba63122fd4a83d3e0ce8d1bf3e4e53 100644 --- a/examples/README.md +++ b/examples/README.md @@ -10,4 +10,5 @@ * Dilepton reconstruction (example of using ONNX): https://gitlab.cern.ch/dbiswas/SAJAEvaluator * Fake lepton estimate for Matrix Method (1L): https://gitlab.cern.ch/tdado/FakeLeptonFrame * ttHH analysis: https://gitlab.cern.ch/awierda/tthhclass - * HH->bbyy: https://gitlab.cern.ch/gdigrego/fastframes/-/tree/main/bbyyClass \ No newline at end of file + * HH->bbyy: https://gitlab.cern.ch/gdigrego/fastframes/-/tree/main/bbyyClass + * DV+X (SUSY): https://gitlab.cern.ch/atlas-phys-susy-wg/RPVLL/dvplusmu/dvxframe \ No newline at end of file diff --git a/python/ConfigReaderModules/BlockReaderGeneral.py b/python/ConfigReaderModules/BlockReaderGeneral.py index 683c8d0fcb82945e088a9234cb29827718ab3655..420be084ad83ea09326289da4a0688e248e74e2b 100644 --- a/python/ConfigReaderModules/BlockReaderGeneral.py +++ b/python/ConfigReaderModules/BlockReaderGeneral.py @@ -9,6 +9,7 @@ from ConfigReaderCpp import ConfigSettingWrapper, RegionWrapper, SampleWrapper, from ConfigReaderCpp import StringVector, ptrVector from python_wrapper.python.logger import Logger from BlockOptionsGetter import BlockOptionsGetter +from BlockReaderVariable import BlockReaderVariable from CommandLineOptions import CommandLineOptions def vector_to_list(cpp_vector) -> list: @@ -48,7 +49,7 @@ class BlockReaderGeneral: self._create_tlorentz_vectors_for = self._options_getter.get("create_tlorentz_vectors_for", [], [list], [str]) self._use_rvec = self._options_getter.get("use_rvec", False, [bool]) self._number_of_cpus = self._options_getter.get("number_of_cpus", 1, [int]) - self._xsection_files = self._options_getter.get("xsection_files", ["data/XSection-MC16-13TeV.data"], [list], [str]) + self._xsection_files = self._options_getter.get("xsection_files", None, [list], [dict]) self._luminosity_map = {} self._set_luminosity_map(self._options_getter.get("luminosity", None, [dict])) self._min_event = self._options_getter.get("min_event", None, [int]) @@ -95,6 +96,10 @@ class BlockReaderGeneral: # Custom definitions from config come after custom class definitions self.config_define_after_custom_class = self._options_getter.get("config_define_after_custom_class", False, [bool]) + # Merge underflow and overflow + self.merge_underflow_overflow = self._options_getter.get("merge_underflow_overflow", "none", [str]) + BlockReaderVariable.check_underflow_overflow_option(self.merge_underflow_overflow) + self._set_custom_options() self._set_job_index_and_split_n_jobs() self._set_config_reader_cpp() @@ -139,8 +144,28 @@ class BlockReaderGeneral: for campaign, lumi_value in self._luminosity_map.items(): self.cpp_class.setLuminosity(campaign, lumi_value, True) - for xsection_file in self._xsection_files: - self.cpp_class.addXsectionFile(xsection_file) + for xsection_files_and_campaigns in self._xsection_files: + options_getter = BlockOptionsGetter(xsection_files_and_campaigns) + xsection_files = options_getter.get("files", None, [list], [str]) + campaigns = options_getter.get("campaigns", None, [list], [str]) + unused = options_getter.get_unused_options() + if len(unused) > 0: + Logger.log_message("ERROR", "Key {} used in xsection_files block is not supported!".format(unused)) + exit(1) + + if xsection_files is None or campaigns is None: + Logger.log_message("ERROR", "xsection_files and campaigns have to be defined in xsection_files block") +
exit(1) + + xsection_files_vector = StringVector() + for xsection_file in xsection_files: + xsection_files_vector.append(xsection_file) + + campaigns_vector = StringVector() + for campaign in campaigns: + campaigns_vector.append(campaign) + + self.cpp_class.addXsectionFiles(xsection_files_vector, campaigns_vector) for tlorentz_vector in self._create_tlorentz_vectors_for: self.cpp_class.addTLorentzVector(tlorentz_vector) @@ -200,7 +225,7 @@ class BlockReaderGeneral: @return list of cross section files """ result = [] - vector_xsection_files = self.cpp_class.xSectionFiles() + vector_xsection_files = self.cpp_class.xSectionFilesPreview() for xsection_file in vector_xsection_files: result.append(xsection_file) return result diff --git a/python/ConfigReaderModules/BlockReaderNtuple.py b/python/ConfigReaderModules/BlockReaderNtuple.py index 0b90111a102a7dbcdf3b9f52959d7c0ab8b641c4..99c7eba4dd232e7ee7a235e718595c9be60fc458 100644 --- a/python/ConfigReaderModules/BlockReaderNtuple.py +++ b/python/ConfigReaderModules/BlockReaderNtuple.py @@ -6,7 +6,6 @@ set_paths() from ConfigReaderCpp import NtupleWrapper -from BlockReaderVariable import BlockReaderVariable from BlockReaderGeneral import BlockReaderGeneral from BlockOptionsGetter import BlockOptionsGetter from python_wrapper.python.logger import Logger @@ -111,3 +110,19 @@ class BlockReaderNtuple: """ vector_trees = self.cpp_class.copyTrees() return [tree for tree in vector_trees] + + def check_trees_to_copy(self, all_samples : list) -> None: + """!Check that the reco-level tree is not copied in any of the samples + """ + copy_trees = self.get_copy_trees() + n_samples = self.cpp_class.nSamples() + selected_samples = [self.cpp_class.sampleName(i_sample) for i_sample in range(n_samples)] + + for sample_object in all_samples: + sample_name = sample_object.name() + if sample_name not in selected_samples: + continue + reco_tree = sample_object.recoTreeName() + if reco_tree in copy_trees: + Logger.log_message("ERROR", f"Tree {reco_tree} was selected to copy, but it is reco-tree in sample {sample_name}!") + exit(1) \ No newline at end of file diff --git a/python/ConfigReaderModules/BlockReaderRegion.py b/python/ConfigReaderModules/BlockReaderRegion.py index 2b5a9163f2fb503cd681fe1f530f61df786aeddb..aafa96d5339a0be72c047d966f2ebc627464dff4 100644 --- a/python/ConfigReaderModules/BlockReaderRegion.py +++ b/python/ConfigReaderModules/BlockReaderRegion.py @@ -68,7 +68,7 @@ class BlockReaderRegion: # Add the variables to the region for variable_dict in new_container: - variable = BlockReaderVariable(variable_dict) + variable = BlockReaderVariable(variable_dict, block_reader_general) self._variables.append(variable) # 2D and 3D histograms @@ -78,6 +78,9 @@ class BlockReaderRegion: self._histograms_3d = self._options_getter.get("histograms_3d", [], [list], [dict]) self._histograms_3d = AutomaticRangeGenerator.unroll_sequence(self._histograms_3d) + self._tprofiles = self._options_getter.get("profile", [], [list], [dict]) + self._tprofiles = AutomaticRangeGenerator.unroll_sequence(self._tprofiles) + ## Instance of the RegionWrapper C++ class -> wrapper around C++ Region class self.cpp_class = RegionWrapper(self._name) self._set_config_reader_cpp() @@ -126,9 +129,28 @@ class BlockReaderRegion: if len(unused) > 0: Logger.log_message("ERROR", "Key {} used in 'histograms_3d' block is not supported!".format(unused)) exit(1) - self.cpp_class.addVariableCombination3D(x, y, z) + for tprofile in self._tprofiles: + options_getter = BlockOptionsGetter(tprofile) + x = 
options_getter.get("x", None, [str]) + y = options_getter.get("y", None, [str]) + unused = options_getter.get_unused_options() + if len(unused) > 0: + Logger.log_message("ERROR", f"Keys {unused} used in 'profile' block are not supported!") + exit(1) + if x is None or y is None: + Logger.log_message("ERROR", f"profile in region {self._name} does not have x or y specified".format()) + exit(1) + if x not in variables_names: + Logger.log_message("ERROR", f"profile in region {self._name} has x variable {x} which is not defined") + exit(1) + if y not in variables_names: + Logger.log_message("ERROR", f"profile in region {self._name} has y variable {y} which is not defined") + exit(1) + self.cpp_class.addVariableForProfile(x, y) + + def __merge_settings(self, block_reader_general) -> list: if block_reader_general is None: return @@ -152,20 +174,11 @@ class BlockReaderRegion: result.append(variable_cpp_object) return result - def get_2d_combinations(vector_combinations) -> list: + def vector_to_list(input_vector) -> list: """!Get list of 2D variable combinations defined in the region @param: vector of variable combinations """ result = [] - for combination in vector_combinations: - result.append(combination) - return result - - def get_3d_combinations(vector_combinations_3d) -> list: - """!Get list of 3D variable combinations defined in the region - @param: vector of variable combinations - """ - result = [] - for combination in vector_combinations_3d: + for combination in input_vector: result.append(combination) return result diff --git a/python/ConfigReaderModules/BlockReaderSample.py b/python/ConfigReaderModules/BlockReaderSample.py index f48c48cdf400db37b7b283c00feae5f3180cd5a0..374206f204d00cd62c526ec28a914ffead85061f 100644 --- a/python/ConfigReaderModules/BlockReaderSample.py +++ b/python/ConfigReaderModules/BlockReaderSample.py @@ -57,6 +57,7 @@ class BlockReaderSample: @param block_reader_general: BlockReaderGeneral object with general options from the config file - this is there to get default values """ self._options_getter = BlockOptionsGetter(input_dict) + self._block_reader_general = block_reader_general self._name = self._options_getter.get("name", None, [str]) if self._name is None: @@ -118,7 +119,7 @@ class BlockReaderSample: if self._truth_dicts is not None: reco_variables_from_regions = block_reader_general.cpp_class.getVariableNames() for truth_dict in self._truth_dicts: - truth_object = BlockReaderSampleTruth(truth_dict) + truth_object = BlockReaderSampleTruth(truth_dict, self._block_reader_general) self._truths.append(truth_object) truth_object.check_reco_variables_existence(reco_variables_from_regions) self.cpp_class.addTruth(truth_object.cpp_class.getPtr()) diff --git a/python/ConfigReaderModules/BlockReaderSampleTruth.py b/python/ConfigReaderModules/BlockReaderSampleTruth.py index 3914659b0a536b8bfbd23c7fcfe313ac7e0bdb19..836f13d4250758525f04ba80668a49fd500effba 100644 --- a/python/ConfigReaderModules/BlockReaderSampleTruth.py +++ b/python/ConfigReaderModules/BlockReaderSampleTruth.py @@ -18,11 +18,12 @@ class BlockReaderSampleTruth: """!Class for reading truth block of sample block in the config, equivalent of C++ class Truth """ - def __init__(self, input_dict : dict): + def __init__(self, input_dict : dict, block_reader_general : BlockReaderGeneral): """!Constructor of the BlockReaderSampleTruth class. 
@param input_dict: dictionary with options from the config file """ self._options_getter = BlockOptionsGetter(input_dict) + self._block_reader_general = block_reader_general self._name = self._options_getter.get("name", None, [str]) if self._name is None: @@ -91,7 +92,7 @@ class BlockReaderSampleTruth: def _read_variables(self) -> None: for variable_dict in self._variables: - variable = BlockReaderVariable(variable_dict) + variable = BlockReaderVariable(variable_dict, self._block_reader_general) self.cpp_class.addVariable(variable.cpp_class.getPtr()) def _read_match_variables(self) -> None: diff --git a/python/ConfigReaderModules/BlockReaderVariable.py b/python/ConfigReaderModules/BlockReaderVariable.py index 943272333503e6f2417cf4e974f82f1da6bae4c6..e343cfe495a16e26554689f977d7169b95fbd1c5 100644 --- a/python/ConfigReaderModules/BlockReaderVariable.py +++ b/python/ConfigReaderModules/BlockReaderVariable.py @@ -13,7 +13,7 @@ from AutomaticRangeGenerator import AutomaticRangeGenerator class BlockReaderVariable: """!Class for reading variable block from config file, equivalent of C++ class Variable """ - def __init__(self, variable_dict : dict): + def __init__(self, variable_dict : dict, block_reader_general): """!Constructor of the BlockReaderVariable class @param variable_dict: dictionary with options from the config file """ @@ -34,6 +34,9 @@ class BlockReaderVariable: Logger.log_message("ERROR", "Variable name cannot contain 'NOSYS' string. It was found in the following variable: " + self._name) exit(1) + self._merge_underflow_overflow = self._options_getter.get("merge_underflow_overflow", block_reader_general.merge_underflow_overflow, [str]) + BlockReaderVariable.check_underflow_overflow_option(self._merge_underflow_overflow) + ## Instance of the VariableWrapper C++ class -> wrapper around C++ Variable class self.cpp_class = VariableWrapper(self._name) self._set_cpp_class() @@ -45,6 +48,7 @@ class BlockReaderVariable: self.cpp_class.setTitle(self._title) self.cpp_class.setIsNominalOnly(self._is_nominal_only) self.cpp_class.setType(self._variable_type) + self.cpp_class.setUnderOverFlowType(self._merge_underflow_overflow) def _read_binning(self, binning_dict : dict): if binning_dict is None: @@ -95,3 +99,9 @@ class BlockReaderVariable: @return list[dict] """ return AutomaticRangeGenerator.unroll_sequence(variables_dicts) + + def check_underflow_overflow_option(value_from_config : str) -> None: + allowed_options = ["none", "underflow", "overflow", "both"] + if value_from_config.lower() not in allowed_options: + Logger.log_message("ERROR", f"Invalid value for underflow_overflow option: '{value_from_config}'. 
Allowed values are: " + str(allowed_options)) + exit(1) diff --git a/python/ConfigReaderModules/ConfigReader.py b/python/ConfigReaderModules/ConfigReader.py index 8ba9f87af5104c970a5b3a16445359f45b96b269..c556e93c649ea264a327b9d6aa661710f1b5f3ac 100644 --- a/python/ConfigReaderModules/ConfigReader.py +++ b/python/ConfigReaderModules/ConfigReader.py @@ -164,6 +164,8 @@ class ConfigReader: Logger.log_message("ERROR", "Unused blocks: {}".format(unused_blocks)) exit(1) + self.block_ntuple.check_trees_to_copy(self.block_general.get_samples_objects()) + if __name__ == "__main__": config_path = CommandLineOptions().get_config_path() @@ -258,6 +260,7 @@ if __name__ == "__main__": print("\t\ttype: ", variable_cpp_object.type()) print("\t\tdefinition: ", variable_cpp_object.definition()) print("\t\tis_nominal_only: ", variable_cpp_object.isNominalOnly()) + print("\t\tmerge_underflow_overflow: ", variable_cpp_object.underOverFlowType()) if variable_cpp_object.hasRegularBinning(): print( "\t\tbinning: ", variable_cpp_object.axisNbins(), ", ", @@ -266,20 +269,27 @@ if __name__ == "__main__": else: print("\t\tbinning: ", variable_cpp_object.binEdgesString()) print("\n") - variable_combinations = BlockReaderRegion.get_2d_combinations(region.variableCombinations()) + variable_combinations = BlockReaderRegion.vector_to_list(region.variableCombinations()) if len(variable_combinations) > 0: print("\t2d combinations:") for variable_combination in variable_combinations: print("\t\t", variable_combination) print("\n") - variable_combinations_3d = BlockReaderRegion.get_3d_combinations(region.variableCombinations3D()) + variable_combinations_3d = BlockReaderRegion.vector_to_list(region.variableCombinations3D()) if len(variable_combinations_3d) > 0: print("\t3d combinations:") for variable_combination in variable_combinations_3d: print("\t\t", variable_combination) print("\n") + tprofiles = BlockReaderRegion.vector_to_list(region.variablesForProfile()) + if len(tprofiles) > 0: + print("\tProfiles:") + for tprofile in tprofiles: + print("\t\t", tprofile) + print("\n") + print("\n\nSamples block:\n") samples = config_reader.block_general.get_samples_objects() for sample in samples: @@ -330,6 +340,7 @@ if __name__ == "__main__": print("\t\t\tname: ", variable.name()) print("\t\t\ttitle: ", variable.title()) print("\t\t\ttype: ", variable.type()) + print("\t\t\tmerge_underflow_overflow: ", variable.underOverFlowType()) print("\t\t\tdefinition: ", variable.definition()) if variable.hasRegularBinning(): print( "\t\t\tbinning: ", diff --git a/python/TRExFitterConfigPreparation/TrexSettingsGetter.py b/python/TRExFitterConfigPreparation/TrexSettingsGetter.py index cff8f64bd6874f3cfe9c6ac4c533dac5110d384c..8922b5a0f47079d064046ba99ed26961d9cd69de 100644 --- a/python/TRExFitterConfigPreparation/TrexSettingsGetter.py +++ b/python/TRExFitterConfigPreparation/TrexSettingsGetter.py @@ -60,11 +60,6 @@ def custom_sort_ghost(item): class TrexSettingsGetter: def __init__(self, fast_frames_config_address : str, trex_settings_yaml : str = "", unfolding_tuple : tuple[str,str,str,str] = None, regions : list[str] = None): - config_reader = ConfigReader(fast_frames_config_address) - if config_reader.block_general.cpp_class.useRegionSubfolders(): - Logger.log_message("ERROR", "You have set up 'use_region_subfolders: True' in your config, this is not supported by trex-fitter") - exit(1) - self.trex_settings_dict = None if trex_settings_yaml: with open(trex_settings_yaml, "r") as f: @@ -99,8 +94,13 @@ class TrexSettingsGetter: 
self.unfolding_variable_reco = "" self.run_unfolding = False + config_reader = ConfigReader(fast_frames_config_address) + if config_reader.block_general.cpp_class.useRegionSubfolders(): + Logger.log_message("ERROR", "You have set up 'use_region_subfolders: True' in your config, this is not supported by trex-fitter") + exit(1) + # TODO: clean this up - # Set the default to be reading from histograms, but it can be changed by the ReadFrom flag in the Job block + # Set the default to be reading from histograms, but it can be changed by the ReadFrom flag in the Job block self._ntuple_path = config_reader.block_general.cpp_class.outputPathNtuples() if self._ntuple_path == "": self._ntuple_path = "." @@ -114,7 +114,7 @@ class TrexSettingsGetter: self._files_path = os.path.abspath(self._ntuple_path) else: self._files_path = os.path.abspath(self._histo_path) - + self._set_unfolding_settings(unfolding_tuple) if regions != None: self._region_variable_regexes = deepcopy(regions) @@ -146,7 +146,7 @@ if len(self._systematics_blocks) != 0 and self._readfrom == "NTUP": # Reading from ntuples doesn't work with systematics yet! Logger.log_message("ERROR", "Cannot run systematics when reading from ntuple!") exit(1) - + self._total_lumi = BlockReaderSample.get_total_luminosity(config_reader.block_general.cpp_class, config_reader.block_general.get_samples_objects()) @@ -216,18 +216,13 @@ if re.match(region_dict["name"],region_name): this_trex_region_dict = deepcopy(region_dict) del this_trex_region_dict["name"] - dictionary["Type"] = this_trex_region_dict.get("Type","SIGNAL") - if "LogScale" in this_trex_region_dict: - dictionary["LogScale"] = this_trex_region_dict["LogScale"] - if "RatioYmax" in this_trex_region_dict: - dictionary["RatioYmax"] = this_trex_region_dict["RatioYmax"] - if "RatioYmin" in this_trex_region_dict: - dictionary["RatioYmin"] = this_trex_region_dict["RatioYmin"] + dictionary["Type"] = "SIGNAL" # will be overridden later if defined in trex-settings config dictionary["VariableTitle"] = dictionary.get("VariableTitle", variable_name) dictionary["HistoName"] = "NOSYS/" + variable_name + "_" + region.name() dictionary["Label"] = this_region_dict.get("Label", region.name()) dictionary["ShortLabel"] = this_region_dict.get("ShortLabel", region.name()) + if self.run_unfolding: dictionary["NumberOfRecoBins"] = variable.axisNbins() dictionary["AcceptanceNameSuff"] = "_" + region.name() @@ -238,7 +233,10 @@ if key not in dictionary: dictionary[key] = this_region_dict[key] + keywords = ["name"] for key in this_trex_region_dict: + if key in keywords: + continue dictionary[key] = this_trex_region_dict[key] return "Region", region_name, dictionary @@ -251,7 +249,7 @@ def get_samples_blocks(self) -> list[tuple[str,str,dict]]: # re-order list such that GHOST samples are written out first - all_samples = self._inclusive_samples_blocks + self.get_custom_blocks("Samples") + all_samples = self._inclusive_samples_blocks + self.get_custom_blocks("Sample") ordered_samples = sorted(all_samples, key=custom_sort_ghost) return ordered_samples @@ -326,7 +324,7 @@ unfolding_sample_dict["AcceptanceName"] = "NOSYS/acceptance_" + level + "_" + self.unfolding_variable_reco unfolding_sample_dict["SelectionEffName"] = "NOSYS/selection_eff_" + level + "_" + truth_variable.name() - unfolding_sample_dict["MigrationName"] = "NOSYS/" + self.unfolding_variable_reco + "_vs_" + level + "_" +
truth_variable.name() + unfolding_sample_dict["MigrationName"] = "NOSYS/" + level + "_" + truth_variable.name() + "_vs_" + self.unfolding_variable_reco self._unfolding_samples_blocks.append(("UnfoldingSample", sample.name(), unfolding_sample_dict)) unfolding_samples_cpp_objects.append(sample.name()) @@ -376,7 +374,7 @@ class TrexSettingsGetter: for r in unique_list: r = r.strip("()") r_elements = r.split(",") - if is_data: + if is_data: files_string += "\""+r_elements[2]+"_"+r_elements[1]+"\"," else: files_string += "\""+sample.name()+"_"+r_elements[0]+"_"+r_elements[1]+"_"+r_elements[2]+"\"," @@ -390,16 +388,12 @@ class TrexSettingsGetter: sample_color = sample_setting_dict.get("Color", self.get_sample_color()) dictionary["FillColor"] = sample_setting_dict.get("FillColor", sample_color) dictionary["LineColor"] = sample_setting_dict.get("LineColor", sample_color) - if "Template" in sample_setting_dict: - dictionary["Template"] = sample_setting_dict["Template"] - if "NormToSample" in sample_setting_dict: - dictionary["NormToSample"] = sample_setting_dict["NormToSample"] - if "AddSample" in sample_setting_dict: - dictionary["AddSample"] = sample_setting_dict["AddSample"] - if "SubtractSample" in sample_setting_dict: - dictionary["SubtractSample"] = sample_setting_dict["SubtractSample"] - if "Morphing" in sample_setting_dict: - dictionary["Morphing"] = sample_setting_dict["Morphing"] + + keywords = ["Color", "name"] + for key in sample_setting_dict: + if key in keywords: + continue + dictionary[key] = sample_setting_dict[key] region_names = vector_to_list(sample.regionsNames()) selected_regions = [] @@ -735,10 +729,13 @@ class TrexSettingsGetter: result["BinnedLikelihoodOptimization"] = fit_dict_settings.get("BinnedLikelihoodOptimization", "TRUE") else: result["POIAsimov"] = fit_dict_settings.get("POIAsimov",1) + result["FitRegion"] = fit_dict_settings.get("FitRegion","CRSR") result["FitBlind"] = fit_dict_settings.get("FitBlind", "True") - if "UseMinos" in fit_dict_settings: - result["UseMinos"] = fit_dict_settings["UseMinos"] + + for key in fit_dict_settings: + result[key] = fit_dict_settings[key] + return "Fit", "fit", result def get_morphing_block(self) -> tuple[str,str,dict]: @@ -758,7 +755,7 @@ class TrexSettingsGetter: job_name = job_dict_settings.get("Name","my_fit") if self._readfrom == "NTUP": dictionary["NtuplePath"] = job_dict_settings.get("NtuplePath",self._ntuple_path) - else: + else: dictionary["HistoPath"] = job_dict_settings.get("HistoPath",self._histo_path) if self.run_unfolding: dictionary["AcceptancePath"] = self._files_path @@ -975,4 +972,4 @@ class TrexSettingsGetter: if unfolding_dict: result_unfolding = ("UnfoldingSystematic", systematics_name, unfolding_dict) - return (result_inclusive, result_unfolding) \ No newline at end of file + return (result_inclusive, result_unfolding) diff --git a/python/batch_submit.py b/python/batch_submit.py index 44b73719009d08dd07f29b7124642b44a4409b1b..f4c33f44896c2c4dea42f63e57365be62aec3f32 100644 --- a/python/batch_submit.py +++ b/python/batch_submit.py @@ -1,6 +1,7 @@ import argparse import os import yaml +import datetime # Define the colors for the output class bcolors: @@ -28,6 +29,7 @@ def createParser(): parser = argparse.ArgumentParser() parser.add_argument("-c", "--config", help="Path to the yml config file.") parser.add_argument("--samples", help="A comma separated list of samples to run. One job is created per listed sample. 
Default: all samples listed in the yml config.",default="all")
+    parser.add_argument("--split-n-jobs", help="Number of jobs to split each sample into. Default: 1 (no splitting)", default=1, type=int)
     parser.add_argument("--step", help="Step to run: 'n' (ntuples) or 'h' (histograms). Default: 'h'", choices=["h","n"], default="h")
     parser.add_argument("--system", help="Batch system, either condor or slurm. Default: 'condor'", choices=["condor","slurm"], default="condor")
     parser.add_argument("--custom-class-path", help= "Path to the custom class used in the config file (if used). Default: None", default=None)
@@ -40,6 +42,7 @@ def createParser():
     parser.add_argument("--metadata-path",help="Path to directory containing the metadata of the input files.")
     parser.add_argument("--remote-eos-access",help="Use this flag to run FastFrames on a remote machine while accessing files stored in eos.", action="store_true")
     parser.add_argument("--kerberos-path", help="Add the path to your kerberos key on your remote machine e.g. '$HOME/krb5cc_12345'")
+    parser.add_argument("--timestamp", help="Add a timestamp extension to the batch submission folders to make them unique. Also copies the config and CustomClass (if specified) into the unique batch submission folder, allowing a unique config/class/build for a given batch submission.", action="store_true")
     return parser
 
 def checkParser(cmdLineArguments):
@@ -56,6 +59,14 @@ def checkParser(cmdLineArguments):
         if cmdLineArguments.memory.replace("GB","").isdigit() == False or cmdLineArguments.memory.replace("GB","")=="0":
             print(ERROR("Please provide a valid number for the requested memory."))
             exit(1)
+    if cmdLineArguments.memory is None and cmdLineArguments.chicago:
+        print(WARNING("The amount of memory is not being set. The default memory in Chicago is 2GB and this can be too low, causing your jobs to go on hold status."))
+
+    # Check that job splitting makes sense
+    if cmdLineArguments.split_n_jobs != 1:
+        if cmdLineArguments.split_n_jobs < 1:
+            print(ERROR("The number of jobs to split the samples into must be a positive integer."))
+            exit(1)
 
 # The params dictionary contains the parameters that are calculated given the user input.
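+# Each key/value pair in this dictionary is written verbatim as a "key = value"
+# line into the generated condor_submit.sub by createSubmissionFile below.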
jobParamatersDict = { @@ -71,15 +82,15 @@ def getFFPath(): twoLevelsUp = os.path.abspath(os.path.join(submissionPath, os.pardir, os.pardir)) return twoLevelsUp -def createSubmissionFile(paramsDictionary): - executableCMD = "executable = runFF.sh\n" +def createSubmissionFile(paramsDictionary, batchNameExtension): + executableCMD = f"executable = BatchSubmission{batchNameExtension}/runFF.sh\n" argumentsCMD = "arguments = $(ClusterId)$(ProcId)\n" - outputCMD = "output = output/runFF.$(ClusterId).$(ProcId).out\n" - errorCMD = "error = error/runFF.$(ClusterId).$(ProcId).err\n" - logCMD = "log = log/runFF.$(ClusterId).log\n" + outputCMD = f"output = BatchSubmission{batchNameExtension}/output/runFF.$(ClusterId).$(ProcId).out\n" + errorCMD = f"error = BatchSubmission{batchNameExtension}/error/runFF.$(ClusterId).$(ProcId).err\n" + logCMD = f"log = BatchSubmission{batchNameExtension}/log/runFF.$(ClusterId).log\n" getenvCMD = "getenv = True\n" preserveRelativePathsCMD = "preserve_relative_paths = True\n" - with open("condor_submit.sub","w") as f: + with open(f"BatchSubmission{batchNameExtension}/condor_submit.sub","w") as f: f.write(executableCMD) f.write(argumentsCMD) f.write(outputCMD) @@ -90,57 +101,70 @@ def createSubmissionFile(paramsDictionary): for key,value in paramsDictionary.items(): f.write(key + " = " + str(value) + "\n") f.write("\n") - f.write("queue arguments from inputSamples.txt\n") + f.write(f"queue arguments from BatchSubmission{batchNameExtension}/inputSamples.txt\n") -def createSlurmArraySubmissionFile(params, num_samples): +def createSlurmArraySubmissionFile(params, num_samples, split_n_jobs, batchNameExtension): """Creates a Slurm submission script for a job array.""" with open("slurm_submit_array.sh", "w") as f: f.write("#!/bin/bash\n") f.write("#SBATCH --job-name=FastFramesArray\n") - f.write("#SBATCH --output=output/runFF.%A_%a.out\n") - f.write("#SBATCH --error=error/runFF.%A_%a.err\n") + f.write(f"#SBATCH --output=BatchSubmission{batchNameExtension}/output/runFF.%A_%a.out\n") + f.write(f"#SBATCH --error=BatchSubmission{batchNameExtension}/error/runFF.%A_%a.err\n") f.write(f"#SBATCH --time={params['time']}\n") f.write(f"#SBATCH --cpus-per-task={params['cpus']}\n") if params['memory'] is not None: f.write(f"#SBATCH --mem={params['memory']}\n") - f.write(f"#SBATCH --array=0-{num_samples - 1}\n") # Array indices + n_totaljobs = num_samples * split_n_jobs + f.write(f"#SBATCH --array=0-{n_totaljobs - 1}\n") # Array indices f.write("\n") - f.write("module load python/3.8.0\n") # Load necessary modules - f.write("module load other/dependencies\n") - f.write("shopt -s expand_aliases\n") - f.write("alias setupATLAS='source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh'\n") - f.write("setupATLAS\n") - f.write("asetup StatAnalysis,0.4.0\n") - f.write("source build/setup.sh\n") - f.write("source CustomClassForBatch/build/setup.sh\n") - f.write("\n") - f.write("# Get the sample name from inputSamples.txt using the SLURM array index\n") - f.write("sample=$(sed -n \"$((SLURM_ARRAY_TASK_ID + 1))p\" inputSamples.txt)\n") - f.write("echo \"Processing sample: $sample\"\n") - f.write("srun ./runFF.sh $sample\n") # Pass the sample name to runFF.sh -def createExecutable(configYMLPath,step,copyDataToScratch,metadataPath): + f.write(f"# Get the corresponding line from BatchSubmission{batchNameExtension}/inputSamples.txt\n") + f.write(f"line=$(sed -n \"$((SLURM_ARRAY_TASK_ID + 1))p\" BatchSubmission{batchNameExtension}/inputSamples.txt)\n") + + f.write("# Parse the line to extract details\n") + 
f.write("sample=$(echo $line | awk '{print $1}') # Extract sample name\n") + if split_n_jobs > 1: + f.write("total_jobs=$(echo $line | awk '{print $2}') # Extract total jobs\n") + f.write("job_index=$(echo $line | awk '{print $3}') # Extract job index\n") + + f.write("\n") + f.write("# Run script with appropriate arguments\n") + if split_n_jobs > 1: + f.write("echo \"Processing sample: $sample, total jobs: $total_jobs, job index: $job_index\"\n") + f.write(f"srun ./BatchSubmission{batchNameExtension}/runFF.sh $sample $total_jobs $job_index\n") + else: + f.write("echo \"Processing sample: $sample\"\n") + f.write(f"srun ./BatchSubmission{batchNameExtension}/runFF.sh $sample\n") + +def createExecutable(configYMLPath, step, copyDataToScratch, metadataPath, split_n_jobs, system, batchNameExtension): # Copy the config file to the submission directory - os.system('cp ' +configYMLPath+ ' BatchConfig.yml') - with open("runFF.sh","w") as f: + os.system(f"cp {configYMLPath} BatchSubmission{batchNameExtension}/BatchConfig.yml") + with open(f"BatchSubmission{batchNameExtension}/runFF.sh","w") as f: f.write("#!/bin/bash\n") if(commandLineArguments.remote_eos_access): f.write("export KRB5CCNAME="+commandLineArguments.kerberos_path+"\n") - f.write("shopt -s expand_aliases\n") # Enable aliases in the remote machine - f.write("alias setupATLAS='source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh'\n") # Set up the ATLAS environment - f.write("setupATLAS\n") - f.write("asetup StatAnalysis,0.4.2\n") - f.write("source build/setup.sh\n") # Load the FastFrames environment - f.write("source CustomClassForBatch/build/setup.sh\n") # Load the custom class environment - if copyDataToScratch: + if system == 'condor': + f.write("shopt -s expand_aliases\n") # Enable aliases in the remote machine + f.write("alias setupATLAS='source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh'\n") # Set up the ATLAS environment + f.write("setupATLAS\n") + f.write("asetup StatAnalysis,0.5.0\n") + f.write("source build/setup.sh\n") # Load the FastFrames environment + f.write(f"source BatchSubmission{batchNameExtension}/CustomClassForBatch/build/setup.sh\n") # Load the custom class environment + if copyDataToScratch and system == 'condor': if metadataPath is None: print(ERROR("ERROR: Please provide the path to the permanet data directory that will be used to send data from.")) exit(1) f.write("python3 fastframes/python/copyDataToScratch.py $_CONDOR_SCRATCH_DIR $1 "+metadataPath+"\n") # Copy the data to the scratch directory if requested - f.write("cd fastframes/python\n") - f.write("python3 FastFrames.py " + "--config BatchConfig.yml --step " +step+ " --samples $1\n") # Run + if system == "condor": + f.write("cd fastframes/python\n") + if split_n_jobs == 1: # Run + f.write(f"python3 FastFrames.py --config BatchSubmission{batchNameExtension}/BatchConfig.yml --step {step} --samples $1\n") + + else: + f.write(f"python3 FastFrames.py --config BatchSubmission{batchNameExtension}/BatchConfig.yml --step {step} --samples $1 --split_n_jobs $2 --job_index $3\n") + # Give the file executable rights. 
-    os.system("chmod +x runFF.sh")
+    os.system(f"chmod +x BatchSubmission{batchNameExtension}/runFF.sh")
 
 def loadYMLConfig(pathToConfig):
     with open(pathToConfig, 'r') as stream:
@@ -149,35 +173,29 @@ def loadYMLConfig(pathToConfig):
         except yaml.YAMLError as exc:
             print(exc)
 
-def copyCustomClassFilesForSubmission(customClassPath):
+def copyCustomClassFilesForSubmission(customClassPath, batchNameExtension):
     # Do nothing if no custom class is used
     if customClassPath is None:
         return
-    # First, remove any previous custom class used for condor jobs.
-    try:
-        os.remove('../../CustomClassForBatch/')
-    except OSError:
-        pass
-    # Then, copy the custom class files to the submission directory
     try:
-        os.system('rsync -r --exclude .git '+customClassPath+'/*'+' '+'../../CustomClassForBatch') # Avoid copying the .git directory if present
+        os.system(f"rsync -r --exclude .git {customClassPath}/* ../../BatchSubmission{batchNameExtension}/CustomClassForBatch/") # Avoid copying the .git directory if present
     except FileNotFoundError:
         print("Error copying the custom class files to the submission directory.")
         print(customClassPath,' path to custom class not found.')
         exit()
 
-def setupJobParamsDict(generalBlock,cmdLineArguments):
+def setupJobParamsDict(generalBlock,cmdLineArguments, batchNameExtension):
     # Set to the FastFrames path
     jobParamatersDict["initialdir"] = getFFPath()
     # Set number of cpus as requested in the config file
     jobParamatersDict["RequestCpus"] = generalBlock["number_of_cpus"]
     # Set up the wall-time
-    jobParamatersDict["+JobFlavour"] = cmdLineArguments.flavour
+    jobParamatersDict["+JobFlavour"] = f"\"{cmdLineArguments.flavour}\""
     # Add the custom class path to the transfer_input_files
     if cmdLineArguments.custom_class_path is not None:
-        jobParamatersDict["transfer_input_files"] += ",CustomClassForBatch/"
+        jobParamatersDict["transfer_input_files"] += f",BatchSubmission{batchNameExtension}/CustomClassForBatch/"
 
     # Configuration change for Chicago Analysis Facility
     if cmdLineArguments.chicago:
@@ -198,10 +216,15 @@ def getListOfSamples(listOfSamplesFromInput,samplesBlock):
             listOfSamples.append(sample["name"])
     return listOfSamples
 
-def createInputSamplesFile(listOfSamples):
-    with open("inputSamples.txt","w") as f:
-        for sample in listOfSamples:
-            f.write(sample + "\n")
+def createInputSamplesFile(listOfSamples, nJobs, batchNameExtension):
+    with open(f"BatchSubmission{batchNameExtension}/inputSamples.txt","w") as f:
+        if nJobs == 1:
+            for sample in listOfSamples:
+                f.write(sample + "\n")
+        else:
+            for iJob in range(nJobs):
+                for sample in listOfSamples:
+                    f.write("%s %d %d\n" % (sample, nJobs, iJob))
 
 def checkIsAFSorEOSPath(path):
     if not path.startswith("/eos/") and not path.startswith("/afs/"):
@@ -250,10 +273,12 @@ def checkAssumptions(geeneralBlock):
     pathToInputSumW = geeneralBlock['input_sumweights_path']
     checkFileExistsFromSubmissionPath(pathToInputFiles)
     checkFileExistsFromSubmissionPath(pathToInputSumW)
-    # Check the xSec file
-    pathsToXSecFile = geeneralBlock['xsection_files']
-    for path in pathsToXSecFile:
-        checkFileExistsFromSubmissionPath(path)
+    # Check the xSec files
+    xSecBlock = geeneralBlock['xsection_files']
+    for entry in xSecBlock:
+        files = entry['files']
+        for file in files:
+            checkFileExistsFromSubmissionPath(file)
 
 def askUserForConfirmation():
     print(TITLE("This script submits jobs to the HTCondor/Slurm batch system from an lxplus-like machine..."))
@@ -278,6 +303,17 @@ if __name__ == "__main__":
 
     checkIsRemoteEosAccess(commandLineArguments)
 
+    # make unique directories for this batch 
submission + now = datetime.datetime.now() + timestamp = int(now.timestamp()) + + batchNameExtension = "" + if(commandLineArguments.timestamp): + batchNameExtension = f"_{timestamp}" + + os.system(f"mkdir -p BatchSubmission{batchNameExtension}") + os.system(f"mkdir -p ../../BatchSubmission{batchNameExtension}") + # Load the config file config = loadYMLConfig(commandLineArguments.config) # Get the the general block and set up all the parameters @@ -288,33 +324,33 @@ if __name__ == "__main__": checkAssumptions(generalBlockSettings) #Create the directories for the logs - os.system("mkdir -p ../../output ../../log ../../error") - + os.system(f"mkdir -p ../../BatchSubmission{batchNameExtension}/output ../../BatchSubmission{batchNameExtension}/log ../../BatchSubmission{batchNameExtension}/error") + # Create the executable file - createExecutable(commandLineArguments.config,commandLineArguments.step,commandLineArguments.local_data,commandLineArguments.metadata_path) + createExecutable(commandLineArguments.config,commandLineArguments.step,commandLineArguments.local_data,commandLineArguments.metadata_path,commandLineArguments.split_n_jobs,commandLineArguments.system, batchNameExtension) # Copy the necessary files to send with the job - copyCustomClassFilesForSubmission(commandLineArguments.custom_class_path) + copyCustomClassFilesForSubmission(commandLineArguments.custom_class_path, batchNameExtension) # Create input samples files commaSeparatedSamples = commandLineArguments.samples.split(",") samplesBlock = config["samples"] samplesList = getListOfSamples(commaSeparatedSamples,samplesBlock) - createInputSamplesFile(samplesList) + createInputSamplesFile(samplesList, commandLineArguments.split_n_jobs, batchNameExtension) # System specific setup if commandLineArguments.system == 'condor': # Set up job parameters and create the submission file - setupJobParamsDict(generalBlockSettings,commandLineArguments) - createSubmissionFile(jobParamatersDict) + setupJobParamsDict(generalBlockSettings,commandLineArguments, batchNameExtension) + createSubmissionFile(jobParamatersDict, batchNameExtension) # Submit the jobs if not commandLineArguments.dry_run: - os.system("condor_submit condor_submit.sub") + os.system(f"condor_submit BatchSubmission{batchNameExtension}/condor_submit.sub") else: print(DEBUG("Dry run. The submission files have been created in this directory. 
But the jobs have not been submitted.")) - print("To submit the jobs, run 'condor_submit condor_submit.sub'") + print(f"To submit the jobs, run 'condor_submit BatchSubmission{batchNameExtension}/condor_submit.sub'") elif commandLineArguments.system == 'slurm': @@ -326,7 +362,7 @@ if __name__ == "__main__": } # Create slurm job array submission script - createSlurmArraySubmissionFile(slurmParams, len(samplesList)) + createSlurmArraySubmissionFile(slurmParams, len(samplesList), commandLineArguments.split_n_jobs, batchNameExtension) # Submit the job array if not commandLineArguments.dry_run: diff --git a/python/development/DevelopmentCommon/DevelopmentFunctions.py b/python/development/DevelopmentCommon/DevelopmentFunctions.py new file mode 100644 index 0000000000000000000000000000000000000000..1b99b08ab33ef57b2498e74e7d4dbbac6fd44148 --- /dev/null +++ b/python/development/DevelopmentCommon/DevelopmentFunctions.py @@ -0,0 +1,16 @@ + +def get_keyboard_bool(question : str) -> bool: + """!Asks a question and returns True if the answer is 'y' and False if the answer is 'n' + @param question: question to ask the user + + @return True if the answer is 'y', False if the answer is 'n', exits with code 1 otherwise + """ + answer = input(question) + answer = answer.strip().lower() + if answer == 'y': + return True + elif answer == 'n': + return False + else: + print("Only 'y' or 'n' is accepted.") + exit(1) diff --git a/python/development/smear_input_root_file.py b/python/development/smear_input_root_file.py new file mode 100644 index 0000000000000000000000000000000000000000..d09131041269ca6fd21cb5071a73ab30651c3ac4 --- /dev/null +++ b/python/development/smear_input_root_file.py @@ -0,0 +1,165 @@ +"""!Script for smearing original ATLAS data or MC samples, to be able to use them in public CI tests. 
+ +Usage: + python3 python/development/smear_input_root_file.py <input_root_file> <output_root_file> +""" + +from sys import argv +from ROOT import TFile, TTree, RDataFrame, vector, gROOT +import uproot +import numpy as np +from copy import deepcopy +import random + +def copy_other_objects_to_output_tfile(input_file_address : str, output_file : TFile, object_to_skip : list[str]) -> None: + input_file = TFile(input_file_address, "READ") + types_to_copy = ["TH1", "TTree"] + list_of_keys = input_file.GetListOfKeys() + for key in list_of_keys: + object_name = key.GetName() + if object_name in object_to_skip: + continue + + this_class = gROOT.GetClass(key.GetClassName()) + is_type_to_copy = any([this_class.InheritsFrom(type_to_copy) for type_to_copy in types_to_copy]) + if not is_type_to_copy: + continue + object = input_file.Get(object_name) + output_file.cd() + object.Write() + input_file.Close() + +def get_list_of_trees(root_file_address : str) -> list[str]: + root_file = TFile(root_file_address, "READ") + selected_trees = ["truth", "reco", "particleLevel"] + list_of_trees = [key.GetName() for key in root_file.GetListOfKeys() if key.GetClassName() == "TTree" and key.GetName() in selected_trees] + root_file.Close() + return list_of_trees + +def get_selected_branches_for_smearing(branches : list[str]): + keywords = set(["_eta", "_phi", "_pt", "_E_", "_e_", "_m_", "met_met"]) + selected_branches = [branch for branch in branches if any(keyword in branch for keyword in keywords)] + return selected_branches + +def apply_gaussian_smearing(array, mean=1, stddev=0.2): + return array*np.random.normal(mean, stddev, array.shape) + +def get_branches_types(file_address : str, tree_name : str) -> dict[str,str]: + rdf = RDataFrame(tree_name, file_address) + branches = rdf.GetColumnNames() + branches_types = {} + for branch in branches: + type_string = rdf.GetColumnType(branch) + type_string = type_string.replace("ROOT::VecOps::RVec", "std::vector") + type_string = type_string.replace("Float_t", "float") + type_string = type_string.replace("Int_t", "int") + type_string = type_string.replace("Double_t", "double") + type_string = type_string.replace("Long64_t", "long long") + type_string = type_string.replace("Bool_t", "bool") + type_string = type_string.replace("Char_t", "char") + type_string = type_string.replace("Uint", "unsigned int") + type_string = type_string.replace("Ulong long", "unsigned long long") + branch_name = str(branch) + + branches_types[branch_name] = type_string + return branches_types + +def get_vector_type(string_type : str) -> str: + return string_type[string_type.find("<")+1:string_type.find(">")] + +def copy_array_to_vector(numpy_array, vec : vector) -> None: + vec.clear() + for element in numpy_array: + if type(element) == np.int32: + element = int(element) # I have no idea why this is needed ... + if type(element) == np.uint32: + element = int(element) # I have no idea why this is needed ... 
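+        # (Likely explanation: PyROOT's std::vector push_back overload resolution
+        # does not accept numpy integer scalars, so they are converted to plain
+        # Python ints first.)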
+
+        vec.push_back(element)
+
+def get_type_one_char(type_string : str) -> str:
+    if type_string == "float":
+        return "F"
+    if type_string == "int":
+        return "I"
+    if type_string == "double":
+        return "D"
+    if type_string == "long long":
+        return "L"
+    if type_string == "bool":
+        return "O"
+    if type_string == "char":
+        return "C"
+    if type_string == "unsigned int":
+        return "i"
+    if type_string == "unsigned long long":
+        return "l"
+    return "F"
+
+
+def smear_file(input_address : str, output_address : str) -> None:
+    list_of_trees = get_list_of_trees(input_address)
+
+    input_file_uproot = uproot.open(input_address)
+    output_file = TFile(output_address, "RECREATE")
+    copy_other_objects_to_output_tfile(input_address, output_file, list_of_trees)
+    for tree in list_of_trees:
+        input_tree = input_file_uproot[tree]
+        output_file.cd()
+        output_tree = TTree(tree, tree)
+
+        branch_name_to_type = get_branches_types(input_address, tree)
+        branches = list(branch_name_to_type.keys())
+        branches.sort()
+        selected_branches_to_smear = get_selected_branches_for_smearing(branches)
+        selected_branches_to_smear = set(selected_branches_to_smear)
+
+        vector_dictionary = {}
+        event_arrays_dictionary = {}
+        #selected_branches = ["met_met_NOSYS"]
+        for branch_name in branches:
+            this_array = input_tree[branch_name].array()
+            first_element_copy = deepcopy(this_array[0:1])
+            type_string = branch_name_to_type[branch_name]
+
+            one_char_type = get_type_one_char(type_string)
+            if "vector" in type_string:
+                vector_type = get_vector_type(type_string)
+                this_vector = vector(vector_type)()
+                vector_dictionary[branch_name] = this_vector
+                output_tree.Branch(branch_name, this_vector)
+            else:
+                output_tree.Branch(branch_name, first_element_copy, branch_name + "/" + one_char_type)
+
+            # now smear the branch if needed
+            if branch_name in selected_branches_to_smear:
+                this_array = apply_gaussian_smearing(this_array)
+            event_arrays_dictionary[branch_name] = (first_element_copy,this_array)
+
+        for i_event in range(input_tree.numentries):
+            if i_event % 10 == 0:
+                print(f"event: {i_event}/{input_tree.numentries}")
+            for branch_name in event_arrays_dictionary:
+                first_element, this_array = event_arrays_dictionary[branch_name]
+                if "vector" in branch_name_to_type[branch_name]:
+                    copy_array_to_vector(this_array[i_event], vector_dictionary[branch_name])
+                else:
+                    first_element[0] = this_array[i_event]
+            output_tree.Fill()
+
+        output_tree.Write()
+    output_file.Close()
+
+
+if __name__ == "__main__":
+    if len(argv) != 3:
+        print("Usage: python3 smear_input_root_file.py <input_root_file> <output_root_file>")
+        exit(1)
+
+    input_root_file = argv[1]
+    output_root_file = argv[2]
+
+    smear_file(input_root_file, output_root_file)
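For orientation, the smearing applied above is purely multiplicative: every branch whose name matches one of the kinematic keywords is scaled event by event by a factor drawn from a Gaussian of mean 1 and width 0.2, so the original kinematics cannot be recovered from the smeared CI inputs. A minimal self-contained sketch of the same operation, using numpy only; the example values are illustrative:

    import numpy as np

    def apply_gaussian_smearing(array, mean=1, stddev=0.2):
        # element-wise multiplicative smearing, as in smear_input_root_file.py
        return array * np.random.normal(mean, stddev, array.shape)

    met_met = np.array([42.0e3, 17.5e3, 88.1e3])  # illustrative MET values
    print(apply_gaussian_smearing(met_met))       # each value scaled by ~N(1, 0.2)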
diff --git a/python/development/update_config_reading_tests.py b/python/development/update_config_reading_tests.py
index fdc480b48c53962815ddf2a4ecc7a463c156b556..e7d49a191bf7862cfc05e18a490bba1f9c607945 100644
--- a/python/development/update_config_reading_tests.py
+++ b/python/development/update_config_reading_tests.py
@@ -8,6 +8,8 @@ Usage:
 import yaml
 import os
 
+from DevelopmentCommon.DevelopmentFunctions import get_keyboard_bool
+
 def get_config_reading_test_dict(gitlab_ci_yaml_address : str) -> dict[str, list[str]]:
     """!read the .gitlab-ci.yml file and return a dictionary with all tests that are related to the ConfigReader.py script
     @param gitlab_ci_yaml_address: path to the .gitlab-ci.yml file
@@ -85,22 +87,6 @@ def get_reference_file_and_test_output_file(commands : list[str]) -> tuple[str,
     return reference_file, test_output_file
 
-def get_keyboard_bool(question : str) -> bool:
-    """!Asks a question and returns True if the answer is 'y' and False if the answer is 'n'
-    @param question: question to ask the user
-
-    @return True if the answer is 'y', False if the answer is 'n', exits with code 1 otherwise
-    """
-    answer = input(question)
-    answer = answer.strip().lower()
-    if answer == 'y':
-        return True
-    elif answer == 'n':
-        return False
-    else:
-        print("Only 'y' or 'n' is accepted.")
-        exit(1)
-
 if __name__ == "__main__":
     test_dictionary = get_config_reading_test_dict('.gitlab-ci.yml')
 
diff --git a/python/development/update_reference_root_files.py b/python/development/update_reference_root_files.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8e189b0033e75168305d61dab397a6bd1a30057
--- /dev/null
+++ b/python/development/update_reference_root_files.py
@@ -0,0 +1,66 @@
+"""!Script for automated updates of the reference files for output ROOT comparison tests.
+The script has to be run from the root directory of the project.
+
+Usage:
+    python3 python/development/update_reference_root_files.py
+"""
+
+import yaml
+import os
+from sys import path
+
+path.append("test/python")
+
+from DevelopmentCommon.DevelopmentFunctions import get_keyboard_bool
+from compare_two_root_files import test_compare_files
+
+class RootFilesComparisonTest:
+    def __init__(self):
+        self.produce_inputs_command = ""
+        self.pairs_output_file_and_reference_file = []
+    def valid_test(self):
+        return self.produce_inputs_command and self.pairs_output_file_and_reference_file
+
+def get_from_dict(test_dict : dict) -> RootFilesComparisonTest:
+    if type(test_dict) != dict:
+        return
+    if test_dict.get("stage", "") != "compare_results":
+        return
+    script = test_dict.get("script", [])
+    result = RootFilesComparisonTest()
+    for command in script:
+        command = command.strip()
+        if command.startswith("python3 python/FastFrames.py"):
+            if (result.produce_inputs_command):
+                print("Cannot read test from CI test config: ", test_dict)
+            result.produce_inputs_command = command
+        elif command.startswith("python3 test/python/compare_two_root_files.py"):
+            elements = command.split()
+            if len(elements) < 4:
+                print("Cannot read test from CI test config: ", test_dict)
+                continue
+            result.pairs_output_file_and_reference_file.append((elements[2], elements[3]))
+    if result.valid_test():
+        return result
+
+
+if __name__ == "__main__":
+    with open(".gitlab-ci.yml", "r") as f:
+        data_full = yaml.load(f, Loader=yaml.FullLoader)
+
+    for test_name in data_full:
+        test_dict = data_full[test_name]
+        test = get_from_dict(test_dict)
+        if test == None:
+            continue
+        os.system(test.produce_inputs_command)
+        for pair_to_compare in test.pairs_output_file_and_reference_file:
+            comparison_result = test_compare_files(pair_to_compare[0], pair_to_compare[1])
+
+            if comparison_result:
+                print(f"Files {pair_to_compare[0]} and {pair_to_compare[1]} are different:")
+                print("\t", comparison_result)
+                want_to_update = get_keyboard_bool("Do you want to update the reference file? 
(y/n): ") + if want_to_update: + os.system(f"cp {pair_to_compare[0]} {pair_to_compare[1]}") + diff --git a/python/merge_empty_grid_files.py b/python/merge_empty_grid_files.py index 42017104986231143e8a44a51d073752605e4132..8a9349527b6db916fed8242d8e5f18e524fe09a7 100644 --- a/python/merge_empty_grid_files.py +++ b/python/merge_empty_grid_files.py @@ -45,7 +45,6 @@ def build_model_file_from_empty_files(empty_files : list, trees_to_check : list[ if model_tree != None: # Copy the tree to the model file if the tree is found in one of the files model_file.cd() model_tree.Write() - break model_file.Close() model_file.Save() @@ -100,7 +99,7 @@ def get_file_dictionary(folder_address : str) -> dict[Metadata, list[str]]: result[metadata_tuple].append(file) return result -def merge_files(files_from_unique_sample : list[str], remove_original_files : bool, legacy_mode = False) -> bool: +def merge_files(files_from_unique_sample : list[str], remove_original_files : bool, legacy_mode = False, trees_to_check = None) -> bool: """ Merge empty files from the same sample @param files_from_unique_sample: list of files from the same sample @@ -112,10 +111,12 @@ def merge_files(files_from_unique_sample : list[str], remove_original_files : bo if len(files_from_unique_sample) <= 1: # nothing to merge return True + if trees_to_check == None: + trees_to_check = ["reco", "truth", "particleLevel"] + if not legacy_mode: # This work under the assumption that all files containing a tree, that tree will have structure. # So we only really care about separating the files that are completely empty from the ones that are not. - trees_to_check = ["reco", "truth", "particleLevel", "AnalysisMiniTree"] empty_files = [file for file in files_from_unique_sample if not has_at_least_one_tree(file, trees_to_check)] first_non_empty_file = None at_least_one_buggy_file = False # Buggy files here are defined as files with trees that do not have any structure @@ -134,7 +135,7 @@ def merge_files(files_from_unique_sample : list[str], remove_original_files : bo if len(empty_files) != 0: merged_file_name = first_non_empty_file[:-5] + "_merged.root" # Put the model file first such that hadd takes the structure from it - command = "hadd " + " " + merged_file_name + " " + first_non_empty_file + " " + " ".join(empty_files) + command = f'hadd {merged_file_name} {first_non_empty_file} {" ".join(empty_files)}' os.system(command) if remove_original_files and not at_least_one_buggy_file: @@ -144,11 +145,10 @@ def merge_files(files_from_unique_sample : list[str], remove_original_files : bo os.system("rm {}".format(first_non_empty_file)) Logger.log_message("DEBUG", "Removing first non-empty file: {}".format(file)) + return not at_least_one_buggy_file else: - trees_to_check = ["reco", "truth", "particleLevel", "AnalysisMiniTree"] empty_files = [file for file in files_from_unique_sample if has_empty_trees(file, trees_to_check)] - print(len(empty_files)) first_non_empty_file = None at_least_one_buggy_file = False for file in files_from_unique_sample: @@ -158,7 +158,6 @@ def merge_files(files_from_unique_sample : list[str], remove_original_files : bo if file not in empty_files and first_non_empty_file == None: first_non_empty_file = file - break # if all files are empty, we still need to merge them if first_non_empty_file == None: @@ -167,15 +166,8 @@ def merge_files(files_from_unique_sample : list[str], remove_original_files : bo if len(empty_files) != 0: build_model_file_from_empty_files(files_from_unique_sample, trees_to_check) - Logger.log_message("INFO", 
"Now I am going to merge all the empty files to the first file not empty!") - empty_file1 = empty_files[:len(empty_files)//2] #to be safe it is better to divide in two the list of empty files - empty_file2 = empty_files[len(empty_files)//2:] merged_file_name = first_non_empty_file[:-5] + "_merged.root" # Put the model file first such that hadd takes the structure from it - command = "hadd " + " " + merged_file_name + " " + "model_file.root" + " " + first_non_empty_file + " " + " ".join(empty_file1) - os.system(command) - - merged_file_name_new = merged_file_name[:-5] + "_merged.root" # Put the model file first such that hadd takes the structure from it - command = "hadd " + " " + merged_file_name_new + " " + "model_file.root" + " " + merged_file_name + " " + " ".join(empty_file2) + command = f'hadd {merged_file_name} model_file.root {first_non_empty_file} {" ".join(empty_files)}' os.system(command) if remove_original_files and not at_least_one_buggy_file: @@ -183,9 +175,9 @@ def merge_files(files_from_unique_sample : list[str], remove_original_files : bo for file in empty_files: os.system("rm {}".format(file)) Logger.log_message("DEBUG", "Removing empty file: {}".format(file)) + os.system("rm {}".format(first_non_empty_file)) Logger.log_message("DEBUG", "Removing first non-empty file: {}".format(file)) - os.system("rm {}".format(merged_file_name)) return not at_least_one_buggy_file @@ -193,10 +185,12 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--root_files_folder", help="Path to folder containing root files", default=None) parser.add_argument("--legacy-tct-output", help="From TopCPToolkit (TCT) v2.12.0 empty trees are not written to the outputs. Use this flag if your files where produced with older TCT.",action="store_true") + parser.add_argument("--trees_to_check", type=str, help="Optional string of command separated values of trees to check.", default=None) parser.add_argument("--log_level", help="Logging level", default="INFO") args = parser.parse_args() root_files_folder = args.root_files_folder log_level = args.log_level + trees_to_check = args.trees_to_check.split(",") if args.trees_to_check != None else None Logger.set_log_level(log_level) @@ -218,7 +212,7 @@ if __name__ == "__main__": for metadata_tuple, files_from_unique_sample in file_dictionary.items(): Logger.log_message("INFO", "Processing sample: {}".format(metadata_tuple)) Logger.log_message("DEBUG", "List of all files for sample: {}".format("\n\t\t".join(files_from_unique_sample))) - sample_contains_buggy_files = (not merge_files(files_from_unique_sample, True, legacy_mode)) # This is where the merging happens + sample_contains_buggy_files = (not merge_files(files_from_unique_sample, True, legacy_mode, trees_to_check)) # This is where the merging happens if sample_contains_buggy_files: buggy_samples.append(metadata_tuple) @@ -227,4 +221,4 @@ if __name__ == "__main__": Logger.log_message("ERROR", "This is the list of samples with at least one file with an empty reco, truth or particleLevel tree. The original files for them have not been removed. You should check the inputs for samples:\n {}".format(buggy_samples)) else: Logger.log_message("ERROR", "This is the list of samples with at least one file without any of reco, truth and particleLevel trees. The original files for them have not been removed. 
You should check the inputs for samples:\n {}".format(buggy_samples)) - exit(1) + exit(1) \ No newline at end of file diff --git a/python/merge_jobs.py b/python/merge_jobs.py index 72b4cf9fc6eabb53e5b0810e19c99eb26972c7b9..70d602312477b2add697f04a97953889146d599a 100644 --- a/python/merge_jobs.py +++ b/python/merge_jobs.py @@ -92,7 +92,7 @@ def merge_files(input_files : list[str], output_file : str, truth_blocks : dict[ reco,truth = reco_truth_tuple truth_histogram_name = truth_block_name + "_" + truth reco_histogram_name = reco + "_" + region - migration_matrix_name = reco + "_vs_" + truth_histogram_name + "_" + region + migration_matrix_name = truth_histogram_name + "_vs_" + reco + "_" + region if (migration_matrix_name not in histograms_names) or (reco_histogram_name not in histograms_names): Logger.log_message("DEBUG", "Skipping histogram: " + reco_histogram_name + " in folder " + directory) continue diff --git a/python/produce_metadata_files.py b/python/produce_metadata_files.py index fd977dddfa6cffbcc2e8257a29baa447bb5e8dad..e2c62254c4c098192524db98080144fc43dc21a0 100755 --- a/python/produce_metadata_files.py +++ b/python/produce_metadata_files.py @@ -74,6 +74,9 @@ if __name__ == "__main__": check_duplicates = args.check_duplicates.upper() == "TRUE" + # If user specifies an output path, make the directory if it does not already exist + if(args.output_path!=""): os.system(f"mkdir -p {args.output_path}") + histo_name = args.sum_weights_histo # If user has local root files if args.grid_datasets is None: diff --git a/python_wrapper/headers/ConfigSettingWrapper.h b/python_wrapper/headers/ConfigSettingWrapper.h index 7512976a03ebb13490c56ba4939208c73905c56f..5c28f5ca807fce47fa12f23b05b4bef4bfc0cb62 100644 --- a/python_wrapper/headers/ConfigSettingWrapper.h +++ b/python_wrapper/headers/ConfigSettingWrapper.h @@ -11,10 +11,12 @@ #include "FastFrames/Region.h" #include "FastFrames/Sample.h" #include "FastFrames/SimpleONNXInference.h" +#include "FastFrames/StringOperations.h" #include <memory> #include <map> #include <string> +#include <vector> /** @@ -189,8 +191,8 @@ class ConfigSettingWrapper { * * @param std::string - xsectionFile */ - void addXsectionFile(const std::string& xsectionFile) { - m_configSetting->addXsectionFile(xsectionFile); + void addXsectionFiles(const std::vector<std::string>& xsectionFiles, const std::vector<std::string> &campaigns) { + m_configSetting->addXsectionFiles(xsectionFiles, campaigns); }; /** @@ -198,8 +200,15 @@ class ConfigSettingWrapper { * * @return std::vector<std::string> */ - std::vector<std::string> xSectionFiles() const { - return m_configSetting->xSectionFiles(); + std::vector<std::string> xSectionFilesPreview() const { + std::vector<std::string> result; + const std::map<std::vector<std::string>,std::vector<std::string>> &xSectionFilesData = m_configSetting->xSectionFiles(); + for (const auto& [campaigns, xsection_files] : xSectionFilesData) { + const std::string campaignsString = StringOperations::joinStrings(",", campaigns); + const std::string xsectionFilesString = StringOperations::joinStrings(",", xsection_files); + result.push_back(campaignsString + " : " + xsectionFilesString); + } + return result; }; diff --git a/python_wrapper/headers/RegionWrapper.h b/python_wrapper/headers/RegionWrapper.h index 9ca15c88f3129adf5fb86ac400f63f8146203059..1cfe4480aed5d31c8ba08b7d44112995a8272c2e 100644 --- a/python_wrapper/headers/RegionWrapper.h +++ b/python_wrapper/headers/RegionWrapper.h @@ -168,6 +168,32 @@ class RegionWrapper { return 
combinations_python;
        };
+
+        /**
+         * @brief Add variables for TProfile
+         *
+         * @param v1
+         * @param v2
+         */
+        void addVariableForProfile(const std::string& v1, const std::string& v2) {
+            m_region->addVariableForProfile(v1, v2);
+        };
+
+        /**
+         * @brief Get the names of the variables for TProfile
+         *
+         * @return const std::vector<std::string>
+         */
+        const std::vector<std::string> variablesForProfile() const {
+            const std::vector<std::pair<std::string,std::string>> &combinations = m_region->variablesForProfile();
+            std::vector<std::string> combinations_python;
+            for (const std::pair<std::string,std::string> &combination : combinations) {
+                combinations_python.push_back(combination.first + ", "+ combination.second);
+            }
+            return combinations_python;
+        };
+
+
   private:
 
     std::shared_ptr<Region> m_region;
diff --git a/python_wrapper/headers/VariableWrapper.h b/python_wrapper/headers/VariableWrapper.h
index 82a6571f8e9ec4121ebff1797136790b00f11e01..e89bb0246c0a4b1dced5c0e0ff51829b874a79b1 100644
--- a/python_wrapper/headers/VariableWrapper.h
+++ b/python_wrapper/headers/VariableWrapper.h
@@ -227,6 +227,24 @@ class VariableWrapper {
     inline std::string type() const {return variableTypeToString(m_variable->type());};
 
+    /**
+     * @brief Set the Under/Over Flow Type
+     *
+     * @param type
+     */
+    inline void setUnderOverFlowType(const std::string& type) {
+        m_variable->setUnderOverFlowType(stringToUnderOverFlowType(type));
+    }
+
+    /**
+     * @brief Get Under/Overflow type
+     *
+     * @return std::string
+     */
+    inline std::string underOverFlowType() const {
+        return underOverFlowTypeToString(m_variable->underOverFlowType());
+    }
+
   private:
 
     std::shared_ptr<Variable> m_variable;
@@ -249,6 +267,27 @@ class VariableWrapper {
         }
         throw std::runtime_error("Unknown variable type: " + type);
     };
+
+
+    static std::vector<std::tuple<std::string, UnderOverFlowType> > s_underOverFlowTypeTypes;
+
+    static std::string underOverFlowTypeToString(UnderOverFlowType type) {
+        for (const auto& [typeName, typeType] : s_underOverFlowTypeTypes) {
+            if (typeType == type) {
+                return typeName;
+            }
+        }
+        throw std::runtime_error("Unknown under/overflow enum type: " + std::to_string(static_cast<int>(type)));
+    };
+
+    static UnderOverFlowType stringToUnderOverFlowType(const std::string& type) {
+        for (const auto& [typeName, typeType] : s_underOverFlowTypeTypes) {
+            if (StringOperations::compare_case_insensitive(typeName, type)) {
+                return typeType;
+            }
+        }
+        throw std::runtime_error("Unknown under/overflow type: " + type);
+    };
 };
 
 std::vector<std::tuple<std::string, VariableType> > VariableWrapper::s_variableTypes = {
@@ -280,5 +319,12 @@
     {"rvec<unsigned long>", VariableType::RVEC_UNSIGNED},
     {"rvec<unsigned long long int>", VariableType::RVEC_LONG_UNSIGNED},
     {"rvec<float>", VariableType::RVEC_FLOAT},
-    {"rvec<double>", VariableType::RVEC_DOUBLE}
+    {"rvec<double>", VariableType::RVEC_DOUBLE}
+};
+
+std::vector<std::tuple<std::string, UnderOverFlowType> > VariableWrapper::s_underOverFlowTypeTypes = {
+    {"none", UnderOverFlowType::NO_UNDER_OVER_FLOW_MERGE},
+    {"underflow", UnderOverFlowType::MERGE_UNDERFLOW},
+    {"overflow", UnderOverFlowType::MERGE_OVERFLOW},
+    {"both", UnderOverFlowType::MERGE_BOTH},
 };
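The four under/overflow modes map onto TH1 bin arithmetic in the obvious way: bin 0 is the underflow and bin N+1 the overflow. The following is only a hedged sketch of the intended semantics of merge_underflow_overflow, written with PyROOT for illustration; the actual FastFrames implementation lives in the C++ core, and error propagation is omitted here:

    from ROOT import TH1D

    def merge_under_over_flow(h, mode):
        # mode is one of "none", "underflow", "overflow", "both",
        # mirroring the strings accepted by stringToUnderOverFlowType above
        n = h.GetNbinsX()
        if mode in ("underflow", "both"):
            h.SetBinContent(1, h.GetBinContent(0) + h.GetBinContent(1))
            h.SetBinContent(0, 0.0)
        if mode in ("overflow", "both"):
            h.SetBinContent(n, h.GetBinContent(n) + h.GetBinContent(n + 1))
            h.SetBinContent(n + 1, 0.0)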
diff --git a/python_wrapper/utils/ConfigReaderCpp.cxx b/python_wrapper/utils/ConfigReaderCpp.cxx
index dd49863b33dad8162fad2b0a3a15d9d9b94e8279..b84db0b1a6048ab52850616e6c5fac191a4c1bd0 100644
--- a/python_wrapper/utils/ConfigReaderCpp.cxx
+++ b/python_wrapper/utils/ConfigReaderCpp.cxx
@@ -139,8 +139,8 @@ BOOST_PYTHON_MODULE(ConfigReaderCpp) {
             .def("campaignIsDefined", &ConfigSettingWrapper::campaignIsDefined)
 
             // x-section files
-            .def("addXsectionFile", &ConfigSettingWrapper::addXsectionFile)
-            .def("xSectionFiles", &ConfigSettingWrapper::xSectionFiles)
+            .def("addXsectionFiles", &ConfigSettingWrapper::addXsectionFiles)
+            .def("xSectionFilesPreview", &ConfigSettingWrapper::xSectionFilesPreview)
 
             // TLorentzVectors
             .def("addTLorentzVector", &ConfigSettingWrapper::addTLorentzVector)
@@ -240,12 +240,15 @@ BOOST_PYTHON_MODULE(ConfigReaderCpp) {
             .def("getVariableNames", &RegionWrapper::getVariableNames)
 
             // addVariableCombination
-            .def("addVariableCombination", &RegionWrapper::addVariableCombination) 
+            .def("addVariableCombination", &RegionWrapper::addVariableCombination)
             .def("variableCombinations", &RegionWrapper::variableCombinations)
 
             // addVariableCombination3D
-            .def("addVariableCombination3D", &RegionWrapper::addVariableCombination3D)
-            .def("variableCombinations3D", &RegionWrapper::variableCombinations3D)
+            .def("addVariableCombination3D",&RegionWrapper::addVariableCombination3D)
+            .def("variableCombinations3D", &RegionWrapper::variableCombinations3D)
+
+            .def("addVariableForProfile", &RegionWrapper::addVariableForProfile)
+            .def("variablesForProfile", &RegionWrapper::variablesForProfile)
         ;
 
     /**
@@ -288,6 +291,10 @@ BOOST_PYTHON_MODULE(ConfigReaderCpp) {
             // type
             .def("type", &VariableWrapper::type)
             .def("setType", &VariableWrapper::setType)
+
+            // underOverFlowType
+            .def("underOverFlowType", &VariableWrapper::underOverFlowType)
+            .def("setUnderOverFlowType", &VariableWrapper::setUnderOverFlowType)
         ;
 
     /**
diff --git a/test/configs/config.yml b/test/configs/config.yml
index 7255e2e5094e470424470e780d4d651ebb46fe82..6f890b79c2230034625c297bdc1259029b07f24f 100644
--- a/test/configs/config.yml
+++ b/test/configs/config.yml
@@ -7,7 +7,11 @@ general:
   default_sumweights: "NOSYS"
   default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS"
   default_reco_tree_name: "reco"
-  xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"]
+  xsection_files:
+    - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt"]
+      campaigns: ["mc16a", "mc16d", "mc16e", "mc20a", "mc20d", "mc20e"]
+    - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"]
+      campaigns: ["mc23a", "mc23c", "mc23e"]
 # custom_frame_name: CustomFrame
   automatic_systematics: True
   nominal_only: False
diff --git a/test/configs/config_TRExFitter_test.yml b/test/configs/config_TRExFitter_test.yml
index 6d7734fa79da3d14a80f40c2ff1060089fd9b363..6e3f52fa4bec954dd256407d27c0d6b696ba21f1 100644
--- a/test/configs/config_TRExFitter_test.yml
+++ b/test/configs/config_TRExFitter_test.yml
@@ -8,8 +8,11 @@ general:
   default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS"
   default_reco_tree_name: "reco"
 # xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"]
-  xsection_files: 
["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] -# custom_frame_name: "CustomFrame" + xsection_files: + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt"] + campaigns: ["mc16a", "mc16d", "mc16e", "mc20a", "mc20d", "mc20e"] + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + campaigns: ["mc23a", "mc23c", "mc23e"]# custom_frame_name: "CustomFrame" automatic_systematics: True nominal_only: False create_tlorentz_vectors_for: ["jet", "el"] diff --git a/test/configs/config_testing.yml b/test/configs/config_testing.yml index 7fc9202d56d4f90a387017c74a13dd46a67fe073..f24ada5d94ac7aa63f56d7fea11bf93b7a3cd8d8 100644 --- a/test/configs/config_testing.yml +++ b/test/configs/config_testing.yml @@ -13,7 +13,11 @@ general: default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" # xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] - xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + xsection_files: + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt"] + campaigns: ["mc16a", "mc16d", "mc16e", "mc20a", "mc20d", "mc20e"] + - files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + campaigns: ["mc23a", "mc23c", "mc23e"] create_tlorentz_vectors_for: ["jet"] #reco_to_truth_pairing_indices: ["eventNumber", "runNumber"] # custom_frame_name: "my_custom_frame_name.so" diff --git a/test/input/data17.PHYS.p4910.root b/test/input/data17.PHYS.p4910.root deleted file mode 100644 index c238abcb9f1d677b4023f6e8c5336f8ffed69d1a..0000000000000000000000000000000000000000 --- a/test/input/data17.PHYS.p4910.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8655442ee11e853511be0fe28b8c718cc250f8fa94b9bc0d0ff50b292471e24 -size 46035 diff --git a/test/input/mc20e.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.PHYS.e6337_s3681_r13145_p5658.root b/test/input/mc20e.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.PHYS.e6337_s3681_r13145_p5658.root deleted file mode 100644 index 42bdb9d13c03458c52dfc2aa07ae6768d1d78c8f..0000000000000000000000000000000000000000 --- a/test/input/mc20e.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.PHYS.e6337_s3681_r13145_p5658.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25b32ae1a222408f6fef7f700a5ae110a000d1e747d25237929b97c745eb6c7f -size 91480362 diff --git a/test/input/mc20e.700341.Sh_2211_Wmunu_maxHTpTV2_BFilter.PHYS.e8351_s3681_r13167_r13146_p5855.root b/test/input/mc20e.700341.Sh_2211_Wmunu_maxHTpTV2_BFilter.PHYS.e8351_s3681_r13167_r13146_p5855.root deleted file mode 100644 index b8cb3218d562758524e3c3d91b52465222d9f333..0000000000000000000000000000000000000000 --- a/test/input/mc20e.700341.Sh_2211_Wmunu_maxHTpTV2_BFilter.PHYS.e8351_s3681_r13167_r13146_p5855.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb7d16dafa36ced6c5f7df829604dfe15f24bdc649292ae993661f5829932a7a -size 3165848 diff --git a/test/python/compare_two_root_files.py 
diff --git a/test/python/compare_two_root_files.py b/test/python/compare_two_root_files.py
index 6ca5bf3130a7009bd948345091402aae72b6ae8b..63179ec702afc5c72afa85fd191f998e4c3ccea3 100644
--- a/test/python/compare_two_root_files.py
+++ b/test/python/compare_two_root_files.py
@@ -5,7 +5,7 @@ Two identical files will return a zero exit code and print out success message.
 Usage:
     python3 compare_two_root_files.py <file1> <file2>
 """
-from ROOT import TFile, TTree, TH1D, TH2D, TH3D, TDirectory
+from ROOT import TFile, TTree, TH1D, TH2D, TH3D, TDirectory, TProfile
 import sys
 
 def floats_are_equal(val1 : float, val2 : float, tolerance : float = 1e-5) -> bool:
@@ -16,6 +16,8 @@ def floats_are_equal(val1 : float, val2 : float, tolerance : float = 1e-5) -> bo
         return True
     if abs(val1) < 1e-9 and abs(val2) < 1e-9:
         return True
+    if abs(val1) < 1e-9 or abs(val2) < 1e-9:
+        return False
     return abs((val1 - val2)/val1) < tolerance
 
 def test_compare_trees(tree_1 : TTree, tree_2 : TTree) -> str:
@@ -100,6 +102,17 @@ def compare_3d_histograms(hist1 : TH3D, hist2 : TH3D) -> str:
                     return f"Histograms have different bin content at bin ({i}, {j}, {k}): {hist1.GetBinContent(i, j, k)} != {hist2.GetBinContent(i, j, k)}"
     return None
 
+def compare_tprofiles(profile1 : TProfile, profile2 : TProfile) -> str:
+    """
+    Compare two TProfile histograms. If they are different, return a string explaining the first difference found. If identical, return None
+    """
+    if profile1.GetNbinsX() != profile2.GetNbinsX():
+        return "Profiles have different number of bins in x"
+    for i in range(profile1.GetNbinsX()+2):
+        if not floats_are_equal(profile1.GetBinContent(i), profile2.GetBinContent(i)):
+            return f"Profiles have different bin content at bin {i}: {profile1.GetBinContent(i)} != {profile2.GetBinContent(i)}"
+    return None
+
 def get_list_of_folders(file : TFile) -> list[str]:
     """
     Get the list TDirectories names in a TFile
@@ -144,6 +157,8 @@ def compare_histograms_in_folder(tfile1 : TFile, tfile2 : TFile, folder : str, h
         compare_function = compare_2d_histograms
     elif histo_type == "TH3D":
         compare_function = compare_3d_histograms
+    elif histo_type == "TProfile":
+        compare_function = compare_tprofiles
     else:
         raise ValueError(f"Unknown histogram type {histo_type}")
 
@@ -188,7 +203,9 @@ def compare_all_histograms_in_files(file1 : TFile, file2 : TFile, folders_to_ign
         comparison_3D = compare_histograms_in_folder(file1, file2, folder, "TH3D")
         if comparison_3D:
             return comparison_3D
-
+        comparison_profile = compare_histograms_in_folder(file1, file2, folder, "TProfile")
+        if comparison_profile:
+            return comparison_profile
     return None
 
 
diff --git a/test/reference_files/config_reading/config_MiniAnalysis.yml b/test/reference_files/config_reading/config_MiniAnalysis.yml
index a06ea446fddc71b00c143d58384c6a2ccadc7c2b..a4bbdf9d7bff509c6ed57ecae9bf521559973722 100644
--- a/test/reference_files/config_reading/config_MiniAnalysis.yml
+++ b/test/reference_files/config_reading/config_MiniAnalysis.yml
@@ -7,7 +7,11 @@ general:
   default_sumweights: "NOSYS"
   default_event_weights: "weight_mc_NOSYS * globalTriggerEffSF_NOSYS * weight_jvt_effSF_NOSYS * weight_pileup_NOSYS * weight_beamspot * weight_leptonSF_tight_NOSYS"
   default_reco_tree_name: "reco"
-  xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"]
+  xsection_files:
+    - campaigns: ["mc20a", "mc20d", "mc20e"]
+      files: ["test/data/PMGxsecDB_mc16.txt"]
+    - campaigns: ["mc23a", "mc23d", "mc23e"]
+      files: ["test/data/PMGxsecDB_mc23.txt"]
 # xsection_files: 
["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] automatic_systematics: False nominal_only: True diff --git a/test/reference_files/config_reading/config_TRExFitter_test.yml b/test/reference_files/config_reading/config_TRExFitter_test.yml index 900c246ea497cfd42ca70860a5875618879182b0..a98fea6a0ba8b13792d840bbc1480a94e6374a4f 100644 --- a/test/reference_files/config_reading/config_TRExFitter_test.yml +++ b/test/reference_files/config_reading/config_TRExFitter_test.yml @@ -7,7 +7,11 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] # xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] # custom_frame_name: "CustomFrame" automatic_systematics: True diff --git a/test/reference_files/config_reading/config_custom_class.yml b/test/reference_files/config_reading/config_custom_class.yml index 2c925835461cae0ed495c04e92873027d9bdb02d..1d8a18e945d048a2bffeefd379209298162eb483 100644 --- a/test/reference_files/config_reading/config_custom_class.yml +++ b/test/reference_files/config_reading/config_custom_class.yml @@ -7,7 +7,11 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] # xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] custom_frame_name: CustomFrame automatic_systematics: True diff --git a/test/reference_files/config_reading/config_nested_anchors_test.yml b/test/reference_files/config_reading/config_nested_anchors_test.yml index de569f935cb34e231cb9165abdf6e7263029eb2e..cc95a79bd9af6aa9c2a5f73b35d18e72263a8941 100644 --- a/test/reference_files/config_reading/config_nested_anchors_test.yml +++ b/test/reference_files/config_reading/config_nested_anchors_test.yml @@ -8,7 +8,11 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * globalTriggerEffSF_NOSYS * weight_jvt_effSF_NOSYS * weight_pileup_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] # xsection_files: 
["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] automatic_systematics: False use_region_subfolders: True diff --git a/test/reference_files/config_reading/config_testing.yml b/test/reference_files/config_reading/config_testing.yml index 77b52fde85fd9a96021a4a8777d1ffff106abd16..d643893e29dd635dcee71d1819edef750dae04f8 100644 --- a/test/reference_files/config_reading/config_testing.yml +++ b/test/reference_files/config_reading/config_testing.yml @@ -12,9 +12,15 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] + # xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] create_tlorentz_vectors_for: ["jet"] + merge_underflow_overflow: "Both" #reco_to_truth_pairing_indices: ["eventNumber", "runNumber"] # custom_frame_name: "my_custom_frame_name.so" #automatic_systematics: True @@ -96,6 +102,7 @@ regions: - name: "met_met" title : "histo title;X axis title;Y axis title" definition: "met_met_NOSYS" + merge_underflow_overflow: "overflow" type: "float" binning: #min: 0 @@ -114,6 +121,9 @@ regions: - x: "met_met" y: "met_phi" z: "jet_pt" + profile: + - x: "met_phi" + y: "jet_pt" - name: "Muon" selection: "mu_pt_NOSYS[0] > 30000" @@ -139,6 +149,7 @@ regions: - name: "met_phi" title : "histo title;X axis title;Y axis title" definition: "met_phi_NOSYS" + merge_underflow_overflow: "Underflow" type: "float" binning: min: -3.2 @@ -220,6 +231,7 @@ samples: truth: "truth_jet_pt" variables: - name: "truth_jet_pt" + merge_underflow_overflow: "both" definition: "particle_jet_pt" type: "float" binning: diff --git a/test/reference_files/config_reading/reference_MiniAnalysis.txt b/test/reference_files/config_reading/reference_MiniAnalysis.txt index 47b1694da7bb8f288b452c1ed3f961c733b980fc..df280eaa617cf92ee7c235001b5d9adcdb9f8682 100644 --- a/test/reference_files/config_reading/reference_MiniAnalysis.txt +++ b/test/reference_files/config_reading/reference_MiniAnalysis.txt @@ -10,7 +10,7 @@ General block: max_event: -1 --split_n_jobs: -1 --job_index: -1 - xSectionFiles: ['test/data/PMGxsecDB_mc16.txt', 'test/data/PMGxsecDB_mc23.txt'] + xSectionFiles: ['mc20a,mc20d,mc20e : test/data/PMGxsecDB_mc16.txt', 'mc23a,mc23d,mc23e : test/data/PMGxsecDB_mc23.txt'] cap_acceptance_selection: True config_define_after_custom_class: False use_region_subfolders: False @@ -46,6 +46,7 @@ Regions block: type: int definition: nElectrons_tight_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 4 , 0.0 , 4.0 @@ -54,6 +55,7 @@ Regions block: type: int definition: nJets_good_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 10 , 0.0 , 10.0 @@ -65,6 +67,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -73,6 +76,7 @@ Regions block: type: float definition: jet2_pt_NOSYS 
is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -81,6 +85,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -89,6 +94,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -100,6 +106,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -108,6 +115,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -116,6 +124,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -124,6 +133,7 @@ Regions block: type: int definition: nElectrons_tight_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 4 , 0.0 , 4.0 @@ -132,6 +142,7 @@ Regions block: type: int definition: nJets_good_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 10 , 0.0 , 10.0 @@ -143,6 +154,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -151,6 +163,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -159,6 +172,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -167,6 +181,7 @@ Regions block: type: int definition: nElectrons_tight_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 4 , 0.0 , 4.0 @@ -175,6 +190,7 @@ Regions block: type: int definition: nJets_good_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 10 , 0.0 , 10.0 @@ -183,6 +199,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -194,6 +211,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -202,6 +220,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -210,6 +229,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -218,6 +238,7 @@ Regions block: type: int definition: nJets_good_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 10 , 0.0 , 10.0 @@ -226,6 +247,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -237,6 +259,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -245,6 +268,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -253,6 +277,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -261,6 +286,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -272,6 +298,7 @@ Regions block: type: int 
definition: nElectrons_tight_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 4 , 0.0 , 4.0 @@ -283,6 +310,7 @@ Regions block: type: int definition: nJets_good_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 10 , 0.0 , 10.0 @@ -294,6 +322,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -305,6 +334,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -316,6 +346,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -324,6 +355,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -332,6 +364,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -340,6 +373,7 @@ Regions block: type: int definition: nElectrons_tight_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 4 , 0.0 , 4.0 @@ -348,6 +382,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -359,6 +394,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -367,6 +403,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -375,6 +412,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -383,6 +421,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -394,6 +433,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -402,6 +442,7 @@ Regions block: type: float definition: jet2_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -410,6 +451,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 @@ -418,6 +460,7 @@ Regions block: type: float definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -429,6 +472,7 @@ Regions block: type: float definition: jet1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 56 , 20.0 , 300.0 @@ -440,6 +484,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 35 , 25.0 , 200.0 diff --git a/test/reference_files/config_reading/reference_log.txt b/test/reference_files/config_reading/reference_log.txt index 05f9b2565a0f2e98c79451052c842b8ec79db56b..f3dd8783978fdea4e6a1cb26db300c4e8f1b3273 100644 --- a/test/reference_files/config_reading/reference_log.txt +++ b/test/reference_files/config_reading/reference_log.txt @@ -10,7 +10,7 @@ General block: max_event: 1000 --split_n_jobs: -1 --job_index: -1 - xSectionFiles: ['test/data/PMGxsecDB_mc16.txt', 'test/data/PMGxsecDB_mc23.txt'] + xSectionFiles: ['mc20a,mc20d,mc20e : test/data/PMGxsecDB_mc16.txt', 'mc23a,mc23d,mc23e : test/data/PMGxsecDB_mc23.txt'] 
cap_acceptance_selection: False config_define_after_custom_class: False use_region_subfolders: False @@ -58,6 +58,7 @@ Regions block: type: float definition: jet_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -66,6 +67,7 @@ Regions block: type: float definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: overflow binning: 0.000000,20000.000000,60000.000000,80000.000000,140000.000000,250000.000000 @@ -74,6 +76,7 @@ Regions block: type: float definition: met_phi_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 16 , -3.2 , 3.2 @@ -81,6 +84,10 @@ Regions block: met_met, met_phi, jet_pt + Profiles: + met_phi, jet_pt + + name: Muon selection: mu_pt_NOSYS[0] > 30000 variables: @@ -89,6 +96,7 @@ Regions block: type: float definition: jet_pt_NOSYS is_nominal_only: True + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -97,6 +105,7 @@ Regions block: type: float definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 0.000000,20000.000000,60000.000000,80000.000000,140000.000000,250000.000000 @@ -105,6 +114,7 @@ Regions block: type: float definition: met_phi_NOSYS is_nominal_only: False + merge_underflow_overflow: underflow binning: 16 , -3.2 , 3.2 @@ -113,6 +123,7 @@ Regions block: type: float definition: jet_pt_index_1_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -121,6 +132,7 @@ Regions block: type: float definition: jet_pt_index_2_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -129,6 +141,7 @@ Regions block: type: float definition: jet_pt_index_3_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -137,6 +150,7 @@ Regions block: type: float definition: jet_pt_index_4_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -145,6 +159,7 @@ Regions block: type: float definition: particle_jet_pt_index_1_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -153,6 +168,7 @@ Regions block: type: float definition: particle_jet_pt_index_2_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -161,6 +177,7 @@ Regions block: type: float definition: particle_jet_pt_index_3_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -169,6 +186,7 @@ Regions block: type: float definition: particle_jet_pt_index_4_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -177,6 +195,7 @@ Regions block: type: float definition: pt_top_mass_170_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -185,6 +204,7 @@ Regions block: type: float definition: pt_top_mass_172.5_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -193,6 +213,7 @@ Regions block: type: float definition: pt_top_mass_175.0_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -272,31 +293,37 @@ Samples block: name: truth_jet_pt title: type: float + merge_underflow_overflow: both definition: particle_jet_pt binning: 10 , 0.0 , 10.0 name: truth_jet1_pt title: type: float + merge_underflow_overflow: both definition: particle_jet1_pt binning: 10 , 0.0 , 10.0 name: truth_jet2_pt title: type: float + merge_underflow_overflow: both definition: particle_jet2_pt binning: 
10 , 0.0 , 10.0 name: truth_jet3_pt title: type: float + merge_underflow_overflow: both definition: particle_jet3_pt binning: 10 , 0.0 , 10.0 name: truth_jet4_pt title: type: float + merge_underflow_overflow: both definition: particle_jet4_pt binning: 10 , 0.0 , 10.0 name: truth_el_pt title: type: float + merge_underflow_overflow: both definition: particle_el_pt binning: 0.000000,2.000000,5.000000,7.000000,8.000000,10.000000,15.000000 Custom defines: diff --git a/test/reference_files/config_reading/reference_log_samples_ttbar_Wjets.txt b/test/reference_files/config_reading/reference_log_samples_ttbar_Wjets.txt index 51f37cde0b045aca33945cbb58f0253bb526b822..591038f326a40fc4e83f441e18d337298d11bc69 100644 --- a/test/reference_files/config_reading/reference_log_samples_ttbar_Wjets.txt +++ b/test/reference_files/config_reading/reference_log_samples_ttbar_Wjets.txt @@ -10,7 +10,7 @@ General block: max_event: 1000 --split_n_jobs: 10 --job_index: 1 - xSectionFiles: ['test/data/PMGxsecDB_mc16.txt', 'test/data/PMGxsecDB_mc23.txt'] + xSectionFiles: ['mc20a,mc20d,mc20e : test/data/PMGxsecDB_mc16.txt', 'mc23a,mc23d,mc23e : test/data/PMGxsecDB_mc23.txt'] cap_acceptance_selection: False config_define_after_custom_class: False use_region_subfolders: False @@ -58,6 +58,7 @@ Regions block: type: float definition: jet_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -66,6 +67,7 @@ Regions block: type: float definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: overflow binning: 0.000000,20000.000000,60000.000000,80000.000000,140000.000000,250000.000000 @@ -74,6 +76,7 @@ Regions block: type: float definition: met_phi_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 16 , -3.2 , 3.2 @@ -81,6 +84,10 @@ Regions block: met_met, met_phi, jet_pt + Profiles: + met_phi, jet_pt + + name: Muon selection: mu_pt_NOSYS[0] > 30000 variables: @@ -89,6 +96,7 @@ Regions block: type: float definition: jet_pt_NOSYS is_nominal_only: True + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -97,6 +105,7 @@ Regions block: type: float definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 0.000000,20000.000000,60000.000000,80000.000000,140000.000000,250000.000000 @@ -105,6 +114,7 @@ Regions block: type: float definition: met_phi_NOSYS is_nominal_only: False + merge_underflow_overflow: underflow binning: 16 , -3.2 , 3.2 @@ -113,6 +123,7 @@ Regions block: type: float definition: jet_pt_index_1_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -121,6 +132,7 @@ Regions block: type: float definition: jet_pt_index_2_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -129,6 +141,7 @@ Regions block: type: float definition: jet_pt_index_3_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -137,6 +150,7 @@ Regions block: type: float definition: jet_pt_index_4_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -145,6 +159,7 @@ Regions block: type: float definition: particle_jet_pt_index_1_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -153,6 +168,7 @@ Regions block: type: float definition: particle_jet_pt_index_2_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -161,6 +177,7 @@ Regions block: type: float definition: 
particle_jet_pt_index_3_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -169,6 +186,7 @@ Regions block: type: float definition: particle_jet_pt_index_4_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -177,6 +195,7 @@ Regions block: type: float definition: pt_top_mass_170_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -185,6 +204,7 @@ Regions block: type: float definition: pt_top_mass_172.5_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -193,6 +213,7 @@ Regions block: type: float definition: pt_top_mass_175.0_NOSYS is_nominal_only: False + merge_underflow_overflow: both binning: 10 , 0.0 , 300000.0 @@ -272,31 +293,37 @@ Samples block: name: truth_jet_pt title: type: float + merge_underflow_overflow: both definition: particle_jet_pt binning: 10 , 0.0 , 10.0 name: truth_jet1_pt title: type: float + merge_underflow_overflow: both definition: particle_jet1_pt binning: 10 , 0.0 , 10.0 name: truth_jet2_pt title: type: float + merge_underflow_overflow: both definition: particle_jet2_pt binning: 10 , 0.0 , 10.0 name: truth_jet3_pt title: type: float + merge_underflow_overflow: both definition: particle_jet3_pt binning: 10 , 0.0 , 10.0 name: truth_jet4_pt title: type: float + merge_underflow_overflow: both definition: particle_jet4_pt binning: 10 , 0.0 , 10.0 name: truth_el_pt title: type: float + merge_underflow_overflow: both definition: particle_el_pt binning: 0.000000,2.000000,5.000000,7.000000,8.000000,10.000000,15.000000 Custom defines: diff --git a/test/reference_files/config_reading/reference_nested_anchors.txt b/test/reference_files/config_reading/reference_nested_anchors.txt index 5ccf7c0c8f5fb62934542d3346d524afc9f1235c..b5e8809984cf6670b17e94ab7ae245ae99e89148 100644 --- a/test/reference_files/config_reading/reference_nested_anchors.txt +++ b/test/reference_files/config_reading/reference_nested_anchors.txt @@ -10,7 +10,7 @@ General block: max_event: -1 --split_n_jobs: -1 --job_index: -1 - xSectionFiles: ['test/data/PMGxsecDB_mc16.txt', 'test/data/PMGxsecDB_mc23.txt'] + xSectionFiles: ['mc20a,mc20d,mc20e : test/data/PMGxsecDB_mc16.txt', 'mc23a,mc23d,mc23e : test/data/PMGxsecDB_mc23.txt'] cap_acceptance_selection: True config_define_after_custom_class: False use_region_subfolders: True @@ -43,6 +43,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 40 , 0.0 , 200.0 @@ -51,6 +52,7 @@ Regions block: type: int definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -62,6 +64,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 40 , 0.0 , 200.0 @@ -70,6 +73,7 @@ Regions block: type: int definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -81,6 +85,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 40 , 0.0 , 200.0 @@ -89,6 +94,7 @@ Regions block: type: int definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -97,6 +103,7 @@ Regions block: type: double definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 0.000000,20000.000000,40000.000000,60000.000000,80000.000000,140000.000000,250000.000000 @@ -108,6 
+115,7 @@ Regions block: type: float definition: el1_pt_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 40 , 0.0 , 200.0 @@ -116,6 +124,7 @@ Regions block: type: int definition: el1_eta_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 60 , -3.0 , 3.0 @@ -124,6 +133,7 @@ Regions block: type: double definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 0.000000,20000.000000,40000.000000,60000.000000,80000.000000,140000.000000,250000.000000 @@ -132,6 +142,7 @@ Regions block: type: float definition: met_met_NOSYS is_nominal_only: False + merge_underflow_overflow: none binning: 0.000000,20000.000000,40000.000000,60000.000000,80000.000000,140000.000000,250000.000000 diff --git a/test/reference_files/configs_root_files_comparison/config.yml b/test/reference_files/configs_root_files_comparison/config.yml index b945d945e9e19565fe834dbbb7ba42af4de145fc..a1babb9f0dce8503ee22089cb8c95142629ce19e 100644 --- a/test/reference_files/configs_root_files_comparison/config.yml +++ b/test/reference_files/configs_root_files_comparison/config.yml @@ -7,8 +7,11 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] -# xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] # xsection_files: ["/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt","/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc23.txt"] # custom_frame_name: CustomFrame automatic_systematics: True nominal_only: False @@ -108,6 +111,9 @@ regions: min: 0 max: 1 number_of_bins: 100 + profile: + - x: met_met + y: signal_score - name: "Muon" selection: "mu_pt_NOSYS[0] > 30000"
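The new profile block in a region pairs an x variable with a y variable, which corresponds to booking a TProfile. A rough illustration of the underlying booking via RDataFrame's Profile1D (synthetic scalar columns generated with gRandom; FastFrames' actual wiring goes through VariableHistoProfile and the systematics machinery, which this sketch omits):

# sketch_profile_booking.py -- illustrative only, assumes a PyROOT environment
import ROOT

# Synthetic frame standing in for a reco tree; met_met and signal_score are
# plain scalars here, unlike the vector columns of a real ntuple.
df = (ROOT.RDataFrame(100)
        .Define("met_met", "gRandom->Uniform(0., 250000.)")
        .Define("signal_score", "gRandom->Uniform(0., 1.)"))

# profile: x: met_met, y: signal_score -> mean signal_score per met_met bin
prof = df.Profile1D(("signal_score_vs_met_met",
                     "profile;met_met;<signal_score>",
                     100, 0.0, 250000.0),
                    "met_met", "signal_score")

print(prof.GetNbinsX(), prof.GetBinContent(1))  # triggers the event loop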
diff --git a/test/reference_files/configs_root_files_comparison/output_histograms/Data.root b/test/reference_files/configs_root_files_comparison/output_histograms/Data.root deleted file mode 100644 index 6c7a8ffdb3cad0e49cccc85792f4709f765cbc6c..0000000000000000000000000000000000000000 --- a/test/reference_files/configs_root_files_comparison/output_histograms/Data.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c0514b22dd4b17e21c0fb3958ca2cae7e0b64320a5925a544f338d060780173 -size 7670 diff --git a/test/reference_files/configs_root_files_comparison/output_histograms/Wjets.root b/test/reference_files/configs_root_files_comparison/output_histograms/Wjets.root deleted file mode 100644 index cb9553fb6778377e47b7096a0142459ecd1760c7..0000000000000000000000000000000000000000 --- a/test/reference_files/configs_root_files_comparison/output_histograms/Wjets.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43ad0ce9b5b95780f47e8ad55c313dca055fe804ec5563ebd439b9e1722330a6 -size 1074855 diff --git a/test/reference_files/configs_root_files_comparison/output_histograms/ttbar_FS.root b/test/reference_files/configs_root_files_comparison/output_histograms/ttbar_FS.root deleted file mode 100644 index 85f649b1d4b98e8bd4d3e9d3d09c1745a71fedfa..0000000000000000000000000000000000000000 --- a/test/reference_files/configs_root_files_comparison/output_histograms/ttbar_FS.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:106b0b4f3992898a6005d202c6d6bd0d69727c0a4407dbce2be56f356350caa6 -size 3369116 diff --git a/test/reference_files/configs_root_files_comparison/output_ntuples/Data_0_2017_data.root b/test/reference_files/configs_root_files_comparison/output_ntuples/Data_0_2017_data.root deleted file mode 100644 index 9b82e588c2d4962008ab096d47d28bb8c4a36e82..0000000000000000000000000000000000000000 --- a/test/reference_files/configs_root_files_comparison/output_ntuples/Data_0_2017_data.root +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:046ba0c71a61e1da3b95c9c6907b7aa1859cefa26bf14d6ae30883ac499ea053 -size 19369 diff --git a/test/reference_files/configs_run/config.yml b/test/reference_files/configs_run/config.yml index 9b02565502c0b20625b6c079bee822308a13d273..4929dcc3a464d36cf4340a929b4f4d1182673e88 100644 --- a/test/reference_files/configs_run/config.yml +++ b/test/reference_files/configs_run/config.yml @@ -7,7 +7,11 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] # custom_frame_name: CustomFrame automatic_systematics: True nominal_only: False diff --git a/test/reference_files/configs_run/config_histogram_after_ntuple.yml b/test/reference_files/configs_run/config_histogram_after_ntuple.yml index c4b84038bf4c90a920c99012cefb25a49e29bfa4..9d24d760e773369810672c278d2b5140320a02ff 100644 --- a/test/reference_files/configs_run/config_histogram_after_ntuple.yml +++ b/test/reference_files/configs_run/config_histogram_after_ntuple.yml @@ -7,7 +7,11 @@ general: default_sumweights: "NOSYS" default_event_weights: "weight_mc_NOSYS * weight_beamspot * weight_pileup_NOSYS * weight_jvt_effSF_NOSYS * weight_btagSF_DL1dv01_FixedCutBEff_85_NOSYS * globalTriggerEffSF_NOSYS * weight_leptonSF_tight_NOSYS" default_reco_tree_name: "reco" - xsection_files: ["test/data/PMGxsecDB_mc16.txt","test/data/PMGxsecDB_mc23.txt"] + xsection_files: + - campaigns: ["mc20a", "mc20d", "mc20e"] + files: ["test/data/PMGxsecDB_mc16.txt"] + - campaigns: ["mc23a", "mc23d", "mc23e"] + files: ["test/data/PMGxsecDB_mc23.txt"] # custom_frame_name: CustomFrame automatic_systematics: True nominal_only: False
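As the configs above show, xsection_files now maps lists of campaigns to cross-section files, matching the new ConfigSetting::addXsectionFiles interface. A hedged sketch of reading such a block into a campaign-keyed lookup (the xsection_files_for helper is hypothetical and not the FastFrames parser itself; requires PyYAML):

# sketch_xsection_lookup.py -- illustrative only
import yaml

snippet = """
xsection_files:
  - campaigns: ["mc20a", "mc20d", "mc20e"]
    files: ["test/data/PMGxsecDB_mc16.txt"]
  - campaigns: ["mc23a", "mc23d", "mc23e"]
    files: ["test/data/PMGxsecDB_mc23.txt"]
"""

def xsection_files_for(campaign: str, config: dict) -> list[str]:
    # Return the cross-section files registered for a given campaign.
    for entry in config["xsection_files"]:
        if campaign in entry["campaigns"]:
            return entry["files"]
    raise KeyError(f"No x-section files configured for campaign {campaign}")

config = yaml.safe_load(snippet)
print(xsection_files_for("mc23a", config))  # ['test/data/PMGxsecDB_mc23.txt']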
diff --git a/test/reference_files/trex_configs/output/config_TRExFitter_test_inclusive_with_settings_file.config b/test/reference_files/trex_configs/output/config_TRExFitter_test_inclusive_with_settings_file.config index 16b4ee1d2c6db87ad9dee2b1445ce9bae1bc8dfd..7ec93a4420cce5195a37d17ea0142e5e52052fad 100644 --- a/test/reference_files/trex_configs/output/config_TRExFitter_test_inclusive_with_settings_file.config +++ b/test/reference_files/trex_configs/output/config_TRExFitter_test_inclusive_with_settings_file.config @@ -15,6 +15,7 @@ Job: "my_fit" % ------- % Fit: "fit" + CmeLabel: 13.6 FitBlind: True FitRegion: CRSR FitType: SPLUSB @@ -70,6 +71,12 @@ Region: "Muon_met_phi" % - SAMPLES - % % ----------- % +Sample: "ttll_syst_PDF_nominal" + HistoFile: ttll_FS + HistoFolderName: GEN_MUR10_MUF10_PDF93300 + Title: "ttll PDF nominal" + Type: GHOST + Sample: "ttbar_FS" FillColor: 50 HistoFile: ttbar_FS diff --git a/test/reference_files/trex_configs/output/config_TRExFitter_test_unfolding.config b/test/reference_files/trex_configs/output/config_TRExFitter_test_unfolding.config index 90dfe54fea9dceccc175700697a1908dd176195c..2dc89e564e6cae120c90da6f01939c46a5bb5d90 100644 --- a/test/reference_files/trex_configs/output/config_TRExFitter_test_unfolding.config +++ b/test/reference_files/trex_configs/output/config_TRExFitter_test_unfolding.config @@ -5,7 +5,7 @@ Job: "my_fit" AcceptancePath: /home/dubovsky/Analysis/4L/fastframes HistoChecks: NOCRASH - HistoPath: /home/dubovsky/Analysis/4L/fastframes + HistoPath: . ImageFormat: pdf Lumi: 1 MigrationPath: /home/dubovsky/Analysis/4L/fastframes @@ -72,7 +72,7 @@ UnfoldingSample: "ttbar_FS" FillColor: 3 LineColor: 3 MigrationFile: ttbar_FS - MigrationName: NOSYS/jet_pt_vs_parton_Ttbar_MC_t_afterFSR_pt + MigrationName: NOSYS/parton_Ttbar_MC_t_afterFSR_pt_vs_jet_pt SelectionEffFile: ttbar_FS SelectionEffName: NOSYS/selection_eff_parton_Ttbar_MC_t_afterFSR_pt Title: ttbar_FS diff --git a/test/reference_files/trex_configs/output/config_testing_unfolding.config b/test/reference_files/trex_configs/output/config_testing_unfolding.config index 7af709aeda6dadff0ba9fe4b9a27ac808eb05b8d..31365d290c6b2361386f8337e8be388af9ff54a9 100644 --- a/test/reference_files/trex_configs/output/config_testing_unfolding.config +++ b/test/reference_files/trex_configs/output/config_testing_unfolding.config @@ -5,7 +5,7 @@ Job: "my_fit" AcceptancePath: /home/dubovsky/Analysis/4L/fastframes HistoChecks: NOCRASH - HistoPath: /home/dubovsky/Analysis/4L/fastframes + HistoPath: . ImageFormat: pdf Lumi: 1 MigrationPath: /home/dubovsky/Analysis/4L/fastframes @@ -72,7 +72,7 @@ UnfoldingSample: "ttbar_FS" FillColor: 3 LineColor: 3 MigrationFile: ttbar_FS - MigrationName: NOSYS/jet_pt_vs_particle_truth_jet_pt + MigrationName: NOSYS/particle_truth_jet_pt_vs_jet_pt SelectionEffFile: ttbar_FS SelectionEffName: NOSYS/selection_eff_particle_truth_jet_pt Title: ttbar_FS diff --git a/test/reference_files/trex_configs/trex_settings.yml b/test/reference_files/trex_configs/trex_settings.yml index 8e6caf61e232f26e6253af01660ddb15345de385..985e6c4684262bcf2ef559da54f2fcd3c4250074 100644 --- a/test/reference_files/trex_configs/trex_settings.yml +++ b/test/reference_files/trex_configs/trex_settings.yml @@ -19,6 +19,14 @@ excluded_systematics: ["JET_EffectiveNP_Statistical\\d", "JET_JER_DataVsMC_MC16" #selected_systematics: ["JET_JER_DataVsMC_MC16"] +CustomBlocks: + Sample: + - name: "ttll_syst_PDF_nominal" + Title: "ttll PDF nominal" + Type: GHOST + HistoFolderName: "GEN_MUR10_MUF10_PDF93300" + HistoFile: "ttll_FS" + Systematics: - name: "Luminosity" Title: "Luminosity"
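The CustomBlocks section added to trex_settings.yml is what injects the extra GHOST Sample block into the generated TRExFitter config above. A minimal sketch of the kind of YAML-to-config rendering involved (render_block is a hypothetical helper shown only to illustrate the expected plain-text output; the real generator also normalizes quoting and key order):

# sketch_custom_blocks.py -- illustrative only
def render_block(block_type: str, entry: dict) -> str:
    # First key is the block name; remaining keys become "  Key: value" lines,
    # mirroring TRExFitter's plain-text config syntax.
    lines = [f'{block_type}: "{entry["name"]}"']
    for key, value in entry.items():
        if key != "name":
            lines.append(f"  {key}: {value}")
    return "\n".join(lines)

custom_blocks = {
    "Sample": [{
        "name": "ttll_syst_PDF_nominal",
        "HistoFile": "ttll_FS",
        "HistoFolderName": "GEN_MUR10_MUF10_PDF93300",
        "Title": '"ttll PDF nominal"',
        "Type": "GHOST",
    }]
}

for block_type, entries in custom_blocks.items():
    for entry in entries:
        # Prints a block matching the Sample: "ttll_syst_PDF_nominal" entry
        # in the generated config above.
        print(render_block(block_type, entry))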