diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6b5947c2321c78624b8c8866c8c64b72bdd68447..89be2bde806c2668e40d91a317a030bcc987f771 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,7 @@ variables: - BUILD_IMAGE: atlas/analysisbase:21.2.108 + BUILD_IMAGE: atlas/analysisbase:21.2.120 DATA_URL: https://dguest-ci.web.cern.ch/dguest-ci/ci/ftag/dumper/21.2.80 - + image: $BUILD_IMAGE diff --git a/BTagTrainingPreprocessing/CMakeLists.txt b/BTagTrainingPreprocessing/CMakeLists.txt index eed05682194b021a88a5d572ae409891c4fc3a39..f91d303161c7e7e3581eecea1b35bd86edbc6e2c 100644 --- a/BTagTrainingPreprocessing/CMakeLists.txt +++ b/BTagTrainingPreprocessing/CMakeLists.txt @@ -83,3 +83,8 @@ target_include_directories( dump-hbb PRIVATE src) atlas_add_executable( test-btagging util/test-btagging.cxx ) target_link_libraries( test-btagging dataset-dumper) target_include_directories( test-btagging PRIVATE src) + +atlas_add_executable( dump-htt + util/dump-htt.cxx util/SingleBTagOptions.cxx util/SingleBTagConfig.cxx) +target_link_libraries( dump-htt dataset-dumper) +target_include_directories( dump-htt PRIVATE src) diff --git a/BTagTrainingPreprocessing/src/BTagTrackWriter.cxx b/BTagTrainingPreprocessing/src/BTagTrackWriter.cxx index 5754e1ecb87f76ddde157d5a79e445e784f27e4d..712f13acdeab114e6836e5865a9cef857eab7391 100644 --- a/BTagTrainingPreprocessing/src/BTagTrackWriter.cxx +++ b/BTagTrainingPreprocessing/src/BTagTrackWriter.cxx @@ -9,6 +9,12 @@ #include "xAODTracking/TrackParticle.h" #include "xAODJet/Jet.h" +#include "FlavorTagDiscriminants/customGetter.h" + +///////////////////////////////////////////// +// Internal classes +///////////////////////////////////////////// + typedef std::function FloatFiller; class TrackConsumers: public H5Utils::Consumers {}; class TrackOutputWriter: public H5Utils::Writer<1,const TrackOutputs&> @@ -21,6 +27,27 @@ public: H5Utils::Writer<1,const TrackOutputs&>(file, name, cons, {{size}}) {} }; +// wrapper class to convert the things we 
have in +// FlavorTagDiscriminants into something that HDF5Utils can read. +template +class CustomSeqWrapper +{ + using SeqGetter = decltype( + FlavorTagDiscriminants::customSequenceGetter(std::declval())); +public: + CustomSeqWrapper(SeqGetter g): m_getter(g) { + } + T operator()(const TrackOutputs& t) { + auto output_vec = m_getter(*t.jet, {t.track}); + return output_vec.at(0); + } +private: + SeqGetter m_getter; +}; + +/////////////////////////////////// +// Class definition starts here +/////////////////////////////////// BTagTrackWriter::BTagTrackWriter( H5::Group& output_file, const BTagTrackWriterConfig& config): @@ -36,15 +63,16 @@ BTagTrackWriter::BTagTrackWriter( add_track_fillers(fillers, config.int_variables, -1); add_track_fillers(fillers, config.uchar_variables, 0); - // hard coded 4 momentum fillers - FloatFiller pt = [](const TrackOutputs& trk) -> float { - return trk.track->pt(); - }; - fillers.add("pt", pt, NAN); - FloatFiller eta = [](const TrackOutputs& trk) -> float { - return trk.track->eta(); - }; - fillers.add("eta", eta, NAN); + // use custom variables from the b-tagging inference code in + // FlavorTagDiscriminants + for (const std::string& name: config.flavortagdiscriminants_sequences) { + auto getter = FlavorTagDiscriminants::customSequenceGetter(name); + // note that within FlavorTagDiscriminants all the floats are + // double we're truncating here because that precision is + // probably not needed + CustomSeqWrapper wrapped(getter); + fillers.add(name, std::function(wrapped)); + } add_rel_jet_kinematics(fillers); diff --git a/BTagTrainingPreprocessing/src/BTagTrackWriterConfig.hh b/BTagTrainingPreprocessing/src/BTagTrackWriterConfig.hh index 8fd5c977d952096cd6f06e42adffdf6e0a039d70..76693c4aa3016609496e54f0b223f4d8bf5e5fee 100644 --- a/BTagTrainingPreprocessing/src/BTagTrackWriterConfig.hh +++ b/BTagTrainingPreprocessing/src/BTagTrackWriterConfig.hh @@ -13,6 +13,8 @@ struct BTagTrackWriterConfig { std::vector int_variables; std::vector 
float_variables; std::vector double_variables; + // these variables come out of customGetter in FlavorTagDiscriminants + std::vector flavortagdiscriminants_sequences; }; diff --git a/BTagTrainingPreprocessing/util/SingleBTagConfig.cxx b/BTagTrainingPreprocessing/util/SingleBTagConfig.cxx index 917fa1ecf752a70a51afb98c94e956dbf7f7b473..459ff180beb25ac66a6bc56a6f3bed5c1528d069 100644 --- a/BTagTrainingPreprocessing/util/SingleBTagConfig.cxx +++ b/BTagTrainingPreprocessing/util/SingleBTagConfig.cxx @@ -18,6 +18,9 @@ namespace { if (val == "d0_significance") { return TrackSortOrder::D0_SIGNIFICANCE; } + if (val == "pt") { + return TrackSortOrder::PT; + } throw std::logic_error("sort order '" + val + "' not recognized"); } } diff --git a/BTagTrainingPreprocessing/util/SingleBTagConfig.hh b/BTagTrainingPreprocessing/util/SingleBTagConfig.hh index 30e60c5b30cc8a8613d061fde4474bcdfc1c80fc..271cbd0ed0538a3be31dd5c3c087feac41b3ab13 100644 --- a/BTagTrainingPreprocessing/util/SingleBTagConfig.hh +++ b/BTagTrainingPreprocessing/util/SingleBTagConfig.hh @@ -6,7 +6,8 @@ #include #include "TrackSelectorConfig.hh" -enum class TrackSortOrder {ABS_D0_SIGNIFICANCE, ABS_D0, D0_SIGNIFICANCE}; +enum class TrackSortOrder { + ABS_D0_SIGNIFICANCE, ABS_D0, D0_SIGNIFICANCE, PT}; typedef std::map> VariableList; diff --git a/BTagTrainingPreprocessing/util/dump-hbb.cxx b/BTagTrainingPreprocessing/util/dump-hbb.cxx index ffec61dd18e3d9b37ffa14fb478ddd4e4a37a889..005b04cb55dabc71b3990d20f6376f3da3dc7673 100644 --- a/BTagTrainingPreprocessing/util/dump-hbb.cxx +++ b/BTagTrainingPreprocessing/util/dump-hbb.cxx @@ -39,6 +39,9 @@ // HDF includes #include "H5Cpp.h" +// json +#include "nlohmann/json.hpp" + // AnalysisBase tool include(s): #include "xAODRootAccess/Init.h" #include "xAODRootAccess/TEvent.h" @@ -108,13 +111,6 @@ std::vector get(const MapOfLists& v, const std::string& k) { const double GeV = 1000; -std::string get_json_metadata(unsigned long long n_jets, - unsigned long long 
n_broken_links) { - return "{\"n_jets\": " + std::to_string(n_jets) + ", " - + "\"n_jets_with_broken_track_links\": " - + std::to_string(n_broken_links) + "}"; -} - int main (int argc, char *argv[]) { namespace ftd = FlavorTagDiscriminants; @@ -247,6 +243,7 @@ int main (int argc, char *argv[]) // keep track of n written jets unsigned long long n_jets_written = 0; + unsigned long long n_skipped_files = 0; // keep track of broken links BTagInputChecker input_checker; @@ -259,7 +256,14 @@ int main (int argc, char *argv[]) std::unique_ptr ifile(TFile::Open(file.c_str(), "READ")); if ( ! ifile.get() || ifile->IsZombie()) { Error( APP_NAME, "Couldn't open file: %s", file.c_str() ); - return 1; + // sometimes the grid just doesn't return xrootd files... ignore + // this and eat the missing stats. + if (std::regex_match(file, std::regex("root://.*"))) { + n_skipped_files++; + continue; + } else { + return 1; + } } Info( APP_NAME, "Opened file: %s", file.c_str() ); @@ -436,8 +440,13 @@ int main (int argc, char *argv[]) // save count metadata addMetadata(output, counts); - std::ofstream meta("userJobMetadata.json"); - meta << get_json_metadata(n_jets_written, n_broken_links); + nlohmann::json metadata; + metadata["n_jets"] = n_jets_written; + metadata["n_jets_with_broken_track_links"] = n_broken_links; + metadata["n_skipped_files"] = n_skipped_files; + metadata["n_files"] = opts.in.size(); + std::ofstream metastream("userJobMetadata.json"); + metastream << metadata.dump(2); return 0; } diff --git a/BTagTrainingPreprocessing/util/dump-htt.cxx b/BTagTrainingPreprocessing/util/dump-htt.cxx new file mode 100644 index 0000000000000000000000000000000000000000..4505a0e5d15950d47673cad0f8de0ef4934ec9d6 --- /dev/null +++ b/BTagTrainingPreprocessing/util/dump-htt.cxx @@ -0,0 +1,193 @@ +#include +#include +#include + +#include "SingleBTagConfig.hh" +#include "SingleBTagOptions.hh" + +#include "BTaggingWriterConfiguration.hh" +#include "BTagJetWriterConfig.hh" +#include
"BTagTrackWriterConfig.hh" +#include "BTagJetWriter.hh" +#include "BTagTrackWriter.hh" + +#include "xAODRootAccess/Init.h" +#include "xAODRootAccess/tools/ReturnCheck.h" + +#include "FlavorTagDiscriminants/BTagTrackAugmenter.h" + +#include "xAODRootAccess/TEvent.h" +#include "xAODEventInfo/EventInfo.h" +#include "xAODTruth/TruthParticleContainer.h" +#include "xAODJet/JetContainer.h" +#include "xAODTracking/TrackParticleContainer.h" + +#include "H5Cpp.h" +#include "nlohmann/json.hpp" + +#include "TFile.h" +#include "TTree.h" + +#include + +namespace { + + // sort functions (note that these have to be redefined for HTT) + bool by_d0(const xAOD::TrackParticle* t1, + const xAOD::TrackParticle* t2) { + static SG::AuxElement::ConstAccessor d0("btag_ip_d0"); + return std::abs(d0(*t1)) > std::abs(d0(*t2)); + } + bool by_sd0(const xAOD::TrackParticle* t1, + const xAOD::TrackParticle* t2) { + static SG::AuxElement::ConstAccessor d0("btag_ip_d0"); + static SG::AuxElement::ConstAccessor d0s("btag_ip_d0_sigma"); + return std::abs(d0(*t1) / d0s(*t1)) > std::abs(d0(*t2) / d0s(*t2)); + } + bool by_signed_d0(const xAOD::TrackParticle* t1, + const xAOD::TrackParticle* t2) { + static SG::AuxElement::ConstAccessor d0_signed("IP3D_signed_d0_significance"); + return d0_signed(*t1) > d0_signed(*t2); + } + bool by_pt_descending(const xAOD::TrackParticle* t1, + const xAOD::TrackParticle* t2) { + return t1->pt() > t2->pt(); + } + + typedef bool (*TrackSort)(const xAOD::TrackParticle* t1, + const xAOD::TrackParticle* t2); + TrackSort trackSort(TrackSortOrder order) { + switch(order) { + case TrackSortOrder::ABS_D0_SIGNIFICANCE: return &by_sd0; + case TrackSortOrder::ABS_D0: return &by_d0; + case TrackSortOrder::D0_SIGNIFICANCE: return &by_signed_d0; + case TrackSortOrder::PT: return &by_pt_descending; + default: throw std::logic_error("undefined sort order"); + } + } + + std::vector get(const VariableList& v, const std::string& k) { + if (v.count(k)) return v.at(k); + return {}; + } + +} + 
+int main (int argc, char *argv[]) { + SingleTagIOOpts opts = get_single_tag_io_opts(argc, argv); + const SingleBTagConfig jobcfg = get_singlebtag_config(opts.config_file_name); + // The name of the application: + const char *const APP_NAME = "BTagTestDumper"; + + // Set up the environment: + RETURN_CHECK( APP_NAME, xAOD::Init() ); + + // Set up the event object: + xAOD::TEvent event(xAOD::TEvent::kClassAccess); + + // configure sort function + TrackSort track_sort = trackSort(jobcfg.track_sort_order); + + // setup output + H5::H5File output(opts.out, H5F_ACC_TRUNC); + + // Set up jet writer. Note that a lot of variables aren't set for + // HTT, since there's no b-tagging object to read from. See + // dump-single-btag for more things you can read from the b-tagging + // object. + BTagJetWriterConfig jet_cfg; + jet_cfg.write_event_info = true; + jet_cfg.jet_int_variables = get(jobcfg.btag, "jet_int_variables"); + jet_cfg.jet_float_variables = get(jobcfg.btag,"jet_floats"); + jet_cfg.variable_maps.replace_with_defaults_checks = cfg::check_map_from(cfg::BTagDefaultsMap); + jet_cfg.variable_maps.rename = {}; // please don't use this :( + jet_cfg.name = "jets"; + + BTagJetWriter jet_writer(output, jet_cfg); + // set up track writer + BTagTrackWriterConfig track_cfg; + track_cfg.name = "tracks"; + track_cfg.uchar_variables = get(jobcfg.track, "uchar"); + track_cfg.int_variables = get(jobcfg.track, "ints"); + track_cfg.float_variables = get(jobcfg.track, "floats"); + track_cfg.flavortagdiscriminants_sequences + = get(jobcfg.track, "flavortagdiscriminants_sequences"); + track_cfg.output_size = {jobcfg.n_tracks_to_save}; + std::unique_ptr track_writer(nullptr); + if (opts.save_tracks && jobcfg.n_tracks_to_save > 0) { + track_writer.reset(new BTagTrackWriter(output, track_cfg)); + } + + BTagTrackAugmenter track_augmenter; + + size_t n_truth_records_read = 0; + size_t n_truth_record_errors = 0; + + // Loop over the specified files: + for (const std::string& file: opts.in) { + 
// Open the file: + std::unique_ptr ifile(TFile::Open(file.c_str(), "READ")); + if ( ! ifile.get() || ifile->IsZombie()) { + Error( APP_NAME, "Couldn't open file: %s", file.c_str() ); + return 1; + } + Info( APP_NAME, "Opened file: %s", file.c_str() ); + + // Connect the event object to it: + RETURN_CHECK( APP_NAME, event.readFrom(ifile.get()) ); + + // Loop over its events: + unsigned long long entries = event.getEntries(); + if (opts.max_events > 0) entries = std::min(opts.max_events, entries); + for (unsigned long long entry = 0; entry < entries; ++entry) { + + // Load the event: + if (event.getEntry(entry) < 0) { + Error( APP_NAME, "Couldn't load entry %lld from file: %s", + entry, file.c_str() ); + return 1; + } + + // Print some status: + if ( ! (entry % 500)) { + Info( APP_NAME, "Processing entry %lld / %lld", entry, entries ); + } + + const xAOD::JetContainer *jets = nullptr; + RETURN_CHECK( APP_NAME, event.retrieve(jets, jobcfg.jet_collection) ); + + const xAOD::TrackParticleContainer *tpc = nullptr; + RETURN_CHECK( APP_NAME, event.retrieve(tpc, "JetAssocHTTlikeTracks") ); + + const xAOD::EventInfo *event_info = nullptr; + RETURN_CHECK( APP_NAME, event.retrieve(event_info, "EventInfo") ); + + for (const xAOD::Jet *const jet : *jets) { + + // this is more important stuff + if (jet->pt() < jobcfg.pt_cut || std::abs(jet->eta()) > 2.5) { + continue; + } + jet_writer.write(*jet, event_info); + + if (track_writer) { + std::vector tracks; + for (const auto* track: *tpc){ + double dr = track->p4().DeltaR(jet->p4()); + if (dr < 0.4) tracks.push_back(track); + } + sort(tracks.begin(), tracks.end(), track_sort); + track_writer->write(tracks, *jet); + } + } + } + } + + nlohmann::json metadata; + metadata["n_truth_records_read"] = n_truth_records_read; + metadata["n_truth_record_errors"] = n_truth_record_errors; + std::ofstream metastream("userJobMetadata.json"); + metastream << metadata.dump(2); + + return 0; +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 
ad96d28d9b238ffab6cadf3a0b597e8d37b08ad7..3c9791dcbe398701070796f909fe259029155b45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ # # Set the minimum required CMake version: -cmake_minimum_required( VERSION 3.2 FATAL_ERROR ) +cmake_minimum_required( VERSION 3.14 FATAL_ERROR ) find_package( AnalysisBase 21.2 REQUIRED ) # Set up CTest: diff --git a/configs/single-b-tag/HTTJets.json b/configs/single-b-tag/HTTJets.json new file mode 100644 index 0000000000000000000000000000000000000000..69e4e4f8cfdde49b6491316f7e72807eb9194c3c --- /dev/null +++ b/configs/single-b-tag/HTTJets.json @@ -0,0 +1,42 @@ +{ + "jet_collection": "AntiKt4EMTopoJets", + "do_calibration": "false", + "run_augmenters": "false", + "vr_cuts": "true", + "n_tracks_to_save": 40, + "pt_cut": 10000, + "track_sort_order": "pt", + "track_pt_minimum": 1000, + "track_d0_maximum": 1, + "track_z0_maximum": 1.5, + "nn_file_paths": [], + "variables": { + "btag": { + }, + "track": { + "uchar": [ + "numberOfInnermostPixelLayerHits", + "numberOfNextToInnermostPixelLayerHits", + "numberOfInnermostPixelLayerSharedHits", + "numberOfInnermostPixelLayerSplitHits", + "numberOfPixelHits", + "numberOfPixelHoles", + "numberOfPixelSharedHits", + "numberOfPixelSplitHits", + "numberOfSCTHits", + "numberOfSCTHoles", + "numberOfSCTSharedHits" + ], + "floats": [ + "chiSquared", + "numberDoF", + "phi", + "theta", + "qOverP" + ], + "flavortagdiscriminants_sequences": [ + "pt", "eta" + ] + } + } +} diff --git a/configs/single-b-tag/single-btag-track-variables.json b/configs/single-b-tag/single-btag-track-variables.json index 12ba94b23edb3ef5540f31153828206f09c6e1c5..d4f94234441b55f5da414467a076ef5ba74578c6 100644 --- a/configs/single-b-tag/single-btag-track-variables.json +++ b/configs/single-b-tag/single-btag-track-variables.json @@ -29,5 +29,6 @@ "phi", "theta", "qOverP" - ] + ], + "flavortagdiscriminants_sequences": ["pt", "eta"] }