From 6ceb177e513dade0d82c78be483854e0fb4b81f4 Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Thu, 11 Jul 2019 22:20:03 +0200 Subject: [PATCH 01/22] Initial commit --- .gitmodules | 3 + CMakeLists.txt | 52 +++ HDF5Writer/CMakeLists.txt | 29 ++ HDF5Writer/python/athena_without_athena | 1 + HDF5Writer/python/run.py | 14 + HDF5Writer/src/HDF5WriterAlg.cxx | 52 +++ HDF5Writer/src/HDF5WriterAlg.h | 44 +++ .../src/components/HDF5Writer_entries.cxx | 11 + HDF5Writer/src/components/HDF5Writer_load.cxx | 3 + HDF5Writer/util/hdf5-merge.cxx | 331 ++++++++++++++++++ HDF5Writer/version.cmake | 1 + 11 files changed, 541 insertions(+) create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 HDF5Writer/CMakeLists.txt create mode 160000 HDF5Writer/python/athena_without_athena create mode 100644 HDF5Writer/python/run.py create mode 100644 HDF5Writer/src/HDF5WriterAlg.cxx create mode 100644 HDF5Writer/src/HDF5WriterAlg.h create mode 100644 HDF5Writer/src/components/HDF5Writer_entries.cxx create mode 100644 HDF5Writer/src/components/HDF5Writer_load.cxx create mode 100644 HDF5Writer/util/hdf5-merge.cxx create mode 100644 HDF5Writer/version.cmake diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000000..7d006f3ee94c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "HDF5Writer/python/athena_without_athena"] + path = HDF5Writer/python/athena_without_athena + url = https://:@gitlab.cern.ch:8443/jburr/athena_without_athena.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000000..988426696aa9 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,52 @@ +# Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +# +# This is a template for a CMakeLists.txt file that can be used in a client +# project (work area) to set up building ATLAS packages against the configured +# release. 
+# + +# Set the minimum required CMake version: +cmake_minimum_required( VERSION 3.6 FATAL_ERROR ) + +# Make sure that all _ROOT variables *are* used when they are set. +if( POLICY CMP0074 ) + cmake_policy( SET CMP0074 NEW ) +endif() + +# If there's a directory called AtlasCMake in the project, +# and the user didn't specify AtlasCMake_DIR yet, then let's +# give it a default value. +if( IS_DIRECTORY ${CMAKE_SOURCE_DIR}/Build/AtlasCMake AND + NOT AtlasCMake_DIR AND NOT ENV{AtlasCMake_DIR} ) + set( AtlasCMake_DIR ${CMAKE_SOURCE_DIR}/Build/AtlasCMake ) +endif() + +# If there's a directory called AtlasLCG in the project, +# and the user didn't specify LCG_DIR yet, then let's +# give it a default value. +if( IS_DIRECTORY ${CMAKE_SOURCE_DIR}/Build/AtlasLCG AND + NOT LCG_DIR AND NOT ENV{LCG_DIR} ) + set( LCG_DIR ${CMAKE_SOURCE_DIR}/Build/AtlasLCG ) +endif() + +# Pick up a local version of the AtlasCMake code if it exists: +find_package( AtlasCMake QUIET ) + +# Find the project that we depend on: +find_package( AthAnalysis ) + +# Set up CTest: +atlas_ctest_setup() + +# Set up a work directory project: +atlas_project( WorkDir 21.2.80 + USE AthAnalysis 21.2.80 + FORTRAN ) + +# Set up the runtime environment setup script(s): +lcg_generate_env( SH_FILE ${CMAKE_BINARY_DIR}/${ATLAS_PLATFORM}/env_setup.sh ) +install( FILES ${CMAKE_BINARY_DIR}/${ATLAS_PLATFORM}/env_setup.sh + DESTINATION . ) + +# Set up CPack: +atlas_cpack_setup() diff --git a/HDF5Writer/CMakeLists.txt b/HDF5Writer/CMakeLists.txt new file mode 100644 index 000000000000..a521e459f9bb --- /dev/null +++ b/HDF5Writer/CMakeLists.txt @@ -0,0 +1,29 @@ +## automatically generated CMakeLists.txt file + +# Declare the package +atlas_subdir( HDF5Writer ) + +# Declare external dependencies ... 
default here is to include ROOT +find_package( ROOT COMPONENTS MathCore RIO Core Tree Hist ) +find_package( HDF5 1.10.1 REQUIRED COMPONENTS CXX C ) +find_package( Boost COMPONENTS program_options REQUIRED ) + +message("HDF5 includes: ${HDF5_INCLUDE_DIRS}") + +atlas_add_component( HDF5Writer src/components/*.cxx src/HDF5WriterAlg.cxx + NOCLIDDB + INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS} + LINK_LIBRARIES ${ROOT_LIBRARIES} ${HDF5_LIBRARIES} + AthAnalysisBaseCompsLib HDF5Utils xAODJet +) + +atlas_add_executable( hdf5-merge + util/hdf5-merge.cxx + INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} + LINK_LIBRARIES ${HDF5_LIBRARIES} Boost::program_options ) + +# Install python modules, joboptions, and share content +atlas_install_python_modules( + python/athena_without_athena + python/run.py + ) diff --git a/HDF5Writer/python/athena_without_athena b/HDF5Writer/python/athena_without_athena new file mode 160000 index 000000000000..961c6718203e --- /dev/null +++ b/HDF5Writer/python/athena_without_athena @@ -0,0 +1 @@ +Subproject commit 961c6718203ed457f3b56da89991b7adb7bc7bd3 diff --git a/HDF5Writer/python/run.py b/HDF5Writer/python/run.py new file mode 100644 index 000000000000..a2d41931b18c --- /dev/null +++ b/HDF5Writer/python/run.py @@ -0,0 +1,14 @@ +from athena_without_athena.core import basic_setup, run +from AthenaCommon.AppMgr import athAlgSeq +from AthenaCommon import CfgMgr +from AthenaCommon.AthenaCommonFlags import jobproperties as jps +import os +basic_setup() + +jps.AthenaCommonFlags.EvtMax=1000 +jps.AthenaCommonFlags.SkipEvents=0 +jps.AthenaCommonFlags.FilesInput=[os.getenv("ASG_TEST_FILE_MC")] + +athAlgSeq += CfgMgr.AthEventCounter(Frequency=1000) +athAlgSeq += CfgMgr.HDF5WriterAlg(HDF5Output="jetsTest.h5") +run() diff --git a/HDF5Writer/src/HDF5WriterAlg.cxx b/HDF5Writer/src/HDF5WriterAlg.cxx new file mode 100644 index 000000000000..f3b03ceb3419 --- /dev/null +++ b/HDF5Writer/src/HDF5WriterAlg.cxx @@ -0,0 +1,52 @@ +// HDF5Writer includes +#include 
"HDF5WriterAlg.h" + +HDF5WriterAlg::HDF5WriterAlg( const std::string& name, ISvcLocator* pSvcLocator ) : AthAnalysisAlgorithm( name, pSvcLocator ){ + declareProperty("HDF5Output", m_h5OutName="jets.h5", "The output file"); + declareProperty("JetContainer", m_jetContainer="AntiKt4EMTopoJets", "The input jet container"); +} + + +HDF5WriterAlg::~HDF5WriterAlg() {} + + +StatusCode HDF5WriterAlg::initialize() { + ATH_MSG_INFO ("Initializing " << name() << "..."); + // Prepare the output file + // H5F_ACC_TRUNC == RECREATE in ROOT + m_h5Out = H5::H5File(m_h5OutName, H5F_ACC_TRUNC); + + // Now make the list of consumers + H5Utils::Consumers<const xAOD::Jet&> consumers; + consumers.add<float>("pt", [] (const xAOD::Jet& j) { return j.pt(); }); + consumers.add<float>("eta", [] (const xAOD::Jet& j) { return j.eta(); }); + consumers.add<float>("phi", [] (const xAOD::Jet& j) { return j.phi(); }); + consumers.add<float>("mass", [] (const xAOD::Jet& j) { return j.m(); }); + consumers.add<int>("nConstituents", [] (const xAOD::Jet& j) { return j.numConstituents(); }); + + // Now make the writer + m_writer = std::make_unique<H5Utils::Writer<0, const xAOD::Jet&>>( + m_h5Out, "jets", consumers); + + return StatusCode::SUCCESS; +} + +StatusCode HDF5WriterAlg::finalize() { + ATH_MSG_INFO ("Finalizing " << name() << "..."); + m_h5Out.close(); + return StatusCode::SUCCESS; +} + +StatusCode HDF5WriterAlg::execute() { + ATH_MSG_DEBUG ("Executing " << name() << "..."); + const xAOD::JetContainer* jets(nullptr); + ATH_CHECK( evtStore()->retrieve(jets, m_jetContainer) ); + + std::size_t count = 0; + for (const xAOD::Jet* ijet : *jets) { + m_writer->fill(*ijet); + if (++count == 2) + break; + } + return StatusCode::SUCCESS; +} diff --git a/HDF5Writer/src/HDF5WriterAlg.h b/HDF5Writer/src/HDF5WriterAlg.h new file mode 100644 index 000000000000..9ee761373aa8 --- /dev/null +++ b/HDF5Writer/src/HDF5WriterAlg.h @@ -0,0 +1,44 @@ +#ifndef HDF5WRITER_HDF5WRITERALG_H +#define HDF5WRITER_HDF5WRITERALG_H 1 
+ +#include "AthAnalysisBaseComps/AthAnalysisAlgorithm.h" +#include "HDF5Utils/Writer.h" +#include "xAODJet/JetContainer.h" +#include "H5Cpp.h" + +#include <memory> + +//Example ROOT Includes +//#include "TTree.h" +//#include "TH1D.h" + + + +class HDF5WriterAlg: public ::AthAnalysisAlgorithm { + public: + HDF5WriterAlg( const std::string& name, ISvcLocator* pSvcLocator ); + virtual ~HDF5WriterAlg(); + + ///uncomment and implement methods as required + + //IS EXECUTED: + virtual StatusCode initialize(); //once, before any input is loaded + virtual StatusCode execute(); //per event + virtual StatusCode finalize(); //once, after all events processed + + private: + // Properties + /// The output file name + std::string m_h5OutName; + /// The input jet container + std::string m_jetContainer; + + // Internals + /// The output file + H5::H5File m_h5Out; + /// The writer + std::unique_ptr<H5Utils::Writer<0, const xAOD::Jet&>> m_writer; + +}; + +#endif //> !HDF5WRITER_HDF5WRITERALG_H diff --git a/HDF5Writer/src/components/HDF5Writer_entries.cxx b/HDF5Writer/src/components/HDF5Writer_entries.cxx new file mode 100644 index 000000000000..8b54ab96fb3d --- /dev/null +++ b/HDF5Writer/src/components/HDF5Writer_entries.cxx @@ -0,0 +1,11 @@ + +#include "GaudiKernel/DeclareFactoryEntries.h" + +#include "../HDF5WriterAlg.h" + +DECLARE_ALGORITHM_FACTORY( HDF5WriterAlg ) + +DECLARE_FACTORY_ENTRIES( HDF5Writer ) +{ + DECLARE_ALGORITHM( HDF5WriterAlg ); +} diff --git a/HDF5Writer/src/components/HDF5Writer_load.cxx b/HDF5Writer/src/components/HDF5Writer_load.cxx new file mode 100644 index 000000000000..6e69752267d3 --- /dev/null +++ b/HDF5Writer/src/components/HDF5Writer_load.cxx @@ -0,0 +1,3 @@ + +#include "GaudiKernel/LoadFactoryEntries.h" +LOAD_FACTORY_ENTRIES(HDF5Writer) diff --git a/HDF5Writer/util/hdf5-merge.cxx b/HDF5Writer/util/hdf5-merge.cxx new file mode 100644 index 000000000000..41365d596b1e --- /dev/null +++ b/HDF5Writer/util/hdf5-merge.cxx @@ -0,0 +1,331 @@ +#include 
"H5Cpp.h" +#include <boost/program_options.hpp> +#include <iostream> +#include <iomanip> + +/** + * A simple script to merge HDF5 files. + * + * This script is intended to read in a list of HDF5 files and create a new file + * with all datasets contained inside them concatenated along a particular axis. + */ + +namespace { + + std::ostream& operator<<(std::ostream& os, const H5::DataSet& ds) + { + os << os.fill() << ds.getObjName(); + return os; + } + + std::ostream& operator<<(std::ostream& os, const H5::Group& group) + { + std::size_t indent = os.width(); + os << os.fill() << group.getObjName() << " {" << std::endl; + for (std::size_t ii = 0; ii < group.getNumObjs(); ++ii) { + H5G_obj_t childType = group.getObjTypeByIdx(ii); + std::string childName = group.getObjnameByIdx(ii); + switch(childType) { + case H5G_GROUP: + os << std::setw(indent+2) << group.openGroup(childName) << std::endl; + break; + case H5G_DATASET: + os << std::setw(indent+2) << group.openDataSet(childName) << std::endl; + break; + default: + // For now do nothing with other types - maybe in the future rethink + // this? + break; + } + } + os << std::setw(indent) << os.fill() << "}"; + return os; + } + + struct MergeInstructions { + /// The files being merged must have exactly the same format. This means + /// that they must have the same group/dataset structure. + bool requireSameFormat; + /// The axis along which to extend datasets + int mergeAxis; + /// The chunk size, leave negative to use the chunk size of the first + /// dataset + int chunkSize; + }; + + void merge( + H5::DataSet& target, + const H5::DataSet& source, + const MergeInstructions& instructions) { + // Do these datasets hold the same type? 
+ if (target.getDataType() != source.getDataType() ) + throw std::invalid_argument( + "Target and source datasets hold different types!"); + // Get the dataspaces + H5::DataSpace targetSpace = target.getSpace(); + H5::DataSpace sourceSpace = source.getSpace(); + if (!targetSpace.isSimple() || !sourceSpace.isSimple() ) + throw std::invalid_argument("Only simple dataspaces are understood!"); + // Make sure that the dataspaces have the same dimensions + int nDims = targetSpace.getSimpleExtentNdims(); + if (nDims != sourceSpace.getSimpleExtentNdims() ) + throw std::invalid_argument( + "Target and source dataspaces have different dimensions, " + + std::to_string(nDims) + " and " + + std::to_string(sourceSpace.getSimpleExtentNdims() ) + " respectively"); + if (nDims == 0) + // This is completely empty... Skip it? + return; + + // Now make sure that the extent matches + std::vector<hsize_t> targetDims(nDims, 0); + targetSpace.getSimpleExtentDims(targetDims.data() ); + std::vector<hsize_t> sourceDims(nDims, 0); + sourceSpace.getSimpleExtentDims(sourceDims.data() ); + + for (int ii = 0; ii < nDims; ++ii) { + // Skip the merge axis in this check + if (ii == instructions.mergeAxis) + continue; + if (targetDims.at(ii) != sourceDims.at(ii) ) + throw std::invalid_argument( + "Target and source databases dimensions differ on axis " + + std::to_string(ii) + ", " + std::to_string(targetDims.at(ii) ) + + " and " + std::to_string(sourceDims.at(ii) ) + " respectively"); + } + + // Getting here means that the datasets are consistent and can be merged! 
+ // We have to tell the target dataset that it's going to be extended + std::vector<hsize_t> newDims = targetDims; + newDims.at(instructions.mergeAxis) += sourceDims.at(instructions.mergeAxis); + target.extend(newDims.data() ); + targetSpace.setExtentSimple(newDims.size(), newDims.data() ); + // Now select the target hyperslab + // Define the offset + std::vector<hsize_t> slabOffset(nDims, 0); + slabOffset.at(instructions.mergeAxis) += targetDims.at(instructions.mergeAxis); + targetSpace.selectNone(); + targetSpace.selectHyperslab( + H5S_SELECT_SET, + sourceDims.data(), //> count - i.e. the dimensions of the source + slabOffset.data() ); + // Select the source hyperslab - this should be the entire dataset + sourceSpace.selectAll(); + // Data location in memory + // I hope that there's a better way to do this but I haven't found it yet + std::size_t memorySize = + target.getDataType().getSize()*sourceSpace.getSimpleExtentNpoints(); + // This is the first time since learning C that I have *ever* had to call + // malloc. I'm expecting things to go very wrong. 
+ struct SmartMalloc { + SmartMalloc(std::size_t size) : + data(malloc(size)) {} + ~SmartMalloc() { free(data); } + + void* data; + }; + SmartMalloc location(memorySize); + std::cout << "read from source" << std::endl << std::flush; + source.read(location.data, source.getDataType() ); + std::cout << "write to target" << std::endl << std::flush; + std::cout << "source space: " << sourceSpace.getSelectNpoints() << std::endl; + std::cout << "target space: " << targetSpace.getSelectNpoints() << std::endl; + + target.write(location.data, target.getDataType(), sourceSpace, targetSpace); + } + + void merge( + H5::Group& target, + const H5::Group& source, + const MergeInstructions& instructions) + { + // Check if this group is empty before we started + bool isEmpty = target.getNumObjs() == 0; + // Go through each child of the source group + for (hsize_t ii = 0; ii < source.getNumObjs(); ++ii) { + H5G_obj_t childType = source.getObjTypeByIdx(ii); + std::string childName = source.getObjnameByIdx(ii); + // Find the correct index in the target + hsize_t targetIdx = 0; + for (; targetIdx < target.getNumObjs(); ++targetIdx) + if (target.getObjnameByIdx(targetIdx) == childName) + break; + if (targetIdx != target.getNumObjs()) { + // Means we found it! + // Make sure these are the same type! + if (target.getObjTypeByIdx(targetIdx) != childType) + throw std::invalid_argument( + "Both target and source contain " + childName + + " but they have different types!"); + + switch (childType) { + case H5G_GROUP: + { + H5::Group g = target.openGroup(childName); + try { + merge( + g, + source.openGroup(childName), + instructions); + } + catch (...) { + std::cerr << "Encountered an error merging child " << childName << std::endl; + throw; + } + } + break; + case H5G_DATASET: + { + H5::DataSet d = target.openDataSet(childName); + try { + merge( + d, + source.openDataSet(childName), + instructions); + } + catch (...) 
{ + std::cerr << "Encountered an error merging child " << childName << std::endl; + throw; + } + } + break; + default: + // Just skip everything else for now + break; + + } + } //> end if targetIdx found + else { + if (instructions.requireSameFormat && !isEmpty) + throw std::invalid_argument( + "Target and source have different formats!"); + // We need to make it + switch (childType) { + case H5G_GROUP: + { + H5::Group g = target.createGroup(childName); + try{ + merge( + g, + source.openGroup(childName), + instructions); + } + catch (...) { + std::cerr << "Encountered an error merging child " << childName << std::endl; + throw; + } + } + break; + case H5G_DATASET: + { + H5::DataSet sourceDS = source.openDataSet(childName); + H5::DataSpace sourceSpace = sourceDS.getSpace(); + // Get the new extent + std::vector<hsize_t> DSExtent(sourceSpace.getSimpleExtentNdims(), 0); + sourceSpace.getSimpleExtentDims(DSExtent.data() ); + // Set the merge axis to be 0 length to begin with + DSExtent.at(instructions.mergeAxis) = 0; + std::vector<hsize_t> maxDSExtent = DSExtent; + // Get the existing dataset creation properties + H5::DSetCreatPropList cList = sourceDS.getCreatePlist(); + if (instructions.chunkSize > 0) { + std::vector<hsize_t> chunks = DSExtent; + chunks.at(instructions.mergeAxis) = instructions.chunkSize; + cList.setChunk(chunks.size(), chunks.data() ); + } + // Set the merge axis to be unlimited + maxDSExtent.at(instructions.mergeAxis) = -1; + H5::DataSpace space(DSExtent.size(), DSExtent.data(), maxDSExtent.data()); + // This does nothing with the acc property list because I don't know + // what it is + H5::DataSet d = target.createDataSet( + childName, sourceDS.getDataType(), space, cList); + + try { + merge( + d, + sourceDS, + instructions); + } + catch (...) 
{ + std::cerr << "Encountered an error merging child " << childName << std::endl; + throw; + } + } + break; + default: + // Just skip everything else for now + break; + } + } //> end else (target not found) + } //> end loop over children + // TODO - this did no check to see if target contained something source + // didn't, this is probably fine though. + } //> end function merge(group) + +} //> end anonymous namespace + +int main(int argc, char* argv[]) { + // The options + std::string outputFile; + std::vector<std::string> inputFiles; + MergeInstructions instructions{true, 0, -1}; + + namespace po = boost::program_options; + po::options_description desc("Allowed options"); + desc.add_options() + ("output,o", po::value(&outputFile)->required(), "The output file.") + ("allowDifferentFormats", po::bool_switch(&instructions.requireSameFormat), + "Allow input files to have different formats.") + ("mergeAxis,a", po::value(&instructions.mergeAxis), + "The axis along which to merge datasets") + ("chunkSize,s", po::value(&instructions.chunkSize), + "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.") + ("help,h", "Print this message and exit."); + + po::options_description hidden; + hidden.add_options() + ("inputFiles", po::value(&inputFiles), "The input files"); + po::positional_options_description positional; + positional.add("inputFiles", -1); //> All positional arguments are input files + + po::variables_map vm; + po::options_description allOptions; + allOptions.add(desc).add(hidden); + + po::store( + po::command_line_parser(argc, argv). + options(allOptions). + positional(positional). + run(), + vm); + // Do help before notify - notify will verify input arguments which we don't + // want to do with help + if (vm.count("help") ) { + std::cout << "Merge HDF5 files. 
Usage:" << std::endl << std::endl; + std::cout << "hdf5-merge [options] input1 [input2 ...]" << std::endl << std::endl; + std::cout << desc << std::endl; + return 0; + } + po::notify(vm); + + if (inputFiles.size() == 0) { + std::cerr << "You must specify at least 1 input file!" << std::endl; + return 1; + } + + // Make the output file + H5::H5File fOut(outputFile, H5F_ACC_EXCL); + // Loop over the input files and merge them + for (const std::string& inName : inputFiles) { + H5::H5File fIn(inName, H5F_ACC_RDONLY); + merge(fOut, fIn, instructions); + } + + +/* // For now let's just look at one file */ +/* H5::H5File fIn(inputFiles.at(0), H5F_ACC_RDONLY); */ + + + return 0; +} diff --git a/HDF5Writer/version.cmake b/HDF5Writer/version.cmake new file mode 100644 index 000000000000..a07c22463f27 --- /dev/null +++ b/HDF5Writer/version.cmake @@ -0,0 +1 @@ +HDF5Writer-00-00-01 -- GitLab From ab9580e46d0ca9610c098e617f9f377176fbbf9e Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Thu, 11 Jul 2019 22:22:37 +0200 Subject: [PATCH 02/22] Remove debugging print outs --- HDF5Writer/util/hdf5-merge.cxx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/HDF5Writer/util/hdf5-merge.cxx b/HDF5Writer/util/hdf5-merge.cxx index 41365d596b1e..1228fd550b3c 100644 --- a/HDF5Writer/util/hdf5-merge.cxx +++ b/HDF5Writer/util/hdf5-merge.cxx @@ -125,11 +125,7 @@ namespace { void* data; }; SmartMalloc location(memorySize); - std::cout << "read from source" << std::endl << std::flush; source.read(location.data, source.getDataType() ); - std::cout << "write to target" << std::endl << std::flush; - std::cout << "source space: " << sourceSpace.getSelectNpoints() << std::endl; - std::cout << "target space: " << targetSpace.getSelectNpoints() << std::endl; target.write(location.data, target.getDataType(), sourceSpace, targetSpace); } -- GitLab From aa669abb94d5946f98f74722d302a18624405f47 Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Fri, 12 Jul 2019 00:57:43 +0200 
Subject: [PATCH 03/22] Rearranging code to make it nicer to use and extend --- HDF5Writer/CMakeLists.txt | 12 +- HDF5Writer/HDF5Writer/DefaultMerger.h | 70 +++++++ HDF5Writer/HDF5Writer/H5Print.h | 21 ++ HDF5Writer/HDF5Writer/IH5Merger.h | 71 +++++++ HDF5Writer/HDF5Writer/MergeUtils.h | 75 +++++++ HDF5Writer/Root/DefaultMerger.cxx | 98 +++++++++ HDF5Writer/Root/H5Print.cxx | 34 +++ HDF5Writer/Root/IH5Merger.cxx | 22 ++ HDF5Writer/Root/MergeUtils.cxx | 185 +++++++++++++++++ HDF5Writer/util/hdf5-merge.cxx | 288 +++----------------------- 10 files changed, 611 insertions(+), 265 deletions(-) create mode 100644 HDF5Writer/HDF5Writer/DefaultMerger.h create mode 100644 HDF5Writer/HDF5Writer/H5Print.h create mode 100644 HDF5Writer/HDF5Writer/IH5Merger.h create mode 100644 HDF5Writer/HDF5Writer/MergeUtils.h create mode 100644 HDF5Writer/Root/DefaultMerger.cxx create mode 100644 HDF5Writer/Root/H5Print.cxx create mode 100644 HDF5Writer/Root/IH5Merger.cxx create mode 100644 HDF5Writer/Root/MergeUtils.cxx diff --git a/HDF5Writer/CMakeLists.txt b/HDF5Writer/CMakeLists.txt index a521e459f9bb..deebb2f9559e 100644 --- a/HDF5Writer/CMakeLists.txt +++ b/HDF5Writer/CMakeLists.txt @@ -8,7 +8,14 @@ find_package( ROOT COMPONENTS MathCore RIO Core Tree Hist ) find_package( HDF5 1.10.1 REQUIRED COMPONENTS CXX C ) find_package( Boost COMPONENTS program_options REQUIRED ) -message("HDF5 includes: ${HDF5_INCLUDE_DIRS}") +atlas_add_library( HDF5WriterLib + Root/DefaultMerger.cxx + Root/H5Print.cxx + Root/IH5Merger.cxx + Root/MergeUtils.cxx + PUBLIC_HEADERS HDF5Writer + INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} + LINK_LIBRARIES ${HDF5_LIBRARIES} ) atlas_add_component( HDF5Writer src/components/*.cxx src/HDF5WriterAlg.cxx NOCLIDDB @@ -19,8 +26,7 @@ atlas_add_component( HDF5Writer src/components/*.cxx src/HDF5WriterAlg.cxx atlas_add_executable( hdf5-merge util/hdf5-merge.cxx - INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} - LINK_LIBRARIES ${HDF5_LIBRARIES} Boost::program_options ) + LINK_LIBRARIES HDF5WriterLib 
Boost::program_options ) # Install python modules, joboptions, and share content atlas_install_python_modules( diff --git a/HDF5Writer/HDF5Writer/DefaultMerger.h b/HDF5Writer/HDF5Writer/DefaultMerger.h new file mode 100644 index 000000000000..d7e58f6c07a8 --- /dev/null +++ b/HDF5Writer/HDF5Writer/DefaultMerger.h @@ -0,0 +1,70 @@ +#ifndef HDF5Writer_DefaultMerger_H +#define HDF5Writer_DefaultMerger_H + +#include "HDF5Writer/IH5Merger.h" + +/** + * @file DefaultMerger + * + * The default merging implementation + */ + +namespace HDF5 { + /** + * @class Default H5 Merger + */ + class DefaultMerger : public IH5Merger { + public: + /** + * @brief Create the merger + * @param mergeAxis The axis to merge along + * @param chunkSize The chunk size to apply. If negative then the value + * found in the input datasets will be used. + * @param requireSameFormat Require all input files to have the same + * groups and datasets. + */ + DefaultMerger( + hsize_t mergeAxis = 0, + int chunkSize = -1, + bool requireSameFormat = true); + + ~DefaultMerger(); + + using IH5Merger::merge; + using IH5Merger::createFrom; + + /** + * @brief Merge a source group into a target group + * @param target The group to merge into + * @param source The group to merge from + */ + void merge(H5::Group& target, const H5::Group& source) override; + + /** + * @brief Merge a source dataset into a target dataset + * @param target The dataset to merge into + * @param source The dataset to merge from + */ + void merge(H5::DataSet& target, const H5::DataSet& source) override; + + + /** + * @brief Make a new dataset from information in a source dataset + * @param targetLocation Where the new dataset will be created + * @param source The dataset to use to create the new dataset + */ + H5::DataSet createFrom( + H5::H5Location& targetLocation, + const H5::DataSet& source) override; + + protected: + /// The axis to merge along + hsize_t m_mergeAxis; + /// The chunk size to apply + int m_chunkSize; + /// Whether to 
require the same group structure + bool m_requireSameFormat; + }; //> end class DefaultMerger +} //> end namespace HDF5 + +#endif //> !HDF5Writer_DefaultMerger_H diff --git a/HDF5Writer/HDF5Writer/H5Print.h b/HDF5Writer/HDF5Writer/H5Print.h new file mode 100644 index 000000000000..83e51765e5a5 --- /dev/null +++ b/HDF5Writer/HDF5Writer/H5Print.h @@ -0,0 +1,21 @@ +#ifndef HDF5Writer_H5Print_H +#define HDF5Writer_H5Print_H +#include <H5Cpp.h> +#include <iostream> + +/** + * @file H5Print.h + * + * Helper functions to print out basic information about H5 groups. + * To use, pull them into the namespace of your function with + * using namespace HDF5::Print; + * std::cout << h5File << std::endl; + */ + +namespace HDF5 { namespace Print { + /// Print information about a dataset + std::ostream& operator<<(std::ostream& os, const H5::DataSet& ds); + /// Print information about a group + std::ostream& operator<<(std::ostream& os, const H5::Group& group); +} } //> end namespace HDF5::Print +#endif //> !HDF5Writer_H5Print_H diff --git a/HDF5Writer/HDF5Writer/IH5Merger.h b/HDF5Writer/HDF5Writer/IH5Merger.h new file mode 100644 index 000000000000..6e709e25b239 --- /dev/null +++ b/HDF5Writer/HDF5Writer/IH5Merger.h @@ -0,0 +1,71 @@ +#ifndef HDF5Writer_IH5Merger_H +#define HDF5Writer_IH5Merger_H + +#include "H5Cpp.h" + +/** + * @file IH5Merger.h + * + * Provides a base class for H5Mergers + */ + +namespace HDF5 { + /** + * @class Base class for H5Mergers + * + * A merger is responsible for merging two H5 objects. + * + * This class could be extended to allow for links, etc + */ + class IH5Merger { + public: + virtual ~IH5Merger() = 0; + + /** + * @brief Merge a source file into a target file + * @param target The file to merge into + * @param source The file to merge from + * + * The default implementation provided here just forwards this to the + * group function. 
+ */ + virtual void merge(H5::H5File& target, const H5::H5File& source); + + /** + * @brief Merge a source group into a target group + * @param target The group to merge into + * @param source The group to merge from + */ + virtual void merge(H5::Group& target, const H5::Group& source) = 0; + + /** + * @brief Merge a source dataset into a target dataset + * @param target The dataset to merge into + * @param source The dataset to merge from + */ + virtual void merge(H5::DataSet& target, const H5::DataSet& source) = 0; + + /** + * @brief Make a new group from information in a source group + * @param targetLocation Where the new group will be created + * @param source The group to use to create the new group + * + * The default implementation provided here just copies the source group's + * name then uses the merge function. + */ + virtual H5::Group createFrom( + H5::H5Location& targetLocation, + const H5::Group& source); + + /** + * @brief Make a new dataset from information in a source dataset + * @param targetLocation Where the new dataset will be created + * @param source The dataset to use to create the new dataset + */ + virtual H5::DataSet createFrom( + H5::H5Location& targetLocation, + const H5::DataSet& source) = 0; + }; //> end class +} //> end namespace HDF5 + +#endif //> !HDF5Writer_IH5Merger_H diff --git a/HDF5Writer/HDF5Writer/MergeUtils.h b/HDF5Writer/HDF5Writer/MergeUtils.h new file mode 100644 index 000000000000..85c42143e03b --- /dev/null +++ b/HDF5Writer/HDF5Writer/MergeUtils.h @@ -0,0 +1,75 @@ +#ifndef HDF5Writer_MergeUtils_H +#define HDF5Writer_MergeUtils_H + +#include "H5Cpp.h" +#include <string> + +/** + * @file MergeUtils + * + * Provides several helper functions for doing common parts of file merging. + */ + +namespace HDF5 { + /** + * @brief Make sure that two datasets can be merged. + * @param target The dataset to merge into + * @param source The dataset to merge from + * @param mergeAxis The axis to merged along. 
+ * @return False if the datasets cannot be merged + */ + bool checkDatasetsToMerge( + const H5::DataSet& target, + const H5::DataSet& source, + hsize_t mergeAxis); + + /** + * @brief Make sure that two datasets can be merged. + * @param target The dataset to merge into + * @param source The dataset to merge from + * @param mergeAxis The axis to merged along. + * @param[out] errMsg If the datasets cannot be merged, fill this string with + * an explanation + * @return False if the datasets cannot be merged + */ + bool checkDatasetsToMerge( + const H5::DataSet& target, + const H5::DataSet& source, + hsize_t mergeAxis, + std::string& errMsg); + + /** + * @brief Merge two datasets + * @param target The dataset to merge into + * @param source The dataset to merge from + * @param mergeAxis The axis to merged along. + * + * Note that this does nothing to dataset attributes + */ + void mergeDatasets( + H5::DataSet& target, + const H5::DataSet& source, + hsize_t mergeAxis); + + /** + * @brief Make a new dataset using the properties of another + * @param targetLocation The location to place the new dataset + * @param source The dataset to create from + * @param mergeAxis The axis to merge along + * @param chunkSize The chunk size to use. If negative then the chunk size + * from the source is used. + * @param mergeExtent The maximum extent to allow along the merge axis. -1 + * means unlimited. + * + * This will not merge the source dataset into the new one! 
+ */ + H5::DataSet createDataSet( + H5::H5Location& targetLocation, + const H5::DataSet& source, + hsize_t mergeAxis, + int chunkSize = -1, + int mergeExtent = -1); + +} //> end namespace HDF5 + +#endif //> !HDF5Writer_MergeUtils_H diff --git a/HDF5Writer/Root/DefaultMerger.cxx b/HDF5Writer/Root/DefaultMerger.cxx new file mode 100644 index 000000000000..3fa9d5e3d5de --- /dev/null +++ b/HDF5Writer/Root/DefaultMerger.cxx @@ -0,0 +1,98 @@ +#include "HDF5Writer/DefaultMerger.h" +#include "HDF5Writer/MergeUtils.h" +#include <exception> +#include <iostream> + +namespace HDF5 { + + DefaultMerger::DefaultMerger( + hsize_t mergeAxis, + int chunkSize, + bool requireSameFormat) : + m_mergeAxis(mergeAxis), + m_chunkSize(chunkSize), + m_requireSameFormat(requireSameFormat) {} + + DefaultMerger::~DefaultMerger() {} + + void DefaultMerger::merge( + H5::Group& target, + const H5::Group& source) + { + // Check if this group was empty before we started + bool isEmpty = target.getNumObjs() == 0; + + // Iterate through each child of the source group + for (hsize_t ii = 0; ii < source.getNumObjs(); ++ii) { + H5G_obj_t childType = source.getObjTypeByIdx(ii); + std::string childName = source.getObjnameByIdx(ii); + // Find the correct index in the target + hsize_t targetIdx = 0; + for (; targetIdx < target.getNumObjs(); ++targetIdx) + if (target.getObjnameByIdx(targetIdx) == childName) + break; + bool found = targetIdx != target.getNumObjs(); + if (found) { + // Make sure these are the same type! + if (target.getObjTypeByIdx(targetIdx) != childType) + throw std::invalid_argument( + "Both target and source contain " + childName + + " but they have different types!"); + } + else if (m_requireSameFormat && !isEmpty) { + throw std::invalid_argument( + "Target and source have different formats!"); + } + switch (childType) { + case H5G_GROUP: + { + H5::Group sg = source.openGroup(childName); + H5::Group tg = found ? 
+ target.openGroup(childName) : + createFrom(target, sg); + try { + merge(tg, sg); + } + catch (...) { + std::cerr << "Encountered an error merging child " << childName << std::endl; + throw; + } + } + break; + case H5G_DATASET: + { + H5::DataSet sd = source.openDataSet(childName); + H5::DataSet td = found ? + target.openDataSet(childName) : + createFrom(target, sd); + try { + merge(td, sd); + } + catch (...) { + std::cerr << "Encountered an error merging child " << childName << std::endl; + throw; + } + } + break; + default: + break; + } + } //> end loop over children + // TODO - this did no check to see if target contained something source + // didn't, this is probably fine though. + } //> end function merge(group) + + void DefaultMerger::merge( + H5::DataSet& target, + const H5::DataSet& source) + { + mergeDatasets(target, source, m_mergeAxis); + } + + H5::DataSet DefaultMerger::createFrom( + H5::H5Location& targetLocation, + const H5::DataSet& source) + { + return createDataSet(targetLocation, source, m_mergeAxis, m_chunkSize); + } +} //> end namespace HDF5 diff --git a/HDF5Writer/Root/H5Print.cxx b/HDF5Writer/Root/H5Print.cxx new file mode 100644 index 000000000000..277b43b854f3 --- /dev/null +++ b/HDF5Writer/Root/H5Print.cxx @@ -0,0 +1,34 @@ +#include "HDF5Writer/H5Print.h" +#include <iomanip> + +namespace HDF5 { namespace Print { + std::ostream& operator<<(std::ostream& os, const H5::DataSet& ds) + { + os << os.fill() << ds.getObjName(); + return os; + } + + std::ostream& operator<<(std::ostream& os, const H5::Group& group) + { + std::size_t indent = os.width(); + os << os.fill() << group.getObjName() << " {" << std::endl; + for (std::size_t ii = 0; ii < group.getNumObjs(); ++ii) { + H5G_obj_t childType = group.getObjTypeByIdx(ii); + std::string childName = group.getObjnameByIdx(ii); + switch(childType) { + case H5G_GROUP: + os << std::setw(indent+2) << group.openGroup(childName) << std::endl; + break; + case H5G_DATASET: + os << std::setw(indent+2) << 
group.openDataSet(childName) << std::endl; + break; + default: + // For now do nothing with other types - maybe in the future rethink + // this? + break; + } + } + os << std::setw(indent) << os.fill() << "}"; + return os; + } +} } //> end namespace HDF5::Print diff --git a/HDF5Writer/Root/IH5Merger.cxx b/HDF5Writer/Root/IH5Merger.cxx new file mode 100644 index 000000000000..d9d357bd1df0 --- /dev/null +++ b/HDF5Writer/Root/IH5Merger.cxx @@ -0,0 +1,22 @@ +#include "HDF5Writer/IH5Merger.h" + +namespace HDF5 { + + IH5Merger::~IH5Merger() {} + + void IH5Merger::merge(H5::H5File& target, const H5::H5File& source) + { + merge( + static_cast<H5::Group&>(target), + static_cast<const H5::Group&>(source) ); + } + + H5::Group IH5Merger::createFrom( + H5::H5Location& targetLocation, + const H5::Group& source) + { + H5::Group newGroup = targetLocation.createGroup(source.getObjName() ); + merge(newGroup, source); + return newGroup; + } +} //> end namespace HDF5 diff --git a/HDF5Writer/Root/MergeUtils.cxx b/HDF5Writer/Root/MergeUtils.cxx new file mode 100644 index 000000000000..215cfee48209 --- /dev/null +++ b/HDF5Writer/Root/MergeUtils.cxx @@ -0,0 +1,185 @@ +#include "HDF5Writer/MergeUtils.h" + +#include <vector> +#include <stdexcept> + +namespace HDF5 { + bool checkDatasetsToMerge( + const H5::DataSet& target, + const H5::DataSet& source, + hsize_t mergeAxis) + { + std::string sink; + return checkDatasetsToMerge(target, source, mergeAxis, sink); + } + + bool checkDatasetsToMerge( + const H5::DataSet& target, + const H5::DataSet& source, + hsize_t mergeAxis, + std::string& errMsg) + { + // Check that the datasets hold the same types + // Note that H5 *can* do type comparisons but this function assumes that we + // should only merge the same types + if (target.getDataType() != source.getDataType() ) { + errMsg = "Target and source datasets hold different types."; + return false; + } + + // Get the dataspaces + H5::DataSpace targetSpace = target.getSpace(); + H5::DataSpace 
sourceSpace = source.getSpace(); + if (!targetSpace.isSimple() || !sourceSpace.isSimple() ) { + errMsg = "Only simple dataspaces are understood."; + return false; + } + + // Make sure that the dataspaces have the same dimensions + int nDims = targetSpace.getSimpleExtentNdims(); + if (nDims != sourceSpace.getSimpleExtentNdims() ) { + errMsg = "Target and source dataspaces have different dimensions, " + + std::to_string(nDims) + " and " + + std::to_string(sourceSpace.getSimpleExtentNdims() ) + " respectively"; + return false; + } + + // Make sure that the merge axis fits in the dimension + if (nDims <= static_cast<int>(mergeAxis)) { + errMsg = "Dataset dimension " + std::to_string(nDims) + + " is not compatible with the merge axis " + + std::to_string(mergeAxis); + return false; + } + + // Now make sure that the extent matches + std::vector<hsize_t> targetDims(nDims, 0); + std::vector<hsize_t> maxTargetDims(nDims, 0); + targetSpace.getSimpleExtentDims(targetDims.data(), maxTargetDims.data() ); + std::vector<hsize_t> sourceDims(nDims, 0); + sourceSpace.getSimpleExtentDims(sourceDims.data() ); + + for (int ii = 0; ii < nDims; ++ii) { + // Skip the merge axis in this check + if (ii == static_cast<int>(mergeAxis) ) + continue; + if (targetDims.at(ii) != sourceDims.at(ii) ) { + errMsg = "Target and source databases dimensions differ on axis " + + std::to_string(ii) + ", " + std::to_string(targetDims.at(ii) ) + + " and " + std::to_string(sourceDims.at(ii) ) + " respectively"; + return false; + } + } + + // Check the maximum extent is sufficient + if (maxTargetDims.at(mergeAxis) < ( + targetDims.at(mergeAxis) + sourceDims.at(mergeAxis) ) ) { + errMsg = "Merged dataset will not fit into target dataset"; + return false; + } + + return true; + } //> end function checkDatasetsToMerge + + void mergeDatasets( + H5::DataSet& target, + const H5::DataSet& source, + hsize_t mergeAxis) + { + std::string errMsg; + if (!checkDatasetsToMerge(target, source, mergeAxis, errMsg) ) + throw 
std::invalid_argument(errMsg); + + // Get information about the target and source datasets + H5::DataSpace targetSpace = target.getSpace(); + H5::DataSpace sourceSpace = source.getSpace(); + int nDims = targetSpace.getSimpleExtentNdims(); + + // Now make sure that the extent matches + std::vector<hsize_t> targetDims(nDims, 0); + targetSpace.getSimpleExtentDims(targetDims.data() ); + std::vector<hsize_t> sourceDims(nDims, 0); + sourceSpace.getSimpleExtentDims(sourceDims.data() ); + + // Start by extending the target dataset + std::vector<hsize_t> newDims = targetDims; + newDims.at(mergeAxis) += sourceDims.at(mergeAxis); + target.extend(newDims.data() ); + targetSpace.setExtentSimple(newDims.size(), newDims.data() ); + + // Now define the target hyperslab. This is the part of the output dataset + // that the source dataset will be loaded into. A hyperslab is essentially a + // mask over the dataspace that restricts what dataset operations work on + + // The offset is where the slab starts. + std::vector<hsize_t> slabOffset(nDims, 0); + // Start from where it ended before + slabOffset.at(mergeAxis) += targetDims.at(mergeAxis); + // Make sure nothing else is selected + targetSpace.selectNone(); + targetSpace.selectHyperslab( + H5S_SELECT_SET, //> select these cells + sourceDims.data(), //> count - i.e. the dimensions of the source + slabOffset.data() ); + + // For the source, just select the whole thing + sourceSpace.selectAll(); + // note that this means we have to be able to hold the entire source dataset + // in memory! + // An alternative would be to do this chunk-by-chunk but I haven't figured + // that out yet. + + // We now have to allocate an area in memory for the whole source dataset to + // be loaded into. This means using malloc. In order to prevent memory leaks + // make something a little bit like a smart pointer. 
+ struct SmartMalloc { + SmartMalloc(std::size_t size) : + data(malloc(size)) {} + ~SmartMalloc() { free(data); } + void* data; + }; + + // We need to know how much space to allocate. This is the size of the data + // type multiplied by the number of elements + std::size_t memSize = + source.getDataType().getSize() * sourceSpace.getSimpleExtentNpoints(); + SmartMalloc location(memSize); + + // Read into this location + source.read(location.data, source.getDataType() ); + // Write from this location + target.write(location.data, target.getDataType(), sourceSpace, targetSpace); + } + + H5::DataSet createDataSet( + H5::H5Location& targetLocation, + const H5::DataSet& source, + hsize_t mergeAxis, + int chunkSize, + int mergeExtent) + { + H5::DataSpace sourceSpace = source.getSpace(); + // Get the new extent + std::vector<hsize_t> DSExtent(sourceSpace.getSimpleExtentNdims(), 0); + sourceSpace.getSimpleExtentDims(DSExtent.data() ); + // Set the merge axis to be 0 length to begin with + DSExtent.at(mergeAxis) = 0; + std::vector<hsize_t> maxDSExtent = DSExtent; + maxDSExtent.at(mergeAxis) = mergeExtent; + + // Get the existing dataset creation properties + H5::DSetCreatPropList cList = source.getCreatePlist(); + if (chunkSize > 0) { + std::vector<hsize_t> chunks = DSExtent; + chunks.at(mergeAxis) = chunkSize; + cList.setChunk(chunks.size(), chunks.data() ); + } + + // Create the new space + H5::DataSpace space(DSExtent.size(), DSExtent.data(), maxDSExtent.data()); + // This does nothing with the acc property list because I don't know + // what it is + return targetLocation.createDataSet( + source.getObjName(), source.getDataType(), space, cList); + } +} //> end namespace HDF5 diff --git a/HDF5Writer/util/hdf5-merge.cxx b/HDF5Writer/util/hdf5-merge.cxx index 1228fd550b3c..bc3febf8e8f7 100644 --- a/HDF5Writer/util/hdf5-merge.cxx +++ b/HDF5Writer/util/hdf5-merge.cxx @@ -1,4 +1,5 @@ #include "H5Cpp.h" +#include <HDF5Writer/DefaultMerger.h> #include <boost/program_options.hpp> 
#include <iostream> #include <iomanip> @@ -10,273 +11,31 @@ * with all datasets contained inside them concatenated along a particular axis. */ -namespace { - - std::ostream& operator<<(std::ostream& os, const H5::DataSet& ds) - { - os << os.fill() << ds.getObjName(); - return os; - } - - std::ostream& operator<<(std::ostream& os, const H5::Group& group) - { - std::size_t indent = os.width(); - os << os.fill() << group.getObjName() << " {" << std::endl; - for (std::size_t ii = 0; ii < group.getNumObjs(); ++ii) { - H5G_obj_t childType = group.getObjTypeByIdx(ii); - std::string childName = group.getObjnameByIdx(ii); - switch(childType) { - case H5G_GROUP: - os << std::setw(indent+2) << group.openGroup(childName) << std::endl; - break; - case H5G_DATASET: - os << std::setw(indent+2) << group.openDataSet(childName) << std::endl; - break; - default: - // For now do nothing with other types - maybe in the future rethink - // this? - break; - } - } - os << std::setw(indent) << os.fill() << "}"; - return os; - } - - struct MergeInstructions { - /// The files being merged must have exactly the same format. This means - /// that they must have the same group/dataset structure. - bool requireSameFormat; - /// The axis along which to extend datasets - int mergeAxis; - /// The chunk size, leave negative to use the chunk size of the first - /// dataset - int chunkSize; - }; - - void merge( - H5::DataSet& target, - const H5::DataSet& source, - const MergeInstructions& instructions) { - // Do these datasets hold the same type? 
- if (target.getDataType() != source.getDataType() ) - throw std::invalid_argument( - "Target and source datasets hold different types!"); - // Get the dataspaces - H5::DataSpace targetSpace = target.getSpace(); - H5::DataSpace sourceSpace = source.getSpace(); - if (!targetSpace.isSimple() || !sourceSpace.isSimple() ) - throw std::invalid_argument("Only simple dataspaces are understood!"); - // Make sure that the dataspaces have the same dimensions - int nDims = targetSpace.getSimpleExtentNdims(); - if (nDims != sourceSpace.getSimpleExtentNdims() ) - throw std::invalid_argument( - "Target and source dataspaces have different dimensions, " + - std::to_string(nDims) + " and " + - std::to_string(sourceSpace.getSimpleExtentNdims() ) + " respectively"); - if (nDims == 0) - // This is completely empty... Skip it? - return; - - // Now make sure that the extent matches - std::vector<hsize_t> targetDims(nDims, 0); - targetSpace.getSimpleExtentDims(targetDims.data() ); - std::vector<hsize_t> sourceDims(nDims, 0); - sourceSpace.getSimpleExtentDims(sourceDims.data() ); - - for (int ii = 0; ii < nDims; ++ii) { - // Skip the merge axis in this check - if (ii == instructions.mergeAxis) - continue; - if (targetDims.at(ii) != sourceDims.at(ii) ) - throw std::invalid_argument( - "Target and source databases dimensions differ on axis " + - std::to_string(ii) + ", " + std::to_string(targetDims.at(ii) ) + - " and " + std::to_string(sourceDims.at(ii) ) + " respectively"); - } - - // Getting here means that the datasets are consistent and can be merged! 
- // We have to tell the target dataset that it's going to be extended - std::vector<hsize_t> newDims = targetDims; - newDims.at(instructions.mergeAxis) += sourceDims.at(instructions.mergeAxis); - target.extend(newDims.data() ); - targetSpace.setExtentSimple(newDims.size(), newDims.data() ); - // Now select the target hyperslab - // Define the offset - std::vector<hsize_t> slabOffset(nDims, 0); - slabOffset.at(instructions.mergeAxis) += targetDims.at(instructions.mergeAxis); - targetSpace.selectNone(); - targetSpace.selectHyperslab( - H5S_SELECT_SET, - sourceDims.data(), //> count - i.e. the dimensions of the source - slabOffset.data() ); - // Select the source hyperslab - this should be the entire dataset - sourceSpace.selectAll(); - // Data location in memory - // I hope that there's a better way to do this but I haven't found it yet - std::size_t memorySize = - target.getDataType().getSize()*sourceSpace.getSimpleExtentNpoints(); - // This is the first time since learning C that I have *ever* had to call - // malloc. I'm expecting things to go very wrong. 
- struct SmartMalloc { - SmartMalloc(std::size_t size) : - data(malloc(size)) {} - ~SmartMalloc() { free(data); } - - void* data; - }; - SmartMalloc location(memorySize); - source.read(location.data, source.getDataType() ); - - target.write(location.data, target.getDataType(), sourceSpace, targetSpace); - } - - void merge( - H5::Group& target, - const H5::Group& source, - const MergeInstructions& instructions) - { - // Check if this group is empty before we started - bool isEmpty = target.getNumObjs() == 0; - // Go through each child of the source group - for (hsize_t ii = 0; ii < source.getNumObjs(); ++ii) { - H5G_obj_t childType = source.getObjTypeByIdx(ii); - std::string childName = source.getObjnameByIdx(ii); - // Find the correct index in the target - hsize_t targetIdx = 0; - for (; targetIdx < target.getNumObjs(); ++targetIdx) - if (target.getObjnameByIdx(targetIdx) == childName) - break; - if (targetIdx != target.getNumObjs()) { - // Means we found it! - // Make sure these are the same type! - if (target.getObjTypeByIdx(targetIdx) != childType) - throw std::invalid_argument( - "Both target and source contain " + childName + - " but they have different types!"); - - switch (childType) { - case H5G_GROUP: - { - H5::Group g = target.openGroup(childName); - try { - merge( - g, - source.openGroup(childName), - instructions); - } - catch (...) { - std::cerr << "Encountered an error merging child " << childName << std::endl; - throw; - } - } - break; - case H5G_DATASET: - { - H5::DataSet d = target.openDataSet(childName); - try { - merge( - d, - source.openDataSet(childName), - instructions); - } - catch (...) 
{ - std::cerr << "Encountered an error merging child " << childName << std::endl; - throw; - } - } - break; - default: - // Just skip everything else for now - break; - - } - } //> end if targetIdx found - else { - if (instructions.requireSameFormat && !isEmpty) - throw std::invalid_argument( - "Target and source have different formats!"); - // We need to make it - switch (childType) { - case H5G_GROUP: - { - H5::Group g = target.createGroup(childName); - try{ - merge( - g, - source.openGroup(childName), - instructions); - } - catch (...) { - std::cerr << "Encountered an error merging child " << childName << std::endl; - throw; - } - } - break; - case H5G_DATASET: - { - H5::DataSet sourceDS = source.openDataSet(childName); - H5::DataSpace sourceSpace = sourceDS.getSpace(); - // Get the new extent - std::vector<hsize_t> DSExtent(sourceSpace.getSimpleExtentNdims(), 0); - sourceSpace.getSimpleExtentDims(DSExtent.data() ); - // Set the merge axis to be 0 length to begin with - DSExtent.at(instructions.mergeAxis) = 0; - std::vector<hsize_t> maxDSExtent = DSExtent; - // Get the existing dataset creation properties - H5::DSetCreatPropList cList = sourceDS.getCreatePlist(); - if (instructions.chunkSize > 0) { - std::vector<hsize_t> chunks = DSExtent; - chunks.at(instructions.mergeAxis) = instructions.chunkSize; - cList.setChunk(chunks.size(), chunks.data() ); - } - // Set the merge axis to be unlimited - maxDSExtent.at(instructions.mergeAxis) = -1; - H5::DataSpace space(DSExtent.size(), DSExtent.data(), maxDSExtent.data()); - // This does nothing with the acc property list because I don't know - // what it is - H5::DataSet d = target.createDataSet( - childName, sourceDS.getDataType(), space, cList); - - try { - merge( - d, - sourceDS, - instructions); - } - catch (...) 
{ - std::cerr << "Encountered an error merging child " << childName << std::endl; - throw; - } - } - break; - default: - // Just skip everything else for now - break; - } - } //> end else (target not found) - } //> end loop over children - // TODO - this did no check to see if target contained something source - // didn't, this is probably fine though. - } //> end function merge(group) - -} //> end anonymous namespace int main(int argc, char* argv[]) { // The options - std::string outputFile; + std::string outputFile = "merged.h5"; std::vector<std::string> inputFiles; - MergeInstructions instructions{true, 0, -1}; + hsize_t mergeAxis = 0; + int chunkSize = -1; + bool requireSameFormat = true; + bool overwrite = false; + bool inPlace = false; namespace po = boost::program_options; po::options_description desc("Allowed options"); desc.add_options() - ("output,o", po::value(&outputFile)->required(), "The output file.") - ("allowDifferentFormats", po::bool_switch(&instructions.requireSameFormat), + ("output,o", po::value(&outputFile), "The output file.") + ("allowDifferentFormats", po::bool_switch(&requireSameFormat), "Allow input files to have different formats.") - ("mergeAxis,a", po::value(&instructions.mergeAxis), + ("mergeAxis,a", po::value(&mergeAxis), "The axis along which to merge datasets") - ("chunkSize,s", po::value(&instructions.chunkSize), + ("chunkSize,s", po::value(&chunkSize), "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.") + ("overwrite,w", po::bool_switch(&overwrite), + "Overwrite the output file if it already exists. Cannot be set with 'in-place'") + ("in-place,p", po::bool_switch(&inPlace), + "The output file is modified in place. Cannot be set with 'overwrite'") ("help,h", "Print this message and exit."); po::options_description hidden; @@ -309,19 +68,24 @@ int main(int argc, char* argv[]) { std::cerr << "You must specify at least 1 input file!" 
<< std::endl; return 1; } + if (overwrite && inPlace) { + std::cerr << "You cannot specify both overwrite and in-place!" << std::endl; + return 1; + } + + // Make the merger + HDF5::DefaultMerger merger(mergeAxis, chunkSize, requireSameFormat); // Make the output file - H5::H5File fOut(outputFile, H5F_ACC_EXCL); + H5::H5File fOut(outputFile, + overwrite ? H5F_ACC_TRUNC : (inPlace ? H5F_ACC_RDWR : H5F_ACC_EXCL) ); // Loop over the input files and merge them for (const std::string& inName : inputFiles) { H5::H5File fIn(inName, H5F_ACC_RDONLY); - merge(fOut, fIn, instructions); + std::cout << "Merging file " << inName << std::endl; + merger.merge(fOut, fIn); } -/* // For now let's just look at one file */ -/* H5::H5File fIn(inputFiles.at(0), H5F_ACC_RDONLY); */ - - return 0; } -- GitLab From 1956eff6d1c4ccb7a0d4d1b274fad246d2da7cab Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Fri, 12 Jul 2019 10:48:11 +0200 Subject: [PATCH 04/22] Updating the merging strategy This now allows for specifying a maximum buffer size to use. If the source dataset to use overflows this buffer it is split into 'rows' along the merge axis and merged iteratively. The buffer to use can be specified either in (machine) MBs or in rows. --- HDF5Writer/HDF5Writer/DefaultMerger.h | 12 +- HDF5Writer/HDF5Writer/MergeUtils.h | 22 +++- HDF5Writer/Root/DefaultMerger.cxx | 18 ++- HDF5Writer/Root/MergeUtils.cxx | 168 +++++++++++++++++++------- HDF5Writer/util/hdf5-merge.cxx | 30 ++++- 5 files changed, 199 insertions(+), 51 deletions(-) diff --git a/HDF5Writer/HDF5Writer/DefaultMerger.h b/HDF5Writer/HDF5Writer/DefaultMerger.h index d7e58f6c07a8..c253526414e7 100644 --- a/HDF5Writer/HDF5Writer/DefaultMerger.h +++ b/HDF5Writer/HDF5Writer/DefaultMerger.h @@ -22,11 +22,17 @@ namespace HDF5 { * found in the input datasets will be used. * @param requireSameFormat Require all input files to have the same * groups and datasets. 
+ * @param bufferSize The maximum size of the buffer to use while merging + * datasets + * @param bufferInRows Whether the buffer size is specified in rows or + * bytes */ DefaultMerger( hsize_t mergeAxis = 0, int chunkSize = -1, - bool requireSameFormat = true); + bool requireSameFormat = true, + std::size_t bufferSize = -1, + bool bufferInRows = false); ~DefaultMerger(); @@ -64,6 +70,10 @@ namespace HDF5 { int m_chunkSize; /// Whether to require the same group structure bool m_requireSameFormat; + /// The size of the buffer + std::size_t m_bufferSize; + /// Whether to measure the buffer in bytes or rows + bool m_measureBufferInRows; }; //> end class DefaultMerger } //> end namespace HDF5 diff --git a/HDF5Writer/HDF5Writer/MergeUtils.h b/HDF5Writer/HDF5Writer/MergeUtils.h index 85c42143e03b..fa80db9713e0 100644 --- a/HDF5Writer/HDF5Writer/MergeUtils.h +++ b/HDF5Writer/HDF5Writer/MergeUtils.h @@ -43,13 +43,19 @@ namespace HDF5 { * @param target The dataset to merge into * @param source The dataset to merge from * @param mergeAxis The axis to merged along. + * @param bufferSize The maximum size of the buffer to use. Take care when + * setting this, if it is too large then the job may run into memory issues! + * This size is measured in bytes. * - * Note that this does nothing to dataset attributes + * Note that this does nothing to dataset attributes. This function ignores + * the chunking of the source and target datasets, only splitting up the + * source dataset along the merge axis. 
*/
   void mergeDatasets(
     H5::DataSet& target,
     const H5::DataSet& source,
-    hsize_t mergeAxis);
+    hsize_t mergeAxis,
+    std::size_t bufferSize = -1);
 
   /**
    * @brief Make a new dataset using the properties of another
@@ -70,6 +76,18 @@ namespace HDF5 {
       int chunkSize = -1,
       int mergeExtent = -1);
 
+  /**
+   * @brief Calculate the size of a row of a dataset in bytes
+   * @param ds The dataset to use
+   * @param axis The axis that the row is orthogonal to
+   *
+   * A row is the hyperplane orthogonal to the axis.
+   * This will throw an overflow error if the row size overflows a std::size_t.
+   * This is rather unlikely because that means that there wouldn't be enough
+   * memory addresses to hold a single row in memory!
+   */
+  std::size_t getRowSize(const H5::DataSet& ds, hsize_t axis);
+
 } //> end namespace HDF5
 
 #endif //> !HDF5Writer_MergeUtils_H
diff --git a/HDF5Writer/Root/DefaultMerger.cxx b/HDF5Writer/Root/DefaultMerger.cxx
index 3fa9d5e3d5de..e50c7e53b6fd 100644
--- a/HDF5Writer/Root/DefaultMerger.cxx
+++ b/HDF5Writer/Root/DefaultMerger.cxx
@@ -8,10 +8,14 @@ namespace HDF5 {
   DefaultMerger::DefaultMerger(
       hsize_t mergeAxis,
       int chunkSize,
-      bool requireSameFormat) :
+      bool requireSameFormat,
+      std::size_t bufferSize,
+      bool bufferInRows) :
     m_mergeAxis(mergeAxis),
     m_chunkSize(chunkSize),
-    m_requireSameFormat(requireSameFormat) {}
+    m_requireSameFormat(requireSameFormat),
+    m_bufferSize(bufferSize),
+    m_measureBufferInRows(bufferInRows) {}
 
   DefaultMerger::~DefaultMerger() {}
 
@@ -86,7 +90,15 @@ namespace HDF5 {
       H5::DataSet& target,
       const H5::DataSet& source)
   {
-    mergeDatasets(target, source, m_mergeAxis);
+    std::size_t bufferSize = m_bufferSize;
+    if (m_measureBufferInRows) {
+      // Need to calculate the actual buffer size
+      std::size_t rowSize = getRowSize(source, m_mergeAxis);
+      if (std::size_t(-1) / m_bufferSize < rowSize)
+        throw std::overflow_error("Requested buffer would overflow the register!");
+      bufferSize = rowSize * m_bufferSize;
+    }
+    mergeDatasets(target, source, m_mergeAxis,
bufferSize); } H5::DataSet DefaultMerger::createFrom( diff --git a/HDF5Writer/Root/MergeUtils.cxx b/HDF5Writer/Root/MergeUtils.cxx index 215cfee48209..7af577ad4b86 100644 --- a/HDF5Writer/Root/MergeUtils.cxx +++ b/HDF5Writer/Root/MergeUtils.cxx @@ -3,6 +3,48 @@ #include <vector> #include <stdexcept> +namespace { + struct SmartMalloc { + SmartMalloc() : data(nullptr) {} + ~SmartMalloc() { this->freeData(); } + operator bool() { return data != nullptr; } + + void* allocate(std::size_t size); + void freeData(); + void* data; + }; + + + void* SmartMalloc::allocate(std::size_t size) { + // If we are already looking at memory, reallocate it + if (data) { + void* newData = realloc(data, size); + if (!newData) + // Note that we don't free 'data' here. That will still be taken care of + // by the destructor. This means that a user can catch the exception if + // they like and the old memory will still be available. + throw std::bad_alloc{}; + else + data = newData; + } + else { + // We aren't looking at memory - use malloc + data = malloc(size); + if (!data) + throw std::bad_alloc{}; + } + return data; + } + + void SmartMalloc::freeData() { + // free does nothing to the nullptr so it's safe to call without a check + free(data); + // Make sure we know that we don't own anything + data = nullptr; + } + +} + namespace HDF5 { bool checkDatasetsToMerge( const H5::DataSet& target, @@ -84,7 +126,8 @@ namespace HDF5 { void mergeDatasets( H5::DataSet& target, const H5::DataSet& source, - hsize_t mergeAxis) + hsize_t mergeAxis, + std::size_t bufferSize) { std::string errMsg; if (!checkDatasetsToMerge(target, source, mergeAxis, errMsg) ) @@ -107,48 +150,66 @@ namespace HDF5 { target.extend(newDims.data() ); targetSpace.setExtentSimple(newDims.size(), newDims.data() ); - // Now define the target hyperslab. This is the part of the output dataset - // that the source dataset will be loaded into. 
A hyperslab is essentially a - // mask over the dataspace that restricts what dataset operations work on - - // The offset is where the slab starts. - std::vector<hsize_t> slabOffset(nDims, 0); - // Start from where it ended before - slabOffset.at(mergeAxis) += targetDims.at(mergeAxis); - // Make sure nothing else is selected - targetSpace.selectNone(); - targetSpace.selectHyperslab( - H5S_SELECT_SET, //> select these cells - sourceDims.data(), //> count - i.e. the dimensions of the source - slabOffset.data() ); - - // For the source, just select the whole thing - sourceSpace.selectAll(); - // note that this means we have to be able to hold the entire source dataset - // in memory! - // An alternative would be to do this chunk-by-chunk but I haven't figured - // that out yet. - - // We now have to allocate an area in memory for the whole source dataset to - // be loaded into. This means using malloc. In order to prevent memory leaks - // make something a little bit like a smart pointer. - struct SmartMalloc { - SmartMalloc(std::size_t size) : - data(malloc(size)) {} - ~SmartMalloc() { free(data); } - void* data; - }; - - // We need to know how much space to allocate. This is the size of the data - // type multiplied by the number of elements - std::size_t memSize = - source.getDataType().getSize() * sourceSpace.getSimpleExtentNpoints(); - SmartMalloc location(memSize); - - // Read into this location - source.read(location.data, source.getDataType() ); - // Write from this location - target.write(location.data, target.getDataType(), sourceSpace, targetSpace); + // Now we need to work out how far we need to subdivide the source dataset + // to fit it inside the buffer. + std::size_t rowSize = getRowSize(source, mergeAxis); + // How many rows can we fit into one buffer + std::size_t nRowsBuffer = bufferSize / rowSize; + if (nRowsBuffer == 0) + throw std::invalid_argument( + "Allocated buffer is smaller than a single row! 
Merging is impossible."); + + // We have to allocate an area in memory for the buffer. Unlike normally in + // C++ we aren't allocating a space for an object but a specific size. This + // means that we have to use malloc. + // Smart pointers require some annoying syntax to use with malloc, but we + // can implement the same pattern with a simple struct. + SmartMalloc buffer; + + // Keep track of the offset from the target dataset + std::vector<hsize_t> targetOffset(nDims, 0); + // Start it from its end point before we extended it + targetOffset.at(mergeAxis) = targetDims.at(mergeAxis); + + // Step through the source dataset in increments equal to the number of + // source rows that can fit into the buffer. + std::size_t nSourceRows = sourceDims.at(mergeAxis); + for (std::size_t iRow = 0; iRow < nSourceRows; iRow += nRowsBuffer) { + // Construct the size and offset of the source slab + std::vector<hsize_t> sourceOffset(nDims, 0); + sourceOffset.at(mergeAxis) = iRow; + // The number of rows to write + std::size_t nRowsToWrite = std::min(nSourceRows-iRow, nRowsBuffer); + std::vector<hsize_t> sourceSize(sourceDims); + sourceSize.at(mergeAxis) = nRowsToWrite; + // Create the source hyperslab + sourceSpace.selectNone(); + sourceSpace.selectHyperslab( + H5S_SELECT_SET, + sourceSize.data(), + sourceOffset.data() ); + + // Create the target hyperslab + targetSpace.selectNone(); + targetSpace.selectHyperslab( + H5S_SELECT_SET, + sourceSize.data(), + targetOffset.data() ); + + // Prepare the buffer + buffer.allocate(nRowsToWrite*rowSize); + // Read into it + source.read(buffer.data, source.getDataType(), sourceSpace, sourceSpace); + // Write from it + target.write(buffer.data, target.getDataType(), sourceSpace, targetSpace); + // Increment the target offset + targetOffset.at(mergeAxis) += nSourceRows; + } + // Sanity check - make sure that the final targetOffset is where we think it + // should be + if (targetOffset.at(mergeAxis) != newDims.at(mergeAxis) ) + throw 
std::logic_error( + "Target dataset was not filled! This indicates a logic error in the code!"); } H5::DataSet createDataSet( @@ -182,4 +243,25 @@ namespace HDF5 { return targetLocation.createDataSet( source.getObjName(), source.getDataType(), space, cList); } + + std::size_t getRowSize(const H5::DataSet& ds, hsize_t axis) { + // The size of one element + std::size_t eleSize = ds.getDataType().getSize(); + + // The dimensions of the space + H5::DataSpace space = ds.getSpace(); + std::vector<hsize_t> spaceDims(space.getSimpleExtentNdims(), 0); + space.getSimpleExtentDims(spaceDims.data() ); + + std::size_t nRowElements = 1; + for (std::size_t ii = 0; ii < spaceDims.size(); ++ii) + if (ii != axis) + nRowElements *= spaceDims.at(ii); + + // Double check that this fits. This is probably over cautious but fine... + if (std::size_t(-1) / nRowElements < eleSize) + throw std::overflow_error("The size of one row would overflow the register!"); + + return eleSize * nRowElements; + } } //> end namespace HDF5 diff --git a/HDF5Writer/util/hdf5-merge.cxx b/HDF5Writer/util/hdf5-merge.cxx index bc3febf8e8f7..11c4d20c4a4e 100644 --- a/HDF5Writer/util/hdf5-merge.cxx +++ b/HDF5Writer/util/hdf5-merge.cxx @@ -19,6 +19,8 @@ int main(int argc, char* argv[]) { hsize_t mergeAxis = 0; int chunkSize = -1; bool requireSameFormat = true; + std::size_t bufferSizeMB = 100; + std::size_t bufferSizeRows = -1; bool overwrite = false; bool inPlace = false; @@ -30,8 +32,12 @@ int main(int argc, char* argv[]) { "Allow input files to have different formats.") ("mergeAxis,a", po::value(&mergeAxis), "The axis along which to merge datasets") - ("chunkSize,s", po::value(&chunkSize), + ("chunkSize,c", po::value(&chunkSize), "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.") + ("bufferSizeMB,b", po::value(&bufferSizeMB), + "The size of the buffer to use in MB. 
Cannot be set with 'bufferSizeRows'") + ("bufferSizeRows,b", po::value(&bufferSizeRows), + "The size of the buffer to use in rows. Cannot be set with 'bufferSizeMB'") ("overwrite,w", po::bool_switch(&overwrite), "Overwrite the output file if it already exists. Cannot be set with 'in-place'") ("in-place,p", po::bool_switch(&inPlace), @@ -72,9 +78,29 @@ int main(int argc, char* argv[]) { std::cerr << "You cannot specify both overwrite and in-place!" << std::endl; return 1; } + if (vm.count("bufferSizeMB") && vm.count("bufferSizeRows") ) { + std::cerr << "You cannot specify both bufferSizeMB and bufferSizeRows!" << std::endl; + return 1; + } + std::size_t buffer; + bool bufferInRows; + if (vm.count("bufferSizeRows") ) { + buffer = bufferSizeRows; + bufferInRows = true; + } + else { + // Default used if neither was set or if bufferSizeMB is set + std::size_t MB = 1024*1024; + if (std::size_t(-1) / bufferSizeMB < MB) + throw std::overflow_error( + "Requested buffer size would overflow the register!"); + buffer = bufferSizeMB * MB; + bufferInRows = false; + } // Make the merger - HDF5::DefaultMerger merger(mergeAxis, chunkSize, requireSameFormat); + HDF5::DefaultMerger merger( + mergeAxis, chunkSize, requireSameFormat, buffer, bufferInRows); // Make the output file H5::H5File fOut(outputFile, -- GitLab From 8d9dd8246df179732d956f068d4d1db6cd2d3fa6 Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Fri, 12 Jul 2019 13:01:47 +0200 Subject: [PATCH 05/22] Add an input option to allow a comma separated list of inputs. This is how the grid provides the %IN parameter so if this is going to be used for grid merging it needs to know how to accept that. 
--- HDF5Writer/util/hdf5-merge.cxx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/HDF5Writer/util/hdf5-merge.cxx b/HDF5Writer/util/hdf5-merge.cxx index 11c4d20c4a4e..dd2d22a4fa4e 100644 --- a/HDF5Writer/util/hdf5-merge.cxx +++ b/HDF5Writer/util/hdf5-merge.cxx @@ -1,6 +1,8 @@ #include "H5Cpp.h" #include <HDF5Writer/DefaultMerger.h> #include <boost/program_options.hpp> +#include <boost/algorithm/string/split.hpp> +#include <boost/algorithm/string/trim.hpp> #include <iostream> #include <iomanip> @@ -15,6 +17,7 @@ int main(int argc, char* argv[]) { // The options std::string outputFile = "merged.h5"; + std::string inCSV = ""; std::vector<std::string> inputFiles; hsize_t mergeAxis = 0; int chunkSize = -1; @@ -28,6 +31,7 @@ int main(int argc, char* argv[]) { po::options_description desc("Allowed options"); desc.add_options() ("output,o", po::value(&outputFile), "The output file.") + ("input,i", po::value(&inCSV), "A comma separated list of input files") ("allowDifferentFormats", po::bool_switch(&requireSameFormat), "Allow input files to have different formats.") ("mergeAxis,a", po::value(&mergeAxis), @@ -64,12 +68,16 @@ int main(int argc, char* argv[]) { // want to do with help if (vm.count("help") ) { std::cout << "Merge HDF5 files. Usage:" << std::endl << std::endl; - std::cout << "hdf5-merge [options] input1 [input2 ...]" << std::endl << std::endl; + std::cout << "hdf5-merge [options] [--input input1,input2,... | input1 [input2 ...]]" << std::endl << std::endl; std::cout << desc << std::endl; return 0; } po::notify(vm); + std::vector<std::string> splitCSV; + boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(",") ); + for (const std::string& i : splitCSV) + inputFiles.push_back(boost::algorithm::trim_copy(i) ); if (inputFiles.size() == 0) { std::cerr << "You must specify at least 1 input file!" 
<< std::endl; return 1; -- GitLab From b49890be8c94912942e3be37608f4ce4737017d2 Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Fri, 12 Jul 2019 16:12:39 +0200 Subject: [PATCH 06/22] Avoid blank input file if no CSV is given --- HDF5Writer/util/hdf5-merge.cxx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/HDF5Writer/util/hdf5-merge.cxx b/HDF5Writer/util/hdf5-merge.cxx index dd2d22a4fa4e..2620da96d314 100644 --- a/HDF5Writer/util/hdf5-merge.cxx +++ b/HDF5Writer/util/hdf5-merge.cxx @@ -74,10 +74,12 @@ int main(int argc, char* argv[]) { } po::notify(vm); - std::vector<std::string> splitCSV; - boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(",") ); - for (const std::string& i : splitCSV) - inputFiles.push_back(boost::algorithm::trim_copy(i) ); + if (inCSV.size() > 0) { + std::vector<std::string> splitCSV; + boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(",") ); + for (const std::string& i : splitCSV) + inputFiles.push_back(boost::algorithm::trim_copy(i) ); + } if (inputFiles.size() == 0) { std::cerr << "You must specify at least 1 input file!" << std::endl; return 1; @@ -115,8 +117,8 @@ int main(int argc, char* argv[]) { overwrite ? H5F_ACC_TRUNC : (inPlace ? 
H5F_ACC_RDWR : H5F_ACC_EXCL) ); // Loop over the input files and merge them for (const std::string& inName : inputFiles) { - H5::H5File fIn(inName, H5F_ACC_RDONLY); std::cout << "Merging file " << inName << std::endl; + H5::H5File fIn(inName, H5F_ACC_RDONLY); merger.merge(fOut, fIn); } -- GitLab From c360c154ebedc8ef34665309f6f218bf4e7f036e Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:26:07 +0200 Subject: [PATCH 07/22] remove Jon's writer tools --- HDF5Writer/CMakeLists.txt | 14 ----- HDF5Writer/python/athena_without_athena | 1 - HDF5Writer/python/run.py | 14 ----- HDF5Writer/src/HDF5WriterAlg.cxx | 52 ------------------- HDF5Writer/src/HDF5WriterAlg.h | 44 ---------------- .../src/components/HDF5Writer_entries.cxx | 11 ---- HDF5Writer/src/components/HDF5Writer_load.cxx | 3 -- 7 files changed, 139 deletions(-) delete mode 160000 HDF5Writer/python/athena_without_athena delete mode 100644 HDF5Writer/python/run.py delete mode 100644 HDF5Writer/src/HDF5WriterAlg.cxx delete mode 100644 HDF5Writer/src/HDF5WriterAlg.h delete mode 100644 HDF5Writer/src/components/HDF5Writer_entries.cxx delete mode 100644 HDF5Writer/src/components/HDF5Writer_load.cxx diff --git a/HDF5Writer/CMakeLists.txt b/HDF5Writer/CMakeLists.txt index deebb2f9559e..127a0abb63e0 100644 --- a/HDF5Writer/CMakeLists.txt +++ b/HDF5Writer/CMakeLists.txt @@ -4,7 +4,6 @@ atlas_subdir( HDF5Writer ) # Declare external dependencies ... 
default here is to include ROOT -find_package( ROOT COMPONENTS MathCore RIO Core Tree Hist ) find_package( HDF5 1.10.1 REQUIRED COMPONENTS CXX C ) find_package( Boost COMPONENTS program_options REQUIRED ) @@ -17,19 +16,6 @@ atlas_add_library( HDF5WriterLib INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} LINK_LIBRARIES ${HDF5_LIBRARIES} ) -atlas_add_component( HDF5Writer src/components/*.cxx src/HDF5WriterAlg.cxx - NOCLIDDB - INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS} - LINK_LIBRARIES ${ROOT_LIBRARIES} ${HDF5_LIBRARIES} - AthAnalysisBaseCompsLib HDF5Utils xAODJet -) - atlas_add_executable( hdf5-merge util/hdf5-merge.cxx LINK_LIBRARIES HDF5WriterLib Boost::program_options ) - -# Install python modules, joboptions, and share content -atlas_install_python_modules( - python/athena_without_athena - python/run.py - ) diff --git a/HDF5Writer/python/athena_without_athena b/HDF5Writer/python/athena_without_athena deleted file mode 160000 index 961c6718203e..000000000000 --- a/HDF5Writer/python/athena_without_athena +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 961c6718203ed457f3b56da89991b7adb7bc7bd3 diff --git a/HDF5Writer/python/run.py b/HDF5Writer/python/run.py deleted file mode 100644 index a2d41931b18c..000000000000 --- a/HDF5Writer/python/run.py +++ /dev/null @@ -1,14 +0,0 @@ -from athena_without_athena.core import basic_setup, run -from AthenaCommon.AppMgr import athAlgSeq -from AthenaCommon import CfgMgr -from AthenaCommon.AthenaCommonFlags import jobproperties as jps -import os -basic_setup() - -jps.AthenaCommonFlags.EvtMax=1000 -jps.AthenaCommonFlags.SkipEvents=0 -jps.AthenaCommonFlags.FilesInput=[os.getenv("ASG_TEST_FILE_MC")] - -athAlgSeq += CfgMgr.AthEventCounter(Frequency=1000) -athAlgSeq += CfgMgr.HDF5WriterAlg(HDF5Output="jetsTest.h5") -run() diff --git a/HDF5Writer/src/HDF5WriterAlg.cxx b/HDF5Writer/src/HDF5WriterAlg.cxx deleted file mode 100644 index f3b03ceb3419..000000000000 --- a/HDF5Writer/src/HDF5WriterAlg.cxx +++ /dev/null @@ -1,52 +0,0 @@ -// 
HDF5Writer includes -#include "HDF5WriterAlg.h" - -HDF5WriterAlg::HDF5WriterAlg( const std::string& name, ISvcLocator* pSvcLocator ) : AthAnalysisAlgorithm( name, pSvcLocator ){ - declareProperty("HDF5Output", m_h5OutName="jets.h5", "The output file"); - declareProperty("JetContainer", m_jetContainer="AntiKt4EMTopoJets", "The input jet container"); -} - - -HDF5WriterAlg::~HDF5WriterAlg() {} - - -StatusCode HDF5WriterAlg::initialize() { - ATH_MSG_INFO ("Initializing " << name() << "..."); - // Prepare the output file - // H5F_ACC_TRUNC == RECREATE in ROOT - m_h5Out = H5::H5File(m_h5OutName, H5F_ACC_TRUNC); - - // Now make the list of consumers - H5Utils::Consumers<const xAOD::Jet&> consumers; - consumers.add<float>("pt", [] (const xAOD::Jet& j) { return j.pt(); }); - consumers.add<float>("eta", [] (const xAOD::Jet& j) { return j.eta(); }); - consumers.add<float>("phi", [] (const xAOD::Jet& j) { return j.phi(); }); - consumers.add<float>("mass", [] (const xAOD::Jet& j) { return j.m(); }); - consumers.add<int>("nConstituents", [] (const xAOD::Jet& j) { return j.numConstituents(); }); - - // Now make the writer - m_writer = std::make_unique<H5Utils::Writer<0, const xAOD::Jet&>>( - m_h5Out, "jets", consumers); - - return StatusCode::SUCCESS; -} - -StatusCode HDF5WriterAlg::finalize() { - ATH_MSG_INFO ("Finalizing " << name() << "..."); - m_h5Out.close(); - return StatusCode::SUCCESS; -} - -StatusCode HDF5WriterAlg::execute() { - ATH_MSG_DEBUG ("Executing " << name() << "..."); - const xAOD::JetContainer* jets(nullptr); - ATH_CHECK( evtStore()->retrieve(jets, m_jetContainer) ); - - std::size_t count = 0; - for (const xAOD::Jet* ijet : *jets) { - m_writer->fill(*ijet); - if (++count == 2) - break; - } - return StatusCode::SUCCESS; -} diff --git a/HDF5Writer/src/HDF5WriterAlg.h b/HDF5Writer/src/HDF5WriterAlg.h deleted file mode 100644 index 9ee761373aa8..000000000000 --- a/HDF5Writer/src/HDF5WriterAlg.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef HDF5WRITER_HDF5WRITERALG_H 
-#define HDF5WRITER_HDF5WRITERALG_H 1 - -#include "AthAnalysisBaseComps/AthAnalysisAlgorithm.h" -#include "HDF5Utils/Writer.h" -#include "xAODJet/JetContainer.h" -#include "H5Cpp.h" - -#include <memory> - -//Example ROOT Includes -//#include "TTree.h" -//#include "TH1D.h" - - - -class HDF5WriterAlg: public ::AthAnalysisAlgorithm { - public: - HDF5WriterAlg( const std::string& name, ISvcLocator* pSvcLocator ); - virtual ~HDF5WriterAlg(); - - ///uncomment and implement methods as required - - //IS EXECUTED: - virtual StatusCode initialize(); //once, before any input is loaded - virtual StatusCode execute(); //per event - virtual StatusCode finalize(); //once, after all events processed - - private: - // Properties - /// The output file name - std::string m_h5OutName; - /// The input jet container - std::string m_jetContainer; - - // Internals - /// The output file - H5::H5File m_h5Out; - /// The writer - std::unique_ptr<H5Utils::Writer<0, const xAOD::Jet&>> m_writer; - -}; - -#endif //> !HDF5WRITER_HDF5WRITERALG_H diff --git a/HDF5Writer/src/components/HDF5Writer_entries.cxx b/HDF5Writer/src/components/HDF5Writer_entries.cxx deleted file mode 100644 index 8b54ab96fb3d..000000000000 --- a/HDF5Writer/src/components/HDF5Writer_entries.cxx +++ /dev/null @@ -1,11 +0,0 @@ - -#include "GaudiKernel/DeclareFactoryEntries.h" - -#include "../HDF5WriterAlg.h" - -DECLARE_ALGORITHM_FACTORY( HDF5WriterAlg ) - -DECLARE_FACTORY_ENTRIES( HDF5Writer ) -{ - DECLARE_ALGORITHM( HDF5WriterAlg ); -} diff --git a/HDF5Writer/src/components/HDF5Writer_load.cxx b/HDF5Writer/src/components/HDF5Writer_load.cxx deleted file mode 100644 index 6e69752267d3..000000000000 --- a/HDF5Writer/src/components/HDF5Writer_load.cxx +++ /dev/null @@ -1,3 +0,0 @@ - -#include "GaudiKernel/LoadFactoryEntries.h" -LOAD_FACTORY_ENTRIES(HDF5Writer) -- GitLab From 680434358a3948a99629b2c7c40eb90b247e2ad9 Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:35:57 +0200 Subject: 
[PATCH 08/22] Move HDF5Writer into HDF5Utils --- .../AnalysisCommon/HDF5Utils/HDF5Utils}/DefaultMerger.h | 0 .../AnalysisCommon/HDF5Utils/HDF5Utils}/H5Print.h | 0 .../AnalysisCommon/HDF5Utils/HDF5Utils}/IH5Merger.h | 0 .../AnalysisCommon/HDF5Utils/HDF5Utils}/MergeUtils.h | 0 .../AnalysisCommon/HDF5Utils}/Root/DefaultMerger.cxx | 0 .../AnalysisCommon/HDF5Utils}/Root/H5Print.cxx | 0 .../AnalysisCommon/HDF5Utils}/Root/IH5Merger.cxx | 0 .../AnalysisCommon/HDF5Utils}/Root/MergeUtils.cxx | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename {HDF5Writer/HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils}/DefaultMerger.h (100%) rename {HDF5Writer/HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils}/H5Print.h (100%) rename {HDF5Writer/HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils}/IH5Merger.h (100%) rename {HDF5Writer/HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils}/MergeUtils.h (100%) rename {HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils}/Root/DefaultMerger.cxx (100%) rename {HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils}/Root/H5Print.cxx (100%) rename {HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils}/Root/IH5Merger.cxx (100%) rename {HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils}/Root/MergeUtils.cxx (100%) diff --git a/HDF5Writer/HDF5Writer/DefaultMerger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h similarity index 100% rename from HDF5Writer/HDF5Writer/DefaultMerger.h rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h diff --git a/HDF5Writer/HDF5Writer/H5Print.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h similarity index 100% rename from HDF5Writer/HDF5Writer/H5Print.h rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h diff --git a/HDF5Writer/HDF5Writer/IH5Merger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h similarity index 100% rename from 
HDF5Writer/HDF5Writer/IH5Merger.h rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h diff --git a/HDF5Writer/HDF5Writer/MergeUtils.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h similarity index 100% rename from HDF5Writer/HDF5Writer/MergeUtils.h rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h diff --git a/HDF5Writer/Root/DefaultMerger.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx similarity index 100% rename from HDF5Writer/Root/DefaultMerger.cxx rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx diff --git a/HDF5Writer/Root/H5Print.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx similarity index 100% rename from HDF5Writer/Root/H5Print.cxx rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx diff --git a/HDF5Writer/Root/IH5Merger.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx similarity index 100% rename from HDF5Writer/Root/IH5Merger.cxx rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx diff --git a/HDF5Writer/Root/MergeUtils.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx similarity index 100% rename from HDF5Writer/Root/MergeUtils.cxx rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx -- GitLab From 30475c15cdbc9b477cd9baaf151b76b85e160be8 Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:39:39 +0200 Subject: [PATCH 09/22] Add merge utilities to HDF5Utils CMakeList --- PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt index e5d1f318ab0b..6e0020c53ada 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt @@ -13,6 +13,10 @@ find_package( Boost 
1.54.0 REQUIRED COMPONENTS program_options) # Add the hdf tuple library atlas_add_library(HDF5Utils Root/HdfTuple.cxx Root/common.cxx Root/H5Traits.cxx Root/CompressedTypes.cxx + Root/DefaultMerger.cxx + Root/H5Print.cxx + Root/IH5Merger.cxx + Root/MergeUtils.cxx PUBLIC_HEADERS HDF5Utils PRIVATE_INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} LINK_LIBRARIES ${HDF5_LIBRARIES} ${ZLIB_LIBRARIES}) @@ -31,3 +35,8 @@ atlas_add_executable(ttree2hdf5 ${_exe_sources} ${ROOT_LIBRARIES}) unset(_exe_sources) + +# add the merge utility +atlas_add_executable( hdf5-merge + util/hdf5-merge.cxx + LINK_LIBRARIES HDF5WriterLib Boost::program_options ) -- GitLab From 2773a28a61c3976f98a05732f373592ab3a797d5 Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:45:33 +0200 Subject: [PATCH 10/22] Add ATLAS copyrights and change header paths --- .../AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h | 5 +++++ .../AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h | 6 ++++++ .../AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h | 5 +++++ .../AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h | 4 ++++ .../AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx | 8 ++++++-- PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx | 6 +++++- .../AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx | 8 ++++++-- .../AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx | 6 +++++- 8 files changed, 42 insertions(+), 6 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h index c253526414e7..a4ff38964cd9 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h @@ -1,3 +1,7 @@ +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + #ifndef HDF5Writer_DefaultMerger_H #define HDF5Writer_DefaultMerger_H @@ -5,6 +9,7 @@ /** * @file DefaultMerger + * @author Jon 
Burr * * The default merging implementation */ diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h index 83e51765e5a5..3551d809a21b 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h @@ -1,3 +1,8 @@ +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + + #ifndef HDF5Writer_H5Print_H #define HDF5Writer_H5Print_H #include <H5Cpp.h> @@ -5,6 +10,7 @@ /** * @file H5Print.h + * @author Jon Burr * * Helper functions to print out basic information about H5 groups. * To use, pull them into the namespace of your function with diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h index 6e709e25b239..c78a9c086cbe 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h @@ -1,3 +1,7 @@ +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + #ifndef HDF5Writer_IH5Merger_H #define HDF5Writer_IH5Merger_H @@ -5,6 +9,7 @@ /** * @file IH5Merger.h + * @author Jon Burr * * Provides a base class for H5Mergers */ diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h index fa80db9713e0..f3f05927bbcb 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h @@ -1,3 +1,7 @@ +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + #ifndef HDF5Writer_MergeUtils_H #define HDF5Writer_MergeUtils_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx index e50c7e53b6fd..038277dd0196 100644 --- 
a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx @@ -1,5 +1,9 @@ -#include "HDF5Writer/DefaultMerger.h" -#include "HDF5Writer/MergeUtils.h" +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + +#include "HDF5Utils/DefaultMerger.h" +#include "HDF5Utils/MergeUtils.h" #include <exception> #include <iostream> diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx index 277b43b854f3..5cdf2288835f 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx @@ -1,4 +1,8 @@ -#include "HDF5Writer/H5Print.h" +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + +#include "HDF5Utils/H5Print.h" #include <iomanip> namespace HDF5 { namespace Print { diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx index d9d357bd1df0..9919c2bb51a7 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx @@ -1,11 +1,15 @@ -#include "HDF5Writer/IH5Merger.h" +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + +#include "HDF5Utils/IH5Merger.h" namespace HDF5 { IH5Merger::~IH5Merger() {} void IH5Merger::merge(H5::H5File& target, const H5::H5File& source) - { + { merge( static_cast<H5::Group&>(target), static_cast<const H5::Group&>(source) ); diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx index 7af577ad4b86..d57549b87572 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx @@ -1,4 +1,8 @@ -#include "HDF5Writer/MergeUtils.h" +/* + 
Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + +#include "HDF5Utils/MergeUtils.h" #include <vector> #include <stdexcept> -- GitLab From 084dbb1d519a9eb6d323ba5525195cf2232e0ec9 Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:46:20 +0200 Subject: [PATCH 11/22] Move hdf5 merge util --- .../AnalysisCommon/HDF5Utils}/util/hdf5-merge.cxx | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {HDF5Writer => PhysicsAnalysis/AnalysisCommon/HDF5Utils}/util/hdf5-merge.cxx (100%) diff --git a/HDF5Writer/util/hdf5-merge.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx similarity index 100% rename from HDF5Writer/util/hdf5-merge.cxx rename to PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx -- GitLab From 5ce7df7d1c4f08b101d26e47e0999eb2892ce376 Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:49:59 +0200 Subject: [PATCH 12/22] Remove old HDF5Writer files --- HDF5Writer/CMakeLists.txt | 21 --------------------- HDF5Writer/version.cmake | 1 - 2 files changed, 22 deletions(-) delete mode 100644 HDF5Writer/CMakeLists.txt delete mode 100644 HDF5Writer/version.cmake diff --git a/HDF5Writer/CMakeLists.txt b/HDF5Writer/CMakeLists.txt deleted file mode 100644 index 127a0abb63e0..000000000000 --- a/HDF5Writer/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -## automatically generated CMakeLists.txt file - -# Declare the package -atlas_subdir( HDF5Writer ) - -# Declare external dependencies ... 
default here is to include ROOT -find_package( HDF5 1.10.1 REQUIRED COMPONENTS CXX C ) -find_package( Boost COMPONENTS program_options REQUIRED ) - -atlas_add_library( HDF5WriterLib - Root/DefaultMerger.cxx - Root/H5Print.cxx - Root/IH5Merger.cxx - Root/MergeUtils.cxx - PUBLIC_HEADERS HDF5Writer - INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} - LINK_LIBRARIES ${HDF5_LIBRARIES} ) - -atlas_add_executable( hdf5-merge - util/hdf5-merge.cxx - LINK_LIBRARIES HDF5WriterLib Boost::program_options ) diff --git a/HDF5Writer/version.cmake b/HDF5Writer/version.cmake deleted file mode 100644 index a07c22463f27..000000000000 --- a/HDF5Writer/version.cmake +++ /dev/null @@ -1 +0,0 @@ -HDF5Writer-00-00-01 -- GitLab From d503c1abcf08923f82da0da6ce5c2842323ac695 Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:50:36 +0200 Subject: [PATCH 13/22] Fix several more compile bugs from migration to HDF5Utils --- PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt | 2 +- .../AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h | 2 +- PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt index 6e0020c53ada..69b6b2470c76 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt @@ -39,4 +39,4 @@ unset(_exe_sources) # add the merge utility atlas_add_executable( hdf5-merge util/hdf5-merge.cxx - LINK_LIBRARIES HDF5WriterLib Boost::program_options ) + LINK_LIBRARIES HDF5Utils Boost::program_options ) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h index a4ff38964cd9..64408d26b6b9 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h +++ 
b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h @@ -5,7 +5,7 @@ #ifndef HDF5Writer_DefaultMerger_H #define HDF5Writer_DefaultMerger_H -#include "HDF5Writer/IH5Merger.h" +#include "HDF5Utils/IH5Merger.h" /** * @file DefaultMerger diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx index 2620da96d314..8f1dc792362d 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx @@ -1,5 +1,5 @@ #include "H5Cpp.h" -#include <HDF5Writer/DefaultMerger.h> +#include <HDF5Utils/DefaultMerger.h> #include <boost/program_options.hpp> #include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/trim.hpp> -- GitLab From 9755ded8d053b046fca4970fc62de2399383d74d Mon Sep 17 00:00:00 2001 From: Cerny McCernface <cernface@cern.ch> Date: Mon, 15 Jul 2019 18:51:42 +0200 Subject: [PATCH 14/22] Remove residual .gitmodules file --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .gitmodules diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 7d006f3ee94c..000000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "HDF5Writer/python/athena_without_athena"] - path = HDF5Writer/python/athena_without_athena - url = https://:@gitlab.cern.ch:8443/jburr/athena_without_athena.git -- GitLab From fe5de234de5fec70ebae41acd6da9c84947a5ede Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Mon, 15 Jul 2019 19:06:40 +0200 Subject: [PATCH 15/22] Change namespace HDF5 -> H5Utils --- .../AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h | 4 ++-- .../AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h | 6 +++--- .../AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h | 4 ++-- .../AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h | 4 ++-- .../AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx | 4 ++-- PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx | 4 
++-- PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx | 4 ++-- .../AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx | 4 ++-- .../AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h index 64408d26b6b9..a28b9d59253c 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h @@ -14,7 +14,7 @@ * The default merging implementation */ -namespace HDF5 { +namespace H5Utils { /** * @class Default H5 Merger */ @@ -80,6 +80,6 @@ namespace HDF5 { /// Whether to measure the buffer in bytes or rows bool m_measureBufferInRows; }; //> end class DefaultMerger -} //> end namespace HDF5 +} //> end namespace H5Utils #endif //> !HDF5Writer_DefaultMerger_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h index 3551d809a21b..867eb1c8805d 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h @@ -14,14 +14,14 @@ * * Helper functions to print out basic information about H5 groups. 
* To use, pull them into the namespace of your function with - * using namespace HDF5::Print; + * using namespace H5Utils::Print; * std::cout << h5File << std::endl; */ -namespace HDF5 { namespace Print { +namespace H5Utils { namespace Print { /// Print information about a dataset std::ostream& operator<<(std::ostream& os, const H5::DataSet& ds); /// Print information about a group std::ostream& operator<<(std::ostream& os, const H5::Group& group); -} } //> end namespace HDF5::Print +} } //> end namespace H5Utils::Print #endif //> !HDF5Writer_H5Print_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h index c78a9c086cbe..f1be348b98b6 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h @@ -14,7 +14,7 @@ * Provides a base class for H5Mergers */ -namespace HDF5 { +namespace H5Utils { /** * @class Base class for H5Mergers * @@ -71,6 +71,6 @@ namespace HDF5 { H5::H5Location& targetLocation, const H5::DataSet& source) = 0; }; //> end class -} //> end namespace HDF5 +} //> end namespace H5Utils #endif //> !HDF5Writer_IH5Merger_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h index f3f05927bbcb..498b7f733454 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h @@ -14,7 +14,7 @@ * Provides several helper functions for doing common parts of file merging. */ -namespace HDF5 { +namespace H5Utils { /** * @brief Make sure that two datasets can be merged. 
* @param target The dataset to merge into @@ -92,6 +92,6 @@ namespace HDF5 { */ std::size_t getRowSize(const H5::DataSet& ds, hsize_t axis); -} //> end namespace HDF5 +} //> end namespace H5Utils #endif //> !HDF5Writer_MergeUtils_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx index 038277dd0196..9810a16b1c12 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/DefaultMerger.cxx @@ -7,7 +7,7 @@ #include <exception> #include <iostream> -namespace HDF5 { +namespace H5Utils { DefaultMerger::DefaultMerger( hsize_t mergeAxis, @@ -111,4 +111,4 @@ namespace HDF5 { { return createDataSet(targetLocation, source, m_mergeAxis, m_chunkSize); } -} //> end namespace HDF5 +} //> end namespace H5Utils diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx index 5cdf2288835f..4c076d137658 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/H5Print.cxx @@ -5,7 +5,7 @@ #include "HDF5Utils/H5Print.h" #include <iomanip> -namespace HDF5 { namespace Print { +namespace H5Utils { namespace Print { std::ostream& operator<<(std::ostream& os, const H5::DataSet& ds) { os << os.fill() << ds.getObjName(); @@ -35,4 +35,4 @@ namespace HDF5 { namespace Print { os << std::setw(indent) << os.fill() << "}"; return os; } -} } //> end namespace HDF5::Print +} } //> end namespace H5Utils::Print diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx index 9919c2bb51a7..522686d23db9 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/IH5Merger.cxx @@ -4,7 +4,7 @@ #include "HDF5Utils/IH5Merger.h" -namespace HDF5 { +namespace H5Utils { 
IH5Merger::~IH5Merger() {} @@ -23,4 +23,4 @@ namespace HDF5 { merge(newGroup, source); return newGroup; } -} //> end namespace HDF5 +} //> end namespace H5Utils diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx index d57549b87572..facf94c2e5e3 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/Root/MergeUtils.cxx @@ -49,7 +49,7 @@ namespace { } -namespace HDF5 { +namespace H5Utils { bool checkDatasetsToMerge( const H5::DataSet& target, const H5::DataSet& source, @@ -268,4 +268,4 @@ namespace HDF5 { return eleSize * nRowElements; } -} //> end namespace HDF5 +} //> end namespace H5Utils diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx index 8f1dc792362d..86a2a0615b8d 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx @@ -109,7 +109,7 @@ int main(int argc, char* argv[]) { } // Make the merger - HDF5::DefaultMerger merger( + H5Utils::DefaultMerger merger( mergeAxis, chunkSize, requireSameFormat, buffer, bufferInRows); // Make the output file -- GitLab From a8b6186769c95954cab0a993c1da55156d21cbcc Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Mon, 15 Jul 2019 19:09:20 +0200 Subject: [PATCH 16/22] Remove stray CMakeLists file --- CMakeLists.txt | 52 -------------------------------------------------- 1 file changed, 52 deletions(-) delete mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 988426696aa9..000000000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration -# -# This is a template for a CMakeLists.txt file that can be used in a client -# project (work area) to set up building ATLAS packages 
against the configured -# release. -# - -# Set the minimum required CMake version: -cmake_minimum_required( VERSION 3.6 FATAL_ERROR ) - -# Make sure that all _ROOT variables *are* used when they are set. -if( POLICY CMP0074 ) - cmake_policy( SET CMP0074 NEW ) -endif() - -# If there's a directory called AtlasCMake in the project, -# and the user didn't specify AtlasCMake_DIR yet, then let's -# give it a default value. -if( IS_DIRECTORY ${CMAKE_SOURCE_DIR}/Build/AtlasCMake AND - NOT AtlasCMake_DIR AND NOT ENV{AtlasCMake_DIR} ) - set( AtlasCMake_DIR ${CMAKE_SOURCE_DIR}/Build/AtlasCMake ) -endif() - -# If there's a directory called AtlasLCG in the project, -# and the user didn't specify LCG_DIR yet, then let's -# give it a default value. -if( IS_DIRECTORY ${CMAKE_SOURCE_DIR}/Build/AtlasLCG AND - NOT LCG_DIR AND NOT ENV{LCG_DIR} ) - set( LCG_DIR ${CMAKE_SOURCE_DIR}/Build/AtlasLCG ) -endif() - -# Pick up a local version of the AtlasCMake code if it exists: -find_package( AtlasCMake QUIET ) - -# Find the project that we depend on: -find_package( AthAnalysis ) - -# Set up CTest: -atlas_ctest_setup() - -# Set up a work directory project: -atlas_project( WorkDir 21.2.80 - USE AthAnalysis 21.2.80 - FORTRAN ) - -# Set up the runtime environment setup script(s): -lcg_generate_env( SH_FILE ${CMAKE_BINARY_DIR}/${ATLAS_PLATFORM}/env_setup.sh ) -install( FILES ${CMAKE_BINARY_DIR}/${ATLAS_PLATFORM}/env_setup.sh - DESTINATION . 
) - -# Set up CPack: -atlas_cpack_setup() -- GitLab From a69e12706d58765d3d8e52954b1ab0d5831a0500 Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Mon, 15 Jul 2019 19:12:49 +0200 Subject: [PATCH 17/22] Add copyright --- PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx index 86a2a0615b8d..333e936fa019 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/util/hdf5-merge.cxx @@ -1,3 +1,7 @@ +/* + Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration +*/ + #include "H5Cpp.h" #include <HDF5Utils/DefaultMerger.h> #include <boost/program_options.hpp> -- GitLab From 26249906f44afcfeb64797067216347219f6cec4 Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Mon, 15 Jul 2019 20:45:52 +0200 Subject: [PATCH 18/22] Change header guard names --- .../AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h | 6 +++--- .../AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h | 6 +++--- .../AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h | 6 +++--- .../AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h index a28b9d59253c..ddea153b8454 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/DefaultMerger.h @@ -2,8 +2,8 @@ Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration */ -#ifndef HDF5Writer_DefaultMerger_H -#define HDF5Writer_DefaultMerger_H +#ifndef HDF5Utils_DefaultMerger_H +#define HDF5Utils_DefaultMerger_H #include "HDF5Utils/IH5Merger.h" @@ -82,4 +82,4 @@ namespace H5Utils { }; //> end 
class DefaultMerger } //> end namespace H5Utils -#endif //> !HDF5Writer_DefaultMerger_H +#endif //> !HDF5Utils_DefaultMerger_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h index 867eb1c8805d..716159df8fe2 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/H5Print.h @@ -3,8 +3,8 @@ */ -#ifndef HDF5Writer_H5Print_H -#define HDF5Writer_H5Print_H +#ifndef HDF5Utils_H5Print_H +#define HDF5Utils_H5Print_H #include <H5Cpp.h> #include <iostream> @@ -24,4 +24,4 @@ namespace H5Utils { namespace Print { /// Print information about a group std::ostream& operator<<(std::ostream& os, const H5::Group& group); } } //> end namespace H5Utils::Print -#endif //> !HDF5Writer_H5Print_H +#endif //> !HDF5Utils_H5Print_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h index f1be348b98b6..9bc97db4fa16 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/IH5Merger.h @@ -2,8 +2,8 @@ Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration */ -#ifndef HDF5Writer_IH5Merger_H -#define HDF5Writer_IH5Merger_H +#ifndef HDF5Utils_IH5Merger_H +#define HDF5Utils_IH5Merger_H #include "H5Cpp.h" @@ -73,4 +73,4 @@ namespace H5Utils { }; //> end class } //> end namespace H5Utils -#endif //> !HDF5Writer_IH5Merger_H +#endif //> !HDF5Utils_IH5Merger_H diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h index 498b7f733454..a1981ecd9c02 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/HDF5Utils/MergeUtils.h @@ -2,8 +2,8 @@ Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration 
*/ -#ifndef HDF5Writer_MergeUtils_H -#define HDF5Writer_MergeUtils_H +#ifndef HDF5Utils_MergeUtils_H +#define HDF5Utils_MergeUtils_H #include "H5Cpp.h" #include <string> @@ -94,4 +94,4 @@ namespace H5Utils { } //> end namespace H5Utils -#endif //> !HDF5Writer_MergeUtils_H +#endif //> !HDF5Utils_MergeUtils_H -- GitLab From 3ba431ccc3c4659460a9def9dc6d3c4527d4492c Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Tue, 16 Jul 2019 09:31:54 +0200 Subject: [PATCH 19/22] Fix HDF5 header path in CMake --- PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt index 69b6b2470c76..2ac69bad5880 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt @@ -39,4 +39,5 @@ unset(_exe_sources) # add the merge utility atlas_add_executable( hdf5-merge util/hdf5-merge.cxx + INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} LINK_LIBRARIES HDF5Utils Boost::program_options ) -- GitLab From e5a2b6e1b0eb3710602cb0d15cba0e27832da7cd Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Wed, 17 Jul 2019 09:35:51 +0200 Subject: [PATCH 20/22] Fixing issues in the CMakeLists.txt Replacing the use of the external 'boost' target with Boost_LIBRARIES. Unfortunately ATLAS CMake does not like external targets. Making the HDF5_INCLUDE_DIRS and ZLIB_INCLUDE_DIRS public includes. The libraries are publicly linked and everything else that is including it inside this file seems to need them so it seems the most logical approach. 
--- .../AnalysisCommon/HDF5Utils/CMakeLists.txt | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt index 2ac69bad5880..c3ed1f5dcc9e 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt @@ -18,7 +18,7 @@ atlas_add_library(HDF5Utils Root/IH5Merger.cxx Root/MergeUtils.cxx PUBLIC_HEADERS HDF5Utils - PRIVATE_INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} + INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} LINK_LIBRARIES ${HDF5_LIBRARIES} ${ZLIB_LIBRARIES}) # build a translation utility @@ -29,15 +29,13 @@ set( _exe_sources util/ttree2hdf5.cxx) atlas_add_executable(ttree2hdf5 ${_exe_sources} - INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS} util - ${Boost_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} - LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ${HDF5_LIBRARIES} - ${ROOT_LIBRARIES}) + INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} util ${Boost_INCLUDE_DIRS} + LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ${ROOT_LIBRARIES}) unset(_exe_sources) # add the merge utility atlas_add_executable( hdf5-merge util/hdf5-merge.cxx - INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} - LINK_LIBRARIES HDF5Utils Boost::program_options ) + INCLUDE_DIRS ${Boost_INCLUDE_DIRS} + LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ) -- GitLab From d2ea6f2161d5b44e890284c81ca02c4aeeb9bf47 Mon Sep 17 00:00:00 2001 From: Jon Burr <jon.burr@cern.ch> Date: Wed, 17 Jul 2019 10:06:57 +0200 Subject: [PATCH 21/22] Adding back the explicit dependencies on HDF5 --- PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt index c3ed1f5dcc9e..a3fc9653139e 100644 --- 
a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt @@ -29,13 +29,13 @@ set( _exe_sources util/ttree2hdf5.cxx) atlas_add_executable(ttree2hdf5 ${_exe_sources} - INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} util ${Boost_INCLUDE_DIRS} - LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ${ROOT_LIBRARIES}) + INCLUDE_DIRS ${ROOT_INCLUDE_DIRS} util ${Boost_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS} + LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ${ROOT_LIBRARIES} ${HDF5_LIBRARIES} ) unset(_exe_sources) # add the merge utility atlas_add_executable( hdf5-merge util/hdf5-merge.cxx - INCLUDE_DIRS ${Boost_INCLUDE_DIRS} - LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ) + INCLUDE_DIRS ${Boost_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS} + LINK_LIBRARIES HDF5Utils ${Boost_LIBRARIES} ${HDF5_LIBRARIES} ) -- GitLab From 763355613f7bf399684468dc4696041687bf490f Mon Sep 17 00:00:00 2001 From: Dan Guest <dguest@cern.ch> Date: Wed, 17 Jul 2019 10:58:54 +0200 Subject: [PATCH 22/22] Remove zlib dependence --- PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt index a3fc9653139e..0a991221f322 100644 --- a/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt +++ b/PhysicsAnalysis/AnalysisCommon/HDF5Utils/CMakeLists.txt @@ -18,8 +18,8 @@ atlas_add_library(HDF5Utils Root/IH5Merger.cxx Root/MergeUtils.cxx PUBLIC_HEADERS HDF5Utils - INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} - LINK_LIBRARIES ${HDF5_LIBRARIES} ${ZLIB_LIBRARIES}) + INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} + LINK_LIBRARIES ${HDF5_LIBRARIES}) # build a translation utility set( _exe_sources -- GitLab