From cfe49c1f1a4ea7fa07a2a5089a128ef7019e9a09 Mon Sep 17 00:00:00 2001 From: Nils Krumnack <nils.erik.krumnack@cern.ch> Date: Fri, 14 Mar 2025 15:17:02 -0500 Subject: [PATCH 1/4] add columnar tool wrapper and in-memory test fixture The tool wrappers are also shared with the python tool wrappers and the PHYSLITE test fixture (both to come in future commits). The in-memory test is the basic test fixture for columnar tools to check that they work in columnar mode. --- .../ColumnarExampleTools/CMakeLists.txt | 6 + .../test/gt_ColumnarToolTests.cxx | 222 +++++++++++++ .../ColumnarInterfaces/ColumnInfo.h | 8 + .../ColumnarTestFixtures/CMakeLists.txt | 15 + .../ColumnarTestFixtures/ColumnarMemoryTest.h | 221 +++++++++++++ .../Root/ColumnarMemoryTest.cxx | 300 ++++++++++++++++++ .../ColumnarToolWrapper/CMakeLists.txt | 11 + .../ColumnarToolWrapper/ColumnarToolHelpers.h | 27 ++ .../ColumnarToolWrapper/ColumnarToolWrapper.h | 222 +++++++++++++ .../Root/ColumnarToolHelpers.cxx | 41 +++ .../Root/ColumnarToolWrapper.cxx | 263 +++++++++++++++ 11 files changed, 1336 insertions(+) create mode 100644 PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx create mode 100644 PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt create mode 100644 PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h create mode 100644 PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx create mode 100644 PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt create mode 100644 PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolHelpers.h create mode 100644 PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolWrapper.h create mode 100644 PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolHelpers.cxx create mode 100644 PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx diff --git 
a/PhysicsAnalysis/Columnar/ColumnarExampleTools/CMakeLists.txt b/PhysicsAnalysis/Columnar/ColumnarExampleTools/CMakeLists.txt index 47df27c54a13..b0db960fa9e0 100644 --- a/PhysicsAnalysis/Columnar/ColumnarExampleTools/CMakeLists.txt +++ b/PhysicsAnalysis/Columnar/ColumnarExampleTools/CMakeLists.txt @@ -10,3 +10,9 @@ atlas_add_library (ColumnarExampleToolsLib PUBLIC_HEADERS ColumnarExampleTools INCLUDE_DIRS ${EIGEN_INCLUDE_DIRS} LINK_LIBRARIES ColumnarCoreLib AsgTools ColumnarMuonLib ColumnarTrackingLib) + +atlas_add_test( gt_ColumnarToolTests + SOURCES test/gt_ColumnarToolTests.cxx + INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} + LINK_LIBRARIES ${GTEST_LIBRARIES} ColumnarTestFixturesLib ColumnarExampleToolsLib + POST_EXEC_SCRIPT nopost.sh ) diff --git a/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx b/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx new file mode 100644 index 000000000000..40d6aedc5b76 --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx @@ -0,0 +1,222 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// @author Nils Krumnack + + + +// +// includes +// + +#include "CxxUtils/checker_macros.h" +ATLAS_NO_CHECK_FILE_THREAD_SAFETY; + +#include <AsgTesting/UnitTest.h> +#include <AsgTools/AsgToolConfig.h> +#include <ColumnarTestFixtures/ColumnarMemoryTest.h> + +#include <ColumnarExampleTools/SimpleSelectorExampleTool.h> +#include <ColumnarExampleTools/OptionalColumnExampleTool.h> +#include <ColumnarExampleTools/ConfigurableColumnExampleTool.h> +#include <ColumnarExampleTools/ModularExampleTool.h> + +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +// +// method implementations +// + +// All the tests could instead be placed inside the `columnar` +// namespace, but for actual tools the tool may actually exist in a +// different namespace, so I usually use a `using` statement like this. 
+using columnar::ColumnarMemoryTest; + + +// this is a test that manually loads data into memory, and then runs +// the columnar tool on it. this is useful for ensuring that the tool +// actually works and the relevant code paths have all been converted. +// +// this is about as close as we can get to a unit test for the tool as a +// whole, as it checks exactly defined inputs and outputs. however, +// full CP tools are usually so complex that it is often not feasible to +// do full unit testing this way. at times it is even hard to find a +// set of inputs that is even valid at all. +TEST_F (ColumnarMemoryTest, SimpleSelectorExampleTool) +{ + // check that we are in array mode + if (!checkMode()) + return; + + // set up the tool + auto tool = std::make_unique<columnar::SimpleSelectorExampleTool> (makeUniqueName()); + ASSERT_SUCCESS (tool->initialize ()); + + // this is a wrapper around the tool for this test + ColumnarTestToolHandle toolHandle (*tool); + toolHandle.initialize (); + + // print out some information about the tool + for (auto& name : toolHandle.getColumnNames()) + std::cout << "requested column: " << name << std::endl; + std::cout << "recommended systematics size: " << toolHandle.getRecommendedSystematics().size() << std::endl; + + // the in-memory data frame we are filling + ColumnMapType columnMap {toolHandle}; + + // the various columns we are loading into memory + columnMap.addColumn ("EventInfo", {0, 2}); + columnMap.addColumn ("Particles", {0, 1, 3}); + columnMap.addColumn ("Particles.pt", {10e5, 10e5, 1e3}); + columnMap.addColumn ("Particles.selection", {0, 0, 0}); + + // the expected output of the tool + columnMap.setExpectation ("Particles.selection", {1, 1, 0}); + + // connect the columns to the tool + columnMap.connectColumnsToTool (); + + // run the tool + columnMap.call (); + + // check the output + columnMap.checkExpectations (); +} + + +TEST_F (ColumnarMemoryTest, OptionalColumnExampleTool_present) +{ + if (!checkMode()) + return; + + 
auto tool = std::make_unique<columnar::OptionalColumnExampleTool> (makeUniqueName()); + ASSERT_SUCCESS (tool->initialize ()); + + ColumnarTestToolHandle toolHandle (*tool); + toolHandle.initialize (); + + for (auto& name : toolHandle.getColumnNames()) + std::cout << "requested column: " << name << std::endl; + std::cout << "recommended systematics size: " << toolHandle.getRecommendedSystematics().size() << std::endl; + + ColumnMapType columnMap {toolHandle}; + + columnMap.addColumn ("EventInfo", {0, 2}); + + columnMap.addColumn ("Particles", {0, 1, 3}); + columnMap.addColumn ("Particles.pt", {10e5, 10e5, 1e3}); + columnMap.addColumn ("Particles.ptCorr", {10e5, 1e3, 10e5}); + columnMap.addColumn ("Particles.selection", {0, 0, 0}); + + columnMap.setExpectation ("Particles.selection", {1, 0, 1}); + + columnMap.connectColumnsToTool (); + + columnMap.call (); + + columnMap.checkExpectations (); +} + + +TEST_F (ColumnarMemoryTest, OptionalColumnExampleTool_absent) +{ + if (!checkMode()) + return; + + auto tool = std::make_unique<columnar::OptionalColumnExampleTool> (makeUniqueName()); + ASSERT_SUCCESS (tool->initialize ()); + + ColumnarTestToolHandle toolHandle (*tool); + toolHandle.initialize (); + + for (auto& name : toolHandle.getColumnNames()) + std::cout << "requested column: " << name << std::endl; + std::cout << "recommended systematics size: " << toolHandle.getRecommendedSystematics().size() << std::endl; + + ColumnMapType columnMap {toolHandle}; + + columnMap.addColumn ("EventInfo", {0, 2}); + + columnMap.addColumn ("Particles", {0, 1, 3}); + columnMap.addColumn ("Particles.pt", {10e5, 10e5, 1e3}); + columnMap.addColumn ("Particles.selection", {0, 0, 0}); + + columnMap.setExpectation ("Particles.selection", {1, 1, 0}); + + columnMap.connectColumnsToTool (); + + columnMap.call (); + + columnMap.checkExpectations (); +} + + +TEST_F (ColumnarMemoryTest, ConfigurableColumnExampleTool) +{ + if (!checkMode()) + return; + + auto tool = 
std::make_unique<columnar::ConfigurableColumnExampleTool> (makeUniqueName()); + ASSERT_SUCCESS (tool->setProperty ("ptVar", "ptCorr")); + ASSERT_SUCCESS (tool->initialize ()); + + ColumnarTestToolHandle toolHandle (*tool); + toolHandle.initialize (); + + for (auto& name : toolHandle.getColumnNames()) + std::cout << "requested column: " << name << std::endl; + std::cout << "recommended systematics size: " << toolHandle.getRecommendedSystematics().size() << std::endl; + + ColumnMapType columnMap {toolHandle}; + + columnMap.addColumn ("EventInfo", {0, 2}); + + columnMap.addColumn ("Particles", {0, 1, 3}); + columnMap.addColumn ("Particles.ptCorr", {10e5, 10e5, 1e3}); + columnMap.addColumn ("Particles.selection", {0, 0, 0}); + + columnMap.setExpectation ("Particles.selection", {1, 1, 0}); + + columnMap.connectColumnsToTool (); + + columnMap.call (); + + columnMap.checkExpectations (); +} + + +TEST_F (ColumnarMemoryTest, ModularExampleTool) +{ + if (!checkMode()) + return; + + auto tool = std::make_unique<columnar::ModularExampleTool> (makeUniqueName()); + ASSERT_SUCCESS (tool->initialize ()); + + ColumnarTestToolHandle toolHandle (*tool); + toolHandle.initialize (); + + for (auto& name : toolHandle.getColumnNames()) + std::cout << "requested column: " << name << std::endl; + std::cout << "recommended systematics size: " << toolHandle.getRecommendedSystematics().size() << std::endl; + + ColumnMapType columnMap {toolHandle}; + + columnMap.addColumn ("EventInfo", {0, 2}); + + columnMap.addColumn ("Particles", {0, 1, 3}); + columnMap.addColumn ("Particles.pt", {10e5, 10e5, 1e3}); + columnMap.addColumn ("Particles.eta", {0, 3, 0}); + columnMap.addColumn ("Particles.selection", {0, 0, 0}); + + columnMap.setExpectation ("Particles.selection", {1, 0, 0}); + + columnMap.connectColumnsToTool (); + + columnMap.call (); + + columnMap.checkExpectations (); +} + +ATLAS_GOOGLE_TEST_MAIN diff --git a/PhysicsAnalysis/Columnar/ColumnarInterfaces/ColumnarInterfaces/ColumnInfo.h 
b/PhysicsAnalysis/Columnar/ColumnarInterfaces/ColumnarInterfaces/ColumnInfo.h index 279563cf34c3..afb42ab7dd0e 100644 --- a/PhysicsAnalysis/Columnar/ColumnarInterfaces/ColumnarInterfaces/ColumnInfo.h +++ b/PhysicsAnalysis/Columnar/ColumnarInterfaces/ColumnarInterfaces/ColumnInfo.h @@ -74,6 +74,14 @@ namespace columnar std::string offsetName {}; + /// @brief the fixed dimensions this column has (if any) + /// + /// For the most part we use dynamic dimensions via offset maps, but + /// sometimes the dimensions are hard-coded, which then uses these + /// dimensions. + std::vector<unsigned> fixedDimensions {}; + + /// @brief whether this is an offset column /// /// In part this is for consistency checks, i.e. other columns can diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt new file mode 100644 index 000000000000..bc8f79554b5b --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +# +# @author Nils Krumnack + +atlas_subdir( ColumnarTestFixtures ) + +find_package( GTest ) + +# Add the shared library: +atlas_add_library (ColumnarTestFixturesLib +ColumnarTestFixtures/*.h Root/*.cxx + PUBLIC_HEADERS ColumnarTestFixtures + INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} + LINK_LIBRARIES ${GTEST_LIBRARIES} ColumnarToolWrapperLib PATInterfaces AsgTestingLib + PRIVATE_LINK_LIBRARIES ComponentFactoryPreloaderLib ColumnarCoreLib) diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h new file mode 100644 index 000000000000..00825f6f2de8 --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h @@ -0,0 +1,221 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// 
@author Nils Krumnack + + +#ifndef COLUMNAR_TEST_FIXTURES__COLUMNAR_MEMORY_TEST_H +#define COLUMNAR_TEST_FIXTURES__COLUMNAR_MEMORY_TEST_H + +#include <gtest/gtest.h> + +#include <AsgTools/AsgTool.h> +#include <ColumnarInterfaces/ColumnInfo.h> +#include <ColumnarInterfaces/IColumnarTool.h> +#include <ColumnarToolWrapper/ColumnarToolHelpers.h> +#include <ColumnarToolWrapper/ColumnarToolWrapper.h> +#include <PATInterfaces/ISystematicsTool.h> +#include <PATInterfaces/SystematicsUtil.h> + +namespace columnar +{ + struct ColumnarMemoryTest : public testing::Test + { + ColumnarMemoryTest (); + + /// \brief make a unique tool name to be used in unit tests + std::string makeUniqueName (); + + /// @brief check whether we have the right mode + static bool checkMode (); + + class ColumnarTestToolHandle; + struct ColumnMapType; + }; + + + + /// @brief a handle to a columnar tool for running tests + /// + /// This used to be shared with the python bindings, but there are + /// sufficient differences between testing and python bindings to + /// split the two. 
+ + class ColumnarMemoryTest::ColumnarTestToolHandle final + { + /// Public Members + /// ============== + + public: + + explicit ColumnarTestToolHandle (asg::AsgTool& val_tool); + + /// rename the containers the tool uses (re-creates the tool wrapper if it already exists) + void renameContainers (const std::vector<std::pair<std::string,std::string>>& renames); + + /// create the tool wrapper (required before getColumnInfo/getColumnNames/getToolWrapper) + void initialize (); + + /// set the tool to apply the given systematic variation + void applySystematicVariation (const std::string& sysName); + + /// get the expected column info + [[nodiscard]] std::vector<ColumnInfo> getColumnInfo () const; + + /// get the expected column names + std::vector<std::string> getColumnNames () const; + + /// get the recommended systematics + std::vector<std::string> getRecommendedSystematics () const; + + /// get the tool wrapper + [[nodiscard]] const ColumnarToolWrapper& getToolWrapper () const; + + /// get the contained tool + [[nodiscard]] IColumnarTool* getTool () const; + + + + /// Private Members + /// =============== + + private: + + IColumnarTool* m_tool = nullptr; + CP::ISystematicsTool* m_systTool = nullptr; + + std::shared_ptr<const ColumnarToolWrapper> m_toolWrapper; + }; + + + + struct ColumnarMemoryTest::ColumnMapType final + { + ColumnMapType (ColumnarTestToolHandle& val_toolHandle); + + void addColumn (const std::string& name, std::vector<std::any> data); + + void setExpectation (const std::string& name, std::vector<std::any> values); + + /// @brief add the columns we have to the tool + void connectColumnsToTool (); + + void call (); + + void checkExpectations (); + + private: + + template<typename T> T extractAny (const std::string& columnName, const std::any& value) + { + if (value.type() == typeid(float)) + return std::any_cast<float> (value); + if (value.type() == typeid(double)) + return std::any_cast<double> (value); + if (value.type() == typeid(int)) + return std::any_cast<int> (value); + if (value.type() == typeid(unsigned)) + return std::any_cast<unsigned> (value); + if 
(value.type() == typeid(std::size_t)) + return std::any_cast<std::size_t> (value); + throw std::logic_error ("column " + columnName + " received unsupported input " + value.type().name() + ", cast value or extend test handler to support it"); + } + + template<typename T> void addInputTyped (const std::string& name, const std::vector<std::any>& data) + { + std::vector<T> typedData; + for (auto& value : data) + typedData.emplace_back (extractAny<T> (name, value)); + inputs.emplace (name, std::move (typedData)); + } + + template<typename T> void addTypedColumn (const std::string& name, std::vector<T> data) + { + auto column = columnMap.find (name); + if (column == columnMap.end()) + throw std::runtime_error ("adding unknown column: " + name); + if (inputs.contains (name)) + throw std::runtime_error ("column added twice: " + name); + if (column->second.type != &typeid(T)) + throw std::runtime_error ("column " + name + " has wrong type: " + column->second.type->name()); + inputs.emplace (name, std::move (data)); + } + + template<typename T> void addExpectationTyped (const std::string& name, const std::vector<std::any>& data) + { + std::vector<T> typedData; + for (auto& value : data) + typedData.emplace_back (extractAny<T> (name, value)); + expectations.emplace (name, std::move (typedData)); + } + + ColumnarOffsetType columnSize (const std::string& name); + + template<typename T> std::span<const T> getOutputColumn (const std::string& name) + { + auto info = columnMap.find (name); + if (info == columnMap.end()) + throw std::runtime_error ("output column not found: " + name); + auto iter = activeColumns.find (name); + if (iter == activeColumns.end()) + throw std::runtime_error ("output column not set: " + name); + if (!std::holds_alternative<std::vector<T>> (iter->second)) + throw std::runtime_error ("output column has wrong type: " + name); + return std::span<const T> (std::get<std::vector<T>> (iter->second)); + } + + template<typename T> void checkExpectationTyped (const 
std::string& columnName) + { + auto outputIter = activeColumns.find (columnName); + if (outputIter == activeColumns.end()) + throw std::runtime_error ("output column not set: " + columnName); + if (!std::holds_alternative<std::vector<T>> (outputIter->second)) + throw std::runtime_error ("output column has wrong type: " + columnName); + auto& output = std::get<std::vector<T>> (outputIter->second); + + auto expectationIter = expectations.find (columnName); + if (expectationIter == expectations.end()) + throw std::runtime_error ("expectation not set: " + columnName); + if (!std::holds_alternative<std::vector<T>> (expectationIter->second)) + throw std::runtime_error ("expectation has wrong type: " + columnName); + auto& expectation = std::get<std::vector<T>> (expectationIter->second); + + SCOPED_TRACE (columnName); + EXPECT_EQ (output.size(), expectation.size()); + for (std::size_t index = 0; index != std::min (output.size(), expectation.size()); ++ index) + { + SCOPED_TRACE (index); + if constexpr (std::is_floating_point_v<T>) + EXPECT_NEAR (output[index], expectation[index], 1e-6); + else + EXPECT_EQ (output[index], expectation[index]); + } + std::cout << " columnMap.setExpectation (\"" << columnName << "\", {"; + for (std::size_t index = 0; index != output.size(); ++ index) // prints the actual output values, so bounded by output.size() + { + if (index != 0) + std::cout << ", "; + if constexpr (std::is_floating_point_v<T>) + std::cout << std::setprecision (8) << output[index]; + else if constexpr (std::is_same_v<T,char> || std::is_same_v<T,std::uint8_t>) + std::cout << int (output[index]); + else + std::cout << output[index]; + } + std::cout << "});" << std::endl; + } + + ColumnarTestToolHandle* toolHandle = nullptr; + + std::unique_ptr<ColumnarToolWrapperData> m_columnData; + + std::unordered_map<std::string,const ColumnInfo> columnMap; + + std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> 
inputs; + std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> activeColumns; + std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> expectations; + }; +} + +#endif diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx new file mode 100644 index 000000000000..ecfc8e6d5abd --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx @@ -0,0 +1,300 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// @author Nils Krumnack + + +// +// includes +// + +#include <ColumnarTestFixtures/ColumnarMemoryTest.h> + +#include <ColumnarCore/ColumnarDef.h> +#include <ComponentFactoryPreloader/ComponentFactoryPreloader.h> + +// +// method implementations +// + +namespace columnar +{ + ColumnarMemoryTest :: + ColumnarMemoryTest () + { + static std::once_flag flag; + std::call_once (flag, [] () + { + // Preload the component factories: Alternately this could be + // loaded and executed via a dictionary, but I had some technical + // issue with that, and this seems to be working. 
+ CP::preloadComponentFactories(); + }); + } + + + + std::string ColumnarMemoryTest :: + makeUniqueName () + { + static std::atomic<unsigned> index = 0; + return "UniqueMemoryTestTool" + std::to_string(++index); + } + + + + bool ColumnarMemoryTest :: + checkMode () + { + return std::is_same_v<ColumnarModeDefault,ColumnarModeArray>; + } + + + + ColumnarMemoryTest::ColumnarTestToolHandle :: + ColumnarTestToolHandle (asg::AsgTool& val_tool) + { + m_tool = dynamic_cast<IColumnarTool*> (&val_tool); + if (m_tool == nullptr) + throw std::runtime_error ("tool does not implement IColumnarTool"); + m_systTool = dynamic_cast<CP::ISystematicsTool*> (m_tool); + } + + + + void ColumnarMemoryTest::ColumnarTestToolHandle :: + renameContainers (const std::vector<std::pair<std::string,std::string>>& renames) + { + columnar::renameContainers (*m_tool, renames); + if (m_toolWrapper) + { + m_toolWrapper = std::make_shared<ColumnarToolWrapper> (m_tool); + } + } + + + + void ColumnarMemoryTest::ColumnarTestToolHandle :: + initialize () + { + m_toolWrapper = std::make_shared<ColumnarToolWrapper> (m_tool); + } + + + + void ColumnarMemoryTest::ColumnarTestToolHandle :: + applySystematicVariation (const std::string& sysName) + { + // by convention setting a systematic on a non-systematics tool + // will do nothing + if (m_systTool == nullptr) + return; + if (!m_systTool->applySystematicVariation (CP::SystematicSet (sysName)).isSuccess()) + throw std::runtime_error ("failed to apply systematic variation"); + } + + + + [[nodiscard]] std::vector<ColumnInfo> ColumnarMemoryTest::ColumnarTestToolHandle :: + getColumnInfo () const + { + if (!m_toolWrapper) + throw std::runtime_error ("tool not initialized"); + return m_toolWrapper->getColumnInfo (); + } + + + + std::vector<std::string> ColumnarMemoryTest::ColumnarTestToolHandle :: + getColumnNames () const + { + if (!m_toolWrapper) + throw std::runtime_error ("tool not initialized"); + return m_toolWrapper->getColumnNames (); + } + + + + 
std::vector<std::string> ColumnarMemoryTest::ColumnarTestToolHandle :: + getRecommendedSystematics () const + { + if (!m_systTool) + return {""}; + std::vector<std::string> result; + for (auto& sys : CP::make_systematics_vector (m_systTool->recommendedSystematics())) + result.push_back (sys.name()); + return result; + } + + + + [[nodiscard]] const ColumnarToolWrapper& ColumnarMemoryTest::ColumnarTestToolHandle :: + getToolWrapper () const + { + if (!m_toolWrapper) + throw std::runtime_error ("tool not initialized"); + return *m_toolWrapper; + } + + + + [[nodiscard]] IColumnarTool* ColumnarMemoryTest::ColumnarTestToolHandle :: + getTool () const + { + return m_tool; + } + + + + ColumnarMemoryTest::ColumnMapType :: + ColumnMapType (ColumnarTestToolHandle& val_toolHandle) + : toolHandle (&val_toolHandle), m_columnData (std::make_unique<ColumnarToolWrapperData> (&val_toolHandle.getToolWrapper())) + { + for (auto& column : toolHandle->getColumnInfo()) + { + auto [iter, success] = columnMap.emplace (column.name, ColumnInfo {column}); + if (!success) + throw std::runtime_error ("column already exists: " + column.name); + } + } + + + + void ColumnarMemoryTest::ColumnMapType :: + addColumn (const std::string& name, std::vector<std::any> data) + { + auto column = columnMap.find (name); + if (column == columnMap.end()) + throw std::runtime_error ("adding unknown column: " + name); + if (inputs.contains (name)) + throw std::runtime_error ("column added twice: " + name); + if (column->second.type == &typeid(float)) + addInputTyped<float> (name, data); + else if (column->second.type == &typeid(char)) + addInputTyped<char> (name, data); + else if (column->second.type == &typeid(int)) + addInputTyped<int> (name, data); + else if (column->second.type == &typeid(std::uint8_t)) + addInputTyped<std::uint8_t> (name, data); + else if (column->second.type == &typeid(std::uint16_t)) + addInputTyped<std::uint16_t> (name, data); + else if (column->second.type == &typeid(std::uint32_t)) + 
addInputTyped<std::uint32_t> (name, data); + else if (column->second.type == &typeid(std::uint64_t)) + addInputTyped<std::uint64_t> (name, data); + else + throw std::logic_error ("column name " + name + " has unsupported type " + column->second.type->name() + ", extend test handler to support it"); + } + + + + void ColumnarMemoryTest::ColumnMapType :: + setExpectation (const std::string& name, std::vector<std::any> values) + { + auto column = columnMap.find (name); + if (column == columnMap.end()) + throw std::runtime_error ("adding unknown column: " + name); + if (expectations.contains (name)) + throw std::runtime_error ("column added twice: " + name); + if (column->second.type == &typeid(float)) + addExpectationTyped<float> (name, values); + else if (column->second.type == &typeid(char)) + addExpectationTyped<char> (name, values); + else if (column->second.type == &typeid(int)) + addExpectationTyped<int> (name, values); + else if (column->second.type == &typeid(std::uint8_t)) + addExpectationTyped<std::uint8_t> (name, values); + else if (column->second.type == &typeid(std::uint16_t)) + addExpectationTyped<std::uint16_t> (name, values); + else if (column->second.type == &typeid(std::uint32_t)) + addExpectationTyped<std::uint32_t> (name, values); + else if (column->second.type == &typeid(std::uint64_t)) + addExpectationTyped<std::uint64_t> (name, values); + else + throw std::logic_error ("column name " + name + " has unsupported type " + column->second.type->name() + ", extend test handler to support it"); + } + + + + ColumnarOffsetType ColumnarMemoryTest::ColumnMapType :: + columnSize (const std::string& name) + { + auto iter = inputs.find (name); + if (iter == inputs.end()) + throw std::runtime_error ("column not found: " + name); + return std::visit ([] (const auto& data) { return data.size(); }, iter->second); + } + + + + void ColumnarMemoryTest::ColumnMapType :: + connectColumnsToTool () + { + for (auto& [name, columnInfo] : columnMap) + { + if (auto iter = 
inputs.find (columnInfo.name); iter != inputs.end()) + { + switch (columnInfo.accessMode) + { + case ColumnAccessMode::input: + std::visit ([&] (const auto& data) + { + m_columnData->setColumn (iter->first, data.size(), data.data()); + }, iter->second); + break; + case ColumnAccessMode::update: + case ColumnAccessMode::output: + std::visit ([&] (auto& data) + { + m_columnData->setColumn (iter->first, data.size(), data.data()); + }, activeColumns[iter->first] = iter->second); + break; + default: + throw std::runtime_error ("column mode unknown: " + columnInfo.name); + } + } + } + } + + + + void ColumnarMemoryTest::ColumnMapType :: + call () + { + m_columnData->call (); + } + + + + void ColumnarMemoryTest::ColumnMapType :: + checkExpectations () + { + for (auto& [name, info] : columnMap) + { + if (info.accessMode == ColumnAccessMode::input) + continue; + + auto iter = activeColumns.find (name); + if (iter == activeColumns.end()) + continue; + + if (info.type == &typeid(float)) + checkExpectationTyped<float> (name); + else if (info.type == &typeid(char)) + checkExpectationTyped<char> (name); + else if (info.type == &typeid(int)) + checkExpectationTyped<int> (name); + else if (info.type == &typeid(std::uint8_t)) + checkExpectationTyped<std::uint8_t> (name); + else if (info.type == &typeid(std::uint16_t)) + checkExpectationTyped<std::uint16_t> (name); + else if (info.type == &typeid(std::uint32_t)) + checkExpectationTyped<std::uint32_t> (name); + else if (info.type == &typeid(std::uint64_t)) + checkExpectationTyped<std::uint64_t> (name); + else + throw std::logic_error ("column name " + name + " has unsupported type " + info.type->name() + ", extend test handler to support it"); + } + } +} \ No newline at end of file diff --git a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt new file mode 100644 index 000000000000..11bec047650a --- /dev/null +++ 
b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +# +# @author Nils Krumnack + +atlas_subdir( ColumnarToolWrapper ) + +# Add the shared library: +atlas_add_library (ColumnarToolWrapperLib +ColumnarToolWrapper/*.h Root/*.cxx + PUBLIC_HEADERS ColumnarToolWrapper + LINK_LIBRARIES ColumnarInterfacesLib ) diff --git a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolHelpers.h b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolHelpers.h new file mode 100644 index 000000000000..b69daf744e05 --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolHelpers.h @@ -0,0 +1,27 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// @author Nils Krumnack + + +#ifndef COLUMNAR_TOOL_WRAPPER_COLUMNAR_TOOL_HELPERS_H +#define COLUMNAR_TOOL_WRAPPER_COLUMNAR_TOOL_HELPERS_H + +#include <string> +#include <vector> + +namespace columnar +{ + class IColumnarTool; + + /// rename containers in the columnar tool + /// + /// The interface itself only allows renaming individual columns, but + /// sometimes it is nice to rename a whole container. This happens + /// strictly on the basis of the name, i.e. it being prefixed with + /// "Container.". 
+ void renameContainers (IColumnarTool& tool, const std::vector<std::pair<std::string,std::string>>& renames); +} + +#endif \ No newline at end of file diff --git a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolWrapper.h b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolWrapper.h new file mode 100644 index 000000000000..1db67c030059 --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/ColumnarToolWrapper/ColumnarToolWrapper.h @@ -0,0 +1,222 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// @author Nils Krumnack + + +#ifndef COLUMNAR_PYTHON_WRAPPER_PYTHON_TOOL_WRAPPER_H +#define COLUMNAR_PYTHON_WRAPPER_PYTHON_TOOL_WRAPPER_H + +#include <memory> +#include <string> +#include <typeinfo> +#include <unordered_map> +#include <vector> + +namespace columnar +{ + struct ColumnInfo; + class IColumnarTool; + + + /// @brief a class that wraps an IColumnarTool for use in Python + /// + /// This is not necessarily specific to python, but that is the + /// primary use case. Essentially all it does is provide a way to + /// preload columns by name, check that the columns are consistent + /// with what the tool expects, and then call the tool on those + /// columns. + /// + /// This class only does the meta-data handling, whereas the actual + /// handling of columnar data is done by @ref ColumnarToolWrapperData. Please + /// check its documentation for details. 
+ + class ColumnarToolWrapper final + { + /// Public Members + /// ============== + + public: + + /// @brief constructor: wrap the given tool (non-owning) + explicit ColumnarToolWrapper (IColumnarTool *val_tool); + + /// @brief constructor: wrap the given tool (owning) + explicit ColumnarToolWrapper (std::shared_ptr<IColumnarTool> val_tool); + + + // copying would need special handling, so disabling it until needed + ColumnarToolWrapper (const ColumnarToolWrapper&) = delete; + ColumnarToolWrapper& operator= (const ColumnarToolWrapper&) = delete; + + + /// @brief get information on all defined columns + /// + /// This is mostly to make it easy for the caller to know which + /// columns are defined and connect them to the dataframe + /// automatically. You can ask either for the list of column names + /// or the full `ColumnInfo`, depending on what you need. + /// @{ + [[nodiscard]] std::vector<std::string> getColumnNames () const; + [[nodiscard]] std::vector<ColumnInfo> getColumnInfo () const; + /// @} + + + + /// Private Members + /// =============== + + private: + + // These two classes work closely together, but for new users it + // seemed clearer to make it a friend instead of a nested class. 
+ friend class ColumnarToolWrapperData; + + + /// @brief the wrapped tool + const IColumnarTool *m_tool = nullptr; + + /// @brief the owning pointer to the tool + std::shared_ptr<const IColumnarTool> m_toolOwn; + + + /// @brief my cached information for the various columns needed + struct MyColumnInfo + { + const std::type_info *type = nullptr; + + bool isConst = false; + + unsigned index = 0u; + + unsigned fixedDimensions = 1u; + + bool isOffset = false; + + bool isOptional = false; + + int numpyType = -1; + + unsigned numpyBits = 0u; + + const std::pair<const std::string,MyColumnInfo> *offsets = nullptr; + }; + std::unordered_map<std::string,MyColumnInfo> m_columns; + + + /// @brief the number of columns that the tool expects (equal to the + /// greatest column index + 1) + unsigned m_numColumns = 0u; + }; + + + + /// @brief a class that holds the columnar data for a single call to + /// @ref ColumnarToolWrapper + /// + /// The idea is that this is a fairly lightweight class that can be + /// instantiated once per call and contain all thread-local + /// information (i.e. the pointers to the user data). For every use + /// of this tool the caller should create a new instance of this + /// class. 
+ /// + /// The basic usage is something like this: + /// ``` + /// ColumnarToolWrapper wrapper {...}; + /// ColumnarToolWrapperData data {&wrapper}; + /// std::span<const float> input1 = ...; + /// data.setColumn ("input1", input1.size(), input1.data()); + /// std::span<float> output1 = ...; + /// data.setColumn ("output1", output1.size(), output1.data()); + /// data.call (); + /// ``` + + class ColumnarToolWrapperData + { + /// Public Members + /// ============== + + public: + + /// @brief constructor: attach to the given tool wrapper + explicit ColumnarToolWrapperData (const ColumnarToolWrapper *val_wrapper) noexcept; + + + /// @brief set the data for the given column picking up the type via + /// a template + template<typename CT> + void setColumn (const std::string& name, std::size_t size, CT* dataPtr) { + auto voidPtr = reinterpret_cast<const void*>(const_cast<const CT*>(dataPtr)); + setColumnVoid (name, size, voidPtr, typeid (std::decay_t<CT>), std::is_const_v<CT>); + } + + + /// @brief set the data for the given column with the user passing + /// in the type + void setColumnVoid (const std::string& name, std::size_t size, const void *dataPtr, const std::type_info& type, bool isConst); + + + /// @brief set the data for the given column with the user passing + /// in the type information from numpy + void setColumnNumpy (const std::string& name, std::size_t size, const void *dataPtr, int type, unsigned bits, bool isConst); + + + /// @brief call the tool + void call (); + + + + /// Testing/Validation Members + /// ========================== + /// + /// These members shouldn't be needed for regular users, but they can + /// help in debugging issues with the tool or the wrapper. 
+ + public: + + + /// @brief check that all columns are valid + void checkColumnsValid (); + + + /// @brief get the data for the given column picking up the type via + /// a template + template<typename CT> + [[nodiscard]] std::pair<std::size_t,CT*> + getColumn (const std::string& name) + { + auto [size, ptr] = getColumnVoid (name, &typeid (std::decay_t<CT>), std::is_const_v<CT>); + if constexpr (std::is_const_v<CT>) + return std::make_pair (size, static_cast<CT*>(ptr)); + else + return std::make_pair (size, static_cast<CT*>(const_cast<void*>(ptr))); + } + + /// @brief get the data for the given column in a type-erased manner + [[nodiscard]] std::pair<std::size_t,const void*> + getColumnVoid (const std::string& name, const std::type_info *type, bool isConst); + + /// @brief the data vector we have assembled + [[nodiscard]] void** data () noexcept { + return m_data.data();} + + + + /// Private Members + /// =============== + + private: + + const ColumnarToolWrapper *m_wrapper = nullptr; + + std::vector<void*> m_data; + std::vector<std::size_t> m_dataSize; + std::vector<bool> m_columnIsChecked; + std::vector<bool> m_columnIsFilled; + + void checkColumn (const std::pair<const std::string,ColumnarToolWrapper::MyColumnInfo>& column); + }; +} + +#endif diff --git a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolHelpers.cxx b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolHelpers.cxx new file mode 100644 index 000000000000..6ea83a68b8e1 --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolHelpers.cxx @@ -0,0 +1,41 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// @author Nils Krumnack + + +// +// includes +// + +#include <ColumnarToolWrapper/ColumnarToolHelpers.h> + +#include <ColumnarInterfaces/ColumnInfo.h> +#include <ColumnarInterfaces/IColumnarTool.h> + +// +// method implementations +// + +namespace columnar +{ + void renameContainers (IColumnarTool& tool, const 
std::vector<std::pair<std::string,std::string>>& renames) + { + if (!renames.empty()) + { + auto columnInfo = tool.getColumnInfo (); + for (auto& [from, to] : renames) + { + for (auto& column : columnInfo) + { + if (column.name.starts_with (from) && (column.name.size() == from.size() || column.name[from.size()] == '.')) + { + std::string newName = to + column.name.substr (from.size()); + tool.renameColumn (column.name, newName); + } + } + } + } + } +} diff --git a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx new file mode 100644 index 000000000000..298ed741441b --- /dev/null +++ b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx @@ -0,0 +1,263 @@ +/* + Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration +*/ + +/// @author Nils Krumnack + + +// +// includes +// + +#include <ColumnarToolWrapper/ColumnarToolWrapper.h> + +#include <ColumnarInterfaces/ColumnInfo.h> +#include <ColumnarInterfaces/IColumnarTool.h> +#include <algorithm> +#include <cstdint> +#include <stdexcept> +#include <typeindex> + +// +// method implementations +// + +namespace columnar +{ + ColumnarToolWrapper :: + ColumnarToolWrapper (IColumnarTool *val_tool) + : m_tool (val_tool) + { + constexpr unsigned numpySigned = 0; + constexpr unsigned numpyUnsigned = 1; + constexpr unsigned numpyFloat = 2; + std::unordered_map<std::type_index, std::pair<int,unsigned>> numpyTypes; + numpyTypes[typeid (float)] = { numpyFloat, sizeof (float) * 8 }; + numpyTypes[typeid (char)] = { std::is_signed_v<char> ? 
numpySigned : numpyUnsigned, sizeof (char) * 8 }; + numpyTypes[typeid (int)] = { numpySigned, sizeof (int) * 8 }; + numpyTypes[typeid (std::uint8_t)] = { numpyUnsigned, sizeof (std::uint8_t) * 8 }; + numpyTypes[typeid (std::uint16_t)] = { numpyUnsigned, sizeof (std::uint16_t) * 8 }; + numpyTypes[typeid (std::uint32_t)] = { numpyUnsigned, sizeof (std::uint32_t) * 8 }; + numpyTypes[typeid (std::uint64_t)] = { numpyUnsigned, sizeof (std::uint64_t) * 8 }; + + + auto toolColumns = m_tool->getColumnInfo(); + unsigned nextIndex = 1u; + for (auto& column : toolColumns) + { + MyColumnInfo myinfo; + myinfo.index = nextIndex++; + val_tool->setColumnIndex (column.name, myinfo.index); + myinfo.type = column.type; + switch (column.accessMode) + { + case ColumnAccessMode::input: + myinfo.isConst = true; + break; + case ColumnAccessMode::output: + myinfo.isConst = false; + break; + case ColumnAccessMode::update: + myinfo.isConst = false; + break; + } + for (unsigned dim : column.fixedDimensions) + myinfo.fixedDimensions *= dim; + myinfo.isOffset = column.isOffset; + myinfo.isOptional = column.isOptional; + + if (auto iter = numpyTypes.find (*column.type); iter != numpyTypes.end()) + { + myinfo.numpyType = iter->second.first; + myinfo.numpyBits = iter->second.second; + } + + auto [iter, success] = m_columns.emplace (column.name, std::move (myinfo)); + if (!success) + throw std::runtime_error ("column name already registered: " + column.name); + + if (m_numColumns <= myinfo.index) + m_numColumns = myinfo.index + 1; + } + + for (auto& column : toolColumns) + { + if (!column.offsetName.empty()) + { + auto offsetIter = m_columns.find (column.offsetName); + if (offsetIter == m_columns.end()) + throw std::runtime_error ("offset column name not found: " + column.offsetName); + if (*offsetIter->second.type != typeid (ColumnarOffsetType)) + throw std::runtime_error ("offset column has wrong type: " + column.offsetName); + if (!offsetIter->second.isOffset) + throw std::runtime_error ("offset 
column is not registered as offset: " + column.offsetName); + m_columns.at (column.name).offsets = &*offsetIter; + } + } + } + + + + ColumnarToolWrapper :: + ColumnarToolWrapper (std::shared_ptr<IColumnarTool> val_tool) + : ColumnarToolWrapper (val_tool.get()) + { + m_toolOwn = std::move (val_tool); + } + + + + ColumnarToolWrapperData :: + ColumnarToolWrapperData (const ColumnarToolWrapper *val_wrapper) noexcept + : m_wrapper (val_wrapper), + m_data (m_wrapper->m_numColumns, nullptr), + m_dataSize (m_wrapper->m_numColumns, 0u), + m_columnIsChecked (m_wrapper->m_numColumns, false), + m_columnIsFilled (m_wrapper->m_numColumns, false) + {} + + + + void ColumnarToolWrapperData :: + setColumnVoid (const std::string& name, std::size_t size, const void *dataPtr, const std::type_info& type, bool isConst) + { + auto column = m_wrapper->m_columns.find (name); + if (column == m_wrapper->m_columns.end()) + throw std::runtime_error ("unknown column name: " + name); + + if (type != *column->second.type) + throw std::runtime_error ("invalid type for column: " + name); + if (isConst && !column->second.isConst) + throw std::runtime_error ("assigning const vector to a non-const column: " + name); + if (column->second.index == 0) + throw std::runtime_error ("column has no index assigned: " + name); + if (m_columnIsFilled[column->second.index]) + throw std::runtime_error ("column filled multiple times: " + name); + m_columnIsFilled[column->second.index] = true; + m_data[column->second.index] = const_cast<void*>(dataPtr); + m_dataSize[column->second.index] = size; + } + + + + void ColumnarToolWrapperData :: + setColumnNumpy (const std::string& name, std::size_t size, const void *dataPtr, int type, unsigned bits, bool isConst) + { + auto column = m_wrapper->m_columns.find (name); + if (column == m_wrapper->m_columns.end()) + throw std::runtime_error ("unknown column name: " + name); + + if (type != column->second.numpyType || bits != column->second.numpyBits) + throw std::runtime_error 
("invalid type for column: " + name + " (expected " + std::to_string (column->second.numpyType) + "/" + std::to_string (column->second.numpyBits) + " but got " + std::to_string (type) + "/" + std::to_string (bits) + ")"); + if (isConst && !column->second.isConst) + throw std::runtime_error ("assigning const vector to a non-const column: " + name); + if (column->second.index == 0) + throw std::runtime_error ("column has no index assigned: " + name); + if (m_columnIsFilled[column->second.index]) + throw std::runtime_error ("column filled multiple times: " + name); + m_columnIsFilled[column->second.index] = true; + m_data[column->second.index] = const_cast<void*>(dataPtr); + m_dataSize[column->second.index] = size; + } + + + + std::pair<std::size_t,const void*> ColumnarToolWrapperData :: + getColumnVoid (const std::string& name, const std::type_info *type, bool isConst) + { + auto column = m_wrapper->m_columns.find (name); + if (column == m_wrapper->m_columns.end()) + throw std::runtime_error ("unknown column name: " + name); + + if (*type != *column->second.type) + throw std::runtime_error ("invalid type for column: " + name); + if (!isConst && column->second.isConst) + throw std::runtime_error ("retrieving non-const vector from a const column: " + name); + if (m_data[column->second.index] != nullptr) + return std::make_pair (m_dataSize[column->second.index], + m_data[column->second.index]); + else + return std::make_pair (0u, nullptr); + } + + + + void ColumnarToolWrapperData :: + checkColumnsValid () + { + for (auto& column : m_wrapper->m_columns) + checkColumn (column); + } + + + + void ColumnarToolWrapperData :: + call () + { + checkColumnsValid (); + m_wrapper->m_tool->callVoid (m_data.data()); + } + + + + void ColumnarToolWrapperData :: + checkColumn (const std::pair<const std::string,ColumnarToolWrapper::MyColumnInfo>& column) + { + if (m_columnIsChecked.at(column.second.index)) + return; + if (!m_columnIsFilled.at(column.second.index)) + { + if 
(!column.second.isOptional) + throw std::runtime_error ("column not filled: " + column.first); + m_columnIsChecked[column.second.index] = true; + return; + } + + ColumnarOffsetType expectedSize = 1u; + if (column.second.offsets) + { + checkColumn (*column.second.offsets); + if (m_data[column.second.offsets->second.index] == nullptr) + throw std::runtime_error ("offset column not filled: " + column.second.offsets->first); + const auto offsetIndex = column.second.offsets->second.index; + auto *offsetsPtr = static_cast<const ColumnarOffsetType*>(m_data[offsetIndex]); + expectedSize = offsetsPtr[m_dataSize[offsetIndex]-1]; + } + expectedSize *= column.second.fixedDimensions; + + if (column.second.isOffset) + expectedSize += 1u; + + if (m_dataSize[column.second.index] != expectedSize) + throw std::runtime_error ("column size doesn't match expected size: " + column.first + ", found " + std::to_string (m_dataSize[column.second.index]) + " vs " + std::to_string (expectedSize)); + + if (column.second.isOffset) + { + auto *dataPtr = static_cast<const ColumnarOffsetType*>(m_data[column.second.index]); + if (dataPtr[0] != 0) + throw std::runtime_error ("offset column doesn't start with 0: " + column.first); + } + + m_columnIsChecked[column.second.index] = true; + } + + + + std::vector<std::string> ColumnarToolWrapper :: + getColumnNames () const + { + std::vector<std::string> result; + for (auto& column : m_columns) + result.push_back (column.first); + std::sort (result.begin(), result.end()); + return result; + } + + + + [[nodiscard]] std::vector<ColumnInfo> ColumnarToolWrapper :: + getColumnInfo () const + { + return m_tool->getColumnInfo(); + } +} -- GitLab From 7a75502fb8a67ec6f572fc9ad1bdda6ff51cac12 Mon Sep 17 00:00:00 2001 From: Nils Krumnack <nils.erik.krumnack@cern.ch> Date: Tue, 18 Mar 2025 10:32:30 -0500 Subject: [PATCH 2/4] fix Athena compilation failure --- .../Columnar/ColumnarTestFixtures/CMakeLists.txt | 7 ++++++- .../ColumnarTestFixtures/ColumnarMemoryTest.h | 
1 + .../ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt index bc8f79554b5b..cd7628b583aa 100644 --- a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/CMakeLists.txt @@ -6,10 +6,15 @@ atlas_subdir( ColumnarTestFixtures ) find_package( GTest ) +set (extra_libs) +if (XAOD_STANDALONE) + set (extra_libs ComponentFactoryPreloaderLib) +endif () + # Add the shared library: atlas_add_library (ColumnarTestFixturesLib ColumnarTestFixtures/*.h Root/*.cxx PUBLIC_HEADERS ColumnarTestFixtures INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} LINK_LIBRARIES ${GTEST_LIBRARIES} ColumnarToolWrapperLib PATInterfaces AsgTestingLib - PRIVATE_LINK_LIBRARIES ComponentFactoryPreloaderLib ColumnarCoreLib) + PRIVATE_LINK_LIBRARIES ${extra_libs} ColumnarCoreLib) diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h index 00825f6f2de8..6ed83103c376 100644 --- a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h @@ -17,6 +17,7 @@ #include <ColumnarToolWrapper/ColumnarToolWrapper.h> #include <PATInterfaces/ISystematicsTool.h> #include <PATInterfaces/SystematicsUtil.h> +#include <span> namespace columnar { diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx index ecfc8e6d5abd..d0229616316c 100644 --- a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx @@ -12,7 +12,10 @@ #include 
<ColumnarTestFixtures/ColumnarMemoryTest.h> #include <ColumnarCore/ColumnarDef.h> + +#ifdef XAOD_STANDALONE #include <ComponentFactoryPreloader/ComponentFactoryPreloader.h> +#endif // // method implementations @@ -23,6 +26,7 @@ namespace columnar ColumnarMemoryTest :: ColumnarMemoryTest () { +#ifdef XAOD_STANDALONE static std::once_flag flag; std::call_once (flag, [] () { @@ -31,6 +35,7 @@ namespace columnar // issue with that, and this seems to be working. CP::preloadComponentFactories(); }); +#endif } -- GitLab From b5339e59b53cdfb667f31948e2a9773ab02dd680 Mon Sep 17 00:00:00 2001 From: Nils Krumnack <nils.erik.krumnack@cern.ch> Date: Tue, 25 Mar 2025 03:58:34 -0500 Subject: [PATCH 3/4] fix compiler warnings in Athena build --- .../ColumnarTestFixtures/ColumnarMemoryTest.h | 44 +++++++++---------- .../Root/ColumnarMemoryTest.cxx | 38 ++++++++-------- .../ColumnarToolWrapper/CMakeLists.txt | 3 +- .../Root/ColumnarToolWrapper.cxx | 7 ++- 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h index 6ed83103c376..9042a1eb3059 100644 --- a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/ColumnarTestFixtures/ColumnarMemoryTest.h @@ -53,7 +53,7 @@ namespace columnar explicit ColumnarTestToolHandle (asg::AsgTool& val_tool); /// rename the columns the tool uses - void renameContainers (const std::vector<std::pair<std::string,std::string>> renames); + void renameContainers (const std::vector<std::pair<std::string,std::string>>& renames); /// initialize the tool void initialize (); @@ -74,7 +74,7 @@ namespace columnar [[nodiscard]] const ColumnarToolWrapper& getToolWrapper () const; /// get the contained tool - [[nodiscard]] IColumnarTool* getTool () const; + [[nodiscard]] IColumnarTool* getTool (); 
@@ -128,19 +128,19 @@ namespace columnar std::vector<T> typedData; for (auto& value : data) typedData.emplace_back (extractAny<T> (name, value)); - inputs.emplace (name, std::move (typedData)); + m_inputs.emplace (name, std::move (typedData)); } template<typename T> void addTypedColumn (const std::string& name, std::vector<T> data) { - auto column = columnMap.find (name); - if (column == columnMap.end()) + auto column = m_columnMap.find (name); + if (column == m_columnMap.end()) throw std::runtime_error ("adding unknown column: " + name); - if (inputs.contains (name)) + if (m_inputs.contains (name)) throw std::runtime_error ("column added twice: " + name); if (column->second.type != &typeid(T)) throw std::runtime_error ("column " + name + " has wrong type: " + column->second.type->name()); - inputs.emplace (name, std::move (data)); + m_inputs.emplace (name, std::move (data)); } template<typename T> void addExpectationTyped (const std::string& name, std::vector<std::any> data) @@ -148,18 +148,18 @@ namespace columnar std::vector<T> typedData; for (auto& value : data) typedData.emplace_back (extractAny<T> (name, value)); - expectations.emplace (name, std::move (typedData)); + m_expectations.emplace (name, std::move (typedData)); } ColumnarOffsetType columnSize (const std::string& name); template<typename T> std::span<const T> getOutputColumn (const std::string& name) { - auto info = columnMap.find (name); - if (info == columnMap.end()) + auto info = m_columnMap.find (name); + if (info == m_columnMap.end()) throw std::runtime_error ("output column not found: " + name); - auto iter = activeColumns.find (name); - if (iter == activeColumns.end()) + auto iter = m_activeColumns.find (name); + if (iter == m_activeColumns.end()) throw std::runtime_error ("output column not set: " + name); if (!std::holds_alternative<std::vector<T>> (iter->second)) throw std::runtime_error ("output column has wrong type: " + name); @@ -168,15 +168,15 @@ namespace columnar template<typename T> 
void checkExpectationTyped (const std::string& columnName) { - auto outputIter = activeColumns.find (columnName); - if (outputIter == activeColumns.end()) + auto outputIter = m_activeColumns.find (columnName); + if (outputIter == m_activeColumns.end()) throw std::runtime_error ("output column not set: " + columnName); if (!std::holds_alternative<std::vector<T>> (outputIter->second)) throw std::runtime_error ("output column has wrong type: " + columnName); auto& output = std::get<std::vector<T>> (outputIter->second); - auto expectationIter = expectations.find (columnName); - if (expectationIter == expectations.end()) + auto expectationIter = m_expectations.find (columnName); + if (expectationIter == m_expectations.end()) throw std::runtime_error ("output column not found: " + columnName); if (!std::holds_alternative<std::vector<T>> (expectationIter->second)) throw std::runtime_error ("output column has wrong type: " + columnName); @@ -192,7 +192,7 @@ namespace columnar else EXPECT_EQ (output[index], expectation[index]); } - std::cout << " columnMap.setExpectation (\"" << columnName << "\", {"; + std::cout << " m_columnMap.setExpectation (\"" << columnName << "\", {"; for (std::size_t index = 0; index != expectation.size(); ++ index) { if (index != 0) @@ -207,15 +207,15 @@ namespace columnar std::cout << "});" << std::endl; } - ColumnarTestToolHandle* toolHandle = nullptr; + ColumnarTestToolHandle* m_toolHandle = nullptr; std::unique_ptr<ColumnarToolWrapperData> m_columnData; - std::unordered_map<std::string,const ColumnInfo> columnMap; + std::unordered_map<std::string,const ColumnInfo> m_columnMap; - std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> inputs; - std::unordered_map<std::string, 
std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> activeColumns; - std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> expectations; + std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> m_inputs; + std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> m_activeColumns; + std::unordered_map<std::string, std::variant<std::vector<float>,std::vector<char>,std::vector<int>,std::vector<std::uint8_t>,std::vector<std::uint16_t>,std::vector<std::uint32_t>,std::vector<std::uint64_t>>> m_expectations; }; } diff --git a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx index d0229616316c..3c35ca22e174 100644 --- a/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx +++ b/PhysicsAnalysis/Columnar/ColumnarTestFixtures/Root/ColumnarMemoryTest.cxx @@ -69,7 +69,7 @@ namespace columnar void ColumnarMemoryTest::ColumnarTestToolHandle :: - renameContainers (const std::vector<std::pair<std::string,std::string>> renames) + renameContainers (const std::vector<std::pair<std::string,std::string>>& renames) { columnar::renameContainers (*m_tool, renames); if (m_toolWrapper) @@ -145,7 +145,7 @@ namespace columnar [[nodiscard]] IColumnarTool* ColumnarMemoryTest::ColumnarTestToolHandle :: - getTool () const + getTool () { return m_tool; } @@ -154,11 +154,11 @@ namespace columnar 
ColumnarMemoryTest::ColumnMapType :: ColumnMapType (ColumnarTestToolHandle& val_toolHandle) - : toolHandle (&val_toolHandle), m_columnData (std::make_unique<ColumnarToolWrapperData> (&val_toolHandle.getToolWrapper())) + : m_toolHandle (&val_toolHandle), m_columnData (std::make_unique<ColumnarToolWrapperData> (&val_toolHandle.getToolWrapper())) { - for (auto& column : toolHandle->getColumnInfo()) + for (auto& column : m_toolHandle->getColumnInfo()) { - auto [iter, success] = columnMap.emplace (column.name, ColumnInfo {column}); + auto [iter, success] = m_columnMap.emplace (column.name, ColumnInfo {column}); if (!success) throw std::runtime_error ("column already exists: " + column.name); } @@ -169,10 +169,10 @@ namespace columnar void ColumnarMemoryTest::ColumnMapType :: addColumn (const std::string& name, std::vector<std::any> data) { - auto column = columnMap.find (name); - if (column == columnMap.end()) + auto column = m_columnMap.find (name); + if (column == m_columnMap.end()) throw std::runtime_error ("adding unknown column: " + name); - if (inputs.contains (name)) + if (m_inputs.contains (name)) throw std::runtime_error ("column added twice: " + name); if (column->second.type == &typeid(float)) addInputTyped<float> (name, data); @@ -197,10 +197,10 @@ namespace columnar void ColumnarMemoryTest::ColumnMapType :: setExpectation (const std::string& name, std::vector<std::any> values) { - auto column = columnMap.find (name); - if (column == columnMap.end()) + auto column = m_columnMap.find (name); + if (column == m_columnMap.end()) throw std::runtime_error ("adding unknown column: " + name); - if (expectations.contains (name)) + if (m_expectations.contains (name)) throw std::runtime_error ("column added twice: " + name); if (column->second.type == &typeid(float)) addExpectationTyped<float> (name, values); @@ -225,8 +225,8 @@ namespace columnar ColumnarOffsetType ColumnarMemoryTest::ColumnMapType :: columnSize (const std::string& name) { - auto iter = inputs.find 
(name); - if (iter == inputs.end()) + auto iter = m_inputs.find (name); + if (iter == m_inputs.end()) throw std::runtime_error ("column not found: " + name); return std::visit ([] (const auto& data) { return data.size(); }, iter->second); } @@ -236,9 +236,9 @@ namespace columnar void ColumnarMemoryTest::ColumnMapType :: connectColumnsToTool () { - for (auto& [name, columnInfo] : columnMap) + for (auto& [name, columnInfo] : m_columnMap) { - if (auto iter = inputs.find (columnInfo.name); iter != inputs.end()) + if (auto iter = m_inputs.find (columnInfo.name); iter != m_inputs.end()) { switch (columnInfo.accessMode) { @@ -253,7 +253,7 @@ namespace columnar std::visit ([&] (auto& data) { m_columnData->setColumn (iter->first, data.size(), data.data()); - }, activeColumns[iter->first] = iter->second); + }, m_activeColumns[iter->first] = iter->second); break; default: throw std::runtime_error ("column mode unknown: " + columnInfo.name); @@ -275,13 +275,13 @@ namespace columnar void ColumnarMemoryTest::ColumnMapType :: checkExpectations () { - for (auto& [name, info] : columnMap) + for (auto& [name, info] : m_columnMap) { if (info.accessMode == ColumnAccessMode::input) continue; - auto iter = activeColumns.find (name); - if (iter == activeColumns.end()) + auto iter = m_activeColumns.find (name); + if (iter == m_activeColumns.end()) continue; if (info.type == &typeid(float)) diff --git a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt index 11bec047650a..e367948bdfb7 100644 --- a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt +++ b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/CMakeLists.txt @@ -8,4 +8,5 @@ atlas_subdir( ColumnarToolWrapper ) atlas_add_library (ColumnarToolWrapperLib ColumnarToolWrapper/*.h Root/*.cxx PUBLIC_HEADERS ColumnarToolWrapper - LINK_LIBRARIES ColumnarInterfacesLib ) + LINK_LIBRARIES ColumnarInterfacesLib + PRIVATE_LINK_LIBRARIES CxxUtils ) diff --git 
a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx index 298ed741441b..b48d37c8c0b3 100644 --- a/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx +++ b/PhysicsAnalysis/Columnar/ColumnarToolWrapper/Root/ColumnarToolWrapper.cxx @@ -13,6 +13,7 @@ #include <ColumnarInterfaces/ColumnInfo.h> #include <ColumnarInterfaces/IColumnarTool.h> +#include <CxxUtils/checker_macros.h> #include <algorithm> #include <cstdint> #include <stdexcept> @@ -134,7 +135,8 @@ namespace columnar if (m_columnIsFilled[column->second.index]) throw std::runtime_error ("column filled multiple times: " + name); m_columnIsFilled[column->second.index] = true; - m_data[column->second.index] = const_cast<void*>(dataPtr); + auto *castDataPtr ATLAS_THREAD_SAFE = const_cast<void*>(dataPtr); + m_data[column->second.index] = castDataPtr; m_dataSize[column->second.index] = size; } @@ -156,7 +158,8 @@ namespace columnar if (m_columnIsFilled[column->second.index]) throw std::runtime_error ("column filled multiple times: " + name); m_columnIsFilled[column->second.index] = true; - m_data[column->second.index] = const_cast<void*>(dataPtr); + auto *castDataPtr ATLAS_THREAD_SAFE = const_cast<void*>(dataPtr); + m_data[column->second.index] = castDataPtr; m_dataSize[column->second.index] = size; } -- GitLab From 33a8ae9cf5b27bafcace9087aefdc3d746c5dbd6 Mon Sep 17 00:00:00 2001 From: Nils Krumnack <nils.erik.krumnack@cern.ch> Date: Tue, 25 Mar 2025 08:17:34 -0500 Subject: [PATCH 4/4] remove ATLAS_NO_CHECK_FILE_THREAD_SAFETY from test --- .../ColumnarExampleTools/test/gt_ColumnarToolTests.cxx | 3 --- 1 file changed, 3 deletions(-) diff --git a/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx b/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx index 40d6aedc5b76..66125fc46a75 100644 --- 
a/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx +++ b/PhysicsAnalysis/Columnar/ColumnarExampleTools/test/gt_ColumnarToolTests.cxx @@ -10,9 +10,6 @@ // includes // -#include "CxxUtils/checker_macros.h" -ATLAS_NO_CHECK_FILE_THREAD_SAFETY; - #include <AsgTesting/UnitTest.h> #include <AsgTools/AsgToolConfig.h> #include <ColumnarTestFixtures/ColumnarMemoryTest.h> -- GitLab