From ada7fce3a64a86ad9f0dd3b51fde3ff26d2d36cb Mon Sep 17 00:00:00 2001
From: Ho Chun Lau <ho.chun@cern.ch>
Date: Tue, 7 Jan 2025 15:13:56 +0100
Subject: [PATCH] first version of a run script for the CP Algorithms

Added printing of the flags; many flags are unavailable when only calling initConfigFlags. Added a CMake test for CPRun.py.
---
 .../AnalysisAlgorithmsConfig/CMakeLists.txt   |  17 +++
 .../AnalysisAlgorithmsConfig/scripts/CPRun.py | 136 ++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100755 PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/scripts/CPRun.py

diff --git a/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/CMakeLists.txt b/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/CMakeLists.txt
index f01ab75f2f2..950cb36cf07 100644
--- a/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/CMakeLists.txt
+++ b/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/CMakeLists.txt
@@ -7,8 +7,12 @@ atlas_subdir( AnalysisAlgorithmsConfig )
 atlas_install_python_modules( python/*.py )
 atlas_install_data( data/* )
 
+set( CONFIG_PATH "${CMAKE_CURRENT_LIST_DIR}/data/test_configuration_Run2.yaml" ) # NOTE(review): not referenced in the visible hunks - confirm it is needed
+set( ASG_TEST_FILE_MC "/cvmfs/atlas-nightlies.cern.ch/repo/data/data-art/ASG/DAOD_PHYS/p6490/mc20_13TeV.410470.PhPy8EG_A14_ttbar_hdamp258p75_nonallhad.deriv.DAOD_PHYS.e6337_s3681_r13167_r13146_p6490/DAOD_PHYS.41651753._000001.pool.root.1") # input file handed to the CPRun.py test via --input-file
+
 if( XAOD_STANDALONE )
    atlas_install_scripts( scripts/*_eljob.py )
+   atlas_install_scripts( scripts/CPRun.py )
 else()
    atlas_install_scripts( scripts/*_CA.py POST_BUILD_CMD ${ATLAS_FLAKE8} )
 endif()
@@ -22,3 +26,16 @@ atlas_add_test( ConfigTextCompareBlock
    SCRIPT python/ConfigText_unitTest.py --text-config AnalysisAlgorithmsConfig/test_configuration_Run2.yaml --compare-block --check-order
    POST_EXEC_SCRIPT nopost.sh
    PROPERTIES TIMEOUT 30 )
+
+if( XAOD_STANDALONE )
+
+   # this test checks that the CPRun.py run script can configure and run a job
+   # of reasonable complexity from the YAML text configuration.  it is limited
+   # to 150 events (-e 150) of the input file set in ASG_TEST_FILE_MC.
+   atlas_add_test( TestCPRunFullSim
+      SCRIPT CPRun.py --work-dir cmake_test --text-config AnalysisAlgorithmsConfig/test_configuration_Run2.yaml -e 150 --input-file ${ASG_TEST_FILE_MC}
+      POST_EXEC_SCRIPT nopost.sh
+      PROPERTIES TIMEOUT 900
+      )
+
+endif()
\ No newline at end of file
diff --git a/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/scripts/CPRun.py b/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/scripts/CPRun.py
new file mode 100755
index 00000000000..548d6f8b7f3
--- /dev/null
+++ b/PhysicsAnalysis/Algorithms/AnalysisAlgorithmsConfig/scripts/CPRun.py
@@ -0,0 +1,136 @@
+#! /usr/bin/env python
+
+# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
+import argparse
+import logging
+from AthenaConfiguration.AllConfigFlags import initConfigFlags
+from AnalysisAlgorithmsConfig.ConfigText import TextConfig
+import ROOT
+from ROOT import PathResolver
+from AnaAlgorithm.AlgSequence import AlgSequence
+from AnalysisAlgorithmsConfig.ConfigAccumulator import ConfigAccumulator
+
+logger = logging.getLogger(__name__)  # module-level logger used by the functions in this script
+
+def parseArguments():
+    """Parse the command line; exit with a usage error if the config or inputs are missing."""
+    parser = argparse.ArgumentParser(description='Runscript for CP Algorithm unit tests')
+    parser.add_argument('--input-list', help='path to text file containing list of input files')
+    parser.add_argument('--input-file', help='path to a single input file')
+    parser.add_argument('--work-dir', default='workDir',
+                        help='path to work directory, containing output and intermediate files')
+    parser.add_argument('-e', '--max-events', type=int, default=-1,
+                        help='Number of events to run')
+    # required: the YAML configuration is always read, so a missing path can never work
+    parser.add_argument('-t', '--text-config', required=True,
+                        help='path to the YAML configuration file')
+    parser.add_argument('--no-systematics', action='store_true', help='Disable systematics')
+    parser.add_argument('--direct-driver', action='store_true',
+                        help='Run the job with the direct driver')
+    args = parser.parse_args()
+    if not (args.input_list or args.input_file):
+        parser.error('no input given, use --input-list or --input-file')
+    return args
+
+def parseInputFileList(path):
+    """Return the input file names listed in the text file at ``path``.
+
+    A line may hold several comma-separated names.  Blank lines, lines whose
+    first non-blank character is '#', and empty entries are skipped."""
+    files = []
+    with open(path, 'r') as inputText:
+        for line in inputText:
+            if not line.lstrip().startswith('#'):
+                files += [entry.strip() for entry in line.split(',') if entry.strip()]
+    return files
+
+def makeJob(sampleHandler, args):  # build the EL.Job for the given sample handler
+    elJob = ROOT.EL.Job()
+    elJob.sampleHandler(sampleHandler)
+    elJob.options().setString(ROOT.EL.Job.optSubmitDirMode, 'unique-link')
+    elJob.options().setDouble(ROOT.EL.Job.optMaxEvents, args.max_events)
+    return elJob
+
+# copied from makeTextAlgSequence
+
+
+def readYamlConfig(yaml_path):
+    """Find ``yaml_path`` with PathResolver (recursive CALIBPATH search) and return the
+    TextConfig built from it; raise FileNotFoundError when the file cannot be located."""
+    yamlconfig = PathResolver.find_file(yaml_path, "CALIBPATH", PathResolver.RecursiveSearch)
+    if not yamlconfig:
+        raise FileNotFoundError(f'PathResolver failed to locate \"{yaml_path}\" config file!'
+                                ' Check if you have a typo in -t/--text-config argument or missing file in the analysis configuration sub-directory.')
+    logger.info("Setting up configuration based on YAML config:")
+    return TextConfig(yamlconfig)
+
+
+def makeAlgSequence(config, args, flags):
+    """Return a new AlgSequence after running the full configuration of ``config`` on it."""
+    sequence = AlgSequence()
+    logger.info("Configuring algorithms based on YAML file")
+    blocks = config.configure()
+    logger.info("Configuring common services")
+    accumulator = ConfigAccumulator(
+        algSeq=sequence, autoconfigFromFlags=flags, noSystematics=args.no_systematics)
+    logger.info("Configuring algorithms")
+    blocks.fullConfigure(accumulator)
+    return sequence
+
+# copied from the TopCP toolkit
+def prettyPrint(flags):
+    """
+    Log the input-related flags read from ``flags.Input``: whether the
+    input is MC, the run numbers, the MC campaign and the generator info.
+    """
+    logger.info("="*73)
+    logger.info("="*20 + "FLAG CONFIGURATION" + "="*20)
+    logger.info("="*73)
+    logger.info("   IsMC:            %s", flags.Input.isMC)
+    logger.info("   RunNumber:       %s", flags.Input.RunNumbers)
+    logger.info("   MCCampaign:      %s", flags.Input.MCCampaign)
+    logger.info("   GeneratorInfo:   %s", flags.Input.GeneratorsInfo)
+    logger.info("="*73)
+
+def main():
+    """Configure the CP algorithms and submit the EventLoop job; raise ValueError without inputs."""
+    args = parseArguments()
+    inputList = parseInputFileList(args.input_list) if args.input_list else [args.input_file]
+    if not inputList or None in inputList:
+        raise ValueError('No input files given, use --input-list or --input-file')
+
+    ROOT.xAOD.Init().ignore()
+    sampleHandler = ROOT.SH.SampleHandler()
+    sampleFiles = ROOT.SH.SampleLocal()
+    logger.info("Adding files to the sample handler")
+    for file in inputList:
+        sampleFiles.add(file)
+    sampleHandler.add(sampleFiles)
+
+    logger.info("Reading file metadata")
+    flags = initConfigFlags()
+    flags.Input.Files = inputList
+    flags.lock()
+    prettyPrint(flags)
+
+    config = readYamlConfig(args.text_config)
+    algSeq = makeAlgSequence(config, args, flags)
+    logger.info("Alg Sequence: %s", algSeq)
+
+    job = makeJob(sampleHandler, args)
+    for alg in algSeq:
+        job.algsAdd(alg)
+    job.outputAdd(ROOT.EL.OutputStream('ANALYSIS'))
+
+    driver = ROOT.EL.DirectDriver() if args.direct_driver else ROOT.EL.ExecDriver()
+    driver.submit(job, args.work_dir)
+# test
+
+
+if __name__ == '__main__':
+    # Script entry point: set up timestamped INFO-level logging on the root
+    # logger, then run the job.
+    logging.basicConfig(level=logging.INFO,
+                        datefmt="%Y-%m-%d %H:%M:%S",
+                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    main()
-- 
GitLab