diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fb60ca72a7c624abce3e2f26edede713ff050ac8..c331573a9725750eb56c0cb54b8a6eb4381522a3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,12 +4,20 @@ compilation: stage: build tags: - cvmfs - before_script: - - cd /lib64 - - ln -s libssl.so.1.1.1k libssl.so - - ln -s libcrypto.so.1.1.1k libcrypto.so - - cd - script: - source /cvmfs/cms.cern.ch/cmsset_default.sh - - yum install -y python3 zsh + - yum install -y openssl-devel python3 zsh - make + +cmake: + stage: build + tags: + - cvmfs + script: + - source /cvmfs/cms.cern.ch/cmsset_default.sh + - yum install -y cmake3 openssl-devel python3 zsh + - ./install.sh + - cd CMSSW*/ + - shopt -s expand_aliases # allows aliases like cmsenv to be used + - cmsenv + - scram b -j$(nproc) diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b966ebdf3d8ace07b87c6c874e36028d012e1bf --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# SPDX-FileCopyrightText: Louis Moureaux <louis.moureaux@cern.ch> + +cmake_minimum_required(VERSION 3.17...3.28 FATAL_ERROR) + +# Set a useful default install directory but let the user override it +set(CMAKE_INSTALL_PREFIX "${CMAKE_SOURCE_DIR}/tools" CACHE PATH + "Install path prefix, prepended onto install directories.") + +# Also set a default build type - optimized build with debug information +set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING + "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...") + +project(Installer VERSION 1.0 LANGUAGES C) # Need a language for GNUInstallDirs + +include(ExternalProject) +include(GNUInstallDirs) + +install(FILES libgit2.xml TYPE SYSCONF) + +set(TUNFOLD_HEADERS TUnfold.h + TUnfoldBinning.h + TUnfoldBinningXML.h + TUnfoldDensity.h + TUnfoldIterativeEM.h + TUnfoldSys.h +) +ExternalProject_Add(TUnfold + URL https://www.desy.de/~sschmitt/TUnfold/TUnfold_V17.9.tgz + URL_HASH 
SHA256=d7f66f6a0e007eb946180643b8879bb2b8918441106bc0305b82a97391a391dc + SOURCE_DIR "${CMAKE_SOURCE_DIR}/TUnfold" + CONFIGURE_COMMAND "" + BUILD_COMMAND make lib TUNFOLDVERSION='V17' + BUILD_IN_SOURCE TRUE + INSTALL_COMMAND install -DT libunfold.so <INSTALL_DIR>/${CMAKE_INSTALL_LIBDIR}/libtunfold.so + COMMAND install TUnfoldV17Dict_rdict.pcm <INSTALL_DIR>/${CMAKE_INSTALL_LIBDIR}/ + COMMAND install -d <INSTALL_DIR>/${CMAKE_INSTALL_INCLUDEDIR}/TUnfold + COMMAND install ${TUNFOLD_HEADERS} <INSTALL_DIR>/${CMAKE_INSTALL_INCLUDEDIR}/TUnfold/ + INSTALL_DIR "${CMAKE_INSTALL_PREFIX}" +) +configure_file(tunfold.xml.in tunfold.xml) +install(FILES "${CMAKE_BINARY_DIR}/tunfold.xml" TYPE SYSCONF) + +ExternalProject_Add(Darwin + GIT_REPOSITORY https://gitlab.cern.ch/Proto/Darwin.git + GIT_TAG origin/master + SOURCE_DIR "${CMAKE_SOURCE_DIR}/Darwin" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + INSTALL_DIR "${CMAKE_INSTALL_PREFIX}" + TEST_AFTER_INSTALL ON +) +configure_file(darwin.xml.in darwin.xml) +install(FILES "${CMAKE_BINARY_DIR}/darwin.xml" TYPE SYSCONF) + +ExternalProject_Add(tables + GIT_REPOSITORY https://gitlab.cern.ch/cms-analysis/general/DasAnalysisSystem/tables.git + GIT_TAG origin/master + SOURCE_DIR "${CMAKE_SOURCE_DIR}/tables" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" +) diff --git a/Makefile b/Makefile index 5a108b25aadcb4f945e94518fe69444a1cb94ef0..3210ae335f53ca94486c87a4c5e094577bce5d75 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,14 @@ CMSSW ?= CMSSW_12_4_0 GITLAB ?= https://gitlab.cern.ch -ORIGIN ?= $(GITLAB)/DasAnalysisSystem/Core.git +ORIGIN ?= $(GITLAB)/cms-analysis/general/DasAnalysisSystem/Core.git BASE ?= $(PWD) -TABLES ?= $(GITLAB)/DasAnalysisSystem/tables.git +TABLES ?= $(GITLAB)/cms-analysis/general/DasAnalysisSystem/tables.git .PHONY: all clean .NOTPARALLEL: all -all: $(CMSSW) libgit2 TUnfold ProtoDarwin tables +all: $(CMSSW) libgit2 TUnfold Darwin tables cd $(CMSSW) && \ eval 
`scramv1 runtime -sh` && \ scram b @@ -25,7 +25,7 @@ TUnfold: $(CMSSW) cd $(BASE)/$(CMSSW) && scram setup $(BASE)/tunfold.xml PlottingHelper: $(CMSSW) - git clone $(GITLAB)/DasAnalysisSystem/PlottingHelper.git + git clone $(GITLAB)/cms-analysis/general/DasAnalysisSystem/PlottingHelper.git cd $(CMSSW) && eval `scramv1 runtime -sh` && cd - && \ make -C PlottingHelper all -j2 @@ -36,12 +36,12 @@ FastNLO: # TODO # TODO: improve make statement & better interface with content of xml file (using `xmllint` --> check availability in CI) -ProtoDarwin: $(CMSSW) libgit2 tables - git clone $(GITLAB)/paconnor/ProtoDarwin.git +Darwin: $(CMSSW) libgit2 tables + git clone $(GITLAB)/Proto/Darwin.git cd $(CMSSW) && eval `scramv1 runtime -sh` && \ BOOST=$$(scram tool tag boost_header BOOSTHEADER_BASE) && \ - cd $(BASE)/ProtoDarwin && make -j BOOST=$$BOOST && \ - cd $(BASE)/$(CMSSW) && scram setup $(BASE)/protodarwin.xml + cd $(BASE)/Darwin && make -j BOOST=$$BOOST && \ + cd $(BASE)/$(CMSSW) && scram setup $(BASE)/darwin.xml libgit2: $(CMSSW) cd $(BASE)/$(CMSSW) && eval `scramv1 runtime -sh` && scram setup $(BASE)/libgit2.xml @@ -50,10 +50,11 @@ $(CMSSW): scramv1 project CMSSW $(CMSSW) cd $(CMSSW)/src && eval `scramv1 runtime -sh` && \ git clone https://github.com/cms-jet/JetToolbox.git JMEAnalysis/JetToolbox -b jetToolbox_120X && \ + (cd JMEAnalysis/JetToolbox && patch -p1 < ../../../../jetToolbox_120X.patch) && \ git clone $(ORIGIN) Core tables: git clone $(TABLES) $@ clean: - @rm -rf $(CMSSW) TUnfold ProtoDarwin tables + @rm -rf $(CMSSW) TUnfold Darwin tables diff --git a/README.md b/README.md index 35311447890cd886f828adca820a028fbe4c6869..58059585de3a6a421ec35085d9771c76f1e5a781 100644 --- a/README.md +++ b/README.md @@ -2,46 +2,40 @@ Das Analysis System is a general project involving modular tools for physics analysis with high-level objects such as jets, leptons, and photons. 
The principle is to perform physics directly from the shell in atomic steps, applying one correction at a time and treating all systematic uncertainties simultaneously. -The group is divided in several repositories: +The GitLab group is divided into several repositories: +- The `Installer` repo contains the necessary scripts to install the suite. - The `Core` repo corresponds to a CMSSW module and is where most of the code for the analysis of CMS data is to be found. - The `Tables` repo contains the calibration of the high-level objects. -- The `Installer` repo contains the necessary scripts to install the suite. +- The `Darwin` repo is a mirror of a [general toolkit](https://protodarwin.docs.cern.ch) for physics analysis. - `PlottingHelper` is a useful [library](https://github.com/zleba/PlottingHelper) originally made by Radek Zlebcík (Charles University) to help make plots with ROOT. -- The `Darwin` repo is a mirror of a [non-CMS toolkit](https://protodarwin.docs.cern.ch) for physics analysis. -The `Campaigns` subgroup contains actual repos corresponding to different analyses with configs, plotting macros, and possibly CRAB outputs, or anything relevant to the reproducibility of an analysis. +The `Campaigns` subgroup contains actual repos corresponding to different analyses with configs, plotting macros, and possibly CRAB outputs, or anything relevant to the reproducibility of an analysis. A template repo is provided with suggestions and guidelines. ## Installation -### From scratch +### From scratch with the default installer -In general, it is recommended that you install the software on a fast disk to ensure fast compilation, but that you process the heavy n-tuples on a dedicated area (e.g. NFS at DESY, EOS at CERN). +In general, it is recommended that you install the software on a fast disk to ensure fast compilation (e.g. AFS), but that you process the heavy n-tuples on a dedicated area (e.g. NFS at DESY, EOS at CERN). 
Keeping the software neat and clean is important for reproducibility. -First clone the installer: -``` -git clone -``` -Then source the minimal environment for CMSSW, CRAB, and RUCIO: +1. Clone the `Installer` in a directory (we propose `DasAnalysisSystem`). Then source the minimal environment for CMSSW, CRAB, and RUCIO from that directory: ``` cd DasAnalysisSystem source ./setup ``` It is currently working at CERN (CH), at DESY (DE), at IIHE (BE), and for GitLab CI. Feel free to make a merge request to include your favourite facility. - -Finally, the installation is trivial: +2. Run the installation: ``` -make +./install.sh ``` -This takes a few minutes, then you're all set. - -#### Remarks - -1. You may overwrite (at your own risks) the CMSSW release by running `make CMSSW=CMSSW_X_Y_Z` where you tune `X`, `Y`, and `Z` to the values of the release that you want to test. This may be necessary to run over certain data sets; however, we cannot guarantee that the code will compile nor provide the same result as in the default release. -2. `git-cms-init` is not run, since it is a priori not useful for the framework to run, takes 100MB of space, and makes the installation slower. If it may happen to be useful, you have to add it in the Makefile. It must be run right after setting up the release, when it is locally still empty. -3. To use SSH instead of HTTPS, run `make GITLAB=ssh://git@gitlab.cern.ch:7999`. -4. The `PlottingHelper` is not cloned by default as it is only useful for plotting purposes. +*Remark*: if a CMSSW release has already been sourced, it will use that one instead of installing a new one. +3. Then change to the directory of the CMSSW release (by default, it is created in the local directory) and compile as follows: +``` +cmsenv +scram b -j$(nproc) +``` +4. After a few minutes, you're all set. You may have to rerun `cmsenv` to actually see the commands in the shell. -### From an existing CMSSW release or from an existing installation. 
+### By hand, step by step In the following, we explain the installation of the framework step by step. A few packages external to CMSSW are necessary (e.g. TUnfold), which you will need to tell CMSSW how to find them with `scram setup`. @@ -56,35 +50,26 @@ tar xvzf TUnfold_V17.9.tgz make lib TUNFOLDVERSION='V17' -j mv -f libunfold.so lib/libtunfold.so mv -f TUnfoldV17Dict_rdict.pcm lib/ -cd $CMSSW_BASE -scram setup $OLDPWD/../tunfold.xml -cd $OLDPWD/.. +(cd $CMSSW_BASE && scram setup $OLDPWD/../tunfold.xml) ``` 2. Tell CMSSW where to find libgit2: ``` -cd $CMSSW_BASE -scram setup $OLDPWD/libgit2.xml -cd $OLDPWD +(cd $CMSSW_BASE && scram setup $OLDPWD/libgit2.xml) ``` -3. Install ProtoDarwin: +3. Install Darwin: ``` -git clone $(GITLAB)/paconnor/ProtoDarwin.git +git clone https://gitlab.cern.ch/Proto/Darwin.git cmake3 -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$PWD/build/install cmake3 --build build -j`nproc` cmake3 --build build --target install -cd $CMSSW_BASE -scram setup $OLDPWD/../protodarwin.xml -cd $OLDPWD/.. +(cd $CMSSW_BASE && scram setup $OLDPWD/../darwin.xml) ``` -4. Clone the `Tables` wherever you like and the `Core` in CMSSW, and compile as usual with `scram b -j`. - -#### Remark - -If you want to rerun jet clustering (e.g. to have AK8 jets at a low transverse momentum), you need to clone the [JetToolbox](https://github.com/cms-jet/JetToolbox.git): +4. If you want to rerun jet clustering (e.g. to have AK8 jets at a low transverse momentum), you need to clone the branch `jetToolbox_120X` from the [JetToolbox](https://github.com/cms-jet/JetToolbox.git), and apply a minor patch with the following commands: ``` -git clone https://github.com/cms-jet/JetToolbox.git JMEAnalysis/JetToolbox -b jetToolbox_120X +cd $CMSSW_BASE/src/JMEAnalysis/JetToolbox +patch -p1 < $OLDPWD/jetToolbox_120X.patch ``` -That version needs a minor [amendment](https://gitlab.cern.ch/DasAnalysisSystem/gitlab-profile/-/issues/5) to run. +5. 
Clone the `Tables` wherever you like and the `Core` in CMSSW, and compile as usual with `scram b -j`. ## Setting up the environment @@ -95,14 +80,14 @@ To run CRAB jobs, you will also need to set up a valid [grid certificate](https: voms-proxy-init --rfc --voms cms -valid 192:00 ``` -To make RUCIO requests, the `setup` file tried to guess your RUCIO username from your local username (unless was already set up). This is not guaranteed to work and you may have to define your RUCIO username ahead for sourcing the DAS environment. +To make [RUCIO](https://twiki.cern.ch/twiki/bin/viewauth/CMS/Rucio) requests, the `setup` file tries to guess your RUCIO username from your local username (unless it was already set up). This is not guaranteed to work and you may have to define your RUCIO username before sourcing the DAS environment. ### Good practices Fight against the increase of entropy by - pulling regularly, -- commiting regularly, +- committing regularly, - document your code, -- making regular merge requests. +- making frequent merge requests. Happy analysis! 
diff --git a/protodarwin.xml b/darwin.xml similarity index 93% rename from protodarwin.xml rename to darwin.xml index 40a39375589cbe2478d702f0d80be13d59e4652b..3d5d029d0893ecc63287aca2cb48eb0d864046f4 100644 --- a/protodarwin.xml +++ b/darwin.xml @@ -6,7 +6,7 @@ <lib name="DarwinDict"/> <info url="https://protodarwin.docs.cern.ch"/> <client> - <environment name="DARWIN" default="$CMSSW_BASE/../ProtoDarwin"/> + <environment name="DARWIN" default="$CMSSW_BASE/../Darwin"/> <environment name="INCLUDE" default="$DARWIN/interface"/> <environment name="LIBDIR" default="$DARWIN/build/lib"/> </client> diff --git a/darwin.xml.in b/darwin.xml.in new file mode 100644 index 0000000000000000000000000000000000000000..49896af108c40ac585dd543f0735c0e91d5b7da0 --- /dev/null +++ b/darwin.xml.in @@ -0,0 +1,21 @@ +<tool name="protodarwin" version="1.0"> <!-- TODO --> + <lib name="DarwinUserInfo"/> + <lib name="DarwinMetaInfo"/> + <lib name="DarwinOptions"/> + <lib name="DarwinObjects"/> + <info url="https://protodarwin.docs.cern.ch"/> + <client> + <environment name="DARWIN" default="${CMAKE_SOURCE_DIR}"/> + <environment name="INCLUDE" default="${CMAKE_INSTALL_FULL_INCLUDEDIR}/ProtoDarwin"/> + <environment name="LIBDIR" default="${CMAKE_INSTALL_FULL_LIBDIR}"/> + </client> + <use name="boost" /> + <use name="boost_program_options" /> + <runtime name="DARWIN_BASE" value="${CMAKE_SOURCE_DIR}" type="path"/> + <runtime name="DARWIN_FIRE_AND_FORGET" value="$CMSSW_BASE/lib/$SCRAM_ARCH" type="path"/><!-- TODO --> + <runtime name="PATH" value="${CMAKE_INSTALL_FULL_BINDIR}" type="path"/> + <runtime name="LD_LIBRARY_PATH" value="${CMAKE_INSTALL_FULL_LIBDIR}" type="path"/> + <runtime name="PYTHON3PATH" value="${CMAKE_INSTALL_PREFIX}/python" type="path"/> + <runtime name="DARWIN_GIT_REPO" value="$CMSSW_BASE/src/Core" type="path"/><!-- TODO --> + <runtime name="DARWIN_TABLES" value="${CMAKE_SOURCE_DIR}/tables" type="path"/> +</tool> diff --git a/install.sh b/install.sh new file mode 100755 index 
0000000000000000000000000000000000000000..d75df5b58b62d4a79df4fcefbb52e343d34ed69a --- /dev/null +++ b/install.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +set -e + +BASE_DIR=$PWD + +# Get a CMSSW if we don't have one +if [ -z "$CMSSW_BASE" ]; then + version=CMSSW_12_4_0 + [ ! -d "$version" ] && scram p "$version" + cd "$version" + eval `scramv1 runtime -sh` + unset version +fi + +# Set up libgit2 +cd "$CMSSW_BASE" +scram setup "$BASE_DIR/libgit2.xml" + +# Download and build supporting tools +cd "$BASE_DIR" +cmake3 -B .build +cmake3 --build .build --target install -j$(nproc) + +# Get Core +cd "$CMSSW_BASE/src" +[ ! -d Core ] && git clone https://gitlab.cern.ch/cms-analysis/general/DasAnalysisSystem/Core.git + +# And JetToolbox +[ ! -d JMEAnalysis/JetToolbox ] && ( + git clone https://github.com/cms-jet/JetToolbox.git JMEAnalysis/JetToolbox -b jetToolbox_120X + cd JMEAnalysis/JetToolbox + patch -p1 <"$BASE_DIR/jetToolbox_120X.patch" +) + +# Set up scram tools in CMSSW -- must be done after fetching Core +cd "$CMSSW_BASE" +for xml in "$BASE_DIR"/tools/etc/*.xml; do + scram setup "$xml" +done diff --git a/jetToolbox_120X.patch b/jetToolbox_120X.patch new file mode 100644 index 0000000000000000000000000000000000000000..5151f912f2d9fe17616a5458a2b05679d9b4068e --- /dev/null +++ b/jetToolbox_120X.patch @@ -0,0 +1,26 @@ +diff --git a/python/jetToolbox_cff.py b/python/jetToolbox_cff.py +index f545dea..6c3e422 100644 +--- a/python/jetToolbox_cff.py ++++ b/python/jetToolbox_cff.py +@@ -164,9 +164,9 @@ def jetToolbox( proc, jetType, jetSequence, outputFile, + ] + + if not bTagDiscriminators: +- if jetSize == 0.4: bTagDiscriminators = defaultBTagDiscriminators +- elif jetSize == 0.8: bTagDiscriminators = defaultBoostedBTagDiscriminators +- else: ++ #if jetSize == 0.4: bTagDiscriminators = defaultBTagDiscriminators ++ #elif jetSize == 0.8: bTagDiscriminators = defaultBoostedBTagDiscriminators ++ #else: + bTagDiscriminators = None + print('|---- jetToolBox: btag discriminators are recommended for AK4 and AK8 
jets. For a different cone size, please especify the discriminators. Turning OFF all the default btag discriminators.') + if not subjetBTagDiscriminators: +@@ -1305,7 +1305,7 @@ def jetToolbox( proc, jetType, jetSequence, outputFile, + + ################################################################################# + ###### Computing jetID +- from PhysicsTools.NanoAOD.jets_cff import looseJetId, tightJetId, tightJetIdLepVeto ++ from PhysicsTools.NanoAOD.jetsAK4_CHS_cff import looseJetId, tightJetId, tightJetIdLepVeto + + mod["looseJetId"] = mod["PATJets"]+"looseJetId" + _addProcessAndTask( proc, mod["looseJetId"], looseJetId.clone(src = cms.InputTag(mod["PATJets"]) )) diff --git a/tunfold.xml.in b/tunfold.xml.in new file mode 100644 index 0000000000000000000000000000000000000000..f19a1e205cc62b5a6fd23bf95ea65b468e5257fa --- /dev/null +++ b/tunfold.xml.in @@ -0,0 +1,13 @@ +<tool name="tunfold" version="17.9"> + <lib name="tunfold"/> + <info url="https://www.desy.de/~sschmitt/tunfold.html"/> + <client> + <environment name="TUNFOLD_BASE" default="${CMAKE_SOURCE_DIR}/TUnfold"/><!-- FIXME Needed? --> + <environment name="INCLUDE" default="${CMAKE_INSTALL_FULL_INCLUDEDIR}/TUnfold"/> + <environment name="LIBDIR" default="${CMAKE_INSTALL_FULL_LIBDIR}"/> + </client> + <use name="root_cxxdefaults"/> + <lib name="XMLIO" /> + <lib name="XMLParser" /> + <runtime name="LD_LIBRARY_PATH" value="$LIBDIR" type="path"/> +</tool>