Skip to content
Snippets Groups Projects
Commit 2fa29854 authored by ssummers's avatar ssummers
Browse files

Update part 3 for most recent model, hardware scales, and same datasets as part 1

parent 0cc6b531
Branches
No related tags found
No related merge requests found
......@@ -10,7 +10,7 @@ class L1TMLDemo_emulator_v1 : public hls4mlEmulator::Model {
private:
// Note: these need to match the defined model
static const int N_INPUT=26;
static const int N_INPUT=56;
static const int N_OUTPUT=1;
input_t _input[N_INPUT];
result_t _result[N_OUTPUT];
......@@ -33,7 +33,7 @@ public:
virtual void read_result(std::any result) {
// copy result
result_t *result_p = std::any_cast<result_t*>(result);
*result_p = _result;
*result_p = *_result;
}
};
......
......@@ -47,8 +47,12 @@ private:
unsigned nJet;
unsigned nNNIn;
typedef ap_fixed<16,6,AP_RND_CONV,AP_SAT> scale_t;
typedef ap_fixed<16,6,AP_RND_CONV,AP_SAT> bias_t;
// hls4ml emulator model path
std::string model_so_path;
std::vector<scale_t> scale;
std::vector<bias_t> bias;
};
......@@ -64,11 +68,18 @@ L1TMLDemoProducer::L1TMLDemoProducer(const edm::ParameterSet& cfg){
nTau = cfg.getParameter<unsigned>("nTau");
nJet = cfg.getParameter<unsigned>("nJet");
// total number of inputs to NN
nNNIn = 3 * (1 + nMu + nEG + nTau + nJet);
nNNIn = 2 + 3 * (nMu + nEG + nTau + nJet);
// store the path to the .so file
model_so_path = cfg.getParameter<std::string>("model_so_path");
// get the scaler parameters and cast them to fixed point types
std::vector<double> scale_double = cfg.getParameter<std::vector<double>>("scale");
std::transform(scale_double.begin(), scale_double.end(), std::back_inserter(scale), [](double s){ return (scale_t)s; });
// get the bias parameters and cast them to fixed point types
std::vector<double> bias_double = cfg.getParameter<std::vector<double>>("bias");
std::transform(bias_double.begin(), bias_double.end(), std::back_inserter(bias), [](double s){ return (bias_t)s; });
// produce
produces<nanoaod::FlatTable>("L1TMLDemo");
......@@ -92,55 +103,59 @@ void L1TMLDemoProducer::produce(edm::StreamID id, edm::Event& iEvent, const edm:
iEvent.getByToken(jetToken, jets);
iEvent.getByToken(sumToken, sums);
// The unscaled inputs are hwInts apart from ET that is in GeV with 0.5 GeV LSB
// The unscaled inputs are hwInts
// ap_fixed<14,13> is wide enough for all the ET, pT, eta, phi
ap_fixed<14,13>* X_unscaled = new ap_fixed<14,13>[nNNIn];
// initialize to zeros
for(unsigned i = 0; i < nNNIn; i++){
X_unscaled[i] = 0;
}
// fill the inputs
unsigned ix = 0;
// sums first, just find the MET
for(unsigned i = 0; i < sums->size(0); i++){
if(sums->at(0, i).getType() == l1t::EtSum::EtSumType::kMissingEt){
X_unscaled[ix++] = (float)(sums->at(0,i).hwPt())/2;
X_unscaled[ix++] = 0; // for eta
X_unscaled[ix++] = sums->at(0,i).hwPt();
X_unscaled[ix++] = sums->at(0,i).hwPhi();
}
}
// muons next
ix=3 * ( 1 );
for(unsigned i = 0; i < std::min(nMu, muons->size(0)); i++){
X_unscaled[ix++] = (float)(muons->at(0, i).hwPt())/2;
X_unscaled[ix++] = muons->at(0, i).hwEta();
X_unscaled[ix++] = muons->at(0, i).hwPhi();
// jets next
ix = 2 * ( 1 );
for(unsigned i = 0; i < std::min(nJet, jets->size(0)); i++){
X_unscaled[ix++] = jets->at(0, i).hwPt();
X_unscaled[ix++] = jets->at(0, i).hwEta();
X_unscaled[ix++] = jets->at(0, i).hwPhi();
}
// egammas next
ix = 3 * ( 1 + nMu );
ix = 2 * ( 1 + nJet );
for(unsigned i = 0; i < std::min(nEG, egammas->size(0)); i++){
X_unscaled[ix++] = (float)(egammas->at(0, i).hwPt())/2;
X_unscaled[ix++] = egammas->at(0, i).hwPt();
X_unscaled[ix++] = egammas->at(0, i).hwEta();
X_unscaled[ix++] = egammas->at(0, i).hwPhi();
}
// muons next
ix = 2 * ( 1 + nJet + nEG );
for(unsigned i = 0; i < std::min(nMu, muons->size(0)); i++){
X_unscaled[ix++] = muons->at(0, i).hwPt();
X_unscaled[ix++] = muons->at(0, i).hwEta();
X_unscaled[ix++] = muons->at(0, i).hwPhi();
}
// taus next
ix = 3 * ( 1 + nMu + nEG );
ix = 2 * ( 1 + nJet + nEG + nMu );
for(unsigned i = 0; i < std::min(nTau, taus->size(0)); i++){
X_unscaled[ix++] = (float)(taus->at(0, i).hwPt())/2;
X_unscaled[ix++] = taus->at(0, i).hwPt();
X_unscaled[ix++] = taus->at(0, i).hwEta();
X_unscaled[ix++] = taus->at(0, i).hwPhi();
}
// jets last
ix = 3 * ( 1 + nMu + nEG + nTau );
for(unsigned i = 0; i < std::min(nEG, jets->size(0)); i++){
X_unscaled[ix++] = (float)(jets->at(0, i).hwPt())/2;
X_unscaled[ix++] = jets->at(0, i).hwEta();
X_unscaled[ix++] = jets->at(0, i).hwPhi();
}
ap_fixed<16,7,AP_RND,AP_SAT>* X_scaled = new ap_fixed<16,7,AP_RND,AP_SAT>[nNNIn];
// scale the inputs
for(unsigned i = 0; i < nNNIn; i++){
X_scaled[i] = X_unscaled[i]; // placeholder
X_scaled[i] = (X_unscaled[i] - bias[i]) * scale[i];
//std::cout << X_scaled[i] << ",";
}
//std::cout << std::endl;
// load the NN emulator object
hls4mlEmulator::ModelLoader loader(model_so_path);
......
File deleted
# argparsing
from FWCore.ParameterSet.VarParsing import VarParsing
options = VarParsing('python')
options.register('signal', False, VarParsing.multiplicity.singleton, VarParsing.varType.bool)
options.parseArguments()
# import of standard configurations
import FWCore.ParameterSet.Config as cms
# load the model scales
# note you should not really load these from a pkl file for real CMSSW
import os
import pickle
scales_file = os.environ['MLATL1T_DIR'] + '/part1_outputs/hwScaler.pkl'
# NOTE(review): unpickling can execute arbitrary code, so only load a hwScaler.pkl that you produced yourself
# use a context manager so the file handle is closed as soon as the scaler object has been read
with open(scales_file, 'rb') as scales_fh:
    scales = pickle.load(scales_fh)
# the standard scaler does (x - u) / s while we will do (x - u) * (1 / s) so invert s here
scale = 1. / scales.scale_
bias = scales.mean_
process = cms.Process("l1tMLDemo")
process.load('Configuration.StandardSequences.Services_cff')
......@@ -13,38 +29,44 @@ process.load('Configuration.StandardSequences.EndOfProcess_cff')
process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')
process.maxEvents = cms.untracked.PSet(
input = cms.untracked.int32(-1)
input = cms.untracked.int32(100_000)
)
# pick the list of input files matching the requested sample
filelist = 'files_signal.txt' if options.signal else 'files_background.txt'
# one file name per line: strip the trailing newline that readlines() would keep,
# skip blank lines, and close the list file once it has been read
with open(filelist) as filelist_fh:
    input_files = [line.strip() for line in filelist_fh if line.strip()]
process.source = cms.Source (
"PoolSource",
fileNames = cms.untracked.vstring('/store/relval/CMSSW_13_3_0_pre3/RelValMinBias_14TeV/GEN-SIM-DIGI-RAW/132X_mcRun3_2023_realistic_v4-v1/2580000/0911bb55-82fb-4a51-bb8f-be79f61b020d.root'),
fileNames = cms.untracked.vstring(input_files),
)
from Configuration.AlCa.GlobalTag import GlobalTag
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:startup', '')
process.l1tDemoMLProducer = cms.EDProducer('L1TMLDemoProducer',
muToken = cms.InputTag("simGmtStage2Digis"),
egToken = cms.InputTag("simCaloStage2Digis"),
tauToken = cms.InputTag("simCaloStage2Digis"),
jetToken = cms.InputTag("simCaloStage2Digis"),
etSumToken = cms.InputTag("simCaloStage2Digis"),
muToken = cms.InputTag("gmtStage2Digis:Muon"),
egToken = cms.InputTag("caloStage2Digis:EGamma"),
tauToken = cms.InputTag("caloStage2Digis:Tau"),
jetToken = cms.InputTag("caloStage2Digis:Jet"),
etSumToken = cms.InputTag("caloStage2Digis:EtSum"),
nMu = cms.uint32(2),
nEg = cms.uint32(2),
nEg = cms.uint32(8),
nTau = cms.uint32(0),
nJet = cms.uint32(4),
model_so_path = cms.string("../data/L1TMLDemo_v1")
nJet = cms.uint32(8),
model_so_path = cms.string("../data/L1TMLDemo_v1"),
scale = cms.vdouble(*scale),
bias = cms.vdouble(*bias),
)
process.path = cms.Path(
process.l1tDemoMLProducer
)
oname = 'L1TMLDemo_NanoAOD_signal.root' if options.signal else 'L1TMLDemo_NanoAOD_background.root'
process.outnano = cms.OutputModule("NanoAODOutputModule",
fileName = cms.untracked.string("L1TMLDemo_NanoAOD.root"),
fileName = cms.untracked.string(oname),
outputCommands = cms.untracked.vstring("drop *", "keep nanoaodFlatTable_*_*_*"),
compressionLevel = cms.untracked.int32(4),
compressionAlgorithm = cms.untracked.string("ZLIB"),
)
process.end = cms.EndPath(process.outnano)
\ No newline at end of file
process.end = cms.EndPath(process.outnano)
File deleted
This diff is collapsed.
/store/mc/Run3Summer22MiniAODv4/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/MINIAODSIM/130X_mcRun3_2022_realistic_v5-v2/50000/67062162-0b04-4738-b8ab-c31cddf64a1d.root
/store/mc/Run3Summer22MiniAODv4/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/MINIAODSIM/130X_mcRun3_2022_realistic_v5-v2/50000/0a94d194-7c19-4d36-b540-fccacecbe60b.root
/store/mc/Run3Summer22MiniAODv4/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/MINIAODSIM/130X_mcRun3_2022_realistic_v5-v2/40000/504487f5-0a4f-46b2-9c53-e6687362db4f.root
......@@ -21,6 +21,8 @@ As of `hls4ml` `0.8.1`, when run outside of Vivado HLS, the C++ code loads the w
This one-liner replaces the `#ifndef __SYNTHESIS__` preprocessor guard, which causes the weights to be loaded from txt files, with `#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__`, so that the weights are compiled in from the header files instead.
If you don't do this, when you `cmsRun` you will see a runtime error like `ERROR: file w2.txt does not exist`.
```shell
find $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN \( -type d -name .git -prune \) -o -type f -print0 | xargs -0 sed -i 's/#ifndef __SYNTHESIS__/#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__/'
```
......@@ -69,11 +71,24 @@ cp $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1.so $CMSSW_BASE/src/L1Tri
## 7.
Run the test config!
Run the test config over signal and background!
```shell
cd $CMSSW_BASE/src/L1Trigger/L1TMLDemo/test
cmsRun demoL1TMLNtuple.py
cmsRun demoL1TMLNtuple.py signal=True
cmsRun demoL1TMLNtuple.py signal=False
```
We run over the same datasets as part 1:
- Signal: `/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22MiniAODv4-130X_mcRun3_2022_realistic_v5-v2/MINIAODSIM`
- Background: `/SingleNeutrino_E-10-gun/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v2/MINIAODSIM`
This will produce the files
- `L1TMLDemo_NanoAOD_signal.root`
- `L1TMLDemo_NanoAOD_background.root`
*Note* when developing your own models, you may unfortunately run into segmentation violations. The most common reason is that the input and output data types set in the producer do not match the types used by the model emulator. In this emulator workflow, such a mismatch causes a runtime error rather than a compile-time error.
## 8.
Run the notebook part3.ipynb
%% Cell type:markdown id:3e175280 tags:
# CMSSW Emulator
In this exercise you will be guided through the steps to create, compile, and run the emulator of the hls4ml model you trained in part 2. The code in these steps should be executed from the command line on `lxplus` after running `source setup.sh` from this `cms_mlatl1t_tutorial` repository.
When developing your own hls4ml NN emulators, you should compile and run your model emulator locally before delivering it to `cms-hls4ml`.
**Note** you need to run the steps described below in the terminal before going through the cells in this notebook!
## Prerequisite
You will need the HLS for the model of part 2.
## 1.
Copy the NN-specific part of the hls4ml project to the `cms-hls4ml` repo. We _don't_ copy `ap_types` since we'll reference them from the externals.
```shell
[ ! -d $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN ] && mkdir $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN
cp -r $MLATL1T_DIR/part2/L1TMLDemo_v1/firmware/{*.h,*.cpp,weights,nnet_utils} $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN
```
## 2.
As of `hls4ml` `0.8.1`, when run outside of Vivado HLS, the C++ code loads the weights from txt files. We need to force compilation of the weights from the header file instead.
This one-liner replaces the `#ifndef __SYNTHESIS__` preprocessor guard, which causes the weights to be loaded from txt files, with `#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__`, so that the weights are compiled in from the header files instead.
If you don't do this, when you `cmsRun` you will see a runtime error like `ERROR: file w2.txt does not exist`.
```shell
find $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN \( -type d -name .git -prune \) -o -type f -print0 | xargs -0 sed -i 's/#ifndef __SYNTHESIS__/#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__/'
```
## 3.
`make` the hls4ml emulator interface shared object
```shell
cd $MLATL1T_DIR/part3/cms-hls4ml/hls4mlEmulatorExtras
make
mkdir lib64
mv libemulator_interface.so lib64
```
## 4.
`make` the `L1TMLDemo` model shared object
```shell
cd $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo
make
```
*Note* you might benefit from adding `-g` to `CXXFLAGS` to compile with debugging while developing.
The Makefile line would change to `CXXFLAGS := -O3 -fPIC -std=$(CPP_STANDARD) -g`.
## 5.
Compile the CMSSW code with `scram build`
```shell
cd $CMSSW_BASE/src
scram b -j8
```
## 6.
Copy the `L1TMLDemo` model shared object to the CMSSW area.
```shell
mkdir $CMSSW_BASE/src/L1Trigger/L1TMLDemo/data
cp $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1.so $CMSSW_BASE/src/L1Trigger/L1TMLDemo/data
```
## 7.
Run the test config over signal and background!
```shell
cd $CMSSW_BASE/src/L1Trigger/L1TMLDemo/test
cmsRun demoL1TMLNtuple.py signal=True
cmsRun demoL1TMLNtuple.py signal=False
```
We run over the same datasets as part 1:
- Signal: `/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22MiniAODv4-130X_mcRun3_2022_realistic_v5-v2/MINIAODSIM`
- Background: `/SingleNeutrino_E-10-gun/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v2/MINIAODSIM`
This will produce the files
- `L1TMLDemo_NanoAOD_signal.root`
- `L1TMLDemo_NanoAOD_background.root`
*Note* when developing your own models, you may unfortunately run into segmentation violations. The most common reason is that the input and output data types set in the producer do not match the types used by the model emulator. In this emulator workflow, such a mismatch causes a runtime error rather than a compile-time error.
## 8.
Run the notebook part3.ipynb
# Notebook
Now we can read the predictions from our Nano AOD ntuple and check they make sense compared to part 1 and part 2.
%% Cell type:code id:8d652e36 tags:
``` python
import numpy as np
import uproot
import awkward as ak
import matplotlib.pyplot as plt
import mplhep
```
%% Cell type:markdown id:f40b86af tags:
## Load data
Load our signal and background data with `uproot`
%% Cell type:code id:e8fc68f0 tags:
``` python
# open the signal and background NanoAOD files (relative paths, resolved against the current working directory)
f_sig = uproot.open('part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD_signal.root')
f_bkg = uproot.open('part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD_background.root')
# read the L1TMLDemo_y branch of the Events tree, flatten it, and convert it to a NumPy array
y_sig_cmssw = ak.flatten(f_sig['Events/L1TMLDemo_y'].array()).to_numpy()
y_bkg_cmssw = ak.flatten(f_bkg['Events/L1TMLDemo_y'].array()).to_numpy()
```
%% Cell type:markdown id:2069399f tags:
## Histogram
Plot the score distribution for signal and background
%% Cell type:code id:d55f97b8 tags:
``` python
# 100 evenly spaced edges over [0, 1], i.e. 99 equal-width bins
bins = np.linspace(0, 1, num=100)
# the first edge is 0, so the second edge equals the bin width
w = bins[1]
# per-bin counts for each sample; the returned edges are discarded
h_sig, _ = np.histogram(y_sig_cmssw, bins)
h_bkg, _ = np.histogram(y_bkg_cmssw, bins)
# turn counts into fractions of the entries that fall inside the binning
h_sig = h_sig.astype('float') / h_sig.sum()
h_bkg = h_bkg.astype('float') / h_bkg.sum()
```
%% Cell type:markdown id:08e81144 tags:
## Plot
%% Cell type:code id:eda30259 tags:
``` python
# draw the normalised background and signal histograms on the same axes
mplhep.histplot(h_bkg, bins, label='Background')
mplhep.histplot(h_sig, bins, label='Signal')
# logarithmic y axis
plt.semilogy()
plt.legend()
# restrict the x axis to the binned range [0, 1]
plt.xlim(0,1)
plt.xlabel('CMSSW NN Emulator Prediction')
plt.ylabel('Frequency')
```
%% Cell type:code id:cc0de85f tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment