Update part 3 for most recent model, hardware scales, and same datasets as part 1

2fa29854 · ssummers · 0cc6b531 · 2fa29854 · 2fa29854 · 0cc6b531
Commit 2fa29854 authored 1 year ago by ssummers
--- a/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/L1TMLDemo_emulator_v1.cpp
+++ b/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/L1TMLDemo_emulator_v1.cpp
@@ -10,7 +10,7 @@ class L1TMLDemo_emulator_v1 : public hls4mlEmulator::Model {

 private:
    // Note: these need to match the defined model
-    static const int N_INPUT=26;
+    static const int N_INPUT=56;
    static const int N_OUTPUT=1;
    input_t _input[N_INPUT];
    result_t _result[N_OUTPUT];
@@ -33,7 +33,7 @@ public:
  virtual void read_result(std::any result) {
    // copy result
    result_t *result_p = std::any_cast<result_t*>(result);
-    *result_p = _result;
+    *result_p = *_result;
  }
  
 };

--- a/part3/cmssw/src/L1Trigger/L1TMLDemo/plugins/l1tDemoMLAnalyzer.cc
+++ b/part3/cmssw/src/L1Trigger/L1TMLDemo/plugins/l1tDemoMLAnalyzer.cc
@@ -47,8 +47,12 @@ private:
  unsigned nJet;
  unsigned nNNIn;

+  typedef ap_fixed<16,6,AP_RND_CONV,AP_SAT> scale_t;
+  typedef ap_fixed<16,6,AP_RND_CONV,AP_SAT> bias_t;
  // hls4ml emulator model path
  std::string model_so_path;
+  std::vector<scale_t> scale;
+  std::vector<bias_t> bias;

 };

@@ -64,11 +68,18 @@ L1TMLDemoProducer::L1TMLDemoProducer(const edm::ParameterSet& cfg){
  nTau = cfg.getParameter<unsigned>("nTau");
  nJet = cfg.getParameter<unsigned>("nJet");
  // total number of inputs to NN
-  nNNIn = 3 * (1 + nMu + nEG + nTau + nJet);
+  nNNIn = 2 + 3 * (nMu + nEG + nTau + nJet);

  // store the path to the .so file
  model_so_path = cfg.getParameter<std::string>("model_so_path");

+  // get the scaler parameters and cast them to fixed point types
+  std::vector<double> scale_double = cfg.getParameter<std::vector<double>>("scale");
+  std::transform(scale_double.begin(), scale_double.end(), std::back_inserter(scale), [](double s){ return (scale_t)s; });
+  // get the bias parameters and cast them to fixed point types
+  std::vector<double> bias_double = cfg.getParameter<std::vector<double>>("bias");
+  std::transform(bias_double.begin(), bias_double.end(), std::back_inserter(bias), [](double s){ return (bias_t)s; });
+
  // produce
  produces<nanoaod::FlatTable>("L1TMLDemo");

@@ -92,55 +103,59 @@ void L1TMLDemoProducer::produce(edm::StreamID id, edm::Event& iEvent, const edm:
  iEvent.getByToken(jetToken, jets);
  iEvent.getByToken(sumToken, sums);

-  // The unscaled inputs are hwInts apart from ET that is in GeV with 0.5 GeV LSB
+  // The unscaled inputs are hwInts
  // ap_fixed<14,13> is wide enough for all the ET, pT, eta, phi
  ap_fixed<14,13>* X_unscaled = new ap_fixed<14,13>[nNNIn];
+  // initialize to zeros
+  for(unsigned i = 0; i < nNNIn; i++){
+    X_unscaled[i] = 0;
+  }

  // fill the inputs
  unsigned ix = 0;
  // sums first, just find the MET
  for(unsigned i = 0; i < sums->size(0); i++){
    if(sums->at(0, i).getType() == l1t::EtSum::EtSumType::kMissingEt){
-      X_unscaled[ix++] = (float)(sums->at(0,i).hwPt())/2;
-      X_unscaled[ix++] = 0; // for eta
+      X_unscaled[ix++] = sums->at(0,i).hwPt();
      X_unscaled[ix++] = sums->at(0,i).hwPhi();
    }
  }
-  // muons next
-  ix=3 * ( 1 );
-  for(unsigned i = 0; i < std::min(nMu, muons->size(0)); i++){
-    X_unscaled[ix++] = (float)(muons->at(0, i).hwPt())/2;
-    X_unscaled[ix++] = muons->at(0, i).hwEta();
-    X_unscaled[ix++] = muons->at(0, i).hwPhi();
+  // jets next
+  ix = 2 * ( 1 );
+  for(unsigned i = 0; i < std::min(nJet, jets->size(0)); i++){
+    X_unscaled[ix++] = jets->at(0, i).hwPt();
+    X_unscaled[ix++] = jets->at(0, i).hwEta();
+    X_unscaled[ix++] = jets->at(0, i).hwPhi();
  }
  // egammas next
-  ix = 3 * ( 1 + nMu );
+  ix = 2 * ( 1 + nJet );
  for(unsigned i = 0; i < std::min(nEG, egammas->size(0)); i++){
-    X_unscaled[ix++] = (float)(egammas->at(0, i).hwPt())/2;
+    X_unscaled[ix++] = egammas->at(0, i).hwPt();
    X_unscaled[ix++] = egammas->at(0, i).hwEta();
    X_unscaled[ix++] = egammas->at(0, i).hwPhi();
  }
+  // muons next
+  ix = 2 * ( 1 + nJet + nEG );
+  for(unsigned i = 0; i < std::min(nMu, muons->size(0)); i++){
+    X_unscaled[ix++] = muons->at(0, i).hwPt();
+    X_unscaled[ix++] = muons->at(0, i).hwEta();
+    X_unscaled[ix++] = muons->at(0, i).hwPhi();
+  }
  // taus next
-  ix = 3 * ( 1 + nMu + nEG );
+  ix = 2 * ( 1 + nJet + nEG + nMu );
  for(unsigned i = 0; i < std::min(nTau, taus->size(0)); i++){
-    X_unscaled[ix++] = (float)(taus->at(0, i).hwPt())/2;
+    X_unscaled[ix++] = taus->at(0, i).hwPt();
    X_unscaled[ix++] = taus->at(0, i).hwEta();
    X_unscaled[ix++] = taus->at(0, i).hwPhi();
  }
-  // jets last
-  ix = 3 * ( 1 + nMu + nEG + nTau );
-  for(unsigned i = 0; i < std::min(nEG, jets->size(0)); i++){
-    X_unscaled[ix++] = (float)(jets->at(0, i).hwPt())/2;
-    X_unscaled[ix++] = jets->at(0, i).hwEta();
-    X_unscaled[ix++] = jets->at(0, i).hwPhi();
-  }
-

  ap_fixed<16,7,AP_RND,AP_SAT>* X_scaled = new ap_fixed<16,7,AP_RND,AP_SAT>[nNNIn];
  // scale the inputs
  for(unsigned i = 0; i < nNNIn; i++){
-    X_scaled[i] = X_unscaled[i]; // placeholder
+    X_scaled[i] = (X_unscaled[i] - bias[i]) * scale[i];
+    //std::cout << X_scaled[i] << ",";
  }
+  //std::cout << std::endl;

  // load the NN emulator object
  hls4mlEmulator::ModelLoader loader(model_so_path);

--- a/part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD.root
+++ b/part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD.root
--- a/part3/cmssw/src/L1Trigger/L1TMLDemo/test/demoL1TMLNtuple.py
+++ b/part3/cmssw/src/L1Trigger/L1TMLDemo/test/demoL1TMLNtuple.py
+# argparsing
+from FWCore.ParameterSet.VarParsing import VarParsing
+options = VarParsing('python')
+options.register('signal', False, VarParsing.multiplicity.singleton, VarParsing.varType.bool)
+options.parseArguments()
+
 # import of standard configurations
 import FWCore.ParameterSet.Config as cms

+# load the model scales
+# note you should not really load these from a pkl file for real CMSSW
+import os
+import pickle
+scales_file = os.environ['MLATL1T_DIR'] + '/part1_outputs/hwScaler.pkl'
+scales = pickle.load(open(scales_file, 'rb'))
+# the standard scaler does (x - u) / s while we will do (x - u) * (1 / s) so invert s here
+scale = 1. / scales.scale_
+bias = scales.mean_
+
 process = cms.Process("l1tMLDemo")

 process.load('Configuration.StandardSequences.Services_cff')
@@ -13,38 +29,44 @@ process.load('Configuration.StandardSequences.EndOfProcess_cff')
 process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')

 process.maxEvents = cms.untracked.PSet(
-    input = cms.untracked.int32(-1)
+    input = cms.untracked.int32(100_000)
 )

+filelist = 'files_signal.txt' if options.signal else 'files_background.txt'
+input_files = open(filelist).readlines()
+
 process.source = cms.Source (
    "PoolSource",
-    fileNames = cms.untracked.vstring('/store/relval/CMSSW_13_3_0_pre3/RelValMinBias_14TeV/GEN-SIM-DIGI-RAW/132X_mcRun3_2023_realistic_v4-v1/2580000/0911bb55-82fb-4a51-bb8f-be79f61b020d.root'),
+    fileNames = cms.untracked.vstring(input_files),
 )

 from Configuration.AlCa.GlobalTag import GlobalTag
 process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:startup', '')

 process.l1tDemoMLProducer = cms.EDProducer('L1TMLDemoProducer',
-    muToken    = cms.InputTag("simGmtStage2Digis"),
-    egToken    = cms.InputTag("simCaloStage2Digis"),
-    tauToken   = cms.InputTag("simCaloStage2Digis"),
-    jetToken   = cms.InputTag("simCaloStage2Digis"),
-    etSumToken = cms.InputTag("simCaloStage2Digis"),
+    muToken    = cms.InputTag("gmtStage2Digis:Muon"),
+    egToken    = cms.InputTag("caloStage2Digis:EGamma"),
+    tauToken   = cms.InputTag("caloStage2Digis:Tau"),
+    jetToken   = cms.InputTag("caloStage2Digis:Jet"),
+    etSumToken = cms.InputTag("caloStage2Digis:EtSum"),
    nMu = cms.uint32(2),
-    nEg = cms.uint32(2),
+    nEg = cms.uint32(8),
    nTau = cms.uint32(0),
-    nJet = cms.uint32(4),
-    model_so_path = cms.string("../data/L1TMLDemo_v1")
+    nJet = cms.uint32(8),
+    model_so_path = cms.string("../data/L1TMLDemo_v1"),
+    scale = cms.vdouble(*scale),
+    bias = cms.vdouble(*bias),
 )

 process.path = cms.Path(
    process.l1tDemoMLProducer
 )

+oname = 'L1TMLDemo_NanoAOD_signal.root' if options.signal else 'L1TMLDemo_NanoAOD_background.root'
 process.outnano = cms.OutputModule("NanoAODOutputModule",
-    fileName = cms.untracked.string("L1TMLDemo_NanoAOD.root"),
+    fileName = cms.untracked.string(oname),
    outputCommands = cms.untracked.vstring("drop *", "keep nanoaodFlatTable_*_*_*"),
    compressionLevel = cms.untracked.int32(4),
    compressionAlgorithm = cms.untracked.string("ZLIB"),
 )
-process.end = cms.EndPath(process.outnano)
\ No newline at end of file
+process.end = cms.EndPath(process.outnano)
--- a/part3/cmssw/src/L1Trigger/L1TMLDemo/test/demoL1TMLNtuple.root
+++ b/part3/cmssw/src/L1Trigger/L1TMLDemo/test/demoL1TMLNtuple.root
--- a/part3/cmssw/src/L1Trigger/L1TMLDemo/test/files_background.txt
+++ b/part3/cmssw/src/L1Trigger/L1TMLDemo/test/files_background.txt
--- a/part3/cmssw/src/L1Trigger/L1TMLDemo/test/files_signal.txt
+++ b/part3/cmssw/src/L1Trigger/L1TMLDemo/test/files_signal.txt
+/store/mc/Run3Summer22MiniAODv4/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/MINIAODSIM/130X_mcRun3_2022_realistic_v5-v2/50000/67062162-0b04-4738-b8ab-c31cddf64a1d.root
+/store/mc/Run3Summer22MiniAODv4/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/MINIAODSIM/130X_mcRun3_2022_realistic_v5-v2/50000/0a94d194-7c19-4d36-b540-fccacecbe60b.root
+/store/mc/Run3Summer22MiniAODv4/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/MINIAODSIM/130X_mcRun3_2022_realistic_v5-v2/40000/504487f5-0a4f-46b2-9c53-e6687362db4f.root
--- a/part3/exercise.md
+++ b/part3/exercise.md
@@ -21,6 +21,8 @@ As of `hls4ml` `0.8.1`, when run outside of Vivado HLS, the C++ code loads the w

 This one-liner replaces the `#ifndef __SYNTHESIS__` guard, which would cause the weights to be loaded from txt files, with `#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__`, so that the weights are compiled in from the header files instead.

+If you skip this step, `cmsRun` will fail with a runtime error like `ERROR: file w2.txt does not exist`.
+
 ```shell
 find $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN \( -type d -name .git -prune \) -o -type f -print0 | xargs -0 sed -i 's/#ifndef __SYNTHESIS__/#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__/'
 ```
@@ -69,11 +71,24 @@ cp $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1.so $CMSSW_BASE/src/L1Tri

 ## 7.

-Run the test config!
+Run the test config over signal and background!

 ```shell
 cd $CMSSW_BASE/src/L1Trigger/L1TMLDemo/test
-cmsRun demoL1TMLNtuple.py
+cmsRun demoL1TMLNtuple.py signal=True
+cmsRun demoL1TMLNtuple.py signal=False
 ```

+We run over the same datasets as part 1:
+- Signal: `/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22MiniAODv4-130X_mcRun3_2022_realistic_v5-v2/MINIAODSIM`
+- Background: `/SingleNeutrino_E-10-gun/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v2/MINIAODSIM`
+
+This will produce the files
+- `L1TMLDemo_NanoAOD_signal.root`
+- `L1TMLDemo_NanoAOD_background.root`
+
 *Note*: when developing your own models, you may unfortunately run into segmentation violations. The most common reason is that the input and output data types set in the producer do not match the types used by the model emulator. In this emulator workflow, such a mismatch causes a runtime error rather than a compile-time error.
+
+## 8.
+
+Run the notebook part3.ipynb
--- a/part3/part3.ipynb
+++ b/part3/part3.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3e175280",
+   "metadata": {},
+   "source": [
+    "# CMSSW Emulator\n",
+    "\n",
+    "In this exercise you will be guided through the steps to create, compile, and run the emulator of the hls4ml model you trained in part 2. The code in these steps should be executed from the command line on `lxplus` after doing `source setup.sh` from this `cms_mlatl1t_tutorial`.\n",
+    "\n",
+    "When developing your own hls4ml NN emulators, you should compile and run your model emulator locally before delivering it to `cms-hls4ml`.\n",
+    "\n",
+    "**Note** you need to run the steps described below in the terminal before going through the cells in this notebook!\n",
+    "\n",
+    "## Prerequisite\n",
+    "\n",
+    "You will need the HLS for the model of part 2.\n",
+    "\n",
+    "## 1.\n",
+    "\n",
+    "Copy the NN-specific part of the hls4ml project to the `cms-hls4ml` repo. We _don't_ copy `ap_types` since we'll reference them from the externals.\n",
+    "\n",
+    "```shell\n",
+    "[ ! -d $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN ] && mkdir $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN\n",
+    "cp -r $MLATL1T_DIR/part2/L1TMLDemo_v1/firmware/{*.h,*.cpp,weights,nnet_utils} $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN\n",
+    "```\n",
+    "\n",
+    "## 2.\n",
+    "\n",
+    "As of `hls4ml` `0.8.1`, when run outside of Vivado HLS, the C++ code loads the weights from txt files. We need to force compilation of the weights from the header file instead. \n",
+    "\n",
+    "This one-liner replaces the `#ifndef __SYNTHESIS__` guard, which would cause the weights to be loaded from txt files, with `#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__`, so that the weights are compiled in from the header files instead.\n",
+    "\n",
+    "If you skip this step, `cmsRun` will fail with a runtime error like `ERROR: file w2.txt does not exist`.\n",
+    "\n",
+    "```shell\n",
+    "find $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN \\( -type d -name .git -prune \\) -o -type f -print0 | xargs -0 sed -i 's/#ifndef __SYNTHESIS__/#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__/'\n",
+    "```\n",
+    "\n",
+    "## 3.\n",
+    "\n",
+    "`make` the hls4ml emulator interface shared object\n",
+    "\n",
+    "```shell\n",
+    "cd $MLATL1T_DIR/part3/cms-hls4ml/hls4mlEmulatorExtras\n",
+    "make\n",
+    "mkdir lib64\n",
+    "mv libemulator_interface.so lib64\n",
+    "```\n",
+    "\n",
+    "## 4.\n",
+    "\n",
+    "`make` the `L1TMLDemo` model shared object\n",
+    "\n",
+    "```shell\n",
+    "cd $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo\n",
+    "make\n",
+    "```\n",
+    "\n",
+    "*Note* you might benefit from adding `-g` to `CXXFLAGS` to compile with debugging while developing.\n",
+    "The Makefile line would change to `CXXFLAGS := -O3 -fPIC -std=$(CPP_STANDARD) -g`.\n",
+    "\n",
+    "\n",
+    "## 5.\n",
+    "\n",
+    "`scram build` compile the CMSSW code\n",
+    "\n",
+    "```shell\n",
+    "cd $CMSSW_BASE/src\n",
+    "scram b -j8\n",
+    "```\n",
+    "\n",
+    "## 6.\n",
+    "\n",
+    "Copy the `L1TMLDemo` model shared object to the CMSSW area.\n",
+    "\n",
+    "```shell\n",
+    "mkdir $CMSSW_BASE/src/L1Trigger/L1TMLDemo/data\n",
+    "cp $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1.so $CMSSW_BASE/src/L1Trigger/L1TMLDemo/data\n",
+    "```\n",
+    "\n",
+    "## 7.\n",
+    "\n",
+    "Run the test config over signal and background!\n",
+    "\n",
+    "```shell\n",
+    "cd $CMSSW_BASE/src/L1Trigger/L1TMLDemo/test\n",
+    "cmsRun demoL1TMLNtuple.py signal=True\n",
+    "cmsRun demoL1TMLNtuple.py signal=False\n",
+    "```\n",
+    "\n",
+    "We run over the same datasets as part 1:\n",
+    "- Signal: `/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22MiniAODv4-130X_mcRun3_2022_realistic_v5-v2/MINIAODSIM`\n",
+    "- Background: `/SingleNeutrino_E-10-gun/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v2/MINIAODSIM`\n",
+    "\n",
+    "This will produce the files\n",
+    "- `L1TMLDemo_NanoAOD_signal.root`\n",
+    "- `L1TMLDemo_NanoAOD_background.root`\n",
+    "\n",
+    "*Note*: when developing your own models, you may unfortunately run into segmentation violations. The most common reason is that the input and output data types set in the producer do not match the types used by the model emulator. In this emulator workflow, such a mismatch causes a runtime error rather than a compile-time error.\n",
+    "\n",
+    "## 8.\n",
+    "\n",
+    "Run the notebook part3.ipynb\n",
+    "\n",
+    "# Notebook\n",
+    "\n",
+    "Now we can read the predictions from our Nano AOD ntuple and check they make sense compared to part 1 and part 2."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d652e36",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import uproot\n",
+    "import awkward as ak\n",
+    "import matplotlib.pyplot as plt\n",
+    "import mplhep"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f40b86af",
+   "metadata": {},
+   "source": [
+    "## Load data\n",
+    "Load our signal and background data with `uproot`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8fc68f0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f_sig = uproot.open('part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD_signal.root')\n",
+    "f_bkg = uproot.open('part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD_background.root')\n",
+    "y_sig_cmssw = ak.flatten(f_sig['Events/L1TMLDemo_y'].array()).to_numpy()\n",
+    "y_bkg_cmssw = ak.flatten(f_bkg['Events/L1TMLDemo_y'].array()).to_numpy()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2069399f",
+   "metadata": {},
+   "source": [
+    "## Histogram\n",
+    "\n",
+    "Plot the score distribution for signal and background"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d55f97b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bins=np.linspace(0, 1, 100)\n",
+    "w = bins[1]\n",
+    "h_sig, _ = np.histogram(y_sig_cmssw, bins=bins)\n",
+    "h_bkg, _ = np.histogram(y_bkg_cmssw, bins=bins)\n",
+    "h_sig = h_sig.astype('float') / np.sum(h_sig)\n",
+    "h_bkg = h_bkg.astype('float') / np.sum(h_bkg)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "08e81144",
+   "metadata": {},
+   "source": [
+    "## Plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eda30259",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mplhep.histplot(h_bkg, bins, label='Background')\n",
+    "mplhep.histplot(h_sig, bins, label='Signal')\n",
+    "plt.semilogy()\n",
+    "plt.legend()\n",
+    "plt.xlim(0,1)\n",
+    "plt.xlabel('CMSSW NN Emulator Prediction')\n",
+    "plt.ylabel('Frequency')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc0de85f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
+%% Cell type:markdown id:3e175280 tags:
+
+# CMSSW Emulator
+
+In this exercise you will be guided through the steps to create, compile, and run the emulator of the hls4ml model you trained in part 2. The code in these steps should be executed from the command line on `lxplus` after doing `source setup.sh` from this `cms_mlatl1t_tutorial`.
+
+When developing your own hls4ml NN emulators, you should compile and run your model emulator locally before delivering it to `cms-hls4ml`.
+
+**Note** you need to run the steps described below in the terminal before going through the cells in this notebook!
+
+## Prerequisite
+
+You will need the HLS for the model of part 2.
+
+## 1.
+
+Copy the NN-specific part of the hls4ml project to the `cms-hls4ml` repo. We _don't_ copy `ap_types` since we'll reference them from the externals.
+
+```shell
+[ ! -d $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN ] && mkdir $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN
+cp -r $MLATL1T_DIR/part2/L1TMLDemo_v1/firmware/{*.h,*.cpp,weights,nnet_utils} $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN
+```
+
+## 2.
+
+As of `hls4ml` `0.8.1`, when run outside of Vivado HLS, the C++ code loads the weights from txt files. We need to force compilation of the weights from the header file instead.
+
+This one-liner replaces the `#ifndef __SYNTHESIS__` guard, which would cause the weights to be loaded from txt files, with `#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__`, so that the weights are compiled in from the header files instead.
+
+If you skip this step, `cmsRun` will fail with a runtime error like `ERROR: file w2.txt does not exist`.
+
+```shell
+find $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1/NN \( -type d -name .git -prune \) -o -type f -print0 | xargs -0 sed -i 's/#ifndef __SYNTHESIS__/#ifdef __HLS4ML_LOAD_TXT_WEIGHTS__/'
+```
+
+## 3.
+
+`make` the hls4ml emulator interface shared object
+
+```shell
+cd $MLATL1T_DIR/part3/cms-hls4ml/hls4mlEmulatorExtras
+make
+mkdir lib64
+mv libemulator_interface.so lib64
+```
+
+## 4.
+
+`make` the `L1TMLDemo` model shared object
+
+```shell
+cd $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo
+make
+```
+
+*Note* you might benefit from adding `-g` to `CXXFLAGS` to compile with debugging while developing.
+The Makefile line would change to `CXXFLAGS := -O3 -fPIC -std=$(CPP_STANDARD) -g`.
+
+
+## 5.
+
+`scram build` compile the CMSSW code
+
+```shell
+cd $CMSSW_BASE/src
+scram b -j8
+```
+
+## 6.
+
+Copy the `L1TMLDemo` model shared object to the CMSSW area.
+
+```shell
+mkdir $CMSSW_BASE/src/L1Trigger/L1TMLDemo/data
+cp $MLATL1T_DIR/part3/cms-hls4ml/L1TMLDemo/L1TMLDemo_v1.so $CMSSW_BASE/src/L1Trigger/L1TMLDemo/data
+```
+
+## 7.
+
+Run the test config over signal and background!
+
+```shell
+cd $CMSSW_BASE/src/L1Trigger/L1TMLDemo/test
+cmsRun demoL1TMLNtuple.py signal=True
+cmsRun demoL1TMLNtuple.py signal=False
+```
+
+We run over the same datasets as part 1:
+- Signal: `/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22MiniAODv4-130X_mcRun3_2022_realistic_v5-v2/MINIAODSIM`
+- Background: `/SingleNeutrino_E-10-gun/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v2/MINIAODSIM`
+
+This will produce the files
+- `L1TMLDemo_NanoAOD_signal.root`
+- `L1TMLDemo_NanoAOD_background.root`
+
+*Note*: when developing your own models, you may unfortunately run into segmentation violations. The most common reason is that the input and output data types set in the producer do not match the types used by the model emulator. In this emulator workflow, such a mismatch causes a runtime error rather than a compile-time error.
+
+## 8.
+
+Run the notebook part3.ipynb
+
+# Notebook
+
+Now we can read the predictions from our Nano AOD ntuple and check they make sense compared to part 1 and part 2.
+
+%% Cell type:code id:8d652e36 tags:
+
+``` python
+import numpy as np
+import uproot
+import awkward as ak
+import matplotlib.pyplot as plt
+import mplhep
+```
+
+%% Cell type:markdown id:f40b86af tags:
+
+## Load data
+Load our signal and background data with `uproot`
+
+%% Cell type:code id:e8fc68f0 tags:
+
+``` python
+f_sig = uproot.open('part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD_signal.root')
+f_bkg = uproot.open('part3/cmssw/src/L1Trigger/L1TMLDemo/test/L1TMLDemo_NanoAOD_background.root')
+y_sig_cmssw = ak.flatten(f_sig['Events/L1TMLDemo_y'].array()).to_numpy()
+y_bkg_cmssw = ak.flatten(f_bkg['Events/L1TMLDemo_y'].array()).to_numpy()
+```
+
+%% Cell type:markdown id:2069399f tags:
+
+## Histogram
+
+Plot the score distribution for signal and background
+
+%% Cell type:code id:d55f97b8 tags:
+
+``` python
+bins=np.linspace(0, 1, 100)
+w = bins[1]
+h_sig, _ = np.histogram(y_sig_cmssw, bins=bins)
+h_bkg, _ = np.histogram(y_bkg_cmssw, bins=bins)
+h_sig = h_sig.astype('float') / np.sum(h_sig)
+h_bkg = h_bkg.astype('float') / np.sum(h_bkg)
+```
+
+%% Cell type:markdown id:08e81144 tags:
+
+## Plot
+
+%% Cell type:code id:eda30259 tags:
+
+``` python
+mplhep.histplot(h_bkg, bins, label='Background')
+mplhep.histplot(h_sig, bins, label='Signal')
+plt.semilogy()
+plt.legend()
+plt.xlim(0,1)
+plt.xlabel('CMSSW NN Emulator Prediction')
+plt.ylabel('Frequency')
+```
+
+%% Cell type:code id:cc0de85f tags:
+
+``` python
+```