From 60dc617b6b279b825f6d5ae706c99e670f041130 Mon Sep 17 00:00:00 2001 From: Davide Valsecchi <davide.valsecchi@cern.ch> Date: Mon, 11 Apr 2022 14:35:29 +0200 Subject: [PATCH 1/2] Update to extra function --- workflows/base.py | 15 ++++++++++++--- workflows/mem.py | 5 ++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/workflows/base.py b/workflows/base.py index 096fe894..248d21fe 100644 --- a/workflows/base.py +++ b/workflows/base.py @@ -181,9 +181,15 @@ class ttHbbBaseProcessor(processor.ProcessorABC): for (obj, obj_hists) in zip([self.events.MuonGood, self.events.ElectronGood, self.events.JetGood], [self.muon_hists, self.electron_hists, self.jet_hists]): fill_histograms_object(self, obj, obj_hists) - def process_extra(self) -> ak.Array: + def process_extra_before_presel(self) -> ak.Array: pass + def process_extra_after_presel(self) -> ak.Array: + pass + + def fill_histograms_extra(self): + pass + def process(self, events): self.events = events self.load_metadata() @@ -203,6 +209,8 @@ class ttHbbBaseProcessor(processor.ProcessorABC): self.apply_object_preselection() self.count_objects() self.apply_triggers() + # Possible extra work before applying preselections + self.process_extra_before_presel() # This will remove all the events not passing preselection from further self.apply_preselection_cuts() self.nEvents_after_presel = self.nevents @@ -216,11 +224,12 @@ class ttHbbBaseProcessor(processor.ProcessorABC): self.compute_weights() # This function is empty in the base processor, but can be overriden in processors derived from the class ttHbbBaseProcessor - self.process_extra() + self.process_extra_after_presel() # Fill histograms self.fill_histograms() - + self.fill_histograms_extra() + return self.output def postprocess(self, accumulator): diff --git a/workflows/mem.py b/workflows/mem.py index 2afc3298..2a0301e1 100644 --- a/workflows/mem.py +++ b/workflows/mem.py @@ -76,11 +76,10 @@ class MEMStudiesProcessor(ttHbbBaseProcessor): self.events["nbquark"] = ak.count(self.events.BQuark.pt, axis=1) self.events["nbquark_matched"] = ak.count(self.events.BQuarkMatched.pt, axis=1) - def fill_histograms(self): - super().fill_histograms() + def fill_histograms_extra(self): fill_histograms_object(self, self.events.BQuarkMatched, self.bquark_hists) - def process_extra(self) -> ak.Array: + def process_extra_after_presel(self) -> ak.Array: self.parton_matching() self.count_bquarks() print(self.events.nbquark) -- GitLab From 0ca54be5df7a1387ae5a06efc1dadb671ad0bf34 Mon Sep 17 00:00:00 2001 From: Davide Valsecchi <davide.valsecchi@cern.ch> Date: Mon, 11 Apr 2022 15:51:58 +0200 Subject: [PATCH 2/2] Added deltaR min for parton matching --- notebooks/parton_matching_test.ipynb | 266 ++++++++++++++++++++------- notebooks/save_vectors.ipynb | 227 +++++++++++++++++++++++ workflows/mem.py | 11 +- 3 files changed, 435 insertions(+), 69 deletions(-) create mode 100644 notebooks/save_vectors.ipynb diff --git a/notebooks/parton_matching_test.ipynb b/notebooks/parton_matching_test.ipynb index af15a512..b1b88167 100644 --- a/notebooks/parton_matching_test.ipynb +++ b/notebooks/parton_matching_test.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 48, "id": "0c89709e", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 49, "id": "bbbfdba9", "metadata": {}, "outputs": [], @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 50, "id": "7b3a934a", "metadata": {}, "outputs": [], @@ -57,7 +57,7 @@ " higgs = events.GenPart[isHiggs & isHard & hasTwoChildren]\n", " bquarks = ak.concatenate( (bquarks, ak.flatten(higgs.children, axis=2)), axis=1 )\n", " # Sort b-quarks by pt\n", - " bquarks = ak.with_name(bquarks[ak.argsort(bquarks.pt)], name='PtEtaPhiMCandidate')\n", + " bquarks = ak.with_name(bquarks[ak.argsort(bquarks.pt, ascending=False)], name='PtEtaPhiMCandidate')\n", "\n", " # Compute deltaR(b, jet) and save the nearest jet (deltaR matching)\n", " deltaR = ak.flatten(bquarks.metric_table(events.Jet), axis=2)\n", @@ -113,80 +113,78 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 51, "id": "d29f5d66", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[(3, 0), (1, 5), (2, 1), (3, 7), (0, 7), (2, 5), (1, 1), (2, 2), (0, 0), (2, 3), (3, 3), (0, 3), (0, 4), (1, 3), (2, 0), (3, 5), (1, 0), (1, 2), (0, 2), (3, 1), (2, 7), (3, 2), (0, 8), (1, 7), (0, 1), (3, 4), (0, 5), (3, 8), (2, 8), (2, 4), (0, 6), (1, 8), (1, 4), (2, 6), (3, 6), (1, 6)]\n", + "[(0, 0), (2, 5), (1, 1), (0, 7), (3, 7), (1, 5), (2, 1), (1, 2), (3, 0), (1, 3), (0, 3), (3, 3), (3, 4), (2, 3), (1, 0), (0, 5), (2, 0), (2, 2), (3, 2), (0, 1), (1, 7), (0, 2), (3, 8), (2, 7), (3, 1), (0, 4), (3, 5), (0, 8), (1, 8), (1, 4), (3, 6), (2, 8), (2, 4), (1, 6), (0, 6), (2, 6)]\n", "\n", - "3 [0, 7, 3, 5, 1, 2, 4, 8, 6]\n", - "3 [0, 3, 10, 15, 19, 21, 25, 27, 34]\n", + "0 [0, 7, 3, 5, 1, 2, 4, 8, 6]\n", + "0 [0, 3, 10, 15, 19, 21, 25, 27, 34]\n", "0 [True, False, False, True, False, False, False, False, True, False, True, False, False, False, True, True, True, False, False, True, False, True, False, False, False, True, False, True, False, False, False, False, False, False, True, False]\n", "\n", - "1 [5, 1, 3, 2, 7, 8, 4, 6]\n", - "1 [1, 6, 13, 17, 23, 31, 32, 35]\n", + "2 [5, 1, 3, 2, 7, 8, 4, 6]\n", + "2 [1, 6, 13, 17, 23, 31, 32, 35]\n", "1 [True, True, False, True, False, True, True, False, True, False, True, False, False, True, True, True, True, True, False, True, False, True, False, True, False, True, True, True, False, False, False, True, True, False, True, True]\n", "\n", - "2 [1, 2, 3, 7, 8, 4, 6]\n", - "2 [2, 7, 9, 20, 28, 29, 33]\n", + "1 [1, 2, 3, 7, 8, 4, 6]\n", + "1 [2, 7, 9, 20, 28, 29, 33]\n", "2 [True, True, True, True, False, True, True, True, True, True, True, False, False, True, True, True, True, True, False, True, True, True, False, True, True, True, True, True, True, True, False, True, True, True, True, True]\n", "\n", - "3 []\n", - "3 []\n", + "0 []\n", + "0 []\n", "3 [True, True, True, True, False, True, True, True, True, True, True, False, False, True, True, True, True, True, False, True, True, True, False, True, True, True, True, True, True, True, False, True, True, True, True, True]\n", "\n", - "0 [7, 3, 4, 2, 8, 6]\n", - "0 [4, 11, 12, 18, 22, 30]\n", + "3 [7, 3, 4, 2, 8, 6]\n", + "3 [4, 11, 12, 18, 22, 30]\n", "4 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", - "2 []\n", - "2 []\n", - "5 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", - "\n", "1 []\n", "1 []\n", - "6 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", + "5 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", "2 []\n", "2 []\n", + "6 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", + "\n", + "1 []\n", + "1 []\n", "7 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", - "0 []\n", - "0 []\n", + "3 []\n", + "3 []\n", "8 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", - "2 []\n", - "2 []\n", + "1 []\n", + "1 []\n", "9 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", - "3 []\n", - "3 []\n", - "10 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", - "\n", "0 []\n", "0 []\n", + "10 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", + "\n", + "3 []\n", + "3 []\n", "11 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", - "0 []\n", - "0 []\n", + "3 []\n", + "3 []\n", "12 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", "\n", - "1 []\n", - "1 []\n", - "13 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", - "\n", "2 []\n", "2 []\n", + "13 [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]\n", + "\n", + "1 []\n", + "1 []\n", "idx_matchedPair [[0, 1, 2, 3, None, None, None, None, ... None, None, None, None, None, None, None]]\n", "matchedJet [[Jet, Jet, Jet, Jet], [Jet, Jet, Jet, Jet, ... Jet, Jet, Jet], [Jet, Jet, Jet, Jet]]\n", "[True, True, True, True, True, True, True, ... True, True, True, True, True, True]\n", - "deltaR [[1.72, 1.42, 2.78, 1.91, 2.57, 0.163, 0.682, ... 2.6, 1.99, 2.96, 2.12, 3.79, 1.46]]\n" + "deltaR [[3.13, 0.186, 1.56, 0.423, 1.68, 1.68, ... 0.0284, 2.37, 3.82, 2.03, 2.53, 2.15]]\n" ] } ], @@ -197,7 +195,126 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 44, + "id": "01499ba4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[0, 1, 2, 4], [0, 1, 2, 3]]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_matchedPair[1:3].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "49c8f7be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[0, 5, 1, 7], [2, 0, 8, 5]]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_matchedJet[1:3].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "00773b40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[0, 2, 1, 3], [1, 0, 3, 2]]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_matchedParton[1:3].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "13f09baa", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<Array [[2, 4, 5, 1], [0, ... 2], [3, 2, 5, 0]] type='100 * var * ?int64'>" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_matchedJet" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "9da3223d", + "metadata": {}, + "outputs": [], + "source": [ + "ak.num?" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "ec6b7cdd", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<JetArray [Jet, Jet, Jet, Jet, ... Jet, Jet, Jet] type='10 * jet'>" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "events.Jet[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "8d71d14d", "metadata": {}, "outputs": [], @@ -210,10 +327,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "id": "250cd760", "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [ { @@ -232,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "id": "21516f70", "metadata": { "scrolled": true @@ -279,7 +396,7 @@ " 8.262446403503418]" ] }, - "execution_count": 8, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -290,7 +407,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "id": "5148db8f", "metadata": { "scrolled": true @@ -352,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "id": "e935d2ec", "metadata": {}, "outputs": [ @@ -374,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 21, "id": "a8a4bb44", "metadata": { "scrolled": true @@ -421,7 +538,7 @@ " (1, 6)]" ] }, - "execution_count": 11, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -432,7 +549,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 22, "id": "668e8399", "metadata": {}, "outputs": [ @@ -445,7 +562,7 @@ " 1.165016770362854]" ] }, - "execution_count": 12, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -456,7 +573,26 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 54, + "id": "d1b1d94b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<Array [[2, 4, 5, 1], [0, ... 2], [3, 2, 5, 0]] type='100 * var * ?int64'>" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "0d72e50f", "metadata": { "scrolled": true @@ -503,7 +639,7 @@ " 8.262446403503418]" ] }, - "execution_count": 13, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -514,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 24, "id": "98aba475", "metadata": {}, "outputs": [ @@ -524,7 +660,7 @@ "<Array [[0, 1, 2, 3, 4, ... 40, 41, 42, 43]] type='100 * var * int64'>" ] }, - "execution_count": 14, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -535,7 +671,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "id": "219989a0", "metadata": {}, "outputs": [ @@ -545,7 +681,7 @@ "<Array [16.9, 39.3, 66.4, 246] type='4 * ?float32'>" ] }, - "execution_count": 19, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -556,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "id": "255106e7", "metadata": {}, "outputs": [ @@ -566,7 +702,7 @@ "<Array [246, 39.3, 66.4, 16.9] type='4 * ?float32'>" ] }, - "execution_count": 20, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -577,7 +713,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "id": "4e49f074", "metadata": {}, "outputs": [ @@ -587,7 +723,7 @@ "<Array [0.0579, 0.0648, 0.241, 1.17] type='4 * ?float32'>" ] }, - "execution_count": 22, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -598,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 28, "id": "a39cacb2", "metadata": {}, "outputs": [ @@ -608,7 +744,7 @@ "<Array [1.58, 3.2, 1.9, -0.0397] type='4 * ?float32'>" ] }, - "execution_count": 24, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -619,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 29, "id": "6def3622", "metadata": {}, "outputs": [ @@ -629,7 +765,7 @@ "<Array [1.52, 3.24, 1.79, 1.04] type='4 * ?float32[parameters={\"__doc__\": \"eta\"}]'>" ] }, - "execution_count": 23, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -640,7 +776,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "id": "b1a11a16", "metadata": {}, "outputs": [ diff --git a/notebooks/save_vectors.ipynb b/notebooks/save_vectors.ipynb new file mode 100644 index 00000000..c0eb9b58 --- /dev/null +++ b/notebooks/save_vectors.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "9c4b81ec", + "metadata": {}, + "outputs": [], + "source": [ + "from coffea.nanoevents import NanoEventsFactory, NanoAODSchema\n", + "from coffea import hist, processor\n", + "import numpy as np\n", + "import awkward as ak\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "658fcd8d", + "metadata": {}, + "outputs": [], + "source": [ + "filename = \"/pnfs/psi.ch/cms/trivcat/store/user/mmarcheg/RunIIFall17NanoAODv7/ttHTobb_M125_TuneCP5_13TeV-powheg-pythia8/587E2464-42CA-3A45-BD49-D23E49F658E6.root\"\n", + "#events = NanoEventsFactory.from_root(filename, schemaclass=NanoAODSchema).events()\n", + "events = NanoEventsFactory.from_root(filename, schemaclass=NanoAODSchema, entry_stop=100).events()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "020adf45", + "metadata": {}, + "outputs": [], + "source": [ + "def parton_matching(dr_min=0.5):\n", + " # Select b-quarks at LHE level\n", + " isOutgoing = events.LHEPart.status == 1\n", + " isB = abs(events.LHEPart.pdgId) == 5\n", + " bquarks = events.LHEPart[isB & isOutgoing]\n", + "\n", + " # Select b-quarks at Gen level, coming from H->bb decay\n", + " if dataset == 'ttHTobb':\n", + " isHiggs = events.GenPart.pdgId == 25\n", + " isHard = events.GenPart.hasFlags(['fromHardProcess'])\n", + " hasTwoChildren = ak.num(events.GenPart.childrenIdxG, axis=2) == 2\n", + " higgs = events.GenPart[isHiggs & isHard & hasTwoChildren]\n", + " bquarks = ak.concatenate( (bquarks, ak.flatten(higgs.children, axis=2)), axis=1 )\n", + " # Sort b-quarks by pt\n", + " bquarks = ak.with_name(bquarks[ak.argsort(bquarks.pt, ascending=False)], name='PtEtaPhiMCandidate')\n", + "\n", + " # Compute deltaR(b, jet) and save the nearest jet (deltaR matching)\n", + " deltaR = ak.flatten(bquarks.metric_table(events.Jet), axis=2)\n", + " \n", + " maskDR = deltaR<dr_min\n", + " deltaRcut = deltaR[maskDR]\n", + " idx_pairs_sorted = ak.argsort(deltaRcut, axis=1)\n", + " pairs = ak.argcartesian([bquarks, events.Jet])[maskDR]\n", + " pairs_sorted = pairs[idx_pairs_sorted]\n", + " idx_bquarks, idx_Jet = ak.unzip(pairs_sorted)\n", + "\n", + " hasMatch = ak.zeros_like(idx_Jet, dtype=bool)\n", + " Npairmax = ak.max(ak.num(idx_bquarks))\n", + " # Loop over the (parton, jet) pairs\n", + " for idx_pair in range(Npairmax):\n", + " idx_bquark = ak.pad_none(idx_bquarks, Npairmax)[:,idx_pair]\n", + " idx_match_candidates = idx_Jet[ak.fill_none( (idx_bquarks == idx_bquark) & ~hasMatch, False)]\n", + " idx_pair_candidates = ak.local_index(idx_Jet)[ak.fill_none( (idx_bquarks == idx_bquark) & ~hasMatch, False)]\n", + " \n", + " if idx_pair == 0:\n", + " idx_matchedJet = ak.unflatten( ak.firsts(idx_match_candidates), 1 )\n", + " idx_matchedParton = ak.unflatten( idx_bquark, 1 )\n", + " idx_matchedPair = ak.unflatten( ak.firsts(idx_pair_candidates), 1 )\n", + " else:\n", + " # If the partons are matched in all events or the number of jets is smaller than the number of partons, stop iterating\n", + " if ak.all( ( (ak.count(idx_matchedJet, axis=1) == ak.count(bquarks.pt, axis=1)) | (ak.count(events.Jet.pt, axis=1) < ak.count(bquarks.pt, axis=1) ) ) ): break\n", + " idx_matchedJet = ak.concatenate( (idx_matchedJet, ak.unflatten( ak.firsts(idx_match_candidates), 1 ) ), axis=1 )\n", + " idx_matchedParton = ak.concatenate( (idx_matchedParton, ak.unflatten( idx_bquark, 1 )), axis=1 )\n", + " idx_matchedPair = ak.concatenate( (idx_matchedPair, ak.unflatten( ak.firsts(idx_pair_candidates), 1 ) ), axis=1 )\n", + " # The mask `hasMatch` masks to False the \n", + " hasMatch = hasMatch | ak.fill_none(idx_Jet == ak.fill_none(ak.firsts(idx_match_candidates), -99), False) | ak.fill_none(idx_bquarks == idx_bquark, False)\n", + " \n", + " idx_matchedParton = idx_matchedParton[~ak.is_none(idx_matchedJet, axis=1)]\n", + " idx_matchedJet = idx_matchedJet[~ak.is_none(idx_matchedJet, axis=1)]\n", + " dr_matchedJet = deltaRcut[idx_pairs_sorted][~ak.is_none(idx_matchedPair, axis=1)]\n", + " idx_matchedPair = idx_matchedPair[~ak.is_none(idx_matchedPair, axis=1)]\n", + " matchedJet = events.Jet[idx_matchedJet]\n", + " matchedParton = bquarks[idx_matchedParton]\n", + " hasMatchedPartons = ak.count(idx_matchedParton, axis=1) == ak.count(bquarks.pt, axis=1)\n", + " #for cut in self._selections.keys():\n", + " # print(events.metadata[\"dataset\"], cut, \"matched partons =\", round(100*ak.sum(hasMatchedPartons[self._cuts.all(*self._selections[cut])])/ak.size(hasMatchedPartons[self._cuts.all(*self._selections[cut])]), 2), \"%\")\n", + " events[\"BQuark\"] = bquarks\n", + " events[\"JetMatched\"] = matchedJet\n", + " events[\"BQuarkMatched\"] = matchedParton\n", + " events[\"BQuarkMatched\"] = ak.with_field(events.BQuarkMatched, dr_matchedJet, \"drMatchedJet\")\n", + " return bquarks, matchedJet, idx_matchedJet, idx_matchedParton, idx_matchedPair, dr_matchedJet" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "ae658778", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = \"ttHTobb\"\n", + "bquarks, matchedJets, idx_matchedJet, idx_matchedParton, idx_matchedPair, dr_matchedJet = parton_matching(0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "5dd7ee4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<JetArray [[Jet, Jet, Jet, ... Jet, Jet, Jet]] type='100 * var * ?jet'>" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matchedJets" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "37a56b7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<Array [[0.026, 0.0406, ... 0.0284, 0.0342]] type='100 * var * ?float32'>" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dr_matchedJet" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "5db2cd44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([175., 86., 34., 18., 15., 9., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0.]),\n", + " array([0. , 0.05128205, 0.1025641 , 0.15384615, 0.20512821,\n", + " 0.25641026, 0.30769231, 0.35897436, 0.41025641, 0.46153846,\n", + " 0.51282051, 0.56410256, 0.61538462, 0.66666667, 0.71794872,\n", + " 0.76923077, 0.82051282, 0.87179487, 0.92307692, 0.97435897,\n", + " 1.02564103, 1.07692308, 1.12820513, 1.17948718, 1.23076923,\n", + " 1.28205128, 1.33333333, 1.38461538, 1.43589744, 1.48717949,\n", + " 1.53846154, 1.58974359, 1.64102564, 1.69230769, 1.74358974,\n", + " 1.79487179, 1.84615385, 1.8974359 , 1.94871795, 2. ]),\n", + " <BarContainer object of 39 artists>)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAARNUlEQVR4nO3dfYxldX3H8fenoBgfAXekRFkXzKrBRhc7odaqxYdWHqpom1g21oLSrrTSaGzaoDRqTExpq7UxtphVCZDoiopUWrGVoi1tLeig67IoIOBa2azsCBa1Gir47R/3jB7GmZ07cx92+fl+JTdz7u93Hr785vCZs+ece26qCklSW35ufxcgSRo/w12SGmS4S1KDDHdJapDhLkkNOnh/FwCwbt262rBhw/4uQ5IeUK677rpvVdXMUn0HRLhv2LCBubm5/V2GJD2gJPn6cn2elpGkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNWjHck1yQZG+Snb22S5Js7167kmzv2jck+UGv7z0TrF2StIxh7nO/EHg3cPFCQ1X99sJ0kncAd/fmv7WqNo2pPknSGqwY7lV1dZINS/UlCfAy4HljrkuSNIJRP6H6bOCOqvpqr+3oJF8EvgP8WVX9+1ILJtkCbAFYv379SEVsOOcTy/btOu+UkdYtSQ9Eo15Q3Qxs673fA6yvquOA1wMfTPLIpRasqq1VNVtVszMzSz4aQZK0RmsO9yQHA78JXLLQVlX3VNWd3fR1wK3AE0ctUpK0OqMcub8AuLGqbl9oSDKT5KBu+hhgI3DbaCVKklZrmFshtwH/BTwpye1Jzuy6TuP+p2QAngPs6G6N/ChwVlXdNcZ6JUlDGOZumc3LtJ+xRNulwKWjlyVJGoWfUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoNWDPckFyTZm2Rnr+0tSXYn2d69Tu71vSHJLUluSvLCSRUuSVreMEfuFwInLtH+zqra1L2uAEhyLHAa8JRumb9LctC4ipUkDWfFcK+qq4G7hlzfqcCHquqeqvoacAtw/Aj1SZLWYJRz7mcn2dGdtjmsa3ss8I3ePLd3bT8lyZYkc0nm5ufnRyhDkrTYWsP9fOAJwCZgD/CO1a6gqrZW1WxVzc7MzKyxDEnSUtYU7lV1R1XdV1U/At7LT0697AaO6s36uK5NkjRFawr3JEf23r4UWLiT5nLgtCSHJDka2Ah8brQSJUmrdfBKMyTZBpwArEtyO/Bm4IQkm4ACdgGvBqiqG5J8GPgycC/wmqq6byKVS5KWtWK4V9XmJZrfv4/53wa8bZSiJEmj8ROqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ1aMdyTXJBkb5Kdvba/SnJjkh1JLktyaNe+IckPkmzvXu+ZYO2SpGUMc+R+IXDiorYrgV+oqqcCNwNv6PXdWlWbutdZ4ylTkrQaK4Z7VV0N3LWo7VNVdW/39hrgcROoTZK0RuM45/4q4JO990cn+WKSf0vy7OUWSrIlyVySufn5+TGUIUlaMFK4JzkXuBf4QNe0B1hfVccBrwc+mOSRSy1bVVuraraqZmdmZkYpQ5K0yJrDPckZwG8AL6+qAqiqe6rqzm76OuBW4IljqFOStAprCvckJwJ/Cry4qr7fa59JclA3fQywEbhtHIVKkoZ38EozJNkGnACsS3I78GYGd8ccAlyZBOCa7s6Y5wBvTfJD4EfAWVV115IrliRNzIrhXlWbl2h+/zLzXgpcOmpRkqTR+AlVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aKhwT3JBkr1JdvbaDk9yZZKvdj8P69qT5F1JbkmyI8nTJ1W8JGlpwx65XwicuKjtHOCqqtoIXNW9BzgJ2Ni9tgDnj16mJGk1hgr3qroauGtR86nARd30RcBLeu0X18A1wKFJjhxDrZKkIY1yzv2IqtrTTX8TOKKbfizwjd58t3dt95NkS5K5JHPz8/MjlCFJWmwsF1SrqoBa5TJbq2q2qmZnZmbGUYYkqTNKuN+xcLql+7m3a98NHNWb73FdmyRpSkYJ98uB07vp04GP99p/t7tr5hnA3b3TN5KkKTh4mJmSbANOANYluR14M3Ae8OEkZwJfB17WzX4FcDJwC/B94JVjrlmStIKhwr2qNi/T9fwl5i3gNaMUJUkajZ9QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGjTUs2UeyDac84l99u8675QpVSJJ0+ORuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjNn1BN8iTgkl7TMcCbgEOB3wfmu/Y3VtUVa92OJGn11hzuVXUTsAkgyUHAbuAy4JXAO6vq7eMoUJK0euM6LfN84Naq+vqY1idJGsG4wv00YFvv/dlJdiS5IMlhSy2QZEuSuSRz8/PzS80iSVqjkcM9yYOBFwMf6ZrOB57A4JTNHuAdSy1XVVuraraqZmdmZkYtQ5LUM44j95OAL1TVHQBVdUdV3VdVPwLeCxw/hm1IklZhHOG+md4pmSRH9vpeCuwcwzYkSasw0pd1JHkY8GvAq3vNf5lkE1DArkV9kqQpGCncq+p/gUcvanvFSBVJkkbmJ1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgkb4gGyDJLuC7wH3AvVU1m+Rw4BJgA7ALeFlVfXvUbUmShjOuI/fnVtWmqprt3p8DXFVVG4GruveSpCmZ1GmZU4GLuumLgJdMaDuSpCWMI9wL+FSS65Js6dqOqKo93fQ3gSPGsB1J0pBGPucOPKuqdid5DHBlkhv7nVVVSWrxQt0fgi0A69evH0MZkqQFIx+5V9Xu7ude4DLgeOCOJEcCdD/3LrHc1qqararZmZmZUcuQJPWMFO5JHpbkEQvTwK8DO4HLgdO72U4HPj7KdiRJqzPqaZkjgMuSLKzrg1X1T0k+D3w4yZnA14GXjbgdSdIqjBTuVXUb8LQl2u8Enj/KuiVJa+cnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg8bxNXsPaBvO+cQ++3edd8qUKpGk8fHIXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVozeGe5Kgkn0ny5SQ3JHlt1/6WJLuTbO9eJ4+vXEnSMEa5z/1e4I+r6gtJHgFcl+TKru+dVfX20cuTJK3FmsO9qvYAe7rp7yb5CvDYcRUmSVq7sZxzT7IBOA64tms6O8mOJBckOWyZZbYkmUsyNz8/P44yJEmdkcM9ycOBS4HXVdV3gPOBJwCbGBzZv2Op5apqa1XNVtXszMzMqGVIknpGCvckD2IQ7B+oqo8BVNUdVXVfVf0IeC9w/OhlSpJWY5S7ZQK8H/hKVf11r/3I3mwvBXauvTxJ0lqMcrfMrwCvAK5Psr1reyOwOckmoIBdwKtH2IYkaQ1GuVvmP4As0XXF2suRJI2Dn1CVpAYZ7pLUoJ/5b2Jaid/UJOmByCN3SWqQ4S5JDTLcJalBnnMfkefkJR2IPHKXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDfJWyAnb162S3iYpaVI8cpekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUETC/ckJya5KcktSc6Z1HYkST9tIuGe5CDgb4GTgGOBzUmOncS2JEk/bVJH7scDt1TVbVX1f8CHgFMntC1J0iKTevzAY4Fv9N7fDvxSf4YkW4At3dvvJblphO2tA741wvKTss+68hdTrOT+HpDjtR9Z1+pY1+qMUtfjl+vYb8+WqaqtwNZxrCvJXFXNjmNd42Rdq2Ndq2Ndq/OzVtekTsvsBo7qvX9c1yZJmoJJhfvngY1Jjk7yYOA04PIJbUuStMhETstU1b1Jzgb+GTgIuKCqbpjEtjpjOb0zAda1Ota1Ota1Oj9TdaWqJrFeSdJ+5CdUJalBhrskNeiADveVHmGQ5JAkl3T91ybZ0Ot7Q9d+U5IXTrmu1yf5cpIdSa5K8vhe331JtnevsV5kHqKuM5LM97b/e72+05N8tXudPuW63tmr6eYk/9Prm+R4XZBkb5Kdy/Qnybu6unckeXqvb5LjtVJdL+/quT7JZ5M8rde3q2vfnmRuynWdkOTu3u/rTb2+iT2OZIi6/qRX085unzq865vkeB2V5DNdFtyQ5LVLzDO5fayqDsgXgwuxtwLHAA8GvgQcu2iePwTe002fBlzSTR/bzX8IcHS3noOmWNdzgYd203+wUFf3/nv7cbzOAN69xLKHA7d1Pw/rpg+bVl2L5v8jBhfgJzpe3bqfAzwd2LlM/8nAJ4EAzwCunfR4DVnXMxe2x+ARH9f2+nYB6/bTeJ0A/OOo+8C461o074uAT09pvI4Ent5NPwK4eYn/Jye2jx3IR+7DPMLgVOCibvqjwPOTpGv/UFXdU1VfA27p1jeVuqrqM1X1/e7tNQzu85+0UR758ELgyqq6q6q+DVwJnLif6toMbBvTtvepqq4G7trHLKcCF9fANcChSY5ksuO1Yl1V9dluuzC9/WuY8VrORB9Hssq6prl/7amqL3TT3wW+wuDT+30T28cO5HBf6hEGiwfmx/NU1b3A3cCjh1x2knX1ncngL/OChySZS3JNkpeMqabV1PVb3T//Pppk4YNmB8R4daevjgY+3Wue1HgNY7naJzleq7V4/yrgU0muy+ARH9P2y0m+lOSTSZ7StR0Q45XkoQwC8tJe81TGK4NTxscB1y7qmtg+tt8eP/CzIMnvALPAr/aaH19Vu5McA3w6yfVVdeuUSvoHYFtV3ZPk1Qz+1fO8KW17GKcBH62q+3pt+3O8DmhJnssg3J/Va35WN16PAa5McmN3ZDsNX2Dw+/pekpOBvwc2Tmnbw3gR8J9V1T/Kn/h4JXk4gz8or6uq74xz3ftyIB+5D/MIgx/Pk+Rg4FHAnUMuO8m6SPIC4FzgxVV1z0J7Ve3uft4G/CuDv+ZTqauq7uzV8j7gF4dddpJ19ZzGon8yT3C8hrFc7fv98RpJnsrgd3hqVd250N4br73AZYzvdOSKquo7VfW9bvoK4EFJ1nEAjFdnX/vXRMYryYMYBPsHqupjS8wyuX1sEhcSxnQx4mAGFxGO5icXYZ6yaJ7XcP8Lqh/upp/C/S+o3sb4LqgOU9dxDC4gbVzUfhhwSDe9DvgqY7qwNGRdR/amXwpcUz+5ePO1rr7DuunDp1VXN9+TGVzcyjTGq7eNDSx/gfAU7n+x63OTHq8h61rP4DrSMxe1Pwx4RG/6s8CJU6zr5xd+fwxC8r+7sRtqH5hUXV3/oxicl3/YtMar+2+/GPibfcwzsX1sbIM7iReDK8k3MwjKc7u2tzI4GgZ4CPCRbkf/HHBMb9lzu+VuAk6acl3/AtwBbO9el3ftzwSu73bu64Ezp1zXnwM3dNv/DPDk3rKv6sbxFuCV06yre/8W4LxFy016vLYBe4AfMjineSZwFnBW1x8GXzpza7f92SmN10p1vQ/4dm//muvaj+nG6kvd7/ncKdd1dm//uobeH5+l9oFp1dXNcwaDmyz6y016vJ7F4Jz+jt7v6uRp7WM+fkCSGnQgn3OXJK2R4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIa9P8q80cbCficDAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(ak.flatten(dr_matchedJet), bins=np.linspace(0,2, 40))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67cfd405", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/workflows/mem.py b/workflows/mem.py index 2a0301e1..1737c0a5 100644 --- a/workflows/mem.py +++ b/workflows/mem.py @@ -28,11 +28,14 @@ class MEMStudiesProcessor(ttHbbBaseProcessor): # Compute deltaR(b, jet) and save the nearest jet (deltaR matching) deltaR = ak.flatten(bquarks.metric_table(self.events.JetGood), axis=2) - idx_pairs_sorted = ak.argsort(deltaR, axis=1) - pairs = ak.argcartesian([bquarks, self.events.JetGood]) + # keeping only the pairs with a deltaR min + maskDR = deltaR<dr_min + deltaRcut = deltaR[maskDR] + idx_pairs_sorted = ak.argsort(deltaRcut, axis=1) + pairs = ak.argcartesian([bquarks, events.Jet])[maskDR] pairs_sorted = pairs[idx_pairs_sorted] - idx_bquarks, idx_JetGood = ak.unzip(pairs_sorted) - + idx_bquarks, idx_Jet = ak.unzip(pairs_sorted) + hasMatch = ak.zeros_like(idx_JetGood, dtype=bool) Npairmax = ak.max(ak.num(idx_bquarks)) # Loop over the (parton, jet) pairs -- GitLab