Change basic example and add test sample

e7d6d4a2 · Massimiliano Galli · 0909044b · e7d6d4a2 · e7d6d4a2 · e7d6d4a2
Commit e7d6d4a2 authored 3 years ago by Massimiliano Galli
--- a/notebooks/basics.ipynb
+++ b/notebooks/basics.ipynb
@@ -11,7 +11,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -25,20 +25,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "fileset = {\n",
-    "    \"DYJetsToLL_0J_TuneCP5_13TeV-amcatnloFXFX-pythia8\": [\n",
-    "        \"root://cms-xrd-global.cern.ch//store/user/gallim/HggNanoDY_10_6_26-das/DYJetsToLL_0J_TuneCP5_13TeV-amcatnloFXFX-pythia8/crab_HggNANO_UL17_DY-das/220126_135531/0000/DY-RunIISummer19UL17NanoAODv2_1-17.root\"\n",
+    "    \"DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8\": [\n",
+    "        \"samples/skimmed_nano/DYJetsToLL--UL17_v6-v2_109_Skim.root\"\n",
    "    ]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -48,7 +48,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -62,9 +62,48 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "71312b6f136a462b8583d211ce582a68",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Preprocessing:   0%|          | 0/1 [00:00<?, ?file/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e712d79bc9a642e8b731f4c56ac3673d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Processing:   0%|          | 0/1 [00:00<?, ?chunk/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:216: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`\n",
+      "  output[f\"{prefix}_{subfield}\"] = awkward.to_numpy(\n",
+      "/work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:220: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`\n",
+      "  output[field] = awkward.to_numpy(diphotons[field])\n"
+     ]
+    }
+   ],
   "source": [
    "iterative_run = processor.Runner(\n",
    "    executor = processor.IterativeExecutor(compression=None),\n",

 %% Cell type:markdown id: tags:

 # Basic Example

 This short notebook shows how to get started with HiggsDNA and [Coffea](https://github.com/CoffeaTeam/coffea).

 %% Cell type:code id: tags:

 ``` python
 from higgs_dna.utils.logger_utils import setup_logger
 from higgs_dna.workflows import DYStudiesProcessor

 from coffea import processor
 import json
 from importlib import resources
 ```

 %% Cell type:code id: tags:

 ``` python
 fileset = {
-    "DYJetsToLL_0J_TuneCP5_13TeV-amcatnloFXFX-pythia8": [
-        "root://cms-xrd-global.cern.ch//store/user/gallim/HggNanoDY_10_6_26-das/DYJetsToLL_0J_TuneCP5_13TeV-amcatnloFXFX-pythia8/crab_HggNANO_UL17_DY-das/220126_135531/0000/DY-RunIISummer19UL17NanoAODv2_1-17.root"
+    "DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8": [
+        "samples/skimmed_nano/DYJetsToLL--UL17_v6-v2_109_Skim.root"
    ]
 }
 ```

 %% Cell type:code id: tags:

 ``` python
 with resources.open_text("higgs_dna.metaconditions", "Era2017_legacy_xgb_v1.json") as f:
    metaconditions = json.load(f)
 ```

 %% Cell type:code id: tags:

 ``` python
 processor_instance = DYStudiesProcessor(
    metaconditions=metaconditions,
    do_systematics=False,
    apply_trigger=True,
    output_location="output/basics"
 )
 ```

 %% Cell type:code id: tags:

 ``` python
 iterative_run = processor.Runner(
    executor = processor.IterativeExecutor(compression=None),
    schema=processor.NanoAODSchema,
 )

 out = iterative_run(
    fileset,
    treename="Events",
    processor_instance=processor_instance,
 )
 ```
+
+%% Output
+
+
+
+    /work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:216: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`
+      output[f"{prefix}_{subfield}"] = awkward.to_numpy(
+    /work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:220: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`
+      output[field] = awkward.to_numpy(diphotons[field])

--- a/notebooks/samples
+++ b/notebooks/samples
+../tests/samples
\ No newline at end of file
--- a/tests/samples/skimmed_nano/DYJetsToLL--UL17_v6-v2_109_Skim.root
+++ b/tests/samples/skimmed_nano/DYJetsToLL--UL17_v6-v2_109_Skim.root
--- a/tests/samples/skimmed_nano/README.md
+++ b/tests/samples/skimmed_nano/README.md
@@ -9,3 +9,13 @@ using nanoAOD-tools with the following command:
 ```
 python scripts/nano_postproc.py output_dir input_nanoAOD.root -N 100
 ```
+
+**DYJetsToLL--UL17_v6-v2_109_Skim.root**
+
+Produced with:
+
+```
+python scripts/nano_postproc.py /work/gallim/devel/HiggsDNA/tests/samples/skimmed_nano /pnfs/psi.ch/cms/trivcat/store/user/gallim/HggNano_UL17_DY-TnPCompare/DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8/crab_HggNANO_UL17_DY-TnPCompare/220202_072553/0000/DYJetsToLL--UL17_v6-v2_109.root -N 100
+```
+
+The input file is also available on DAS.
--- a/tests/samples/skimmed_nano/ttH_M125_2017.root
+++ b/tests/samples/skimmed_nano/ttH_M125_2017.root