From bcf2199fc9a7ff7c1d3468ccec0af5ad11ae1100 Mon Sep 17 00:00:00 2001
From: Daniel Estrada <daniel.estrada.acevedo@cern.ch>
Date: Wed, 26 Oct 2022 23:58:44 +0200
Subject: [PATCH 1/2] Speed-up DAC scan plotting routine

---
 gemos/analysis/dac_scan_analysis.py | 115 ++++++++++++++++------------
 1 file changed, 67 insertions(+), 48 deletions(-)

diff --git a/gemos/analysis/dac_scan_analysis.py b/gemos/analysis/dac_scan_analysis.py
index 22962a0..4e4334a 100644
--- a/gemos/analysis/dac_scan_analysis.py
+++ b/gemos/analysis/dac_scan_analysis.py
@@ -1,12 +1,21 @@
-"""DAC scan routines"""
+"""
+@file dac_scan_analysis.py
+@author Antonello Pellecchia (antonello.pallecchia@cern.ch)
+@brief DAC scan routines
+@date 2022-12-06                                     
+"""
+__author__ = "Antonello Pellecchia"
+__maintainer__ = ["Camilla Galloni, Laurent Petre", "Daniel Estrada"]
 
-from enum import Enum
-import os
 
+import os
 import matplotlib.pyplot as plt
+import gc
 import mplhep
-import numpy as np
 import pandas as pd
+from numpy import ceil, argmin
+from enum import Enum
+from multiprocessing import cpu_count, Pool
 
 from gemos.analysis.vfat_calibrations import add_parameters
 
@@ -96,7 +105,7 @@ def analyze_scan(input_df):
         nominal_value = NOMINAL_DAC_VALUES[dac_name][0]
 
         # Calculate chosen ADC value as closest to nominal DAC value
-        chosen_index = np.argmin(abs(adc_values - nominal_value))
+        chosen_index = argmin(abs(adc_values - nominal_value))
         chosen_dac = dac_values[chosen_index]
 
         # Issue warnings
@@ -129,57 +138,50 @@ def analyze_scan(input_df):
     return pd.DataFrame.from_records(output_list).astype(int, errors="ignore")
 
 
-def plot_scan(input_df, output_df, output_dir):
+def plot_scan(vfat_group, output_df, output_dir):
     """Plot all DAC rate scans"""
+    (fed, slot, oh, vfat), vfat_df = vfat_group
 
-    output_df = output_df.set_index(["fed", "slot", "oh", "vfat", "dac-name"]).sort_index()
-
-    vfat_group = input_df.groupby(["fed", "slot", "oh", "vfat"])
+    dac_group = vfat_df.groupby("dac-name")
 
-    for (fed, slot, oh, vfat), vfat_df in vfat_group:
-        dac_group = vfat_df.groupby("dac-name")
+    # Prepare figure for plotting
+    nrows, ncols = 3, int(ceil(len(dac_group) / 3))
+    dac_scan_fig, dac_scan_axs = plt.subplots(nrows, ncols, figsize=(55, 25))
+    dac_scan_axs = dac_scan_axs.flat
 
-        # Prepare figure for plotting
-        nrows, ncols = 3, int(np.ceil(len(dac_group) / 3))
-        dac_scan_fig, dac_scan_axs = plt.subplots(nrows, ncols, figsize=(55, 25))
-        dac_scan_axs = dac_scan_axs.flat
+    # Plot all DAC
+    for group_index, (dac_name, dac_df) in enumerate(dac_group):
+        # Plot DAC scan for current VFAT and register
+        dac_scan_axs[group_index].set_xlabel("DAC units")
+        dac_scan_axs[group_index].set_ylabel(f"{dac_name} ({NOMINAL_DAC_VALUES[dac_name][1]})")
 
-        # Plot all DAC
-        for group_index, (dac_name, dac_df) in enumerate(dac_group):
-            # Plot DAC scan for current VFAT and register
-            dac_scan_axs[group_index].set_xlabel("DAC units")
-            dac_scan_axs[group_index].set_ylabel(f"{dac_name} ({NOMINAL_DAC_VALUES[dac_name][1]})")
+        dac_scan_axs[group_index].plot(dac_df["dac-value"], dac_df["adc-value"], ".", color="black")
 
-            dac_scan_axs[group_index].plot(
-                dac_df["dac-value"], dac_df["adc-value"], ".", color="black"
-            )
-
-            # Draw line pointing to set DAC value
-            nominal_value = NOMINAL_DAC_VALUES[dac_name][0]
-            chosen_dac = output_df.loc[(fed, slot, oh, vfat, dac_name), "dac-value"]
+        # Draw line pointing to set DAC value
+        nominal_value = NOMINAL_DAC_VALUES[dac_name][0]
+        chosen_dac = output_df.loc[(fed, slot, oh, vfat, dac_name), "dac-value"]
 
-            dac_scan_axs[group_index].plot(
-                [chosen_dac, chosen_dac],
-                [min(dac_df["adc-value"]), nominal_value],
-                "--",
-                color="blue",
-            )
-            dac_scan_axs[group_index].plot(
-                [min(dac_df["dac-value"]), chosen_dac],
-                [nominal_value, nominal_value],
-                "--",
-                color="blue",
-            )
+        dac_scan_axs[group_index].plot(
+            [chosen_dac, chosen_dac],
+            [min(dac_df["adc-value"]), nominal_value],
+            "--",
+            color="blue",
+        )
+        dac_scan_axs[group_index].plot(
+            [min(dac_df["dac-value"]), chosen_dac],
+            [nominal_value, nominal_value],
+            "--",
+            color="blue",
+        )
 
-        # Save the figure
-        output_figure = "{}/fed{}-slot{}-oh{}-vfat{}.png".format(output_dir, fed, slot, oh, vfat)
-        dac_scan_fig.savefig(output_figure)
-        print("Scan plot saved to {}".format(output_figure))
+    # Save the figure
+    output_figure = "{}/fed{}-slot{}-oh{}-vfat{}.png".format(output_dir, fed, slot, oh, vfat)
+    dac_scan_fig.savefig(output_figure)
+    print("Scan plot saved to {}".format(output_figure))
 
-        # Delete the figure
-        # FIXME: Reuse the figure
-        plt.close(dac_scan_fig)
-        del dac_scan_fig
+    # Make sure to keep the memory usage reasonable
+    plt.close(dac_scan_fig)
+    gc.collect()
 
 
 def create_configuration(
@@ -225,6 +227,23 @@ def create_configuration(
 
         # Plot!
         os.makedirs("{}/plots".format(output_dir), exist_ok=True)
-        plot_scan(input_df, output_df, output_dir / "plots")
+
+        output_df = output_df.set_index(["fed", "slot", "oh", "vfat", "dac-name"]).sort_index()
+
+        pool = Pool(cpu_count())
+
+        for vfat_group in input_df.groupby(["fed", "slot", "oh", "vfat"]):
+            # one image per vfat
+            pool.apply_async(
+                plot_scan,
+                args=(
+                    vfat_group,
+                    output_df,
+                    output_dir / "plots",
+                ),
+            )
+
+        pool.close()
+        pool.join()
 
     return output_filename
-- 
GitLab


From 2d502bb51f5a9e70cd7e0a6e85a1a62a2e1a0666 Mon Sep 17 00:00:00 2001
From: Daniel Estrada <daniel.estrada.acevedo@cern.ch>
Date: Tue, 6 Dec 2022 23:21:08 +0100
Subject: [PATCH 2/2] Speed-up S-bit rate scan plotting routine

In addition to the parallelization of the plotting via the
multiprocessing module, the analysis routine itself has been optimized.
Indeed, it was possible to rewrite the analyze_scan function as an
helper function, avoiding the if-else statements.
---
 gemos/analysis/threshold_scan_analysis.py | 148 ++++++++++++----------
 1 file changed, 82 insertions(+), 66 deletions(-)

diff --git a/gemos/analysis/threshold_scan_analysis.py b/gemos/analysis/threshold_scan_analysis.py
index 74f1c19..771b082 100644
--- a/gemos/analysis/threshold_scan_analysis.py
+++ b/gemos/analysis/threshold_scan_analysis.py
@@ -1,71 +1,61 @@
-"""S-bit rate scan routines"""
+"""
+@file dac_scan_analysis.py
+@author Antonello Pellecchia (antonello.pallecchia@cern.ch)
+@brief S-bit rate scan routines
+@date 2022-12-06                                     
+"""
+__author__ = "Antonello Pellecchia"
+__maintainer__ = ["Laurent Petre", "Daniel Estrada"]
 
-import os
 
-import numpy as np
+import os
+import gc
 import pandas as pd
+from multiprocessing import Pool, cpu_count
 import matplotlib.pyplot as plt
 import mplhep
+from numpy import ceil
+from multiprocessing import Pool, cpu_count
 
 plt.style.use(mplhep.style.CMS)
 
 
-def create_configuration(input_filenames, output_directory, target_rate, plotting=True):
-    def analyze_scan(group):
-        """Analyze the threshold scan for a given VFAT"""
-        (fed, slot, oh, vfat), df = group
-
-        # Set threshold as first value for which rate <= target_rate, if it exists
-        df_filtered = df[df["rate"] <= target_rate]
-        if len(df_filtered) != 0:
-            threshold_arm_dac = df_filtered.iloc[0]["dac-value"]
-        else:
-            threshold_arm_dac = df.iloc[-1]["dac-value"]
-
-        vfat_dict = dict(
-            [(key, [df.iloc[0][key]]) for key in ["fed", "slot", "oh", "vfat"]]
-            + [("threshold", [threshold_arm_dac])]
-        )
-        vfat_df = pd.DataFrame(vfat_dict)
-        return vfat_df
-
-    def plot_scan_oh(df):
-        """Plot threshold scans for all VFATs in single OH"""
-
-        def plot_scan_vfat(vfat, df, ax, time_interval):
-            """Plot threshold scan for single VFAT"""
-            ax.plot(df["dac-value"], df["rate"], "o")
-            # The axis limit is chosen to be twice the maximum rate aquired in the time interval of the scan
-            ax.set_ylim(1, time_interval * 40080000 * 2)
-            ax.set_xlim(0, 255)
-            ax.title.set_text(f"VFAT {vfat}")
-            ax.set_xlabel("THR_ARM_DAC")
-            ax.set_ylabel("S-bt rate (Hz)")
-            ax.set_yscale("log")
-
-        fed = df.iloc[0]["fed"]
-        slot = df.iloc[0]["slot"]
-        oh = df.iloc[0]["oh"]
-
-        # The s-bit counters increase of maximum 1 per each bunch crossing;
-        # the bunch crissing rate in LHC is 40.07897 MHz approximated to 4008000 MHz
-        # giving a possible increase in the s-bit counters of maximum 4008000 per second.
-        # The time interval used for the scan can be estimated in second as:
-        time_interval = (df["rate"].max() // 40080000) + 1
-
-        vfat_group = df.groupby(["vfat"])
-
-        nrows, ncols = 3, int(np.ceil(len(vfat_group) / 3))  # Round-up the number of columns
-        thr_scan_fig, thr_scan_axs = plt.subplots(nrows, ncols, figsize=(ncols * 10, nrows * 10))
-        thr_scan_axs = thr_scan_axs.flat
-
-        for group_index, (vfat_index, vfat_df) in enumerate(vfat_group):
-            plot_scan_vfat(vfat_index, vfat_df, thr_scan_axs[group_index], time_interval)
-
-        output_figure = "{}/plots/fed{}-slot{}-oh{}.png".format(output_directory, fed, slot, oh)
-        thr_scan_fig.savefig(output_figure)
-        print("Scan plot saved to {}".format(output_figure))
+def plot_scan_oh(group, output_directory):
+    """Plot threshold scans for all VFATs in single OH"""
+    (fed, slot, oh), df = group
+
+    # The S-bit counters increase of maximum 1 per each bunch crossing;
+    # the bunch crossing rate in LHC is 40.07897 MHz approximated to 4008000 MHz
+    # giving a possible increase in the s-bit counters of maximum 4008000 per second.
+    # The time interval used for the scan can be estimated in second as:
+    time_interval = (df["rate"].max() // 40080000) + 1
+
+    nrows, ncols = 3, int(ceil(df["vfat"].max() / 3))  # Round-up the number of columns
+    fig, axs = plt.subplots(nrows, ncols, figsize=(ncols * 10, nrows * 10))
+    axs = axs.flat
+
+    for vfat, vfat_df in df.groupby("vfat"):
+        """Plot threshold scan for single VFAT"""
+        axs[vfat].semilogy(vfat_df["dac-value"], vfat_df["rate"], "o")
+        # The axis limit is chosen to be twice the maximum rate aquired in the time interval of the scan
+        axs[vfat].set_ylim(1, time_interval * 40080000 * 2)
+        axs[vfat].set_xlim(0, 255)
+        axs[vfat].title.set_text(f"VFAT {vfat}")
+        axs[vfat].set_xlabel("THR_ARM_DAC")
+        axs[vfat].set_ylabel("S-bt rate (Hz)")
 
+    plt.tight_layout()
+
+    output_figure = "{}/plots/fed{}-slot{}-oh{}.png".format(output_directory, fed, slot, oh)
+    fig.savefig(output_figure)
+    print("Scan plot saved to {}".format(output_figure))
+
+    # Make sure to keep the memory usage reasonable
+    plt.close(fig)
+    gc.collect()
+
+
+def create_configuration(input_filenames, output_directory, target_rate, plotting=True):
     # Load the input files
     df = pd.concat((pd.read_csv(f, sep=";", dtype=int) for f in input_filenames), ignore_index=True)
 
@@ -78,20 +68,46 @@ def create_configuration(input_filenames, output_directory, target_rate, plottin
         .reset_index()
     )
 
-    # Create one file for all VFATs
-    threshold_df = pd.concat(
-        (analyze_scan(group) for group in df.groupby(["fed", "slot", "oh", "vfat"]))
-    )
+    # Compute the threshold values
+    condition = df["rate"].le(target_rate)
 
+    def analyze_scan(df):
+        """Helper function to get the threshold value for a given VFAT"""
+        try:  # Set threshold as first value for which rate <= target_rate, if it exists
+            threshold_arm_dac = df.loc[condition].iloc[0]
+        except IndexError:  # Use the highest value scanned, if the condition is never met
+            threshold_arm_dac = df.iloc[-1]
+
+        return threshold_arm_dac
+
+    df["threshold"] = df.groupby(["fed", "slot", "oh", "vfat"])["dac-value"].transform(analyze_scan)
+
+    # Save thresholds
     os.makedirs(output_directory, exist_ok=True)
+    output_filename = output_directory / "threshold-values.dat"
+
+    df.drop_duplicates(["fed", "slot", "oh", "vfat"]).loc[
+        :, ["fed", "slot", "oh", "vfat", "threshold"]
+    ].to_csv(output_filename, sep=";", index=False)
+
+    print("Output threshold file written to {}".format(output_filename))
 
     # Plot threshold scans for all VFATs in same OH in one canvas
     if plotting:
         os.makedirs(output_directory / "plots", exist_ok=True)
-        df.groupby(["fed", "slot", "oh"]).apply(plot_scan_oh)
 
-    output_filename = output_directory / "threshold-values.dat"
-    threshold_df.to_csv(output_filename, sep=";", index=False)
+        pool = Pool(cpu_count())
+
+        for group in df.groupby(["fed", "slot", "oh"]):
+            pool.apply_async(
+                plot_scan_oh,
+                args=(
+                    group,
+                    output_directory,
+                ),
+            )
+
+        pool.close()
+        pool.join()
 
-    print("Output threshold file written to {}".format(output_filename))
     return output_filename
-- 
GitLab