From 2fca4e4996ce889fff32e028f2f5f17b3cba6584 Mon Sep 17 00:00:00 2001 From: Camilla Galloni Date: Wed, 7 Sep 2022 14:41:20 +0200 Subject: [PATCH 1/5] Create a cluster mask rate scan analysis routine --- gemos/analysis/cluster_scan_analysis.py | 90 +++++++++++++++++++++++++ gemos/cli.py | 28 ++++++++ 2 files changed, 118 insertions(+) create mode 100644 gemos/analysis/cluster_scan_analysis.py diff --git a/gemos/analysis/cluster_scan_analysis.py b/gemos/analysis/cluster_scan_analysis.py new file mode 100644 index 0000000..4fa5f1c --- /dev/null +++ b/gemos/analysis/cluster_scan_analysis.py @@ -0,0 +1,90 @@ +"""Cluster mask rate scan routines""" +import os + +import pandas as pd +import matplotlib.pyplot as plt +import mplhep + +plt.style.use(mplhep.style.CMS) + +SIGMA = 2 + + +def analyze_scan(input_filenames, output_directory, plotting=True): + def plot_scan_oh(df, df_condition): + """ + Plot cluster rates for single OptoHybrid: masked, unmasked, + and the delays for which there is a gain. 
+ """ + nrows, ncols = 1, 1 + df.reset_index() + + fig, ax = plt.subplots(nrows, ncols, figsize=(55, 25)) + + def plot_scan(df, ax, pos=0): + df.reset_index() + ax.plot(df["delay"], df["cluster-rate-masked"], "o", label="Masked", markersize=20) + ax.plot(df["delay"], df["cluster-rate-unmasked"], "x", label="Unmasked", markersize=20) + ax.plot( + df_condition["delay"], + df_condition["cluster-rate-unmasked"], + "x", + label="Unmasked (Masked-Unmasked > {}sigma)".format(SIGMA), + markersize=20, + color="green", + ) + + # Style the plot with legend and limits on the axes + plt.legend(loc="upper left") + top = df.loc[df["delay"] == -1]["cluster-rate-masked"].values[0] * 1.5 + ax.set_ylim(bottom=0, top=top) + ax.set_xlim(-3, 33) + ax.set_xlabel("Delay (BX)") + ax.set_ylabel("Cluster rate (Hz)") + + fed = df.iloc[0]["fed"] + slot = df.iloc[0]["slot"] + oh = df.iloc[0]["oh"] + + output_figure = "{}/plots/fed{}-slot{}-oh{}.png".format(output_directory, fed, slot, oh) + plot_scan(df, ax) + fig.savefig(output_figure) + print("Scan plot saved to {}".format(output_figure)) + + # Load the input files + df = pd.concat((pd.read_csv(f, sep=";", dtype=int) for f in input_filenames), ignore_index=True) + + # Calculate the delays value for which the masking decreases the cluster rate of more than + # {SIGMA} sigma. Binomial statistics is assumed: + # * In absence of flower events a mask of width 1 BX can decrease the rate of p = 1/l1a-period; + # * The total number of events N are from the cluster rate unmasked; + # * The sigma = sqrt ( p N (1-p) ). + # If the cluster rate masked deviates more than {SIGMA}sigma from the unmasked cluster rate, + # then it means that the masking is effective in reducing the flower events. 
+ df_l1a_rate = df[df["l1a-period"] != 0] + df_condition = df_l1a_rate[ + (df_l1a_rate["cluster-rate-unmasked"] - df_l1a_rate["cluster-rate-masked"]) ** 2 + > SIGMA ** 2 + * df_l1a_rate["cluster-rate-unmasked"] + / df_l1a_rate["l1a-period"] + * (1 - 1 / df_l1a_rate["l1a-period"]) + ] + + # Save output file of analysis with delays that satisfy the condition + os.makedirs(output_directory, exist_ok=True) + output_filename = output_directory / "cluster-values.dat" + df_condition.to_csv(output_filename, sep=";", index=False) + + # Save cluster rate plots if desired + if plotting: + os.makedirs(output_directory / "plots", exist_ok=True) + + for (fed, slot, oh), df_oh in df.groupby(["fed", "slot", "oh"]): + df_oh_condition = df_condition[ + (df_condition["fed"] == fed) + & (df_condition["slot"] == slot) + & (df_condition["oh"] == oh) + ] + plot_scan_oh(df_oh, df_oh_condition) + + return output_filename diff --git a/gemos/cli.py b/gemos/cli.py index be6f02f..0de6573 100644 --- a/gemos/cli.py +++ b/gemos/cli.py @@ -6,6 +6,7 @@ import pathlib from gemos.analysis import dac_scan_analysis from gemos.analysis import gbt_phase_scan from gemos.analysis import threshold_scan_analysis +from gemos.analysis import cluster_scan_analysis from gemos.analysis import vfat_calibrations from gemos.analysis import vfat_parameters @@ -98,6 +99,33 @@ def main(): ) ) + # "analyze cluster" subcommand + analyze_cluster_parser = analyze_subparsers.add_parser("cluster") + analyze_cluster_parser.add_argument( + "-p", + "--plotting", + action="store_true", + help="Specify in case cluster mask rate plots are desired", + ) + analyze_cluster_parser.add_argument( + "inputfiles", + type=pathlib.Path, + nargs="+", + help="Files containing the S-bit rate scan results", + ) + analyze_cluster_parser.add_argument( + "outputdir", + type=pathlib.Path, + help="Output directory in which to store the optimal cluster masks resulting from the analysis", + ) + analyze_cluster_parser.set_defaults( + func=lambda args: 
+ cluster_scan_analysis.analyze_scan( + input_filenames=args.inputfiles, + output_directory=args.outputdir, + plotting=args.plotting, + ) + ) + # "create-config" command create_config_parser = subparsers.add_parser("create-config") create_config_subparsers = create_config_parser.add_subparsers(required=True, dest="subcommand") -- GitLab From d20a0a39204e584ae70fa1e5fc4aa41e8eebc1f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurent=20P=C3=A9tr=C3=A9?= Date: Wed, 7 Sep 2022 17:52:46 +0200 Subject: [PATCH 2/5] Implement plotting multiprocessing in cluster mask scan analysis The mp.Pool methods send tasks to the workers by pickling them. Only top-level defined functions are picklable. Implement the following changes: * Move analyze_scan.plot_scan_oh() to _plot_scan_oh() * Print out the error messages, if any. Any exception thrown in a worker thread was previously discarded * Close the PyPlot figure and trigger the Python garbage collector. This aims at minimizing the plotting memory usage --- gemos/analysis/cluster_scan_analysis.py | 95 +++++++++++++++---------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/gemos/analysis/cluster_scan_analysis.py b/gemos/analysis/cluster_scan_analysis.py index 4fa5f1c..f641b59 100644 --- a/gemos/analysis/cluster_scan_analysis.py +++ b/gemos/analysis/cluster_scan_analysis.py @@ -1,5 +1,7 @@ """Cluster mask rate scan routines""" +import gc import os +from multiprocessing import cpu_count, Pool import pandas as pd import matplotlib.pyplot as plt @@ -10,47 +12,52 @@ plt.style.use(mplhep.style.CMS) SIGMA = 2 -def analyze_scan(input_filenames, output_directory, plotting=True): - def plot_scan_oh(df, df_condition): - """ - Plot cluster rates for single OptoHybrid: masked, unmasked, - and the delays for which there is a gain. 
- """ - nrows, ncols = 1, 1 - df.reset_index() +def _plot_scan_oh(output_directory, df, df_condition): + """ + Plot cluster rates for single OptoHybrid: masked, unmasked, + and the delays for which there is a gain. + """ + nrows, ncols = 1, 1 + df.reset_index() - fig, ax = plt.subplots(nrows, ncols, figsize=(55, 25)) - - def plot_scan(df, ax, pos=0): - df.reset_index() - ax.plot(df["delay"], df["cluster-rate-masked"], "o", label="Masked", markersize=20) - ax.plot(df["delay"], df["cluster-rate-unmasked"], "x", label="Unmasked", markersize=20) - ax.plot( - df_condition["delay"], - df_condition["cluster-rate-unmasked"], - "x", - label="Unmasked (Masked-Unmasked > {}sigma)".format(SIGMA), - markersize=20, - color="green", - ) + fig, ax = plt.subplots(nrows, ncols, figsize=(55, 25)) - # Style the plot with legend and limits on the axes - plt.legend(loc="upper left") - top = df.loc[df["delay"] == -1]["cluster-rate-masked"].values[0] * 1.5 - ax.set_ylim(bottom=0, top=top) - ax.set_xlim(-3, 33) - ax.set_xlabel("Delay (BX)") - ax.set_ylabel("Cluster rate (Hz)") - - fed = df.iloc[0]["fed"] - slot = df.iloc[0]["slot"] - oh = df.iloc[0]["oh"] + def plot_scan(df, ax): + df.reset_index() + ax.plot(df["delay"], df["cluster-rate-masked"], "o", label="Masked", markersize=20) + ax.plot(df["delay"], df["cluster-rate-unmasked"], "x", label="Unmasked", markersize=20) + ax.plot( + df_condition["delay"], + df_condition["cluster-rate-unmasked"], + "x", + label="Unmasked (Masked-Unmasked > {}sigma)".format(SIGMA), + markersize=20, + color="green", + ) + + # Style the plot with legend and limits on the axes + plt.legend(loc="upper left") + top = df.loc[df["delay"] == -1]["cluster-rate-masked"].values[0] * 1.5 + ax.set_ylim(bottom=0, top=top) + ax.set_xlim(-3, 33) + ax.set_xlabel("Delay (BX)") + ax.set_ylabel("Cluster rate (Hz)") + + fed = df.iloc[0]["fed"] + slot = df.iloc[0]["slot"] + oh = df.iloc[0]["oh"] + + output_figure = "{}/fed{}-slot{}-oh{}.png".format(output_directory, fed, slot, 
oh) + plot_scan(df, ax) + fig.savefig(output_figure) + print("Scan plot saved to {}".format(output_figure)) + + # Make sure to keep the memory usage reasonable + plt.close(fig) + gc.collect() - output_figure = "{}/plots/fed{}-slot{}-oh{}.png".format(output_directory, fed, slot, oh) - plot_scan(df, ax) - fig.savefig(output_figure) - print("Scan plot saved to {}".format(output_figure)) +def analyze_scan(input_filenames, output_directory, plotting=True): # Load the input files df = pd.concat((pd.read_csv(f, sep=";", dtype=int) for f in input_filenames), ignore_index=True) @@ -79,12 +86,24 @@ def analyze_scan(input_filenames, output_directory, plotting=True): if plotting: os.makedirs(output_directory / "plots", exist_ok=True) + pool = Pool(cpu_count()) + for (fed, slot, oh), df_oh in df.groupby(["fed", "slot", "oh"]): df_oh_condition = df_condition[ (df_condition["fed"] == fed) & (df_condition["slot"] == slot) & (df_condition["oh"] == oh) ] - plot_scan_oh(df_oh, df_oh_condition) + + pool.apply_async( + _plot_scan_oh, + args=(output_directory / "plots", df_oh, df_oh_condition), + error_callback=lambda e: print( + "Error: " + repr(e) # Just display the error for now + ), + ) + + pool.close() + pool.join() return output_filename -- GitLab From 8e5365b8a63e6fefcf2160c934998ccabc540725 Mon Sep 17 00:00:00 2001 From: Camilla Galloni Date: Fri, 23 Sep 2022 18:12:41 +0200 Subject: [PATCH 3/5] Add routine to create OH config from cluster scan results --- gemos/analysis/oh_parameters.py | 105 ++++++++++++++++++++++++++++++++ gemos/cli.py | 30 +++++++++ 2 files changed, 135 insertions(+) create mode 100644 gemos/analysis/oh_parameters.py diff --git a/gemos/analysis/oh_parameters.py b/gemos/analysis/oh_parameters.py new file mode 100644 index 0000000..2360fa7 --- /dev/null +++ b/gemos/analysis/oh_parameters.py @@ -0,0 +1,105 @@ +"""OptoHybrid configuration file routines""" + +import os +import pandas as pd + + +def create_configuration( + input_directory, + output_directory, + 
cluster_mask_filenames, +): + def write_configuration(df): + """Write the configuration file for a given OptoHybrid""" + + fed = df.iloc[0]["fed"] + slot = df.iloc[0]["slot"] + oh = df.iloc[0]["oh"] + + input_cfg_path = "{}/fed{}-slot{}/config-oh{}.cfg".format(input_directory, fed, slot, oh) + output_cfg_path = "{}/fed{}-slot{}/config-oh{}.cfg".format(output_directory, fed, slot, oh) + + # Read OH configuration file + if os.path.isfile(input_cfg_path): + with open(input_cfg_path, "r") as input_cfg_file: + input_cfg_lines = input_cfg_file.readlines() + else: + print("Using empty default input configuration for ({}, {}, {})".format(fed, slot, oh)) + input_cfg_lines = list() + + output_cfg_lines = list() + + # Write lines contained in DataFrame to OH cfg file + for line in input_cfg_lines: + # Skip empty lines and comments: + if line[0] == "\n" or line[0] == "#": + pass + else: + + config_pair = line.split(" ") + config_parameter = config_pair[0] + + # Filter DataFrame to get values for current parameter: + df_parameter = df[(df["parameter"] == config_parameter)] + + n_parameters = len(df_parameter) + # If no rows in DataFrame, leave line as in default config file + if n_parameters > 0: + if n_parameters > 1: + # Warn user if there are multiple config instances for same parameters on same OH + print( + "Warning: duplicate parameter {} for fed {}, slot {}, oh{}. 
Using last specified values".format( + config_parameter, fed, slot, oh + ) + ) + # Update config file line with last parameter value in DataFrame: + config_pair[1] = str(df_parameter.iloc[-1]["value"]) + "\n" + line = " ".join(config_pair) + + output_cfg_lines.append(line) + + # Write the configuration file + os.makedirs(os.path.dirname(output_cfg_path), exist_ok=True) + with open(output_cfg_path, "w") as output_cfg_file: + output_cfg_file.writelines(output_cfg_lines) + return output_cfg_path + + # Sanitize input + if not output_directory: + output_directory = input_directory + + # Parse the parameters files + parameter_dataframes = list() # Append here all DataFrames to be later concatenated + + if cluster_mask_filenames: + df_cluster = pd.concat( + (pd.read_csv(f, sep=";") for f in cluster_mask_filenames), ignore_index=True + ) + + # Keep only needed parameters + df_cluster = df_cluster[["fed", "slot", "oh", "delay"]] + + # Compute the value for the register given the delays that need to be masked + df_mask = pd.DataFrame(columns=["fed", "slot", "oh", "parameter", "value"]) + for (fed, slot, oh), df in df_cluster.groupby(["fed", "slot", "oh"]): + mask = 0 + for delay in df["delay"].values: + mask += 2 ** delay + df_mask.loc[len(df_mask.index)] = [ + fed, + slot, + oh, + "TRIG.L1A_MASK.L1A_MASK_BITMASK", + mask, + ] + + parameter_dataframes.append(df_mask) + + df_oh_parameters = pd.concat(parameter_dataframes, ignore_index=True) + + # Update the configuration files for each OH present in the parameters DataFrame + groups = df_oh_parameters.groupby(["fed", "slot", "oh"]) + output_filenames = groups.apply(write_configuration) + + print("OptoHybrid configuration files written in {}".format(output_directory)) + return output_filenames diff --git a/gemos/cli.py b/gemos/cli.py index 0de6573..e5825d0 100644 --- a/gemos/cli.py +++ b/gemos/cli.py @@ -9,6 +9,7 @@ from gemos.analysis import threshold_scan_analysis from gemos.analysis import cluster_scan_analysis from 
gemos.analysis import vfat_calibrations from gemos.analysis import vfat_parameters +from gemos.analysis import oh_parameters def main(): @@ -147,6 +148,35 @@ def main(): func=lambda args: gbt_phase_scan.create_configuration(args.inputfiles, args.outputdir) ) + # "create-config oh" subcommand + create_config_oh_parser = create_config_subparsers.add_parser("oh") + create_config_oh_parser.add_argument( + "-c", + "--cluster-mask", + dest="cluster_mask_files", + type=pathlib.Path, + action="append", + help="File containing the delays for the cluster bit masking", + ) + create_config_oh_parser.add_argument( + "inputdir", + type=pathlib.Path, + help="Input directory from which to read the OH configuration files", + ) + create_config_oh_parser.add_argument( + "outputdir", + type=pathlib.Path, + nargs="?", + help="Output directory in which to store the OH configuration files (defaults to )", + ) + create_config_oh_parser.set_defaults( + func=lambda args: oh_parameters.create_configuration( + input_directory=args.inputdir, + output_directory=args.outputdir, + cluster_mask_filenames=args.cluster_mask_files, + ) + ) + # "create-config vfat" subcommand create_config_vfat_parser = create_config_subparsers.add_parser("vfat") create_config_vfat_parser.add_argument( -- GitLab From 3a45cb67483b93fd6ea6c9f017f1e48d03e22439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurent=20P=C3=A9tr=C3=A9?= Date: Thu, 27 Oct 2022 13:19:10 +0200 Subject: [PATCH 4/5] Automatically add new parameters to config files --- gemos/analysis/oh_parameters.py | 9 +++++++++ gemos/analysis/vfat_parameters.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/gemos/analysis/oh_parameters.py b/gemos/analysis/oh_parameters.py index 2360fa7..ab29d15 100644 --- a/gemos/analysis/oh_parameters.py +++ b/gemos/analysis/oh_parameters.py @@ -52,12 +52,21 @@ def create_configuration( config_parameter, fed, slot, oh ) ) + # Update config file line with last parameter value in DataFrame: 
config_pair[1] = str(df_parameter.iloc[-1]["value"]) + "\n" line = " ".join(config_pair) + # Remove used parameters + df.drop(df_parameter.index, inplace=True) + output_cfg_lines.append(line) + # Append new configuration parameters + df.drop_duplicates("parameter", keep="last", inplace=True) + for _, row in df.iterrows(): + output_cfg_lines.append("{} {}\n".format(row["parameter"], row["value"])) + # Write the configuration file os.makedirs(os.path.dirname(output_cfg_path), exist_ok=True) with open(output_cfg_path, "w") as output_cfg_file: diff --git a/gemos/analysis/vfat_parameters.py b/gemos/analysis/vfat_parameters.py index 064a7e4..220db76 100644 --- a/gemos/analysis/vfat_parameters.py +++ b/gemos/analysis/vfat_parameters.py @@ -1,4 +1,4 @@ -"""VFAT config file routines""" +"""VFAT configuration file routines""" import os import pandas as pd @@ -25,7 +25,7 @@ def create_configuration( latency_filenames, ): def write_configuration(df): - """Write the configuration file for a given OptoHybrid""" + """Write the configuration file for a given VFAT""" fed = df.iloc[0]["fed"].astype(int) slot = df.iloc[0]["slot"].astype(int) @@ -84,12 +84,21 @@ def create_configuration( config_parameter, fed, slot, oh, vfat ) ) + # Update config file line with last parameter value in DataFrame: config_pair[1] = str(df_parameter.iloc[-1]["value"]) + "\n" line = " ".join(config_pair) + # Remove used parameters + df.drop(df_parameter.index, inplace=True) + output_cfg_lines.append(line) + # Append new configuration parameters + df.drop_duplicates("parameter", keep="last", inplace=True) + for _, row in df.iterrows(): + output_cfg_lines.append("{} {}\n".format(row["parameter"], row["value"])) + # Write the configuration file os.makedirs(os.path.dirname(output_cfg_path), exist_ok=True) with open(output_cfg_path, "w") as output_cfg_file: -- GitLab From 598c5c672dd69dd153df94e5a09b5a3c910cc189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurent=20P=C3=A9tr=C3=A9?= Date: Thu, 27 Oct 2022 14:56:16 
+0200 Subject: [PATCH 5/5] Ensure that all input config files are present in output Before this commit, if the input and output directories were different, only the updated configuration files were present in the output. Ensure that all input configuration files are first copied to the output path. --- gemos/analysis/oh_parameters.py | 6 +++++- gemos/analysis/vfat_parameters.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/gemos/analysis/oh_parameters.py b/gemos/analysis/oh_parameters.py index ab29d15..10524f8 100644 --- a/gemos/analysis/oh_parameters.py +++ b/gemos/analysis/oh_parameters.py @@ -1,6 +1,8 @@ """OptoHybrid configuration file routines""" import os +from shutil import copytree + import pandas as pd @@ -74,7 +76,9 @@ def create_configuration( return output_cfg_path # Sanitize input - if not output_directory: + if output_directory: + copytree(input_directory, output_directory, dirs_exist_ok=True) + else: output_directory = input_directory # Parse the parameters files diff --git a/gemos/analysis/vfat_parameters.py b/gemos/analysis/vfat_parameters.py index 220db76..304b381 100644 --- a/gemos/analysis/vfat_parameters.py +++ b/gemos/analysis/vfat_parameters.py @@ -1,6 +1,8 @@ """VFAT configuration file routines""" import os +from shutil import copytree + import pandas as pd from gemos.analysis.vfat_calibrations import add_parameters @@ -113,7 +115,9 @@ def create_configuration( print("Warning: VFAT mapping and calibration files must both be provided") # Sanitize input - if not output_directory: + if output_directory: + copytree(input_directory, output_directory, dirs_exist_ok=True) + else: output_directory = input_directory # Parse the parameters files -- GitLab