diff --git a/gemos/analysis/thr_scan_analysis.py b/gemos/analysis/thr_scan_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..b2c2661c64ec2328098156a17a27cf169a231409 --- /dev/null +++ b/gemos/analysis/thr_scan_analysis.py @@ -0,0 +1,342 @@ +""" **************************************************************************** +* @file thr_scan_analysis.py * +* @author Daniel Estrada (daniel.estrada.acevedo@cern.ch) * +* @brief Threshold scan analyze routines * +* @date 2022-4-12 * +* * +******************************************************************************** +""" +__author__ = "Daniel Estrada" + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import os +import mplhep +from pathlib import Path +from multiprocessing import cpu_count, Pool + +plt.style.use(mplhep.style.CMS) + + +def compute_ratios(df): + ratios = { + "not-readout": {"fed": [], "oh": [], "vfat": []}, + "broken": {"fed": [], "oh": [], "vfat": []}, + "not-fully-working": {"fed": [], "oh": [], "vfat": []}, + } + + def compute_per(kind): + group_by = { + "fed": "fed", + "oh": ["fed", "slot", "oh"], + "vfat": ["fed", "slot", "oh", "vfat"], + } + + print("For ", kind) + + for indexs, group in df.groupby(group_by[kind]): + group = group.drop_duplicates(["fed", "slot", "oh", "vfat", "channel"]) + + ratios["not-readout"][kind].append( + [indexs, len(group[group["state"] == "notReadout"]) / len(group)] + ) + ratios["broken"][kind].append( + [indexs, len(group[group["state"] == "broken"]) / len(group)] + ) + ratios["not-fully-working"][kind].append( + [indexs, len(group[group["state"] == "notFullyWork"]) / len(group)] + ) + + # For endcap + compute_per("fed") + # For chamber + compute_per("oh") + # For vfat + compute_per("vfat") + + return ratios + + +def report(df, output_dir): + """Writes a report of the percentages of not-readout, broken, and + not-fully-working channels. + :param pandas.DataFrame df: dataFrame with the channels information. + :param string output_dir: Path to save the output file. + """ + ratios = compute_ratios(df) + # Saving into a file + with open(output_dir / "summary_report.txt", "w") as f: + f.write( + "Report of the percentages of not-readout, broken, and not-fully-working channels\n" + + 24 * " " + + "for VFAT, chamber and endcap\n" + + "=================================================================================\n" + ) + for key in ratios.keys(): + f.write(">> " + str(key) + (76 - len(key)) * "-" + "\n") + for per, values in ratios[key].items(): + f.write(str(per) + (20 - len(per)) * " " + "ratio\n") + for i in values: + f.write(str(i[0]) + (20 - len(str(i[0]))) * " " + str(i[1]) + "\n") + f.write("\n") + print("Summary report saved to ", output_dir / "summary_report.txt") + + +def filter_not_readout(input_df, output_dir): + """Identifies and labels the not-readout vfat and generates a file with their + information. A vfat is considered such as not-readout if its channels never + send hits. + + :param pandas.DataFrame input_df: Thershold input file. + :param string output_dir: Path to save the output file. + + """ + input_df["sum"] = input_df.groupby(["fed", "slot", "oh", "vfat"])["hits"].transform("sum") + input_df.loc[input_df["sum"] == 0, "state"] = "notReadout" + input_df.drop("sum", inplace=True, axis=1) + + # saving in a file the not-readout vfats + filename = output_dir / "not-readout.dat" + input_df.loc[input_df["state"] == "notReadout", "fed":"vfat"].drop_duplicates().to_csv( + filename, sep=";", header=True, index=False + ) + + +def filter_broken_channels(input_df, output_dir): + """Identifies and labels broken channels and generates a file with their + information. A channel is considered as broken if It has 0 hits for every + threshold parameter. + + :param pandas.DataFrame input_df: Thershold input file. + :param string output_dir: Path to save the output file. + """ + input_df["sum"] = input_df.groupby(["fed", "slot", "oh", "vfat", "channel"])["hits"].transform( + "sum" + ) + input_df.loc[(input_df["state"] != "notReadout") & (input_df["sum"] == 0), "state"] = "broken" + input_df.drop("sum", inplace=True, axis=1) + + # saving in a file the broken channels + + filename = output_dir / "broken.dat" + input_df.loc[input_df["state"] == "broken", "fed":"channel"].drop_duplicates().to_csv( + filename, sep=";", header=True, index=False + ) + + +# -----------functions to use in filter not_functional_channels----------------- +compute_mh1 = lambda df: df.mean() - df.std() * 3 +compute_mh2 = lambda df: df.median() - 1.5 * (df.quantile(0.75) - df.quantile(0.75)) + + +def compute_pct_NFW(df): + try: + return df.value_counts(normalize=True).to_dict()[True] + except: + return 0 + + +# ------------------------------------------------------------------------------ + + +def filter_not_functional_channels(input_df, output_dir, pct_condition=0.3): + """Identifies and labels the not-functional channels and generates a file + with their information. A channel is considered such as not-fully working if + has low number of hits. + + :param pandas.DataFrame input_df: Thershold input file. + :param string output_dir: Path to save the output file. + """ + + # ------------------------ filter in two steps ----------------------------- + # first: check if hits in a channel is minior than a minimun value + input_df["h/t"] = input_df["hits"] / input_df["triggers"] + input_df["mh1"] = input_df.groupby(["parameter", "fed", "slot", "oh", "vfat"])["h/t"].transform( + compute_mh1 + ) + input_df["h/t < mh1"] = input_df["h/t"].lt(input_df["mh1"]) + + # second: check if a channel appear discarted by most than a "percentage_condition" of parameters. + input_df["pct"] = input_df.groupby(["fed", "slot", "oh", "vfat", "channel"])[ + "h/t < mh1" + ].transform(compute_pct_NFW) + input_df.loc[ + (input_df["state"] != "notReadout") + & (input_df["state"] != "broken") + & (input_df["pct"] > pct_condition), + "state", + ] = "notFullyWork" + + # saving in a file the not-Fully working channels + filename = output_dir / "not-fully-working.dat" + input_df.loc[input_df["state"] == "notFullyWork", "fed":"channel"].drop_duplicates().to_csv( + filename, sep=";", header=True, index=False + ) + + input_df.drop(["h/t", "mh1", "h/t < mh1"], inplace=True, axis=1) + + +def analysis_plot(df, param, fed, slot, oh, mapping_filename, nrows, ncols, output_dir): + """function to perform the scatter VFAT plot of hits per parameter with the minimun + threshold values computed""" + + fig, axs = plt.subplots(nrows, ncols, figsize=(75, 25)) + axs = axs.flat + + graph_name = output_dir / "plots/hitsAnalysis/{}_{}_{}_{}.png".format(param, fed, slot, oh) + + filter = (df["state"] != "notReadout") & (df["state"] != "broken") + + for vfat, df in df[filter].groupby("vfat"): + try: + # the chip ID is extracted from vfat_map.dat + mapping_df = pd.read_csv(mapping_filename, sep=";") + chipID = mapping_df[ + (mapping_df["fed"] == fed) + & (mapping_df["slot"] == slot) + & (mapping_df["oh"] == oh) + & (mapping_df["vfat"] == vfat) + ]["chip-id"].values[0] + except: + chipID = "Unknown" + + data = df["hits"] / df["triggers"] + mean = data.mean() + med = data.median() + mh1 = compute_mh1(data) + mh2 = compute_mh2(data) + + axs[vfat].scatter(df["channel"], data, c="black") + axs[vfat].hlines(mean, 0, 129, colors="darkred", label="avg") + axs[vfat].hlines(med, 0, 129, colors="darkblue", label="med") + axs[vfat].hlines( + mh1, 0, 129, colors="darkred", linestyles="dashed", label=r"$5 \times \sigma $" + ) + axs[vfat].hlines( + mh2, 0, 129, colors="darkblue", linestyles="dashed", label=r"$Q_1 - 3 \times IQR$" + ) + axs[vfat].set_title("VFAT {} chipID {}".format(vfat, chipID), fontsize=18) + axs[vfat].set_xlabel("VFAT Channel", fontsize=15) + axs[vfat].set_ylabel("HITS/TRIGGERS [DAC units]", fontsize=15) + axs[vfat].tick_params(axis="x", labelsize=10) + axs[vfat].tick_params(axis="y", labelsize=10) + + axs[vfat].legend(bbox_to_anchor=(1.05, 0), loc="lower left", fontsize=50) + fig.savefig(graph_name) + print("analysis plot saved to ", graph_name) + + +def threshold_plot(df, fed, slot, oh, mapping_filename, nrows, ncols, output_dir): + """Performs the summary threshold plot for a chamber. It produces a figure + with nrows*ncols 2D plots of the numbers of hits per channel and threshold. + + :param pandas.DataFrame df: Chamber data frame. + :param int fed: to identify the fed. + :param int slot: to identify the AMC slot.1 + :param int oh: to identify the oh. + :param pandas.DataFrame mapping_df: Vfat mapping file. + :param int nrows: number of rows to organize the subplots. + :param int ncols: number of cols to organize the subplots. + :param pandas.DataFrame output_dir: Path to save the images. + """ + fig, axs = plt.subplots(nrows, ncols, figsize=(55, 25)) + axs = axs.flat + + df.loc[ + (df["state"] == "notReadout") & (df["state"] == "broken") & (df["state"] == "notFullyWork") + ]["hits"] == 0 + + graph_name = output_dir / "plots/ThresSumary_{}_{}_{}.png".format(fed, slot, oh) + + for vfat, df in df.groupby("vfat"): + # a map per chamber vfat is builded + map = [] + for _, df in df.groupby("parameter"): + map.append(list(df["hits"] / df["triggers"])) + map = np.array(map) + + try: + # the chip ID is extracted from vfat_map.dat + mapping_df = pd.read_csv(mapping_filename, sep=";") + chipID = mapping_df[ + (mapping_df["fed"] == fed) + & (mapping_df["slot"] == slot) + & (mapping_df["oh"] == oh) + & (mapping_df["vfat"] == vfat) + ]["chip-id"].values[0] + except: + chipID = "Unknown" + + im = axs[vfat].imshow(map, origin="lower", aspect=128.0 / 30.0) + axs[vfat].set_title("VFAT {} chipID {}".format(vfat, chipID), fontsize=18) + axs[vfat].set_xlabel("VFAT Channel", fontsize=13) + axs[vfat].set_ylabel("THR_ARM_DAC [DAC units]", fontsize=13) + axs[vfat].tick_params(axis="x", labelsize=10) + axs[vfat].tick_params(axis="y", labelsize=10) + fig.colorbar(im, ax=axs[vfat], pad=0.01, fraction=0.048) + + plt.tight_layout() + fig.savefig(graph_name) + print("Threshold plot saved to ", graph_name) + + +def create_configuration( + input_filenames, + output_dir, + mapping_filename=None, + plotting=False, +): + """Routine to perform the analysis""" + os.makedirs("{}".format(output_dir), exist_ok=True) + + # Load the input files + input_df = pd.concat((pd.read_csv(f, sep=";") for f in input_filenames), ignore_index=True) + # triggers 0 is excluded because its "hits" always are 0. + input_df = input_df[input_df["triggers"] != 0] + + # Channels initialized as healthy + input_df = input_df.assign(state=["healthy"] * len(input_df)) + + # Find problematic channels + print("filtering not-readout channels...") + filter_not_readout(input_df, output_dir) + print("filtering broken channels...") + filter_broken_channels(input_df, output_dir) + print("filtering not-fully functional channels...") + filter_not_functional_channels(input_df, output_dir) + + print("Computing percentage reports...") + report(input_df, output_dir) + + # Perform plots + if plotting: + print("plotting...") + os.makedirs("{}/plots".format(output_dir), exist_ok=True) + + if mapping_filename == None: + print("No mapping file provided...") + + nrows, ncols = 3, int(np.ceil(len(input_df["vfat"].drop_duplicates()) / 3)) + + pool = Pool(cpu_count()) + for (fed, slot, oh), df in input_df.groupby(["fed", "slot", "oh"]): + # one image per chamber + pool.apply_async( + threshold_plot, args=(df, fed, slot, oh, mapping_filename, nrows, ncols, output_dir) + ) + pool.close() + pool.join() + + print("Done!") + + +# To executed the code directly + +if __name__ == "__main__": + # Change the input and mapping file paths + input_file_name = [Path("/afs/cern.ch/user/d/destrada/private/threshold_fed1467.csv")] + mapping_file_name = Path("/afs/cern.ch/user/d/destrada/private/4Yechan/vfat_map.dat") + output_dir = Path(".") + + create_configuration(input_file_name, output_dir, mapping_file_name, True) diff --git a/gemos/analysis/threshold_scan_analysis.py b/gemos/analysis/threshold_scan_analysis.py index 9200bb5142c7fa02cc3019fc7765a0cbb55cb022..74f1c192c7b7c2876bf94689de8fd36737a847d0 100644 --- a/gemos/analysis/threshold_scan_analysis.py +++ b/gemos/analysis/threshold_scan_analysis.py @@ -32,9 +32,12 @@ def create_configuration(input_filenames, output_directory, target_rate, plottin def plot_scan_oh(df): """Plot threshold scans for all VFATs in single OH""" - def plot_scan_vfat(vfat, df, ax): + def plot_scan_vfat(vfat, df, ax, time_interval): """Plot threshold scan for single VFAT""" ax.plot(df["dac-value"], df["rate"], "o") + # The axis limit is chosen to be twice the maximum rate aquired in the time interval of the scan + ax.set_ylim(1, time_interval * 40080000 * 2) + ax.set_xlim(0, 255) ax.title.set_text(f"VFAT {vfat}") ax.set_xlabel("THR_ARM_DAC") ax.set_ylabel("S-bt rate (Hz)") @@ -44,6 +47,12 @@ def create_configuration(input_filenames, output_directory, target_rate, plottin slot = df.iloc[0]["slot"] oh = df.iloc[0]["oh"] + # The s-bit counters increase of maximum 1 per each bunch crossing; + # the bunch crissing rate in LHC is 40.07897 MHz approximated to 4008000 MHz + # giving a possible increase in the s-bit counters of maximum 4008000 per second. + # The time interval used for the scan can be estimated in second as: + time_interval = (df["rate"].max() // 40080000) + 1 + vfat_group = df.groupby(["vfat"]) nrows, ncols = 3, int(np.ceil(len(vfat_group) / 3)) # Round-up the number of columns @@ -51,7 +60,7 @@ def create_configuration(input_filenames, output_directory, target_rate, plottin thr_scan_axs = thr_scan_axs.flat for group_index, (vfat_index, vfat_df) in enumerate(vfat_group): - plot_scan_vfat(vfat_index, vfat_df, thr_scan_axs[group_index]) + plot_scan_vfat(vfat_index, vfat_df, thr_scan_axs[group_index], time_interval) output_figure = "{}/plots/fed{}-slot{}-oh{}.png".format(output_directory, fed, slot, oh) thr_scan_fig.savefig(output_figure) diff --git a/gemos/cli.py b/gemos/cli.py index be6f02fac64bdf50cf332f427e2975381972da36..1389355e41b75d40af0f2877b51b59c67677ec8e 100644 --- a/gemos/cli.py +++ b/gemos/cli.py @@ -6,6 +6,7 @@ import pathlib from gemos.analysis import dac_scan_analysis from gemos.analysis import gbt_phase_scan from gemos.analysis import threshold_scan_analysis +from gemos.analysis import thr_scan_analysis from gemos.analysis import vfat_calibrations from gemos.analysis import vfat_parameters @@ -97,7 +98,40 @@ def main(): plotting=args.plotting, ) ) - + # "analyze thresh" + analyze_th_parser = analyze_subparsers.add_parser("thresh") + analyze_th_parser.add_argument( + "-p", + "--plotting", + action="store_true", + help="Specify in case summary threshold plots are desired", + ) + analyze_th_parser.add_argument( + "-m", + "--mapping", + dest="mapping_file", + type=pathlib.Path, + help="File containing the VFAT chip ID", + ) + analyze_th_parser.add_argument( + "inputfiles", + type=pathlib.Path, + nargs="+", + help="Files containing the Threshold scan results", + ) + analyze_th_parser.add_argument( + "outputdir", + type=pathlib.Path, + help="Output directory to save the results of the analysis", + ) + analyze_th_parser.set_defaults( + func=lambda args: thr_scan_analysis.create_configuration( + input_filenames=args.inputfiles, + output_dir=args.outputdir, + mapping_filename=args.mapping_file, + plotting=args.plotting, + ) + ) # "create-config" command create_config_parser = subparsers.add_parser("create-config") create_config_subparsers = create_config_parser.add_subparsers(required=True, dest="subcommand")