Commit feb2d5af authored by Daniel Estrada Acevedo's avatar Daniel Estrada Acevedo
Browse files

Change the way to label problematic channels

parent 469e7acf
Pipeline #4132920 passed with stages
in 1 minute and 48 seconds
......@@ -15,7 +15,6 @@ import os
import mplhep
from pathlib import Path
from multiprocessing import cpu_count, Pool
from time import sleep
plt.style.use(mplhep.style.CMS)
......@@ -40,13 +39,13 @@ def compute_ratios(df):
group = group.drop_duplicates(["fed", "slot", "oh", "vfat", "channel"])
ratios["not-readout"][kind].append(
[indexs, len(group[group["notReadout"] == True]) / len(group)]
[indexs, len(group[group["state"] == "notReadout"]) / len(group)]
)
ratios["broken"][kind].append(
[indexs, len(group[group["broken"] == True]) / len(group)]
[indexs, len(group[group["state"] == "broken"]) / len(group)]
)
ratios["not-fully-working"][kind].append(
[indexs, len(group[group["notFullyWork"] == True]) / len(group)]
[indexs, len(group[group["state"] == "notFullyWork"]) / len(group)]
)
# For endcap
......@@ -90,7 +89,7 @@ def filter_not_readout(input_df, output_dir):
:param pandas.DataFrame input_df: Threshold input file.
:param string output_dir: Path to save the output file.
:return pandas.Dataframe output_df: The input file with not-fully working channels discarted.
:return pandas.DataFrame: The input DataFrame with not-readout channels labeled.
"""
discard_list = []
......@@ -104,12 +103,13 @@ def filter_not_readout(input_df, output_dir):
filename = output_dir / "not-readout.dat"
discard_df.sort_index().to_csv(filename, sep=";", header=True, index=False)
# filtering not-readout channels
# label not-readout channels
input_df = input_df.set_index(["fed", "slot", "oh", "vfat"])
not_readout_channels = input_df.index.isin([tuple(i) for i in discard_df.values])
output_df = input_df.assign(notReadout=not_readout_channels).reset_index()
input_df = input_df.reset_index()
input_df.loc[not_readout_channels, "state"] = "notReadout"
return output_df
return input_df
def filter_broken_channels(input_df, output_dir):
......@@ -119,14 +119,15 @@ def filter_broken_channels(input_df, output_dir):
:param pandas.DataFrame input_df: Threshold input file.
:param string output_dir: Path to save the output file.
:return pandas.Dataframe output_df: The input file with broken channels discarted.
:return pandas.Dataframe : The input Dataframe with broken channels labeled.
"""
discard_list = []
filter = input_df["state"] != "notReadout"
for (fed, slot, oh, vfat, channel), df in input_df.groupby(
for (fed, slot, oh, vfat, channel), df in input_df.loc[filter].groupby(
["fed", "slot", "oh", "vfat", "channel"]
):
if ((df["hits"] == 0) & (~df["notReadout"])).all():
if (df["hits"] == 0).all():
discard_list.append(
{"fed": fed, "slot": slot, "oh": oh, "vfat": vfat, "channel": channel}
)
......@@ -137,12 +138,13 @@ def filter_broken_channels(input_df, output_dir):
filename = output_dir / "broken.dat"
discard_df.sort_index().to_csv(filename, sep=";", header=True, index=False)
# filtering broken channels
# label broken channels
input_df = input_df.set_index(["fed", "slot", "oh", "vfat", "channel"])
broken_channels = input_df.index.isin([tuple(i) for i in discard_df.values])
output_df = input_df.assign(broken=broken_channels).reset_index()
input_df = input_df.reset_index()
input_df.loc[broken_channels, "state"] = "broken"
return output_df
return input_df
def filter_not_functional_channels(input_df, output_dir):
......@@ -154,14 +156,14 @@ def filter_not_functional_channels(input_df, output_dir):
the lower outer fence of "hits"/"triggers" (across all the vfat channels data).
That is, computing the interquartile range (IQR), the lower outer fence (L) is:
L = Q1 - 3 * IQR
with IQR = Q3-Q1, where Q1 and Q3 are percentil 25 and 75 of data respectivly.
if L differ from tipycal "hits/triggers" more than 40% and a channel is not
with IQR = Q3 - Q1, where Q1 and Q3 are the 25th and 75th percentiles of the data, respectively.
If L differs from the typical "hits/triggers" by more than 40% and a channel is not
above this minimum value, it is discarded.
:param pandas.DataFrame input_df: Threshold input file.
:param string output_dir: Path to save the output file.
:return pandas.Dataframe: output_df The input file with not-fully working channels discarted.
:return pandas.Dataframe: The input Dataframe with not-fully working channels labeled.
"""
def compute_min_hits_value(hits):
......@@ -209,12 +211,9 @@ def filter_not_functional_channels(input_df, output_dir):
]
discard_list = []
filter = (input_df["state"] != "notReadout") & (input_df["state"] != "broken")
# do not consider channels labeled as notReadout or broken
for param, df in input_df[
(input_df["broken"] == False) & (input_df["notReadout"] == False)
].groupby("parameter"):
for param, df in input_df[filter].groupby("parameter"):
for eta in range(8):
df1 = df[df["vfat"] % 8 == eta]
hits = df1["hits"] / df1["triggers"]
......@@ -222,21 +221,17 @@ def filter_not_functional_channels(input_df, output_dir):
diff_with_Ref = abs(hits_thresh_ref[param] - min_hits_value) / hits_thresh_ref[param]
# first condition : if min_hits_value is less of 40% different of
# the expected hits value vfat has normal channels
# the expected hits value, vfat has normal channels
if diff_with_Ref < 0.4:
continue
# second condition
condition = hits >= min_hits_value
if (condition == True).all():
continue
temp_df = df1.loc[
~condition,
["fed", "slot", "oh", "vfat", "channel"],
]
temp_df = temp_df.assign(mhval=np.ones(shape=len(temp_df)) * min_hits_value)
discard_list.append(temp_df)
discard_list.append(df1.loc[~condition, ["fed", "slot", "oh", "vfat", "channel"]])
discard_df = (
pd.concat(discard_list, ignore_index=False)
......@@ -248,14 +243,14 @@ def filter_not_functional_channels(input_df, output_dir):
filename = output_dir / "not-fully-working.dat"
discard_df.to_csv(filename, sep=";", header=True, index=False)
# filtering not-fully working channels
# label not-fully working channels
input_df = input_df.set_index(["fed", "slot", "oh", "vfat", "channel"])
not_functional_channels = input_df.index.isin(
[tuple(i) for i in discard_df[["fed", "slot", "oh", "vfat", "channel"]].values]
)
output_df = input_df.assign(notFullyWork=(not_functional_channels)).reset_index()
not_functional_channels = input_df.index.isin([tuple(i) for i in discard_df.values])
input_df = input_df.reset_index()
input_df.loc[not_functional_channels, "state"] = "notFullyWork"
return output_df
return input_df
def threshold_plot(df, fed, slot, oh, mapping_filename, nrows, ncols, output_dir):
......@@ -274,7 +269,9 @@ def threshold_plot(df, fed, slot, oh, mapping_filename, nrows, ncols, output_dir
fig, axs = plt.subplots(nrows, ncols, figsize=(55, 25))
axs = axs.flat
df.loc[(df["notReadout"]) & (df["broken"]) & (df["notFullyWork"])]["hits"] == 0
df.loc[
(df["state"] == "notReadout") & (df["state"] == "broken") & (df["state"] == "notFullyWork")
]["hits"] == 0
graph_name = output_dir / "plots/ThresSumary_{}_{}_{}.png".format(fed, slot, oh)
......@@ -324,28 +321,32 @@ def create_configuration(
# rows with triggers == 0 are excluded because their 'hits' are always 0.
input_df = input_df[input_df["triggers"] != 0]
# Channels initialized as healthy
input_df = input_df.assign(state=["healthy"] * len(input_df))
# Find problematic channels
print("Filtering not-readout channels...")
output_df = filter_not_readout(input_df, output_dir)
print("Filtering broken channels...")
output_df = filter_broken_channels(output_df, output_dir)
print("Filtering not-fully functional channels...")
output_df = filter_not_functional_channels(output_df, output_dir)
print("filtering not-readout channels...")
input_df = filter_not_readout(input_df, output_dir)
print("filtering broken channels...")
input_df = filter_broken_channels(input_df, output_dir)
print("filtering not-fully functional channels...")
input_df = filter_not_functional_channels(input_df, output_dir)
print("Computing percentage reports...")
report(output_df, output_dir)
report(input_df, output_dir)
# # Perform plots
# Perform plots
if plotting:
print("plotting...")
os.makedirs("{}/plots".format(output_dir), exist_ok=True)
if mapping_file_name==None:
if mapping_filename == None:
print("No mapping file provided...")
nrows, ncols = 3, int(np.ceil(len(output_df["vfat"].drop_duplicates()) / 3))
nrows, ncols = 3, int(np.ceil(len(input_df["vfat"].drop_duplicates()) / 3))
pool = Pool(cpu_count())
for (fed, slot, oh), df in output_df.groupby(["fed", "slot", "oh"]):
for (fed, slot, oh), df in input_df.groupby(["fed", "slot", "oh"]):
# one image per chamber
pool.apply_async(
threshold_plot, args=(df, fed, slot, oh, mapping_filename, nrows, ncols, output_dir)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment