diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/CMakeLists.txt b/Tracker/TrackerRecAlgs/NoisyStripFinder/CMakeLists.txt
index d78cc17e7b1ef8b6cd24ddef92748f163beddbfc..5a85e55131b3ef638671c33f5ec3f742bf48203d 100644
--- a/Tracker/TrackerRecAlgs/NoisyStripFinder/CMakeLists.txt
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/CMakeLists.txt
@@ -17,5 +17,5 @@ atlas_add_component( NoisyStripFinder
 atlas_install_python_modules( python/*.py )
 
-atlas_install_scripts( share/*.py test/*.py )
+atlas_install_scripts( share/*.py share/*.sh )
 
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/README.md b/Tracker/TrackerRecAlgs/NoisyStripFinder/README.md
index b5160d73192745354a96c2df9f6da7f56c592744..407b1d255379dd7d6ecf2b3c9ae5f9e016a1192b 100644
--- a/Tracker/TrackerRecAlgs/NoisyStripFinder/README.md
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/README.md
@@ -1,3 +1,65 @@
+Noisy strips are now part of the conditions database and are used in default reco.
+
+To update the database, use the following steps.
+
+1) Determine the runs to update.
+A handy utility to find all runs of a given type is findFaserRunsByType.py:
+
+`findFaserRunsByType.py -t Physics -o physics_runs.txt 7730-8370`
+
+2) Submit jobs to create the noisy strip DB for each run.
+From a release directory (best to create a subdirectory for this), run:
+
+`submitNoisyStripJobs.py --release .. physics_runs.txt`
+
+Run numbers (or a range) can also be specified directly instead of a text file of runs.
+
+If you want to do this interactively, the submitted jobs run the following scripts:
+`NoisyStripFinderJob.py`
+`makeNoisyStripDB.py`
+
+3) Check that all of the jobs finished successfully (individual runs can also be checked):
+
+`checkNoisyStripJobs.py physics_runs.txt`
+
+This can also write out a file of runs to submit again.
+
+4) Check that the noisy strips found actually make sense.
+This reads the individual DB files in each subdirectory; specific runs can also be given:
+
+`checkNoisyStripDB.py physics_runs.txt`
+
+5) Merge the individual runs into a single DB.
+Note that this script writes the individual runs with open-ended IOVs.
+This means the last run merged will be used for all later data until the DB is updated again.
+So if you are merging multiple times (with the --append option), it is important to do this in chronological order.
+
+`mergeNoisyStripDB.py physics_runs.txt`
+
+Note, this can be slow. Use --verbose to watch the progress.
+
+6) Test the database.
+The resulting database by default has the name noisy_strips.db.
+This can be copied to the data/sqlite200 subdirectory of the working directory, and reco jobs will use it.
+Check here for details:
+https://gitlab.cern.ch/faser/calypso/-/blob/master/Database/ConnectionManagement/FaserAuthentication/data/dblookup.xml
+
+7) Merge with the production DB.
+The updated noisy strips folder /SCT/DAQ/NoisyStrips now needs to be merged into the production DB.
+First copy the current DB from CVMFS to some local directory:
+
+`cp /cvmfs/faser.cern.ch/repo/sw/database/DBRelease/current/sqlite200/ALLP200.db .`
+
+Next, use AtlCoolCopy to merge the updates into this file:
+
+`AtlCoolCopy "sqlite://;schema=noisy_strips.db;dbname=CONDBR3" "sqlite://;schema=ALLP200.db;dbname=CONDBR3"`
+
+This can also be slow.
+
+Finally, the ALLP200.db file should be installed on CVMFS once everything is verified to be correct.
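+
+As a final check, the merged file can be read back with the same PyCool calls that
+checkNoisyStripDB.py uses. A minimal sketch (the run number and the noisy_strips.db
+file name here are just examples):
+
+```python
+from PyCool import cool
+from CoolConvUtilities.AtlCoolLib import indirectOpen
+
+# Open the merged DB read-only and count noisy-strip payloads for one run
+db = indirectOpen("sqlite://;schema=noisy_strips.db;dbname=CONDBR3",
+                  readOnly=True, oracle=False, debug=False)
+folder = db.getFolder("/SCT/DAQ/NoisyStrips")
+run = 7730  # example run; COOL IOV keys pack the run number as (run << 32)
+itr = folder.browseObjects(run << 32, ((run + 1) << 32) - 1,
+                           cool.ChannelSelection.all(), '')
+nstrips = 0
+while itr.goToNext():
+    nstrips += 1
+print(f"Run {run}: {nstrips} noisy strips stored")
+db.closeDatabase()
+```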
+
+Older instructions from Tobias when he was developing this package are here:
+
 Mask noisy strips:
 
 1) Run the NoisyStripFinderDbg.py on raw data files
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/test/NoisyStripFinderJob.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/NoisyStripFinderJob.py
similarity index 89%
rename from Tracker/TrackerRecAlgs/NoisyStripFinder/test/NoisyStripFinderJob.py
rename to Tracker/TrackerRecAlgs/NoisyStripFinder/share/NoisyStripFinderJob.py
index e9f61e4b3a68806c47639941e9186fd0d061f878..b6dc5c514d05c558310bf1108e505deca1e8098c 100755
--- a/Tracker/TrackerRecAlgs/NoisyStripFinder/test/NoisyStripFinderJob.py
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/NoisyStripFinderJob.py
@@ -63,10 +63,18 @@ replicaSvc.UseCOOLSQLite = True
 replicaSvc.UseCOOLFrontier = False
 replicaSvc.UseGeomSQLite = True
 
+# Don't print out every event
+from AthenaConfiguration.ComponentFactory import CompFactory
+eventLoop = CompFactory.AthenaEventLoopMgr()
+eventLoop.EventPrintoutInterval = 1000
+acc.addService(eventLoop)
+
 if args.verbose:
     acc.foreach_component("*").OutputLevel = VERBOSE
+    acc.printConfig()
 else:
     acc.foreach_component("*").OutputLevel = INFO
 
 sc = acc.run(maxEvents=args.nevents)
+print(f"Job finished with {sc.isSuccess()} => {not sc.isSuccess()}")
 sys.exit(not sc.isSuccess())
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripDB.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripDB.py
new file mode 100755
index 0000000000000000000000000000000000000000..6ad335d516206eedff8d298d2eb180e70d4003a4
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripDB.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+#
+# Sept 2022, E. Torrence
+#
+# Script to check the noisy strips found in individual run DBs
+#
+# Usage:
+#  ./checkNoisyStripDB.py -h
+#
+import sys
+import argparse
+import subprocess
+
+from pathlib import Path
+
+from PyCool import cool
+from CoolConvUtilities.AtlCoolLib import indirectOpen
+
+def parse_arguments():
+
+    description="Script to check the noisy strips found in individual run DBs\n"
+    parser = argparse.ArgumentParser(description,
+                                     formatter_class=argparse.RawTextHelpFormatter)
+
+    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
+    parser.add_argument("--threshold", default="0.01", help="Threshold to call a strip noisy (0.01)")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
+    return parser.parse_args()
+
+# Take a string and turn it into a list of integers
+# Can specify single values, ranges, or comma separated lists of both
+def parseRunList(runlist):
+
+    run_list = []
+
+    # Check if this is a file with run numbers
+    if len(runlist) == 1:
+        path = Path(runlist[0])
+        if path.exists() and path.is_file():
+            print(f"Reading runs from {path}")
+            # Try reading each line as a run number
+            with path.open() as f:
+                for line in f.readlines():
+                    line = line.strip()
+                    if len(line) == 0: continue
+                    if line[0] in ['#', '!']: continue
+                    if not line.isnumeric():
+                        print(f"Error parsing {line}")
+                        continue
+                    run_list.append(int(line))
+            # Done reading file
+            return(run_list)
+        elif '-' in runlist[0]:
+            pass
+        elif ',' in runlist[0]:
+            pass
+        elif not runlist[0].isnumeric():
+            print(f"File {path} doesn't exist!")
+            return run_list
+
+    for string in runlist:
+        tokens = string.split(',')
+
+        for segment in tokens:
+
+            if len(segment) == 0: continue
+
+            if '-' in segment:  # Range of runs
+                start, end = segment.split('-')
+                if not start.isnumeric():
+                    print(f"Found invalid run {start}")
+                    continue
+                if not end.isnumeric():
+                    print(f"Found invalid run {end}")
+                    continue
+                start = int(start)
+                end = int(end)
+                run_list.extend(list(range(int(start), int(end)+1)))
+
+            else:
+                if not segment.isnumeric():
+                    print(f"Found invalid run {segment}")
+                    continue
+                run_list.append(int(segment))
+
+    return(run_list)
+
+class NoisyRunAnalyzer:
+
+    def __init__(self, verbose=False, threshold=0.01):
+        self.verbose = verbose
+
+        self.run_dict = {}
+        self.noise_threshold = threshold
+
+    def addRun(self, runnum):
+
+        if self.verbose: print(f"\nRun {runnum}")
+
+        runstr = f'{runnum:06d}'
+        infile = Path(f'{runstr}/noisy_{runstr}.db')
+        if not infile.is_file():
+            print(f"{infile} doesn't exist!")
+            return
+
+        db_string = f'sqlite://;schema={runstr}/noisy_{runstr}.db;dbname=CONDBR3'
+        try:
+            self.db = indirectOpen(db_string, readOnly=True, oracle=False, debug=False)
+        except Exception as e:
+            print(e)
+            return
+
+        # Now read all channels
+        folder_string = "/SCT/DAQ/NoisyStrips"
+        try:
+            self.folder = self.db.getFolder(folder_string)
+        except Exception as e:
+            print(e)
+            return
+
+        if self.folder is None:
+            print(f"Can't access folder {folder_string} in {db_string}")
+            return
+
+        channels = cool.ChannelSelection.all()
+        iov_lo = (runnum<<32)
+        iov_hi = ((runnum+1)<<32) - 1
+        tag = ''
+
+        try:
+            itr = self.folder.browseObjects(iov_lo, iov_hi, channels, tag)
+        except Exception as e:
+            print(e)
+            return
+
+        self.run_dict.setdefault(runnum, {})
+
+        # Now iterate through objects (should only be one IOV, but multiple channels)
+        while itr.goToNext():
+            obj = itr.currentRef()
+            if self.verbose: print(obj.payload())
+
+            sensor = obj.payload()['sensor']
+            strip = obj.payload()['strip']
+            occupancy = obj.payload()['occupancy']
+
+            if occupancy < self.noise_threshold: continue
+
+            # Store the occupancy keyed by sensor and strip
+            sensor_dict = self.run_dict[runnum].setdefault(sensor, {})
+            sensor_dict[strip] = occupancy
+
+        if self.verbose: print(self.run_dict)
+
+        # Done, close the database
+        self.db.closeDatabase()
+
+    def printRunSummary(self):
+
+        for run in self.run_dict:
+
+            #print(f"Run {run}: {len(self.run_dict[run])} sensors with noisy strips")
+
+            noisy_by_layer = [0] * 12
+            noisy_strips_by_layer = [0] * 12
+
+            for sensor in self.run_dict[run]:
+                layer = sensor // 16
+                noisy_by_layer[layer] += 1
+                noisy_strips_by_layer[layer] += len(self.run_dict[run][sensor])
+
+            #print(f"Sensors by layer: ", end='')
+            #[ print(f' {n:3d}', end='') for n in noisy_by_layer]
+            #print()
+
+            print(f"Run {run} strips > {100*self.noise_threshold:3.1f}% by layer: ", end='')
+            [ print(f' {n:3d}', end='') for n in noisy_strips_by_layer]
+            print()
+
+# Command-line execution
+if __name__ == "__main__":
+
+    # Parse the command-line arguments
+    args = parse_arguments()
+
+    run_list = parseRunList(args.runs)
+    run_list.sort()
+
+    nra = NoisyRunAnalyzer(verbose=args.verbose, threshold=float(args.threshold))
+
+    for runnum in run_list:
+        nra.addRun(runnum)
+
+    nra.printRunSummary()
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripHist.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripHist.py
new file mode 100755
index 0000000000000000000000000000000000000000..0ace2cb64ed224633bc5cf16ac81046384a5b3c2
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripHist.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+#
+# Simple utility to run as a post script
+# after the noisyStripFinder
+#
+# Usage: checkNoisyStripHist.py <histfile> [return code]
+#
+import sys
+if len(sys.argv) <= 1:
+    sys.exit(1)
+filename = sys.argv[1]
+#
+# Check previous return code if it is provided
+if len(sys.argv) >= 3:
+    rc = int(sys.argv[2])
+    if rc: sys.exit(rc)
+#
+# Check histogram entries
+# This causes a segfault. Let's try without ROOT
+#from ROOT import TFile
+if False:
+    import ROOT
+    try:
+        f = ROOT.TFile.Open(filename, 'r')
+    except Exception as e:
+        print(e)
+        sys.exit(1)
+    else:
+        n = f.Get("numEvents").GetVal()
+        print(f"Found {filename} with {n} entries")
+        sys.exit(n==0)
+
+from pathlib import Path
+f = Path(filename)
+if not f.is_file(): sys.exit(1)
+
+size = f.stat().st_size
+
+if size < 1000:  # Almost certainly empty
+    print(f"Found {f} with size {size}!")
+    sys.exit(1)
+
+sys.exit(0)
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripJobs.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripJobs.py
new file mode 100755
index 0000000000000000000000000000000000000000..937c7e21d5a39812279ffdfb481025e78738dc20
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/checkNoisyStripJobs.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+#
+# Sept 2022, E. Torrence
+#
+# Script to check NoisyStrips jobs for problems
+#
+# Usage:
+#  ./checkNoisyStripJobs.py -h
+#
+import sys
+import argparse
+from pathlib import Path
+
+import ROOT
+
+def parse_arguments():
+
+    description="Script to check noisy strip finding jobs\n"
+    parser = argparse.ArgumentParser(description,
+                                     formatter_class=argparse.RawTextHelpFormatter)
+
+    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
+
+    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
+    parser.add_argument("--write_missing", action="store_true", help="Write out missing runs to file")
+
+    return parser.parse_args()
+
+# Take a string and turn it into a list of integers
+# Can specify single values, ranges, or comma separated lists of both
+def parseRunList(runlist):
+
+    run_list = []
+
+    # Check if this is a file with run numbers
+    if len(runlist) == 1:
+        path = Path(runlist[0])
+        if path.exists() and path.is_file():
+            print(f"Reading runs from {path}")
+            # Try reading each line as a run number
+            with path.open() as f:
+                for line in f.readlines():
+                    line = line.strip()
+                    if len(line) == 0: continue
+                    if line[0] in ['#', '!']: continue
+                    if not line.isnumeric():
+                        print(f"Error parsing {line}")
+                        continue
+                    run_list.append(int(line))
+            # Done reading file
+            return(run_list)
+        elif '-' in runlist[0]:
+            pass
+        elif ',' in runlist[0]:
+            pass
+        elif not runlist[0].isnumeric():
+            print(f"File {path} doesn't exist!")
+            return run_list
+
+    for string in runlist:
+        tokens = string.split(',')
+
+        for segment in tokens:
+
+            if len(segment) == 0: continue
+
+            if '-' in segment:  # Range of runs
+                start, end = segment.split('-')
+                if not start.isnumeric():
+                    print(f"Found invalid run {start}")
+                    continue
+                if not end.isnumeric():
+                    print(f"Found invalid run {end}")
+                    continue
+                start = int(start)
+                end = int(end)
+                run_list.extend(list(range(int(start), int(end)+1)))
+
+            else:
+                if not segment.isnumeric():
+                    print(f"Found invalid run {segment}")
+                    continue
+                run_list.append(int(segment))
+
+    return(run_list)
+
+class JobChecker:
+
+    def __init__(self, run=None):
+        self.select_run(run)
+
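+        # Checks run in the order registered below; check_run() stops at the first failure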
+        self.verbose = True
+        self.check_all = False
+
+        self.check_function_list = []
+        self.check_function_list.append(self.check_directory)
+        self.check_function_list.append(self.check_histograms)
+        self.check_function_list.append(self.check_dbfile)
+
+    def select_run(self, run):
+        self.run_number = run
+        if run is None: return
+        self.run_string = f'{run:06d}'
+
+    def check_run(self, run=None):
+        # Return true on error
+
+        if run is not None:
+            self.select_run(run)
+
+        if self.run_number is None:
+            print("No run specified!")
+            return True
+
+        for func in self.check_function_list:
+            if func(): return True
+
+        return False
+
+    def check_directory(self):
+
+        directory_path = Path(self.run_string)
+        if not directory_path.exists():
+            print(f"* Directory {self.run_string} not found!")
+        elif self.verbose:
+            print(f" => Directory {self.run_string} found")
+        return( not directory_path.exists() )
+
+    def check_dbfile(self):
+
+        dbfile_path = Path(f'{self.run_string}/noisy_{self.run_string}.db')
+        if not dbfile_path.exists():
+            print(f"* Database file {dbfile_path} not found!")
+        elif self.verbose:
+            print(f" => Database file {dbfile_path} found")
+        return( not dbfile_path.exists() )
+
+    def check_histograms(self):
+
+        # First, find the submit files
+        directory_path = Path(self.run_string)
+
+        submit_list = directory_path.glob('noise*.sub')
+
+        missing = False
+
+        for filepath in submit_list:
+
+            # Ignore DAG
+            if '.dag.' in str(filepath): continue
+
+            filestem = filepath.stem
+            hist_file = directory_path / Path(f'{filestem}.root')
+            if hist_file.exists():
+                if self.verbose: print(f" => Found histogram file {hist_file}")
+
+                # Check number of events?
+                try:
+                    f = ROOT.TFile.Open(str(hist_file), 'r')
+                except Exception as e:
+                    print(e)
+                    missing = True
+                else:
+                    n = f.Get("numEvents").GetVal()
+                    if self.verbose:
+                        print(f"{hist_file} found with {n} entries")
+                    if n == 0:
+                        print(f"{hist_file} found with {n} entries")
+                        missing = True
+
+                continue
+
+            # Histogram doesn't exist
+            missing = True
+            print(f"* Histogram file {hist_file} missing! ", end="")
+
+            # See if we can figure out why
+            logfile_path = directory_path / Path(f'{filestem}.log')
+            if not logfile_path.exists():
+                print("=> log file not found")
+                continue
+
+            import subprocess
+            if subprocess.call(['/bin/grep', "Killed", f"{logfile_path}"],
+                               stdout=subprocess.DEVNULL):
+                # True means no match
+                pass
+
+            else:
+                # False means match
+                # See if we can find the time
+                rc = subprocess.run(['/bin/grep', 'Job finished after', f"{logfile_path}"],
+                                    stdout=subprocess.PIPE,
+                                    universal_newlines=True)
+
+                if rc.returncode:
+                    # Can't find running time
+                    print("=> job appears to have been killed")
+                else:
+                    timestr = rc.stdout.replace('Job finished after ', '')
+                    print(f"=> job appears to have been killed after {timestr}")
+                continue
+
+            # Can't figure out why
+            print('=> unknown problem')
+
+        return missing
+
+# Command-line execution
+if __name__ == "__main__":
+
+    # Parse the command-line arguments
+    args = parse_arguments()
+
+    run_list = parseRunList(args.runs)
+    run_list.sort()
+
+    good_runs = []
+    missing_runs = []
+
+    jc = JobChecker()
+    jc.verbose = args.verbose
+
+    for runnum in run_list:
+
+        if args.verbose: print(f"\nRun {runnum}")
+
+        if jc.check_run(runnum):
+            missing_runs.append(runnum)
+        else:
+            good_runs.append(runnum)
+
+    print(f"Found {len(good_runs)} good runs and {len(missing_runs)} missing runs")
+
+    if args.write_missing:
+        missing_file="missing_runs.txt"
+        with open(missing_file, "w") as f:
+            [f.write(f"{run}\n") for run in missing_runs]
+        print(f"Wrote {len(missing_runs)} missing runs to {missing_file}")
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/findFaserRunsByType.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/findFaserRunsByType.py
new file mode 100755
index 0000000000000000000000000000000000000000..5a6809ea3556f4bd5d56fc492ae615941b631372
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/findFaserRunsByType.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+#
+# Sept 2022, E. Torrence
+#
+# Script to find FASER runs taken in Physics
+#
+# Usage:
+#  ./findFaserRunsByType.py -h
+#
+import json
+import argparse
+import requests
+
+from pathlib import Path
+
+def parse_arguments():
+
+    description="Script to find runs of a given type in a range\n"
+    parser = argparse.ArgumentParser(description,
+                                     formatter_class=argparse.RawTextHelpFormatter)
+
+    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
+
+    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
+    parser.add_argument("-o", "--output", default="findRuns.txt", help="Specify output file")
+    parser.add_argument("-t", "--type", default="Physics", help="Run type to match")
+    return parser.parse_args()
+
+# Take a string and turn it into a list of integers
+# Can specify single values, ranges, or comma separated lists of both
+def parseRunList(runlist):
+
+    run_list = []
+
+    # Check if this is a file with run numbers
+    if len(runlist) == 1:
+        path = Path(runlist[0])
+        if path.exists() and path.is_file():
+            print(f"Reading runs from {path}")
+            # Try reading each line as a run number
+            with path.open() as f:
+                for line in f.readlines():
+                    line = line.strip()
+                    if len(line) == 0: continue
+                    if line[0] in ['#', '!']: continue
+                    if not line.isnumeric():
+                        print(f"Error parsing {line}")
+                        continue
+                    run_list.append(int(line))
+            # Done reading file
+            return(run_list)
+
+    for string in runlist:
+        tokens = string.split(',')
+
+        for segment in tokens:
+
+            if len(segment) == 0: continue
+
+            if '-' in segment:  # Range of runs
+                start, end = segment.split('-')
+                start = int(start)
+                end = int(end)
+                run_list.extend(list(range(int(start), int(end)+1)))
+
+            else:
+                run_list.append(int(segment))
+
+    return(run_list)
+
+# Command-line execution
+if __name__ == "__main__":
+
+    # Parse the command-line arguments
+    args = parse_arguments()
+
+    run_list = parseRunList(args.runs)
+    run_list.sort()
+
+    with open(args.output, "w") as f:
+        f.write("# findFaserRunsByType.py")
+        [f.write(f" {run}") for run in args.runs]
+        f.write("\n")
+
+    # faser-runinfo address
+    url = "https://faser-runinfo.app.cern.ch/cgibin/"
+
+    # Cycle through range
+    for run in run_list:
+        query = f"{url}getRunInfo.py?runno={run}"
+        response = requests.get(query)
+
+        if not response.json():
+            if args.verbose:
+                print(f"Couldn't find run {run}")
+            continue
+
+        run_type = response.json()['type']
+        if args.verbose:
+            print(f"Run {run} has type {run_type}")
+        if run_type != args.type: continue
+
+        with open(args.output, "a") as f:
+            f.write(f"{run}\n")
+
+    # End of loop over runs
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/makeNoisyStripDB.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/makeNoisyStripDB.py
index 11ab39ae5341432be8dd2f624496601294635e81..4d58e4f4df7d7c7be06f6290c9ce16cd09d94661 100755
--- a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/makeNoisyStripDB.py
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/makeNoisyStripDB.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import os
 import sys
@@ -9,8 +9,9 @@ from CoolConvUtilities.AtlCoolLib import indirectOpen
 
 parser = argparse.ArgumentParser()
 parser.add_argument("file", nargs="+", help="full path to input file")
-parser.add_argument("-t", "--threshold", type=float, default=0.01, help="add strips with an occupancy larger this threshold to the database")
+parser.add_argument("-t", "--threshold", type=float, default=0.001, help="add strips with an occupancy larger than this threshold to the database")
 parser.add_argument("--force", "-f", action="store_true", help="Overwrite existing DB")
+parser.add_argument("--permissive", action="store_true", help="Allow some input files to be missing")
 parser.add_argument("--output", "-o", default="noisy_strips.db", help="Specify output DB")
 parser.add_argument("--isMC", action="store_true", help="Write MC DB (default: real data)")
 args = parser.parse_args()
@@ -26,6 +27,7 @@ HistDict = {}
 
 ROOT.TH1.AddDirectory(0) # This is necessary in order to have the histogram data after closing the file
 
+trigger = None
 iovlo = cool.ValidityKeyMax
 iovhi = cool.ValidityKeyMin
 
@@ -33,8 +35,23 @@ iovhi = cool.ValidityKeyMin
 skipList = ["numEvents", "trigger", "IOVLoRun", "IOVLoLB", "IOVHiRun", "IOVHiLB"]
 
 for inputfile in args.file:
-    f = ROOT.TFile.Open(inputfile, "r")
-    numEvents += f.Get("numEvents").GetVal()
+    # Check that this exists
+    if not os.path.exists(inputfile):
+        if args.permissive: continue
+        print(f"File {inputfile} not found!")
+        sys.exit(1)
+
+    try:
+        f = ROOT.TFile.Open(inputfile, "r")
+    except Exception as e:
+        print(e)
+        if args.permissive: continue
+        sys.exit(1)
+
+    n = f.Get("numEvents").GetVal()
+    print(f"Found {n} events in {inputfile}")
+    if n == 0: continue
+    numEvents += n
     lorun = f.Get("IOVLoRun").GetVal()
     hirun = f.Get("IOVHiRun").GetVal()
     lo = (lorun << 32)
@@ -42,8 +59,13 @@ for inputfile in args.file:
     if lo < iovlo: iovlo = lo
     if hi > iovhi: iovhi = hi
 
-    if nfiles == 0:
+    if trigger is None:
         trigger = f.Get("trigger").GetVal()
+    else:
+        t = f.Get("trigger").GetVal()
+        if t != trigger:
+            print(f"Trigger mismatch! {t} != {trigger} in {inputfile}")
+            sys.exit(1) # This shouldn't happen
 
     for rootkey in f.GetKeyNames():
@@ -58,9 +80,8 @@ for inputfile in args.file:
     nfiles += 1
     f.Close()
 
-print("Total # of root files analyzed = ", nfiles)
+print(f"Total {nfiles} files analyzed with {numEvents} events")
 print(f"Trigger mask = 0x{trigger:02x}")
-print("Total number of events = ", numEvents)
 print(f"IOV from {(iovlo >> 32)}/{(iovlo & 0xFFFFFFFF)} to {(iovhi >> 32)}/{(iovhi & 0xFFFFFFFF)}")
 
 # Write DB
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/mergeNoisyStripDB.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/mergeNoisyStripDB.py
new file mode 100755
index 0000000000000000000000000000000000000000..121aedcc342604846ef9911559ef8b5efb0818a9
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/mergeNoisyStripDB.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+#
+# Sept 2022, E. Torrence
+#
+# Script to merge individual run DBs into a master DB
+#
+# Usage:
+#  ./mergeNoisyStripDB.py -h
+#
+import sys
+import argparse
+import subprocess
+
+from pathlib import Path
+
+def parse_arguments():
+
+    description="Script to merge DBs from individual runs into one DB\n"
+    parser = argparse.ArgumentParser(description,
+                                     formatter_class=argparse.RawTextHelpFormatter)
+
+    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
+
+    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
+    parser.add_argument("-a", "--append", action="store_true", help="Append (rather than overwrite) existing file")
+    parser.add_argument("-o", "--output", default="noisy_strips.db", help="Specify output DB name")
+    return parser.parse_args()
+
+# Take a string and turn it into a list of integers
+# Can specify single values, ranges, or comma separated lists of both
+def parseRunList(runlist):
+
+    run_list = []
+
+    # Check if this is a file with run numbers
+    if len(runlist) == 1:
+        path = Path(runlist[0])
+        if path.exists() and path.is_file():
+            print(f"Reading runs from {path}")
+            # Try reading each line as a run number
+            with path.open() as f:
+                for line in f.readlines():
+                    line = line.strip()
+                    if len(line) == 0: continue
+                    if line[0] in ['#', '!']: continue
+                    if not line.isnumeric():
+                        print(f"Error parsing {line}")
+                        continue
+                    run_list.append(int(line))
+            # Done reading file
+            return(run_list)
+        elif '-' in runlist[0]:
+            pass
+        elif ',' in runlist[0]:
+            pass
+        elif not runlist[0].isnumeric():
+            print(f"File {path} doesn't exist!")
+            return run_list
+
+    for string in runlist:
+        tokens = string.split(',')
+
+        for segment in tokens:
+
+            if len(segment) == 0: continue
+
+            if '-' in segment:  # Range of runs
+                start, end = segment.split('-')
+                if not start.isnumeric():
+                    print(f"Found invalid run {start}")
+                    continue
+                if not end.isnumeric():
+                    print(f"Found invalid run {end}")
+                    continue
+                start = int(start)
+                end = int(end)
+                run_list.extend(list(range(int(start), int(end)+1)))
+
+            else:
+                if not segment.isnumeric():
+                    print(f"Found invalid run {segment}")
+                    continue
+                run_list.append(int(segment))
+
+    return(run_list)
+
+# Command-line execution
+if __name__ == "__main__":
+
+    # Parse the command-line arguments
+    args = parse_arguments()
+
+    run_list = parseRunList(args.runs)
+    run_list.sort()
+
+    first = True
+
+    for runnum in run_list:
+
+        if args.verbose: print(f"\nRun {runnum}")
+
+        runstr = f'{runnum:06d}'
+        infile = Path(f'{runstr}/noisy_{runstr}.db')
+        if not infile.is_file():
+            print(f"{infile} doesn't exist!")
+            continue
+
+        command = ['AtlCoolCopy']
+        command.append(f'sqlite://;schema={runstr}/noisy_{runstr}.db;dbname=CONDBR3')
+        command.append(f'sqlite://;schema={args.output};dbname=CONDBR3')
+
+        if first:
+            first = False
+            target = Path(args.output)
+            if not target.is_file():
+                print(f"Creating file {args.output}")
+                command.append("-create")
+            elif args.append:
+                print(f"Appending to existing file {args.output}")
+            else:
+                print(f"Deleting existing file {args.output}")
+                target.unlink()
+                command.append("-create")
+
+        command.extend(["-alliov", "-nrls", f"{runnum}", "0"])
+
+        if args.verbose: print(command)
+
+        rc = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                            universal_newlines=True)
+
+        if args.verbose: print(rc.stdout)
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/runFaserScript.sh b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/runFaserScript.sh
new file mode 100755
index 0000000000000000000000000000000000000000..97d599b73e1046223cc0c6e938062b1206c6e82c
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/runFaserScript.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+#
+# Wrapper to run any python script in the athena environment
+# Typically used to set up a script for use in condor
+#
+# runFaserScript.sh --rel <release_directory> script.py arguments...
+#
+function print_usage {
+  echo "Usage: runFaserScript.sh --rel <release_directory> [--log <logfile>] script.py [arguments]"
+  echo "  The first option must be the release directory where asetup is called"
+  echo "  All other options are passed to script.py"
+  echo "  Options:"
+  echo "    -h - print usage"
+  echo "    --rel <release_directory> - specify release directory"
+  echo "    --log <logfile> - redirect script output to logfile"
+  echo "    -- - end of options considered by this script"
+}
+#
+release_directory=""
+logfile=""
+while [ -n "$1" ]
+do
+  case "$1" in
+    -h | --help)
+      print_usage
+      exit 0;;
+
+    --rel)
+      release_directory="$2";
+      shift;
+      shift;;
+
+    --log)
+      logfile="$2";
+      shift;
+      shift;;
+
+    --) # Signal that everything else should be executed
+      shift;
+      break;;
+
+    *)
+      # Nothing we recognize, execute everything remaining
+      break;;
+  esac
+done
+
+if [ -z "$release_directory" ]; then
+  echo "Must specify release"
+  print_usage
+  exit 1
+fi
+
+# Redirect to log file if requested
+if [ ! -z "$logfile" ]; then
+  #logfile="post_`date +%m%d-%H%M%S`.log"
+  exec >& "$logfile"
+fi
+#
+# Set up release
+starting_directory=`pwd`
+echo "cd $release_directory"
+cd $release_directory
+#
+# Set up the release
+export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
+# Must pass something or source will pass *this* script's arguments instead
+source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh --
+echo "asetup --input=calypso/asetup.faser Athena,22.0.49"
+asetup --input=calypso/asetup.faser Athena,22.0.49
+echo "source run/setup.sh"
+source run/setup.sh
+#
+# Go back to where we started
+echo "cd $starting_directory"
+cd $starting_directory
+#
+# Now run the command
+echo "$@"
+eval "$@"
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/share/submitNoisyStripJobs.py b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/submitNoisyStripJobs.py
new file mode 100755
index 0000000000000000000000000000000000000000..ed780ab1b21c584180be5c0fab4dd8685a3c0abc
--- /dev/null
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/share/submitNoisyStripJobs.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+#
+# Sept 2022, E. Torrence
+#
+# Script to run the noisy strip finder on a given run
+# This creates condor submission scripts and a DAG
+# and submits those to run the jobs
+#
+# Usage:
+#  submitNoisyStripJobs.py -h
+#
+import os
+import sys
+import argparse
+import subprocess
+
+from pathlib import Path
+
+def parse_arguments():
+
+    description="Script to submit jobs to find noisy strips"
+    parser = argparse.ArgumentParser(description,
+                                     formatter_class=argparse.RawTextHelpFormatter)
+
+    parser.add_argument("runs", nargs='+', help="Specify FASER runs")
+
+    parser.add_argument("--per_job", type=int, default=25, help="Specify maximum files per job")
+    parser.add_argument("--release", default='.', help="Specify path to release directory")
+    parser.add_argument("--nosubmit", action="store_true", help="Don't submit jobs")
+    parser.add_argument("--nocleanup", action="store_true", help="Don't cleanup output directory on completion")
+    parser.add_argument("--queue", default="longlunch", help="Specify queue (longlunch=2h (default), workday=8h)")
+    parser.add_argument("--rawdir", default="/eos/experiment/faser/raw/2022",
+                        help="Specify raw data directory (default: /eos/experiment/faser/raw/2022)")
+
+    return parser.parse_args()
+
+# Take a string and turn it into a list of integers
+# Can specify single values, ranges, or comma separated lists of both
+def parseRunList(runlist):
+
+    run_list = []
+
+    # Check if this is a file with run numbers
+    if len(runlist) == 1:
+        path = Path(runlist[0])
+        if path.exists() and path.is_file():
+            print(f"Reading runs from {path}")
+            # Try reading each line as a run number
+            with path.open() as f:
+                for line in f.readlines():
+                    line = line.strip()
+                    if len(line) == 0: continue
+                    if line[0] in ['#', '!']: continue
+                    if not line.isnumeric():
+                        print(f"Error parsing {line}")
+                        continue
+                    run_list.append(int(line))
+            # Done reading file
+            return(run_list)
+
+    for string in runlist:
+        tokens = string.split(',')
+
+        for segment in tokens:
+
+            if len(segment) == 0: continue
+
+            if '-' in segment:  # Range of runs
+                start, end = segment.split('-')
+                start = int(start)
+                end = int(end)
+                run_list.extend(list(range(int(start), int(end)+1)))
+
+            else:
+                run_list.append(int(segment))
+
+    return(run_list)
+
+# Command-line execution
+if __name__ == "__main__":
+
+    # Parse the command-line arguments
+    args = parse_arguments()
+
+    run_list = parseRunList(args.runs)
+    run_list.sort()
+
+    # Check some things
+    rel_dir = Path(args.release)
+    package_dir = rel_dir / Path("calypso/Tracker/TrackerRecAlgs/NoisyStripFinder")
+
+    # Script to allow python scripts to be run in condor in the FASER environment
+    env_exec = package_dir / Path("share/runFaserScript.sh")
+
+    if not env_exec.exists():
+        print(f"Can't find executable in release directory {args.release}")
+        sys.exit(1)
+
+    print(f"Start processing {len(run_list)} runs")
+
+    for run in run_list:
+        print(f"Working on run {run}")
+
+        runstr = f"{run:06d}"
+
+        # Get file list
+        raw_dir = Path(f'{args.rawdir}/{runstr}')
+        file_list = list(raw_dir.glob("Faser-Physics*.raw"))
+
+        # Now we need to decide what to do
+        nraw = len(file_list)
+        njobs = (nraw-1) // args.per_job + 1
+        if njobs == 1:
+            print(f"{nraw} raw files found, submitting {njobs} job")
+        else:
+            print(f"{nraw} raw files found, submitting {njobs} jobs")
+
+        if njobs == 0: continue
+
+        # Create a directory for this
+        jobdir = Path(runstr)
+        if jobdir.exists():
+            print(f"Directory {jobdir} exists, deleting...")
+            import shutil
+            shutil.rmtree(jobdir.resolve())
+
+        jobdir.mkdir(exist_ok=True)
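+        # Collect the condor job names; the DAG and the merge submit script below refer back to this list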
+        submit_list = []
+
+        # Start the DAG file
+        dagfile = jobdir / Path(f"noise_{runstr}.dag")
+        with open(dagfile, 'w') as d:
+            d.write(f"# Auto-generated DAG submission script for {runstr}\n")
+
+        for job in range(njobs):
+
+            jobstr = f"{job:03d}"
+            if njobs == 1:
+                jobname = f"noise_{runstr}"
+            else:
+                jobname = f"noise_{runstr}_{jobstr}"
+            subfile = jobdir / Path(jobname+".sub")
+
+            ilo = job * args.per_job
+            ihi = ilo + args.per_job
+            job_files = file_list[ilo:ihi]
+
+            #
+            # Generate a job submission script
+            print(f"Writing {subfile}")
+            submit_list.append(jobname)
+            with open(subfile, "w") as f:
+                f.write(f"# Auto-generated submission script for {jobname}\n")
+                # Set the queue: workday = 8h, longlunch = 2h might be enough
+                f.write(f'+JobFlavour = "{args.queue}"\n')
+                f.write(f"executable = {env_exec.resolve()}\n")
+                f.write(f"output = {jobdir.resolve()}/{jobname}.out\n")
+                f.write(f"error = {jobdir.resolve()}/{jobname}.err\n")
+                f.write(f"log = {jobdir.resolve()}/{jobname}.log\n")
+                # No newline as we need to add input files
+                f.write(f"arguments = --rel {rel_dir.resolve()} NoisyStripFinderJob.py --out {jobname}.root ")
+                [f.write(f" {filename}") for filename in job_files]
+                f.write("\n")
+                f.write("queue")
+
+            # Also add this to our DAG
+            with open(dagfile, 'a') as d:
+                d.write(f"JOB {jobname} {subfile.name}\n")
+                # Also check that the histogram isn't empty
+                # This can fix some file read errors
+                d.write(f"SCRIPT POST {jobname} {env_exec.resolve()} --rel {rel_dir.resolve()} checkNoisyStripHist.py {jobname}.root $RETURN\n")
+
+        # Done writing individual jobs
+
+        # Add the merge job to the DAG
+        with open(dagfile, 'a') as d:
+            d.write(f"JOB merge_{runstr} merge_{runstr}.sub\n")
+            d.write("PARENT")
+            for jobname in submit_list:
+                d.write(f" {jobname}")
+            d.write(f" CHILD merge_{runstr}\n")
+            # Add a retry directive
+            d.write("RETRY ALL_NODES 1\n")
+
+        # Write the merge job submit script
+        jobname = f"merge_{runstr}"
+        subfile = jobdir / Path(jobname+".sub")
+        with open(subfile, "w") as f:
+            f.write(f"# Auto-generated submission script for {jobname}\n")
+            f.write(f"output = {jobdir.resolve()}/{jobname}.out\n")
+            f.write(f"error = {jobdir.resolve()}/{jobname}.err\n")
+            f.write(f"log = {jobdir.resolve()}/{jobname}.log\n")
+
+            #f.write('+JobFlavour = "workday"\n')  # 8 hours, longlunch might be enough
+            #f.write(f"executable = {hist_exec.resolve()}\n")
+            f.write(f"executable = {env_exec.resolve()}\n")
+
+            # No newline as we need to add input files
+            # f.write(f"arguments = --rel {rel_dir.resolve()} --force -o noisy_{runstr}.db")
+            f.write(f"arguments = --rel {rel_dir.resolve()} makeNoisyStripDB.py --force -o noisy_{runstr}.db")
+            [f.write(f" {filename}.root") for filename in submit_list]
+            f.write("\n")
+
+            # Provide files to transfer
+            f.write(f"transfer_input_files = {submit_list[0]}.root")
+            [f.write(f",{filename}.root") for filename in submit_list[1:]]
+            f.write("\n")
+            f.write("should_transfer_files = IF_NEEDED\n")
+
+            # Don't forget queue command
+            f.write("queue")
+
+        # Do we want a cleanup script?
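+        # cleanup.sh receives the merge job's exit status via $RETURN, so logs are only removed after a successful merge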
+        if not args.nocleanup:
+            with open(dagfile, 'a') as d:
+                d.write(f"SCRIPT POST merge_{runstr} cleanup.sh $RETURN\n")
+
+            cleanup_file = jobdir / Path("cleanup.sh")
+            with open(cleanup_file, 'w') as f:
+                f.write("#!/bin/bash\n")
+                f.write('if [[ $1 != "0" ]]; then\n')
+                f.write('  exit $1\n')
+                f.write('fi\n')
+                # f.write('rm noise_{runstr}.dag.* \n')
+                f.write('rm *.log\n')
+                f.write('rm *.err\n')
+                f.write('rm eventLoopHeartBeat.txt\n')
+                f.write('rm *.cc\n')
+                for job in submit_list:
+                    f.write(f'gzip {job}.out\n')
+                #f.write('gzip merge*.out\n')
+                f.write('exit 0\n')
+
+            # And make it executable
+            import stat
+            cleanup_file.chmod(cleanup_file.stat().st_mode | stat.S_IEXEC)
+
+        if not args.nosubmit:
+            print(f"Submitting noise_{runstr}.dag")
+            startdir = os.getcwd()
+            os.chdir(jobdir)
+
+            # lxplus python3 is 3.6.8, so use old subprocess.run arguments
+            proc = subprocess.run(["/usr/bin/condor_submit_dag", f"noise_{runstr}.dag"],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE,
+                                  universal_newlines=True)
+
+            if len(proc.stdout) > 0:
+                print(proc.stdout)
+            if len(proc.stderr) > 0:
+                print(proc.stderr)
+
+            # Change back to our starting point
+            os.chdir(startdir)
diff --git a/Tracker/TrackerRecAlgs/NoisyStripFinder/src/NoisyStripFinder.cxx b/Tracker/TrackerRecAlgs/NoisyStripFinder/src/NoisyStripFinder.cxx
index f665127f964ee94b71ac2d0eeb00fc027e280a2d..c5f3f907852e40b072151c5e53c94ebdf6b3ea1b 100644
--- a/Tracker/TrackerRecAlgs/NoisyStripFinder/src/NoisyStripFinder.cxx
+++ b/Tracker/TrackerRecAlgs/NoisyStripFinder/src/NoisyStripFinder.cxx
@@ -56,7 +56,7 @@ StatusCode NoisyStripFinder::execute(const EventContext& ctx) const {
 
   if (!(xaod->tap() & m_triggerMask.value())) return StatusCode::SUCCESS; // only process events that pass the trigger mask
 
-  ATH_MSG_INFO("trigger passed mask");
+  ATH_MSG_DEBUG("trigger passed mask");
   ++m_numberOfEvents;
 
   // Keep track of run
@@ -116,12 +116,12 @@ StatusCode NoisyStripFinder::execute(const EventContext& ctx) const {
 StatusCode NoisyStripFinder::finalize()
 {
   ATH_MSG_INFO("NoisyStripFinder::finalize()");
-  ATH_MSG_INFO( m_numberOfEvents << " events processed" );
+  ATH_MSG_INFO( m_numberOfEvents << " events found" );
   ATH_MSG_INFO( m_numberOfRDOCollection << " RDO collections processed" );
   ATH_MSG_INFO( m_numberOfRDO << " RawData" );
-  ATH_MSG_INFO( "Number of sensors found = " << NoisyStrip_histmap.size() << " out of 144" );
+  ATH_MSG_INFO( "Number of sensors found = " << NoisyStrip_histmap.size() << " out of 192" );
 
-  for (int ihash = 0; ihash < 144; ++ihash){ // print out the sensors that are missing
+  for (int ihash = 0; ihash < 192; ++ihash){ // print out the sensors that are missing
     if ( NoisyStrip_histmap.count(ihash) == 0 ){
       ATH_MSG_INFO("missing sensor # " << ihash);
     }
@@ -166,6 +166,7 @@ StatusCode NoisyStripFinder::finalize()
   ATH_MSG_INFO( "---------- hot strip occupancy >= 0.1 for Tracker Sensor hash = " << it->first << " ----------" );
   int i = 1;
   while (i <= 768){
+    // This is only for information
     if ( it->second->GetBinContent(i)/(double)m_numberOfEvents >= 0.01 ){
       ATH_MSG_INFO( "hot strip # = " << i-1 << ", hit occupancy = " << it->second->GetBinContent(i)/(double)m_numberOfEvents ); // print out hot strips
     }