Commit df3d4685 authored by Eric Torrence

Merge branch 'faserrec-noise' into 'master'

NoisyStripFinder update

See merge request !272
parents 9ab6714d 524ca88e
Showing 1167 additions and 12 deletions
@@ -17,5 +17,5 @@ atlas_add_component( NoisyStripFinder
 atlas_install_python_modules( python/*.py )
-atlas_install_scripts( share/*.py test/*.py )
+atlas_install_scripts( share/*.py share/*.sh )
Noisy strips are now part of the conditions database and are used in the default reco.
To update the database, use the following steps.

1) Determine the runs to update.
   A handy utility to find all runs of a given type is findFaserRunsByType.py:
   `findFaserRunsByType.py -t Physics -o physics_runs.txt 7730-8370`

2) Submit jobs to create the noisy strip DB for each run.
   From a release directory, it is best to create a subdirectory for this and then run:
   `submitNoisyStripJobs.py --release .. physics_runs.txt`
   If you want to do this interactively, note that this script submits jobs that run the following:
   `NoisyStripFinderJob.py`
   `makeNoisyStripDB.py`
   Run numbers (or a range) can also be specified directly instead of a text file of runs, as shown below.
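   For example, to process a few runs directly (run numbers here are illustrative):
   `submitNoisyStripJobs.py --release .. 8020 8025-8030`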
3) Check that all of the jobs finished successfully (individual runs can also be checked):
   `checkNoisyStripJobs.py physics_runs.txt`
   This can also write out a file of runs to submit again; see the example below.
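   For example, to write any failed runs to missing_runs.txt (the file name is fixed by the script) and resubmit them:
   `checkNoisyStripJobs.py --write_missing physics_runs.txt`
   `submitNoisyStripJobs.py --release .. missing_runs.txt`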
4) Check that the actual noisy strips found make sense.
   This reads the individual DB files in each subdirectory; specific runs can also be given:
   `checkNoisyStripDB.py physics_runs.txt`

5) Merge the individual runs into a single DB.
   Note that this script writes the individual runs with open-ended IOVs.
   This means the last run merged will be used for all later data until the DB is updated again.
   So if you are merging multiple times (with the --append option), it is important to do this in chronological order (see the example below).
   `mergeNoisyStripDB.py physics_runs.txt`
   Note, this can be slow. Use --verbose to watch the progress.
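   For example, to add newer runs to an existing noisy_strips.db in a later pass (the file name newer_runs.txt is illustrative):
   `mergeNoisyStripDB.py --append newer_runs.txt`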
6) Test the database.
   The resulting database by default has the name noisy_strips.db.
   This can be copied to the data/sqlite200 subdirectory of the working directory, as sketched below, and reco jobs will then use it.
   Check here for details:
   https://gitlab.cern.ch/faser/calypso/-/blob/master/Database/ConnectionManagement/FaserAuthentication/data/dblookup.xml
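   A minimal sketch of that copy step, assuming you are in the working directory where reco will run:
   `mkdir -p data/sqlite200 && cp noisy_strips.db data/sqlite200/`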
7) Merge with the production DB.
   The updated noisy strips folder /SCT/DAQ/NoisyStrips now needs to be merged into the production DB.
   First copy the current DB from CVMFS to some local directory:
   `cp /cvmfs/faser.cern.ch/repo/sw/database/DBRelease/current/sqlite200/ALLP200.db .`
   Next, use AtlCoolCopy to merge the updates into this file:
   `AtlCoolCopy "sqlite://;schema=noisy_strips.db;dbname=CONDBR3" "sqlite://;schema=ALLP200.db;dbname=CONDBR3"`
   This can also be slow.
   Finally, the ALLP200.db file should be installed on CVMFS once everything is verified to be correct.

Older instructions from Tobias when he was developing this package are here:

Mask noisy strips:

1) Run NoisyStripFinderDbg.py on raw data files
...
@@ -63,10 +63,18 @@ replicaSvc.UseCOOLSQLite = True
replicaSvc.UseCOOLFrontier = False
replicaSvc.UseGeomSQLite = True

# Don't print out every event
from AthenaConfiguration.ComponentFactory import CompFactory
eventLoop = CompFactory.AthenaEventLoopMgr()
eventLoop.EventPrintoutInterval = 1000
acc.addService(eventLoop)

if args.verbose:
    acc.foreach_component("*").OutputLevel = VERBOSE
    acc.printConfig()
else:
    acc.foreach_component("*").OutputLevel = INFO

sc = acc.run(maxEvents=args.nevents)
print(f"Job finished with {sc.isSuccess()} => {not sc.isSuccess()}")
sys.exit(not sc.isSuccess())
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to check the noisy strips found in individual run DBs
#
# Usage:
#  ./checkNoisyStripDB.py -h
#
import sys
import argparse
from pathlib import Path

from PyCool import cool
from CoolConvUtilities.AtlCoolLib import indirectOpen

def parse_arguments():

    description = "Script to check noisy strips in DBs from individual runs\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("--threshold", default="0.01", help="Threshold to call a strip noisy (default 0.01)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list
        elif '-' in runlist[0]:
            pass
        elif ',' in runlist[0]:
            pass
        elif not runlist[0].isnumeric():
            print(f"File {path} doesn't exist!")
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                if not start.isnumeric():
                    print(f"Found invalid run {start}")
                    continue
                if not end.isnumeric():
                    print(f"Found invalid run {end}")
                    continue
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                if not segment.isnumeric():
                    print(f"Found invalid run {segment}")
                    continue
                run_list.append(int(segment))

    return run_list
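# For illustration (inputs assumed, not from the source):
#   parseRunList(["7730-7732,7735"]) -> [7730, 7731, 7732, 7735]
#   parseRunList(["physics_runs.txt"]) reads run numbers line by line from that file,
#   skipping blank lines and lines starting with '#' or '!'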
class NoisyRunAnalyzer:

    def __init__(self, verbose=False, threshold=0.01):
        self.verbose = verbose
        self.run_dict = {}
        self.noise_threshold = threshold

    def addRun(self, runnum):

        if self.verbose: print(f"\nRun {runnum}")

        runstr = f'{runnum:06d}'
        infile = Path(f'{runstr}/noisy_{runstr}.db')
        if not infile.is_file():
            print(f"{runstr}/noisy_{runstr}.db doesn't exist!")
            return

        db_string = f'sqlite://;schema={runstr}/noisy_{runstr}.db;dbname=CONDBR3'
        try:
            self.db = indirectOpen(db_string, readOnly=True, oracle=False, debug=False)
        except Exception as e:
            print(e)
            return

        # Now read all channels
        folder_string = "/SCT/DAQ/NoisyStrips"
        try:
            self.folder = self.db.getFolder(folder_string)
        except Exception as e:
            print(e)
            return

        if self.folder is None:
            print(f"Can't access folder {folder_string} in {db_string}")
            return

        channels = cool.ChannelSelection.all()
        iov_lo = (runnum << 32)
        iov_hi = ((runnum+1) << 32) - 1
        tag = ''

        try:
            itr = self.folder.browseObjects(iov_lo, iov_hi, channels, tag)
        except Exception as e:
            print(e)
            return

        if self.run_dict.get(runnum, None) is None:
            self.run_dict[runnum] = {}

        # Now iterate through objects (should only be one IOV, but multiple channels)
        while itr.goToNext():
            obj = itr.currentRef()
            if self.verbose: print(obj.payload())

            sensor = obj.payload()['sensor']
            strip = obj.payload()['strip']
            occupancy = obj.payload()['occupancy']

            if occupancy < self.noise_threshold: continue

            if self.run_dict[runnum].get(sensor, None) is None:
                self.run_dict[runnum][sensor] = {}

            self.run_dict[runnum][sensor][strip] = occupancy

        if self.verbose: print(self.run_dict)

        # Done, close the database
        self.db.closeDatabase()

    def printRunSummary(self):

        for run in self.run_dict:
            #print(f"Run {run}: {len(self.run_dict[run])} sensors with noisy strips")

            # 16 sensors per layer, 12 layers in total
            noisy_by_layer = [0] * 12
            noisy_strips_by_layer = [0] * 12
            for sensor in self.run_dict[run]:
                layer = sensor // 16
                noisy_by_layer[layer] += 1
                noisy_strips_by_layer[layer] += len(self.run_dict[run][sensor])

            print(f"Run {run} strips > {100*self.noise_threshold:3.1f}% by layer: ", end='')
            [print(f' {n:3d}', end='') for n in noisy_strips_by_layer]
            print()

# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    nra = NoisyRunAnalyzer(verbose=args.verbose, threshold=float(args.threshold))

    for runnum in run_list:
        nra.addRun(runnum)

    nra.printRunSummary()
#!/usr/bin/env python3
#
# Simple utility to run as a post script
# after the noisyStripFinder
#
# Usage: checkNoisyStripHist.py <histfile> [return code]
#
import sys

if len(sys.argv) <= 1:
    sys.exit(1)

filename = sys.argv[1]

#
# Check previous return code if it is provided
if len(sys.argv) >= 3:
    rc = int(sys.argv[2])
    if rc: sys.exit(rc)

#
# Check histogram entries
# This causes a segfault. Let's try without ROOT
#from ROOT import TFile
if False:
    import ROOT
    try:
        f = ROOT.TFile.Open(filename, 'r')
    except Exception as e:
        print(e)
        sys.exit(1)
    else:
        n = f.Get("numEvents").GetVal()
        print(f"Found {filename} with {n} entries")
        sys.exit(n == 0)

from pathlib import Path
f = Path(filename)
if not f.is_file(): sys.exit(1)

size = f.stat().st_size
if size < 1000:  # Almost certainly empty
    print(f"Found {f} with size {size}!")
    sys.exit(1)

sys.exit(0)
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to check NoisyStrips jobs for problems
#
# Usage:
#  ./checkNoisyJobs.py -h
#
import sys
import argparse
import subprocess
from pathlib import Path

import ROOT

def parse_arguments():

    description = "Script to check noisy strip finding jobs\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
    parser.add_argument("--write_missing", action="store_true", help="Write out missing runs to file")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list
        elif '-' in runlist[0]:
            pass
        elif ',' in runlist[0]:
            pass
        elif not runlist[0].isnumeric():
            print(f"File {path} doesn't exist!")
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                if not start.isnumeric():
                    print(f"Found invalid run {start}")
                    continue
                if not end.isnumeric():
                    print(f"Found invalid run {end}")
                    continue
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                if not segment.isnumeric():
                    print(f"Found invalid run {segment}")
                    continue
                run_list.append(int(segment))

    return run_list
class JobChecker:

    def __init__(self, run=None):

        self.select_run(run)
        self.verbose = True
        self.check_all = False

        self.check_function_list = []
        self.check_function_list.append(self.check_directory)
        self.check_function_list.append(self.check_histograms)
        self.check_function_list.append(self.check_dbfile)

    def select_run(self, run):
        self.run_number = run
        if run is None: return
        self.run_string = f'{run:06d}'

    def check_run(self, run=None):
        # Return True on error

        if run is not None:
            self.select_run(run)

        # Guard against no run given here or to select_run()
        if self.run_number is None:
            print("No run specified!")
            return True

        for func in self.check_function_list:
            if func(): return True

        return False

    def check_directory(self):
        directory_path = Path(self.run_string)
        if not directory_path.exists():
            print(f"* Directory {self.run_string} not found!")
        elif self.verbose:
            print(f" => Directory {self.run_string} found")
        return not directory_path.exists()

    def check_dbfile(self):
        dbfile_path = Path(f'{self.run_string}/noisy_{self.run_string}.db')
        if not dbfile_path.exists():
            print(f"* Database file {dbfile_path} not found!")
        elif self.verbose:
            print(f" => Database file {dbfile_path} found")
        return not dbfile_path.exists()

    def check_histograms(self):

        # First, find the submit files
        directory_path = Path(self.run_string)
        submit_list = directory_path.glob('noise*.sub')

        missing = False
        for filepath in submit_list:

            # Ignore DAG
            if '.dag.' in str(filepath): continue

            filestem = filepath.stem
            hist_file = directory_path / Path(f'{filestem}.root')

            if hist_file.exists():
                if self.verbose: print(f" => Found histogram file {hist_file}")

                # Check number of events
                try:
                    f = ROOT.TFile.Open(str(hist_file), 'r')
                except Exception as e:
                    print(e)
                    missing = True
                else:
                    n = f.Get("numEvents").GetVal()
                    if self.verbose:
                        print(f"{hist_file} found with {n} entries")
                    if n == 0:
                        print(f"{hist_file} found with {n} entries")
                        missing = True
                continue

            # Histogram doesn't exist
            missing = True
            print(f"* Histogram file {hist_file} missing! ", end="")

            # See if we can figure out why
            logfile_path = directory_path / Path(f'{filestem}.log')
            if not logfile_path.exists():
                print("=> log file not found")
                continue

            if subprocess.call(['/bin/grep', "Killed", f"{logfile_path}"],
                               stdout=subprocess.DEVNULL):
                # Non-zero return code means no match
                pass
            else:
                # Zero return code means a match; see if we can find the time
                rc = subprocess.run(['/bin/grep', 'Job finished after', f"{logfile_path}"],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
                if rc.returncode:
                    # Can't find the running time
                    print("=> job appears to have been killed")
                else:
                    timestr = rc.stdout.replace('Job finished after ', '')
                    print(f"=> job appears to have been killed after {timestr}")
                continue

            # Can't figure out why
            print('=> unknown problem')

        return missing
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    good_runs = []
    missing_runs = []

    jc = JobChecker()
    jc.verbose = args.verbose

    for runnum in run_list:
        if args.verbose: print(f"\nRun {runnum}")
        if jc.check_run(runnum):
            missing_runs.append(runnum)
        else:
            good_runs.append(runnum)

    print(f"Found {len(good_runs)} good runs and {len(missing_runs)} missing runs")

    if args.write_missing:
        missing_file = "missing_runs.txt"
        with open(missing_file, "w") as f:
            [f.write(f"{run}\n") for run in missing_runs]
        print(f"Wrote {len(missing_runs)} missing runs to {missing_file}")
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to find FASER runs of a given type (Physics by default)
#
# Usage:
#  ./findRuns.py -h
#
import argparse
import requests
from pathlib import Path

def parse_arguments():

    description = "Script to find PHYSICS runs in a range\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
    parser.add_argument("-o", "--output", default="findRuns.txt", help="Specify output file")
    parser.add_argument("-t", "--type", default="Physics", help="Run type to match")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                run_list.append(int(segment))

    return run_list
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    with open(args.output, "w") as f:
        f.write("# findRuns.py")
        [f.write(f" {run}") for run in args.runs]
        f.write("\n")

    # faser-runinfo address
    url = "https://faser-runinfo.app.cern.ch/cgibin/"

    # Cycle through the range
    for run in run_list:
        query = f"{url}/getRunInfo.py?runno={run}"

        response = requests.get(query)

        if not response.json():
            if args.verbose:
                print(f"Couldn't find run {run}")
            continue

        run_type = response.json()['type']
        if args.verbose:
            print(f"Run {run} has type {run_type}")

        if run_type != args.type: continue

        with open(args.output, "a") as f:
            f.write(f"{run}\n")

    # End of loop over runs
-#!/usr/bin/env python
+#!/usr/bin/env python3

import os
import sys
@@ -9,8 +9,9 @@ from CoolConvUtilities.AtlCoolLib import indirectOpen
 parser = argparse.ArgumentParser()
 parser.add_argument("file", nargs="+", help="full path to input file")
-parser.add_argument("-t", "--threshold", type=float, default=0.01, help="add strips with an occupancy larger than this threshold to the database")
+parser.add_argument("-t", "--threshold", type=float, default=0.001, help="add strips with an occupancy larger than this threshold to the database")
 parser.add_argument("--force", "-f", action="store_true", help="Overwrite existing DB")
+parser.add_argument("--permissive", action="store_true", help="Allow some input files to be missing")
 parser.add_argument("--output", "-o", default="noisy_strips.db", help="Specify output DB")
 parser.add_argument("--isMC", action="store_true", help="Write MC DB (default: real data)")
 args = parser.parse_args()
@@ -26,6 +27,7 @@ HistDict = {}
 ROOT.TH1.AddDirectory(0)  # This is necessary in order to have the histogram data after closing the file

+trigger = None
 iovlo = cool.ValidityKeyMax
 iovhi = cool.ValidityKeyMin
@@ -33,8 +35,23 @@ iovhi = cool.ValidityKeyMin
 skipList = ["numEvents", "trigger", "IOVLoRun", "IOVLoLB", "IOVHiRun", "IOVHiLB"]

 for inputfile in args.file:
-    f = ROOT.TFile.Open(inputfile, "r")
-    numEvents += f.Get("numEvents").GetVal()
+    # Check that this file exists
+    if not os.path.exists(inputfile):
+        if args.permissive: continue
+        print(f"File {inputfile} not found!")
+        sys.exit(1)
+
+    try:
+        f = ROOT.TFile.Open(inputfile, "r")
+    except Exception as e:
+        print(e)
+        if args.permissive: continue
+        sys.exit(1)
+
+    n = f.Get("numEvents").GetVal()
+    print(f"Found {n} events in {inputfile}")
+    if n == 0: continue
+    numEvents += n

     lorun = f.Get("IOVLoRun").GetVal()
     hirun = f.Get("IOVHiRun").GetVal()
     lo = (lorun << 32)
@@ -42,8 +59,13 @@ for inputfile in args.file:
     if lo < iovlo: iovlo = lo
     if hi > iovhi: iovhi = hi

-    if nfiles == 0:
+    if trigger is None:
         trigger = f.Get("trigger").GetVal()
+    else:
+        t = f.Get("trigger").GetVal()
+        if t != trigger:
+            print(f"Trigger mismatch! {t} != {trigger} in {inputfile}")
+            sys.exit(1)  # This shouldn't happen

     for rootkey in f.GetKeyNames():
@@ -58,9 +80,8 @@ for inputfile in args.file:
     nfiles += 1
     f.Close()

-print("Total # of root files analyzed = ", nfiles)
-print("Total number of events = ", numEvents)
+print(f"Total {nfiles} files analyzed with {numEvents} events")
+print(f"Trigger mask = 0x{trigger:02x}")
 print(f"IOV from {(iovlo >> 32)}/{(iovlo & 0xFFFFFFFF)} to {(iovhi >> 32)}/{(iovhi & 0xFFFFFFFF)}")

 # Write DB
...
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to merge individual run DBs into a master DB
#
# Usage:
#  ./mergeNoisyDBRuns.py -h
#
import sys
import argparse
import subprocess
from pathlib import Path

def parse_arguments():

    description = "Script to merge DBs from individual runs into one DB\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
    parser.add_argument("-a", "--append", action="store_true", help="Append (rather than overwrite) existing file")
    parser.add_argument("-o", "--output", default="noisy_strips.db", help="Specify output DB name")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list
        elif '-' in runlist[0]:
            pass
        elif ',' in runlist[0]:
            pass
        elif not runlist[0].isnumeric():
            print(f"File {path} doesn't exist!")
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                if not start.isnumeric():
                    print(f"Found invalid run {start}")
                    continue
                if not end.isnumeric():
                    print(f"Found invalid run {end}")
                    continue
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                if not segment.isnumeric():
                    print(f"Found invalid run {segment}")
                    continue
                run_list.append(int(segment))

    return run_list
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    first = True

    for runnum in run_list:
        if args.verbose: print(f"\nRun {runnum}")

        runstr = f'{runnum:06d}'
        infile = Path(f'{runstr}/noisy_{runstr}.db')
        if not infile.is_file():
            print(f"{runstr}/noisy_{runstr}.db doesn't exist!")
            continue

        command = ['AtlCoolCopy']
        command.append(f'sqlite://;schema={runstr}/noisy_{runstr}.db;dbname=CONDBR3')
        command.append(f'sqlite://;schema={args.output};dbname=CONDBR3')

        if first:
            first = False
            target = Path(args.output)
            if not target.is_file():
                print(f"Creating file {args.output}")
                command.append("-create")
            elif args.append:
                print(f"Appending to existing file {args.output}")
            else:
                print(f"Deleting existing file {args.output}")
                target.unlink()
                command.append("-create")

        command.extend(["-alliov", "-nrls", f"{runnum}", "0"])

        if args.verbose: print(command)

        rc = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)

        if args.verbose: print(rc.stdout)
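# For illustration (run number and paths assumed, not from the source): for run 8020
# merged into an existing noisy_strips.db, the constructed command is equivalent to
#   AtlCoolCopy "sqlite://;schema=008020/noisy_008020.db;dbname=CONDBR3" \
#               "sqlite://;schema=noisy_strips.db;dbname=CONDBR3" -alliov -nrls 8020 0
# with -create appended only when the output file does not yet exist.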
#!/bin/bash
#
# Wrapper to run any python script in the athena environment
# Typically used to set up a script for use in condor
#
# runFaserScript.sh --rel <release_directory> script.py arguments...
#
function print_usage {
    echo "Usage: runFaserScript.sh --rel <release_directory> [--log <logfile>] script.py [arguments]"
    echo "  The first option must be the release directory where asetup is called"
    echo "  All other options are passed to script.py"
    echo "  Options: "
    echo "   -h - print usage"
    echo "   --rel <release_directory> - specify release directory"
    echo "   --log <logfile> - redirect script output to logfile"
    echo "   -- - end of options considered by this script"
}
#
release_directory=""
logfile=""
while [ -n "$1" ]
do
    case "$1" in
        -h | --help)
            print_usage
            exit 0;;

        --rel)
            release_directory="$2";
            shift;
            shift;;

        --log)
            logfile="$2";
            shift;
            shift;;

        --)  # Signal that everything else should be executed
            shift;
            break;;

        *)
            # Nothing we recognize, execute everything remaining
            break;;
    esac
done

if [ -z "$release_directory" ]; then
    echo "Must specify release"
    print_usage
    exit 1
fi

# Redirect to log file if requested
if [ ! -z "$logfile" ]; then
    #logfile="post_`date +%m%d-%H%M%S`.log"
    exec >& "$logfile"
fi
#
# Set up release
starting_directory=`pwd`
echo "cd $release_directory"
cd $release_directory
#
# Set up the release
export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
# Must pass something or source will pass *this* script's arguments instead
source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh --
echo "fsetup Athena,22.0.49"
asetup --input=calypso/asetup.faser Athena,22.0.49
echo "source run/setup.sh"
source run/setup.sh
#
# Go back to where we started
echo "cd $starting_directory"
cd $starting_directory
#
# Now run the command
echo "$@"
eval "$@"
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to run the noisy strip finder on a given run.
# This creates condor submission scripts and a DAG
# and submits those to run the jobs.
#
# Usage:
#  submitNoisyStripJobs.py -h
#
import os
import sys
import argparse
import subprocess
from pathlib import Path

def parse_arguments():

    description = "Script to submit jobs to find noisy strips"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs")
    parser.add_argument("--per_job", type=int, default=25, help="Specify maximum files per job")
    parser.add_argument("--release", default='.', help="Specify path to release directory")
    parser.add_argument("--nosubmit", action="store_true", help="Don't submit jobs")
    parser.add_argument("--nocleanup", action="store_true", help="Don't clean up output directory on completion")
    parser.add_argument("--queue", default="longlunch", help="Specify queue (longlunch=2h (default), workday=8h)")
    parser.add_argument("--rawdir", default="/eos/experiment/faser/raw/2022",
                        help="Specify raw data directory (default: /eos/experiment/faser/raw/2022)")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                run_list.append(int(segment))

    return run_list
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    # Check some things
    rel_dir = Path(args.release)
    package_dir = rel_dir / Path("calypso/Tracker/TrackerRecAlgs/NoisyStripFinder")

    # Script to allow python scripts to be run in condor in the FASER environment
    env_exec = package_dir / Path("share/runFaserScript.sh")

    if not env_exec.exists():
        print(f"Can't find executable in release directory {args.release}")
        sys.exit(1)

    print(f"Start processing {len(run_list)} runs")

    for run in run_list:
        print(f"Working on run {run}")

        runstr = f"{run:06d}"

        # Get file list
        raw_dir = Path(f'{args.rawdir}/{runstr}')
        file_list = list(raw_dir.glob("Faser-Physics*.raw"))

        # Now we need to decide what to do
        nraw = len(file_list)
        njobs = (nraw-1) // args.per_job + 1
        if njobs == 1:
            print(f"{nraw} raw files found, submitting {njobs} job")
        else:
            print(f"{nraw} raw files found, submitting {njobs} jobs")
        if njobs == 0: continue

        # Create a directory for this
        jobdir = Path(runstr)
        if jobdir.exists():
            print(f"Directory {jobdir} exists, deleting...")
            import shutil
            shutil.rmtree(jobdir.resolve())

        jobdir.mkdir(exist_ok=True)

        submit_list = []

        # Start the DAG file
        dagfile = jobdir / Path(f"noise_{runstr}.dag")
        with open(dagfile, 'w') as d:
            d.write(f"# Auto-generated DAG submission script for {runstr}\n")

        for job in range(njobs):

            jobstr = f"{job:03d}"
            if njobs == 1:
                jobname = f"noise_{runstr}"
            else:
                jobname = f"noise_{runstr}_{jobstr}"
            subfile = jobdir / Path(jobname+".sub")

            ilo = job * args.per_job
            ihi = ilo + args.per_job
            job_files = file_list[ilo:ihi]

            #
            # Generate a job submission script
            print(f"Writing {subfile}")
            submit_list.append(jobname)
            with open(subfile, "w") as f:
                f.write(f"# Auto-generated submission script for {jobname}\n")
                # Set the queue: workday = 8h, longlunch = 2h might be enough
                f.write(f'+JobFlavour = "{args.queue}"\n')
                f.write(f"executable = {env_exec.resolve()}\n")
                f.write(f"output = {jobdir.resolve()}/{jobname}.out\n")
                f.write(f"error = {jobdir.resolve()}/{jobname}.err\n")
                f.write(f"log = {jobdir.resolve()}/{jobname}.log\n")
                # No newline as we need to add input files
                f.write(f"arguments = --rel {rel_dir.resolve()} NoisyStripFinderJob.py --out {jobname}.root ")
                [f.write(f" {filename}") for filename in job_files]
                f.write("\n")
                f.write("queue")

            # Also add this to our DAG
            with open(dagfile, 'a') as d:
                d.write(f"JOB {jobname} {subfile.name}\n")
                # Also check that the histogram isn't empty
                # This can fix some file read errors
                d.write(f"SCRIPT POST {jobname} {env_exec.resolve()} --rel {rel_dir.resolve()} checkNoisyStripHist.py {jobname}.root $RETURN\n")

        # Done writing individual jobs

        # Add the merge job to the DAG
        with open(dagfile, 'a') as d:
            d.write(f"JOB merge_{runstr} merge_{runstr}.sub\n")
            d.write("PARENT")
            for jobname in submit_list:
                d.write(f" {jobname}")
            d.write(f" CHILD merge_{runstr}\n")
            # Add a retry directive
            d.write("RETRY ALL_NODES 1\n")

        # Write the merge job submit script
        jobname = f"merge_{runstr}"
        subfile = jobdir / Path(jobname+".sub")
        with open(subfile, "w") as f:
            f.write(f"# Auto-generated submission script for {jobname}\n")
            f.write(f"output = {jobdir.resolve()}/{jobname}.out\n")
            f.write(f"error = {jobdir.resolve()}/{jobname}.err\n")
            f.write(f"log = {jobdir.resolve()}/{jobname}.log\n")
            #f.write('+JobFlavour = "workday"\n')  # 8 hours, longlunch might be enough
            #f.write(f"executable = {hist_exec.resolve()}\n")
            f.write(f"executable = {env_exec.resolve()}\n")
            # No newline as we need to add input files
            #f.write(f"arguments = --rel {rel_dir.resolve()} --force -o noisy_{runstr}.db")
            f.write(f"arguments = --rel {rel_dir.resolve()} makeNoisyStripDB.py --force -o noisy_{runstr}.db")
            [f.write(f" {filename}.root") for filename in submit_list]
            f.write("\n")

            # Provide files to transfer
            f.write(f"transfer_input_files = {submit_list[0]}.root")
            [f.write(f",{filename}.root") for filename in submit_list[1:]]
            f.write("\n")
            f.write("should_transfer_files = IF_NEEDED\n")

            # Don't forget the queue command
            f.write("queue")

        # Do we want a cleanup script?
        if not args.nocleanup:
            with open(dagfile, 'a') as d:
                d.write(f"SCRIPT POST merge_{runstr} cleanup.sh $RETURN\n")

            cleanup_file = jobdir / Path("cleanup.sh")
            with open(cleanup_file, 'w') as f:
                f.write("#!/bin/bash\n")
                f.write('if [[ $1 != "0" ]]; then\n')
                f.write('  exit $1\n')
                f.write('fi\n')
                #f.write('rm noise_{runstr}.dag.* \n')
                f.write('rm *.log\n')
                f.write('rm *.err\n')
                f.write('rm eventLoopHeartBeat.txt\n')
                f.write('rm *.cc\n')
                for job in submit_list:
                    f.write(f'gzip {job}.out\n')
                #f.write('gzip merge*.out\n')
                f.write('exit 0\n')

            # And make it executable
            import stat
            cleanup_file.chmod(cleanup_file.stat().st_mode | stat.S_IEXEC)

        if not args.nosubmit:
            print(f"Submitting noise_{runstr}.dag")
            startdir = os.getcwd()
            os.chdir(jobdir)

            # lxplus python3 is 3.6.8, so use old subprocess.run arguments
            proc = subprocess.run(["/usr/bin/condor_submit_dag", f"noise_{runstr}.dag"],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True)

            if len(proc.stdout) > 0:
                print(proc.stdout)
            if len(proc.stderr) > 0:
                print(proc.stderr)

            # Change back to our starting point
            os.chdir(startdir)
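# For illustration (run number and paths assumed, not from the source): for run 8020
# split into two finder jobs, the generated noise_008020.dag would contain lines like
#   JOB noise_008020_000 noise_008020_000.sub
#   SCRIPT POST noise_008020_000 <env_exec> --rel <release> checkNoisyStripHist.py noise_008020_000.root $RETURN
#   JOB noise_008020_001 noise_008020_001.sub
#   SCRIPT POST noise_008020_001 <env_exec> --rel <release> checkNoisyStripHist.py noise_008020_001.root $RETURN
#   JOB merge_008020 merge_008020.sub
#   PARENT noise_008020_000 noise_008020_001 CHILD merge_008020
#   RETRY ALL_NODES 1
#   SCRIPT POST merge_008020 cleanup.sh $RETURN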
@@ -56,7 +56,7 @@ StatusCode NoisyStripFinder::execute(const EventContext& ctx) const {
   if (!(xaod->tap() & m_triggerMask.value())) return StatusCode::SUCCESS; // only process events that pass the trigger mask

-  ATH_MSG_INFO("trigger passed mask");
+  ATH_MSG_DEBUG("trigger passed mask");
   ++m_numberOfEvents;

   // Keep track of run
@@ -116,12 +116,12 @@ StatusCode NoisyStripFinder::execute(const EventContext& ctx) const {
 StatusCode NoisyStripFinder::finalize()
 {
   ATH_MSG_INFO("NoisyStripFinder::finalize()");
-  ATH_MSG_INFO( m_numberOfEvents << " events processed" );
+  ATH_MSG_INFO( m_numberOfEvents << " events found" );
   ATH_MSG_INFO( m_numberOfRDOCollection << " RDO collections processed" );
   ATH_MSG_INFO( m_numberOfRDO << " RawData" );
-  ATH_MSG_INFO( "Number of sensors found = " << NoisyStrip_histmap.size() << " out of 144" );
+  ATH_MSG_INFO( "Number of sensors found = " << NoisyStrip_histmap.size() << " out of 192" );
-  for (int ihash = 0; ihash < 144; ++ihash){ // print out the sensors that are missing
+  for (int ihash = 0; ihash < 192; ++ihash){ // print out the sensors that are missing
     if ( NoisyStrip_histmap.count(ihash) == 0 ){
       ATH_MSG_INFO("missing sensor # " << ihash);
     }
@@ -166,6 +166,7 @@ StatusCode NoisyStripFinder::finalize()
   ATH_MSG_INFO( "---------- hot strip occupancy >= 0.1 for Tracker Sensor hash = " << it->first << " ----------" );
   int i = 1;
   while (i <= 768){
+    // This is only for information
     if ( it->second->GetBinContent(i)/(double)m_numberOfEvents >= 0.01 ){
       ATH_MSG_INFO( "hot strip # = " << i-1 << ", hit occupancy = " << it->second->GetBinContent(i)/(double)m_numberOfEvents ); // print out hot strips
     }
...