Commit df3d4685 authored by Eric Torrence

Merge branch 'faserrec-noise' into 'master'

NoisyStripFinder update

See merge request !272
parents 9ab6714d 524ca88e
Showing 1167 additions and 12 deletions
@@ -17,5 +17,5 @@ atlas_add_component( NoisyStripFinder
 atlas_install_python_modules( python/*.py )
-atlas_install_scripts( share/*.py test/*.py )
+atlas_install_scripts( share/*.py share/*.sh )
Noisy strips are now part of the conditions database and are used in the default reco.
To update the database, use the following steps.

1) Determine the runs to update.
   A handy utility to find all runs of a given type is findFaserRunsByType.py:
   `findFaserRunsByType.py -t Physics -o physics_runs.txt 7730-8370`

2) Submit jobs to create the noisy strip DB for each run.
   From a release directory, it is best to create a subdirectory for this and then run:
   `submitNoisyStripJobs.py --release .. physics_runs.txt`
   If you want to do this interactively, note that this script submits jobs that run the following:
   `NoisyStripFinderJob.py`
   `makeNoisyStripDB.py`
   Run numbers (or a range) can also be specified directly instead of a text file of runs, as shown below.
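   For example, to process a few runs directly (run numbers here are illustrative):
   `submitNoisyStripJobs.py --release .. 8020 8025-8030`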
3) Check that all of the jobs finished successfully (individual runs can also be checked):
   `checkNoisyStripJobs.py physics_runs.txt`
   This can also write out a file of runs to submit again; see the example below.
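   For example, to write any failed runs to missing_runs.txt (the file name is fixed by the script) and resubmit them:
   `checkNoisyStripJobs.py --write_missing physics_runs.txt`
   `submitNoisyStripJobs.py --release .. missing_runs.txt`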
4) Check that the actual noisy strips found make sense.
   This reads the individual DB files in each subdirectory; specific runs can also be given:
   `checkNoisyStripDB.py physics_runs.txt`

5) Merge the individual runs into a single DB.
   Note that this script writes the individual runs with open-ended IOVs.
   This means the last run merged will be used for all later data until the DB is updated again.
   So if you are merging multiple times (with the --append option), it is important to do this in chronological order (see the example below).
   `mergeNoisyStripDB.py physics_runs.txt`
   Note, this can be slow. Use --verbose to watch the progress.
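   For example, to add newer runs to an existing noisy_strips.db in a later pass (the file name newer_runs.txt is illustrative):
   `mergeNoisyStripDB.py --append newer_runs.txt`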
6) Test the database.
   The resulting database by default has the name noisy_strips.db.
   This can be copied to the data/sqlite200 subdirectory of the working directory, as sketched below, and reco jobs will then use it.
   Check here for details:
   https://gitlab.cern.ch/faser/calypso/-/blob/master/Database/ConnectionManagement/FaserAuthentication/data/dblookup.xml
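   A minimal sketch of that copy step, assuming you are in the working directory where reco will run:
   `mkdir -p data/sqlite200 && cp noisy_strips.db data/sqlite200/`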
7) Merge with the production DB.
   The updated noisy strips folder /SCT/DAQ/NoisyStrips now needs to be merged into the production DB.
   First copy the current DB from CVMFS to some local directory:
   `cp /cvmfs/faser.cern.ch/repo/sw/database/DBRelease/current/sqlite200/ALLP200.db .`
   Next, use AtlCoolCopy to merge the updates into this file:
   `AtlCoolCopy "sqlite://;schema=noisy_strips.db;dbname=CONDBR3" "sqlite://;schema=ALLP200.db;dbname=CONDBR3"`
   This can also be slow.
   Finally, the ALLP200.db file should be installed on CVMFS once everything is verified to be correct.

Older instructions from Tobias when he was developing this package are here:

Mask noisy strips:

1) Run NoisyStripFinderDbg.py on raw data files
...
@@ -63,10 +63,18 @@ replicaSvc.UseCOOLSQLite = True
replicaSvc.UseCOOLFrontier = False
replicaSvc.UseGeomSQLite = True

# Don't print out every event
from AthenaConfiguration.ComponentFactory import CompFactory
eventLoop = CompFactory.AthenaEventLoopMgr()
eventLoop.EventPrintoutInterval = 1000
acc.addService(eventLoop)

if args.verbose:
    acc.foreach_component("*").OutputLevel = VERBOSE
    acc.printConfig()
else:
    acc.foreach_component("*").OutputLevel = INFO

sc = acc.run(maxEvents=args.nevents)
print(f"Job finished with {sc.isSuccess()} => {not sc.isSuccess()}")
sys.exit(not sc.isSuccess())
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to check the noisy strips found in individual run DBs
#
# Usage:
#  ./checkNoisyStripDB.py -h
#
import sys
import argparse
from pathlib import Path

from PyCool import cool
from CoolConvUtilities.AtlCoolLib import indirectOpen

def parse_arguments():

    description = "Script to check noisy strips in DBs from individual runs\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("--threshold", default="0.01", help="Threshold to call a strip noisy (default 0.01)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list
        elif '-' in runlist[0]:
            pass
        elif ',' in runlist[0]:
            pass
        elif not runlist[0].isnumeric():
            print(f"File {path} doesn't exist!")
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                if not start.isnumeric():
                    print(f"Found invalid run {start}")
                    continue
                if not end.isnumeric():
                    print(f"Found invalid run {end}")
                    continue
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                if not segment.isnumeric():
                    print(f"Found invalid run {segment}")
                    continue
                run_list.append(int(segment))

    return run_list
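# For illustration (inputs assumed, not from the source):
#   parseRunList(["7730-7732,7735"]) -> [7730, 7731, 7732, 7735]
#   parseRunList(["physics_runs.txt"]) reads run numbers line by line from that file,
#   skipping blank lines and lines starting with '#' or '!'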
class NoisyRunAnalyzer:

    def __init__(self, verbose=False, threshold=0.01):
        self.verbose = verbose
        self.run_dict = {}
        self.noise_threshold = threshold

    def addRun(self, runnum):

        if self.verbose: print(f"\nRun {runnum}")

        runstr = f'{runnum:06d}'
        infile = Path(f'{runstr}/noisy_{runstr}.db')
        if not infile.is_file():
            print(f"{runstr}/noisy_{runstr}.db doesn't exist!")
            return

        db_string = f'sqlite://;schema={runstr}/noisy_{runstr}.db;dbname=CONDBR3'
        try:
            self.db = indirectOpen(db_string, readOnly=True, oracle=False, debug=False)
        except Exception as e:
            print(e)
            return

        # Now read all channels
        folder_string = "/SCT/DAQ/NoisyStrips"
        try:
            self.folder = self.db.getFolder(folder_string)
        except Exception as e:
            print(e)
            return

        if self.folder is None:
            print(f"Can't access folder {folder_string} in {db_string}")
            return

        channels = cool.ChannelSelection.all()
        iov_lo = (runnum << 32)
        iov_hi = ((runnum+1) << 32) - 1
        tag = ''

        try:
            itr = self.folder.browseObjects(iov_lo, iov_hi, channels, tag)
        except Exception as e:
            print(e)
            return

        if self.run_dict.get(runnum, None) is None:
            self.run_dict[runnum] = {}

        # Now iterate through objects (should only be one IOV, but multiple channels)
        while itr.goToNext():
            obj = itr.currentRef()
            if self.verbose: print(obj.payload())

            sensor = obj.payload()['sensor']
            strip = obj.payload()['strip']
            occupancy = obj.payload()['occupancy']

            if occupancy < self.noise_threshold: continue

            if self.run_dict[runnum].get(sensor, None) is None:
                self.run_dict[runnum][sensor] = {}

            self.run_dict[runnum][sensor][strip] = occupancy

        if self.verbose: print(self.run_dict)

        # Done, close the database
        self.db.closeDatabase()

    def printRunSummary(self):

        for run in self.run_dict:
            #print(f"Run {run}: {len(self.run_dict[run])} sensors with noisy strips")

            # 16 sensors per layer, 12 layers in total
            noisy_by_layer = [0] * 12
            noisy_strips_by_layer = [0] * 12
            for sensor in self.run_dict[run]:
                layer = sensor // 16
                noisy_by_layer[layer] += 1
                noisy_strips_by_layer[layer] += len(self.run_dict[run][sensor])

            print(f"Run {run} strips > {100*self.noise_threshold:3.1f}% by layer: ", end='')
            [print(f' {n:3d}', end='') for n in noisy_strips_by_layer]
            print()

# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    nra = NoisyRunAnalyzer(verbose=args.verbose, threshold=float(args.threshold))

    for runnum in run_list:
        nra.addRun(runnum)

    nra.printRunSummary()
#!/usr/bin/env python3
#
# Simple utility to run as a post script
# after the noisyStripFinder
#
# Usage: checkNoisyStripHist.py <histfile> [return code]
#
import sys

if len(sys.argv) <= 1:
    sys.exit(1)

filename = sys.argv[1]

#
# Check previous return code if it is provided
if len(sys.argv) >= 3:
    rc = int(sys.argv[2])
    if rc: sys.exit(rc)

#
# Check histogram entries
# This causes a segfault. Let's try without ROOT
#from ROOT import TFile
if False:
    import ROOT
    try:
        f = ROOT.TFile.Open(filename, 'r')
    except Exception as e:
        print(e)
        sys.exit(1)
    else:
        n = f.Get("numEvents").GetVal()
        print(f"Found {filename} with {n} entries")
        sys.exit(n == 0)

from pathlib import Path
f = Path(filename)
if not f.is_file(): sys.exit(1)

size = f.stat().st_size
if size < 1000:  # Almost certainly empty
    print(f"Found {f} with size {size}!")
    sys.exit(1)

sys.exit(0)
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to check NoisyStrips jobs for problems
#
# Usage:
#  ./checkNoisyJobs.py -h
#
import sys
import argparse
import subprocess
from pathlib import Path

import ROOT

def parse_arguments():

    description = "Script to check noisy strip finding jobs\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
    parser.add_argument("--write_missing", action="store_true", help="Write out missing runs to file")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list
        elif '-' in runlist[0]:
            pass
        elif ',' in runlist[0]:
            pass
        elif not runlist[0].isnumeric():
            print(f"File {path} doesn't exist!")
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                if not start.isnumeric():
                    print(f"Found invalid run {start}")
                    continue
                if not end.isnumeric():
                    print(f"Found invalid run {end}")
                    continue
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                if not segment.isnumeric():
                    print(f"Found invalid run {segment}")
                    continue
                run_list.append(int(segment))

    return run_list
class JobChecker:

    def __init__(self, run=None):

        self.select_run(run)
        self.verbose = True
        self.check_all = False

        self.check_function_list = []
        self.check_function_list.append(self.check_directory)
        self.check_function_list.append(self.check_histograms)
        self.check_function_list.append(self.check_dbfile)

    def select_run(self, run):
        self.run_number = run
        if run is None: return
        self.run_string = f'{run:06d}'

    def check_run(self, run=None):
        # Return True on error

        if run is not None:
            self.select_run(run)

        # Guard against no run given here or to select_run()
        if self.run_number is None:
            print("No run specified!")
            return True

        for func in self.check_function_list:
            if func(): return True

        return False

    def check_directory(self):
        directory_path = Path(self.run_string)
        if not directory_path.exists():
            print(f"* Directory {self.run_string} not found!")
        elif self.verbose:
            print(f" => Directory {self.run_string} found")
        return not directory_path.exists()

    def check_dbfile(self):
        dbfile_path = Path(f'{self.run_string}/noisy_{self.run_string}.db')
        if not dbfile_path.exists():
            print(f"* Database file {dbfile_path} not found!")
        elif self.verbose:
            print(f" => Database file {dbfile_path} found")
        return not dbfile_path.exists()

    def check_histograms(self):

        # First, find the submit files
        directory_path = Path(self.run_string)
        submit_list = directory_path.glob('noise*.sub')

        missing = False
        for filepath in submit_list:

            # Ignore DAG
            if '.dag.' in str(filepath): continue

            filestem = filepath.stem
            hist_file = directory_path / Path(f'{filestem}.root')

            if hist_file.exists():
                if self.verbose: print(f" => Found histogram file {hist_file}")

                # Check number of events
                try:
                    f = ROOT.TFile.Open(str(hist_file), 'r')
                except Exception as e:
                    print(e)
                    missing = True
                else:
                    n = f.Get("numEvents").GetVal()
                    if self.verbose:
                        print(f"{hist_file} found with {n} entries")
                    if n == 0:
                        print(f"{hist_file} found with {n} entries")
                        missing = True
                continue

            # Histogram doesn't exist
            missing = True
            print(f"* Histogram file {hist_file} missing! ", end="")

            # See if we can figure out why
            logfile_path = directory_path / Path(f'{filestem}.log')
            if not logfile_path.exists():
                print("=> log file not found")
                continue

            if subprocess.call(['/bin/grep', "Killed", f"{logfile_path}"],
                               stdout=subprocess.DEVNULL):
                # Non-zero return code means no match
                pass
            else:
                # Zero return code means a match; see if we can find the time
                rc = subprocess.run(['/bin/grep', 'Job finished after', f"{logfile_path}"],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
                if rc.returncode:
                    # Can't find the running time
                    print("=> job appears to have been killed")
                else:
                    timestr = rc.stdout.replace('Job finished after ', '')
                    print(f"=> job appears to have been killed after {timestr}")
                continue

            # Can't figure out why
            print('=> unknown problem')

        return missing
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    good_runs = []
    missing_runs = []

    jc = JobChecker()
    jc.verbose = args.verbose

    for runnum in run_list:
        if args.verbose: print(f"\nRun {runnum}")
        if jc.check_run(runnum):
            missing_runs.append(runnum)
        else:
            good_runs.append(runnum)

    print(f"Found {len(good_runs)} good runs and {len(missing_runs)} missing runs")

    if args.write_missing:
        missing_file = "missing_runs.txt"
        with open(missing_file, "w") as f:
            [f.write(f"{run}\n") for run in missing_runs]
        print(f"Wrote {len(missing_runs)} missing runs to {missing_file}")
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to find FASER runs of a given type (Physics by default)
#
# Usage:
#  ./findRuns.py -h
#
import argparse
import requests
from pathlib import Path

def parse_arguments():

    description = "Script to find PHYSICS runs in a range\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
    parser.add_argument("-o", "--output", default="findRuns.txt", help="Specify output file")
    parser.add_argument("-t", "--type", default="Physics", help="Run type to match")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                run_list.append(int(segment))

    return run_list
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    with open(args.output, "w") as f:
        f.write("# findRuns.py")
        [f.write(f" {run}") for run in args.runs]
        f.write("\n")

    # faser-runinfo address
    url = "https://faser-runinfo.app.cern.ch/cgibin/"

    # Cycle through the range
    for run in run_list:
        query = f"{url}/getRunInfo.py?runno={run}"

        response = requests.get(query)

        if not response.json():
            if args.verbose:
                print(f"Couldn't find run {run}")
            continue

        run_type = response.json()['type']
        if args.verbose:
            print(f"Run {run} has type {run_type}")

        if run_type != args.type: continue

        with open(args.output, "a") as f:
            f.write(f"{run}\n")

    # End of loop over runs
-#!/usr/bin/env python
+#!/usr/bin/env python3

import os
import sys
@@ -9,8 +9,9 @@ from CoolConvUtilities.AtlCoolLib import indirectOpen
 parser = argparse.ArgumentParser()
 parser.add_argument("file", nargs="+", help="full path to input file")
-parser.add_argument("-t", "--threshold", type=float, default=0.01, help="add strips with an occupancy larger than this threshold to the database")
+parser.add_argument("-t", "--threshold", type=float, default=0.001, help="add strips with an occupancy larger than this threshold to the database")
 parser.add_argument("--force", "-f", action="store_true", help="Overwrite existing DB")
+parser.add_argument("--permissive", action="store_true", help="Allow some input files to be missing")
 parser.add_argument("--output", "-o", default="noisy_strips.db", help="Specify output DB")
 parser.add_argument("--isMC", action="store_true", help="Write MC DB (default: real data)")
 args = parser.parse_args()
@@ -26,6 +27,7 @@ HistDict = {}
 ROOT.TH1.AddDirectory(0)  # This is necessary in order to have the histogram data after closing the file

+trigger = None
 iovlo = cool.ValidityKeyMax
 iovhi = cool.ValidityKeyMin
@@ -33,8 +35,23 @@ iovhi = cool.ValidityKeyMin
 skipList = ["numEvents", "trigger", "IOVLoRun", "IOVLoLB", "IOVHiRun", "IOVHiLB"]

 for inputfile in args.file:
-    f = ROOT.TFile.Open(inputfile, "r")
-    numEvents += f.Get("numEvents").GetVal()
+    # Check that this file exists
+    if not os.path.exists(inputfile):
+        if args.permissive: continue
+        print(f"File {inputfile} not found!")
+        sys.exit(1)
+
+    try:
+        f = ROOT.TFile.Open(inputfile, "r")
+    except Exception as e:
+        print(e)
+        if args.permissive: continue
+        sys.exit(1)
+
+    n = f.Get("numEvents").GetVal()
+    print(f"Found {n} events in {inputfile}")
+    if n == 0: continue
+    numEvents += n

     lorun = f.Get("IOVLoRun").GetVal()
     hirun = f.Get("IOVHiRun").GetVal()
     lo = (lorun << 32)
@@ -42,8 +59,13 @@ for inputfile in args.file:
     if lo < iovlo: iovlo = lo
     if hi > iovhi: iovhi = hi

-    if nfiles == 0:
+    if trigger is None:
         trigger = f.Get("trigger").GetVal()
+    else:
+        t = f.Get("trigger").GetVal()
+        if t != trigger:
+            print(f"Trigger mismatch! {t} != {trigger} in {inputfile}")
+            sys.exit(1)  # This shouldn't happen

     for rootkey in f.GetKeyNames():
@@ -58,9 +80,8 @@ for inputfile in args.file:
     nfiles += 1
     f.Close()

-print("Total # of root files analyzed = ", nfiles)
-print("Total number of events = ", numEvents)
+print(f"Total {nfiles} files analyzed with {numEvents} events")
+print(f"Trigger mask = 0x{trigger:02x}")
 print(f"IOV from {(iovlo >> 32)}/{(iovlo & 0xFFFFFFFF)} to {(iovhi >> 32)}/{(iovhi & 0xFFFFFFFF)}")

 # Write DB
...
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to merge individual run DBs into a master DB
#
# Usage:
#  ./mergeNoisyDBRuns.py -h
#
import sys
import argparse
import subprocess
from pathlib import Path

def parse_arguments():

    description = "Script to merge DBs from individual runs into one DB\n"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs or range")
    parser.add_argument("-v", "--verbose", action="store_true", help="Debugging output")
    parser.add_argument("-a", "--append", action="store_true", help="Append (rather than overwrite) existing file")
    parser.add_argument("-o", "--output", default="noisy_strips.db", help="Specify output DB name")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list
        elif '-' in runlist[0]:
            pass
        elif ',' in runlist[0]:
            pass
        elif not runlist[0].isnumeric():
            print(f"File {path} doesn't exist!")
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                if not start.isnumeric():
                    print(f"Found invalid run {start}")
                    continue
                if not end.isnumeric():
                    print(f"Found invalid run {end}")
                    continue
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                if not segment.isnumeric():
                    print(f"Found invalid run {segment}")
                    continue
                run_list.append(int(segment))

    return run_list
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    first = True

    for runnum in run_list:
        if args.verbose: print(f"\nRun {runnum}")

        runstr = f'{runnum:06d}'
        infile = Path(f'{runstr}/noisy_{runstr}.db')
        if not infile.is_file():
            print(f"{runstr}/noisy_{runstr}.db doesn't exist!")
            continue

        command = ['AtlCoolCopy']
        command.append(f'sqlite://;schema={runstr}/noisy_{runstr}.db;dbname=CONDBR3')
        command.append(f'sqlite://;schema={args.output};dbname=CONDBR3')

        if first:
            first = False
            target = Path(args.output)
            if not target.is_file():
                print(f"Creating file {args.output}")
                command.append("-create")
            elif args.append:
                print(f"Appending to existing file {args.output}")
            else:
                print(f"Deleting existing file {args.output}")
                target.unlink()
                command.append("-create")

        command.extend(["-alliov", "-nrls", f"{runnum}", "0"])

        if args.verbose: print(command)

        rc = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True)

        if args.verbose: print(rc.stdout)
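# For illustration (run number and paths assumed, not from the source): for run 8020
# merged into an existing noisy_strips.db, the constructed command is equivalent to
#   AtlCoolCopy "sqlite://;schema=008020/noisy_008020.db;dbname=CONDBR3" \
#               "sqlite://;schema=noisy_strips.db;dbname=CONDBR3" -alliov -nrls 8020 0
# with -create appended only when the output file does not yet exist.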
#!/bin/bash
#
# Wrapper to run any python script in the athena environment
# Typically used to set up a script for use in condor
#
# runFaserScript.sh --rel <release_directory> script.py arguments...
#
function print_usage {
    echo "Usage: runFaserScript.sh --rel <release_directory> [--log <logfile>] script.py [arguments]"
    echo "  The first option must be the release directory where asetup is called"
    echo "  All other options are passed to script.py"
    echo "  Options: "
    echo "   -h - print usage"
    echo "   --rel <release_directory> - specify release directory"
    echo "   --log <logfile> - redirect script output to logfile"
    echo "   -- - end of options considered by this script"
}
#
release_directory=""
logfile=""
while [ -n "$1" ]
do
    case "$1" in
        -h | --help)
            print_usage
            exit 0;;

        --rel)
            release_directory="$2";
            shift;
            shift;;

        --log)
            logfile="$2";
            shift;
            shift;;

        --)  # Signal that everything else should be executed
            shift;
            break;;

        *)
            # Nothing we recognize, execute everything remaining
            break;;
    esac
done

if [ -z "$release_directory" ]; then
    echo "Must specify release"
    print_usage
    exit 1
fi

# Redirect to log file if requested
if [ ! -z "$logfile" ]; then
    #logfile="post_`date +%m%d-%H%M%S`.log"
    exec >& "$logfile"
fi
#
# Set up release
starting_directory=`pwd`
echo "cd $release_directory"
cd $release_directory
#
# Set up the release
export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase
# Must pass something or source will pass *this* script's arguments instead
source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh --
echo "fsetup Athena,22.0.49"
asetup --input=calypso/asetup.faser Athena,22.0.49
echo "source run/setup.sh"
source run/setup.sh
#
# Go back to where we started
echo "cd $starting_directory"
cd $starting_directory
#
# Now run the command
echo "$@"
eval "$@"
#!/usr/bin/env python3
#
# Sept 2022, E. Torrence
#
# Script to run the noisy strip finder on a given run.
# This creates condor submission scripts and a DAG
# and submits those to run the jobs.
#
# Usage:
#  submitNoisyStripJobs.py -h
#
import os
import sys
import argparse
import subprocess
from pathlib import Path

def parse_arguments():

    description = "Script to submit jobs to find noisy strips"
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("runs", nargs='+', help="Specify FASER runs")
    parser.add_argument("--per_job", type=int, default=25, help="Specify maximum files per job")
    parser.add_argument("--release", default='.', help="Specify path to release directory")
    parser.add_argument("--nosubmit", action="store_true", help="Don't submit jobs")
    parser.add_argument("--nocleanup", action="store_true", help="Don't clean up output directory on completion")
    parser.add_argument("--queue", default="longlunch", help="Specify queue (longlunch=2h (default), workday=8h)")
    parser.add_argument("--rawdir", default="/eos/experiment/faser/raw/2022",
                        help="Specify raw data directory (default: /eos/experiment/faser/raw/2022)")

    return parser.parse_args()
# Take a string and turn it into a list of integers
# Can specify single values, ranges, or comma-separated lists of both
def parseRunList(runlist):

    run_list = []

    # Check if this is a file with run numbers
    if len(runlist) == 1:
        path = Path(runlist[0])
        if path.exists() and path.is_file():
            print(f"Reading runs from {path}")
            # Try reading each line as a run number
            with path.open() as f:
                for line in f.readlines():
                    line = line.strip()
                    if len(line) == 0: continue
                    if line[0] in ['#', '!']: continue
                    if not line.isnumeric():
                        print(f"Error parsing {line}")
                        continue
                    run_list.append(int(line))
            # Done reading file
            return run_list

    for string in runlist:
        tokens = string.split(',')
        for segment in tokens:
            if len(segment) == 0: continue
            if '-' in segment:  # Range of runs
                start, end = segment.split('-')
                run_list.extend(list(range(int(start), int(end)+1)))
            else:
                run_list.append(int(segment))

    return run_list
# Command-line execution
if __name__ == "__main__":

    # Parse the command-line arguments
    args = parse_arguments()

    run_list = parseRunList(args.runs)
    run_list.sort()

    # Check some things
    rel_dir = Path(args.release)
    package_dir = rel_dir / Path("calypso/Tracker/TrackerRecAlgs/NoisyStripFinder")

    # Script to allow python scripts to be run in condor in the FASER environment
    env_exec = package_dir / Path("share/runFaserScript.sh")

    if not env_exec.exists():
        print(f"Can't find executable in release directory {args.release}")
        sys.exit(1)

    print(f"Start processing {len(run_list)} runs")

    for run in run_list:
        print(f"Working on run {run}")

        runstr = f"{run:06d}"

        # Get file list
        raw_dir = Path(f'{args.rawdir}/{runstr}')
        file_list = list(raw_dir.glob("Faser-Physics*.raw"))

        # Now we need to decide what to do
        nraw = len(file_list)
        njobs = (nraw-1) // args.per_job + 1
        if njobs == 1:
            print(f"{nraw} raw files found, submitting {njobs} job")
        else:
            print(f"{nraw} raw files found, submitting {njobs} jobs")
        if njobs == 0: continue

        # Create a directory for this
        jobdir = Path(runstr)
        if jobdir.exists():
            print(f"Directory {jobdir} exists, deleting...")
            import shutil
            shutil.rmtree(jobdir.resolve())

        jobdir.mkdir(exist_ok=True)

        submit_list = []

        # Start the DAG file
        dagfile = jobdir / Path(f"noise_{runstr}.dag")
        with open(dagfile, 'w') as d:
            d.write(f"# Auto-generated DAG submission script for {runstr}\n")

        for job in range(njobs):

            jobstr = f"{job:03d}"
            if njobs == 1:
                jobname = f"noise_{runstr}"
            else:
                jobname = f"noise_{runstr}_{jobstr}"
            subfile = jobdir / Path(jobname+".sub")

            ilo = job * args.per_job
            ihi = ilo + args.per_job
            job_files = file_list[ilo:ihi]

            #
            # Generate a job submission script
            print(f"Writing {subfile}")
            submit_list.append(jobname)
            with open(subfile, "w") as f:
                f.write(f"# Auto-generated submission script for {jobname}\n")
                # Set the queue: workday = 8h, longlunch = 2h might be enough
                f.write(f'+JobFlavour = "{args.queue}"\n')
                f.write(f"executable = {env_exec.resolve()}\n")
                f.write(f"output = {jobdir.resolve()}/{jobname}.out\n")
                f.write(f"error = {jobdir.resolve()}/{jobname}.err\n")
                f.write(f"log = {jobdir.resolve()}/{jobname}.log\n")
                # No newline as we need to add input files
                f.write(f"arguments = --rel {rel_dir.resolve()} NoisyStripFinderJob.py --out {jobname}.root ")
                [f.write(f" {filename}") for filename in job_files]
                f.write("\n")
                f.write("queue")

            # Also add this to our DAG
            with open(dagfile, 'a') as d:
                d.write(f"JOB {jobname} {subfile.name}\n")
                # Also check that the histogram isn't empty
                # This can fix some file read errors
                d.write(f"SCRIPT POST {jobname} {env_exec.resolve()} --rel {rel_dir.resolve()} checkNoisyStripHist.py {jobname}.root $RETURN\n")

        # Done writing individual jobs

        # Add the merge job to the DAG
        with open(dagfile, 'a') as d:
            d.write(f"JOB merge_{runstr} merge_{runstr}.sub\n")
            d.write("PARENT")
            for jobname in submit_list:
                d.write(f" {jobname}")
            d.write(f" CHILD merge_{runstr}\n")
            # Add a retry directive
            d.write("RETRY ALL_NODES 1\n")

        # Write the merge job submit script
        jobname = f"merge_{runstr}"
        subfile = jobdir / Path(jobname+".sub")
        with open(subfile, "w") as f:
            f.write(f"# Auto-generated submission script for {jobname}\n")
            f.write(f"output = {jobdir.resolve()}/{jobname}.out\n")
            f.write(f"error = {jobdir.resolve()}/{jobname}.err\n")
            f.write(f"log = {jobdir.resolve()}/{jobname}.log\n")
            #f.write('+JobFlavour = "workday"\n')  # 8 hours, longlunch might be enough
            #f.write(f"executable = {hist_exec.resolve()}\n")
            f.write(f"executable = {env_exec.resolve()}\n")
            # No newline as we need to add input files
            #f.write(f"arguments = --rel {rel_dir.resolve()} --force -o noisy_{runstr}.db")
            f.write(f"arguments = --rel {rel_dir.resolve()} makeNoisyStripDB.py --force -o noisy_{runstr}.db")
            [f.write(f" {filename}.root") for filename in submit_list]
            f.write("\n")

            # Provide files to transfer
            f.write(f"transfer_input_files = {submit_list[0]}.root")
            [f.write(f",{filename}.root") for filename in submit_list[1:]]
            f.write("\n")
            f.write("should_transfer_files = IF_NEEDED\n")

            # Don't forget the queue command
            f.write("queue")

        # Do we want a cleanup script?
        if not args.nocleanup:
            with open(dagfile, 'a') as d:
                d.write(f"SCRIPT POST merge_{runstr} cleanup.sh $RETURN\n")

            cleanup_file = jobdir / Path("cleanup.sh")
            with open(cleanup_file, 'w') as f:
                f.write("#!/bin/bash\n")
                f.write('if [[ $1 != "0" ]]; then\n')
                f.write('  exit $1\n')
                f.write('fi\n')
                #f.write('rm noise_{runstr}.dag.* \n')
                f.write('rm *.log\n')
                f.write('rm *.err\n')
                f.write('rm eventLoopHeartBeat.txt\n')
                f.write('rm *.cc\n')
                for job in submit_list:
                    f.write(f'gzip {job}.out\n')
                #f.write('gzip merge*.out\n')
                f.write('exit 0\n')

            # And make it executable
            import stat
            cleanup_file.chmod(cleanup_file.stat().st_mode | stat.S_IEXEC)

        if not args.nosubmit:
            print(f"Submitting noise_{runstr}.dag")
            startdir = os.getcwd()
            os.chdir(jobdir)

            # lxplus python3 is 3.6.8, so use old subprocess.run arguments
            proc = subprocess.run(["/usr/bin/condor_submit_dag", f"noise_{runstr}.dag"],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True)

            if len(proc.stdout) > 0:
                print(proc.stdout)
            if len(proc.stderr) > 0:
                print(proc.stderr)

            # Change back to our starting point
            os.chdir(startdir)
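# For illustration (run number and paths assumed, not from the source): for run 8020
# split into two finder jobs, the generated noise_008020.dag would contain lines like
#   JOB noise_008020_000 noise_008020_000.sub
#   SCRIPT POST noise_008020_000 <env_exec> --rel <release> checkNoisyStripHist.py noise_008020_000.root $RETURN
#   JOB noise_008020_001 noise_008020_001.sub
#   SCRIPT POST noise_008020_001 <env_exec> --rel <release> checkNoisyStripHist.py noise_008020_001.root $RETURN
#   JOB merge_008020 merge_008020.sub
#   PARENT noise_008020_000 noise_008020_001 CHILD merge_008020
#   RETRY ALL_NODES 1
#   SCRIPT POST merge_008020 cleanup.sh $RETURN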
@@ -56,7 +56,7 @@ StatusCode NoisyStripFinder::execute(const EventContext& ctx) const {
   if (!(xaod->tap() & m_triggerMask.value())) return StatusCode::SUCCESS; // only process events that pass the trigger mask

-  ATH_MSG_INFO("trigger passed mask");
+  ATH_MSG_DEBUG("trigger passed mask");
   ++m_numberOfEvents;

   // Keep track of run
@@ -116,12 +116,12 @@ StatusCode NoisyStripFinder::execute(const EventContext& ctx) const {
 StatusCode NoisyStripFinder::finalize()
 {
   ATH_MSG_INFO("NoisyStripFinder::finalize()");
-  ATH_MSG_INFO( m_numberOfEvents << " events processed" );
+  ATH_MSG_INFO( m_numberOfEvents << " events found" );
   ATH_MSG_INFO( m_numberOfRDOCollection << " RDO collections processed" );
   ATH_MSG_INFO( m_numberOfRDO << " RawData" );
-  ATH_MSG_INFO( "Number of sensors found = " << NoisyStrip_histmap.size() << " out of 144" );
+  ATH_MSG_INFO( "Number of sensors found = " << NoisyStrip_histmap.size() << " out of 192" );
-  for (int ihash = 0; ihash < 144; ++ihash){ // print out the sensors that are missing
+  for (int ihash = 0; ihash < 192; ++ihash){ // print out the sensors that are missing
     if ( NoisyStrip_histmap.count(ihash) == 0 ){
       ATH_MSG_INFO("missing sensor # " << ihash);
     }
@@ -166,6 +166,7 @@ StatusCode NoisyStripFinder::finalize()
   ATH_MSG_INFO( "---------- hot strip occupancy >= 0.1 for Tracker Sensor hash = " << it->first << " ----------" );
   int i = 1;
   while (i <= 768){
+    // This is only for information
     if ( it->second->GetBinContent(i)/(double)m_numberOfEvents >= 0.01 ){
       ATH_MSG_INFO( "hot strip # = " << i-1 << ", hit occupancy = " << it->second->GetBinContent(i)/(double)m_numberOfEvents ); // print out hot strips
     }
...