Commit 78318d77 authored by Ryunosuke O'Neil

Merge branch 'roneil-docstrings' into 'master'

Add pre-commit hook for google-convention python docstrings. Fix docstrings where needed, and add missing docstrings.

See merge request !62
parents 0b6f139f 50f84601
Pipeline #4010179 passed with stages
in 2 minutes and 58 seconds
......@@ -36,3 +36,12 @@ repos:
rev: 73edff357d446703066009ecd6ae4074740a3157
hooks:
- id: lb-add-copyright
- repo: https://github.com/pycqa/pydocstyle
rev: 6.1.1 # pick a git hash / tag to point to
hooks:
- id: pydocstyle
exclude: (^tests)|cern_sso.py$
args:
- --convention=google
- --add-ignore=D104,D100,D200
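For reference, a short sketch of the Google-style docstring layout this hook enforces (convention=google with D104, D100 and D200 ignored); the function and its contents are hypothetical:

def scale_histogram(hist, factor):
    """Scale a histogram by a constant factor.

    Args:
        hist (list[float]): Bin contents to scale.
        factor (float): Multiplicative scale factor.

    Returns:
        list[float]: The scaled bin contents.
    """
    return [x * factor for x in hist]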
###############################################################################
# (c) Copyright 2020 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2020-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -29,6 +29,11 @@ from .parsing import parse_yaml, render_yaml, validate_yaml
def write_jsroot_compression_options(dynamic_dir):
"""Write options file to configure JSROOT-compatible compression on job output files.
Args:
dynamic_dir: Location to write the use-jsroot-compression.py options file.
"""
with open(join(dynamic_dir, "use-jsroot-compression.py"), "wt") as fp:
fp.write(
"\n".join(
......
###############################################################################
# (c) Copyright 2021 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2021-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -23,7 +23,7 @@ from hist import Hist
@dataclass
class CheckResult:
"""Class for representing the return result of ntuple checks"""
"""Class for representing the return result of ntuple checks."""
check_type: str
passed: bool
......@@ -32,10 +32,13 @@ class CheckResult:
def run_job_checks(checks_list, check_data, test_ntuple_path_list):
"""
Run the checks listed for the given job, using data from an ntuple made earlier (by `lb-ap test`).
"""Run the checks listed using data from ntuples.
Run a list of checks (can be a subset of all the checks defined in check_data).
Requires that the data ntuples are already created (e.g. by `lb-ap test`).
Returns dict of CheckResult objects
Returns:
dict: CheckResult objects
"""
check_results = {}
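As a self-contained illustration of the pattern described above (run only the requested subset of the defined checks and key the results by check name), with placeholder names and a simplified stand-in for CheckResult rather than the module's real API:

from dataclasses import dataclass

@dataclass
class SimpleResult:
    # Stand-in for CheckResult, for illustration only.
    check_type: str
    passed: bool

def run_selected_checks(checks_list, check_functions):
    # Run only the checks that were requested and collect them in a dict
    # keyed by check name, mirroring the return value described above.
    return {name: check_functions[name]() for name in checks_list}

# Hypothetical checks defined elsewhere:
check_functions = {
    "enough_entries": lambda: SimpleResult("num_entries", True),
    "mass_in_range": lambda: SimpleResult("range", False),
}
results = run_selected_checks(["mass_in_range"], check_functions)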
......@@ -106,13 +109,18 @@ def num_entries(
count,
tree_pattern,
):
"""Check that all matching TTree objects contain a minimum number of entries.
"""Number of entries check.
Check that all matching TTree objects contain a minimum number of entries.
:param filepath_list: List of paths to files to analyse
:param count: The minimum number of entries required
:param tree_pattern: A regular expression for the TTree objects to check
:returns: A CheckResult object, containing tree_data key/values:
* num_entries: the total number of events in the TTree
Args:
filepath_list (list[file-like]): List of paths to files to analyse
count (int): The minimum number of entries required
tree_pattern (regex): A regular expression for the TTree objects to check
Returns:
A CheckResult object, which for each tree contains tree_data key/values:
num_entries: The total number of events in the TTree
"""
result = CheckResult("num_entries", False)
for filepath in filepath_list:
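For illustration, a standalone sketch of the kind of counting this check performs, written directly against uproot; the file path and tree pattern are placeholders and the module's actual implementation may differ:

import re
import uproot

def count_matching_entries(filepath_list, tree_pattern, count):
    # Sum num_entries over every TTree whose name matches the pattern.
    total = 0
    for filepath in filepath_list:
        with uproot.open(filepath) as f:
            for name in f.keys(filter_classname="TTree", cycle=False):
                if re.fullmatch(tree_pattern, name):
                    total += f[name].num_entries
    # The check passes only if the total reaches the required minimum.
    return total, total >= count

# total, passed = count_matching_entries(["output.root"], r"DecayTree", 1000)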
......@@ -162,29 +170,34 @@ def range_check(
mean_tolerance,
std_tolerance,
):
"""Check if there is at least one entry in the TTree object with a specific
variable falling in a pre-defined range. The histogram is then produced as output.
"""Range check.
Check if there is at least one entry in the TTree object with a specific variable falling in a pre-defined range.
If the expected mean and standard deviation values are given in input, they are compared with the observed ones
and their agreement within the provided *_tolerance is checked.
It is possible to blind some regions.
:param filepath_list: List of paths to files to analyse
:param expression: Name of the variable (or expression depending on varibales in the TTree) to be checked
:param limits: Pre-defined range
:param n_bins: number of bins for the histogram
:param blind_ranges: regions to be blinded in the histogram
:param exp_mean: Expected mean value (optional)
:param exp_std: Expected standard deviation (optional)
:param mean_tolerance: Maximum shift tolerated between expected and observed mean values (optional)
:param std_tolerance: Maximum shift tolerated between expected and observed values of standard deviation (optional)
:param tree_pattern: A regular expression for the TTree object to check
:returns: A CheckResult object, containing tree_data key/values:
* histograms: filled 1D histogram of the quantity defined by the expression parameter
* num_entries: the total number of entries in the histogram (with blind ranges applied)
* mean: the mean of the histogram (approximated using binned data)
* variance: the variance of the histogram (approximated using binned data)
* stddev: the standard deviation of the histogram (approximated using binned data)
* num_entries_in_mean_window: the number of events falling in the exp_mean +- exp_std region (with blind ranges applied)
and their agreement within the provided *_tolerance is checked. It is also possible to blind some regions.
Args:
filepath_list (list[file-like]): List of paths to files to analyse
expression (str): Name of the variable (or expression depending on variables in the TTree) to be checked
limits (dict): Pre-defined range for x axis
n_bins (int): Number of bins for the histogram
blind_ranges (dict): Regions to be blinded in the histogram
exp_mean (float): Expected mean value (optional)
exp_std (float): Expected standard deviation (optional)
mean_tolerance (float): Maximum shift tolerated between expected and observed mean values (optional)
std_tolerance (float): Maximum shift tolerated between expected and observed values of standard deviation
(optional)
tree_pattern (regex): A regular expression for the TTree object to check
Returns:
A CheckResult object, which for each tree contains tree_data key/values:
histograms (list[Hist]): Filled 1D histogram of the quantity defined by the expression parameter
num_entries (float): The total number of entries in the histogram (with blind ranges applied)
mean (float): The mean of the histogram (approximated using binned data)
variance (float): The variance of the histogram (approximated using binned data)
stddev (float): The standard deviation of the histogram (approximated using binned data)
num_entries_in_mean_window (float): The number of events falling in the exp_mean +- exp_std region (with
blind ranges applied)
"""
result = CheckResult("range", True)
bin_centers = None
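To make the tolerance comparison concrete, a minimal sketch of approximating the mean and standard deviation from binned data and testing them against the expected values; all numbers are made up:

import numpy as np

def binned_mean_std(bin_centers, bin_counts):
    # Approximate the mean and standard deviation from histogram contents.
    total = bin_counts.sum()
    mean = (bin_centers * bin_counts).sum() / total
    variance = (bin_counts * (bin_centers - mean) ** 2).sum() / total
    return mean, np.sqrt(variance)

centers = np.linspace(139.8, 154.2, 10)
counts = np.array([5, 20, 90, 240, 400, 380, 220, 80, 15, 4], dtype=float)
mean, stddev = binned_mean_std(centers, counts)

exp_mean, exp_std = 145.5, 2.0
mean_tolerance, std_tolerance = 0.5, 0.5
passed = (abs(mean - exp_mean) <= mean_tolerance
          and abs(stddev - exp_std) <= std_tolerance)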
......@@ -329,17 +342,22 @@ def range_check_nd(
blind_ranges,
tree_pattern,
):
"""Produce 2-dimensional histograms of variables taken from a TTree object.
:param filepath_list: List of paths to files to analyse
:param expressions: Name of the variables (or expressions) to be checked.
:param limits: Pre-defined ranges
:param n_bins: number of bins for the histogram
:param blind_ranges: regions to be blinded in the histogram
:param tree_pattern: A regular expression for the TTree object to check
:returns: A CheckResult object, containing tree_data key/values:
* histograms: a list of filled histograms of the quantities defined by the expression parameters
* num_entries: the total number of entries in the histogram (with blind ranges applied)
"""N-dimensional range check.
Produce 2-dimensional histograms of variables taken from a TTree object.
Args:
filepath_list (list[file-like]): List of paths to files to analyse
expressions (dict): Names of the variables (or expressions) to be checked.
limits (dict): Pre-defined ranges
n_bins (dict): Number of bins for the histogram
blind_ranges (dict): Regions to be blinded in the histogram
tree_pattern (regex): A regular expression for the TTree object to check
Returns:
A CheckResult object, which for each tree contains tree_data key/values:
histograms (list[Hist]): A list of filled histograms of the quantities defined by the expression parameters
num_entries (float): The total number of entries in the histogram (with blind ranges applied)
"""
result = CheckResult("range_nd", True)
......@@ -526,16 +544,21 @@ def num_entries_per_invpb(
tree_pattern,
lumi_pattern,
):
"""Check that the matching TTree objects contain a minimum number of entries per unit luminosity (pb-1).
:param filepath_list: List of paths to files to analyse
:param count_per_invpb: The minimum number of entries per unit luminosity required
:param tree_pattern: A regular expression for the TTree objects to check
:param lumi_pattern: A regular expression for the TTree object containing the luminosity information
:returns: A CheckResult object, containing tree_data key/values:
* num_entries: the total number of events in the TTree
* lumi_invpb: the total luminosity, in inverse picobarns
* num_entries_per_invpb: the total number of events divided by the total luminosity
"""Number of entries per inverse picobarn check.
Check that the matching TTree objects contain a minimum number of entries per unit luminosity (pb-1).
Args:
filepath_list (list[file-like]): List of paths to files to analyse
count_per_invpb (float): The minimum number of entries per unit luminosity required
tree_pattern (regex): A regular expression for the TTree objects to check
lumi_pattern (regex): A regular expression for the TTree object containing the luminosity information
Returns:
A CheckResult object, which for each tree contains tree_data key/values:
num_entries (float): The total number of events in the TTree
lumi_invpb (float): The total luminosity, in inverse picobarns
num_entries_per_invpb (float): The total number of events divided by the total luminosity
"""
result = CheckResult("num_entries_per_invpb", True)
......@@ -627,34 +650,38 @@ def range_check_bkg_subtracted(
blind_ranges,
tree_pattern,
):
"""Check if there is at least one entry in the TTree object with a specific
variable falling in a pre-defined range. The background-subtracted histogram is then produced as output.
Background is subtracted assuming a linear distribution. In particular, signal ([m-s, m+s])
and background ([m-b-delta, m-b] U [m+b, m+b+delta]) windows have to be defined on a control variable.
Then, one histogram is created for events falling in the signal region and another histogram is created
for events falling in the background region.
The subtraction, using the proper scaling factor, is finally performed.
It is possible to blind some regions.
:param filepath_list: List of paths to files to analyse
:param expression: Name of the variable (or expression depending on varibales in the TTree) to be checked
:param limits: Pre-defined range
:param expr_for_subtraction: Name of the control variable (or expression depending on varibales in the TTree)
to be used to perform background subtraction
:param mean_sig: expected mean value of expr_for_subtraction variable. The signal window will be centered around this value.
:param background_shift: Shift, w.r.t the "mean_sig" value, used to define the two background regions.
:param background_window: Length of the background windows (of expr_for_subtraction variable).
:param signal_window: Length of the signal window (of expr_for_subtraction variable) used for background subtraction.
The window is centered around the value of "mean_sig".
:param n_bins: number of bins for the histogram
:param blind_ranges: regions to be blinded in the histogram
:param tree_pattern: A regular expression for the TTree object to check
:returns: A CheckResult object, containing tree_data key/values:
* histograms: a list of filled 1D histograms,
Index 0: the control variable used to perform the subtraction
Index 1: events in the signal window
Index 2: events in the background window
Index 3: the background-subtracted result
"""Range check with background subtraction.
Check if there is at least one entry in the TTree object with a specific variable falling in a pre-defined range.
The background-subtracted histogram is then produced as output. Background is subtracted assuming a linear
distribution. In particular, signal ([m-s, m+s]) and background ([m-b-delta, m-b] U [m+b, m+b+delta]) windows have
to be defined on a control variable. Then, one histogram is created for events falling in the signal region and
another histogram is created for events falling in the background region. The subtraction, using the proper scaling
factor, is finally performed. It is also possible to blind some regions.
Args:
filepath_list (list[file-like]): List of paths to files to analyse
expression (str): Name of the variable (or expression depending on variables in the TTree) to be checked
limits (dict): Pre-defined range
expr_for_subtraction (str): Name of the control variable (or expression depending on variables in the TTree) to
be used to perform background subtraction
mean_sig (float): Expected mean value of expr_for_subtraction variable. The signal window will be centered
around this value.
background_shift (float): Shift, w.r.t the "mean_sig" value, used to define the two background regions.
background_window (float): Length of the background windows (of expr_for_subtraction variable).
signal_window (float): Length of the signal window (of expr_for_subtraction variable) used for background
subtraction. The window is centered around the value of "mean_sig".
n_bins (int): Number of bins for the histogram
blind_ranges (dict): Regions to be blinded in the histogram
tree_pattern (regex): A regular expression for the TTree object to check
Returns:
A CheckResult object, which for each tree contains tree_data key/values:
histograms: A list of filled 1D histograms,
Index 0: The control variable used to perform the subtraction
Index 1: Events in the signal window
Index 2: Events in the background window
Index 3: The background-subtracted result
"""
result = CheckResult("range_bkg_subtracted", True)
......@@ -843,12 +870,17 @@ def branches_exist(
branches,
tree_pattern,
):
"""Check that all matching TTree objects contain a minimum number of entries.
"""Branches exist check.
Check that all matching TTree objects contain the required branches.
Args:
filepath_list: List of paths to files to analyse
branches: List of branches that will be required to exist in TTree objects
tree_pattern: A regular expression for the TTree objects to check
:param filepath_list: List of paths to files to analyse
:param branches: List of branches that will be required to exist in TTree objects
:param tree_pattern: A regular expression for the TTree objects to check
:returns: A CheckResult object, containing no tree_data key/values (empty dict)
Returns:
A CheckResult object, which for each tree contains no tree_data key/values (an empty dict)
"""
result = CheckResult("branches_exist", True)
for filepath in filepath_list:
......
###############################################################################
# (c) Copyright 2021 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2021-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -8,8 +8,7 @@
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
"""
Contains utility functions used to display and save the output of the checks.
"""Contains utility functions used to display and save the output of the checks.
"""
import copy
import json
......@@ -18,8 +17,12 @@ import uproot
def hist_to_root(job_name, check_results, output_path):
"""
Save histograms in a root file.
"""Save histograms in a root file.
Args:
job_name (str): The job name. Unused.
check_results (dict): Dictionary of check results
output_path: Output directory for root file.
"""
# Create the file only if the check produce histograms in output
checks_with_histo = ["range", "range_nd", "range_bkg_subtracted"]
......@@ -36,6 +39,14 @@ def hist_to_root(job_name, check_results, output_path):
def serialise_hist(obj):
"""Serialise a Hist histogram object into a python dictionary representation.
Args:
obj: The histogram object to be serialised.
Returns:
dict: Representation of histogram (version 1)
"""
serialised_obj = {
"version": 1,
"name": str(obj.name),
......@@ -59,8 +70,15 @@ def checks_to_JSON(
all_check_results,
json_output_path=None,
):
"""
Serialise information about all checks into a JSON format
"""Serialise information about all checks into a JSON format.
Args:
checks_data (dict): Checks data dictionary.
all_check_results (dict): CheckResults.
json_output_path (str, optional): Path to save JSON file if desired. Defaults to None.
Returns:
str: JSON string representation of checks output.
"""
all_check_results_copy = copy.deepcopy(all_check_results)
......
###############################################################################
# (c) Copyright 2020 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2020-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -15,9 +15,16 @@ from os.path import dirname, join
def project_uses_cmt(app_name, app_version):
"""Determine if a application needs the CMT fallback to be applied
"""Determine if a application needs the CMT fallback to be applied.
This does not aim to be comprehensive.
Args:
app_name (str): LHCb application name.
app_version (str): LHCb application version.
Returns:
bool: whether the application/version should have the fallback applied.
"""
if app_name.lower() != "davinci":
return False
......@@ -30,7 +37,13 @@ def project_uses_cmt(app_name, app_version):
def setup_lbrun_environment(siteroot, repository_dir, setup_cmt):
"""Set up the fake siteroot for lb-run to use when testing"""
"""Set up the fake siteroot for lb-run to use when testing.
Args:
siteroot (str): The directory where the lb-run fake siteroot will be created
repository_dir (str): Repository directory
setup_cmt (bool): whether to use fallback hack for CMT-style projects
"""
os.environ["CMAKE_PREFIX_PATH"] = str(siteroot)
fake_dbase = join(siteroot, "DBASE")
fake_install_dir = join(fake_dbase, "AnalysisProductions", "v999999999999")
......
###############################################################################
# (c) Copyright 2020 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2020-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -11,4 +11,12 @@
def lint_all(data):
"""Placeholder for linting.
Args:
data: ...
Raises:
NotImplementedError: This function is not implemented.
"""
raise NotImplementedError()
###############################################################################
# (c) Copyright 2020 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2020-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -15,7 +15,17 @@ import os
def validate_bk_query(bk_query):
"""Perform a cursory check that the BK path is sane"""
"""Perform a cursory check that the BK path is sane.
Args:
bk_query (str): BKQuery string.
Raises:
ValueError: The BKQuery is not valid.
Returns:
bool: True if the BKQuery is valid.
"""
# Make a dict of the query parts
mydict = _buildBKQuery(bk_query)
......@@ -47,7 +57,17 @@ def validate_bk_query(bk_query):
def _buildBKQuery(bkPath=""):
"""Builds a dictionary from a BK path. Taken from LHCbDirac BKClient"""
"""Builds a dictionary from a BK path. Taken from LHCbDirac BKClient.
Args:
bkPath (str, optional): Bookkeeping path. Defaults to "".
Raises:
ValueError: BK path is invalid.
Returns:
dict: Result of the BKQuery.
"""
bkQueryDict = {}
if bkPath:
......
###############################################################################
# (c) Copyright 2021 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2021-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -17,18 +17,21 @@ from LbAPCommon.parsing import is_simulation_job
def validate_options(json_file: str, ntuple_fn: str, job_name: str, prod_data: dict):
"""
Validate existence of expected TTrees and ensure at least one
DecayTree and MCDecayTree (for MC samples) exists in output files
:param json_file: json_file listing the expected TTrees
:param ntuple_fn: Local test output tuple to validate against
:param job_name: Name of job to validate
:param prod_data: Entire production information from yaml parsing
:returns: Errors if any expected TTrees are not found in job output,
warnings if at least one (MC)DecayTree isn't found in job output
"""
"""Validate YAML options.
Check existence of expected TTrees and ensure at least one
DecayTree and MCDecayTree (for MC samples) exist(s) in output files.
Args:
json_file (str): json_file listing the expected TTrees
ntuple_fn (str): Local test output tuple to validate against
job_name (str): Name of job to validate
prod_data (dict): Entire production information from yaml parsing
Returns:
tuple[list[str]]: Errors if any expected TTrees are not found in job output,
warnings if at least one (MC)DecayTree isn't found in job output
"""
with open(json_file, "rb") as fp:
json_dump = json.load(fp)
......
###############################################################################
# (c) Copyright 2021 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2021-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -9,9 +9,6 @@
# or submit itself to any jurisdiction. #
###############################################################################
# This file needs to support Python 2 to maintain support for the legacy stacks
from __future__ import print_function
import json
import pickle
import pprint
......@@ -25,6 +22,11 @@ from DecayTreeTupleBase.DecayTreeTupleBaseConf import (
def parse_args():
"""Parse command line arguments with `argparse`.
Returns:
ArgumentParser-parsed arguments.
"""
parser = ArgumentParser()
parser.add_argument("--pkl", required=True, help="Pickle file to read")
parser.add_argument("--output", required=True, help="Name of output .json file")
......@@ -34,9 +36,14 @@ def parse_args():
return parser.parse_args()
def pkl_json_dump(pkl, output, debug):
"Move relevant information from pickle to json format"
def pkl_json_dump(pkl, output, debug=False):
"""Move relevant information from pickle to JSON format.
Args:
pkl (str): Pickle file path.
output (str): Output JSON file path.
debug (bool, optional): Debug flag. Defaults to False.
"""
output_dict = {"DecayTreeTuple": [], "MCDecayTreeTuple": [], "EventTuple": []}
# Open the pickle file
......@@ -65,4 +72,4 @@ if __name__ == "__main__":
args = parse_args()
pkl_json_dump(args.pkl, args.output, args.debug)
pkl_json_dump(args.pkl, args.output, debug=args.debug)
###############################################################################
# (c) Copyright 2021 CERN for the benefit of the LHCb Collaboration #
# (c) Copyright 2021-2022 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
......@@ -187,6 +187,19 @@ def _ordered_dict_to_dict(a):
def render_yaml(raw_yaml):
"""Render a "raw" YAML jinja template.
Accepts an LbAP YAML configuration jinja template and renders it into a full YAML configuration.
Args:
raw_yaml (str): YAML jinja-template string
Raises:
ValueError: raised if jinja2 couldn't render the raw_yaml string.
Returns:
str: a jinja-rendered YAML string.
"""
try:
rendered_yaml = jinja2.Template(
raw_yaml, undefined=jinja2.StrictUndefined
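The key behaviour render_yaml relies on is that StrictUndefined turns a missing template variable into an error instead of silently rendering an empty string; a small self-contained example with a made-up template:

import jinja2

template = "defaults:\n  application: DaVinci/{{ dv_version }}\n"

# A missing variable raises jinja2.UndefinedError rather than rendering blank.
try:
    jinja2.Template(template, undefined=jinja2.StrictUndefined).render()
except jinja2.UndefinedError as exc:
    print("Template error:", exc)

rendered = jinja2.Template(template, undefined=jinja2.StrictUndefined).render(
    dv_version="v45r8"
)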
......@@ -200,9 +213,17 @@ def render_yaml(raw_yaml):
def _validate_proc_pass_map(job_names, proc_pass_map):
"""
"""Build processing paths and validate them from a processing pass map.
Given a list of step job names (in correct order), and the processing pass map,
build the processing path for each step and verify the length is below 100.
Args:
job_names (list[str]): a list containing step job names.
proc_pass_map (dict): A dictionary mapping job names to processing pass
Raises:
ValueError: raised if the processing path length is over 100 characters
"""
for i, job_name in enumerate(job_names):
proc_passes = map(proc_pass_map.get, job_names[:i] + [job_name])
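A self-contained sketch of the constraint this function enforces (build each step's processing path from the passes of the preceding steps and check its length stays below 100 characters); the map contents and the "/" separator are assumptions for illustration:

# Hypothetical proc_pass_map; the real one comes from create_proc_pass_map.
proc_pass_map = {
    "filter": "AnaProd-v1r0-filter",
    "tuple": "AnaProd-v1r0-tuple",
}
job_names = ["filter", "tuple"]

for i, job_name in enumerate(job_names):
    proc_passes = [proc_pass_map[name] for name in job_names[: i + 1]]
    pro_path = "/".join(proc_passes)
    if len(pro_path) >= 100:
        raise ValueError(f"Processing path for {job_name!r} is too long: {pro_path}")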
......@@ -225,11 +246,18 @@ def _validate_proc_pass_map(job_names, proc_pass_map):
def create_proc_pass_map(job_names, version, default_proc_pass="default"):
"""
"""Create a job name to processing pass map.
Given a list of step job names and the production version, produce a
job_name --> processing pass mapping.
The processing pass map is validated by _validate_proc_pass_map
Args:
job_names (list): step job names
version (str): LbAP production version
default_proc_pass (str, optional): the default processing pass. Defaults to "default".
Returns:
dict: a step job name to processing pass map
"""
proc_pass_prefix = f"AnaProd-{version}-"
proc_pass_map = {}
......@@ -278,11 +306,16 @@ def create_proc_pass_map(job_names, version, default_proc_pass="default"):
def is_simulation_job(prod_data: dict, job_name: str):
"""Determine if a job is using MC input or not.
:param prod_data: Entire production information from yaml parsing
:param job_name: Name of the job to determine if it's using MC input or not
:returns: True if the job is using MC input, False if it is not