Commit 64f49718 authored by Ben Couturier's avatar Ben Couturier
Browse files

Merge branch 'aiwieder/api' into 'master'

Add AnalysisData().available_tags and start setting up "pretty" summaries using rich

See merge request !4
parents 4afd8357 cf4e9036
Pipeline #3288491 passed with stage
in 2 minutes and 2 seconds
......@@ -23,6 +23,7 @@ install_requires =
beautifulsoup4
click
click-log
rich
[options.packages.find]
where=src
......@@ -42,6 +43,7 @@ console_scripts =
apd-cache = apd.command:cmd_cache_ap_info
apd-list-pfns = apd.command:cmd_list_pfns
apd-list-samples = apd.command:cmd_list_samples
apd-summary = apd.command:cmd_summary
###############################################################################
# Linting
......
......@@ -12,6 +12,7 @@
import itertools
import logging
import os
from collections import defaultdict
from apd.ap_info import (
SampleCollection,
......@@ -28,7 +29,7 @@ def _validate_tags(tags, default_tags=None):
"""Method that checks the dictionary of tag names, values that should be used
to filter the data accordingly.
- Special cases ate handled: tags "name" and "version" as well as "data" and "mc"
- Special cases are handled: tags "name" and "version" as well as "data" and "mc"
(which are converted to a "config" value).
- tag values cannot be None
- tag values cannot be of type bytes
......@@ -201,6 +202,11 @@ class AnalysisData:
logger.debug("Fetching Analysis Production data from %s", api_url)
self.samples = fetch_ap_info(working_group, analysis, None, api_url)
self.available_tags = defaultdict(set)
for sample in self.samples:
for k, v in self.samples._sampleTags(sample).items():
self.available_tags[k].add(v)
def __call__(
self, *, version=None, name=None, return_pfns=True, check_data=True, **tags
):
......@@ -215,7 +221,7 @@ class AnalysisData:
if iterable(version):
raise Exception("version argument doesn't support iterables")
# Establishing the list of damples to run on
# Establishing the list of samples to run on
samples = self.samples
if name:
......@@ -247,7 +253,7 @@ class AnalysisData:
for tagname, tagvalue in effective_tags.items():
logger.debug("Filtering for %s = %s", tagname, tagvalue)
# Appying the filters in one go
# Applying the filters in one go
samples = samples.filter(**effective_tags)
logger.debug("Matched %d samples", len(samples))
......@@ -278,6 +284,27 @@ class AnalysisData:
return samples
def __str__(self):
    """Return a human-readable description of the production.

    Shows the working group / analysis pair on the first line, followed by
    the string form of the sample collection.
    """
    # NOTE: keep the "AnalysisProductions" spelling — the old
    # "AnalysysProductions" typo line was removed as dead code.
    txt = f"AnalysisProductions: {self.working_group} / {self.analysis}\n"
    txt += str(self.samples)
    return txt
def summary(self, tags: list = None) -> dict:
    """Summarize the tags available for this production.

    Args:
        tags: optional list of tag names to restrict the summary to.
            When falsy, the full ``available_tags`` mapping is returned
            as-is (tag name -> set of values).

    Returns:
        dict mapping each requested tag name to the sorted list of its
        values, or the whole ``available_tags`` mapping when no tags
        were requested.

    Raises:
        ValueError: if a requested tag does not exist for this
            production, or if its values cannot be ordered.
    """
    if not tags:
        # No filter requested: hand back the full mapping of tag -> values.
        return self.available_tags

    summary = {}
    for tag in tags:
        if tag not in self.available_tags:
            raise ValueError(
                f"Requested tag ({tag}) not valid for the given production (wg: {self.working_group}, analysis: {self.analysis})!"
            )
        try:
            # Bug fix: the sorted result is now kept instead of being
            # overwritten by an unsorted list() of the same values.
            summary[tag] = sorted(self.available_tags[tag])
        except TypeError as exc:
            # Heterogeneous/unorderable values (e.g. str mixed with int).
            raise ValueError(
                f"Could not sort the values for tag ({tag}). Please check that the values are sensible.\n"
            ) from exc
    return summary
......@@ -20,6 +20,7 @@ import click_log
from .analysis_data import AnalysisData
from .ap_info import cache_ap_info
from .rich_console import console
logger = logging.getLogger("apd")
click_log.basic_config(logger)
......@@ -28,7 +29,7 @@ click_log.basic_config(logger)
def exception_handler(exception_type, exception, _):
    """Top-level excepthook for the CLI.

    Prints a single concise ``ExceptionType: message`` line instead of a
    full traceback, so command-line users see a readable error. The
    traceback argument is deliberately ignored.
    """
    # All your trace are belong to us!
    # Bug fix: the duplicated legacy %-format print was removed so the
    # error is reported exactly once, via the f-string form.
    print(f"{exception_type.__name__}: {exception}")


sys.excepthook = exception_handler
......@@ -190,3 +191,42 @@ def cmd_list_samples(
filter_tags |= dict(zip(tag, value))
matching = datasets(check_data=False, return_pfns=False, **filter_tags)
click.echo(matching)
@click.command()
@click.argument("working_group")
@click.argument("analysis")
@click.option(
    "--cache_directory",
    default=os.environ.get("APD_METADATA_CACHE_DIR", None),
    help="Specify location of the cached analysis data files",
)
@click.option(
    "--tag",
    default=None,
    help="Tag for which the values should be listed",
    multiple=True,
)
@click_log.simple_verbosity_option(logger)
def cmd_summary(
    working_group,
    analysis,
    cache_directory,
    tag,
):
    """Print a summary of the tags available for an Analysis Production.

    WORKING_GROUP and ANALYSIS identify the production; --tag (repeatable,
    so click passes it as a tuple) restricts the summary to specific tags.
    The result of AnalysisData.summary() is rendered with the shared rich
    console.
    """
    # Dealing with the cache
    if not cache_directory:
        # Fall back to a well-known temp location when neither the option
        # nor APD_METADATA_CACHE_DIR is set.
        cache_directory = "/tmp/apd_cache"
        logger.debug("Cache directory not set, using %s", cache_directory)
        # NOTE(review): the cache is only populated for the default /tmp
        # location; a user-supplied directory is assumed to already be
        # filled — confirm against the other apd commands.
        if not os.path.exists(cache_directory):
            logger.debug(
                "Caching information for %s/%s to %s",
                working_group,
                analysis,
                cache_directory,
            )
            cache_ap_info(cache_directory, working_group, analysis)
    # Loading the dataset and displaying its summary
    datasets = AnalysisData(working_group, analysis, metadata_cache=cache_directory)
    console.print(datasets.summary(tag))
###############################################################################
# (c) Copyright 2021 CERN for the benefit of the LHCb Collaboration #
# #
# This software is distributed under the terms of the GNU General Public #
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
# #
# In applying this licence, CERN does not waive the privileges and immunities #
# granted to it by virtue of its status as an Intergovernmental Organization #
# or submit itself to any jurisdiction. #
###############################################################################
from rich.console import Console

# Shared rich consoles for the apd command-line tools:
# `console` writes regular output to stdout; `error_console` writes to
# stderr with a bold red style for error reporting.
console = Console()
error_console = Console(stderr=True, style="bold red")
......@@ -85,3 +85,10 @@ def test_sample_check_load_dataset_error(apinfo_multipleversions):
polarity="magup",
)
datasets()
def test_summary(apinfo_multipleversions):
    """The summary for a single tag lists every value seen in the samples."""
    datasets = AnalysisData("SL", "RDs", metadata_cache=apinfo_multipleversions)
    observed = datasets.summary(["datatype"])["datatype"]
    assert set(observed) == {"2011", "2012", "2016", "2017", "2018"}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment