Commit b227e220 authored by Alexander Froch's avatar Alexander Froch
Browse files

Merge branch birk-md-example-code-replacement with refs/heads/master into...

Merge branch birk-md-example-code-replacement with refs/heads/master into refs/merge-requests/476/train
parents 8d57b0c6 cbb87393
Pipeline #3704741 passed with stages
in 26 minutes and 54 seconds
......@@ -9,6 +9,7 @@
- pip install --upgrade pip setuptools wheel
- pip install mkdocs==${MKDOCS_VERSION} mkdocs-material==${MATERIAL_VERSION} mkdocs-static-i18n==${I18NSTATIC_VERSION}
- cp changelog.md docs/setup/changelog.md
- python .gitlab/workflow/replace_placeholders_in_md.py --input "docs/**/*.md" --no_backup --exclude docs/setup/development/good_practices_docs.md
- mkdocs build -d public
- if [[ -f _redirects ]]; then cp _redirects public; fi;
- mv sphinx-docs public/sphinx-docs
......
"""Script to replace placeholders in .md files with the content of the file that is
specified in the placeholder."""
import argparse
from collections import Counter
from glob import glob
from shutil import copyfile
def GetParser(argv=None):
    """
    Argument parser for example code replacer

    Parses the command line, expands the ``--input`` wildcard into a list of
    filenames and removes every file given via ``--exclude`` from that list.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments to parse. By default None, in which case
        argparse falls back to ``sys.argv[1:]``.

    Returns
    -------
    args: parse_args
        Parsed arguments. ``args.input`` is replaced by the sorted list of
        filenames matching the wildcard (minus the excluded files).

    Raises
    ------
    ValueError
        If output file is specified but the number of input files is not
        exactly one.
    """
    # Local import: only needed here for normalising paths before comparison
    import os

    parser = argparse.ArgumentParser(
        description=(
            "Script which allow to replace placeholders inside a markdown "
            "file with the actual content of files located in your repo.\n"
            "Such placeholders have to be of the form §§§<filename>:<start>:<end>§§§. "
            " If start(end) is empty, the first(last) line is used. "
            "Indentation of the pasted content is the same as the indentation of "
            "the placeholder. Valid examples are: "
            "§§§file.py§§§, §§§file.py:10§§§, §§§file.py:10:20§§§"
        )
    )
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help=(
            "Name of the input file(s). Should be markdown files. Wildcards are "
            "supported"
        ),
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        required=False,
        default=None,
        help=(
            "Name of the output file (.md file). If none is provided, the input file "
            "will be overwritten and a copy of the original file is saved with a .bkp "
            "ending. Only makes sense if `input` is a single file."
        ),
    )
    parser.add_argument(
        "-n",
        "--no_backup",
        action="store_true",
        default=False,
        help="Do not save a backup of the original file.",
    )
    parser.add_argument(
        "-e",
        "--exclude",
        action="append",
        default=None,
        help="Exclude this file.",
        type=str,
    )
    args = parser.parse_args(argv)

    # Translate wildcard (sorted, so the processing order is deterministic)
    args.input = sorted(glob(args.input, recursive=True))

    # Remove excluded files. Paths are normalised before comparing so that e.g.
    # "./docs/a.md" excludes the glob result "docs/a.md".
    if args.exclude is not None:
        print(f"\x1b[1;32;40mExcluding the following files: {args.exclude}\x1b[0m")
        excluded = {os.path.normpath(filename) for filename in args.exclude}
        args.input = [
            filename
            for filename in args.input
            if os.path.normpath(filename) not in excluded
        ]

    if len(args.input) != 1 and args.output is not None:
        raise ValueError(
            "You specified an output file but more than one input was given. "
            "This option is only supported for a single input file."
        )
    return args
def line_contains_placeholder(line):
    """Helper function to check if a line contains a valid placeholder

    A line qualifies if it contains exactly six "§" characters and these
    characters form exactly two "§§§" delimiters, i.e. the line has the shape
    ``<prefix>§§§<placeholder>§§§<suffix>``.

    Parameters
    ----------
    line : str
        Line which is checked if a valid placeholder is in there

    Returns
    -------
    bool
        True if all criteria are satisfied, otherwise false
    """
    # Counting characters alone is not enough: six scattered "§" would pass the
    # count but cannot be split into prefix / placeholder / suffix afterwards.
    return line.count("§") == 6 and len(line.split("§§§")) == 3
def _parse_placeholder(placeholder):
    """Split a placeholder "<filename>:<start>:<end>" into its components.

    Parameters
    ----------
    placeholder : str
        Text between the two "§§§" delimiters.

    Returns
    -------
    replacement_file : str
        Name of the file whose content is inserted.
    start : int
        Zero-based slice start (0 if no start line is given).
    end : int or None
        Slice end; None if no end line is given, so that the slice runs up to
        and including the last line of the file.

    Raises
    ------
    ValueError
        If the placeholder contains more than 2 colons or if a line number is
        not an integer.
    """
    fields = placeholder.split(":")
    if len(fields) > 3:
        raise ValueError(
            f"Placeholder {placeholder} contains more than 2 colons. "
            "Supported number of colons are 0, 1 and 2."
        )
    replacement_file = fields[0]
    start_field = fields[1] if len(fields) > 1 else ""
    end_field = fields[2] if len(fields) > 2 else ""

    # Line numbers in the placeholder are 1-based and the end line is inclusive,
    # which translates to the python slice [start - 1:end].
    start = 0 if start_field == "" else int(start_field) - 1
    # None (not -1) for an open end: a slice ending at -1 would drop the last line
    end = None if end_field == "" else int(end_field)
    return replacement_file, start, end


def replace_placeholder_with_file_content(
    input_file: str, output_file: str = None, no_backup: bool = False
):
    """Function to replace placeholders of the form "§§§<filename>:<start>:<end>§§§"
    with the actual content of the file <filename> from line <start> to line <end>.

    The whole line containing the placeholder is replaced. Everything in front of
    the placeholder on that line is used as prefix (indentation) for every
    inserted line. If <filename> does not exist, an error text is written to the
    output instead and the placeholder is reported as skipped.
    Files without any "§§§" are skipped entirely (nothing is written).

    Parameters
    ----------
    input_file : str
        Filename of the markdown file which is searched for placeholders of the form
        §§§<filename>:<start>:<end>§§§.
    output_file : str, optional
        Filename of the output file (with the placeholders replaced). If not specified,
        the output file will overwrite the input file and a copy of the original file
        will be saved. By default None
    no_backup : bool, optional
        Option to overwrite original file without saving a backup file, by default False

    Raises
    ------
    ValueError
        If placeholder contains invalid number of colons. Valid numbers are 0, 1, 2.
        Also raised if a given line number is not an integer.
    """
    print(f"{90 * '-'}\nProcessing {input_file}")

    # The delimiter "§" is non-ASCII, therefore do not rely on the locale encoding
    with open(input_file, "r", encoding="utf-8") as original_file:
        original_lines = original_file.readlines()

    # Check if any placeholder is in the file. If not, stop here already
    if not any("§§§" in line for line in original_lines):
        print(f"File {input_file} does not contain any placeholders. Skipped.")
        return

    # Save backup of input file if no output filename is specified
    if output_file is None or output_file == input_file:
        if no_backup is False:
            copyfile(input_file, f"{input_file}.bkp")
            print(
                "Input filename is equal to output filename or no output filename "
                f"specified. Saving backup of input file as {input_file}.bkp"
            )
        output_file = input_file

    output_chunks = []
    replaced_placeholders = []
    skipped_placeholders = []

    for original_line in original_lines:
        # parts = [indentation, placeholder, rest of the line] for a valid line
        parts = original_line.split("§§§")

        # Lines without a well-formed placeholder are copied unchanged. The length
        # check protects against "§" characters which do not form two delimiters.
        if not line_contains_placeholder(original_line) or len(parts) != 3:
            output_chunks.append(original_line)
            continue

        indentation, placeholder = parts[0], parts[1]
        replacement_file, start, end = _parse_placeholder(placeholder)

        try:
            with open(replacement_file, "r", encoding="utf-8") as rep_content:
                selected_lines = rep_content.readlines()[start:end]
        except FileNotFoundError:
            replacement = (
                f"FileNotFoundError: File '{replacement_file}' not found.\n "
                f"\nOriginal line in input file:\n\n{original_line}"
            )
            skipped_placeholders.append(replacement_file)
        else:
            # Add all selected lines from the file, but always add the
            # indentation of the placeholder
            replacement = "".join(f"{indentation}{line}" for line in selected_lines)
            # If the inserted file has no trailing newline, keep the line break of
            # the placeholder line so the following markdown line is not merged
            if (
                replacement
                and not replacement.endswith("\n")
                and original_line.endswith("\n")
            ):
                replacement += "\n"
            replaced_placeholders.append(replacement_file)

        # Add to md file
        output_chunks.append(replacement)

    with open(output_file, "w", encoding="utf-8") as md_file_new:
        md_file_new.write("".join(output_chunks))

    print("SUMMARY:")
    print(f"Replaced placeholders: {replaced_placeholders}")
    if len(skipped_placeholders) > 0:
        print(f"\x1b[1;33;40mSkipped placeholders: {skipped_placeholders}\x1b[0m")
def main():
    """Entry point of the script: parse the arguments and process all inputs."""
    args = GetParser()
    print(f"Replacing placeholders in the following files: {args.input}")

    # A dedicated output file is only forwarded if exactly one input file is
    # processed, otherwise every input file is overwritten in place
    has_single_input = len(args.input) == 1
    target_file = args.output if has_single_input else None

    for markdown_file in args.input:
        replace_placeholder_with_file_content(
            input_file=markdown_file,
            output_file=target_file,
            no_backup=args.no_backup,
        )
# Run the replacement only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()
......@@ -4,6 +4,7 @@
### Latest
- Implemented placeholder for code snippets in markdown files [!476](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/476)
- Fixing branch unit test (problem with changing style of matplotlib globally) [!478](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/478)
- Streamline h5 ntuples and samples overview with that of ftag-docs [!479](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/479)
- Adding dummy data generation of multi-class classification output [!475](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/merge_requests/475)
......
......@@ -6,63 +6,5 @@ To set up the inputs for the plots, have a look [here](../index.md).
Then we can start the actual plotting part
```py
from umami.plotting import roc, roc_plot
# here the plotting of the roc starts
plot_roc = roc_plot(
n_ratio_panels=2, ylabel="background rejection", xlabel="b-jets efficiency"
)
plot_roc.add_roc(
roc(
sig_eff,
rnnip_ujets_rej,
n_test=n_test,
rej_class="ujets",
signal_class="bjets",
label="RNNIP",
),
reference=True,
)
plot_roc.add_roc(
roc(
sig_eff,
dips_ujets_rej,
n_test=n_test,
rej_class="ujets",
signal_class="bjets",
label="DIPS",
),
)
plot_roc.add_roc(
roc(
sig_eff,
rnnip_cjets_rej,
n_test=n_test,
rej_class="cjets",
signal_class="bjets",
label="RNNIP",
),
reference=True,
)
plot_roc.add_roc(
roc(
sig_eff,
dips_cjets_rej,
n_test=n_test,
rej_class="cjets",
signal_class="bjets",
label="DIPS",
),
)
# setting which flavour rejection ratio is drawn in which ratio panel
plot_roc.set_ratio_class(1, "ujets", label="light-flavour jets ratio")
plot_roc.set_ratio_class(2, "cjets", label="c-jets ratio")
# if you want to swap the ratios just uncomment the following 2 lines
# plot_roc.set_ratio_class(2, "ujets", label="light-flavour jets ratio")
# plot_roc.set_ratio_class(1, "cjets", label="c-jets ratio")
plot_roc.set_leg_rej_labels("ujets", "light-flavour jets rejection")
plot_roc.set_leg_rej_labels("cjets", "c-jets rejection")
plot_roc.draw()
plot_roc.savefig("roc.pdf")
§§§examples/plotting/plot_rocs.py§§§
```
\ No newline at end of file
### Writing documentation
#### File content placeholders
Inserting code snippets of examples often makes understanding the documentation much
easier. However, keeping code snippets in the docs in sync with the content of the
files they are referring to can get exhausting and is easily forgotten.
To avoid this, you can make use of the following syntax for placing a code snippet
in the documentation you are writing.
```md
§§§<filename>:<start>:<end>§§§
```
This will replace the original line in the markdown file with the content of the
file `<filename>` from line `<start>` to line `<end>`.
The file in the repository will not be changed, but before building the
docs, a script will create a processed copy of the corresponding markdown file.
**Further examples**
Below you can find different versions for inserting (parts of) the file
`examples/plotting/plot_rocs.py` into your markdown file.
| Placeholder | Result |
|-------------|--------|
|`§§§examples/plotting/plot_rocs.py§§§` | whole file |
|`§§§examples/plotting/plot_rocs.py::§§§` | whole file |
|`§§§examples/plotting/plot_rocs.py:10:20§§§` | from line 10 to line 20 |
|`§§§examples/plotting/plot_rocs.py::10§§§` | from top to line 10 |
|`§§§examples/plotting/plot_rocs.py:10§§§` | from line 10 to bottom |
|`§§§examples/plotting/plot_rocs.py:10:§§§` | from line 10 to bottom |
\ No newline at end of file
......@@ -4,7 +4,7 @@ If you want to get started with umami you can pick an issue and work on it, well
are the issues labeled with [`good-first-issue`](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/issues?label_name%5B%5D=good-first-issue) which can be found [here](https://gitlab.cern.ch/atlas-flavor-tagging-tools/algorithms/umami/-/issues?scope=all&state=opened&label_name[]=good-first-issue). Please tell us if you are working on an issue and create a merge request marked as `Draft` right at the beginning. This helps us to see who is working on what.
Please follow the good coding practices which are also summarised [here](good-practices.md).
Please follow the good coding practices which are also summarised [here](good_practices_code.md).
## Test suite
......
......@@ -26,7 +26,8 @@ nav:
- Installation: setup/installation.md
- Development:
- setup/development/index.md
- Good code practices: setup/development/good-practices.md
- Good practices code: setup/development/good_practices_code.md
- Good practices docs: setup/development/good_practices_docs.md
- VS Code: setup/development/VS_code.md
- Changelog: setup/changelog.md
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment