HH / tools / inference · Merge requests · !93

add plot_likelihood_scan_1d, fix the best fit issue
Open · Yihui Lai requested to merge yilai/inference:sync into master · 1 year ago
Overview 0 · Commits 1 · Pipelines 1 · Changes 2
Merge request reports
Compare master (HEAD) and latest version 7a4f20ac · 1 commit, 1 year ago · 2 files, +425 −2
dhi/plots/likelihoods.py · +424 −1
@@ -34,7 +34,7 @@ colors = colors.root
 @use_style("dhi_default")
-def plot_likelihood_scans_1d(
+def plot_likelihood_scan_1d(
     paths,
     poi,
     data,

@@ -472,6 +472,429 @@ def plot_likelihood_scans_1d(
    if hep_data_path:
        hdt.save_hep_data(hep_data, hep_data_path)

@use_style("dhi_default")
def plot_likelihood_scans_1d(
    paths,
    poi,
    data,
    theory_value=None,
    ranges_path=None,
    hep_data_path=None,
    show_best_fit=False,
    show_best_fit_error=True,
    show_best_fit_line=None,
    show_best_fit_indicators=None,
    show_significances=(1, 2, 3, 5),
    shift_negative_values=False,
    interpolate_above=None,
    v_lines=None,
    x_min=None,
    x_max=None,
    y_min=None,
    y_max=None,
    y_log=False,
    model_parameters=None,
    campaign=None,
    show_points=True,
    cms_postfix=None,
    style=None,
):
"""
Plots multiple curves of 1D likelihood scans of a POI *poi1* and *poi2*, and saves it at *paths*.
All information should be passed as a list *data*. Entries must be dictionaries with the
following content:
-
"
values
"
: A mapping to lists of values or a record array with keys
"
<poi1_name>
"
,
"
<poi2_name>
"
and
"
dnll2
"
.
-
"
poi_mins
"
: A list of two floats describing the best fit value of the two POIs. When not
set, the minima are estimated from the interpolated curve.
-
"
name
"
: A name of the data to be shown in the legend.
*theory_value* can be a 3-tuple denoting the nominal theory prediction of the POI and its up and
down uncertainties which is drawn as a vertical bar. When *ranges_path* is set, one and two
sigma intervals of the scan parameter are saved to the given file. When *hep_data_path* is set,
a yml data file compatible with the HEPData format
(https://hepdata-submission.readthedocs.io/en/latest/data_yaml.html) is stored at that path.
When *show_best_fit* (*show_best_fit_error*) is *True*, the best fit error value (and its
uncertainty) is shown in the corresponding legend entry. When *show_best_fit_line* is *True*, a
vertical line is shown at the position of the best fit value. When *show_best_fit_indicators* is
*True* and only a single scan is shown, vertical indicators of the one and two sigma intervals
of the best fit value, when requested in *show_significances*, are shown. The two latter
arguments default to the value of *show_best_fit*.
To overlay lines and labels denoting integer significances corresponding to 1D likelihood scans,
*show_significances* can be set to *True* to show significances up to 9 sigma, or a list of
sigmas (integer, >= 1) or confidence levels (float, < 1). In case there are negative dnll2
values, *shift_negative_values* can be set to *True* to shift them vertically so that the
minimum is located at 0 again. When *interpolate_above* is defined, values that exceed this
threshold are removed and interpolated using adjacent values instead.
*v_lines* can be a list of x-values at which vertical, dashed lines are drawn for visual
guidance. *x_min* and *x_max* define the x-axis range of POI, and *y_min* and *y_max* control
the range of the y-axis. When *y_log* is *True*, the y-axis is plotted with a logarithmic scale.
When *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign*
should refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When
*show_points* is *True*, the central scan points are drawn on top of the interpolated curve.
*cms_postfix* is shown as the postfix behind the CMS label.
Supported values for *style*:
-
"
paper
"
Example: https://cms-hh.web.cern.ch/tools/inference/tasks/likelihood.html#1d_1
"""
    import plotlib.root as r
    ROOT = import_ROOT()

    # style-based adjustments
    style = Style.new(style)
    if style.matches("paper"):
        cms_postfix = None

    # input checks and transformations
    if theory_value is not None:
        theory_value = make_list(theory_value)
    if not show_best_fit:
        show_best_fit_error = False
    if show_best_fit_line is None:
        show_best_fit_line = show_best_fit
    if show_best_fit_indicators is None:
        show_best_fit_indicators = show_best_fit

    # validate data entries
    for i, d in enumerate(data):
        # convert likelihood values to arrays
        assert "values" in d
        values = d["values"]
        if isinstance(values, np.ndarray):
            values = {k: values[k] for k in values.dtype.names}
        assert poi in values
        assert "dnll2" in values
        # check poi minimum
        d.setdefault("poi_mins", None)
        if not isinstance(d["poi_mins"], (list, tuple)):
            d["poi_mins"] = [d["poi_mins"]]
        # default name
        d.setdefault("name", str(i + 1))
        # origin (for printouts)
        d["origin"] = None if not d["name"] else "entry '{}'".format(d["name"])
        # drop all fields except for required ones
        values = {
            k: np.array(v, dtype=np.float32)
            for k, v in values.items()
            if k in [poi, "dnll2"]
        }
        # preprocess values (nan detection, negative shift)
        values["dnll2"], values[poi] = _preprocess_values(
            values["dnll2"],
            (poi, values[poi]),
            remove_nans=True,
            remove_above=interpolate_above,
            shift_negative_values=shift_negative_values,
            origin=d["origin"],
            min_is_external=d["poi_mins"] is not None,
        )
        d["values"] = values

    # prepare hep data
    hep_data = None
    if hep_data_path:
        hep_data = hdt.create_hist_data()

    # perform scans
    scans = [
        evaluate_likelihood_scan_1d(
            d["values"][poi],
            d["values"]["dnll2"],
            poi_min=d["poi_mins"][0],
            origin=d["origin"],
        )
        for d in data
    ]

    # set x range
    if x_min is None:
        x_min = min([min(d["values"][poi]) for d in data])
    if x_max is None:
        x_max = max([max(d["values"][poi]) for d in data])

    # set y range
    y_max_value = max([
        d["values"]["dnll2"][(d["values"][poi] >= x_min) & (d["values"][poi] <= x_max)].max()
        for d in data
    ])
    y_min_value = min([
        d["values"]["dnll2"][(d["values"][poi] >= x_min) & (d["values"][poi] <= x_max)].min()
        for d in data
    ])
    y_min, y_max, y_max_line = get_y_range(y_min_value, y_max_value, y_min, y_max, log=y_log)

    # start plotting
    r.setup_style()
    canvas, (pad,) = r.routines.create_canvas(pad_props={"Logy": y_log})
    pad.cd()
    draw_objs = []
    legend_entries = []

    # dummy histogram to control axes
    x_title = to_root_latex(poi_data[poi].label)
    if "unit" in poi_data[poi]:
        x_title = "{} ({})".format(x_title, to_root_latex(poi_data[poi].unit))
    y_title = "-2 #Delta log(L)"
    h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max)
    r.setup_hist(
        h_dummy,
        pad=pad,
        props={"LineWidth": 0, "Minimum": y_min, "Maximum": y_max},
    )
    draw_objs.append((h_dummy, "HIST"))

    if show_significances:
        # horizontal significance lines and labels
        if isinstance(show_significances, (list, tuple)):
            sigs = list(show_significances)
        else:
            sigs = list(range(1, 9 + 1))
        for sig in sigs:
            # get the dnll2 value and build the label
            is_cl = isinstance(sig, float) and sig < 1
            if is_cl:
                # convert confidence level to dnll2 value
                dnll2 = get_chi2_level_from_cl(sig, 1)
            else:
                # convert significance to dnll2 value
                sig = int(round_scientific(sig))
                dnll2 = get_chi2_level(sig, 1)

            # do not show when vertically out of range
            if dnll2 >= y_max_line:
                continue

            # vertical indicators at 1 and 2 sigma when only one curve is shown
            if show_best_fit_indicators and len(data) == 1 and scans[0] and sig in [1, 2]:
                values = list(map(
                    lambda s: getattr(scans[0], "poi_{}{}".format(s, sig)),
                    "pm",
                ))
                for value in values:
                    if value is None or not (x_min < value < x_max):
                        continue
                    line = ROOT.TLine(value, y_min, value, scans[0].interp(value))
                    r.setup_line(
                        line,
                        props={"LineColor": colors.black, "LineStyle": 2, "NDC": False},
                    )
                    draw_objs.append(line)

            # create the line
            sig_line = ROOT.TLine(x_min, dnll2, x_max, dnll2)
            r.setup_line(
                sig_line,
                props={"NDC": False, "LineWidth": 1},
                color=colors.grey,
            )
            draw_objs.append(sig_line)

            # create and position the label
            if y_log:
                sig_label_y = math.log(dnll2 / y_min) / math.log(y_max / y_min)
            else:
                sig_label_y = dnll2 / (y_max - y_min)
            sig_label_y *= 1.0 - pad.GetTopMargin() - pad.GetBottomMargin()
            sig_label_y += pad.GetBottomMargin() + 0.00375
            if is_cl:
                sig_label = "{:f}".format(sig * 100).rstrip("0").rstrip(".") + "%"
            else:
                sig_label = "{}#sigma".format(sig)
            sig_label = r.routines.create_top_right_label(
                sig_label,
                pad=pad,
                x_offset=12,
                y=sig_label_y,
                props={"TextSize": 18, "TextColor": colors.grey, "TextAlign": 31},
            )
            draw_objs.append(sig_label)

    # draw vertical lines at requested values
    if v_lines:
        for x in v_lines:
            if x_min < x < x_max:
                line = ROOT.TLine(x, y_min, x, y_max_line)
                r.setup_line(
                    line,
                    props={"LineColor": colors.black, "LineStyle": 2, "NDC": False},
                )
                draw_objs.append(line)

    # special case regarding color handling: when all entry names are valid keys in br_hh_colors,
    # replace the default color sequence to deterministically assign same colors to channels
    _color_sequence = color_sequence
    if all(d["name"] in br_hh_colors.root for d in data):
        _color_sequence = [br_hh_colors.root[d["name"]] for d in data]

    # perform scans and draw nll curves
    parameter_ranges = OrderedDict()
    g_nlls = []
    for d, scan, col, ms in zip(data, scans, _color_sequence, marker_sequence):
        if not scan:
            warn("1D likelihood evaluation failed for entry '{}'".format(d["name"]))

        # draw the curve
        g_nll = create_tgraph(
            len(d["values"][poi]),
            d["values"][poi],
            d["values"]["dnll2"],
        )
        r.setup_graph(
            g_nll,
            props={"LineWidth": 2, "MarkerStyle": ms, "MarkerSize": 1.2},
            color=colors[col],
        )
        draw_objs.append((g_nll, "SAME,CP" if show_points else "SAME,C"))
        g_nlls.append(g_nll)

        # legend entry with optional best fit value
        g_label = expand_hh_channel_label(d["name"])
        if scan and show_best_fit:
            if show_best_fit_error:
                bf_label = scan.num_min.str(
                    format="%.2f",
                    style="root",
                    force_asymmetric=True,
                    styles={"space": ""},
                )
            else:
                bf_label = "{:.2f}".format(scan.num_min())
            if g_label:
                g_label += ", {}".format(bf_label)
            else:
                g_label = "{} = {}".format(to_root_latex(poi_data[poi].label), bf_label)
        legend_entries.append((g_nll, g_label, "LP" if show_points else "L"))

        # vertical line denoting the best fit value
        if show_best_fit_line and scan and (x_min <= scan.poi_min <= x_max):
            line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line)
            r.setup_line(
                line_fit,
                props={"LineWidth": 2, "NDC": False},
                color=colors[col],
            )
            draw_objs.append(line_fit)

        # store parameter ranges
        key = poi
        if d["name"]:
            key += "__{}".format(d["name"])
        parameter_ranges[key] = scan["summary"] if scan else None

    # theory prediction with uncertainties
    if theory_value:
        has_thy_err = len(theory_value) == 3
        if has_thy_err:
            # theory graph
            g_thy = create_tgraph(
                1,
                theory_value[0],
                y_min,
                theory_value[2],
                theory_value[1],
                0,
                y_max_line - y_min,
            )
            r.setup_graph(
                g_thy,
                props={
                    "LineColor": colors.red,
                    "FillStyle": 1001,
                    "FillColor": colors.red_trans_50,
                },
            )
            draw_objs.append((g_thy, "SAME,02"))
            legend_entries.append((g_thy, "Standard Model", "LF"))
        # theory line
        line_thy = ROOT.TLine(theory_value[0], y_min, theory_value[0], y_max_line)
        r.setup_line(line_thy, props={"NDC": False}, color=colors.red)
        draw_objs.append(line_thy)
        if not has_thy_err:
            legend_entries.append((line_thy, "Standard Model", "L"))

    # fill hep data
    if hep_data:
        # scan value as independent variable
        scan_values = sorted(set(chain(*(map(float, d["values"][poi]) for d in data))))
        hdt.create_independent_variable(
            poi_data[poi].label,
            parent=hep_data,
            values=[Number(v, default_format=-2) for v in scan_values],
        )

        # dnll2 values as dependent variables
        for d, g_nll in zip(data, g_nlls):
            label = r"$-2\Delta\log(L)$"
            if d.get("name"):
                label += ", " + d["name"]
            hdt.create_dependent_variable_from_graph(
                g_nll,
                x_values=scan_values,
                parent=hep_data,
                label=label,
                rounding_method=-2,
                transform=(lambda i, x, y, err: (x, max(y, 0.0), err)),
            )

    # legend
    legend_cols = min(int(math.ceil(len(legend_entries) / 4.0)), 3)
    legend_rows = int(math.ceil(len(legend_entries) / float(legend_cols)))
    legend = r.routines.create_legend(
        pad=pad,
        width=legend_cols * 210,
        n=legend_rows,
        props={"NColumns": legend_cols, "TextSize": 18},
    )
    r.fill_legend(legend, legend_entries)
    draw_objs.append(legend)
    legend_box = r.routines.create_legend_box(
        legend,
        pad,
        "trl",
        props={"LineWidth": 0, "FillColor": colors.white_trans_70},
    )
    draw_objs.insert(-1, legend_box)

    # cms label
    cms_labels = r.routines.create_cms_labels(
        pad=pad,
        postfix=cms_postfix or "",
        layout="outside_horizontal",
    )
    draw_objs.extend(cms_labels)

    # model parameter labels
    if model_parameters:
        param_kwargs = {}
        if legend_cols == 3:
            param_kwargs["y_offset"] = 1.0 - 0.25 * pad.GetTopMargin() - legend.GetY1()
        draw_objs.extend(create_model_parameters(model_parameters, pad, **param_kwargs))

    # campaign label
    if campaign:
        campaign_label = to_root_latex(campaign_labels.get(campaign, campaign))
        campaign_label = r.routines.create_top_right_label(campaign_label, pad=pad)
        draw_objs.append(campaign_label)

    # draw all objects
    r.routines.draw_objects(draw_objs)

    # save
    r.update_canvas(canvas)
    for path in make_list(paths):
        canvas.SaveAs(path)

    # save parameter ranges
    if ranges_path:
        ranges_path = prepare_output(ranges_path)
        with open(ranges_path, "w") as f:
            json.dump(parameter_ranges, f, indent=4)
        print("saved parameter ranges to {}".format(ranges_path))

    # save hep data
    if hep_data_path:
        hdt.save_hep_data(hep_data, hep_data_path)


@use_style("dhi_default")
def plot_likelihood_scan_2d(
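
For reference, a minimal sketch of how the new plot_likelihood_scans_1d function could be called, following the "values" / "poi_mins" / "name" data layout described in its docstring. This is not part of the merge request: the POI name "kl", the channel names, and all numbers are made-up placeholders, and it assumes "kl" is a POI known to the dhi configuration.

# Hypothetical usage sketch (not part of this MR): two toy likelihood scans of a POI,
# passed as dictionaries with "values", "poi_mins" and "name" keys as documented above.
import numpy as np

from dhi.plots.likelihoods import plot_likelihood_scans_1d

kl = np.linspace(-10.0, 15.0, 51)
data = [
    {
        "values": {"kl": kl, "dnll2": (kl - 1.0) ** 2 / 4.0},  # toy parabola
        "poi_mins": [1.0],  # externally known best fit value
        "name": "channel A",
    },
    {
        "values": {"kl": kl, "dnll2": (kl - 2.0) ** 2 / 9.0},
        "poi_mins": [2.0],
        "name": "channel B",
    },
]

plot_likelihood_scans_1d(
    paths=["nll1d__kl.pdf", "nll1d__kl.png"],
    poi="kl",  # assumes "kl" is defined in dhi.config.poi_data
    data=data,
    show_best_fit=True,
    y_log=True,
)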
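The horizontal significance lines controlled by show_significances are placed at dnll2 thresholds obtained from the get_chi2_level and get_chi2_level_from_cl helpers used in the diff. As a standalone cross-check of the values those helpers are expected to return for one degree of freedom (the usual chi-square relation from Wilks' theorem), the following snippet reproduces the thresholds with scipy; it is an illustration only and not required by the plotting code.

# Illustration of the -2*Delta(log L) thresholds behind the significance lines,
# assuming the standard chi-square relation with one degree of freedom.
from scipy.stats import chi2, norm

def dnll2_from_sigma(n_sigma, ndof=1):
    # confidence level covered by an n-sigma Gaussian interval, mapped to a chi2 quantile
    cl = 2.0 * norm.cdf(n_sigma) - 1.0
    return chi2.ppf(cl, ndof)

def dnll2_from_cl(cl, ndof=1):
    # threshold for a given confidence level, as used for float entries < 1
    return chi2.ppf(cl, ndof)

print(dnll2_from_sigma(1))  # ~1.00
print(dnll2_from_sigma(2))  # ~4.00
print(dnll2_from_cl(0.95))  # ~3.84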