Skip to content
Snippets Groups Projects

add plot_likelihood_scan_1d, fix the best fit issue

Open Yihui Lai requested to merge yilai/inference:sync into master
2 files
+ 425
2
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 424
1
@@ -34,7 +34,7 @@ colors = colors.root
@use_style("dhi_default")
def plot_likelihood_scans_1d(
def plot_likelihood_scan_1d(
paths,
poi,
data,
@@ -472,6 +472,429 @@ def plot_likelihood_scans_1d(
if hep_data_path:
hdt.save_hep_data(hep_data, hep_data_path)
@use_style("dhi_default")
def plot_likelihood_scans_1d(
    paths,
    poi,
    data,
    theory_value=None,
    ranges_path=None,
    hep_data_path=None,
    show_best_fit=False,
    show_best_fit_error=True,
    show_best_fit_line=None,
    show_best_fit_indicators=None,
    show_significances=(1, 2, 3, 5),
    shift_negative_values=False,
    interpolate_above=None,
    v_lines=None,
    x_min=None,
    x_max=None,
    y_min=None,
    y_max=None,
    y_log=False,
    model_parameters=None,
    campaign=None,
    show_points=True,
    cms_postfix=None,
    style=None,
):
    """
    Plots multiple curves of 1D likelihood scans of a POI *poi*, and saves it at *paths*.
    All information should be passed as a list *data*. Entries must be dictionaries with the
    following content:

    - "values": A mapping to lists of values or a record array with keys "<poi_name>" and "dnll2".
    - "poi_mins": A float (or one-element list) describing the best fit value of the POI. When not
      set, the minimum is estimated from the interpolated curve.
    - "name": A name of the data to be shown in the legend.

    *theory_value* can be a 3-tuple denoting the nominal theory prediction of the POI and its up and
    down uncertainties which is drawn as a vertical bar. When *ranges_path* is set, one and two
    sigma intervals of the scan parameter are saved to the given file. When *hep_data_path* is set,
    a yml data file compatible with the HEPData format
    (https://hepdata-submission.readthedocs.io/en/latest/data_yaml.html) is stored at that path.

    When *show_best_fit* (*show_best_fit_error*) is *True*, the best fit value (and its
    uncertainty) is shown in the corresponding legend entry. When *show_best_fit_line* is *True*, a
    vertical line is shown at the position of the best fit value. When *show_best_fit_indicators* is
    *True* and only a single scan is shown, vertical indicators of the one and two sigma intervals
    of the best fit value, when requested in *show_significances*, are shown. The two latter
    arguments default to the value of *show_best_fit*.

    To overlay lines and labels denoting integer significances corresponding to 1D likelihood scans,
    *show_significances* can be set to *True* to show significances up to 9 sigma, or a list of
    sigmas (integer, >= 1) or confidence levels (float, < 1). In case there are negative dnll2
    values, *shift_negative_values* can be set to *True* to shift them vertically so that the
    minimum is located at 0 again. When *interpolate_above* is defined, values that exceed this
    threshold are removed and interpolated using adjacent values instead.

    *v_lines* can be a list of x-values at which vertical, dashed lines are drawn for visual
    guidance. *x_min* and *x_max* define the x-axis range of POI, and *y_min* and *y_max* control
    the range of the y-axis. When *y_log* is *True*, the y-axis is plotted with a logarithmic scale.
    *model_parameters* can be a dictionary of key-value pairs of model parameters. *campaign*
    should refer to the name of a campaign label defined in *dhi.config.campaign_labels*. When
    *show_points* is *True*, the central scan points are drawn on top of the interpolated curve.
    *cms_postfix* is shown as the postfix behind the CMS label.

    Supported values for *style*:

    - "paper"

    Example: https://cms-hh.web.cern.ch/tools/inference/tasks/likelihood.html#1d_1
    """
    import plotlib.root as r
    ROOT = import_ROOT()

    # style-based adjustments
    style = Style.new(style)
    if style.matches("paper"):
        cms_postfix = None

    # input checks and transformations
    if theory_value is not None:
        theory_value = make_list(theory_value)
    if not show_best_fit:
        show_best_fit_error = False
    if show_best_fit_line is None:
        show_best_fit_line = show_best_fit
    if show_best_fit_indicators is None:
        show_best_fit_indicators = show_best_fit

    # validate data entries
    for i, d in enumerate(data):
        # convert likelihood values to arrays
        assert "values" in d
        values = d["values"]
        if isinstance(values, np.ndarray):
            values = {k: values[k] for k in values.dtype.names}
        assert poi in values
        assert "dnll2" in values
        # check poi minimum
        d.setdefault("poi_mins", None)
        # remember whether a minimum was provided at all, before it is wrapped into a list
        # below; checking after the wrapping (as done previously) was always True and broke
        # the internal best-fit estimation
        poi_min_external = d["poi_mins"] is not None
        if not isinstance(d["poi_mins"], (list, tuple)):
            d["poi_mins"] = [d["poi_mins"]]
        # default name
        d.setdefault("name", str(i + 1))
        # origin (for printouts)
        d["origin"] = None if not d["name"] else "entry '{}'".format(d["name"])
        # drop all fields except for required ones
        values = {
            k: np.array(v, dtype=np.float32)
            for k, v in values.items()
            if k in [poi, "dnll2"]
        }
        # preprocess values (nan detection, negative shift)
        values["dnll2"], values[poi] = _preprocess_values(
            values["dnll2"],
            (poi, values[poi]),
            remove_nans=True,
            remove_above=interpolate_above,
            shift_negative_values=shift_negative_values,
            origin=d["origin"],
            min_is_external=poi_min_external,
        )
        d["values"] = values

    # prepare hep data
    hep_data = None
    if hep_data_path:
        hep_data = hdt.create_hist_data()

    # perform scans
    scans = [
        evaluate_likelihood_scan_1d(
            d["values"][poi],
            d["values"]["dnll2"],
            poi_min=d["poi_mins"][0],
            origin=d["origin"],
        )
        for d in data
    ]

    # set x range
    if x_min is None:
        x_min = min([min(d["values"][poi]) for d in data])
    if x_max is None:
        x_max = max([max(d["values"][poi]) for d in data])

    # set y range, restricted to values inside the visible x range
    y_max_value = max([
        d["values"]["dnll2"][(d["values"][poi] >= x_min) & (d["values"][poi] <= x_max)].max()
        for d in data
    ])
    y_min_value = min([
        d["values"]["dnll2"][(d["values"][poi] >= x_min) & (d["values"][poi] <= x_max)].min()
        for d in data
    ])
    y_min, y_max, y_max_line = get_y_range(y_min_value, y_max_value, y_min, y_max, log=y_log)

    # start plotting
    r.setup_style()
    canvas, (pad,) = r.routines.create_canvas(pad_props={"Logy": y_log})
    pad.cd()
    draw_objs = []
    legend_entries = []

    # dummy histogram to control axes
    x_title = to_root_latex(poi_data[poi].label)
    if "unit" in poi_data[poi]:
        x_title = "{} ({})".format(x_title, to_root_latex(poi_data[poi].unit))
    y_title = "-2 #Delta log(L)"
    h_dummy = ROOT.TH1F("dummy", ";{};{}".format(x_title, y_title), 1, x_min, x_max)
    r.setup_hist(
        h_dummy,
        pad=pad,
        props={"LineWidth": 0, "Minimum": y_min, "Maximum": y_max},
    )
    draw_objs.append((h_dummy, "HIST"))

    if show_significances:
        # horizontal significance lines and labels
        if isinstance(show_significances, (list, tuple)):
            sigs = list(show_significances)
        else:
            sigs = list(range(1, 9 + 1))
        for sig in sigs:
            # get the dnll2 value and build the label
            is_cl = isinstance(sig, float) and sig < 1
            if is_cl:
                # convert confidence level to dnll2 value
                dnll2 = get_chi2_level_from_cl(sig, 1)
            else:
                # convert significance to dnll2 value
                sig = int(round_scientific(sig))
                dnll2 = get_chi2_level(sig, 1)

            # do not show when vertically out of range
            if dnll2 >= y_max_line:
                continue

            # vertical indicators at 1 and 2 sigma when only one curve is shown
            if show_best_fit_indicators and len(data) == 1 and scans[0] and sig in [1, 2]:
                # "p"/"m" select the plus/minus interval borders of the scan
                values = list(map(lambda s: getattr(scans[0], "poi_{}{}".format(s, sig)), "pm"))
                for value in values:
                    if value is None or not (x_min < value < x_max):
                        continue
                    line = ROOT.TLine(value, y_min, value, scans[0].interp(value))
                    r.setup_line(
                        line,
                        props={"LineColor": colors.black, "LineStyle": 2, "NDC": False},
                    )
                    draw_objs.append(line)

            # create the line
            sig_line = ROOT.TLine(x_min, dnll2, x_max, dnll2)
            r.setup_line(
                sig_line,
                props={"NDC": False, "LineWidth": 1},
                color=colors.grey,
            )
            draw_objs.append(sig_line)

            # create and position the label
            if y_log:
                sig_label_y = math.log(dnll2 / y_min) / math.log(y_max / y_min)
            else:
                sig_label_y = dnll2 / (y_max - y_min)
            sig_label_y *= 1.0 - pad.GetTopMargin() - pad.GetBottomMargin()
            sig_label_y += pad.GetBottomMargin() + 0.00375
            if is_cl:
                sig_label = "{:f}".format(sig * 100).rstrip("0").rstrip(".") + "%"
            else:
                sig_label = "{}#sigma".format(sig)
            sig_label = r.routines.create_top_right_label(
                sig_label,
                pad=pad,
                x_offset=12,
                y=sig_label_y,
                props={"TextSize": 18, "TextColor": colors.grey, "TextAlign": 31},
            )
            draw_objs.append(sig_label)

    # draw vertical lines at requested values
    if v_lines:
        for x in v_lines:
            if x_min < x < x_max:
                line = ROOT.TLine(x, y_min, x, y_max_line)
                r.setup_line(
                    line,
                    props={"LineColor": colors.black, "LineStyle": 2, "NDC": False},
                )
                draw_objs.append(line)

    # special case regarding color handling: when all entry names are valid keys in br_hh_colors,
    # replace the default color sequence to deterministically assign same colors to channels
    _color_sequence = color_sequence
    if all(d["name"] in br_hh_colors.root for d in data):
        _color_sequence = [br_hh_colors.root[d["name"]] for d in data]

    # perform scans and draw nll curves
    parameter_ranges = OrderedDict()
    g_nlls = []
    for d, scan, col, ms in zip(data, scans, _color_sequence, marker_sequence):
        if not scan:
            warn("1D likelihood evaluation failed for entry '{}'".format(d["name"]))

        # draw the curve
        g_nll = create_tgraph(
            len(d["values"][poi]),
            d["values"][poi],
            d["values"]["dnll2"],
        )
        r.setup_graph(
            g_nll,
            props={"LineWidth": 2, "MarkerStyle": ms, "MarkerSize": 1.2},
            color=colors[col],
        )
        draw_objs.append((g_nll, "SAME,CP" if show_points else "SAME,C"))
        g_nlls.append(g_nll)

        # legend entry with optional best fit value
        g_label = expand_hh_channel_label(d["name"])
        if scan and show_best_fit:
            if show_best_fit_error:
                bf_label = scan.num_min.str(
                    format="%.2f",
                    style="root",
                    force_asymmetric=True,
                    styles={"space": ""},
                )
            else:
                bf_label = "{:.2f}".format(scan.num_min())
            if g_label:
                g_label += ", {}".format(bf_label)
            else:
                g_label = "{} = {}".format(to_root_latex(poi_data[poi].label), bf_label)
        legend_entries.append((g_nll, g_label, "LP" if show_points else "L"))

        # vertical line denoting the best fit value
        if show_best_fit_line and scan and (x_min <= scan.poi_min <= x_max):
            line_fit = ROOT.TLine(scan.poi_min, y_min, scan.poi_min, y_max_line)
            r.setup_line(
                line_fit,
                props={"LineWidth": 2, "NDC": False},
                color=colors[col],
            )
            draw_objs.append(line_fit)

        # store parameter ranges
        key = poi
        if d["name"]:
            key += "__{}".format(d["name"])
        parameter_ranges[key] = scan["summary"] if scan else None

    # theory prediction with uncertainties
    if theory_value:
        has_thy_err = len(theory_value) == 3
        if has_thy_err:
            # theory graph
            g_thy = create_tgraph(
                1,
                theory_value[0],
                y_min,
                theory_value[2],
                theory_value[1],
                0,
                y_max_line - y_min,
            )
            r.setup_graph(
                g_thy,
                props={"LineColor": colors.red, "FillStyle": 1001, "FillColor": colors.red_trans_50},
            )
            draw_objs.append((g_thy, "SAME,02"))
            legend_entries.append((g_thy, "Standard Model", "LF"))
        # theory line
        line_thy = ROOT.TLine(theory_value[0], y_min, theory_value[0], y_max_line)
        r.setup_line(line_thy, props={"NDC": False}, color=colors.red)
        draw_objs.append(line_thy)
        if not has_thy_err:
            legend_entries.append((line_thy, "Standard Model", "L"))

    # fill hep data
    if hep_data:
        # scan value as independent variable
        scan_values = sorted(set(chain(*(map(float, d["values"][poi]) for d in data))))
        hdt.create_independent_variable(
            poi_data[poi].label,
            parent=hep_data,
            values=[Number(v, default_format=-2) for v in scan_values],
        )

        # dnll2 values as dependent variables
        for d, g_nll in zip(data, g_nlls):
            label = r"$-2\Delta\log(L)$"
            if d.get("name"):
                label += ", " + d["name"]
            hdt.create_dependent_variable_from_graph(
                g_nll,
                x_values=scan_values,
                parent=hep_data,
                label=label,
                rounding_method=-2,
                # clamp interpolated dnll2 values at 0 for the hep data record
                transform=(lambda i, x, y, err: (x, max(y, 0.0), err)),
            )

    # legend
    legend_cols = min(int(math.ceil(len(legend_entries) / 4.0)), 3)
    legend_rows = int(math.ceil(len(legend_entries) / float(legend_cols)))
    legend = r.routines.create_legend(
        pad=pad,
        width=legend_cols * 210,
        n=legend_rows,
        props={"NColumns": legend_cols, "TextSize": 18},
    )
    r.fill_legend(legend, legend_entries)
    draw_objs.append(legend)
    legend_box = r.routines.create_legend_box(
        legend,
        pad,
        "trl",
        props={"LineWidth": 0, "FillColor": colors.white_trans_70},
    )
    # insert the box right below the legend so it is drawn behind it
    draw_objs.insert(-1, legend_box)

    # cms label
    cms_labels = r.routines.create_cms_labels(
        pad=pad,
        postfix=cms_postfix or "",
        layout="outside_horizontal",
    )
    draw_objs.extend(cms_labels)

    # model parameter labels
    if model_parameters:
        param_kwargs = {}
        if legend_cols == 3:
            param_kwargs["y_offset"] = 1.0 - 0.25 * pad.GetTopMargin() - legend.GetY1()
        draw_objs.extend(create_model_parameters(model_parameters, pad, **param_kwargs))

    # campaign label
    if campaign:
        campaign_label = to_root_latex(campaign_labels.get(campaign, campaign))
        campaign_label = r.routines.create_top_right_label(campaign_label, pad=pad)
        draw_objs.append(campaign_label)

    # draw all objects
    r.routines.draw_objects(draw_objs)

    # save
    r.update_canvas(canvas)
    for path in make_list(paths):
        canvas.SaveAs(path)

    # save parameter ranges
    if ranges_path:
        ranges_path = prepare_output(ranges_path)
        with open(ranges_path, "w") as f:
            json.dump(parameter_ranges, f, indent=4)
        print("saved parameter ranges to {}".format(ranges_path))

    # save hep data
    if hep_data_path:
        hdt.save_hep_data(hep_data, hep_data_path)
@use_style("dhi_default")
def plot_likelihood_scan_2d(
Loading