Treatment of 0 bins for shapes

Open andrzejnovak opened this issue 3 years ago • 0 comments

Summary

I've ran into slightly opaque error messages trying to fit templates with some zeros in them

with numpy as backend

FailedMinimization: Inequality constraints incompatible

with minuit as backend

...
W FCN result is NaN for [ nan nan nan ]
...
W VariableMetricBuilder Edm is NaN; stop iterations
...
W MnPosDef Matrix forced pos-def by adding to diagonal nan

These went away after filtering the bins with 0 data out of the templates, but it would be neater to either catch them with a more transparent error or emit a warning and ignore/pad with small value automatically.

Related to #293

Additional Information

To replicate

import hist
import numpy as np
import mplhep as hep
import matplotlib.pyplot as plt
import copy
import pprint

templates = []

hall = hist.Hist(hist.axis.Regular(50, 0, 10))
for i in [3, 5, 7]:
    h = hist.Hist(
        hist.axis.Regular(50, 0, 10),
    )
    h.fill(np.random.normal(i, 0.1, 10000))
    templates.append(h)

    hall.fill(np.random.normal(i, 1, round(10000 * np.random.uniform(0.7, 1.3))))

hep.set_style("CMS")
hep.histplot(templates, stack=True, histtype="fill", label=["A", "B", "C"])
hep.histplot(
    hall, histtype="errorbar", color="black", capsize=2, yerr=True, label="Total"
)
plt.legend()

def freefit(data, tmps, debug=True):
    N = len(tmps)

    import pyhf

    spec = {
        "channels": [
            {
                "name": "some_region",
                "samples": [],
            }
        ],
        "measurements": [
            {
                "config": {"parameters": [], "poi": "normalization_2"},
                "name": "N_free_floating",
            }
        ],
        "observations": [{"data": [], "name": "some_region"}],
        "version": "1.0.0",
    }

    # fill in data
    for i in range(N):
        spec["channels"][0]["samples"].append(
            {
                "data": [],
                "modifiers": [
                    {
                        "data": None,
                        "name": "normalization_{}".format(i),
                        "type": "normfactor",
                    }
                ],
                "name": str(i),
            }
        )
        spec["channels"][0]["samples"][i]["data"] = list(tmps[i])
    spec["observations"][0]["data"] = list(data)

    if debug:
        pprint.pprint(spec, compact=True)

    ws = pyhf.Workspace(spec)
    model = ws.model()
    data = ws.data(model)
    return pyhf.infer.mle.fit(data, model)


temps = [list(templates[i].to_numpy()[0]) for i in range(len(templates))]
data = list(hall.to_numpy()[0])

import pyhf

pyhf.set_backend("numpy")

fit_results = freefit(data, temps, debug=1)
fit_results

Generating the templates with std=1 ~ normal width works fines

Code of Conduct

[X] I agree to follow the Code of Conduct

Apr 27 '22 19:04 andrzejnovak