pyhf
pyhf copied to clipboard
Treatment of 0 bins for shapes
Summary
I've ran into slightly opaque error messages trying to fit templates with some zeros in them
with numpy as backend
FailedMinimization: Inequality constraints incompatible
with minuit as backend
...
W FCN result is NaN for [ nan nan nan ]
...
W VariableMetricBuilder Edm is NaN; stop iterations
...
W MnPosDef Matrix forced pos-def by adding to diagonal nan
These went away after filtering the bins with 0 data out of the templates, but it would be neater to either catch them with a more transparent error or emit a warning and ignore/pad with small value automatically.
Related to #293
Additional Information
To replicate
import hist
import numpy as np
import mplhep as hep
import matplotlib.pyplot as plt
import copy
import pprint
templates = []
hall = hist.Hist(hist.axis.Regular(50, 0, 10))
for i in [3, 5, 7]:
h = hist.Hist(
hist.axis.Regular(50, 0, 10),
)
h.fill(np.random.normal(i, 0.1, 10000))
templates.append(h)
hall.fill(np.random.normal(i, 1, round(10000 * np.random.uniform(0.7, 1.3))))
hep.set_style("CMS")
hep.histplot(templates, stack=True, histtype="fill", label=["A", "B", "C"])
hep.histplot(
hall, histtype="errorbar", color="black", capsize=2, yerr=True, label="Total"
)
plt.legend()
def freefit(data, tmps, debug=True):
N = len(tmps)
import pyhf
spec = {
"channels": [
{
"name": "some_region",
"samples": [],
}
],
"measurements": [
{
"config": {"parameters": [], "poi": "normalization_2"},
"name": "N_free_floating",
}
],
"observations": [{"data": [], "name": "some_region"}],
"version": "1.0.0",
}
# fill in data
for i in range(N):
spec["channels"][0]["samples"].append(
{
"data": [],
"modifiers": [
{
"data": None,
"name": "normalization_{}".format(i),
"type": "normfactor",
}
],
"name": str(i),
}
)
spec["channels"][0]["samples"][i]["data"] = list(tmps[i])
spec["observations"][0]["data"] = list(data)
if debug:
pprint.pprint(spec, compact=True)
ws = pyhf.Workspace(spec)
model = ws.model()
data = ws.data(model)
return pyhf.infer.mle.fit(data, model)
temps = [list(templates[i].to_numpy()[0]) for i in range(len(templates))]
data = list(hall.to_numpy()[0])
import pyhf
pyhf.set_backend("numpy")
fit_results = freefit(data, temps, debug=1)
fit_results
Generating the templates with std=1 ~ normal width works fines
Code of Conduct
- [X] I agree to follow the Code of Conduct