fix: Report expected p-values based on their quantiles under backgrou…

…nd hypotheses (#1162) * Add two ROOT validation scripts for toys * Use same initialization parameters for b and s+b toys * Leave (expected) p-value calculation to calculators - Add pvalue and expected_pvalues to calculator API * Toy-based calculator returns expected values based on CLs distribution quantiles Co-authored-by: Matthew Feickert <[email protected]> Co-authored-by: Giordon Stark <[email protected]>
scikit-hep · Jan 20, 2021 · fed49ca · fed49ca
1 parent 0e71f2f
commit fed49ca
Show file tree

Hide file tree

Showing 11 changed files with 704 additions and 82 deletions.
diff --git a/README.rst b/README.rst
@@ -120,6 +120,7 @@ Implemented variations:
   - ☑ ShapeFactor
   - ☑ StatError
   - ☑ Lumi Uncertainty
+  - ☑ Non-asymptotic calculators
 
 Computational Backends:
   - ☑ NumPy
@@ -138,7 +139,6 @@ Todo
 ----
 
 -  ☐ StatConfig
--  ☐ Non-asymptotic calculators
 
 results obtained from this package are validated against output computed
 from HistFactory workspaces

diff --git a/docs/api.rst b/docs/api.rst
@@ -121,6 +121,7 @@ Inference
 
 .. autosummary::
    :toctree: _generated/
+   :template: modifierclass.rst
 
    test_statistics.q0
    test_statistics.qmu

diff --git a/docs/examples/notebooks/learn/UsingCalculators.ipynb b/docs/examples/notebooks/learn/UsingCalculators.ipynb
@@ -206,6 +206,66 @@
     "p_expected"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "However, these sorts of steps are somewhat time-consuming and lengthy, and depending on the calculator chosen, may differ a little bit. The calculator API also serves to harmonize the extraction of the observed $p$-values:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CL_sb = 0.08389430261678055\n",
+      "CL_b = 0.5\n",
+      "CL_s = CL_sb / CL_b = 0.1677886052335611\n"
+     ]
+    }
+   ],
+   "source": [
+    "p_sb, p_b, p_s = asymp_calc.pvalues(teststat, sb_dist, b_dist)\n",
+    "\n",
+    "print(f'CL_sb = {p_sb}')\n",
+    "print(f'CL_b = {p_b}')\n",
+    "print(f'CL_s = CL_sb / CL_b = {p_s}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and the expected $p$-values:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "exp. CL_sb = [0.0003632946734314658, 0.008671732658283807, 0.08389430261678055, 0.3522160809113284, 0.7325868885677684]\n",
+      "exp. CL_b = [0.022750131948179195, 0.15865525393145707, 0.5, 0.8413447460685429, 0.9772498680518208]\n",
+      "exp. CL_s = CL_sb / CL_b = [0.01596890401598493, 0.05465770873260968, 0.1677886052335611, 0.4186346709326618, 0.7496413276864433]\n"
+     ]
+    }
+   ],
+   "source": [
+    "p_exp_sb, p_exp_b, p_exp_s = asymp_calc.expected_pvalues(sb_dist, b_dist)\n",
+    "\n",
+    "print(f'exp. CL_sb = {p_exp_sb}')\n",
+    "print(f'exp. CL_b = {p_exp_b}')\n",
+    "print(f'exp. CL_s = CL_sb / CL_b = {p_exp_s}')"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -219,7 +279,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +297,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -255,7 +315,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -264,7 +324,7 @@
        "array(1.90259087)"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -287,7 +347,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -311,7 +371,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -325,9 +385,7 @@
     }
    ],
    "source": [
-    "p_sb = sb_dist.pvalue(teststat)\n",
-    "p_b = b_dist.pvalue(teststat)\n",
-    "p_s = p_sb / p_b\n",
+    "p_sb, p_b, p_s = asymp_calc.pvalues(teststat, sb_dist, b_dist)\n",
     "\n",
     "print(f'CL_sb = {p_sb}')\n",
     "print(f'CL_b = {p_b}')\n",
@@ -343,24 +401,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[0.0, 0.047058823529411764, 0.15, 0.3758865248226951, 1.0]"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "exp. CL_sb = [0.0, 0.008, 0.084, 0.318, 1.0]\n",
+      "exp. CL_b = [0.024, 0.17, 0.52, 0.846, 1.0]\n",
+      "exp. CL_s = CL_sb / CL_b = [0.0, 0.047058823529411764, 0.16153846153846155, 0.3758865248226951, 1.0]\n"
+     ]
     }
    ],
    "source": [
-    "teststat_expected = [b_dist.expected_value(i) for i in [2, 1, 0, -1, -2]]\n",
-    "p_expected = [sb_dist.pvalue(t) / b_dist.pvalue(t) for t in teststat_expected]\n",
-    "p_expected"
+    "p_exp_sb, p_exp_b, p_exp_s = asymp_calc.expected_pvalues(sb_dist, b_dist)\n",
+    "\n",
+    "print(f'exp. CL_sb = {p_exp_sb}')\n",
+    "print(f'exp. CL_b = {p_exp_b}')\n",
+    "print(f'exp. CL_s = CL_sb / CL_b = {p_exp_s}')"
    ]
   }
  ],

diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
@@ -139,59 +139,37 @@ def hypotest(
     )
 
     teststat = calc.teststatistic(poi_test)
-    sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
-
-    CLsb = sig_plus_bkg_distribution.pvalue(teststat)
-    CLb = b_only_distribution.pvalue(teststat)
-    CLs = CLsb / CLb
-
-    tensorlib, _ = get_backend()
-    # Ensure that all CL values are 0-d tensors
-    CLsb, CLb, CLs = (
-        tensorlib.astensor(CLsb),
-        tensorlib.astensor(CLb),
-        tensorlib.astensor(CLs),
+    sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test)
+
+    tb, _ = get_backend()
+    CLsb_obs, CLb_obs, CLs_obs = tuple(
+        tb.astensor(pvalue)
+        for pvalue in calc.pvalues(
+            teststat, sig_plus_bkg_distribution, bkg_only_distribution
+        )
+    )
+    CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
+        sig_plus_bkg_distribution, bkg_only_distribution
     )
 
     is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'
 
-    _returns = [CLsb if is_q0 else CLs]
+    _returns = [CLsb_obs if is_q0 else CLs_obs]
     if return_tail_probs:
         if is_q0:
-            _returns.append([CLb])
+            _returns.append([CLb_obs])
         else:
-            _returns.append([CLsb, CLb])
+            _returns.append([CLsb_obs, CLb_obs])
+
+    pvalues_exp_band = [
+        tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp)
+    ]
     if return_expected_set:
-        CLs_exp = []
-        for n_sigma in [2, 1, 0, -1, -2]:
-
-            expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
-
-            if is_q0:
-                # despite the name in this case this is the discovery p value
-                CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat)
-            else:
-                CLs = sig_plus_bkg_distribution.pvalue(
-                    expected_bonly_teststat
-                ) / b_only_distribution.pvalue(expected_bonly_teststat)
-            CLs_exp.append(tensorlib.astensor(CLs))
         if return_expected:
-            _returns.append(CLs_exp[2])
-        _returns.append(CLs_exp)
+            _returns.append(tb.astensor(pvalues_exp_band[2]))
+        _returns.append(pvalues_exp_band)
     elif return_expected:
-        n_sigma = 0
-        expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
-
-        if is_q0:
-            # despite the name in this case this is the discovery p value
-            CLs = sig_plus_bkg_distribution.pvalue(expected_bonly_teststat)
-        else:
-            CLs = sig_plus_bkg_distribution.pvalue(
-                expected_bonly_teststat
-            ) / b_only_distribution.pvalue(expected_bonly_teststat)
-
-        _returns.append(tensorlib.astensor(CLs))
-
+        _returns.append(tb.astensor(pvalues_exp_band[2]))
     # Enforce a consistent return type of the observed CLs
     return tuple(_returns) if len(_returns) > 1 else _returns[0]