Merge branch 'master' into feature/qcd_hist_hook

uhh-cms · Jan 27, 2025 · 0413741 · 0413741
2 parents cb9637e + 1596340
commit 0413741
Show file tree

Hide file tree

Showing 12 changed files with 216 additions and 78 deletions.
diff --git a/hbt/calibration/default.py b/hbt/calibration/default.py
@@ -8,11 +8,16 @@
 from columnflow.calibration.cms.met import met_phi
 from columnflow.calibration.cms.jets import jec, jec_nominal, jer
 from columnflow.calibration.cms.tau import tec, tec_nominal
+from columnflow.calibration.cms.egamma import eer, eec
 from columnflow.production.cms.mc_weight import mc_weight
-from columnflow.production.cms.seeds import deterministic_event_seeds, deterministic_jet_seeds
+from columnflow.production.cms.supercluster_eta import electron_sceta
+from columnflow.production.cms.seeds import (
+    deterministic_event_seeds, deterministic_jet_seeds, deterministic_electron_seeds,
+    deterministic_photon_seeds,
+)
 from columnflow.util import maybe_import
 
-from hbt.util import IF_RUN_2
+from hbt.util import IF_RUN_2, IF_RUN_3_2022
 
 ak = maybe_import("awkward")
 
@@ -32,9 +37,13 @@
 @calibrator(
     uses={
         mc_weight, custom_deterministic_event_seeds, deterministic_jet_seeds,
+        deterministic_photon_seeds, deterministic_electron_seeds,
+        electron_sceta,
     },
     produces={
         mc_weight, custom_deterministic_event_seeds, deterministic_jet_seeds,
+        deterministic_photon_seeds, deterministic_electron_seeds,
+        electron_sceta,
     },
 )
 def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
@@ -46,12 +55,28 @@ def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
     # !! so no manual sorting needed here (but necessary if, e.g., jec is applied before)
     events = self[custom_deterministic_event_seeds](events, **kwargs)
     events = self[deterministic_jet_seeds](events, **kwargs)
+    events = self[deterministic_electron_seeds](events, **kwargs)
 
+    events = self[electron_sceta](events, **kwargs)
     if self.dataset_inst.is_data or not self.global_shift_inst.is_nominal:
         events = self[self.jec_nominal_cls](events, **kwargs)
+        # this branch is reached for data, or for MC with a non-nominal global shift;
+        # egamma scale calibrations should only be applied to data, so data gets the
+        # nominal scale correction, while shifted MC gets the nominal resolution one
+        if self.dataset_inst.is_data:
+            if self.has_dep(self.electron_scale_nominal_cls):
+                events = self[self.electron_scale_nominal_cls](events, **kwargs)
+        else:
+            if self.has_dep(self.electron_res_nominal_cls):
+                events = self[self.electron_res_nominal_cls](events, **kwargs)
     else:
         events = self[self.jec_full_cls](events, **kwargs)
         events = self[self.deterministic_jer_cls](events, **kwargs)
+        # in this block, we are in the nominal case in MC
+        if self.has_dep(self.electron_res_cls):
+            events = self[self.electron_res_cls](events, **kwargs)
+        if self.has_dep(self.electron_scale_cls):
+            events = self[self.electron_scale_cls](events, **kwargs)
 
     if self.config_inst.campaign.x.run == 2:
         events = self[self.met_phi_cls](events, **kwargs)
@@ -101,6 +126,25 @@ def default_init(self: Calibrator) -> None:
         self.config_inst.x.calib_met_phi_cls = met_phi.derive("met_phi", cls_dict={
             "met_name": met_name,
         })
+
+        # derive electron scale calibrators
+        self.config_inst.x.calib_electron_scale_cls = eec.derive("eec_full", cls_dict={
+        })
+
+        self.config_inst.x.calib_electron_scale_nominal_cls = eec.derive("eec_nominal", cls_dict={
+            "with_uncertainties": False,
+        })
+
+        # derive electron resolution calibrator
+        self.config_inst.x.calib_electron_res_cls = eer.derive("eer_full", cls_dict={
+            "deterministic_seed_index": 0,
+        })
+
+        self.config_inst.x.calib_electron_res_nominal_cls = eer.derive("eer_nominal", cls_dict={
+            "deterministic_seed_index": 0,
+            "with_uncertainties": False,
+        })
+
         # change the flag
         self.config_inst.set_aux(flag, True)
 
@@ -110,6 +154,10 @@ def default_init(self: Calibrator) -> None:
     self.tec_cls = self.config_inst.x.calib_jec_cls
     self.tec_nominal_cls = self.config_inst.x.calib_jec_cls
     self.met_phi_cls = self.config_inst.x.calib_met_phi_cls
+    self.electron_scale_cls = self.config_inst.x.calib_electron_scale_cls
+    self.electron_scale_nominal_cls = self.config_inst.x.calib_electron_scale_nominal_cls
+    self.electron_res_cls = self.config_inst.x.calib_electron_res_cls
+    self.electron_res_nominal_cls = self.config_inst.x.calib_electron_res_nominal_cls
 
     # collect derived calibrators and add them to the calibrator uses and produces
     derived_calibrators = {
@@ -119,6 +167,10 @@ def default_init(self: Calibrator) -> None:
         self.tec_cls,
         self.tec_nominal_cls,
         IF_RUN_2(self.met_phi_cls),
+        IF_RUN_3_2022(self.electron_scale_cls),
+        IF_RUN_3_2022(self.electron_scale_nominal_cls),
+        IF_RUN_3_2022(self.electron_res_cls),
+        IF_RUN_3_2022(self.electron_res_nominal_cls),
     }
     self.uses |= derived_calibrators
     self.produces |= derived_calibrators
diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
@@ -7,6 +7,7 @@
 from __future__ import annotations
 
 import os
+import re
 import itertools
 import functools
 
@@ -347,7 +348,6 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
 
         # add the dataset
         dataset = cfg.add_dataset(campaign.get_dataset(dataset_name))
-
         # add tags to datasets
         if dataset.name.startswith("data_e_"):
             dataset.add_tag({"etau", "emu_from_e", "ee"})
@@ -361,8 +361,12 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
             dataset.add_tag({"has_top", "single_top", "st"})
         if dataset.name.startswith("dy_"):
             dataset.add_tag("dy")
+        if re.match(r"^dy_m50toinf_\dj_(|pt.+_)amcatnlo$", dataset.name):
+            dataset.add_tag("dy_stitched")
         if dataset.name.startswith("w_lnu_"):
             dataset.add_tag("w_lnu")
+        if re.match(r"^w_lnu_\dj_(|pt.+_)amcatnlo$", dataset.name):
+            dataset.add_tag("w_lnu_stitched")
         # datasets that are known to have no lhe info at all
         if law.util.multi_match(dataset.name, [
             r"^(ww|wz|zz)_.*pythia$",
@@ -512,10 +516,29 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
             dataset.name for dataset in cfg.datasets
             if dataset.is_mc and not dataset.has_tag("signal")
         ]),
+        "backgrounds_unstitched": (backgrounds_unstitched := [
+            dataset.name for dataset in cfg.datasets
+            if (
+                dataset.is_mc and
+                not dataset.has_tag("signal") and
+                not dataset.has_tag({"dy_stitched", "w_lnu_stitched"}, mode=any)
+            )
+        ]),
         "sm_ggf": (sm_ggf_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", *backgrounds]),
-        "sm": (sm_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph", *backgrounds]),
+        "sm": (sm_group := [
+            "hh_ggf_hbb_htt_kl1_kt1_powheg",
+            "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph",
+            *backgrounds,
+        ],
+        ),
+        "sm_unstitched": (sm_group_unstitched := [
+            "hh_ggf_hbb_htt_kl1_kt1_powheg",
+            "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph",
+            *backgrounds_unstitched,
+        ]),
         "sm_ggf_data": data_group + sm_ggf_group,
         "sm_data": data_group + sm_group,
+        "sm_data_unstitched": data_group + sm_group_unstitched,
         "dy": [dataset.name for dataset in cfg.datasets if dataset.has_tag("dy")],
         "w_lnu": [dataset.name for dataset in cfg.datasets if dataset.has_tag("w_lnu")],
     }
@@ -750,6 +773,12 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
     corrector_kwargs = {"wp": "Medium", "wp_VSe": "VVLoose"} if run == 3 else {}
     cfg.x.tec = TECConfig(tagger=cfg.x.tau_tagger, corrector_kwargs=corrector_kwargs)
 
+    # electron energy scale (eec) and resolution (eer) correction config
+    from columnflow.calibration.cms.egamma import EGammaCorrectionConfig
+
+    cfg.x.eec = EGammaCorrectionConfig(correction_set="Scale")
+    cfg.x.eer = EGammaCorrectionConfig(correction_set="Smearing")
+
     # tau ID working points
     if campaign.x.version < 10:
         cfg.x.tau_id_working_points = DotDict.wrap({
@@ -1061,6 +1090,30 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
     cfg.add_shift(name="e_down", id=91, type="shape")
     add_shift_aliases(cfg, "e", {"electron_weight": "electron_weight_{direction}"})
 
+    # electron shifts
+    # TODO: energy corrections are currently only available for 2022 (as of Jan 2025),
+    #       add the shifts for other years once their corrections become available
+    if run == 3 and year == 2022:
+        cfg.add_shift(name="eec_up", id=92, type="shape", tags={"eec"})
+        cfg.add_shift(name="eec_down", id=93, type="shape", tags={"eec"})
+        add_shift_aliases(
+            cfg,
+            "eec",
+            {
+                "Electron.pt": "Electron.pt_scale_{direction}",
+            },
+        )
+
+        cfg.add_shift(name="eer_up", id=94, type="shape", tags={"eer"})
+        cfg.add_shift(name="eer_down", id=95, type="shape", tags={"eer"})
+        add_shift_aliases(
+            cfg,
+            "eer",
+            {
+                "Electron.pt": "Electron.pt_res_{direction}",
+            },
+        )
+
     cfg.add_shift(name="mu_up", id=100, type="shape")
     cfg.add_shift(name="mu_down", id=101, type="shape")
     add_shift_aliases(cfg, "mu", {"muon_weight": "muon_weight_{direction}"})
@@ -1123,10 +1176,10 @@ def add_external(name, value):
         if year == 2016:
             json_postfix = f"{'pre' if campaign.has_tag('preVFP') else 'post'}VFP"
         json_pog_era = f"{year}{json_postfix}_UL"
-        json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-7439b936"
+        json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-377439e8"
     elif run == 3:
         json_pog_era = f"{year}_Summer{year2}{campaign.x.postfix}"
-        json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-7439b936"
+        json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-377439e8"
     else:
         assert False
 
@@ -1184,6 +1237,13 @@ def add_external(name, value):
         add_external("muon_sf", (f"{json_mirror}/POG/MUO/{json_pog_era}/muon_Z.json.gz", "v1"))
         # electron scale factors
         add_external("electron_sf", (f"{json_mirror}/POG/EGM/{json_pog_era}/electron.json.gz", "v1"))
+
+        # TODO: electron (and photon) energy corrections and smearing are only available for 2022,
+        #       add the files for other years once they become available
+        if year == 2022:
+            # electron energy correction and smearing
+            add_external("electron_ss", (f"{json_mirror}/POG/EGM/{json_pog_era}/electronSS.json.gz", "v1"))
+
         # tau energy correction and scale factors
         # TODO: remove tag pog mirror once integrated centrally
         json_mirror_tau_pog = "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-taupog"

diff --git a/hbt/config/variables.py b/hbt/config/variables.py
@@ -41,14 +41,6 @@ def add_variables(config: od.Config) -> None:
         x_title="Luminosity block",
         discrete_x=True,
     )
-    add_variable(
-        config,
-        name="n_jet",
-        expression="n_jet",
-        binning=(11, -0.5, 10.5),
-        x_title="Number of jets",
-        discrete_x=True,
-    )
     add_variable(
         config,
         name="n_hhbtag",
@@ -640,6 +632,15 @@ def build_hh(events, which=None):
         x_title=r"Subleading muon $\phi$",
     )
 
+    add_variable(
+        config,
+        name="njets",
+        expression=lambda events: ak.num(events.Jet["pt"], axis=1),
+        aux={"inputs": {"Jet.pt"}},
+        binning=(11, -0.5, 10.5),
+        x_title=r"Number of jets",
+    )
+
     for proc in ["hh", "tt", "dy"]:
         # outputs of the resonant pDNN at SM-like mass and spin values
         add_variable(

diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py
@@ -156,6 +156,14 @@ def electron_selection(
     return default_mask, veto_mask
 
 
+@electron_selection.init
+def electron_selection_init(self) -> None:
+    from columnflow.config_util import get_shifts_from_sources
+    if self.config_inst.campaign.x.run == 3 and self.config_inst.campaign.x.year == 2022:
+        self.shifts.update(get_shifts_from_sources(self.config_inst, "eec"))
+        self.shifts.update(get_shifts_from_sources(self.config_inst, "eer"))
+
+
 @selector(
     uses={"{Electron,TrigObj}.{pt,eta,phi}"},
     exposed=False,
@@ -345,6 +353,22 @@ def tau_selection(
     return base_mask, iso_mask
 
 
+@tau_selection.init
+def tau_selection_init(self: Selector) -> None:
+    # register tec shifts
+    self.shifts |= {
+        shift_inst.name
+        for shift_inst in self.config_inst.shifts
+        if shift_inst.has_tag("tec")
+    }
+
+    # Add columns for the right tau tagger
+    self.uses |= {
+        f"Tau.id{self.config_inst.x.tau_tagger}VS{tag}"
+        for tag in ("e", "mu", "jet")
+    }
+
+
 @selector(
     uses={"{Tau,TrigObj}.{pt,eta,phi}"},
     # shifts are declared dynamically below in tau_selection_init
@@ -414,22 +438,6 @@ def tau_trigger_matching(
     return matches
 
 
-@tau_selection.init
-def tau_selection_init(self: Selector) -> None:
-    # register tec shifts
-    self.shifts |= {
-        shift_inst.name
-        for shift_inst in self.config_inst.shifts
-        if shift_inst.has_tag("tec")
-    }
-
-    # Add columns for the right tau tagger
-    self.uses |= {
-        f"Tau.id{self.config_inst.x.tau_tagger}VS{tag}"
-        for tag in ("e", "mu", "jet")
-    }
-
-
 @selector(
     uses={
         electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching,