diff --git a/pyhf/__init__.py b/pyhf/__init__.py
index 8c47e48be6..5bd16e5e80 100644
--- a/pyhf/__init__.py
+++ b/pyhf/__init__.py
@@ -1,6 +1,7 @@
 from . import tensor, optimize
 from .version import __version__
 from . import events
+
 tensorlib = tensor.numpy_backend()
 default_backend = tensorlib
 optimizer = optimize.scipy_optimizer()
@@ -49,24 +50,36 @@ def set_backend(backend, custom_optimizer=None):
     optimizer_changed = False
 
     if backend.name == 'tensorflow':
-        new_optimizer = custom_optimizer if custom_optimizer else optimize.tflow_optimizer(backend)
+        new_optimizer = (
+            custom_optimizer if custom_optimizer else optimize.tflow_optimizer(backend)
+        )
         if tensorlib.name == 'tensorflow':
             tensorlib_changed |= bool(backend.session != tensorlib.session)
     elif backend.name == 'pytorch':
-        new_optimizer = custom_optimizer if custom_optimizer else optimize.pytorch_optimizer(tensorlib=backend)
+        new_optimizer = (
+            custom_optimizer
+            if custom_optimizer
+            else optimize.pytorch_optimizer(tensorlib=backend)
+        )
     # TODO: Add support for mxnet_optimizer()
     # elif tensorlib.name == 'mxnet':
     #     new_optimizer = custom_optimizer if custom_optimizer else mxnet_optimizer()
     else:
-        new_optimizer = custom_optimizer if custom_optimizer else optimize.scipy_optimizer()
+        new_optimizer = (
+            custom_optimizer if custom_optimizer else optimize.scipy_optimizer()
+        )
 
     optimizer_changed = bool(optimizer != new_optimizer)
     # set new backend
     tensorlib = backend
     optimizer = new_optimizer
     # trigger events
-    if tensorlib_changed: events.trigger("tensorlib_changed")()
-    if optimizer_changed: events.trigger("optimizer_changed")()
+    if tensorlib_changed:
+        events.trigger("tensorlib_changed")()
+    if optimizer_changed:
+        events.trigger("optimizer_changed")()
+
 
 from .pdf import Model
+
 __all__ = ['Model', 'utils', 'modifiers', '__version__']
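Annotation (not part of the patch): a minimal usage sketch of the `set_backend` function reformatted above, assuming only the numpy backend that this version of pyhf installs by default.

```python
# Sketch of the module-level backend switch; swapping the backend also
# swaps in a matching optimizer and fires the "tensorlib_changed" /
# "optimizer_changed" events handled in set_backend above.
import pyhf

print(pyhf.tensorlib.name)  # 'numpy' by default

pyhf.set_backend(pyhf.tensor.numpy_backend())
```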
""" @@ -36,6 +47,7 @@ def xml2json(entrypoint_xml, basedir, output_file, track_progress): log.debug("Written to {0:s}".format(output_file)) sys.exit(0) + @pyhf.command() @click.argument('workspace', default='-') @click.argument('xmlfile', default='-') @@ -45,12 +57,19 @@ def json2xml(workspace, xmlfile, specroot, dataroot): with click.open_file(workspace, 'r') as specstream: d = json.load(specstream) with click.open_file(xmlfile, 'w') as outstream: - outstream.write(writexml.writexml(d, specroot, dataroot,'').decode('utf-8')) + outstream.write( + writexml.writexml(d, specroot, dataroot, '').decode('utf-8') + ) sys.exit(0) + @pyhf.command() @click.argument('workspace', default='-') -@click.option('--output-file', help='The location of the output json file. If not specified, prints to screen.', default=None) +@click.option( + '--output-file', + help='The location of the output json file. If not specified, prints to screen.', + default=None, +) @click.option('--measurement', default=None) @click.option('-p', '--patch', multiple=True) @click.option('--qualify-names/--no-qualify-names', default=False) @@ -60,10 +79,14 @@ def cls(workspace, output_file, measurement, qualify_names, patch): measurements = d['toplvl']['measurements'] measurement_names = [m['name'] for m in measurements] measurement_index = 0 - + log.debug('measurements defined:\n\t{0:s}'.format('\n\t'.join(measurement_names))) if measurement and measurement not in measurement_names: - log.error('no measurement by name \'{0:s}\' exists, pick from one of the valid ones above'.format(measurement)) + log.error( + 'no measurement by name \'{0:s}\' exists, pick from one of the valid ones above'.format( + measurement + ) + ) sys.exit(1) else: if not measurement and len(measurements) > 1: @@ -72,16 +95,27 @@ def cls(workspace, output_file, measurement, qualify_names, patch): elif measurement: measurement_index = measurement_names.index(measurement) - log.debug('calculating CLs for measurement {0:s}'.format(measurements[measurement_index]['name'])) - spec = {'channels':d['channels']} + log.debug( + 'calculating CLs for measurement {0:s}'.format( + measurements[measurement_index]['name'] + ) + ) + spec = {'channels': d['channels']} for p in patch: with click.open_file(p, 'r') as read_file: p = jsonpatch.JsonPatch(json.loads(read_file.read())) spec = p.apply(spec) - p = Model(spec, poiname=measurements[measurement_index]['config']['poi'], qualify_names=qualify_names) - observed = sum((d['data'][c] for c in p.config.channels),[]) + p.config.auxdata + p = Model( + spec, + poiname=measurements[measurement_index]['config']['poi'], + qualify_names=qualify_names, + ) + observed = sum((d['data'][c] for c in p.config.channels), []) + p.config.auxdata result = runOnePoint(1.0, observed, p) - result = {'CLs_obs': result[-2].tolist()[0], 'CLs_exp': result[-1].ravel().tolist()} + result = { + 'CLs_obs': result[-2].tolist()[0], + 'CLs_exp': result[-1].ravel().tolist(), + } if output_file is None: print(json.dumps(result, indent=4, sort_keys=True)) else: diff --git a/pyhf/constraints.py b/pyhf/constraints.py index d7708717c1..eab46f8abd 100644 --- a/pyhf/constraints.py +++ b/pyhf/constraints.py @@ -1,14 +1,15 @@ from . import get_backend, default_backend from . import events + class gaussian_constraint_combined(object): - def __init__(self,pdfconfig): + def __init__(self, pdfconfig): # iterate over all constraints order doesn't matter.... 
diff --git a/pyhf/constraints.py b/pyhf/constraints.py
index d7708717c1..eab46f8abd 100644
--- a/pyhf/constraints.py
+++ b/pyhf/constraints.py
@@ -1,14 +1,15 @@
 from . import get_backend, default_backend
 from . import events
 
+
 class gaussian_constraint_combined(object):
-    def __init__(self,pdfconfig):
+    def __init__(self, pdfconfig):
         # iterate over all constraints order doesn't matter....
         self.par_indices = list(range(len(pdfconfig.suggested_init())))
         self.data_indices = list(range(len(pdfconfig.auxdata)))
         self.parset_and_slice = [
-            (pdfconfig.param_set(cname),pdfconfig.par_slice(cname))
+            (pdfconfig.param_set(cname), pdfconfig.par_slice(cname))
             for cname in pdfconfig.auxdata_order
         ]
         self._precompute()
@@ -20,11 +21,12 @@ def _precompute(self):
         normal_constraint_data = []
         normal_constraint_mean_indices = []
         normal_constraint_sigmas = []
-        for parset,parslice in self.parset_and_slice:
+        for parset, parslice in self.parset_and_slice:
             end_index = start_index + parset.n_parameters
             thisauxdata = self.data_indices[start_index:end_index]
             start_index = end_index
-            if not parset.pdf_type == 'normal': continue
+            if not parset.pdf_type == 'normal':
+                continue
 
             # many constraints are defined on a unit gaussian
             # but we reserved the possibility that a paramset
@@ -34,39 +36,66 @@ def _precompute(self):
             try:
                 normal_constraint_sigmas.append(parset.sigmas)
             except AttributeError:
-                normal_constraint_sigmas.append([1.]*len(thisauxdata))
+                normal_constraint_sigmas.append([1.0] * len(thisauxdata))
 
             normal_constraint_data.append(thisauxdata)
             normal_constraint_mean_indices.append(self.par_indices[parslice])
 
         if normal_constraint_mean_indices:
-            normal_mean_idc = default_backend.concatenate(list(map(lambda x: default_backend.astensor(x,dtype = 'int'),normal_constraint_mean_indices)))
-            normal_sigmas = default_backend.concatenate(list(map(default_backend.astensor,normal_constraint_sigmas)))
-            normal_data = default_backend.concatenate(list(map(lambda x: default_backend.astensor(x,dtype = 'int'),normal_constraint_data)))
-
-            self.normal_data = tensorlib.astensor(default_backend.tolist(normal_data),dtype = 'int')
-            self.normal_sigmas = tensorlib.astensor(default_backend.tolist(normal_sigmas))
-            self.normal_mean_idc = tensorlib.astensor(default_backend.tolist(normal_mean_idc),dtype = 'int')
+            normal_mean_idc = default_backend.concatenate(
+                list(
+                    map(
+                        lambda x: default_backend.astensor(x, dtype='int'),
+                        normal_constraint_mean_indices,
+                    )
+                )
+            )
+            normal_sigmas = default_backend.concatenate(
+                list(map(default_backend.astensor, normal_constraint_sigmas))
+            )
+            normal_data = default_backend.concatenate(
+                list(
+                    map(
+                        lambda x: default_backend.astensor(x, dtype='int'),
+                        normal_constraint_data,
+                    )
+                )
+            )
+
+            self.normal_data = tensorlib.astensor(
+                default_backend.tolist(normal_data), dtype='int'
+            )
+            self.normal_sigmas = tensorlib.astensor(
+                default_backend.tolist(normal_sigmas)
+            )
+            self.normal_mean_idc = tensorlib.astensor(
+                default_backend.tolist(normal_mean_idc), dtype='int'
+            )
         else:
-            self.normal_data, self.normal_sigmas, self.normal_mean_idc = None, None, None
+            self.normal_data, self.normal_sigmas, self.normal_mean_idc = (
+                None,
+                None,
+                None,
+            )
 
-    def logpdf(self,auxdata,pars):
+    def logpdf(self, auxdata, pars):
         if self.normal_data is None:
             return 0
         tensorlib, _ = get_backend()
-        normal_data = tensorlib.gather(auxdata,self.normal_data)
-        normal_means = tensorlib.gather(pars,self.normal_mean_idc)
-        normal = tensorlib.normal_logpdf(normal_data,normal_means,self.normal_sigmas)
+        normal_data = tensorlib.gather(auxdata, self.normal_data)
+        normal_means = tensorlib.gather(pars, self.normal_mean_idc)
+        normal = tensorlib.normal_logpdf(normal_data, normal_means, self.normal_sigmas)
         return tensorlib.sum(normal)
 
+
 class poisson_constraint_combined(object):
-    def __init__(self,pdfconfig):
+    def __init__(self, pdfconfig):
         # iterate over all constraints order doesn't matter....
         self.par_indices = list(range(len(pdfconfig.suggested_init())))
         self.data_indices = list(range(len(pdfconfig.auxdata)))
         self.mod_and_slice = [
-            (pdfconfig.param_set(cname),pdfconfig.par_slice(cname))
+            (pdfconfig.param_set(cname), pdfconfig.par_slice(cname))
             for cname in pdfconfig.auxdata_order
         ]
         self._precompute()
@@ -79,11 +108,12 @@ def _precompute(self):
         poisson_constraint_data = []
         poisson_constraint_rate_indices = []
         poisson_constraint_rate_factors = []
-        for parset,parslice in self.mod_and_slice:
+        for parset, parslice in self.mod_and_slice:
             end_index = start_index + parset.n_parameters
             thisauxdata = self.data_indices[start_index:end_index]
             start_index = end_index
-            if not parset.pdf_type == 'poisson': continue
+            if not parset.pdf_type == 'poisson':
+                continue
 
             poisson_constraint_data.append(thisauxdata)
             poisson_constraint_rate_indices.append(self.par_indices[parslice])
@@ -95,29 +125,62 @@ def _precompute(self):
             try:
                 poisson_constraint_rate_factors.append(parset.factors)
             except AttributeError:
-                poisson_constraint_rate_factors.append(default_backend.shape(self.par_indices[parslice]))
-
+                poisson_constraint_rate_factors.append(
+                    default_backend.shape(self.par_indices[parslice])
+                )
 
         if poisson_constraint_rate_indices:
-            poisson_rate_idc = default_backend.concatenate(list(map(lambda x: default_backend.astensor(x,dtype = 'int'), poisson_constraint_rate_indices)))
-            poisson_rate_fac = default_backend.concatenate(list(map(lambda x: default_backend.astensor(x,dtype = 'float'), poisson_constraint_rate_factors)))
-            poisson_data = default_backend.concatenate(list(map(lambda x: default_backend.astensor(x,dtype = 'int'), poisson_constraint_data)))
-
-            self.poisson_data = tensorlib.astensor(default_backend.tolist(poisson_data),dtype = 'int')
-            self.poisson_rate_idc = tensorlib.astensor(default_backend.tolist(poisson_rate_idc),dtype = 'int')
-            self.poisson_rate_fac = tensorlib.astensor(default_backend.tolist(poisson_rate_fac),dtype = 'float')
+            poisson_rate_idc = default_backend.concatenate(
+                list(
+                    map(
+                        lambda x: default_backend.astensor(x, dtype='int'),
+                        poisson_constraint_rate_indices,
+                    )
+                )
+            )
+            poisson_rate_fac = default_backend.concatenate(
+                list(
+                    map(
+                        lambda x: default_backend.astensor(x, dtype='float'),
+                        poisson_constraint_rate_factors,
+                    )
+                )
+            )
+            poisson_data = default_backend.concatenate(
+                list(
+                    map(
+                        lambda x: default_backend.astensor(x, dtype='int'),
+                        poisson_constraint_data,
+                    )
+                )
+            )
+
+            self.poisson_data = tensorlib.astensor(
+                default_backend.tolist(poisson_data), dtype='int'
+            )
+            self.poisson_rate_idc = tensorlib.astensor(
+                default_backend.tolist(poisson_rate_idc), dtype='int'
+            )
+            self.poisson_rate_fac = tensorlib.astensor(
+                default_backend.tolist(poisson_rate_fac), dtype='float'
+            )
         else:
-            self.poisson_rate_idc, self.poisson_data, self.poisson_rate_fac = None, None, None
+            self.poisson_rate_idc, self.poisson_data, self.poisson_rate_fac = (
+                None,
+                None,
+                None,
+            )
 
-    def logpdf(self,auxdata,pars):
+    def logpdf(self, auxdata, pars):
         if self.poisson_data is None:
             return 0
         tensorlib, _ = get_backend()
-        poisson_data = tensorlib.gather(auxdata,self.poisson_data)
-        poisson_rate_base = tensorlib.gather(pars,self.poisson_rate_idc)
-        poisson_factors  = self.poisson_rate_fac
+        poisson_data = tensorlib.gather(auxdata, self.poisson_data)
+        poisson_rate_base = tensorlib.gather(pars, self.poisson_rate_idc)
+        poisson_factors = self.poisson_rate_fac
         poisson_rate = tensorlib.product(
-            tensorlib.stack([poisson_rate_base, poisson_factors]), axis=0)
-        poisson = tensorlib.poisson_logpdf(poisson_data,poisson_rate)
+            tensorlib.stack([poisson_rate_base, poisson_factors]), axis=0
+        )
+        poisson = tensorlib.poisson_logpdf(poisson_data, poisson_rate)
         return tensorlib.sum(poisson)
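Annotation (not part of the patch): the vectorization idea behind `gaussian_constraint_combined.logpdf`, sketched with plain numpy/scipy. The index values are illustrative; the real class precomputes them from the pdf config.

```python
# Gather the normal-constrained auxdata entries and their parameter means
# once, then evaluate all constraint terms in one batched call.
import numpy as np
from scipy.stats import norm

auxdata = np.array([0.1, -0.2, 5.0])  # concatenated auxiliary data
pars = np.array([1.0, 0.05, -0.1])    # full parameter vector

normal_data_idx = np.array([0, 1])    # auxdata entries with normal constraints
normal_mean_idx = np.array([1, 2])    # parameters supplying the means
sigmas = np.array([1.0, 1.0])         # unit widths unless parset.sigmas overrides

total = norm.logpdf(
    auxdata[normal_data_idx], loc=pars[normal_mean_idx], scale=sigmas
).sum()
print(total)
```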
diff --git a/pyhf/events.py b/pyhf/events.py
index a7c2be6793..6190c08d74 100644
--- a/pyhf/events.py
+++ b/pyhf/events.py
@@ -1,7 +1,10 @@
 __events = {}
 __disabled_events = set([])
 
-def noop(*args, **kwargs): pass
+
+def noop(*args, **kwargs):
+    pass
+
 
 class Callables(list):
     def __call__(self, *args, **kwargs):
@@ -11,77 +14,86 @@ def __call__(self, *args, **kwargs):
     def __repr__(self):
         return "Callables(%s)" % list.__repr__(self)
 
-"""
-
-    This is meant to be used as a decorator.
-    >>> @pyhf.events.subscribe('myevent')
-    ... def test(a,b):
-    ...     print a+b
-    ...
-    >>> pyhf.events.trigger_myevent(1,2)
-    3
-"""
+
 def subscribe(event):
+    """
+    This is meant to be used as a decorator.
+    """
+    # Example:
+    #
+    # >>> @pyhf.events.subscribe('myevent')
+    # ... def test(a,b):
+    # ...     print a+b
+    # ...
+    # >>> pyhf.events.trigger_myevent(1,2)
+    # 3
     global __events
+
     def __decorator(func):
         __events.setdefault(event, Callables()).append(func)
         return func
+
     return __decorator
 
-"""
+
+def register(event):
+    """
     This is meant to be used as a decorator to register a function
     for triggering events.
 
     This creates two events: "::before" and "::after"
+    """
+    # Examples:
+    #
+    # >>> @pyhf.events.register('test_func')
+    # ... def test(a,b):
+    # ...     print a+b
+    # ...
+    # >>> @pyhf.events.subscribe('test_func::before')
+    # ... def precall():
+    # ...     print 'before call'
+    # ...
+    # >>> @pyhf.events.subscribe('test_func::after')
+    # ... def postcall():
+    # ...     print 'after call'
+    # ...
+    # >>> test(1,2)
+    # "before call"
+    # 3
+    # "after call"
+    # >>>
 
-    >>> @pyhf.events.register('test_func')
-    ... def test(a,b):
-    ...     print a+b
-    ...
-    >>> @pyhf.events.subscribe('test_func::before')
-    ... def precall():
-    ...     print 'before call'
-    ...
-    >>> @pyhf.events.subscribe('test_func::after')
-    ... def postcall():
-    ...     print 'after call'
-    ...
-    >>> test(1,2)
-    "before call"
-    3
-    "after call"
-    >>>
-
-"""
-def register(event):
     def _register(func):
         def register_wrapper(*args, **kwargs):
             trigger("{0:s}::before".format(event))()
             result = func(*args, **kwargs)
             trigger("{0:s}::after".format(event))()
             return result
+
         return register_wrapper
+
     return _register
 
-"""
-Trigger an event if not disabled.
-"""
+
 def trigger(event):
+    """
+    Trigger an event if not disabled.
+    """
     global __events, __disabled_events, noop
     is_noop = bool(event in __disabled_events or event not in __events)
     return noop if is_noop else __events.get(event)
 
-"""
-Disable an event from firing.
-"""
+
 def disable(event):
+    """
+    Disable an event from firing.
+    """
     global __disabled_events
     __disabled_events.add(event)
 
-"""
-Enable an event to be fired if disabled.
-"""
+
 def enable(event):
+    """
+    Enable an event to be fired if disabled.
+    """
     global __disabled_events
     __disabled_events.remove(event)
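Annotation (not part of the patch): the docstring examples above, rewritten as runnable Python 3.

```python
# register() wraps a function so its "::before"/"::after" events fire
# around each call; subscribe() attaches handlers to those events.
import pyhf.events

@pyhf.events.register('test_func')
def test(a, b):
    print(a + b)

@pyhf.events.subscribe('test_func::before')
def precall():
    print('before call')

@pyhf.events.subscribe('test_func::after')
def postcall():
    print('after call')

test(1, 2)  # prints: before call / 3 / after call
```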
diff --git a/pyhf/interpolate.py b/pyhf/interpolate.py
index 84eab19a34..324eda2825 100644
--- a/pyhf/interpolate.py
+++ b/pyhf/interpolate.py
@@ -1,10 +1,12 @@
 import logging
-log = logging.getLogger(__name__)
 
 from . import get_backend, default_backend
 from . import exceptions
 from . import events
 
+log = logging.getLogger(__name__)
+
+
 def _slow_hfinterp_looper(histogramssets, alphasets, func):
     all_results = []
     for histoset, alphaset in zip(histogramssets, alphasets):
@@ -15,12 +17,13 @@ def _slow_hfinterp_looper(histogramssets, alphasets, func):
             histo_result = set_result[-1]
             for alpha in alphaset:
                 alpha_result = []
-                for down,nom,up in zip(histo[0],histo[1],histo[2]):
+                for down, nom, up in zip(histo[0], histo[1], histo[2]):
                     v = func(down, nom, up, alpha)
                     alpha_result.append(v)
                 histo_result.append(alpha_result)
     return all_results
 
+
 class _hfinterpolator_code0(object):
     def __init__(self, histogramssets):
         # nb: this should never be a tensor, store in default backend (e.g. numpy)
@@ -32,57 +35,74 @@ def __init__(self, histogramssets):
 
     def _precompute(self):
         tensorlib, _ = get_backend()
-        self.deltas_up = tensorlib.astensor(self._histogramssets[:,:,2] - self._histogramssets[:,:,1])
-        self.deltas_dn = tensorlib.astensor(self._histogramssets[:,:,1] - self._histogramssets[:,:,0])
+        self.deltas_up = tensorlib.astensor(
+            self._histogramssets[:, :, 2] - self._histogramssets[:, :, 1]
+        )
+        self.deltas_dn = tensorlib.astensor(
+            self._histogramssets[:, :, 1] - self._histogramssets[:, :, 0]
+        )
         self.broadcast_helper = tensorlib.ones(self.deltas_up.shape)
-        self.mask_on  = tensorlib.ones(self.alphasets_shape)
+        self.mask_on = tensorlib.ones(self.alphasets_shape)
         self.mask_off = tensorlib.zeros(self.alphasets_shape)
 
     def _precompute_alphasets(self, alphasets_shape):
-        if alphasets_shape == self.alphasets_shape: return
+        if alphasets_shape == self.alphasets_shape:
+            return
         tensorlib, _ = get_backend()
-        self.mask_on  = tensorlib.ones(alphasets_shape)
+        self.mask_on = tensorlib.ones(alphasets_shape)
         self.mask_off = tensorlib.zeros(alphasets_shape)
         self.alphasets_shape = alphasets_shape
 
     def __call__(self, alphasets):
         tensorlib, _ = get_backend()
         self._precompute_alphasets(tensorlib.shape(alphasets))
-        where_alphasets_positive = tensorlib.where(alphasets > 0, self.mask_on, self.mask_off)
+        where_alphasets_positive = tensorlib.where(
+            alphasets > 0, self.mask_on, self.mask_off
+        )
 
         # s: set under consideration (i.e. the modifier)
         # a: alpha variation
         # h: histogram affected by modifier
         # b: bin of histogram
-        alphas_times_deltas_up = tensorlib.einsum('sa,shb->shab',alphasets, self.deltas_up)
-        alphas_times_deltas_dn = tensorlib.einsum('sa,shb->shab',alphasets, self.deltas_dn)
+        alphas_times_deltas_up = tensorlib.einsum(
+            'sa,shb->shab', alphasets, self.deltas_up
+        )
+        alphas_times_deltas_dn = tensorlib.einsum(
+            'sa,shb->shab', alphasets, self.deltas_dn
+        )
 
-        masks = tensorlib.einsum('sa,shb->shab', where_alphasets_positive, self.broadcast_helper)
+        masks = tensorlib.einsum(
+            'sa,shb->shab', where_alphasets_positive, self.broadcast_helper
+        )
 
         return tensorlib.where(masks, alphas_times_deltas_up, alphas_times_deltas_dn)
 
+
 def _hfinterp_code0(histogramssets, alphasets):
     tensorlib, _ = get_backend()
     interpolator = _hfinterpolator_code0(tensorlib.tolist(histogramssets))
     return interpolator(alphasets)
 
+
 def _slow_hfinterp_code0(histogramssets, alphasets):
     def summand(down, nom, up, alpha):
         delta_up = up - nom
         delta_down = nom - down
         if alpha > 0:
-            delta = delta_up*alpha
+            delta = delta_up * alpha
         else:
-            delta = delta_down*alpha
+            delta = delta_down * alpha
         return delta
 
     return _slow_hfinterp_looper(histogramssets, alphasets, summand)
 
+
 def _hfinterp_code1(histogramssets, alphasets):
     tensorlib, _ = get_backend()
     interpolator = _hfinterpolator_code1(tensorlib.tolist(histogramssets))
     return interpolator(alphasets)
 
+
 class _hfinterpolator_code1(object):
     def __init__(self, histogramssets):
         # nb: this should never be a tensor, store in default backend (e.g. numpy)
@@ -94,20 +114,37 @@ def __init__(self, histogramssets):
 
     def _precompute(self):
         tensorlib, _ = get_backend()
-        self.deltas_up = tensorlib.astensor(default_backend.divide(self._histogramssets[:,:,2], self._histogramssets[:,:,1]))
-        self.deltas_dn = tensorlib.astensor(default_backend.divide(self._histogramssets[:,:,0], self._histogramssets[:,:,1]))
+        self.deltas_up = tensorlib.astensor(
+            default_backend.divide(
+                self._histogramssets[:, :, 2], self._histogramssets[:, :, 1]
+            )
+        )
+        self.deltas_dn = tensorlib.astensor(
+            default_backend.divide(
+                self._histogramssets[:, :, 0], self._histogramssets[:, :, 1]
+            )
+        )
         self.broadcast_helper = tensorlib.ones(self.deltas_up.shape)
-        self.bases_up = tensorlib.einsum('sa,shb->shab', tensorlib.ones(self.alphasets_shape), self.deltas_up)
-        self.bases_dn = tensorlib.einsum('sa,shb->shab', tensorlib.ones(self.alphasets_shape), self.deltas_dn)
-        self.mask_on  = tensorlib.ones(self.alphasets_shape)
+        self.bases_up = tensorlib.einsum(
+            'sa,shb->shab', tensorlib.ones(self.alphasets_shape), self.deltas_up
+        )
+        self.bases_dn = tensorlib.einsum(
+            'sa,shb->shab', tensorlib.ones(self.alphasets_shape), self.deltas_dn
+        )
+        self.mask_on = tensorlib.ones(self.alphasets_shape)
         self.mask_off = tensorlib.zeros(self.alphasets_shape)
 
     def _precompute_alphasets(self, alphasets_shape):
-        if alphasets_shape == self.alphasets_shape: return
+        if alphasets_shape == self.alphasets_shape:
+            return
         tensorlib, _ = get_backend()
-        self.bases_up = tensorlib.einsum('sa,shb->shab', tensorlib.ones(alphasets_shape), self.deltas_up)
-        self.bases_dn = tensorlib.einsum('sa,shb->shab', tensorlib.ones(alphasets_shape), self.deltas_dn)
-        self.mask_on  = tensorlib.ones(alphasets_shape)
+        self.bases_up = tensorlib.einsum(
+            'sa,shb->shab', tensorlib.ones(alphasets_shape), self.deltas_up
+        )
+        self.bases_dn = tensorlib.einsum(
+            'sa,shb->shab', tensorlib.ones(alphasets_shape), self.deltas_dn
+        )
+        self.mask_on = tensorlib.ones(alphasets_shape)
         self.mask_off = tensorlib.zeros(alphasets_shape)
         self.alphasets_shape = alphasets_shape
         return
@@ -115,29 +152,33 @@ def _precompute_alphasets(self, alphasets_shape):
     def __call__(self, alphasets):
         tensorlib, _ = get_backend()
         self._precompute_alphasets(tensorlib.shape(alphasets))
-        where_alphasets_positive = tensorlib.where(alphasets > 0, self.mask_on, self.mask_off)
+        where_alphasets_positive = tensorlib.where(
+            alphasets > 0, self.mask_on, self.mask_off
+        )
 
         # s: set under consideration (i.e. the modifier)
        # a: alpha variation
         # h: histogram affected by modifier
         # b: bin of histogram
-        exponents = tensorlib.einsum('sa,shb->shab', tensorlib.abs(alphasets), self.broadcast_helper)
-        masks = tensorlib.einsum('sa,shb->shab', where_alphasets_positive, self.broadcast_helper)
+        exponents = tensorlib.einsum(
+            'sa,shb->shab', tensorlib.abs(alphasets), self.broadcast_helper
+        )
+        masks = tensorlib.einsum(
+            'sa,shb->shab', where_alphasets_positive, self.broadcast_helper
+        )
 
         bases = tensorlib.where(masks, self.bases_up, self.bases_dn)
         return tensorlib.power(bases, exponents)
 
-
-
 def _slow_hfinterp_code1(histogramssets, alphasets):
     def product(down, nom, up, alpha):
-        delta_up = up/nom
-        delta_down = down/nom
+        delta_up = up / nom
+        delta_down = down / nom
         if alpha > 0:
-            delta = delta_up**alpha
+            delta = delta_up ** alpha
         else:
-            delta = delta_down**(-alpha)
+            delta = delta_down ** (-alpha)
         return delta
 
     return _slow_hfinterp_looper(histogramssets, alphasets, product)
@@ -145,8 +186,10 @@ def product(down, nom, up, alpha):
 
 # interpolation codes come from https://cds.cern.ch/record/1456844/files/CERN-OPEN-2012-016.pdf
 def interpolator(interpcode, do_tensorized_calc=True):
-    interpcodes = {0: _hfinterp_code0 if do_tensorized_calc else _slow_hfinterp_code0,
-                   1: _hfinterp_code1 if do_tensorized_calc else _slow_hfinterp_code1}
+    interpcodes = {
+        0: _hfinterp_code0 if do_tensorized_calc else _slow_hfinterp_code0,
+        1: _hfinterp_code1 if do_tensorized_calc else _slow_hfinterp_code1,
+    }
 
     try:
         return interpcodes[interpcode]
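Annotation (not part of the patch): the per-bin formulas behind interpolation codes 0 and 1, as spelled out by `_slow_hfinterp_code0`/`_slow_hfinterp_code1` above; the tensorized classes compute the same thing batched via `einsum`.

```python
# Code 0: piecewise-linear additive shift; code 1: piecewise-exponential
# multiplicative factor. Values below are illustrative.
def code0_delta(down, nom, up, alpha):
    return (up - nom) * alpha if alpha > 0 else (nom - down) * alpha

def code1_factor(down, nom, up, alpha):
    return (up / nom) ** alpha if alpha > 0 else (down / nom) ** (-alpha)

print(code0_delta(8.0, 10.0, 14.0, 0.5))   # 2.0
print(code1_factor(8.0, 10.0, 14.0, 0.5))  # ~1.18
```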
diff --git a/pyhf/paramsets.py b/pyhf/paramsets.py
index ddcc59c6c2..0bd81ea331 100644
--- a/pyhf/paramsets.py
+++ b/pyhf/paramsets.py
@@ -1,27 +1,31 @@
 from . import get_backend
 from . import exceptions
 
+
 class paramset(object):
     def __init__(self, **kwargs):
         self.n_parameters = kwargs.pop('n_parameters')
         self.suggested_init = kwargs.pop('inits')
         self.suggested_bounds = kwargs.pop('bounds')
 
+
 class unconstrained(paramset):
     pass
 
+
 class constrained_by_normal(paramset):
     def __init__(self, **kwargs):
-        super(constrained_by_normal,self).__init__(**kwargs)
+        super(constrained_by_normal, self).__init__(**kwargs)
         self.pdf_type = 'normal'
         self.auxdata = kwargs.pop('auxdata')
 
     def expected_data(self, pars):
         return pars
 
+
 class constrained_by_poisson(paramset):
     def __init__(self, **kwargs):
-        super(constrained_by_poisson,self).__init__(**kwargs)
+        super(constrained_by_poisson, self).__init__(**kwargs)
         self.pdf_type = 'poisson'
         self.auxdata = kwargs.pop('auxdata')
         self.factors = kwargs.pop('factors')
@@ -29,22 +33,23 @@ def __init__(self, **kwargs):
     def expected_data(self, pars):
         tensorlib, _ = get_backend()
         return tensorlib.product(
-            tensorlib.stack([pars, tensorlib.astensor(self.factors)]
-            ),
-            axis=0
+            tensorlib.stack([pars, tensorlib.astensor(self.factors)]), axis=0
         )
 
+
 def reduce_paramset_requirements(paramset_requirements):
     reduced_paramset_requirements = {}
 
     # nb: normsys and histosys have different op_codes so can't currently be shared
-    param_keys = ['paramset_type',
-                  'n_parameters',
-                  'op_code',
-                  'inits',
-                  'bounds',
-                  'auxdata',
-                  'factors']
+    param_keys = [
+        'paramset_type',
+        'n_parameters',
+        'op_code',
+        'inits',
+        'bounds',
+        'auxdata',
+        'factors',
+    ]
 
     for param_name in list(paramset_requirements.keys()):
         params = paramset_requirements[param_name]
@@ -56,10 +61,15 @@ def reduce_paramset_requirements(paramset_requirements):
 
         for k in param_keys:
             if len(combined_param[k]) != 1:
-                raise exceptions.InvalidNameReuse("Multiple values for '{}' ({}) were found for {}. Use unique modifier names or use qualify_names=True when constructing the pdf.".format(k, list(combined_param[k]), param_name))
+                raise exceptions.InvalidNameReuse(
+                    "Multiple values for '{}' ({}) were found for {}. Use unique modifier names or use qualify_names=True when constructing the pdf.".format(
+                        k, list(combined_param[k]), param_name
+                    )
+                )
             else:
                 v = combined_param[k].pop()
-                if isinstance(v, tuple): v = list(v)
+                if isinstance(v, tuple):
+                    v = list(v)
                 combined_param[k] = v
 
         reduced_paramset_requirements[param_name] = combined_param
diff --git a/pyhf/pdf.py b/pyhf/pdf.py
index 5aa2350d03..a9b6b576c4 100644
--- a/pyhf/pdf.py
+++ b/pyhf/pdf.py
@@ -1,6 +1,5 @@
 import copy
 import logging
-log = logging.getLogger(__name__)
 
 from . import get_backend, default_backend
 from . import exceptions
@@ -9,10 +8,11 @@
 from .constraints import gaussian_constraint_combined, poisson_constraint_combined
 from .paramsets import reduce_paramset_requirements
 
+log = logging.getLogger(__name__)
+
 
 class _ModelConfig(object):
-    def __init__(self, spec, poiname = 'mu', qualify_names = False):
+    def __init__(self, spec, poiname='mu', qualify_names=False):
         self.poi_index = None
         self.par_map = {}
         self.par_order = []
@@ -38,7 +38,9 @@ def __init__(self, spec, poiname = 'mu', qualify_names = False):
             for modifier_def in sample['modifiers']:
                 self.parameters.append(modifier_def['name'])
                 if qualify_names:
-                    fullname = '{}/{}'.format(modifier_def['type'],modifier_def['name'])
+                    fullname = '{}/{}'.format(
+                        modifier_def['type'], modifier_def['name']
+                    )
                     if modifier_def['name'] == poiname:
                         poiname = fullname
                     modifier_def['name'] = fullname
@@ -46,19 +48,38 @@ def __init__(self, spec, poiname = 'mu', qualify_names = False):
                 # get the paramset requirements for the given modifier. If
                 # modifier does not exist, we'll have a KeyError
                 try:
-                    paramset_requirements = modifiers.registry[modifier_def['type']].required_parset(len(sample['data']))
+                    paramset_requirements = modifiers.registry[
+                        modifier_def['type']
+                    ].required_parset(len(sample['data']))
                 except KeyError:
-                    log.exception('Modifier not implemented yet (processing {0:s}). Available modifiers: {1}'.format(modifier_def['type'], modifiers.registry.keys()))
+                    log.exception(
+                        'Modifier not implemented yet (processing {0:s}). Available modifiers: {1}'.format(
+                            modifier_def['type'], modifiers.registry.keys()
+                        )
+                    )
                     raise exceptions.InvalidModifier()
-                self.modifiers.append((modifier_def['name'],modifier_def['type']))
+                self.modifiers.append((modifier_def['name'], modifier_def['type']))
 
                 # check the shareability (e.g. for shapesys for example)
                 is_shared = paramset_requirements['is_shared']
-                if not(is_shared) and modifier_def['name'] in _paramsets_requirements:
-                    raise ValueError("Trying to add unshared-paramset but other paramsets exist with the same name.")
-                if is_shared and not(_paramsets_requirements.get(modifier_def['name'], [{'is_shared': True}])[0]['is_shared']):
-                    raise ValueError("Trying to add shared-paramset but other paramset of same name is indicated to be unshared.")
-                _paramsets_requirements.setdefault(modifier_def['name'],[]).append(paramset_requirements)
+                if (
+                    not (is_shared)
+                    and modifier_def['name'] in _paramsets_requirements
+                ):
+                    raise ValueError(
+                        "Trying to add unshared-paramset but other paramsets exist with the same name."
+                    )
+                if is_shared and not (
+                    _paramsets_requirements.get(
+                        modifier_def['name'], [{'is_shared': True}]
+                    )[0]['is_shared']
+                ):
+                    raise ValueError(
+                        "Trying to add shared-paramset but other paramset of same name is indicated to be unshared."
+                    )
+                _paramsets_requirements.setdefault(modifier_def['name'], []).append(
+                    paramset_requirements
+                )
 
         self.channels = list(set(self.channels))
         self.samples = list(set(self.samples))
@@ -86,34 +107,42 @@ def par_slice(self, name):
     def param_set(self, name):
         return self.par_map[name]['paramset']
 
-    def set_poi(self,name):
-        if name not in [x for x,_ in self.modifiers]:
-            raise exceptions.InvalidModel("The paramter of interest '{0:s}' cannot be fit as it is not declared in the model specification.".format(name))
+    def set_poi(self, name):
+        if name not in [x for x, _ in self.modifiers]:
+            raise exceptions.InvalidModel(
+                "The parameter of interest '{0:s}' cannot be fit as it is not declared in the model specification.".format(
+                    name
+                )
+            )
         s = self.par_slice(name)
-        assert s.stop-s.start == 1
+        assert s.stop - s.start == 1
         self.poi_index = s.start
 
     def _register_paramset(self, param_name, paramset):
         '''allocates n nuisance parameters and stores paramset > modifier map'''
-        log.info('adding modifier %s (%s new nuisance parameters)', param_name, paramset.n_parameters)
+        log.info(
+            'adding modifier %s (%s new nuisance parameters)',
+            param_name,
+            paramset.n_parameters,
+        )
 
         sl = slice(self.next_index, self.next_index + paramset.n_parameters)
         self.next_index = self.next_index + paramset.n_parameters
         self.par_order.append(param_name)
-        self.par_map[param_name] = {
-            'slice': sl,
-            'paramset': paramset,
-        }
+        self.par_map[param_name] = {'slice': sl, 'paramset': paramset}
 
     def _create_and_register_paramsets(self, paramsets_requirements):
-        for param_name, paramset_requirements in reduce_paramset_requirements(paramsets_requirements).items():
+        for param_name, paramset_requirements in reduce_paramset_requirements(
+            paramsets_requirements
+        ).items():
            paramset_type = paramset_requirements.get('paramset_type')
            paramset = paramset_type(**paramset_requirements)
            self._register_paramset(param_name, paramset)
 
+
 class Model(object):
     def __init__(self, spec, **config_kwargs):
-        self.spec = copy.deepcopy(spec) #may get modified by config
+        self.spec = copy.deepcopy(spec)  # may get modified by config
         self.schema = config_kwargs.pop('schema', utils.get_default_schema())
         # run jsonschema validation of input specification against the (provided) schema
         log.info("Validating spec against schema: {0:s}".format(self.schema))
@@ -123,28 +152,31 @@ def __init__(self, spec, **config_kwargs):
 
         self._create_nominal_and_modifiers()
 
-        #this is tricky, must happen before constraint
-        #terms try to access auxdata but after
-        #combined mods have been created that
-        #set the aux data
+        # this is tricky, must happen before constraint
+        # terms try to access auxdata but after
+        # combined mods have been created that
+        # set the aux data
         for k in sorted(self.config.par_map.keys()):
             parset = self.config.param_set(k)
-            if hasattr(parset,'pdf_type'): #is constrained
+            if hasattr(parset, 'pdf_type'):  # is constrained
                 self.config.auxdata += parset.auxdata
                 self.config.auxdata_order.append(k)
 
-
         self.constraints_gaussian = gaussian_constraint_combined(self.config)
         self.constraints_poisson = poisson_constraint_combined(self.config)
 
-
     def _create_nominal_and_modifiers(self):
         default_data_makers = {
-            'histosys': lambda: {'hi_data': [], 'lo_data': [], 'nom_data': [],'mask': []},
+            'histosys': lambda: {
+                'hi_data': [],
+                'lo_data': [],
+                'nom_data': [],
+                'mask': [],
+            },
             'normsys': lambda: {'hi': [], 'lo': [], 'nom_data': [], 'mask': []},
             'normfactor': lambda: {'mask': []},
             'shapefactor': lambda: {'mask': []},
-            'shapesys': lambda: {'mask': [], 'uncrt': [], 'nom_data' :[]},
+            'shapesys': lambda: {'mask': [], 'uncrt': [], 'nom_data': []},
             'staterror': lambda: {'mask': [], 'uncrt': [], 'nom_data': []},
         }
 
@@ -159,31 +191,39 @@ def _create_nominal_and_modifiers(self):
         # We don't actually set up the modifier data here for no-ops, but we do
         # set up the entire structure
         mega_mods = {}
-        for m,mtype in self.config.modifiers:
+        for m, mtype in self.config.modifiers:
             for s in self.config.samples:
-                mega_mods.setdefault(s,{})[m] = {
+                mega_mods.setdefault(s, {})[m] = {
                     'type': mtype,
                     'name': m,
-                    'data': default_data_makers[mtype]()
+                    'data': default_data_makers[mtype](),
                 }
 
         # helper maps channel-name/sample-name to pairs of channel-sample structs
         helper = {}
         for c in self.spec['channels']:
             for s in c['samples']:
-                helper.setdefault(c['name'],{})[s['name']] = (c,s)
+                helper.setdefault(c['name'], {})[s['name']] = (c, s)
 
         mega_samples = {}
         for s in self.config.samples:
             mega_nom = []
             for c in self.config.channels:
-                defined_samp = helper.get(c,{}).get(s)
+                defined_samp = helper.get(c, {}).get(s)
                 defined_samp = None if not defined_samp else defined_samp[1]
                 # set nominal to 0 for channel/sample if the pair doesn't exist
-                nom = defined_samp['data'] if defined_samp else [0.0]*self.config.channel_nbins[c]
+                nom = (
+                    defined_samp['data']
+                    if defined_samp
+                    else [0.0] * self.config.channel_nbins[c]
+                )
                 mega_nom += nom
-                defined_mods = {x['name']:x for x in defined_samp['modifiers']} if defined_samp else {}
-                for m,mtype in self.config.modifiers:
+                defined_mods = (
+                    {x['name']: x for x in defined_samp['modifiers']}
+                    if defined_samp
+                    else {}
+                )
+                for m, mtype in self.config.modifiers:
                     # this is None if modifier doesn't affect channel/sample.
                     thismod = defined_mods.get(m)
                     if mtype == 'histosys':
@@ -193,55 +233,68 @@ def _create_nominal_and_modifiers(self):
                         mega_mods[s][m]['data']['lo_data'] += lo_data
                         mega_mods[s][m]['data']['hi_data'] += hi_data
                         mega_mods[s][m]['data']['nom_data'] += nom
-                        mega_mods[s][m]['data']['mask'] += [maskval]*len(nom) #broadcasting
+                        mega_mods[s][m]['data']['mask'] += [maskval] * len(
+                            nom
+                        )  # broadcasting
                         pass
                     elif mtype == 'normsys':
                         maskval = True if thismod else False
                         lo_factor = thismod['data']['lo'] if thismod else 1.0
                         hi_factor = thismod['data']['hi'] if thismod else 1.0
-                        mega_mods[s][m]['data']['nom_data'] += [1.0]*len(nom)
-                        mega_mods[s][m]['data']['lo'] += [lo_factor]*len(nom) #broadcasting
-                        mega_mods[s][m]['data']['hi'] += [hi_factor]*len(nom)
-                        mega_mods[s][m]['data']['mask'] += [maskval] *len(nom) #broadcasting
+                        mega_mods[s][m]['data']['nom_data'] += [1.0] * len(nom)
+                        mega_mods[s][m]['data']['lo'] += [lo_factor] * len(
+                            nom
+                        )  # broadcasting
+                        mega_mods[s][m]['data']['hi'] += [hi_factor] * len(nom)
+                        mega_mods[s][m]['data']['mask'] += [maskval] * len(
+                            nom
+                        )  # broadcasting
                     elif mtype in ['normfactor', 'shapefactor']:
                         maskval = True if thismod else False
-                        mega_mods[s][m]['data']['mask'] += [maskval]*len(nom) #broadcasting
+                        mega_mods[s][m]['data']['mask'] += [maskval] * len(
+                            nom
+                        )  # broadcasting
                     elif mtype in ['shapesys', 'staterror']:
-                        uncrt = thismod['data'] if thismod else [0.0]*len(nom)
-                        maskval = [True if thismod else False]*len(nom)
-                        mega_mods[s][m]['data']['mask']  += maskval
+                        uncrt = thismod['data'] if thismod else [0.0] * len(nom)
+                        maskval = [True if thismod else False] * len(nom)
+                        mega_mods[s][m]['data']['mask'] += maskval
                         mega_mods[s][m]['data']['uncrt'] += uncrt
                         mega_mods[s][m]['data']['nom_data'] += nom
                     else:
-                        raise RuntimeError('not sure how to combine {mtype} into the mega-channel'.format(mtype = mtype))
+                        raise RuntimeError(
+                            'not sure how to combine {mtype} into the mega-channel'.format(
+                                mtype=mtype
+                            )
+                        )
             sample_dict = {
                 'name': 'mega_{}'.format(s),
                 'nom': mega_nom,
-                'modifiers': list(mega_mods[s].values())
+                'modifiers': list(mega_mods[s].values()),
             }
             mega_samples[s] = sample_dict
 
         self.mega_mods = mega_mods
 
-
-        tensorlib,_ = get_backend()
+        tensorlib, _ = get_backend()
         thenom = default_backend.astensor(
             [mega_samples[s]['nom'] for s in self.config.samples]
         )
-        self.thenom = default_backend.reshape(thenom,(
-            1,
-            len(self.config.samples),
-            1,
-            sum(list(self.config.channel_nbins.values()))
-            )
+        self.thenom = default_backend.reshape(
+            thenom,
+            (
+                1,
+                len(self.config.samples),
+                1,
+                sum(list(self.config.channel_nbins.values())),
+            ),
         )
         self.modifiers_appliers = {
-            k:c(
-                [m for m,mtype in self.config.modifiers if mtype == k],
+            k: c(
+                [m for m, mtype in self.config.modifiers if mtype == k],
                 self.config,
-                mega_mods
+                mega_mods,
             )
-            for k,c in modifiers.combined.items()
+            for k, c in modifiers.combined.items()
         }
 
     def expected_auxdata(self, pars):
@@ -250,27 +303,32 @@ def expected_auxdata(self, pars):
         for parname in self.config.auxdata_order:
             # order matters! because we generated auxdata in a certain order
             thisaux = self.config.param_set(parname).expected_data(
-                pars[self.config.par_slice(parname)])
+                pars[self.config.par_slice(parname)]
+            )
             tocat = [thisaux] if auxdata is None else [auxdata, thisaux]
             auxdata = tensorlib.concatenate(tocat)
         return auxdata
 
-    def _modifications(self,pars):
-        factor_mods = ['normsys','staterror','shapesys','normfactor', 'shapefactor']
-        delta_mods = ['histosys']
+    def _modifications(self, pars):
+        factor_mods = ['normsys', 'staterror', 'shapesys', 'normfactor', 'shapefactor']
+        delta_mods = ['histosys']
 
-        deltas = list(filter(lambda x: x is not None,[
-            self.modifiers_appliers[k].apply(pars)
-            for k in delta_mods
-        ]))
-        factors = list(filter(lambda x: x is not None,[
-            self.modifiers_appliers[k].apply(pars)
-            for k in factor_mods
-        ]))
+        deltas = list(
+            filter(
+                lambda x: x is not None,
+                [self.modifiers_appliers[k].apply(pars) for k in delta_mods],
+            )
+        )
+        factors = list(
+            filter(
+                lambda x: x is not None,
+                [self.modifiers_appliers[k].apply(pars) for k in factor_mods],
+            )
+        )
 
         return deltas, factors
 
-    def expected_actualdata(self,pars):
+    def expected_actualdata(self, pars):
         """
         For a single channel single sample, we compute
 
@@ -301,14 +359,16 @@ def expected_actualdata(self,pars):
 
         allsum = tensorlib.concatenate(deltas + [tensorlib.astensor(self.thenom)])
 
-        nom_plus_delta = tensorlib.sum(allsum,axis=0)
-        nom_plus_delta = tensorlib.reshape(nom_plus_delta,(1,)+tensorlib.shape(nom_plus_delta))
+        nom_plus_delta = tensorlib.sum(allsum, axis=0)
+        nom_plus_delta = tensorlib.reshape(
+            nom_plus_delta, (1,) + tensorlib.shape(nom_plus_delta)
+        )
 
         allfac = tensorlib.concatenate(factors + [nom_plus_delta])
 
-        newbysample = tensorlib.product(allfac,axis=0)
-        newresults = tensorlib.sum(newbysample,axis=0)
-        return newresults[0] #only one alphas
+        newbysample = tensorlib.product(allfac, axis=0)
+        newresults = tensorlib.sum(newbysample, axis=0)
+        return newresults[0]  # only one alphas
 
     def expected_data(self, pars, include_auxdata=True):
         tensorlib, _ = get_backend()
@@ -318,20 +378,24 @@ def expected_data(self, pars, include_auxdata=True):
         if not include_auxdata:
             return expected_actual
         expected_constraints = self.expected_auxdata(pars)
-        tocat = [expected_actual] if expected_constraints is None else [expected_actual,expected_constraints]
+        tocat = (
+            [expected_actual]
+            if expected_constraints is None
+            else [expected_actual, expected_constraints]
+        )
         return tensorlib.concatenate(tocat)
 
     def constraint_logpdf(self, auxdata, pars):
-        normal = self.constraints_gaussian.logpdf(auxdata,pars)
-        poisson = self.constraints_poisson.logpdf(auxdata,pars)
+        normal = self.constraints_gaussian.logpdf(auxdata, pars)
+        poisson = self.constraints_poisson.logpdf(auxdata, pars)
         return normal + poisson
 
     def mainlogpdf(self, maindata, pars):
         tensorlib, _ = get_backend()
         lambdas_data = self.expected_actualdata(pars)
-        summands  = tensorlib.poisson_logpdf(maindata, lambdas_data)
-        tosum     = tensorlib.boolean_mask(summands,tensorlib.isfinite(summands))
-        mainpdf   = tensorlib.sum(tosum)
+        summands = tensorlib.poisson_logpdf(maindata, lambdas_data)
+        tosum = tensorlib.boolean_mask(summands, tensorlib.isfinite(summands))
+        mainpdf = tensorlib.sum(tosum)
         return mainpdf
 
     def logpdf(self, pars, data):
@@ -341,16 +405,19 @@ def logpdf(self, pars, data):
             cut = tensorlib.shape(data)[0] - len(self.config.auxdata)
             actual_data, aux_data = data[:cut], data[cut:]
 
-            mainpdf = self.mainlogpdf(actual_data,pars)
+            mainpdf = self.mainlogpdf(actual_data, pars)
             constraint = self.constraint_logpdf(aux_data, pars)
 
             result = mainpdf + constraint
-            return result * tensorlib.ones((1)) #ensure (1,) array shape also for numpy
+            return result * tensorlib.ones(
+                (1)
+            )  # ensure (1,) array shape also for numpy
         except:
-            log.error('eval failed for data {} pars: {}'.format(
-                tensorlib.tolist(data),
-                tensorlib.tolist(pars)
-            ))
+            log.error(
+                'eval failed for data {} pars: {}'.format(
+                    tensorlib.tolist(data), tensorlib.tolist(pars)
+                )
+            )
            raise
 
     def pdf(self, pars, data):
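Annotation (not part of the patch): a usage sketch for the reformatted `Model` class, built via the `hepdata_like` helper from this same diff; the counts are illustrative.

```python
# Build a one-bin signal+background model and evaluate its log-likelihood.
from pyhf.simplemodels import hepdata_like

pdf = hepdata_like(signal_data=[12.0], bkg_data=[50.0], bkg_uncerts=[3.0])
pars = pdf.config.suggested_init()
data = [55.0] + pdf.config.auxdata  # main-channel counts, then auxdata

print(pdf.logpdf(pars, data))   # (1,)-shaped log-likelihood
print(pdf.expected_data(pars))  # expected main counts followed by auxdata
```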
diff --git a/pyhf/readxml.py b/pyhf/readxml.py
index 64ad5fa225..05993fd578 100644
--- a/pyhf/readxml.py
+++ b/pyhf/readxml.py
@@ -1,11 +1,12 @@
 import logging
-log = logging.getLogger(__name__)
 
 import os
 import xml.etree.ElementTree as ET
 import numpy as np
 import tqdm
 
+log = logging.getLogger(__name__)
+
 
 def extract_error(h):
     """
@@ -24,8 +25,10 @@ def extract_error(h):
     err = h.variances if h.variances else h.numpy()[0]
     return np.sqrt(err).tolist()
 
+
 def import_root_histogram(rootdir, filename, path, name):
     import uproot
+
     # strip leading slashes as uproot doesn't use "/" for top-level
     path = path or ''
     path = path.strip('/')
@@ -36,98 +39,120 @@ def import_root_histogram(rootdir, filename, path, name):
     try:
         h = f[os.path.join(path, name)]
     except KeyError:
-        raise KeyError('Both {0:s} and {1:s} were tried and not found in {2:s}'.format(name, os.path.join(path, name), os.path.join(rootdir, filename)))
+        raise KeyError(
+            'Both {0:s} and {1:s} were tried and not found in {2:s}'.format(
+                name, os.path.join(path, name), os.path.join(rootdir, filename)
+            )
+        )
     return h.numpy()[0].tolist(), extract_error(h)
 
-def process_sample(sample,rootdir,inputfile, histopath, channelname, track_progress=False):
+
+def process_sample(
+    sample, rootdir, inputfile, histopath, channelname, track_progress=False
+):
     if 'InputFile' in sample.attrib:
-        inputfile  = sample.attrib.get('InputFile')
+        inputfile = sample.attrib.get('InputFile')
     if 'HistoPath' in sample.attrib:
         histopath = sample.attrib.get('HistoPath')
     histoname = sample.attrib['HistoName']
 
-    data,err = import_root_histogram(rootdir, inputfile, histopath, histoname)
+    data, err = import_root_histogram(rootdir, inputfile, histopath, histoname)
 
     modifiers = []
 
-    modtags = tqdm.tqdm(sample.iter(), unit='modifier', disable=not(track_progress), total=len(sample))
+    modtags = tqdm.tqdm(
+        sample.iter(), unit='modifier', disable=not (track_progress), total=len(sample)
+    )
 
     for modtag in modtags:
-        modtags.set_description(' - modifier {0:s}({1:s})'.format(modtag.attrib.get('Name', 'n/a'), modtag.tag))
+        modtags.set_description(
+            ' - modifier {0:s}({1:s})'.format(
+                modtag.attrib.get('Name', 'n/a'), modtag.tag
+            )
+        )
         if modtag == sample:
             continue
        if modtag.tag == 'OverallSys':
-            modifiers.append({
-                'name': modtag.attrib['Name'],
-                'type': 'normsys',
-                'data': {'lo': float(modtag.attrib['Low']), 'hi': float(modtag.attrib['High'])}
-            })
+            modifiers.append(
+                {
+                    'name': modtag.attrib['Name'],
+                    'type': 'normsys',
+                    'data': {
+                        'lo': float(modtag.attrib['Low']),
+                        'hi': float(modtag.attrib['High']),
+                    },
+                }
+            )
         elif modtag.tag == 'NormFactor':
-            modifiers.append({
-                'name': modtag.attrib['Name'],
-                'type': 'normfactor',
-                'data': None
-            })
+            modifiers.append(
+                {'name': modtag.attrib['Name'], 'type': 'normfactor', 'data': None}
+            )
         elif modtag.tag == 'HistoSys':
-            lo,_ = import_root_histogram(rootdir,
-                    modtag.attrib.get('HistoFileLow',inputfile),
-                    modtag.attrib.get('HistoPathLow',''),
-                    modtag.attrib['HistoNameLow']
-                    )
-            hi,_ = import_root_histogram(rootdir,
-                    modtag.attrib.get('HistoFileHigh',inputfile),
-                    modtag.attrib.get('HistoPathHigh',''),
-                    modtag.attrib['HistoNameHigh']
-                    )
-            modifiers.append({
-                'name': modtag.attrib['Name'],
-                'type': 'histosys',
-                'data': {'lo_data': lo, 'hi_data': hi}
-            })
+            lo, _ = import_root_histogram(
+                rootdir,
+                modtag.attrib.get('HistoFileLow', inputfile),
+                modtag.attrib.get('HistoPathLow', ''),
+                modtag.attrib['HistoNameLow'],
+            )
+            hi, _ = import_root_histogram(
+                rootdir,
+                modtag.attrib.get('HistoFileHigh', inputfile),
+                modtag.attrib.get('HistoPathHigh', ''),
+                modtag.attrib['HistoNameHigh'],
+            )
+            modifiers.append(
+                {
+                    'name': modtag.attrib['Name'],
+                    'type': 'histosys',
+                    'data': {'lo_data': lo, 'hi_data': hi},
+                }
+            )
         elif modtag.tag == 'StatError' and modtag.attrib['Activate'] == 'True':
-            if modtag.attrib.get('HistoName','') == '':
+            if modtag.attrib.get('HistoName', '') == '':
                 staterr = err
             else:
-                extstat,_ = import_root_histogram(rootdir,
-                    modtag.attrib.get('HistoFile',inputfile),
-                    modtag.attrib.get('HistoPath',''),
-                    modtag.attrib['HistoName']
+                extstat, _ = import_root_histogram(
+                    rootdir,
+                    modtag.attrib.get('HistoFile', inputfile),
+                    modtag.attrib.get('HistoPath', ''),
+                    modtag.attrib['HistoName'],
                 )
-                staterr = np.multiply(extstat,data).tolist()
+                staterr = np.multiply(extstat, data).tolist()
             if not staterr:
                 raise RuntimeError('cannot determine stat error.')
-            modifiers.append({
-                'name': 'staterror_{}'.format(channelname),
-                'type': 'staterror',
-                'data': staterr
-            })
+            modifiers.append(
+                {
+                    'name': 'staterror_{}'.format(channelname),
+                    'type': 'staterror',
+                    'data': staterr,
+                }
+            )
         else:
             log.warning('not considering modifier tag %s', modtag)
 
+    return {'name': sample.attrib['Name'], 'data': data, 'modifiers': modifiers}
 
-    return {
-        'name': sample.attrib['Name'],
-        'data': data,
-        'modifiers': modifiers
-    }
 
-def process_data(sample,rootdir,inputfile, histopath):
+def process_data(sample, rootdir, inputfile, histopath):
     if 'InputFile' in sample.attrib:
-        inputfile  = sample.attrib.get('InputFile')
+        inputfile = sample.attrib.get('InputFile')
     if 'HistoPath' in sample.attrib:
         histopath = sample.attrib.get('HistoPath')
     histoname = sample.attrib['HistoName']
 
-    data,_ = import_root_histogram(rootdir, inputfile, histopath, histoname)
+    data, _ = import_root_histogram(rootdir, inputfile, histopath, histoname)
     return data
 
+
 def process_channel(channelxml, rootdir, track_progress=False):
     channel = channelxml.getroot()
 
     inputfile = channel.attrib.get('InputFile')
     histopath = channel.attrib.get('HistoPath')
 
-    samples = tqdm.tqdm(channel.findall('Sample'), unit='sample', disable=not(track_progress))
+    samples = tqdm.tqdm(
+        channel.findall('Sample'), unit='sample', disable=not (track_progress)
+    )
 
     data = channel.findall('Data')
     if data:
@@ -138,27 +163,39 @@ def process_channel(channelxml, rootdir, track_progress=False):
 
     results = []
     for sample in samples:
-      samples.set_description('  - sample {}'.format(sample.attrib.get('Name')))
-      result = process_sample(sample, rootdir, inputfile, histopath, channelname, track_progress)
-      results.append(result)
+        samples.set_description('  - sample {}'.format(sample.attrib.get('Name')))
+        result = process_sample(
+            sample, rootdir, inputfile, histopath, channelname, track_progress
+        )
+        results.append(result)
 
     return channelname, parsed_data, results
 
+
 def parse(configfile, rootdir, track_progress=False):
     toplvl = ET.parse(configfile)
-    inputs = tqdm.tqdm([x.text for x in toplvl.findall('Input')], unit='channel', disable=not(track_progress))
+    inputs = tqdm.tqdm(
+        [x.text for x in toplvl.findall('Input')],
+        unit='channel',
+        disable=not (track_progress),
+    )
 
     channels = {}
     for inp in inputs:
         inputs.set_description('Processing {}'.format(inp))
-        channel, data, samples = process_channel(ET.parse(os.path.join(rootdir,inp)), rootdir, track_progress)
+        channel, data, samples = process_channel(
+            ET.parse(os.path.join(rootdir, inp)), rootdir, track_progress
+        )
         channels[channel] = {'data': data, 'samples': samples}
 
     return {
-        'toplvl':{
-            'resultprefix':toplvl.getroot().attrib['OutputFilePrefix'],
-            'measurements': [{'name': x.attrib['Name'], 'config': {'poi': x.findall('POI')[0].text}} for x in toplvl.findall('Measurement')]
+        'toplvl': {
+            'resultprefix': toplvl.getroot().attrib['OutputFilePrefix'],
+            'measurements': [
+                {'name': x.attrib['Name'], 'config': {'poi': x.findall('POI')[0].text}}
+                for x in toplvl.findall('Measurement')
+            ],
        },
-        'channels': [{'name': k, 'samples': v['samples']} for k,v in channels.items()],
-        'data': {k:v['data'] for k,v in channels.items()}
+        'channels': [{'name': k, 'samples': v['samples']} for k, v in channels.items()],
+        'data': {k: v['data'] for k, v in channels.items()},
     }
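Annotation (not part of the patch): how the `xml2json` command drives `parse()`; `config/example.xml` and its ROOT inputs are hypothetical placeholders.

```python
# Convert a HistFactory XML configuration into the pyhf JSON spec.
import json
from pyhf import readxml

spec = readxml.parse('config/example.xml', rootdir='.', track_progress=False)
print(json.dumps(spec['toplvl'], indent=2))
```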
diff --git a/pyhf/simplemodels.py b/pyhf/simplemodels.py
index 7cea8560fc..0d0eceabd4 100644
--- a/pyhf/simplemodels.py
+++ b/pyhf/simplemodels.py
@@ -1,5 +1,6 @@
 from . import Model
 
+
 def hepdata_like(signal_data, bkg_data, bkg_uncerts):
     spec = {
         'channels': [
@@ -11,16 +12,20 @@ def hepdata_like(signal_data, bkg_data, bkg_uncerts):
                         'data': signal_data,
                         'modifiers': [
                             {'name': 'mu', 'type': 'normfactor', 'data': None}
-                        ]
+                        ],
                     },
                     {
                         'name': 'background',
                         'data': bkg_data,
                         'modifiers': [
-                            {'name': 'uncorr_bkguncrt', 'type': 'shapesys', 'data': bkg_uncerts}
-                        ]
-                    }
-                ]
+                            {
+                                'name': 'uncorr_bkguncrt',
+                                'type': 'shapesys',
+                                'data': bkg_uncerts,
+                            }
+                        ],
+                    },
+                ],
             }
         ]
     }
diff --git a/pyhf/utils.py b/pyhf/utils.py
index 150e3611f4..4e466ec8f8 100644
--- a/pyhf/utils.py
+++ b/pyhf/utils.py
@@ -76,9 +76,11 @@ def qmu(mu, data, pdf, init_pars, par_bounds):
     """
     tensorlib, optimizer = get_backend()
     mubhathat = optimizer.constrained_bestfit(
-        loglambdav, mu, data, pdf, init_pars, par_bounds)
+        loglambdav, mu, data, pdf, init_pars, par_bounds
+    )
     muhatbhat = optimizer.unconstrained_bestfit(
-        loglambdav, data, pdf, init_pars, par_bounds)
+        loglambdav, data, pdf, init_pars, par_bounds
+    )
     qmu = loglambdav(mubhathat, data, pdf) - loglambdav(muhatbhat, data, pdf)
     qmu = tensorlib.where(muhatbhat[pdf.config.poi_index] > mu, [0], qmu)
     return qmu
@@ -87,7 +89,8 @@ def qmu(mu, data, pdf, init_pars, par_bounds):
 def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds):
     _, optimizer = get_backend()
     bestfit_nuisance_asimov = optimizer.constrained_bestfit(
-        loglambdav, asimov_mu, data, pdf, init_pars, par_bounds)
+        loglambdav, asimov_mu, data, pdf, init_pars, par_bounds
+    )
     return pdf.expected_data(bestfit_nuisance_asimov)
 
 
@@ -151,16 +154,15 @@ def runOnePoint(muTest, data, pdf, init_pars=None, par_bounds=None):
     par_bounds = par_bounds or pdf.config.suggested_bounds()
 
     tensorlib, _ = get_backend()
-    asimov_mu = 0.
-    asimov_data = generate_asimov_data(
-        asimov_mu, data, pdf, init_pars, par_bounds)
+    asimov_mu = 0.0
+    asimov_data = generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds)
 
-    qmu_v = tensorlib.clip(
-        qmu(muTest, data, pdf, init_pars, par_bounds), 0, max=None)
+    qmu_v = tensorlib.clip(qmu(muTest, data, pdf, init_pars, par_bounds), 0, max=None)
     sqrtqmu_v = tensorlib.sqrt(qmu_v)
 
     qmuA_v = tensorlib.clip(
-        qmu(muTest, asimov_data, pdf, init_pars, par_bounds), 0, max=None)
+        qmu(muTest, asimov_data, pdf, init_pars, par_bounds), 0, max=None
+    )
     sqrtqmuA_v = tensorlib.sqrt(qmuA_v)
 
     CLsb, CLb, CLs = pvals_from_teststat(sqrtqmu_v, sqrtqmuA_v)
@@ -168,7 +170,6 @@ def runOnePoint(muTest, data, pdf, init_pars=None, par_bounds=None):
     CLs_exp = []
     for nsigma in [-2, -1, 0, 1, 2]:
         sqrtqmu_v_sigma = sqrtqmuA_v - nsigma
-        CLs_exp.append(
-            pvals_from_teststat(sqrtqmu_v_sigma, sqrtqmuA_v)[-1])
+        CLs_exp.append(pvals_from_teststat(sqrtqmu_v_sigma, sqrtqmuA_v)[-1])
     CLs_exp = tensorlib.astensor(CLs_exp)
     return qmu_v, qmuA_v, CLsb, CLb, CLs, CLs_exp
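Annotation (not part of the patch): `runOnePoint` as the `cls` command uses it, with observed main-channel counts concatenated with the model's auxiliary data; the numbers are illustrative.

```python
# Compute observed CLs and the -2..+2 sigma expected band at mu = 1.
from pyhf.simplemodels import hepdata_like
from pyhf.utils import runOnePoint

pdf = hepdata_like(signal_data=[12.0], bkg_data=[50.0], bkg_uncerts=[3.0])
observed = [51.0] + pdf.config.auxdata

qmu_v, qmuA_v, CLsb, CLb, CLs, CLs_exp = runOnePoint(1.0, observed, pdf)
print(CLs, CLs_exp)
```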
diff --git a/pyhf/writexml.py b/pyhf/writexml.py
index a93746b68c..c98d9eef8c 100644
--- a/pyhf/writexml.py
+++ b/pyhf/writexml.py
@@ -1,40 +1,44 @@
 import os
 import xml.etree.cElementTree as ET
 
-def measurement(lumi, lumierr, poi, param_settings = None, name = 'Meas1'):
+
+def measurement(lumi, lumierr, poi, param_settings=None, name='Meas1'):
     param_settings = param_settings or []
 
-    meas = ET.Element("Measurement", Name = name, Lumi = str(lumi), LumiRelErr = str(lumierr))
-    poiel  = ET.Element('POI')
+    meas = ET.Element("Measurement", Name=name, Lumi=str(lumi), LumiRelErr=str(lumierr))
+    poiel = ET.Element('POI')
     poiel.text = poi
     meas.append(poiel)
 
     for s in param_settings:
-        se  = ET.Element('ParamSetting', **s['attrs'])
+        se = ET.Element('ParamSetting', **s['attrs'])
         se.text = ' '.join(s['params'])
         meas.append(se)
     return meas
 
+
 def write_channel(channelspec, filename, data_rootdir):
-    #need to write channelfile here
-    with open(filename,'w') as f:
-        channel = ET.Element('Channel', Name = channelspec['name'])
-        channel = ET.Element('Channel', Name = channelspec['name'])
-        f.write(ET.tostring(channel, encoding = 'utf-8').decode('utf-8'))
+    # need to write channelfile here
+    with open(filename, 'w') as f:
+        channel = ET.Element('Channel', Name=channelspec['name'])
+        f.write(ET.tostring(channel, encoding='utf-8').decode('utf-8'))
     pass
 
+
 def writexml(spec, specdir, data_rootdir, result_outputprefix):
-    combination = ET.Element("Combination", OutputFilePrefix = result_outputprefix)
+    combination = ET.Element("Combination", OutputFilePrefix=result_outputprefix)
 
     for c in spec['channels']:
-        channelfilename = os.path.join(specdir,'channel_{}.xml'.format(c['name']))
-        write_channel(c,channelfilename,data_rootdir)
-        inp  = ET.Element("Input")
+        channelfilename = os.path.join(specdir, 'channel_{}.xml'.format(c['name']))
+        write_channel(c, channelfilename, data_rootdir)
+        inp = ET.Element("Input")
         inp.text = channelfilename
         combination.append(inp)
 
-
-    m = measurement(1,0.1,'SigXsecOverSM',[{'attrs': {'Const': 'True'}, 'params': ['Lumi' 'alpha_syst1']}])
+    m = measurement(
+        1,
+        0.1,
+        'SigXsecOverSM',
+        [{'attrs': {'Const': 'True'}, 'params': ['Lumi', 'alpha_syst1']}],
+    )
     combination.append(m)
-    return ET.tostring(combination, encoding = 'utf-8')
-
+    return ET.tostring(combination, encoding='utf-8')
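Annotation (not part of the patch): a usage sketch for the `measurement()` helper after reformatting. The new code passes `ParamSetting` params as two list entries (`'Lumi'`, `'alpha_syst1'`), so the space-joined text comes out as intended.

```python
# Build a Measurement element and render it as XML.
import xml.etree.cElementTree as ET
from pyhf import writexml

meas = writexml.measurement(
    1, 0.1, 'SigXsecOverSM',
    [{'attrs': {'Const': 'True'}, 'params': ['Lumi', 'alpha_syst1']}],
)
print(ET.tostring(meas).decode('utf-8'))
```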