Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use distributions in ML backends for better Poisson approximation #268

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
3bcae3c
Use TensorFlow Probability for distributions
matthewfeickert Sep 17, 2018
1d4c471
Don't use the Normal approximation for the NumPy backend in testing
matthewfeickert Sep 17, 2018
805b2c7
Use torch.distributions.Poisson for poisson p.m.f. approximation
matthewfeickert Sep 17, 2018
3503d0e
Use PyTorch's Poisson log_prob approximation for MXNet's poisson
matthewfeickert Sep 17, 2018
dd4568a
Add docstrings to NumPy backend poisson and normal
matthewfeickert Sep 17, 2018
01c6b81
Move examples to before Args and Returns
matthewfeickert Sep 17, 2018
dd198c0
Remove use of poisson_from_normal=True from everywhere
matthewfeickert Sep 17, 2018
bbbb1bd
Wrap json.load in with clause to safely load and close
matthewfeickert Sep 17, 2018
ead36db
Wrap click.open_file in with clause to safely load and close
matthewfeickert Sep 18, 2018
396a5dd
Split pytest into two runs given Travis memory constraints
matthewfeickert Sep 18, 2018
c816b8f
Revert back to using tf.distribution instead of tfp
matthewfeickert Sep 18, 2018
7158e61
Add names to notebook tests for visual clarity
matthewfeickert Sep 18, 2018
b02d7ae
Add a universal fix to starting backend
matthewfeickert Sep 20, 2018
94b6e8e
Improve docstring with Gamma function comment
matthewfeickert Sep 20, 2018
42f0296
update benchmark tests to use conftest backends
kratsg Sep 20, 2018
70d03c3
pip install minuit for benchmark tests
kratsg Sep 20, 2018
476e04b
Revert "use scipy.special.xlogy in poisson computation (#277)"
kratsg Sep 20, 2018
413c19e
test for poisson NaN
kratsg Sep 20, 2018
77e7151
fix up the test and we're good
kratsg Sep 20, 2018
5f371ea
more verbose pytest for logs, much nicer to debug
kratsg Sep 20, 2018
277af5b
remove conflict lines
kratsg Sep 20, 2018
503af27
fix test_tensor
kratsg Sep 20, 2018
fafe676
no verboseness
kratsg Sep 20, 2018
41e9351
Remove unused import of numpy_backend
matthewfeickert Sep 20, 2018
1c5d25e
fix up coverage
kratsg Sep 20, 2018
96b161e
add exception for InvalidMeasurement, fix up coverage
kratsg Sep 20, 2018
d1c7948
[skip ci] just removing an extraneous line
kratsg Sep 20, 2018
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
27 changes: 19 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ python:
- '2.7'
- '3.6'
before_install:
- pip install --upgrade pip setuptools
- pip install --upgrade pip setuptools wheel
install:
- pip install --ignore-installed -U -q -e .[tensorflow,torch,mxnet,minuit,develop] # Ensure right version of NumPy installed
- pip install --ignore-installed -U -q -e .[tensorflow,torch,mxnet,minuit,develop]
- pip freeze
script:
- pyflakes pyhf
- pytest --ignore tests/benchmarks/
- pytest -r sx --ignore tests/benchmarks/ --ignore tests/test_notebooks.py
after_success: coveralls

# always test (on both 'push' and 'pr' builds in Travis)
Expand All @@ -31,21 +32,31 @@ env:

jobs:
include:
- name: "Python 2.7 Notebook Tests"
python: '2.7'
script:
- pytest tests/test_notebooks.py
- name: "Python 3.6 Notebook Tests"
python: '3.6'
script:
- pytest tests/test_notebooks.py
- stage: benchmark
python: '3.6'
before_install:
- pip install --upgrade pip setuptools
- pip install --upgrade pip setuptools wheel
install:
- pip install --ignore-installed -U -q -e .[tensorflow,torch,mxnet,develop]
script: pytest --benchmark-sort=mean tests/benchmarks/
- pip install --ignore-installed -U -q -e .[tensorflow,torch,mxnet,minuit,develop]
- pip freeze
script: pytest -r sx --benchmark-sort=mean tests/benchmarks/
- stage: docs
python: '3.6'
before_install:
- sudo apt-get update
- sudo apt-get -qq install pandoc
- pip install --upgrade pip setuptools
- pip install --upgrade pip setuptools wheel
install:
- pip install --ignore-installed -U -q -e .[tensorflow,torch,mxnet,develop]
- pip install --ignore-installed -U -q -e .[tensorflow,torch,mxnet,minuit,develop]
- pip freeze
script:
- python -m doctest README.md
- cd docs && make html && cd -
Expand Down
115 changes: 65 additions & 50 deletions docs/examples/notebooks/histosys-pytorch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline"
]
Expand All @@ -24,36 +32,23 @@
"source": [
"import pyhf\n",
"from pyhf import Model\n",
"from pyhf.simplemodels import hepdata_like"
"from pyhf.simplemodels import hepdata_like\n",
"\n",
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" [120.0, 180.0, 100.0, 225.0]\n",
"[120.0, 180.0, 100.0, 225.0]\n",
"[[0, 10], [0, 10], [0, 10]]\n",
"['mu', 'uncorr_bkguncrt']\n",
"---\n",
"as numpy\n",
"-----\n",
"<type 'numpy.ndarray'> [-22.87785012]\n",
"---\n",
"as pytorch\n",
"-----\n",
"<class 'torch.autograd.variable.Variable'> Variable containing:\n",
"-22.8778\n",
"[torch.FloatTensor of size 1]\n",
"\n",
"---\n",
"as tensorflow\n",
"-----\n",
"<class 'tensorflow.python.framework.ops.Tensor'> [-22.877851486206055]\n"
"['mu', 'uncorr_bkguncrt']\n"
]
}
],
Expand All @@ -76,54 +71,74 @@
"\n",
"print(data)\n",
"print(par_bounds)\n",
"print(pdf.config.par_order)\n",
"\n",
"\n",
"print '---\\nas numpy\\n-----'\n",
"pyhf.tensorlib = pyhf.numpy_backend(poisson_from_normal = True)\n",
"v = pdf.logpdf(init_pars,data)\n",
"print type(v),v\n",
"\n",
"print '---\\nas pytorch\\n-----'\n",
"pyhf.tensorlib = pyhf.pytorch_backend()\n",
"v = pdf.logpdf(init_pars,data)\n",
"print type(v),v\n",
"\n",
"\n",
"print '---\\nas tensorflow\\n-----'\n",
"import tensorflow as tf\n",
"pyhf.tensorlib = pyhf.tensorflow_backend()\n",
"v = pdf.logpdf(init_pars,data)\n",
"\n",
"pyhf.tensorlib.session = tf.Session()\n",
"print type(v),pyhf.tensorlib.tolist(v)"
"print(pdf.config.par_order)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"# NumPy\n",
"<class 'numpy.ndarray'> [-23.57960517]\n",
"\n",
"# TensorFlow\n",
"<class 'tensorflow.python.framework.ops.Tensor'> Tensor(\"mul:0\", shape=(1,), dtype=float32)\n",
"\n",
"# PyTorch\n",
"<class 'torch.Tensor'> tensor([-23.5796])\n",
"\n",
"# MXNet\n",
"<class 'mxnet.ndarray.ndarray.NDArray'> \n",
"[-23.57959]\n",
"<NDArray 1 @cpu(0)>\n"
]
}
],
"source": [
"backends = [\n",
" pyhf.tensor.numpy_backend(),\n",
" pyhf.tensor.tensorflow_backend(session=tf.Session()),\n",
" pyhf.tensor.pytorch_backend(),\n",
" pyhf.tensor.mxnet_backend()\n",
"]\n",
"names = [\n",
" 'NumPy',\n",
" 'TensorFlow',\n",
" 'PyTorch',\n",
" 'MXNet'\n",
"]\n",
"\n",
"for backend,name in zip(backends,names):\n",
" print('\\n# {name}'.format(name=name))\n",
" pyhf.set_backend(backend)\n",
" v = pdf.logpdf(init_pars,data)\n",
" print(type(v), v)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
Expand Down
20 changes: 11 additions & 9 deletions pyhf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,39 @@
optimizer = optimize.scipy_optimizer()
default_optimizer = optimizer


def get_backend():
    """
    Get the current backend and the associated optimizer.

    Example:
        >>> import pyhf
        >>> pyhf.get_backend()
        (<pyhf.tensor.numpy_backend.numpy_backend object at 0x...>, <pyhf.optimize.opt_scipy.scipy_optimizer object at 0x...>)

    Returns:
        backend, optimizer
    """
    # The active backend/optimizer pair lives in module-level state,
    # set by set_backend(); this is a read-only accessor for that pair.
    global tensorlib
    global optimizer
    return tensorlib, optimizer

def set_backend(backend, custom_optimizer = None):

def set_backend(backend, custom_optimizer=None):
"""
Set the backend and the associated optimizer

Example:
>>> import pyhf
>>> import tensorflow as tf
>>> pyhf.set_backend(pyhf.tensor.tensorflow_backend(session=tf.Session()))

Args:
backend: One of the supported pyhf backends: NumPy,
TensorFlow, PyTorch, and MXNet

Returns:
None

Example:
>>> import pyhf
>>> import tensorflow as tf
>>> pyhf.set_backend(pyhf.tensor.tensorflow_backend(session=tf.Session()))
"""
global tensorlib
global optimizer
Expand All @@ -54,5 +55,6 @@ def set_backend(backend, custom_optimizer = None):
if custom_optimizer:
optimizer = custom_optimizer


from .pdf import Model
__all__ = ["Model", "utils", "modifiers"]
41 changes: 24 additions & 17 deletions pyhf/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import os
import jsonpatch
import sys

from . import readxml
from . import writexml
Expand All @@ -28,36 +29,39 @@ def xml2json(entrypoint_xml, basedir, output_file, track_progress):
if output_file is None:
print(json.dumps(spec, indent=4, sort_keys=True))
else:
json.dump(spec, open(output_file, 'w+'), indent=4, sort_keys=True)
with open(output_file, 'w+') as out_file:
json.dump(spec, out_file, indent=4, sort_keys=True)
log.debug("Written to {0:s}".format(output_file))
sys.exit(0)

@pyhf.command()
@click.argument('workspace', default = '-')
@click.argument('xmlfile', default = '-')
@click.option('--specroot', default = click.Path(exists = True))
@click.option('--dataroot', default = click.Path(exists = True))
def json2xml(workspace,xmlfile,specroot,dataroot):
specstream = click.open_file(workspace)
outstream = click.open_file(xmlfile,'w')
d = json.load(specstream)

outstream.write(writexml.writexml(d,specroot,dataroot,'').decode('utf-8'))
@click.argument('workspace', default='-')
@click.argument('xmlfile', default='-')
@click.option('--specroot', default=click.Path(exists=True))
@click.option('--dataroot', default=click.Path(exists=True))
def json2xml(workspace, xmlfile, specroot, dataroot):
with click.open_file(workspace, 'r') as specstream:
d = json.load(specstream)
with click.open_file(xmlfile, 'w') as outstream:
outstream.write(writexml.writexml(d, specroot, dataroot,'').decode('utf-8'))
sys.exit(0)

@pyhf.command()
@click.argument('workspace', default = '-')
@click.argument('workspace', default='-')
@click.option('--output-file', help='The location of the output json file. If not specified, prints to screen.', default=None)
@click.option('--measurement', default=None)
@click.option('-p','--patch', multiple = True)
@click.option('-p', '--patch', multiple=True)
@click.option('--qualify-names/--no-qualify-names', default=False)
def cls(workspace, output_file, measurement, qualify_names, patch):
specstream = click.open_file(workspace)
d = json.load(specstream)
with click.open_file(workspace, 'r') as specstream:
d = json.load(specstream)
measurements = d['toplvl']['measurements']
measurement_names = [m['name'] for m in measurements]
measurement_index = 0
log.debug('measurements defined:\n\t{0:s}'.format('\n\t'.join(measurement_names)))
if measurement and measurement not in measurement_names:
log.error('no measurement by name \'{0:s}\' exists, pick from one of the valid ones above'.format(measurement))
sys.exit(1)
else:
if not measurement and len(measurements) > 1:
log.warning('multiple measurements defined. Taking the first measurement.')
Expand All @@ -68,13 +72,16 @@ def cls(workspace, output_file, measurement, qualify_names, patch):
log.debug('calculating CLs for measurement {0:s}'.format(measurements[measurement_index]['name']))
spec = {'channels':d['channels']}
for p in patch:
p = jsonpatch.JsonPatch(json.loads(click.open_file(p).read()))
with click.open_file(p, 'r') as read_file:
p = jsonpatch.JsonPatch(json.loads(read_file.read()))
spec = p.apply(spec)
p = Model(spec, poiname=measurements[measurement_index]['config']['poi'], qualify_names=qualify_names)
result = runOnePoint(1.0, sum((d['data'][c['name']] for c in d['channels']),[]) + p.config.auxdata, p)
result = {'CLs_obs': result[-2].tolist()[0], 'CLs_exp': result[-1].ravel().tolist()}
if output_file is None:
print(json.dumps(result, indent=4, sort_keys=True))
else:
json.dump(result, open(output_file, 'w+'), indent=4, sort_keys=True)
with open(output_file, 'w+') as out_file:
json.dump(result, out_file, indent=4, sort_keys=True)
log.debug("Written to {0:s}".format(output_file))
sys.exit(0)
5 changes: 5 additions & 0 deletions pyhf/exceptions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import sys

class InvalidMeasurement(Exception):
    """
    InvalidMeasurement is raised when a specified measurement is invalid given the specification.
    """

class InvalidNameReuse(Exception):
    """Package exception type; presumably raised when a name is reused where
    uniqueness is required — exact trigger is at the raise sites elsewhere
    in the package (not visible here)."""
    pass

Expand Down
Loading