Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: switch permit_dask to True by default #922

Merged
merged 5 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 17 additions & 66 deletions src/coffea/nanoevents/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,7 @@
TrivialUprootOpener,
UprootSourceMapping,
)
from coffea.nanoevents.schemas import (
BaseSchema,
DelphesSchema,
NanoAODSchema,
PFNanoAODSchema,
PHYSLITESchema,
TreeMakerSchema,
)
from coffea.nanoevents.schemas import BaseSchema, NanoAODSchema
from coffea.nanoevents.util import key_to_tuple, quote, tuple_to_key, unquote

_offsets_label = quote(",!offsets")
Expand Down Expand Up @@ -240,7 +233,7 @@ def __setstate__(self, state):
def from_root(
cls,
file,
treepath="/Events",
treepath=uproot._util.unset,
entry_start=None,
entry_stop=None,
chunks_per_file=uproot._util.unset,
Expand All @@ -252,7 +245,7 @@ def from_root(
access_log=None,
iteritems_options={},
use_ak_forth=True,
permit_dask=False,
permit_dask=True,
lgray marked this conversation as resolved.
Show resolved Hide resolved
):
"""Quickly build NanoEvents from a root file

Expand Down Expand Up @@ -287,42 +280,24 @@ def from_root(
permit_dask:
Allow nanoevents to use dask as a backend.
"""

if treepath is not uproot._util.unset and not isinstance(
file, uproot.reading.ReadOnlyDirectory
):
raise ValueError(
"""Specification of treename by argument to from_root is no longer supported in coffea 2023.
Please use one of the allow types for "files" specified by uproot: https://github.com/scikit-hep/uproot5/blob/v5.1.2/src/uproot/_dask.py#L109-L132
"""
)

if (
permit_dask
and not isinstance(schemaclass, FunctionType)
and schemaclass.__dask_capable__
):
behavior = None
if schemaclass is BaseSchema:
from coffea.nanoevents.methods import base

behavior = base.behavior
elif schemaclass is NanoAODSchema:
from coffea.nanoevents.methods import nanoaod

behavior = nanoaod.behavior
elif schemaclass is PFNanoAODSchema:
from coffea.nanoevents.methods import nanoaod

behavior = nanoaod.behavior
elif schemaclass is TreeMakerSchema:
from coffea.nanoevents.methods import base, vector

behavior = {}
behavior.update(base.behavior)
behavior.update(vector.behavior)
elif schemaclass is PHYSLITESchema:
from coffea.nanoevents.methods import physlite

behavior = physlite.behavior
elif schemaclass is DelphesSchema:
from coffea.nanoevents.methods import delphes

behavior = delphes.behavior

map_schema = _map_schema_uproot(
schemaclass=schemaclass,
behavior=dict(behavior),
behavior=dict(schemaclass.behavior()),
metadata=metadata,
version="latest",
)
Expand Down Expand Up @@ -360,7 +335,7 @@ def from_root(
tree = file[treepath]
elif "<class 'uproot.rootio.ROOTDirectory'>" == str(type(file)):
raise RuntimeError(
"The file instance (%r) is an uproot3 type, but this module is only compatible with uproot4 or higher"
"The file instance (%r) is an uproot3 type, but this module is only compatible with uproot5 or higher"
% file
)
else:
Expand Down Expand Up @@ -463,33 +438,9 @@ def from_parquet(
and not isinstance(schemaclass, FunctionType)
and schemaclass.__dask_capable__
):
behavior = None
if schemaclass is BaseSchema:
from coffea.nanoevents.methods import base

behavior = base.behavior
elif schemaclass is NanoAODSchema:
from coffea.nanoevents.methods import nanoaod

behavior = nanoaod.behavior
elif schemaclass is TreeMakerSchema:
from coffea.nanoevents.methods import base, vector

behavior = {}
behavior.update(base.behavior)
behavior.update(vector.behavior)
elif schemaclass is PHYSLITESchema:
from coffea.nanoevents.methods import physlite

behavior = physlite.behavior
elif schemaclass is DelphesSchema:
from coffea.nanoevents.methods import delphes

behavior = delphes.behavior

map_schema = _map_schema_parquet(
schemaclass=schemaclass,
behavior=dict(behavior),
behavior=dict(schemaclass.behavior()),
metadata=metadata,
version="latest",
)
Expand Down Expand Up @@ -713,7 +664,7 @@ def events(self):

events = self._events()
if events is None:
behavior = dict(self._schema.behavior)
behavior = dict(self._schema.behavior())
behavior["__events_factory__"] = self
events = awkward.from_buffers(
self._schema.form,
Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ def __init__(self, base_form: Dict[str, Any]):
v for v in output.values()
]

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import base, candidate

Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ def form(self):
"""Awkward form of this schema"""
return self._form

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import base

Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/delphes.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,8 @@ def _preprocess_branch_form(objname, form):

return output

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import delphes

Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/nanoaod.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,8 @@ def _build_collections(self, field_names, input_contents):

return output.keys(), output.values()

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import nanoaod

Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/pdune.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ def _build_collections(self, branch_forms):
# }
# return form

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import pdune

Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/physlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def _create_eventindex_form(base_form, key):
}
return form

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import physlite

Expand Down
4 changes: 2 additions & 2 deletions src/coffea/nanoevents/schemas/treemaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def _build_collections(self, branch_forms):

return branch_forms

@property
def behavior(self):
@classmethod
def behavior(cls):
"""Behaviors necessary to implement this schema"""
from coffea.nanoevents.methods import base, vector

Expand Down
4 changes: 2 additions & 2 deletions tests/test_analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
fname = "tests/samples/nano_dy.root"
eagerevents = NanoEventsFactory.from_root(
{os.path.abspath(fname): "Events"},
schemaclass=NanoAODSchema.v6,
schemaclass=NanoAODSchema,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the .v6 specialization still work but is just not needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now the .vX specializations are a little broken in their original implementation. The functional sub-classing thing you were doing doesn't quite fit into the daskiness.

metadata={"dataset": "DYJets"},
permit_dask=False,
).events()
dakevents = NanoEventsFactory.from_root(
{os.path.abspath(fname): "Events"},
schemaclass=NanoAODSchema,
metadata={"dataset": "DYJets"},
permit_dask=True,
).events()
uprootevents = uproot.dask({fname: "Events"})

Expand Down
26 changes: 18 additions & 8 deletions tests/test_nanoevents.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,11 @@ def crossref(events):
def test_read_nanomc(suffix):
path = os.path.abspath(f"tests/samples/nano_dy.{suffix}")
# parquet files were converted from even older nanoaod
nanoversion = NanoAODSchema.v6 if suffix == "root" else NanoAODSchema.v5
nanoversion = NanoAODSchema
factory = getattr(NanoEventsFactory, f"from_{suffix}")(
{path: "Events"}, schemaclass=nanoversion
{path: "Events"},
schemaclass=nanoversion,
permit_dask=False,
)
events = factory.events()

Expand Down Expand Up @@ -132,9 +134,11 @@ def test_read_from_uri(suffix):

path = Path(os.path.abspath(f"tests/samples/nano_dy.{suffix}")).as_uri()

nanoversion = NanoAODSchema.v6 if suffix == "root" else NanoAODSchema.v5
nanoversion = NanoAODSchema
factory = getattr(NanoEventsFactory, f"from_{suffix}")(
{path: "Events"}, schemaclass=nanoversion
{path: "Events"},
schemaclass=nanoversion,
permit_dask=False,
)
events = factory.events()

Expand All @@ -145,9 +149,11 @@ def test_read_from_uri(suffix):
def test_read_nanodata(suffix):
path = os.path.abspath(f"tests/samples/nano_dimuon.{suffix}")
# parquet files were converted from even older nanoaod
nanoversion = NanoAODSchema.v6 if suffix == "root" else NanoAODSchema.v5
nanoversion = NanoAODSchema
factory = getattr(NanoEventsFactory, f"from_{suffix}")(
{path: "Events"}, schemaclass=nanoversion
{path: "Events"},
schemaclass=nanoversion,
permit_dask=False,
)
events = factory.events()

Expand All @@ -158,7 +164,9 @@ def test_read_nanodata(suffix):
def test_missing_eventIds_error():
path = os.path.abspath("tests/samples/missing_luminosityBlock.root") + ":Events"
with pytest.raises(RuntimeError):
factory = NanoEventsFactory.from_root(path, schemaclass=NanoAODSchema)
factory = NanoEventsFactory.from_root(
path, schemaclass=NanoAODSchema, permit_dask=False
)
factory.events()


Expand All @@ -168,7 +176,9 @@ def test_missing_eventIds_warning():
RuntimeWarning, match=r"Missing event_ids \: \[\'luminosityBlock\'\]"
):
NanoAODSchema.error_missing_event_ids = False
factory = NanoEventsFactory.from_root(path, schemaclass=NanoAODSchema)
factory = NanoEventsFactory.from_root(
path, schemaclass=NanoAODSchema, permit_dask=False
)
factory.events()


Expand Down