Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Table.view_as() performance fixes #70

Merged
merged 3 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/lgdo/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
def setup(level: int = logging.INFO, logger: logging.Logger | None = None) -> None:
"""Setup a colorful logging output.

If `logger` is None, sets up only the ``pygama`` logger.
If `logger` is None, sets up only the ``lgdo`` logger.

Parameters
----------
Expand All @@ -27,7 +27,7 @@ def setup(level: int = logging.INFO, logger: logging.Logger | None = None) -> No

Examples
--------
>>> from pygama import logging
>>> from lgdo import logging
>>> logging.setup(level=logging.DEBUG)
"""
handler = colorlog.StreamHandler()
Expand All @@ -36,7 +36,7 @@ def setup(level: int = logging.INFO, logger: logging.Logger | None = None) -> No
)

if logger is None:
logger = colorlog.getLogger("pygama")
logger = colorlog.getLogger("lgdo")

logger.setLevel(level)
logger.addHandler(handler)
2 changes: 2 additions & 0 deletions src/lgdo/types/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def view_as(
if self.nda.ndim == 1:
return pd.Series(self.nda, copy=False)

# if array is multi-dim, use awkward
return akpd.from_awkward(self.view_as("ak"))

if library == "np":
Expand All @@ -195,6 +196,7 @@ def view_as(
msg = "Pint does not support Awkward yet, you must view the data with_units=False"
raise ValueError(msg)

# NOTE: this is zero-copy!
return ak.Array(self.nda)

msg = f"{library} is not a supported third-party format."
Expand Down
48 changes: 27 additions & 21 deletions src/lgdo/types/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,49 +381,55 @@ def view_as(
forward physical units to the output data.
cols
a list of column names specifying the subset of the table's columns
to be added to the dataframe.
to be added to the data view structure.
prefix
The prefix to be added to the column names. Used when recursively getting the
dataframe of a table inside this table.
The prefix to be added to the column names. Used when recursively
getting the dataframe of a :class:`Table` inside this
:class:`Table`.

See Also
--------
.LGDO.view_as
"""
if cols is None:
cols = self.keys()

if library == "pd":
df = pd.DataFrame()
if cols is None:
cols = self.keys()

for col in cols:
column = self[col]
if isinstance(column, (Array, VectorOfVectors)):
tmp_ser = column.view_as("pd", with_units=with_units).rename(
prefix + str(col)
)
df = pd.DataFrame(tmp_ser) if df.empty else df.join(tmp_ser)
elif isinstance(column, Table):
tmp_df = column.view_as(
data = self[col]

if isinstance(data, Table):
log.debug(f"viewing Table {col=!r} recursively")

tmp_df = data.view_as(
"pd", with_units=with_units, prefix=f"{prefix}{col}_"
)
df = tmp_df if df.empty else df.join(tmp_df)
elif df.empty:
df[prefix + str(col)] = column.view_as("pd", with_units=with_units)
for k, v in tmp_df.items():
df[k] = v

else:
df[prefix + str(col)] = df.join(
column.view_as("pd", with_units=with_units)
log.debug(
f"viewing {type(data).__name__} column {col!r} as Pandas Series"
)
df[f"{prefix}{col}"] = data.view_as("pd", with_units=with_units)

return df

if library == "np":
msg = f"Format {library} is not supported for Tables."
msg = f"Format {library!r} is not supported for Tables."
raise TypeError(msg)

if library == "ak":
if with_units:
msg = "Pint does not support Awkward yet, you must view the data with_units=False"
raise ValueError(msg)

return ak.Array(self)
# NOTE: passing the Table directly (it inherits from dict) to
# ak.Array() is very slow. The cause is not yet understood; it may be
# the extra LGDO fields (like "attrs"). Build a plain dict of
# per-column Awkward views instead.
return ak.Array({col: self[col].view_as("ak") for col in cols})

msg = f"{library} is not a supported third-party format."
msg = f"{library!r} is not a supported third-party format."
raise TypeError(msg)
Loading