Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ticket/PSB-167: Add quality to metadata table #2712

Merged
merged 2 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# E203 is pycodestyle's "whitespace before ':'" check.
# NOTE(review): presumably ignored because it conflicts with black-style
# slice formatting -- confirm with the project's formatter settings.
[flake8]
extend-ignore = E203
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,28 @@
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
from allensdk.brain_observatory.ecephys._probe import ProbeWithLFPMeta
from allensdk.brain_observatory.ecephys.behavior_ecephys_session import (
BehaviorEcephysSession,
)
from allensdk.brain_observatory.ecephys._probe import ProbeWithLFPMeta
from allensdk.core.dataframe_utils import (
enforce_df_int_typing,
return_one_dataframe_row_only,
)

# Column names handed to enforce_df_int_typing when the ecephys and behavior
# session metadata CSVs are loaded.
# NOTE(review): presumably these are the columns coerced to an integer dtype,
# and columns absent from a given table are skipped -- confirm in the helper.
INTEGER_COLUMNS = [
"prior_exposures_to_image_set",
"ecephys_session_id",
"unit_count",
"probe_count",
"channel_count",
]

class VisualBehaviorNeuropixelsProjectCloudApi(ProjectCloudApiBase):

class VisualBehaviorNeuropixelsProjectCloudApi(ProjectCloudApiBase):
MANIFEST_COMPATIBILITY = ["0.1.0", "10.0.0"]

def _load_manifest_tables(self):

self._get_ecephys_session_table()
self._get_behavior_session_table()
self._get_unit_table()
Expand All @@ -44,36 +54,22 @@ def get_behavior_session(
-------
BehaviorSession
"""
row = self._behavior_session_table.query(
f"behavior_session_id=={behavior_session_id}"
row = return_one_dataframe_row_only(
input_table=self._behavior_session_table,
index_value=behavior_session_id,
table_name="behavior_session_table",
)
if row.shape[0] != 1:
raise RuntimeError(
"The behavior_session_table should have "
"1 and only 1 entry for a given "
"behavior_session_id. For "
f"{behavior_session_id} "
f" there are {row.shape[0]} entries."
)

row = row.squeeze()
ecephys_session_id = row.ecephys_session_id
# If a file_id for the behavior session is not set, attempt to load
# an associated ecephys session.
if row[self.cache.file_id_column] < 0 or np.isnan(
row[self.cache.file_id_column]
):
row = self._ecephys_session_table.query(
f"index=={ecephys_session_id}"
)

if len(row) == 0:
raise RuntimeError(
f"behavior_session: {behavior_session_id} "
f"corresponding to "
f"ecephys_session: {ecephys_session_id}"
f"does not exist in the behavior_session "
"or ecephys_session tables."
row = return_one_dataframe_row_only(
input_table=self._ecephys_session_table,
index_value=ecephys_session_id,
table_name="ecephys_session_table",
)

file_id = str(int(row[self.cache.file_id_column]))
Expand All @@ -84,7 +80,6 @@ def get_behavior_session(
def get_ecephys_session(
self, ecephys_session_id: int
) -> BehaviorEcephysSession:

"""get a BehaviorEcephysSession by specifying ecephys_session_id

Parameters
Expand All @@ -97,21 +92,15 @@ def get_ecephys_session(
BehaviorEcephysSession

"""
session_meta = self._ecephys_session_table.query(
f"index=={ecephys_session_id}"
session_meta = return_one_dataframe_row_only(
input_table=self._ecephys_session_table,
index_value=ecephys_session_id,
table_name="ecephys_session_table",
)
probes_meta = self._probe_table[
(self._probe_table["ecephys_session_id"] == ecephys_session_id)
& (self._probe_table["has_lfp_data"])
]
if session_meta.shape[0] != 1:
raise RuntimeError(
"The behavior_ecephys_session_table should "
"have 1 and only 1 entry for a given "
f"ecephys_session_id. For "
f"{ecephys_session_id} "
f" there are {session_meta.shape[0]} entries."
)
session_file_id = str(int(session_meta[self.cache.file_id_column]))
session_data_path = self._get_data_path(file_id=session_file_id)

Expand All @@ -133,10 +122,9 @@ def f():
probe_meta = {
p.name: ProbeWithLFPMeta(
lfp_csd_filepath=make_lazy_load_filepath_function(
file_id=str(int(getattr(
p, self.cache.file_id_column)))
),
lfp_sampling_rate=p.lfp_sampling_rate
file_id=str(int(getattr(p, self.cache.file_id_column)))
),
lfp_sampling_rate=p.lfp_sampling_rate,
)
for p in probes_meta.itertuples(index=False)
}
Expand All @@ -149,6 +137,7 @@ def f():
def _get_ecephys_session_table(self):
    """Load the ecephys sessions metadata CSV and cache it on the instance.

    The CSV located via ``self._get_metadata_path(fname="ecephys_sessions")``
    is read with pandas, passed through ``enforce_df_int_typing`` for the
    columns named in ``INTEGER_COLUMNS`` (with ``use_pandas_type=True``),
    and stored on ``self._ecephys_session_table`` indexed by
    ``ecephys_session_id``.
    """
    csv_path = self._get_metadata_path(fname="ecephys_sessions")
    # Typing is enforced before set_index so that "ecephys_session_id",
    # which is listed in INTEGER_COLUMNS, is still a regular column.
    sessions = enforce_df_int_typing(
        pd.read_csv(csv_path), INTEGER_COLUMNS, use_pandas_type=True
    )
    self._ecephys_session_table = sessions.set_index("ecephys_session_id")

def get_ecephys_session_table(self) -> pd.DataFrame:
Expand All @@ -161,6 +150,7 @@ def get_ecephys_session_table(self) -> pd.DataFrame:
def _get_behavior_session_table(self):
    """Load the behavior sessions metadata CSV and cache it on the instance.

    The CSV located via ``self._get_metadata_path(fname="behavior_sessions")``
    is read with pandas, passed through ``enforce_df_int_typing`` for the
    columns named in ``INTEGER_COLUMNS`` (with ``use_pandas_type=True``),
    and stored on ``self._behavior_session_table`` indexed by
    ``behavior_session_id``.
    """
    csv_path = self._get_metadata_path(fname="behavior_sessions")
    # Typing is enforced on the freshly read frame, before the index is set.
    sessions = enforce_df_int_typing(
        pd.read_csv(csv_path), INTEGER_COLUMNS, use_pandas_type=True
    )
    self._behavior_session_table = sessions.set_index("behavior_session_id")

def get_behavior_session_table(self) -> pd.DataFrame:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,23 @@
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession,
)
from allensdk.core.utilities import literal_col_eval
from allensdk.core.dataframe_utils import (
enforce_df_int_typing
enforce_df_int_typing,
return_one_dataframe_row_only,
)
from allensdk.core.utilities import literal_col_eval

COL_EVAL_LIST = ["ophys_experiment_id", "ophys_container_id", "driver_line"]
INTEGER_COLUMNS = ["session_number", "prior_exposures_to_image_set",
"ophys_session_id", "imaging_plane_group_count",
"imaging_plane_group", "targeted_areas",
"num_depths_per_area", "num_targeted_structures"]
INTEGER_COLUMNS = [
"session_number",
"prior_exposures_to_image_set",
"ophys_session_id",
"imaging_plane_group_count",
"imaging_plane_group",
"targeted_areas",
"num_depths_per_area",
"num_targeted_structures",
]


def sanitize_data_columns(
Expand Down Expand Up @@ -103,23 +110,23 @@ def get_behavior_session(
from the nwb file for the first-listed ophys_experiment.

"""
row = self._behavior_session_table.query(
f"behavior_session_id=={behavior_session_id}"
row = return_one_dataframe_row_only(
input_table=self._behavior_session_table,
index_value=behavior_session_id,
table_name="behavior_session_table",
)
if row.shape[0] != 1:
raise RuntimeError(
"The behavior_session_table should have "
"1 and only 1 entry for a given "
"behavior_session_id. For "
f"{behavior_session_id} "
f" there are {row.shape[0]} entries."
)
row = row.squeeze()
has_file_id = (not pd.isna(row[self.cache.file_id_column])
and row[self.cache.file_id_column] > 0)
has_file_id = (
not pd.isna(row[self.cache.file_id_column])
and row[self.cache.file_id_column] > 0
)
if not has_file_id:
oeid = row.ophys_experiment_id[0]
row = self._ophys_experiment_table.query(f"index=={oeid}")
row = return_one_dataframe_row_only(
input_table=self._ophys_experiment_table,
index_value=oeid,
table_name="ophys_experiment_table",
)
file_id = str(int(row[self.cache.file_id_column]))
data_path = self._get_data_path(file_id=file_id)
return BehaviorSession.from_nwb_path(nwb_path=str(data_path))
Expand All @@ -139,17 +146,11 @@ def get_behavior_ophys_experiment(
BehaviorOphysExperiment

"""
row = self._ophys_experiment_table.query(
f"index=={ophys_experiment_id}"
row = return_one_dataframe_row_only(
input_table=self._ophys_experiment_table,
index_value=ophys_experiment_id,
table_name="ophys_experiment_table",
)
if row.shape[0] != 1:
raise RuntimeError(
"The behavior_ophys_experiment_table should "
"have 1 and only 1 entry for a given "
f"ophys_experiment_id. For "
f"{ophys_experiment_id} "
f" there are {row.shape[0]} entries."
)
file_id = str(int(row[self.cache.file_id_column]))
data_path = self._get_data_path(file_id=file_id)
return BehaviorOphysExperiment.from_nwb_path(str(data_path))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def __get_prior_exposure_count(
elif agg_method == "cumsum":
df["to"] = to
df_index_name = df.index.name

def cumsum(x):
    """Return the running total of ``x`` excluding the current element.

    The cumulative sum is shifted down by one position, the vacated first
    slot is filled with 0, and the result is cast to ``int64``.
    """
    running_total = x.cumsum()
    # Shift so each position holds the sum of everything strictly before it.
    preceding_total = running_total.shift(fill_value=0)
    return preceding_total.astype("int64")

Expand All @@ -184,8 +185,7 @@ def cumsum(x):
return counts.reindex(index)


def add_experience_level_ophys(
input_df: pd.DataFrame) -> pd.DataFrame:
def add_experience_level_ophys(input_df: pd.DataFrame) -> pd.DataFrame:
"""
adds a column to ophys tables that contains a string
indicating whether a session had exposure level of Familiar,
Expand All @@ -210,36 +210,34 @@ def add_experience_level_ophys(

# do not modify in place
table = input_df.copy(deep=True)
session_number = 'session_number' \
if 'session_number' in table.columns else 'session'
session_number = (
"session_number" if "session_number" in table.columns else "session"
)

# add experience_level column with strings indicating relevant conditions
table['experience_level'] = 'None'
table["experience_level"] = "None"

session_training = table.session_type.str.startswith('TRAINING')
session_training = table.session_type.str.startswith("TRAINING")
train_indices = table[session_training].index.values
table.loc[train_indices, 'experience_level'] = 'Training'
table.loc[train_indices, "experience_level"] = "Training"

session_0123 = table[session_number].isin([0, 1, 2, 3])
familiar_indices = table[session_0123].index.values

table.loc[familiar_indices, 'experience_level'] = 'Familiar'
table.loc[familiar_indices, "experience_level"] = "Familiar"

session_456 = table[session_number].isin([4, 5, 6])
zero_prior_exp = (table.prior_exposures_to_image_set == 0)
zero_prior_exp = table.prior_exposures_to_image_set == 0

novel_indices = table[session_456
& zero_prior_exp].index.values
novel_indices = table[session_456 & zero_prior_exp].index.values

table.loc[novel_indices, 'experience_level'] = 'Novel 1'
table.loc[novel_indices, "experience_level"] = "Novel 1"

session_456 = table[session_number].isin([4, 5, 6])
nonzero_prior_exp = (table.prior_exposures_to_image_set != 0)
novel_gt_1_indices = table[
session_456
& nonzero_prior_exp].index.values
nonzero_prior_exp = table.prior_exposures_to_image_set != 0
novel_gt_1_indices = table[session_456 & nonzero_prior_exp].index.values

table.loc[novel_gt_1_indices, 'experience_level'] = 'Novel >1'
table.loc[novel_gt_1_indices, "experience_level"] = "Novel >1"

return table

Expand Down
Loading