Skip to content

Commit

Permalink
[ENH] abstract study (#486)
Browse files Browse the repository at this point in the history
* update models and schema

* wip: try to use study endpoint as the same

* Revert "wip: try to use study endpoint as the same"

This reverts commit 77dc984.
make a new abstract_study endpoint instead

* initial add and test of abstract-study endpoint

* fix neurovault ingestion error

* rename to base_study

* add functionality for abstract studies

* fix ingestion of pmid

* fix table querying/loading
  • Loading branch information
jdkent authored Jun 19, 2023
1 parent 425c4cb commit 79dc9bf
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 21 deletions.
112 changes: 101 additions & 11 deletions store/neurostore/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import requests
from scipy import sparse
from dateutil.parser import parse as parse_date
from sqlalchemy import or_

from neurostore.database import db
from neurostore.models import (
Analysis,
Expand All @@ -20,6 +22,7 @@
Image,
Point,
Study,
BaseStudy,
Studyset,
Entity,
)
Expand All @@ -28,7 +31,7 @@

def ingest_neurovault(verbose=False, limit=20, overwrite=False):
# Store existing studies for quick lookup
all_studies = all_studies = {
all_studies = {
s.doi: s for s in Study.query.filter_by(source="neurovault").all()
}

Expand All @@ -37,16 +40,34 @@ def add_collection(data):
print("Skipping {} (already exists)...".format(data["DOI"]))
return
collection_id = data.pop("id")
doi = data.pop("DOI", None)
base_study = None
if doi:
base_study = BaseStudy.query.filter_by(doi=doi).one_or_none()

if base_study is None:
base_study = BaseStudy(
name=data.pop("name", None),
description=data.pop("description", None),
doi=data.pop("DOI", None),
authors=data.pop("authors", None),
publication=data.pop("journal_name", None),
metadata_=data,
level="group",
)
s = Study(
name=data.pop("name", None),
description=data.pop("description", None),
doi=data.pop("DOI", None),
authors=data.pop("authors", None),
publication=data.pop("journal_name", None),
name=data.pop("name", None) or base_study.name,
description=data.pop("description", None) or base_study.description,
doi=doi,
pmid=base_study.pmid,
authors=data.pop("authors", None) or base_study.authors,
publication=data.pop("journal_name", None) or base_study.publication,
source_id=collection_id,
metadata_=data,
source="neurovault",
level="group",
base_study=base_study,

)

space = data.get("coordinate_space", None)
Expand Down Expand Up @@ -104,7 +125,9 @@ def add_collection(data):
)
images.append(image)

db.session.add_all([s] + list(analyses.values()) + images + list(conditions))
db.session.add_all(
[base_study] + [s] + list(analyses.values()) + images + list(conditions)
)
db.session.commit()
all_studies[s.name] = s
return s
Expand Down Expand Up @@ -187,7 +210,61 @@ def ingest_neurosynth(max_rows=None):
for metadata_row, annotation_row in zip(
metadata.itertuples(), annotations.itertuples(index=False)
):
id_ = metadata_row.Index
base_study = None
doi = None if isinstance(metadata_row.doi, float) else metadata_row.doi
id_ = pmid = metadata_row.Index

# find a base_study based on available information
if doi is not None:
abstract_studies = BaseStudy.query.filter(
or_(BaseStudy.doi == doi, BaseStudy.pmid == pmid)
).all()

if len(abstract_studies) == 1:
base_study = abstract_studies[0]
elif len(abstract_studies) > 1:
source_base_study = abstract_studies[0]
# do not overwrite the versions column
# we want to append to this column
columns = [
c for c in source_base_study.__table__.columns if c != "versions"
]
for ab in abstract_studies[1:]:
for col in columns:
source_attr = getattr(source_base_study, col)
new_attr = getattr(ab, col)
setattr(source_base_study, col, source_attr or new_attr)
source_base_study.versions.extend(ab.versions)
# delete the extraneous record
db.session.delete(ab)

if doi is None:
base_study = BaseStudy.query.filter_by(pmid=pmid).one_or_none()

if base_study is None:
base_study = BaseStudy(
name=metadata_row.title,
doi=doi,
pmid=pmid,
authors=metadata_row.authors,
publication=metadata_row.journal,
year=metadata_row.year,
level="group",
)
else:
# try to update the abstract study if information is missing
study_info = {
"name": metadata_row.title,
"doi": doi,
"pmid": pmid,
"authors": metadata_row.authors,
"publication": metadata_row.journal,
"year": metadata_row.year,
"level": "group",
}
for col, value in study_info.items():
source_attr = getattr(base_study, col)
setattr(base_study, col, source_attr or value)
study_coord_data = coord_data.loc[[id_]]
md = {
"year": int(metadata_row.year),
Expand All @@ -201,10 +278,11 @@ def ingest_neurosynth(max_rows=None):
publication=metadata_row.journal,
metadata=md,
pmid=id_,
doi=None if isinstance(metadata_row.doi, float) else metadata_row.doi,
doi=doi,
source="neurosynth",
source_id=id_,
level="group",
base_study=base_study,
)
analyses = []
points = []
Expand Down Expand Up @@ -298,14 +376,26 @@ def ingest_neuroquery(max_rows=None):

# all_studies = {s.pmid: s for s in Study.query.filter(source="neuroquery").all()}
for id_, metadata_row in metadata.iterrows():
base_study = BaseStudy.query.filter_by(pmid=id_).one_or_none()

if base_study is None:
base_study = BaseStudy(
name=metadata_row["title"],
level="group",
pmid=id_
)
study_coord_data = coord_data.loc[[id_]]
s = Study(
name=metadata_row["title"],
metadata=dict(),
name=metadata_row["title"] or base_study.name,
source="neuroquery",
pmid=id_,
doi=base_study.doi,
year=base_study.year,
publication=base_study.publication,
authors=base_study.authors,
source_id=id_,
level="group",
base_study=base_study,
)
analyses = []
points = []
Expand Down
2 changes: 2 additions & 0 deletions store/neurostore/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .data import (
Studyset,
Annotation,
BaseStudy,
Study,
Analysis,
Condition,
Expand All @@ -16,6 +17,7 @@
__all__ = [
"Studyset",
"Annotation",
"BaseStudy",
"Study",
"Analysis",
"Condition",
Expand Down
26 changes: 25 additions & 1 deletion store/neurostore/models/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,29 @@ class AnnotationAnalysis(db.Model):
note = db.Column(MutableDict.as_mutable(JSONB))


class BaseStudy(BaseMixin, db.Model):
    """Publication-level record that groups ingested versions of one study.

    Multiple ``Study`` rows (e.g. ingested from neurovault, neurosynth,
    neuroquery — see the ingestion functions in this commit) that describe
    the same underlying paper point back to a single ``BaseStudy`` via the
    ``versions`` relationship.
    """

    __tablename__ = "abstract_studies"

    name = db.Column(db.String)  # article title (set from source metadata by ingestion)
    description = db.Column(db.String)
    publication = db.Column(db.String)  # journal name (ingestion passes journal_name/journal here)
    doi = db.Column(db.String, nullable=True)  # may be absent; used with pmid to match duplicates
    pmid = db.Column(db.String, nullable=True)
    authors = db.Column(db.String)
    year = db.Column(db.Integer)
    public = db.Column(db.Boolean, default=True)
    level = db.Column(db.String)  # restricted to "group"/"meta" by the check constraint below
    metadata_ = db.Column(JSONB)  # raw source metadata; trailing underscore avoids SQLAlchemy's reserved "metadata"
    user_id = db.Column(db.Text, db.ForeignKey("users.external_id"))
    user = relationship("User", backref=backref("abstract_studies"))
    # retrieve versions of same study
    versions = relationship("Study", backref=backref("base_study"))
    __table_args__ = (
        db.CheckConstraint(level.in_(["group", "meta"])),
        # the (doi, pmid) pair must be unique; either column alone may repeat
        db.UniqueConstraint('doi', 'pmid', name='doi_pmid'),
    )


class Study(BaseMixin, db.Model):
__tablename__ = "studies"

Expand All @@ -116,6 +139,7 @@ class Study(BaseMixin, db.Model):
source = db.Column(db.String)
source_id = db.Column(db.String)
source_updated_at = db.Column(db.DateTime(timezone=True))
base_study_id = db.Column(db.Text, db.ForeignKey('abstract_studies.id'))
user_id = db.Column(db.Text, db.ForeignKey("users.external_id"))
user = relationship("User", backref=backref("studies"))
analyses = relationship(
Expand Down Expand Up @@ -224,7 +248,7 @@ class AnalysisConditions(db.Model):

# purpose of Entity: you have an image/coordinate, but you do not
# know what level of analysis it represents
# NOT REALLY USED CURRENTLY
# NOT USED CURRENTLY
class Entity(BaseMixin, db.Model):
__tablename__ = "entities"

Expand Down
2 changes: 1 addition & 1 deletion store/neurostore/openapi
Submodule openapi updated 1 files
+183 −0 neurostore-openapi.yml
2 changes: 2 additions & 0 deletions store/neurostore/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .data import (
StudysetsView,
AnnotationsView,
BaseStudiesView,
StudiesView,
AnalysesView,
ConditionsView,
Expand All @@ -16,6 +17,7 @@
__all__ = [
"StudysetsView",
"AnnotationsView",
"BaseStudiesView",
"StudiesView",
"AnalysesView",
"ConditionsView",
Expand Down
14 changes: 11 additions & 3 deletions store/neurostore/resources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from ..database import db
from .utils import get_current_user
from .nested import nested_load
from ..models import Studyset, User, Annotation
from ..models import Studyset, BaseStudy, User, Annotation
from ..schemas.data import StudysetSnapshot
from . import data as viewdata

Expand All @@ -28,6 +28,10 @@ class BaseView(MethodView):
_linked = {}
_composite_key = {}

def custom_record_update(record):
"""Custom processing of a record (defined in specific classes)"""
return record

@classmethod
def update_or_create(cls, data, id=None, commit=True):
"""
Expand Down Expand Up @@ -94,7 +98,9 @@ def update_or_create(cls, data, id=None, commit=True):
# DO NOT WANT PEOPLE TO BE ABLE TO ADD ANALYSES
# TO STUDIES UNLESS THEY OWN THE STUDY
v = PrtCls._model.query.filter_by(id=v["id"]).first()
if current_user != v.user and current_user.external_id != compose_bot:
if PrtCls._model is BaseStudy:
pass
elif current_user != v.user and current_user.external_id != compose_bot:
abort(403)
if k in cls._linked and v is not None:
LnCls = getattr(viewdata, cls._linked[k])
Expand All @@ -117,6 +123,8 @@ def update_or_create(cls, data, id=None, commit=True):
print(k)
raise AttributeError

record = cls.custom_record_update(record)

to_commit.append(record)

# Update nested attributes recursively
Expand Down Expand Up @@ -292,7 +300,7 @@ def search(self):
q = self.join_tables(q)

records = q.paginate(
page=args["page"], per_page=args["page_size"], error_out=False
page=args["page"], per_page=args["page_size"], error_out=False,
).items
content = self.serialize_records(records, args)
metadata = self.create_metadata(q)
Expand Down
Loading

0 comments on commit 79dc9bf

Please sign in to comment.