Skip to content

Commit

Permalink
2.0.2
Browse files Browse the repository at this point in the history
  • Loading branch information
mbaudis committed Oct 17, 2024
1 parent dfdd85b commit 78e1013
Show file tree
Hide file tree
Showing 23 changed files with 280 additions and 99 deletions.
1 change: 1 addition & 0 deletions .github/workflows/mk-bycon-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ jobs:
- run: pip install pymdown-extensions
- run: pip install mkdocs-mermaid2-plugin
- run: pip install mdx_gh_links
- run: pip install mkdocstrings-python
- run: mkdocs gh-deploy --force
3 changes: 0 additions & 3 deletions bycon/byconServiceLibs/geoloc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,6 @@ def __create_geo_longlat_query(self):
return





################################################################################
################################################################################
################################################################################
Expand Down
133 changes: 116 additions & 17 deletions bycon/byconServiceLibs/ontology_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import re
from random import sample
from progress.bar import Bar

from pymongo import MongoClient

Expand All @@ -8,6 +10,7 @@
DB_MONGOHOST,
mongo_and_or_query_from_list,
prdbug,
prjsonnice,
rest_path_value
)

Expand All @@ -16,10 +19,28 @@ def __init__(self):
self.query = {}
self.term_groups = []
self.unique_terms = []
self.ontology_maps = []
self.erroneous_maps = []
self.filters = BYC.get("BYC_FILTERS", [])
self.filter_definitions = BYC.get("filter_definitions", {})
# TODO: Shouldn't be hard coded here
self.filter_id_matches = ["NCIT", "pgx:icdom", "pgx:icdot", "UBERON"]
self.ds_id = BYC["BYC_DATASET_IDS"][0]

self.ontologymaps_coll = MongoClient(host=DB_MONGOHOST)["_byconServicesDB"]["ontologymaps"]
self.bios_coll = MongoClient(host=DB_MONGOHOST)[self.ds_id]["biosamples"]

self.combos = [
{
"icdom": "icdo_morphology",
"icdot": "icdo_topography",
"NCIT": "histological_diagnosis"
},
{
"icdot": "icdo_topography",
"UBERON": "sample_origin_detail"
}
]

self.__ontologymaps_query()

Expand All @@ -35,7 +56,11 @@ def ontology_maps_query(self):
# -------------------------------------------------------------------------#

def ontology_maps_results(self):
self.__retrieve_ontologymaps()
if len(self.query.keys()) < 1:
BYC["ERRORS"].append("No correct filter value provided!")
else:
self.__retrieve_ontologymaps()

return [
{
"term_groups": self.term_groups,
Expand All @@ -44,6 +69,33 @@ def ontology_maps_results(self):
]


# -------------------------------------------------------------------------#
# -------------------------------------------------------------------------#

def replace_ontology_maps(self):
    """Re-create the `_byconServicesDB.ontologymaps` collection contents.

    Generates the maps from the current biosamples, then either:
    * in test mode: prints each map and how many *would* be created,
      without touching the database, or
    * otherwise: wipes the collection and repopulates it, reporting the
      resulting document count.

    Returns:
        list: the generated (non-erroneous) ontology maps.
    """
    self.__create_ontology_maps()
    if BYC["TEST_MODE"] is True:
        for o in self.ontology_maps:
            prjsonnice(o)
        print(f'==>> {len(self.ontology_maps)} maps would be created')
        return self.ontology_maps
    self.ontologymaps_coll.delete_many({})
    # batch insert instead of one round trip per document;
    # insert_many raises on an empty list, hence the guard
    if self.ontology_maps:
        self.ontologymaps_coll.insert_many(self.ontology_maps)
    o_c = self.ontologymaps_coll.count_documents({})
    print(f'==>> {o_c} maps have been created in the database')
    return self.ontology_maps


# -------------------------------------------------------------------------#
# -------------------------------------------------------------------------#

def retrieve_erroneous_maps(self):
    """Return the maps that failed code-pattern validation.

    Lazily triggers map generation when no maps have been built yet.
    """
    if not self.ontology_maps:
        self.__create_ontology_maps()
    return self.erroneous_maps


# -------------------------------------------------------------------------#
# ----------------------------- private -----------------------------------#
# -------------------------------------------------------------------------#
Expand Down Expand Up @@ -80,37 +132,84 @@ def __ontologymaps_query(self):

self.query = mongo_and_or_query_from_list(q_list, "AND")


# -------------------------------------------------------------------------#
# -------------------------------------------------------------------------#

def __retrieve_ontologymaps(self):
u_c_d = { }
mongo_client = MongoClient(host=DB_MONGOHOST)
mongo_coll = mongo_client["_byconServicesDB"]["ontologymaps"]
for o in mongo_coll.find( self.query, { '_id': False } ):
for o in self.ontologymaps_coll.find( self.query, { '_id': False } ):
for c in o["code_group"]:
pre, code = re.split("[:-]", c["id"], maxsplit=1)
u_c_d.update( { c["id"]: { "id": c["id"], "label": c["label"] } } )
self.term_groups.append( o["code_group"] )
mongo_client.close( )

for k, u in u_c_d.items():
self.unique_terms.append(u)

# if "termGroups" in BYC["response_entity_id"]:
# t_g_s = []
# for tg in self.term_groups:
# t_l = []
# for t in tg:
# t_l.append(str(t.get("id", "")))
# t_l.append(str(t.get("label", "")))
# t_g_s.append("\t".join(t_l))

# if "text" in BYC_PARS.get("output", "___none___"):
# print_text_response("\n".join(t_g_s))
# results = c_g

if "termGroups" in BYC["response_entity_id"]:
t_g_s = []
for tg in self.term_groups:
t_l = []
for t in tg:
t_l.append(str(t.get("id", "")))
t_l.append(str(t.get("label", "")))
t_g_s.append("\t".join(t_l))

if "text" in BYC_PARS.get("output", "___none___"):
print_text_response("\n".join(t_g_s))
results = c_g

# -------------------------------------------------------------------------#
# -------------------------------------------------------------------------#

def __create_ontology_maps(self):
    """Build ontology maps from all biosamples for each code combination.

    For every combination in `self.combos` (e.g. icdom + icdot + NCIT),
    each biosample's codes are concatenated into a unique map id; one map
    per unique combination is kept. Codes are validated against the `pattern`
    of the matching filter definition; maps with validation failures go to
    `self.erroneous_maps`, the rest to `self.ontology_maps`. Each map also
    gets up to 10 sampled `notes` values as examples.
    """
    keyed_maps = {}
    bios_no = self.bios_coll.count_documents({})

    for c in self.combos:
        # e.g. "icdom::icdot::NCIT" — relies on insertion-ordered dict keys
        map_type = "::".join(c.keys())
        print(f'Re-generating {map_type} ontology maps from {bios_no} samples...')
        bar = Bar(f'Processing {bios_no} from {self.ds_id}', max = bios_no, suffix='%(percent)d%%' )
        for bios in self.bios_coll.find({}, { '_id': False }).limit(BYC_PARS.get("limit", 0)):
            bar.next()
            ids = []
            qs = {}
            cg = []
            errors = []
            for k, v in c.items():
                # id pattern from the filter definition; "___none___" never matches
                o_re = re.compile(self.filter_definitions.get(k, {}).get("pattern", "___none___"))
                o = bios.get(v, {"id": "___none___", "label": "___none___"})
                oid = o.get("id")
                ids.append(str(oid))
                qs.update({f'{v}.id': oid})
                cg.append(o)
                if not o_re.match(str(oid)):
                    errors.append(f'{v}.id: {oid}')
            uid = "::".join(ids)
            if uid in keyed_maps:
                # this code combination has already been collected
                continue
            keyed_maps.update({
                uid: {
                    "id": uid,
                    "map_type": map_type,
                    "code_group": cg,
                    "local_query": qs,
                    "examples": [],
                    "errors": errors
                }
            })
        bar.finish()

    for k, v in keyed_maps.items():
        # up to 10 randomly sampled free-text notes from matching samples
        examples = self.bios_coll.distinct("notes", v["local_query"])
        s_no = min(10, len(examples))
        e = sample(examples, s_no)
        # NOTE(review): assumes `notes` values are strings; a None entry
        # would raise on len() — confirm against the data
        e = [t for t in e if len(t) > 2]
        v.update({"examples": e})
        # fix: default must be a list — the previous `.get("errors", 0)`
        # would raise TypeError on len(0) if the key were ever missing
        if len(v.get("errors", [])) > 0:
            self.erroneous_maps.append(v)
            continue
        self.ontology_maps.append(v)

25 changes: 0 additions & 25 deletions bycon/config/datatable_mappings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,31 +169,6 @@ definitions:
type: string
db_key: collection_moment
indexed: True
# sex_id:
# type: string
# db_key: info.sex.id
# indexed: True
# compact: True
# sex_label:
# type: string
# db_key: info.sex.label
# compact: True
# followup_state_id:
# type: string
# db_key: followup_state.id
# default: 'EFO:0030039'
# indexed: True
# compact: True
# followup_state_label:
# type: string
# db_key: followup_state.label
# default: 'no followup status'
# compact: True
# followup_time:
# type: string
# db_key: followup_time
# indexed: True
# compact: True
# recurrence:
# db_key: info.recurrence
# type: string
Expand Down
1 change: 1 addition & 0 deletions bycon/config/services_entity_defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ sampleplots:

schemas:
request_entity_path_id: schemas
path_id_value_bycon_parameter: id

uploader:
request_entity_path_id: uploader
Expand Down
2 changes: 2 additions & 0 deletions bycon/lib/beacon_response_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,8 @@ def __acknowledge_HIT(self):
self.result_sets = rss


# -------------------------------------------------------------------------#

def __acknowledge_MISS(self):
if not "MISS" in (i_rs_r := BYC_PARS.get("include_resultset_responses", "ALL")).upper():
return
Expand Down
4 changes: 2 additions & 2 deletions bycon/lib/dataset_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def ds_id_from_accessid():
ds_id = h_o.get("source_db", False)
if (ds_id := str(h_o.get("source_db"))) not in BYC["DATABASE_NAMES"]:
return False
BYC.update({"BYC_DATASET_IDS": [ds_id]})
BYC.update({"BYC_DATASET_IDS": [ds_id]})
return True


Expand All @@ -93,7 +93,7 @@ def ds_id_from_default():
defaults: object = BYC["beacon_defaults"].get("defaults", {})
if (ds_id := str(defaults.get("default_dataset_id"))) not in BYC["DATABASE_NAMES"]:
return False
BYC.update({"BYC_DATASET_IDS": [ ds_id ]})
BYC.update({"BYC_DATASET_IDS": [ds_id]})
return True


1 change: 0 additions & 1 deletion bycon/lib/service_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,3 @@ def set_response_schema():
r_s = r_e.get("response_schema", "beaconInfoResponse")
BYC.update({"response_schema": r_s})


Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
"$schema": https://json-schema.org/draft/2020-12/schema
"$id": https://progenetix.org/services/schemas/pgxCollation/v2022-03-08
"$id": https://progenetix.org/services/schemas/pgxCollation/v2024-10-17
title: pgxCollation
meta:
contributors:
- description: "Michael Baudis"
id: "orcid:0000-0002-9903-4248"
provenance:
- description: "Progenetix `bycon` project"
id: 'https://github.com/progenetix/bycon/'
sb_status: community
description: >-
A Collation summarizes information of all biosamples matching a given term
(e.g. PMID or NCIT code), as well as the connected items if the code is part
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ properties:
diseases:
type: array
items:
$ref: ../common/disease.yaml
$ref: ../bycon-model/common/disease.yaml
info:
description: >-
Additional structured data which may not be common across different resources.
Expand All @@ -33,6 +33,8 @@ properties:
type: string
examples:
- HapMap project contributor
indexDisease:
$ref: ../bycon-model/common/disease.yaml
externalReferences:
type: array
items:
Expand Down
7 changes: 1 addition & 6 deletions byconServices/ontologymaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@
podmd"""

def ontologymaps():
OM = OntologyMaps()

query = OM.ontology_maps_query()
if len(query.keys()) < 1:
BYC["ERRORS"].append("No correct filter value provided!")
results = OM.ontology_maps_results()
results = OntologyMaps().ontology_maps_results()
BeaconErrorResponse().respond_if_errors()
ByconServiceResponse().print_populated_response(results)

11 changes: 6 additions & 5 deletions byconServices/schemas.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from bycon import prjsonhead, prjsontrue, BYC, BYC_PARS, BeaconErrorResponse, read_schema_file

"""podmd
This helper service reads and serves local schema definition files. The name of
the schema (corresponding to the file name minus extension) is provided either
as an `id` query parameter or as the first part of the path after `schemas/`.
* <https://progenetix.org/services/schemas/biosample>
podmd"""

def schemas():
if not (schema_name := BYC_PARS.get("id")):
if len(schema_name := BYC.get("request_entity_path_id_value", {})) > 0:
schema_name = schema_name[0]
if schema_name:
schema_name = schema_name.split('.').pop(0)
if (ids := BYC_PARS.get("id"), []):
schema_name = ids[0].split('.').pop(0)
if (s := read_schema_file(schema_name, "")):
prjsonhead()
prjsontrue(s)
Expand Down
27 changes: 12 additions & 15 deletions docs/changes.md
Original file line number Diff line number Diff line change
@@ -1,26 +1,17 @@
# Changes & To Do

## Notes about Previous Development

The `bycon` package was started during the development of the [**Beacon v2**](https://docs.genomebeacons.org)
specification with the aims of a) testing and demonstrating features of the emerging
specification in a real-world use case while b) serving the needs of the [Progenetix](https://progenetix.org)
oncogenomic resource. Many of the recent changes are aimed at disentangling
the code base from this specific use case.

An earlier version of the Progenetix && Beacon "BeaconPlus" stack had been provided
through the Perl based [**PGX** project](http://github.com/progenetix/PGX/).

## Known Bugs

* pagination for download file handovers has some hiccups

## Changes Tracker

While changes are documented for individual point versions we actually do not
push releases out for all of them; they serve more as internal development
milestones.

### 2024-10-17 (v2.0.2)

* added an `ontologymapsReplacer.py` service app to (re-)create the `_byconServicesDB.ontologymaps` collection
- this uses the now extended `OntologyMaps` class
* fixed the `schemas` service (recently broken during path parsing refactoring)

### 2024-10-10 (v2.0.1)

* bug fix for query generation for `/datasets` endpoint where `testMode=true`
Expand Down Expand Up @@ -1067,3 +1058,9 @@ Bug fix release:
#### 2023-01-15

- [x] create bycon documentation subdomain & configure Github pages for it


## Known Bugs

* ... they change all the time ...

Loading

0 comments on commit 78e1013

Please sign in to comment.