Epimeta export rewrite (#922)

* preliminary epi rewrite * changes * changes * changes * fix * added back something accidentally deleted * moved code * fix test * remove old stuff * cleanup: * epimeta export rewrite * remove obsolete code * changes * merge fix * update admin site to browse data pivot by evidence type --------- Co-authored-by: Andy Shapiro <[email protected]>
shapiromatron · Oct 13, 2023 · bf9afe9 · bf9afe9
1 parent ce11ca0
commit bf9afe9
Show file tree

Hide file tree

Showing 3 changed files with 196 additions and 210 deletions.
diff --git a/hawc/apps/epimeta/exports.py b/hawc/apps/epimeta/exports.py
@@ -1,6 +1,158 @@
+import pandas as pd
+
+from ..common.exports import Exporter, ModelExport
 from ..common.helper import FlatFileExporter
-from ..study.models import Study
-from . import models
+from ..common.models import sql_display, sql_format, str_m2m
+from ..epi.exports import ResultMetricExport
+from ..study.exports import StudyExport
+from . import constants
+
+
+class MetaProtocolExport(ModelExport):
+    def get_value_map(self):
+        return {
+            "pk": "pk",
+            "url": "url",
+            "name": "name",
+            "protocol_type": "protocol_type",
+            "lit_search_strategy": "lit_search_strategy",
+            "lit_search_notes": "lit_search_notes",
+            "lit_search_start_date": "lit_search_start_date",
+            "lit_search_end_date": "lit_search_end_date",
+            "total_references": "total_references",
+            "inclusion_criteria": "inclusion_criteria",
+            "exclusion_criteria": "exclusion_criteria",
+            "total_studies_identified": "total_studies_identified",
+            "notes": "notes",
+        }
+
+    def get_annotation_map(self, query_prefix):
+        return {
+            "url": sql_format("/epi-meta/protocol/{}/", query_prefix + "id"),  # hardcoded URL
+            "protocol_type": sql_display(query_prefix + "protocol_type", constants.MetaProtocol),
+            "lit_search_strategy": sql_display(
+                query_prefix + "lit_search_strategy", constants.MetaLitSearch
+            ),
+            "inclusion_criteria": str_m2m(query_prefix + "inclusion_criteria__description"),
+            "exclusion_criteria": str_m2m(query_prefix + "exclusion_criteria__description"),
+        }
+
+    def prepare_df(self, df):
+        for key in [
+            self.get_column_name("lit_search_start_date"),
+            self.get_column_name("lit_search_end_date"),
+        ]:
+            if key in df.columns:
+                df.loc[:, key] = df[key].apply(lambda x: x.isoformat() if not pd.isna(x) else x)
+        return df
+
+
+class MetaResultExport(ModelExport):
+    def get_value_map(self):
+        return {
+            "pk": "pk",
+            "url": "url",
+            "label": "label",
+            "data_location": "data_location",
+            "health_outcome": "health_outcome",
+            "health_outcome_notes": "health_outcome_notes",
+            "exposure_name": "exposure_name",
+            "exposure_details": "exposure_details",
+            "number_studies": "number_studies",
+            "statistical_metric": "metric__metric",
+            "statistical_notes": "statistical_notes",
+            "n": "n",
+            "estimate": "estimate",
+            "lower_ci": "lower_ci",
+            "upper_ci": "upper_ci",
+            "ci_units": "ci_units",
+            "heterogeneity": "heterogeneity",
+            "adjustment_factors": "adjustment_factors_str",
+            "notes": "notes",
+        }
+
+    def get_annotation_map(self, query_prefix):
+        return {
+            "url": sql_format("/epi-meta/result/{}/", query_prefix + "id"),  # hardcoded URL
+            "adjustment_factors_str": str_m2m(query_prefix + "adjustment_factors__description"),
+        }
+
+
+class SingleResultExport(ModelExport):
+    def get_value_map(self):
+        return {
+            "pk": "pk",
+            "study": "study_id",
+            "exposure_name": "exposure_name",
+            "weight": "weight",
+            "n": "n",
+            "estimate": "estimate",
+            "lower_ci": "lower_ci",
+            "upper_ci": "upper_ci",
+            "ci_units": "ci_units",
+            "notes": "notes",
+        }
+
+
+class EpiMetaExporter(Exporter):
+    def build_modules(self) -> list[ModelExport]:
+        return [
+            StudyExport("study", "protocol__study"),
+            MetaProtocolExport("meta_protocol", "protocol"),
+            MetaResultExport("meta_result", ""),
+            SingleResultExport("single_result", "single_results"),
+        ]
+
+
+class EpiMetaDataPivotExporter(Exporter):
+    def build_modules(self) -> list[ModelExport]:
+        return [
+            StudyExport(
+                "study",
+                "protocol__study",
+                include=(
+                    "id",
+                    "short_citation",
+                    "published",
+                ),
+            ),
+            MetaProtocolExport(
+                "meta_protocol",
+                "protocol",
+                include=(
+                    "pk",
+                    "name",
+                    "protocol_type",
+                    "total_references",
+                    "total_studies_identified",
+                ),
+            ),
+            MetaResultExport(
+                "meta_result",
+                "",
+                include=(
+                    "pk",
+                    "label",
+                    "health_outcome",
+                    "exposure_name",
+                    "number_studies",
+                    "n",
+                    "estimate",
+                    "lower_ci",
+                    "upper_ci",
+                    "ci_units",
+                    "heterogeneity",
+                ),
+            ),
+            ResultMetricExport(
+                "metric",
+                "metric",
+                include=(
+                    "name",
+                    "abbreviation",
+                ),
+            ),
+        ]
 
 
 class MetaResultFlatComplete(FlatFileExporter):
@@ -9,36 +161,8 @@ class MetaResultFlatComplete(FlatFileExporter):
     epidemiological meta-result study type from scratch.
     """
 
-    def _get_header_row(self):
-        header = []
-        header.extend(Study.flat_complete_header_row())
-        header.extend(models.MetaProtocol.flat_complete_header_row())
-        header.extend(models.MetaResult.flat_complete_header_row())
-        header.extend(models.SingleResult.flat_complete_header_row())
-        return header
-
-    def _get_data_rows(self):
-        rows = []
-        identifiers_df = Study.identifiers_df(self.queryset, "protocol__study_id")
-        for obj in self.queryset:
-            ser = obj.get_json(json_encode=False)
-            row = []
-            row.extend(Study.flat_complete_data_row(ser["protocol"]["study"], identifiers_df))
-            row.extend(models.MetaProtocol.flat_complete_data_row(ser["protocol"]))
-            row.extend(models.MetaResult.flat_complete_data_row(ser))
-
-            if len(ser["single_results"]) == 0:
-                # print one-row with no single-results
-                row.extend([None] * 10)
-                rows.append(row)
-            else:
-                # print each single-result as a new row
-                for sr in ser["single_results"]:
-                    row_copy = list(row)  # clone
-                    row_copy.extend(models.SingleResult.flat_complete_data_row(sr))
-                    rows.append(row_copy)
-
-        return rows
+    def build_df(self) -> pd.DataFrame:
+        return EpiMetaExporter().get_df(self.queryset)
 
 
 class MetaResultFlatDataPivot(FlatFileExporter):
@@ -49,60 +173,35 @@ class MetaResultFlatDataPivot(FlatFileExporter):
     Note: data pivot does not currently include study confidence. Could be added if needed.
     """
 
-    def _get_header_row(self):
-        return [
-            "study id",
-            "study name",
-            "study published",
-            "protocol id",
-            "protocol name",
-            "protocol type",
-            "total references",
-            "identified references",
-            "key",
-            "meta result id",
-            "meta result label",
-            "health outcome",
-            "exposure",
-            "result references",
-            "statistical metric",
-            "statistical metric abbreviation",
-            "N",
-            "estimate",
-            "lower CI",
-            "upper CI",
-            "CI units",
-            "heterogeneity",
-        ]
+    def build_df(self) -> pd.DataFrame:
+        df = EpiMetaDataPivotExporter().get_df(self.queryset)
+
+        df["key"] = df["meta_result-pk"]
 
-    def _get_data_rows(self):
-        rows = []
-        for obj in self.queryset:
-            ser = obj.get_json(json_encode=False)
-            row = [
-                ser["protocol"]["study"]["id"],
-                ser["protocol"]["study"]["short_citation"],
-                ser["protocol"]["study"]["published"],
-                ser["protocol"]["id"],
-                ser["protocol"]["name"],
-                ser["protocol"]["protocol_type"],
-                ser["protocol"]["total_references"],
-                ser["protocol"]["total_studies_identified"],
-                ser["id"],  # repeat for data-pivot key
-                ser["id"],
-                ser["label"],
-                ser["health_outcome"],
-                ser["exposure_name"],
-                ser["number_studies"],
-                ser["metric"]["metric"],
-                ser["metric"]["abbreviation"],
-                ser["n"],
-                ser["estimate"],
-                ser["lower_ci"],
-                ser["upper_ci"],
-                ser["ci_units"],
-                ser["heterogeneity"],
-            ]
-            rows.append(row)
-
-        return rows
+        df = df.rename(
+            columns={
+                "study-id": "study id",
+                "study-short_citation": "study name",
+                "study-published": "study published",
+                "meta_protocol-pk": "protocol id",
+                "meta_protocol-name": "protocol name",
+                "meta_protocol-protocol_type": "protocol type",
+                "meta_protocol-total_references": "total references",
+                "meta_protocol-total_studies_identified": "identified references",
+                "meta_result-pk": "meta result id",
+                "meta_result-label": "meta result label",
+                "meta_result-health_outcome": "health outcome",
+                "meta_result-exposure_name": "exposure",
+                "meta_result-number_studies": "result references",
+                "metric-name": "statistical metric",
+                "metric-abbreviation": "statistical metric abbreviation",
+                "meta_result-n": "N",
+                "meta_result-estimate": "estimate",
+                "meta_result-lower_ci": "lower CI",
+                "meta_result-upper_ci": "upper CI",
+                "meta_result-ci_units": "CI units",
+                "meta_result-heterogeneity": "heterogeneity",
+            },
+            errors="raise",
+        )
+        return df