Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added monthly traffic summaries to analytics (#2707) #2718

Merged
merged 3 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions analytics/constants.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
# CHANGE THESE VALUES TO GENERATE NEW REPORTS
# NOTE(review): several names below are assigned twice in this view (it looks
# like a diff rendered without +/- markers). If executed as shown, the later
# assignment of each name is the one that takes effect.
# The start and end dates of the current month (yyyy-mm-dd)
START_DATE_CURRENT = "2024-12-01"
END_DATE_CURRENT = "2024-12-31"
START_DATE_CURRENT = "2025-01-01"
END_DATE_CURRENT = "2025-01-31"
# The start and end dates of the prior month (yyyy-mm-dd)
START_DATE_PRIOR = "2024-11-01"
END_DATE_PRIOR = "2024-11-30"
START_DATE_PRIOR = "2024-12-01"
END_DATE_PRIOR = "2024-12-31"
# The name of the folder in which to save the report
PARENT_FOLDER_NAME = "December 2024 (demos)"
PARENT_FOLDER_NAME = "January 2025"

# The name of the spreadsheet with the report
SHEET_NAME = "HCA Portal"

# Passed as the "property" request parameter by the report notebook
# (presumably the GA4 property ID -- TODO confirm).
HCA_PORTAL_ID = "361323030"
# Filter to exclude the Data Explorer
# NOTE(review): despite the name and the comment above, this is an EXACT match
# on hostName "data.humancellatlas.org" and is identical to
# HCA_PORTAL_ONLY_FILTER below, i.e. it selects the portal host rather than
# negating anything. Confirm whether this name is still needed.
HCA_BROWSER_EXCLUDE_FILTER = {"filter": {"fieldName": "hostName", "stringFilter": {"matchType": "EXACT", "value": "data.humancellatlas.org"}}}
# Filter to include only the HCA Portal (hostName is exactly data.humancellatlas.org)
HCA_PORTAL_ONLY_FILTER = {"filter": {"fieldName": "hostName", "stringFilter": {"matchType": "EXACT", "value": "data.humancellatlas.org"}}}
# Filter to include only the HCA Explorer (hostName is exactly explore.data.humancellatlas.org)
HCA_EXPLORER_ONLY_FILTER = {"filter": {"fieldName": "hostName", "stringFilter": {"matchType": "EXACT", "value": "explore.data.humancellatlas.org"}}}
# First argument to ga.authenticate() in the report notebook
# (presumably the name of an environment variable holding the client-secret
# file path -- TODO confirm against the analytics package).
SECRET_NAME = "HCA_ANALYTICS_REPORTING_CLIENT_SECRET_PATH"
ANALYTICS_START = "2021-01-01"
# The start date after which GA4 data is reliable; used as "start_date" for
# the all-time monthly summaries
ANALYTICS_START = "2023-07-01"
# Path to the historic monthly "Users" / "Total Pageviews" JSON that the
# notebook passes as additional_data_path for the combined summary
HISTORIC_UA_DATA_PATH = "./users_over_time_history.json"

# Passed as port= to ga.authenticate() in the report notebook
OAUTH_PORT = 8082
106 changes: 95 additions & 11 deletions analytics/generate_sheets_report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"import analytics.sheets_api as sheets\n",
"import analytics.sheets_elements as elements\n",
"import pandas as pd\n",
"import gspread\n",
"from constants import *"
]
},
Expand All @@ -28,10 +29,11 @@
"metadata": {},
"outputs": [],
"source": [
"ga_authentication, drive_authentication = ga.authenticate(\n",
"ga_authentication, drive_authentication, sheets_authentication = ga.authenticate(\n",
" SECRET_NAME,\n",
" ga.ga4_service_params,\n",
" ga.drive_service_params,\n",
" ga.sheets_service_params,\n",
" port=OAUTH_PORT\n",
")\n",
"\n",
Expand All @@ -41,14 +43,29 @@
" \"service_system\": ga_authentication,\n",
" \"start_date\": START_DATE_CURRENT,\n",
" \"end_date\": END_DATE_CURRENT,\n",
" \"property\": HCA_PORTAL_ID,\n",
"}\n",
"\n",
"hca_portal_params = {\n",
" **default_params,\n",
" \"base_dimension_filter\": HCA_BROWSER_EXCLUDE_FILTER,\n",
" \"property\": HCA_PORTAL_ID,\n",
" \"base_dimension_filter\": HCA_PORTAL_ONLY_FILTER,\n",
"}\n",
"\n",
"hca_combined_all_time_params = {\n",
" **default_params,\n",
" \"start_date\": ANALYTICS_START,\n",
"}\n",
"\n",
"hca_portal_all_time_params = {\n",
" **hca_combined_all_time_params,\n",
" \"base_dimension_filter\": HCA_PORTAL_ONLY_FILTER,\n",
"\n",
"}\n",
"\n"
"\n",
"hca_explorer_all_time_params = {\n",
" **hca_combined_all_time_params,\n",
" \"base_dimension_filter\": HCA_EXPLORER_ONLY_FILTER,\n",
"}"
]
},
{
Expand All @@ -57,6 +74,9 @@
"metadata": {},
"outputs": [],
"source": [
"df_monthly_summary_portal = elements.get_page_views_over_time_df(hca_portal_all_time_params)\n",
"df_monthly_summary_explorer = elements.get_page_views_over_time_df(hca_explorer_all_time_params)\n",
"df_monthly_summary_combined = elements.get_page_views_over_time_df(hca_combined_all_time_params, additional_data_path=HISTORIC_UA_DATA_PATH, additional_data_behavior=elements.ADDITIONAL_DATA_BEHAVIOR.ADD)\n",
"df_outbound = elements.get_outbound_links_change(hca_portal_params, START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR)\n",
"df_pageviews = elements.get_page_views_change(hca_portal_params, START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR)"
]
Expand All @@ -68,19 +88,39 @@
"outputs": [],
"source": [
"dict_spreadsheet = {\n",
" \"Portal Summary\": df_monthly_summary_portal,\n",
" \"Explorer Summary\": df_monthly_summary_explorer,\n",
" \"Combined Summary\": df_monthly_summary_combined,\n",
" \"Page Views\": df_pageviews,\n",
" \"Outbound Links\": df_outbound,\n",
"}\n",
"\n",
"summary_column_formatting = {\n",
" \"Month\": sheets.COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE,\n",
" \"Users Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" \"Total Pageviews Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
"}\n",
"\n",
"summary_worksheet_formatting = {\n",
" \"extra_columns\": 1,\n",
" \"extra_columns_width\": 2000,\n",
"}\n",
"\n",
"sheet = sheets.create_sheet_in_folder(\n",
" drive_authentication,\n",
" SHEET_NAME,\n",
" PARENT_FOLDER_NAME,\n",
" override_behavior=sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE\n",
")\n",
"\n",
"sheets.fill_spreadsheet_with_df_dict(\n",
" sheets.create_sheet_in_folder(\n",
" drive_authentication,\n",
" SHEET_NAME,\n",
" PARENT_FOLDER_NAME,\n",
" override_behavior=sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE\n",
" ),\n",
" sheet,\n",
" dict_spreadsheet,\n",
" sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE,\n",
" column_formatting_options={\n",
" \"Portal Summary\": summary_column_formatting,\n",
" \"Explorer Summary\": summary_column_formatting,\n",
" \"Combined Summary\": summary_column_formatting,\n",
" \"Page Views\": {\n",
" \"Total Users Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" \"Total Views Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
Expand All @@ -89,9 +129,53 @@
" \"Total Clicks Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" \"Total Users Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" }\n",
" },\n",
" sheet_formatting_options={\n",
" \"Portal Summary\": summary_worksheet_formatting,\n",
" \"Explorer Summary\": summary_worksheet_formatting,\n",
" \"Combined Summary\": summary_worksheet_formatting\n",
" }\n",
")"
")\n",
"for df, worksheet_name in zip(\n",
" (df_monthly_summary_portal, df_monthly_summary_explorer, df_monthly_summary_combined), \n",
" (\"Portal Summary\", \"Explorer Summary\", \"Combined Summary\")\n",
"):\n",
" worksheet = sheet.worksheet(worksheet_name)\n",
" date_range = sheets.WorksheetRange(\n",
" worksheet, \n",
" gspread.cell.Cell(1, 1), \n",
" gspread.cell.Cell(df.index.size + 1, 2)\n",
" )\n",
" users_range = sheets.WorksheetRange(\n",
" worksheet, \n",
" gspread.cell.Cell(1, 2), \n",
" gspread.cell.Cell(df.index.size + 1, 3)\n",
" )\n",
" pageviews_range = sheets.WorksheetRange(\n",
" worksheet, \n",
" gspread.cell.Cell(1, 3), \n",
" gspread.cell.Cell(df.index.size + 1, 4)\n",
" )\n",
" sheets.add_chart_to_sheet(\n",
" sheets_authentication,\n",
" sheet,\n",
" worksheet,\n",
" sheets.CHART_TYPES.LINE,\n",
" date_range,\n",
" [users_range, pageviews_range],\n",
" chart_position=gspread.cell.Cell(1, 6),\n",
" chart_position_offset_x=75,\n",
" chart_position_offset_y=25,\n",
" title=\"Pageviews and Users Over Time\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion analytics/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-e git+https://github.com/DataBiosphere/data-browser.git@e0ce2c7464107bbbc166f7e21fcc3c4426b6e553#egg=analytics&subdirectory=analytics/analytics_package
-e git+https://github.com/DataBiosphere/data-browser.git@e2653f5605cc3220d28299bfc2cc48205c23067d#egg=analytics&subdirectory=analytics/analytics_package
anyio==4.8.0
appnope==0.1.4
argon2-cffi==23.1.0
Expand Down
1 change: 1 addition & 0 deletions analytics/users_over_time_history.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"Users":{"1685577600000":7778,"1682899200000":7384,"1680307200000":6467,"1677628800000":6574,"1675209600000":5855,"1672531200000":4470,"1669852800000":4684,"1667260800000":4965,"1664582400000":4891,"1661990400000":4766,"1659312000000":4304,"1656633600000":4112,"1654041600000":4182,"1651363200000":4960,"1648771200000":3899,"1646092800000":3412,"1643673600000":3104,"1640995200000":2836,"1638316800000":2394,"1635724800000":2938,"1633046400000":2584,"1630454400000":2405,"1627776000000":2496,"1625097600000":2954,"1622505600000":1891,"1619827200000":0},"Total Pageviews":{"1685577600000":44146,"1682899200000":48780,"1680307200000":39497,"1677628800000":38763,"1675209600000":30800,"1672531200000":25796,"1669852800000":26073,"1667260800000":28929,"1664582400000":29034,"1661990400000":28711,"1659312000000":27741,"1656633600000":29631,"1654041600000":30890,"1651363200000":33258,"1648771200000":29391,"1646092800000":27114,"1643673600000":22216,"1640995200000":22452,"1638316800000":19856,"1635724800000":24409,"1633046400000":22963,"1630454400000":20424,"1627776000000":21700,"1625097600000":25562,"1622505600000":16683,"1619827200000":0}}