Update some plot graphs and team membership (#21)
choldgraf authored Feb 3, 2024
1 parent e70437a commit d41a73c
Showing 7 changed files with 70 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -137,3 +137,4 @@ book/data/hub-activity.csv

# A place to manually store data we use as part of data updating
book/scripts/_data/
+_build
35 changes: 18 additions & 17 deletions book/cloud.md
@@ -5,13 +5,15 @@ jupytext:
extension: .md
format_name: myst
format_version: 0.13
-jupytext_version: 1.14.4
+jupytext_version: 1.16.1
kernelspec:
display_name: Python 3 (ipykernel)
language: python
name: python3
---

+++ {"editable": true, "slideshow": {"slide_type": ""}}

% CSS for the big numbers

<style>
@@ -147,20 +149,19 @@ We break our hubs into two groups as some hubs have orders of magnitude more use
```{code-cell} ipython3
:tags: [remove-input, remove-stderr, remove-stdout]
-queries = ["< 150", ">= 150"]
-for qu in queries:
-    hubs_small = df.query(f"users {qu} and scale=='Weekly'")["hub"].values
-    chs = []
-    groups = df.query("hub in @hubs_small").groupby("scale")
-    for scale in scale_ordering:
-        idata = groups.get_group(scale)
-        ch = alt.Chart(idata, title=f"{scale} users").mark_bar().encode(
-            alt.X("users:Q", bin=True),
-            y='count()',
-            color="scale",
-            tooltip=["users", "hub"],
-        ).interactive()
-        chs.append(ch)
-    display(Markdown(f"**For hubs {qu} weekly users.**"))
-    display(alt.hconcat(*chs))
+chs = []
+groups = df.groupby("scale")
+for scale in scale_ordering:
+    idata = groups.get_group(scale)
+    # ref for log plot: https://github.com/altair-viz/altair/issues/1074#issuecomment-411861659
+    ch = alt.Chart(idata, title=f"{scale} users").transform_calculate(
+        logusers = 'log(max(datum.users, 1))/log(10)'
+    ).mark_bar().encode(
+        alt.X("logusers:Q", bin=True),
+        y='count()',
+        color="cluster",
+        tooltip=["users", "hub"],
+    ).interactive()
+    chs.append(ch)
+display(alt.hconcat(*chs))
```
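The new chart code log-transforms user counts before binning (the linked Altair issue describes this workaround for log-scaled histograms). A minimal sketch of the same transform in plain Python, mirroring the Vega expression `log(max(datum.users, 1))/log(10)`:

```python
import math

def log10_clipped(users):
    # Mirror the Vega expression log(max(datum.users, 1))/log(10):
    # clipping at 1 keeps zero-user hubs in the first bin instead of -inf.
    return math.log(max(users, 1)) / math.log(10)

# Hub sizes spanning orders of magnitude compress into comparable bins
print([round(log10_clipped(u), 2) for u in [0, 1, 10, 150, 10000]])
# → [0.0, 0.0, 1.0, 2.18, 4.0]
```

Binning `logusers` then yields roughly one bar per order of magnitude, which is why the hubs no longer need to be pre-split into "< 150" and ">= 150" groups.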
10 changes: 8 additions & 2 deletions book/data/team.yml
@@ -1,9 +1,15 @@
# Cross-organization
- "choldgraf"
- "haroldcampbell"
# Partnerships
- "colliand"
- "jmunroe"
# Product
- "Gman0909"
# Engineering
- "AIDEA775"
- "consideRatio"
- "damianavila"
- "georgianaelena"
- "jmunroe"
- "pnasrat"
- "sgibson91"
- "yuvipanda"
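The reorganized `team.yml` groups members by sub-team using YAML comments. Comments are invisible to parsers, so the file still loads as one flat list of names. A small pure-Python sketch (the parsing helper and inlined sample are illustrative, not part of the repository):

```python
sample = '''
# Cross-organization
- "choldgraf"
# Engineering
- "consideRatio"
- "sgibson91"
'''

def load_members(text):
    # Parse simple `- "name"` YAML list items, skipping comments and blanks.
    # The comment headers only group entries visually; parsers see a flat list.
    members = []
    for line in text.splitlines():
        line = line.strip()
        if line.startswith("- "):
            members.append(line[2:].strip().strip('"'))
    return members

print(load_members(sample))
# → ['choldgraf', 'consideRatio', 'sgibson91']
```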
12 changes: 8 additions & 4 deletions book/scripts/download_github_data.py
@@ -1,3 +1,8 @@
"""
Scrape GitHub activity in key stakeholder GitHub organizations for
all of 2i2c's team members. Store them in a local CSV file that is
used in visualization notebooks to plot activity over time.
"""
from github_activity import get_activity
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
@@ -30,11 +35,10 @@
##
# Determine which dates we need to grab new data
##
-# We only want the last 3 months, so figure out our start and end dates
-# If we already have data, then the start date is:
-# whatever is later of (last data date, and today - 3 months)
+# Use two quarters of data
-N_DAYS = 180
+N_DAYS = 182
+# Use last year of data (for larger reports and grants)
+# N_DAYS = 365 + 90
today = datetime.now(tz=ZoneInfo("UTC"))
time_window_begin = today - timedelta(days=N_DAYS)
time_window_end = today
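The window arithmetic above can be sketched as follows; `N_DAYS = 182` approximates two 91-day quarters. This sketch uses `timezone.utc` rather than `ZoneInfo("UTC")` so it runs even without tzdata installed:

```python
from datetime import datetime, timedelta, timezone

N_DAYS = 182  # two quarters of data: 2 * 91 days

today = datetime.now(tz=timezone.utc)
time_window_begin = today - timedelta(days=N_DAYS)
time_window_end = today

print((time_window_end - time_window_begin).days)  # → 182
```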
2 changes: 1 addition & 1 deletion book/scripts/download_hub_activity.py
@@ -1,5 +1,5 @@
"""
-Download activer user data from each of our JupyterHubs and save them to a CSV.
+Download active user data from each of our JupyterHubs and save them to a CSV.
This uses the cluster and hub data in our config folder, and grabs the active users
data from the `metrics/` endpoint of each JupyterHub we run.
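The `metrics/` endpoint of a JupyterHub serves Prometheus-format text. As a rough sketch of what parsing one such line might look like (the sample metric name, labels, and value below are illustrative assumptions, not output from an actual hub):

```python
import re

# Illustrative Prometheus-format line; the metric name and labels are
# assumptions for demonstration, not guaranteed JupyterHub output.
sample = 'jupyterhub_active_users{period="24h"} 42.0'

def parse_metric(line):
    # Split a `name{labels} value` line into its three parts.
    m = re.match(r'(\w+)\{(.*?)\}\s+([\d.]+)', line)
    if not m:
        return None
    name, labels, value = m.groups()
    label_dict = {}
    for kv in labels.split(","):
        k, v = kv.split("=", 1)
        label_dict[k] = v.strip('"')
    return name, label_dict, float(value)

print(parse_metric(sample))
# → ('jupyterhub_active_users', {'period': '24h'}, 42.0)
```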
45 changes: 33 additions & 12 deletions book/upstream.md
@@ -5,14 +5,11 @@ jupytext:
extension: .md
format_name: myst
format_version: 0.13
-jupytext_version: 1.14.1
+jupytext_version: 1.16.1
kernelspec:
display_name: Python 3 (ipykernel)
language: python
name: python3
-mystnb:
-  remove_code_source: true
-  output_stderr: remove
---

# Upstream community activity
@@ -63,7 +60,7 @@ if path_data.exists():
    DATETIME_COLUMNS = ["createdAt", "updatedAt", "closedAt"]
    data = pd.read_csv(path_data, parse_dates=DATETIME_COLUMNS)
else:
-    print("No data found, please run `python ../scripts/download_github_data.py`")
+    print("No data found, please run `python scripts/download_github_data.py`")
```

```{code-cell} ipython3
@@ -147,13 +144,17 @@ def visualize_over_time(df, on="updatedAt", title=""):
return ch
```

+++ {"editable": true, "slideshow": {"slide_type": ""}}

## Comments by a 2i2c team member

Comments reflect where we're participating in conversations, discussions, brainstorming, and guiding others. They are a good proxy for overall activity because comments happen everywhere and may not be associated with a specific change to the code.

```{code-cell} ipython3
visualize_over_time(comments.query("author in @team"), title="Comments made by a team member, over time")
```
+++ {"editable": true, "slideshow": {"slide_type": ""}}

visualize_over_time(comments, title="Comments made by a team member, over time")

+++

Now we break it down by repository to visualize where this activity has been directed.

@@ -165,37 +166,52 @@ Click a bar to show a GitHub search that roughly corresponds to the underlying data.
visualize_by_org_repo(comments, kind="commenter", title="Comments by a team member, by repository.")
```

+++ {"tags": []}
+++ {"editable": true, "slideshow": {"slide_type": ""}}

## Issues opened by team members

This shows issues that a 2i2c team member has opened over time.
This gives an idea of where we are noticing issues and suggesting improvements in upstream repositories.

```{code-cell} ipython3
---
editable: true
slideshow:
slide_type: ''
---
issues = data.loc[["issues/" in ii for ii in data["url"].values]]
issuesByUs = issues.dropna(subset="createdAt").query("author in @team")
-visualize_over_time(issuesByUs,on="closedAt", title="Issues opened by a team member, over time")
+visualize_over_time(issuesByUs, on="closedAt", title="Issues opened by a team member, over time")
```
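The cell above separates issues from pull requests by checking for `"issues/"` in each row's URL before querying on the author. The same boolean-mask trick, sketched in plain Python with hypothetical URLs:

```python
# Hypothetical URLs of the shape combined GitHub activity data contains
urls = [
    "https://github.com/2i2c-org/infrastructure/issues/123",
    "https://github.com/2i2c-org/infrastructure/pull/456",
    "https://github.com/jupyterhub/jupyterhub/issues/789",
]

# Same boolean mask the notebook builds for df.loc[...]
is_issue = ["issues/" in u for u in urls]
issue_urls = [u for u, keep in zip(urls, is_issue) if keep]
print(issue_urls)
# → ['https://github.com/2i2c-org/infrastructure/issues/123', 'https://github.com/jupyterhub/jupyterhub/issues/789']
```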

+++ {"editable": true, "slideshow": {"slide_type": ""}}

Now we break it down by repository to visualize where this activity has been directed.

```{tip}
Click a bar to show a GitHub search that roughly corresponds to the underlying data.
```

```{code-cell} ipython3
---
editable: true
slideshow:
slide_type: ''
---
visualize_by_org_repo(issuesByUs, "Issues opened by a team member, by repository", kind="author")
```

+++ {"tags": []}

## Merged PRs authored by team members

Pull Requests authored by a 2i2c team member and merged by anyone.
This gives an idea of where we're committing code, documentation, and team policy improvements.

```{code-cell} ipython3
---
editable: true
slideshow:
slide_type: ''
---
authoredByUs = data.dropna(subset="closedAt").query("author in @team")
visualize_over_time(authoredByUs, on="closedAt", title="PRs authored by a team member that were merged, over time")
```
@@ -226,5 +242,10 @@ Click a bar to show a GitHub search that roughly corresponds to the underlying d
```

```{code-cell} ipython3
---
editable: true
slideshow:
slide_type: ''
---
visualize_by_org_repo(mergedByUs, title="PRs merged by a team member, by repository")
```
2 changes: 1 addition & 1 deletion noxfile.py
@@ -1,5 +1,6 @@
import nox
from shlex import split
+import os

nox.options.reuse_existing_virtualenvs = True

@@ -11,7 +12,6 @@ def lab(session):
@nox.session
def docs(session):
session.install('-r', 'requirements.txt')

if "live" in session.posargs:
session.install("sphinx-autobuild")
session.run(*split("sphinx-autobuild -b dirhtml book book/_build/dirhtml --port 0"))
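The noxfile builds argument lists from command strings with `shlex.split`, which tokenizes the way a POSIX shell would, so `session.run(*split(cmd))` passes each token as a separate argument:

```python
from shlex import split

cmd = "sphinx-autobuild -b dirhtml book book/_build/dirhtml --port 0"
argv = split(cmd)  # tokenize like a POSIX shell
print(argv)
# → ['sphinx-autobuild', '-b', 'dirhtml', 'book', 'book/_build/dirhtml', '--port', '0']
```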
