diff --git a/examples/gaussian_processes/GP-Births.ipynb b/examples/gaussian_processes/GP-Births.ipynb
index 9c8877052..c29eac1e2 100644
--- a/examples/gaussian_processes/GP-Births.ipynb
+++ b/examples/gaussian_processes/GP-Births.ipynb
@@ -74,6 +74,7 @@
"import pandas as pd\n",
"import preliz as pz\n",
"import pymc as pm\n",
+ "import pytensor.tensor as pt\n",
"import seaborn as sns\n",
"import xarray as xr\n",
"\n",
@@ -698,10 +699,231 @@
"date = data_df[\"date\"].to_numpy()\n",
"year = data_df[\"year\"].to_numpy()\n",
"day_of_week_idx, day_of_week = data_df[\"day_of_week\"].factorize(sort=True)\n",
+ "day_of_week_no_monday = day_of_week[day_of_week != 1]\n",
"day_of_year2_idx, day_of_year2 = data_df[\"day_of_year2\"].factorize(sort=True)\n",
"births_relative100 = data_df[\"births_relative100\"].to_numpy()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "15fb0760",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " month | \n",
+ " day | \n",
+ " births | \n",
+ " day_of_year | \n",
+ " day_of_week | \n",
+ " id | \n",
+ " day_of_year2 | \n",
+ " date | \n",
+ " births_relative100 | \n",
+ " obs | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 8486 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1969-01-01 | \n",
+ " 87.947483 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 9002 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1969-01-02 | \n",
+ " 93.295220 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 9542 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1969-01-03 | \n",
+ " 98.891690 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 8960 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1969-01-04 | \n",
+ " 92.859939 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 8390 | \n",
+ " 5 | \n",
+ " 7 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1969-01-05 | \n",
+ " 86.952555 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 9560 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1969-01-06 | \n",
+ " 99.078239 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " 9738 | \n",
+ " 7 | \n",
+ " 2 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1969-01-07 | \n",
+ " 100.923001 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 8 | \n",
+ " 9734 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 1969-01-08 | \n",
+ " 100.881546 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 9 | \n",
+ " 9434 | \n",
+ " 9 | \n",
+ " 4 | \n",
+ " 9 | \n",
+ " 9 | \n",
+ " 1969-01-09 | \n",
+ " 97.772396 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1969 | \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 10042 | \n",
+ " 10 | \n",
+ " 5 | \n",
+ " 10 | \n",
+ " 10 | \n",
+ " 1969-01-10 | \n",
+ " 104.073606 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year month day births day_of_year day_of_week id day_of_year2 \\\n",
+ "0 1969 1 1 8486 1 3 1 1 \n",
+ "1 1969 1 2 9002 2 4 2 2 \n",
+ "2 1969 1 3 9542 3 5 3 3 \n",
+ "3 1969 1 4 8960 4 6 4 4 \n",
+ "4 1969 1 5 8390 5 7 5 5 \n",
+ "5 1969 1 6 9560 6 1 6 6 \n",
+ "6 1969 1 7 9738 7 2 7 7 \n",
+ "7 1969 1 8 9734 8 3 8 8 \n",
+ "8 1969 1 9 9434 9 4 9 9 \n",
+ "9 1969 1 10 10042 10 5 10 10 \n",
+ "\n",
+ " date births_relative100 obs \n",
+ "0 1969-01-01 87.947483 0 \n",
+ "1 1969-01-02 93.295220 1 \n",
+ "2 1969-01-03 98.891690 2 \n",
+ "3 1969-01-04 92.859939 3 \n",
+ "4 1969-01-05 86.952555 4 \n",
+ "5 1969-01-06 99.078239 5 \n",
+ "6 1969-01-07 100.923001 6 \n",
+ "7 1969-01-08 100.881546 7 \n",
+ "8 1969-01-09 97.772396 8 \n",
+ "9 1969-01-10 104.073606 9 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_df.head(10)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "c9e67e4e",
@@ -712,7 +934,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"id": "41ce3766",
"metadata": {},
"outputs": [],
@@ -740,7 +962,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
"id": "b9112864",
"metadata": {},
"outputs": [
@@ -750,7 +972,7 @@
"Text(0.5, 1.0, 'Relative Births in the USA in 1969 - 1988\\nTransformed Data')"
]
},
- "execution_count": 14,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
@@ -794,12 +1016,22 @@
"\n",
"1. **Global trend.** We use a Gaussian process with an exponential quadratic kernel.\n",
"2. **Periodicity over years**: We use a Gaussian process with a periodic kernel. Observe that, since we are working on the normalized scale, the period should be `period=365.25 / obs_std` (and not `period=365.25` !).\n",
- "3. **Weekly seasonality**: We use a zero-sum-normal distribution to capture the relative difference across weekdays.\n",
+ "3. **Weekly seasonality**: We use a normal distribution on the day of the week one-hot-encoded values. As the data is standardized, in particular centered around zero, we do not need to add an intercept term. In addition, we set the coefficient of Monday to zero to avoid identifiability issues.\n",
"4. **Likelihood**: We use a Gaussian distribution.\n",
"\n",
"For all of the Gaussian processes components we use the Hilbert Space Gaussian Process (HSGP) approximation."
]
},
+ {
+ "cell_type": "markdown",
+ "id": "7f1ff063",
+ "metadata": {},
+ "source": [
+ "```{note}\n",
+ "This model corresponds to `Model 3: Slow trend + yearly seasonal trend + day of week` in {cite:p}`vehtari2022Birthdays`.\n",
+ "```"
+ ]
+ },
{
"cell_type": "markdown",
"id": "cfe8f408",
@@ -812,7 +1044,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
"id": "cb235fe3",
"metadata": {},
"outputs": [
@@ -822,7 +1054,7 @@
"Text(0.5, 1.0, 'Prior distribution for the global trend Gaussian process')"
]
},
- "execution_count": 15,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
@@ -873,7 +1105,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"id": "260b21b2",
"metadata": {},
"outputs": [
@@ -886,254 +1118,278 @@
"\n",
"\n",
- "