Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug sar azureml #991

Merged
merged 8 commits into from
Dec 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion notebooks/00_quick_start/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ data preparation, model building, and model evaluation by using the utility func
| [rbm](rbm_movielens.ipynb)| MovieLens | Python CPU, GPU | Utilizing the Restricted Boltzmann Machine (rbm) [4] to predict movie ratings in a Python+GPU (TensorFlow) environment.<br>
| [rlrmc](rlrmc_movielens.ipynb) | Movielens | Python CPU | Utilizing the Riemannian Low-rank Matrix Completion (RLRMC) [6] to predict movie ratings in a Python+CPU environment
| [sar](sar_movielens.ipynb) | MovieLens | Python CPU | Utilizing the Simple Algorithm for Recommendation (SAR) to predict movie ratings in a Python+CPU environment.
| [sar_azureml](sar_movielens_with_azureml.ipynb)| MovieLens | Python CPU | An example of how to utilize and evaluate SAR using the [Azure Machine Learning service](https://docs.microsoft.com/azure/machine-learning/service/overview-what-is-azure-ml)(AzureML). It takes the content of the [sar quickstart notebook](sar_movielens.ipynb) and demonstrates how to use the power of the cloud to manage data, switch to powerful GPU machines, and monitor runs while training a model.
| [sar_azureml](sar_movielens_with_azureml.ipynb)| MovieLens | Python CPU | An example of how to utilize and evaluate SAR using the [Azure Machine Learning service](https://docs.microsoft.com/azure/machine-learning/service/overview-what-is-azure-ml) (AzureML). It takes the content of the [sar quickstart notebook](sar_movielens.ipynb) and demonstrates how to use the power of the cloud to manage data, switch to powerful GPU machines, and monitor runs while training a model.
| [wide-and-deep](wide_deep_movielens.ipynb) | MovieLens | Python CPU, GPU | Utilizing Wide-and-Deep Model (Wide-and-Deep) [5] to predict movie ratings in a Python+GPU (TensorFlow) environment.
| [xdeepfm](xdeepfm_criteo.ipynb) | Criteo, Synthetic Data | Python CPU, GPU | Utilizing the eXtreme Deep Factorization Machine (xDeepFM) [3] to learn both low and high order feature interactions for predicting CTR, in a Python+GPU (TensorFlow) environment.

Expand Down
134 changes: 50 additions & 84 deletions notebooks/00_quick_start/sar_movielens.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,16 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"System version: 3.6.8 |Anaconda, Inc.| (default, Dec 30 2018, 01:22:34) \n",
"System version: 3.7.3 | packaged by conda-forge | (default, Jul 1 2019, 21:52:21) \n",
"[GCC 7.3.0]\n",
"Pandas version: 0.24.1\n"
"Pandas version: 0.23.4\n"
]
}
],
Expand All @@ -60,12 +60,11 @@
"sys.path.append(\"../../\")\n",
"\n",
"import logging\n",
"import time\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import papermill as pm\n",
"\n",
"from reco_utils.common.timer import Timer\n",
"from reco_utils.dataset import movielens\n",
"from reco_utils.dataset.python_splitters import python_stratified_split\n",
"from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n",
Expand All @@ -91,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 3,
"metadata": {
"tags": [
"parameters"
Expand All @@ -115,14 +114,14 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"4.93MB [00:01, 3.46MB/s] \n"
"100%|██████████| 4.81k/4.81k [00:02<00:00, 1.90kKB/s]\n"
]
},
{
Expand Down Expand Up @@ -201,7 +200,7 @@
"4 166 346 1.0 886397596"
]
},
"execution_count": 73,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -228,7 +227,7 @@
},
{
"cell_type": "code",
"execution_count": 74,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -237,7 +236,7 @@
},
{
"cell_type": "code",
"execution_count": 75,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -299,7 +298,7 @@
},
{
"cell_type": "code",
"execution_count": 76,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -334,73 +333,47 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2019-05-07 11:16:40,709 INFO Collecting user affinity matrix\n",
"2019-05-07 11:16:40,715 INFO Calculating time-decayed affinities\n",
"2019-05-07 11:16:40,766 INFO Creating index columns\n",
"2019-05-07 11:16:40,782 INFO Building user affinity sparse matrix\n",
"2019-05-07 11:16:40,787 INFO Calculating item co-occurrence\n",
"2019-05-07 11:16:40,910 INFO Calculating item similarity\n",
"2019-05-07 11:16:40,910 INFO Using jaccard based similarity\n",
"2019-05-07 11:16:40,990 INFO Done training\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Took 0.284792423248291 seconds for training.\n"
"Took 0.3302565817721188 seconds for training.\n"
]
}
],
"source": [
"start_time = time.time()\n",
"\n",
"model.fit(train)\n",
"with Timer() as train_time:\n",
" model.fit(train)\n",
"\n",
"train_time = time.time() - start_time\n",
"print(\"Took {} seconds for training.\".format(train_time))"
"print(\"Took {} seconds for training.\".format(train_time.interval))"
]
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2019-05-07 11:16:41,003 INFO Calculating recommendation scores\n",
"2019-05-07 11:16:41,114 INFO Removing seen items\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Took 0.1463017463684082 seconds for prediction.\n"
"Took 0.21034361701458693 seconds for prediction.\n"
]
}
],
"source": [
"start_time = time.time()\n",
"\n",
"top_k = model.recommend_k_items(test, remove_seen=True)\n",
"with Timer() as test_time:\n",
" top_k = model.recommend_k_items(test, remove_seen=True)\n",
"\n",
"test_time = time.time() - start_time\n",
"print(\"Took {} seconds for prediction.\".format(test_time))"
"print(\"Took {} seconds for prediction.\".format(test_time.interval))"
]
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": 10,
"metadata": {
"scrolled": true
},
Expand Down Expand Up @@ -435,52 +408,53 @@
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>58</td>\n",
" <td>3.049881</td>\n",
" <td>204</td>\n",
" <td>3.313306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>3.053073</td>\n",
" <td>89</td>\n",
" <td>3.280465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>318</td>\n",
" <td>3.059262</td>\n",
" <td>11</td>\n",
" <td>3.233867</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>210</td>\n",
" <td>3.095604</td>\n",
" <td>367</td>\n",
" <td>3.192575</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>96</td>\n",
" <td>3.124997</td>\n",
" <td>423</td>\n",
" <td>3.131517</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userID itemID prediction\n",
"0 1 58 3.049881\n",
"1 1 7 3.053073\n",
"2 1 318 3.059262\n",
"3 1 210 3.095604\n",
"4 1 96 3.124997"
"0 1 204 3.313306\n",
"1 1 89 3.280465\n",
"2 1 11 3.233867\n",
"3 1 367 3.192575\n",
"4 1 423 3.131517"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "display_data"
"output_type": "execute_result"
}
],
"source": [
"display(top_k.head())"
"top_k.head()"
]
},
{
Expand All @@ -494,7 +468,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -503,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -512,7 +486,7 @@
},
{
"cell_type": "code",
"execution_count": 82,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -521,7 +495,7 @@
},
{
"cell_type": "code",
"execution_count": 83,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -530,7 +504,7 @@
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand All @@ -557,17 +531,9 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2019-05-07 11:16:42,926 INFO Calculating recommendation scores\n",
"2019-05-07 11:16:43,033 INFO Removing seen items\n"
]
},
{
"data": {
"text/html": [
Expand Down Expand Up @@ -650,7 +616,7 @@
"4 876 288 3.0 879428101 NaN"
]
},
"execution_count": 85,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -737,8 +703,8 @@
"pm.record(\"ndcg\", eval_ndcg)\n",
"pm.record(\"precision\", eval_precision)\n",
"pm.record(\"recall\", eval_recall)\n",
"pm.record(\"train_time\", train_time)\n",
"pm.record(\"test_time\", test_time)"
"pm.record(\"train_time\", train_time.interval)\n",
"pm.record(\"test_time\", test_time.interval)"
]
}
],
Expand All @@ -759,7 +725,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.3"
}
},
"nbformat": 4,
Expand Down
Loading