-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01-prepare-dataset.sh
executable file
·32 lines (22 loc) · 1 KB
/
01-prepare-dataset.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/bin/bash
#
# Prepare the MovieLens dataset for the Retail API movie-recommendation
# tutorial:
#   https://cloud.google.com/retail/docs/movie-rec-tutorial#prepare_the_dataset
#
# Steps:
#   1. Download and unpack the MovieLens "ml-latest" archive.
#   2. Copy movies.csv, ratings.csv and links.csv to a Cloud Storage bucket.
#   3. Load the three CSV files into tables of the BigQuery dataset "movielens".
#
# Configuration: variables.inc (sourced below) must define PROJECT and
# LOCATION.

# Stop right away if the configuration cannot be loaded; every later command
# depends on the values it defines.
source variables.inc || {
  printf 'error: unable to source variables.inc\n' >&2
  exit 1
}

# Strict mode is switched on only after sourcing, because variables.inc is not
# visible here and may not be written to run under `set -u`.
set -euo pipefail

# An empty PROJECT or LOCATION would otherwise yield malformed resource names
# (e.g. "gs://-movielens-data").
: "${PROJECT:?PROJECT must be set (expected from variables.inc)}"
: "${LOCATION:?LOCATION must be set (expected from variables.inc)}"

bucket="gs://${PROJECT}-movielens-data"
dataset="movielens"

gcloud config set project "${PROJECT}"
# NOTE(review): PROJECT_ID is not read anywhere else in this script; it is
# kept in case it is an exported variable consumed elsewhere -- confirm and
# drop if unused.
# shellcheck disable=SC2034
PROJECT_ID=$(gcloud config get-value project)

# --- Download the dataset ---------------------------------------------------
# -N re-downloads only when the remote file is newer, rather than saving a
# second copy as ml-latest.zip.1 and then unpacking the stale archive.
wget -N https://files.grouplens.org/datasets/movielens/ml-latest.zip
# -o overwrites previously extracted files without an interactive prompt.
unzip -o ml-latest.zip

# --- Stage the CSV files in Cloud Storage -----------------------------------
# Create the bucket only when it is not already listable, so a re-run does not
# abort on "bucket already exists".
if ! gsutil ls -b "${bucket}" >/dev/null 2>&1; then
  gsutil mb "${bucket}"
fi
gsutil cp \
  ml-latest/movies.csv \
  ml-latest/ratings.csv \
  ml-latest/links.csv \
  "${bucket}"

# --- Load into BigQuery -----------------------------------------------------
# --force makes `bq mk` exit 0 when the dataset already exists.
bq --location="${LOCATION}" mk --force --project_id="${PROJECT}" \
  --dataset "${dataset}"

# Each CSV has one header row, skipped via --skip_leading_rows=1; the last
# argument of every load is the inline table schema.
bq load --skip_leading_rows=1 "${PROJECT}:${dataset}.movies" \
  "${bucket}/movies.csv" \
  "movieId:integer,title,genres"

bq load --skip_leading_rows=1 "${PROJECT}:${dataset}.ratings" \
  "${bucket}/ratings.csv" \
  "userId:integer,movieId:integer,rating:float,time:timestamp"

bq load --skip_leading_rows=1 "${PROJECT}:${dataset}.links" \
  "${bucket}/links.csv" \
  "movieId:integer,imdbId:string,tmdbId:integer"