-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrecommendation.py
81 lines (59 loc) · 3.97 KB
/
recommendation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import numpy as np
import json
import re
import sys
import itertools
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
def generate_playlist_feature(complete_feature_set, playlist_df, weight_factor):
    """
    Summarize a user's playlist into a single recency-weighted feature vector.

    Each playlist song is weighted by ``weight_factor ** -months`` where
    ``months`` is the number of whole 30-day periods between the song's
    ``date_added`` and the most recently added song in the playlist. The
    weighted feature rows are then summed column-wise.

    Parameters:
        complete_feature_set (pandas dataframe): Dataframe which includes all of the
            features for the spotify songs. Must contain an 'id' column; every other
            column is treated as a feature.
        playlist_df (pandas dataframe): playlist dataframe with 'id' and 'date_added'
            columns. 'date_added' may be datetimes or anything ``pd.to_datetime``
            can parse.
        weight_factor (float): float value that represents the recency bias. The larger
            the recency bias, the more priority recent songs get. Value should be
            close to 1.

    Returns:
        playlist_feature_set_weighted_final (pandas series): single feature vector that
            summarizes the playlist, indexed by feature column name. All zeros when no
            playlist song is present in ``complete_feature_set``.
        complete_feature_set_nonplaylist (pandas dataframe): rows of
            ``complete_feature_set`` whose 'id' is not in the playlist.
    """
    in_playlist = complete_feature_set['id'].isin(playlist_df['id'].values)
    complete_feature_set_nonplaylist = complete_feature_set[~in_playlist]

    playlist_feature_set = complete_feature_set[in_playlist].merge(
        playlist_df[['id', 'date_added']], on='id', how='inner'
    )

    # Pick the feature columns by name rather than by position so the result
    # does not depend on where 'id' sits in complete_feature_set.
    feature_columns = [
        col for col in playlist_feature_set.columns
        if col not in ('id', 'date_added')
    ]

    # No overlap between the playlist and the feature set: nothing to weight.
    if playlist_feature_set.empty:
        empty_summary = playlist_feature_set[feature_columns].sum(axis=0)
        return empty_summary, complete_feature_set_nonplaylist

    playlist_feature_set['date_added'] = pd.to_datetime(playlist_feature_set['date_added'])
    playlist_feature_set = playlist_feature_set.sort_values('date_added', ascending=False)

    date_added = playlist_feature_set['date_added']
    # Whole 30-day "months" since the most recent addition (always >= 0).
    # Cast to float so an integer weight_factor can take a negative exponent.
    months_from_recent = ((date_added.max() - date_added).dt.days // 30).astype(float)
    weights = float(weight_factor) ** (-months_from_recent)

    playlist_feature_set_weighted = playlist_feature_set[feature_columns].mul(weights, axis=0)
    return playlist_feature_set_weighted.sum(axis=0), complete_feature_set_nonplaylist
# TODO: these example calls should also be made from main.
#complete_feature_set_playlist_vector_EDM, complete_feature_set_nonplaylist_EDM = generate_playlist_feature(complete_feature_set, playlist_EDM, 1.09)
#complete_feature_set_playlist_vector_chill, complete_feature_set_nonplaylist_chill = generate_playlist_feature(complete_feature_set, playlist_chill, 1.09)
def generate_playlist_recos(df, features, nonplaylist_features):
    """
    Recommend songs that are not in a playlist, ranked by similarity to it.

    Every candidate song is scored with the cosine similarity between its
    feature row and the summarized playlist vector; the 40 best-scoring
    songs are returned together with an album image URL.

    Parameters:
        df (pandas dataframe): spotify dataframe; must contain an 'id' column
        features (pandas series): summarized playlist feature. Its values must be in
            the same order as the non-'id' columns of ``nonplaylist_features``.
        nonplaylist_features (pandas dataframe): feature set of songs that are not in
            the selected playlist; must contain an 'id' column

    Returns:
        non_playlist_df_top_40: Top 40 recommendations for that playlist, i.e. rows of
            ``df`` with added 'sim' (cosine similarity) and 'url' columns, sorted by
            'sim' descending.

    Note:
        Relies on a module-level ``sp`` object exposing ``track(id)`` (presumably a
        Spotify API client - confirm where it is created). One ``sp.track`` call is
        made per returned row.
    """
    candidate_ids = nonplaylist_features['id'].values
    candidate_matrix = nonplaylist_features.drop('id', axis=1).values
    playlist_vector = features.values.reshape(1, -1)
    similarities = cosine_similarity(candidate_matrix, playlist_vector)[:, 0]

    # Attach scores by song id, not by row position: df and nonplaylist_features
    # are not guaranteed to share row order or row count.
    sim_by_id = dict(zip(candidate_ids, similarities))

    # Copy so the new columns are written to an independent frame rather than
    # to a slice of the caller's dataframe.
    non_playlist_df = df[df['id'].isin(candidate_ids)].copy()
    non_playlist_df['sim'] = non_playlist_df['id'].map(sim_by_id)

    non_playlist_df_top_40 = non_playlist_df.sort_values('sim', ascending=False).head(40).copy()
    # images[1] is the second entry of the album's image list returned by sp.track.
    non_playlist_df_top_40['url'] = non_playlist_df_top_40['id'].apply(
        lambda x: sp.track(x)['album']['images'][1]['url']
    )
    return non_playlist_df_top_40
# TODO: possibly make this example call from main as well (undecided).
#edm_top40 = generate_playlist_recos(spotify_df, complete_feature_set_playlist_vector_EDM, complete_feature_set_nonplaylist_EDM)