-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
164 lines (131 loc) · 5.64 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# For this section of the project, I watched a tutorial by the YouTuber "Analytic with Adam"
# (URL - https://www.youtube.com/watch?v=0FtcHjI5lmw) to get a solid understanding of how the YouTube API expects
# comment requests to be made. I did additional research in the GitHub repo for the Google API Python client,
# URL for GitHub Repo - https://github.com/googleapis/google-api-python-client#readme
# URL for actual YouTube API Docs - https://developers.google.com/youtube/v3/docs
# I also made use of a GeeksforGeeks article on writing to CSV files in Python
# URL - https://www.geeksforgeeks.org/writing-csv-files-in-python/#
# Requires the demoji and google-api-python-client packages to be installed:
#   pip install demoji
#   pip install --upgrade google-api-python-client
from googleapiclient.discovery import build
import csv
import demoji
import sys
# Cleans a piece of text before it is written to the CSV file.
def clear_emojis(string):
    """Return *string* with commas turned into spaces and emojis removed.

    Every comma is first swapped for a single space. Then each emoji that
    ``demoji.findall`` reports in the resulting text is deleted from it.
    """
    cleaned = string.replace(',', ' ')
    # demoji.findall returns a mapping keyed by the emojis found in the text;
    # iterating it yields those emojis.
    for emoji in demoji.findall(cleaned):
        cleaned = cleaned.replace(emoji, '')
    return cleaned
def _reset_csv(path):
    """Empty the file at *path* (creating it if needed) so no stale data is left in it."""
    # Opening in 'w' mode already truncates the file; nothing has to be written.
    with open(path, 'w'):
        pass


def video_data(argv):
    """Fetch statistics and top-level comments for one YouTube video into ``test.csv``.

    Args:
        argv: The YouTube video ID as a single string (the script passes
            ``sys.argv[1]``).

    Returns:
        None. Results are reported through these side effects:

        * The line ``title   views   likes   comments`` is printed to stdout.
        * ``test.csv`` is rewritten: the first row holds
          ``title, viewCount, likeCount, commentCount`` and every following row
          holds ``comment text, comment likes, reply count`` for one top-level
          comment. Reply texts are not written, only their count.
        * If the ID does not resolve to a video, or the video has no
          ``commentCount`` statistic, a message is printed, ``test.csv`` is
          emptied and the function returns early.

    Errors raised by the API client itself are not caught here.
    """
    import os  # local import: only needed for the optional API-key override

    # This is the author's personal YouTube API key; please be careful which videos
    # you use so that the quota is not exhausted.
    # NOTE(review): a key committed to source control should be rotated. Setting the
    # YOUTUBE_API_KEY environment variable overrides the built-in fallback.
    api_key = os.environ.get('YOUTUBE_API_KEY',
                             'AIzaSyD7RmH3pB12AhfaYUfo39gmoJT7gBpDLac')
    video_id = argv
    filename = "test.csv"

    # creating YouTube resource object
    youtube = build('youtube', 'v3', developerKey=api_key)

    # getting general video statistics
    video_stats = youtube.videos().list(
        part="snippet,statistics",
        id=video_id
    ).execute()

    # Extra input validation: an ID that is well-formed but does not exist comes
    # back with an empty 'items' list.
    try:
        video = video_stats['items'][0]
    except IndexError:
        print("Not a YouTube video ID")
        _reset_csv(filename)
        return
    if video['kind'] != 'youtube#video':
        print("Not a correct YouTube video ID")
        _reset_csv(filename)
        return

    # extracting statistics from the video resource
    stats = video['statistics']
    title = clear_emojis(video['snippet']['title'])
    viewCount = stats['viewCount']
    # presumably 'likeCount' is omitted when the owner hides ratings (verify against
    # the API docs); fall back to 0 instead of failing with KeyError.
    totLikes = stats.get('likeCount', 0)
    try:
        numComments = stats['commentCount']
    except KeyError:
        print("This video does not have comments enabled")
        _reset_csv(filename)
        print(title, " ", viewCount, " ", totLikes, " ", 0)
        return
    print(title, " ", viewCount, " ", totLikes, " ", numComments)

    # retrieve the first page of comment threads
    video_response = youtube.commentThreads().list(
        part='snippet,replies',
        videoId=video_id,
        maxResults=100
    ).execute()

    # newline='' is what the csv module requires of the file it writes to (otherwise
    # Windows gets an extra blank line per row); an explicit UTF-8 encoding keeps
    # non-ASCII comments from being dropped on platforms with a narrower default.
    with open(filename, 'w', encoding='utf-8', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        # first row of the csv file: the video statistics
        csvwriter.writerow([title, viewCount, totLikes, numComments])

        # walk every page of comment threads
        while video_response:
            for item in video_response['items']:
                thread = item['snippet']
                top_comment = thread['topLevelComment']['snippet']
                row = [
                    clear_emojis(top_comment['textDisplay']),
                    top_comment['likeCount'],
                    thread['totalReplyCount'],
                ]
                try:
                    csvwriter.writerow(row)
                except UnicodeEncodeError:
                    # Skip a comment that still cannot be encoded rather than
                    # aborting the whole export.
                    continue

            # Follow the pagination token until the API stops returning one.
            if 'nextPageToken' not in video_response:
                break
            video_response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=video_response['nextPageToken']
            ).execute()
if __name__ == "__main__":
    # The YouTube video ID is expected as the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a readable usage message (non-zero status) instead of an
        # IndexError traceback when the argument is missing.
        sys.exit("usage: python main.py <youtube_video_id>")
    video_data(sys.argv[1])