-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
95 lines (77 loc) · 2.59 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import requests
import time
import json
from collections import defaultdict
import os
# BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# DATA_FILE_DIR = os.path.join(BASE_DIR, 'my-class')
# The YouTube Data API key is read from the environment so that it never has
# to be committed alongside the script.
API_KEY = os.getenv("API_KEY")
if API_KEY is None:
    print("Key not found")
    # A bare `exit` merely names the builtin without calling it, so the script
    # used to carry on and issue requests with key=None. Stop with a non-zero
    # exit status instead.
    raise SystemExit(1)
# YouTube Data API v3 endpoints queried by this script.
CHANNELS_API_URL = "https://www.googleapis.com/youtube/v3/channels"
PLAYLIST_API_URL = "https://www.googleapis.com/youtube/v3/playlistItems"

# NOTE(review): not referenced anywhere else in the visible script.
OUTPUT_FIELDS = ["video_id", "title", "video_published_at"]

# Title fragment of the video at which paging through the uploads stops.
# The trailing space is part of the value that is matched.
last_video = "Samveda 2021-22 | Day-01 | 10th Class | First Language Kannada | Prose-1 | Yuddha "

# Channel whose uploads are scanned.
channel_id = "UCbdMik2cV8pea1jcWdX_CYA"

# Query parameters for the channels endpoint.
channels_params = dict(key=API_KEY, part="contentDetails", id=channel_id)

# Query parameters for the playlistItems endpoint; the playlist id and the
# page token are filled in later, once they are known.
playlist_params = dict(key=API_KEY, part="snippet", maxResults=50)
# Look up the channel to find the id of the playlist listed as its "uploads".
response = requests.get(
    CHANNELS_API_URL,
    params=channels_params,
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Turn HTTP-level failures (bad key, quota exceeded, ...) into an explicit
# error here rather than an opaque KeyError on the payload below.
response.raise_for_status()
r = response.json()
if not r.get("items"):
    raise SystemExit("No channel found for id {!r}".format(channel_id))
uploads_id = r["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]

# Every playlistItems request from here on targets the uploads playlist.
playlist_params.update({"playlistId": uploads_id})
YOUTUBE_URL_PREFIX = "https://www.youtube.com/watch?v="

# Rows of [publishedAt, title, url], kept in the order the API returns them.
dataset = []

# One loop handles the first page and every following page. The first page
# used to be processed by a separate copy of the filter that never checked
# `last_video`, so paging did not stop when the marker video was on page one.
pageToken = None
done = False
while True:
    if pageToken:
        playlist_params.update({"pageToken": pageToken})
    response = requests.get(
        PLAYLIST_API_URL,
        params=playlist_params,
        timeout=30,  # requests waits indefinitely unless a timeout is given
    )
    # Fail on HTTP errors here instead of with a KeyError on r["items"].
    response.raise_for_status()
    r = response.json()

    for video in r["items"]:
        snippet = video["snippet"]
        title = snippet["title"]
        # Keep only "Samveda 2021-22" videos whose title does not mention Urdu.
        if "Samveda 2021-22" not in title or "Urdu" in title:
            continue
        date = snippet["publishedAt"]
        url = YOUTUBE_URL_PREFIX + snippet["resourceId"]["videoId"]
        dataset.append([date, title, url])
        # The marker video is the last one wanted; nothing after it is read.
        if last_video in title:
            done = True
            break

    if done:
        break
    pageToken = r.get("nextPageToken")
    if not pageToken:
        # No further pages.
        break
    # Short pause between consecutive page requests.
    time.sleep(0.1)
# Flip the rows in place so they run opposite to the order they were fetched.
dataset[:] = dataset[::-1]

# Class labels searched for inside each video title.
class_list = [
    "Classes 1-3",
    "1-3Class",
    "4th Class",
    "5th Class",
    "6th Class",
    "7th Class",
    "8th Class",
    "9th Class",
    "10th Class",
]

# Bucket every row under each label that occurs in its title (index 1).
# A label only becomes a key once at least one title contains it.
res = defaultdict(list)
for row in dataset:
    row_title = row[1]
    matching_labels = [label for label in class_list if label in row_title]
    for label in matching_labels:
        res[label].append(row)
# Write the grouped rows as indented JSON.
file_path = './data/data.json'
# open() does not create missing parent directories, so make sure the target
# directory exists; otherwise a first run fails with FileNotFoundError.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# An explicit encoding keeps the output independent of the platform default.
with open(file_path, 'w', encoding='utf-8') as f:
    json.dump(res, f, indent=2)