-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathParseInput.py
236 lines (198 loc) · 8.18 KB
/
ParseInput.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
import datetime
import os
import re
import ffmpeg
import pafy
import requests
import yt_dlp
def parser(list_of_text):
    """
    Sort raw text lines into video entries and picture entries.

    Every matching line is split on whitespace, so each entry is a list of
    tokens: the link first, followed by whatever else stood on the line
    (for example a time interval).

    Returns a tuple (video_entries, picture_entries); video_entries holds
    the YouTube matches followed by the matches of the second site pattern.
    """
    # All patterns are applied with match(), i.e. anchored at the line start
    picture_pattern = re.compile(r"^http(s)?:\/\/.*jpg.*")
    youtube_pattern = re.compile(r"http(s)?:\/\/(www\.)?youtu.*")
    other_site_pattern = re.compile(r".*pornhub.*")

    def tokens_where(pattern):
        # Whitespace-split every line the pattern accepts, in input order
        return [entry.split() for entry in list_of_text if pattern.match(entry)]

    picture_entries = tokens_where(picture_pattern)
    video_entries = tokens_where(youtube_pattern)
    video_entries.extend(tokens_where(other_site_pattern))
    return video_entries, picture_entries
def hasNumbers(inputString):
    """
    Tell whether at least one character of inputString is a digit.

    Returns True on the first digit found, False if there is none
    (which includes the empty string).
    """
    for symbol in inputString:
        if symbol.isdigit():
            return True
    return False
def tedoius_time(time_string):
    """
    Left-pad a shortened time string to the hh:mm:ss layout.
    Used for make_time func

    The input is treated as the right-hand end of "00:00:00": whatever is
    missing on the left is filled in from that template, e.g. "5" becomes
    "00:00:05" and "1:45" becomes "00:01:45".

    A word meaning "start" is turned into "00:00:00". A word meaning "end"
    is returned unchanged, because later steps look for that exact word.
    An empty string is returned unchanged.

    Raises ValueError if time_string is longer than 8 characters.
    """
    start = ['start', 'begin', 'beginning', 'head', 'first']
    end = ['slut', 'end', 'tail', 'finish',
           'finito', 'fin', 'done', 'finished']
    template = "00:00:00"
    keyword = time_string.lower()
    if keyword in start:
        return template
    # We need this exact string for later
    if keyword in end:
        return time_string
    length = len(time_string)
    if length > len(template):
        # Raising a bare string is itself a TypeError; use a real exception
        raise ValueError('Time string too long!')
    if length == 0:
        return time_string
    # Keep only the part of the template the input does not cover
    return template[:len(template) - length] + time_string
def make_time(parsed_file):
    """
    Turn the intervals of parsed links into start time / run time pairs.

    Takes the link list produced by parser(): each entry is a list whose
    first item is the link and whose optional second item is an interval
    written as "start-end" (dots are accepted instead of colons).

    Returns a tuple (timed, whole_clip):
      * timed is a list of (link, [start, run_time]) pairs, both in
        hh:mm:ss form. The start is moved one second earlier (unless it is
        already 00:00:00) and the end one second later, and the second
        value is the run time counted from the adjusted start, because
        ffmpeg is given a start and a duration rather than an end time.
        If the end was given as a word meaning "end", that word is kept
        as the second value.
      * whole_clip is a list of links that had no interval (no second item,
        or a second item without any digit).

    parsed_file itself is left unmodified.
    """
    # Different ways to say end
    end = ['slut', 'end', 'tail', 'finish',
           'finito', 'fin', 'done', 'finished']
    whole_clip = []
    timed_lines = []
    for line in parsed_file:
        # No second token, or one without digits, means "download everything"
        if len(line) < 2 or not hasNumbers(line[1]):
            whole_clip.append(line[0])
        else:
            timed_lines.append(line)
    links = [line[0] for line in timed_lines]
    # Split time based on dash character
    split_times = [line[1].split("-") for line in timed_lines]
    # Add or subtract one second for good key frame
    add_sub = datetime.timedelta(seconds=1)
    # Reference point for the calculations (the date itself is irrelevant)
    midnight = datetime.datetime(2019, 1, 1)
    for interval in split_times:
        # Replace . with : and pad both ends to hh:mm:ss
        interval[0] = tedoius_time(interval[0].replace(".", ":"))
        interval[1] = tedoius_time(interval[1].replace(".", ":"))
        # Split the start into hh, mm, ss
        first = interval[0].split(":")
        starts = datetime.datetime(2019, 1, 1, int(first[0]),
                                   int(first[1]), int(first[2]))
        # Start one second early, but never move before 00:00:00
        if starts > midnight:
            starts -= add_sub
        # Distance of the adjusted start from 00:00:00
        offset = starts - midnight
        # An "end" keyword is left as it is; only real end times are converted
        if interval[1].lower() not in end:
            last = interval[1].split(":")
            ends = datetime.datetime(2019, 1, 1, int(last[0]),
                                     int(last[1]), int(last[2]))
            ends += add_sub
            # This is how long the clip should be
            ends -= offset
            interval[1] = ends.strftime("%H:%M:%S")
        interval[0] = starts.strftime("%H:%M:%S")
    # Zip the two lists together. But return a list of this, since a zip object can only be used once.
    zipped = zip(links, split_times)
    return list(zipped), whole_clip
def download_whole(link, playlist):
    """
    Function that downloads a whole video when no interval is supplied

    link is the URL handed to yt-dlp. When playlist is truthy the playlist
    behind the link is downloaded and errors on individual items are
    ignored; otherwise only the single video is fetched.

    Files are written to the "content" directory and named after the video
    title. A failed download is reported with a printed message instead of
    an exception.
    """
    SAVE_PATH = 'content'
    ydl_opts = {
        "nocheckcertificate": True,
        "noplaylist": not playlist,
        'outtmpl': f'{SAVE_PATH}/%(title)s.%(ext)s',
        'format': 'bestvideo[ext=mp4][vcodec!*=av01]+bestaudio[ext=m4a]',
    }
    if playlist:
        # One broken entry should not abort the rest of the playlist
        ydl_opts["ignoreerrors"] = True
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([link])
        # A tuple is required here: "A or B" evaluates to A alone, which
        # would leave DownloadError uncaught
        except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError):
            print(f"Couldn't download {link}")
def download_interval(link):
    """
    Download one clip of a video, cut to the requested interval.

    link is a pair: link[0] is the video URL and link[1] is a two item
    sequence holding the start time and either the run time or a word
    meaning "end" (in which case the video's full duration is used as
    the run time).

    The clip is written to content/<video title>.mp4 and is skipped when
    that file already exists. Failures are printed, not raised.
    """
    end = ['slut', 'end', 'tail', 'finish',
           'finito', 'fin', 'done', 'finished']
    try:
        video = pafy.new(link[0], ydl_opts={
            'nocheckcertificate': True, "noplaylist": True})
        target = os.path.join("content", f"{video.title}.mp4")
        # Nothing to do if this video has been downloaded before
        if os.path.exists(target):
            return
        video_s = video.getbestvideo()
        # TODO: add a way to get the second best stream (third etc.) when an error occurs using Pafy.videostreams and going through the list
        video_a = video.getbestaudio()
        clip_start, clip_stop = link[1][0], link[1][1]
        # An "end" word means: run for the whole length of the video
        run_time = video.duration if clip_stop.lower() in end else clip_stop
        # Where is the stream, where should we start, how long should it run
        mp4_vid = ffmpeg.input(video_s.url, ss=clip_start, t=run_time)
        mp4_aud = ffmpeg.input(video_a.url, ss=clip_start, t=run_time)
        # Do the processing
        try:
            # One video stream ('v') and one audio stream ('a') are joined
            joined = ffmpeg.concat(mp4_vid['v'], mp4_aud['a'], v=1, a=1)
            # Output is title of video with mp4 ending
            joined.output(target).run()
        except TypeError as e:
            print(f"An error occurred e 0: {e}")
        except ffmpeg._run.Error as e:
            print(f"An error occurred e 1: {e}")
    except Exception as e:
        print(f"I couldn't download {link} due to: {e}")
def download_pics(pics_link):
    """
    Function to download pictures from the input sequence

    pics_link is a list whose first item is the picture URL. The picture is
    saved in the "content" directory under the last path segment of that
    URL. If the server does not answer with a success status, nothing is
    written and a message is printed instead.
    """
    url = pics_link[0]
    # A timeout keeps a stalled server from blocking the whole run
    r = requests.get(url, timeout=30)
    if not r.ok:
        # Do not save an HTTP error page under a picture file name
        print(f"Couldn't download {url}")
        return
    name = url.split('/')[-1]
    with open(os.path.join("content", name), "wb") as dl:
        dl.write(r.content)