From d46ebc0044d535b59f97d241d36785439c5284b5 Mon Sep 17 00:00:00 2001 From: LuluDavid Date: Fri, 29 Oct 2021 15:41:17 +0200 Subject: [PATCH 1/4] Adding a no-overwrites option that avoids re-downloading a song if it already exists locally --- spotify_dl/spotify_dl.py | 5 ++++- spotify_dl/youtube.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/spotify_dl/spotify_dl.py b/spotify_dl/spotify_dl.py index 998b2744..cb1e454f 100755 --- a/spotify_dl/spotify_dl.py +++ b/spotify_dl/spotify_dl.py @@ -35,6 +35,8 @@ def spotify_dl(): help='Don\'t convert downloaded songs to mp3') parser.add_argument('-s', '--scrape', action="store", help="Use HTML Scraper for YouTube Search", default=True) + parser.add_argument('-n', '--no_overwrites', action='store_true', + help="Do not download if the target file already exists", default=False) parser.add_argument('-V', '--verbose', action='store_true', help='Show more information on what''s happening.') parser.add_argument('-v', '--version', action='store_true', @@ -88,7 +90,8 @@ def spotify_dl(): if args.keep_playlist_order: file_name_f = playlist_num_filename - download_songs(songs, save_path, args.format_str, args.skip_mp3, args.keep_playlist_order, file_name_f) + download_songs(songs, save_path, args.format_str, args.skip_mp3, args.keep_playlist_order, args.no_overwrites, + file_name_f) if __name__ == '__main__': diff --git a/spotify_dl/youtube.py b/spotify_dl/youtube.py index 84cc5a9b..c3baea02 100644 --- a/spotify_dl/youtube.py +++ b/spotify_dl/youtube.py @@ -20,7 +20,7 @@ def playlist_num_filename(song): def download_songs(songs, download_directory, format_string, skip_mp3, - keep_playlist_order=False, file_name_f=default_filename): + keep_playlist_order=False, no_overwrites=False, file_name_f=default_filename): """ Downloads songs from the YouTube URL passed to either current directory or download_directory, is it is passed. :param songs: Dictionary of songs and associated artist @@ -28,6 +28,7 @@ def download_songs(songs, download_directory, format_string, skip_mp3, :param format_string: format string for the file conversion :param skip_mp3: Whether to skip conversion to MP3 :param keep_playlist_order: Whether to keep original playlist ordering. Also, prefixes songs files with playlist num + :param no_overwrites: Whether we should not download the song if it already exists :param file_name_f: optional func(song) -> str that returns a filename for the download (without extension) """ log.debug(f"Downloading to {download_directory}") @@ -49,6 +50,8 @@ def download_songs(songs, download_directory, format_string, skip_mp3, '-metadata', 'artist=' + song.get('artist'), '-metadata', 'album=' + song.get('album')] } + if no_overwrites: + ydl_opts['no-overwrites'] = True if not skip_mp3: mp3_postprocess_opts = { 'key': 'FFmpegExtractAudio', From a8a7863bc9c8c6dbe0a58f54d29e821d11ce1b77 Mon Sep 17 00:00:00 2001 From: LuluDavid Date: Fri, 29 Oct 2021 16:29:52 +0200 Subject: [PATCH 2/4] As yt-dlp does not deal with mp3 files (but only source video files), we implement our own option instead of delegating --- spotify_dl/spotify_dl.py | 5 ++-- spotify_dl/youtube.py | 52 +++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/spotify_dl/spotify_dl.py b/spotify_dl/spotify_dl.py index cb1e454f..cc3f3e92 100755 --- a/spotify_dl/spotify_dl.py +++ b/spotify_dl/spotify_dl.py @@ -35,8 +35,9 @@ def spotify_dl(): help='Don\'t convert downloaded songs to mp3') parser.add_argument('-s', '--scrape', action="store", help="Use HTML Scraper for YouTube Search", default=True) - parser.add_argument('-n', '--no_overwrites', action='store_true', - help="Do not download if the target file already exists", default=False) + parser.add_argument('-w', '--no-overwrites', action='store_true', + help="Whether we should avoid overwriting the target audio file if it already exists", + default=False) parser.add_argument('-V', '--verbose', action='store_true', help='Show more information on what''s happening.') parser.add_argument('-v', '--version', action='store_true', diff --git a/spotify_dl/youtube.py b/spotify_dl/youtube.py index c3baea02..c6b18248 100644 --- a/spotify_dl/youtube.py +++ b/spotify_dl/youtube.py @@ -28,9 +28,10 @@ def download_songs(songs, download_directory, format_string, skip_mp3, :param format_string: format string for the file conversion :param skip_mp3: Whether to skip conversion to MP3 :param keep_playlist_order: Whether to keep original playlist ordering. Also, prefixes songs files with playlist num - :param no_overwrites: Whether we should not download the song if it already exists + :param no_overwrites: Whether we should avoid overwriting the song if it already exists :param file_name_f: optional func(song) -> str that returns a filename for the download (without extension) """ + overwrites = not no_overwrites log.debug(f"Downloading to {download_directory}") for song in songs: query = f"{song.get('artist')} - {song.get('name')} Lyrics".replace(":", "").replace("\"", "") @@ -50,8 +51,6 @@ def download_songs(songs, download_directory, format_string, skip_mp3, '-metadata', 'artist=' + song.get('artist'), '-metadata', 'album=' + song.get('album')] } - if no_overwrites: - ydl_opts['no-overwrites'] = True if not skip_mp3: mp3_postprocess_opts = { 'key': 'FFmpegExtractAudio', @@ -69,25 +68,30 @@ def download_songs(songs, download_directory, format_string, skip_mp3, continue if not skip_mp3: - try: - song_file = MP3(path.join(f"{file_path}.mp3"), ID3=EasyID3) - except mutagen.MutagenError as e: - log.debug(e) - print('Failed to download: {}, please ensure YouTubeDL is up-to-date. '.format(query)) - continue - song_file['date'] = song.get('year') - if keep_playlist_order: - song_file['tracknumber'] = str(song.get('playlist_num')) + mp3filename = f"{file_path}.mp3" + mp3file_path = path.join(mp3filename) + if overwrites or not path.exists(mp3file_path): + try: + song_file = MP3(mp3file_path, ID3=EasyID3) + except mutagen.MutagenError as e: + log.debug(e) + print('Failed to download: {}, please ensure YouTubeDL is up-to-date. '.format(query)) + continue + song_file['date'] = song.get('year') + if keep_playlist_order: + song_file['tracknumber'] = str(song.get('playlist_num')) + else: + song_file['tracknumber'] = str(song.get('num')) + '/' + str(song.get('num_tracks')) + song_file['genre'] = song.get('genre') + song_file.save() + song_file = MP3(mp3filename, ID3=ID3) + if song.get('cover') is not None: + song_file.tags['APIC'] = APIC( + encoding=3, + mime='image/jpeg', + type=3, desc=u'Cover', + data=urllib.request.urlopen(song.get('cover')).read() + ) + song_file.save() else: - song_file['tracknumber'] = str(song.get('num')) + '/' + str(song.get('num_tracks')) - song_file['genre'] = song.get('genre') - song_file.save() - song_file = MP3(f"{file_path}.mp3", ID3=ID3) - if song.get('cover') is not None: - song_file.tags['APIC'] = APIC( - encoding=3, - mime='image/jpeg', - type=3, desc=u'Cover', - data=urllib.request.urlopen(song.get('cover')).read() - ) - song_file.save() + print('File {} already exists, we do not overwrite it '.format(mp3filename)) From 1842bcb80ee235310699ec5a5fc5b55d344894a0 Mon Sep 17 00:00:00 2001 From: LuluDavid Date: Mon, 10 Jan 2022 13:22:35 +0100 Subject: [PATCH 3/4] Corrected urllib's insecurity --- spotify_dl/youtube.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/spotify_dl/youtube.py b/spotify_dl/youtube.py index c6b18248..06def6e1 100644 --- a/spotify_dl/youtube.py +++ b/spotify_dl/youtube.py @@ -85,13 +85,19 @@ def download_songs(songs, download_directory, format_string, skip_mp3, song_file['genre'] = song.get('genre') song_file.save() song_file = MP3(mp3filename, ID3=ID3) - if song.get('cover') is not None: - song_file.tags['APIC'] = APIC( - encoding=3, - mime='image/jpeg', - type=3, desc=u'Cover', - data=urllib.request.urlopen(song.get('cover')).read() - ) + cover = song.get('cover') + if cover is not None: + if cover.lower().startswith('http'): + req = urllib.request.Request(cover) + else: + raise ValueError from None + with urllib.request.urlopen(req) as resp: + song_file.tags['APIC'] = APIC( + encoding=3, + mime='image/jpeg', + type=3, desc=u'Cover', + data=resp.read() + ) song_file.save() else: print('File {} already exists, we do not overwrite it '.format(mp3filename)) From 4683f9be4856c685d77589119c4b314c224bb1b9 Mon Sep 17 00:00:00 2001 From: LuluDavid Date: Mon, 10 Jan 2022 14:15:12 +0100 Subject: [PATCH 4/4] Disable bandit warning on the problematic line --- spotify_dl/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spotify_dl/youtube.py b/spotify_dl/youtube.py index 06def6e1..3efc1d77 100644 --- a/spotify_dl/youtube.py +++ b/spotify_dl/youtube.py @@ -91,7 +91,7 @@ def download_songs(songs, download_directory, format_string, skip_mp3, req = urllib.request.Request(cover) else: raise ValueError from None - with urllib.request.urlopen(req) as resp: + with urllib.request.urlopen(req) as resp: # nosec song_file.tags['APIC'] = APIC( encoding=3, mime='image/jpeg',