Remove custom YT search in favour of youtube-dl's search (#122)

SathyaBhat · web-flow · commit 7aacc4542272 · 2020-11-17T23:18:10.000+02:00
* remove custom YT search in favour of youtubedl's search fixes #115 * remove test not in use anymore * address some lint errors
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 spotipy==2.16
 google-api-python-client==1.6.2
-youtube-dl>=2015.12.23
+youtube-dl>=2020.11.17
 sentry-sdk==0.14.3
 colorama==0.4.3
 click==7.0
diff --git a/spotify_dl/constants.py b/spotify_dl/constants.py
@@ -1,8 +1,4 @@
 __all__ = ['VERSION']
 
-YOUTUBE_API_SERVICE_NAME = "youtube"
-YOUTUBE_API_VERSION = "v3"
-VIDEO = 'youtube#video'
-YOUTUBE_VIDEO_URL = 'https://www.youtube.com/watch?v='
 VERSION = '7.0.0'
 SAVE_PATH = '~/.spotifydl'
diff --git a/spotify_dl/scaffold.py b/spotify_dl/scaffold.py
@@ -12,6 +12,11 @@
 
 
 def check_for_tokens():
+    """
+    Checks if the required API keys for Spotify have been set.
+    Reads SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET from the environment.
+    :return False if the Spotify tokens are not set up, True otherwise
+    """
     log.debug('Checking for tokens')
     CLIENT_ID = getenv('SPOTIPY_CLIENT_ID')
     CLIENT_SECRET = getenv('SPOTIPY_CLIENT_SECRET')
@@ -28,19 +33,4 @@ def check_for_tokens():
                 https://developer.spotify.com/my-applications
         ''')
         return False
-
-    YOUTUBE_DEV_KEY = getenv('YOUTUBE_DEV_KEY')
-    log.debug("YouTube dev key: {}".format(YOUTUBE_DEV_KEY))
-    if YOUTUBE_DEV_KEY is None:
-        print('''
-            Youtube Data API token has not been setup. You can do this by
-            setting environment variables like so:
-
-            export YOUTUBE_DEV_KEY='your-youtube-dev-key'
-
-            Generate the key from
-            https://console.developers.google.com/apis/api/youtube/overview
-            
-            Using HTML Scraper as a fallback.
-            ''')
     return True
diff --git a/spotify_dl/spotify.py b/spotify_dl/spotify.py
@@ -1,4 +1,3 @@
-import youtube_dl
 from spotify_dl.scaffold import *
 from spotify_dl.utils import sanitize
 
@@ -19,7 +18,8 @@ def fetch_tracks(sp, item_type, url):
         while True:
             for item in items['items']:
                 track_name = item['track']['name']
-                track_artist = " ".join([artist['name'] for artist in item['track']['artists']])
+                log.debug("Artist: {}".format(item['track']['artists']))
+                track_artist = ", ".join([artist['name'] for artist in item['track']['artists']])
                 songs_dict.update({track_name: track_artist})
                 offset += 1
 
@@ -48,48 +48,6 @@ def fetch_tracks(sp, item_type, url):
     return songs_dict
 
 
-def download_songs(songs_dict, download_directory, format_string, skip_mp3):
-    """
-    Downloads songs from the YouTube URL passed to either current directory or download_directory, is it is passed.
-    :param songs_dict: Dictionary of songs and associated artist
-    :param download_directory: Location where to save
-    :param format_string: format string for the file conversion
-    :param skip_mp3: Whether to skip conversion to MP3
-    """
-    download_directory = f"{download_directory}\\"
-    log.debug(f"Downloading to {download_directory}")
-    for number, item in enumerate(songs_dict):
-        log.debug('Songs to download: %s', item)
-        
-        url_, track_, artist_ = item
-        download_archive = download_directory + 'downloaded_songs.txt'
-        outtmpl = download_directory + '%(title)s.%(ext)s'
-        ydl_opts = {
-            'format': format_string,
-            'download_archive': download_archive,
-            'outtmpl': outtmpl,
-            'noplaylist': True,
-            'postprocessor_args': ['-metadata', 'title=' + str(track_),
-                                   '-metadata', 'artist=' + str(artist_),
-                                   '-metadata', 'track=' + str(number + 1)]
-        }
-        if not skip_mp3:
-            mp3_postprocess_opts = {
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'mp3',
-                'preferredquality': '192',
-            }
-            ydl_opts['postprocessors'] = [mp3_postprocess_opts.copy()]
-
-        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-            try:
-                log.debug(ydl.download([url_]))
-            except Exception as e:
-                log.debug(e)
-                print('Failed to download: {}'.format(url_))
-                continue
-
-
 def parse_spotify_url(url):
     """
     Parse the provided Spotify playlist URL and determine if it is a playlist, track or album.
diff --git a/spotify_dl/spotify_dl.py b/spotify_dl/spotify_dl.py
@@ -7,8 +7,8 @@
 import sys
 
 from spotify_dl.scaffold import log, check_for_tokens
-from spotify_dl.spotify import fetch_tracks, download_songs, parse_spotify_url, validate_spotify_url, get_item_name
-from spotify_dl.youtube import fetch_youtube_url, get_youtube_dev_key
+from spotify_dl.spotify import fetch_tracks, parse_spotify_url, validate_spotify_url, get_item_name
+from spotify_dl.youtube import download_songs
 from spotify_dl.constants import VERSION
 from spotify_dl.models import db, Song
 from spotipy.oauth2 import SpotifyClientCredentials
@@ -74,19 +74,13 @@ def spotify_dl():
     if args.output:
         item_type, item_id = parse_spotify_url(args.url)
         directory_name = get_item_name(sp, item_type, item_id)
-        path = Path(PurePath.joinpath(Path(args.output), Path(directory_name)))
-        path.mkdir(parents=True, exist_ok=True)
+        save_path = Path(PurePath.joinpath(Path(args.output), Path(directory_name)))
+        save_path.mkdir(parents=True, exist_ok=True)
         log.info("Saving songs to: {}".format(directory_name))
 
     songs = fetch_tracks(sp, item_type, args.url)
-    url = []
-    for song, artist in songs.items():
-        link = fetch_youtube_url(song + ' - ' + artist, get_youtube_dev_key())
-        if link:
-            url.append((link, song, artist))
-
     if args.download is True:
-        download_songs(url, str(path), args.format_str, args.skip_mp3)
+        download_songs(songs, str(save_path), args.format_str, args.skip_mp3)
 
 
 if __name__ == '__main__':
diff --git a/spotify_dl/youtube.py b/spotify_dl/youtube.py
@@ -1,87 +1,42 @@
-from os import getenv
-
-from googleapiclient.discovery import build
-from googleapiclient.http import HttpError
-from sentry_sdk import capture_exception
-
-
-from spotify_dl.constants import YOUTUBE_API_SERVICE_NAME
-from spotify_dl.constants import YOUTUBE_API_VERSION
-from spotify_dl.constants import VIDEO
-from spotify_dl.constants import YOUTUBE_VIDEO_URL
 from spotify_dl.scaffold import log
-from spotify_dl.cache import check_if_in_cache, save_to_cache
-from json import loads
-import requests
-from lxml import html # skipcq: BAN-B410
-import re
-
-
-from click import secho
-
-# skipcq: PYL-R1710
-def fetch_youtube_url(search_term, dev_key=None):
-    """
-    For each song name/artist name combo, fetch the YouTube URL and return the list of URLs.
-    :param search_term: Search term to be looked up on YouTube
-    :param dev_key: Youtube API key
-    """
-    in_cache, video_id = check_if_in_cache(search_term)
-    if in_cache:
-        return YOUTUBE_VIDEO_URL + video_id
-    if not dev_key:
-        YOUTUBE_SEARCH_BASE = "https://www.youtube.com/results?search_query="
-        try:
-            response = requests.get(YOUTUBE_SEARCH_BASE + search_term).content
-            html_response = html.fromstring(response)
-            video = html_response.xpath("//a[contains(@class, 'yt-uix-tile-link')]/@href")
-            video_id = re.search("((\?v=)[a-zA-Z0-9_-]{4,15})", video[0]).group(0)[3:]
-            log.debug(f"Found video id {video_id} for search term {search_term}")
-            _ = save_to_cache(search_term=search_term, video_id=video_id)
-            return YOUTUBE_VIDEO_URL + video_id
-        except AttributeError as e:
-            log.warning(f"Could not find scrape details for {search_term}")
-            capture_exception(e)
-            return None
-        except IndexError as e:
-            log.warning(f"Could not perform scrape search for {search_term}, got a different HTML")
-            capture_exception(e)
-            return None
-    else:
-        youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
-                        developerKey=dev_key,
-                        cache_discovery=False)
-        try:
-            in_cache, video_id = check_if_in_cache(search_term)
+import youtube_dl
 
-            if not in_cache:
-                search_response = youtube.search().list(q=search_term,
-                                                        part='id, snippet').execute()
-            for v in search_response['items']:
-                if v['id']['kind'] == VIDEO:
-                    video_id = v['id']['videoId']
-                    log.debug(f"Adding Video id {video_id}")
-                    _ = save_to_cache(search_term=search_term, video_id=video_id)     
-            return YOUTUBE_VIDEO_URL + video_id
-        except HttpError as err:
-            err_details = loads(err.content.decode('utf-8')).get('error').get('errors')
-            secho("Couldn't complete search due to following errors: ", fg='red')
-            for e in err_details:
-                error_reason = e.get('reason')
-                error_domain = e.get('domain')
-                error_message = e.get('message')
 
-                if error_reason == 'quotaExceeded' or error_reason == 'dailyLimitExceeded':
-                    secho(f"\tYou're over daily allowed quota. Unfortunately, YouTube restricts API keys to a max of 10,000 requests per day which translates to a maximum of 100 searches.", fg='red')
-                    secho(f"\tThe quota will be reset at midnight Pacific Time (PT)." ,fg='red')
-                    secho(f"\tYou can request for Quota increase from https://console.developers.google.com/apis/api/youtube.googleapis.com/quotas.", fg='red')
-                else:
-                    secho(f"\t Search failed due to {error_domain}:{error_reason}, message: {error_message}")
-            return None
-    
-def get_youtube_dev_key():
+def download_songs(songs, download_directory, format_string, skip_mp3):
     """
-    Fetches the Youtube Developer API key from the environment variable.
-    :return string containing the developer API key
+    Downloads songs found via youtube-dl's YouTube search to download_directory.
+    :param songs: Dictionary of songs and associated artist
+    :param download_directory: Location where to save
+    :param format_string: format string for the file conversion
+    :param skip_mp3: Whether to skip conversion to MP3
     """
-    return getenv('YOUTUBE_DEV_KEY')
+    download_directory = f"{download_directory}\\"
+    log.debug(f"Downloading to {download_directory}")
+    for song, artist in songs.items():
+        query = f"{artist} - {song}".replace(":", "").replace("\"", "")
+        download_archive = download_directory + 'downloaded_songs.txt'
+        outtmpl = download_directory + '%(title)s.%(ext)s'
+        ydl_opts = {
+            'format': format_string,
+            'download_archive': download_archive,
+            'outtmpl': outtmpl,
+            'default_search': 'ytsearch',
+            'noplaylist': True,
+            'postprocessor_args': ['-metadata', 'title=' + song,
+                                   '-metadata', 'artist=' + artist]
+        }
+        if not skip_mp3:
+            mp3_postprocess_opts = {
+                'key': 'FFmpegExtractAudio',
+                'preferredcodec': 'mp3',
+                'preferredquality': '192',
+            }
+            ydl_opts['postprocessors'] = [mp3_postprocess_opts.copy()]
+
+        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+            try:
+                log.debug(ydl.download([query]))
+            except Exception as e:
+                log.debug(e)
+                print('Failed to download: {}, please ensure YouTubeDL is up-to-date. '.format(query))
+                continue
diff --git a/tests/test_youtube_url.py b/tests/test_youtube_url.py