From f9401b608b5c0b35d1c527c8226cb327af25c632 Mon Sep 17 00:00:00 2001 From: JMaximusIX Date: Thu, 6 Jun 2024 15:10:37 +0200 Subject: [PATCH 1/8] prefer synced lyrics, options to allow only plain/synced --- README.md | 10 ++- syncedlyrics/__init__.py | 87 +++++++++++++----------- syncedlyrics/cli.py | 12 +++- syncedlyrics/providers/base.py | 12 +++- syncedlyrics/providers/deezer.py | 20 +++--- syncedlyrics/providers/genius.py | 10 +-- syncedlyrics/providers/lrclib.py | 11 ++-- syncedlyrics/providers/lyricsify.py | 13 ++-- syncedlyrics/providers/megalobiz.py | 9 ++- syncedlyrics/providers/musixmatch.py | 41 +++++++----- syncedlyrics/providers/netease.py | 15 +++-- syncedlyrics/providers/spotify.py | 6 +- syncedlyrics/utils.py | 98 +++++++++++++++++++++------- 13 files changed, 225 insertions(+), 119 deletions(-) diff --git a/README.md b/README.md index 5303ed3..cade50a 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,11 @@ pip install syncedlyrics syncedlyrics "SEARCH_TERM" ``` + +By default, this will prefer time synced lyrics, but use plaintext lyrics, if no synced lyrics are available. +To only allow one type of lyrics specify `--plaintext-only` or `--synced-only` respectively + + #### Available Options | Flag | Description | | --- | --- | @@ -20,8 +25,9 @@ syncedlyrics "SEARCH_TERM" | `-p` | Space-separated list of [providers](#providers) to include in searching | | `-l` | Language code of the translation ([ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format) | | `-v` | Use this flag to show the logs | -| `--allow-plain` | Return a plain text (not synced) lyrics if no LRC format was found | -| `--enhanced` | Return an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format +| `--allow-plain`, `--plaintext-only` | Return plain text (not synced) lyrics | +| `--synced-only` | Only look for synced lyrics +| `--enhanced` | Searches for an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format. If it isn't available, search for regular synced lyrics. ### Python ```py diff --git a/syncedlyrics/__init__.py b/syncedlyrics/__init__.py index 7c0e12a..c916afb 100644 --- a/syncedlyrics/__init__.py +++ b/syncedlyrics/__init__.py @@ -11,7 +11,8 @@ from typing import List, Optional from .providers import Deezer, Lrclib, Musixmatch, NetEase, Megalobiz, Genius -from .utils import is_lrc_valid, save_lrc_file +from .utils import Lyrics, TargetType +from .providers.base import LRCProvider logger = logging.getLogger(__name__) @@ -19,8 +20,9 @@ def search( search_term: str, allow_plain_format: bool = False, + synced_only: bool = False, save_path: Optional[str] = None, - providers: Optional[List[str]] = None, + providers: List[str] = [], lang: Optional[str] = None, enhanced: bool = False, ) -> Optional[str]: @@ -28,12 +30,23 @@ def search( Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found. ### Arguments - `search_term`: The search term to find the track - - `allow_plain_format`: Return a plain text (not synced) lyrics if not LRC was found + - `allow_plain`: Return plain text (not synced) lyrics + - `synced_only`: Only look for synced lyrics - `save_path`: Path to save `.lrc` lyrics. No saving if `None` - `providers`: A list of provider names to include in searching; loops over all the providers as soon as an LRC is found - `lang`: Language of the translation along with the lyrics. **Only supported by Musixmatch** - `enhanced`: Returns word by word synced lyrics if available. **Only supported by Musixmatch** """ + if allow_plain_format and synced_only: + logger.error("--allow-plain and --synced-only flags cannot be used together.") + return None + target_type = TargetType.PREFER_SYNCED + if allow_plain_format: + target_type = TargetType.PLAINTEXT + elif synced_only: + target_type = TargetType.SYNCED_ONLY + lrc = Lyrics() + _providers = [ Musixmatch(lang=lang, enhanced=enhanced), Lrclib(), @@ -42,48 +55,48 @@ def search( Megalobiz(), Genius(), ] - if providers and any(providers): - # Filtering the providers - _providers = [ - p - for p in _providers - if p.__class__.__name__.lower() in [p.lower() for p in providers] - ] - if not _providers: - logger.error( - f"Providers {providers} not found in the list of available providers." - ) - return None - lrc = None - for provider in _providers: - logger.debug(f"Looking for an LRC on {provider.__class__.__name__}") + + for provider in _select_providers(_providers, providers): + logger.debug(f"Looking for an LRC on {provider}") try: - _l = provider.get_lrc(search_term) + lrc.update(provider.get_lrc(search_term)) except Exception as e: logger.error( - f"An error occurred while searching for an LRC on {provider.__class__.__name__}" + f"An error occurred while searching for an LRC on {provider}" ) logger.error(e) + if lang: + logger.error("Aborting, since `lang` is only supported by Musixmatch") continue - if enhanced and not _l: - # Since enhanced is only supported by Musixmatch, break if no LRC is found - break - check_translation = lang is not None and isinstance(provider, Musixmatch) - if is_lrc_valid(_l, allow_plain_format, check_translation): - logger.info( - f'synced-lyrics found for "{search_term}" on {provider.__class__.__name__}' - ) - lrc = _l + if lrc.is_preferred(target_type): + logger.info(f'Lyrics found for "{search_term}" on {provider}') break + elif lrc.is_acceptable(target_type): + logger.info(f'Found plaintext lyrics on {provider}, but continuing search for synced lyrics') else: - logger.debug( - f"Skip {provider.__class__.__name__} as the synced-lyrics is not valid. (allow_plain_format={allow_plain_format})" - ) - logger.debug(f"Lyrics: {_l}") - if not lrc: - logger.info(f'No synced-lyrics found for "{search_term}" :(') + logger.debug(f"No suitable lyrics found on {provider}, continuing search...") + if not lrc.is_acceptable(target_type): + logger.info(f'No suitable lyrics found for "{search_term}" :(') return None if save_path: save_path = save_path.format(search_term=search_term) - save_lrc_file(save_path, lrc) - return lrc + lrc.save_lrc_file(save_path, target_type) + return lrc.to_str(target_type) + + +def _select_providers(providers: List[LRCProvider], string_list: List[str]) -> List[LRCProvider]: + """ + Returns a list of provider classes based on the given string list. + """ + strings_lowercase = [p.lower() for p in string_list] + selection = [p for p in providers if str(p).lower() in strings_lowercase] + if not selection: + if string_list: + # List of providers specified but not found. + # Deliberately returning nothing instead of all to avoid unexpected behaviour. + logger.error(f"Providers {string_list} not found in the list of available providers.") + return [] + else: + # No providers specified, using all + return providers + return selection diff --git a/syncedlyrics/cli.py b/syncedlyrics/cli.py index b91b0e5..633bed1 100644 --- a/syncedlyrics/cli.py +++ b/syncedlyrics/cli.py @@ -30,8 +30,14 @@ def cli_handler(): "-v", "--verbose", help="Use this flag to show the logs", action="store_true" ) parser.add_argument( - "--allow-plain", - help="Return a plain text (not synced) lyrics if not LRC was found", + # Keeping --allow-plain for backwards compatibility, although --plaintext-only is more descriptive + "--allow-plain", "--plaintext-only", + help="Return plain text (not synced) lyrics", + action="store_true", + ) + parser.add_argument( + "--synced-only", + help="Only look for synced lyrics", action="store_true", ) parser.add_argument( @@ -42,9 +48,11 @@ def cli_handler(): args = parser.parse_args() if args.verbose: logging.basicConfig(level=logging.DEBUG) + lrc = search( args.search_term, args.allow_plain, + args.synced_only, args.output, args.p, lang=args.lang, diff --git a/syncedlyrics/providers/base.py b/syncedlyrics/providers/base.py index 4f13af5..17f6dd2 100644 --- a/syncedlyrics/providers/base.py +++ b/syncedlyrics/providers/base.py @@ -2,12 +2,15 @@ from typing import Optional import logging +from ..utils import Lyrics + class TimeoutSession(requests.Session): def request(self, method, url, **kwargs): - kwargs.setdefault("timeout", (2,5)) + kwargs.setdefault("timeout", (2, 5)) return super().request(method, url, **kwargs) + class LRCProvider: """ Base class for all of the synced (LRC format) lyrics providers. @@ -23,7 +26,10 @@ def __init__(self) -> None: self.logger = logging.getLogger(self.__class__.__name__) self.logger.addHandler(handler) - def get_lrc_by_id(self, track_id: str) -> Optional[str]: + def __str__(self) -> str: + return self.__class__.__name__ + + def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: """ Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found. @@ -32,7 +38,7 @@ def get_lrc_by_id(self, track_id: str) -> Optional[str]: """ raise NotImplementedError - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: """ Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found. """ diff --git a/syncedlyrics/providers/deezer.py b/syncedlyrics/providers/deezer.py index ded20fc..f5b8084 100644 --- a/syncedlyrics/providers/deezer.py +++ b/syncedlyrics/providers/deezer.py @@ -2,7 +2,7 @@ from typing import Optional from .base import LRCProvider -from ..utils import get_best_match +from ..utils import Lyrics, get_best_match # Currently broken # TODO: Fix invalid CSRF token @@ -31,22 +31,24 @@ def _api_call(self, method: str, json=None) -> dict: response = self.session.post(self.API_ENDPOINT, params=params, json=json) return response.json() - def get_lrc_by_id(self, track_id: str) -> Optional[str]: + def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: + lrc = Lyrics() lrc_response = self._api_call("song.getLyrics", json={"sng_id": track_id}) lrc_json_objs = lrc_response["results"].get("LYRICS_SYNC_JSON") if not lrc_json_objs: - # Returning the plain text lyrics - return lrc_response["results"].get("LYRICS_TEXT") - lrc = "" + lrc.unsynced = lrc_response["results"].get("LYRICS_TEXT") + return lrc + lrc_str = "" for chunk in lrc_json_objs: if chunk.get("lrc_timestamp") and chunk.get("line"): - lrc += f"{chunk['lrc_timestamp']} {chunk['line']}\n" - return lrc or None + lrc_str += f"{chunk['lrc_timestamp']} {chunk['line']}\n" + lrc.synced = lrc_str + return lrc - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+") search_results = self.session.get(url).json() - cmp_key = lambda t: f"{t.get('title')} {t.get('artist').get('name')}" + def cmp_key(t): return f"{t.get('title')} {t.get('artist').get('name')}" track = get_best_match(search_results.get("data", []), search_term, cmp_key) if not track: return None diff --git a/syncedlyrics/providers/genius.py b/syncedlyrics/providers/genius.py index 840cd47..b1f7916 100644 --- a/syncedlyrics/providers/genius.py +++ b/syncedlyrics/providers/genius.py @@ -2,7 +2,7 @@ from typing import Optional from .base import LRCProvider -from ..utils import generate_bs4_soup +from ..utils import Lyrics, generate_bs4_soup class Genius(LRCProvider): @@ -10,7 +10,7 @@ class Genius(LRCProvider): SEARCH_ENDPOINT = "https://genius.com/api/search/multi?per_page=5&q=" - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: params = {"q": search_term, "per_page": 5} cookies = { "obuid": "e3ee67e0-7df9-4181-8324-d977c6dc9250", @@ -27,7 +27,9 @@ def get_lrc(self, search_term: str) -> Optional[str]: els = soup.find_all("div", attrs={"data-lyrics-container": True}) if not els: return None - lrc = "" + lrc_str = "" for el in els: - lrc += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[") + lrc_str += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[") + lrc = Lyrics() + lrc.unsynced = lrc_str return lrc diff --git a/syncedlyrics/providers/lrclib.py b/syncedlyrics/providers/lrclib.py index aaac134..185dbbb 100644 --- a/syncedlyrics/providers/lrclib.py +++ b/syncedlyrics/providers/lrclib.py @@ -2,7 +2,7 @@ from typing import Optional from .base import LRCProvider -from ..utils import sort_results +from ..utils import Lyrics, sort_results class Lrclib(LRCProvider): @@ -16,15 +16,18 @@ class Lrclib(LRCProvider): def __init__(self) -> None: super().__init__() - def get_lrc_by_id(self, track_id: str) -> Optional[str]: + def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: url = self.LRC_ENDPOINT + track_id r = self.session.get(url) if not r.ok: return None track = r.json() - return track.get("syncedLyrics", track.get("plainLyrics")) + lrc = Lyrics() + lrc.synced = track.get("syncedLyrics") + lrc.unsynced = track.get("plainLyrics") + return lrc - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: url = self.SEARCH_ENDPOINT r = self.session.get(url, params={"q": search_term}) if not r.ok: diff --git a/syncedlyrics/providers/lyricsify.py b/syncedlyrics/providers/lyricsify.py index 066d9ba..8691199 100644 --- a/syncedlyrics/providers/lyricsify.py +++ b/syncedlyrics/providers/lyricsify.py @@ -3,7 +3,7 @@ from typing import Optional from bs4 import SoupStrainer from .base import LRCProvider -from ..utils import generate_bs4_soup, get_best_match +from ..utils import Lyrics, generate_bs4_soup, get_best_match # Currently broken # TODO: Bypassing Cloudflare anti-bot system @@ -19,16 +19,19 @@ def __init__(self) -> None: super().__init__() self.parser = "html.parser" - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+") - href_match = lambda h: h.startswith("/lyric/") + def href_match(h): return h.startswith("/lyric/") a_tags_boud = SoupStrainer("a", href=href_match) soup = generate_bs4_soup(self.session, url, parse_only=a_tags_boud) - cmp_key = lambda t: t.get_text().lower().replace("-", "") + def cmp_key(t): return t.get_text().lower().replace("-", "") a_tag = get_best_match(soup.find_all("a"), search_term, cmp_key) if not a_tag: return None # Scraping from the LRC page lrc_id = a_tag["href"].split(".")[-1] soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"]) - return soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text() + lrc_str = soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text() + lrc = Lyrics() + lrc.add_unknown(lrc_str) + return lrc diff --git a/syncedlyrics/providers/megalobiz.py b/syncedlyrics/providers/megalobiz.py index 4863493..1a26369 100644 --- a/syncedlyrics/providers/megalobiz.py +++ b/syncedlyrics/providers/megalobiz.py @@ -3,7 +3,7 @@ from typing import Optional from bs4 import SoupStrainer from .base import LRCProvider -from ..utils import generate_bs4_soup, get_best_match +from ..utils import Lyrics, generate_bs4_soup, get_best_match class Megalobiz(LRCProvider): @@ -12,7 +12,7 @@ class Megalobiz(LRCProvider): ROOT_URL = "https://www.megalobiz.com" SEARCH_ENDPOINT = ROOT_URL + "/search/all?qry={q}&searchButton.x=0&searchButton.y=0" - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: url = self.SEARCH_ENDPOINT.format(q=search_term.replace(" ", "+")) def href_match(h: Optional[str]): @@ -35,4 +35,7 @@ def a_text(a): # Scraping from the LRC page lrc_id = a_tag["href"].split(".")[-1] soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"]) - return soup.find("div", {"id": f"lrc_{lrc_id}_details"}).get_text() + lrc_str = soup.find("div", {"id": f"lrc_{lrc_id}_details"}).get_text() + lrc = Lyrics() + lrc.add_unknown(lrc_str) + return lrc diff --git a/syncedlyrics/providers/musixmatch.py b/syncedlyrics/providers/musixmatch.py index b04fb85..024d4aa 100644 --- a/syncedlyrics/providers/musixmatch.py +++ b/syncedlyrics/providers/musixmatch.py @@ -5,10 +5,11 @@ import json import os from .base import LRCProvider -from ..utils import get_best_match, format_time +from ..utils import Lyrics, get_best_match, format_time # Inspired from https://github.com/Marekkon5/onetagger/blob/0654131188c4df2b4b171ded7cdb927a4369746e/crates/onetagger-platforms/src/musixmatch.rs # Huge part converted from Rust to Py by ChatGPT :) +# Whyyyy did you convert it from a good language to a bad one? :P class Musixmatch(LRCProvider): @@ -60,11 +61,12 @@ def _get_token(self): with open(token_path, "w") as token_file: json.dump(token_data, token_file) - def get_lrc_by_id(self, track_id: str) -> Optional[str]: + def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: r = self._get( "track.subtitle.get", [("track_id", track_id), ("subtitle_format", "lrc")], ) + print(self.lang) if self.lang is not None: r_tr = self._get( "crowd.track.translations.get", @@ -76,36 +78,42 @@ def get_lrc_by_id(self, track_id: str) -> Optional[str]: ], ) body_tr = r_tr.json()["message"]["body"] + if not body_tr["translations_list"]: + raise Exception("Couldn't find translations") if not r.ok: return None body = r.json()["message"]["body"] if not body: return None - lrc = body["subtitle"]["subtitle_body"] - if self.lang is not None and body_tr: + lrc_str = body["subtitle"]["subtitle_body"] + if self.lang is not None: for i in body_tr["translations_list"]: org, tr = ( i["translation"]["subtitle_matched_line"], i["translation"]["description"], ) - lrc = lrc.replace(org, org + "\n" + f"({tr})") + lrc_str = lrc_str.replace(org, org + "\n" + f"({tr})") + lrc = Lyrics() + lrc.synced = lrc_str return lrc - def get_lrc_word_by_word(self, track_id: str) -> Optional[str]: + def get_lrc_word_by_word(self, track_id: str) -> Optional[Lyrics]: + lrc = Lyrics() r = self._get("track.richsync.get", [("track_id", track_id)]) if r.ok and r.json()["message"]["header"]["status_code"] == 200: lrc_raw = r.json()["message"]["body"]["richsync"]["richsync_body"] lrc_raw = json.loads(lrc_raw) - lrc = "" + lrc_str = "" for i in lrc_raw: - lrc += f"[{format_time(i['ts'])}] " + lrc_str += f"[{format_time(i['ts'])}] " for l in i["l"]: t = format_time(float(i["ts"]) + float(l["o"])) - lrc += f"<{t}> {l['c']} " - lrc += "\n" - return lrc + lrc_str += f"<{t}> {l['c']} " + lrc_str += "\n" + lrc.synced = lrc_str + return lrc - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: r = self._get( "track.search", [ @@ -120,12 +128,13 @@ def get_lrc(self, search_term: str) -> Optional[str]: return None body = r.json()["message"]["body"] tracks = body["track_list"] - cmp_key = lambda t: f"{t['track']['track_name']} {t['track']['artist_name']}" + def cmp_key(t): return f"{t['track']['track_name']} {t['track']['artist_name']}" track = get_best_match(tracks, search_term, cmp_key) if not track: return None track_id = track["track"]["track_id"] if self.enhanced: - return self.get_lrc_word_by_word(track_id) or self.get_lrc_by_id(track_id) - else: - return self.get_lrc_by_id(track_id) + lrc = self.get_lrc_word_by_word(track_id) + if lrc and lrc.synced: + return lrc + return self.get_lrc_by_id(track_id) diff --git a/syncedlyrics/providers/netease.py b/syncedlyrics/providers/netease.py index 3dd3a2a..ccb3387 100644 --- a/syncedlyrics/providers/netease.py +++ b/syncedlyrics/providers/netease.py @@ -2,7 +2,7 @@ from typing import Optional from .base import LRCProvider -from ..utils import get_best_match +from ..utils import Lyrics, get_best_match class NetEase(LRCProvider): @@ -24,22 +24,23 @@ def search_track(self, search_term: str) -> Optional[dict]: results = response.json().get("result", {}).get("songs") if not results: return None - cmp_key = lambda t: f"{t.get('name')} {t.get('artists')[0].get('name')}" + + def cmp_key(t): return f"{t.get('name')} { + t.get('artists')[0].get('name')}" track = get_best_match(results, search_term, cmp_key) # Update the session cookies from the new sent cookies for the next request. self.session.cookies.update(response.cookies) self.session.headers.update({"referer": response.url}) return track - def get_lrc_by_id(self, track_id: str) -> Optional[str]: + def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: params = {"id": track_id, "lv": 1} response = self.session.get(self.API_ENDPOINT_LYRICS, params=params) - lrc = response.json().get("lrc", {}).get("lyric") - if not lrc: - return None + lrc = Lyrics() + lrc.add_unknown(response.json().get("lrc", {}).get("lyric")) return lrc - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: track = self.search_track(search_term) if not track: return None diff --git a/syncedlyrics/providers/spotify.py b/syncedlyrics/providers/spotify.py index fd39a1c..aa28935 100644 --- a/syncedlyrics/providers/spotify.py +++ b/syncedlyrics/providers/spotify.py @@ -2,6 +2,7 @@ from typing import Optional from .base import LRCProvider +from ..utils import Lyrics class Spotify(LRCProvider): @@ -16,10 +17,11 @@ def get_track_id(cls, search_term: str) -> Optional[str]: # TODO: self.client.search(search_term) and processing the results raise NotImplementedError - def get_lrc_by_id(self, track_id: str) -> Optional[str]: + def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: # TODO: raise NotImplementedError - def get_lrc(self, search_term: str) -> Optional[str]: + def get_lrc(self, search_term: str) -> Optional[Lyrics]: # TODO: Use https://github.com/akashrchandran/spotify-lyrics-api + # Note: as of recently, only premium users can get lyrics from spotify, so this would require an account access token. raise NotImplementedError diff --git a/syncedlyrics/utils.py b/syncedlyrics/utils.py index 2116b02..ba90e54 100644 --- a/syncedlyrics/utils.py +++ b/syncedlyrics/utils.py @@ -1,39 +1,86 @@ """Utility functions for `syncedlyrics` package""" +from dataclasses import dataclass from bs4 import BeautifulSoup, FeatureNotFound import rapidfuzz from typing import Union, Callable, Optional import datetime +from enum import Enum, auto import re R_FEAT = re.compile(r"\((feat.+)\)", re.IGNORECASE) -def is_lrc_valid( - lrc: str, allow_plain_format: bool = False, check_translation: bool = False -) -> bool: - """Checks whether a given LRC string is valid or not.""" +class TargetType(Enum): + PLAINTEXT = auto(), + PREFER_SYNCED = auto(), + SYNCED_ONLY = auto(), + + +@dataclass +class Lyrics: + synced: Optional[str] = None + unsynced: Optional[str] = None + + def add_unknown(self, unknown: str): + type = identify_lyrics_type(unknown) + if type == "synced": + self.synced = unknown + elif type == "plaintext": + self.unsynced = unknown + + def update(self, other: Optional['Lyrics']): + if not other: + return + if other.synced: + self.synced = other.synced + if other.unsynced: + self.unsynced = other.unsynced + + def is_preferred(self, target_type: TargetType) -> bool: + return bool(self.synced or (target_type == TargetType.PLAINTEXT and self.unsynced)) + + def is_acceptable(self, target_type: TargetType) -> bool: + return bool(self.synced or (target_type != TargetType.SYNCED_ONLY and self.unsynced)) + + def to_str(self, target_type: TargetType) -> str: + if target_type == TargetType.PLAINTEXT: + return self.unsynced or synced_to_plaintext(self.synced) + elif target_type == TargetType.PREFER_SYNCED: + return self.synced or self.unsynced + return self.synced + + def save_lrc_file(self, path: str, target_type: TargetType): + """Saves the `.lrc` file""" + with open(path, "w", encoding="utf-8") as f: + f.write(self.to_str(target_type)) + + +def synced_to_plaintext(synced_lyrics: str) -> str: + return re.sub(r'\[\d+:\d+\.\d+\] ', '', synced_lyrics) + + +def identify_lyrics_type(lrc: str) -> str: + """Identifies the type of the LRC string""" if not lrc: - return False + return "invalid" lines = lrc.split("\n")[5:10] - if not allow_plain_format: - if not check_translation: - conds = ["[" in l for l in lrc.split("\n")[5:10]] - return all(conds) - else: - for i, line in enumerate(lines): - if "[" in line: - if i + 1 < len(lines): - next_line = lines[i + 1] - if "(" not in next_line: - return False - return True - - -def save_lrc_file(path: str, lrc_text: str): - """Saves the `.lrc` file""" - with open(path, "w", encoding="utf-8") as f: - f.write(lrc_text) + if all("[" in l for l in lines): + return "synced" + return "plaintext" + +# Currently not used, since I decided to instead throw an exception if the response contains no translation + +# def has_translation(lrc: str) -> bool: +# """Checks whether the LRC string has a translation or not""" +# lines = lrc.split("\n")[5:10] +# for i, line in enumerate(lines): +# if "[" in line: +# if i + 1 < len(lines): +# next_line = lines[i + 1] +# if "(" not in next_line: +# return False +# return True def generate_bs4_soup(session, url: str, **kwargs): @@ -86,8 +133,9 @@ def sort_results( function that takes a track and returns a string. """ if isinstance(compare_key, str): - compare_key = lambda t: t[compare_key] - sort_key = lambda t: str_score(compare_key(t), search_term) + def compare_key(t): return t[compare_key] + + def sort_key(t): return str_score(compare_key(t), search_term) return sorted(results, key=sort_key, reverse=True) From 64d3f9de3d17bec69cbc3b3b5acd1ab847fde4b2 Mon Sep 17 00:00:00 2001 From: JMaximusIX Date: Thu, 6 Jun 2024 15:37:50 +0200 Subject: [PATCH 2/8] fix test that validates translation --- syncedlyrics/utils.py | 23 +++++++++++------------ tests.py | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/syncedlyrics/utils.py b/syncedlyrics/utils.py index ba90e54..592e338 100644 --- a/syncedlyrics/utils.py +++ b/syncedlyrics/utils.py @@ -69,18 +69,17 @@ def identify_lyrics_type(lrc: str) -> str: return "synced" return "plaintext" -# Currently not used, since I decided to instead throw an exception if the response contains no translation - -# def has_translation(lrc: str) -> bool: -# """Checks whether the LRC string has a translation or not""" -# lines = lrc.split("\n")[5:10] -# for i, line in enumerate(lines): -# if "[" in line: -# if i + 1 < len(lines): -# next_line = lines[i + 1] -# if "(" not in next_line: -# return False -# return True + +def has_translation(lrc: str) -> bool: + """Checks whether the LRC string has a translation or not""" + lines = lrc.split("\n")[5:10] + for i, line in enumerate(lines): + if "[" in line: + if i + 1 < len(lines): + next_line = lines[i + 1] + if "(" not in next_line: + return False + return True def generate_bs4_soup(session, url: str, **kwargs): diff --git a/tests.py b/tests.py index f29a21b..5957741 100644 --- a/tests.py +++ b/tests.py @@ -29,7 +29,7 @@ def test_musixmatch(): def test_musixmatch_translation(): lrc = _test_provider("Musixmatch", lang="es") # not only testing there is a result, but the translation is also included - assert syncedlyrics.is_lrc_valid(lrc, check_translation=True) + assert syncedlyrics.utils.has_translation(lrc) def test_musixmatch_enhanced(): From b443a0c01583029c1ce0ca2d25e8d88687447317 Mon Sep 17 00:00:00 2001 From: JMaximusIX Date: Thu, 6 Jun 2024 15:40:21 +0200 Subject: [PATCH 3/8] fix netease test --- syncedlyrics/providers/netease.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/syncedlyrics/providers/netease.py b/syncedlyrics/providers/netease.py index ccb3387..d0bee9e 100644 --- a/syncedlyrics/providers/netease.py +++ b/syncedlyrics/providers/netease.py @@ -25,8 +25,7 @@ def search_track(self, search_term: str) -> Optional[dict]: if not results: return None - def cmp_key(t): return f"{t.get('name')} { - t.get('artists')[0].get('name')}" + def cmp_key(t): return f"{t.get('name')} {t.get('artists')[0].get('name')}" track = get_best_match(results, search_term, cmp_key) # Update the session cookies from the new sent cookies for the next request. self.session.cookies.update(response.cookies) From c5b10911086cb517c3af9f0f9080b90b5e71458b Mon Sep 17 00:00:00 2001 From: jmaximusix Date: Tue, 11 Jun 2024 16:46:38 +0200 Subject: [PATCH 4/8] remove debug print statement --- syncedlyrics/providers/musixmatch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/syncedlyrics/providers/musixmatch.py b/syncedlyrics/providers/musixmatch.py index 024d4aa..272acfe 100644 --- a/syncedlyrics/providers/musixmatch.py +++ b/syncedlyrics/providers/musixmatch.py @@ -66,7 +66,6 @@ def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: "track.subtitle.get", [("track_id", track_id), ("subtitle_format", "lrc")], ) - print(self.lang) if self.lang is not None: r_tr = self._get( "crowd.track.translations.get", From b0bd8097f4b89aa4996f8f5841b4ea956a06366a Mon Sep 17 00:00:00 2001 From: jmaximusix Date: Tue, 11 Jun 2024 18:47:02 +0200 Subject: [PATCH 5/8] reformatted using black --- poetry.lock | 4 ++-- syncedlyrics/__init__.py | 20 +++++++++++++------- syncedlyrics/cli.py | 3 ++- syncedlyrics/providers/deezer.py | 2 +- syncedlyrics/providers/lyricsify.py | 4 ++-- syncedlyrics/providers/musixmatch.py | 2 +- syncedlyrics/providers/netease.py | 3 +-- syncedlyrics/utils.py | 26 +++++++++++++++++--------- 8 files changed, 39 insertions(+), 25 deletions(-) diff --git a/poetry.lock b/poetry.lock index 121b368..eec54e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "beautifulsoup4" @@ -500,4 +500,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "ea55fb196b5c04f1bb61711324f94b2336f0221a3e651206cba398b02d365d23" +content-hash = "4be7f99e1e88483e1d748619e80a65e33c8e9b0089063fd941f5ba4ef9af7f00" diff --git a/syncedlyrics/__init__.py b/syncedlyrics/__init__.py index c916afb..ed6b7ac 100644 --- a/syncedlyrics/__init__.py +++ b/syncedlyrics/__init__.py @@ -61,9 +61,7 @@ def search( try: lrc.update(provider.get_lrc(search_term)) except Exception as e: - logger.error( - f"An error occurred while searching for an LRC on {provider}" - ) + logger.error(f"An error occurred while searching for an LRC on {provider}") logger.error(e) if lang: logger.error("Aborting, since `lang` is only supported by Musixmatch") @@ -72,9 +70,13 @@ def search( logger.info(f'Lyrics found for "{search_term}" on {provider}') break elif lrc.is_acceptable(target_type): - logger.info(f'Found plaintext lyrics on {provider}, but continuing search for synced lyrics') + logger.info( + f"Found plaintext lyrics on {provider}, but continuing search for synced lyrics" + ) else: - logger.debug(f"No suitable lyrics found on {provider}, continuing search...") + logger.debug( + f"No suitable lyrics found on {provider}, continuing search..." + ) if not lrc.is_acceptable(target_type): logger.info(f'No suitable lyrics found for "{search_term}" :(') return None @@ -84,7 +86,9 @@ def search( return lrc.to_str(target_type) -def _select_providers(providers: List[LRCProvider], string_list: List[str]) -> List[LRCProvider]: +def _select_providers( + providers: List[LRCProvider], string_list: List[str] +) -> List[LRCProvider]: """ Returns a list of provider classes based on the given string list. """ @@ -94,7 +98,9 @@ def _select_providers(providers: List[LRCProvider], string_list: List[str]) -> L if string_list: # List of providers specified but not found. # Deliberately returning nothing instead of all to avoid unexpected behaviour. - logger.error(f"Providers {string_list} not found in the list of available providers.") + logger.error( + f"Providers {string_list} not found in the list of available providers." + ) return [] else: # No providers specified, using all diff --git a/syncedlyrics/cli.py b/syncedlyrics/cli.py index 633bed1..928f1b4 100644 --- a/syncedlyrics/cli.py +++ b/syncedlyrics/cli.py @@ -31,7 +31,8 @@ def cli_handler(): ) parser.add_argument( # Keeping --allow-plain for backwards compatibility, although --plaintext-only is more descriptive - "--allow-plain", "--plaintext-only", + "--allow-plain", + "--plaintext-only", help="Return plain text (not synced) lyrics", action="store_true", ) diff --git a/syncedlyrics/providers/deezer.py b/syncedlyrics/providers/deezer.py index f5b8084..f17391a 100644 --- a/syncedlyrics/providers/deezer.py +++ b/syncedlyrics/providers/deezer.py @@ -48,7 +48,7 @@ def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]: def get_lrc(self, search_term: str) -> Optional[Lyrics]: url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+") search_results = self.session.get(url).json() - def cmp_key(t): return f"{t.get('title')} {t.get('artist').get('name')}" + cmp_key = lambda t: f"{t.get('title')} {t.get('artist').get('name')}" track = get_best_match(search_results.get("data", []), search_term, cmp_key) if not track: return None diff --git a/syncedlyrics/providers/lyricsify.py b/syncedlyrics/providers/lyricsify.py index 8691199..e99f2ab 100644 --- a/syncedlyrics/providers/lyricsify.py +++ b/syncedlyrics/providers/lyricsify.py @@ -21,10 +21,10 @@ def __init__(self) -> None: def get_lrc(self, search_term: str) -> Optional[Lyrics]: url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+") - def href_match(h): return h.startswith("/lyric/") + href_match = lambda h: h.startswith("/lyric/") a_tags_boud = SoupStrainer("a", href=href_match) soup = generate_bs4_soup(self.session, url, parse_only=a_tags_boud) - def cmp_key(t): return t.get_text().lower().replace("-", "") + cmp_key = lambda t: t.get_text().lower().replace("-", "") a_tag = get_best_match(soup.find_all("a"), search_term, cmp_key) if not a_tag: return None diff --git a/syncedlyrics/providers/musixmatch.py b/syncedlyrics/providers/musixmatch.py index 272acfe..845e5e2 100644 --- a/syncedlyrics/providers/musixmatch.py +++ b/syncedlyrics/providers/musixmatch.py @@ -127,7 +127,7 @@ def get_lrc(self, search_term: str) -> Optional[Lyrics]: return None body = r.json()["message"]["body"] tracks = body["track_list"] - def cmp_key(t): return f"{t['track']['track_name']} {t['track']['artist_name']}" + cmp_key = lambda t: f"{t['track']['track_name']} {t['track']['artist_name']}" track = get_best_match(tracks, search_term, cmp_key) if not track: return None diff --git a/syncedlyrics/providers/netease.py b/syncedlyrics/providers/netease.py index d0bee9e..38c0c48 100644 --- a/syncedlyrics/providers/netease.py +++ b/syncedlyrics/providers/netease.py @@ -24,8 +24,7 @@ def search_track(self, search_term: str) -> Optional[dict]: results = response.json().get("result", {}).get("songs") if not results: return None - - def cmp_key(t): return f"{t.get('name')} {t.get('artists')[0].get('name')}" + cmp_key = lambda t: f"{t.get('name')} {t.get('artists')[0].get('name')}" track = get_best_match(results, search_term, cmp_key) # Update the session cookies from the new sent cookies for the next request. self.session.cookies.update(response.cookies) diff --git a/syncedlyrics/utils.py b/syncedlyrics/utils.py index 592e338..aac52c8 100644 --- a/syncedlyrics/utils.py +++ b/syncedlyrics/utils.py @@ -12,9 +12,9 @@ class TargetType(Enum): - PLAINTEXT = auto(), - PREFER_SYNCED = auto(), - SYNCED_ONLY = auto(), + PLAINTEXT = auto() + PREFER_SYNCED = auto() + SYNCED_ONLY = auto() @dataclass @@ -29,7 +29,7 @@ def add_unknown(self, unknown: str): elif type == "plaintext": self.unsynced = unknown - def update(self, other: Optional['Lyrics']): + def update(self, other: Optional["Lyrics"]): if not other: return if other.synced: @@ -38,10 +38,14 @@ def update(self, other: Optional['Lyrics']): self.unsynced = other.unsynced def is_preferred(self, target_type: TargetType) -> bool: - return bool(self.synced or (target_type == TargetType.PLAINTEXT and self.unsynced)) + return bool( + self.synced or (target_type == TargetType.PLAINTEXT and self.unsynced) + ) def is_acceptable(self, target_type: TargetType) -> bool: - return bool(self.synced or (target_type != TargetType.SYNCED_ONLY and self.unsynced)) + return bool( + self.synced or (target_type != TargetType.SYNCED_ONLY and self.unsynced) + ) def to_str(self, target_type: TargetType) -> str: if target_type == TargetType.PLAINTEXT: @@ -57,7 +61,7 @@ def save_lrc_file(self, path: str, target_type: TargetType): def synced_to_plaintext(synced_lyrics: str) -> str: - return re.sub(r'\[\d+:\d+\.\d+\] ', '', synced_lyrics) + return re.sub(r"\[\d+:\d+\.\d+\] ", "", synced_lyrics) def identify_lyrics_type(lrc: str) -> str: @@ -132,9 +136,13 @@ def sort_results( function that takes a track and returns a string. """ if isinstance(compare_key, str): - def compare_key(t): return t[compare_key] - def sort_key(t): return str_score(compare_key(t), search_term) + def compare_key(t): + return t[compare_key] + + def sort_key(t): + return str_score(compare_key(t), search_term) + return sorted(results, key=sort_key, reverse=True) From 963dfed3a6716571e5648504269c4263695a1bd2 Mon Sep 17 00:00:00 2001 From: jmaximusix Date: Tue, 11 Jun 2024 18:57:36 +0200 Subject: [PATCH 6/8] increased read timeout to 10 seconds since megalobiz sometimes is really slow --- syncedlyrics/providers/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/syncedlyrics/providers/base.py b/syncedlyrics/providers/base.py index 17f6dd2..2c5aeb2 100644 --- a/syncedlyrics/providers/base.py +++ b/syncedlyrics/providers/base.py @@ -7,7 +7,7 @@ class TimeoutSession(requests.Session): def request(self, method, url, **kwargs): - kwargs.setdefault("timeout", (2, 5)) + kwargs.setdefault("timeout", (2, 10)) return super().request(method, url, **kwargs) From 8a64cf7664adcf7b0303c549ced5c516690ab920 Mon Sep 17 00:00:00 2001 From: jmaximusix Date: Tue, 11 Jun 2024 19:01:42 +0200 Subject: [PATCH 7/8] commented out deezer, since it apparently broke --- README.md | 2 +- syncedlyrics/__init__.py | 2 +- tests.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cade50a..434d2ca 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ syncedlyrics.search("...", enhanced=True) ## Providers - [Musixmatch](https://www.musixmatch.com/) -- [Deezer](https://deezer.com/) +- ~~[Deezer](https://deezer.com/)~~ (Currently not working anymore) - [Lrclib](https://github.com/tranxuanthang/lrcget/issues/2#issuecomment-1326925928) - [NetEase](https://music.163.com/) - [Megalobiz](https://www.megalobiz.com/) diff --git a/syncedlyrics/__init__.py b/syncedlyrics/__init__.py index ed6b7ac..d735a68 100644 --- a/syncedlyrics/__init__.py +++ b/syncedlyrics/__init__.py @@ -50,7 +50,7 @@ def search( _providers = [ Musixmatch(lang=lang, enhanced=enhanced), Lrclib(), - Deezer(), + # Deezer(), NetEase(), Megalobiz(), Genius(), diff --git a/tests.py b/tests.py index 5957741..e7750fa 100644 --- a/tests.py +++ b/tests.py @@ -39,9 +39,9 @@ def test_musixmatch_enhanced(): def test_lrclib(): _test_provider("Lrclib") - -def test_deezer(): - _test_provider("Deezer") +# Not working (at least temporarily) +# def test_deezer(): +# _test_provider("Deezer") # Fails randomly on CI From c7170e00e00adf277f1b37c06d4e154849d0c702 Mon Sep 17 00:00:00 2001 From: jmaximusix Date: Tue, 11 Jun 2024 19:04:28 +0200 Subject: [PATCH 8/8] fixed pyproject.toml (using latest version of black formatter) --- poetry.lock | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index eec54e4..c8fbe05 100644 --- a/poetry.lock +++ b/poetry.lock @@ -500,4 +500,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "4be7f99e1e88483e1d748619e80a65e33c8e9b0089063fd941f5ba4ef9af7f00" +content-hash = "2416d5560d4b031c699fdd04ac77f3e4e4dfec4d41b0cac7ffb74c7cbff031d5" diff --git a/pyproject.toml b/pyproject.toml index 400fc94..ff0f20c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,8 +23,8 @@ rapidfuzz = "^3.6.2" syncedlyrics = "syncedlyrics.cli:cli_handler" [tool.poetry.group.dev.dependencies] -black = "^24.2.0" pytest = "^8.0.2" +black = "^24.4.2" [build-system] requires = ["poetry-core"]