moehmeni · moehmeni · Jun 11, 2024 · Jun 6, 2024 · Jun 6, 2024 · Jun 6, 2024
diff --git a/README.md b/README.md
@@ -13,15 +13,21 @@ pip install syncedlyrics
 syncedlyrics "SEARCH_TERM"
 ```
 
+
+By default, this prefers time-synced lyrics and falls back to plaintext lyrics if no synced lyrics are available.
+To allow only one type of lyrics, specify `--plaintext-only` or `--synced-only` respectively.
+
+
 #### Available Options
 | Flag | Description |
 | --- | --- |
 | `-o` | Path to save `.lrc` lyrics, default="{search_term}.lrc" |
 | `-p` | Space-separated list of [providers](#providers) to include in searching |
 | `-l` | Language code of the translation ([ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format) |
 | `-v` | Use this flag to show the logs |
-| `--allow-plain` | Return a plain text (not synced) lyrics if no LRC format was found |
-| `--enhanced` | Return an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format
+| `--allow-plain`, `--plaintext-only` | Return plain text (not synced) lyrics |
+| `--synced-only` | Only look for synced lyrics |
+| `--enhanced` | Search for an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format. If it isn't available, search for regular synced lyrics. |
 
 ### Python
 ```py

diff --git a/syncedlyrics/__init__.py b/syncedlyrics/__init__.py
@@ -11,29 +11,42 @@
 from typing import List, Optional
 
 from .providers import Deezer, Lrclib, Musixmatch, NetEase, Megalobiz, Genius
-from .utils import is_lrc_valid, save_lrc_file
+from .utils import Lyrics, TargetType
+from .providers.base import LRCProvider
 
 logger = logging.getLogger(__name__)
 
 
 def search(
     search_term: str,
     allow_plain_format: bool = False,
+    synced_only: bool = False,
     save_path: Optional[str] = None,
-    providers: Optional[List[str]] = None,
+    providers: List[str] = [],
     lang: Optional[str] = None,
     enhanced: bool = False,
 ) -> Optional[str]:
     """
     Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
     ### Arguments
     - `search_term`: The search term to find the track
-    - `allow_plain_format`: Return a plain text (not synced) lyrics if not LRC was found
+    - `allow_plain_format`: Return plain text (not synced) lyrics
+    - `synced_only`: Only look for synced lyrics
     - `save_path`: Path to save `.lrc` lyrics. No saving if `None`
     - `providers`: A list of provider names to include in searching; loops over all the providers as soon as an LRC is found
     - `lang`: Language of the translation along with the lyrics. **Only supported by Musixmatch**
     - `enhanced`: Returns word by word synced lyrics if available. **Only supported by Musixmatch**
     """
+    if allow_plain_format and synced_only:
+        logger.error("--allow-plain and --synced-only flags cannot be used together.")
+        return None
+    target_type = TargetType.PREFER_SYNCED
+    if allow_plain_format:
+        target_type = TargetType.PLAINTEXT
+    elif synced_only:
+        target_type = TargetType.SYNCED_ONLY
+    lrc = Lyrics()
+
     _providers = [
         Musixmatch(lang=lang, enhanced=enhanced),
         Lrclib(),
@@ -42,48 +55,48 @@ def search(
         Megalobiz(),
         Genius(),
     ]
-    if providers and any(providers):
-        # Filtering the providers
-        _providers = [
-            p
-            for p in _providers
-            if p.__class__.__name__.lower() in [p.lower() for p in providers]
-        ]
-    if not _providers:
-        logger.error(
-            f"Providers {providers} not found in the list of available providers."
-        )
-        return None
-    lrc = None
-    for provider in _providers:
-        logger.debug(f"Looking for an LRC on {provider.__class__.__name__}")
+
+    for provider in _select_providers(_providers, providers):
+        logger.debug(f"Looking for an LRC on {provider}")
         try:
-            _l = provider.get_lrc(search_term)
+            lrc.update(provider.get_lrc(search_term))
         except Exception as e:
             logger.error(
-                f"An error occurred while searching for an LRC on {provider.__class__.__name__}"
+                f"An error occurred while searching for an LRC on {provider}"
             )
             logger.error(e)
+            if lang:
+                logger.error("Aborting, since `lang` is only supported by Musixmatch")
             continue
-        if enhanced and not _l:
-            # Since enhanced is only supported by Musixmatch, break if no LRC is found
-            break
-        check_translation = lang is not None and isinstance(provider, Musixmatch)
-        if is_lrc_valid(_l, allow_plain_format, check_translation):
-            logger.info(
-                f'synced-lyrics found for "{search_term}" on {provider.__class__.__name__}'
-            )
-            lrc = _l
+        if lrc.is_preferred(target_type):
+            logger.info(f'Lyrics found for "{search_term}" on {provider}')
             break
+        elif lrc.is_acceptable(target_type):
+            logger.info(f'Found plaintext lyrics on {provider}, but continuing search for synced lyrics')
         else:
-            logger.debug(
-                f"Skip {provider.__class__.__name__} as the synced-lyrics is not valid. (allow_plain_format={allow_plain_format})"
-            )
-            logger.debug(f"Lyrics: {_l}")
-    if not lrc:
-        logger.info(f'No synced-lyrics found for "{search_term}" :(')
+            logger.debug(f"No suitable lyrics found on {provider}, continuing search...")
+    if not lrc.is_acceptable(target_type):
+        logger.info(f'No suitable lyrics found for "{search_term}" :(')
         return None
     if save_path:
         save_path = save_path.format(search_term=search_term)
-        save_lrc_file(save_path, lrc)
-    return lrc
+        lrc.save_lrc_file(save_path, target_type)
+    return lrc.to_str(target_type)
+
+
+def _select_providers(providers: List[LRCProvider], string_list: List[str]) -> List[LRCProvider]:
+    """
+    Returns the provider instances whose names match (case-insensitively) an entry in the given string list.
+    """
+    strings_lowercase = [p.lower() for p in string_list]
+    selection = [p for p in providers if str(p).lower() in strings_lowercase]
+    if not selection:
+        if string_list:
+            # List of providers specified but not found.
+            # Deliberately returning nothing instead of all to avoid unexpected behaviour.
+            logger.error(f"Providers {string_list} not found in the list of available providers.")
+            return []
+        else:
+            # No providers specified, using all
+            return providers
+    return selection
diff --git a/syncedlyrics/cli.py b/syncedlyrics/cli.py
@@ -30,8 +30,14 @@ def cli_handler():
         "-v", "--verbose", help="Use this flag to show the logs", action="store_true"
     )
     parser.add_argument(
-        "--allow-plain",
-        help="Return a plain text (not synced) lyrics if not LRC was found",
+        # Keeping --allow-plain for backwards compatibility, although --plaintext-only is more descriptive
+        "--allow-plain", "--plaintext-only",
+        help="Return plain text (not synced) lyrics",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--synced-only",
+        help="Only look for synced lyrics",
         action="store_true",
     )
     parser.add_argument(
@@ -42,9 +48,11 @@ def cli_handler():
     args = parser.parse_args()
     if args.verbose:
         logging.basicConfig(level=logging.DEBUG)
+
     lrc = search(
         args.search_term,
         args.allow_plain,
+        args.synced_only,
         args.output,
         args.p,
         lang=args.lang,

diff --git a/syncedlyrics/providers/base.py b/syncedlyrics/providers/base.py
@@ -2,12 +2,15 @@
 from typing import Optional
 import logging
 
+from ..utils import Lyrics
+
 
 class TimeoutSession(requests.Session):
     def request(self, method, url, **kwargs):
-        kwargs.setdefault("timeout", (2,5))
+        kwargs.setdefault("timeout", (2, 5))
         return super().request(method, url, **kwargs)
 
+
 class LRCProvider:
     """
     Base class for all of the synced (LRC format) lyrics providers.
@@ -23,7 +26,10 @@ def __init__(self) -> None:
         self.logger = logging.getLogger(self.__class__.__name__)
         self.logger.addHandler(handler)
 
-    def get_lrc_by_id(self, track_id: str) -> Optional[str]:
+    def __str__(self) -> str:
+        return self.__class__.__name__
+
+    def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
         """
         Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
 
@@ -32,7 +38,7 @@ def get_lrc_by_id(self, track_id: str) -> Optional[str]:
         """
         raise NotImplementedError
 
-    def get_lrc(self, search_term: str) -> Optional[str]:
+    def get_lrc(self, search_term: str) -> Optional[Lyrics]:
         """
         Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
         """

diff --git a/syncedlyrics/providers/deezer.py b/syncedlyrics/providers/deezer.py
@@ -2,7 +2,7 @@
 
 from typing import Optional
 from .base import LRCProvider
-from ..utils import get_best_match
+from ..utils import Lyrics, get_best_match
 
 # Currently broken
 # TODO: Fix invalid CSRF token
@@ -31,22 +31,24 @@ def _api_call(self, method: str, json=None) -> dict:
         response = self.session.post(self.API_ENDPOINT, params=params, json=json)
         return response.json()
 
-    def get_lrc_by_id(self, track_id: str) -> Optional[str]:
+    def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
+        lrc = Lyrics()
         lrc_response = self._api_call("song.getLyrics", json={"sng_id": track_id})
         lrc_json_objs = lrc_response["results"].get("LYRICS_SYNC_JSON")
         if not lrc_json_objs:
-            # Returning the plain text lyrics
-            return lrc_response["results"].get("LYRICS_TEXT")
-        lrc = ""
+            lrc.unsynced = lrc_response["results"].get("LYRICS_TEXT")
+            return lrc
+        lrc_str = ""
         for chunk in lrc_json_objs:
             if chunk.get("lrc_timestamp") and chunk.get("line"):
-                lrc += f"{chunk['lrc_timestamp']} {chunk['line']}\n"
-        return lrc or None
+                lrc_str += f"{chunk['lrc_timestamp']} {chunk['line']}\n"
+        lrc.synced = lrc_str
+        return lrc
 
-    def get_lrc(self, search_term: str) -> Optional[str]:
+    def get_lrc(self, search_term: str) -> Optional[Lyrics]:
         url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+")
         search_results = self.session.get(url).json()
-        cmp_key = lambda t: f"{t.get('title')} {t.get('artist').get('name')}"
+        def cmp_key(t): return f"{t.get('title')} {t.get('artist').get('name')}"
         track = get_best_match(search_results.get("data", []), search_term, cmp_key)
         if not track:
             return None

diff --git a/syncedlyrics/providers/genius.py b/syncedlyrics/providers/genius.py
@@ -2,15 +2,15 @@
 
 from typing import Optional
 from .base import LRCProvider
-from ..utils import generate_bs4_soup
+from ..utils import Lyrics, generate_bs4_soup
 
 
 class Genius(LRCProvider):
     """Genius provider class"""
 
     SEARCH_ENDPOINT = "https://genius.com/api/search/multi?per_page=5&q="
 
-    def get_lrc(self, search_term: str) -> Optional[str]:
+    def get_lrc(self, search_term: str) -> Optional[Lyrics]:
         params = {"q": search_term, "per_page": 5}
         cookies = {
             "obuid": "e3ee67e0-7df9-4181-8324-d977c6dc9250",
@@ -27,7 +27,9 @@ def get_lrc(self, search_term: str) -> Optional[str]:
         els = soup.find_all("div", attrs={"data-lyrics-container": True})
         if not els:
             return None
-        lrc = ""
+        lrc_str = ""
         for el in els:
-            lrc += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[")
+            lrc_str += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[")
+        lrc = Lyrics()
+        lrc.unsynced = lrc_str
         return lrc
diff --git a/syncedlyrics/providers/lrclib.py b/syncedlyrics/providers/lrclib.py
@@ -2,7 +2,7 @@
 
 from typing import Optional
 from .base import LRCProvider
-from ..utils import sort_results
+from ..utils import Lyrics, sort_results
 
 
 class Lrclib(LRCProvider):
@@ -16,15 +16,18 @@ class Lrclib(LRCProvider):
     def __init__(self) -> None:
         super().__init__()
 
-    def get_lrc_by_id(self, track_id: str) -> Optional[str]:
+    def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
         url = self.LRC_ENDPOINT + track_id
         r = self.session.get(url)
         if not r.ok:
             return None
         track = r.json()
-        return track.get("syncedLyrics", track.get("plainLyrics"))
+        lrc = Lyrics()
+        lrc.synced = track.get("syncedLyrics")
+        lrc.unsynced = track.get("plainLyrics")
+        return lrc
 
-    def get_lrc(self, search_term: str) -> Optional[str]:
+    def get_lrc(self, search_term: str) -> Optional[Lyrics]:
         url = self.SEARCH_ENDPOINT
         r = self.session.get(url, params={"q": search_term})
         if not r.ok:

diff --git a/syncedlyrics/providers/lyricsify.py b/syncedlyrics/providers/lyricsify.py
@@ -3,7 +3,7 @@
 from typing import Optional
 from bs4 import SoupStrainer
 from .base import LRCProvider
-from ..utils import generate_bs4_soup, get_best_match
+from ..utils import Lyrics, generate_bs4_soup, get_best_match
 
 # Currently broken
 # TODO: Bypassing Cloudflare anti-bot system
@@ -19,16 +19,19 @@ def __init__(self) -> None:
         super().__init__()
         self.parser = "html.parser"
 
-    def get_lrc(self, search_term: str) -> Optional[str]:
+    def get_lrc(self, search_term: str) -> Optional[Lyrics]:
         url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+")
-        href_match = lambda h: h.startswith("/lyric/")
+        def href_match(h): return h.startswith("/lyric/")
         a_tags_boud = SoupStrainer("a", href=href_match)
         soup = generate_bs4_soup(self.session, url, parse_only=a_tags_boud)
-        cmp_key = lambda t: t.get_text().lower().replace("-", "")
+        def cmp_key(t): return t.get_text().lower().replace("-", "")
         a_tag = get_best_match(soup.find_all("a"), search_term, cmp_key)
         if not a_tag:
             return None
         # Scraping from the LRC page
         lrc_id = a_tag["href"].split(".")[-1]
         soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"])
-        return soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text()
+        lrc_str = soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text()
+        lrc = Lyrics()
+        lrc.add_unknown(lrc_str)
+        return lrc
diff --git a/syncedlyrics/providers/megalobiz.py b/syncedlyrics/providers/megalobiz.py
@@ -3,7 +3,7 @@
 from typing import Optional
 from bs4 import SoupStrainer
 from .base import LRCProvider
-from ..utils import generate_bs4_soup, get_best_match
+from ..utils import Lyrics, generate_bs4_soup, get_best_match
 
 
 class Megalobiz(LRCProvider):
@@ -12,7 +12,7 @@ class Megalobiz(LRCProvider):
     ROOT_URL = "https://www.megalobiz.com"
     SEARCH_ENDPOINT = ROOT_URL + "/search/all?qry={q}&searchButton.x=0&searchButton.y=0"
 
-    def get_lrc(self, search_term: str) -> Optional[str]:
+    def get_lrc(self, search_term: str) -> Optional[Lyrics]:
         url = self.SEARCH_ENDPOINT.format(q=search_term.replace(" ", "+"))
 
         def href_match(h: Optional[str]):
@@ -35,4 +35,7 @@ def a_text(a):
         # Scraping from the LRC page
         lrc_id = a_tag["href"].split(".")[-1]
         soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"])
-        return soup.find("div", {"id": f"lrc_{lrc_id}_details"}).get_text()
+        lrc_str = soup.find("div", {"id": f"lrc_{lrc_id}_details"}).get_text()
+        lrc = Lyrics()
+        lrc.add_unknown(lrc_str)
+        return lrc