Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefer synced lyrics, options to allow only plain/synced #47

Merged
merged 8 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,21 @@ pip install syncedlyrics
syncedlyrics "SEARCH_TERM"
```


By default, this prefers time-synced lyrics and falls back to plaintext lyrics if no synced lyrics are available.
To allow only one type of lyrics, specify `--plaintext-only` or `--synced-only`, respectively.


#### Available Options
| Flag | Description |
| --- | --- |
| `-o` | Path to save `.lrc` lyrics, default="{search_term}.lrc" |
| `-p` | Space-separated list of [providers](#providers) to include in searching |
| `-l` | Language code of the translation ([ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format) |
| `-v` | Use this flag to show the logs |
| `--allow-plain` | Return a plain text (not synced) lyrics if no LRC format was found |
| `--enhanced` | Return an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format
| `--allow-plain`, `--plaintext-only` | Return plain text (not synced) lyrics |
| `--synced-only` | Only look for synced lyrics |
| `--enhanced` | Searches for an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format. If it isn't available, search for regular synced lyrics. |

### Python
```py
Expand Down
87 changes: 50 additions & 37 deletions syncedlyrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,42 @@
from typing import List, Optional

from .providers import Deezer, Lrclib, Musixmatch, NetEase, Megalobiz, Genius
from .utils import is_lrc_valid, save_lrc_file
from .utils import Lyrics, TargetType
from .providers.base import LRCProvider

logger = logging.getLogger(__name__)


def search(
search_term: str,
allow_plain_format: bool = False,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think introducing a class as the response of the search function will break compatibility anyway. So `allow_plain` would no longer be necessary; instead, we can replace it with `plaintext_only`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well yeah, I was unsure how you felt about it. I also think it would be cleaner to return the class and rename "allow_plaintext" to "plaintext_only", even though it breaks compatibility.
However, in the current form I tried not to break compatibility and therefore made the search function still return a string (selecting from the Lyrics class according to the preference). If you want, I can quickly change that.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I thought you returned the Lyrics instance. In that case, I think a string is simpler and doesn't break anything, so all good; no change needed.

synced_only: bool = False,
save_path: Optional[str] = None,
providers: Optional[List[str]] = None,
providers: List[str] = [],
lang: Optional[str] = None,
enhanced: bool = False,
) -> Optional[str]:
"""
Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
### Arguments
- `search_term`: The search term to find the track
- `allow_plain_format`: Return a plain text (not synced) lyrics if not LRC was found
- `allow_plain`: Return plain text (not synced) lyrics
- `synced_only`: Only look for synced lyrics
- `save_path`: Path to save `.lrc` lyrics. No saving if `None`
- `providers`: A list of provider names to include in searching; loops over all the providers as soon as an LRC is found
- `lang`: Language of the translation along with the lyrics. **Only supported by Musixmatch**
- `enhanced`: Returns word by word synced lyrics if available. **Only supported by Musixmatch**
"""
if allow_plain_format and synced_only:
logger.error("--allow-plain and --synced-only flags cannot be used together.")
return None
target_type = TargetType.PREFER_SYNCED
if allow_plain_format:
target_type = TargetType.PLAINTEXT
elif synced_only:
target_type = TargetType.SYNCED_ONLY
lrc = Lyrics()

_providers = [
Musixmatch(lang=lang, enhanced=enhanced),
Lrclib(),
Expand All @@ -42,48 +55,48 @@ def search(
Megalobiz(),
Genius(),
]
if providers and any(providers):
# Filtering the providers
_providers = [
p
for p in _providers
if p.__class__.__name__.lower() in [p.lower() for p in providers]
]
if not _providers:
logger.error(
f"Providers {providers} not found in the list of available providers."
)
return None
lrc = None
for provider in _providers:
logger.debug(f"Looking for an LRC on {provider.__class__.__name__}")

for provider in _select_providers(_providers, providers):
logger.debug(f"Looking for an LRC on {provider}")
try:
_l = provider.get_lrc(search_term)
lrc.update(provider.get_lrc(search_term))
except Exception as e:
logger.error(
f"An error occurred while searching for an LRC on {provider.__class__.__name__}"
f"An error occurred while searching for an LRC on {provider}"
)
logger.error(e)
if lang:
logger.error("Aborting, since `lang` is only supported by Musixmatch")
continue
if enhanced and not _l:
# Since enhanced is only supported by Musixmatch, break if no LRC is found
break
check_translation = lang is not None and isinstance(provider, Musixmatch)
if is_lrc_valid(_l, allow_plain_format, check_translation):
logger.info(
f'synced-lyrics found for "{search_term}" on {provider.__class__.__name__}'
)
lrc = _l
if lrc.is_preferred(target_type):
logger.info(f'Lyrics found for "{search_term}" on {provider}')
break
elif lrc.is_acceptable(target_type):
logger.info(f'Found plaintext lyrics on {provider}, but continuing search for synced lyrics')
else:
logger.debug(
f"Skip {provider.__class__.__name__} as the synced-lyrics is not valid. (allow_plain_format={allow_plain_format})"
)
logger.debug(f"Lyrics: {_l}")
if not lrc:
logger.info(f'No synced-lyrics found for "{search_term}" :(')
logger.debug(f"No suitable lyrics found on {provider}, continuing search...")
if not lrc.is_acceptable(target_type):
logger.info(f'No suitable lyrics found for "{search_term}" :(')
return None
if save_path:
save_path = save_path.format(search_term=search_term)
save_lrc_file(save_path, lrc)
return lrc
lrc.save_lrc_file(save_path, target_type)
return lrc.to_str(target_type)


def _select_providers(
    providers: List["LRCProvider"], string_list: Optional[List[str]]
) -> List["LRCProvider"]:
    """
    Returns the providers whose name matches one of the given strings.

    ### Arguments
    - `providers`: The available provider instances, in search order
    - `string_list`: Provider names to keep, compared case-insensitively against `str(provider)`.
      `None` or an empty list means "no filter"

    ### Returns
    - All of `providers` if no names were given
    - The matching providers, in their original order, if at least one name matched
    - An empty list (after logging an error) if names were given but none matched
    """
    if not string_list:
        # No providers specified (None or empty), using all.
        # Accepting None keeps callers that still pass `providers=None` working.
        return providers
    wanted = {name.lower() for name in string_list}
    selection = [p for p in providers if str(p).lower() in wanted]
    if not selection:
        # List of providers specified but not found.
        # Deliberately returning nothing instead of all to avoid unexpected behaviour.
        logger.error(f"Providers {string_list} not found in the list of available providers.")
    return selection
12 changes: 10 additions & 2 deletions syncedlyrics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,14 @@ def cli_handler():
"-v", "--verbose", help="Use this flag to show the logs", action="store_true"
)
parser.add_argument(
"--allow-plain",
help="Return a plain text (not synced) lyrics if not LRC was found",
# Keeping --allow-plain for backwards compatibility, although --plaintext-only is more descriptive
"--allow-plain", "--plaintext-only",
help="Return plain text (not synced) lyrics",
action="store_true",
)
parser.add_argument(
"--synced-only",
help="Only look for synced lyrics",
action="store_true",
)
parser.add_argument(
Expand All @@ -42,9 +48,11 @@ def cli_handler():
args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)

lrc = search(
args.search_term,
args.allow_plain,
args.synced_only,
args.output,
args.p,
lang=args.lang,
Expand Down
12 changes: 9 additions & 3 deletions syncedlyrics/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
from typing import Optional
import logging

from ..utils import Lyrics


class TimeoutSession(requests.Session):
def request(self, method, url, **kwargs):
kwargs.setdefault("timeout", (2,5))
kwargs.setdefault("timeout", (2, 5))
return super().request(method, url, **kwargs)


class LRCProvider:
"""
Base class for all of the synced (LRC format) lyrics providers.
Expand All @@ -23,7 +26,10 @@ def __init__(self) -> None:
self.logger = logging.getLogger(self.__class__.__name__)
self.logger.addHandler(handler)

def get_lrc_by_id(self, track_id: str) -> Optional[str]:
def __str__(self) -> str:
return self.__class__.__name__

def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
"""
Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.

Expand All @@ -32,7 +38,7 @@ def get_lrc_by_id(self, track_id: str) -> Optional[str]:
"""
raise NotImplementedError

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
"""
Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
"""
Expand Down
20 changes: 11 additions & 9 deletions syncedlyrics/providers/deezer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Optional
from .base import LRCProvider
from ..utils import get_best_match
from ..utils import Lyrics, get_best_match

# Currently broken
# TODO: Fix invalid CSRF token
Expand Down Expand Up @@ -31,22 +31,24 @@ def _api_call(self, method: str, json=None) -> dict:
response = self.session.post(self.API_ENDPOINT, params=params, json=json)
return response.json()

def get_lrc_by_id(self, track_id: str) -> Optional[str]:
def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
lrc = Lyrics()
lrc_response = self._api_call("song.getLyrics", json={"sng_id": track_id})
lrc_json_objs = lrc_response["results"].get("LYRICS_SYNC_JSON")
if not lrc_json_objs:
# Returning the plain text lyrics
return lrc_response["results"].get("LYRICS_TEXT")
lrc = ""
lrc.unsynced = lrc_response["results"].get("LYRICS_TEXT")
return lrc
lrc_str = ""
for chunk in lrc_json_objs:
if chunk.get("lrc_timestamp") and chunk.get("line"):
lrc += f"{chunk['lrc_timestamp']} {chunk['line']}\n"
return lrc or None
lrc_str += f"{chunk['lrc_timestamp']} {chunk['line']}\n"
lrc.synced = lrc_str
return lrc

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+")
search_results = self.session.get(url).json()
cmp_key = lambda t: f"{t.get('title')} {t.get('artist').get('name')}"
def cmp_key(t): return f"{t.get('title')} {t.get('artist').get('name')}"
track = get_best_match(search_results.get("data", []), search_term, cmp_key)
if not track:
return None
Expand Down
10 changes: 6 additions & 4 deletions syncedlyrics/providers/genius.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

from typing import Optional
from .base import LRCProvider
from ..utils import generate_bs4_soup
from ..utils import Lyrics, generate_bs4_soup


class Genius(LRCProvider):
"""Genius provider class"""

SEARCH_ENDPOINT = "https://genius.com/api/search/multi?per_page=5&q="

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
params = {"q": search_term, "per_page": 5}
cookies = {
"obuid": "e3ee67e0-7df9-4181-8324-d977c6dc9250",
Expand All @@ -27,7 +27,9 @@ def get_lrc(self, search_term: str) -> Optional[str]:
els = soup.find_all("div", attrs={"data-lyrics-container": True})
if not els:
return None
lrc = ""
lrc_str = ""
for el in els:
lrc += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[")
lrc_str += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[")
lrc = Lyrics()
lrc.unsynced = lrc_str
return lrc
11 changes: 7 additions & 4 deletions syncedlyrics/providers/lrclib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Optional
from .base import LRCProvider
from ..utils import sort_results
from ..utils import Lyrics, sort_results


class Lrclib(LRCProvider):
Expand All @@ -16,15 +16,18 @@ class Lrclib(LRCProvider):
def __init__(self) -> None:
super().__init__()

def get_lrc_by_id(self, track_id: str) -> Optional[str]:
def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
url = self.LRC_ENDPOINT + track_id
r = self.session.get(url)
if not r.ok:
return None
track = r.json()
return track.get("syncedLyrics", track.get("plainLyrics"))
lrc = Lyrics()
lrc.synced = track.get("syncedLyrics")
lrc.unsynced = track.get("plainLyrics")
return lrc

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT
r = self.session.get(url, params={"q": search_term})
if not r.ok:
Expand Down
13 changes: 8 additions & 5 deletions syncedlyrics/providers/lyricsify.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Optional
from bs4 import SoupStrainer
from .base import LRCProvider
from ..utils import generate_bs4_soup, get_best_match
from ..utils import Lyrics, generate_bs4_soup, get_best_match

# Currently broken
# TODO: Bypassing Cloudflare anti-bot system
Expand All @@ -19,16 +19,19 @@ def __init__(self) -> None:
super().__init__()
self.parser = "html.parser"

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+")
href_match = lambda h: h.startswith("/lyric/")
def href_match(h): return h.startswith("/lyric/")
a_tags_boud = SoupStrainer("a", href=href_match)
soup = generate_bs4_soup(self.session, url, parse_only=a_tags_boud)
cmp_key = lambda t: t.get_text().lower().replace("-", "")
def cmp_key(t): return t.get_text().lower().replace("-", "")
a_tag = get_best_match(soup.find_all("a"), search_term, cmp_key)
if not a_tag:
return None
# Scraping from the LRC page
lrc_id = a_tag["href"].split(".")[-1]
soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"])
return soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text()
lrc_str = soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text()
lrc = Lyrics()
lrc.add_unknown(lrc_str)
return lrc
9 changes: 6 additions & 3 deletions syncedlyrics/providers/megalobiz.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Optional
from bs4 import SoupStrainer
from .base import LRCProvider
from ..utils import generate_bs4_soup, get_best_match
from ..utils import Lyrics, generate_bs4_soup, get_best_match


class Megalobiz(LRCProvider):
Expand All @@ -12,7 +12,7 @@ class Megalobiz(LRCProvider):
ROOT_URL = "https://www.megalobiz.com"
SEARCH_ENDPOINT = ROOT_URL + "/search/all?qry={q}&searchButton.x=0&searchButton.y=0"

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT.format(q=search_term.replace(" ", "+"))

def href_match(h: Optional[str]):
Expand All @@ -35,4 +35,7 @@ def a_text(a):
# Scraping from the LRC page
lrc_id = a_tag["href"].split(".")[-1]
soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"])
return soup.find("div", {"id": f"lrc_{lrc_id}_details"}).get_text()
lrc_str = soup.find("div", {"id": f"lrc_{lrc_id}_details"}).get_text()
lrc = Lyrics()
lrc.add_unknown(lrc_str)
return lrc
Loading
Loading