Skip to content

Commit

Permalink
Merge pull request #47 from jmaximusix/prefer_synced
Browse files Browse the repository at this point in the history
Prefer synced lyrics, options to allow only plain/synced
  • Loading branch information
moehmeni authored Jun 11, 2024
2 parents 738f015 + c7170e0 commit e82d6ec
Show file tree
Hide file tree
Showing 16 changed files with 235 additions and 118 deletions.
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,21 @@ pip install syncedlyrics
syncedlyrics "SEARCH_TERM"
```


By default, this prefers time-synced lyrics and falls back to plaintext lyrics if no synced lyrics are available.
To allow only one type of lyrics, specify `--plaintext-only` or `--synced-only`, respectively.


#### Available Options
| Flag | Description |
| --- | --- |
| `-o` | Path to save `.lrc` lyrics, default="{search_term}.lrc" |
| `-p` | Space-separated list of [providers](#providers) to include in searching |
| `-l` | Language code of the translation ([ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format) |
| `-v` | Use this flag to show the logs |
| `--allow-plain` | Return a plain text (not synced) lyrics if no LRC format was found |
| `--enhanced` | Return an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format
| `--allow-plain`, `--plaintext-only` | Return plain text (not synced) lyrics |
| `--synced-only` | Only look for synced lyrics |
| `--enhanced` | Searches for an [Enhanced](https://en.wikipedia.org/wiki/LRC_(file_format)#A2_extension:_word_time_tag) (word-level karaoke) format. If it isn't available, search for regular synced lyrics. |

### Python
```py
Expand All @@ -40,7 +46,7 @@ syncedlyrics.search("...", enhanced=True)

## Providers
- [Musixmatch](https://www.musixmatch.com/)
- [Deezer](https://deezer.com/)
- ~~[Deezer](https://deezer.com/)~~ (currently not working)
- [Lrclib](https://github.com/tranxuanthang/lrcget/issues/2#issuecomment-1326925928)
- [NetEase](https://music.163.com/)
- [Megalobiz](https://www.megalobiz.com/)
Expand Down
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ rapidfuzz = "^3.6.2"
syncedlyrics = "syncedlyrics.cli:cli_handler"

[tool.poetry.group.dev.dependencies]
black = "^24.2.0"
pytest = "^8.0.2"
black = "^24.4.2"

[build-system]
requires = ["poetry-core"]
Expand Down
91 changes: 55 additions & 36 deletions syncedlyrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,79 +11,98 @@
from typing import List, Optional

from .providers import Deezer, Lrclib, Musixmatch, NetEase, Megalobiz, Genius
from .utils import is_lrc_valid, save_lrc_file
from .utils import Lyrics, TargetType
from .providers.base import LRCProvider

logger = logging.getLogger(__name__)


def search(
search_term: str,
allow_plain_format: bool = False,
synced_only: bool = False,
save_path: Optional[str] = None,
providers: Optional[List[str]] = None,
providers: List[str] = [],
lang: Optional[str] = None,
enhanced: bool = False,
) -> Optional[str]:
"""
Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
### Arguments
- `search_term`: The search term to find the track
- `allow_plain_format`: Return a plain text (not synced) lyrics if not LRC was found
- `allow_plain`: Return plain text (not synced) lyrics
- `synced_only`: Only look for synced lyrics
- `save_path`: Path to save `.lrc` lyrics. No saving if `None`
- `providers`: A list of provider names to include in searching; loops over all the providers as soon as an LRC is found
- `lang`: Language of the translation along with the lyrics. **Only supported by Musixmatch**
- `enhanced`: Returns word by word synced lyrics if available. **Only supported by Musixmatch**
"""
if allow_plain_format and synced_only:
logger.error("--allow-plain and --synced-only flags cannot be used together.")
return None
target_type = TargetType.PREFER_SYNCED
if allow_plain_format:
target_type = TargetType.PLAINTEXT
elif synced_only:
target_type = TargetType.SYNCED_ONLY
lrc = Lyrics()

_providers = [
Musixmatch(lang=lang, enhanced=enhanced),
Lrclib(),
Deezer(),
# Deezer(),
NetEase(),
Megalobiz(),
Genius(),
]
if providers and any(providers):
# Filtering the providers
_providers = [
p
for p in _providers
if p.__class__.__name__.lower() in [p.lower() for p in providers]
]
if not _providers:
logger.error(
f"Providers {providers} not found in the list of available providers."
)
return None
lrc = None
for provider in _providers:
logger.debug(f"Looking for an LRC on {provider.__class__.__name__}")

for provider in _select_providers(_providers, providers):
logger.debug(f"Looking for an LRC on {provider}")
try:
_l = provider.get_lrc(search_term)
lrc.update(provider.get_lrc(search_term))
except Exception as e:
logger.error(
f"An error occurred while searching for an LRC on {provider.__class__.__name__}"
)
logger.error(f"An error occurred while searching for an LRC on {provider}")
logger.error(e)
if lang:
logger.error("Aborting, since `lang` is only supported by Musixmatch")
continue
if enhanced and not _l:
# Since enhanced is only supported by Musixmatch, break if no LRC is found
if lrc.is_preferred(target_type):
logger.info(f'Lyrics found for "{search_term}" on {provider}')
break
check_translation = lang is not None and isinstance(provider, Musixmatch)
if is_lrc_valid(_l, allow_plain_format, check_translation):
elif lrc.is_acceptable(target_type):
logger.info(
f'synced-lyrics found for "{search_term}" on {provider.__class__.__name__}'
f"Found plaintext lyrics on {provider}, but continuing search for synced lyrics"
)
lrc = _l
break
else:
logger.debug(
f"Skip {provider.__class__.__name__} as the synced-lyrics is not valid. (allow_plain_format={allow_plain_format})"
f"No suitable lyrics found on {provider}, continuing search..."
)
logger.debug(f"Lyrics: {_l}")
if not lrc:
logger.info(f'No synced-lyrics found for "{search_term}" :(')
if not lrc.is_acceptable(target_type):
logger.info(f'No suitable lyrics found for "{search_term}" :(')
return None
if save_path:
save_path = save_path.format(search_term=search_term)
save_lrc_file(save_path, lrc)
return lrc
lrc.save_lrc_file(save_path, target_type)
return lrc.to_str(target_type)


def _select_providers(
providers: List[LRCProvider], string_list: List[str]
) -> List[LRCProvider]:
"""
Returns a list of provider classes based on the given string list.
"""
strings_lowercase = [p.lower() for p in string_list]
selection = [p for p in providers if str(p).lower() in strings_lowercase]
if not selection:
if string_list:
# List of providers specified but not found.
# Deliberately returning nothing instead of all to avoid unexpected behaviour.
logger.error(
f"Providers {string_list} not found in the list of available providers."
)
return []
else:
# No providers specified, using all
return providers
return selection
11 changes: 10 additions & 1 deletion syncedlyrics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@ def cli_handler():
"-v", "--verbose", help="Use this flag to show the logs", action="store_true"
)
parser.add_argument(
# Keeping --allow-plain for backwards compatibility, although --plaintext-only is more descriptive
"--allow-plain",
help="Return a plain text (not synced) lyrics if not LRC was found",
"--plaintext-only",
help="Return plain text (not synced) lyrics",
action="store_true",
)
parser.add_argument(
"--synced-only",
help="Only look for synced lyrics",
action="store_true",
)
parser.add_argument(
Expand All @@ -42,9 +49,11 @@ def cli_handler():
args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)

lrc = search(
args.search_term,
args.allow_plain,
args.synced_only,
args.output,
args.p,
lang=args.lang,
Expand Down
12 changes: 9 additions & 3 deletions syncedlyrics/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
from typing import Optional
import logging

from ..utils import Lyrics


class TimeoutSession(requests.Session):
def request(self, method, url, **kwargs):
kwargs.setdefault("timeout", (2,5))
kwargs.setdefault("timeout", (2, 10))
return super().request(method, url, **kwargs)


class LRCProvider:
"""
Base class for all of the synced (LRC format) lyrics providers.
Expand All @@ -23,7 +26,10 @@ def __init__(self) -> None:
self.logger = logging.getLogger(self.__class__.__name__)
self.logger.addHandler(handler)

def get_lrc_by_id(self, track_id: str) -> Optional[str]:
def __str__(self) -> str:
return self.__class__.__name__

def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
"""
Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
Expand All @@ -32,7 +38,7 @@ def get_lrc_by_id(self, track_id: str) -> Optional[str]:
"""
raise NotImplementedError

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
"""
Returns the synced lyrics of the song in [LRC](https://en.wikipedia.org/wiki/LRC_(file_format)) format if found.
"""
Expand Down
18 changes: 10 additions & 8 deletions syncedlyrics/providers/deezer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Optional
from .base import LRCProvider
from ..utils import get_best_match
from ..utils import Lyrics, get_best_match

# Currently broken
# TODO: Fix invalid CSRF token
Expand Down Expand Up @@ -31,19 +31,21 @@ def _api_call(self, method: str, json=None) -> dict:
response = self.session.post(self.API_ENDPOINT, params=params, json=json)
return response.json()

def get_lrc_by_id(self, track_id: str) -> Optional[str]:
def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
lrc = Lyrics()
lrc_response = self._api_call("song.getLyrics", json={"sng_id": track_id})
lrc_json_objs = lrc_response["results"].get("LYRICS_SYNC_JSON")
if not lrc_json_objs:
# Returning the plain text lyrics
return lrc_response["results"].get("LYRICS_TEXT")
lrc = ""
lrc.unsynced = lrc_response["results"].get("LYRICS_TEXT")
return lrc
lrc_str = ""
for chunk in lrc_json_objs:
if chunk.get("lrc_timestamp") and chunk.get("line"):
lrc += f"{chunk['lrc_timestamp']} {chunk['line']}\n"
return lrc or None
lrc_str += f"{chunk['lrc_timestamp']} {chunk['line']}\n"
lrc.synced = lrc_str
return lrc

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+")
search_results = self.session.get(url).json()
cmp_key = lambda t: f"{t.get('title')} {t.get('artist').get('name')}"
Expand Down
10 changes: 6 additions & 4 deletions syncedlyrics/providers/genius.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

from typing import Optional
from .base import LRCProvider
from ..utils import generate_bs4_soup
from ..utils import Lyrics, generate_bs4_soup


class Genius(LRCProvider):
"""Genius provider class"""

SEARCH_ENDPOINT = "https://genius.com/api/search/multi?per_page=5&q="

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
params = {"q": search_term, "per_page": 5}
cookies = {
"obuid": "e3ee67e0-7df9-4181-8324-d977c6dc9250",
Expand All @@ -27,7 +27,9 @@ def get_lrc(self, search_term: str) -> Optional[str]:
els = soup.find_all("div", attrs={"data-lyrics-container": True})
if not els:
return None
lrc = ""
lrc_str = ""
for el in els:
lrc += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[")
lrc_str += el.get_text(separator="\n", strip=True).replace("\n[", "\n\n[")
lrc = Lyrics()
lrc.unsynced = lrc_str
return lrc
11 changes: 7 additions & 4 deletions syncedlyrics/providers/lrclib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Optional
from .base import LRCProvider
from ..utils import sort_results
from ..utils import Lyrics, sort_results


class Lrclib(LRCProvider):
Expand All @@ -16,15 +16,18 @@ class Lrclib(LRCProvider):
def __init__(self) -> None:
super().__init__()

def get_lrc_by_id(self, track_id: str) -> Optional[str]:
def get_lrc_by_id(self, track_id: str) -> Optional[Lyrics]:
url = self.LRC_ENDPOINT + track_id
r = self.session.get(url)
if not r.ok:
return None
track = r.json()
return track.get("syncedLyrics", track.get("plainLyrics"))
lrc = Lyrics()
lrc.synced = track.get("syncedLyrics")
lrc.unsynced = track.get("plainLyrics")
return lrc

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT
r = self.session.get(url, params={"q": search_term})
if not r.ok:
Expand Down
9 changes: 6 additions & 3 deletions syncedlyrics/providers/lyricsify.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Optional
from bs4 import SoupStrainer
from .base import LRCProvider
from ..utils import generate_bs4_soup, get_best_match
from ..utils import Lyrics, generate_bs4_soup, get_best_match

# Currently broken
# TODO: Bypassing Cloudflare anti-bot system
Expand All @@ -19,7 +19,7 @@ def __init__(self) -> None:
super().__init__()
self.parser = "html.parser"

def get_lrc(self, search_term: str) -> Optional[str]:
def get_lrc(self, search_term: str) -> Optional[Lyrics]:
url = self.SEARCH_ENDPOINT + search_term.replace(" ", "+")
href_match = lambda h: h.startswith("/lyric/")
a_tags_boud = SoupStrainer("a", href=href_match)
Expand All @@ -31,4 +31,7 @@ def get_lrc(self, search_term: str) -> Optional[str]:
# Scraping from the LRC page
lrc_id = a_tag["href"].split(".")[-1]
soup = generate_bs4_soup(self.session, self.ROOT_URL + a_tag["href"])
return soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text()
lrc_str = soup.find("div", {"id": f"lyrics_{lrc_id}_details"}).get_text()
lrc = Lyrics()
lrc.add_unknown(lrc_str)
return lrc
Loading

0 comments on commit e82d6ec

Please sign in to comment.