Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VidLii] Add 720p support (yt-dlp backport-ish) #30924

Merged
merged 12 commits into from
Jun 11, 2024
57 changes: 50 additions & 7 deletions youtube_dl/extractor/vidlii.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import re

from .common import InfoExtractor

from ..utils import (
ExtractorError,
dirkf marked this conversation as resolved.
Show resolved Hide resolved
float_or_none,
get_element_by_id,
int_or_none,
Expand Down Expand Up @@ -35,6 +37,25 @@ class VidLiiIE(InfoExtractor):
'categories': ['News & Politics'],
'tags': ['Vidlii', 'Jan', 'Videogames'],
}
}, { # HD
dirkf marked this conversation as resolved.
Show resolved Hide resolved
'url': 'https://www.vidlii.com/watch?v=2Ng8Abj2Fkl',
'md5': '450e7da379c884788c3a4fa02a3ce1a4',
'info_dict': {
'id': '2Ng8Abj2Fkl',
'ext': 'mp4',
'title': 'test',
'description': 'md5:cc55a86032a7b6b3cbfd0f6b155b52e9',
'thumbnail': 'https://www.vidlii.com/usfi/thmp/2Ng8Abj2Fkl.jpg',
'uploader': 'VidLii',
'uploader_url': 'https://www.vidlii.com/user/VidLii',
'upload_date': '20200927',
'duration': 5,
'view_count': int,
'comment_count': int,
'average_rating': float,
'categories': ['Film & Animation'],
'tags': ['1', '2'],
mrpapersonic marked this conversation as resolved.
Show resolved Hide resolved
},
}, {
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
'only_matching': True,
Expand All @@ -46,9 +67,31 @@ def _real_extract(self, url):
webpage = self._download_webpage(
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)

video_url = self._search_regex(
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
'video url', group='url')
formats = []

def add_format(format_url, height=None):
    """Append a format entry for format_url to the enclosing formats list.

    format_url -- media URL of the format
    height -- vertical resolution of the format; when None it is read
              from the digits immediately preceding '.mp4' in
              format_url, falling back to 360 if there are none
    """
    if height is None:
        # An explicitly passed height is respected; the URL is only
        # parsed as a fallback. The dot is escaped so that only a
        # literal '.mp4' suffix is matched.
        height = int(self._search_regex(
            r'(\d+)\.mp4', format_url, 'height', default=360))

    formats.append({
        'url': format_url,
        'format_id': '%dp' % height if height else None,
        'height': height,
    })

sources = re.findall(
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
mrpapersonic marked this conversation as resolved.
Show resolved Hide resolved
webpage)

try:
self._request_webpage(sources[1][1], video_id, 'Checking HD URL')
add_format(sources[1][1])
except ExtractorError:
pass

add_format(sources[0][1])
mrpapersonic marked this conversation as resolved.
Show resolved Hide resolved

self._sort_formats(formats)

title = self._search_regex(
mrpapersonic marked this conversation as resolved.
Show resolved Hide resolved
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
Expand Down Expand Up @@ -83,9 +126,9 @@ def _real_extract(self, url):
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

view_count = int_or_none(self._search_regex(
(r'<strong>(\d+)</strong> views',
r'Views\s*:\s*<strong>(\d+)</strong>'),
webpage, 'view count', fatal=False))
(r'<strong>([\d,]+)</strong> views',
r'Views\s*:\s*<strong>([\d,]+)</strong>'),
webpage, 'view count', fatal=False).replace(",", ""))
mrpapersonic marked this conversation as resolved.
Show resolved Hide resolved

comment_count = int_or_none(self._search_regex(
(r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
Expand All @@ -109,7 +152,7 @@ def _real_extract(self, url):

return {
'id': video_id,
'url': video_url,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
Expand Down