Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into trial
Browse files Browse the repository at this point in the history
  • Loading branch information
kclauhk authored Feb 19, 2024
2 parents 21c8101 + 4f04347 commit ca94e59
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 18 deletions.
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ yt-dlp.tar.gz: all
--exclude '__pycache__' \
--exclude '.pytest_cache' \
--exclude '.git' \
--exclude '__pyinstaller' \
-- \
README.md supportedsites.md Changelog.md LICENSE \
CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ include = [
"/setup.cfg",
"/supportedsites.md",
]
exclude = ["/yt_dlp/__pyinstaller"]
artifacts = [
"/yt_dlp/extractor/lazy_extractors.py",
"/completions",
Expand All @@ -105,7 +104,6 @@ artifacts = [

[tool.hatch.build.targets.wheel]
packages = ["yt_dlp"]
exclude = ["/yt_dlp/__pyinstaller"]
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]

[tool.hatch.build.targets.wheel.shared-data]
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/__pyinstaller/hook-yt_dlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ def get_hidden_imports():
hiddenimports = list(get_hidden_imports())
print(f'Adding imports: {hiddenimports}')

excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@
from .filmweb import FilmwebIE
from .firsttv import FirstTVIE
from .fivetv import FiveTVIE
from .flextv import FlexTVIE
from .flickr import FlickrIE
from .floatplane import (
FloatplaneIE,
Expand Down
62 changes: 62 additions & 0 deletions yt_dlp/extractor/flextv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
UserNotLive,
parse_iso8601,
str_or_none,
traverse_obj,
url_or_none,
)


class FlexTVIE(InfoExtractor):
    """Extractor for live streams on flextv.co.kr channel pages."""
    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
    _TESTS = [{
        'url': 'https://www.flextv.co.kr/channels/231638/live',
        'info_dict': {
            'id': '231638',
            'ext': 'mp4',
            'title': r're:^214하나만\.\.\. ',
            'thumbnail': r're:^https?://.+\.jpg',
            'upload_date': r're:\d{8}',
            'timestamp': int,
            'live_status': 'is_live',
            'channel': 'Hi별',
            'channel_id': '244396',
        },
        'skip': 'The channel is offline',
    }, {
        'url': 'https://www.flextv.co.kr/channels/746/live',
        'only_matching': True,
    }]

    def _fetch_stream_info(self, channel_id):
        """Query the FlexTV stream API; a 400 response means the channel is offline."""
        try:
            return self._download_json(
                f'https://api.flextv.co.kr/api/channels/{channel_id}/stream',
                channel_id, query={'option': 'all'})
        except ExtractorError as e:
            # The API answers HTTP 400 when there is no live stream on the channel
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                raise UserNotLive(video_id=channel_id)
            raise

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        stream_info = self._fetch_stream_info(channel_id)

        # The first source entry carries the HLS playlist for the live stream
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            stream_info['sources'][0]['url'], channel_id, 'mp4')

        metadata = traverse_obj(stream_info, {
            'title': ('stream', 'title', {str}),
            'timestamp': ('stream', 'createdAt', {parse_iso8601}),
            'thumbnail': ('thumbUrl', {url_or_none}),
            'channel': ('owner', 'name', {str}),
            'channel_id': ('owner', 'id', {str_or_none}),
        })
        return {
            'id': channel_id,
            'formats': fmts,
            'subtitles': subs,
            'is_live': True,
            **metadata,
        }
55 changes: 41 additions & 14 deletions yt_dlp/extractor/nhk.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
join_nonempty,
parse_duration,
traverse_obj,
try_call,
unescapeHTML,
unified_timestamp,
url_or_none,
Expand Down Expand Up @@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
_TESTS = [{
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
'skip': 'Episode expired on 2023-04-16',
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
'skip': 'Episode expired on 2024-02-24',
'info_dict': {
'channel': 'NHK-FM',
'uploader': 'NHK-FM',
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
'id': '0449_01_3926210',
'ext': 'm4a',
'id': '0449_01_3853544',
'series': 'ジャズ・トゥナイト',
'uploader': 'NHK-FM',
'channel': 'NHK-FM',
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
'timestamp': 1680969600,
'title': 'ジャズ・トゥナイト NEWジャズ特集',
'upload_date': '20230408',
'release_timestamp': 1680962400,
'release_date': '20230408',
'was_live': True,
'release_date': '20240217',
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
'timestamp': 1708185600,
'release_timestamp': 1708178400,
'upload_date': '20240217',
},
}, {
# playlist, airs every weekday so it should _hopefully_ be okay forever
Expand Down Expand Up @@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
'series': 'らじる文庫 by ラジオ深夜便 ',
'release_timestamp': 1481126700,
'upload_date': '20211101',
}
},
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
}, {
# news
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
Expand All @@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
},
}]

_API_URL_TMPL = None

def _extract_extended_description(self, episode_id, episode):
    """Fetch and join extended description fields for an episode.

    Builds a programme-detail API URL from fields of *episode* and the
    class-level ``_API_URL_TMPL``, downloads the detail JSON (non-fatal),
    and joins its 'subtitle'/'content'/'act'/'music' fields with blank
    lines.  Returns None when no detail URL could be built; may return an
    empty/None result if the download fails or yields no usable fields.
    """
    # aa_vinfo2 looks like '<service>,<area>' — split once on the first comma.
    # str.partition returns (head, sep, tail); the separator itself is discarded.
    service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
    aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
    # try_call swallows the AttributeError raised when _API_URL_TMPL is still
    # None (config download failed in _real_initialize), yielding no URL.
    detail_url = try_call(
        lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
    if not detail_url:
        return

    # fatal=False: a failed metadata download degrades to the summary
    # description in the caller rather than aborting extraction.
    full_meta = traverse_obj(
        self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
        ('list', service, 0, {dict})) or {}
    return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)

def _extract_episode_info(self, headline, programme_id, series_meta):
episode_id = f'{programme_id}_{headline["headline_id"]}'
episode = traverse_obj(headline, ('file_list', 0, {dict}))
description = self._extract_extended_description(episode_id, episode)
if not description:
self.report_warning('Failed to get extended description, falling back to summary')
description = traverse_obj(episode, ('file_title_sub', {str}))

return {
**series_meta,
Expand All @@ -551,14 +571,21 @@ def _extract_episode_info(self, headline, programme_id, series_meta):
'was_live': True,
'series': series_meta.get('title'),
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
'description': description,
**traverse_obj(episode, {
'title': 'file_title',
'description': 'file_title_sub',
'timestamp': ('open_time', {unified_timestamp}),
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
}),
}

def _real_initialize(self):
    """Resolve and cache the programme-detail API URL template once per run.

    The template is stored on the class (not the instance) so every
    NhkRadiruIE instance shares a single config download.
    """
    # Already resolved by a previous instance — nothing to do.
    if self._API_URL_TMPL:
        return
    # fatal=False: if the config is unavailable, api_config is None and
    # the try_call below leaves the template unset (extended descriptions
    # then fall back to the summary text).
    api_config = self._download_xml(
        'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)
    # The config stores a protocol-relative URL; prefix the https scheme.
    NhkRadiruIE._API_URL_TMPL = try_call(lambda: f'https:{api_config.find(".//url_program_detail").text}')

def _real_extract(self, url):
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
programme_id = f'{site_id}_{corner_id}'
Expand Down

0 comments on commit ca94e59

Please sign in to comment.