diff --git a/README.md b/README.md index 27a54ef68b..10a783abf8 100644 --- a/README.md +++ b/README.md @@ -1863,6 +1863,7 @@ The following extractors use this feature: * `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_downgraded`, and `tv_simply`. By default, `android_vr,web,web_safari` is used. If no JavaScript runtime/engine is available, then only `android_vr` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web,web_safari` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only successfully works around the age-restriction sometimes (e.g. if the video is embeddable), and may be added as a fallback if `android_vr` is unable to access a video. The `web_creator` client is added for age-restricted videos if account age-verification is required. Some clients, such as `web_creator` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details * `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. Using these will not skip any network requests, and in some cases will result in additional network requests. Currently, the default is `player_response`; however, typically these are for testing purposes only +* `webpage_client`: Client to use for the video webpage request. One of `web` or `web_safari` (default) * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_js_variant`: The player javascript variant to use for n/sig deciphering. The known variants are: `main`, `tcc`, `tce`, `es5`, `es6`, `es6_tcc`, `es6_tce`, `tv`, `tv_es6`, `phone`, `house`. The default is `tv`, and the others are for debugging purposes. You can use `actual` to go with what is prescribed by the site * `player_js_version`: The player javascript version to use for n/sig deciphering, in the format of `signature_timestamp@hash` (e.g. `20348@0004de42`). Currently, the default is to force `20514@9f4cc5e4`. You can use `actual` to go with what is prescribed by the site diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index c652525935..d3e4ab1176 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -957,15 +957,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): url = { 'mweb': 'https://m.youtube.com', 'web': 'https://www.youtube.com', + 'web_safari': 'https://www.youtube.com', 'web_music': 'https://music.youtube.com', + 'web_creator': 'https://studio.youtube.com', 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', 'tv': 'https://www.youtube.com/tv', }.get(client) if not url: return {} + + default_ytcfg = self._get_default_ytcfg(client) + + if default_ytcfg['REQUIRE_AUTH'] and not self.is_authenticated: + return {} + webpage = self._download_webpage_with_retries( url, video_id, note=f'Downloading {client.replace("_", " ").strip()} client config', - headers=traverse_obj(self._get_default_ytcfg(client), { + headers=traverse_obj(default_ytcfg, { 'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}), 'Referer': ('INNERTUBE_CONTEXT', 'thirdParty', 'embedUrl', {str}), })) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 49b8b06eeb..1ebef33676 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -140,11 +140,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): _RETURN_TYPE = 'video' # XXX: How to handle multifeed? _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt') - _DEFAULT_CLIENTS = ('android_vr', 'web', 'web_safari') + _DEFAULT_CLIENTS = ('android_vr', 'web_safari') _DEFAULT_JSLESS_CLIENTS = ('android_vr',) - _DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web', 'web_safari') + _DEFAULT_AUTHED_CLIENTS = ('tv_downgraded', 'web_safari') # Premium does not require POT (except for subtitles) - _DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator', 'web') + _DEFAULT_PREMIUM_CLIENTS = ('tv_downgraded', 'web_creator') + _WEBPAGE_CLIENTS = ('web', 'web_safari') + _DEFAULT_WEBPAGE_CLIENT = 'web_safari' _GEO_BYPASS = False @@ -2936,7 +2938,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # This can be detected with the embeds_enable_encrypted_host_flags_enforcement experiemnt flag, # but there is no harm in including encryptedHostFlags with all web_embedded player requests. encrypted_context = None - if client == 'web_embedded': + if _split_innertube_client(client)[2] == 'embedded': encrypted_context = traverse_obj(player_ytcfg, ( 'WEB_PLAYER_CONTEXT_CONFIGS', 'WEB_PLAYER_CONTEXT_CONFIG_ID_EMBEDDED_PLAYER', 'encryptedHostFlags')) @@ -3894,7 +3896,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): base_url = self.http_scheme() + '//www.youtube.com/' webpage_url = base_url + 'watch?v=' + video_id - webpage_client = 'web' + webpage_client = self._configuration_arg('webpage_client', [self._DEFAULT_WEBPAGE_CLIENT])[0] + if webpage_client not in self._WEBPAGE_CLIENTS: + self.report_warning( + f'Invalid webpage_client "{webpage_client}" requested; ' + f'falling back to {self._DEFAULT_WEBPAGE_CLIENT}', only_once=True) + webpage_client = self._DEFAULT_WEBPAGE_CLIENT webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url = self._initial_extract( url, smuggled_data, webpage_url, webpage_client, video_id)