From 696d9f2fc6bf0df0ae333dbf4b45bd10e59a7b75 Mon Sep 17 00:00:00 2001 From: Wolfgang Frisch Date: Wed, 24 Jan 2024 12:33:30 +0100 Subject: [PATCH] Add job option `ignore_incomplete_reads`. Sometimes web servers return incomplete responses, triggering an `InvalidChunkLength` error that requests surfaces to urlwatch as a `ChunkedEncodingError` exception. Enable this job option to ignore these errors. https://github.com/thp/urlwatch/issues/725 --- lib/urlwatch/jobs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/urlwatch/jobs.py b/lib/urlwatch/jobs.py index f4db8217..073d42e0 100644 --- a/lib/urlwatch/jobs.py +++ b/lib/urlwatch/jobs.py @@ -260,7 +260,7 @@ class UrlJob(Job): __required__ = ('url',) __optional__ = ('cookies', 'data', 'method', 'ssl_no_verify', 'ignore_cached', 'http_proxy', 'https_proxy', 'headers', 'ignore_connection_errors', 'ignore_http_error_codes', 'encoding', 'timeout', - 'ignore_timeout_errors', 'ignore_too_many_redirects') + 'ignore_timeout_errors', 'ignore_too_many_redirects', 'ignore_incomplete_reads') CHARSET_RE = re.compile('text/(html|plain); charset=([^;]*)') @@ -388,6 +388,8 @@ def ignore_error(self, exception): return True if isinstance(exception, requests.exceptions.TooManyRedirects) and self.ignore_too_many_redirects: return True + if isinstance(exception, requests.exceptions.ChunkedEncodingError) and self.ignore_incomplete_reads: + return True elif isinstance(exception, requests.exceptions.HTTPError): status_code = exception.response.status_code ignored_codes = []