From a11e128a8947b6637b1e7338f0e917c3ee35fb93 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 27 Oct 2021 11:28:19 -0700 Subject: [PATCH 1/8] add code to handle missing content-type header and a test to verify that it works --- synapse/rest/media/v1/media_repository.py | 5 ++- tests/rest/media/v1/test_media_storage.py | 46 +++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index abd88a2d4f9e..aec00ed4180c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -445,7 +445,10 @@ async def _download_remote_file( await finish() - media_type = headers[b"Content-Type"][0].decode("ascii") + if b"Content-Type" in headers: + media_type = headers[b"Content-Type"][0].decode("ascii") + else: + media_type = "application/octet-stream" upload_name = get_filename_from_headers(headers) time_now_ms = self.clock.time_msec() diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 4ae00755c957..4d1ebf685c7e 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -285,6 +285,52 @@ def _req(self, content_disposition): return channel + def _req_missing_headers(self, content_disposition): + + channel = make_request( + self.reactor, + FakeSite(self.download_resource, self.reactor), + "GET", + self.media_id, + shorthand=False, + await_result=False, + ) + self.pump() + + # We've made one fetch, to example.com, using the media URL, and asking + # the other server not to do a remote fetch + self.assertEqual(len(self.fetches), 1) + self.assertEqual(self.fetches[0][1], "example.com") + self.assertEqual( + self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id + ) + self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) + + headers = { + b"Content-Length": [b"%d" % (len(self.test_image.data))], + } + if content_disposition: + headers[b"Content-Disposition"] = [content_disposition] + + self.fetches[0][0].callback( + (self.test_image.data, (len(self.test_image.data), headers)) + ) + + self.pump() + self.assertEqual(channel.code, 200) + + return channel + + def test_handle_missing_content_type(self): + channel = self._req_missing_headers( + b"inline; filename=out" + self.test_image.extension + ) + headers = channel.headers + self.assertEqual(channel.code, 200) + self.assertEqual( + headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"] + ) + def test_disposition_filename_ascii(self): """ If the filename is filename= then Synapse will decode it as an From c1ac9818e5c01011a7f37373bb3c29bac25b0423 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 27 Oct 2021 11:39:21 -0700 Subject: [PATCH 2/8] add handling for missing content-type in the /upload endpoint as well --- synapse/rest/media/v1/upload_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 7dcb1428e490..8162094cf688 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -80,7 +80,7 @@ async def _async_render_POST(self, request: SynapseRequest) -> None: assert content_type_headers # for mypy media_type = content_type_headers[0].decode("ascii") else: - raise SynapseError(msg="Upload request missing 'Content-Type'", code=400) + media_type = "application/octet-stream" # if headers.hasHeader(b"Content-Disposition"): # disposition = headers.getRawHeaders(b"Content-Disposition")[0] From c95833d6705086769ae8b602d7fc47b5298f4f60 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 27 Oct 2021 12:32:58 -0700 Subject: [PATCH 3/8] slightly refactor test code to put private method in approriate place --- tests/rest/media/v1/test_media_storage.py | 63 +++++++++++------------ 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 4d1ebf685c7e..8e9f74fca4d8 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -285,44 +285,43 @@ def _req(self, content_disposition): return channel - def _req_missing_headers(self, content_disposition): - - channel = make_request( - self.reactor, - FakeSite(self.download_resource, self.reactor), - "GET", - self.media_id, - shorthand=False, - await_result=False, - ) - self.pump() + def test_handle_missing_content_type(self): + def _req_missing_headers(content_disposition): + channel = make_request( + self.reactor, + FakeSite(self.download_resource, self.reactor), + "GET", + self.media_id, + shorthand=False, + await_result=False, + ) + self.pump() - # We've made one fetch, to example.com, using the media URL, and asking - # the other server not to do a remote fetch - self.assertEqual(len(self.fetches), 1) - self.assertEqual(self.fetches[0][1], "example.com") - self.assertEqual( - self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id - ) - self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) + # We've made one fetch, to example.com, using the media URL, and asking + # the other server not to do a remote fetch + self.assertEqual(len(self.fetches), 1) + self.assertEqual(self.fetches[0][1], "example.com") + self.assertEqual( + self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id + ) + self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) - headers = { - b"Content-Length": [b"%d" % (len(self.test_image.data))], - } - if content_disposition: - headers[b"Content-Disposition"] = [content_disposition] + headers = { + b"Content-Length": [b"%d" % (len(self.test_image.data))], + } + if content_disposition: + headers[b"Content-Disposition"] = [content_disposition] - self.fetches[0][0].callback( - (self.test_image.data, (len(self.test_image.data), headers)) - ) + self.fetches[0][0].callback( + (self.test_image.data, (len(self.test_image.data), headers)) + ) - self.pump() - self.assertEqual(channel.code, 200) + self.pump() + self.assertEqual(channel.code, 200) - return channel + return channel - def test_handle_missing_content_type(self): - channel = self._req_missing_headers( + channel = _req_missing_headers( b"inline; filename=out" + self.test_image.extension ) headers = channel.headers From 97afea572e84b6f17d832064adc681b7df3a03c2 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 27 Oct 2021 13:35:38 -0700 Subject: [PATCH 4/8] handle possible null value for content-type when pulling from the local db --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index aec00ed4180c..a7346a982e91 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -214,7 +214,7 @@ async def get_local_media( self.mark_recently_accessed(None, media_id) - media_type = media_info["media_type"] + media_type = media_info.get("media_type", "application/octet-stream") media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] url_cache = media_info["url_cache"] From 36e706ba6dbebb31f37d24b32e96ff1d43662237 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 27 Oct 2021 13:52:40 -0700 Subject: [PATCH 5/8] add changelog --- changelog.d/11200.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/11200.bugfix diff --git a/changelog.d/11200.bugfix b/changelog.d/11200.bugfix new file mode 100644 index 000000000000..e16182acb0d6 --- /dev/null +++ b/changelog.d/11200.bugfix @@ -0,0 +1 @@ +Fixes a long-standing bug wherein a missing Content-Type header when downloading remote media causes Synapse to throw an error. \ No newline at end of file From 35ab2928c8b483031c442a28cc67656bd67c6470 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Thu, 28 Oct 2021 10:25:51 -0700 Subject: [PATCH 6/8] refactor test and add code to handle missing content-type in cached remote media --- synapse/rest/media/v1/media_repository.py | 9 +++- tests/rest/media/v1/test_media_storage.py | 55 +++++------------------ 2 files changed, 20 insertions(+), 44 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a7346a982e91..b39d88fc6cb1 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -214,7 +214,9 @@ async def get_local_media( self.mark_recently_accessed(None, media_id) - media_type = media_info.get("media_type", "application/octet-stream") + media_type = media_info.get("media_type") + if not media_type: + media_type = "application/octect-stream" media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] url_cache = media_info["url_cache"] @@ -333,6 +335,9 @@ async def _get_remote_media_impl( logger.info("Media is quarantined") raise NotFoundError() + if not media_info["media_type"]: + media_info["media_type"] = "application/octect-stream" + responder = await self.media_storage.fetch_media(file_info) if responder: return responder, media_info @@ -354,6 +359,8 @@ async def _get_remote_media_impl( raise e file_id = media_info["filesystem_id"] + if not media_info["media_type"]: + media_info["media_type"] = "application/octect-stream" file_info = FileInfo(server_name, file_id) # We generate thumbnails even if another process downloaded the media diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 8e9f74fca4d8..b3cef7907463 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -248,7 +248,7 @@ def prepare(self, reactor, clock, hs): self.media_id = "example.com/12345" - def _req(self, content_disposition): + def _req(self, content_disposition, include_content_type=True): channel = make_request( self.reactor, @@ -269,10 +269,16 @@ def _req(self, content_disposition): ) self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) - headers = { - b"Content-Length": [b"%d" % (len(self.test_image.data))], - b"Content-Type": [self.test_image.content_type], - } + if include_content_type: + headers = { + b"Content-Length": [b"%d" % (len(self.test_image.data))], + b"Content-Type": [self.test_image.content_type], + } + else: + headers = { + b"Content-Length": [b"%d" % (len(self.test_image.data))], + } + if content_disposition: headers[b"Content-Disposition"] = [content_disposition] @@ -286,44 +292,7 @@ def _req(self, content_disposition): return channel def test_handle_missing_content_type(self): - def _req_missing_headers(content_disposition): - channel = make_request( - self.reactor, - FakeSite(self.download_resource, self.reactor), - "GET", - self.media_id, - shorthand=False, - await_result=False, - ) - self.pump() - - # We've made one fetch, to example.com, using the media URL, and asking - # the other server not to do a remote fetch - self.assertEqual(len(self.fetches), 1) - self.assertEqual(self.fetches[0][1], "example.com") - self.assertEqual( - self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id - ) - self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) - - headers = { - b"Content-Length": [b"%d" % (len(self.test_image.data))], - } - if content_disposition: - headers[b"Content-Disposition"] = [content_disposition] - - self.fetches[0][0].callback( - (self.test_image.data, (len(self.test_image.data), headers)) - ) - - self.pump() - self.assertEqual(channel.code, 200) - - return channel - - channel = _req_missing_headers( - b"inline; filename=out" + self.test_image.extension - ) + channel = self._req(b"inline; filename=out" + self.test_image.extension, False) headers = channel.headers self.assertEqual(channel.code, 200) self.assertEqual( From 7c3c4af92f4e9f139ae3705a7077c82714139e65 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Fri, 29 Oct 2021 13:07:30 -0700 Subject: [PATCH 7/8] requested changes --- synapse/rest/media/v1/media_repository.py | 8 ++++---- tests/rest/media/v1/test_media_storage.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index b39d88fc6cb1..244ba261bbc4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -214,9 +214,9 @@ async def get_local_media( self.mark_recently_accessed(None, media_id) - media_type = media_info.get("media_type") + media_type = media_info["media_type"] if not media_type: - media_type = "application/octect-stream" + media_type = "application/octet-stream" media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] url_cache = media_info["url_cache"] @@ -336,7 +336,7 @@ async def _get_remote_media_impl( raise NotFoundError() if not media_info["media_type"]: - media_info["media_type"] = "application/octect-stream" + media_info["media_type"] = "application/octet-stream" responder = await self.media_storage.fetch_media(file_info) if responder: @@ -360,7 +360,7 @@ async def _get_remote_media_impl( file_id = media_info["filesystem_id"] if not media_info["media_type"]: - media_info["media_type"] = "application/octect-stream" + media_info["media_type"] = "application/octet-stream" file_info = FileInfo(server_name, file_id) # We generate thumbnails even if another process downloaded the media diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index b3cef7907463..4cf1ed5ddff0 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -269,15 +269,12 @@ def _req(self, content_disposition, include_content_type=True): ) self.assertEqual(self.fetches[0][3], {"allow_remote": "false"}) + headers = { + b"Content-Length": [b"%d" % (len(self.test_image.data))], + } + if include_content_type: - headers = { - b"Content-Length": [b"%d" % (len(self.test_image.data))], - b"Content-Type": [self.test_image.content_type], - } - else: - headers = { - b"Content-Length": [b"%d" % (len(self.test_image.data))], - } + headers[b"Content-Type"] = [self.test_image.content_type] if content_disposition: headers[b"Content-Disposition"] = [content_disposition] @@ -292,7 +289,10 @@ def _req(self, content_disposition, include_content_type=True): return channel def test_handle_missing_content_type(self): - channel = self._req(b"inline; filename=out" + self.test_image.extension, False) + channel = self._req( + b"inline; filename=out" + self.test_image.extension, + include_content_type=False, + ) headers = channel.headers self.assertEqual(channel.code, 200) self.assertEqual( From 576b867a5340728004c0da14c605cd1224dda5d0 Mon Sep 17 00:00:00 2001 From: Shay Date: Mon, 1 Nov 2021 08:48:20 -0700 Subject: [PATCH 8/8] Update changelog.d/11200.bugfix Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/11200.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/11200.bugfix b/changelog.d/11200.bugfix index e16182acb0d6..c85508198667 100644 --- a/changelog.d/11200.bugfix +++ b/changelog.d/11200.bugfix @@ -1 +1 @@ -Fixes a long-standing bug wherein a missing Content-Type header when downloading remote media causes Synapse to throw an error. \ No newline at end of file +Fix a long-standing bug wherein a missing `Content-Type` header when downloading remote media would cause Synapse to throw an error. \ No newline at end of file