From c7d7eee55232f911d7a2306c6344008a6e78f45b Mon Sep 17 00:00:00 2001 From: Jeyachandran Rathnam Date: Wed, 4 Jan 2023 21:02:11 -0500 Subject: [PATCH 1/4] Fix 14708 : Unescape HTML entities in Open Graph title --- synapse/rest/media/v1/oembed.py | 3 ++- tests/rest/media/v1/test_oembed.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 827afd868d65..b78533d19c90 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import html as html5 import logging import urllib.parse from typing import TYPE_CHECKING, List, Optional @@ -161,7 +162,7 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: title = oembed.get("title") if title and isinstance(title, str): - open_graph_response["og:title"] = title + open_graph_response["og:title"] = html5.unescape(title) author_name = oembed.get("author_name") if not isinstance(author_name, str): diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py index 319ae8b1cc2a..3f7f1dbab9b7 100644 --- a/tests/rest/media/v1/test_oembed.py +++ b/tests/rest/media/v1/test_oembed.py @@ -150,3 +150,13 @@ def test_link(self) -> None: result = self.parse_response({"type": "link"}) self.assertIn("og:type", result.open_graph_result) self.assertEqual(result.open_graph_result["og:type"], "website") + + def test_title_html_entities(self) -> None: + """Test HTML entities in title""" + result = self.parse_response( + {"title": "Why JSON isn&#8217;t a Good Configuration Language"} + ) + self.assertEqual( + result.open_graph_result["og:title"], + "Why JSON isn’t a Good Configuration Language", + ) From 8ad0d7f528c37291fbf73c0d7eef31efbd70acef Mon Sep 17 00:00:00 2001 From: Jeyachandran 
Rathnam Date: Wed, 4 Jan 2023 21:11:38 -0500 Subject: [PATCH 2/4] Add changelog file --- changelog.d/14781.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14781.misc diff --git a/changelog.d/14781.misc b/changelog.d/14781.misc new file mode 100644 index 000000000000..b610aa5c0c90 --- /dev/null +++ b/changelog.d/14781.misc @@ -0,0 +1 @@ +Unescape HTML entities in Open Graph title. \ No newline at end of file From 39f6dbedfcbc11f28505aae1e1268e6324b186f6 Mon Sep 17 00:00:00 2001 From: Jeyachandran Rathnam Date: Fri, 6 Jan 2023 18:54:41 -0500 Subject: [PATCH 3/4] Rename html var to html_str, import html package correctly --- synapse/rest/media/v1/oembed.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index b78533d19c90..a3738a62507d 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import html as html5 +import html import logging import urllib.parse from typing import TYPE_CHECKING, List, Optional @@ -162,7 +162,9 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: title = oembed.get("title") if title and isinstance(title, str): - open_graph_response["og:title"] = html5.unescape(title) + # A common WordPress plug-in seems to incorrectly escape entities + # in the oEmbed response. + open_graph_response["og:title"] = html.unescape(title) author_name = oembed.get("author_name") if not isinstance(author_name, str): @@ -181,9 +183,9 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: # Process each type separately. 
oembed_type = oembed.get("type") if oembed_type == "rich": - html = oembed.get("html") - if isinstance(html, str): - calc_description_and_urls(open_graph_response, html) + html_str = oembed.get("html") + if isinstance(html_str, str): + calc_description_and_urls(open_graph_response, html_str) elif oembed_type == "photo": # If this is a photo, use the full image, not the thumbnail. @@ -193,8 +195,8 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: elif oembed_type == "video": open_graph_response["og:type"] = "video.other" - html = oembed.get("html") - if html and isinstance(html, str): + html_str = oembed.get("html") + if html_str and isinstance(html_str, str): calc_description_and_urls(open_graph_response, oembed["html"]) for size in ("width", "height"): val = oembed.get(size) From dff513f8ba49567027f1bd9fa559e8292c6f36cc Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 9 Jan 2023 08:49:33 -0500 Subject: [PATCH 4/4] Update 14781.misc --- changelog.d/14781.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/14781.misc b/changelog.d/14781.misc index b610aa5c0c90..04f565b41020 100644 --- a/changelog.d/14781.misc +++ b/changelog.d/14781.misc @@ -1 +1 @@ -Unescape HTML entities in Open Graph title. \ No newline at end of file +Unescape HTML entities in URL preview titles making use of oEmbed responses.