From 3421b6202b74eac810f94ba1954ac12ef3ea296c Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 16 Jan 2024 09:28:35 -0500 Subject: [PATCH 1/3] img: Support null "src" attributes, rather than fail an assert. --- html2text/__init__.py | 3 +-- test/empty-img-src.html | 1 + test/empty-img-src.md | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 test/empty-img-src.html create mode 100644 test/empty-img-src.md diff --git a/html2text/__init__.py b/html2text/__init__.py index ae4e154..3a46deb 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -537,8 +537,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: self.o("][" + str(a_props.count) + "]") if tag == "img" and start and not self.ignore_images: - if "src" in attrs: - assert attrs["src"] is not None + if "src" in attrs and attrs["src"] is not None: if not self.images_to_alt: attrs["href"] = attrs["src"] alt = attrs.get("alt") or self.default_image_alt diff --git a/test/empty-img-src.html b/test/empty-img-src.html new file mode 100644 index 0000000..865f94a --- /dev/null +++ b/test/empty-img-src.html @@ -0,0 +1 @@ + diff --git a/test/empty-img-src.md b/test/empty-img-src.md new file mode 100644 index 0000000..139597f --- /dev/null +++ b/test/empty-img-src.md @@ -0,0 +1,2 @@ + + From 32f1c60c015ea0b722d4171253a912e1671d364b Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 16 Jan 2024 09:37:30 -0500 Subject: [PATCH 2/3] img: Support null "width" and "height" attributes, rather than assert. --- html2text/__init__.py | 6 ++---- test/images_with_size.html | 4 ++++ test/images_with_size.md | 5 ++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/html2text/__init__.py b/html2text/__init__.py index 3a46deb..01a1340 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -548,11 +548,9 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: self.images_with_size and ("width" in attrs or "height" in attrs) ): self.o(" + + + + diff --git a/test/images_with_size.md b/test/images_with_size.md index c0548a9..7678ffa 100644 --- a/test/images_with_size.md +++ b/test/images_with_size.md @@ -3,4 +3,7 @@ src='image_with_width.jpg' width='300' alt='An image with a width attr' /> An image with a height attr An
 image with width and height ![](image_with_width_and_height.jpg) +width='300' height='300' /> ![](image_with_width_and_height.jpg) From 66dd5ba1d9471a99a1ebe732289d48c9e43eb8ad Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 16 Jan 2024 09:47:38 -0500 Subject: [PATCH 3/3] links: Support null "title" attribute with --reference-links. --- html2text/__init__.py | 3 +-- test/no_inline_links_example.html | 1 + test/no_inline_links_example.md | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/html2text/__init__.py b/html2text/__init__.py index 01a1340..9054388 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -823,8 +823,7 @@ def o( + "]: " + urlparse.urljoin(self.baseurl, link.attrs["href"]) ) - if "title" in link.attrs: - assert link.attrs["title"] is not None + if "title" in link.attrs and link.attrs["title"] is not None: self.out(" (" + link.attrs["title"] + ")") self.out("\n") else: diff --git a/test/no_inline_links_example.html b/test/no_inline_links_example.html index 5e4c45c..e7d428f 100644 --- a/test/no_inline_links_example.html +++ b/test/no_inline_links_example.html @@ -7,3 +7,4 @@ link text +Empty link title diff --git a/test/no_inline_links_example.md b/test/no_inline_links_example.md index c000b59..bd11d62 100644 --- a/test/no_inline_links_example.md +++ b/test/no_inline_links_example.md @@ -1,8 +1,10 @@ [Googler][1] No href No href but title available [ Example][2] [ [ [ link text -][3]][3]][3] +][3]][3]][3] [Empty link title][4] [1]: http://google.com [2]: http://example.com (Example title) [3]: http://example.com (abc) + + [4]: http://example.com