diff --git a/AUTHORS.rst b/AUTHORS.rst index 39f7a98..93c1938 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -19,6 +19,7 @@ The AUTHORS/Contributors are (and/or have been): * Albert Berger * Etienne Millon * John C F +* Mikhail Melnik Maintainer: diff --git a/ChangeLog.rst b/ChangeLog.rst index 3e90c1b..06eccb0 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -3,6 +3,7 @@ ---- * Fix #89: Python 3.5 support. +* Fix #113: inplace baseurl substitution for <a> and <img> tags 2016.1.8 diff --git a/html2text/__init__.py b/html2text/__init__.py index 4d856b5..648b8ea 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -420,9 +420,9 @@ def handle_tag(self, tag, attrs, start): try: title = escape_md(a['title']) except KeyError: - self.o("](" + escape_md(a['href']) + ")") + self.o("](" + escape_md(urlparse.urljoin(self.baseurl, a['href'])) + ")") else: - self.o("](" + escape_md(a['href']) + self.o("](" + escape_md(urlparse.urljoin(self.baseurl, a['href'])) + ' "' + title + '" )') else: i = self.previousIndex(a) @@ -476,7 +476,7 @@ def handle_tag(self, tag, attrs, start): self.o("![" + escape_md(alt) + "]") if self.inline_links: href = attrs.get('href') or '' - self.o("(" + escape_md(href) + ")") + self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")") else: i = self.previousIndex(attrs) if i is not None: diff --git a/test/inplace_baseurl_substitution.html b/test/inplace_baseurl_substitution.html new file mode 100644 index 0000000..cb55345 --- /dev/null +++ b/test/inplace_baseurl_substitution.html @@ -0,0 +1,11 @@ + + + +

+read2text header image +

+

+BrettTerpstra.com +

+ + diff --git a/test/inplace_baseurl_substitution.md b/test/inplace_baseurl_substitution.md new file mode 100644 index 0000000..bc73c97 --- /dev/null +++ b/test/inplace_baseurl_substitution.md @@ -0,0 +1,3 @@ +![read2text header image](http://brettterpstra.com/uploads/2012/01/read2textheader.jpg) + +[BrettTerpstra.com](http://brettterpstra.com/) diff --git a/test/test_html2text.py b/test/test_html2text.py index 052a069..2d678ee 100644 --- a/test/test_html2text.py +++ b/test/test_html2text.py @@ -173,6 +173,12 @@ def test_func(self): if base_fn not in ['bodywidth_newline.html', 'abbr_tag.html']: test_func = None + if base_fn == 'inplace_baseurl_substitution.html': + module_args['baseurl'] = 'http://brettterpstra.com' + module_args['body_width'] = 0 + # there is no way to specify baseurl in cli :( + test_cmd = None + return test_mod, test_cmd, test_func # Originally from http://stackoverflow.com/questions/32899/\