Skip to content

Commit

Permalink
Merge pull request #113 from ZumZoom/master
Browse files Browse the repository at this point in the history
Add baseurl functionality to in-place URLs

Thanks Mikhail Melnik @ZumZoom
  • Loading branch information
Alir3z4 committed Jan 31, 2016
2 parents d546297 + fbe1b76 commit 54ea451
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 3 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The AUTHORS/Contributors are (and/or have been):
* Albert Berger <gh: nbdsp>
* Etienne Millon <[email protected]>
* John C F <gh: critiqjo>
* Mikhail Melnik <[email protected]>


Maintainer:
Expand Down
1 change: 1 addition & 0 deletions ChangeLog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
----

* Fix #89: Python 3.5 support.
* Fix #113: in-place baseurl substitution for <a> and <img> tags.


2016.1.8
Expand Down
6 changes: 3 additions & 3 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,9 @@ def handle_tag(self, tag, attrs, start):
try:
title = escape_md(a['title'])
except KeyError:
self.o("](" + escape_md(a['href']) + ")")
self.o("](" + escape_md(urlparse.urljoin(self.baseurl, a['href'])) + ")")
else:
self.o("](" + escape_md(a['href'])
self.o("](" + escape_md(urlparse.urljoin(self.baseurl, a['href']))
+ ' "' + title + '" )')
else:
i = self.previousIndex(a)
Expand Down Expand Up @@ -476,7 +476,7 @@ def handle_tag(self, tag, attrs, start):
self.o("![" + escape_md(alt) + "]")
if self.inline_links:
href = attrs.get('href') or ''
self.o("(" + escape_md(href) + ")")
self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
else:
i = self.previousIndex(attrs)
if i is not None:
Expand Down
11 changes: 11 additions & 0 deletions test/inplace_baseurl_substitution.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<head></head>
<body>
<p>
<img src="/uploads/2012/01/read2textheader.jpg" alt="read2text header image" width="650" height="165"/>
</p>
<p>
<a href="/">BrettTerpstra.com</a>
</p>
</body>
</html>
3 changes: 3 additions & 0 deletions test/inplace_baseurl_substitution.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
![read2text header image](http://brettterpstra.com/uploads/2012/01/read2textheader.jpg)

[BrettTerpstra.com](http://brettterpstra.com/)
6 changes: 6 additions & 0 deletions test/test_html2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ def test_func(self):
if base_fn not in ['bodywidth_newline.html', 'abbr_tag.html']:
test_func = None

if base_fn == 'inplace_baseurl_substitution.html':
module_args['baseurl'] = 'http://brettterpstra.com'
module_args['body_width'] = 0
# there is no way to specify baseurl in cli :(
test_cmd = None

return test_mod, test_cmd, test_func

# Originally from http://stackoverflow.com/questions/32899/\
Expand Down

0 comments on commit 54ea451

Please sign in to comment.