Skip to content
This repository has been archived by the owner on Jan 12, 2023. It is now read-only.

Commit

Permalink
Extract method to format book URI for extension
Browse files Browse the repository at this point in the history
The `load_etext` function currently throws an exception when there is no
textual download candidate available for a given book. However, some
users might want to use Gutenberg to download non-textual versions of
books. All available formats of a book can already be looked up via the
formaturi metadata extractor, so this change exposes a method to enable
a client to format the download URL for an arbitrary extension.

See #105
  • Loading branch information
c-w committed Jun 26, 2018
1 parent 3e255eb commit 69a61ed
Showing 1 changed file with 26 additions and 12 deletions.
38 changes: 26 additions & 12 deletions gutenberg/acquire/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,26 @@ def _check_mirror_exists(mirror):
.format(mirror))


def _format_download_uri_for_extension(etextno, extension, mirror=None):
    """Builds the download URI of a text for one specific file extension.

    The URI has the shape ``<mirror>/<subdirectory>/<etextno><extension>``.
    The extensions that exist for a given text can be looked up via the
    formaturi metadata extractor.

    Arguments:
        etextno: The identifier of the text to locate.
        extension: The suffix appended directly after the text identifier
            (e.g. '.txt' or '-0.txt').
        mirror: Optional root of the mirror to use. When falsy, the
            module-level default mirror is used instead.

    Returns:
        The formatted download URI. No request is made here to confirm
        that the resulting file actually exists.
    """
    # Normalize the root so that joining with '/' below never yields a
    # doubled slash or stray whitespace.
    base = (mirror or _GUTENBERG_MIRROR).strip().rstrip('/')

    # Validate the mirror before doing any further work.
    # NOTE(review): presumably raises when the mirror is unreachable --
    # confirm against _check_mirror_exists.
    _check_mirror_exists(base)

    return '{root}/{path}/{etextno}{extension}'.format(
        root=base,
        path=_etextno_to_uri_subdirectory(etextno),
        etextno=etextno,
        extension=extension)


def _format_download_uri(etextno, mirror=None, prefer_ascii=False):
"""Returns the download location on the Project Gutenberg servers for a
given text.
Expand All @@ -64,10 +84,6 @@ def _format_download_uri(etextno, mirror=None, prefer_ascii=False):
Raises:
UnknownDownloadUri: If no download location can be found for the text.
"""
uri_root = mirror or _GUTENBERG_MIRROR
uri_root = uri_root.strip().rstrip('/')
_check_mirror_exists(uri_root)

# Check https://www.gutenberg.org/files/ for details about available
# extensions ;
# - .txt is plaintext us-ascii
Expand All @@ -77,18 +93,16 @@ def _format_download_uri(etextno, mirror=None, prefer_ascii=False):
utf8_first = ('-0.txt', '-8.txt', '.txt')
extensions = ascii_first if prefer_ascii else utf8_first
for extension in extensions:
path = _etextno_to_uri_subdirectory(etextno)
uri = '{root}/{path}/{etextno}{extension}'.format(
root=uri_root,
path=path,
etextno=etextno,
extension=extension)
uri = _format_download_uri_for_extension(etextno, extension)
response = requests.head(uri)
if response.ok:
return uri

raise UnknownDownloadUriException('Failed to find {0} on {1}.'
.format(etextno, uri_root))
raise UnknownDownloadUriException(
'Failed to find a textual download candidate for {0} on {1}. '
'Either the book does not exist or it is only available in '
'non-textual formats.'
.format(etextno, mirror or _GUTENBERG_MIRROR))


def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
Expand Down

0 comments on commit 69a61ed

Please sign in to comment.