From 55aa6313438da0215fb1cd3cbeca57bfd93406b6 Mon Sep 17 00:00:00 2001 From: ahxxm Date: Thu, 2 May 2024 08:19:46 +0000 Subject: [PATCH] download images and fix path --- odmpy/processing/ebook.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/odmpy/processing/ebook.py b/odmpy/processing/ebook.py index b2ab3a8..0e483d2 100644 --- a/odmpy/processing/ebook.py +++ b/odmpy/processing/ebook.py @@ -645,11 +645,14 @@ def process_ebook_loan( with open(asset_file_path, "wb") as f_out: f_out.write(res.content) - # download image to asset dir from decoded HTML - # e.g. '003' - if soup: - image_tag = soup.find("img", attrs={"src": True}) - if image_tag and isinstance(image_tag, Tag): + # download images to the same asset dir, fix soup image src + # e.g. '003' + # download into "Text/***_003_r1.jpg" and point to filename "***_003_r1.jpg" + if soup and media_type in ("application/xhtml+xml", "text/html"): + image_tags = soup.find_all("img", attrs={"src": True}) + for image_tag in image_tags: + if not isinstance(image_tag, Tag): + continue image_url = urlparse( urljoin(parsed_entry_url.geturl(), image_tag["src"]) ) @@ -661,6 +664,10 @@ def process_ebook_loan( res = libby_client.make_request(download_url, return_res=True) with open(image_file_path, "wb") as f_out: f_out.write(res.content) + image_tag["src"] = image_file_name + # overwrite the file with the updated soup + with open(asset_file_path, "w", encoding="utf-8") as f_out: + f_out.write(str(soup)) if soup: # try to min. soup searches where possible