From 4498388990a931d10c8ff1f4240377a5a5cc1342 Mon Sep 17 00:00:00 2001 From: "Dhruv Kanojia (Xonshiz)" Date: Sat, 16 Apr 2022 16:48:35 +0530 Subject: [PATCH] Fix for #299 Closes #299 Added a new `Cookie` parameter to be passed to utilize the cookie in case of 403 issue in Readcomiconline.li --- Changelog.md | 3 +- ReadMe.md | 5 ++- comic_dl/__version__.py | 2 +- comic_dl/comic_dl.py | 31 ++++--------- comic_dl/honcho.py | 3 +- comic_dl/sites/readcomicOnlineli.py | 68 ++++++++++++++++++++++++----- docs/source/notes.rst | 1 + 7 files changed, 77 insertions(+), 36 deletions(-) diff --git a/Changelog.md b/Changelog.md index e7b8fa4..642277b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -125,4 +125,5 @@ - Removed setup2.py file [2021.09.05] - Checking for existing CBZ/PDF files before downloading them again [Fix for #247] [2021.09.05] - Fix for chapter download at readmanganato -- Added support for webtoons.com (No audio download yet) [Fix for #284] [2021.09.05.1] \ No newline at end of file +- Added support for webtoons.com (No audio download yet) [Fix for #284] [2021.09.05.1] +- Fix for #299 [2022.04.16] \ No newline at end of file diff --git a/ReadMe.md b/ReadMe.md index 91f063a..c4e6994 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -178,12 +178,13 @@ Currently, the script supports these arguments : -pid, --page-id Takes the Page ID to download a particular "chapter number" of a manga. --comic Add this after -i if you are inputting a comic id or the EXACT comic name. [ Ex : -i "Deadpool Classic" --comic ] --comic-search, --search-comic Searches for a comic through the scraped data from ReadComicOnline.to +-comic-search, --search-comic Searches for a comic through the scraped data from ReadComicOnline.li [ Ex : -comic-search "Deadpool" ] -comic-info, --comic-info Lists all the information about the given comic (argument can be either comic id or the exact comic name). [ Ex : -comic-info "Deadpool Classic" ] or [ Ex : -comic-info 3865 ] --update Updates the comic database for the given argument. [ Ex: --update "Deadpool Classic" ] or [ Ex: --update "https://readcomiconline.li/Comic/Deadpool-Classic" ] +-cookie, --cookie Passes a cookie to be used throughout the session. ``` ## Language Codes: @@ -402,6 +403,8 @@ If you're here to make suggestions, please follow the basic syntax to post a req This should be enough, but it'll be great if you can add more ;) # Notes +* Readcomiconline.li has been a pain to work with and it might block you out a lot. Now you can use `--cookie` parameter to pass a working cookie. You can retrieve the cookie by checking network tab for `Cookie` value in request headers or by using an external browser plugin. + * comic.naver.com has korean characters and some OS won't handle those characters. So, instead of naming the file folder with the series name in korean, the script will download and name the folder with the comic's ID instead. * Bato.to requires you to "log in" to read some chapters. So, to be on a safe side, provide the username/password combination to the script via "-p" and "-u" arguments. diff --git a/comic_dl/__version__.py b/comic_dl/__version__.py index 62cc1fa..08e2e57 100644 --- a/comic_dl/__version__.py +++ b/comic_dl/__version__.py @@ -1,4 +1,4 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -__version__ = "2022.04.09" +__version__ = "2022.04.16" diff --git a/comic_dl/comic_dl.py b/comic_dl/comic_dl.py index 263349a..a2199ac 100644 --- a/comic_dl/comic_dl.py +++ b/comic_dl/comic_dl.py @@ -47,6 +47,7 @@ def __init__(self, argv): help='Tells the script which Quality of image to download (High/Low).', default='True') parser.add_argument('-i', '--input', nargs=1, help='Inputs the URL to comic.') + parser.add_argument('-cookie', '--cookie', nargs=1, help='Passes cookie (text format) to be used throughout the session.') # Chr1st-oo, added arguments parser.add_argument("--comic", action="store_true", help="Add this after -i if you are inputting a comic id or the EXACT comic name.") @@ -210,6 +211,7 @@ def __init__(self, argv): conversion = data["conversion"] keep_files = data["keep"] image_quality = data["image_quality"] + manual_cookie = data["cookie"] pbar_comic = tqdm(data["comics"], dynamic_ncols=True, desc="[Comic-dl] Auto processing", leave=True, unit='comic') for elKey in pbar_comic: @@ -227,7 +229,8 @@ def __init__(self, argv): chapter_range=download_range, conversion=conversion, keep_files=keep_files, image_quality=image_quality, username=el["username"], password=el["password"], - comic_language=el["comic_language"]) + comic_language=el["comic_language"], + cookie=manual_cookie) except Exception as ex: pbar_comic.write('[Comic-dl] Auto processing with error for %s : %s ' % (elKey, ex)) pbar_comic.set_postfix() @@ -246,6 +249,7 @@ def __init__(self, argv): print("Run the script with --help to see more information.") else: print_index = False + manual_cookie = None if args.print_index: print_index = True if not args.sorting: @@ -260,6 +264,8 @@ def __init__(self, argv): args.keep = ["True"] if not args.quality or args.quality == "True": args.quality = ["Best"] + if args.cookie: + manual_cookie = args.cookie[0] # user_input = unicode(args.input[0], encoding='latin-1') user_input = args.input[0] @@ -281,32 +287,13 @@ def __init__(self, argv): chapter_range=args.range, conversion=args.convert[0], keep_files=args.keep[0], image_quality=args.quality[0], username=args.username[0], password=args.password[0], - comic_language=args.manga_language[0], print_index=print_index) + comic_language=args.manga_language[0], print_index=print_index, + cookie=manual_cookie) end_time = time.time() total_time = end_time - start_time print("Total Time Taken To Complete : %s" % total_time) sys.exit() - # def string_formatter(self, my_string): - # temp = "" - # for char in my_string: - # print("Temp right now : {0}".format(char)) - # # temp = temp + str(char).replace(char, self.to_utf_8(char)) - # temp = temp + str(char).replace(char, self.to_utf_8(char)) - # - # print("Temp is : {0}".format(temp)) - # - # - # def to_utf_8(self, char): - # print("Received Key : {0}".format(char)) - # char_dict = { - # 'ë': '%C3%AB' - # } - # try: - # return char_dict[char] - # except KeyError: - # return char - @staticmethod def version(): print(__version__) diff --git a/comic_dl/honcho.py b/comic_dl/honcho.py index 40c5fe7..955527c 100644 --- a/comic_dl/honcho.py +++ b/comic_dl/honcho.py @@ -74,6 +74,7 @@ def checker(self, comic_url, download_directory, chapter_range, **kwargs): sorting = kwargs.get("sorting_order") comic_language = kwargs.get("comic_language") print_index = kwargs.get("print_index") + manual_cookies = kwargs.get("cookie", None) if log_flag is True: logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) @@ -99,7 +100,7 @@ def checker(self, comic_url, download_directory, chapter_range, **kwargs): chapter_range=chapter_range, conversion=kwargs.get("conversion"), keep_files=kwargs.get("keep_files"), image_quality=kwargs.get("image_quality"), - print_index=print_index) + print_index=print_index, manual_cookies=manual_cookies) return 0 elif domain in ["www.comic.naver.com", "comic.naver.com"]: comicNaver.ComicNaver(manga_url=comic_url, logger=logging, current_directory=current_directory, diff --git a/comic_dl/sites/readcomicOnlineli.py b/comic_dl/sites/readcomicOnlineli.py index decb9aa..9b0dfe8 100644 --- a/comic_dl/sites/readcomicOnlineli.py +++ b/comic_dl/sites/readcomicOnlineli.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import base64 from comic_dl import globalFunctions import re @@ -12,6 +13,7 @@ class ReadComicOnlineLi(object): def __init__(self, manga_url, download_directory, chapter_range, **kwargs): current_directory = kwargs.get("current_directory") + self.manual_cookie = kwargs.get("manual_cookies", None) conversion = kwargs.get("conversion") keep_files = kwargs.get("keep_files") self.logging = kwargs.get("log_flag") @@ -21,6 +23,21 @@ def __init__(self, manga_url, download_directory, chapter_range, **kwargs): self.print_index = kwargs.get("print_index") url_split = str(manga_url).split("/") + self.appended_headers = { + 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'accept-encoding': 'gzip, deflate, br', + 'accept-language': 'en-US,en;q=0.9', + 'dnt': '1', + 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"macOS"', + 'sec-fetch-dest': 'document', + 'sec-fetch-mode': 'navigate', + 'sec-fetch-site': 'same-origin', + 'sec-fetch-user': '?1', + 'upgrade-insecure-requests': '1', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36' + } if len(url_split) in [5]: # Sometimes, this value came out to be 6, instead of 5. Hmmmmmmmm weird. # Removing "6" from here, because it caused #47 @@ -39,11 +56,12 @@ def __init__(self, manga_url, download_directory, chapter_range, **kwargs): def single_chapter(self, comic_url, comic_name, download_directory, conversion, keep_files): # print("Received Comic Url : {0}".format(comic_url)) print("Fooling CloudFlare...Please Wait...") - appended_headers = { - 'referer': comic_url, - 'Accept': "*/*", - 'Cache-Control': 'no-cache' - } + if not comic_url.endswith("#1"): + comic_url += "#1" + + if not self.appended_headers.get('cookie', None) and self.manual_cookie: + self.appended_headers['cookie'] = self.manual_cookie + self.appended_headers['referer'] = comic_url chapter_number = str(comic_url).split("/")[5].split("?")[0].replace("-", " - ") file_directory = globalFunctions.GlobalFunctions().create_file_directory(chapter_number, comic_name) @@ -62,7 +80,7 @@ def single_chapter(self, comic_url, comic_name, download_directory, conversion, print('Converted File already exists. Skipping.') return 0 - source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10, append_headers=appended_headers) + source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10, append_headers=self.appended_headers) img_list = re.findall(r"lstImages.push\(\"(.*?)\"\);", str(source)) @@ -77,14 +95,16 @@ def single_chapter(self, comic_url, comic_name, download_directory, conversion, links = [] file_names = [] + print(img_list) + img_list = self.get_image_links(img_list) for current_chapter, image_link in enumerate(img_list): image_link = str(image_link).strip().replace("\\", "") logging.debug("Image Link : %s" % image_link) - image_link = image_link.replace("=s1600", "=s0").replace("/s1600", "/s0") # Change low quality to best. if str(self.image_quality).lower().strip() in ["low", "worst", "bad", "cancer", "mobile"]: image_link = image_link.replace("=s0", "=s1600").replace("/s0", "/s1600") + image_link = image_link.replace("=s1600", "=s0").replace("/s1600", "/s0") # Change low quality to best. current_chapter += 1 file_name = str(globalFunctions.GlobalFunctions().prepend_zeroes(current_chapter, len(img_list))) + ".jpg" @@ -109,7 +129,10 @@ def name_cleaner(self, url): def full_series(self, comic_url, comic_name, sorting, download_directory, chapter_range, conversion, keep_files): print("Fooling CloudFlare...Please Wait...") - source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10) + if not self.appended_headers.get('cookie', None) and self.manual_cookie: + self.appended_headers['cookie'] = self.manual_cookie + self.appended_headers['referer'] = comic_url + source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10, append_headers=self.appended_headers) all_links = [] @@ -157,7 +180,7 @@ def full_series(self, comic_url, comic_name, sorting, download_directory, chapte if str(sorting).lower() in ['new', 'desc', 'descending', 'latest']: for chap_link in all_links: - chap_link = "http://readcomiconline.li" + chap_link + chap_link = "https://readcomiconline.li" + chap_link try: self.single_chapter(comic_url=chap_link, comic_name=comic_name, download_directory=download_directory, conversion=conversion, keep_files=keep_files) @@ -172,7 +195,7 @@ def full_series(self, comic_url, comic_name, sorting, download_directory, chapte elif str(sorting).lower() in ['old', 'asc', 'ascending', 'oldest', 'a']: for chap_link in all_links[::-1]: - chap_link = "http://readcomiconline.to" + chap_link + chap_link = "https://readcomiconline.li" + chap_link try: self.single_chapter(comic_url=chap_link, comic_name=comic_name, download_directory=download_directory, conversion=conversion, keep_files=keep_files) @@ -186,3 +209,28 @@ def full_series(self, comic_url, comic_name, sorting, download_directory, chapte globalFunctions.GlobalFunctions().addOne(comic_url) return 0 + + def get_image_links(self, urls): + # JS logic extracted by : https://github.com/Xonshiz/comic-dl/issues/299#issuecomment-1098189279 + temp = [] + for url in urls: + print(url + '\n') + quality_ = None + if '=s0' in url: + url = url[:-3] + quality_ = '=s0' + else: + url = url[:-6] + quality_ = '=s1600' + # url = url.slice(4, 22) + url.slice(25); + url = url[4:22] + url[25:] + # url = url.slice(0, -6) + url.slice(-2); + url = url[0:-6] + url[-2:] + url = str(base64.b64decode(url).decode("utf-8")) + # url = url.slice(0, 13) + url.slice(17); + url = url[0:13] + url[17:] + # url = url.slice(0, -2) + (containsS0 ? '=s0' : '=s1600'); + url = url[0:-2] + quality_ + # return 'https://2.bp.blogspot.com/' + url; + temp.append('https://2.bp.blogspot.com/{0}'.format(url)) + return temp diff --git a/docs/source/notes.rst b/docs/source/notes.rst index d783178..75eb77d 100644 --- a/docs/source/notes.rst +++ b/docs/source/notes.rst @@ -1,5 +1,6 @@ Notes ===== +- Readcomiconline.li has been a pain to work with and it might block you out a lot. Now you can use `--cookie` parameter to pass a working cookie. You can retrieve the cookie by checking network tab for `Cookie` value in request headers or by using an external browser plugin. - comic.naver.com has korean characters and some OS won’t handle those characters. So, instead of naming the file folder with the series