Closes #299
Added a new `Cookie` parameter that can be passed to reuse a working cookie when Readcomiconline.li responds with a 403 error
master
Dhruv Kanojia (Xonshiz) 2022-04-16 16:48:35 +05:30
parent e3218a285d
commit 4498388990
7 changed files with 77 additions and 36 deletions

View File

@ -125,4 +125,5 @@
- Removed setup2.py file [2021.09.05]
- Checking for existing CBZ/PDF files before downloading them again [Fix for #247] [2021.09.05]
- Fix for chapter download at readmanganato
- Added support for webtoons.com (No audio download yet) [Fix for #284] [2021.09.05.1]
- Added support for webtoons.com (No audio download yet) [Fix for #284] [2021.09.05.1]
- Fix for #299 [2022.04.16]

View File

@ -178,12 +178,13 @@ Currently, the script supports these arguments :
-pid, --page-id Takes the Page ID to download a particular "chapter number" of a manga.
--comic Add this after -i if you are inputting a comic id or the EXACT comic name.
[ Ex : -i "Deadpool Classic" --comic ]
-comic-search, --search-comic Searches for a comic through the scraped data from ReadComicOnline.to
-comic-search, --search-comic Searches for a comic through the scraped data from ReadComicOnline.li
[ Ex : -comic-search "Deadpool" ]
-comic-info, --comic-info Lists all the information about the given comic (argument can be either comic id or the exact comic name).
[ Ex : -comic-info "Deadpool Classic" ] or [ Ex : -comic-info 3865 ]
--update Updates the comic database for the given argument.
[ Ex: --update "Deadpool Classic" ] or [ Ex: --update "https://readcomiconline.li/Comic/Deadpool-Classic" ]
-cookie, --cookie Passes a cookie to be used throughout the session.
```
## Language Codes:
@ -402,6 +403,8 @@ If you're here to make suggestions, please follow the basic syntax to post a req
This should be enough, but it'll be great if you can add more ;)
# Notes
* Readcomiconline.li has been a pain to work with and it might block you out a lot. Now you can use `--cookie` parameter to pass a working cookie. You can retrieve the cookie by checking network tab for `Cookie` value in request headers or by using an external browser plugin.
* comic.naver.com has korean characters and some OS won't handle those characters. So, instead of naming the file folder with the series name in korean, the script will download and name the folder with the comic's ID instead.
* Bato.to requires you to "log in" to read some chapters. So, to be on a safe side, provide the username/password combination to the script via "-p" and "-u" arguments.

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__version__ = "2022.04.09"
__version__ = "2022.04.16"

View File

@ -47,6 +47,7 @@ class ComicDL(object):
help='Tells the script which Quality of image to download (High/Low).', default='True')
parser.add_argument('-i', '--input', nargs=1, help='Inputs the URL to comic.')
parser.add_argument('-cookie', '--cookie', nargs=1, help='Passes cookie (text format) to be used throughout the session.')
# Chr1st-oo, added arguments
parser.add_argument("--comic", action="store_true", help="Add this after -i if you are inputting a comic id or the EXACT comic name.")
@ -210,6 +211,7 @@ class ComicDL(object):
conversion = data["conversion"]
keep_files = data["keep"]
image_quality = data["image_quality"]
manual_cookie = data["cookie"]
pbar_comic = tqdm(data["comics"], dynamic_ncols=True, desc="[Comic-dl] Auto processing", leave=True,
unit='comic')
for elKey in pbar_comic:
@ -227,7 +229,8 @@ class ComicDL(object):
chapter_range=download_range, conversion=conversion,
keep_files=keep_files, image_quality=image_quality,
username=el["username"], password=el["password"],
comic_language=el["comic_language"])
comic_language=el["comic_language"],
cookie=manual_cookie)
except Exception as ex:
pbar_comic.write('[Comic-dl] Auto processing with error for %s : %s ' % (elKey, ex))
pbar_comic.set_postfix()
@ -246,6 +249,7 @@ class ComicDL(object):
print("Run the script with --help to see more information.")
else:
print_index = False
manual_cookie = None
if args.print_index:
print_index = True
if not args.sorting:
@ -260,6 +264,8 @@ class ComicDL(object):
args.keep = ["True"]
if not args.quality or args.quality == "True":
args.quality = ["Best"]
if args.cookie:
manual_cookie = args.cookie[0]
# user_input = unicode(args.input[0], encoding='latin-1')
user_input = args.input[0]
@ -281,32 +287,13 @@ class ComicDL(object):
chapter_range=args.range, conversion=args.convert[0],
keep_files=args.keep[0], image_quality=args.quality[0],
username=args.username[0], password=args.password[0],
comic_language=args.manga_language[0], print_index=print_index)
comic_language=args.manga_language[0], print_index=print_index,
cookie=manual_cookie)
end_time = time.time()
total_time = end_time - start_time
print("Total Time Taken To Complete : %s" % total_time)
sys.exit()
# def string_formatter(self, my_string):
# temp = ""
# for char in my_string:
# print("Temp right now : {0}".format(char))
# # temp = temp + str(char).replace(char, self.to_utf_8(char))
# temp = temp + str(char).replace(char, self.to_utf_8(char))
#
# print("Temp is : {0}".format(temp))
#
#
# def to_utf_8(self, char):
# print("Received Key : {0}".format(char))
# char_dict = {
# 'ë': '%C3%AB'
# }
# try:
# return char_dict[char]
# except KeyError:
# return char
@staticmethod
def version():
    """Print the running comic-dl version string to stdout."""
    current_version = __version__
    print(current_version)

View File

@ -74,6 +74,7 @@ class Honcho(object):
sorting = kwargs.get("sorting_order")
comic_language = kwargs.get("comic_language")
print_index = kwargs.get("print_index")
manual_cookies = kwargs.get("cookie", None)
if log_flag is True:
logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG)
@ -99,7 +100,7 @@ class Honcho(object):
chapter_range=chapter_range, conversion=kwargs.get("conversion"),
keep_files=kwargs.get("keep_files"),
image_quality=kwargs.get("image_quality"),
print_index=print_index)
print_index=print_index, manual_cookies=manual_cookies)
return 0
elif domain in ["www.comic.naver.com", "comic.naver.com"]:
comicNaver.ComicNaver(manga_url=comic_url, logger=logging, current_directory=current_directory,

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import base64
from comic_dl import globalFunctions
import re
@ -12,6 +13,7 @@ class ReadComicOnlineLi(object):
def __init__(self, manga_url, download_directory, chapter_range, **kwargs):
current_directory = kwargs.get("current_directory")
self.manual_cookie = kwargs.get("manual_cookies", None)
conversion = kwargs.get("conversion")
keep_files = kwargs.get("keep_files")
self.logging = kwargs.get("log_flag")
@ -21,6 +23,21 @@ class ReadComicOnlineLi(object):
self.print_index = kwargs.get("print_index")
url_split = str(manga_url).split("/")
self.appended_headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'dnt': '1',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36'
}
if len(url_split) in [5]: # Sometimes, this value came out to be 6, instead of 5. Hmmmmmmmm weird.
# Removing "6" from here, because it caused #47
@ -39,11 +56,12 @@ class ReadComicOnlineLi(object):
def single_chapter(self, comic_url, comic_name, download_directory, conversion, keep_files):
# print("Received Comic Url : {0}".format(comic_url))
print("Fooling CloudFlare...Please Wait...")
appended_headers = {
'referer': comic_url,
'Accept': "*/*",
'Cache-Control': 'no-cache'
}
if not comic_url.endswith("#1"):
comic_url += "#1"
if not self.appended_headers.get('cookie', None) and self.manual_cookie:
self.appended_headers['cookie'] = self.manual_cookie
self.appended_headers['referer'] = comic_url
chapter_number = str(comic_url).split("/")[5].split("?")[0].replace("-", " - ")
file_directory = globalFunctions.GlobalFunctions().create_file_directory(chapter_number, comic_name)
@ -62,7 +80,7 @@ class ReadComicOnlineLi(object):
print('Converted File already exists. Skipping.')
return 0
source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10, append_headers=appended_headers)
source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10, append_headers=self.appended_headers)
img_list = re.findall(r"lstImages.push\(\"(.*?)\"\);", str(source))
@ -77,14 +95,16 @@ class ReadComicOnlineLi(object):
links = []
file_names = []
print(img_list)
img_list = self.get_image_links(img_list)
for current_chapter, image_link in enumerate(img_list):
image_link = str(image_link).strip().replace("\\", "")
logging.debug("Image Link : %s" % image_link)
image_link = image_link.replace("=s1600", "=s0").replace("/s1600", "/s0") # Change low quality to best.
if str(self.image_quality).lower().strip() in ["low", "worst", "bad", "cancer", "mobile"]:
image_link = image_link.replace("=s0", "=s1600").replace("/s0", "/s1600")
image_link = image_link.replace("=s1600", "=s0").replace("/s1600", "/s0") # Change low quality to best.
current_chapter += 1
file_name = str(globalFunctions.GlobalFunctions().prepend_zeroes(current_chapter, len(img_list))) + ".jpg"
@ -109,7 +129,10 @@ class ReadComicOnlineLi(object):
def full_series(self, comic_url, comic_name, sorting, download_directory, chapter_range, conversion, keep_files):
print("Fooling CloudFlare...Please Wait...")
source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10)
if not self.appended_headers.get('cookie', None) and self.manual_cookie:
self.appended_headers['cookie'] = self.manual_cookie
self.appended_headers['referer'] = comic_url
source, cookies = globalFunctions.GlobalFunctions().page_downloader(manga_url=comic_url, scrapper_delay=10, append_headers=self.appended_headers)
all_links = []
@ -157,7 +180,7 @@ class ReadComicOnlineLi(object):
if str(sorting).lower() in ['new', 'desc', 'descending', 'latest']:
for chap_link in all_links:
chap_link = "http://readcomiconline.li" + chap_link
chap_link = "https://readcomiconline.li" + chap_link
try:
self.single_chapter(comic_url=chap_link, comic_name=comic_name, download_directory=download_directory,
conversion=conversion, keep_files=keep_files)
@ -172,7 +195,7 @@ class ReadComicOnlineLi(object):
elif str(sorting).lower() in ['old', 'asc', 'ascending', 'oldest', 'a']:
for chap_link in all_links[::-1]:
chap_link = "http://readcomiconline.to" + chap_link
chap_link = "https://readcomiconline.li" + chap_link
try:
self.single_chapter(comic_url=chap_link, comic_name=comic_name, download_directory=download_directory,
conversion=conversion, keep_files=keep_files)
@ -186,3 +209,28 @@ class ReadComicOnlineLi(object):
globalFunctions.GlobalFunctions().addOne(comic_url)
return 0
def get_image_links(self, urls):
    """De-obfuscate image URLs scraped from readcomiconline.li page source.

    Mirrors the site's client-side JS, extracted in:
    https://github.com/Xonshiz/comic-dl/issues/299#issuecomment-1098189279

    Args:
        urls: iterable of obfuscated URL strings pulled from the
            ``lstImages.push("...")`` calls in the page source.

    Returns:
        list of direct ``https://2.bp.blogspot.com/`` image URLs, in the
        same order as the input.
    """
    decoded_links = []
    for url in urls:
        # The quality suffix ('=s0' best / '=s1600' low) is stripped before
        # decoding and re-appended afterwards, exactly like the site's JS.
        if '=s0' in url:
            url = url[:-3]
            quality_suffix = '=s0'
        else:
            url = url[:-6]
            quality_suffix = '=s1600'
        # url = url.slice(4, 22) + url.slice(25);
        url = url[4:22] + url[25:]
        # url = url.slice(0, -6) + url.slice(-2);
        url = url[:-6] + url[-2:]
        url = str(base64.b64decode(url).decode("utf-8"))
        # url = url.slice(0, 13) + url.slice(17);
        url = url[:13] + url[17:]
        # url = url.slice(0, -2) + (containsS0 ? '=s0' : '=s1600');
        url = url[:-2] + quality_suffix
        # return 'https://2.bp.blogspot.com/' + url;
        decoded_links.append('https://2.bp.blogspot.com/{0}'.format(url))
    return decoded_links

View File

@ -1,5 +1,6 @@
Notes
=====
- Readcomiconline.li has been a pain to work with and it might block you out a lot. Now you can use `--cookie` parameter to pass a working cookie. You can retrieve the cookie by checking network tab for `Cookie` value in request headers or by using an external browser plugin.
- comic.naver.com has korean characters and some OS won't handle those
characters. So, instead of naming the file folder with the series