From 38a13dfeae20f1e133a8bc0907888e97769d9951 Mon Sep 17 00:00:00 2001
From: Xonshiz
Date: Thu, 16 Feb 2017 08:37:05 +0530
Subject: [PATCH] Fix for #4

Check the "Changelog" for more info.
---
 .idea/workspace.xml                     | 347 ++++++++++++++++++------
 Changelog.md                            |   4 +-
 MANIFEST                                |  14 +-
 ReadMe.md                               |   2 +-
 comic_dl/__init__.py                    |   2 +
 comic_dl/downloader/cookies_required.py |  44 +--
 comic_dl/downloader/universal.py        |  25 +-
 comic_dl/sites/batoto.py                |  95 ++++---
 comic_dl/sites/comic_naver.py           |  57 ++--
 comic_dl/sites/gomanga.py               |  52 ++--
 comic_dl/sites/kisscomicus.py           |  40 +--
 comic_dl/sites/kissmanga.py             |  86 +++---
 comic_dl/sites/mangafox.py              |  68 +++--
 comic_dl/sites/readcomic.py             |  52 ++--
 comic_dl/sites/yomanga.py               |  53 ++--
 comic_dl/version.py                     |   2 +-
 docs/Changelog.md                       |   4 +-
 docs/index.md                           |   7 +-
 setup.cfg                               |   2 +-
 setup.py                                |  60 +---
 20 files changed, 598 insertions(+), 418 deletions(-)

diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 715863a..c902f95 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
[Hunks omitted: this copy of the patch reduced the workspace.xml changes to unreadable XML fragments, and the file itself is PyCharm editor state (open tabs, search history) rather than reviewable source.]
diff --git a/Changelog.md b/Changelog.md
index 8a8a0bb..ee0f39c 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -15,4 +15,6 @@
 - Added a YouTube Tutorial for the script [2016.12.30]
 - Site support for readcomiconlin.to [2017.01.02]
 - Added `Verbose Logging` [2017.01.22]
-- Fixed chapter count error in Kissmanga [2017.01.22]
\ No newline at end of file
+- Fixed chapter count error in Kissmanga [2017.01.22]
+- Fixed #4 [2017.02.16]
+- Optimized Imports [2017.02.16]
diff --git a/MANIFEST b/MANIFEST
index ab00328..5979b17 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -2,6 +2,18 @@
 setup.cfg
 setup.py
 comic_dl\__init__.py
-comic_dl\comic-dl.py
+comic_dl\comic_dl.py
 comic_dl\honcho.py
 comic_dl\version.py
+comic_dl\downloader\__init__.py
+comic_dl\downloader\cookies_required.py
+comic_dl\downloader\universal.py
+comic_dl\sites\__init__.py
+comic_dl\sites\batoto.py
+comic_dl\sites\comic_naver.py
+comic_dl\sites\gomanga.py
+comic_dl\sites\kisscomicus.py
+comic_dl\sites\kissmanga.py
+comic_dl\sites\mangafox.py
+comic_dl\sites\readcomic.py
+comic_dl\sites\yomanga.py
diff --git a/ReadMe.md b/ReadMe.md
index 71fc8ca..81e2181 100644
--- a/ReadMe.md
+++ b/ReadMe.md
@@ -188,7 +188,7 @@ You can check the changelog [**`HERE`**](https://github.com/Xonshiz/comic-dl/blo
 
 If you're planning to open an issue for the script, ask for a new feature or anything else that requires opening an Issue, then please do keep these things in mind.
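The "Optimized Imports" entry above describes the pattern this patch applies across every module: plain module imports are swapped for direct name imports. A minimal before/after sketch, using names that appear in the hunks below:

# Before: module import, attribute looked up at each call site.
import logging
logging.debug("File Check Path : %s" % File_Check_Path)

# After: names bound once at import time, as the patch does throughout.
from logging import debug, basicConfig, DEBUG
debug("File Check Path : %s" % File_Check_Path)

Runtime behavior is unchanged; the rewrite only shortens the call sites, at the price of the more conventional `import logging` style.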
### Reporting Issues -If you're going to report an issue, then please run the script again with the "-v or --verbose" argument. It should generate a file in the same directory, with the name "Error Log.log". Copy that log file's data and post it on a [Gist](https://gist.github.com/) and share that gist's link while reporting the issue here. +If you're going to report an issue, then please run the script again with the "-v or --verbose" argument. It should generate a file in the same directory, with the name "Error Log.log". Copy that log file's data and post it on a [Gist](https://gist.github.com/) and share that gist's link while reporting the issue here. Make sure you **EDIT OUT YOUR USERNAME AND PASSWORD**, if supplied within the command. If you don't include the verbose log, there are chances it'll take time to fix the issue(s) you're having. diff --git a/comic_dl/__init__.py b/comic_dl/__init__.py index e69de29..0440f7c 100644 --- a/comic_dl/__init__.py +++ b/comic_dl/__init__.py @@ -0,0 +1,2 @@ +import sites +import downloader \ No newline at end of file diff --git a/comic_dl/downloader/cookies_required.py b/comic_dl/downloader/cookies_required.py index 951df62..46b21fc 100644 --- a/comic_dl/downloader/cookies_required.py +++ b/comic_dl/downloader/cookies_required.py @@ -13,36 +13,36 @@ This module uses `requests` library to achieve the handling of cookies. from __future__ import absolute_import from __future__ import print_function -import os -import requests -import shutil +from os import path +from requests import get +from shutil import move,copyfileobj from downloader.universal import main as FileDownloader -import logging +from logging import debug, basicConfig, DEBUG def main(File_Name_Final,Directory_path,tasty_cookies,ddl_image, logger): if logger == "True": - logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) + basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG) File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) - logging.debug("File Check Path : %s" % File_Check_Path) + debug("File Check Path : %s" % File_Check_Path) - if os.path.isfile(File_Check_Path): + if path.isfile(File_Check_Path): print('[Comic-dl] File Exist! 
Skipping ',File_Name_Final,'\n') pass - if not os.path.isfile(File_Check_Path): + if not path.isfile(File_Check_Path): print('[Comic-dl] Downloading : ',File_Name_Final) - response = requests.get(ddl_image, stream=True,cookies=tasty_cookies) + response = get(ddl_image, stream=True,cookies=tasty_cookies) try: with open(File_Name_Final, 'wb') as out_file: - shutil.copyfileobj(response.raw, out_file) - File_Path = os.path.normpath(File_Name_Final) + copyfileobj(response.raw, out_file) + File_Path = path.normpath(File_Name_Final) except Exception as e: - logging.debug("File download error : %s" % e) + debug("File download error : %s" % e) print("Couldn't download file from : ",ddl_image) pass try: - shutil.move(File_Path,Directory_path) + move(File_Path,Directory_path) except Exception as e: print(e,'\n') pass @@ -50,27 +50,27 @@ def main(File_Name_Final,Directory_path,tasty_cookies,ddl_image, logger): def with_referer(File_Name_Final,Directory_path,tasty_cookies,ddl_image,referer, logger): File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) - logging.debug("File Check Path : %s" % File_Check_Path) - logging.debug("Referrer Received : %s" % referer) + debug("File Check Path : %s" % File_Check_Path) + debug("Referrer Received : %s" % referer) - if os.path.isfile(File_Check_Path): + if path.isfile(File_Check_Path): print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n') pass - if not os.path.isfile(File_Check_Path): + if not path.isfile(File_Check_Path): print('[Comic-dl] Downloading : ',File_Name_Final) headers = {'Referer': referer} - response = requests.get(ddl_image, stream=True,cookies=tasty_cookies,headers=headers) + response = get(ddl_image, stream=True,cookies=tasty_cookies,headers=headers) try: with open(File_Name_Final, 'wb') as out_file: - shutil.copyfileobj(response.raw, out_file) - File_Path = os.path.normpath(File_Name_Final) + copyfileobj(response.raw, out_file) + File_Path = path.normpath(File_Name_Final) except Exception as e: - logging.debug("File download error : %s" % e) + debug("File download error : %s" % e) print("Couldn't download file from : ",ddl_image) pass try: - shutil.move(File_Path,Directory_path) + move(File_Path,Directory_path) except Exception as e: print(e,'\n') pass diff --git a/comic_dl/downloader/universal.py b/comic_dl/downloader/universal.py index cb7e81d..8b5cb46 100644 --- a/comic_dl/downloader/universal.py +++ b/comic_dl/downloader/universal.py @@ -11,40 +11,37 @@ ddl_image is the direct link to the image itself. from __future__ import absolute_import from __future__ import print_function -import os -# import urllib -import shutil +from os import path, remove +from shutil import move import urllib -#from urllib import URLError -# import sys -import logging +from logging import debug, basicConfig, DEBUG def main(File_Name_Final,Directory_path,ddl_image, logger): if logger == "True": - logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) + basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG) File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) - logging.debug("File Check Path : %s" % File_Check_Path) + debug("File Check Path : %s" % File_Check_Path) - if os.path.isfile(File_Check_Path): + if path.isfile(File_Check_Path): print('[Comic-dl] File Exist! 
Skipping ',File_Name_Final,'\n') pass - if not os.path.isfile(File_Check_Path): + if not path.isfile(File_Check_Path): print('[Comic-dl] Downloading : ',File_Name_Final) urllib.request.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' try: urllib.request.urlretrieve(ddl_image, File_Name_Final) except Exception as e: - logging.debug("Error in retrieving image : %s" % e) + debug("Error in retrieving image : %s" % e) #filename, headers = urllib.urlretrieve(ddl_image,File_Name_Final) #print "File Name : ",filename #print "Headers : ",headers - File_Path = os.path.normpath(File_Name_Final) + File_Path = path.normpath(File_Name_Final) try: - shutil.move(File_Path,Directory_path) + move(File_Path,Directory_path) except Exception as e: print(e,'\n') - os.remove(File_Path) + remove(File_Path) pass diff --git a/comic_dl/sites/batoto.py b/comic_dl/sites/batoto.py index d07e37c..5f5a73e 100644 --- a/comic_dl/sites/batoto.py +++ b/comic_dl/sites/batoto.py @@ -3,10 +3,9 @@ from __future__ import absolute_import from __future__ import print_function -import re -import os -import sys -from more_itertools import unique_everseen +from re import search,sub,compile, findall +from os import path,makedirs +from sys import exit from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities @@ -15,7 +14,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By from downloader.universal import main as FileDownloader from six.moves import range -import logging +from logging import debug, basicConfig, DEBUG """Bato serves the chapters in 2 ways : @@ -68,7 +67,7 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]: if str(User_Name) in ["N"] or str(User_Password) in ["N"]: print("Username or Password cannot be empty.") - sys.exit() + exit() print("Authenticating Your Username and Password ...") batoto_login(driver, User_Name, User_Password, logger) @@ -92,10 +91,10 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log ) except Exception as e: - logging.debug("Error in loading page : %s\nTrying to move on." % e) + debug("Error in loading page : %s\nTrying to move on." % e) pass page_title = str(driver.title) - logging.debug("Page Title : %s" % page_title) + debug("Page Title : %s" % page_title) """Batoto doesn't provide shit in the source code of the web page. Hence, we'll be using the outer HTML to scrap all the info we need. @@ -115,44 +114,44 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log "ERROR [10030]: The thing you're looking for is unavailable. It may be due to:"]: print("You cannot access this page. You'll need to log in to download this page.") driver.quit() - sys.exit() + exit() else: pass except Exception as e: - logging.debug("Error in access check : %s" % e) + debug("Error in access check : %s" % e) pass try: # Getting the Series Name from the tags of the web # page. Series_Name = str( - re.search( + search( '^(.*)\ \-', page_title).group(1)).strip().replace( '_', ' ').title() except Exception as e: - logging.debug("Error in Series Name : %s" % e) + debug("Error in Series Name : %s" % e) Series_Name = "Unkown Series" try: # Getting the Series Name from the tags of the web # page. 
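        # Illustration with a hypothetical title: for a page_title such as
        # "Some Series - vol 3 ch 12: Name", the searches below yield
        # volume_number = 3 and chapter_number = 12; when a pattern is
        # missing from the title, the except branches fall back to '0'.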
volume_number = int( - str(re.search('vol (\d+)', page_title).group(1)).strip()) + str(search('vol (\d+)', page_title).group(1)).strip()) except Exception as e: - logging.debug("Error in Volume Number : %s" % e) + debug("Error in Volume Number : %s" % e) volume_number = '0' try: # Getting the Series Name from the tags of the web # page. chapter_number = int( - str(re.search('ch (\d+)', page_title).group(1)).strip()) + str(search('ch (\d+)', page_title).group(1)).strip()) except Exception as e: - logging.debug("Error in Chapter Number : %s" % e) + debug("Error in Chapter Number : %s" % e) chapter_number = '0' try: @@ -161,7 +160,7 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log '//*[@id="reader"]/div[1]/ul/li[3]/select').text).replace("/", " ").strip() except Exception as e: - logging.debug("Error in Group Name : %s\nMoving forward..." % e) + debug("Error in Group Name : %s\nMoving forward..." % e) # Some entries on batoto don't have a name. So, if we get to any such # occassion, let's be prepared. Group_Name_Finder = str('No Group') @@ -172,7 +171,7 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log page_list = driver.find_element_by_id('page_select') except Exception as e: - logging.debug("Error in Page Select : %s" % e) + debug("Error in Page Select : %s" % e) # If we cannot find the 'page_select' element, it means that this # chapter is showing all the images in one page. @@ -188,12 +187,12 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log '/' + "Chapter " + str(chapter_number) + " [" + str(Group_Name_Finder) + " ]" # Fix for "Special Characters" in The series name - File_Directory = re.sub( + File_Directory = sub( '[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) - Directory_path = os.path.normpath(File_Directory) + Directory_path = path.normpath(File_Directory) print('\n') print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number)) @@ -208,24 +207,24 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log Look at the last number for the image. Manipulate that and we have what we need. """ items_list = page_list.find_elements_by_tag_name("option") - logging.debug("Items List : %s" % items_list) + debug("Items List : %s" % items_list) for item in items_list: list_of_pages = item.text - logging.debug("List of Pages : %s" % list_of_pages) + debug("List of Pages : %s" % list_of_pages) lst_pag = str(list_of_pages) Last_Page_number = int( - str(re.search('(\d+)', lst_pag).group(1)).strip()) - logging.debug("Last Page Number : %s" % Last_Page_number) + str(search('(\d+)', lst_pag).group(1)).strip()) + debug("Last Page Number : %s" % Last_Page_number) img_link = driver.find_element_by_id('comic_page').get_attribute('src') - logging.debug("Image Link : %s" % img_link) + debug("Image Link : %s" % img_link) for i in range(1, Last_Page_number + 1): - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) + if not path.exists(File_Directory): + makedirs(File_Directory) if len(str(i)) == 1: ddl_image = str(img_link).replace( @@ -237,7 +236,7 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log 'img000001', 'img0000%s') % (i) File_Name_Final = str( - i).strip() + "." + str(re.search('\d\.(.*?)$', ddl_image).group(1)).strip() + i).strip() + "." 
+ str(search('\d\.(.*?)$', ddl_image).group(1)).strip() FileDownloader(File_Name_Final, Directory_path, ddl_image, logger) print('\n') @@ -254,18 +253,18 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password, log Image_Links = soup.findAll('div', {'style': 'text-align:center;'}) for link in Image_Links: - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) + if not path.exists(File_Directory): + makedirs(File_Directory) x = link.findAll('img') for a in x: ddl_image = a['src'] - logging.debug("Image Download Link : %s" % ddl_image) + debug("Image Download Link : %s" % ddl_image) File_Name_Final = str( - re.search( + search( 'img0000(\d+)\.([jpg]|[png])', ddl_image).group(1)).strip() + "." + str( - re.search( + search( '\d\.(.*?)$', ddl_image).group(1)).strip() FileDownloader(File_Name_Final, Directory_path, ddl_image, logger) @@ -286,7 +285,7 @@ def whole_series(driver, url, current_directory, User_Name, User_Password, logge if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]: if str(User_Name) in ["N"] or str(User_Password) in ["N"]: print("Username or Password cannot be empty.") - sys.exit() + exit() print("Authenticating Your Username and Password ...") batoto_login(driver, User_Name, User_Password, logger) @@ -305,7 +304,7 @@ def whole_series(driver, url, current_directory, User_Name, User_Password, logge ) except Exception as e: - logging.debug("Error in loading the page : %s\nMoving ahead..." % e) + debug("Error in loading the page : %s\nMoving ahead..." % e) pass elem = driver.find_element_by_xpath("//*") Page_Source = elem.get_attribute("outerHTML").encode('utf-8') @@ -319,13 +318,13 @@ def whole_series(driver, url, current_directory, User_Name, User_Password, logge soup = BeautifulSoup(Page_Source, "html.parser") all_links = soup.findAll( 'tr', {'class': 'row lang_English chapter_row'}) - logging.debug("Image Links : %s" % all_links) + debug("Image Links : %s" % all_links) for link in all_links: x = link.findAll('a') for a in x: ddl_image = a['href'] - logging.debug("Second Image Link : %s" % ddl_image) + debug("Second Image Link : %s" % ddl_image) if "reader" in ddl_image: link_list.append(ddl_image) @@ -352,7 +351,7 @@ def whole_series(driver, url, current_directory, User_Name, User_Password, logge ) except Exception as e: - logging.debug("Error in loading the page : %s\nMoving ahead." % e) + debug("Error in loading the page : %s\nMoving ahead." % e) pass elem = driver.find_element_by_xpath("//*") Page_Source = elem.get_attribute("outerHTML").encode('utf-8') @@ -369,7 +368,7 @@ def whole_series(driver, url, current_directory, User_Name, User_Password, logge ddl_image = a['href'] if "reader" in ddl_image: link_list.append(ddl_image) - logging.debug("%s added in the bag!" % ddl_image) + debug("%s added in the bag!" % ddl_image) print("Total Chapters To Download : ", len(link_list)) #print(link_list) @@ -396,17 +395,17 @@ def batoto_login(driver, User_Name, User_Password, logger): ) except Exception as e: - logging.debug("Error in loading page : %s\nSaving screenshot and moving..." % e) + debug("Error in loading page : %s\nSaving screenshot and moving..." 
% e)
            # driver.save_screenshot('Single_exception.png')
            pass
     LoggedOut_Title = driver.title
-    logging.debug("Logged out Title : %s" % LoggedOut_Title)
+    debug("Logged out Title : %s" % LoggedOut_Title)
     driver.find_element_by_id('ips_username').send_keys(User_Name)
     driver.find_element_by_id('ips_password').send_keys(User_Password)
     driver.find_element_by_xpath('//*[@id="login"]/fieldset[2]/input').click()
     LoggedIn_Title = driver.title
-    logging.debug("Logged In Title : %s" % LoggedIn_Title)
+    debug("Logged In Title : %s" % LoggedIn_Title)
 
     """A little check to see whether we've logged in or not. Comparing the titles
     of the before and after logging pages.
     """
@@ -415,21 +414,21 @@ def batoto_login(driver, User_Name, User_Password, logger):
     if str(LoggedIn_Title).strip() == str(LoggedOut_Title).strip():
         print("Couldn't log you in. Please check your credentials.")
         driver.quit()
-        sys.exit()
+        exit()
 
 
 def batoto_Url_Check(input_url, current_directory, User_Name, User_Password, logger):
     if logger == "True":
-        logging.basicConfig(format='[Comic-dl]%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG)
+        basicConfig(format='[Comic-dl]%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG)
 
-    batoto_single_regex = re.compile(
+    batoto_single_regex = compile(
        'https?://(?P<host>bato.to)/reader\#(?P<extra_characters>[\d\w-]+)?(\/|.)')
-    batoto_whole_regex = re.compile(
+    batoto_whole_regex = compile(
        '^https?://(?P<host>bato.to)/comic/\_/comics/(?P<comic>[\d\w-]+)?(\/|.)$')
     #print "Inside"
     lines = input_url.split('\n')
     for line in lines:
-        found = re.search(batoto_single_regex, line)
+        found = search(batoto_single_regex, line)
         if found:
             match = found.groupdict()
             if match['extra_characters']:
@@ -445,7 +444,7 @@ def batoto_Url_Check(input_url, current_directory, User_Name, User_Password, log
         else:
             pass
 
-        found = re.search(batoto_whole_regex, line)
+        found = search(batoto_whole_regex, line)
         if found:
             match = found.groupdict()
             if match['comic']:
diff --git a/comic_dl/sites/comic_naver.py b/comic_dl/sites/comic_naver.py
index d4f9158..4a52f2c 100644
--- a/comic_dl/sites/comic_naver.py
+++ b/comic_dl/sites/comic_naver.py
@@ -4,48 +4,49 @@ from __future__ import unicode_literals
 from __future__ import absolute_import
 from __future__ import print_function
 
-import re
-import sys
-import os
-import requests
+from re import search,sub,compile, findall
+from os import path,makedirs
+from sys import exit
+from logging import debug, basicConfig, DEBUG
+from requests import Session,cookies
 from downloader.cookies_required import with_referer as FileDownloader
 from six.moves import range
 from six.moves import input
-import logging
+
 
 def single_chapter(url,current_directory, logger):
-    s = requests.Session()
+    s = Session()
     headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
     req = s.get(url,headers=headers)
     cookies = req.cookies
     page_source_1 = str(req.text.encode('utf-8'))
 
     try:
-        #Korean_Name = re.search(r'<title>(.*?)</title>',str(page_source)).group(1)
-        Series_Name = re.search(r'titleId=(\d+)',url).group(1)
+        #Korean_Name = search(r'<title>(.*?)</title>',str(page_source)).group(1)
+        Series_Name = search(r'titleId=(\d+)',url).group(1)
     except Exception as e:
-        logging.debug("Error in Series Name : %s" % e)
+        debug("Error in Series Name : %s" % e)
         Series_Name = "Unknown"
 
     try:
-        #chapter_number = int(re.search(r'<span>\(.\d+)\<\/span\>',page_source_1).group(1))
-        chapter_number = re.search(r'&no=(\d+)',url).group(1)
+        #chapter_number = int(search(r'<span>\(.\d+)\<\/span\>',page_source_1).group(1))
+        chapter_number = search(r'&no=(\d+)',url).group(1)
     except Exception as e:
         # print(e)
-        logging.debug("Error in Chapter Number : %s" % e)
+        debug("Error in Chapter Number : %s" % e)
         chapter_number = 0
 
     img_regex = r'http://imgcomic.naver.net/webtoon/\d+/\d+/.+?\.(?:jpg|png|gif|bmp|JPG|PNG|GIF|BMP)'
-    img_links = list(re.findall(img_regex,page_source_1))
-    logging.debug("Image Links : %s" % img_links)
+    img_links = list(findall(img_regex,page_source_1))
+    debug("Image Links : %s" % img_links)
 
     Raw_File_Directory = str(Series_Name) +'/'+"Chapter "+str(chapter_number)
-    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)    # Fix for "Special Characters" in The series name
+    File_Directory = sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)    # Fix for "Special Characters" in The series name
 
-    Directory_path = os.path.normpath(File_Directory)
+    Directory_path = path.normpath(File_Directory)
 
     print('\n')
     print('{:^80}'.format('=====================================================================\n'))
@@ -53,8 +54,8 @@ def single_chapter(url,current_directory, logger):
     print('{:^80}'.format('=====================================================================\n'))
 
     for x,items in enumerate(img_links):
-        if not os.path.exists(File_Directory):
-            os.makedirs(File_Directory)
+        if not path.exists(File_Directory):
+            makedirs(File_Directory)
         FileDownloader(str(x+1)+str(items[-4:]),Directory_path,cookies,items,url, logger)
     print('\n')
 
@@ -67,41 +68,41 @@ def single_chapter(url,current_directory, logger):
 
 def whole_series(url, current_directory, logger):
 
-    s = requests.Session()
+    s = Session()
     headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
     req = s.get(url,headers=headers)
     cookies = req.cookies
     page_source_1 = req.text.encode('utf-8')
 
-    titleId = re.search(r'titleId=(\d+)',url).group(1)
+    titleId = search(r'titleId=(\d+)',url).group(1)
 
     try:
-        first_link = int(re.search(r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %(titleId),page_source_1).group(1))
+        first_link = int(search(r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %(titleId),page_source_1).group(1))
     except Exception as e:
         first_link = eval(input("Please Enter the Last chapter of the series : "))
         if not first_link:
             print("You failed to enter the last chapter count.
Script will exit now.") - sys.exit() + exit() for x in range(1,int(first_link)): Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" %(titleId,x) - logging.debug("Chapter URL : %s" % Chapter_Url) + debug("Chapter URL : %s" % Chapter_Url) single_chapter(Chapter_Url,current_directory, logger) def comic_naver_Url_Check(input_url, current_directory, logger): if logger == "True": - logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) + basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG) - comic_naver_single_regex = re.compile( + comic_naver_single_regex = compile( 'https?://(?Pcomic.naver.com)/webtoon/(?Pdetail.nhn)\?titleId\=(?P[\d]+)?(\/|.)') - comic_naver_whole_regex = re.compile( + comic_naver_whole_regex = compile( 'https?://(?Pcomic.naver.com)/webtoon/(?Plist.nhn)\?titleId\=(?P[\d]+)?(\/|.)') lines = input_url.split('\n') for line in lines: - found = re.search(comic_naver_single_regex, line) + found = search(comic_naver_single_regex, line) if found: match = found.groupdict() if match['detail']: @@ -111,7 +112,7 @@ def comic_naver_Url_Check(input_url, current_directory, logger): else: pass - found = re.search(comic_naver_whole_regex, line) + found = search(comic_naver_whole_regex, line) if found: match = found.groupdict() if match['list']: diff --git a/comic_dl/sites/gomanga.py b/comic_dl/sites/gomanga.py index 6a466a7..dc0e4cb 100644 --- a/comic_dl/sites/gomanga.py +++ b/comic_dl/sites/gomanga.py @@ -3,55 +3,55 @@ from __future__ import absolute_import from __future__ import print_function -import requests -import re -import os -import sys +from requests import Session,cookies from more_itertools import unique_everseen +from re import search,sub,compile, findall +from os import path,makedirs +from sys import exit from bs4 import BeautifulSoup from downloader.cookies_required import main as FileDownloader -import logging +from logging import debug, basicConfig, DEBUG def single_chapter(url,current_directory, logger): if not url: print("Couldn't get the URL. Please report it on Github Repository.") - sys.exit(0) + exit(0) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' } - s = requests.Session() + s = Session() response = s.get(url, headers=headers) tasty_cookies = response.cookies Page_source = str(response.text.encode('utf-8')) - Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + Series_Name = str(search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. try: - chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. + chapter_number = int(str(search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. 
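        # Illustration with a hypothetical URL: for
        # http://gomanga.co/reader/read/some_series/en/0/15/page/1 the
        # search('0\/(.*?)/') above captures '15', i.e. chapter 15. Note
        # that .replace('0','') also drops zeros inside multi-digit
        # numbers ('105' becomes '15').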
except Exception as e: - logging.debug("Error in Chapter Number : %s" % e) + debug("Error in Chapter Number : %s" % e) chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) - File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name + File_Directory = sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name - Directory_path = os.path.normpath(File_Directory) + Directory_path = path.normpath(File_Directory) - ddl_image_list = re.findall('comics(.*?)\"', Page_source) + ddl_image_list = findall('comics(.*?)\"', Page_source) ddl_list = list(unique_everseen(ddl_image_list)) - logging.debug("Image Links : %s" % ddl_list) + debug("Image Links : %s" % ddl_list) print('\n') @@ -60,12 +60,12 @@ def single_chapter(url,current_directory, logger): for i in ddl_list: - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) + if not path.exists(File_Directory): + makedirs(File_Directory) ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','') - logging.debug("Image Link : %s" % ddl_image) + debug("Image Link : %s" % ddl_image) - File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip() + File_Name_Final = str(findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip() FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image, logger) print('\n') @@ -80,13 +80,13 @@ def whole_series(url,current_directory, logger): } - s = requests.Session() + s = Session() response = s.get(url, headers=headers) tasty_cookies = response.cookies Page_source = str(response.text.encode('utf-8')) - Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + Series_Name = str(search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. 
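    # Illustration with a hypothetical URL: for
    # http://gomanga.co/reader/series/some_series/ the search above
    # captures 'some_series', which .replace('_',' ').title() turns
    # into the folder name 'Some Series'.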
soup = BeautifulSoup(Page_source, 'html.parser') @@ -96,19 +96,19 @@ def whole_series(url,current_directory, logger): x = link.findAll('a') for a in x: url = a['href'] - logging.debug("Final URL : %s" % url) + debug("Final URL : %s" % url) single_chapter(url,current_directory, logger) def gomanga_Url_Check(input_url,current_directory, logger): if logger == "True": - logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) + basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG) - gomanga_single_regex = re.compile('https?://(?Pgomanga.co)/reader/read/(?P[\d\w-]+)/en/(?P\d+)?/(?P\d+)?()|(/page/(?P\d+)?)') - gomanga_whole_regex = re.compile('^https?://(?Pgomanga.co)/reader/(?Pseries)?/(?P[\d\w-]+)?(\/|.)$') + gomanga_single_regex = compile('https?://(?Pgomanga.co)/reader/read/(?P[\d\w-]+)/en/(?P\d+)?/(?P\d+)?()|(/page/(?P\d+)?)') + gomanga_whole_regex = compile('^https?://(?Pgomanga.co)/reader/(?Pseries)?/(?P[\d\w-]+)?(\/|.)$') lines = input_url.split('\n') for line in lines: - found = re.search(gomanga_single_regex, line) + found = search(gomanga_single_regex, line) if found: match = found.groupdict() if match['Chapter']: @@ -119,7 +119,7 @@ def gomanga_Url_Check(input_url,current_directory, logger): - found = re.search(gomanga_whole_regex, line) + found = search(gomanga_whole_regex, line) if found: match = found.groupdict() if match['comic']: diff --git a/comic_dl/sites/kisscomicus.py b/comic_dl/sites/kisscomicus.py index d8887f8..413ad83 100644 --- a/comic_dl/sites/kisscomicus.py +++ b/comic_dl/sites/kisscomicus.py @@ -1,19 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import cfscrape -import re +from re import search,sub,compile, findall +from os import path,makedirs from bs4 import BeautifulSoup -import os +from cfscrape import create_scraper +from logging import debug, basicConfig, DEBUG # from downloader.universal import main as FileDownloader from downloader.cookies_required import main as FileDownloader -import requests -import logging +from requests import session def single_chapter(url, directory, logger): - sess = requests.session() - sess = cfscrape.create_scraper(sess) + sess = session() + sess = create_scraper(sess) s = sess.get(url) cookies = sess.cookies connection = s.text.encode('utf-8') @@ -31,10 +31,10 @@ def single_chapter(url, directory, logger): Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + str(chapter_number) - File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', + File_Directory = sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name - Directory_path = os.path.normpath(File_Directory) + Directory_path = path.normpath(File_Directory) print('\n') print('{:^80}'.format('=====================================================================\n')) @@ -43,16 +43,16 @@ def single_chapter(url, directory, logger): # soup = BeautifulSoup(connection, "html.parser") linkFinder = soup.findAll('ul', {'class': 'list-image'}) - logging.debug("Image Links : %s" % linkFinder) + debug("Image Links : %s" % linkFinder) # print("Link Finder :s %s" % linkFinder) for link in linkFinder: x = link.findAll('img') for a in x: - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) + if not path.exists(File_Directory): + makedirs(File_Directory) ddlLink = a['src'] - logging.debug("Final URL : %s" % ddlLink) + debug("Final URL : %s" % ddlLink) fileName = str(ddlLink).split("/")[-1].strip() # print("Link : %s\nFile Name : %s" % 
(ddlLink, fileName)) FileDownloader(File_Name_Final=fileName, Directory_path=File_Directory, tasty_cookies=cookies, ddl_image=ddlLink, logger=logger) @@ -62,7 +62,7 @@ def single_chapter(url, directory, logger): def whole_series(url, directory, logger): - scraper = cfscrape.create_scraper() + scraper = create_scraper() connection = scraper.get(url).content soup = BeautifulSoup(connection, "html.parser") @@ -73,7 +73,7 @@ def whole_series(url, directory, logger): for a in x: # print(a['href']) url = "http://kisscomic.us" + a['href'] - logging.debug("Chapter URL : %s" % url) + debug("Chapter URL : %s" % url) single_chapter(url, directory, logger) print("Finished Downloading") @@ -81,13 +81,13 @@ def whole_series(url, directory, logger): def kissmcomicus_Url_Check(input_url, current_directory, logger): if logger == "True": - logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) - kissmcomicus_single_regex = re.compile('https?://(?P[^/]+)/chapters/(?P[\d\w-]+)(?:/Issue-)?') - kissmcomicus_whole_regex = re.compile('https?://(?P[^/]+)/comics/(?P[\d\w-]+)?') + basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG) + kissmcomicus_single_regex = compile('https?://(?P[^/]+)/chapters/(?P[\d\w-]+)(?:/Issue-)?') + kissmcomicus_whole_regex = compile('https?://(?P[^/]+)/comics/(?P[\d\w-]+)?') lines = input_url.split('\n') for line in lines: - found = re.search(kissmcomicus_single_regex, line) + found = search(kissmcomicus_single_regex, line) if found: match = found.groupdict() if match['comic']: @@ -96,7 +96,7 @@ def kissmcomicus_Url_Check(input_url, current_directory, logger): else: pass - found = re.search(kissmcomicus_whole_regex, line) + found = search(kissmcomicus_whole_regex, line) if found: match = found.groupdict() if match['comic_name']: diff --git a/comic_dl/sites/kissmanga.py b/comic_dl/sites/kissmanga.py index 98b2222..db25348 100644 --- a/comic_dl/sites/kissmanga.py +++ b/comic_dl/sites/kissmanga.py @@ -1,18 +1,21 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import re -import os -import sys +from __future__ import unicode_literals +from __future__ import absolute_import +from __future__ import print_function +from re import search,sub,compile, findall +from os import path,makedirs +from sys import exit from bs4 import BeautifulSoup from downloader.universal import main as FileDownloader -import cfscrape -import logging +from cfscrape import create_scraper +from logging import debug, basicConfig, DEBUG def single_chapter(url, current_directory, logger): - scraper = cfscrape.create_scraper() + scraper = create_scraper() Page_Source = scraper.get(str(url)).content @@ -21,37 +24,38 @@ def single_chapter(url, current_directory, logger): meta = formatted.findAll('title') meta_data = list(str(meta).split('\n')) + # print(meta_data) try: Series_Name = str(meta_data[2]) except Exception as e: # print (e) - logging.debug("Error in Series Name : %s" % e) + debug("Error in Series Name : %s" % e) Series_Name = "Unkown Series" try: # Getting the Volume Number from the page source. volume_number = int( - str(re.search('Vol\.(.*)\ Ch', Page_Source).group(1)).strip()) + str(search('Vol\.(.*)\ Ch', Page_Source).group(1)).strip()) except Exception as e: - logging.debug("Error in Volume Number : %s" % e) + debug("Error in Volume Number : %s" % e) volume_number = '0' try: chapter_number = str(meta_data[3]) except Exception as e: - logging.debug("Error in Chapter Number : %s\nTrying Something else." 
% e) + debug("Error in Chapter Number : %s\nTrying Something else." % e) try: # Getting the Volume Number from the page source. - chapter_number = str(re.search('Ch\.(.*)\:', str(Page_Source)).group(1)).strip() + chapter_number = str(search('Ch\.(.*)\:', str(Page_Source)).group(1)).strip() except Exception as e: - logging.debug("Error in Chapter Number : %s" % e) + debug("Error in Chapter Number : %s" % e) chapter_number = str('0') - all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', str(formatted)) - logging.debug("Image Links : %s" % all_links) + all_links = findall('lstImages.push\(\"(.*)\"\)\;', str(formatted)) + debug("Image Links : %s" % all_links) if volume_number == '0': # Some series don't seem to have volumes mentioned. Let's assume @@ -59,34 +63,41 @@ def single_chapter(url, current_directory, logger): Raw_File_Directory = str(Series_Name) + '/' + \ "Chapter " + str(chapter_number) else: - logging.debug("Found the Volume. Making a directory.") + debug("Found the Volume. Making a directory.") Raw_File_Directory = str(Series_Name) + '/' + "Volume " + \ str(volume_number) + '/' + "Chapter " + str(chapter_number) # Fix for "Special Characters" in The series name - File_Directory = re.sub( + File_Directory = sub( '[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) - Directory_path = os.path.normpath(File_Directory) + Directory_path = path.normpath(File_Directory) print ('\n') print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number)) print('{:^80}'.format('=====================================================================\n')) for elements in all_links: - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) - ddl_image = str(elements).strip() + sane_url = str(elements).replace("%3a",":").replace("%2f","/").replace("&imgmax=30000","").replace("https://images1-focus-opensocial.googleusercontent.com/gadgets/proxy?container=focus&gadget=a&no_expand=1&resize_h=0&rewriteMime=image%2F*&url=","") + # print(sane_url) + if not path.exists(File_Directory): + makedirs(File_Directory) + ddl_image = str(sane_url).strip() try: - File_Name_Final = str(re.search( + File_Name_Final = str(search( 's0/(.*)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:]) except Exception as e: - logging.debug("Error in File Name : %s" % e) - File_Name_Final = str(re.search( - 'title\=(.*)\_(\d+)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:]) + debug("Error in File Name : %s" % e) + try: + File_Name_Final = str(search( + 'title\=(.*)\_(\d+)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." 
+ str(ddl_image[-3:]) + except Exception as e: + debug("Error inside Error : %s" % e) + File_Name_Final = str(ddl_image[-6:]) + # print(File_Name_Final) FileDownloader(File_Name_Final, Directory_path, ddl_image, logger) print('\n') @@ -95,7 +106,7 @@ def single_chapter(url, current_directory, logger): def whole_series(url, current_directory, logger): - scraper = cfscrape.create_scraper() + scraper = create_scraper() Page_Source = scraper.get(str(url)).content @@ -103,7 +114,7 @@ def whole_series(url, current_directory, logger): soup = BeautifulSoup(Page_Source, "html.parser") all_links = soup.findAll('table', {'class': 'listing'}) - logging.debug("Chapter Links : %s" % all_links) + debug("Chapter Links : %s" % all_links) for link in all_links: x = link.findAll('a') @@ -113,41 +124,46 @@ def whole_series(url, current_directory, logger): if "Manga" in ddl_image: final_url = "http://kissmanga.com" + ddl_image link_list.append(final_url) - logging.debug("%s added in the bag!" % final_url) + debug("%s added in the bag!" % final_url) if int(len(link_list)) == '0': print("Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes.") - sys.exit() + exit() print("Total Chapters To Download : ", len(link_list)) for item in link_list: url = str(item) - logging.debug("Chapter Links : %s" % url) + debug("Chapter Links : %s" % url) single_chapter(url, current_directory, logger) def kissmanga_Url_Check(input_url, current_directory, logger): if logger == "True": - logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG) + basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG) - kissmanga_single_regex = re.compile( - 'https?://(?Pkissmanga.com)/Manga/(?P[\d\w-]+)?/((?P[Vol\-\d]+)|(.*)(?P[Ch\-\d]+))\-(?P[\d\w-]+)\?(?P[\=\d\w-]+)') - kissmanga_whole_regex = re.compile( + kissmanga_single_regex = compile( + 'https?://(?Pkissmanga.com)/Manga/(?P[\d\w-]+)?/((?P[Vol\-\d]+)|(.*)(?P[Ch\d\w-]+))\-(?P[\d\w-]+)\?(?P[\=\d\w-]+)') + kissmanga_whole_regex = compile( '^https?://(?Pkissmanga.com)/Manga/(?P[\d\w\-]+)?(\/|.)$') lines = input_url.split('\n') + # print(lines) for line in lines: - found = re.search(kissmanga_single_regex, line) + found = search(kissmanga_single_regex, line) + # print(found) if found: match = found.groupdict() if match['Chap_Name']: url = str(input_url) + # print("Here inside!") single_chapter(url, current_directory, logger) + # print("Passed it") else: pass - found = re.search(kissmanga_whole_regex, line) + found = search(kissmanga_whole_regex, line) + # print(found) if found: match = found.groupdict() if match['comic']: diff --git a/comic_dl/sites/mangafox.py b/comic_dl/sites/mangafox.py index 047544a..c8e5d8a 100644 --- a/comic_dl/sites/mangafox.py +++ b/comic_dl/sites/mangafox.py @@ -3,12 +3,10 @@ from __future__ import absolute_import from __future__ import print_function -import requests -import os -import re -import sys -import shutil -from bs4 import BeautifulSoup +from requests import get +from re import search,sub,compile, findall +from os import path,makedirs +from sys import exit from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import WebDriverWait @@ -16,7 +14,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By from downloader.universal import main as FileDownloader from six.moves import range -import logging +from logging 
import debug, basicConfig, DEBUG def create_driver(): @@ -31,21 +29,21 @@ def create_driver(): def single_chapter(driver,url,current_directory, logger): try: - Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + Series_Name = str(search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. except Exception as e: - logging.debug("Error in Series Name : %s\nTrying something else." % e) - Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + debug("Error in Series Name : %s\nTrying something else." % e) + Series_Name = str(search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. try: - volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories. + volume_number = "Volume " + str(search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories. except Exception as e: - logging.debug("Error in Volume Number : %s" % e) + debug("Error in Volume Number : %s" % e) volume_number = "Volume 01" try: - chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. + chapter_number = int(str(search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. except Exception as e: - logging.debug("Error in Chapter Number : %s\nTrying something else." % e) + debug("Error in Chapter Number : %s\nTrying something else." % e) chapter_number = 0 # Getting the chapter count from the URL itself for naming the folder/dicrectories in float. if volume_number == '0': @@ -53,9 +51,9 @@ def single_chapter(driver,url,current_directory, logger): else: Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number) - File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name + File_Directory = sub('[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name - Directory_path = os.path.normpath(File_Directory) + Directory_path = path.normpath(File_Directory) driver.get(url) @@ -65,7 +63,7 @@ def single_chapter(driver,url,current_directory, logger): ) except Exception as e: - logging.debug("Error in loading the webpage : %s\nScreenshot saved." % e) + debug("Error in loading the webpage : %s\nScreenshot saved." % e) driver.save_screenshot("error.png") print("Couldn't load the element. I'll try to move ahead in any case.") print('\n') @@ -75,27 +73,27 @@ def single_chapter(driver,url,current_directory, logger): elem = driver.find_element_by_xpath("//*") Page_Source = str(elem.get_attribute("outerHTML").encode('utf-8')) - First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden. 
+ First_chapter_link = str(search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden. - current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories. + current_chapter_count = int(str(search('current_page\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories. - last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories. + last_chapter_count = int(str(search('total_pages\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories. print('\n') print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number)) print('{:^80}'.format('=====================================================================\n')) - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) + if not path.exists(File_Directory): + makedirs(File_Directory) for x in range(current_chapter_count,last_chapter_count+1): driver.refresh() File_Name_Final = str(x)+'.jpg' link_container = driver.find_element_by_xpath('//*[@id="image"]') - logging.debug("Link Container : %s" % link_container) + debug("Link Container : %s" % link_container) ddl_image = str(link_container.get_attribute('src')) - logging.debug("Image Link : %s" % ddl_image) + debug("Image Link : %s" % ddl_image) FileDownloader(File_Name_Final,Directory_path,ddl_image, logger) driver.find_element_by_xpath('//*[@id="top_bar"]/div/a[2]').click() @@ -109,7 +107,7 @@ def whole_series(url,current_directory, logger): print("Couldn't get the URL. Please report it on Github Repository.") try: - Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + Series_Name = str(search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories. except Exception as e: print('Check if the URL is correct or not. 
Report on Github.')
 
     headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }
 
-    response = requests.get(url, headers=headers)
+    response = get(url, headers=headers)
     Page_source = str(response.text.encode('utf-8'))
 
     try:
         chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v"
-        links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
+        links = findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
 
         if len(links) == 0:
             chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c"
             #print chapter_link_format
-            links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
+            links = findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
 
     except Exception as e:
@@ -149,14 +147,14 @@ def whole_series(url,current_directory, logger):
 
 def mangafox_Url_Check(input_url,current_directory, logger):
     if logger == "True":
-        logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG)
+        basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG)
 
-    mangafox_single_regex = re.compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
-    mangafox_whole_regex = re.compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
+    mangafox_single_regex = compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
+    mangafox_whole_regex = compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
 
     lines = input_url.split('\n')
     for line in lines:
-        found = re.search(mangafox_single_regex, line)
+        found = search(mangafox_single_regex, line)
         if found:
             match = found.groupdict()
             if match['issue']:
@@ -168,13 +166,13 @@ def mangafox_Url_Check(input_url,current_directory, logger):
                     print(e)
                     driver.quit()
                     driver.quit()
-                    sys.exit()
+                    exit()
 
         else:
             pass
 
-        found = re.search(mangafox_whole_regex, line)
+        found = search(mangafox_whole_regex, line)
         if found:
             match = found.groupdict()
             if match['comic_series']:
@@ -184,6 +182,6 @@ def mangafox_Url_Check(input_url,current_directory, logger):
                 whole_series(url,current_directory, logger)
             except Exception as e:
                 print(e)
-                sys.exit()
+                exit()
         else:
             pass
diff --git a/comic_dl/sites/readcomic.py b/comic_dl/sites/readcomic.py
index ebb446a..720a1a7 100644
--- a/comic_dl/sites/readcomic.py
+++ b/comic_dl/sites/readcomic.py
@@ -6,23 +6,23 @@ from __future__ import absolute_import
 from __future__ import print_function
 from builtins import str
 from downloader.universal import main as FileDownloader
-import re
-import sys
-import cfscrape
-import os
+from re import search,sub,compile, findall
+from os import path,makedirs
+from sys import exit
 from bs4 import BeautifulSoup
-import logging
+from cfscrape import create_scraper
+from logging import debug, basicConfig, DEBUG
 
 
 def readcomic_Url_Check(input_url, current_directory, logger):
     if logger == "True":
-        logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG)
+        basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG)
 
-    Issue_Regex = re.compile('https?://(?P<host>[^/]+)/Comic/(?P<comic>[\d\w-]+)(?:/Issue-)?(?P<issue>\d+)?')
-    Annual_Regex = re.compile('https?://(?P<host>[^/]+)/Comic/(?P<comic>[\d\w-]+)(?:/Annual-)?(?P<issue>\d+)?')
+    Issue_Regex = compile('https?://(?P<host>[^/]+)/Comic/(?P<comic>[\d\w-]+)(?:/Issue-)?(?P<issue>\d+)?')
+    Annual_Regex = compile('https?://(?P<host>[^/]+)/Comic/(?P<comic>[\d\w-]+)(?:/Annual-)?(?P<issue>\d+)?')
 
     lines = input_url.split('\n')
     for line in lines:
-        found = re.search(Issue_Regex, line)
+        found = search(Issue_Regex, line)
         if found:
             match = found.groupdict()
             if match['issue']:
@@ -34,7 +34,7 @@ def readcomic_Url_Check(input_url, current_directory, logger):
                 url = str(input_url)
                 Whole_Series(url, current_directory, logger)
 
-        found = re.search(Annual_Regex, line)
+        found = search(Annual_Regex, line)
         if found:
             match = found.groupdict()
@@ -49,11 +49,11 @@ def readcomic_Url_Check(input_url, current_directory, logger):
 
     if not found:
         print('Please Check Your URL once again!')
-        sys.exit()
+        exit()
 
 
 def Single_Issue(url, current_directory, logger):
-    scraper = cfscrape.create_scraper()
+    scraper = create_scraper()
     connection = scraper.get(url).content
 
     Series_Name_Splitter = url.split('/')
@@ -61,48 +61,48 @@ def Single_Issue(url, current_directory, logger):
     Issue_Number_Splitter = str(Series_Name_Splitter[5])
     Issue_Or_Annual_Split = str(Issue_Number_Splitter).split("?")
     Issue_Or_Annual = str(Issue_Or_Annual_Split[0]).replace("-", " ").strip()
-    reg = re.findall(r'[(\d)]+', Issue_Number_Splitter)
+    reg = findall(r'[(\d)]+', Issue_Number_Splitter)
     Issue_Number = str(reg[0])
 
     Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + str(Issue_Or_Annual)
 
-    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '',
+    File_Directory = sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '',
                          Raw_File_Directory)  # Fix for "Special Characters" in The series name
 
-    Directory_path = os.path.normpath(File_Directory)
+    Directory_path = path.normpath(File_Directory)
 
     print('\n')
     print('{:^80}'.format('=====================================================================\n'))
     print('{:^80}'.format('%s - %s') % (Series_Name, Issue_Or_Annual))
     print('{:^80}'.format('=====================================================================\n'))
 
-    linksList = re.findall('lstImages.push\(\"(.*?)\"\)\;', str(connection))
-    logging.debug("Image Links : %s" % linksList)
+    linksList = findall('lstImages.push\(\"(.*?)\"\)\;', str(connection))
+    debug("Image Links : %s" % linksList)
 
     for link in linksList:
-        if not os.path.exists(File_Directory):
-            os.makedirs(File_Directory)
+        if not path.exists(File_Directory):
+            makedirs(File_Directory)
         fileName = str(linksList.index(link)) + ".jpg"
-        # logging.debug("Name of File : %s" % fileName)
+        # debug("Name of File : %s" % fileName)
         FileDownloader(fileName, Directory_path, link, logger)
 
 
 def Whole_Series(url, current_directory, logger):
-    scraper = cfscrape.create_scraper()
+    scraper = create_scraper()
     connection = scraper.get(url).content
 
     soup = BeautifulSoup(connection, "html.parser")
-    # logging.debug("Soup : %s" % soup)
+    # debug("Soup : %s" % soup)
     all_links = soup.findAll('table', {'class': 'listing'})
-    # logging.debug("Issue Links : %s" % all_links)
+    # debug("Issue Links : %s" % all_links)
     for link in all_links:
-        # logging.debug("link : %s" % link)
+        # debug("link : %s" % link)
         x = link.findAll('a')
-        logging.debug("Actual Link : %s" % x)
+        debug("Actual Link : %s" % x)
         for a in x:
             url = "http://readcomiconline.to" + a['href']
-            logging.debug("Final URL : %s" % url)
+            debug("Final URL : %s" % url)
             Single_Issue(url, current_directory=current_directory, logger=logger)
     print("Finished Downloading")
diff --git a/comic_dl/sites/yomanga.py b/comic_dl/sites/yomanga.py
index 3e49648..11e11d2 100644
--- a/comic_dl/sites/yomanga.py
+++ b/comic_dl/sites/yomanga.py
@@ -4,61 +4,62 @@ from __future__ import absolute_import
 from __future__ import print_function
 
 import requests
-import re
-import os
-import sys
 from more_itertools import unique_everseen
+from re import
search,sub,compile, findall +from os import path,makedirs +from sys import exit from bs4 import BeautifulSoup from downloader.cookies_required import main as FileDownloader -import logging +from logging import debug, basicConfig, DEBUG +from requests import Session,cookies def single_chapter(url, current_directory, logger): if not url: print("Couldn't get the URL. Please report it on Github Repository.") - sys.exit(0) + exit(0) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' } - s = requests.Session() + s = Session() response = s.get(url, headers=headers) tasty_cookies = response.cookies Page_source = str(response.text.encode('utf-8')) - Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + Series_Name = str(search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. try: - chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. + chapter_number = int(str(search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. except Exception as e: - logging.debug("Error in Chapter Number : %s" % e) + debug("Error in Chapter Number : %s" % e) chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) - File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name - Directory_path = os.path.normpath(File_Directory) + File_Directory = sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name + Directory_path = path.normpath(File_Directory) - ddl_image_list = re.findall('comics(.*?)\"', Page_source) + ddl_image_list = findall('comics(.*?)\"', Page_source) ddl_list = list(unique_everseen(ddl_image_list)) - logging.debug("Image Links : %s" % ddl_list) + debug("Image Links : %s" % ddl_list) print('\n') print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number)) print('{:^80}'.format('=====================================================================\n')) for i in ddl_list: - if not os.path.exists(File_Directory): - os.makedirs(File_Directory) + if not path.exists(File_Directory): + makedirs(File_Directory) ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','') - logging.debug("Image Download Link : %s" % ddl_image) + debug("Image Download Link : %s" % ddl_image) - File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip() + File_Name_Final = str(findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip() FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image, logger) print('\n') @@ -73,36 +74,36 @@ def whole_series(url, current_directory, logger): } - s = 
+    s = Session()
     response = s.get(url, headers=headers)
     tasty_cookies = response.cookies
 
     Page_source = str(response.text.encode('utf-8'))
 
-    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
+    Series_Name = str(search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/directories.
 
     soup = BeautifulSoup(Page_source, 'html.parser')
 
     chapter_text = soup.findAll('div',{'class':'title'})
-    logging.debug("Chapter Text : %s" % chapter_text)
+    debug("Chapter Text : %s" % chapter_text)
 
     for link in chapter_text:
         x = link.findAll('a')
         for a in x:
             url = a['href']
-            logging.debug("Chapter URL : %s" % url)
+            debug("Chapter URL : %s" % url)
             single_chapter(url, current_directory, logger)
 
 
 def yomanga_Url_Check(input_url, current_directory, logger):
     if logger == "True":
-        logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG)
+        basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=DEBUG)
 
-    yomanga_single_regex = re.compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<page>\d+)?)')
-    yomanga_whole_regex = re.compile('^https?://(?P<host>yomanga.co)/reader/(?P<comic>series)?/(?P<comic_series>[\d\w-]+)?(\/|.)$')
+    yomanga_single_regex = compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<page>\d+)?)')
+    yomanga_whole_regex = compile('^https?://(?P<host>yomanga.co)/reader/(?P<comic>series)?/(?P<comic_series>[\d\w-]+)?(\/|.)$')
 
     lines = input_url.split('\n')
     for line in lines:
-        found = re.search(yomanga_single_regex, line)
+        found = search(yomanga_single_regex, line)
         if found:
             match = found.groupdict()
             if match['Chapter']:
@@ -113,7 +114,7 @@ def yomanga_Url_Check(input_url, current_directory, logger):
 
 
 
-        found = re.search(yomanga_whole_regex, line)
+        found = search(yomanga_whole_regex, line)
         if found:
             match = found.groupdict()
             if match['comic']:
diff --git a/comic_dl/version.py b/comic_dl/version.py
index b774ca7..32ffefa 100644
--- a/comic_dl/version.py
+++ b/comic_dl/version.py
@@ -4,4 +4,4 @@
 Date Format : YY/MM/DD
 '''
 
-__version__ = '2017.01.22'
+__version__ = '2017.02.16'
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 8a8a0bb..f0393af 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -15,4 +15,6 @@
 - Added a YouTube Tutorial for the script [2016.12.30]
 - Site support for readcomiconlin.to [2017.01.02]
 - Added `Verbose Logging` [2017.01.22]
-- Fixed chapter count error in Kissmanga [2017.01.22]
\ No newline at end of file
+- Fixed chapter count error in Kissmanga [2017.01.22]
+- Fixed #4 [2017.02.16]
+- Optimized Imports [2017.02.16]
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index b7308bf..836d500 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -187,10 +187,9 @@ You can check the changelog [**`HERE`**](http://comic-dl.readthedocs.io/en/lates
 If you're planning to open an issue for the script or ask for a new feature or anything that requires opening an Issue, then please do keep these things in mind.
 
 ### Reporting Issues
-If you're about to report some issue with the script, then please do include these things :
-* The command your entered. Yes, with the URL
-* The output of that command. You can simply copy the text from the terminal/command prompt and paste it. Make sure you put that inside inside `` (tilde).
-* Your Operating System and python version.
+If you're going to report an issue, then please run the script again with the "-v or --verbose" argument. It should generate a file in the same directory, with the name "Error Log.log". Copy that log file's data and post it on a [Gist](https://gist.github.com/) and share that gist's link while reporting the issue here. Make sure you **EDIT OUT YOUR USERNAME AND PASSWORD**, if supplied within the command.
+
+If you don't include the verbose log, there are chances it'll take time to fix the issue(s) you're having.
 
 ### Suggesting A Feature
 If you're here to make suggestions, please follow the basic syntax to post a request :
diff --git a/setup.cfg b/setup.cfg
index 224a779..7c7e55f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,2 @@
 [metadata]
-description-file = README.md
\ No newline at end of file
+description-file = ReadMe.md
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 21348b3..5dc5164 100644
--- a/setup.py
+++ b/setup.py
@@ -1,55 +1,26 @@
-#!/usr/bin/env python
-# coding: utf-8
+from distutils.core import setup
 
-import os
-import sys
-import comic_dl
-
-try:
-    from setuptools import setup
-except ImportError:
-    from distutils.core import setup
-
-if sys.argv[-1] == 'publish':
-    os.system('python setup.py sdist upload')
-    sys.exit()
-
-readme = open('README.txt').read()
+readme = open('ReadMe.md').read()
 history = open('Changelog.md').read()
 
-exec(compile(open('comic_dl/version.py').read(),
-             'comic_dl/version.py', 'exec'))
-
 setup(
-    name='comic-dl',
-    version=__version__,
-    description='Comic-dl is a command line tool to download Comics and Manga from various Manga and Comic sites easily.',
-    long_description=readme + '\n\n' + history,
-    author='Xonshiz',
-    author_email='xonshiz@psychoticelites.com',
-    url='https://github.com/Xonshiz/comic-dl',
-    download_url = 'https://codeload.github.com/Xonshiz/comic-dl/legacy.tar.gz/v2016.11.26(1)',
-    packages=[
-        'comic_dl',
-        'comic_dl.sites',
-        'comic_dl.downloader',
-    ],
-    package_dir={'comic_dl': 'comic_dl'},
-    include_package_data=True,
-    install_requires=["selenium",
+    name = 'comic_dl',
+    packages = ['comic_dl','comic_dl.sites','comic_dl.downloader'], # this must be the same as the name above
+    install_requires=["selenium",
                       "requests",
                       "more_itertools",
+                      "cfscrape",
                       "bs4"
                       ],
-    entry_points={
-        'console_scripts': [
-            'comic_dl = comic_dl:main'
-        ],
-    },
-    license="MIT Licence",
-    zip_safe=False,
-    keywords = ['comic-dl', 'cli', 'comic downloader','manga downloader','mangafox','batoto','kissmanga'],
-    classifiers=[
+    version = '2017.02.16',
+    description = 'Comic-dl is a command line tool to download Comics and Manga from various Manga and Comic sites easily.',
+    long_description=readme + '\n\n' + history,
+    author = 'Xonshiz',
+    author_email = 'xonshiz@psychoticelites.com',
+    url='https://github.com/Xonshiz/comic-dl',
+    download_url = 'https://codeload.github.com/Xonshiz/comic-dl/legacy.tar.gz/v2016.11.26(1)',
+    keywords = ['comic-dl', 'cli', 'comic downloader','manga downloader','mangafox','batoto','kissmanga','comic naver'],
+    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Intended Audience :: End Users/Desktop',
@@ -65,5 +36,4 @@ setup(
        'Operating System :: OS Independent',
        'Topic :: Multimedia :: Graphics'
     ],
-    #test_suite='tests',
 )
\ No newline at end of file
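
A note on the setup.py hunk above: the rewritten setup() pins the release string by hand, while comic_dl/version.py remains the canonical source of the version (this patch bumps it to 2017.02.16). Below is a minimal sketch of how the two could be kept in sync automatically, in the spirit of the exec() call the old setup.py used. It assumes version.py continues to define __version__ as a plain string; the snippet is illustrative and not part of the patch itself:

    from distutils.core import setup

    # Read __version__ out of comic_dl/version.py in an isolated namespace,
    # so setup.py never has to import (or duplicate) the package itself.
    version_ns = {}
    with open('comic_dl/version.py') as f:
        exec(f.read(), version_ns)

    setup(
        name='comic_dl',
        packages=['comic_dl', 'comic_dl.sites', 'comic_dl.downloader'],
        version=version_ns['__version__'],  # e.g. '2017.02.16'
    )

With this approach, bumping comic_dl/version.py is the only step needed for a release; the sdist picks the new version up at build time.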