Merged Python 2 and 3 support

Check the changelog for more info.
This commit is contained in:
Xonshiz 2016-12-28 18:10:04 +05:30
parent c3a0d81fae
commit 3d20f2e3ef
40 changed files with 388 additions and 1897 deletions

View File

@@ -8,4 +8,6 @@
- Argument priority updated [2016.11.22]
- Site support for comic.naver.com [2016.11.26]
- Support for Python 3 [2016.11.26]
- Removed Kissmanga PhantomJS dependency [2016.12.23]
- Support for Python 2 and 3 has been merged [2016.12.28]
- Updated PhantomJS dependency in [supported sites](https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md) [2016.12.28]
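
The merge visible in the file diffs below keeps a single source tree running on both interpreters by combining `__future__` imports, the `six` compatibility helpers, and (in the dispatcher) the `future` package's standard-library aliases. A minimal sketch of the pattern, with an illustrative helper name:

# Sketch of the Python 2/3 compatibility pattern used across the merged modules.
from __future__ import absolute_import
from __future__ import print_function
from six.moves import range  # Py3-style range() on both interpreters
def list_pages(total_pages):  # illustrative helper, not part of the project
    # print() and range() behave identically on Python 2 and Python 3 here.
    for page in range(1, total_pages + 1):
        print('Page', page)
if __name__ == '__main__':
    list_pages(3)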

View File

@@ -2,7 +2,7 @@
[PhantomJS] = Denotes that these sites need PhantomJS to be able to download content.
* [Mangafox](http://mangafox.me/)
* [Mangafox](http://mangafox.me/) [PhantomJS]
* [YoManga](http://yomanga.co/)
* [GoManga](http://gomanga.co/)
* [Batoto](http://bato.to/) [PhantomJS]

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import argparse
from honcho import url_checker
from version import __version__
def version():
print('\n')
print('{:^80}'.format('Current Version : %s')%(__version__))
print('\n')
print('{:^80}'.format("More info : comic-dl -h"))
def usage():
print('\n')
print('{:^80}'.format('################################################'))
print('{:^80}'.format('Comic-DL Usage'))
print('{:^80}'.format('################################################\n'))
print('\n')
print('{:^80}'.format('Author : Xonshiz | Version : %s')%(__version__))
print('{:^80}'.format('-------------------------------------------------\n'))
print("Comic-dl is a command line tool to download manga and comics from various comic and manga sites.")
print("Using the script is pretty simple and should be easy for anyone familiar with a command line/shell.")
print('\n')
print('{:^80}'.format("USAGE : comic-dl -i <URL to comic>"))
print('\n')
print("Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ",'\n')
print("Available Arguments : ")
print('{:^80}'.format("-i,--input : Specifies the Input URL"))
print('{:^80}'.format("-h : Prints this help menu"))
print('{:^80}'.format("--version : Prints the current version and exits"))
print('{:^80}'.format("-a,--about : Shows the info about this script and exits."))
print('{:^80}'.format("-u,--username : Indicates username for a website."))
print('{:^80}'.format("-p,--password : Indicates password for a website."))
def main(argv):
current_directory = str(os.getcwd())
parser = argparse.ArgumentParser(description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
parser.add_argument('--version',action='store_true',help='Shows version and exits' )
parser.add_argument('-a','--about',action='store_true',help='Shows the info regarding this script' )
parser.add_argument('-i','--input',nargs=1,help='Inputs the URL to comic')
parser.add_argument('-p','--password',nargs=1,help='Indicates password for a website',default='None')
parser.add_argument('-u','--username',nargs=1,help='Indicates username for a website',default='None')
args = parser.parse_args()
if args.version:
version()
sys.exit()
if args.about:
usage()
sys.exit()
if args.input:
input_url = str(args.input[0]).strip()
User_Password = str(args.password[0].strip())
User_Name = str(args.username[0].strip())
url_checker(input_url,current_directory,User_Name,User_Password)
sys.exit()
if __name__ == "__main__":
main(sys.argv[1:])

View File

@@ -1,72 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This module serves as a universal downloader for downloading Images.
This module supports handling of cookies.
This module needs a File_Name for the file to be downloaded,
Directory_path which is the directory path where you want to download the file,
tasty_cookies refers to the `cookies` you fetch from your session.
ddl_image is the direct link to the image itself.
This module uses `requests` library to achieve the handling of cookies.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import requests
import shutil
from downloader.universal import main as FileDownloader
def main(File_Name_Final,Directory_path,tasty_cookies,ddl_image):
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
if os.path.isfile(File_Check_Path):
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
pass
if not os.path.isfile(File_Check_Path):
print('[Comic-dl] Downloading : ',File_Name_Final)
response = requests.get(ddl_image, stream=True,cookies=tasty_cookies)
try:
with open(File_Name_Final, 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
File_Path = os.path.normpath(File_Name_Final)
except Exception as e:
print("Couldn't download file from : ",ddl_image)
pass
try:
shutil.move(File_Path,Directory_path)
except Exception as e:
print(e,'\n')
pass
def with_referer(File_Name_Final,Directory_path,tasty_cookies,ddl_image,referer):
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
if os.path.isfile(File_Check_Path):
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
pass
if not os.path.isfile(File_Check_Path):
print('[Comic-dl] Downloading : ',File_Name_Final)
headers = {'Referer': referer}
response = requests.get(ddl_image, stream=True,cookies=tasty_cookies,headers=headers)
try:
with open(File_Name_Final, 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
File_Path = os.path.normpath(File_Name_Final)
except Exception as e:
print("Couldn't download file from : ",ddl_image)
pass
try:
shutil.move(File_Path,Directory_path)
except Exception as e:
print(e,'\n')
pass
if __name__ == '__main__':
main()
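
For reference, the removed downloader above exposes two entry points: `main()` for cookie-backed downloads and `with_referer()` when the site also demands a Referer header. A rough usage sketch follows; the URL, file name, and directory are hypothetical values, not ones taken from the project:

import os
import requests
from downloader.cookies_required import main as cookie_download
session = requests.Session()
response = session.get('http://example.com/reader/chapter-1')  # hypothetical chapter URL
tasty_cookies = response.cookies  # cookies reused for the image request
directory_path = os.path.normpath('Example Series/Chapter 1')  # hypothetical target directory
if not os.path.exists(directory_path):
    os.makedirs(directory_path)
# Downloads into the working directory, then moves the file into directory_path.
cookie_download('1.jpg', directory_path, tasty_cookies,
                'http://example.com/content/comics/1.jpg')  # hypothetical image URL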

View File

@@ -1,46 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This module serves as a universal downloader for downloading Images.
Note that this module does not support handling of cookies, for that you
need to refer to `cookies_required` module.
This module needs a File_Name for the file to be downloaded,
Directory_path which is the directory path where you want to download the file,
ddl_image is the direct link to the image itself.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import urllib
import shutil
import urllib.request
#from urllib import URLError
import sys
def main(File_Name_Final,Directory_path,ddl_image):
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
if os.path.isfile(File_Check_Path):
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
pass
if not os.path.isfile(File_Check_Path):
print('[Comic-dl] Downloading : ',File_Name_Final)
urllib.request.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
urllib.request.urlretrieve(ddl_image, File_Name_Final)
#filename, headers = urllib.urlretrieve(ddl_image,File_Name_Final)
#print "File Name : ",filename
#print "Headers : ",headers
File_Path = os.path.normpath(File_Name_Final)
try:
shutil.move(File_Path,Directory_path)
except Exception as e:
print(e,'\n')
os.remove(File_Path)
pass
if __name__ == '__main__':
main()

View File

@@ -1,51 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""This python module decides which URL should be assigned to which other module from the site package.
"""
from __future__ import absolute_import
from __future__ import print_function
#import urllib as urllib2
from urllib.parse import urlparse
from sites.yomanga import yomanga_Url_Check
from sites.gomanga import gomanga_Url_Check
from sites.mangafox import mangafox_Url_Check
from sites.batoto import batoto_Url_Check
from sites.kissmanga import kissmanga_Url_Check
from sites.comic_naver import comic_naver_Url_Check
from downloader import universal,cookies_required
def url_checker(input_url, current_directory, User_Name, User_Password):
domain = urlparse(input_url).netloc
if domain in ['mangafox.me']:
mangafox_Url_Check(input_url, current_directory)
elif domain in ['yomanga.co']:
yomanga_Url_Check(input_url, current_directory)
elif domain in ['gomanga.co']:
gomanga_Url_Check(input_url, current_directory)
elif domain in ['bato.to']:
batoto_Url_Check(input_url, current_directory, User_Name, User_Password)
elif domain in ['kissmanga.com']:
kissmanga_Url_Check(input_url, current_directory)
elif domain in ['comic.naver.com']:
comic_naver_Url_Check(input_url, current_directory)
elif domain in ['']:
print('You need to specify at least 1 URL. Please run : comic-dl -h')
else:
print("%s is unsupported at the moment. Please request on Github repository."%(domain))

View File

@@ -1,2 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

View File

@@ -1,439 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import re
import os
import sys
from more_itertools import unique_everseen
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from downloader.universal import main as FileDownloader
from six.moves import range
"""Bato serves the chapters in 2 ways :
1.) All the images on 1 page by default. However, only some of the series/chapters have this thing.
2.) Traditional 1 page 1 image thing.
We can check which kind of page this is by checking the "NEXT ARROW" kind of thing to move to the next page.
batoto_login function open a fresh instance of selenium webdriver and logs the user in by sending user name and password to batoto login page.
The session is maintained and that instance of selenium webdriver is used to browse the pages to maintain the session and see the pages without
any restriction.
The script should show error if the user is trying the access the page not visible to logged out users and quit. Some instances of pages to replicate these validations :
1.) Page not available to logged out users : http://bato.to/reader#1f018238b7e945ed
2.) Single Page with all images : http://bato.to/reader#cb22bfed948294cb
3.) Traditional Manga Page : http://bato.to/reader#e5fc75f0ca34bcd5
There are small portions in the code block to explain certain scenarios, so devs. please go through them if you're thinking of changing something.
The directory contains the name of the Scanlation group as well, because the script currently downloads jus the english chapters, but in future it
will download all the languages available. So, this one as a reminded (lol) and for consistency. Oh, let's not forget the group's hardwork as well.
Currently there is no way/hack to view all the images in one page manually or to bypass the not logged in user restriction.
This script pretty much does everything fine. However, should you encounter a bug/problem, please mention in the github issue.
All bug fixes and patches are welcomed.
"""
def create_driver():
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) ' \
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/39.0.2171.95 Safari/537.36'
driver = webdriver.PhantomJS(
desired_capabilities=desired_capabilities,
service_args=['--load-images=no'])
return driver
def single_chapter(driver, url, current_directory, User_Name, User_Password):
"""This little block checks whether the user has provided the arguments for password or username.
If the user has provided something, then check that both UserName and Password has been provided.
Filling either of them won't work. If the user has provided both, the username and password, send
that info to batoto_login function which will create a logged in session and return us that instance
of the selenium webdriver.
"""
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
print("Username or Password cannot be empty.")
sys.exit()
print("Authenticating Your Username and Password ...")
batoto_login(driver, User_Name, User_Password)
print("Logged in successfully")
"""Selenium was navigating to the new url, but the old page still had its resources loaded, which made selenium
think that the page was already loaded. So, it started taking 'Stale Elements' and threw the same exception.
So, refreshing the page seemed to do the job.
"""
driver.get(url)
driver.refresh()
"""Let's wait till the 'comic wrap' element has been loaded. This element contains the actual
image for the comic. This element doesn't load in the beginning, so Selenium could be tricked
into the false alarm that the page has been loaded. Half loaded page will seem like fully loaded
page and selenium will start executing the search operation, which will cause the script to break
in case everything 'Comic Image' has been loaded.
"""
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "comic_wrap"))
)
except Exception as e:
pass
page_title = str(driver.title)
"""Batoto doesn't provide shit in the source code of the web page. Hence, we'll be using the outer HTML
to scrap all the info we need.
"""
elem = driver.find_element_by_xpath("//*")
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
"""As mentioned above, batoto won't let the user watch/read the older entries/chapters if you're not logged in.
So, if any user tries to download any such page, let's show the user the error and close the instance of selenium
webdriver, and quit the script entirely without wasting anymore resources.
"""
try:
access_check = driver.find_element_by_xpath(
'//*[@id="reader"]/div/span').text
if access_check in [
"ERROR [10030]: The thing you're looking for is unavailable. It may be due to:"]:
print("You cannot access this page. You'll need to log in to download this page.")
driver.quit()
sys.exit()
else:
pass
except Exception as e:
pass
try:
# Getting the Series Name from the <title></title> tags of the web
# page.
Series_Name = str(
re.search(
'^(.*)\ \-',
page_title).group(1)).strip().replace(
'_',
' ').title()
except Exception as e:
Series_Name = "Unkown Series"
try:
# Getting the volume number from the <title></title> tags of the web
# page.
volume_number = int(
str(re.search('vol (\d+)', page_title).group(1)).strip())
except Exception as e:
volume_number = '0'
try:
# Getting the chapter number from the <title></title> tags of the web
# page.
chapter_number = int(
str(re.search('ch (\d+)', page_title).group(1)).strip())
except Exception as e:
chapter_number = '0'
try:
# Used to find translation group's name from the 'Drop Down Menu'.
Group_Name_Finder = str(driver.find_element_by_xpath(
'//*[@id="reader"]/div[1]/ul/li[3]/select').text).replace("/", " ").strip()
except Exception as e:
# Some entries on Batoto don't have a group name. If we run into such an
# occasion, be prepared for it.
Group_Name_Finder = str('No Group')
try:
# This check tells us whether this particular web page uses the
# traditional layout or the all-images-on-one-page layout.
page_list = driver.find_element_by_id('page_select')
except Exception as e:
# If we cannot find the 'page_select' element, it means that this
# chapter is showing all the images in one page.
page_list = False
if volume_number == 0:
# Some series don't seem to have volumes mentioned. Let's assume
# they're 0.
Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + \
str(chapter_number) + " [" + str(Group_Name_Finder) + " ]"
else:
Raw_File_Directory = str(Series_Name) + '/' + "Volume " + str(volume_number) + \
'/' + "Chapter " + str(chapter_number) + " [" + str(Group_Name_Finder) + " ]"
# Fix for "Special Characters" in The series name
File_Directory = re.sub(
'[^A-Za-z0-9\-\.\'\#\/ \[\]]+',
'',
Raw_File_Directory)
Directory_path = os.path.normpath(File_Directory)
print('\n')
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
print('{:^80}'.format('=====================================================================\n'))
if page_list: # If batoto is serving 1 image per page, we'll be using this part.
"""We will be grabbing all the values in the drop down menu that has page numbers and take the very last value
and extract the integer from it and use it to know what is the last page number for this chapter.
Batoto follow a very simple linking syntax for serving the images, so let's exploit that to get the images
without hitting batoto for each and every page of the chapter.
URL Syntax : http://img.bato.to/comics/2016/11/02/s/read58196cffb13dd/img000001.jpg
Look at the last number for the image. Manipulate that and we have what we need.
"""
items_list = page_list.find_elements_by_tag_name("option")
for item in items_list:
list_of_pages = item.text
lst_pag = str(list_of_pages)
Last_Page_number = int(
str(re.search('(\d+)', lst_pag).group(1)).strip())
img_link = driver.find_element_by_id('comic_page').get_attribute('src')
for i in range(1, Last_Page_number + 1):
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
if len(str(i)) == 1:
ddl_image = str(img_link).replace(
'img000001', 'img00000%s') % (i)
else:
ddl_image = str(img_link).replace(
'img000001', 'img0000%s') % (i)
File_Name_Final = str(
i).strip() + "." + str(re.search('\d\.(.*?)$', ddl_image).group(1)).strip()
FileDownloader(File_Name_Final, Directory_path, ddl_image)
print('\n')
print("Completed downloading ", Series_Name, ' - ', chapter_number)
# driver.close()
# If Batoto is serving all the images in one page, we'll follow this block.
if not page_list:
"""Since all the image links are in one place, we don't have to rack our brains. Grab all the links
to the images and download them one by one.
"""
soup = BeautifulSoup(Page_Source, "html.parser")
Image_Links = soup.findAll('div', {'style': 'text-align:center;'})
for link in Image_Links:
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
x = link.findAll('img')
for a in x:
ddl_image = a['src']
File_Name_Final = str(
re.search(
'img0000(\d+)\.([jpg]|[png])',
ddl_image).group(1)).strip() + "." + str(
re.search(
'\d\.(.*?)$',
ddl_image).group(1)).strip()
FileDownloader(File_Name_Final, Directory_path, ddl_image)
print('\n')
print("Completed Downloading ", Series_Name, ' - ', chapter_number)
def whole_series(driver, url, current_directory, User_Name, User_Password):
# print "Whole Series : ",url
"""This little block checks whether the user has provided the arguments for password or username.
If the user has provided something, then check that both UserName and Password has been provided.
Filling either of them won't work. If the user has provided both, the username and password, send
that info to batoto_login function which will create a logged in session and return us that instance
of the selenium webdriver.
"""
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
print("Username or Password cannot be empty.")
sys.exit()
print("Authenticating Your Username and Password ...")
batoto_login(driver, User_Name, User_Password)
print("Logged in successfully")
driver.get(url)
"""Let's wait till the 'content' element has been loaded. This element contains the list of all the
chapters related to a particular manga. This element doesn't load in the beginning, so Selenium could
be tricked into the false alarm that the page has been loaded. Half loaded page will seem like fully
loaded page and selenium will start executing the search operation, which will cause the script to
break in case everything 'Comic Image' has been loaded.
"""
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "content"))
)
except Exception as e:
pass
elem = driver.find_element_by_xpath("//*")
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
"""Basic idea is to grab all the 'a href' links found in the `row lang_English chapter_row` class
and put them inside a lit. Later, for each element of the list, call the 'single_chapter' function to
do the rest of the job.
"""
link_list = []
soup = BeautifulSoup(Page_Source, "html.parser")
all_links = soup.findAll(
'tr', {'class': 'row lang_English chapter_row'})
for link in all_links:
x = link.findAll('a')
for a in x:
ddl_image = a['href']
if "reader" in ddl_image:
link_list.append(ddl_image)
print("Total Chapters To Download : ", len(link_list))
for item in link_list:
url = str(item)
User_Name = 'N'
User_Password = 'N'
single_chapter(
driver,
url,
current_directory,
User_Name,
User_Password)
else:
# If the user hasn't supplied any login information, we'll do this.
driver.get(url)
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "content"))
)
except Exception as e:
pass
elem = driver.find_element_by_xpath("//*")
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
link_list = []
soup = BeautifulSoup(Page_Source, "html.parser")
all_links = soup.findAll(
'tr', {'class': 'row lang_English chapter_row'})
for link in all_links:
x = link.findAll('a')
for a in x:
ddl_image = a['href']
if "reader" in ddl_image:
link_list.append(ddl_image)
print("Total Chapters To Download : ", len(link_list))
#print(link_list)
for x in link_list:
url = str(x)
User_Name = 'N'
User_Password = 'N'
single_chapter(
driver,
url,
current_directory,
User_Name,
User_Password)
def batoto_login(driver, User_Name, User_Password):
driver.get(
"https://bato.to/forums/index.php?app=core&module=global&section=login")
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "ips_password"))
)
except Exception as e:
# driver.save_screenshot('Single_exception.png')
pass
LoggedOut_Title = driver.title
driver.find_element_by_id('ips_username').send_keys(User_Name)
driver.find_element_by_id('ips_password').send_keys(User_Password)
driver.find_element_by_xpath('//*[@id="login"]/fieldset[2]/input').click()
LoggedIn_Title = driver.title
"""A little check to see whether we've logged in or not. Comparing the titles of the before and after logging
pages.
"""
if str(LoggedIn_Title).strip() == str(LoggedOut_Title).strip():
print("Couldn't log you in. Please check your credentials.")
driver.quit()
sys.exit()
def batoto_Url_Check(input_url, current_directory, User_Name, User_Password):
batoto_single_regex = re.compile(
'https?://(?P<host>bato.to)/reader\#(?P<extra_characters>[\d\w-]+)?(\/|.)')
batoto_whole_regex = re.compile(
'^https?://(?P<host>bato.to)/comic/\_/comics/(?P<comic>[\d\w-]+)?(\/|.)$')
#print "Inside"
lines = input_url.split('\n')
for line in lines:
found = re.search(batoto_single_regex, line)
if found:
match = found.groupdict()
if match['extra_characters']:
url = str(input_url)
driver = create_driver()
single_chapter(
driver,
url,
current_directory,
User_Name,
User_Password)
driver.quit()
else:
pass
found = re.search(batoto_whole_regex, line)
if found:
match = found.groupdict()
if match['comic']:
url = str(input_url)
driver = create_driver()
whole_series(
driver,
url,
current_directory,
User_Name,
User_Password)
driver.quit()
else:
pass
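
The image-URL trick described in the docstring inside `single_chapter` above (rewrite the trailing counter of the first image's URL instead of visiting every reader page) can be expressed more compactly with zero-padding. A sketch equivalent to the `replace('img000001', ...)` branches in that function, reusing the sample URL from its docstring:

import re
def page_image_urls(first_image_url, last_page_number):
    # Batoto image names run img000001.jpg, img000002.jpg, ...; rewrite the
    # six-digit counter for every page instead of requesting each reader page.
    for page in range(1, last_page_number + 1):
        yield re.sub(r'img\d{6}', 'img' + str(page).zfill(6), first_image_url)
for ddl_image in page_image_urls(
        'http://img.bato.to/comics/2016/11/02/s/read58196cffb13dd/img000001.jpg', 3):
    print(ddl_image)  # ...img000001.jpg, ...img000002.jpg, ...img000003.jpg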

View File

@@ -1,114 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import print_function
import re
import sys
import os
import requests
from downloader.cookies_required import with_referer as FileDownloader
from six.moves import range
from six.moves import input
def single_chapter(url,current_directory):
s = requests.Session()
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
req = s.get(url,headers=headers)
cookies = req.cookies
page_source_1 = str(req.text.encode('utf-8'))
try:
#Korean_Name = re.search(r'<h2>(.*?)<span class="wrt_nm">',str(page_source)).group(1)
Series_Name = re.search(r'titleId=(\d+)',url).group(1)
except Exception as e:
Series_Name = "Unknown"
try:
#chapter_number = int(re.search(r'\<span\ class\=\"total\"\>(.\d+)\<\/span\>',page_source_1).group(1))
chapter_number = re.search(r'&no=(\d+)',url).group(1)
except Exception as e:
print(e)
chapter_number = 0
img_regex = r'http://imgcomic.naver.net/webtoon/\d+/\d+/.+?\.(?:jpg|png|gif|bmp|JPG|PNG|GIF|BMP)'
img_links = list(re.findall(img_regex,page_source_1))
Raw_File_Directory = str(Series_Name) +'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
print('\n')
print('{:^80}'.format('=====================================================================\n'))
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for x,items in enumerate(img_links):
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
FileDownloader(str(x+1)+str(items[-4:]),Directory_path,cookies,items,url)
print('\n')
print("Completed downloading ",Series_Name)
def whole_series(url, current_directory):
s = requests.Session()
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
req = s.get(url,headers=headers)
cookies = req.cookies
page_source_1 = req.text.encode('utf-8')
titleId = re.search(r'titleId=(\d+)',url).group(1)
try:
first_link = int(re.search(r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %(titleId),page_source_1).group(1))
except Exception as e:
first_link = eval(input("Please Enter the Last chapter of the series : "))
if not first_link:
print("You failed to enter the last chapter count. Script will exit now.")
sys.exit()
for x in range(1,int(first_link)):
Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" %(titleId,x)
single_chapter(Chapter_Url,current_directory)
def comic_naver_Url_Check(input_url, current_directory):
comic_naver_single_regex = re.compile(
'https?://(?P<host>comic.naver.com)/webtoon/(?P<detail>detail.nhn)\?titleId\=(?P<extra_characters>[\d]+)?(\/|.)')
comic_naver_whole_regex = re.compile(
'https?://(?P<host>comic.naver.com)/webtoon/(?P<list>list.nhn)\?titleId\=(?P<extra_characters>[\d]+)?(\/|.)')
lines = input_url.split('\n')
for line in lines:
found = re.search(comic_naver_single_regex, line)
if found:
match = found.groupdict()
if match['detail']:
url = str(input_url)
single_chapter(url, current_directory)
else:
pass
found = re.search(comic_naver_whole_regex, line)
if found:
match = found.groupdict()
if match['list']:
url = str(input_url)
whole_series(url, current_directory)
else:
pass
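
The Naver module above identifies everything it needs from the query string alone: `titleId` names the series and `no` is the chapter number, which is exactly what its two `re.search` calls pull out. A worked example with made-up numeric IDs:

import re
url = 'http://comic.naver.com/webtoon/detail.nhn?titleId=123456&no=42'  # hypothetical IDs
series_id = re.search(r'titleId=(\d+)', url).group(1)  # '123456' -> used as the folder name
chapter_no = re.search(r'&no=(\d+)', url).group(1)  # '42' -> "Chapter 42"
print(series_id, chapter_no)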

View File

@@ -1,128 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import requests
import re
import os
import sys
from more_itertools import unique_everseen
from bs4 import BeautifulSoup
from downloader.cookies_required import main as FileDownloader
def single_chapter(url,current_directory):
if not url:
print("Couldn't get the URL. Please report it on Github Repository.")
sys.exit(0)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
try:
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
except Exception as e:
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
ddl_list = list(unique_everseen(ddl_image_list))
print('\n')
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for i in ddl_list:
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
print('\n')
print("Completed downloading ",Series_Name)
def whole_series(url,current_directory):
if not url:
print("Couldn't get the URL. Please report it on Github Repository.")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
soup = BeautifulSoup(Page_source, 'html.parser')
chapter_text = soup.findAll('div',{'class':'title'})
for link in chapter_text:
x = link.findAll('a')
for a in x:
url = a['href']
single_chapter(url,current_directory)
def gomanga_Url_Check(input_url,current_directory):
gomanga_single_regex = re.compile('https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
gomanga_whole_regex = re.compile('^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
lines = input_url.split('\n')
for line in lines:
found = re.search(gomanga_single_regex, line)
if found:
match = found.groupdict()
if match['Chapter']:
url = str(input_url)
single_chapter(url,current_directory)
else:
pass
found = re.search(gomanga_whole_regex, line)
if found:
match = found.groupdict()
if match['comic']:
url = str(input_url)
whole_series(url,current_directory)
else:
pass

View File

@@ -1,145 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import os
import sys
from bs4 import BeautifulSoup
from downloader.universal import main as FileDownloader
import cfscrape
def single_chapter(url, current_directory):
scraper = cfscrape.create_scraper()
Page_Source = scraper.get(str(url)).content
formatted = BeautifulSoup(Page_Source, "lxml")
meta = formatted.findAll('title')
meta_data = list(str(meta).split('\n'))
try:
Series_Name = str(meta_data[2])
except Exception as e:
print (e)
Series_Name = "Unkown Series"
try:
# Getting the Volume Number from the page source.
volume_number = int(
str(re.search('Vol\.(.*)\ Ch', Page_Source).group(1)).strip())
except Exception as e:
volume_number = '0'
try:
chapter_number = int(str(meta_data[3]))
except Exception as e:
try:
# Getting the Volume Number from the page source.
chapter_number = int(
str(re.search('Ch\.(.*)\:', Page_Source).group(1)).strip())
except Exception as e:
chapter_number = '0'
all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', str(formatted))
if volume_number == '0':
# Some series don't seem to have volumes mentioned. Let's assume
# they're 0.
Raw_File_Directory = str(Series_Name) + '/' + \
"Chapter " + str(chapter_number)
else:
Raw_File_Directory = str(Series_Name) + '/' + "Volume " + \
str(volume_number) + '/' + "Chapter " + str(chapter_number)
# Fix for "Special Characters" in The series name
File_Directory = re.sub(
'[^A-Za-z0-9\-\.\'\#\/ \[\]]+',
'',
Raw_File_Directory)
Directory_path = os.path.normpath(File_Directory)
print ('\n')
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for elements in all_links:
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
ddl_image = str(elements).strip()
try:
File_Name_Final = str(re.search(
's0/(.*)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:])
except Exception as e:
File_Name_Final = str(re.search(
'title\=(.*)\_(\d+)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:])
FileDownloader(File_Name_Final, Directory_path, ddl_image)
print('\n')
print("Completed downloading ", Series_Name, ' - ', chapter_number)
def whole_series(url, current_directory):
scraper = cfscrape.create_scraper()
Page_Source = scraper.get(str(url)).content
link_list = []
soup = BeautifulSoup(Page_Source, "html.parser")
all_links = soup.findAll('table', {'class': 'listing'})
for link in all_links:
x = link.findAll('a')
for a in x:
ddl_image = a['href']
if "Manga" in ddl_image:
final_url = "http://kissmanga.com" + ddl_image
link_list.append(final_url)
if int(len(link_list)) == '0':
print("Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes.")
sys.exit()
print("Total Chapters To Download : ", len(link_list))
for item in link_list:
url = str(item)
single_chapter(url, current_directory)
def kissmanga_Url_Check(input_url, current_directory):
kissmanga_single_regex = re.compile(
'https?://(?P<host>kissmanga.com)/Manga/(?P<Series_Name>[\d\w-]+)?/((?P<Volume>[Vol\-\d]+)|(.*)(?P<Chapter>[Ch\-\d]+))\-(?P<Chap_Name>[\d\w-]+)\?(?P<id>[\=\d\w-]+)')
kissmanga_whole_regex = re.compile(
'^https?://(?P<host>kissmanga.com)/Manga/(?P<comic>[\d\w\-]+)?(\/|.)$')
lines = input_url.split('\n')
for line in lines:
found = re.search(kissmanga_single_regex, line)
if found:
match = found.groupdict()
if match['Chap_Name']:
url = str(input_url)
single_chapter(url, current_directory)
else:
pass
found = re.search(kissmanga_whole_regex, line)
if found:
match = found.groupdict()
if match['comic']:
url = str(input_url)
whole_series(url, current_directory)
else:
pass
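
KissManga sits behind Cloudflare, which is why the module above goes through `cfscrape` rather than a plain requests session: `create_scraper()` returns a requests-compatible session that tries to solve the JavaScript challenge before returning the page, and the image URLs are then pulled from the inline `lstImages.push("...")` calls. A trimmed sketch of that flow (the chapter URL is hypothetical):

import re
import cfscrape
scraper = cfscrape.create_scraper()  # behaves like a requests session
page = scraper.get('http://kissmanga.com/Manga/Some-Series/Ch-001?id=1').content  # hypothetical URL
image_links = re.findall(r'lstImages.push\("(.*)"\);', page.decode('utf-8'))
for ddl_image in image_links:
    print(ddl_image)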

View File

@@ -1,180 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import requests
import os
import re
import sys
import shutil
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from downloader.universal import main as FileDownloader
from six.moves import range
def create_driver():
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) ' \
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/39.0.2171.95 Safari/537.36'
driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities,service_args=['--load-images=no'])
return driver
def single_chapter(driver,url,current_directory):
try:
Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
except Exception as e:
Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
try:
volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories.
except Exception as e:
volume_number = "Volume 01"
try:
chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
except Exception as e:
chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
if volume_number == '0':
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) # Some series don't seem to have volumes mentioned. Let's assume they're 0.
else:
Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
driver.get(url)
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "image"))
)
except Exception as e:
driver.save_screenshot("error.png")
print("Couldn't load the element. I'll try to move ahead in any case.")
print('\n')
print("I took a screenshot, please attach it in the issue you open in the repository.")
pass
elem = driver.find_element_by_xpath("//*")
Page_Source = str(elem.get_attribute("outerHTML").encode('utf-8'))
First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden.
current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
print('\n')
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
for x in range(current_chapter_count,last_chapter_count+1):
driver.refresh()
File_Name_Final = str(x)+'.jpg'
link_container = driver.find_element_by_xpath('//*[@id="image"]')
ddl_image = str(link_container.get_attribute('src'))
FileDownloader(File_Name_Final,Directory_path,ddl_image)
driver.find_element_by_xpath('//*[@id="top_bar"]/div/a[2]').click()
print('\n')
print("Completed downloading ",Series_Name,' - ',chapter_number)
def whole_series(url,current_directory):
if not url:
print("Couldn't get the URL. Please report it on Github Repository.")
try:
Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
except Exception as e:
print('Check if the URL is correct or not. Report on Github.')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
response = requests.get(url, headers=headers)
Page_source = str(response.text.encode('utf-8'))
try:
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v"
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
if len(links) == 0:
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c"
#print chapter_link_format
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
except Exception as e:
print("Error : ",e,'\n',"Please report this error on Github repository.")
driver = create_driver()
for x in links:
chapter_link = str(str(chapter_link_format)+str(x)+"html").strip()
try:
single_chapter(driver,chapter_link,current_directory)
except Exception as e:
print(e)
driver.quit()
driver.quit()
def mangafox_Url_Check(input_url,current_directory):
mangafox_single_regex = re.compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
mangafox_whole_regex = re.compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
lines = input_url.split('\n')
for line in lines:
found = re.search(mangafox_single_regex, line)
if found:
match = found.groupdict()
if match['issue']:
url = str(input_url)
driver = create_driver()
try:
single_chapter(driver,url,current_directory)
except Exception as e:
print(e)
driver.quit()
driver.quit()
sys.exit()
else:
pass
found = re.search(mangafox_whole_regex, line)
if found:
match = found.groupdict()
if match['comic_series']:
url = str(input_url)
#driver = create_driver()
try:
whole_series(url,current_directory)
except Exception as e:
print(e)
sys.exit()
else:
pass
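
The MangaFox reader embeds its state as JavaScript variables in the page, which is what the `current_page=` and `total_pages=` regexes in `single_chapter` above read to learn how many pages to step through. A worked example against an illustrative fragment of page source:

import re
page_source = 'var current_page=1; var total_pages=24;'  # illustrative inline-script fragment
current_page = int(re.search(r'current_page\=(.*?)\;', page_source).group(1))
total_pages = int(re.search(r'total_pages\=(.*?)\;', page_source).group(1))
print(current_page, total_pages)  # 1 24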

View File

@@ -1,121 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import requests
import re
import os
import sys
from more_itertools import unique_everseen
from bs4 import BeautifulSoup
from downloader.cookies_required import main as FileDownloader
def single_chapter(url,current_directory):
if not url:
print("Couldn't get the URL. Please report it on Github Repository.")
sys.exit(0)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
try:
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
except Exception as e:
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
ddl_list = list(unique_everseen(ddl_image_list))
print('\n')
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for i in ddl_list:
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
print('\n')
print("Completed downloading ",Series_Name)
def whole_series(url,current_directory):
if not url:
print("Couldn't get the URL. Please report it on Github Repository.")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
soup = BeautifulSoup(Page_source, 'html.parser')
chapter_text = soup.findAll('div',{'class':'title'})
for link in chapter_text:
x = link.findAll('a')
for a in x:
url = a['href']
single_chapter(url,current_directory)
def yomanga_Url_Check(input_url,current_directory):
yomanga_single_regex = re.compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
yomanga_whole_regex = re.compile('^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
lines = input_url.split('\n')
for line in lines:
found = re.search(yomanga_single_regex, line)
if found:
match = found.groupdict()
if match['Chapter']:
url = str(input_url)
single_chapter(url,current_directory)
else:
pass
found = re.search(yomanga_whole_regex, line)
if found:
match = found.groupdict()
if match['comic']:
url = str(input_url)
whole_series(url,current_directory)
else:
pass

View File

@@ -1,7 +0,0 @@
'''
Date Format : YY/MM/DD
'''
__version__ = '2016.11.26'

View File

@@ -1,73 +1,52 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from honcho import url_checker
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import argparse
from honcho import url_checker
from version import __version__
def version():
print '\n'
print '{:^80}'.format('Current Version : %s') % (__version__)
print '\n'
print '{:^80}'.format("More info : comic-dl -h")
print('\n')
print('{:^80}'.format('Current Version : %s')%(__version__))
print('\n')
print('{:^80}'.format("More info : comic-dl -h"))
def usage():
print '\n'
print '{:^80}'.format('################################################')
print '{:^80}'.format('Comic-DL Usage')
print '{:^80}'.format('################################################\n')
print '\n'
print '{:^80}'.format('Author : Xonshiz | Version : %s') % (__version__)
print '{:^80}'.format('-------------------------------------------------\n')
print "Comic-dl is a command line tool to download manga and comics from various comic and manga sites."
print "Using the script is pretty simple and should be easy for anyone familiar with a command line/shell."
print '\n'
print '{:^80}'.format("USAGE : comic-dl -i <URL to comic>")
print '\n'
print "Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ", '\n'
print "Available Arguments : "
print '{:^80}'.format("-i,--input : Specifies the Input URL")
print '{:^80}'.format("-h : Prints this help menu")
print '{:^80}'.format("--version : Prints the current version and exits")
print '{:^80}'.format("-a,--about : Shows the info about this script and exits.")
print '{:^80}'.format("-u,--username : Indicates username for a website.")
print '{:^80}'.format("-p,--password : Indicates password for a website.")
print('\n')
print('{:^80}'.format('################################################'))
print('{:^80}'.format('Comic-DL Usage'))
print('{:^80}'.format('################################################\n'))
print('\n')
print('{:^80}'.format('Author : Xonshiz | Version : %s')%(__version__))
print('{:^80}'.format('-------------------------------------------------\n'))
print("Comic-dl is a command line tool to download manga and comics from various comic and manga sites.")
print("Using the script is pretty simple and should be easy for anyone familiar with a command line/shell.")
print('\n')
print('{:^80}'.format("USAGE : comic-dl -i <URL to comic>"))
print('\n')
print("Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ",'\n')
print("Available Arguments : ")
print('{:^80}'.format("-i,--input : Specifies the Input URL"))
print('{:^80}'.format("-h : Prints this help menu"))
print('{:^80}'.format("--version : Prints the current version and exits"))
print('{:^80}'.format("-a,--about : Shows the info about this script and exits."))
print('{:^80}'.format("-u,--username : Indicates username for a website."))
print('{:^80}'.format("-p,--password : Indicates password for a website."))
def main(argv):
current_directory = str(os.getcwd())
parser = argparse.ArgumentParser(
description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
parser.add_argument(
'--version',
action='store_true',
help='Shows version and exits')
parser.add_argument(
'-a',
'--about',
action='store_true',
help='Shows the info regarding this script')
parser.add_argument(
'-i',
'--input',
nargs=1,
help='Inputs the URL to comic')
parser.add_argument(
'-p',
'--password',
nargs=1,
help='Indicates password for a website',
default='None')
parser.add_argument(
'-u',
'--username',
nargs=1,
help='Indicates username for a website',
default='None')
parser = argparse.ArgumentParser(description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
parser.add_argument('--version',action='store_true',help='Shows version and exits' )
parser.add_argument('-a','--about',action='store_true',help='Shows the info regarding this script' )
parser.add_argument('-i','--input',nargs=1,help='Inputs the URL to comic')
parser.add_argument('-p','--password',nargs=1,help='Indicates password for a website',default='None')
parser.add_argument('-u','--username',nargs=1,help='Indicates username for a website',default='None')
args = parser.parse_args()
@@ -82,8 +61,8 @@ def main(argv):
input_url = str(args.input[0]).strip()
User_Password = str(args.password[0].strip())
User_Name = str(args.username[0].strip())
url_checker(input_url, current_directory, User_Name, User_Password)
url_checker(input_url,current_directory,User_Name,User_Password)
sys.exit()
if __name__ == "__main__":
main(sys.argv[1:])

View File

@@ -11,70 +11,62 @@ ddl_image is the direct link to the image itself.
This module uses `requests` library to achieve the handling of cookies.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import requests
import shutil
from downloader.universal import main as FileDownloader
def main(File_Name_Final, Directory_path, tasty_cookies, ddl_image):
File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
def main(File_Name_Final,Directory_path,tasty_cookies,ddl_image):
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
if os.path.isfile(File_Check_Path):
print '[Comic-dl] File Exist! Skipping ', File_Name_Final, '\n'
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
pass
if not os.path.isfile(File_Check_Path):
print '[Comic-dl] Downloading : ', File_Name_Final
response = requests.get(ddl_image, stream=True, cookies=tasty_cookies)
if not os.path.isfile(File_Check_Path):
print('[Comic-dl] Downloading : ',File_Name_Final)
response = requests.get(ddl_image, stream=True,cookies=tasty_cookies)
try:
with open(File_Name_Final, 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
File_Path = os.path.normpath(File_Name_Final)
except Exception as e:
print "Couldn't download file from : ", ddl_image
print("Couldn't download file from : ",ddl_image)
pass
try:
shutil.move(File_Path, Directory_path)
shutil.move(File_Path,Directory_path)
except Exception as e:
print e, '\n'
print(e,'\n')
pass
def with_referer(
File_Name_Final,
Directory_path,
tasty_cookies,
ddl_image,
referer):
File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
def with_referer(File_Name_Final,Directory_path,tasty_cookies,ddl_image,referer):
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
if os.path.isfile(File_Check_Path):
print '[Comic-dl] File Exist! Skipping ', File_Name_Final, '\n'
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
pass
if not os.path.isfile(File_Check_Path):
print '[Comic-dl] Downloading : ', File_Name_Final
headers = {'Referer': referer}
response = requests.get(
ddl_image,
stream=True,
cookies=tasty_cookies,
headers=headers)
if not os.path.isfile(File_Check_Path):
print('[Comic-dl] Downloading : ',File_Name_Final)
headers = {'Referer': referer}
response = requests.get(ddl_image, stream=True,cookies=tasty_cookies,headers=headers)
try:
with open(File_Name_Final, 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
File_Path = os.path.normpath(File_Name_Final)
except Exception as e:
print "Couldn't download file from : ", ddl_image
print("Couldn't download file from : ",ddl_image)
pass
try:
shutil.move(File_Path, Directory_path)
shutil.move(File_Path,Directory_path)
except Exception as e:
print e, '\n'
print(e,'\n')
pass
if __name__ == '__main__':
main()

View File

@@ -9,32 +9,38 @@ Directory_path which is the directory path where you want to download the file,
ddl_image is the direct link to the image itself.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import urllib2
import urllib
import shutil
from urllib2 import URLError
import urllib
#from urllib import URLError
import sys
def main(File_Name_Final, Directory_path, ddl_image):
File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
def main(File_Name_Final,Directory_path,ddl_image):
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
if os.path.isfile(File_Check_Path):
print '[Comic-dl] File Exist! Skipping ', File_Name_Final, '\n'
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
pass
if not os.path.isfile(File_Check_Path):
print '[Comic-dl] Downloading : ', File_Name_Final
urllib.urlretrieve(ddl_image, File_Name_Final)
if not os.path.isfile(File_Check_Path):
print('[Comic-dl] Downloading : ',File_Name_Final)
urllib.request.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
urllib.request.urlretrieve(ddl_image, File_Name_Final)
#filename, headers = urllib.urlretrieve(ddl_image,File_Name_Final)
#print "File Name : ",filename
#print "Headers : ",headers
File_Path = os.path.normpath(File_Name_Final)
try:
shutil.move(File_Path, Directory_path)
shutil.move(File_Path,Directory_path)
except Exception as e:
print e, '\n'
print(e,'\n')
os.remove(File_Path)
pass
if __name__ == '__main__':
main()
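
The rewritten downloader above uses the Python 3 `urllib.request` names directly; on Python 2 that spelling only resolves with a compatibility shim such as the `future` aliases installed in the dispatcher below. A `six`-based alternative spelling (a sketch of an equivalent, not what this diff uses):

from six.moves.urllib.request import URLopener, urlretrieve
def fetch(ddl_image, file_name):
    # Mirrors main() above: set URLopener.version for the User-Agent, then download.
    URLopener.version = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36')
    urlretrieve(ddl_image, file_name)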

View File

@@ -5,44 +5,49 @@
"""This python module decides which URL should be assigned to which other module from the site package.
"""
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
#import urllib as urllib2
from sites.yomanga import yomanga_Url_Check
from sites.gomanga import gomanga_Url_Check
from sites.mangafox import mangafox_Url_Check
from sites.batoto import batoto_Url_Check
from sites.kissmanga import kissmanga_Url_Check
from sites.comic_naver import comic_naver_Url_Check
from downloader import universal, cookies_required
import urllib2
from downloader import universal,cookies_required
from urllib.parse import urlparse
def url_checker(input_url, current_directory, User_Name, User_Password):
domain = urllib2.urlparse.urlparse(input_url).netloc
domain = urlparse(input_url).netloc
if domain in ['mangafox.me']:
mangafox_Url_Check(input_url, current_directory)
pass
elif domain in ['yomanga.co']:
yomanga_Url_Check(input_url, current_directory)
pass
elif domain in ['gomanga.co']:
gomanga_Url_Check(input_url, current_directory)
pass
elif domain in ['bato.to']:
batoto_Url_Check(
input_url,
current_directory,
User_Name,
User_Password)
pass
batoto_Url_Check(input_url, current_directory, User_Name, User_Password)
elif domain in ['kissmanga.com']:
kissmanga_Url_Check(input_url, current_directory)
pass
elif domain in ['comic.naver.com']:
comic_naver_Url_Check(input_url, current_directory)
pass
elif domain in ['']:
print 'You need to specify at least 1 URL. Please run : comic-dl -h'
print('You need to specify at least 1 URL. Please run : comic-dl -h')
else:
print "%s is unsupported at the moment. Please request on Github repository." % (domain)
print("%s is unsupported at the moment. Please request on Github repository."%(domain))

View File

@@ -1,2 +1,2 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

View File

@@ -1,6 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import re
import os
import sys
@@ -12,6 +14,7 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from downloader.universal import main as FileDownloader
from six.moves import range
"""Bato serves the chapters in 2 ways :
@@ -63,12 +66,12 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
print "Username or Password cannot be empty."
print("Username or Password cannot be empty.")
sys.exit()
print "Authenticating Your Username and Password ..."
print("Authenticating Your Username and Password ...")
batoto_login(driver, User_Name, User_Password)
print "Logged in successfully"
print("Logged in successfully")
"""Selenium was navigating to the new url, but the old page still had its resources loaded, which made selenium
think that the page was already loaded. So, it started taking 'Stale Elements' and threw the same exception.
So, refreshing the page seemed to do the job.
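A rough, self-contained sketch of that refresh-then-wait pattern (generic Selenium code for illustration; the element id here is a placeholder, not necessarily the one Batoto uses):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

def open_reader_page(driver, url, element_id="comic_page"):
    # Navigate, then refresh so leftovers from the previous page are dropped
    # and we never end up querying stale element references.
    driver.get(url)
    driver.refresh()
    # Wait until the reader element of the *new* page is actually present.
    return WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, element_id))
    )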
@@ -107,7 +110,7 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
if access_check in [
"ERROR [10030]: The thing you're looking for is unavailable. It may be due to:"]:
print "You cannot access this page. You'll need to log in to download this page."
print("You cannot access this page. You'll need to log in to download this page.")
driver.quit()
sys.exit()
@@ -183,9 +186,9 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
Directory_path = os.path.normpath(File_Directory)
print '\n'
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
print '{:^80}'.format('=====================================================================\n')
print('\n')
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
print('{:^80}'.format('=====================================================================\n'))
if page_list: # If batoto is serving 1 image per page, we'll be using this part.
"""We will be grabbing all the values in the drop down menu that has page numbers and take the very last value
@@ -224,8 +227,8 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
i).strip() + "." + str(re.search('\d\.(.*?)$', ddl_image).group(1)).strip()
FileDownloader(File_Name_Final, Directory_path, ddl_image)
print '\n'
print "Completed downloading ", Series_Name, ' - ', chapter_number
print('\n')
print("Completed downloading ", Series_Name, ' - ', chapter_number)
# driver.close()
# If Batoto is serving all the images in one page, we'll follow this block.
@@ -253,8 +256,8 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
ddl_image).group(1)).strip()
FileDownloader(File_Name_Final, Directory_path, ddl_image)
print '\n'
print "Completed Downloading ", Series_Name, ' - ', chapter_number
print('\n')
print("Completed Downloading ", Series_Name, ' - ', chapter_number)
def whole_series(driver, url, current_directory, User_Name, User_Password):
@@ -268,12 +271,12 @@ def whole_series(driver, url, current_directory, User_Name, User_Password):
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
print "Username or Password cannot be empty."
print("Username or Password cannot be empty.")
sys.exit()
print "Authenticating Your Username and Password ..."
print("Authenticating Your Username and Password ...")
batoto_login(driver, User_Name, User_Password)
print "Logged in successfully"
print("Logged in successfully")
driver.get(url)
"""Let's wait till the 'content' element has been loaded. This element contains the list of all the
@@ -310,7 +313,7 @@ def whole_series(driver, url, current_directory, User_Name, User_Password):
link_list.append(ddl_image)
print "Total Chapters To Download : ", len(link_list)
print("Total Chapters To Download : ", len(link_list))
for item in link_list:
url = str(item)
@@ -349,8 +352,8 @@ def whole_series(driver, url, current_directory, User_Name, User_Password):
if "reader" in ddl_image:
link_list.append(ddl_image)
print "Total Chapters To Download : ", len(link_list)
# print link_list
print("Total Chapters To Download : ", len(link_list))
#print(link_list)
for x in link_list:
url = str(x)
@@ -388,7 +391,7 @@ def batoto_login(driver, User_Name, User_Password):
"""
if str(LoggedIn_Title).strip() == str(LoggedOut_Title).strip():
print "Couldn't log you in. Please check your credentials."
print("Couldn't log you in. Please check your credentials.")
driver.quit()
sys.exit()
@@ -399,7 +402,7 @@ def batoto_Url_Check(input_url, current_directory, User_Name, User_Password):
'https?://(?P<host>bato.to)/reader\#(?P<extra_characters>[\d\w-]+)?(\/|.)')
batoto_whole_regex = re.compile(
'^https?://(?P<host>bato.to)/comic/\_/comics/(?P<comic>[\d\w-]+)?(\/|.)$')
#print "Inside"
lines = input_url.split('\n')
for line in lines:
found = re.search(batoto_single_regex, line)
@@ -423,6 +426,7 @@ def batoto_Url_Check(input_url, current_directory, User_Name, User_Password):
match = found.groupdict()
if match['comic']:
url = str(input_url)
driver = create_driver()
whole_series(
driver,

View File

@@ -2,93 +2,87 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import print_function
import re
import sys
import os
import requests
from downloader.cookies_required import with_referer as FileDownloader
from six.moves import range
from six.moves import input
"""Thanks to puilp0502 for his repo : https://github.com/puilp0502/comic-downloader
I got stuck at getting the images to download from the links. Then I went through puilp's script to see that I was missing referer in my requests module.
"""
def single_chapter(url, current_directory):
def single_chapter(url,current_directory):
s = requests.Session()
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
req = s.get(url, headers=headers)
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
req = s.get(url,headers=headers)
cookies = req.cookies
page_source_1 = req.text.encode('utf-8')
page_source_1 = str(req.text.encode('utf-8'))
try:
#Korean_Name = re.search(r'<h2>(.*?)<span class="wrt_nm">',str(page_source)).group(1)
Series_Name = re.search(r'titleId=(\d+)', url).group(1)
Series_Name = re.search(r'titleId=(\d+)',url).group(1)
except Exception as e:
Series_Name = "Unknown"
try:
#chapter_number = int(re.search(r'\<span\ class\=\"total\"\>(.\d+)\<\/span\>',page_source_1).group(1))
chapter_number = re.search(r'&no=(\d+)', url).group(1)
chapter_number = re.search(r'&no=(\d+)',url).group(1)
except Exception as e:
print e
print(e)
chapter_number = 0
img_regex = r'http://imgcomic.naver.net/webtoon/\d+/\d+/.+?\.(?:jpg|png|gif|bmp|JPG|PNG|GIF|BMP)'
img_links = list(re.findall(img_regex, page_source_1))
img_links = list(re.findall(img_regex,page_source_1))
Raw_File_Directory = str(Series_Name) +'/'+"Chapter "+str(chapter_number)
Raw_File_Directory = str(Series_Name).decode(
'utf-8') + '/' + "Chapter " + str(chapter_number)
# Fix for "Special Characters" in The series name
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
print '\n'
print '{:^80}'.format('=====================================================================\n')
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
print '{:^80}'.format('=====================================================================\n')
print('\n')
print('{:^80}'.format('=====================================================================\n'))
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for x, items in enumerate(img_links):
for x,items in enumerate(img_links):
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
FileDownloader(
str(x + 1) + str(items[-4:]), Directory_path, cookies, items, url)
FileDownloader(str(x+1)+str(items[-4:]),Directory_path,cookies,items,url)
print('\n')
print("Completed downloading ",Series_Name)
print '\n'
print "Completed downloading ", Series_Name
def whole_series(url, current_directory):
s = requests.Session()
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
req = s.get(url, headers=headers)
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
req = s.get(url,headers=headers)
cookies = req.cookies
page_source_1 = req.text.encode('utf-8')
titleId = re.search(r'titleId=(\d+)', url).group(1)
titleId = re.search(r'titleId=(\d+)',url).group(1)
try:
first_link = int(
re.search(
r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %
(titleId),
page_source_1).group(1))
first_link = int(re.search(r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %(titleId),page_source_1).group(1))
except Exception as e:
first_link = input("Please Enter the Last chapter of the series : ")
first_link = eval(input("Please Enter the Last chapter of the series : "))
if not first_link:
print "You failed to enter the last chapter count. Script will exit now."
print("You failed to enter the last chapter count. Script will exit now.")
sys.exit()
for x in range(1,int(first_link)):
Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" %(titleId,x)
single_chapter(Chapter_Url,current_directory)
for x in range(1, int(first_link)):
Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" % (
titleId, x)
single_chapter(Chapter_Url, current_directory)
def comic_naver_Url_Check(input_url, current_directory):
@@ -106,7 +100,7 @@ def comic_naver_Url_Check(input_url, current_directory):
if match['detail']:
url = str(input_url)
single_chapter(url, current_directory)
else:
pass

View File

@@ -1,146 +1,103 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import requests
import re
import os
import sys
from more_itertools import unique_everseen
from more_itertools import unique_everseen
from bs4 import BeautifulSoup
from downloader.cookies_required import main as FileDownloader
def single_chapter(url, current_directory):
def single_chapter(url,current_directory):
if not url:
print "Couldn't get the URL. Please report it on Github Repository."
print("Couldn't get the URL. Please report it on Github Repository.")
sys.exit(0)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(
re.search(
'\/read\/(.*?)/',
url).group(1)).strip().replace(
'_',
' ').title()
try:
# Getting the chapter count from the URL itself for naming the
# folder/dicrectories in integer.
chapter_number = int(str(re.search(
'0\/(.*?)/', url).group(1)).strip().replace('0', '').replace('/', ''))
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
except Exception as e:
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
Raw_File_Directory = str(Series_Name) + '/' + \
"Chapter " + str(chapter_number)
# Fix for "Special Characters" in The series name
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
ddl_list = list(unique_everseen(ddl_image_list))
print '\n'
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
print '{:^80}'.format('=====================================================================\n')
print('\n')
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for i in ddl_list:
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
ddl_image = "http://gomanga.co/reader/content/comics" + \
str(i).replace('"', '').replace('\\', '')
os.makedirs(File_Directory)
ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
File_Name_Final = str(
re.findall(
'\/(\d+)\.[jpg]|[png]',
i)).replace(
"[",
"").replace(
"]",
"").replace(
"'",
"").replace(
",",
"").strip() + "." + str(
re.findall(
'\d\.(.*?)$',
str(i))).replace(
",",
"").replace(
"[",
"").replace(
"]",
"").replace(
"'",
"").strip()
FileDownloader(
File_Name_Final,
Directory_path,
tasty_cookies,
ddl_image)
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
print '\n'
print "Completed downloading ", Series_Name
print('\n')
print("Completed downloading ",Series_Name)
def whole_series(url, current_directory):
def whole_series(url,current_directory):
if not url:
print "Couldn't get the URL. Please report it on Github Repository."
print("Couldn't get the URL. Please report it on Github Repository.")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(
re.search(
'\/series\/(.*?)/',
url).group(1)).strip().replace(
'_',
' ').title()
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
soup = BeautifulSoup(Page_source, 'html.parser')
chapter_text = soup.findAll('div', {'class': 'title'})
chapter_text = soup.findAll('div',{'class':'title'})
for link in chapter_text:
x = link.findAll('a')
for a in x:
url = a['href']
single_chapter(url, current_directory)
single_chapter(url,current_directory)
def gomanga_Url_Check(input_url, current_directory):
gomanga_single_regex = re.compile(
'https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
gomanga_whole_regex = re.compile(
'^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
def gomanga_Url_Check(input_url,current_directory):
gomanga_single_regex = re.compile('https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
gomanga_whole_regex = re.compile('^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
lines = input_url.split('\n')
for line in lines:
@@ -149,15 +106,23 @@ def gomanga_Url_Check(input_url, current_directory):
match = found.groupdict()
if match['Chapter']:
url = str(input_url)
single_chapter(url, current_directory)
single_chapter(url,current_directory)
else:
pass
found = re.search(gomanga_whole_regex, line)
if found:
match = found.groupdict()
if match['comic']:
url = str(input_url)
whole_series(url, current_directory)
whole_series(url,current_directory)
else:
pass

View File

@@ -10,18 +10,22 @@ import cfscrape
def single_chapter(url, current_directory):
scraper = cfscrape.create_scraper()
Page_Source = scraper.get(str(url)).content
soup = BeautifulSoup(Page_Source, "html.parser")
meta = soup.findAll('title')
meta_data = list(str(meta).split('\\n'))
formatted = BeautifulSoup(Page_Source, "lxml")
meta = formatted.findAll('title')
meta_data = list(str(meta).split('\n'))
try:
Series_Name = str(meta_data[2])
except Exception as e:
print (e)
Series_Name = "Unkown Series"
try:
@@ -42,8 +46,8 @@ def single_chapter(url, current_directory):
except Exception as e:
chapter_number = '0'
all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', Page_Source)
all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', str(formatted))
if volume_number == '0':
# Some series don't seem to have volumes mentioned. Let's assume
# they're 0.
@@ -61,9 +65,9 @@ def single_chapter(url, current_directory):
Directory_path = os.path.normpath(File_Directory)
print '\n'
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
print '{:^80}'.format('=====================================================================\n')
print ('\n')
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for elements in all_links:
if not os.path.exists(File_Directory):
@@ -78,8 +82,8 @@ def single_chapter(url, current_directory):
'title\=(.*)\_(\d+)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:])
FileDownloader(File_Name_Final, Directory_path, ddl_image)
print '\n'
print "Completed downloading ", Series_Name, ' - ', chapter_number
print('\n')
print("Completed downloading ", Series_Name, ' - ', chapter_number)
def whole_series(url, current_directory):
@@ -103,10 +107,10 @@ def whole_series(url, current_directory):
link_list.append(final_url)
if int(len(link_list)) == '0':
print "Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes."
print("Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes.")
sys.exit()
print "Total Chapters To Download : ", len(link_list)
print("Total Chapters To Download : ", len(link_list))
for item in link_list:
url = str(item)

View File

@@ -1,90 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import requests
import os
import re
import sys
import urllib2
import urllib
import shutil
from bs4 import BeautifulSoup
from urllib2 import URLError
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from downloader.universal import main as FileDownloader
from six.moves import range
def create_driver():
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) ' \
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/39.0.2171.95 Safari/537.36'
driver = webdriver.PhantomJS(
desired_capabilities=desired_capabilities,
service_args=['--load-images=no'])
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/39.0.2171.95 Safari/537.36'
driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities,service_args=['--load-images=no'])
return driver
def single_chapter(driver, url, current_directory):
def single_chapter(driver,url,current_directory):
try:
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(
re.search(
'manga\/(.*?)/v',
url).group(1)).strip().replace(
'_',
' ').title()
Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
except Exception as e:
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(
re.search(
'manga\/(.*?)/c',
url).group(1)).strip().replace(
'_',
' ').title()
Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
try:
# Getting the volume count from the URL itself for naming the
# folder/dicrectories.
volume_number = "Volume " + \
str(re.search('v(.*?)/c', url).group(1)).strip()
volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories.
except Exception as e:
volume_number = "Volume 01"
try:
# Getting the chapter count from the URL itself for naming the
# folder/dicrectories in integer.
chapter_number = int(
str(re.search('\/c(.*?)/\d', url).group(1)).strip())
chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
except Exception as e:
# Getting the chapter count from the URL itself for naming the
# folder/dicrectories in float.
chapter_number = float(
str(re.search('\/c(.*?)/\d', url).group(1)).strip())
chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
if volume_number == '0':
# Some series don't seem to have volumes mentioned. Let's assume
# they're 0.
Raw_File_Directory = str(Series_Name) + '/' + \
"Chapter " + str(chapter_number)
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) # Some series don't seem to have volumes mentioned. Let's assume they're 0.
else:
Raw_File_Directory = str(
Series_Name) + '/' + str(volume_number) + '/' + "Chapter " + str(chapter_number)
# Fix for "Special Characters" in The series name
File_Directory = re.sub(
'[^A-Za-z0-9\-\.\'\#\/ \[\]]+',
'',
Raw_File_Directory)
Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
driver.get(url)
@@ -93,109 +59,91 @@ def single_chapter(driver, url, current_directory):
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "image"))
)
except Exception as e:
driver.save_screenshot("error.png")
print "Couldn't load the element. I'll try to move ahead in any case."
print '\n'
print "I took a screenshot, please attach it in the issue you open in the repository."
print("Couldn't load the element. I'll try to move ahead in any case.")
print('\n')
print("I took a screenshot, please attach it in the issue you open in the repository.")
pass
elem = driver.find_element_by_xpath("//*")
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
Page_Source = str(elem.get_attribute("outerHTML").encode('utf-8'))
First_chapter_link = str(
re.search(
'http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg',
Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden.
# Getting the last chapter number from the URL itself for naming the
# folder/dicrectories.
current_chapter_count = int(
str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip())
# Getting the last chapter number from the URL itself for naming the
# folder/dicrectories.
last_chapter_count = int(
str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip())
print '\n'
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
print '{:^80}'.format('=====================================================================\n')
First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden.
current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
print('\n')
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
for x in range(current_chapter_count, last_chapter_count + 1):
for x in range(current_chapter_count,last_chapter_count+1):
driver.refresh()
File_Name_Final = str(x) + '.jpg'
File_Name_Final = str(x)+'.jpg'
link_container = driver.find_element_by_xpath('//*[@id="image"]')
ddl_image = str(link_container.get_attribute('src'))
FileDownloader(File_Name_Final, Directory_path, ddl_image)
FileDownloader(File_Name_Final,Directory_path,ddl_image)
driver.find_element_by_xpath('//*[@id="top_bar"]/div/a[2]').click()
print '\n'
print "Completed downloading ", Series_Name, ' - ', chapter_number
print('\n')
print("Completed downloading ",Series_Name,' - ',chapter_number)
def whole_series(url, current_directory):
def whole_series(url,current_directory):
if not url:
print "Couldn't get the URL. Please report it on Github Repository."
print("Couldn't get the URL. Please report it on Github Repository.")
try:
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip()
Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
except Exception as e:
print 'Check if the URL is correct or not. Report on Github.'
print('Check if the URL is correct or not. Report on Github.')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
response = requests.get(url, headers=headers)
Page_source = str(response.text.encode('utf-8'))
try:
chapter_link_format = "http://mangafox.me/manga/" + \
str(Series_Name) + "/v"
links = re.findall(
'{0}(.*?)html'.format(chapter_link_format),
Page_source)
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v"
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
if len(links) == 0:
chapter_link_format = "http://mangafox.me/manga/" + \
str(Series_Name) + "/c"
# print chapter_link_format
links = re.findall(
'{0}(.*?)html'.format(chapter_link_format),
Page_source)
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c"
#print chapter_link_format
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
except Exception as e:
print "Error : ", e, '\n', "Please report this error on Github repository."
print("Error : ",e,'\n',"Please report this error on Github repository.")
driver = create_driver()
for x in links:
chapter_link = str(str(chapter_link_format) + str(x) + "html").strip()
chapter_link = str(str(chapter_link_format)+str(x)+"html").strip()
try:
single_chapter(driver, chapter_link, current_directory)
single_chapter(driver,chapter_link,current_directory)
except Exception as e:
print e
print(e)
driver.quit()
driver.quit()
def mangafox_Url_Check(input_url, current_directory):
mangafox_single_regex = re.compile(
'https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
mangafox_whole_regex = re.compile(
'^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
def mangafox_Url_Check(input_url,current_directory):
mangafox_single_regex = re.compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
mangafox_whole_regex = re.compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
lines = input_url.split('\n')
for line in lines:
@@ -206,15 +154,17 @@ def mangafox_Url_Check(input_url, current_directory):
url = str(input_url)
driver = create_driver()
try:
single_chapter(driver, url, current_directory)
single_chapter(driver,url,current_directory)
except Exception as e:
print e
print(e)
driver.quit()
driver.quit()
sys.exit()
else:
pass
found = re.search(mangafox_whole_regex, line)
if found:
match = found.groupdict()
@@ -222,9 +172,9 @@ def mangafox_Url_Check(input_url, current_directory):
url = str(input_url)
#driver = create_driver()
try:
whole_series(url, current_directory)
whole_series(url,current_directory)
except Exception as e:
print e
print(e)
sys.exit()
else:
pass

View File

@@ -1,144 +1,96 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import requests
import re
import os
import sys
from more_itertools import unique_everseen
from more_itertools import unique_everseen
from bs4 import BeautifulSoup
from downloader.cookies_required import main as FileDownloader
def single_chapter(url, current_directory):
def single_chapter(url,current_directory):
if not url:
print "Couldn't get the URL. Please report it on Github Repository."
print("Couldn't get the URL. Please report it on Github Repository.")
sys.exit(0)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(
re.search(
'\/read\/(.*?)/',
url).group(1)).strip().replace(
'_',
' ').title()
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
try:
# Getting the chapter count from the URL itself for naming the
# folder/dicrectories in integer.
chapter_number = int(str(re.search(
'0\/(.*?)/', url).group(1)).strip().replace('0', '').replace('/', ''))
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
except Exception as e:
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
Raw_File_Directory = str(Series_Name) + '/' + \
"Chapter " + str(chapter_number)
# Fix for "Special Characters" in The series name
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
Directory_path = os.path.normpath(File_Directory)
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
ddl_list = list(unique_everseen(ddl_image_list))
print '\n'
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
print '{:^80}'.format('=====================================================================\n')
print('\n')
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
print('{:^80}'.format('=====================================================================\n'))
for i in ddl_list:
if not os.path.exists(File_Directory):
os.makedirs(File_Directory)
ddl_image = "http://yomanga.co/reader/content/comics" + \
str(i).replace('"', '').replace('\\', '')
os.makedirs(File_Directory)
ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
print('\n')
print("Completed downloading ",Series_Name)
File_Name_Final = str(
re.findall(
'\/(\d+)\.[jpg]|[png]',
i)).replace(
"[",
"").replace(
"]",
"").replace(
"'",
"").replace(
",",
"").strip() + "." + str(
re.findall(
'\d\.(.*?)$',
str(i))).replace(
",",
"").replace(
"[",
"").replace(
"]",
"").replace(
"'",
"").strip()
FileDownloader(
File_Name_Final,
Directory_path,
tasty_cookies,
ddl_image)
print '\n'
print "Completed downloading ", Series_Name
def whole_series(url, current_directory):
def whole_series(url,current_directory):
if not url:
print "Couldn't get the URL. Please report it on Github Repository."
print("Couldn't get the URL. Please report it on Github Repository.")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
}
s = requests.Session()
response = s.get(url, headers=headers)
tasty_cookies = response.cookies
Page_source = str(response.text.encode('utf-8'))
# Getting the Series Name from the URL itself for naming the
# folder/dicrectories.
Series_Name = str(
re.search(
'\/series\/(.*?)/',
url).group(1)).strip().replace(
'_',
' ').title()
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
soup = BeautifulSoup(Page_source, 'html.parser')
chapter_text = soup.findAll('div', {'class': 'title'})
chapter_text = soup.findAll('div',{'class':'title'})
for link in chapter_text:
x = link.findAll('a')
for a in x:
url = a['href']
single_chapter(url, current_directory)
def yomanga_Url_Check(input_url, current_directory):
yomanga_single_regex = re.compile(
'https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
yomanga_whole_regex = re.compile(
'^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
single_chapter(url,current_directory)
def yomanga_Url_Check(input_url,current_directory):
yomanga_single_regex = re.compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
yomanga_whole_regex = re.compile('^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
lines = input_url.split('\n')
for line in lines:
found = re.search(yomanga_single_regex, line)
@@ -146,15 +98,24 @@ def yomanga_Url_Check(input_url, current_directory):
match = found.groupdict()
if match['Chapter']:
url = str(input_url)
single_chapter(url, current_directory)
single_chapter(url,current_directory)
else:
pass
found = re.search(yomanga_whole_regex, line)
if found:
match = found.groupdict()
if match['comic']:
url = str(input_url)
whole_series(url, current_directory)
whole_series(url,current_directory)
else:
pass

View File

@@ -4,4 +4,4 @@ Date Format : YY/MM/DD
'''
__version__ = '2016.12.23'
__version__ = '2016.11.28'

View File

@@ -8,4 +8,6 @@
- Argument priority updated [2016.11.22]
- Site support for comic.naver.com [2016.11.26]
- Support for Python 3 [2016.11.26]
- Removed Kissmanga PhantomJS dependency [2016.12.23]
- Removed Kissmanga PhantomJS dependency [2016.12.23]
- Support for Python 2 and 3 has been merged [2016.12.28]
- Updated PhantomJS dependency in [supported sites](https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md) [2016.12.28]

View File

@@ -2,7 +2,7 @@
[PhantomJS] = Denotes that these sites need PhantomJS to be able to download content.
* [Mangafox](http://mangafox.me/)
* [Mangafox](http://mangafox.me/) [PhantomJS]
* [YoManga](http://yomanga.co/)
* [GoManga](http://gomanga.co/)
* [Batoto](http://bato.to/) [PhantomJS]