general pep8 clean up

Johnathan Jenkins 2016-11-14 12:37:47 -08:00
parent e5b7eeffdd
commit 09cdfeb526
5 changed files with 520 additions and 495 deletions

View File

@@ -7,56 +7,61 @@ import sys
import argparse
from version import __version__


def version():
    print '\n'
    print '{:^80}'.format('Current Version : %s') % (__version__)
    print '\n'
    print '{:^80}'.format("More info : comic-dl -h")
    sys.exit()


def usage():
    print '\n'
    print '{:^80}'.format('################################################')
    print '{:^80}'.format('Comic-DL Usage')
    print '{:^80}'.format('################################################\n')
    print '\n'
    print '{:^80}'.format('Author : Xonshiz | Version : %s') % (__version__)
    print '{:^80}'.format('-------------------------------------------------\n')
    print "Comic-dl is a command line tool to download manga and comics from various comic and manga sites."
    print "Using the script is pretty simple and should be easy for anyone familiar with a command line/shell."
    print '\n'
    print '{:^80}'.format("USAGE : comic-dl -i <URL to comic>")
    print '\n'
    print "Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ", '\n'
    print "Available Arguments : "
    print '{:^80}'.format("-i,--input : Specifies the Input URL")
    print '{:^80}'.format("-h : Prints this help menu")
    print '{:^80}'.format("--version : Prints the current version and exits")
    print '{:^80}'.format("-a,--about : Shows the info about this script and exits.")
    sys.exit()


def main(argv):
    current_directory = str(os.getcwd())
    parser = argparse.ArgumentParser(
        description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
    parser.add_argument('--version', action='store_true', help='Shows version and exits')
    parser.add_argument('-i', '--input', nargs=1,
                        help='Inputs the URL to comic', default='--version')
    parser.add_argument('-a', '--about', action='store_true',
                        help='Shows the info regarding this script')

    args = parser.parse_args()

    if args.version:
        version()
        sys.exit()

    if args.input:
        # print args.input
        input_url = str(args.input[0]).strip()
        url_checker(input_url, current_directory)
        sys.exit()

    if args.about:
        usage()


if __name__ == "__main__":
    main(sys.argv[1:])
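
Side note (illustrative, not part of this commit): because -i/--input is declared with nargs=1, argparse stores the value as a one-element list, which is why the code reads args.input[0] before stripping it. A minimal check with a placeholder URL:

    >>> import argparse
    >>> p = argparse.ArgumentParser()
    >>> _ = p.add_argument('-i', '--input', nargs=1)
    >>> p.parse_args(['-i', 'http://example.com/']).input
    ['http://example.com/']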

View File

@@ -9,7 +9,6 @@ This python module decides which URL should be assigned to which other module fr
'''
from sites.yomanga import yomanga_Url_Check
from sites.gomanga import gomanga_Url_Check
from sites.mangafox import mangafox_Url_Check
@@ -17,22 +16,20 @@ import os
import urllib2


def url_checker(input_url, current_directory):
    domain = urllib2.urlparse.urlparse(input_url).netloc

    if domain in ['mangafox.me']:
        mangafox_Url_Check(input_url, current_directory)
        pass
    elif domain in ['yomanga.co']:
        yomanga_Url_Check(input_url, current_directory)
        pass
    elif domain in ['gomanga.co']:
        gomanga_Url_Check(input_url, current_directory)
        pass
    elif domain in ['']:
        print 'You need to specify at least 1 URL. Please run : comic-dl -h'
    else:
        print "%s is unsupported at the moment. Please request on Github repository." % (domain)

View File

@@ -5,162 +5,166 @@ import requests
import re
import os
import sys
from more_itertools import unique_everseen
import urllib2
import urllib
import shutil
from urllib2 import URLError
from bs4 import BeautifulSoup


def single_chapter(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    try:
        # Getting the chapter count from the URL itself for naming the
        # folder/dicrectories in integer.
        chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)
                                 ).strip().replace('0', '').replace('/', ''))
    except Exception as e:
        # raise e
        chapter_number = 0  # Name the chapter 0 if nothing INTEGER type comes up
    # print "Chapter No : ",chapter_number

    Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + str(chapter_number)
    # print 'Raw_File_Directory : ',Raw_File_Directory
    # Fix for "Special Characters" in The series name
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
    # print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    # print "Directory_path : ",Directory_path

    ddl_image_list = re.findall('comics(.*?)\"', Page_source)
    # print "Older List : ",ddl_image_list

    ddl_list = list(unique_everseen(ddl_image_list))
    # print ddl_list
    # sys.exit()

    print '\n'
    print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
    print '{:^80}'.format('=====================================================================\n')

    for i in ddl_list:
        # print i
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        ddl_image = "http://gomanga.co/reader/content/comics" + \
            str(i).replace('"', '').replace('\\', '')
        # print ddl_image
        try:
            # u = urllib2.urlopen(ddl_image, cookies=response.cookies)
            u = requests.get(ddl_image, cookies=tasty_cookies)
        except URLError, e:
            if not hasattr(e, "code"):
                raise
            print "Got error from " + ddl_image, e.code, e.msg
            resp = e

        File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[", "").replace("]", "").replace("'", "").replace(",", "").strip(
        ) + "." + str(re.findall('\d\.(.*?)$', str(i))).replace(",", "").replace("[", "").replace("]", "").replace("'", "").strip()
        File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)

        if os.path.isfile(File_Check_Path):
            print 'File Exist! Skipping ', File_Name_Final, '\n'
            pass

        if not os.path.isfile(File_Check_Path):
            print 'Downloading : ', File_Name_Final
            # urllib.urlretrieve(ddl_image, File_Name_Final)
            response = requests.get(ddl_image, stream=True, cookies=tasty_cookies)
            try:
                with open(File_Name_Final, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
                File_Path = os.path.normpath(File_Name_Final)
            except Exception as e:
                # raise e
                # print e
                print "Couldn't download file from : ", ddl_image
                pass
            try:
                shutil.move(File_Path, Directory_path)
            except Exception, e:
                # raise e
                print e, '\n'
                # os.remove(File_Path)
                pass

    print '\n'
    print "Completed downloading ", Series_Name


def whole_series(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    soup = BeautifulSoup(Page_source, 'html.parser')
    chapter_text = soup.findAll('div', {'class': 'title'})
    # print chapter_text

    for link in chapter_text:
        x = link.findAll('a')
        for a in x:
            url = a['href']
            single_chapter(url, current_directory)


def gomanga_Url_Check(input_url, current_directory):
    gomanga_single_regex = re.compile(
        'https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
    gomanga_whole_regex = re.compile(
        '^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(gomanga_single_regex, line)
        if found:
            match = found.groupdict()
            if match['Chapter']:
                url = str(input_url)
                single_chapter(url, current_directory)
            else:
                pass

        found = re.search(gomanga_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic']:
                url = str(input_url)
                whole_series(url, current_directory)
            else:
                pass
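
Side note (illustrative, not part of this commit, using a made-up series slug and assuming the compiled gomanga_single_regex from above is in scope): the Chapter group is what decides whether single_chapter() gets called.

    >>> found = re.search(gomanga_single_regex, 'http://gomanga.co/reader/read/some_series/en/0/12/')
    >>> found.groupdict()['Chapter']
    '12'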

View File

@@ -11,193 +11,209 @@ import shutil
from bs4 import BeautifulSoup
from urllib2 import URLError


def single_chapter(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }

    response = requests.get(url, headers=headers)
    Page_source = str(response.text.encode('utf-8'))

    try:
        # Getting the Series Name from the URL itself for naming the folder/dicrectories.
        Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)
                          ).strip().replace('_', ' ').title()
    except Exception as e:
        # raise e
        # print "Error : ",e,'\n'
        # Getting the Series Name from the URL itself for naming the folder/dicrectories.
        Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)
                          ).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    try:
        # Getting the volume count from the URL itself for naming the folder/dicrectories.
        volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip()
    except Exception as e:
        # raise e
        volume_number = "Volume 01"
    # print "Volume No : ",volume_number

    try:
        # Getting the chapter count from the URL itself for naming the
        # folder/dicrectories in integer.
        chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip())
    except Exception as e:
        # raise e
        # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
        chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip())
    # print "Chapter No : ",chapter_number

    # Fix if they change the CDN all of a sudden.
    First_chapter_link = str(re.search(
        'http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_source).group(0)).strip()
    # print First_chapter_link

    # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
    current_chapter_count = int(
        str(re.search('current_page\=(.*?)\;', Page_source).group(1)).strip())
    # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
    last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_source).group(1)).strip())
    # print "Last Chapter : ",last_chapter_count

    Raw_File_Directory = str(Series_Name) + '/' + str(volume_number) + \
        '/' + "Chapter " + str(chapter_number)
    # print 'Raw_File_Directory : ',Raw_File_Directory
    # Fix for "Special Characters" in The series name
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
    # print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    # print "Directory_path : ",Directory_path

    print '\n'
    print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
    print '{:^80}'.format('=====================================================================\n')

    for x in range(current_chapter_count, last_chapter_count + 1):
        # print x
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        if len(str(x)) == 1:
            ddl_image = First_chapter_link.replace('001.jpg', '00{0}.jpg'.format(x))
            # print ddl_image
            try:
                u = urllib2.urlopen(ddl_image)
            except URLError, e:
                if not hasattr(e, "code"):
                    raise
                print "Got error from " + ddl_image, e.code, e.msg
                resp = e

            File_Name_Final = str(x).strip() + ".jpg"
            File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
            # print "Final Check Path : ",File_Check_Path

            if os.path.isfile(File_Check_Path):
                print 'File Exist! Skipping ', File_Name_Final, '\n'
                pass

            if not os.path.isfile(File_Check_Path):
                print 'Downloading : ', File_Name_Final
                urllib.urlretrieve(ddl_image, File_Name_Final)
                File_Path = os.path.normpath(File_Name_Final)
                try:
                    shutil.move(File_Path, Directory_path)
                except Exception, e:
                    # raise e
                    print e, '\n'
                    os.remove(File_Path)
                    pass
        else:
            ddl_image = First_chapter_link.replace('001', '0{0}'.format(x))
            # print ddl_image
            try:
                u = urllib2.urlopen(ddl_image)
            except URLError, e:
                if not hasattr(e, "code"):
                    raise
                print "Got error from " + ddl_image, e.code, e.msg
                resp = e

            File_Name_Final = str(x).strip() + ".jpg"
            File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
            # print "Final Check Path : ",File_Check_Path

            if os.path.isfile(File_Check_Path):
                print 'File Exist! Skipping ', File_Name_Final, '\n'
                pass

            if not os.path.isfile(File_Check_Path):
                print 'Downloading : ', File_Name_Final
                urllib.urlretrieve(ddl_image, File_Name_Final)
                File_Path = os.path.normpath(File_Name_Final)
                try:
                    shutil.move(File_Path, Directory_path)
                except Exception, e:
                    # raise e
                    print e, '\n'
                    os.remove(File_Path)
                    pass

    print '\n'
    print "Completed downloading ", Series_Name


def whole_series(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    try:
        # Getting the Series Name from the URL itself for naming the folder/dicrectories.
        Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip()
    except Exception as e:
        # raise e
        print 'Check if the URL is correct or not. Report on Github.'
    # print "Series Name : ",Series_Name

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }

    response = requests.get(url, headers=headers)
    Page_source = str(response.text.encode('utf-8'))

    try:
        chapter_link_format = "http://mangafox.me/manga/" + str(Series_Name) + "/v"
        # print 'UP : ',chapter_link_format
        links = re.findall('{0}(.*?)html'.format(chapter_link_format), Page_source)
        # print "Lower links : ",links

        if len(links) == 0:
            chapter_link_format = "http://mangafox.me/manga/" + str(Series_Name) + "/c"
            # print chapter_link_format
            links = re.findall('{0}(.*?)html'.format(chapter_link_format), Page_source)
    except Exception as e:
        # raise e
        print "Error : ", e, '\n', "Please report this error on Github repository."

    for x in links:
        # print x
        chapter_link = str(str(chapter_link_format) + str(x) + "html").strip()
        # print "URL : ",chapter_link
        single_chapter(chapter_link, current_directory)


def mangafox_Url_Check(input_url, current_directory):
    mangafox_single_regex = re.compile(
        'https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
    mangafox_whole_regex = re.compile(
        '^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(mangafox_single_regex, line)
        if found:
            match = found.groupdict()
            if match['issue']:
                url = str(input_url)
                single_chapter(url, current_directory)
            else:
                pass

        found = re.search(mangafox_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic_series']:
                url = str(input_url)
                whole_series(url, current_directory)
            else:
                pass

View File

@@ -5,161 +5,164 @@ import requests
import re
import os
import sys
from more_itertools import unique_everseen
import urllib2
import urllib
import shutil
from urllib2 import URLError
from bs4 import BeautifulSoup


def single_chapter(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    try:
        # Getting the chapter count from the URL itself for naming the
        # folder/dicrectories in integer.
        chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)
                                 ).strip().replace('0', '').replace('/', ''))
    except Exception as e:
        # raise e
        chapter_number = 0  # Name the chapter 0 if nothing INTEGER type comes up
    # print "Chapter No : ",chapter_number

    Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + str(chapter_number)
    # print 'Raw_File_Directory : ',Raw_File_Directory
    # Fix for "Special Characters" in The series name
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
    # print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    # print "Directory_path : ",Directory_path

    ddl_image_list = re.findall('comics(.*?)\"', Page_source)
    # print "Older List : ",ddl_image_list

    ddl_list = list(unique_everseen(ddl_image_list))

    print '\n'
    print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
    print '{:^80}'.format('=====================================================================\n')

    for i in ddl_list:
        # print i
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        ddl_image = "http://yomanga.co/reader/content/comics" + \
            str(i).replace('"', '').replace('\\', '')
        # print ddl_image
        try:
            # u = urllib2.urlopen(ddl_image, cookies=response.cookies)
            u = requests.get(ddl_image, cookies=tasty_cookies)
        except URLError, e:
            if not hasattr(e, "code"):
                raise
            print "Got error from " + ddl_image, e.code, e.msg
            resp = e

        File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[", "").replace("]", "").replace("'", "").replace(",", "").strip(
        ) + "." + str(re.findall('\d\.(.*?)$', str(i))).replace(",", "").replace("[", "").replace("]", "").replace("'", "").strip()
        File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)

        if os.path.isfile(File_Check_Path):
            print 'File Exist! Skipping ', File_Name_Final, '\n'
            pass

        if not os.path.isfile(File_Check_Path):
            print 'Downloading : ', File_Name_Final
            # urllib.urlretrieve(ddl_image, File_Name_Final)
            response = requests.get(ddl_image, stream=True, cookies=tasty_cookies)
            try:
                with open(File_Name_Final, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
                File_Path = os.path.normpath(File_Name_Final)
            except Exception as e:
                # raise e
                # print e
                print "Couldn't download file from : ", ddl_image
                pass
            try:
                shutil.move(File_Path, Directory_path)
            except Exception, e:
                # raise e
                print e, '\n'
                # os.remove(File_Path)
                pass

    print '\n'
    print "Completed downloading ", Series_Name


def whole_series(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    soup = BeautifulSoup(Page_source, 'html.parser')
    chapter_text = soup.findAll('div', {'class': 'title'})
    # print chapter_text

    for link in chapter_text:
        x = link.findAll('a')
        for a in x:
            url = a['href']
            single_chapter(url, current_directory)


def yomanga_Url_Check(input_url, current_directory):
    yomanga_single_regex = re.compile(
        'https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
    yomanga_whole_regex = re.compile(
        '^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(yomanga_single_regex, line)
        if found:
            match = found.groupdict()
            if match['Chapter']:
                url = str(input_url)
                single_chapter(url, current_directory)
            else:
                pass

        found = re.search(yomanga_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic']:
                url = str(input_url)
                whole_series(url, current_directory)
            else:
                pass