general pep8 clean up
parent e5b7eeffdd
commit 09cdfeb526
@@ -7,56 +7,61 @@ import sys
import argparse
from version import __version__


def version():
    print '\n'
    print '{:^80}'.format('Current Version : %s')%(__version__)
    print '\n'
    print '{:^80}'.format("More info : comic-dl -h")
    sys.exit()
    print '\n'
    print '{:^80}'.format('Current Version : %s') % (__version__)
    print '\n'
    print '{:^80}'.format("More info : comic-dl -h")
    sys.exit()


def usage():
    print '\n'
    print '{:^80}'.format('################################################')
    print '{:^80}'.format('Comic-DL Usage')
    print '{:^80}'.format('################################################\n')
    print '\n'
    print '{:^80}'.format('Author : Xonshiz | Version : %s')%(__version__)
    print '{:^80}'.format('-------------------------------------------------\n')
    print "Comic-dl is a command line tool to download manga and comics from various comic and manga sites."
    print "Using the script is pretty simple and should be easy for anyone familiar with a command line/shell."
    print '\n'
    print '{:^80}'.format("USAGE : comic-dl -i <URL to comic>")
    print '\n'
    print "Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ",'\n'
    print "Available Arguments : "
    print '{:^80}'.format("-i,--input : Specifies the Input URL")
    print '{:^80}'.format("-h : Prints this help menu")
    print '{:^80}'.format("--version : Prints the current version and exits")
    print '{:^80}'.format("-a,--about : Shows the info about this script and exits.")
    sys.exit()
    print '\n'
    print '{:^80}'.format('################################################')
    print '{:^80}'.format('Comic-DL Usage')
    print '{:^80}'.format('################################################\n')
    print '\n'
    print '{:^80}'.format('Author : Xonshiz | Version : %s') % (__version__)
    print '{:^80}'.format('-------------------------------------------------\n')
    print "Comic-dl is a command line tool to download manga and comics from various comic and manga sites."
    print "Using the script is pretty simple and should be easy for anyone familiar with a command line/shell."
    print '\n'
    print '{:^80}'.format("USAGE : comic-dl -i <URL to comic>")
    print '\n'
    print "Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ", '\n'
    print "Available Arguments : "
    print '{:^80}'.format("-i,--input : Specifies the Input URL")
    print '{:^80}'.format("-h : Prints this help menu")
    print '{:^80}'.format("--version : Prints the current version and exits")
    print '{:^80}'.format("-a,--about : Shows the info about this script and exits.")
    sys.exit()


def main(argv):
    current_directory = str(os.getcwd())
    parser = argparse.ArgumentParser(description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
    parser.add_argument('--version',action='store_true',help='Shows version and exits' )
    parser.add_argument('-i','--input',nargs=1,help='Inputs the URL to comic',default='--version')
    parser.add_argument('-a','--about',action='store_true',help='Shows the info regarding this script' )

    current_directory = str(os.getcwd())
    parser = argparse.ArgumentParser(
        description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
    parser.add_argument('--version', action='store_true', help='Shows version and exits')
    parser.add_argument('-i', '--input', nargs=1,
                        help='Inputs the URL to comic', default='--version')
    parser.add_argument('-a', '--about', action='store_true',
                        help='Shows the info regarding this script')

    args = parser.parse_args()
    args = parser.parse_args()

    if args.version:
        version()
        sys.exit()
    if args.version:
        version()
        sys.exit()

    if args.input:
        #print args.input
        input_url = str(args.input[0]).strip()
        url_checker(input_url,current_directory)
        sys.exit()
    if args.input:
        # print args.input
        input_url = str(args.input[0]).strip()
        url_checker(input_url, current_directory)
        sys.exit()

    if args.about:
        usage()
    if args.about:
        usage()

if __name__ == "__main__":
    main(sys.argv[1:])
    main(sys.argv[1:])
@@ -9,7 +9,6 @@ This python module decides which URL should be assigned to which other module fr
'''


from sites.yomanga import yomanga_Url_Check
from sites.gomanga import gomanga_Url_Check
from sites.mangafox import mangafox_Url_Check
@@ -17,22 +16,20 @@ import os
import urllib2


def url_checker(input_url, current_directory):

    domain = urllib2.urlparse.urlparse(input_url).netloc

def url_checker(input_url,current_directory):

    domain = urllib2.urlparse.urlparse(input_url).netloc

    if domain in ['mangafox.me']:
        mangafox_Url_Check(input_url,current_directory)
        pass
    elif domain in ['yomanga.co']:
        yomanga_Url_Check(input_url,current_directory)
        pass
    elif domain in ['gomanga.co']:
        gomanga_Url_Check(input_url,current_directory)
        pass
    elif domain in ['']:
        print 'You need to specify at least 1 URL. Please run : comic-dl -h'
    else:
        print "%s is unsupported at the moment. Please request on Github repository."%(domain)
    if domain in ['mangafox.me']:
        mangafox_Url_Check(input_url, current_directory)
        pass
    elif domain in ['yomanga.co']:
        yomanga_Url_Check(input_url, current_directory)
        pass
    elif domain in ['gomanga.co']:
        gomanga_Url_Check(input_url, current_directory)
        pass
    elif domain in ['']:
        print 'You need to specify at least 1 URL. Please run : comic-dl -h'
    else:
        print "%s is unsupported at the moment. Please request on Github repository." % (domain)
@@ -5,162 +5,166 @@ import requests
import re
import os
import sys
from more_itertools import unique_everseen
from more_itertools import unique_everseen
import urllib2
import urllib
import shutil
from urllib2 import URLError
from bs4 import BeautifulSoup

def single_chapter(url,current_directory):

    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    #print "Series Name : ",Series_Name

    try:
        chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
    except Exception as e:
        #raise e
        chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
    #print "Chapter No : ",chapter_number
def single_chapter(url, current_directory):

    Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
    #print 'Raw_File_Directory : ',Raw_File_Directory
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
    #print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    #print "Directory_path : ",Directory_path


    ddl_image_list = re.findall('comics(.*?)\"', Page_source)
    #print "Older List : ",ddl_image_list
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    ddl_list = list(unique_everseen(ddl_image_list))
    #print ddl_list
    #sys.exit()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    print '\n'
    print '{:^80}'.format('%s - %s')%(Series_Name,chapter_number)
    print '{:^80}'.format('=====================================================================\n')
    }

    for i in ddl_list:
        #print i
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
        #print ddl_image
        try:
            #u = urllib2.urlopen(ddl_image, cookies=response.cookies)
            u = requests.get(ddl_image,cookies=tasty_cookies)
        except URLError, e:
            if not hasattr(e, "code"):
                raise
            print "Got error from "+ddl_image, e.code, e.msg
            resp = e

        File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
        File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
        if os.path.isfile(File_Check_Path):
            print 'File Exist! Skipping ',File_Name_Final,'\n'
            pass
    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

        if not os.path.isfile(File_Check_Path):
            print 'Downloading : ',File_Name_Final
            #urllib.urlretrieve(ddl_image, File_Name_Final)
            response = requests.get(ddl_image, stream=True,cookies=tasty_cookies)
            try:
                with open(File_Name_Final, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
                File_Path = os.path.normpath(File_Name_Final)
            except Exception as e:
                #raise e
                #print e
                print "Couldn't download file from : ",ddl_image
                pass
            try:
                shutil.move(File_Path,Directory_path)
            except Exception, e:
                #raise e
                print e,'\n'
                #os.remove(File_Path)
                pass
    Page_source = str(response.text.encode('utf-8'))

    print '\n'
    print "Completed downloading ",Series_Name
    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

def whole_series(url,current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))
    try:
        # Getting the chapter count from the URL itself for naming the
        # folder/dicrectories in integer.
        chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)
                                 ).strip().replace('0', '').replace('/', ''))
    except Exception as e:
        # raise e
        chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
    # print "Chapter No : ",chapter_number

    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    #print "Series Name : ",Series_Name
    Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + str(chapter_number)
    # print 'Raw_File_Directory : ',Raw_File_Directory
    # Fix for "Special Characters" in The series name
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
    # print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    # print "Directory_path : ",Directory_path

    soup = BeautifulSoup(Page_source, 'html.parser')
    ddl_image_list = re.findall('comics(.*?)\"', Page_source)
    # print "Older List : ",ddl_image_list

    chapter_text = soup.findAll('div',{'class':'title'})
    #print chapter_text
    ddl_list = list(unique_everseen(ddl_image_list))
    # print ddl_list
    # sys.exit()

    for link in chapter_text:
        x = link.findAll('a')
        for a in x:
            url = a['href']
            single_chapter(url,current_directory)
    print '\n'
    print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
    print '{:^80}'.format('=====================================================================\n')

def gomanga_Url_Check(input_url,current_directory):

    gomanga_single_regex = re.compile('https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
    gomanga_whole_regex = re.compile('^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
    for i in ddl_list:
        # print i
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        ddl_image = "http://gomanga.co/reader/content/comics" + \
            str(i).replace('"', '').replace('\\', '')
        # print ddl_image
        try:
            # u = urllib2.urlopen(ddl_image, cookies=response.cookies)
            u = requests.get(ddl_image, cookies=tasty_cookies)
        except URLError, e:
            if not hasattr(e, "code"):
                raise
            print "Got error from " + ddl_image, e.code, e.msg
            resp = e

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(gomanga_single_regex, line)
        if found:
            match = found.groupdict()
            if match['Chapter']:
                url = str(input_url)
                single_chapter(url,current_directory)
            else:
                pass

        File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[", "").replace("]", "").replace("'", "").replace(",", "").strip(
        ) + "." + str(re.findall('\d\.(.*?)$', str(i))).replace(",", "").replace("[", "").replace("]", "").replace("'", "").strip()
        File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
        if os.path.isfile(File_Check_Path):
            print 'File Exist! Skipping ', File_Name_Final, '\n'
            pass


        found = re.search(gomanga_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic']:
                url = str(input_url)
                whole_series(url,current_directory)
            else:
                pass
        if not os.path.isfile(File_Check_Path):
            print 'Downloading : ', File_Name_Final
            # urllib.urlretrieve(ddl_image, File_Name_Final)
            response = requests.get(ddl_image, stream=True, cookies=tasty_cookies)
            try:
                with open(File_Name_Final, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
                File_Path = os.path.normpath(File_Name_Final)
            except Exception as e:
                # raise e
                # print e
                print "Couldn't download file from : ", ddl_image
                pass
            try:
                shutil.move(File_Path, Directory_path)
            except Exception, e:
                # raise e
                print e, '\n'
                # os.remove(File_Path)
                pass

    print '\n'
    print "Completed downloading ", Series_Name


def whole_series(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    soup = BeautifulSoup(Page_source, 'html.parser')

    chapter_text = soup.findAll('div', {'class': 'title'})
    # print chapter_text

    for link in chapter_text:
        x = link.findAll('a')
        for a in x:
            url = a['href']
            single_chapter(url, current_directory)


def gomanga_Url_Check(input_url, current_directory):

    gomanga_single_regex = re.compile(
        'https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
    gomanga_whole_regex = re.compile(
        '^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(gomanga_single_regex, line)
        if found:
            match = found.groupdict()
            if match['Chapter']:
                url = str(input_url)
                single_chapter(url, current_directory)
            else:
                pass

        found = re.search(gomanga_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic']:
                url = str(input_url)
                whole_series(url, current_directory)
            else:
                pass
@@ -11,193 +11,209 @@ import shutil
from bs4 import BeautifulSoup
from urllib2 import URLError

def single_chapter(url,current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    response = requests.get(url, headers=headers)
    Page_source = str(response.text.encode('utf-8'))

    try:
        Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    except Exception as e:
        #raise e
        #print "Error : ",e,'\n'
        Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.

    #print "Series Name : ",Series_Name

    try:
        volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories.
    except Exception as e:
        #raise e
        volume_number = "Volume 01"
    #print "Volume No : ",volume_number
    try:
        chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
    except Exception as e:
        #raise e
        chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
    #print "Chapter No : ",chapter_number
    First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_source).group(0)).strip() # Fix if they change the CDN all of a sudden.
    #print First_chapter_link
    current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
    last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
    #print "Last Chapter : ",last_chapter_count
def single_chapter(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number)
    #print 'Raw_File_Directory : ',Raw_File_Directory
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
    #print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    #print "Directory_path : ",Directory_path
    }

    print '\n'
    print '{:^80}'.format('%s - %s')%(Series_Name,chapter_number)
    print '{:^80}'.format('=====================================================================\n')
    response = requests.get(url, headers=headers)
    Page_source = str(response.text.encode('utf-8'))

    for x in range(current_chapter_count,last_chapter_count+1):
        #print x
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        if len(str(x)) == 1:
            ddl_image = First_chapter_link.replace('001.jpg','00{0}.jpg'.format(x))

            #print ddl_image
            try:
                u = urllib2.urlopen(ddl_image)
            except URLError, e:
                if not hasattr(e, "code"):
                    raise
                print "Got error from "+ddl_image, e.code, e.msg
                resp = e

            File_Name_Final = str(x).strip()+".jpg"
            File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
            #print "Final Check Path : ",File_Check_Path
    try:
        # Getting the Series Name from the URL itself for naming the folder/dicrectories.
        Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)
                          ).strip().replace('_', ' ').title()
    except Exception as e:
        #raise e
        # print "Error : ",e,'\n'
        # Getting the Series Name from the URL itself for naming the folder/dicrectories.
        Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)
                          ).strip().replace('_', ' ').title()

            if os.path.isfile(File_Check_Path):
                print 'File Exist! Skipping ',File_Name_Final,'\n'
                pass
    # print "Series Name : ",Series_Name

            if not os.path.isfile(File_Check_Path):
                print 'Downloading : ',File_Name_Final
                urllib.urlretrieve(ddl_image, File_Name_Final)
                File_Path = os.path.normpath(File_Name_Final)
                try:
                    shutil.move(File_Path,Directory_path)
                except Exception, e:
                    #raise e
                    print e,'\n'
                    os.remove(File_Path)
                    pass
    try:
        # Getting the volume count from the URL itself for naming the folder/dicrectories.
        volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip()
    except Exception as e:
        # raise e
        volume_number = "Volume 01"
    # print "Volume No : ",volume_number
    try:
        # Getting the chapter count from the URL itself for naming the
        # folder/dicrectories in integer.
        chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip())
    except Exception as e:
        #raise e
        # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
        chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip())
    # print "Chapter No : ",chapter_number
    # Fix if they change the CDN all of a sudden.
    First_chapter_link = str(re.search(
        'http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_source).group(0)).strip()
    # print First_chapter_link
    # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
    current_chapter_count = int(
        str(re.search('current_page\=(.*?)\;', Page_source).group(1)).strip())
    # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
    last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_source).group(1)).strip())
    # print "Last Chapter : ",last_chapter_count

        else :

            ddl_image = First_chapter_link.replace('001','0{0}'.format(x))

            #print ddl_image
            try:
                u = urllib2.urlopen(ddl_image)
            except URLError, e:
                if not hasattr(e, "code"):
                    raise
                print "Got error from "+ddl_image, e.code, e.msg
                resp = e

            File_Name_Final = str(x).strip()+".jpg"
            File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
            #print "Final Check Path : ",File_Check_Path
    Raw_File_Directory = str(Series_Name) + '/' + str(volume_number) + \
        '/' + "Chapter " + str(chapter_number)
    # print 'Raw_File_Directory : ',Raw_File_Directory
    # Fix for "Special Characters" in The series name
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
    # print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    # print "Directory_path : ",Directory_path

            if os.path.isfile(File_Check_Path):
                print 'File Exist! Skipping ',File_Name_Final,'\n'
                pass
    print '\n'
    print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
    print '{:^80}'.format('=====================================================================\n')

            if not os.path.isfile(File_Check_Path):
                print 'Downloading : ',File_Name_Final
                urllib.urlretrieve(ddl_image, File_Name_Final)
                File_Path = os.path.normpath(File_Name_Final)
                try:
                    shutil.move(File_Path,Directory_path)
                except Exception, e:
                    #raise e
                    print e,'\n'
                    os.remove(File_Path)
                    pass
    print '\n'
    print "Completed downloading ",Series_Name
    for x in range(current_chapter_count, last_chapter_count + 1):
        # print x
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        if len(str(x)) == 1:
            ddl_image = First_chapter_link.replace('001.jpg', '00{0}.jpg'.format(x))


def whole_series(url,current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
            # print ddl_image
            try:
                u = urllib2.urlopen(ddl_image)
            except URLError, e:
                if not hasattr(e, "code"):
                    raise
                print "Got error from " + ddl_image, e.code, e.msg
                resp = e

    try:
        Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    except Exception as e:
        #raise e
        print 'Check if the URL is correct or not. Report on Github.'
    #print "Series Name : ",Series_Name
            File_Name_Final = str(x).strip() + ".jpg"
            File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
            # print "Final Check Path : ",File_Check_Path


    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    response = requests.get(url, headers=headers)
    Page_source = str(response.text.encode('utf-8'))

    try:
        chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v"
        #print 'UP : ',chapter_link_format
        links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
        #print "Lower links : ",links
            if os.path.isfile(File_Check_Path):
                print 'File Exist! Skipping ', File_Name_Final, '\n'
                pass

        if len(links) == 0:
            chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c"
            #print chapter_link_format
            links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
            if not os.path.isfile(File_Check_Path):
                print 'Downloading : ', File_Name_Final
                urllib.urlretrieve(ddl_image, File_Name_Final)
                File_Path = os.path.normpath(File_Name_Final)
                try:
                    shutil.move(File_Path, Directory_path)
                except Exception, e:
                    # raise e
                    print e, '\n'
                    os.remove(File_Path)
                    pass

        else:

            ddl_image = First_chapter_link.replace('001', '0{0}'.format(x))

            # print ddl_image
            try:
                u = urllib2.urlopen(ddl_image)
            except URLError, e:
                if not hasattr(e, "code"):
                    raise
                print "Got error from " + ddl_image, e.code, e.msg
                resp = e

            File_Name_Final = str(x).strip() + ".jpg"
            File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
            # print "Final Check Path : ",File_Check_Path

            if os.path.isfile(File_Check_Path):
                print 'File Exist! Skipping ', File_Name_Final, '\n'
                pass

            if not os.path.isfile(File_Check_Path):
                print 'Downloading : ', File_Name_Final
                urllib.urlretrieve(ddl_image, File_Name_Final)
                File_Path = os.path.normpath(File_Name_Final)
                try:
                    shutil.move(File_Path, Directory_path)
                except Exception, e:
                    # raise e
                    print e, '\n'
                    os.remove(File_Path)
                    pass
    print '\n'
    print "Completed downloading ", Series_Name


    except Exception as e:
        #raise e
        print "Error : ",e,'\n',"Please report this error on Github repository."
def whole_series(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."


    for x in links:
        #print x
        chapter_link = str(str(chapter_link_format)+str(x)+"html").strip()
        #print "URL : ",chapter_link
        single_chapter(chapter_link,current_directory)
    try:
        # Getting the Series Name from the URL itself for naming the folder/dicrectories.
        Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip()
    except Exception as e:
        # raise e
        print 'Check if the URL is correct or not. Report on Github.'
    # print "Series Name : ",Series_Name

def mangafox_Url_Check(input_url,current_directory):

    mangafox_single_regex = re.compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
    mangafox_whole_regex = re.compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(mangafox_single_regex, line)
        if found:
            match = found.groupdict()
            if match['issue']:
                url = str(input_url)
                single_chapter(url,current_directory)
            else:
                pass

    }


        found = re.search(mangafox_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic_series']:
                url = str(input_url)
                whole_series(url,current_directory)
            else:
                pass
    response = requests.get(url, headers=headers)
    Page_source = str(response.text.encode('utf-8'))

    try:
        chapter_link_format = "http://mangafox.me/manga/" + str(Series_Name) + "/v"
        # print 'UP : ',chapter_link_format
        links = re.findall('{0}(.*?)html'.format(chapter_link_format), Page_source)
        # print "Lower links : ",links

        if len(links) == 0:
            chapter_link_format = "http://mangafox.me/manga/" + str(Series_Name) + "/c"
            # print chapter_link_format
            links = re.findall('{0}(.*?)html'.format(chapter_link_format), Page_source)

    except Exception as e:
        # raise e
        print "Error : ", e, '\n', "Please report this error on Github repository."

    for x in links:
        # print x
        chapter_link = str(str(chapter_link_format) + str(x) + "html").strip()
        # print "URL : ",chapter_link
        single_chapter(chapter_link, current_directory)


def mangafox_Url_Check(input_url, current_directory):

    mangafox_single_regex = re.compile(
        'https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
    mangafox_whole_regex = re.compile(
        '^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(mangafox_single_regex, line)
        if found:
            match = found.groupdict()
            if match['issue']:
                url = str(input_url)
                single_chapter(url, current_directory)
            else:
                pass

        found = re.search(mangafox_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic_series']:
                url = str(input_url)
                whole_series(url, current_directory)
            else:
                pass
@@ -5,161 +5,164 @@ import requests
import re
import os
import sys
from more_itertools import unique_everseen
from more_itertools import unique_everseen
import urllib2
import urllib
import shutil
from urllib2 import URLError
from bs4 import BeautifulSoup

def single_chapter(url,current_directory):

    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    #print "Series Name : ",Series_Name

    try:
        chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
    except Exception as e:
        #raise e
        chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
    #print "Chapter No : ",chapter_number
def single_chapter(url, current_directory):

    Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
    #print 'Raw_File_Directory : ',Raw_File_Directory
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
    #print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    #print "Directory_path : ",Directory_path


    ddl_image_list = re.findall('comics(.*?)\"', Page_source)
    #print "Older List : ",ddl_image_list
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."
        sys.exit(0)

    ddl_list = list(unique_everseen(ddl_image_list))
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    print '\n'
    print '{:^80}'.format('%s - %s')%(Series_Name,chapter_number)
    print '{:^80}'.format('=====================================================================\n')
    }

    for i in ddl_list:
        #print i
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
        #print ddl_image
        try:
            #u = urllib2.urlopen(ddl_image, cookies=response.cookies)
            u = requests.get(ddl_image,cookies=tasty_cookies)
        except URLError, e:
            if not hasattr(e, "code"):
                raise
            print "Got error from "+ddl_image, e.code, e.msg
            resp = e

        File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
        File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
        if os.path.isfile(File_Check_Path):
            print 'File Exist! Skipping ',File_Name_Final,'\n'
            pass
    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

        if not os.path.isfile(File_Check_Path):
            print 'Downloading : ',File_Name_Final
            #urllib.urlretrieve(ddl_image, File_Name_Final)
            response = requests.get(ddl_image, stream=True,cookies=tasty_cookies)
            try:
                with open(File_Name_Final, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
                File_Path = os.path.normpath(File_Name_Final)
            except Exception as e:
                #raise e
                #print e
                print "Couldn't download file from : ",ddl_image
                pass
            try:
                shutil.move(File_Path,Directory_path)
            except Exception, e:
                #raise e
                print e,'\n'
                #os.remove(File_Path)
                pass
    Page_source = str(response.text.encode('utf-8'))

    print '\n'
    print "Completed downloading ",Series_Name
    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

def whole_series(url,current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))
    try:
        # Getting the chapter count from the URL itself for naming the
        # folder/dicrectories in integer.
        chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)
                                 ).strip().replace('0', '').replace('/', ''))
    except Exception as e:
        #raise e
        chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
    # print "Chapter No : ",chapter_number

    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    #print "Series Name : ",Series_Name
    Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + str(chapter_number)
    # print 'Raw_File_Directory : ',Raw_File_Directory
    # Fix for "Special Characters" in The series name
    File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
    # print "File_Directory : ",File_Directory
    Directory_path = os.path.normpath(File_Directory)
    # print "Directory_path : ",Directory_path

    soup = BeautifulSoup(Page_source, 'html.parser')
    ddl_image_list = re.findall('comics(.*?)\"', Page_source)
    # print "Older List : ",ddl_image_list

    chapter_text = soup.findAll('div',{'class':'title'})
    #print chapter_text
    ddl_list = list(unique_everseen(ddl_image_list))

    for link in chapter_text:
        x = link.findAll('a')
        for a in x:
            url = a['href']
            single_chapter(url,current_directory)

def yomanga_Url_Check(input_url,current_directory):

    yomanga_single_regex = re.compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
    yomanga_whole_regex = re.compile('^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(yomanga_single_regex, line)
        if found:
            match = found.groupdict()
            if match['Chapter']:
                url = str(input_url)
                single_chapter(url,current_directory)
            else:
                pass

    print '\n'
    print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
    print '{:^80}'.format('=====================================================================\n')


        found = re.search(yomanga_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic']:
                url = str(input_url)
                whole_series(url,current_directory)
            else:
                pass
    for i in ddl_list:
        # print i
        if not os.path.exists(File_Directory):
            os.makedirs(File_Directory)
        ddl_image = "http://yomanga.co/reader/content/comics" + \
            str(i).replace('"', '').replace('\\', '')
        # print ddl_image
        try:
            # u = urllib2.urlopen(ddl_image, cookies=response.cookies)
            u = requests.get(ddl_image, cookies=tasty_cookies)
        except URLError, e:
            if not hasattr(e, "code"):
                raise
            print "Got error from " + ddl_image, e.code, e.msg
            resp = e

        File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[", "").replace("]", "").replace("'", "").replace(",", "").strip(
        ) + "." + str(re.findall('\d\.(.*?)$', str(i))).replace(",", "").replace("[", "").replace("]", "").replace("'", "").strip()
        File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
        if os.path.isfile(File_Check_Path):
            print 'File Exist! Skipping ', File_Name_Final, '\n'
            pass

        if not os.path.isfile(File_Check_Path):
            print 'Downloading : ', File_Name_Final
            # urllib.urlretrieve(ddl_image, File_Name_Final)
            response = requests.get(ddl_image, stream=True, cookies=tasty_cookies)
            try:
                with open(File_Name_Final, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
                File_Path = os.path.normpath(File_Name_Final)
            except Exception as e:
                #raise e
                # print e
                print "Couldn't download file from : ", ddl_image
                pass
            try:
                shutil.move(File_Path, Directory_path)
            except Exception, e:
                # raise e
                print e, '\n'
                # os.remove(File_Path)
                pass

    print '\n'
    print "Completed downloading ", Series_Name


def whole_series(url, current_directory):
    if not url:
        print "Couldn't get the URL. Please report it on Github Repository."

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'

    }

    s = requests.Session()
    response = s.get(url, headers=headers)
    tasty_cookies = response.cookies

    Page_source = str(response.text.encode('utf-8'))

    # Getting the Series Name from the URL itself for naming the folder/dicrectories.
    Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_', ' ').title()
    # print "Series Name : ",Series_Name

    soup = BeautifulSoup(Page_source, 'html.parser')

    chapter_text = soup.findAll('div', {'class': 'title'})
    # print chapter_text

    for link in chapter_text:
        x = link.findAll('a')
        for a in x:
            url = a['href']
            single_chapter(url, current_directory)


def yomanga_Url_Check(input_url, current_directory):

    yomanga_single_regex = re.compile(
        'https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
    yomanga_whole_regex = re.compile(
        '^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')

    lines = input_url.split('\n')
    for line in lines:
        found = re.search(yomanga_single_regex, line)
        if found:
            match = found.groupdict()
            if match['Chapter']:
                url = str(input_url)
                single_chapter(url, current_directory)
            else:
                pass

        found = re.search(yomanga_whole_regex, line)
        if found:
            match = found.groupdict()
            if match['comic']:
                url = str(input_url)
                whole_series(url, current_directory)
            else:
                pass