Merged Python 2 and 3 support.
Check the changelog for more info.
parent c3a0d81fae
commit 3d20f2e3ef
@@ -8,4 +8,6 @@
- Argument priority updated [2016.11.22]
- Site support for comic.naver.com [2016.11.26]
- Support for Python 3 [2016.11.26]
- Removed Kissmanga PhantomJS dependency [2016.12.23]
- Removed Kissmanga PhantomJS dependency [2016.12.23]
- Support for Python 2 and 3 has been merged [2016.12.28]
- Updated PhantomJS dependency in [supported sites](https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md) [2016.12.28]
@@ -2,7 +2,7 @@

[PhantomJS] = Denotes that these sites need PhantomJS to be able to download content.

* [Mangafox](http://mangafox.me/)
* [Mangafox](http://mangafox.me/) [PhantomJS]
* [YoManga](http://yomanga.co/)
* [GoManga](http://gomanga.co/)
* [Batoto](http://bato.to/) [PhantomJS]
@@ -1,68 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import argparse
from honcho import url_checker
from version import __version__

def version():
    print('\n')
    print('{:^80}'.format('Current Version : %s')%(__version__))
    print('\n')
    print('{:^80}'.format("More info : comic-dl -h"))

def usage():
    print('\n')
    print('{:^80}'.format('################################################'))
    print('{:^80}'.format('Comic-DL Usage'))
    print('{:^80}'.format('################################################\n'))
    print('\n')
    print('{:^80}'.format('Author : Xonshiz | Version : %s')%(__version__))
    print('{:^80}'.format('-------------------------------------------------\n'))
    print("Comic-dl is a command line tool to download manga and comics from various comic and manga sites.")
    print("Using the script is pretty simple and should be easy for anyone familiar with a command line/shell.")
    print('\n')
    print('{:^80}'.format("USAGE : comic-dl -i <URL to comic>"))
    print('\n')
    print("Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ",'\n')
    print("Available Arguments : ")
    print('{:^80}'.format("-i,--input : Specifies the Input URL"))
    print('{:^80}'.format("-h : Prints this help menu"))
    print('{:^80}'.format("--version : Prints the current version and exits"))
    print('{:^80}'.format("-a,--about : Shows the info about this script and exits."))
    print('{:^80}'.format("-u,--username : Indicates username for a website."))
    print('{:^80}'.format("-p,--password : Indicates password for a website."))


def main(argv):
    current_directory = str(os.getcwd())
    parser = argparse.ArgumentParser(description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
    parser.add_argument('--version',action='store_true',help='Shows version and exits')
    parser.add_argument('-a','--about',action='store_true',help='Shows the info regarding this script')
    parser.add_argument('-i','--input',nargs=1,help='Inputs the URL to comic')
    parser.add_argument('-p','--password',nargs=1,help='Indicates password for a website',default='None')
    parser.add_argument('-u','--username',nargs=1,help='Indicates username for a website',default='None')


    args = parser.parse_args()

    if args.version:
        version()
        sys.exit()

    if args.about:
        usage()
        sys.exit()
    if args.input:
        input_url = str(args.input[0]).strip()
        User_Password = str(args.password[0].strip())
        User_Name = str(args.username[0].strip())
        url_checker(input_url,current_directory,User_Name,User_Password)
        sys.exit()

if __name__ == "__main__":
    main(sys.argv[1:])
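The deleted script above already combines `from __future__ import print_function` with function-style `print()` calls, which is the pattern this commit applies across the codebase to merge the Python 2 and Python 3 code paths. A minimal, self-contained sketch of that pattern (illustrative only, not part of the diff; the helper name and version string are made up):

```python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Sketch of the Python 2/3 merge pattern: with print_function imported,
# the same print() calls behave identically under both interpreters.
from __future__ import absolute_import
from __future__ import print_function
import sys


def show_version(version_string):
    # Centre the banner in an 80-column terminal, mirroring the CLI's help output.
    print('{:^80}'.format('Current Version : %s' % version_string))
    # Works on Python 2 (thanks to the __future__ import) and on Python 3.
    print('Running under Python %d.%d' % sys.version_info[:2])


if __name__ == '__main__':
    show_version('2016.12.28')  # illustrative value; follows the changelog's date format
```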
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,72 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""This module serves as a universal downloader for downloading Images.
This module supports handling of cookies.
This module needs a File_Name for the file to be downloaded,
Directory_path which is the directory path where you want to download the file,
tasty_cookies refer to the `cookies` you fetch from your session.
ddl_image is the direct link to the image itself.

This module uses `requests` library to achieve the handling of cookies.
"""

from __future__ import absolute_import
from __future__ import print_function
import os
import requests
import shutil
from downloader.universal import main as FileDownloader

def main(File_Name_Final,Directory_path,tasty_cookies,ddl_image):
    File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)

    if os.path.isfile(File_Check_Path):
        print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
        pass

    if not os.path.isfile(File_Check_Path):
        print('[Comic-dl] Downloading : ',File_Name_Final)

        response = requests.get(ddl_image, stream=True,cookies=tasty_cookies)
        try:
            with open(File_Name_Final, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
            File_Path = os.path.normpath(File_Name_Final)
        except Exception as e:
            print("Couldn't download file from : ",ddl_image)
            pass
        try:
            shutil.move(File_Path,Directory_path)
        except Exception as e:
            print(e,'\n')
            pass


def with_referer(File_Name_Final,Directory_path,tasty_cookies,ddl_image,referer):
    File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)

    if os.path.isfile(File_Check_Path):
        print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
        pass

    if not os.path.isfile(File_Check_Path):
        print('[Comic-dl] Downloading : ',File_Name_Final)
        headers = {'Referer': referer}
        response = requests.get(ddl_image, stream=True,cookies=tasty_cookies,headers=headers)
        try:
            with open(File_Name_Final, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
            File_Path = os.path.normpath(File_Name_Final)
        except Exception as e:
            print("Couldn't download file from : ",ddl_image)
            pass
        try:
            shutil.move(File_Path,Directory_path)
        except Exception as e:
            print(e,'\n')
            pass


if __name__ == '__main__':
    main()
@@ -1,46 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""This module serves as a universal downloader for downloading Images.
Note that this module does not support handling of cookies, for that you
need to refer to `cookies_required` module.
This module needs a File_Name for the file to be downloaded,
Directory_path which is the directory path where you want to download the file,
ddl_image is the direct link to the image itself.
"""

from __future__ import absolute_import
from __future__ import print_function
import os
import urllib
import shutil
import urllib.request
#from urllib import URLError
import sys

def main(File_Name_Final,Directory_path,ddl_image):
    File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)

    if os.path.isfile(File_Check_Path):
        print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
        pass

    if not os.path.isfile(File_Check_Path):
        print('[Comic-dl] Downloading : ',File_Name_Final)
        urllib.request.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
        urllib.request.urlretrieve(ddl_image, File_Name_Final)
        #filename, headers = urllib.urlretrieve(ddl_image,File_Name_Final)
        #print "File Name : ",filename
        #print "Headers : ",headers
        File_Path = os.path.normpath(File_Name_Final)
        try:
            shutil.move(File_Path,Directory_path)
        except Exception as e:
            print(e,'\n')
            os.remove(File_Path)
            pass



if __name__ == '__main__':
    main()
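The deleted downloader above imports `urllib.request`, which only resolves on Python 3, so it could not run unchanged under Python 2. A hedged sketch of a version-agnostic variant using `six.moves` (the diff already imports `six.moves` elsewhere, but the function name and structure here are illustrative, not the project's actual replacement):

```python
# Sketch only: a Python 2/3 compatible take on the same download helper.
from __future__ import absolute_import
from __future__ import print_function
import os
import shutil
from six.moves.urllib import request as url_request  # resolves to urllib on Py2, urllib.request on Py3


def download_image(file_name, directory_path, ddl_image):
    # Skip files that are already present, mirroring the original module's check.
    if os.path.isfile(os.path.join(directory_path, file_name)):
        print('[Comic-dl] File Exists! Skipping', file_name)
        return
    print('[Comic-dl] Downloading :', file_name)
    url_request.urlretrieve(ddl_image, file_name)
    # Assumes directory_path already exists, as the original module does.
    shutil.move(os.path.normpath(file_name), directory_path)
```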
@@ -1,51 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-


"""This python module decides which URL should be assigned to which other module from the site package.
"""



from __future__ import absolute_import
from __future__ import print_function
#import urllib as urllib2
from urllib.parse import urlparse
from sites.yomanga import yomanga_Url_Check
from sites.gomanga import gomanga_Url_Check
from sites.mangafox import mangafox_Url_Check
from sites.batoto import batoto_Url_Check
from sites.kissmanga import kissmanga_Url_Check
from sites.comic_naver import comic_naver_Url_Check
from downloader import universal,cookies_required



def url_checker(input_url, current_directory, User_Name, User_Password):

    domain = urlparse(input_url).netloc

    if domain in ['mangafox.me']:
        mangafox_Url_Check(input_url, current_directory)

    elif domain in ['yomanga.co']:
        yomanga_Url_Check(input_url, current_directory)

    elif domain in ['gomanga.co']:
        gomanga_Url_Check(input_url, current_directory)

    elif domain in ['bato.to']:
        batoto_Url_Check(input_url, current_directory, User_Name, User_Password)

    elif domain in ['kissmanga.com']:
        kissmanga_Url_Check(input_url, current_directory)

    elif domain in ['comic.naver.com']:
        comic_naver_Url_Check(input_url, current_directory)

    elif domain in ['']:
        print('You need to specify at least 1 URL. Please run : comic-dl -h')
    else:
        print("%s is unsupported at the moment. Please request on Github repository."%(domain))
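The deleted dispatcher keys purely on the URL's network location. A small illustrative sketch (not part of the diff; the mapping and function name are made up) showing the same netloc-based routing as a lookup table:

```python
# Illustrative only: the netloc check used by url_checker(), expressed as a table.
from urllib.parse import urlparse  # Python 3 import, matching the deleted module


def pick_handler(input_url):
    domain = urlparse(input_url).netloc
    handlers = {
        'mangafox.me': 'sites.mangafox',
        'yomanga.co': 'sites.yomanga',
        'gomanga.co': 'sites.gomanga',
        'bato.to': 'sites.batoto',
        'kissmanga.com': 'sites.kissmanga',
        'comic.naver.com': 'sites.comic_naver',
    }
    return handlers.get(domain)


print(pick_handler('http://bato.to/reader#e5fc75f0ca34bcd5'))  # prints 'sites.batoto'
```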
@@ -1,2 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,439 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from more_itertools import unique_everseen
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from downloader.universal import main as FileDownloader
|
||||
from six.moves import range
|
||||
|
||||
|
||||
"""Bato serves the chapters in 2 ways :
|
||||
1.) All the images on 1 page by default. However, only some of the series/chapters have this thing.
|
||||
2.) Traditional 1 page 1 image thing.
|
||||
|
||||
We can check which kind of page this is by checking the "NEXT ARROW" kind of thing to move to the next page.
|
||||
|
||||
batoto_login function open a fresh instance of selenium webdriver and logs the user in by sending user name and password to batoto login page.
|
||||
The session is maintained and that instance of selenium webdriver is used to browse the pages to maintain the session and see the pages without
|
||||
any restriction.
|
||||
|
||||
The script should show error if the user is trying the access the page not visible to logged out users and quit. Some instances of pages to replicate these validations :
|
||||
|
||||
1.) Page not available to logged out users : http://bato.to/reader#1f018238b7e945ed
|
||||
2.) Single Page with all images : http://bato.to/reader#cb22bfed948294cb
|
||||
3.) Traditional Manga Page : http://bato.to/reader#e5fc75f0ca34bcd5
|
||||
|
||||
There are small portions in the code block to explain certain scenarios, so devs. please go through them if you're thinking of changing something.
|
||||
|
||||
The directory contains the name of the Scanlation group as well, because the script currently downloads jus the english chapters, but in future it
|
||||
will download all the languages available. So, this one as a reminded (lol) and for consistency. Oh, let's not forget the group's hardwork as well.
|
||||
|
||||
Currently there is no way/hack to view all the images in one page manually or to bypass the not logged in user restriction.
|
||||
This script pretty much does everything fine. However, should you encounter a bug/problem, please mention in the github issue.
|
||||
All bug fixes and patches are welcomed.
|
||||
"""
|
||||
|
||||
|
||||
def create_driver():
|
||||
|
||||
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
|
||||
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) ' \
|
||||
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
|
||||
'Chrome/39.0.2171.95 Safari/537.36'
|
||||
driver = webdriver.PhantomJS(
|
||||
desired_capabilities=desired_capabilities,
|
||||
service_args=['--load-images=no'])
|
||||
return driver
|
||||
|
||||
|
||||
def single_chapter(driver, url, current_directory, User_Name, User_Password):
|
||||
"""This little block checks whether the user has provided the arguments for password or username.
|
||||
If the user has provided something, then check that both UserName and Password has been provided.
|
||||
Filling either of them won't work. If the user has provided both, the username and password, send
|
||||
that info to batoto_login function which will create a logged in session and return us that instance
|
||||
of the selenium webdriver.
|
||||
"""
|
||||
|
||||
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
|
||||
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
|
||||
print("Username or Password cannot be empty.")
|
||||
sys.exit()
|
||||
print("Authenticating Your Username and Password ...")
|
||||
|
||||
batoto_login(driver, User_Name, User_Password)
|
||||
print("Logged in successfully")
|
||||
"""Selenium was navigating to the new url, but the old page still had its resources loaded, which made selenium
|
||||
think that the page was already loaded. So, it started taking 'Stale Elements' and threw the same exception.
|
||||
So, refreshing the page seemed to do the job.
|
||||
"""
|
||||
driver.get(url)
|
||||
driver.refresh()
|
||||
|
||||
"""Let's wait till the 'comic wrap' element has been loaded. This element contains the actual
|
||||
image for the comic. This element doesn't load in the beginning, so Selenium could be tricked
|
||||
into the false alarm that the page has been loaded. Half loaded page will seem like fully loaded
|
||||
page and selenium will start executing the search operation, which will cause the script to break
|
||||
in case everything 'Comic Image' has been loaded.
|
||||
"""
|
||||
try:
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.ID, "comic_wrap"))
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
page_title = str(driver.title)
|
||||
|
||||
"""Batoto doesn't provide shit in the source code of the web page. Hence, we'll be using the outer HTML
|
||||
to scrap all the info we need.
|
||||
"""
|
||||
elem = driver.find_element_by_xpath("//*")
|
||||
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
|
||||
|
||||
"""As mentioned above, batoto won't let the user watch/read the older entries/chapters if you're not logged in.
|
||||
So, if any user tries to download any such page, let's show the user the error and close the instance of selenium
|
||||
webdriver, and quit the script entirely without wasting anymore resources.
|
||||
"""
|
||||
try:
|
||||
access_check = driver.find_element_by_xpath(
|
||||
'//*[@id="reader"]/div/span').text
|
||||
|
||||
if access_check in [
|
||||
"ERROR [10030]: The thing you're looking for is unavailable. It may be due to:"]:
|
||||
print("You cannot access this page. You'll need to log in to download this page.")
|
||||
driver.quit()
|
||||
sys.exit()
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
try:
|
||||
# Getting the Series Name from the <title></title> tags of the web
|
||||
# page.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'^(.*)\ \-',
|
||||
page_title).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
except Exception as e:
|
||||
Series_Name = "Unkown Series"
|
||||
|
||||
try:
|
||||
# Getting the Series Name from the <title></title> tags of the web
|
||||
# page.
|
||||
volume_number = int(
|
||||
str(re.search('vol (\d+)', page_title).group(1)).strip())
|
||||
except Exception as e:
|
||||
volume_number = '0'
|
||||
|
||||
try:
|
||||
# Getting the Series Name from the <title></title> tags of the web
|
||||
# page.
|
||||
chapter_number = int(
|
||||
str(re.search('ch (\d+)', page_title).group(1)).strip())
|
||||
except Exception as e:
|
||||
chapter_number = '0'
|
||||
|
||||
try:
|
||||
# Used to find translation group's name from the 'Drop Down Menu'.
|
||||
Group_Name_Finder = str(driver.find_element_by_xpath(
|
||||
'//*[@id="reader"]/div[1]/ul/li[3]/select').text).replace("/", " ").strip()
|
||||
|
||||
except Exception as e:
|
||||
# Some entries on batoto don't have a name. So, if we get to any such
|
||||
# occassion, let's be prepared.
|
||||
Group_Name_Finder = str('No Group')
|
||||
|
||||
try:
|
||||
# This is a check which tells us if this particular web page is a
|
||||
# traditional way or one page all image thing.
|
||||
page_list = driver.find_element_by_id('page_select')
|
||||
|
||||
except Exception as e:
|
||||
|
||||
# If we cannot find the 'page_select' element, it means that this
|
||||
# chapter is showing all the images in one page.
|
||||
page_list = False
|
||||
|
||||
if volume_number == 0:
|
||||
# Some series don't seem to have volumes mentioned. Let's assume
|
||||
# they're 0.
|
||||
Raw_File_Directory = str(Series_Name) + '/' + "Chapter " + \
|
||||
str(chapter_number) + " [" + str(Group_Name_Finder) + " ]"
|
||||
else:
|
||||
Raw_File_Directory = str(Series_Name) + '/' + "Volume " + str(volume_number) + \
|
||||
'/' + "Chapter " + str(chapter_number) + " [" + str(Group_Name_Finder) + " ]"
|
||||
|
||||
# Fix for "Special Characters" in The series name
|
||||
File_Directory = re.sub(
|
||||
'[^A-Za-z0-9\-\.\'\#\/ \[\]]+',
|
||||
'',
|
||||
Raw_File_Directory)
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
if page_list: # If batoto is serving 1 image per page, we'll be using this part.
|
||||
"""We will be grabbing all the values in the drop down menu that has page numbers and take the very last value
|
||||
and extract the integer from it and use it to know what is the last page number for this chapter.
|
||||
Batoto follow a very simple linking syntax for serving the images, so let's exploit that to get the images
|
||||
without hitting batoto for each and every page of the chapter.
|
||||
URL Syntax : http://img.bato.to/comics/2016/11/02/s/read58196cffb13dd/img000001.jpg
|
||||
Look at the last number for the image. Manipulate that and we have what we need.
|
||||
"""
|
||||
items_list = page_list.find_elements_by_tag_name("option")
|
||||
|
||||
for item in items_list:
|
||||
list_of_pages = item.text
|
||||
|
||||
lst_pag = str(list_of_pages)
|
||||
|
||||
Last_Page_number = int(
|
||||
str(re.search('(\d+)', lst_pag).group(1)).strip())
|
||||
|
||||
img_link = driver.find_element_by_id('comic_page').get_attribute('src')
|
||||
|
||||
for i in range(1, Last_Page_number + 1):
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
if len(str(i)) == 1:
|
||||
|
||||
ddl_image = str(img_link).replace(
|
||||
'img000001', 'img00000%s') % (i)
|
||||
|
||||
else:
|
||||
|
||||
ddl_image = str(img_link).replace(
|
||||
'img000001', 'img0000%s') % (i)
|
||||
|
||||
File_Name_Final = str(
|
||||
i).strip() + "." + str(re.search('\d\.(.*?)$', ddl_image).group(1)).strip()
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ", Series_Name, ' - ', chapter_number)
|
||||
# driver.close()
|
||||
|
||||
# If Batoto is serving all the images in one page, we'll follow this block.
|
||||
if not page_list:
|
||||
"""Since all the image links are in one place, we don't have to rack our brains. Grab all the links
|
||||
to the images and download them one by one.
|
||||
"""
|
||||
|
||||
soup = BeautifulSoup(Page_Source, "html.parser")
|
||||
Image_Links = soup.findAll('div', {'style': 'text-align:center;'})
|
||||
|
||||
for link in Image_Links:
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
x = link.findAll('img')
|
||||
for a in x:
|
||||
ddl_image = a['src']
|
||||
|
||||
File_Name_Final = str(
|
||||
re.search(
|
||||
'img0000(\d+)\.([jpg]|[png])',
|
||||
ddl_image).group(1)).strip() + "." + str(
|
||||
re.search(
|
||||
'\d\.(.*?)$',
|
||||
ddl_image).group(1)).strip()
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
|
||||
print('\n')
|
||||
print("Completed Downloading ", Series_Name, ' - ', chapter_number)
|
||||
|
||||
|
||||
def whole_series(driver, url, current_directory, User_Name, User_Password):
|
||||
# print "Whole Series : ",url
|
||||
"""This little block checks whether the user has provided the arguments for password or username.
|
||||
If the user has provided something, then check that both UserName and Password has been provided.
|
||||
Filling either of them won't work. If the user has provided both, the username and password, send
|
||||
that info to batoto_login function which will create a logged in session and return us that instance
|
||||
of the selenium webdriver.
|
||||
"""
|
||||
|
||||
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
|
||||
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
|
||||
print("Username or Password cannot be empty.")
|
||||
sys.exit()
|
||||
print("Authenticating Your Username and Password ...")
|
||||
|
||||
batoto_login(driver, User_Name, User_Password)
|
||||
print("Logged in successfully")
|
||||
|
||||
driver.get(url)
|
||||
"""Let's wait till the 'content' element has been loaded. This element contains the list of all the
|
||||
chapters related to a particular manga. This element doesn't load in the beginning, so Selenium could
|
||||
be tricked into the false alarm that the page has been loaded. Half loaded page will seem like fully
|
||||
loaded page and selenium will start executing the search operation, which will cause the script to
|
||||
break in case everything 'Comic Image' has been loaded.
|
||||
"""
|
||||
try:
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.ID, "content"))
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
elem = driver.find_element_by_xpath("//*")
|
||||
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
|
||||
"""Basic idea is to grab all the 'a href' links found in the `row lang_English chapter_row` class
|
||||
and put them inside a lit. Later, for each element of the list, call the 'single_chapter' function to
|
||||
do the rest of the job.
|
||||
"""
|
||||
|
||||
link_list = []
|
||||
|
||||
soup = BeautifulSoup(Page_Source, "html.parser")
|
||||
all_links = soup.findAll(
|
||||
'tr', {'class': 'row lang_English chapter_row'})
|
||||
|
||||
for link in all_links:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
ddl_image = a['href']
|
||||
if "reader" in ddl_image:
|
||||
|
||||
link_list.append(ddl_image)
|
||||
|
||||
print("Total Chapters To Download : ", len(link_list))
|
||||
|
||||
for item in link_list:
|
||||
url = str(item)
|
||||
User_Name = 'N'
|
||||
User_Password = 'N'
|
||||
single_chapter(
|
||||
driver,
|
||||
url,
|
||||
current_directory,
|
||||
User_Name,
|
||||
User_Password)
|
||||
|
||||
else:
|
||||
# If the user hasn't supplied any logging information, we'll do this.
|
||||
driver.get(url)
|
||||
try:
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.ID, "content"))
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
elem = driver.find_element_by_xpath("//*")
|
||||
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
|
||||
|
||||
link_list = []
|
||||
|
||||
soup = BeautifulSoup(Page_Source, "html.parser")
|
||||
all_links = soup.findAll(
|
||||
'tr', {'class': 'row lang_English chapter_row'})
|
||||
|
||||
for link in all_links:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
ddl_image = a['href']
|
||||
if "reader" in ddl_image:
|
||||
link_list.append(ddl_image)
|
||||
|
||||
print("Total Chapters To Download : ", len(link_list))
|
||||
#print(link_list)
|
||||
|
||||
for x in link_list:
|
||||
url = str(x)
|
||||
User_Name = 'N'
|
||||
User_Password = 'N'
|
||||
single_chapter(
|
||||
driver,
|
||||
url,
|
||||
current_directory,
|
||||
User_Name,
|
||||
User_Password)
|
||||
|
||||
|
||||
def batoto_login(driver, User_Name, User_Password):
|
||||
|
||||
driver.get(
|
||||
"https://bato.to/forums/index.php?app=core&module=global§ion=login")
|
||||
try:
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.ID, "ips_password"))
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# driver.save_screenshot('Single_exception.png')
|
||||
pass
|
||||
LoggedOut_Title = driver.title
|
||||
driver.find_element_by_id('ips_username').send_keys(User_Name)
|
||||
driver.find_element_by_id('ips_password').send_keys(User_Password)
|
||||
|
||||
driver.find_element_by_xpath('//*[@id="login"]/fieldset[2]/input').click()
|
||||
LoggedIn_Title = driver.title
|
||||
|
||||
"""A little check to see whether we've logged in or not. Comparing the titles of the before and after logging
|
||||
pages.
|
||||
"""
|
||||
|
||||
if str(LoggedIn_Title).strip() == str(LoggedOut_Title).strip():
|
||||
print("Couldn't log you in. Please check your credentials.")
|
||||
driver.quit()
|
||||
sys.exit()
|
||||
|
||||
|
||||
def batoto_Url_Check(input_url, current_directory, User_Name, User_Password):
|
||||
|
||||
batoto_single_regex = re.compile(
|
||||
'https?://(?P<host>bato.to)/reader\#(?P<extra_characters>[\d\w-]+)?(\/|.)')
|
||||
batoto_whole_regex = re.compile(
|
||||
'^https?://(?P<host>bato.to)/comic/\_/comics/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
#print "Inside"
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(batoto_single_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['extra_characters']:
|
||||
url = str(input_url)
|
||||
driver = create_driver()
|
||||
single_chapter(
|
||||
driver,
|
||||
url,
|
||||
current_directory,
|
||||
User_Name,
|
||||
User_Password)
|
||||
driver.quit()
|
||||
else:
|
||||
pass
|
||||
|
||||
found = re.search(batoto_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
|
||||
driver = create_driver()
|
||||
whole_series(
|
||||
driver,
|
||||
url,
|
||||
current_directory,
|
||||
User_Name,
|
||||
User_Password)
|
||||
driver.quit()
|
||||
else:
|
||||
pass
|
@ -1,114 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
from downloader.cookies_required import with_referer as FileDownloader
|
||||
from six.moves import range
|
||||
from six.moves import input
|
||||
|
||||
def single_chapter(url,current_directory):
|
||||
|
||||
s = requests.Session()
|
||||
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
|
||||
req = s.get(url,headers=headers)
|
||||
cookies = req.cookies
|
||||
page_source_1 = str(req.text.encode('utf-8'))
|
||||
|
||||
try:
|
||||
#Korean_Name = re.search(r'<h2>(.*?)<span class="wrt_nm">',str(page_source)).group(1)
|
||||
Series_Name = re.search(r'titleId=(\d+)',url).group(1)
|
||||
except Exception as e:
|
||||
Series_Name = "Unknown"
|
||||
|
||||
try:
|
||||
#chapter_number = int(re.search(r'\<span\ class\=\"total\"\>(.\d+)\<\/span\>',page_source_1).group(1))
|
||||
chapter_number = re.search(r'&no=(\d+)',url).group(1)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
chapter_number = 0
|
||||
|
||||
img_regex = r'http://imgcomic.naver.net/webtoon/\d+/\d+/.+?\.(?:jpg|png|gif|bmp|JPG|PNG|GIF|BMP)'
|
||||
|
||||
img_links = list(re.findall(img_regex,page_source_1))
|
||||
|
||||
Raw_File_Directory = str(Series_Name) +'/'+"Chapter "+str(chapter_number)
|
||||
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for x,items in enumerate(img_links):
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
FileDownloader(str(x+1)+str(items[-4:]),Directory_path,cookies,items,url)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name)
|
||||
|
||||
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
|
||||
|
||||
|
||||
s = requests.Session()
|
||||
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
|
||||
req = s.get(url,headers=headers)
|
||||
cookies = req.cookies
|
||||
page_source_1 = req.text.encode('utf-8')
|
||||
|
||||
titleId = re.search(r'titleId=(\d+)',url).group(1)
|
||||
|
||||
try:
|
||||
first_link = int(re.search(r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %(titleId),page_source_1).group(1))
|
||||
except Exception as e:
|
||||
first_link = eval(input("Please Enter the Last chapter of the series : "))
|
||||
if not first_link:
|
||||
print("You failed to enter the last chapter count. Script will exit now.")
|
||||
sys.exit()
|
||||
|
||||
for x in range(1,int(first_link)):
|
||||
Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" %(titleId,x)
|
||||
single_chapter(Chapter_Url,current_directory)
|
||||
|
||||
|
||||
|
||||
def comic_naver_Url_Check(input_url, current_directory):
|
||||
|
||||
comic_naver_single_regex = re.compile(
|
||||
'https?://(?P<host>comic.naver.com)/webtoon/(?P<detail>detail.nhn)\?titleId\=(?P<extra_characters>[\d]+)?(\/|.)')
|
||||
comic_naver_whole_regex = re.compile(
|
||||
'https?://(?P<host>comic.naver.com)/webtoon/(?P<list>list.nhn)\?titleId\=(?P<extra_characters>[\d]+)?(\/|.)')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(comic_naver_single_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['detail']:
|
||||
url = str(input_url)
|
||||
single_chapter(url, current_directory)
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
found = re.search(comic_naver_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['list']:
|
||||
url = str(input_url)
|
||||
whole_series(url, current_directory)
|
||||
else:
|
||||
pass
|
@ -1,128 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from more_itertools import unique_everseen
|
||||
from bs4 import BeautifulSoup
|
||||
from downloader.cookies_required import main as FileDownloader
|
||||
|
||||
def single_chapter(url,current_directory):
|
||||
|
||||
if not url:
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
sys.exit(0)
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
}
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
|
||||
try:
|
||||
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
|
||||
except Exception as e:
|
||||
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
|
||||
|
||||
|
||||
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
|
||||
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
|
||||
|
||||
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
|
||||
|
||||
|
||||
ddl_list = list(unique_everseen(ddl_image_list))
|
||||
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for i in ddl_list:
|
||||
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
|
||||
|
||||
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
|
||||
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name)
|
||||
|
||||
def whole_series(url,current_directory):
|
||||
if not url:
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
}
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
soup = BeautifulSoup(Page_source, 'html.parser')
|
||||
|
||||
chapter_text = soup.findAll('div',{'class':'title'})
|
||||
|
||||
for link in chapter_text:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
url = a['href']
|
||||
single_chapter(url,current_directory)
|
||||
|
||||
def gomanga_Url_Check(input_url,current_directory):
|
||||
|
||||
gomanga_single_regex = re.compile('https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
|
||||
gomanga_whole_regex = re.compile('^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(gomanga_single_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['Chapter']:
|
||||
url = str(input_url)
|
||||
single_chapter(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
found = re.search(gomanga_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
whole_series(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,145 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from bs4 import BeautifulSoup
|
||||
from downloader.universal import main as FileDownloader
|
||||
import cfscrape
|
||||
|
||||
|
||||
def single_chapter(url, current_directory):
|
||||
|
||||
scraper = cfscrape.create_scraper()
|
||||
|
||||
Page_Source = scraper.get(str(url)).content
|
||||
|
||||
formatted = BeautifulSoup(Page_Source, "lxml")
|
||||
|
||||
meta = formatted.findAll('title')
|
||||
|
||||
meta_data = list(str(meta).split('\n'))
|
||||
|
||||
|
||||
try:
|
||||
Series_Name = str(meta_data[2])
|
||||
except Exception as e:
|
||||
print (e)
|
||||
Series_Name = "Unkown Series"
|
||||
|
||||
try:
|
||||
# Getting the Volume Number from the page source.
|
||||
volume_number = int(
|
||||
str(re.search('Vol\.(.*)\ Ch', Page_Source).group(1)).strip())
|
||||
except Exception as e:
|
||||
volume_number = '0'
|
||||
|
||||
try:
|
||||
chapter_number = int(str(meta_data[3]))
|
||||
|
||||
except Exception as e:
|
||||
try:
|
||||
# Getting the Volume Number from the page source.
|
||||
chapter_number = int(
|
||||
str(re.search('Ch\.(.*)\:', Page_Source).group(1)).strip())
|
||||
except Exception as e:
|
||||
chapter_number = '0'
|
||||
|
||||
all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', str(formatted))
|
||||
|
||||
if volume_number == '0':
|
||||
# Some series don't seem to have volumes mentioned. Let's assume
|
||||
# they're 0.
|
||||
Raw_File_Directory = str(Series_Name) + '/' + \
|
||||
"Chapter " + str(chapter_number)
|
||||
else:
|
||||
Raw_File_Directory = str(Series_Name) + '/' + "Volume " + \
|
||||
str(volume_number) + '/' + "Chapter " + str(chapter_number)
|
||||
|
||||
# Fix for "Special Characters" in The series name
|
||||
File_Directory = re.sub(
|
||||
'[^A-Za-z0-9\-\.\'\#\/ \[\]]+',
|
||||
'',
|
||||
Raw_File_Directory)
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
print ('\n')
|
||||
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for elements in all_links:
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = str(elements).strip()
|
||||
|
||||
try:
|
||||
File_Name_Final = str(re.search(
|
||||
's0/(.*)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:])
|
||||
except Exception as e:
|
||||
File_Name_Final = str(re.search(
|
||||
'title\=(.*)\_(\d+)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:])
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ", Series_Name, ' - ', chapter_number)
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
|
||||
scraper = cfscrape.create_scraper()
|
||||
|
||||
Page_Source = scraper.get(str(url)).content
|
||||
|
||||
link_list = []
|
||||
|
||||
soup = BeautifulSoup(Page_Source, "html.parser")
|
||||
all_links = soup.findAll('table', {'class': 'listing'})
|
||||
|
||||
for link in all_links:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
|
||||
ddl_image = a['href']
|
||||
if "Manga" in ddl_image:
|
||||
final_url = "http://kissmanga.com" + ddl_image
|
||||
link_list.append(final_url)
|
||||
|
||||
if int(len(link_list)) == '0':
|
||||
print("Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes.")
|
||||
sys.exit()
|
||||
|
||||
print("Total Chapters To Download : ", len(link_list))
|
||||
|
||||
for item in link_list:
|
||||
url = str(item)
|
||||
single_chapter(url, current_directory)
|
||||
|
||||
|
||||
def kissmanga_Url_Check(input_url, current_directory):
|
||||
|
||||
kissmanga_single_regex = re.compile(
|
||||
'https?://(?P<host>kissmanga.com)/Manga/(?P<Series_Name>[\d\w-]+)?/((?P<Volume>[Vol\-\d]+)|(.*)(?P<Chapter>[Ch\-\d]+))\-(?P<Chap_Name>[\d\w-]+)\?(?P<id>[\=\d\w-]+)')
|
||||
kissmanga_whole_regex = re.compile(
|
||||
'^https?://(?P<host>kissmanga.com)/Manga/(?P<comic>[\d\w\-]+)?(\/|.)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(kissmanga_single_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['Chap_Name']:
|
||||
url = str(input_url)
|
||||
single_chapter(url, current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
found = re.search(kissmanga_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
whole_series(url, current_directory)
|
||||
else:
|
||||
pass
|
@ -1,180 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import requests
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from downloader.universal import main as FileDownloader
|
||||
from six.moves import range
|
||||
|
||||
|
||||
def create_driver():
|
||||
|
||||
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
|
||||
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) ' \
|
||||
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
|
||||
'Chrome/39.0.2171.95 Safari/537.36'
|
||||
driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities,service_args=['--load-images=no'])
|
||||
return driver
|
||||
|
||||
def single_chapter(driver,url,current_directory):
|
||||
|
||||
try:
|
||||
Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
except Exception as e:
|
||||
Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
try:
|
||||
volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories.
|
||||
except Exception as e:
|
||||
volume_number = "Volume 01"
|
||||
|
||||
try:
|
||||
chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
|
||||
except Exception as e:
|
||||
chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
|
||||
|
||||
if volume_number == '0':
|
||||
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) # Some series don't seem to have volumes mentioned. Let's assume they're 0.
|
||||
else:
|
||||
Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number)
|
||||
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
driver.get(url)
|
||||
|
||||
try:
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.ID, "image"))
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
driver.save_screenshot("error.png")
|
||||
print("Couldn't load the element. I'll try to move ahead in any case.")
|
||||
print('\n')
|
||||
print("I took a screenshot, please attach it in the issue you open in the repository.")
|
||||
pass
|
||||
|
||||
elem = driver.find_element_by_xpath("//*")
|
||||
Page_Source = str(elem.get_attribute("outerHTML").encode('utf-8'))
|
||||
|
||||
First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden.
|
||||
|
||||
current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
|
||||
for x in range(current_chapter_count,last_chapter_count+1):
|
||||
|
||||
driver.refresh()
|
||||
File_Name_Final = str(x)+'.jpg'
|
||||
link_container = driver.find_element_by_xpath('//*[@id="image"]')
|
||||
ddl_image = str(link_container.get_attribute('src'))
|
||||
FileDownloader(File_Name_Final,Directory_path,ddl_image)
|
||||
driver.find_element_by_xpath('//*[@id="top_bar"]/div/a[2]').click()
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name,' - ',chapter_number)
|
||||
|
||||
|
||||
def whole_series(url,current_directory):
|
||||
|
||||
if not url:
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
|
||||
try:
|
||||
Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
except Exception as e:
|
||||
print('Check if the URL is correct or not. Report on Github.')
|
||||
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=headers)
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
try:
|
||||
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v"
|
||||
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
|
||||
|
||||
if len(links) == 0:
|
||||
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c"
|
||||
#print chapter_link_format
|
||||
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print("Error : ",e,'\n',"Please report this error on Github repository.")
|
||||
|
||||
driver = create_driver()
|
||||
|
||||
for x in links:
|
||||
chapter_link = str(str(chapter_link_format)+str(x)+"html").strip()
|
||||
|
||||
try:
|
||||
single_chapter(driver,chapter_link,current_directory)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
driver.quit()
|
||||
driver.quit()
|
||||
|
||||
def mangafox_Url_Check(input_url,current_directory):
|
||||
|
||||
mangafox_single_regex = re.compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
|
||||
mangafox_whole_regex = re.compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(mangafox_single_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['issue']:
|
||||
url = str(input_url)
|
||||
driver = create_driver()
|
||||
try:
|
||||
single_chapter(driver,url,current_directory)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
driver.quit()
|
||||
driver.quit()
|
||||
sys.exit()
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
found = re.search(mangafox_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic_series']:
|
||||
url = str(input_url)
|
||||
#driver = create_driver()
|
||||
try:
|
||||
whole_series(url,current_directory)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
sys.exit()
|
||||
else:
|
||||
pass
|
@ -1,121 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from more_itertools import unique_everseen
|
||||
from bs4 import BeautifulSoup
|
||||
from downloader.cookies_required import main as FileDownloader
|
||||
|
||||
def single_chapter(url,current_directory):
|
||||
|
||||
if not url:
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
sys.exit(0)
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
}
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
try:
|
||||
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
|
||||
except Exception as e:
|
||||
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
|
||||
|
||||
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
|
||||
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
|
||||
|
||||
ddl_list = list(unique_everseen(ddl_image_list))
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for i in ddl_list:
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
|
||||
|
||||
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
|
||||
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name)
|
||||
|
||||
def whole_series(url,current_directory):
|
||||
if not url:
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
}
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
soup = BeautifulSoup(Page_source, 'html.parser')
|
||||
|
||||
chapter_text = soup.findAll('div',{'class':'title'})
|
||||
|
||||
for link in chapter_text:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
url = a['href']
|
||||
single_chapter(url,current_directory)
|
||||
|
||||
def yomanga_Url_Check(input_url,current_directory):
|
||||
|
||||
yomanga_single_regex = re.compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
|
||||
yomanga_whole_regex = re.compile('^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(yomanga_single_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['Chapter']:
|
||||
url = str(input_url)
|
||||
single_chapter(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
found = re.search(yomanga_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
whole_series(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@@ -1,7 +0,0 @@
'''

Date Format : YY/MM/DD

'''

__version__ = '2016.11.26'
@@ -1,73 +1,52 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from honcho import url_checker
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import argparse
from honcho import url_checker
from version import __version__


def version():
    print '\n'
    print '{:^80}'.format('Current Version : %s') % (__version__)
    print '\n'
    print '{:^80}'.format("More info : comic-dl -h")

    print('\n')
    print('{:^80}'.format('Current Version : %s')%(__version__))
    print('\n')
    print('{:^80}'.format("More info : comic-dl -h"))

def usage():
    print '\n'
    print '{:^80}'.format('################################################')
    print '{:^80}'.format('Comic-DL Usage')
    print '{:^80}'.format('################################################\n')
    print '\n'
    print '{:^80}'.format('Author : Xonshiz | Version : %s') % (__version__)
    print '{:^80}'.format('-------------------------------------------------\n')
    print "Comic-dl is a command line tool to download manga and comics from various comic and manga sites."
    print "Using the script is pretty simple and should be easy for anyone familiar with a command line/shell."
    print '\n'
    print '{:^80}'.format("USAGE : comic-dl -i <URL to comic>")
    print '\n'
    print "Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ", '\n'
    print "Available Arguments : "
    print '{:^80}'.format("-i,--input : Specifies the Input URL")
    print '{:^80}'.format("-h : Prints this help menu")
    print '{:^80}'.format("--version : Prints the current version and exits")
    print '{:^80}'.format("-a,--about : Shows the info about this script and exits.")
    print '{:^80}'.format("-u,--username : Indicates username for a website.")
    print '{:^80}'.format("-p,--password : Indicates password for a website.")

    print('\n')
    print('{:^80}'.format('################################################'))
    print('{:^80}'.format('Comic-DL Usage'))
    print('{:^80}'.format('################################################\n'))
    print('\n')
    print('{:^80}'.format('Author : Xonshiz | Version : %s')%(__version__))
    print('{:^80}'.format('-------------------------------------------------\n'))
    print("Comic-dl is a command line tool to download manga and comics from various comic and manga sites.")
    print("Using the script is pretty simple and should be easy for anyone familiar with a command line/shell.")
    print('\n')
    print('{:^80}'.format("USAGE : comic-dl -i <URL to comic>"))
    print('\n')
    print("Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ",'\n')
    print("Available Arguments : ")
    print('{:^80}'.format("-i,--input : Specifies the Input URL"))
    print('{:^80}'.format("-h : Prints this help menu"))
    print('{:^80}'.format("--version : Prints the current version and exits"))
    print('{:^80}'.format("-a,--about : Shows the info about this script and exits."))
    print('{:^80}'.format("-u,--username : Indicates username for a website."))
    print('{:^80}'.format("-p,--password : Indicates password for a website."))


def main(argv):
    current_directory = str(os.getcwd())
    parser = argparse.ArgumentParser(
        description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
    parser.add_argument(
        '--version',
        action='store_true',
        help='Shows version and exits')
    parser.add_argument(
        '-a',
        '--about',
        action='store_true',
        help='Shows the info regarding this script')
    parser.add_argument(
        '-i',
        '--input',
        nargs=1,
        help='Inputs the URL to comic')
    parser.add_argument(
        '-p',
        '--password',
        nargs=1,
        help='Indicates password for a website',
        default='None')
    parser.add_argument(
        '-u',
        '--username',
        nargs=1,
        help='Indicates username for a website',
        default='None')
    parser = argparse.ArgumentParser(description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.')
    parser.add_argument('--version',action='store_true',help='Shows version and exits')
    parser.add_argument('-a','--about',action='store_true',help='Shows the info regarding this script')
    parser.add_argument('-i','--input',nargs=1,help='Inputs the URL to comic')
    parser.add_argument('-p','--password',nargs=1,help='Indicates password for a website',default='None')
    parser.add_argument('-u','--username',nargs=1,help='Indicates username for a website',default='None')


    args = parser.parse_args()
@ -82,8 +61,8 @@ def main(argv):
|
||||
input_url = str(args.input[0]).strip()
|
||||
User_Password = str(args.password[0].strip())
|
||||
User_Name = str(args.username[0].strip())
|
||||
url_checker(input_url, current_directory, User_Name, User_Password)
|
||||
url_checker(input_url,current_directory,User_Name,User_Password)
|
||||
sys.exit()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
||||
main(sys.argv[1:])
|
||||
|
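# Illustrative sketch (not part of the commit): the pattern this merge relies on - importing
# print_function so the same print() calls run unchanged on Python 2 and Python 3.
from __future__ import print_function

def banner(version):
    # Center an 80-column line, the same layout the usage/version output above uses.
    print('{:^80}'.format('Current Version : %s' % (version)))

if __name__ == '__main__':
    banner('2016.12.28')  # assumed example value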
@ -11,70 +11,62 @@ ddl_image is the direct link to the image itself.
|
||||
This module uses `requests` library to achieve the handling of cookies.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
from downloader.universal import main as FileDownloader
|
||||
|
||||
|
||||
def main(File_Name_Final, Directory_path, tasty_cookies, ddl_image):
|
||||
File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
|
||||
def main(File_Name_Final,Directory_path,tasty_cookies,ddl_image):
|
||||
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
|
||||
|
||||
if os.path.isfile(File_Check_Path):
|
||||
print '[Comic-dl] File Exist! Skipping ', File_Name_Final, '\n'
|
||||
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
|
||||
pass
|
||||
|
||||
if not os.path.isfile(File_Check_Path):
|
||||
print '[Comic-dl] Downloading : ', File_Name_Final
|
||||
|
||||
response = requests.get(ddl_image, stream=True, cookies=tasty_cookies)
|
||||
if not os.path.isfile(File_Check_Path):
|
||||
print('[Comic-dl] Downloading : ',File_Name_Final)
|
||||
|
||||
response = requests.get(ddl_image, stream=True,cookies=tasty_cookies)
|
||||
try:
|
||||
with open(File_Name_Final, 'wb') as out_file:
|
||||
shutil.copyfileobj(response.raw, out_file)
|
||||
File_Path = os.path.normpath(File_Name_Final)
|
||||
except Exception as e:
|
||||
print "Couldn't download file from : ", ddl_image
|
||||
print("Couldn't download file from : ",ddl_image)
|
||||
pass
|
||||
try:
|
||||
shutil.move(File_Path, Directory_path)
|
||||
shutil.move(File_Path,Directory_path)
|
||||
except Exception as e:
|
||||
print e, '\n'
|
||||
print(e,'\n')
|
||||
pass
|
||||
|
||||
|
||||
def with_referer(
|
||||
File_Name_Final,
|
||||
Directory_path,
|
||||
tasty_cookies,
|
||||
ddl_image,
|
||||
referer):
|
||||
File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
|
||||
def with_referer(File_Name_Final,Directory_path,tasty_cookies,ddl_image,referer):
|
||||
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
|
||||
|
||||
if os.path.isfile(File_Check_Path):
|
||||
print '[Comic-dl] File Exist! Skipping ', File_Name_Final, '\n'
|
||||
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
|
||||
pass
|
||||
|
||||
if not os.path.isfile(File_Check_Path):
|
||||
print '[Comic-dl] Downloading : ', File_Name_Final
|
||||
headers = {'Referer': referer}
|
||||
response = requests.get(
|
||||
ddl_image,
|
||||
stream=True,
|
||||
cookies=tasty_cookies,
|
||||
headers=headers)
|
||||
if not os.path.isfile(File_Check_Path):
|
||||
print('[Comic-dl] Downloading : ',File_Name_Final)
|
||||
headers = {'Referer': referer}
|
||||
response = requests.get(ddl_image, stream=True,cookies=tasty_cookies,headers=headers)
|
||||
try:
|
||||
with open(File_Name_Final, 'wb') as out_file:
|
||||
shutil.copyfileobj(response.raw, out_file)
|
||||
File_Path = os.path.normpath(File_Name_Final)
|
||||
except Exception as e:
|
||||
print "Couldn't download file from : ", ddl_image
|
||||
print("Couldn't download file from : ",ddl_image)
|
||||
pass
|
||||
try:
|
||||
shutil.move(File_Path, Directory_path)
|
||||
shutil.move(File_Path,Directory_path)
|
||||
except Exception as e:
|
||||
print e, '\n'
|
||||
print(e,'\n')
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
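# Illustrative sketch (assumed helper, not part of the commit): the download pattern used by
# main() and with_referer() above, condensed - stream the image over requests with the
# session cookies and an optional Referer header, then copy the raw response to disk.
import shutil
import requests

def fetch_image(ddl_image, out_path, tasty_cookies=None, referer=None):
    headers = {'Referer': referer} if referer else {}
    response = requests.get(ddl_image, stream=True, cookies=tasty_cookies, headers=headers)
    with open(out_path, 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)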
@ -9,32 +9,38 @@ Directory_path which is the directory path where you want to download the file,
|
||||
ddl_image is the direct link to the image itself.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import urllib2
|
||||
import urllib
|
||||
import shutil
|
||||
from urllib2 import URLError
|
||||
import urllib
|
||||
#from urllib import URLError
|
||||
import sys
|
||||
|
||||
|
||||
def main(File_Name_Final, Directory_path, ddl_image):
|
||||
File_Check_Path = str(Directory_path) + '/' + str(File_Name_Final)
|
||||
|
||||
def main(File_Name_Final,Directory_path,ddl_image):
|
||||
File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final)
|
||||
|
||||
if os.path.isfile(File_Check_Path):
|
||||
print '[Comic-dl] File Exist! Skipping ', File_Name_Final, '\n'
|
||||
print('[Comic-dl] File Exist! Skipping ',File_Name_Final,'\n')
|
||||
pass
|
||||
|
||||
if not os.path.isfile(File_Check_Path):
|
||||
print '[Comic-dl] Downloading : ', File_Name_Final
|
||||
urllib.urlretrieve(ddl_image, File_Name_Final)
|
||||
if not os.path.isfile(File_Check_Path):
|
||||
print('[Comic-dl] Downloading : ',File_Name_Final)
|
||||
urllib.request.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
|
||||
urllib.request.urlretrieve(ddl_image, File_Name_Final)
|
||||
#filename, headers = urllib.urlretrieve(ddl_image,File_Name_Final)
|
||||
#print "File Name : ",filename
|
||||
#print "Headers : ",headers
|
||||
File_Path = os.path.normpath(File_Name_Final)
|
||||
try:
|
||||
shutil.move(File_Path, Directory_path)
|
||||
shutil.move(File_Path,Directory_path)
|
||||
except Exception as e:
|
||||
print e, '\n'
|
||||
print(e,'\n')
|
||||
os.remove(File_Path)
|
||||
pass
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
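# Illustrative sketch (assumption: six is available, as it is elsewhere in this commit):
# six.moves.urllib.request exposes urlretrieve and URLopener under one name on both
# Python 2 and Python 3, matching the urllib.request calls used above.
from six.moves.urllib import request as url_request

def grab(ddl_image, file_name, user_agent='Mozilla/5.0'):
    # The module above assigns URLopener.version before calling urlretrieve; the same
    # assignment is mirrored here.
    url_request.URLopener.version = user_agent
    url_request.urlretrieve(ddl_image, file_name)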
@ -5,44 +5,49 @@
|
||||
"""This python module decides which URL should be assigned to which other module from the site package.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
|
||||
from future import standard_library
|
||||
standard_library.install_aliases()
|
||||
|
||||
#import urllib as urllib2
|
||||
from sites.yomanga import yomanga_Url_Check
|
||||
from sites.gomanga import gomanga_Url_Check
|
||||
from sites.mangafox import mangafox_Url_Check
|
||||
from sites.batoto import batoto_Url_Check
|
||||
from sites.kissmanga import kissmanga_Url_Check
|
||||
from sites.comic_naver import comic_naver_Url_Check
|
||||
from downloader import universal, cookies_required
|
||||
import urllib2
|
||||
from downloader import universal,cookies_required
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
|
||||
|
||||
def url_checker(input_url, current_directory, User_Name, User_Password):
|
||||
|
||||
domain = urllib2.urlparse.urlparse(input_url).netloc
|
||||
|
||||
domain = urlparse(input_url).netloc
|
||||
|
||||
if domain in ['mangafox.me']:
|
||||
mangafox_Url_Check(input_url, current_directory)
|
||||
pass
|
||||
|
||||
elif domain in ['yomanga.co']:
|
||||
yomanga_Url_Check(input_url, current_directory)
|
||||
pass
|
||||
|
||||
elif domain in ['gomanga.co']:
|
||||
gomanga_Url_Check(input_url, current_directory)
|
||||
pass
|
||||
|
||||
elif domain in ['bato.to']:
|
||||
batoto_Url_Check(
|
||||
input_url,
|
||||
current_directory,
|
||||
User_Name,
|
||||
User_Password)
|
||||
pass
|
||||
batoto_Url_Check(input_url, current_directory, User_Name, User_Password)
|
||||
|
||||
elif domain in ['kissmanga.com']:
|
||||
kissmanga_Url_Check(input_url, current_directory)
|
||||
pass
|
||||
|
||||
elif domain in ['comic.naver.com']:
|
||||
comic_naver_Url_Check(input_url, current_directory)
|
||||
pass
|
||||
|
||||
elif domain in ['']:
|
||||
print 'You need to specify at least 1 URL. Please run : comic-dl -h'
|
||||
print('You need to specify at least 1 URL. Please run : comic-dl -h')
|
||||
else:
|
||||
print "%s is unsupported at the moment. Please request on Github repository." % (domain)
|
||||
print("%s is unsupported at the moment. Please request on Github repository."%(domain))
|
||||
|
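# Illustrative sketch (assumption: python-future is installed, as the imports above require):
# install_aliases() makes the Python 3 urllib.parse name importable on Python 2 too, so one
# urlparse() call can pull out the netloc that url_checker() routes on.
from future import standard_library
standard_library.install_aliases()
from urllib.parse import urlparse

def domain_of(input_url):
    return urlparse(input_url).netloc

# e.g. domain_of('http://comic.naver.com/webtoon/detail.nhn?titleId=1') -> 'comic.naver.com'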
@ -1,2 +1,2 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
@ -12,6 +14,7 @@ from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from downloader.universal import main as FileDownloader
|
||||
from six.moves import range
|
||||
|
||||
|
||||
"""Bato serves the chapters in 2 ways :
|
||||
@ -63,12 +66,12 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
|
||||
|
||||
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
|
||||
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
|
||||
print "Username or Password cannot be empty."
|
||||
print("Username or Password cannot be empty.")
|
||||
sys.exit()
|
||||
print "Authenticating Your Username and Password ..."
|
||||
print("Authenticating Your Username and Password ...")
|
||||
|
||||
batoto_login(driver, User_Name, User_Password)
|
||||
print "Logged in successfully"
|
||||
print("Logged in successfully")
|
||||
"""Selenium was navigating to the new url, but the old page still had its resources loaded, which made selenium
|
||||
think that the page was already loaded. So, it started taking 'Stale Elements' and threw the same exception.
|
||||
So, refreshing the page seemed to do the job.
|
||||
@ -107,7 +110,7 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
|
||||
|
||||
if access_check in [
|
||||
"ERROR [10030]: The thing you're looking for is unavailable. It may be due to:"]:
|
||||
print "You cannot access this page. You'll need to log in to download this page."
|
||||
print("You cannot access this page. You'll need to log in to download this page.")
|
||||
driver.quit()
|
||||
sys.exit()
|
||||
|
||||
@ -183,9 +186,9 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
print '\n'
|
||||
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
if page_list: # If batoto is serving 1 image per page, we'll be using this part.
|
||||
"""We will be grabbing all the values in the drop down menu that has page numbers and take the very last value
|
||||
@ -224,8 +227,8 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
|
||||
i).strip() + "." + str(re.search('\d\.(.*?)$', ddl_image).group(1)).strip()
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
|
||||
print '\n'
|
||||
print "Completed downloading ", Series_Name, ' - ', chapter_number
|
||||
print('\n')
|
||||
print("Completed downloading ", Series_Name, ' - ', chapter_number)
|
||||
# driver.close()
|
||||
|
||||
# If Batoto is serving all the images in one page, we'll follow this block.
|
||||
@ -253,8 +256,8 @@ def single_chapter(driver, url, current_directory, User_Name, User_Password):
|
||||
ddl_image).group(1)).strip()
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
|
||||
print '\n'
|
||||
print "Completed Downloading ", Series_Name, ' - ', chapter_number
|
||||
print('\n')
|
||||
print("Completed Downloading ", Series_Name, ' - ', chapter_number)
|
||||
|
||||
|
||||
def whole_series(driver, url, current_directory, User_Name, User_Password):
|
||||
@ -268,12 +271,12 @@ def whole_series(driver, url, current_directory, User_Name, User_Password):
|
||||
|
||||
if str(User_Name) not in ["N"] or str(User_Password) not in ["N"]:
|
||||
if str(User_Name) in ["N"] or str(User_Password) in ["N"]:
|
||||
print "Username or Password cannot be empty."
|
||||
print("Username or Password cannot be empty.")
|
||||
sys.exit()
|
||||
print "Authenticating Your Username and Password ..."
|
||||
print("Authenticating Your Username and Password ...")
|
||||
|
||||
batoto_login(driver, User_Name, User_Password)
|
||||
print "Logged in successfully"
|
||||
print("Logged in successfully")
|
||||
|
||||
driver.get(url)
|
||||
"""Let's wait till the 'content' element has been loaded. This element contains the list of all the
|
||||
@ -310,7 +313,7 @@ def whole_series(driver, url, current_directory, User_Name, User_Password):
|
||||
|
||||
link_list.append(ddl_image)
|
||||
|
||||
print "Total Chapters To Download : ", len(link_list)
|
||||
print("Total Chapters To Download : ", len(link_list))
|
||||
|
||||
for item in link_list:
|
||||
url = str(item)
|
||||
@ -349,8 +352,8 @@ def whole_series(driver, url, current_directory, User_Name, User_Password):
|
||||
if "reader" in ddl_image:
|
||||
link_list.append(ddl_image)
|
||||
|
||||
print "Total Chapters To Download : ", len(link_list)
|
||||
# print link_list
|
||||
print("Total Chapters To Download : ", len(link_list))
|
||||
#print(link_list)
|
||||
|
||||
for x in link_list:
|
||||
url = str(x)
|
||||
@ -388,7 +391,7 @@ def batoto_login(driver, User_Name, User_Password):
|
||||
"""
|
||||
|
||||
if str(LoggedIn_Title).strip() == str(LoggedOut_Title).strip():
|
||||
print "Couldn't log you in. Please check your credentials."
|
||||
print("Couldn't log you in. Please check your credentials.")
|
||||
driver.quit()
|
||||
sys.exit()
|
||||
|
||||
@ -399,7 +402,7 @@ def batoto_Url_Check(input_url, current_directory, User_Name, User_Password):
|
||||
'https?://(?P<host>bato.to)/reader\#(?P<extra_characters>[\d\w-]+)?(\/|.)')
|
||||
batoto_whole_regex = re.compile(
|
||||
'^https?://(?P<host>bato.to)/comic/\_/comics/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
|
||||
#print "Inside"
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(batoto_single_regex, line)
|
||||
@ -423,6 +426,7 @@ def batoto_Url_Check(input_url, current_directory, User_Name, User_Password):
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
|
||||
driver = create_driver()
|
||||
whole_series(
|
||||
driver,
|
||||
|
@ -2,93 +2,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
from downloader.cookies_required import with_referer as FileDownloader
|
||||
from six.moves import range
|
||||
from six.moves import input
|
||||
|
||||
"""Thanks to puilp0502 for his repo : https://github.com/puilp0502/comic-downloader
|
||||
I got stuck at getting the images to download from the links. Then I went through puilp's script to see that I was missing referer in my requests module.
|
||||
"""
|
||||
|
||||
|
||||
def single_chapter(url, current_directory):
|
||||
|
||||
def single_chapter(url,current_directory):
|
||||
|
||||
s = requests.Session()
|
||||
headers = {
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
|
||||
req = s.get(url, headers=headers)
|
||||
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
|
||||
req = s.get(url,headers=headers)
|
||||
cookies = req.cookies
|
||||
page_source_1 = req.text.encode('utf-8')
|
||||
|
||||
page_source_1 = str(req.text.encode('utf-8'))
|
||||
|
||||
try:
|
||||
#Korean_Name = re.search(r'<h2>(.*?)<span class="wrt_nm">',str(page_source)).group(1)
|
||||
Series_Name = re.search(r'titleId=(\d+)', url).group(1)
|
||||
Series_Name = re.search(r'titleId=(\d+)',url).group(1)
|
||||
except Exception as e:
|
||||
Series_Name = "Unknown"
|
||||
|
||||
try:
|
||||
#chapter_number = int(re.search(r'\<span\ class\=\"total\"\>(.\d+)\<\/span\>',page_source_1).group(1))
|
||||
chapter_number = re.search(r'&no=(\d+)', url).group(1)
|
||||
chapter_number = re.search(r'&no=(\d+)',url).group(1)
|
||||
except Exception as e:
|
||||
print e
|
||||
print(e)
|
||||
chapter_number = 0
|
||||
|
||||
|
||||
img_regex = r'http://imgcomic.naver.net/webtoon/\d+/\d+/.+?\.(?:jpg|png|gif|bmp|JPG|PNG|GIF|BMP)'
|
||||
|
||||
img_links = list(re.findall(img_regex, page_source_1))
|
||||
img_links = list(re.findall(img_regex,page_source_1))
|
||||
|
||||
Raw_File_Directory = str(Series_Name) +'/'+"Chapter "+str(chapter_number)
|
||||
|
||||
Raw_File_Directory = str(Series_Name).decode(
|
||||
'utf-8') + '/' + "Chapter " + str(chapter_number)
|
||||
|
||||
# Fix for "Special Characters" in The series name
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
print '\n'
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
print('\n')
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for x, items in enumerate(img_links):
|
||||
for x,items in enumerate(img_links):
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
FileDownloader(
|
||||
str(x + 1) + str(items[-4:]), Directory_path, cookies, items, url)
|
||||
FileDownloader(str(x+1)+str(items[-4:]),Directory_path,cookies,items,url)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name)
|
||||
|
||||
|
||||
print '\n'
|
||||
print "Completed downloading ", Series_Name
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
s = requests.Session()
|
||||
headers = {
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
|
||||
req = s.get(url, headers=headers)
|
||||
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
|
||||
req = s.get(url,headers=headers)
|
||||
cookies = req.cookies
|
||||
page_source_1 = req.text.encode('utf-8')
|
||||
|
||||
titleId = re.search(r'titleId=(\d+)', url).group(1)
|
||||
|
||||
|
||||
titleId = re.search(r'titleId=(\d+)',url).group(1)
|
||||
|
||||
try:
|
||||
first_link = int(
|
||||
re.search(
|
||||
r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %
|
||||
(titleId),
|
||||
page_source_1).group(1))
|
||||
first_link = int(re.search(r'\/webtoon\/detail\.nhn\?titleId\=%s\&no\=(\d+)\&weekday\=tue' %(titleId),page_source_1).group(1))
|
||||
except Exception as e:
|
||||
first_link = input("Please Enter the Last chapter of the series : ")
|
||||
first_link = eval(input("Please Enter the Last chapter of the series : "))
|
||||
if not first_link:
|
||||
print "You failed to enter the last chapter count. Script will exit now."
|
||||
print("You failed to enter the last chapter count. Script will exit now.")
|
||||
sys.exit()
|
||||
|
||||
for x in range(1,int(first_link)):
|
||||
Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" %(titleId,x)
|
||||
single_chapter(Chapter_Url,current_directory)
|
||||
|
||||
for x in range(1, int(first_link)):
|
||||
Chapter_Url = "http://comic.naver.com/webtoon/detail.nhn?titleId=%s&no=%s" % (
|
||||
titleId, x)
|
||||
single_chapter(Chapter_Url, current_directory)
|
||||
|
||||
|
||||
def comic_naver_Url_Check(input_url, current_directory):
|
||||
@ -106,7 +100,7 @@ def comic_naver_Url_Check(input_url, current_directory):
|
||||
if match['detail']:
|
||||
url = str(input_url)
|
||||
single_chapter(url, current_directory)
|
||||
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
|
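# Illustrative sketch (assumed alternative, not the commit's code): since six.moves.input is
# already imported above, the last-chapter prompt could read a plain string on both Python
# versions and convert it explicitly rather than passing user input through eval().
from six.moves import input

def ask_last_chapter():
    raw = input("Please Enter the Last chapter of the series : ")
    return int(raw) if raw.strip().isdigit() else 0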
@ -1,146 +1,103 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from more_itertools import unique_everseen
|
||||
from more_itertools import unique_everseen
|
||||
from bs4 import BeautifulSoup
|
||||
from downloader.cookies_required import main as FileDownloader
|
||||
|
||||
|
||||
def single_chapter(url, current_directory):
|
||||
|
||||
def single_chapter(url,current_directory):
|
||||
|
||||
if not url:
|
||||
print "Couldn't get the URL. Please report it on Github Repository."
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'\/read\/(.*?)/',
|
||||
url).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
|
||||
try:
|
||||
# Getting the chapter count from the URL itself for naming the
|
||||
# folder/dicrectories in integer.
|
||||
chapter_number = int(str(re.search(
|
||||
'0\/(.*?)/', url).group(1)).strip().replace('0', '').replace('/', ''))
|
||||
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
|
||||
except Exception as e:
|
||||
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
|
||||
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
|
||||
|
||||
Raw_File_Directory = str(Series_Name) + '/' + \
|
||||
"Chapter " + str(chapter_number)
|
||||
|
||||
# Fix for "Special Characters" in The series name
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
|
||||
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
|
||||
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
|
||||
|
||||
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
|
||||
|
||||
|
||||
ddl_list = list(unique_everseen(ddl_image_list))
|
||||
|
||||
print '\n'
|
||||
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for i in ddl_list:
|
||||
|
||||
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = "http://gomanga.co/reader/content/comics" + \
|
||||
str(i).replace('"', '').replace('\\', '')
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
|
||||
|
||||
File_Name_Final = str(
|
||||
re.findall(
|
||||
'\/(\d+)\.[jpg]|[png]',
|
||||
i)).replace(
|
||||
"[",
|
||||
"").replace(
|
||||
"]",
|
||||
"").replace(
|
||||
"'",
|
||||
"").replace(
|
||||
",",
|
||||
"").strip() + "." + str(
|
||||
re.findall(
|
||||
'\d\.(.*?)$',
|
||||
str(i))).replace(
|
||||
",",
|
||||
"").replace(
|
||||
"[",
|
||||
"").replace(
|
||||
"]",
|
||||
"").replace(
|
||||
"'",
|
||||
"").strip()
|
||||
FileDownloader(
|
||||
File_Name_Final,
|
||||
Directory_path,
|
||||
tasty_cookies,
|
||||
ddl_image)
|
||||
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
|
||||
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
|
||||
|
||||
print '\n'
|
||||
print "Completed downloading ", Series_Name
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name)
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
def whole_series(url,current_directory):
|
||||
if not url:
|
||||
print "Couldn't get the URL. Please report it on Github Repository."
|
||||
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'\/series\/(.*?)/',
|
||||
url).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
|
||||
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
soup = BeautifulSoup(Page_source, 'html.parser')
|
||||
|
||||
chapter_text = soup.findAll('div', {'class': 'title'})
|
||||
|
||||
chapter_text = soup.findAll('div',{'class':'title'})
|
||||
|
||||
for link in chapter_text:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
url = a['href']
|
||||
single_chapter(url, current_directory)
|
||||
single_chapter(url,current_directory)
|
||||
|
||||
|
||||
def gomanga_Url_Check(input_url, current_directory):
|
||||
|
||||
gomanga_single_regex = re.compile(
|
||||
'https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
|
||||
gomanga_whole_regex = re.compile(
|
||||
'^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
def gomanga_Url_Check(input_url,current_directory):
|
||||
|
||||
gomanga_single_regex = re.compile('https?://(?P<host>gomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
|
||||
gomanga_whole_regex = re.compile('^https?://(?P<host>gomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
@ -149,15 +106,23 @@ def gomanga_Url_Check(input_url, current_directory):
|
||||
match = found.groupdict()
|
||||
if match['Chapter']:
|
||||
url = str(input_url)
|
||||
single_chapter(url, current_directory)
|
||||
single_chapter(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
found = re.search(gomanga_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
whole_series(url, current_directory)
|
||||
whole_series(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
@ -10,18 +10,22 @@ import cfscrape
|
||||
|
||||
|
||||
def single_chapter(url, current_directory):
|
||||
|
||||
|
||||
scraper = cfscrape.create_scraper()
|
||||
|
||||
Page_Source = scraper.get(str(url)).content
|
||||
|
||||
soup = BeautifulSoup(Page_Source, "html.parser")
|
||||
meta = soup.findAll('title')
|
||||
meta_data = list(str(meta).split('\\n'))
|
||||
formatted = BeautifulSoup(Page_Source, "lxml")
|
||||
|
||||
meta = formatted.findAll('title')
|
||||
|
||||
meta_data = list(str(meta).split('\n'))
|
||||
|
||||
|
||||
try:
|
||||
Series_Name = str(meta_data[2])
|
||||
except Exception as e:
|
||||
print (e)
|
||||
Series_Name = "Unkown Series"
|
||||
|
||||
try:
|
||||
@ -42,8 +46,8 @@ def single_chapter(url, current_directory):
|
||||
except Exception as e:
|
||||
chapter_number = '0'
|
||||
|
||||
all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', Page_Source)
|
||||
|
||||
all_links = re.findall('lstImages.push\(\"(.*)\"\)\;', str(formatted))
|
||||
|
||||
if volume_number == '0':
|
||||
# Some series don't seem to have volumes mentioned. Let's assume
|
||||
# they're 0.
|
||||
@ -61,9 +65,9 @@ def single_chapter(url, current_directory):
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
print '\n'
|
||||
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
print ('\n')
|
||||
print('{:^80}'.format('%s - %s') % (Series_Name, chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for elements in all_links:
|
||||
if not os.path.exists(File_Directory):
|
||||
@ -78,8 +82,8 @@ def single_chapter(url, current_directory):
|
||||
'title\=(.*)\_(\d+)\.([png]|[jpg])', ddl_image).group(1)).strip() + "." + str(ddl_image[-3:])
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
|
||||
print '\n'
|
||||
print "Completed downloading ", Series_Name, ' - ', chapter_number
|
||||
print('\n')
|
||||
print("Completed downloading ", Series_Name, ' - ', chapter_number)
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
@ -103,10 +107,10 @@ def whole_series(url, current_directory):
|
||||
link_list.append(final_url)
|
||||
|
||||
if int(len(link_list)) == '0':
|
||||
print "Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes."
|
||||
print("Sorry, I couldn't bypass KissManga's Hooman check. Please try again in a few minutes.")
|
||||
sys.exit()
|
||||
|
||||
print "Total Chapters To Download : ", len(link_list)
|
||||
print("Total Chapters To Download : ", len(link_list))
|
||||
|
||||
for item in link_list:
|
||||
url = str(item)
|
||||
|
@ -1,90 +1,56 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import requests
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib2
|
||||
import urllib
|
||||
import shutil
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib2 import URLError
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from downloader.universal import main as FileDownloader
|
||||
from six.moves import range
|
||||
|
||||
|
||||
def create_driver():
|
||||
|
||||
|
||||
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
|
||||
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) ' \
|
||||
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
|
||||
'Chrome/39.0.2171.95 Safari/537.36'
|
||||
driver = webdriver.PhantomJS(
|
||||
desired_capabilities=desired_capabilities,
|
||||
service_args=['--load-images=no'])
|
||||
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
|
||||
'Chrome/39.0.2171.95 Safari/537.36'
|
||||
driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities,service_args=['--load-images=no'])
|
||||
return driver
|
||||
|
||||
|
||||
def single_chapter(driver, url, current_directory):
|
||||
|
||||
def single_chapter(driver,url,current_directory):
|
||||
|
||||
try:
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'manga\/(.*?)/v',
|
||||
url).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
except Exception as e:
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'manga\/(.*?)/c',
|
||||
url).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
|
||||
Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
try:
|
||||
# Getting the volume count from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
volume_number = "Volume " + \
|
||||
str(re.search('v(.*?)/c', url).group(1)).strip()
|
||||
volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories.
|
||||
except Exception as e:
|
||||
volume_number = "Volume 01"
|
||||
|
||||
|
||||
try:
|
||||
# Getting the chapter count from the URL itself for naming the
|
||||
# folder/dicrectories in integer.
|
||||
chapter_number = int(
|
||||
str(re.search('\/c(.*?)/\d', url).group(1)).strip())
|
||||
chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
|
||||
except Exception as e:
|
||||
# Getting the chapter count from the URL itself for naming the
|
||||
# folder/dicrectories in float.
|
||||
chapter_number = float(
|
||||
str(re.search('\/c(.*?)/\d', url).group(1)).strip())
|
||||
|
||||
chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in float.
|
||||
|
||||
if volume_number == '0':
|
||||
# Some series don't seem to have volumes mentioned. Let's assume
|
||||
# they're 0.
|
||||
Raw_File_Directory = str(Series_Name) + '/' + \
|
||||
"Chapter " + str(chapter_number)
|
||||
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) # Some series don't seem to have volumes mentioned. Let's assume they're 0.
|
||||
else:
|
||||
Raw_File_Directory = str(
|
||||
Series_Name) + '/' + str(volume_number) + '/' + "Chapter " + str(chapter_number)
|
||||
|
||||
# Fix for "Special Characters" in The series name
|
||||
File_Directory = re.sub(
|
||||
'[^A-Za-z0-9\-\.\'\#\/ \[\]]+',
|
||||
'',
|
||||
Raw_File_Directory)
|
||||
|
||||
Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number)
|
||||
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ \[\]]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
driver.get(url)
|
||||
@ -93,109 +59,91 @@ def single_chapter(driver, url, current_directory):
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.ID, "image"))
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
driver.save_screenshot("error.png")
|
||||
print "Couldn't load the element. I'll try to move ahead in any case."
|
||||
print '\n'
|
||||
print "I took a screenshot, please attach it in the issue you open in the repository."
|
||||
print("Couldn't load the element. I'll try to move ahead in any case.")
|
||||
print('\n')
|
||||
print("I took a screenshot, please attach it in the issue you open in the repository.")
|
||||
pass
|
||||
|
||||
elem = driver.find_element_by_xpath("//*")
|
||||
Page_Source = elem.get_attribute("outerHTML").encode('utf-8')
|
||||
Page_Source = str(elem.get_attribute("outerHTML").encode('utf-8'))
|
||||
|
||||
First_chapter_link = str(
|
||||
re.search(
|
||||
'http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg',
|
||||
Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden.
|
||||
|
||||
# Getting the last chapter number from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
current_chapter_count = int(
|
||||
str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip())
|
||||
|
||||
# Getting the last chapter number from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
last_chapter_count = int(
|
||||
str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip())
|
||||
|
||||
print '\n'
|
||||
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_Source).group(0)).strip() # Fix if they change the CDN all of a sudden.
|
||||
|
||||
current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_Source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
|
||||
for x in range(current_chapter_count, last_chapter_count + 1):
|
||||
|
||||
|
||||
for x in range(current_chapter_count,last_chapter_count+1):
|
||||
|
||||
driver.refresh()
|
||||
File_Name_Final = str(x) + '.jpg'
|
||||
File_Name_Final = str(x)+'.jpg'
|
||||
link_container = driver.find_element_by_xpath('//*[@id="image"]')
|
||||
ddl_image = str(link_container.get_attribute('src'))
|
||||
FileDownloader(File_Name_Final, Directory_path, ddl_image)
|
||||
FileDownloader(File_Name_Final,Directory_path,ddl_image)
|
||||
driver.find_element_by_xpath('//*[@id="top_bar"]/div/a[2]').click()
|
||||
|
||||
print '\n'
|
||||
print "Completed downloading ", Series_Name, ' - ', chapter_number
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name,' - ',chapter_number)
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
|
||||
def whole_series(url,current_directory):
|
||||
|
||||
if not url:
|
||||
print "Couldn't get the URL. Please report it on Github Repository."
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
|
||||
try:
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip()
|
||||
Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
except Exception as e:
|
||||
print 'Check if the URL is correct or not. Report on Github.'
|
||||
|
||||
print('Check if the URL is correct or not. Report on Github.')
|
||||
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
response = requests.get(url, headers=headers)
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
|
||||
try:
|
||||
chapter_link_format = "http://mangafox.me/manga/" + \
|
||||
str(Series_Name) + "/v"
|
||||
links = re.findall(
|
||||
'{0}(.*?)html'.format(chapter_link_format),
|
||||
Page_source)
|
||||
|
||||
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v"
|
||||
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
|
||||
|
||||
if len(links) == 0:
|
||||
chapter_link_format = "http://mangafox.me/manga/" + \
|
||||
str(Series_Name) + "/c"
|
||||
# print chapter_link_format
|
||||
links = re.findall(
|
||||
'{0}(.*?)html'.format(chapter_link_format),
|
||||
Page_source)
|
||||
chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c"
|
||||
#print chapter_link_format
|
||||
links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print "Error : ", e, '\n', "Please report this error on Github repository."
|
||||
print("Error : ",e,'\n',"Please report this error on Github repository.")
|
||||
|
||||
driver = create_driver()
|
||||
|
||||
|
||||
for x in links:
|
||||
chapter_link = str(str(chapter_link_format) + str(x) + "html").strip()
|
||||
|
||||
chapter_link = str(str(chapter_link_format)+str(x)+"html").strip()
|
||||
|
||||
try:
|
||||
single_chapter(driver, chapter_link, current_directory)
|
||||
single_chapter(driver,chapter_link,current_directory)
|
||||
except Exception as e:
|
||||
print e
|
||||
print(e)
|
||||
driver.quit()
|
||||
driver.quit()
|
||||
|
||||
|
||||
def mangafox_Url_Check(input_url, current_directory):
|
||||
|
||||
mangafox_single_regex = re.compile(
|
||||
'https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
|
||||
mangafox_whole_regex = re.compile(
|
||||
'^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
|
||||
def mangafox_Url_Check(input_url,current_directory):
|
||||
|
||||
mangafox_single_regex = re.compile('https?://(?P<host>mangafox.me)/manga/(?P<comic>[\d\w-]+)(?P<Volume>(/v\d+)|(.))/(?P<chapter>c\d+(\.\d)?)?/(?P<issue>\d+)?\.html')
|
||||
mangafox_whole_regex = re.compile('^https?://(?P<host>mangafox.me)/manga/(?P<comic_series>[\d\w-]+)?|(\/)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
@ -206,15 +154,17 @@ def mangafox_Url_Check(input_url, current_directory):
|
||||
url = str(input_url)
|
||||
driver = create_driver()
|
||||
try:
|
||||
single_chapter(driver, url, current_directory)
|
||||
single_chapter(driver,url,current_directory)
|
||||
except Exception as e:
|
||||
print e
|
||||
print(e)
|
||||
driver.quit()
|
||||
driver.quit()
|
||||
sys.exit()
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
found = re.search(mangafox_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
@ -222,9 +172,9 @@ def mangafox_Url_Check(input_url, current_directory):
|
||||
url = str(input_url)
|
||||
#driver = create_driver()
|
||||
try:
|
||||
whole_series(url, current_directory)
|
||||
whole_series(url,current_directory)
|
||||
except Exception as e:
|
||||
print e
|
||||
print(e)
|
||||
sys.exit()
|
||||
else:
|
||||
pass
|
||||
|
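# Illustrative sketch (assumption: selenium and the PhantomJS binary are installed, matching
# create_driver() above): a headless PhantomJS driver with a spoofed User-Agent header and
# image loading turned off, which Mangafox still requires after this change (see the
# [PhantomJS] tag added in Supported_Sites.md).
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

def make_driver(user_agent='Mozilla/5.0'):
    caps = DesiredCapabilities.PHANTOMJS.copy()
    caps['phantomjs.page.customHeaders.User-Agent'] = user_agent
    return webdriver.PhantomJS(desired_capabilities=caps,
                               service_args=['--load-images=no'])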
@ -1,144 +1,96 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
from more_itertools import unique_everseen
|
||||
from more_itertools import unique_everseen
|
||||
from bs4 import BeautifulSoup
|
||||
from downloader.cookies_required import main as FileDownloader
|
||||
|
||||
|
||||
def single_chapter(url, current_directory):
|
||||
|
||||
def single_chapter(url,current_directory):
|
||||
|
||||
if not url:
|
||||
print "Couldn't get the URL. Please report it on Github Repository."
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'\/read\/(.*?)/',
|
||||
url).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
|
||||
|
||||
Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
try:
|
||||
# Getting the chapter count from the URL itself for naming the
|
||||
# folder/dicrectories in integer.
|
||||
chapter_number = int(str(re.search(
|
||||
'0\/(.*?)/', url).group(1)).strip().replace('0', '').replace('/', ''))
|
||||
chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer.
|
||||
except Exception as e:
|
||||
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
|
||||
|
||||
Raw_File_Directory = str(Series_Name) + '/' + \
|
||||
"Chapter " + str(chapter_number)
|
||||
# Fix for "Special Characters" in The series name
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory)
|
||||
chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up
|
||||
|
||||
Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number)
|
||||
File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name
|
||||
Directory_path = os.path.normpath(File_Directory)
|
||||
|
||||
|
||||
|
||||
ddl_image_list = re.findall('comics(.*?)\"', Page_source)
|
||||
|
||||
|
||||
ddl_list = list(unique_everseen(ddl_image_list))
|
||||
|
||||
print '\n'
|
||||
print '{:^80}'.format('%s - %s') % (Series_Name, chapter_number)
|
||||
print '{:^80}'.format('=====================================================================\n')
|
||||
print('\n')
|
||||
print('{:^80}'.format('%s - %s')%(Series_Name,chapter_number))
|
||||
print('{:^80}'.format('=====================================================================\n'))
|
||||
|
||||
for i in ddl_list:
|
||||
if not os.path.exists(File_Directory):
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = "http://yomanga.co/reader/content/comics" + \
|
||||
str(i).replace('"', '').replace('\\', '')
|
||||
os.makedirs(File_Directory)
|
||||
ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','')
|
||||
|
||||
File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip()
|
||||
FileDownloader(File_Name_Final,Directory_path,tasty_cookies,ddl_image)
|
||||
|
||||
print('\n')
|
||||
print("Completed downloading ",Series_Name)
|
||||
|
||||
File_Name_Final = str(
|
||||
re.findall(
|
||||
'\/(\d+)\.[jpg]|[png]',
|
||||
i)).replace(
|
||||
"[",
|
||||
"").replace(
|
||||
"]",
|
||||
"").replace(
|
||||
"'",
|
||||
"").replace(
|
||||
",",
|
||||
"").strip() + "." + str(
|
||||
re.findall(
|
||||
'\d\.(.*?)$',
|
||||
str(i))).replace(
|
||||
",",
|
||||
"").replace(
|
||||
"[",
|
||||
"").replace(
|
||||
"]",
|
||||
"").replace(
|
||||
"'",
|
||||
"").strip()
|
||||
FileDownloader(
|
||||
File_Name_Final,
|
||||
Directory_path,
|
||||
tasty_cookies,
|
||||
ddl_image)
|
||||
|
||||
print '\n'
|
||||
print "Completed downloading ", Series_Name
|
||||
|
||||
|
||||
def whole_series(url, current_directory):
|
||||
def whole_series(url,current_directory):
|
||||
if not url:
|
||||
print "Couldn't get the URL. Please report it on Github Repository."
|
||||
|
||||
print("Couldn't get the URL. Please report it on Github Repository.")
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
s = requests.Session()
|
||||
response = s.get(url, headers=headers)
|
||||
tasty_cookies = response.cookies
|
||||
|
||||
|
||||
Page_source = str(response.text.encode('utf-8'))
|
||||
|
||||
# Getting the Series Name from the URL itself for naming the
|
||||
# folder/dicrectories.
|
||||
Series_Name = str(
|
||||
re.search(
|
||||
'\/series\/(.*?)/',
|
||||
url).group(1)).strip().replace(
|
||||
'_',
|
||||
' ').title()
|
||||
|
||||
Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories.
|
||||
|
||||
soup = BeautifulSoup(Page_source, 'html.parser')
|
||||
|
||||
chapter_text = soup.findAll('div', {'class': 'title'})
|
||||
|
||||
chapter_text = soup.findAll('div',{'class':'title'})
|
||||
|
||||
for link in chapter_text:
|
||||
x = link.findAll('a')
|
||||
for a in x:
|
||||
url = a['href']
|
||||
single_chapter(url, current_directory)
|
||||
|
||||
|
||||
def yomanga_Url_Check(input_url, current_directory):
|
||||
|
||||
yomanga_single_regex = re.compile(
|
||||
'https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
|
||||
yomanga_whole_regex = re.compile(
|
||||
'^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
|
||||
single_chapter(url,current_directory)
|
||||
|
||||
def yomanga_Url_Check(input_url,current_directory):
|
||||
|
||||
yomanga_single_regex = re.compile('https?://(?P<host>yomanga.co)/reader/read/(?P<comic_single>[\d\w-]+)/en/(?P<volume>\d+)?/(?P<Chapter>\d+)?()|(/page/(?P<PageNumber>\d+)?)')
|
||||
yomanga_whole_regex = re.compile('^https?://(?P<host>yomanga.co)/reader/(?P<series>series)?/(?P<comic>[\d\w-]+)?(\/|.)$')
|
||||
|
||||
lines = input_url.split('\n')
|
||||
for line in lines:
|
||||
found = re.search(yomanga_single_regex, line)
|
||||
@ -146,15 +98,24 @@ def yomanga_Url_Check(input_url, current_directory):
|
||||
match = found.groupdict()
|
||||
if match['Chapter']:
|
||||
url = str(input_url)
|
||||
single_chapter(url, current_directory)
|
||||
single_chapter(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
found = re.search(yomanga_whole_regex, line)
|
||||
if found:
|
||||
match = found.groupdict()
|
||||
if match['comic']:
|
||||
url = str(input_url)
|
||||
whole_series(url, current_directory)
|
||||
whole_series(url,current_directory)
|
||||
else:
|
||||
pass
|
||||
|
@ -4,4 +4,4 @@ Date Format : YY/MM/DD
|
||||
|
||||
'''
|
||||
|
||||
__version__ = '2016.12.23'
|
||||
__version__ = '2016.11.28'
|
||||
|
@ -8,4 +8,6 @@
|
||||
- Argument priority updated [2016.11.22]
|
||||
- Site support for comic.naver.com [2016.11.26]
|
||||
- Support for Python 3 [2016.11.26]
|
||||
- Removed Kissmanga PhantomJS dependency [2016.12.23]
|
||||
- Removed Kissmanga PhantomJS dependency [2016.12.23]
|
||||
- Support for Python 2 and 3 has been merged [2016.12.28]
|
||||
- Updated PhantomJS dependency in [supported sites](https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md) [2016.12.28]
|
@ -2,7 +2,7 @@
|
||||
|
||||
[PhantomJS] = Denotes that these sites need PhantomJS to be able to download content.
|
||||
|
||||
* [Mangafox](http://mangafox.me/)
|
||||
* [Mangafox](http://mangafox.me/) [PhantomJS]
|
||||
* [YoManga](http://yomanga.co/)
|
||||
* [GoManga](http://gomanga.co/)
|
||||
* [Batoto](http://bato.to/) [PhantomJS]
|
||||
|