diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8046f58 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,18 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "3.5" + - "3.5-dev" # 3.5 development branch + - "3.6-dev" # 3.6 development branch + - "nightly" # currently points to 3.7-dev +# command to install dependencies +install: "pip install -r requirements.txt" +# command to run tests +script: nosetests +notifications: + email: + - xonshiz@psychoticelites.com \ No newline at end of file diff --git a/Changelog.md b/Changelog.md new file mode 100644 index 0000000..0a723bf --- /dev/null +++ b/Changelog.md @@ -0,0 +1 @@ +#Changelog \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07a6d17 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ + +The MIT License (MIT) + +Copyright (c) 2013-2016 Blackrock Digital LLC. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
\ No newline at end of file diff --git a/ReadMe.md b/ReadMe.md new file mode 100644 index 0000000..b0115d4 --- /dev/null +++ b/ReadMe.md @@ -0,0 +1,151 @@ +# Comic-DL + +Comic-dl is a command line tool to download Comics and Manga from various Manga and Comic sites easily. + +> Don't overuse this script. Support the developers of those websites by disabling your adblock on their site. +>Advertisments pay for the website servers. + +> WINDOWS BINARY COMING SOON! + +### Table of Content + +* [Supported Sites]() +* [Dependencies Installation]() + * [Linux/Debian]() + * [Windows]() + * [Mac OS X]() +* [Installation]() +* [Windows Binary]() +* [List of Arguments]() +* [Usage]() + * [Windows]() + * [Linux/Debian]() +* [Features]() +* [Changelog]() +* [Opening An Issue/Requesting A Site]() + * [Reporting Issues]() + * [Suggesting A Feature]() + +## Supported Websites +You can check the list of supported websites [**`HERE`**](https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md). + +## Dependencies Installation +This script can run on multiple Operating Systems. So, if you're using the `python` script instead of the `windows binary` of this script, then you'll need to get things ready first. Follow the instructions mentioned below, according to your OS. + +### Linux/Debian : +Since most (if not all) Linux/Debian OS come with python pre-installed, you don't have to install python manually. Make sure you're using python >= 2.7.x though. + +We need `pip` to install any external dependenc(ies). So, open any terminal and type in `pip list` and if it shows some data, then it is fine. But, if it shows error, like `pip not found` or something along this line, then you need to install `pip`. Just type this command in terminal : + +> `sudo apt-get install python-pip` + +If you're on Fedora, CentOS/RHEL, openSUSE, Arch Linux, then you simply need to follow [`THIS TUTORIAL`](https://packaging.python.org/install_requirements_linux/) to install `pip`. 
+ +If this still doesn't work, then you'll manually need to install pip. Doing so is an easy one time job and you can follow [`THIS TUTORIAL`](https://pip.pypa.io/en/stable/installing/) to do so. + +### Windows : +If you're on windows, then it is recommended to download the [`windows binary`](#) for this script. If you use the windows binary, you don't need to install anything. But, if for some weird reason you want to use Python script instead, then follow these steps : + +* Install Python > 2.7.6. Download the desired installer from [here](https://www.python.org/downloads/). +* [Add it in the system path](http://superuser.com/questions/143119/how-to-add-python-to-the-windows-path) (if not already added). +* If you're using python >2.7.9, you don't need to install `PIP`. However, if you don't have pip installed and added in windows path, then do so by following [this little tutorial](http://stackoverflow.com/a/12476379). +* Download [this `text`]() file and put it in some directory/folder. +* Open Command Prompt and browse to the directory where you downloaded your requirements.txt file and run this command : +``` +pip install -r requirements.txt +``` +* It should install the required external libraries. + +Well, if everything came up good without any error(s), then you're good to go! + +### Mac OS X : +Mac OS X users will have to fetch their version of `Python` and `Pip`. +* Python installation guide : http://docs.python-guide.org/en/latest/starting/install/osx/ +* Pip installation guide : http://stackoverflow.com/questions/17271319/installing-pip-on-mac-os-x + +After downloading and installing these, you need to add PIP & Python in your path. Follow [`THIS LITTLE GUIDE`](http://www.pyladies.com/blog/Get-Your-Mac-Ready-for-Python-Programming/) to install both, Python & pip successfully. + +## Installation +After installing and setting up all the dependencies in your Operating System, you're good to go and use this script. 
+ +The instructions for all the OS would remain the same. Download [`THIS REPOSITORY`]() and put it somewhere in your system. + +**Windows users**, it's better to not place it in places where it requires administrator privileges. Good example would be `C:\Windows`. This goes for both, the Python script and the windows binary file (.exe). + +**Linux/Debian** users make sure that this script is executable. Just run this command, if you run into problem(s) : + +> `chmod +x comic-dl.py` + +and then, execute with this : + +> `./comic-dl.py` + +## Windows Binary +It is recommended that windows users use this binary to save both, your head and time from installing all the dependencies. You can download the binary and start using the script right off the bat. Grab the respective binaries from the links below : +* `x86 Systems` : [COMING SOON](#) +* `x64 Systems` : [COMING SOON](#) + +## List of Arguments +Currently, the script supports these arguments : +``` +-h, --help Prints the basic help menu of the script and exits. +-i,--input Defines the input link to the comic/manga. +-V,--version Prints the VERSION and exits. +-a,--about Prints ABOUT and USAGE of the script and exits. +``` + +## Usage +With this script, you have to pass arguments in order to be able to download anything. Passing arguments in a script is pretty easy. Since the script is pretty basic, it doesn't have too many arguments. Go check the [`ARGUMENTS SECTION`] to know more about which arguments the script offers. + +Follow the instructions according to your OS : + +### Windows +After you've saved this script in a directory/folder, you need to open `command prompt` and browse to that directory and then execute the script. Let's do it step by step : +* Open the folder where you've downloaded the files of this repository. +* Hold down the **`SHIFT`** key and while holding down the SHIFT key, **`RIGHT CLICK`** and select `Open Command Prompt Here` from the options that show up. 
+* Now, in the command prompt, type this : + +*If you're using the windows binary :* +>>> `comic-dl.exe -i ` + +*If you're using the Python Script :* +>>> `comic-dl.py -i ` + +URL can be any URL of the [supported websites](). + +### Linux/Debian +After you've saved this script in a directory/folder, you need to open `command prompt` and browse to that directory and then execute the script. Let's do it step by step : +* Open a terminal, `Ctrl + Alt + T` is the shortcut to do so (if you didn't know). +* Now, change the current working directory of the terminal to the one where you've downloaded this repository. +* Now, in the Terminal, type this : + +>>> `comic-dl.py -i ` + +URL can be any URL of the [supported websites](). + +## Features +This is a very basic and small script, so at the moment it only has a few features. +* Downloads a Single Chapter and puts in a directory with the comic name, volume and chapter. +* Downloads all the chapters available for a series. +* Skip if the file has already been downloaded. +* Show human readable error(s) in most places. + +## Changelog +You can check the changelog [**`HERE`**](https://github.com/Xonshiz/comic-dl/blob/master/Changelog.md). + +## Opening An Issue/Requesting A Site +If you're planning to open an issue for the script or ask for a new feature or anything that requires opening an Issue, then please do keep these things in mind. + +### Reporting Issues +If you're about to report some issue with the script, then please do include these things : +* The command you entered. Yes, with the URL +* The output of that command. You can simply copy the text from the terminal/command prompt and paste it. Make sure you put that output inside `` (tilde). +* Your Operating System and python version. + +### Suggesting A Feature +If you're here to make suggestions, please follow the basic syntax to post a request : + +**Subject** : Something that briefly tells us about the feature. 
+ +**Long Explanation** : Describe in details what you want and how you want. + +This should be enough, but it'll be great if you can add more ;) diff --git a/Supported_Sites.md b/Supported_Sites.md index 8449f7d..c17524e 100644 --- a/Supported_Sites.md +++ b/Supported_Sites.md @@ -1,5 +1,5 @@ #List of Supported Websites -* [Mangafox] -* [YoManga] -* [GoManga] \ No newline at end of file +* [Mangafox](http://mangafox.me/) +* [YoManga](http://yomanga.co/) +* [GoManga](http://gomanga.co/) \ No newline at end of file diff --git a/comic_dl/comic-dl.py b/comic_dl/comic-dl.py new file mode 100644 index 0000000..7a414bf --- /dev/null +++ b/comic_dl/comic-dl.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from honcho import url_checker +import os +import sys +import argparse +from version import __version__ + +def version(): + print '\n' + print '{:^80}'.format('Current Version : %s')%(__version__) + print '\n' + print '{:^80}'.format("More info : comic-dl -h") + sys.exit() + +def usage(): + print '\n' + print '{:^80}'.format('################################################') + print '{:^80}'.format('Comic-DL Usage') + print '{:^80}'.format('################################################\n') + print '\n' + print '{:^80}'.format('Author : Xonshiz | Version : %s')%(__version__) + print '{:^80}'.format('-------------------------------------------------\n') + print "Comic-dl is a command line tool to download manga and comics from various comic and manga sites." + print "Using the script is pretty simple and should be easy for anyone familiar with a command line/shell." 
+ print '\n' + print '{:^80}'.format("USAGE : comic-dl -i ") + print '\n' + print "Check Supported websites : https://github.com/Xonshiz/comic-dl/blob/master/Supported_Sites.md ",'\n' + print "Available Arguments : " + print '{:^80}'.format("-i,--input : Specifies the Input URL") + print '{:^80}'.format("-h : Prints this help menu") + print '{:^80}'.format("--version : Prints the current version and exits") + print '{:^80}'.format("-a,--about : Shows the info about this script and exits.") + sys.exit() + +def main(argv): + current_directory = str(os.getcwd()) + parser = argparse.ArgumentParser(description='Comic-dl is a command line tool to download manga and comics from various comic and manga sites.') + parser.add_argument('--version',action='store_true',help='Shows version and exits' ) + parser.add_argument('-i','--input',nargs=1,help='Inputs the URL to comic',default='--version') + parser.add_argument('-a','--about',action='store_true',help='Shows the info regarding this script' ) + + + args = parser.parse_args() + + if args.version: + version() + sys.exit() + + if args.input: + #print args.input + input_url = str(args.input[0]).strip() + url_checker(input_url,current_directory) + sys.exit() + + if args.about: + usage() + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/comic_dl/honcho.py b/comic_dl/honcho.py new file mode 100644 index 0000000..1afdfa1 --- /dev/null +++ b/comic_dl/honcho.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + + +''' + +This python module decides which URL should be assigned to which other module from the site package. 
+ +''' + + + +from sites.yomanga import yomanga_Url_Check +from sites.gomanga import gomanga_Url_Check +from sites.mangafox import mangafox_Url_Check +import os +import urllib2 + + + + +def url_checker(input_url,current_directory): + + domain = urllib2.urlparse.urlparse(input_url).netloc + + if domain in ['mangafox.me']: + mangafox_Url_Check(input_url,current_directory) + pass + elif domain in ['yomanga.co']: + yomanga_Url_Check(input_url,current_directory) + pass + elif domain in ['gomanga.co']: + gomanga_Url_Check(input_url,current_directory) + pass + elif domain in ['']: + print 'You need to specify at least 1 URL. Please run : comic-dl -h' + else: + print "%s is unsupported at the moment. Please request on Github repository."%(domain) diff --git a/comic_dl/sites/__init__.py b/comic_dl/sites/__init__.py new file mode 100644 index 0000000..6e0e4ef --- /dev/null +++ b/comic_dl/sites/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + + +import mangafox +import yomanga +import gomanga \ No newline at end of file diff --git a/comic_dl/sites/gomanga.py b/comic_dl/sites/gomanga.py new file mode 100644 index 0000000..2c0c225 --- /dev/null +++ b/comic_dl/sites/gomanga.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import requests +import re +import os +import sys +from more_itertools import unique_everseen +import urllib2 +import urllib +import shutil +from urllib2 import URLError +from bs4 import BeautifulSoup + +def single_chapter(url,current_directory): + + if not url: + print "Couldn't get the URL. Please report it on Github Repository." 
+ sys.exit(0) + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' + + } + + s = requests.Session() + response = s.get(url, headers=headers) + tasty_cookies = response.cookies + + Page_source = str(response.text.encode('utf-8')) + + Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + #print "Series Name : ",Series_Name + + try: + chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. + except Exception as e: + #raise e + chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up + #print "Chapter No : ",chapter_number + + Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) + #print 'Raw_File_Directory : ',Raw_File_Directory + File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name + #print "File_Directory : ",File_Directory + Directory_path = os.path.normpath(File_Directory) + #print "Directory_path : ",Directory_path + + + ddl_image_list = re.findall('comics(.*?)\"', Page_source) + #print "Older List : ",ddl_image_list + + ddl_list = list(unique_everseen(ddl_image_list)) + #print ddl_list + #sys.exit() + + print '\n' + print '{:^80}'.format('%s - %s')%(Series_Name,chapter_number) + print '{:^80}'.format('=====================================================================\n') + + for i in ddl_list: + #print i + if not os.path.exists(File_Directory): + os.makedirs(File_Directory) + ddl_image = "http://gomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','') + #print ddl_image + try: + #u = urllib2.urlopen(ddl_image, cookies=response.cookies) + u = 
requests.get(ddl_image,cookies=tasty_cookies) + except URLError, e: + if not hasattr(e, "code"): + raise + print "Got error from "+ddl_image, e.code, e.msg + resp = e + + File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip() + File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) + if os.path.isfile(File_Check_Path): + print 'File Exist! Skipping ',File_Name_Final,'\n' + pass + + if not os.path.isfile(File_Check_Path): + print 'Downloading : ',File_Name_Final + #urllib.urlretrieve(ddl_image, File_Name_Final) + response = requests.get(ddl_image, stream=True,cookies=tasty_cookies) + try: + with open(File_Name_Final, 'wb') as out_file: + shutil.copyfileobj(response.raw, out_file) + File_Path = os.path.normpath(File_Name_Final) + except Exception as e: + #raise e + #print e + print "Couldn't download file from : ",ddl_image + pass + try: + shutil.move(File_Path,Directory_path) + except Exception, e: + #raise e + print e,'\n' + #os.remove(File_Path) + pass + + print '\n' + print "Completed downloading ",Series_Name + +def whole_series(url,current_directory): + if not url: + print "Couldn't get the URL. Please report it on Github Repository." + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' + + } + + s = requests.Session() + response = s.get(url, headers=headers) + tasty_cookies = response.cookies + + Page_source = str(response.text.encode('utf-8')) + + Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. 
+ #print "Series Name : ",Series_Name + + soup = BeautifulSoup(Page_source, 'html.parser') + + chapter_text = soup.findAll('div',{'class':'title'}) + #print chapter_text + + for link in chapter_text: + x = link.findAll('a') + for a in x: + url = a['href'] + single_chapter(url,current_directory) + +def gomanga_Url_Check(input_url,current_directory): + + gomanga_single_regex = re.compile('https?://(?Pgomanga.co)/reader/read/(?P[\d\w-]+)/en/(?P\d+)?/(?P\d+)?()|(/page/(?P\d+)?)') + gomanga_whole_regex = re.compile('^https?://(?Pgomanga.co)/reader/(?Pseries)?/(?P[\d\w-]+)?(\/|.)$') + + lines = input_url.split('\n') + for line in lines: + found = re.search(gomanga_single_regex, line) + if found: + match = found.groupdict() + if match['Chapter']: + url = str(input_url) + single_chapter(url,current_directory) + else: + pass + + + + found = re.search(gomanga_whole_regex, line) + if found: + match = found.groupdict() + if match['comic']: + url = str(input_url) + whole_series(url,current_directory) + else: + pass + + + + + + diff --git a/comic_dl/sites/mangafox.py b/comic_dl/sites/mangafox.py new file mode 100644 index 0000000..fbaa7e6 --- /dev/null +++ b/comic_dl/sites/mangafox.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import requests +import os +import re +import sys +import urllib2 +import urllib +import shutil +from bs4 import BeautifulSoup +from urllib2 import URLError + +def single_chapter(url,current_directory): + if not url: + print "Couldn't get the URL. Please report it on Github Repository." + sys.exit(0) + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' + + } + + response = requests.get(url, headers=headers) + Page_source = str(response.text.encode('utf-8')) + + try: + Series_Name = str(re.search('manga\/(.*?)/v', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. 
+ except Exception as e: + #raise e + #print "Error : ",e,'\n' + Series_Name = str(re.search('manga\/(.*?)/c', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + + #print "Series Name : ",Series_Name + + try: + volume_number = "Volume " + str(re.search('v(.*?)/c', url).group(1)).strip() # Getting the volume count from the URL itself for naming the folder/dicrectories. + except Exception as e: + #raise e + volume_number = "Volume 01" + #print "Volume No : ",volume_number + try: + chapter_number = int(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. + except Exception as e: + #raise e + chapter_number = float(str(re.search('\/c(.*?)/\d', url).group(1)).strip()) # Getting the chapter count from the URL itself for naming the folder/dicrectories in float. + #print "Chapter No : ",chapter_number + First_chapter_link = str(re.search('http://(.*?)/(.*?)/manga/(.*?)/(.*?)/compressed/(.*?)\.jpg', Page_source).group(0)).strip() # Fix if they change the CDN all of a sudden. + #print First_chapter_link + current_chapter_count = int(str(re.search('current_page\=(.*?)\;', Page_source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories. + last_chapter_count = int(str(re.search('total_pages\=(.*?)\;', Page_source).group(1)).strip()) # Getting the last chapter number from the URL itself for naming the folder/dicrectories. 
+ #print "Last Chapter : ",last_chapter_count + + Raw_File_Directory = str(Series_Name)+'/'+str(volume_number)+'/'+"Chapter "+str(chapter_number) + #print 'Raw_File_Directory : ',Raw_File_Directory + File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name + #print "File_Directory : ",File_Directory + Directory_path = os.path.normpath(File_Directory) + #print "Directory_path : ",Directory_path + + print '\n' + print '{:^80}'.format('%s - %s')%(Series_Name,chapter_number) + print '{:^80}'.format('=====================================================================\n') + + for x in range(current_chapter_count,last_chapter_count+1): + #print x + if not os.path.exists(File_Directory): + os.makedirs(File_Directory) + if len(str(x)) == 1: + ddl_image = First_chapter_link.replace('001.jpg','00{0}.jpg'.format(x)) + + #print ddl_image + try: + u = urllib2.urlopen(ddl_image) + except URLError, e: + if not hasattr(e, "code"): + raise + print "Got error from "+ddl_image, e.code, e.msg + resp = e + + File_Name_Final = str(x).strip()+".jpg" + File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) + #print "Final Check Path : ",File_Check_Path + + if os.path.isfile(File_Check_Path): + print 'File Exist! 
Skipping ',File_Name_Final,'\n' + pass + + if not os.path.isfile(File_Check_Path): + print 'Downloading : ',File_Name_Final + urllib.urlretrieve(ddl_image, File_Name_Final) + File_Path = os.path.normpath(File_Name_Final) + try: + shutil.move(File_Path,Directory_path) + except Exception, e: + #raise e + print e,'\n' + os.remove(File_Path) + pass + + else : + + ddl_image = First_chapter_link.replace('001','0{0}'.format(x)) + + #print ddl_image + try: + u = urllib2.urlopen(ddl_image) + except URLError, e: + if not hasattr(e, "code"): + raise + print "Got error from "+ddl_image, e.code, e.msg + resp = e + + File_Name_Final = str(x).strip()+".jpg" + File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) + #print "Final Check Path : ",File_Check_Path + + if os.path.isfile(File_Check_Path): + print 'File Exist! Skipping ',File_Name_Final,'\n' + pass + + if not os.path.isfile(File_Check_Path): + print 'Downloading : ',File_Name_Final + urllib.urlretrieve(ddl_image, File_Name_Final) + File_Path = os.path.normpath(File_Name_Final) + try: + shutil.move(File_Path,Directory_path) + except Exception, e: + #raise e + print e,'\n' + os.remove(File_Path) + pass + print '\n' + print "Completed downloading ",Series_Name + +def whole_series(url,current_directory): + if not url: + print "Couldn't get the URL. Please report it on Github Repository." + + try: + Series_Name = str(re.search('manga\/(.*?)/', url).group(1)).strip() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + except Exception as e: + #raise e + print 'Check if the URL is correct or not. Report on Github.' 
+ #print "Series Name : ",Series_Name + + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' + + } + + response = requests.get(url, headers=headers) + Page_source = str(response.text.encode('utf-8')) + + try: + chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/v" + #print 'UP : ',chapter_link_format + links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source) + #print "Lower links : ",links + + if len(links) == 0: + chapter_link_format = "http://mangafox.me/manga/"+str(Series_Name)+"/c" + #print chapter_link_format + links = re.findall('{0}(.*?)html'.format(chapter_link_format),Page_source) + + + except Exception as e: + #raise e + print "Error : ",e,'\n',"Please report this error on Github repository." + + + for x in links: + #print x + chapter_link = str(str(chapter_link_format)+str(x)+"html").strip() + #print "URL : ",chapter_link + single_chapter(chapter_link,current_directory) + +def mangafox_Url_Check(input_url,current_directory): + + mangafox_single_regex = re.compile('https?://(?Pmangafox.me)/manga/(?P[\d\w-]+)(?P(/v\d+)|(.))/(?Pc\d+(\.\d)?)?/(?P\d+)?\.html') + mangafox_whole_regex = re.compile('^https?://(?Pmangafox.me)/manga/(?P[\d\w-]+)?|(\/)$') + + lines = input_url.split('\n') + for line in lines: + found = re.search(mangafox_single_regex, line) + if found: + match = found.groupdict() + if match['issue']: + url = str(input_url) + single_chapter(url,current_directory) + else: + pass + + + + found = re.search(mangafox_whole_regex, line) + if found: + match = found.groupdict() + if match['comic_series']: + url = str(input_url) + whole_series(url,current_directory) + else: + pass diff --git a/comic_dl/sites/yomanga.py b/comic_dl/sites/yomanga.py new file mode 100644 index 0000000..448ca79 --- /dev/null +++ b/comic_dl/sites/yomanga.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import requests +import re 
+import os +import sys +from more_itertools import unique_everseen +import urllib2 +import urllib +import shutil +from urllib2 import URLError +from bs4 import BeautifulSoup + +def single_chapter(url,current_directory): + + if not url: + print "Couldn't get the URL. Please report it on Github Repository." + sys.exit(0) + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' + + } + + s = requests.Session() + response = s.get(url, headers=headers) + tasty_cookies = response.cookies + + Page_source = str(response.text.encode('utf-8')) + + Series_Name = str(re.search('\/read\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. + #print "Series Name : ",Series_Name + + try: + chapter_number = int(str(re.search('0\/(.*?)/', url).group(1)).strip().replace('0','').replace('/','')) # Getting the chapter count from the URL itself for naming the folder/dicrectories in integer. 
+ except Exception as e: + #raise e + chapter_number = 0 # Name the chapter 0 if nothing INTEGER type comes up + #print "Chapter No : ",chapter_number + + Raw_File_Directory = str(Series_Name)+'/'+"Chapter "+str(chapter_number) + #print 'Raw_File_Directory : ',Raw_File_Directory + File_Directory = re.sub('[^A-Za-z0-9\-\.\'\#\/ ]+', '', Raw_File_Directory) # Fix for "Special Characters" in The series name + #print "File_Directory : ",File_Directory + Directory_path = os.path.normpath(File_Directory) + #print "Directory_path : ",Directory_path + + + ddl_image_list = re.findall('comics(.*?)\"', Page_source) + #print "Older List : ",ddl_image_list + + ddl_list = list(unique_everseen(ddl_image_list)) + + print '\n' + print '{:^80}'.format('%s - %s')%(Series_Name,chapter_number) + print '{:^80}'.format('=====================================================================\n') + + for i in ddl_list: + #print i + if not os.path.exists(File_Directory): + os.makedirs(File_Directory) + ddl_image = "http://yomanga.co/reader/content/comics"+str(i).replace('"','').replace('\\','') + #print ddl_image + try: + #u = urllib2.urlopen(ddl_image, cookies=response.cookies) + u = requests.get(ddl_image,cookies=tasty_cookies) + except URLError, e: + if not hasattr(e, "code"): + raise + print "Got error from "+ddl_image, e.code, e.msg + resp = e + + File_Name_Final = str(re.findall('\/(\d+)\.[jpg]|[png]', i)).replace("[","").replace("]","").replace("'","").replace(",","").strip()+"."+str(re.findall('\d\.(.*?)$', str(i))).replace(",","").replace("[","").replace("]","").replace("'","").strip() + File_Check_Path = str(Directory_path)+'/'+str(File_Name_Final) + if os.path.isfile(File_Check_Path): + print 'File Exist! 
Skipping ',File_Name_Final,'\n' + pass + + if not os.path.isfile(File_Check_Path): + print 'Downloading : ',File_Name_Final + #urllib.urlretrieve(ddl_image, File_Name_Final) + response = requests.get(ddl_image, stream=True,cookies=tasty_cookies) + try: + with open(File_Name_Final, 'wb') as out_file: + shutil.copyfileobj(response.raw, out_file) + File_Path = os.path.normpath(File_Name_Final) + except Exception as e: + #raise e + #print e + print "Couldn't download file from : ",ddl_image + pass + try: + shutil.move(File_Path,Directory_path) + except Exception, e: + #raise e + print e,'\n' + #os.remove(File_Path) + pass + + print '\n' + print "Completed downloading ",Series_Name + +def whole_series(url,current_directory): + if not url: + print "Couldn't get the URL. Please report it on Github Repository." + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36' + + } + + s = requests.Session() + response = s.get(url, headers=headers) + tasty_cookies = response.cookies + + Page_source = str(response.text.encode('utf-8')) + + Series_Name = str(re.search('\/series\/(.*?)/', url).group(1)).strip().replace('_',' ').title() # Getting the Series Name from the URL itself for naming the folder/dicrectories. 
+ #print "Series Name : ",Series_Name + + soup = BeautifulSoup(Page_source, 'html.parser') + + chapter_text = soup.findAll('div',{'class':'title'}) + #print chapter_text + + for link in chapter_text: + x = link.findAll('a') + for a in x: + url = a['href'] + single_chapter(url,current_directory) + +def yomanga_Url_Check(input_url,current_directory): + + yomanga_single_regex = re.compile('https?://(?Pyomanga.co)/reader/read/(?P[\d\w-]+)/en/(?P\d+)?/(?P\d+)?()|(/page/(?P\d+)?)') + yomanga_whole_regex = re.compile('^https?://(?Pyomanga.co)/reader/(?Pseries)?/(?P[\d\w-]+)?(\/|.)$') + + lines = input_url.split('\n') + for line in lines: + found = re.search(yomanga_single_regex, line) + if found: + match = found.groupdict() + if match['Chapter']: + url = str(input_url) + single_chapter(url,current_directory) + else: + pass + + + + found = re.search(yomanga_whole_regex, line) + if found: + match = found.groupdict() + if match['comic']: + url = str(input_url) + whole_series(url,current_directory) + else: + pass + + + + + + + diff --git a/comic_dl/version.py b/comic_dl/version.py new file mode 100644 index 0000000..570e34b --- /dev/null +++ b/comic_dl/version.py @@ -0,0 +1,7 @@ +''' + +Date Format : YY/MM/DD + +''' + +__version__ = '2016.11.13' diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3d6f480 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +more_itertools +bs4 +requests \ No newline at end of file