gallery-dl/gallery_dl/__init__.py

262 lines
8.6 KiB
Python
Raw Normal View History

2015-04-05 17:15:27 +02:00
# -*- coding: utf-8 -*-
# Copyright 2014-2020 Mike Fährmann
2015-04-05 17:15:27 +02:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
2016-10-04 14:33:50 +02:00
from __future__ import unicode_literals, print_function
2017-01-30 19:40:15 +01:00
__author__ = "Mike Fährmann"
2020-10-25 03:05:10 +01:00
__copyright__ = "Copyright 2014-2020 Mike Fährmann"
2017-01-30 19:40:15 +01:00
__license__ = "GPLv2"
2014-10-12 21:56:44 +02:00
__maintainer__ = "Mike Fährmann"
2017-01-30 19:40:15 +01:00
__email__ = "mike_faehrmann@web.de"
2014-10-12 21:56:44 +02:00
2016-08-06 13:40:49 +02:00
import sys
2016-10-04 14:33:50 +02:00
# Abort at import time with a readable message when the interpreter is
# older than Python 3.4 (sys.hexversion encodes the version as 0xMMmmpp..).
if sys.hexversion < 0x3040000:
    sys.exit("Python 3.4+ required")
2016-10-04 14:33:50 +02:00
import json
2017-03-07 23:50:19 +01:00
import logging
from . import version, config, option, output, extractor, job, util, exception
2014-10-12 21:56:44 +02:00
__version__ = version.__version__
def progress(urls, pformat):
    """Yield every item of 'urls' while reporting progress on stderr.

    Before an item is yielded, one line built from 'pformat' is printed to
    sys.stderr. The format string may reference the fields '{current}'
    (1-based position), '{total}' (len(urls)) and '{url}' (the item itself).
    Passing True for 'pformat' selects the default "[{current}/{total}] {url}".
    """
    template = "[{current}/{total}] {url}" if pformat is True else pformat
    total = len(urls)

    for position, url in enumerate(urls, 1):
        fields = {"total": total, "current": position, "url": url}
        print(template.format_map(fields), file=sys.stderr)
        yield url
def parse_inputfile(file, log):
    """Generate URLs (optionally with attached options) from an input file.

    'file' is iterated line by line; each line is stripped and classified:

    - blank lines and lines beginning with '#' are skipped
    - lines beginning with '-' hold a '<key>=<value>' pair, where <key> is
      a dot-separated option name and <value> is a JSON-parsable value.
      Such options are applied while processing the next URL only.
    - lines beginning with '-G' work the same way, except that the options
      stay valid for all following URLs, i.e. they are Global.
    - every other line is used as a potential URL

    Malformed option lines are reported through 'log.warning()' and ignored.

    A URL preceded by option lines is yielded as 'util.ExtendedUrl';
    a URL without any pending options is yielded as a plain string.

    Example input file:

    # settings global options
    -G base-directory = "/tmp/"
    -G skip = false

    # setting local options for the next URL
    -filename="spaces_are_optional.jpg"
    -skip = true

    https://example.org/

    # next URL uses default filename and 'skip' is false.
    https://example.com/index.htm
    """
    global_opts = []
    local_opts = []

    for raw_line in file:
        entry = raw_line.strip()

        # nothing to do for empty lines and comments
        if not entry or entry.startswith("#"):
            continue

        if not entry.startswith("-"):
            # URL: attach the options collected so far, then start over
            if global_opts or local_opts:
                yield util.ExtendedUrl(entry, global_opts, local_opts)
                global_opts = []
                local_opts = []
            else:
                yield entry
            continue

        # option spec: '-G' selects the global list, a lone '-' the local one
        if entry[1:2] == "G":
            target = global_opts
            entry = entry[2:]
        else:
            target = local_opts
            entry = entry[1:]

        name, separator, raw_value = entry.partition("=")
        if not separator:
            log.warning("input file: invalid <key>=<value> pair: %s", entry)
            continue

        try:
            parsed = json.loads(raw_value.strip())
        except ValueError as exc:
            log.warning("input file: unable to parse '%s': %s", raw_value, exc)
            continue

        # 'a.b.c' -> path ['a', 'b'] and option name 'c'
        *path, option_name = name.strip().split(".")
        target.append((path, option_name, parsed))
2014-10-12 21:56:44 +02:00
def main():
    """Command-line entry point.

    Parses the command line, loads and updates the configuration, sets up
    logging and then performs exactly one of the following actions:

    - print the names of all extractor modules    (args.list_modules)
    - print a description of every extractor      (args.list_extractors)
    - clear the cache database                    (args.clear_cache)
    - run a job for every URL given on the command line and/or read
      from an input file

    Returns the bitwise OR of the values returned by each job's 'run()'
    when jobs were run, with bit 64 set for every URL no extractor could
    be found for; returns 1 when control reaches the end of the function.

    A KeyboardInterrupt terminates the process via 'sys.exit()'; broken
    pipes (BrokenPipeError or an OSError with errno EPIPE) are suppressed,
    any other OSError is re-raised.
    """
    try:
        # 'encoding' can be None when sys.stdout has been replaced by a
        # stream object without a known encoding; treat that the same as
        # any other non-UTF-8 stream instead of failing on None.lower()
        if sys.stdout and (sys.stdout.encoding or "").lower() != "utf-8":
            output.replace_std_streams()

        parser = option.build_parser()
        args = parser.parse_args()
        log = output.initialize_logging(args.loglevel)

        # configuration
        if args.load_config:
            config.load()
        if args.cfgfiles:
            config.load(args.cfgfiles, strict=True)
        if args.yamlfiles:
            config.load(args.yamlfiles, strict=True, fmt="yaml")
        if args.postprocessors:
            config.set((), "postprocessors", args.postprocessors)
        if args.abort:
            config.set((), "skip", "abort:" + str(args.abort))
        # command-line options are applied last
        for opts in args.options:
            config.set(*opts)

        # extractor modules
        # (replace the module list of 'extractor' with the configured one)
        modules = config.get(("extractor",), "modules")
        if modules is not None:
            extractor.modules = modules
            extractor._module_iter = iter(modules)

        # loglevels
        output.configure_logging(args.loglevel)
        if args.loglevel >= logging.ERROR:
            config.set(("output",), "mode", "null")
        elif args.loglevel <= logging.DEBUG:
            # only needed for the debug output below
            import platform
            import subprocess
            import os.path
            import requests

            # best effort: ask git for the current commit of the directory
            # containing this file; any failure leaves 'head' empty
            head = ""
            try:
                out, err = subprocess.Popen(
                    ("git", "rev-parse", "--short", "HEAD"),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)),
                ).communicate()
                if out and not err:
                    head = " - Git HEAD: " + out.decode().rstrip()
            except (OSError, subprocess.SubprocessError):
                pass

            log.debug("Version %s%s", __version__, head)
            log.debug("Python %s - %s",
                      platform.python_version(), platform.platform())
            try:
                log.debug("requests %s - urllib3 %s",
                          requests.__version__,
                          requests.packages.urllib3.__version__)
            except AttributeError:
                # version attributes are not available; skip this line
                pass

        if args.list_modules:
            for module_name in extractor.modules:
                print(module_name)

        elif args.list_extractors:
            for extr in extractor.extractors():
                # extractors without a docstring are not listed
                if not extr.__doc__:
                    continue
                print(extr.__name__)
                print(extr.__doc__)
                print("Category:", extr.category,
                      "- Subcategory:", extr.subcategory)
                test = next(extr._get_tests(), None)
                if test:
                    print("Example :", test[0])
                print()

        elif args.clear_cache:
            from . import cache
            log = logging.getLogger("cache")
            cnt = cache.clear()

            if cnt is None:
                log.error("Database file not available")
            else:
                log.info(
                    "Deleted %d %s from '%s'",
                    cnt, "entry" if cnt == 1 else "entries", cache._path(),
                )

        else:
            if not args.urls and not args.inputfile:
                parser.error(
                    "The following arguments are required: URL\n"
                    "Use 'gallery-dl --help' to get a list of all options.")

            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
            else:
                jobtype = args.jobtype or job.DownloadJob

            urls = args.urls
            if args.inputfile:
                try:
                    if args.inputfile == "-":
                        # "-" means: read the input file from stdin
                        if sys.stdin:
                            urls += parse_inputfile(sys.stdin, log)
                        else:
                            log.warning("input file: stdin is not readable")
                    else:
                        # '+=' consumes the generator completely while
                        # the file is still open
                        with open(args.inputfile, encoding="utf-8") as file:
                            urls += parse_inputfile(file, log)
                except OSError as exc:
                    log.warning("input file: %s", exc)

            # unsupported file logging handler
            handler = output.setup_logging_handler(
                "unsupportedfile", fmt="{message}")
            if handler:
                ulog = logging.getLogger("unsupported")
                ulog.addHandler(handler)
                ulog.propagate = False
                job.Job.ulog = ulog

            # progress indicator; only for more than one URL and only
            # if the loglevel is below ERROR
            pformat = config.get(("output",), "progress", True)
            if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
                urls = progress(urls, pformat)

            retval = 0
            for url in urls:
                try:
                    log.debug("Starting %s for '%s'", jobtype.__name__, url)
                    if isinstance(url, util.ExtendedUrl):
                        # global options are set permanently, local
                        # options only while this URL's job is running
                        for opts in url.gconfig:
                            config.set(*opts)
                        with config.apply(url.lconfig):
                            retval |= jobtype(url.value).run()
                    else:
                        retval |= jobtype(url).run()
                except exception.NoExtractorError:
                    log.error("No suitable extractor found for '%s'", url)
                    retval |= 64
            return retval

    except KeyboardInterrupt:
        sys.exit("\nKeyboardInterrupt")
    except BrokenPipeError:
        pass
    except OSError as exc:
        import errno
        if exc.errno != errno.EPIPE:
            raise
    # NOTE(review): besides suppressed broken-pipe errors, the listing and
    # cache-clearing branches above also end up here and therefore
    # return 1 - confirm that this is intended.
    return 1