2017-02-20 22:02:49 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# Copyright 2017 Mike Fährmann
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
"""Utility functions and classes"""
|
2017-02-20 22:02:49 +01:00
|
|
|
|
2017-10-09 22:12:58 +02:00
|
|
|
import re
|
2017-03-28 13:12:44 +02:00
|
|
|
import os
|
2017-02-23 21:51:29 +01:00
|
|
|
import sys
|
2017-06-16 21:01:40 +02:00
|
|
|
import hmac
|
|
|
|
import time
|
|
|
|
import base64
|
|
|
|
import random
|
|
|
|
import string
|
2017-09-27 21:18:34 +02:00
|
|
|
import _string
|
2017-06-16 21:01:40 +02:00
|
|
|
import hashlib
|
2017-10-03 22:38:48 +02:00
|
|
|
import datetime
|
2017-06-16 21:01:40 +02:00
|
|
|
import urllib.parse
|
2017-08-12 21:32:24 +02:00
|
|
|
from . import text, exception
|
2017-02-20 22:02:49 +01:00
|
|
|
|
|
|
|
|
2017-02-23 21:51:29 +01:00
|
|
|
def parse_range(rangespec):
    """Turn a textual list of integer ranges into (begin, end) tuples

    Groups are separated by commas; inside a group the first "-" splits
    the lower bound from the upper bound. A missing lower bound becomes 1,
    a missing upper bound becomes sys.maxsize. Bounds given in descending
    order are swapped. Groups that cannot be parsed as integers are
    skipped silently.

    Examples
        parse_range("-2,4,6-8,10-")      -> [(1,2), (4,4), (6,8), (10,INTMAX)]
        parse_range(" - 3 , 4-  4, 2-6") -> [(1,3), (4,4), (2,6)]
    """
    parsed = []

    for chunk in rangespec.split(","):
        low, dash, high = chunk.partition("-")
        try:
            if not dash:
                # a lone number describes a range of length one
                first = last = int(low)
            else:
                first = int(low) if low.strip() else 1
                last = int(high) if high.strip() else sys.maxsize
        except ValueError:
            # not a number: ignore this group and carry on
            continue
        parsed.append((first, last) if first <= last else (last, first))

    return parsed
|
|
|
|
|
|
|
|
|
|
|
|
def optimize_range(ranges):
    """Simplify/Combine a parsed list of ranges

    Overlapping and directly adjacent ranges are merged into one, and the
    merged ranges are returned in ascending order. The list passed in is
    not reordered. A list with fewer than two entries is returned as-is
    (the very same object).

    Examples
        optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
        optimize_range([(1,1), (2,2), (3,6), (8,9)]) -> [(1,6), (8,9)]
    """
    if len(ranges) <= 1:
        return ranges

    # iterate over a sorted copy so the caller's list keeps its order
    riter = iter(sorted(ranges))
    result = []

    beg, end = next(riter)
    for lower, upper in riter:
        if lower > end+1:
            # gap after the current run: store it and start a new one
            result.append((beg, end))
            beg, end = lower, upper
        elif upper > end:
            # overlapping or adjacent: extend the current run
            end = upper
    result.append((beg, end))
    return result
|
2017-02-23 21:51:29 +01:00
|
|
|
|
|
|
|
|
2017-06-01 18:14:33 +02:00
|
|
|
def bdecode(data, alphabet="0123456789"):
    """Decode a base-N encoded string ( N = len(alphabet) )

    Each symbol of 'data' is looked up in 'alphabet'; its position is the
    digit value. Symbols are consumed most-significant first. A symbol
    that is not part of 'alphabet' makes alphabet.index() raise
    ValueError.
    """
    radix = len(alphabet)
    digit_of = alphabet.index
    value = 0
    for symbol in data:
        value = value * radix + digit_of(symbol)
    return value
|
|
|
|
|
|
|
|
|
2017-08-12 20:07:27 +02:00
|
|
|
def combine_dict(a, b):
    """Merge the contents of 'b' into 'a' in place and return 'a'

    When a key exists in both and both values are dicts, the two dicts are
    merged recursively. In every other case the value from 'b' replaces
    (or adds) the entry in 'a'.
    """
    for key, incoming in b.items():
        both_are_dicts = (
            isinstance(incoming, dict) and
            key in a and
            isinstance(a[key], dict)
        )
        if both_are_dicts:
            combine_dict(a[key], incoming)
        else:
            a[key] = incoming
    return a
|
2017-08-12 20:07:27 +02:00
|
|
|
|
|
|
|
|
2017-09-24 15:59:25 +02:00
|
|
|
def safe_int(value, default=0):
    """Safely convert value to integer

    Returns int(value), or 'default' when 'value' is None, an empty
    string, or anything int() refuses to convert.
    """
    if value is None or value == "":
        return default
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is what int() raises for a float infinity
        return default
|
|
|
|
|
|
|
|
|
2017-08-08 19:22:04 +02:00
|
|
|
def code_to_language(code, default=None):
    """Look up the language name stored in CODES for a language code

    The lookup is case-insensitive. A falsy 'code' (None, "") is looked up
    as the empty string, which yields 'default' unless CODES has such a
    key.
    """
    key = code.lower() if code else ""
    return CODES.get(key, default)
|
2017-03-28 13:12:44 +02:00
|
|
|
|
|
|
|
|
2017-08-08 19:22:04 +02:00
|
|
|
def language_to_code(lang, default=None):
    """Look up the code stored in CODES for a language name

    'lang' is capitalized before comparing, so the match ignores case.
    Returns the first matching code, or 'default' if 'lang' is None or no
    entry matches.
    """
    if lang is None:
        return default
    wanted = lang.capitalize()
    matches = (code for code, name in CODES.items() if name == wanted)
    return next(matches, default)
|
|
|
|
|
|
|
|
|
2017-06-16 21:01:40 +02:00
|
|
|
# Language code -> language name, as used by code_to_language() and
# language_to_code().
CODES = {
    "ar": "Arabic",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fi": "Finnish",
    "fr": "French",
    "he": "Hebrew",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    # "jp" is not an ISO 639-1 code, but it is kept (and kept first) so
    # that existing lookups and language_to_code("Japanese") do not change
    # on interpreters with insertion-ordered dicts.
    "jp": "Japanese",
    # the actual ISO 639-1 code for Japanese
    "ja": "Japanese",
    "ko": "Korean",
    "ms": "Malay",
    "nl": "Dutch",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sv": "Swedish",
    "th": "Thai",
    "tr": "Turkish",
    "vi": "Vietnamese",
    "zh": "Chinese",
}

# NOTE(review): presumably the names of extractors that need special
# treatment elsewhere; nothing in this file reads it -- confirm with callers.
SPECIAL_EXTRACTORS = ("oauth", "recursive", "test")
|
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
|
2017-09-06 17:08:50 +02:00
|
|
|
def build_predicate(predicates):
    """Collapse a list of predicates into a single callable

    - no predicates: a function accepting (url, kwds) that is always True
    - one predicate: that predicate itself
    - several: a ChainPredicate wrapping all of them
    """
    if not predicates:
        return lambda url, kwds: True
    if len(predicates) == 1:
        return predicates[0]
    return ChainPredicate(predicates)
|
|
|
|
|
|
|
|
|
2017-02-23 21:51:29 +01:00
|
|
|
class RangePredicate():
    """Predicate; True if the current index is in the given range

    The index is an internal counter that starts at 1 and grows by one on
    every call. Once it passes the largest upper bound of all ranges,
    exception.StopExtraction is raised.
    """

    def __init__(self, ranges):
        """Store 'ranges', a sequence of (lower, upper) tuples"""
        self.ranges = ranges
        self.index = 0
        if self.ranges:
            # Use the real extremes instead of the first/last entry, so
            # unsorted or overlapping ranges do not stop extraction early.
            self.lower = min(lower for lower, _ in self.ranges)
            self.upper = max(upper for _, upper in self.ranges)
        else:
            self.lower, self.upper = 0, 0

    def __call__(self, url, kwds):
        """Advance the index and report whether it lies in any range

        'url' and 'kwds' are accepted for the common predicate signature
        but are not inspected.
        """
        self.index += 1

        if self.index > self.upper:
            # nothing beyond this point can match anymore
            raise exception.StopExtraction()

        for lower, upper in self.ranges:
            if lower <= self.index <= upper:
                return True
        return False
|
2017-03-28 13:12:44 +02:00
|
|
|
|
|
|
|
|
2017-09-06 17:08:50 +02:00
|
|
|
class UniquePredicate():
    """Predicate; True if given URL has not been encountered before"""

    def __init__(self):
        # every URL this predicate has been called with so far
        self.urls = set()

    def __call__(self, url, kwds):
        """Return True on the first call with 'url', False afterwards

        'kwds' is part of the predicate signature but is not used.
        """
        seen = self.urls
        if url in seen:
            return False
        seen.add(url)
        return True
|
|
|
|
|
|
|
|
|
2017-09-08 17:52:00 +02:00
|
|
|
class FilterPredicate():
    """Predicate; returns the result of evaluating the given expression

    The expression is evaluated with eval(), using 'globalsdict' as
    globals and the keyword dict of the current call as locals.
    """
    # names made available to every filter expression
    globalsdict = dict(
        safe_int=safe_int,
        urlsplit=urllib.parse.urlsplit,
        datetime=datetime.datetime,
        re=re,
    )

    def __init__(self, codeobj):
        # expression to evaluate; handed to eval() unchanged
        self.codeobj = codeobj

    def __call__(self, url, kwds):
        """Evaluate the expression against 'kwds'

        Any exception raised during evaluation is wrapped in
        exception.FilterError. 'url' is not used.
        """
        try:
            outcome = eval(self.codeobj, self.globalsdict, kwds)
        except Exception as error:
            raise exception.FilterError(error)
        return outcome
|
|
|
|
|
|
|
|
|
2017-09-06 17:08:50 +02:00
|
|
|
class ChainPredicate():
    """Predicate; True if all of its predicates return True"""

    def __init__(self, predicates):
        # predicates are called in this order
        self.predicates = predicates

    def __call__(self, url, kwds):
        """Call the predicates in order, stopping at the first falsy result"""
        return all(check(url, kwds) for check in self.predicates)
|
|
|
|
|
|
|
|
|
2017-09-27 21:18:34 +02:00
|
|
|
class Formatter():
    """Custom, trimmed-down version of string.Formatter

    This string formatter implementation is a mostly performance-optimized
    variant of the original string.Formatter class. Unnecessary features have
    been removed (positional arguments, unused argument check) and new
    formatting options have been added.

    Extra Conversions:
    - "l": calls str.lower on the target value
    - "u": calls str.upper
    - "c": calls str.capitalize
    - "C": calls string.capwords
    - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"

    Extra Format Specifiers:
    - "?<before>/<after>/":
        Adds <before> and <after> to the actual value if it evaluates to True.
        Otherwise the whole replacement field becomes an empty string.
        Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
                             -> ""            (if "f" is None, 0, "")
    """
    # conversion character (the part after "!") -> callable applied to
    # the field value
    conversions = {
        "l": str.lower,
        "u": str.upper,
        "c": str.capitalize,
        "C": string.capwords,
        "s": str,
        "r": repr,
        "a": ascii,
    }

    def vformat(self, format_string, kwargs):
        """Apply 'kwargs' to the initial format_string and return its result

        Raises whatever the field lookup, the conversion or format() raise,
        e.g. KeyError for an unknown field name or conversion character.
        """
        result = []
        append = result.append

        for literal_text, field_name, format_spec, conversion in \
                _string.formatter_parser(format_string):

            if literal_text:
                append(literal_text)

            if field_name:
                obj = self.get_field(field_name, kwargs)
                if conversion:
                    obj = self.conversions[conversion](obj)
                if format_spec:
                    # the spec may itself contain replacement fields
                    format_spec = format_spec.format_map(kwargs)
                    obj = self.format_field(obj, format_spec)
                else:
                    obj = str(obj)
                append(obj)

        return "".join(result)

    @staticmethod
    def format_field(value, format_spec):
        """Format 'value' according to 'format_spec'

        A spec starting with "?" must have the form "?<before>/<after>/"
        optionally followed by a regular format spec; a truthy value with
        fewer than two "/" in such a spec raises ValueError. An empty spec
        is handed to format() unchanged.
        """
        # startswith() instead of [0]: a nested spec can expand to ""
        if format_spec.startswith("?"):
            if not value:
                return ""
            before, after, format_spec = format_spec.split("/", 2)
            # before[1:] drops the leading "?"
            return before[1:] + format(value, format_spec) + after
        return format(value, format_spec)

    @staticmethod
    def get_field(field_name, kwargs):
        """Return value called 'field_name' from 'kwargs'

        'field_name' may carry attribute (".name") and item ("[key]")
        accessors, which are applied one after another to the value found
        under the first name.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = kwargs[first]
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj
|
|
|
|
|
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
class PathFormat():
    """Build directory and file paths from format strings and keywords

    'path'/'directory' hold the plain paths, 'realpath'/'realdirectory'
    the variants passed through adjust_path() that are used for the
    actual filesystem access.
    """

    def __init__(self, extractor):
        """Read format strings and options from 'extractor'

        Only extractor.config(key, default), extractor.filename_fmt and
        extractor.directory_fmt are used.
        NOTE(review): config() presumably returns the configured value or
        the given default -- confirm against the extractor class.
        """
        self.filename_fmt = extractor.config(
            "filename", extractor.filename_fmt)
        self.directory_fmt = extractor.config(
            "directory", extractor.directory_fmt)
        self.formatter = Formatter()

        self.has_extension = False
        self.keywords = {}
        self.directory = self.realdirectory = ""
        self.path = self.realpath = ""

        # "base-directory" is either a string or a sequence of path segments
        bdir = extractor.config("base-directory", (".", "gallery-dl"))
        if not isinstance(bdir, str):
            bdir = os.path.join(*bdir)
        self.basedirectory = os.path.expanduser(os.path.expandvars(bdir))

        # "skip" selects what exists() does for an already existing file:
        #   "abort" -> raise exception.StopExtraction
        #   "exit"  -> terminate the interpreter
        #   falsy   -> never report a file as existing
        #   other   -> keep the regular exists() method (default: True)
        skipmode = extractor.config("skip", True)
        if skipmode == "abort":
            self.exists = self._exists_abort
        elif skipmode == "exit":
            self.exists = self._exists_exit
        elif not skipmode:
            self.exists = lambda: False

    def open(self, mode="wb"):
        """Open file to 'realpath' and return a corresponding file object"""
        return open(self.realpath, mode)

    def exists(self):
        """Return True if 'path' is complete and refers to an existing path"""
        if self.has_extension:
            return os.path.exists(self.realpath)
        return False

    def set_directory(self, keywords):
        """Build directory path and create it if necessary

        Every entry of 'directory_fmt' is formatted with 'keywords',
        stripped and passed through text.clean_path(); the results are
        joined onto the base directory. Any error while formatting is
        re-raised as exception.FormatError.
        """
        try:
            segments = [
                text.clean_path(
                    self.formatter.vformat(segment, keywords).strip())
                for segment in self.directory_fmt
            ]
        except Exception as exc:
            raise exception.FormatError(exc, "directory")

        self.directory = os.path.join(
            self.basedirectory,
            *segments
        )
        self.realdirectory = self.adjust_path(self.directory)
        os.makedirs(self.realdirectory, exist_ok=True)

    def set_keywords(self, keywords):
        """Set filename keywords

        The path is built right away only if 'keywords' already contains a
        non-empty "extension" entry.
        """
        self.keywords = keywords
        self.has_extension = bool(keywords.get("extension"))
        if self.has_extension:
            self.build_path()

    def set_extension(self, extension):
        """Set the 'extension' keyword and rebuild the path"""
        self.has_extension = True
        self.keywords["extension"] = extension
        self.build_path()

    def build_path(self, sep=os.path.sep):
        """Use filename-keywords and directory to build a full path

        Any error while formatting the filename is re-raised as
        exception.FormatError.
        """
        try:
            filename = text.clean_path(
                self.formatter.vformat(self.filename_fmt, self.keywords))
        except Exception as exc:
            raise exception.FormatError(exc, "filename")

        self.path = self.directory + sep + filename
        self.realpath = self.realdirectory + sep + filename

    def _exists_abort(self):
        """exists() variant: raise StopExtraction if the file exists"""
        if self.has_extension and os.path.exists(self.realpath):
            raise exception.StopExtraction()
        return False

    def _exists_exit(self):
        """exists() variant: terminate the program if the file exists"""
        if self.has_extension and os.path.exists(self.realpath):
            # sys.exit() rather than the exit() helper injected by 'site',
            # which is missing under "python -S" and in frozen builds
            sys.exit()
        return False

    @staticmethod
    def adjust_path(path):
        """Enable longer-than-260-character paths on windows"""
        return "\\\\?\\" + os.path.abspath(path) if os.name == "nt" else path
|
2017-06-16 21:01:40 +02:00
|
|
|
|
|
|
|
|
|
|
|
class OAuthSession():
    """Minimal wrapper for requests.session objects to support OAuth 1.0

    Only GET requests signed with HMAC-SHA1 are supported; the OAuth
    parameters are sent as part of the query parameters.
    """

    def __init__(self, session, consumer_key, consumer_secret,
                 token=None, token_secret=None):
        """Store the session to wrap and the OAuth credentials"""
        self.session = session
        self.consumer_secret = consumer_secret
        self.token_secret = token_secret or ""
        # OAuth parameters added to every request. "oauth_token" is stored
        # even when it is None so the key is always present.
        self.params = {}
        self.params["oauth_consumer_key"] = consumer_key
        self.params["oauth_token"] = token
        self.params["oauth_signature_method"] = "HMAC-SHA1"
        self.params["oauth_version"] = "1.0"

    def get(self, url, params):
        """Send a signed GET request and return the session's response

        'params' is copied before the OAuth parameters are added, so the
        caller's dict is left untouched.
        """
        request_params = dict(params)
        request_params.update(self.params)
        request_params["oauth_nonce"] = self.nonce(16)
        request_params["oauth_timestamp"] = int(time.time())
        request_params["oauth_signature"] = self.signature(
            url, request_params)
        return self.session.get(url, params=request_params)

    def signature(self, url, params):
        """Generate 'oauth_signature' value

        Returns the base64-encoded HMAC-SHA1 digest of the signature base
        string built from "GET", 'url' and the sorted 'params'.
        """
        # OAuth 1.0 wants plain percent-encoding (space -> "%20"), not the
        # "+" that urlencode() produces by default
        query = urllib.parse.urlencode(
            sorted(params.items()), quote_via=urllib.parse.quote)
        message = self.concat("GET", url, query).encode()
        key = self.concat(self.consumer_secret, self.token_secret).encode()
        signature = hmac.new(key, message, hashlib.sha1).digest()
        return base64.b64encode(signature).decode()

    @staticmethod
    def concat(*args):
        """Percent-encode all arguments and join them with '&'"""
        return "&".join(urllib.parse.quote(item, "") for item in args)

    @staticmethod
    def nonce(N, alphabet=string.ascii_letters):
        """Return a random string of 'N' characters taken from 'alphabet'"""
        return "".join(random.choice(alphabet) for _ in range(N))
|