2017-02-20 22:02:49 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2019-01-16 17:49:29 +01:00
|
|
|
# Copyright 2017-2019 Mike Fährmann
|
2017-02-20 22:02:49 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
"""Utility functions and classes"""
|
2017-02-20 22:02:49 +01:00
|
|
|
|
2017-10-09 22:12:58 +02:00
|
|
|
import re
|
2017-03-28 13:12:44 +02:00
|
|
|
import os
|
2017-02-23 21:51:29 +01:00
|
|
|
import sys
|
2019-05-09 16:22:06 +02:00
|
|
|
import json
|
2019-06-19 23:16:32 +02:00
|
|
|
import time
|
2017-10-24 23:33:44 +02:00
|
|
|
import shutil
|
2017-06-16 21:01:40 +02:00
|
|
|
import string
|
2017-09-27 21:18:34 +02:00
|
|
|
import _string
|
2018-01-29 22:13:06 +01:00
|
|
|
import sqlite3
|
2017-10-03 22:38:48 +02:00
|
|
|
import datetime
|
2018-08-24 20:21:05 +02:00
|
|
|
import operator
|
2017-12-03 01:38:24 +01:00
|
|
|
import itertools
|
2017-06-16 21:01:40 +02:00
|
|
|
import urllib.parse
|
2019-07-14 22:37:28 +02:00
|
|
|
from email.utils import mktime_tz, parsedate_tz
|
2017-08-12 21:32:24 +02:00
|
|
|
from . import text, exception
|
2017-02-20 22:02:49 +01:00
|
|
|
|
|
|
|
|
2018-03-14 13:17:34 +01:00
|
|
|
def bencode(num, alphabet="0123456789"):
    """Encode a non-negative integer into a base-N encoded string

    N is len(alphabet) and the digit with value 'i' is written as
    alphabet[i], most significant digit first. Zero (or any other falsy
    'num') encodes to the empty string, which bdecode() maps back to 0.

    Raises ValueError if 'num' is negative.
    """
    if num and num < 0:
        # divmod() floors towards negative infinity, so a negative value
        # would get stuck at -1 and the loop below would never terminate
        raise ValueError("cannot encode negative number: {}".format(num))

    base = len(alphabet)
    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(alphabet[remainder])

    # digits were collected least significant first
    digits.reverse()
    return "".join(digits)
|
|
|
|
|
|
|
|
|
2017-06-01 18:14:33 +02:00
|
|
|
def bdecode(data, alphabet="0123456789"):
    """Decode a base-N encoded string ( N = len(alphabet) )

    Every character contributes its position in 'alphabet' as one digit,
    most significant digit first. An empty string decodes to 0.
    """
    radix = len(alphabet)
    value = 0
    for char in data:
        value = value * radix + alphabet.index(char)
    return value
|
|
|
|
|
|
|
|
|
2017-12-03 01:38:24 +01:00
|
|
|
def advance(iterable, num):
    """Advance the iterable by 'num' steps and return its iterator"""
    iterator = iter(iterable)
    # An empty slice starting at 'num' consumes exactly 'num' items
    # (or fewer, if the iterator is exhausted first) when asked for a value
    skipped = itertools.islice(iterator, num, num)
    next(skipped, None)
    return iterator
|
|
|
|
|
|
|
|
|
2018-04-12 17:07:12 +02:00
|
|
|
def raises(obj):
    """Returns a function that raises 'obj' as exception"""
    def wrap():
        # takes no arguments; 'obj' is captured from the enclosing call
        raise obj

    return wrap
|
|
|
|
|
|
|
|
|
2017-08-12 20:07:27 +02:00
|
|
|
def combine_dict(a, b):
    """Recursively combine the contents of 'b' into 'a'

    Values that are dicts on both sides are merged key by key; in every
    other case the value from 'b' replaces the one in 'a'.
    'a' is modified in place and returned.
    """
    for key, new in b.items():
        mergeable = (
            key in a and
            isinstance(new, dict) and
            isinstance(a[key], dict)
        )
        if mergeable:
            combine_dict(a[key], new)
            continue
        a[key] = new
    return a
|
2017-08-12 20:07:27 +02:00
|
|
|
|
|
|
|
|
2018-10-08 20:28:54 +02:00
|
|
|
def transform_dict(a, func):
    """Recursively apply 'func' to all values in 'a'

    Nested dicts are descended into instead of being passed to 'func'.
    'a' is modified in place; nothing is returned.
    """
    for key, value in a.items():
        if not isinstance(value, dict):
            a[key] = func(value)
        else:
            transform_dict(value, func)
|
|
|
|
|
|
|
|
|
2019-02-14 11:15:19 +01:00
|
|
|
def number_to_string(value, numbers=(int, float)):
    """Convert numbers (int, float) to string; Return everything else as is."""
    # Exact class match on purpose: subclasses such as bool are left alone
    if value.__class__ in numbers:
        return str(value)
    return value
|
2018-10-08 20:28:54 +02:00
|
|
|
|
|
|
|
|
2019-03-04 21:13:34 +01:00
|
|
|
def to_string(value):
    """str() with "better" defaults

    Falsy values become an empty string and lists are rendered as their
    elements joined by ", ".
    """
    if not value:
        return ""
    if value.__class__ is not list:
        return str(value)
    try:
        # fast path: every element already is a string
        return ", ".join(value)
    except Exception:
        return ", ".join(map(str, value))
|
|
|
|
|
|
|
|
|
2019-05-09 16:22:06 +02:00
|
|
|
def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4):
    """Serialize 'obj' as JSON and write it to 'fp'

    Keys are sorted, objects JSON cannot represent are written as their
    str() form, and a single newline is appended after the document.
    """
    options = {
        "ensure_ascii": ensure_ascii,
        "indent": indent,
        "default": str,
        "sort_keys": True,
    }
    json.dump(obj, fp, **options)
    fp.write("\n")
|
|
|
|
|
|
|
|
|
2017-10-26 00:04:28 +02:00
|
|
|
def expand_path(path):
    """Expand environment variables and tildes (~)

    'path' may be a string or a sequence of path segments, which are joined
    with os.path.join() first. Falsy values are returned unchanged.
    """
    if not path:
        return path
    joined = path if isinstance(path, str) else os.path.join(*path)
    with_home = os.path.expanduser(joined)
    return os.path.expandvars(with_home)
|
|
|
|
|
|
|
|
|
2017-08-08 19:22:04 +02:00
|
|
|
def code_to_language(code, default=None):
    """Map an ISO 639-1 language code to its actual name

    The lookup is case-insensitive; 'default' is returned for unknown or
    empty codes.
    """
    key = code.lower() if code else ""
    return CODES.get(key, default)
|
2017-03-28 13:12:44 +02:00
|
|
|
|
|
|
|
|
2017-08-08 19:22:04 +02:00
|
|
|
def language_to_code(lang, default=None):
    """Map a language name to its ISO 639-1 code

    The name is matched after str.capitalize(), so letter case does not
    matter; 'default' is returned for None and for unknown names.
    """
    if lang is None:
        return default
    wanted = lang.capitalize()
    matches = (code for code, name in CODES.items() if name == wanted)
    return next(matches, default)
|
|
|
|
|
|
|
|
|
2017-06-16 21:01:40 +02:00
|
|
|
# Two-letter language code -> English language name;
# used by code_to_language() and language_to_code()
# NOTE(review): "jp" is not the ISO 639-1 code for Japanese (that is "ja");
# presumably kept to match what some sites use - confirm before changing
CODES = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "ca": "Catalan",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fi": "Finnish",
    "fr": "French",
    "he": "Hebrew",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "jp": "Japanese",
    "ko": "Korean",
    "ms": "Malay",
    "nl": "Dutch",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sv": "Swedish",
    "th": "Thai",
    "tr": "Turkish",
    "vi": "Vietnamese",
    "zh": "Chinese",
}
|
|
|
|
|
2018-01-14 18:47:22 +01:00
|
|
|
# Names singled out as "special" extractors
# NOTE(review): nothing in this module reads this set - confirm its
# meaning against the callers that import it
SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
|
2017-06-16 21:01:40 +02:00
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
|
2019-02-13 17:39:43 +01:00
|
|
|
class UniversalNone():
    """None-style object that supports more operations than None itself

    Attribute access and subscription both return the object itself, so
    chains like obj.a["b"].c never fail. The object is falsy and its str()
    and repr() are "None".
    """
    __slots__ = ()

    def __getitem__(self, _key):
        return self

    def __getattribute__(self, _name):
        # applies to every attribute, including dunder names looked up
        # explicitly on the instance
        return self

    @staticmethod
    def __str__():
        return "None"

    __repr__ = __str__

    @staticmethod
    def __bool__():
        return False


# Shared instance of UniversalNone
NONE = UniversalNone()
|
|
|
|
|
|
|
|
|
2017-09-06 17:08:50 +02:00
|
|
|
def build_predicate(predicates):
    """Reduce a list of predicates to a single callable taking (url, kwds)

    No predicates yield a function that always returns True, a single
    predicate is returned as is and several are wrapped in a ChainPredicate.
    """
    if not predicates:
        return lambda url, kwds: True
    if len(predicates) != 1:
        return ChainPredicate(predicates)
    return predicates[0]
|
|
|
|
|
|
|
|
|
2017-02-23 21:51:29 +01:00
|
|
|
class RangePredicate():
    """Predicate; True if the current index is in the given range

    The index starts at 0 and is incremented on every call, so the first
    call is checked as index 1.
    """

    def __init__(self, rangespec):
        self.ranges = self.optimize_range(self.parse_range(rangespec))
        self.index = 0

        if self.ranges:
            # ranges are sorted, so the overall bounds are at both ends
            self.lower, self.upper = self.ranges[0][0], self.ranges[-1][1]
        else:
            self.lower, self.upper = 0, 0

    def __call__(self, url, kwds):
        """Count this call and return True if its index is inside a range

        Raises StopExtraction once the index is greater than the upper
        bound of the last range.
        """
        self.index += 1

        if self.index > self.upper:
            # no later index can match anymore
            raise exception.StopExtraction()

        for lower, upper in self.ranges:
            if lower <= self.index <= upper:
                return True
        return False

    @staticmethod
    def parse_range(rangespec):
        """Parse an integer range string and return the resulting ranges

        Groups are separated by commas; empty and whitespace-only groups
        are ignored. A missing lower bound defaults to 1, a missing upper
        bound to sys.maxsize, and reversed bounds are swapped.

        Examples:
            parse_range("-2,4,6-8,10-") -> [(1,2), (4,4), (6,8), (10,INTMAX)]
            parse_range(" - 3 , 4-  4, 2-6") -> [(1,3), (4,4), (2,6)]
        """
        ranges = []

        for group in rangespec.split(","):
            if not group.strip():
                # "1,,3" and "1, ,3" are treated the same way
                continue
            first, sep, last = group.partition("-")
            if not sep:
                beg = end = int(first)
            else:
                beg = int(first) if first.strip() else 1
                end = int(last) if last.strip() else sys.maxsize
            ranges.append((beg, end) if beg <= end else (end, beg))

        return ranges

    @staticmethod
    def optimize_range(ranges):
        """Simplify/Combine a parsed list of ranges

        Overlapping and directly adjacent ranges are merged and the result
        is sorted. The list passed in is not modified.

        Examples:
            optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
            optimize_range([(1,1), (2,2), (3,6), (8,9)]) -> [(1,6), (8,9)]
        """
        if len(ranges) <= 1:
            return ranges

        # sorted() instead of list.sort() to leave the caller's list alone
        riter = iter(sorted(ranges))
        result = []

        beg, end = next(riter)
        for lower, upper in riter:
            if lower > end+1:
                # gap between the current and the next range
                result.append((beg, end))
                beg, end = lower, upper
            elif upper > end:
                end = upper
        result.append((beg, end))
        return result
|
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
|
2017-09-06 17:08:50 +02:00
|
|
|
class UniquePredicate():
    """Predicate; True if given URL has not been encountered before"""

    def __init__(self):
        self.urls = set()

    def __call__(self, url, kwds):
        # "text:" URLs always pass and are never recorded
        if url.startswith("text:"):
            return True
        if url in self.urls:
            return False
        self.urls.add(url)
        return True
|
|
|
|
|
|
|
|
|
2017-09-08 17:52:00 +02:00
|
|
|
class FilterPredicate():
    """Predicate; returns the result of evaluating the given expression

    The expression is compiled once and evaluated with 'kwds' as its local
    namespace on every call.
    """
    # Names available to every filter expression
    globalsdict = {
        "parse_int": text.parse_int,
        "urlsplit": urllib.parse.urlsplit,
        "datetime": datetime.datetime,
        "abort": raises(exception.StopExtraction()),
        "re": re,
    }

    def __init__(self, filterexpr, target="image"):
        # the pseudo filename shows up in tracebacks and syntax errors
        self.codeobj = compile(
            filterexpr, "<{} filter>".format(target), "eval")

    def __call__(self, url, kwds):
        # NOTE: eval() runs arbitrary Python; filter expressions must only
        # come from a trusted source
        try:
            result = eval(self.codeobj, self.globalsdict, kwds)
        except exception.GalleryDLException:
            # let the project's own exceptions (e.g. from abort()) through
            raise
        except Exception as exc:
            raise exception.FilterError(exc)
        return result
|
|
|
|
|
|
|
|
|
2017-09-06 17:08:50 +02:00
|
|
|
class ChainPredicate():
    """Predicate; True if all of its predicates return True"""

    def __init__(self, predicates):
        self.predicates = predicates

    def __call__(self, url, kwds):
        # all() stops at the first predicate with a falsy result
        return all(check(url, kwds) for check in self.predicates)
|
|
|
|
|
|
|
|
|
2018-02-07 21:47:27 +01:00
|
|
|
class ExtendedUrl():
    """URL with attached config key-value pairs"""

    def __init__(self, url, gconf, lconf):
        self.value = url
        self.gconfig = gconf
        self.lconfig = lconf

    def __str__(self):
        return self.value
|
|
|
|
|
|
|
|
|
2017-09-27 21:18:34 +02:00
|
|
|
class Formatter():
    """Custom, extended version of string.Formatter

    This string formatter implementation is a mostly performance-optimized
    variant of the original string.Formatter class. Unnecessary features have
    been removed (positional arguments, unused argument check) and new
    formatting options have been added.

    The format string is parsed once in __init__(); format_map() then only
    fills in the replacement fields.

    Extra Conversions:
    - "l": calls str.lower on the target value
    - "u": calls str.upper
    - "c": calls str.capitalize
    - "C": calls string.capwords
    - "U": calls urllib.parse.unquote
    - "S": calls util.to_string()
    - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"

    Extra Format Specifiers:
    - "?<before>/<after>/":
        Adds <before> and <after> to the actual value if it evaluates to True.
        Otherwise the whole replacement field becomes an empty string.
        Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
                             -> ""            (if "f" is None, 0, "")

    - "L<maxlen>/<replacement>/":
        Replaces the output with <replacement> if its length (in characters)
        exceeds <maxlen>. Otherwise everything is left as is.
        Example: {f:L5/too long/} -> "foo"      (if "f" is "foo")
                                  -> "too long" (if "f" is "foobar")

    - "J<separator>/":
        Joins elements of a list (or string) using <separator>
        Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])

    - "R<old>/<new>/":
        Replaces all occurrences of <old> with <new>
        Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
    """
    # conversion character (the part after "!") -> function applied to the value
    CONVERSIONS = {
        "l": str.lower,
        "u": str.upper,
        "c": str.capitalize,
        "C": string.capwords,
        "U": urllib.parse.unquote,
        "S": to_string,
        "s": str,
        "r": repr,
        "a": ascii,
    }

    def __init__(self, format_string, default=None):
        """Parse 'format_string'

        'default' is the value used for fields that cannot be resolved.
        """
        self.default = default
        # literal text parts, with an empty placeholder for every field
        self.result = []
        # (index into self.result, function computing the field's text)
        self.fields = []

        for literal_text, field_name, format_spec, conversion in \
                _string.formatter_parser(format_string):
            if literal_text:
                self.result.append(literal_text)
            if field_name:
                self.fields.append((
                    len(self.result),
                    self._field_access(field_name, format_spec, conversion),
                ))
                self.result.append("")

        if len(self.result) == 1:
            # shortcut for a single field or a single piece of literal text:
            # replace format_map() on this instance and drop the buffers
            if self.fields:
                self.format_map = self.fields[0][1]
            else:
                # return the parsed literal, in which "{{" and "}}" have
                # already been unescaped, and not the raw format string
                literal = self.result[0]
                self.format_map = lambda _: literal
            del self.result
            del self.fields

    def format_map(self, kwargs):
        """Apply 'kwargs' to the initial format_string and return its result"""
        # field values are written into the shared self.result list
        for index, func in self.fields:
            self.result[index] = func(kwargs)
        return "".join(self.result)

    def _field_access(self, field_name, format_spec, conversion):
        """Build the function that turns a kwargs dict into one field's text"""
        first, rest = _string.formatter_field_name_split(field_name)

        # accessors for everything after the first name: .attr, [key], [a:b]
        funcs = []
        for is_attr, key in rest:
            if is_attr:
                func = operator.attrgetter
            elif isinstance(key, str) and ":" in key:
                # purely numeric subscripts are parsed as int and can
                # therefore never be a slice
                func = self._slicegetter
            else:
                func = operator.itemgetter
            funcs.append(func(key))

        if conversion:
            funcs.append(self.CONVERSIONS[conversion])

        if format_spec:
            if format_spec[0] == "?":
                func = self._format_optional
            elif format_spec[0] == "L":
                func = self._format_maxlen
            elif format_spec[0] == "J":
                func = self._format_join
            elif format_spec[0] == "R":
                func = self._format_replace
            else:
                func = self._format_default
            fmt = func(format_spec)
        else:
            fmt = str

        if funcs:
            return self._apply(first, funcs, fmt)
        return self._apply_simple(first, fmt)

    def _apply_simple(self, key, fmt):
        """Return a function formatting obj[key], or the default if missing"""
        def wrap(obj):
            if key in obj:
                obj = obj[key]
            else:
                obj = self.default
            return fmt(obj)
        return wrap

    def _apply(self, key, funcs, fmt):
        """Return a function formatting obj[key] after applying 'funcs'

        Any exception during lookup or in one of 'funcs' results in the
        default value being formatted instead.
        """
        def wrap(obj):
            try:
                obj = obj[key]
                for func in funcs:
                    obj = func(obj)
            except Exception:
                obj = self.default
            return fmt(obj)
        return wrap

    @staticmethod
    def _slicegetter(key):
        """Return an itemgetter for a "start:stop:step" slice string"""
        start, _, stop = key.partition(":")
        stop, _, step = stop.partition(":")
        start = int(start) if start else None
        stop = int(stop) if stop else None
        step = int(step) if step else None
        return operator.itemgetter(slice(start, stop, step))

    @staticmethod
    def _format_optional(format_spec):
        """Build the formatting function for "?<before>/<after>/<spec>\""""
        def wrap(obj):
            if not obj:
                return ""
            return before + format(obj, format_spec) + after
        before, after, format_spec = format_spec.split("/", 2)
        before = before[1:]  # strip the leading "?"
        return wrap

    @staticmethod
    def _format_maxlen(format_spec):
        """Build the formatting function for "L<maxlen>/<replacement>/<spec>\""""
        def wrap(obj):
            obj = format(obj, format_spec)
            return obj if len(obj) <= maxlen else replacement
        maxlen, replacement, format_spec = format_spec.split("/", 2)
        maxlen = text.parse_int(maxlen[1:])  # strip the leading "L"
        return wrap

    @staticmethod
    def _format_join(format_spec):
        """Build the formatting function for "J<separator>/<spec>\""""
        def wrap(obj):
            obj = separator.join(obj)
            return format(obj, format_spec)
        separator, _, format_spec = format_spec.partition("/")
        separator = separator[1:]  # strip the leading "J"
        return wrap

    @staticmethod
    def _format_replace(format_spec):
        """Build the formatting function for "R<old>/<new>/<spec>\""""
        def wrap(obj):
            obj = obj.replace(old, new)
            return format(obj, format_spec)
        old, new, format_spec = format_spec.split("/", 2)
        old = old[1:]  # strip the leading "R"
        return wrap

    @staticmethod
    def _format_default(format_spec):
        """Build the formatting function for a regular format specifier"""
        def wrap(obj):
            return format(obj, format_spec)
        return wrap
|
2017-09-27 21:18:34 +02:00
|
|
|
|
|
|
|
|
2017-03-28 13:12:44 +02:00
|
|
|
class PathFormat():
    """Build and keep track of the filesystem paths of a file

    Attributes set by this class:
    - directory / realdirectory: target directory, as built from the
      directory format strings (both end with a path separator)
    - filename: formatted and cleaned filename
    - path / realpath: directory + filename / realdirectory + filename
    - temppath: path that open(), part_size() and finalize() operate on
    - extension, prefix, kwdict, delete: current file state
    """

    def __init__(self, extractor):
        """Read formatting and path options from 'extractor'

        Raises FormatError if a filename or directory format string
        cannot be parsed.
        """
        filename_fmt = extractor.config("filename", extractor.filename_fmt)
        directory_fmt = extractor.config("directory", extractor.directory_fmt)
        kwdefault = extractor.config("keywords-default")

        try:
            self.filename_formatter = Formatter(
                filename_fmt, kwdefault).format_map
        except Exception as exc:
            raise exception.FormatError(exc, "filename")

        try:
            # one formatter per directory level
            self.directory_formatters = [
                Formatter(dirfmt, kwdefault).format_map
                for dirfmt in directory_fmt
            ]
        except Exception as exc:
            raise exception.FormatError(exc, "directory")

        self.directory = self.realdirectory = ""
        self.filename = ""
        self.extension = ""
        self.prefix = ""
        self.kwdict = {}
        self.delete = False
        self.path = self.realpath = self.temppath = ""

        # Base directory: expanded, using os.sep only, ending with os.sep
        # NOTE(review): an empty "base-directory" value makes basedir[-1]
        # fail below - confirm whether that can be configured
        basedir = expand_path(
            extractor.config("base-directory", (".", "gallery-dl")))
        if os.altsep and os.altsep in basedir:
            basedir = basedir.replace(os.altsep, os.sep)
        if basedir[-1] != os.sep:
            basedir += os.sep
        self.basedirectory = basedir

        # Characters to replace with "_" in path segments; any other
        # "path-restrict" value is used as the character set itself
        restrict = extractor.config("path-restrict", "auto")
        if restrict == "auto":
            restrict = "\\\\|/<>:\"?*" if os.name == "nt" else "/"
        elif restrict == "unix":
            restrict = "/"
        elif restrict == "windows":
            restrict = "\\\\|/<>:\"?*"

        # Characters to remove from paths altogether
        remove = extractor.config("path-remove", "\x00-\x1f\x7f")

        self.clean_segment = self._build_cleanfunc(restrict, "_")
        self.clean_path = self._build_cleanfunc(remove, "")

    @staticmethod
    def _build_cleanfunc(chars, repl):
        """Return a function replacing all of 'chars' in a string with 'repl'

        More than one character in 'chars' is used as the content of a
        regex character class, so ranges and escapes are interpreted.
        """
        if not chars:
            return lambda x: x
        elif len(chars) == 1:
            def func(x, c=chars, r=repl):
                return x.replace(c, r)
        else:
            # the pattern is compiled once, when 'func' is defined
            def func(x, sub=re.compile("[" + chars + "]").sub, r=repl):
                return sub(r, x)
        return func

    def open(self, mode="wb"):
        """Open file and return a corresponding file object"""
        # 'open' refers to the builtin here and the opened file is 'temppath'
        return open(self.temppath, mode)

    def exists(self, archive=None):
        """Return True if the file exists on disk or in 'archive'"""
        if archive and self.kwdict in archive:
            return self.fix_extension()
        # without an extension no path has been built by set_filename()
        if self.extension and os.path.exists(self.realpath):
            return self.check_file()
        return False

    @staticmethod
    def check_file():
        """Called by exists() for a file found on disk; always True here"""
        return True

    def _enum_file(self):
        """Set the first prefix "1.", "2.", ... that gives an unused path

        Always returns False.
        """
        num = 1
        while True:
            self.prefix = str(num) + "."
            # rebuilds the paths with 'prefix' in front of the extension
            self.set_extension(self.extension, False)
            if not os.path.exists(self.realpath):
                return False
            num += 1

    def set_directory(self, kwdict):
        """Build directory path and create it if necessary"""

        # Build path segments by applying 'kwdict' to directory format strings
        try:
            segments = [
                self.clean_segment(format_map(kwdict).strip())
                for format_map in self.directory_formatters
            ]
        except Exception as exc:
            raise exception.FormatError(exc, "directory")

        # Join path segments
        sep = os.sep
        directory = self.clean_path(self.basedirectory + sep.join(segments))

        # Ensure directory ends with a path separator
        if directory[-1] != sep:
            directory += sep
        self.directory = directory

        # Enable longer-than-260-character paths on Windows
        if os.name == "nt":
            self.realdirectory = "\\\\?\\" + os.path.abspath(directory) + sep
        else:
            self.realdirectory = directory

        # Create directory tree
        os.makedirs(self.realdirectory, exist_ok=True)

    def set_filename(self, kwdict):
        """Set general filename data"""
        self.kwdict = kwdict
        # resetting 'temppath' lets build_path() set it to 'realpath' again
        self.temppath = self.prefix = ""
        self.extension = kwdict["extension"]

        # paths are only built when an extension is known
        if self.extension:
            self.build_path()

    def set_extension(self, extension, real=True):
        """Set filename extension"""
        # 'real=False' changes only the extension used for formatting
        # and leaves self.extension as it is
        if real:
            self.extension = extension
        self.kwdict["extension"] = self.prefix + extension
        self.build_path()

    def fix_extension(self, _=None):
        """Fix filenames without a given filename extension"""
        if not self.extension:
            self.set_extension("", False)
            # remove the "." an empty extension leaves at the end
            if self.path[-1] == ".":
                self.path = self.path[:-1]
                self.temppath = self.realpath = self.realpath[:-1]
        return True

    def build_path(self):
        """Use filename metadata and directory to build a full path"""

        # Apply 'kwdict' to filename format string
        try:
            self.filename = filename = self.clean_path(self.clean_segment(
                self.filename_formatter(self.kwdict)))
        except Exception as exc:
            raise exception.FormatError(exc, "filename")

        # Combine directory and filename to full paths
        self.path = self.directory + filename
        self.realpath = self.realdirectory + filename
        # an already set 'temppath' is kept
        if not self.temppath:
            self.temppath = self.realpath

    def part_enable(self, part_directory=None):
        """Enable .part file usage"""
        if self.extension:
            self.temppath += ".part"
        else:
            # no extension yet: use "part" as the formatting extension
            self.set_extension("part", False)
        if part_directory:
            # keep the file name, but place it in 'part_directory'
            self.temppath = os.path.join(
                part_directory,
                os.path.basename(self.temppath),
            )

    def part_size(self):
        """Return size of .part file"""
        try:
            return os.stat(self.temppath).st_size
        except OSError:
            pass
        # 0 if 'temppath' cannot be stat'ed
        return 0

    def finalize(self):
        """Move tempfile to its target location"""
        if self.delete:
            # file is flagged for deletion: remove it instead of moving it
            self.delete = False
            os.unlink(self.temppath)
            return

        if self.temppath != self.realpath:
            # Move temp file to its actual location
            try:
                os.replace(self.temppath, self.realpath)
            except OSError:
                # fall back to copy + delete if the rename fails
                shutil.copyfile(self.temppath, self.realpath)
                os.unlink(self.temppath)

        if "_mtime" in self.kwdict:
            # Set file modification time
            mtime = self.kwdict["_mtime"]
            if mtime:
                try:
                    # strings are parsed as RFC 2822 style dates
                    if isinstance(mtime, str):
                        mtime = mktime_tz(parsedate_tz(mtime))
                    os.utime(self.realpath, (time.time(), mtime))
                except Exception:
                    # best effort; a failure here is ignored
                    pass
|
2017-10-24 23:33:44 +02:00
|
|
|
|
2017-06-16 21:01:40 +02:00
|
|
|
|
2018-01-29 22:13:06 +01:00
|
|
|
class DownloadArchive():
    """SQLite database of archive entries

    Entries are strings built from the extractor's category followed by
    its archive format string, applied to a keyword dict.
    """

    def __init__(self, path, extractor):
        connection = sqlite3.connect(path)
        connection.isolation_level = None
        cursor = self.cursor = connection.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS archive "
                       "(entry PRIMARY KEY) WITHOUT ROWID")

        category = extractor.category
        template = category + extractor.config(
            "archive-format", extractor.archive_fmt)
        self.keygen = template.format_map

    def __contains__(self, kwdict):
        """Return True if the item described by 'kwdict' exists in archive"""
        entry = (self.keygen(kwdict),)
        self.cursor.execute(
            "SELECT 1 FROM archive WHERE entry=? LIMIT 1", entry)
        return self.cursor.fetchone()

    def add(self, kwdict):
        """Add item described by 'kwdict' to archive"""
        entry = (self.keygen(kwdict),)
        self.cursor.execute(
            "INSERT OR IGNORE INTO archive VALUES (?)", entry)
|