add 'restrict-filenames' option (#348)

This commit is contained in:
Mike Fährmann 2019-07-23 17:36:07 +02:00
parent 60cf40380a
commit b1bea8aaeb
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 42 additions and 44 deletions

View File

@ -108,6 +108,24 @@ Description Directory path used as the base for all download destinations.
=========== =====
extractor.*.restrict-filenames
------------------------------
=========== =====
Type ``string``
Default ``"auto"``
Example ``"/!? ()[]{}"``
Description Characters to replace with underscores (``_``) when generating
directory and file names.
Special values:
* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
depending on the local operating system
* ``"unix"``: ``"/"``
* ``"windows"``: ``"<>:\"\\|/?*"``
=========== =====
extractor.*.skip
----------------
=========== =====

View File

@ -9,6 +9,7 @@
"skip": true,
"sleep": 0,
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
"restrict-filenames": "auto",
"artstation":
{

View File

@ -83,22 +83,6 @@ def nameext_from_url(url, data=None):
return data
def clean_path_windows(path):
    """Replace characters not allowed on Windows in a path-segment with '_'

    The affected characters are < > : " \\ / | ? and *.
    An empty string is returned if 're.sub' rejects 'path' with a
    TypeError (for example when it is None or a number).
    """
    forbidden = r'[<>:"\\/|?*]'
    try:
        cleaned = re.sub(forbidden, "_", path)
    except TypeError:
        cleaned = ""
    return cleaned
def clean_path_posix(path):
    """Replace every '/' in a path-segment with '_' (Posix)

    An empty string is returned if accessing or calling 'path.replace'
    raises an AttributeError (for example when 'path' is None or a number).
    """
    try:
        cleaned = path.replace("/", "_")
    except AttributeError:
        cleaned = ""
    return cleaned
def extract(txt, begin, end, pos=0):
"""Extract the text between 'begin' and 'end' from 'txt'
@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
return date_string
# Windows ("nt") gets the stricter cleaner; every other platform
# falls back to the Posix variant
clean_path = clean_path_windows if os.name == "nt" else clean_path_posix
urljoin = urllib.parse.urljoin
quote = urllib.parse.quote

View File

@ -535,6 +535,27 @@ class PathFormat():
if os.altsep and os.altsep in self.basedirectory:
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
restrict = extractor.config("restrict-filenames", "auto")
if restrict == "auto":
restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
elif restrict == "unix":
restrict = "/"
elif restrict == "windows":
restrict = "<>:\"\\/|?*"
self.clean_path = self._build_cleanfunc(restrict)
@staticmethod
def _build_cleanfunc(repl):
if not repl:
return lambda x: x
elif len(repl) == 1:
def func(x, r=repl):
return x.replace(r, "_")
else:
def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
return sub("_", x)
return func
def open(self, mode="wb"):
    """Return a file object for 'self.temppath', opened with 'mode'"""
    target = self.temppath
    return open(target, mode)
@ -551,7 +572,7 @@ class PathFormat():
"""Build directory path and create it if necessary"""
try:
segments = [
text.clean_path(
self.clean_path(
Formatter(segment, self.kwdefault)
.format_map(keywords).strip())
for segment in self.directory_fmt
@ -597,7 +618,7 @@ class PathFormat():
def build_path(self):
"""Use filename-keywords and directory to build a full path"""
try:
self.filename = text.clean_path(
self.filename = self.clean_path(
self.formatter.format_map(self.keywords))
except Exception as exc:
raise exception.FormatError(exc, "filename")

View File

@ -139,26 +139,6 @@ class TestText(unittest.TestCase):
for value in INVALID:
self.assertEqual(f(value), empty)
def test_clean_path_windows(self, f=text.clean_path_windows):
    """Check replacement of Windows-reserved characters with '_'"""
    expectations = (
        ("", ""),
        ("foo", "foo"),
        ("foo/bar", "foo_bar"),
        ("foo<>:\"\\/|?*bar", "foo_________bar"),
    )
    for path, expected in expectations:
        self.assertEqual(f(path), expected)

    # invalid arguments must produce an empty string
    for value in INVALID:
        self.assertEqual(f(value), "")
def test_clean_path_posix(self, f=text.clean_path_posix):
    """Check that only '/' gets replaced with '_' on Posix"""
    expectations = (
        ("", ""),
        ("foo", "foo"),
        ("foo/bar", "foo_bar"),
        ("foo<>:\"\\/|?*bar", "foo<>:\"\\_|?*bar"),
    )
    for path, expected in expectations:
        self.assertEqual(f(path), expected)

    # invalid arguments must produce an empty string
    for value in INVALID:
        self.assertEqual(f(value), "")
def test_extract(self, f=text.extract):
txt = "<a><b>"
self.assertEqual(f(txt, "<", ">"), ("a" , 3))