add 'restrict-filenames' option (#348)
This commit is contained in:
parent
60cf40380a
commit
b1bea8aaeb
@ -108,6 +108,24 @@ Description Directory path used as the base for all download destinations.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.*.restrict-filenames
|
||||
------------------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"auto"``
|
||||
Example ``"/!? ()[]{}"``
|
||||
Description Characters to replace with underscores (``_``) when generating
|
||||
directory and file names.
|
||||
|
||||
Special values:
|
||||
|
||||
* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
|
||||
depending on the local operating system
|
||||
* ``"unix"``: ``"/"``
|
||||
* ``"windows"``: ``"<>:\"\\|/?*"``
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.*.skip
|
||||
----------------
|
||||
=========== =====
|
||||
|
@ -9,6 +9,7 @@
|
||||
"skip": true,
|
||||
"sleep": 0,
|
||||
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
|
||||
"restrict-filenames": "auto",
|
||||
|
||||
"artstation":
|
||||
{
|
||||
|
@ -83,22 +83,6 @@ def nameext_from_url(url, data=None):
|
||||
return data
|
||||
|
||||
|
||||
def clean_path_windows(path):
    """Replace characters that are illegal in a Windows path-segment with '_'

    Returns an empty string if re.sub() rejects 'path' with a TypeError.
    """
    illegal = r'[<>:"\\/|?*]'
    try:
        cleaned = re.sub(illegal, "_", path)
    except TypeError:
        cleaned = ""
    return cleaned
|
||||
|
||||
|
||||
def clean_path_posix(path):
    """Remove illegal characters from a path-segment (Posix)

    Replaces every "/" in 'path' with "_".  Returns an empty string when
    'path' has no replace() method or when its replace() does not accept
    string arguments (e.g. bytes).
    """
    try:
        return path.replace("/", "_")
    except (AttributeError, TypeError):
        # AttributeError: no replace() at all (None, int, list, ...)
        # TypeError: replace() exists but rejects str arguments (bytes, ...);
        # clean_path_windows() already returns "" for such values
        return ""
|
||||
|
||||
|
||||
def extract(txt, begin, end, pos=0):
|
||||
"""Extract the text between 'begin' and 'end' from 'txt'
|
||||
|
||||
@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
|
||||
return date_string
|
||||
|
||||
|
||||
# Pick the path-cleaning implementation matching the running platform
clean_path = clean_path_windows if os.name == "nt" else clean_path_posix
|
||||
|
||||
|
||||
urljoin = urllib.parse.urljoin
|
||||
|
||||
quote = urllib.parse.quote
|
||||
|
@ -535,6 +535,27 @@ class PathFormat():
|
||||
if os.altsep and os.altsep in self.basedirectory:
|
||||
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
|
||||
|
||||
restrict = extractor.config("restrict-filenames", "auto")
|
||||
if restrict == "auto":
|
||||
restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
|
||||
elif restrict == "unix":
|
||||
restrict = "/"
|
||||
elif restrict == "windows":
|
||||
restrict = "<>:\"\\/|?*"
|
||||
self.clean_path = self._build_cleanfunc(restrict)
|
||||
|
||||
@staticmethod
|
||||
def _build_cleanfunc(repl):
|
||||
if not repl:
|
||||
return lambda x: x
|
||||
elif len(repl) == 1:
|
||||
def func(x, r=repl):
|
||||
return x.replace(r, "_")
|
||||
else:
|
||||
def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
|
||||
return sub("_", x)
|
||||
return func
|
||||
|
||||
def open(self, mode="wb"):
    """Open 'self.temppath' in the given mode and return the file object"""
    temppath = self.temppath
    return open(temppath, mode)
|
||||
@ -551,7 +572,7 @@ class PathFormat():
|
||||
"""Build directory path and create it if necessary"""
|
||||
try:
|
||||
segments = [
|
||||
text.clean_path(
|
||||
self.clean_path(
|
||||
Formatter(segment, self.kwdefault)
|
||||
.format_map(keywords).strip())
|
||||
for segment in self.directory_fmt
|
||||
@ -597,7 +618,7 @@ class PathFormat():
|
||||
def build_path(self):
|
||||
"""Use filename-keywords and directory to build a full path"""
|
||||
try:
|
||||
self.filename = text.clean_path(
|
||||
self.filename = self.clean_path(
|
||||
self.formatter.format_map(self.keywords))
|
||||
except Exception as exc:
|
||||
raise exception.FormatError(exc, "filename")
|
||||
|
@ -139,26 +139,6 @@ class TestText(unittest.TestCase):
|
||||
for value in INVALID:
|
||||
self.assertEqual(f(value), empty)
|
||||
|
||||
def test_clean_path_windows(self, f=text.clean_path_windows):
    # valid arguments: (input, expected result)
    cases = (
        ("", ""),
        ("foo", "foo"),
        ("foo/bar", "foo_bar"),
        ("foo<>:\"\\/|?*bar", "foo_________bar"),
    )
    for value, result in cases:
        self.assertEqual(f(value), result)

    # invalid arguments
    for value in INVALID:
        self.assertEqual(f(value), "")
|
||||
|
||||
def test_clean_path_posix(self, f=text.clean_path_posix):
    # valid arguments: (input, expected result)
    cases = (
        ("", ""),
        ("foo", "foo"),
        ("foo/bar", "foo_bar"),
        ("foo<>:\"\\/|?*bar", "foo<>:\"\\_|?*bar"),
    )
    for value, result in cases:
        self.assertEqual(f(value), result)

    # invalid arguments
    for value in INVALID:
        self.assertEqual(f(value), "")
|
||||
|
||||
def test_extract(self, f=text.extract):
|
||||
txt = "<a><b>"
|
||||
self.assertEqual(f(txt, "<", ">"), ("a" , 3))
|
||||
|
Loading…
x
Reference in New Issue
Block a user