re-implement and improve filename formatter
A format string now gets parsed only once instead of re-parsing it each time it is applied to a set of data. The initial parsing causes directory path creation to be at about 2x slower than before, since each format string there is used only once, but building a filename, the more common operation, is at least 2x faster. The "directory slowness" cancels at about 5 filenames and everything above that is significantly faster.
This commit is contained in:
parent
34b556922d
commit
590c0b3ad5
@ -16,6 +16,7 @@ import string
|
||||
import _string
|
||||
import sqlite3
|
||||
import datetime
|
||||
import operator
|
||||
import itertools
|
||||
import urllib.parse
|
||||
from . import text, exception
|
||||
@ -263,7 +264,7 @@ class ExtendedUrl():
|
||||
|
||||
|
||||
class Formatter():
|
||||
"""Custom, trimmed-down version of string.Formatter
|
||||
"""Custom, extended version of string.Formatter
|
||||
|
||||
This string formatter implementation is a mostly performance-optimized
|
||||
variant of the original string.Formatter class. Unnecessary features have
|
||||
@ -302,67 +303,112 @@ class Formatter():
|
||||
"a": ascii,
|
||||
}
|
||||
|
||||
def __init__(self, default=None):
|
||||
self.kwdefault = default
|
||||
|
||||
def vformat(self, format_string, kwargs):
|
||||
"""Apply 'kwargs' to the initial format_string and return its result"""
|
||||
result = []
|
||||
append = result.append
|
||||
def __init__(self, format_string, default=None):
|
||||
self.default = default
|
||||
self.result = []
|
||||
self.fields = []
|
||||
|
||||
for literal_text, field_name, format_spec, conversion in \
|
||||
_string.formatter_parser(format_string):
|
||||
|
||||
if literal_text:
|
||||
append(literal_text)
|
||||
|
||||
self.result.append(literal_text)
|
||||
if field_name:
|
||||
obj = self.get_field(field_name, kwargs)
|
||||
if conversion:
|
||||
obj = self.conversions[conversion](obj)
|
||||
if format_spec:
|
||||
format_spec = format_spec.format_map(kwargs)
|
||||
obj = self.format_field(obj, format_spec)
|
||||
else:
|
||||
obj = str(obj)
|
||||
append(obj)
|
||||
self.fields.append((
|
||||
len(self.result),
|
||||
self._field_access(field_name, format_spec, conversion)
|
||||
))
|
||||
self.result.append("")
|
||||
|
||||
return "".join(result)
|
||||
def format_map(self, kwargs):
|
||||
"""Apply 'kwargs' to the initial format_string and return its result"""
|
||||
for index, func in self.fields:
|
||||
self.result[index] = func(kwargs)
|
||||
return "".join(self.result)
|
||||
|
||||
@staticmethod
|
||||
def format_field(value, format_spec):
|
||||
"""Format 'value' according to 'format_spec'"""
|
||||
if format_spec[0] == "?":
|
||||
if not value:
|
||||
return ""
|
||||
before, after, format_spec = format_spec.split("/", 2)
|
||||
return before[1:] + format(value, format_spec) + after
|
||||
if format_spec[0] == "L":
|
||||
maxlen, replacement, format_spec = format_spec.split("/", 2)
|
||||
maxlen = text.parse_int(maxlen[1:])
|
||||
value = format(value, format_spec)
|
||||
return value if len(value) <= maxlen else replacement
|
||||
return format(value, format_spec)
|
||||
|
||||
def get_field(self, field_name, kwargs):
|
||||
"""Return value with key 'field_name' from 'kwargs'"""
|
||||
def _field_access(self, field_name, format_spec, conversion):
|
||||
first, rest = _string.formatter_field_name_split(field_name)
|
||||
|
||||
if first not in kwargs:
|
||||
return self.kwdefault
|
||||
|
||||
obj = kwargs[first]
|
||||
for is_attr, i in rest:
|
||||
funcs = []
|
||||
for is_attr, key in rest:
|
||||
if is_attr:
|
||||
obj = getattr(obj, i)
|
||||
elif ":" in i:
|
||||
start, _, stop = i.partition(":")
|
||||
start = int(start) if start else 0
|
||||
return obj[start:int(stop)] if stop else obj[start:]
|
||||
func = operator.attrgetter
|
||||
elif ":" in key:
|
||||
func = self._slicegetter
|
||||
else:
|
||||
obj = obj[i]
|
||||
func = operator.itemgetter
|
||||
funcs.append(func(key))
|
||||
|
||||
return obj
|
||||
if conversion:
|
||||
funcs.append(self.conversions[conversion])
|
||||
|
||||
if format_spec:
|
||||
if format_spec[0] == "?":
|
||||
func = self._format_optional
|
||||
elif format_spec[0] == "L":
|
||||
func = self._format_maxlen
|
||||
else:
|
||||
func = self._format_default
|
||||
fmt = func(format_spec)
|
||||
else:
|
||||
fmt = str
|
||||
|
||||
if funcs:
|
||||
return self._apply(first, funcs, fmt)
|
||||
return self._apply_simple(first, fmt)
|
||||
|
||||
def _apply_simple(self, key, fmt):
|
||||
def wrap(obj):
|
||||
if key in obj:
|
||||
obj = obj[key]
|
||||
else:
|
||||
obj = self.default
|
||||
return fmt(obj)
|
||||
return wrap
|
||||
|
||||
def _apply(self, key, funcs, fmt):
|
||||
def wrap(obj):
|
||||
if key in obj:
|
||||
obj = obj[key]
|
||||
for func in funcs:
|
||||
obj = func(obj)
|
||||
else:
|
||||
obj = self.default
|
||||
return fmt(obj)
|
||||
return wrap
|
||||
|
||||
@staticmethod
|
||||
def _slicegetter(key):
|
||||
start, _, stop = key.partition(":")
|
||||
stop, _, step = stop.partition(":")
|
||||
start = int(start) if start else None
|
||||
stop = int(stop) if stop else None
|
||||
step = int(step) if step else None
|
||||
return operator.itemgetter(slice(start, stop, step))
|
||||
|
||||
@staticmethod
|
||||
def _format_optional(format_spec):
|
||||
def wrap(obj):
|
||||
if not obj:
|
||||
return ""
|
||||
return before + format(obj, format_spec) + after
|
||||
before, after, format_spec = format_spec.split("/", 2)
|
||||
before = before[1:]
|
||||
return wrap
|
||||
|
||||
@staticmethod
|
||||
def _format_maxlen(format_spec):
|
||||
def wrap(obj):
|
||||
obj = format(obj, format_spec)
|
||||
return obj if len(obj) <= maxlen else replacement
|
||||
maxlen, replacement, format_spec = format_spec.split("/", 2)
|
||||
maxlen = text.parse_int(maxlen[1:])
|
||||
return wrap
|
||||
|
||||
@staticmethod
|
||||
def _format_default(format_spec):
|
||||
def wrap(obj):
|
||||
return format(obj, format_spec)
|
||||
return wrap
|
||||
|
||||
|
||||
class PathFormat():
|
||||
@ -372,7 +418,12 @@ class PathFormat():
|
||||
"filename", extractor.filename_fmt)
|
||||
self.directory_fmt = extractor.config(
|
||||
"directory", extractor.directory_fmt)
|
||||
self.formatter = Formatter(extractor.config("keywords-default"))
|
||||
self.kwdefault = extractor.config("keywords-default")
|
||||
|
||||
try:
|
||||
self.formatter = Formatter(self.filename_fmt, self.kwdefault)
|
||||
except Exception as exc:
|
||||
raise exception.FormatError(exc, "filename")
|
||||
|
||||
self.delete = False
|
||||
self.has_extension = False
|
||||
@ -419,7 +470,8 @@ class PathFormat():
|
||||
try:
|
||||
segments = [
|
||||
text.clean_path(
|
||||
self.formatter.vformat(segment, keywords).strip())
|
||||
Formatter(segment, self.kwdefault)
|
||||
.format_map(keywords).strip())
|
||||
for segment in self.directory_fmt
|
||||
]
|
||||
except Exception as exc:
|
||||
@ -456,7 +508,7 @@ class PathFormat():
|
||||
"""Use filename-keywords and directory to build a full path"""
|
||||
try:
|
||||
self.filename = text.clean_path(
|
||||
self.formatter.vformat(self.filename_fmt, self.keywords))
|
||||
self.formatter.format_map(self.keywords))
|
||||
except Exception as exc:
|
||||
raise exception.FormatError(exc, "filename")
|
||||
|
||||
|
@ -209,6 +209,13 @@ class TestFormatter(unittest.TestCase):
|
||||
self._run_test("{a[:5]}" , v[:5])
|
||||
self._run_test("{a[:50]}", v[:50])
|
||||
self._run_test("{a[:]}" , v)
|
||||
self._run_test("{a[1:10:2]}" , v[1:10:2])
|
||||
self._run_test("{a[-10:-1:2]}", v[-10:-1:2])
|
||||
self._run_test("{a[5::2]}" , v[5::2])
|
||||
self._run_test("{a[50::2]}", v[50::2])
|
||||
self._run_test("{a[:5:2]}" , v[:5:2])
|
||||
self._run_test("{a[:50:2]}", v[:50:2])
|
||||
self._run_test("{a[::]}" , v)
|
||||
|
||||
def test_maxlen(self):
|
||||
v = self.kwdict["a"]
|
||||
@ -219,8 +226,8 @@ class TestFormatter(unittest.TestCase):
|
||||
self._run_test("{a:Lab/foo/}", "foo")
|
||||
|
||||
def _run_test(self, format_string, result, default=None):
|
||||
formatter = util.Formatter(default)
|
||||
output = formatter.vformat(format_string, self.kwdict)
|
||||
formatter = util.Formatter(format_string, default)
|
||||
output = formatter.format_map(self.kwdict)
|
||||
self.assertEqual(output, result, format_string)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user