trelby_export/src/undo.py

324 lines
9.9 KiB
Python

import screenplay
import zlib
# Which command uses which undo object:
#
# command type
# ------- ------
#
# removeElementTypes FullCopy
# addChar SinglePara (possibly merged)
# charmap
# namesDlg
# spellCheck SinglePara
# findAndReplace SinglePara
# NewElement ManyElems(1, 2)
# Tab:
# (end of elem) ManyElems(1, 2)
# (middle of elem) ManyElems(1, 1)
# TabPrev ManyElems(1, 1)
# insertForcedLineBreak ManyElems(1, 1)
# deleteForward:
# (not end of elem) ManyElems(1, 1) (possibly merged)
# (end of elem) ManyElems(2, 1)
# deleteBackward:
# (not start of elem) ManyElems(1, 1) (possibly merged)
# (start of elem) ManyElems(2, 1)
# convertTypeTo ManyElems(N, N)
# cut AnyDifference
# paste AnyDifference
# extremely rough estimate for the base memory usage of a single undo
# object, WITHOUT counting the actual textual differences stored inside
# it. so this figure accounts for the Python object overhead, member
# variable overhead, memory allocation overhead, etc.
#
# this figure does not need to be very accurate.
BASE_MEMORY_USAGE = 1500
# possible command types. only used for possibly merging consecutive
# edits.
(CMD_ADD_CHAR,
CMD_ADD_CHAR_SPACE,
CMD_DEL_FORWARD,
CMD_DEL_BACKWARD,
CMD_MISC) = list(range(5))
# convert a list of Screenplay.Line objects into an unspecified, but
# compact, form of storage. storage2lines will convert this back to the
# original form.
#
# the return type is a tuple: (numberOfLines, ...). the number and type of
# elements after the first is of no concern to the caller.
#
# implementation notes:
#
# tuple[1]: bool; True if tuple[2] is zlib-compressed
#
# tuple[2]: string; the line objects converted to their string
# representation and joined by the "\n" character
#
def lines2storage(lines):
if not lines:
return (0,)
lines = [str(ln) for ln in lines]
linesStr = "\n".join(lines)
# instead of having an arbitrary cutoff figure ("compress if < X
# bytes"), always compress, but only use the compressed version if
# it's shorter than the non-compressed one.
linesStrCompressed = zlib.compress(linesStr.encode(), 6)
if len(linesStrCompressed) < len(linesStr):
return (len(lines), True, linesStrCompressed)
else:
return (len(lines), False, linesStr.encode())
# see lines2storage.
def storage2lines(storage):
if storage[0] == 0:
return []
if storage[1]:
linesStr = zlib.decompress(storage[2]).decode()
else:
linesStr = storage[2].decode()
return [screenplay.Line.fromStr(s) for s in linesStr.split("\n")]
# how much memory is used by the given storage object
def memoryUsed(storage):
# 16 is a rough estimate for the first two tuple members' memory usage
if storage[0] == 0:
return 16
return 16 + len(storage[2])
# abstract base class for storing undo history. concrete subclasses
# implement undo/redo for specific actions taken on a screenplay.
class Base:
def __init__(self, sp, cmdType):
# cursor position before the action
self.startPos = sp.cursorAsMark()
# type of action; one of the CMD_ values
self.cmdType = cmdType
# prev/next undo objects in the history
self.prev = None
self.next = None
# set cursor position after the action
def setEndPos(self, sp):
self.endPos = sp.cursorAsMark()
def getType(self):
return self.cmdType
# rough estimate of how much memory is used by this undo object. can
# be overridden by subclasses that need something different.
def memoryUsed(self):
return (BASE_MEMORY_USAGE + memoryUsed(self.linesBefore) +
memoryUsed(self.linesAfter))
# default implementation for undo. can be overridden by subclasses
# that need something different.
def undo(self, sp):
sp.line, sp.column = self.startPos.line, self.startPos.column
sp.lines[self.elemStartLine : self.elemStartLine + self.linesAfter[0]] = \
storage2lines(self.linesBefore)
# default implementation for redo. can be overridden by subclasses
# that need something different.
def redo(self, sp):
sp.line, sp.column = self.endPos.line, self.endPos.column
sp.lines[self.elemStartLine : self.elemStartLine + self.linesBefore[0]] = \
storage2lines(self.linesAfter)
# stores a full copy of the screenplay before/after the action. used by
# actions that modify the screenplay globally.
#
# we store the line data as compressed text, not as a list of Line
# objects, because it takes much less memory to do so. figures from a
# 32-bit machine (a 64-bit machine wastes even more space storing Line
# objects) from speedTest for a 120-page screenplay (Casablanca):
#
# -Line objects: 1,737 KB, 0.113s
# -text, not compressed: 267 KB, 0.076s
# -text, zlib fastest(1): 127 KB, 0.090s
# -text, zlib medium(6): 109 KB, 0.115s
# -text, zlib best(9): 107 KB, 0.126s
# -text, bz2 best(9): 88 KB, 0.147s
class FullCopy(Base):
def __init__(self, sp):
Base.__init__(self, sp, CMD_MISC)
self.elemStartLine = 0
self.linesBefore = lines2storage(sp.lines)
# called after editing action is over to snapshot the "after" state
def setAfter(self, sp):
self.linesAfter = lines2storage(sp.lines)
self.setEndPos(sp)
# stores a single modified paragraph
class SinglePara(Base):
# line is any line belonging to the modified paragraph. there is no
# requirement for the cursor to be in this paragraph.
def __init__(self, sp, cmdType, line):
Base.__init__(self, sp, cmdType)
self.elemStartLine = sp.getParaFirstIndexFromLine(line)
endLine = sp.getParaLastIndexFromLine(line)
self.linesBefore = lines2storage(
sp.lines[self.elemStartLine : endLine + 1])
def setAfter(self, sp):
# if all we did was modify a single paragraph, the index of its
# starting line can not have changed, because that would mean one of
# the paragraphs above us had changed as well, which is a logical
# impossibility. so we can find the dimensions of the modified
# paragraph by starting at the first line.
endLine = sp.getParaLastIndexFromLine(self.elemStartLine)
self.linesAfter = lines2storage(
sp.lines[self.elemStartLine : endLine + 1])
self.setEndPos(sp)
# stores N modified consecutive elements
class ManyElems(Base):
# line is any line belonging to the first modified element. there is
# no requirement for the cursor to be in this paragraph.
# nrOfElemsStart is how many elements there are before the edit
# operaton and nrOfElemsEnd is how many there are after. so an edit
# operation splitting an element would pass in (1, 2) while an edit
# operation combining two elements would pass in (2, 1).
def __init__(self, sp, cmdType, line, nrOfElemsStart, nrOfElemsEnd):
Base.__init__(self, sp, cmdType)
self.nrOfElemsEnd = nrOfElemsEnd
self.elemStartLine, endLine = sp.getElemIndexesFromLine(line)
# find last line of last element to include in linesBefore
for i in range(nrOfElemsStart - 1):
endLine = sp.getElemLastIndexFromLine(endLine + 1)
self.linesBefore = lines2storage(
sp.lines[self.elemStartLine : endLine + 1])
def setAfter(self, sp):
endLine = sp.getElemLastIndexFromLine(self.elemStartLine)
for i in range(self.nrOfElemsEnd - 1):
endLine = sp.getElemLastIndexFromLine(endLine + 1)
self.linesAfter = lines2storage(
sp.lines[self.elemStartLine : endLine + 1])
self.setEndPos(sp)
# stores a single block of changed lines by diffing before/after states of
# a screenplay
class AnyDifference(Base):
def __init__(self, sp):
Base.__init__(self, sp, CMD_MISC)
self.linesBefore = [screenplay.Line(ln.lb, ln.lt, ln.text) for ln in sp.lines]
def setAfter(self, sp):
self.a, self.b, self.x, self.y = mySequenceMatcher(self.linesBefore, sp.lines)
self.removed = lines2storage(self.linesBefore[self.a : self.b])
self.inserted = lines2storage(sp.lines[self.x : self.y])
self.setEndPos(sp)
del self.linesBefore
def memoryUsed(self):
return (BASE_MEMORY_USAGE + memoryUsed(self.removed) +
memoryUsed(self.inserted))
def undo(self, sp):
sp.line, sp.column = self.startPos.line, self.startPos.column
sp.lines[self.x : self.y] = storage2lines(self.removed)
def redo(self, sp):
sp.line, sp.column = self.endPos.line, self.endPos.column
sp.lines[self.a : self.b] = storage2lines(self.inserted)
# Our own implementation of difflib.SequenceMatcher, since the actual one
# is too slow for our custom needs.
#
# l1, l2 = lists to diff. List elements must have __ne__ defined.
#
# Return a, b, x, y such that l1[a:b] could be replaced with l2[x:y] to
# convert l1 into l2.
def mySequenceMatcher(l1, l2):
len1 = len(l1)
len2 = len(l2)
if len1 >= len2:
bigger = l1
smaller = l2
bigLen = len1
smallLen = len2
l1Big = True
else:
bigger = l2
smaller = l1
bigLen = len2
smallLen = len1
l1Big = False
i = 0
a = b = 0
m1found = False
while a < smallLen:
if not m1found and (bigger[a] != smaller[a]):
b = a
m1found = True
break
a += 1
if not m1found:
a = b = smallLen
num = smallLen - a + 1
i = 1
c = bigLen
d = smallLen
while (i <= num) and (i <= smallLen):
c = bigLen - i + 1
d = smallLen - i + 1
if bigger[-i] != smaller[-i]:
break
i += 1
if not l1Big:
a, c, b, d = a, d, b, c
return a, c, b, d