trelby_export/src/spellcheck.py

217 lines
5.2 KiB
Python

import mypickle
import util
# Module-level spell checking state, filled in by loadDict().

# every word read from dict_en.dat.
gdict = set()

# maps util.getWordPrefix(word) -> set of the words from gdict that begin
# with that prefix. loadDict() only indexes words longer than two
# characters here.
prefixDict = dict()
# Load the word list from dict_en.dat into gdict and prefixDict.
#
# frame is handed straight to util.loadMaybeCompressedFile.
#
# Returns True on success, or immediately if gdict already contains
# words. Returns False if the loader gave back nothing usable.
def loadDict(frame):
    if gdict:
        return True

    data = util.loadMaybeCompressedFile("dict_en.dat", frame)

    if not data:
        return False

    words = data.splitlines()

    # start with an empty bucket for every two-letter a-z combination.
    letters = "abcdefghijklmnopqrstuvwxyz"
    prefixDict.update((first + second, set())
                      for first in letters
                      for second in letters)

    getPrefix = util.getWordPrefix

    for entry in words:
        # strictly speaking each entry should be normalized with
        # util.lower(util.toInputStr(entry)), but that is skipped because:
        #
        # -users aren't supposed to modify the file
        #
        # -normalizing takes 1.35 secs, compared to 0.56 secs without
        #  it, on a 1.33GHz Athlon
        gdict.add(entry)

        # only words longer than two characters go into the prefix index.
        if len(entry) > 2:
            prefixDict[getPrefix(entry)].add(entry)

    return True
# A user dictionary: the collection of known words that the user has
# specified.
class Dict:
    # mypickle.Vars describing the saved fields. Created by the first
    # instance that gets constructed and stored on the class from then on.
    cvars = None

    def __init__(self):
        cls = self.__class__

        if not cls.cvars:
            spec = mypickle.Vars()
            cls.cvars = spec

            spec.addList("wordsList", [], "Words",
                         mypickle.StrLatin1Var("", "", ""))
            spec.makeDicts()

        cls.cvars.setDefaults(self)

        # wordsList is the representation used for saving/loading, while
        # words is what normal operation works with. a mypickle.SetVar
        # might be a better fit some day...

        # key = word, lowercased, value = None
        self.words = {}

    # Load the dictionary from string 's'. The original author's note
    # says this does not throw any exceptions and silently ignores any
    # errors (that depends on mypickle, which is not shown here).
    def load(self, s):
        self.cvars.load(self.cvars.makeVals(s), "", self)

        self.words = {w: None for w in self.wordsList}
        self.refresh()

    # Serialize the dictionary to a string and return it. wordsList is
    # refreshed from the current words first.
    def save(self):
        self.wordsList = self.get()

        return self.cvars.save("", self)

    # Fix up invalid values: run every stored word through cleanWord and
    # drop the ones that end up empty.
    def refresh(self):
        cleaned = (self.cleanWord(w) for w in self.words)

        self.words = {w: None for w in cleaned if w}

    # Returns True if word is known. The word is looked up as given, no
    # cleaning is done here.
    def isKnown(self, word):
        return word in self.words

    # Add a word, after cleaning it. Nothing is added if the cleaned word
    # is empty.
    def add(self, word):
        cleaned = self.cleanWord(word)

        if cleaned:
            self.words[cleaned] = None

    # Replace the current contents with the given iterable of words.
    def set(self, words):
        self.words = {}

        for w in words:
            self.add(w)

    # Returns a sorted list of all the words.
    def get(self):
        return sorted(self.words)

    # Clean up word in all possible ways (input-string conversion,
    # lowercasing, splitting into words) and return the first resulting
    # word, or an empty string if nothing remains.
    def cleanWord(self, word):
        parts = util.splitToWords(util.lower(util.toInputStr(word)))

        return parts[0] if len(parts) > 0 else ""
# Spell checks a script, one possibly misspelled word at a time.
class SpellChecker:
    # sp is the script to check, gScDict the user's global dictionary
    # (Dict).
    def __init__(self, sp, gScDict):
        self.sp = sp

        # user's global dictionary (Dict)
        self.gScDict = gScDict

        # key = word found in character names, value = None
        self.cnames = {}

        for name in sp.getCharacterNames():
            self.cnames.update(dict.fromkeys(util.splitToWords(name)))

        # the most recently flagged word, or None.
        self.word = None

        # checking starts from the script's current line...
        self.line = self.sp.line

        # ...but not from the current column: if the cursor were in the
        # middle of a word, the partial word would get flagged as
        # misspelled.
        self.col = 0

    # Find the next possibly misspelled word, searching from the stored
    # line/col, and store the word and its location in self.word,
    # self.line and self.col. Returns True if such a word was found,
    # False otherwise.
    def findNext(self):
        curLine, curCol = self.line, self.col

        # reset the stored result up front, so that if we return False
        # and someone looks at these anyway, they don't point at
        # anything stale.
        self.word = None
        self.line = self.col = 0

        while True:
            word, curLine, curCol = self.sp.getWord(curLine, curCol)

            if not word:
                return False

            if self.isKnown(word):
                # skip past this word and keep looking.
                curCol += len(word)

                continue

            self.word, self.line, self.col = word, curLine, curCol

            return True

    # Returns True if word is a known word: it is in the main
    # dictionary, is part of a character name, is in the script's or the
    # user's global dictionary, or consists only of digits. The word is
    # lowercased before any of the lookups.
    def isKnown(self, word):
        lowered = util.lower(word)

        if lowered in gdict or lowered in self.cnames:
            return True

        return (self.sp.scDict.isKnown(lowered)
                or self.gScDict.isKnown(lowered)
                or lowered.isdigit())
# Calculates the Levenshtein distance between a and b, i.e. the minimum
# number of single-item insertions, deletions and substitutions needed to
# turn one into the other.
#
# a, b: sequences supporting len() and indexing (e.g. strings).
#
# Returns the distance as a non-negative integer; 0 means the two
# sequences are equal item by item.
def lev(a, b):
    n, m = len(a), len(b)

    if n > m:
        # Make sure n <= m, to use O(min(n, m)) space
        a, b = b, a
        n, m = m, n

    # current[j] is the distance between the first j items of a and the
    # first i items of b. for i = 0 that is just j insertions.
    current = list(range(n + 1))

    for i in range(1, m + 1):
        # a row needs exactly n + 1 cells. sizing it by m, the longer
        # length, would still work but defeats the space bound above.
        previous, current = current, [i] + [0] * n

        for j in range(1, n + 1):
            add, delete = previous[j] + 1, current[j - 1] + 1
            change = previous[j - 1]

            # substitution is free when the items already match.
            if a[j - 1] != b[i - 1]:
                change += 1

            current[j] = min(add, delete, change)

    return current[n]