247 lines
10 KiB
Python
247 lines
10 KiB
Python
# -*- coding: utf8 -*-
|
||
###
|
||
# Copyright (c) 2010, Valentin Lorentz
|
||
# All rights reserved.
|
||
#
|
||
# Redistribution and use in source and binary forms, with or without
|
||
# modification, are permitted provided that the following conditions are met:
|
||
#
|
||
# * Redistributions of source code must retain the above copyright notice,
|
||
# this list of conditions, and the following disclaimer.
|
||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||
# this list of conditions, and the following disclaimer in the
|
||
# documentation and/or other materials provided with the distribution.
|
||
# * Neither the name of the author of this software nor the name of
|
||
# contributors to this software may be used to endorse or promote products
|
||
# derived from this software without specific prior written consent.
|
||
#
|
||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
# POSSIBILITY OF SUCH DAMAGE.
|
||
|
||
###
|
||
|
||
import re
|
||
import supybot.world as world
|
||
import supybot.ircmsgs as ircmsgs
|
||
import supybot.utils as utils
|
||
from supybot.commands import *
|
||
import supybot.plugins as plugins
|
||
import supybot.ircutils as ircutils
|
||
import supybot.callbacks as callbacks
|
||
|
||
class SpellChecker:
    """Rule-based detector of common French mistakes in a chat message.

    Every check enabled by ``level`` runs from the constructor; the messages
    they record are then available through ``getErrors()``.

    Levels are cumulative (level N also runs every check below N):

    * 0: no filtering;
    * 1: SMS language;
    * 2: plural errors (placeholder, no rule implemented);
    * 3: common conjugation mistakes;
    * 4: common spelling mistakes;
    * 5: abbreviations such as "t'as" for "tu as" (placeholder, no rule
      implemented);
    * 6: typography (spacing around punctuation);
    * 7: "lol" and look-alikes.
    """

    # Anything that is neither a (French) letter, a digit nor an apostrophe;
    # used to delimit whole words in 'single' mode.
    _NON_WORD = '[^a-zA-Z0-9éèàùâêûôîäëüïöç’\']'

    def __init__(self, text, level):
        """Run every check whose threshold is <= ``level`` on ``text``."""
        self._text = text
        self._errors = []
        # Label of the check currently running; prefixes recorded errors.
        self._checking = None
        checks = (
            ('SMS', self.checkSMS),
            ('pluriel', self.checkPlural),
            ('conjugaison', self.checkConjugaison),
            ('orthographe', self.checkSpelling),
            ('abbréviation', self.checkAbbreviation),
            ('typographie', self.checkTypographic),
            ('lol', self.checkLol),
        )
        for threshold, (label, check) in enumerate(checks, 1):
            if level >= threshold:
                self._checking = label
                check()

    def _raise(self, message):
        """Record ``message``, prefixed with the label of the running check."""
        self._errors.append('[%s] %s' % (self._checking, message))

    @staticmethod
    def _quote(value, separator):
        """Wrap ``value`` in guillemets, joining sequences with ``separator``."""
        if isinstance(value, (list, tuple)):
            value = separator.join(value)
        return '« %s »' % value

    def _detect(self, mode, correct, mask, displayedMask=None, wizard=' '):
        """Record an error when ``mask`` is found in the message.

        mode -- 'single': ``mask`` must appear as a whole word (delimited by
            non-word characters), case-insensitively; 'regexp': ``mask`` is
            searched as given, case-sensitively. Any other value never
            matches.
        correct -- the suggested replacement (a string or a list of strings)
            or, for the conjugation and typography checks, the complete
            message to report.
        mask -- regular-expression fragment to look for (it is not escaped).
        displayedMask -- what to show to the user instead of ``mask``;
            defaults to ``mask`` itself.
        wizard -- padding string added at both ends of the message so that a
            rule needing a neighbouring character can match at the very start
            or end of the text.
        """
        if displayedMask is None:
            displayedMask = mask

        # URLs are removed first so that their content is never checked.
        text = re.sub('[a-zA-Z0-9]+://[^ ]+', '', self._text)
        # Drop a leading "nick: " prefix; DOTALL keeps whatever follows a
        # line break instead of truncating the message there.
        nickRemover = re.match('[^ ]*: (?P<text>.*)', text, re.DOTALL)
        if nickRemover is not None:
            text = nickRemover.group('text')
        text = '%s%s%s' % (wizard, text, wizard)

        # re.search rather than re.match('.*X.*'): same result on one-line
        # text, without the backtracking and without ignoring later lines.
        if mode == 'single':
            found = re.search(
                '%s%s%s' % (self._NON_WORD, mask, self._NON_WORD),
                text, re.IGNORECASE)
        elif mode == 'regexp':
            found = re.search(mask, text)
        else:
            found = None
        if found is None:
            return

        if self._checking in ('conjugaison', 'typographie'):
            # For these checks ``correct`` already is the full message.
            self._raise(correct)
            return
        self._raise('On ne dit pas %s mais %s' %
                    (self._quote(displayedMask, ' » ou « '),
                     self._quote(correct, ' » , ou « ')))

    def checkSMS(self):
        """Flag SMS-style shorthand and suggest the spelled-out form."""
        # Ordered pairs so that errors are always reported in this order.
        bad = (
            ('t', 't\'es'),
            ('ki', 'qui'),
            ('koi', 'quoi'),
            ('tqvu', 't\'as vu'),
            ('tt', 'tout'),
            ('ct', 'c\'était'),
            ('v', 'vais'),
            ('twa', 'toi'),
            ('toa', 'toi'),
            ('mwa', 'moi'),
            ('moa', 'moi'),
            ('tro', 'trop'),
            ('bi1', 'bien'),
            ('çay', 'c\'est'),
            ('fé', ['fais', 'fait']),
            ('m', ['aime', 'aimes', 'aiment']),
            ('u', ['eu', 'eut']),
        )
        for mask, correct in bad:
            self._detect(mode='single', correct=correct, mask=mask)

        # A standalone capital "C", unless preceded by du/le/en. The
        # trailing lookbehind inspects the "C " just consumed, so it can
        # never fail; it is kept to leave the rule exactly as written.
        self._detect(mode='regexp', correct="c'est",
                     mask="(?<!(du|Du|le|Le|en|En)) C (?<!c')",
                     displayedMask='C')

    def checkPlural(self):
        """Placeholder: no plural rule is implemented."""
        pass

    def checkConjugaison(self):
        """Flag a missing "ne" in negations and a few wrong verb endings."""
        missingNe = "tu as oublié un « ne » ou un « n’ »"
        self._detect(mode='regexp', correct=missingNe,
                     mask="(je|tu|on|il|elle|nous|vous|ils|elles) [^'’ ]+ pas ")
        self._detect(mode='regexp', correct=missingNe,
                     mask="j'[^'’ ]+ pas")

        firstPerson = 'un verbe à la première personne ne finit pas par un « t »'
        notAS = 'ce verbe ne devrait pas se finir par un « s » à cette personne.'
        # Raw strings: "\W" is not a valid escape in a normal string literal.
        self._detect(mode='regexp', correct=firstPerson, mask=r"j'[^ ]*t\W")
        self._detect(mode='regexp', correct=firstPerson,
                     mask=r"je( ne)? [^ ]*t\W")
        # [^u] is added in order to not detect 'il [vn]ous...'
        self._detect(mode='regexp', correct=notAS,
                     mask=r" (il|elle|on)( ne | n['’]| )[^ ]*[^u]s\W")

    def checkSpelling(self):
        """Flag quel/quelle agreement mistakes and the "kel" spellings."""
        self._detect(mode='regexp', correct='quelle', mask='quel [^ ]+ la',
                     displayedMask='quel')
        self._detect(mode='regexp', correct='quel', mask='quelle [^ ]+ le',
                     displayedMask='quelle')
        self._detect(mode='regexp', correct=['quels', 'quelles'],
                     mask='quel [^ ]+ les',
                     displayedMask='quel')
        self._detect(mode='regexp', correct=['quels', 'quelles'],
                     mask='quelle [^ ]+ les',
                     displayedMask='quelle')
        for mask in ('kel', 'kelle', 'kels', 'kelles'):
            self._detect(mode='single',
                         correct=['quel', 'quels', 'quelle', 'quelles'],
                         mask=mask)

    def checkAbbreviation(self):
        """Placeholder: no abbreviation rule is implemented."""
        pass

    def checkLol(self):
        """Flag "lol" and its variants, suggesting "mdr" instead."""
        self._detect(mode='regexp', correct='mdr', mask='[Ll1][oO0iu]+[lL1]',
                     displayedMask='lol')
        self._detect(mode='regexp', correct='mdr', mask=' [Ll1] +[lL1] ',
                     displayedMask='lol')

    def checkTypographic(self):
        """Flag wrong spacing around punctuation marks.

        The message is padded with '_' instead of a space so that the
        padding itself is never mistaken for a typed space.
        """
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation double est toujours "
                             "précédé d'une espace",
                     mask="[a-zA-Z0-9]{2}[:!?;][^/]", wizard='_')
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation double est toujours "
                             "suivi d'une espace",
                     mask="(?<!(tp|ps|.[^ a-zA-Z]))[:!?;][a-zA-Z0-9]{2}",
                     wizard='_')
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation simple n'est jamais "
                             "précédé d'une espace",
                     mask=" ,", wizard='_')
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation simple est toujours "
                             "suivi d'une espace",
                     mask=",[^ _]", wizard='_')

    def getErrors(self):
        """Return the list of error messages recorded by the checks."""
        return self._errors
|
||
|
||
class GoodFrench(callbacks.Plugin):
    # Plugin glue around SpellChecker: an explicit ``detect`` command plus a
    # hook that checks every message the bot sees.

    def detect(self, irc, msg, args, text):
        """<texte>

        Cherche des fautes dans le <texte>, en fonction de la valeur locale de
        supybot.plugins.GoodFrench.level."""
        # NOTE(review): the docstring above is kept verbatim (in French);
        # the bot framework presumably exposes it as the command's help text
        # -- confirm before rewording it.
        level = self.registryValue('level', msg.args[0])
        errors = SpellChecker(text, level).getErrors()
        if not errors:
            answer = 'La phrase semble correcte'
        elif len(errors) == 1:
            answer = 'Il semble y avoir une erreur : %s' % errors[0]
        else:
            answer = ('Il semble y avoir des erreurs : %s' %
                      ' | '.join(errors))
        irc.reply(answer)

    def doPrivmsg(self, irc, msg):
        # Check a message the bot received and either kick its author or
        # reply with the list of detected errors.
        channel, text = msg.args[0], msg.args[1]
        nick = msg.prefix.split('!')[0]
        if callbacks.addressed(irc.nick, msg):
            # The message is a direct command to the bot: leave it alone.
            return

        level = self.registryValue('level', channel)
        errors = SpellChecker(text, level).getErrors()
        if not errors:
            return
        if len(errors) == 1:
            reason = 'Erreur : %s' % errors[0]
        else:
            reason = 'Erreurs : %s' % ' | '.join(errors)

        if not self.registryValue('kick'):
            irc.reply(reason)
            return
        kickMsg = ircmsgs.kick(channel, nick, reason)
        irc.queueMsg(kickMsg)

    # Register ``detect`` as a command taking the rest of the line as text.
    detect = wrap(detect, ['text'])
|
||
|
||
|
||
# Module-level alias for the plugin class; presumably the name the bot's
# plugin loader looks up when importing this module -- TODO confirm.
Class = GoodFrench
|
||
|
||
|
||
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|