Supybot-plugins/GoodFrench/plugin.py

247 lines
10 KiB
Python
Raw Permalink Normal View History

2010-11-06 10:52:20 -07:00
# -*- coding: utf8 -*-
###
# Copyright (c) 2010, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import re
import supybot.world as world
import supybot.ircmsgs as ircmsgs
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
class SpellChecker:
    """Rule-based detector of common French writing mistakes.

    The checks are cumulative: every check whose threshold is less than or
    equal to ``level`` is run, in the order listed below.

        0: no filtering
        1: SMS language
        2: plural mistakes (no rule implemented)
        3: common conjugation mistakes
        4: common spelling mistakes
        5: abbreviations, e.g. "t'as" instead of "tu as" (no rule implemented)
        6: typography (spacing around punctuation)
        7: "lol" and its variants

    All checks run from the constructor; read the result with getErrors().
    """

    # Characters that are NOT part of a French word. The apostrophe counts as
    # a word character so that "t'es" is not split into "t" and "es".
    _NON_WORD = '[^a-zA-Z0-9éèàùâêûôîäëüïöç\']'

    def __init__(self, text, level):
        """Run every check enabled by ``level`` against ``text``.

        text: the message to analyse.
        level: integer strictness level (see the class docstring).
        """
        self._text = text
        self._errors = []
        # (threshold, label used as the error prefix, check to run)
        checks = (
            (1, 'SMS', self.checkSMS),
            (2, 'pluriel', self.checkPlural),
            (3, 'conjugaison', self.checkConjugaison),
            (4, 'orthographe', self.checkSpelling),
            (5, 'abbréviation', self.checkAbbreviation),
            (6, 'typographie', self.checkTypographic),
            (7, 'lol', self.checkLol),
        )
        for threshold, label, check in checks:
            if level >= threshold:
                self._checking = label
                check()

    def _raise(self, message):
        """Record ``message``, prefixed with the name of the running check."""
        self._errors.append('[%s] %s' % (self._checking, message))

    @staticmethod
    def _quote(value, separator):
        """Wrap ``value`` (a string, or a list/tuple of strings joined with
        ``separator``) in French quotation marks."""
        if isinstance(value, (list, tuple)):
            value = separator.join(value)
        return '« %s »' % value

    def _detect(self, mode, correct, mask, displayedMask=None, wizard=' '):
        """Report an error if ``mask`` is found in the text.

        mode: 'single' to look for ``mask`` as a literal, case-insensitive,
            standalone word; 'regexp' to search for ``mask`` as a regular
            expression (case-sensitive). Any other mode never matches.
        correct: the suggested form (a string or a list of alternatives).
            For the 'conjugaison' and 'typographie' checks it is the full
            error message instead.
        mask: the word or regular expression to look for.
        displayedMask: what to show the user as the faulty form; defaults
            to ``mask``.
        wizard: padding character added on both sides of the text so that
            rules relying on a neighbouring character also match at the
            very beginning and end of the message.
        """
        if not mask:
            # An empty mask would match (almost) any text; ignore it.
            return
        if displayedMask is None:
            displayedMask = mask

        # URLs are not prose: strip them before checking.
        text = re.sub('[a-zA-Z0-9]+://[^ ]+', '', self._text)
        # Drop a leading "nick: " addressing prefix.
        nickRemover = re.match('[^ ]*: (?P<text>.*)', text)
        if nickRemover is not None:
            text = nickRemover.group('text')
        text = '%s%s%s' % (wizard, text, wizard)

        if mode == 'single':
            pattern = '%s%s%s' % (self._NON_WORD, re.escape(mask),
                                  self._NON_WORD)
            found = re.search(pattern, text, re.IGNORECASE) is not None
        elif mode == 'regexp':
            found = re.search(mask, text) is not None
        else:
            found = False
        if not found:
            return

        if self._checking in ('conjugaison', 'typographie'):
            # For these checks `correct` already is the whole message.
            self._raise(correct)
            return
        self._raise('On ne dit pas %s mais %s' %
                    (self._quote(displayedMask, ' » ou « '),
                     self._quote(correct, ' » , ou « ')))

    def checkSMS(self):
        """Detect SMS-style spellings."""
        bad = {
            't': 't\'es',
            'ki': 'qui',
            'koi': 'quoi',
            'tqvu': 't\'as vu',
            'tt': 'tout',
            'ct': 'c\'était',
            'v': 'vais',
            'twa': 'toi',
            'toa': 'toi',
            'mwa': 'moi',
            'moa': 'moi',
            'tro': 'trop',
            'bi1': 'bien',
            'çay': 'c\'est',
            # NOTE(review): this key was an empty string, which matched any
            # two consecutive non-word characters (e.g. a final "."). 'fé'
            # is the presumed intended SMS spelling -- please confirm.
            'fé': ['fais', 'fait'],
            'm': ['aime', 'aimes', 'aiment'],
            'u': ['eu', 'eut'],
        }
        for mask, correct in bad.items():
            self._detect(mode='single', correct=correct, mask=mask)
        # A standalone capital "C" used for "c'est", unless it follows one
        # of the listed two-letter words.
        self._detect(mode='regexp', correct="c'est",
                     mask="(?<!(du|Du|le|Le|en|En)) C (?<!c')",
                     displayedMask='C')

    def checkPlural(self):
        """Detect plural mistakes (no rule implemented yet)."""
        pass

    def checkConjugaison(self):
        """Detect common conjugation and negation mistakes."""
        missingNe = "tu as oublié un « ne » ou un « n »"
        self._detect(mode='regexp', correct=missingNe,
                     mask="(je|tu|on|il|elle|nous|vous|ils|elles) "
                          "[^' ]+ pas ")
        self._detect(mode='regexp', correct=missingNe,
                     mask="j'[^' ]+ pas")
        firstPerson = ('un verbe à la première personne ne finit pas '
                       'par un « t »')
        notAS = ('ce verbe ne devrait pas se finir par un « s » à cette '
                 'personne.')
        # Raw strings: "\W" is a regex class, not a Python string escape.
        self._detect(mode='regexp', correct=firstPerson,
                     mask=r"j'[^ ]*t\W")
        self._detect(mode='regexp', correct=firstPerson,
                     mask=r"je( ne)? [^ ]*t\W")
        # [^u] is there in order not to detect 'il [vn]ous...'
        self._detect(mode='regexp', correct=notAS,
                     mask=r" (il|elle|on)( ne | n[']| )[^ ]*[^u]s\W")

    def checkSpelling(self):
        """Detect common spelling mistakes around "quel" and its forms."""
        self._detect(mode='regexp', correct='quelle', mask='quel [^ ]+ la',
                     displayedMask='quel')
        self._detect(mode='regexp', correct='quel', mask='quelle [^ ]+ le',
                     displayedMask='quelle')
        self._detect(mode='regexp', correct=['quels', 'quelles'],
                     mask='quel [^ ]+ les',
                     displayedMask='quel')
        self._detect(mode='regexp', correct=['quels', 'quelles'],
                     mask='quelle [^ ]+ les',
                     displayedMask='quelle')
        for mask in ('kel', 'kelle', 'kels', 'kelles'):
            self._detect(mode='single',
                         correct=['quel', 'quels', 'quelle', 'quelles'],
                         mask=mask)

    def checkAbbreviation(self):
        """Detect abbreviations (no rule implemented yet)."""
        pass

    def checkLol(self):
        """Detect "lol" and look-alike spellings."""
        self._detect(mode='regexp', correct='mdr',
                     mask='[Ll1][oO0iu]+[lL1]',
                     displayedMask='lol')
        self._detect(mode='regexp', correct='mdr', mask=' [Ll1] +[lL1] ',
                     displayedMask='lol')

    def checkTypographic(self):
        """Detect wrong spacing around punctuation.

        The text is padded with '_' instead of a space so that the padding
        is not mistaken for a space next to a punctuation mark.
        """
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation double est "
                             "toujours précédé d'une espace",
                     mask="[a-zA-Z0-9]{2}[:!?;][^/]", wizard='_')
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation double est "
                             "toujours suivi d'une espace",
                     mask="(?<!(tp|ps|.[^ a-zA-Z]))[:!?;][a-zA-Z0-9]{2}",
                     wizard='_')
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation simple n'est "
                             "jamais précédé d'une espace",
                     mask=" ,", wizard='_')
        self._detect(mode='regexp',
                     correct="Un caractère de ponctuation simple est "
                             "toujours suivi d'une espace",
                     mask=",[^ _]", wizard='_')

    def getErrors(self):
        """Return the list of error messages collected by the checks."""
        return self._errors
# Supybot plugin that runs SpellChecker on channel messages and either
# replies with, or kicks the author for, the mistakes it finds.
class GoodFrench(callbacks.Plugin):
    def detect(self, irc, msg, args, text):
        """<texte>

        Cherche des fautes dans le <texte>, en fonction de la valeur locale de
        supybot.plugins.GoodFrench.level."""
        # NOTE: the docstring above is kept verbatim (in French); Supybot is
        # understood to serve command docstrings as user-facing help.
        # The strictness level is looked up for the target of the message.
        level = self.registryValue('level', msg.args[0])
        found = SpellChecker(text, level).getErrors()
        if not found:
            answer = 'La phrase semble correcte'
        elif len(found) == 1:
            answer = 'Il semble y avoir une erreur : %s' % found[0]
        else:
            answer = 'Il semble y avoir des erreurs : %s' % ' | '.join(found)
        irc.reply(answer)

    def doPrivmsg(self, irc, msg):
        # Called for each PRIVMSG: check the text and report any mistake,
        # either by kicking the author or by replying, depending on the
        # 'kick' registry value.
        channel, text = msg.args[0], msg.args[1]
        nick = msg.prefix.split('!')[0]
        if callbacks.addressed(irc.nick, msg):
            # The message is a direct command to the bot: do not check it.
            return

        level = self.registryValue('level', channel)
        found = SpellChecker(text, level).getErrors()
        if not found:
            return
        if len(found) == 1:
            reason = 'Erreur : %s' % found[0]
        else:
            reason = 'Erreurs : %s' % ' | '.join(found)

        if self.registryValue('kick'):
            irc.queueMsg(ircmsgs.kick(channel, nick, reason))
        else:
            irc.reply(reason)

    # Expose detect() as a command taking the rest of the line as <texte>.
    detect = wrap(detect, ['text'])
# Module-level alias for the plugin class; presumably the name Supybot's
# plugin loader looks up -- confirm against the framework documentation.
Class = GoodFrench
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: