From 1aa1ce0f3c843857e13f171bc1c23284bc514c33 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 1 Mar 2011 16:00:46 +0100 Subject: [PATCH] Wikipedia: import from quantumlemur repository. --- Wikipedia/README.txt | 1 + Wikipedia/__init__.py | 66 ++++++++++++ Wikipedia/config.py | 52 +++++++++ Wikipedia/local/__init__.py | 1 + Wikipedia/plugin.py | 207 ++++++++++++++++++++++++++++++++++++ Wikipedia/test.py | 37 +++++++ 6 files changed, 364 insertions(+) create mode 100644 Wikipedia/README.txt create mode 100644 Wikipedia/__init__.py create mode 100644 Wikipedia/config.py create mode 100644 Wikipedia/local/__init__.py create mode 100644 Wikipedia/plugin.py create mode 100644 Wikipedia/test.py diff --git a/Wikipedia/README.txt b/Wikipedia/README.txt new file mode 100644 index 0000000..d60b47a --- /dev/null +++ b/Wikipedia/README.txt @@ -0,0 +1 @@ +Insert a description of your plugin here, with any notes, etc. about using it. diff --git a/Wikipedia/__init__.py b/Wikipedia/__init__.py new file mode 100644 index 0000000..70c1a5b --- /dev/null +++ b/Wikipedia/__init__.py @@ -0,0 +1,66 @@ +### +# Copyright (c) 2010, quantumlemur +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. 
"""
Looks up topics on Wikipedia and returns some info on them.
"""

import supybot
import supybot.world as world

# Plugin version string; left empty by the original author.
__version__ = ""

# Author of this plugin, expressed as a supybot.Author instance.
__author__ = supybot.Author('quantumlemur', 'quantumlemur',
                            'quantumlemur@users.sourceforge.net')

# Maps supybot.Author instances to lists of their contributions.
__contributors__ = {}

# URL where the most recent plugin package can be downloaded (none set).
__url__ = ''  # 'http://supybot.com/Members/yourname/Wikipedia/download'

import config
import plugin
# Pick up edits to plugin.py when this package itself is being reloaded.
# Third-party modules that should be refreshed together with the plugin
# need to be imported and reloaded here as well.
reload(plugin)

# The test module is only pulled in while the test suite is running.
if world.testing:
    import test

# Names this module exports for the plugin class and its configure hook.
Class = plugin.Class
configure = config.configure


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
import supybot.conf as conf
import supybot.registry as registry


def configure(advanced):
    """Register the Wikipedia plugin when supybot configures this module.

    ``advanced`` is a bool telling whether the user identified himself as
    an advanced user.  It is not consulted here: the only configuration
    step is registering the plugin in the registry.
    """
    # Interactive-question helpers from the plugin template; imported but
    # not used by this plugin.
    from supybot.questions import expect, anything, something, yn
    conf.registerPlugin('Wikipedia', True)


# Configuration group of the plugin; its variables are registered below.
Wikipedia = conf.registerPlugin('Wikipedia')

# Global boolean "debug", off by default.
conf.registerGlobalValue(
    Wikipedia,
    'debug',
    registry.Boolean(False, """Output debugging info?"""),
)


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
import re
import string
import urllib
import StringIO
import lxml.html
from lxml import etree
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks

# plugins.wikipedia.snippetStyle in ['sentence','paragraph','none']


class Wikipedia(callbacks.Plugin):
    """Looks up a topic on the English Wikipedia.

    Use the "wiki" command with a search term; the bot replies with the
    address of the article it ended up on and that article's first
    paragraph."""
    # NOTE(review): presumably asks Supybot to run the commands in their own
    # thread because they block on HTTP requests -- confirm against the
    # Supybot callbacks documentation.
    threaded = True

    def _load(self, irc, addr):
        """Download and parse the page at ``addr``.

        Returns an ``(article, tree)`` pair -- the raw HTML and the lxml
        document built from it -- or None if the download failed, in which
        case the failure has already been reported through ``irc.reply``.
        """
        try:
            article = utils.web.getUrl(addr)
        except Exception:
            # Best effort: whatever the download raised, tell the channel
            # instead of failing, but let KeyboardInterrupt/SystemExit
            # propagate (the original used a bare ``except:``).
            irc.reply('Hmm, something went wrong fetching the page. I\'m highlighting quantumlemur so he can take a look.')
            return None
        return article, lxml.html.document_fromstring(article)

    def _text(self, element):
        """Return the stripped text of an lxml element as UTF-8 bytes.

        Every piece of page text goes through here before it is
        interpolated into a reply, so non-ASCII titles are handled the same
        way as the article paragraph.
        """
        return element.text_content().strip().encode('utf-8')

    def wiki(self, irc, msg, args, search):
        """<search term>

        Returns the first paragraph of a Wikipedia article"""
        # first, we get the page
        addr = ('http://en.wikipedia.org/wiki/Special:Search?search=%s'
                % urllib.quote_plus(search))
        page = self._load(irc, addr)
        if page is None:
            return
        article, tree = page

        # check if it gives a "Did you mean..." redirect
        didyoumean = tree.xpath('//div[@class="searchdidyoumean"]'
                                '/a[@title="Special:Search"]')
        if didyoumean:
            redirect = self._text(didyoumean[0])
            irc.reply('I didn\'t find anything for "%s". Did you mean "%s"?' % (search, redirect))
            addr = 'http://en.wikipedia.org%s' % didyoumean[0].get('href')
            page = self._load(irc, addr)
            if page is None:
                return
            article, tree = page
            search = redirect

        # check if it's a page of search results (rather than an article),
        # and if so, retrieve the first result
        searchresults = tree.xpath('//div[@class="searchresults"]/ul/li/a')
        if searchresults:
            redirect = self._text(searchresults[0])
            irc.reply('I didn\'t find anything for "%s", but here\'s the result for "%s":' % (search, redirect))
            addr = 'http://en.wikipedia.org%s' % searchresults[0].get('href')
            page = self._load(irc, addr)
            if page is None:
                return
            article, tree = page
            search = redirect
        # otherwise, simply return the title and whether it redirected
        elif re.search(r'\(Redirected from <a [^>]*>([^<]*)</a>\)', article):
            origin = tree.xpath('//div[@id="contentSub"]/a')
            title = tree.xpath('//*[@class="firstHeading"]')
            # Skip the notice rather than crash if the markup is not laid
            # out the way the two queries above expect.
            if origin and title:
                irc.reply('"%s" (Redirect from "%s"):' % (self._text(title[0]), self._text(origin[0])))

        # extract the address we got it from; if the "Retrieved from"
        # footer cannot be found, keep the address that was last requested
        footer = re.search(r'Retrieved from "<a [^>]*href="([^"]*)"', article)
        if footer:
            addr = footer.group(1)

        # check if it's a disambiguation page
        if tree.xpath('//table[@id="disambigbox"]'):
            # only the first five candidates are listed
            candidates = tree.xpath('//div[@id="bodyContent"]/ul/li/a')[:5]
            r = utils.str.commaAndify([self._text(item) for item in candidates])
            irc.reply('%s is a disambiguation page. Possible results are: %s' % (addr, r))
        # or just as bad, a page listing events in that year
        elif re.search(r'This article is about the year [\d]*\. For the [a-zA-Z ]* [\d]*, see', article):
            irc.reply('"%s" is a page full of events that happened in that year. If you were looking for information about the number itself, try searching for "%s_(number)", but don\'t expect anything useful...' % (search, search))
        else:
            paragraphs = tree.xpath("//div[@id='bodyContent']/p[1]")
            # and finally, return what we've got
            irc.reply(addr)
            if paragraphs:
                irc.reply(self._text(paragraphs[0]))
    wiki = wrap(wiki, ['text'])


# NOTE(review): legacy regex-based implementation, left commented out exactly
# as found; it continues past the end of the reviewed excerpt.
#
#    def wikiold(self, irc, msg, args, search):
#        """
#
#        Returns the first paragraph of a Wikipedia article"""
## first, we get the page
#        try:
#            article = utils.web.getUrl('http://en.wikipedia.org/wiki/Special:Search?search=%s' % urllib.quote_plus(search))
#        except:
#            irc.reply('Hmm, looks like we broke Wikipedia. Try again later?')
#            return
## check if it gives a "Did you mean..." redirect
#        if 'class="searchdidyoumean"' in article:
#            redirect = re.search('class="searchdidyoumean">[^>]*title="Special:Search">(.*?)', article)
#            redirect = redirect.group(1)
#            redirect = utils.web.htmlToText(redirect, tagReplace="")
#            irc.reply('I didn\'t find anything for "%s". Did you mean "%s"?' % (search, redirect))
#            search = redirect
#            article = utils.web.getUrl('http://en.wikipedia.org/wiki/Special:Search?search=%s' % urllib.quote_plus(search))
## then check if it's a page of search results (rather than an article), and if so, retrieve the first result
#        if '