diff --git a/COMMANDS.md b/COMMANDS.md index c7798e4..5266a9a 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -77,6 +77,8 @@ Required arguments are enclosed in { and }, optional arguments are enclosed in \ shorten.py !sh {service} {url} Shortens URL Anyone Currently supports: id.gd, v.gd + title.py + !title [link] Query Page Title Anyone twitter.py !tw {link/username/tweet_id} Query Tweet from Twitter Anyone wikipedia.py diff --git a/title.py b/title.py new file mode 100644 index 0000000..713e7a3 --- /dev/null +++ b/title.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +""" +title.py - Phenny URL Title Module +Copyright 2008, Sean B. Palmer, inamidst.com +Modified by sfan5, 2013 +Licensed under the Eiffel Forum License 2. + +http://inamidst.com/phenny/ +""" + +import re, urllib2, urlparse + +r_title = re.compile(r'(?ims)]*>(.*?)') + +def f_title(phenny, input): + for x in phenny.bot.commands["high"].values(): + if x[0].__name__ == "aa_hook": + if x[0](phenny, input): + return # Abort function + uri = input.group(2) + uri = (uri or '').encode('utf-8') + + if not uri and hasattr(phenny.bot, 'last_seen_uri'): + uri = phenny.bot.last_seen_uri + if not uri: + return phenny.reply('I need a URI to give the title of...') + + if not ':' in uri: + uri = 'http://' + uri + + try: + redirects = 0 + while True: + headers = { + 'Accept': 'text/html', + 'User-Agent': 'Mozilla/5.0 (MinetestBot)' + } + req = urllib2.Request(uri, headers=headers) + u = urllib2.urlopen(req) + info = u.info() + u.close() + + if not isinstance(info, list): + status = '200' + else: + status = str(info[1]) + info = info[0] + if status.startswith('3'): + uri = urlparse.urljoin(uri, info['Location']) + else: break + + redirects += 1 + if redirects >= 20: + return phenny.reply("Too many redirects") + + try: mtype = info['content-type'] + except: + return phenny.reply("Couldn't get the Content-Type, sorry") + if not (('/html' in mtype) or ('/xhtml' in mtype)): + return phenny.reply("Document isn't HTML") + + u = urllib2.urlopen(req) + bytes = u.read(262144) + u.close() + + except IOError: + return phenny.reply("Can't connect to %s" % uri) + + m = r_title.search(bytes) + if m: + title = m.group(1) + title = title.strip() + title = title.replace('\n', ' ') + title = title.replace('\r', ' ') + while ' ' in title: + title = title.replace(' ', ' ') + if len(title) > 100: + title = title[:100] + '[...]' + + if title: + try: title.decode('utf-8') + except: + try: title = title.decode('iso-8859-1').encode('utf-8') + except: title = title.decode('cp1252').encode('utf-8') + else: pass + else: title = '[The title is empty.]' + + title = title.replace('\n', '') + title = title.replace('\r', '') + return phenny.reply(title) + else: return phenny.reply('No title found') + +f_title.commands = ['title'] + +def noteuri(phenny, input): + uri = input.group(1).encode('utf-8') + phenny.bot.last_seen_uri = uri + +noteuri.rule = r'.*(https?://[^<> "\x01]+).*' +noteuri.priority = 'low' + +if __name__ == '__main__': + print __doc__.strip()