104 lines
2.9 KiB
Python
104 lines
2.9 KiB
Python
#!/usr/bin/env python
"""
title.py - Phenny URL Title Module
Copyright 2008, Sean B. Palmer, inamidst.com
Modified by sfan5, 2013
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""
|
|
|
|
import re, urllib2, urlparse
|
|
|
|
# Pattern for the contents of an HTML <title> element.  Inline flags:
#   i - tag names match in any case,
#   m - multiline (no practical effect here: the pattern has no ^ or $),
#   s - '.' also matches newlines, so a title may span several lines.
# The capture is non-greedy, so it stops at the first closing tag.
_TITLE_PATTERN = r'(?ims)<title[^>]*>(.*?)</title\s*>'
r_title = re.compile(_TITLE_PATTERN)
|
|
|
|
def _clean_title(raw, limit=100):
    """Turn the raw bytes captured from a <title> element into UTF-8 text.

    raw   -- byte string exactly as matched in the page.
    limit -- maximum number of characters kept before '[...]' is appended.

    Returns a UTF-8 encoded byte string; it is empty when the title holds
    nothing but whitespace.
    """
    text = None
    # cp1252 is tried before iso-8859-1 because iso-8859-1 accepts every
    # byte and would make any later fallback unreachable.
    for codec in ('utf-8', 'cp1252'):
        try:
            text = raw.decode(codec)
            break
        except UnicodeDecodeError:
            continue  # not this encoding, try the next candidate
    if text is None:
        text = raw.decode('iso-8859-1')  # never fails

    # One pass that trims both ends and collapses every run of whitespace
    # (spaces, tabs, CR, LF) to a single space.
    text = u' '.join(text.split())

    # Truncate decoded characters, not bytes, so a multi-byte UTF-8
    # sequence is never cut in half.
    if len(text) > limit:
        text = text[:limit] + u'[...]'
    return text.encode('utf-8')


def f_title(phenny, input):
    """Reply with the HTML <title> of a URI (registered as command 'title').

    The URI comes from input.group(2).  When that is empty, the bot's
    last_seen_uri attribute is used if it exists.  A URI containing no ':'
    gets 'http://' prepended.

    The document is requested once.  urllib2 follows HTTP redirects on its
    own, so no manual redirect handling is done here.  At most 262144 bytes
    of the body are read and searched with r_title.

    Every outcome is reported through phenny.reply and its result is
    returned.  The one exception: if a handler named "aa_hook" registered
    under the "high" priority returns a true value, the function returns
    None without replying.
    """
    # Let an "aa_hook" handler veto the command before any work is done.
    for handlers in phenny.bot.commands["high"].values():
        hook = handlers[0]
        if hook.__name__ == "aa_hook" and hook(phenny, input):
            return  # Abort function

    uri = (input.group(2) or '').encode('utf-8')
    if not uri and hasattr(phenny.bot, 'last_seen_uri'):
        uri = phenny.bot.last_seen_uri
    if not uri:
        return phenny.reply('I need a URI to give the title of...')

    if ':' not in uri:
        uri = 'http://' + uri

    headers = {
        'Accept': 'text/html',
        'User-Agent': 'Mozilla/5.0 (MinetestBot)',
    }

    # The outcome is collected first and reported after the try block, so
    # an error raised by phenny.reply itself is never mistaken for a
    # connection failure.
    problem = None
    page = None
    try:
        req = urllib2.Request(uri, headers=headers)
        response = urllib2.urlopen(req)
        try:
            mtype = response.info().get('content-type')
            if mtype is None:
                problem = "Couldn't get the Content-Type, sorry"
            elif '/html' not in mtype and '/xhtml' not in mtype:
                problem = "Document isn't HTML"
            else:
                page = response.read(262144)
        finally:
            # Close the connection on every path, including errors.
            response.close()
    except (IOError, ValueError):
        # IOError covers urllib2.URLError/HTTPError and socket errors;
        # ValueError is what urllib2 raises for a URI it cannot parse.
        problem = "Can't connect to %s" % uri

    if problem is not None:
        return phenny.reply(problem)

    found = r_title.search(page)
    if not found:
        return phenny.reply('No title found')

    title = _clean_title(found.group(1))
    return phenny.reply(title or '[The title is empty.]')


f_title.commands = ['title']
|
|
|
|
def noteuri(phenny, input):
    """Remember the URL captured from a chat line.

    Stores group 1 of the match, encoded as UTF-8, on the bot object as
    last_seen_uri, which f_title falls back to when it is called without
    an argument.
    """
    seen = input.group(1)
    phenny.bot.last_seen_uri = seen.encode('utf-8')


# Runs at low priority on any line containing an http:// or https:// URL.
# The leading '.*' is greedy, so group 1 is the last URL on the line.
noteuri.priority = 'low'
noteuri.rule = r'.*(https?://[^<> "\x01]+).*'
|
|
|
|
if __name__ == '__main__':
    # Run as a script: just show the module docstring (name, copyright,
    # license) with surrounding blank lines removed.
    banner = __doc__.strip()
    print(banner)
|