Debian: rewrite @file, faster, liter, and more flexible (now uses Debian website instead of local copy)

master
Valentin Lorentz 2011-03-03 16:41:40 +01:00
parent cc6106e38e
commit 5674e2a04f
2 changed files with 74 additions and 214 deletions

View File

@ -29,11 +29,9 @@
import os
import re
import gzip
import time
import popen2
import urllib
import fnmatch
import threading
import BeautifulSoup
@ -46,201 +44,76 @@ import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
from supybot.utils.iter import all, imap, ifilter
class PeriodicFileDownloader(object):
"""A class to periodically download a file/files.
A class-level dictionary 'periodicFiles' maps names of files to
three-tuples of
(url, seconds between downloads, function to run with downloaded file).
'url' should be in some form that urllib2.urlopen can handle (do note that
urllib2.urlopen handles file:// links perfectly well.)
'seconds between downloads' is the number of seconds between downloads,
obviously. An important point to remember, however, is that it is only
engaged when a command is run. I.e., if you say you want the file
downloaded every day, but no commands that use it are run in a week, the
next time such a command is run, it'll be using a week-old file. If you
don't want such behavior, you'll have to give an error mess age to the user
and tell him to call you back in the morning.
'function to run with downloaded file' is a function that will be passed
a string *filename* of the downloaded file. This will be some random
filename probably generated via some mktemp-type-thing. You can do what
you want with this; you may want to build a database, take some stats,
or simply rename the file. You can pass None as your function and the
file with automatically be renamed to match the filename you have it listed
under. It'll be in conf.supybot.directories.data, of course.
Aside from that dictionary, simply use self.getFile(filename) in any method
that makes use of a periodically downloaded file, and you'll be set.
"""
periodicFiles = None
def __init__(self, *args, **kwargs):
if self.periodicFiles is None:
raise ValueError, 'You must provide files to download'
self.lastDownloaded = {}
self.downloadedCounter = {}
for filename in self.periodicFiles:
if self.periodicFiles[filename][-1] is None:
fullname = os.path.join(conf.supybot.directories.data(),
filename)
if os.path.exists(fullname):
self.lastDownloaded[filename] = os.stat(fullname).st_ctime
else:
self.lastDownloaded[filename] = 0
else:
self.lastDownloaded[filename] = 0
self.currentlyDownloading = set()
self.downloadedCounter[filename] = 0
self.getFile(filename)
super(PeriodicFileDownloader, self).__init__(*args, **kwargs)
def _downloadFile(self, filename, url, f):
self.currentlyDownloading.add(filename)
try:
try:
infd = utils.web.getUrlFd(url)
except IOError, e:
self.log.warning('Error downloading %s: %s', url, e)
return
except utils.web.Error, e:
self.log.warning('Error downloading %s: %s', url, e)
return
confDir = conf.supybot.directories.data()
newFilename = os.path.join(confDir, utils.file.mktemp())
outfd = file(newFilename, 'wb')
start = time.time()
s = infd.read(4096)
while s:
outfd.write(s)
s = infd.read(4096)
infd.close()
outfd.close()
self.log.info('Downloaded %s in %s seconds',
filename, time.time()-start)
self.downloadedCounter[filename] += 1
self.lastDownloaded[filename] = time.time()
if f is None:
toFilename = os.path.join(confDir, filename)
if os.name == 'nt':
# Windows, grrr...
if os.path.exists(toFilename):
os.remove(toFilename)
os.rename(newFilename, toFilename)
else:
start = time.time()
f(newFilename)
total = time.time() - start
self.log.info('Function ran on %s in %s seconds',
filename, total)
finally:
self.currentlyDownloading.remove(filename)
def getFile(self, filename):
if world.documenting:
return
(url, timeLimit, f) = self.periodicFiles[filename]
if time.time() - self.lastDownloaded[filename] > timeLimit and \
filename not in self.currentlyDownloading:
self.log.info('Beginning download of %s', url)
args = (filename, url, f)
name = '%s #%s' % (filename, self.downloadedCounter[filename])
t = threading.Thread(target=self._downloadFile, name=name,
args=(filename, url, f))
t.setDaemon(True)
t.start()
world.threadsSpawned += 1
class Debian(callbacks.Plugin, PeriodicFileDownloader):
class Debian(callbacks.Plugin):
threaded = True
periodicFiles = {
# This file is only updated once a week, so there's no sense in
# downloading a new one every day.
'Contents-i386.gz': ('ftp://ftp.us.debian.org/'
'debian/dists/unstable/Contents-i386.gz',
604800, None)
}
def __init__(self, irc):
callbacks.Plugin.__init__(self, irc)
PeriodicFileDownloader.__init__(self)
_debreflags = re.DOTALL | re.MULTILINE
_deblistreFileExact = re.compile(r'<a href="/[^/>]+/[^/>]+">([^<]+)</a>',
_debreflags)
def file(self, irc, msg, args, optlist, filename):
"""[--exact]
[--mode {path,filename,exactfilename}]
[--branch {oldstable,stable,testing,unstable,experimental}] \
[--section {main,contrib,non-free}] <file name>
contents = conf.supybot.directories.data.dirize('Contents-i386.gz')
def file(self, irc, msg, args, optlist, glob):
"""[--{regexp,exact} <value>] [<glob>]
Returns packages in Debian that includes files matching <glob>. If
--regexp is given, returns packages that include files matching the
given regexp. If --exact is given, returns packages that include files
matching exactly the string given.
"""
self.getFile('Contents-i386.gz')
# Make sure it's anchored, make sure it doesn't have a leading slash
# (the filenames don't have leading slashes, and people may not know
# that).
if not optlist and not glob:
raise callbacks.ArgumentError
if optlist and glob:
irc.error('You must specify either a glob or a regexp/exact '
'search, but not both.', Raise=True)
for (option, arg) in optlist:
if option == 'exact':
regexp = arg.lstrip('/')
elif option == 'regexp':
regexp = arg
if glob:
regexp = fnmatch.translate(glob.lstrip('/'))
regexp = regexp.rstrip('$')
regexp = "%s.* " % regexp
Returns the package(s) containing the <file name>.
--mode defaults to path, and defines how to search.
--branch defaults to stable, and defines in what branch to search."""
url = 'http://packages.debian.org/search?searchon=contents' + \
'&keywords=%(keywords)s&mode=%(mode)s&suite=%(suite)s' + \
'&arch=%(arch)s'
args = {'keywords': None, 'mode': 'path', 'suite': 'stable',
'arch': 'any'}
exact = ('exact', True) in optlist
for (key, value) in optlist:
if key == 'branch':
args['suite'] = value
elif key == 'arch':
args['arch'] = value
elif key == 'mode':
args['mode'] = value
responses = []
if '*' in filename:
irc.error('Wildcard characters can not be specified.', Raise=True)
args['keywords'] = urllib.quote(filename, '')
url %= args
try:
re_obj = re.compile(regexp, re.I)
except re.error, e:
irc.error(format('Error in regexp: %s', e), Raise=True)
if self.registryValue('pythonZgrep'):
fd = gzip.open(self.contents)
r = imap(lambda tup: tup[0],
ifilter(lambda tup: tup[0],
imap(lambda line:(re_obj.search(line), line),fd)))
html = utils.web.getUrl(url)
except utils.web.Error, e:
irc.error(format('I couldn\'t reach the search page (%s).', e),
Raise=True)
if 'is down at the moment' in html:
irc.error('Packages.debian.org is down at the moment. '
'Please try again later.', Raise=True)
step = 0
pkgs = []
for line in html.split('\n'):
if '<span class="keyword">' in line:
step += 1
elif step == 1 or (step >= 1 and not exact):
pkgs.extend(self._deblistreFileExact.findall(line))
if pkgs == []:
irc.reply(format('No filename found for %s (%s)',
utils.web.urlunquote(filename), args['suite']))
else:
try:
(r, w) = popen2.popen4(['zgrep', '-e', regexp, self.contents])
w.close()
except TypeError:
# We're on Windows.
irc.error('This command won\'t work on this platform. '
'If you think it should (i.e., you know that you '
'have a zgrep binary somewhere) then file a bug '
'about it at http://supybot.sf.net/ .', Raise=True)
packages = set() # Make packages unique
try:
for line in r:
if len(packages) > 100:
irc.error('More than 100 packages matched, '
'please narrow your search.', Raise=True)
try:
if hasattr(line, 'group'): # we're actually using
line = line.group(0) # pythonZgrep :(
(filename, pkg_list) = line.split()
if filename == 'FILE':
# This is the last line before the actual files.
continue
except ValueError: # Unpack list of wrong size.
continue # We've not gotten to the files yet.
packages.update(pkg_list.split(','))
finally:
if hasattr(r, 'close'):
r.close()
if len(packages) == 0:
irc.reply('I found no packages with that file.')
else:
irc.reply(format('%L', sorted(packages)))
file = wrap(file, [getopts({'regexp':'regexpMatcher','exact':'something'}),
additional('glob')])
irc.reply(format('%i matches found: %s (%s)',
len(pkgs), '; '.join(pkgs), args['suite']))
file = wrap(file, [getopts({'exact': '',
'branch': ('literal', ('oldstable',
'stable',
'testing',
'unstable',
'experimental')),
'mode': ('literal', ('path',
'exactfilename',
'filename')),
'arch': ('literal', ('main',
'contrib',
'non-free'))}),
'text'])
_debreflags = re.DOTALL | re.IGNORECASE
_deblistre = re.compile(r'<h3>Package ([^<]+)</h3>(.*?)</ul>', _debreflags)
_deblistreVersion = re.compile(r'<h3>Package ([^<]+)</h3>(.*?)</ul>', _debreflags)
def version(self, irc, msg, args, optlist, package):
"""[--exact] \
[--searchon {names,all,sourcenames}]
@ -279,7 +152,7 @@ class Debian(callbacks.Plugin, PeriodicFileDownloader):
if 'is down at the moment' in html:
irc.error('Packages.debian.org is down at the moment. '
'Please try again later.', Raise=True)
pkgs = self._deblistre.findall(html)
pkgs = self._deblistreVersion.findall(html)
if not pkgs:
irc.reply(format('No package found for %s (%s)',
utils.web.urlunquote(package), args['suite']))

View File

@ -39,24 +39,6 @@ class DebianTestCase(PluginTestCase):
cleanDataDir = False
fileDownloaded = False
if network:
def setUp(self, nick='test'):
PluginTestCase.setUp(self)
try:
datadir = conf.supybot.directories.data
if os.path.exists(datadir.dirize('Contents-i386.gz')):
pass
else:
print
print "Downloading files, this may take awhile."
filename = datadir.dirize('Contents-i386.gz')
while not os.path.exists(filename):
time.sleep(1)
print "Download complete."
print "Starting test ..."
self.fileDownloaded = True
except KeyboardInterrupt:
pass
def testDebBugNoHtml(self):
self.assertNotRegexp('debian bug 287792', r'\<em\>')
@ -77,10 +59,15 @@ class DebianTestCase(PluginTestCase):
r'^No package.*')
def testDebfile(self):
self.assertHelp('file')
if not self.fileDownloaded:
pass
self.assertRegexp('file --exact bin/gaim', r'net/gaim')
self.assertHelp('debian file')
self.assertRegexp('debian file oigrgrgregg',
r'^No filename.*\(stable\)')
self.assertRegexp('debian file --branch unstable alkdjfad',
r'^No filename.*\(unstable\)')
self.assertRegexp('debian file --exact --branch stable /bin/sh',
r'2 matches found:.*bash.*dash.*\(stable')
self.assertRegexp('debian file --branch stable /bin/sh',
r'3 matches found:.*bash.*dash.*klibc-utils')
def testDebincoming(self):
self.assertNotError('incoming')