Compare commits
5 Commits
a923085699
...
fb0b4bae3c
Author | SHA1 | Date | |
---|---|---|---|
|
fb0b4bae3c | ||
|
a664c2eb6b | ||
|
c447e55a7a | ||
|
8647c75d48 | ||
|
31ff072d23 |
@ -2,7 +2,7 @@ Author can be reached by sending e-mail to <hoxu@users.sf.net>.
|
|||||||
Include "gitstats" in the subject or prepare to battle the spam filters.
|
Include "gitstats" in the subject or prepare to battle the spam filters.
|
||||||
|
|
||||||
See the following command for list of authors who have contributed:
|
See the following command for list of authors who have contributed:
|
||||||
$ git-shortlog HEAD
|
$ git shortlog HEAD
|
||||||
|
|
||||||
Also thanks to the following people:
|
Also thanks to the following people:
|
||||||
Alexander Botero-Lowry
|
Alexander Botero-Lowry
|
||||||
|
@ -53,6 +53,10 @@ How many domains to show in domains by commits.
|
|||||||
|
|
||||||
Maximum file extension length.
|
Maximum file extension length.
|
||||||
|
|
||||||
|
=item processes
|
||||||
|
|
||||||
|
Number of concurrent processes to use when extracting git repository data.
|
||||||
|
|
||||||
=item project_name
|
=item project_name
|
||||||
|
|
||||||
Project name to show on the generated pages. Default is to use basename of the repository directory.
|
Project name to show on the generated pages. Default is to use basename of the repository directory.
|
||||||
|
110
gitstats
110
gitstats
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# Copyright (c) 2007-2012 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
|
# Copyright (c) 2007-2013 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/AUTHOR)
|
||||||
# GPLv2 / GPLv3
|
# GPLv2 / GPLv3
|
||||||
import datetime
|
import datetime
|
||||||
import getopt
|
import getopt
|
||||||
@ -14,6 +14,12 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
|
if sys.version_info < (2, 6):
|
||||||
|
print >> sys.stderr, "Python 2.6 or higher is required for gitstats"
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
from multiprocessing import Pool
|
||||||
|
|
||||||
os.environ['LC_ALL'] = 'C'
|
os.environ['LC_ALL'] = 'C'
|
||||||
|
|
||||||
GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
|
GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
|
||||||
@ -40,7 +46,8 @@ conf = {
|
|||||||
'commit_end': 'HEAD',
|
'commit_end': 'HEAD',
|
||||||
'linear_linestats': 1,
|
'linear_linestats': 1,
|
||||||
'project_name': '',
|
'project_name': '',
|
||||||
'merge_authors': {}
|
'merge_authors': {},
|
||||||
|
'processes': 8,
|
||||||
}
|
}
|
||||||
|
|
||||||
def getpipeoutput(cmds, quiet = False):
|
def getpipeoutput(cmds, quiet = False):
|
||||||
@ -104,6 +111,20 @@ def getgitversion():
|
|||||||
def getgnuplotversion():
|
def getgnuplotversion():
|
||||||
return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
|
return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
|
||||||
|
|
||||||
|
def getnumoffilesfromrev(time_rev):
|
||||||
|
"""
|
||||||
|
Get number of files changed in commit
|
||||||
|
"""
|
||||||
|
time, rev = time_rev
|
||||||
|
return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0]))
|
||||||
|
|
||||||
|
def getnumoflinesinblob(ext_blob):
|
||||||
|
"""
|
||||||
|
Get number of lines in blob
|
||||||
|
"""
|
||||||
|
ext, blob_id = ext_blob
|
||||||
|
return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0]))
|
||||||
|
|
||||||
class DataCollector:
|
class DataCollector:
|
||||||
"""Manages data collection from a revision control repository."""
|
"""Manages data collection from a revision control repository."""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -408,14 +429,34 @@ class GitDataCollector(DataCollector):
|
|||||||
# timezone
|
# timezone
|
||||||
self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
|
self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
|
||||||
|
|
||||||
# TODO Optimize this, it's the worst bottleneck
|
|
||||||
# outputs "<stamp> <files>" for each revision
|
# outputs "<stamp> <files>" for each revision
|
||||||
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
|
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
|
||||||
lines = []
|
lines = []
|
||||||
|
revs_to_read = []
|
||||||
|
time_rev_count = []
|
||||||
|
#Look up rev in cache and take info from cache if found
|
||||||
|
#If not, append rev to the list of revs to read from the repo
|
||||||
for revline in revlines:
|
for revline in revlines:
|
||||||
time, rev = revline.split(' ')
|
time, rev = revline.split(' ')
|
||||||
linecount = self.getFilesInCommit(rev)
|
#if cache is empty then add time and rev to the list of new revs
|
||||||
lines.append('%d %d' % (int(time), linecount))
|
#otherwise try to read needed info from cache
|
||||||
|
if 'files_in_tree' not in self.cache.keys():
|
||||||
|
revs_to_read.append((time,rev))
|
||||||
|
continue
|
||||||
|
if rev in self.cache['files_in_tree'].keys():
|
||||||
|
lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
|
||||||
|
else:
|
||||||
|
revs_to_read.append((time,rev))
|
||||||
|
|
||||||
|
#Read revisions from repo
|
||||||
|
time_rev_count = Pool(processes=conf['processes']).map(getnumoffilesfromrev, revs_to_read)
|
||||||
|
|
||||||
|
#Update cache with new revisions and append them to the general list
|
||||||
|
for (time, rev, count) in time_rev_count:
|
||||||
|
if 'files_in_tree' not in self.cache:
|
||||||
|
self.cache['files_in_tree'] = {}
|
||||||
|
self.cache['files_in_tree'][rev] = count
|
||||||
|
lines.append('%d %d' % (int(time), count))
|
||||||
|
|
||||||
self.total_commits += len(lines)
|
self.total_commits += len(lines)
|
||||||
for line in lines:
|
for line in lines:
|
||||||
@ -430,6 +471,7 @@ class GitDataCollector(DataCollector):
|
|||||||
|
|
||||||
# extensions and size of files
|
# extensions and size of files
|
||||||
lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
|
lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
|
||||||
|
blobs_to_read = []
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if len(line) == 0:
|
if len(line) == 0:
|
||||||
continue
|
continue
|
||||||
@ -437,7 +479,7 @@ class GitDataCollector(DataCollector):
|
|||||||
if parts[0] == '160000' and parts[3] == '-':
|
if parts[0] == '160000' and parts[3] == '-':
|
||||||
# skip submodules
|
# skip submodules
|
||||||
continue
|
continue
|
||||||
sha1 = parts[2]
|
blob_id = parts[2]
|
||||||
size = int(parts[3])
|
size = int(parts[3])
|
||||||
fullpath = parts[4]
|
fullpath = parts[4]
|
||||||
|
|
||||||
@ -451,15 +493,28 @@ class GitDataCollector(DataCollector):
|
|||||||
ext = filename[(filename.rfind('.') + 1):]
|
ext = filename[(filename.rfind('.') + 1):]
|
||||||
if len(ext) > conf['max_ext_length']:
|
if len(ext) > conf['max_ext_length']:
|
||||||
ext = ''
|
ext = ''
|
||||||
|
|
||||||
if ext not in self.extensions:
|
if ext not in self.extensions:
|
||||||
self.extensions[ext] = {'files': 0, 'lines': 0}
|
self.extensions[ext] = {'files': 0, 'lines': 0}
|
||||||
|
|
||||||
self.extensions[ext]['files'] += 1
|
self.extensions[ext]['files'] += 1
|
||||||
try:
|
#if cache is empty then add ext and blob id to the list of new blobs
|
||||||
self.extensions[ext]['lines'] += self.getLinesInBlob(sha1)
|
#otherwise try to read needed info from cache
|
||||||
except:
|
if 'lines_in_blob' not in self.cache.keys():
|
||||||
print 'Warning: Could not count lines for file "%s"' % line
|
blobs_to_read.append((ext,blob_id))
|
||||||
|
continue
|
||||||
|
if blob_id in self.cache['lines_in_blob'].keys():
|
||||||
|
self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
|
||||||
|
else:
|
||||||
|
blobs_to_read.append((ext,blob_id))
|
||||||
|
|
||||||
|
#Get info about line count for new blobs that weren't found in cache
|
||||||
|
ext_blob_linecount = Pool(processes=24).map(getnumoflinesinblob, blobs_to_read)
|
||||||
|
|
||||||
|
#Update cache and write down info about the number of lines
|
||||||
|
for (ext, blob_id, linecount) in ext_blob_linecount:
|
||||||
|
if 'lines_in_blob' not in self.cache:
|
||||||
|
self.cache['lines_in_blob'] = {}
|
||||||
|
self.cache['lines_in_blob'][blob_id] = linecount
|
||||||
|
self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
|
||||||
|
|
||||||
# line statistics
|
# line statistics
|
||||||
# outputs:
|
# outputs:
|
||||||
@ -619,33 +674,12 @@ class GitDataCollector(DataCollector):
|
|||||||
def getDomains(self):
|
def getDomains(self):
|
||||||
return self.domains.keys()
|
return self.domains.keys()
|
||||||
|
|
||||||
def getFilesInCommit(self, rev):
|
|
||||||
try:
|
|
||||||
res = self.cache['files_in_tree'][rev]
|
|
||||||
except:
|
|
||||||
res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
|
|
||||||
if 'files_in_tree' not in self.cache:
|
|
||||||
self.cache['files_in_tree'] = {}
|
|
||||||
self.cache['files_in_tree'][rev] = res
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
def getFirstCommitDate(self):
|
def getFirstCommitDate(self):
|
||||||
return datetime.datetime.fromtimestamp(self.first_commit_stamp)
|
return datetime.datetime.fromtimestamp(self.first_commit_stamp)
|
||||||
|
|
||||||
def getLastCommitDate(self):
|
def getLastCommitDate(self):
|
||||||
return datetime.datetime.fromtimestamp(self.last_commit_stamp)
|
return datetime.datetime.fromtimestamp(self.last_commit_stamp)
|
||||||
|
|
||||||
def getLinesInBlob(self, sha1):
|
|
||||||
try:
|
|
||||||
res = self.cache['lines_in_blob'][sha1]
|
|
||||||
except:
|
|
||||||
res = int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
|
|
||||||
if 'lines_in_blob' not in self.cache:
|
|
||||||
self.cache['lines_in_blob'] = {}
|
|
||||||
self.cache['lines_in_blob'][sha1] = res
|
|
||||||
return res
|
|
||||||
|
|
||||||
def getTags(self):
|
def getTags(self):
|
||||||
lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
|
lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
|
||||||
return lines.split('\n')
|
return lines.split('\n')
|
||||||
@ -1086,7 +1120,7 @@ class HTMLReportCreator(ReportCreator):
|
|||||||
f.write('</dl>\n')
|
f.write('</dl>\n')
|
||||||
|
|
||||||
f.write(html_header(2, 'Lines of Code'))
|
f.write(html_header(2, 'Lines of Code'))
|
||||||
f.write('<img src="lines_of_code.png" />')
|
f.write('<img src="lines_of_code.png" />')
|
||||||
|
|
||||||
fg = open(path + '/lines_of_code.dat', 'w')
|
fg = open(path + '/lines_of_code.dat', 'w')
|
||||||
for stamp in sorted(data.changes_by_date.keys()):
|
for stamp in sorted(data.changes_by_date.keys()):
|
||||||
@ -1280,7 +1314,9 @@ plot """
|
|||||||
plots = []
|
plots = []
|
||||||
for a in self.authors_to_plot:
|
for a in self.authors_to_plot:
|
||||||
i = i + 1
|
i = i + 1
|
||||||
plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
|
a = a.replace("\"", "\\\"")
|
||||||
|
a = a.replace('`', '')
|
||||||
|
plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, a))
|
||||||
f.write(", ".join(plots))
|
f.write(", ".join(plots))
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
|
|
||||||
@ -1307,7 +1343,9 @@ plot """
|
|||||||
plots = []
|
plots = []
|
||||||
for a in self.authors_to_plot:
|
for a in self.authors_to_plot:
|
||||||
i = i + 1
|
i = i + 1
|
||||||
plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
|
a = a.replace("\"", "\\\"")
|
||||||
|
a = a.replace('`', '')
|
||||||
|
plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, a))
|
||||||
f.write(", ".join(plots))
|
f.write(", ".join(plots))
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user