geany/tagmanager/python.c
Nick Treleaven 9ab442e146 Remove unnecessary vStringClear(name) calls after using the name
buffer, as the convention is now to clear the buffer before use, and
this is easier to remember to do.


git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@2674 ea778897-0a13-0410-b9d1-a72fbfd435f5
2008-06-10 12:16:27 +00:00

618 lines
14 KiB
C

/*
* $Id$
*
* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for Python language
* files.
*/
/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include <string.h>
#include "entry.h"
#include "options.h"
#include "read.h"
#include "main.h"
#include "vstring.h"
/*
* DATA DEFINITIONS
*/
typedef enum {
K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE
} pythonKind;
static kindOption PythonKinds[] = {
{TRUE, 'c', "class", "classes"},
{TRUE, 'f', "function", "functions"},
{TRUE, 'm', "member", "class members"},
{TRUE, 'v', "variable", "variables"}
};
typedef struct NestingLevel NestingLevel;
typedef struct NestingLevels NestingLevels;
struct NestingLevel
{
int indentation;
vString *name;
boolean is_class;
};
struct NestingLevels
{
NestingLevel *levels;
int n;
int allocated;
};
static char const * const singletriple = "'''";
static char const * const doubletriple = "\"\"\"";
/*
* FUNCTION DEFINITIONS
*/
#define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])
static boolean isIdentifierFirstCharacter (int c)
{
return (boolean) (isalpha (c) || c == '_');
}
static boolean isIdentifierCharacter (int c)
{
return (boolean) (isalnum (c) || c == '_');
}
/* Given a string with the contents of a line directly after the "def" keyword,
* extract all relevant information and create a tag.
*/
static void makeFunctionTag (vString *const function,
vString *const parent, int is_class_parent)
{
tagEntryInfo tag;
initTagEntry (&tag, vStringValue (function));
tag.kindName = "function";
tag.kind = 'f';
if (vStringLength (parent) > 0)
{
if (is_class_parent)
{
tag.kindName = "member";
tag.kind = 'm';
tag.extensionFields.scope [0] = "class";
tag.extensionFields.scope [1] = vStringValue (parent);
}
else
{
tag.extensionFields.scope [0] = "function";
tag.extensionFields.scope [1] = vStringValue (parent);
}
}
/* If a function starts with __, we mark it as file scope.
* FIXME: What is the proper way to signal such attributes?
* TODO: What does functions/classes starting with _ and __ mean in python?
*/
if (strncmp (vStringValue (function), "__", 2) == 0 &&
strcmp (vStringValue (function), "__init__") != 0)
{
tag.extensionFields.access = "private";
tag.isFileScope = TRUE;
}
else
{
tag.extensionFields.access = "public";
}
makeTagEntry (&tag);
}
/* Given a string with the contents of the line directly after the "class"
* keyword, extract all necessary information and create a tag.
*/
static void makeClassTag (vString *const class, vString *const inheritance,
vString *const parent, int is_class_parent)
{
tagEntryInfo tag;
initTagEntry (&tag, vStringValue (class));
tag.kindName = "class";
tag.kind = 'c';
if (vStringLength (parent) > 0)
{
if (is_class_parent)
{
tag.extensionFields.scope [0] = "class";
tag.extensionFields.scope [1] = vStringValue (parent);
}
else
{
tag.extensionFields.scope [0] = "function";
tag.extensionFields.scope [1] = vStringValue (parent);
}
}
tag.extensionFields.inheritance = vStringValue (inheritance);
makeTagEntry (&tag);
}
static void makeVariableTag (vString *const var, vString *const parent)
{
tagEntryInfo tag;
initTagEntry (&tag, vStringValue (var));
tag.kindName = "variable";
tag.kind = 'v';
if (vStringLength (parent) > 0)
{
tag.extensionFields.scope [0] = "class";
tag.extensionFields.scope [1] = vStringValue (parent);
}
makeTagEntry (&tag);
}
/* Skip a single or double quoted string. */
static const char *skipString (const char *cp)
{
const char *start = cp;
int escaped = 0;
for (cp++; *cp; cp++)
{
if (escaped)
escaped--;
else if (*cp == '\\')
escaped++;
else if (*cp == *start)
return cp + 1;
}
return cp;
}
/* Skip everything up to an identifier start. */
static const char *skipEverything (const char *cp)
{
for (; *cp; cp++)
{
if (*cp == '"' || *cp == '\'')
{
cp = skipString(cp);
if (!*cp) break;
}
if (isIdentifierFirstCharacter ((int) *cp))
return cp;
}
return cp;
}
/* Skip an identifier. */
static const char *skipIdentifier (const char *cp)
{
while (isIdentifierCharacter ((int) *cp))
cp++;
return cp;
}
static const char *findDefinitionOrClass (const char *cp)
{
while (*cp)
{
cp = skipEverything (cp);
if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5))
{
return cp;
}
cp = skipIdentifier (cp);
}
return NULL;
}
static const char *skipSpace (const char *cp)
{
while (isspace ((int) *cp))
++cp;
return cp;
}
/* Starting at ''cp'', parse an identifier into ''identifier''. */
static const char *parseIdentifier (const char *cp, vString *const identifier)
{
vStringClear (identifier);
while (isIdentifierCharacter ((int) *cp))
{
vStringPut (identifier, (int) *cp);
++cp;
}
vStringTerminate (identifier);
return cp;
}
static void parseClass (const char *cp, vString *const class,
vString *const parent, int is_class_parent)
{
vString *const inheritance = vStringNew ();
vStringClear (inheritance);
cp = parseIdentifier (cp, class);
cp = skipSpace (cp);
if (*cp == '(')
{
++cp;
while (*cp != ')')
{
if (*cp == '\0')
{
/* Closing parenthesis can be in follow up line. */
cp = (const char *) fileReadLine ();
if (!cp) break;
vStringPut (inheritance, ' ');
continue;
}
vStringPut (inheritance, *cp);
++cp;
}
vStringTerminate (inheritance);
}
makeClassTag (class, inheritance, parent, is_class_parent);
vStringDelete (inheritance);
}
static void parseFunction (const char *cp, vString *const def,
vString *const parent, int is_class_parent)
{
cp = parseIdentifier (cp, def);
makeFunctionTag (def, parent, is_class_parent);
}
/* Get the combined name of a nested symbol. Classes are separated with ".",
* functions with "/". For example this code:
* class MyClass:
* def myFunction:
* def SubFunction:
* class SubClass:
* def Method:
* pass
* Would produce this string:
* MyClass.MyFunction/SubFunction/SubClass.Method
*/
static boolean constructParentString(NestingLevels *nls, int indent,
vString *result)
{
int i;
NestingLevel *prev = NULL;
int is_class = FALSE;
vStringClear (result);
for (i = 0; i < nls->n; i++)
{
NestingLevel *nl = nls->levels + i;
if (indent <= nl->indentation)
break;
if (prev)
{
if (prev->is_class)
vStringCatS(result, ".");
else
vStringCatS(result, "/");
}
vStringCat(result, nl->name);
is_class = nl->is_class;
prev = nl;
}
return is_class;
}
/* check whether parent's indentation level is higher than the current level and if so, remove it */
static void checkParent(NestingLevels *nls, int indent, vString *parent)
{
int i;
NestingLevel *n;
for (i = 0; i < nls->n; i++)
{
n = nls->levels + i;
/* is there a better way to compare two vStrings? */
if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
{
if (n && indent <= n->indentation)
{
/* invalidate this level by clearing its name */
vStringClear(n->name);
}
break;
}
}
}
static NestingLevels *newNestingLevels(void)
{
NestingLevels *nls = xCalloc (1, NestingLevels);
return nls;
}
static void freeNestingLevels(NestingLevels *nls)
{
int i;
for (i = 0; i < nls->allocated; i++)
vStringDelete(nls->levels[i].name);
if (nls->levels) eFree(nls->levels);
eFree(nls);
}
/* TODO: This is totally out of place in python.c, but strlist.h is not usable.
* Maybe should just move these three functions to a separate file, even if no
* other parser uses them.
*/
static void addNestingLevel(NestingLevels *nls, int indentation,
vString *name, boolean is_class)
{
int i;
NestingLevel *nl = NULL;
for (i = 0; i < nls->n; i++)
{
nl = nls->levels + i;
if (indentation <= nl->indentation) break;
}
if (i == nls->n)
{
if (i >= nls->allocated)
{
nls->allocated++;
nls->levels = xRealloc(nls->levels,
nls->allocated, NestingLevel);
nls->levels[i].name = vStringNew();
}
nl = nls->levels + i;
}
nls->n = i + 1;
vStringCopy(nl->name, name);
nl->indentation = indentation;
nl->is_class = is_class;
}
/* Checks whether a triple string was quoted before.
*/
static boolean isTripleQuoted(char const *start, char const *end, char quote_char)
{
char const *cp = start;
while (cp < end && *cp != quote_char)
cp++;
return (cp < end);
}
/* Return a pointer to the start of the next triple string, or NULL. Store
* the kind of triple string in "which" if the return is not NULL.
*/
static char *find_triple_start(char const *string, char const **which)
{
char *s;
*which = NULL;
if ((s = strstr (string, doubletriple)))
{
/* prevent parsing quoted triple strings */
if (isTripleQuoted (string, s, '\''))
return NULL;
*which = doubletriple;
}
else if ((s = strstr (string, singletriple)))
{
/* prevent parsing quoted triple strings */
if (isTripleQuoted (string, s, '"'))
return NULL;
*which = singletriple;
}
return s;
}
/* Find the end of a triple string as pointed to by "which", and update "which"
* with any other triple strings following in the given string.
*/
static void find_triple_end(char const *string, char const **which)
{
char const *s = string;
while (1)
{
/* Check if the sting ends in the same line. */
s = strstr (string, *which);
if (!s) break;
s += 3;
*which = NULL;
/* If yes, check if another one starts in the same line. */
s = find_triple_start(s, which);
if (!s) break;
s += 3;
}
}
static const char *findVariable(const char *line)
{
/* Parse global and class variable names (C.x) from assignment statements.
* Object attributes (obj.x) are ignored.
* Assignment to a tuple 'x, y = 2, 3' not supported.
* TODO: ignore duplicate tags from reassignment statements. */
const char *cp, *sp, *eq, *start;
cp = strstr(line, "=");
if (! cp)
return NULL;
eq = cp + 1;
while (*eq)
{
if (*eq == '=')
return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */
if (*eq == '(')
break; /* allow 'x = func(b=2,y=2,' lines */
eq++;
}
/* go backwards to the start of the line, checking we have valid chars */
start = cp - 1;
while (start >= line && isspace ((int) *start))
--start;
while (start >= line && isIdentifierCharacter ((int) *start))
--start;
if (!isIdentifierFirstCharacter(*(start + 1)))
return NULL;
sp = start;
while (sp >= line && isspace ((int) *sp))
--sp;
if ((sp + 1) != line) /* the line isn't a simple variable assignment */
return NULL;
/* the line is valid, parse the variable name */
++start;
return start;
}
static void findPythonTags (void)
{
vString *const continuation = vStringNew ();
vString *const name = vStringNew ();
vString *const parent = vStringNew();
NestingLevels *const nesting_levels = newNestingLevels();
const char *line;
int line_skip = 0;
char const *longStringLiteral = NULL;
while ((line = (const char *) fileReadLine ()) != NULL)
{
const char *cp = line;
char *longstring;
const char *keyword, *variable;
int indent;
cp = skipSpace (cp);
if (*cp == '\0') /* skip blank line */
continue;
/* skip comment if we are not inside a triple string */
if (*cp == '#' && ! longStringLiteral)
continue;
/* Deal with line continuation. */
if (!line_skip) vStringClear(continuation);
vStringCatS(continuation, line);
vStringStripTrailing(continuation);
if (vStringLast(continuation) == '\\')
{
vStringChop(continuation);
vStringCatS(continuation, " ");
line_skip = 1;
continue;
}
cp = line = vStringValue(continuation);
cp = skipSpace (cp);
indent = cp - line;
line_skip = 0;
checkParent(nesting_levels, indent, parent);
/* Deal with multiline string ending. */
if (longStringLiteral)
{
find_triple_end(cp, &longStringLiteral);
continue;
}
/* Deal with multiline string start. */
longstring = find_triple_start(cp, &longStringLiteral);
if (longstring)
{
/* Note: For our purposes, the line just ends at the first long
* string. I.e. we don't parse for any tags in the rest of the
* line, but we do look for the string ending of course.
*/
*longstring = '\0';
longstring += 3;
find_triple_end(longstring, &longStringLiteral);
}
/* Deal with def and class keywords. */
keyword = findDefinitionOrClass (cp);
if (keyword)
{
boolean found = FALSE;
boolean is_class = FALSE;
if (!strncmp (keyword, "def ", 4))
{
cp = skipSpace (keyword + 3);
found = TRUE;
}
else if (!strncmp (keyword, "class ", 6))
{
cp = skipSpace (keyword + 5);
found = TRUE;
is_class = TRUE;
}
if (found)
{
boolean is_parent_class;
is_parent_class =
constructParentString(nesting_levels, indent, parent);
if (is_class)
parseClass (cp, name, parent, is_parent_class);
else
parseFunction(cp, name, parent, is_parent_class);
addNestingLevel(nesting_levels, indent, name, is_class);
}
}
/* Find global and class variables */
variable = findVariable(line);
if (variable)
{
const char *start = variable;
boolean parent_is_class;
vStringClear (name);
while (isIdentifierCharacter ((int) *start))
{
vStringPut (name, (int) *start);
++start;
}
vStringTerminate (name);
parent_is_class = constructParentString(nesting_levels, indent, parent);
/* skip variables in methods */
if (! parent_is_class && vStringLength(parent) > 0)
continue;
makeVariableTag (name, parent);
}
}
/* Clean up all memory we allocated. */
vStringDelete (parent);
vStringDelete (name);
vStringDelete (continuation);
freeNestingLevels (nesting_levels);
}
extern parserDefinition *PythonParser (void)
{
static const char *const extensions[] = { "py", "pyx", "pxd", "scons", "python", NULL };
parserDefinition *def = parserNew ("Python");
def->kinds = PythonKinds;
def->kindCount = KIND_COUNT (PythonKinds);
def->extensions = extensions;
def->parser = findPythonTags;
return def;
}
/* vi:set tabstop=4 shiftwidth=4: */