5acdfaf4e0
git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@3809 ea778897-0a13-0410-b9d1-a72fbfd435f5
695 lines
16 KiB
C
695 lines
16 KiB
C
/*
|
|
* $Id$
|
|
*
|
|
* Copyright (c) 2000-2003, Darren Hiebert
|
|
*
|
|
* This source code is released for free distribution under the terms of the
|
|
* GNU General Public License.
|
|
*
|
|
* This module contains functions for generating tags for Python language
|
|
* files.
|
|
*/
|
|
/*
|
|
* INCLUDE FILES
|
|
*/
|
|
#include "general.h"	/* must always come first */

#include <stdlib.h>
#include <string.h>

#include "entry.h"
#include "options.h"
#include "read.h"
#include "main.h"
#include "vstring.h"
#include "nestlevel.h"
|
|
|
|
/*
|
|
* DATA DEFINITIONS
|
|
*/
|
|
/* Kinds of tags produced for Python sources, listed in the same order as the
 * rows of PythonKinds[]. */
typedef enum {
	K_CLASS,
	K_FUNCTION,
	K_MEMBER,
	K_VARIABLE,
	K_IMPORT
} pythonKind;
|
|
|
|
static kindOption PythonKinds[] = {
|
|
{TRUE, 'c', "class", "classes"},
|
|
{TRUE, 'f', "function", "functions"},
|
|
{TRUE, 'm', "member", "class members"},
|
|
{TRUE, 'v', "variable", "variables"},
|
|
{TRUE, 'i', "namespace", "imports"}
|
|
};
|
|
|
|
/* Delimiters of Python multi-line strings. find_triple_start() stores one of
 * these two pointers to record which kind of string is currently open. */
static const char *const singletriple = "'''";
static const char *const doubletriple = "\"\"\"";
|
|
|
|
/*
|
|
* FUNCTION DEFINITIONS
|
|
*/
|
|
|
|
/* Last character stored in ''vs'', or '\0' when ''vs'' is empty (indexing
 * length - 1 on an empty string would read in front of the buffer).
 * ''vs'' is evaluated more than once, so pass an expression without side
 * effects. */
#define vStringLast(vs) \
	((vs)->length > 0 ? (vs)->buffer[(vs)->length - 1] : '\0')
|
|
|
|
static boolean isIdentifierFirstCharacter (int c)
|
|
{
|
|
return (boolean) (isalpha (c) || c == '_');
|
|
}
|
|
|
|
static boolean isIdentifierCharacter (int c)
|
|
{
|
|
return (boolean) (isalnum (c) || c == '_');
|
|
}
|
|
|
|
/* Given a string with the contents of a line directly after the "def" keyword,
|
|
* extract all relevant information and create a tag.
|
|
*/
|
|
static void makeFunctionTag (vString *const function,
|
|
vString *const parent, int is_class_parent, const char *arglist)
|
|
{
|
|
tagEntryInfo tag;
|
|
initTagEntry (&tag, vStringValue (function));
|
|
|
|
tag.kindName = "function";
|
|
tag.kind = 'f';
|
|
tag.extensionFields.arglist = arglist;
|
|
|
|
if (vStringLength (parent) > 0)
|
|
{
|
|
if (is_class_parent)
|
|
{
|
|
tag.kindName = "member";
|
|
tag.kind = 'm';
|
|
tag.extensionFields.scope [0] = "class";
|
|
tag.extensionFields.scope [1] = vStringValue (parent);
|
|
}
|
|
else
|
|
{
|
|
tag.extensionFields.scope [0] = "function";
|
|
tag.extensionFields.scope [1] = vStringValue (parent);
|
|
}
|
|
}
|
|
|
|
/* If a function starts with __, we mark it as file scope.
|
|
* FIXME: What is the proper way to signal such attributes?
|
|
* TODO: What does functions/classes starting with _ and __ mean in python?
|
|
*/
|
|
if (strncmp (vStringValue (function), "__", 2) == 0 &&
|
|
strcmp (vStringValue (function), "__init__") != 0)
|
|
{
|
|
tag.extensionFields.access = "private";
|
|
tag.isFileScope = TRUE;
|
|
}
|
|
else
|
|
{
|
|
tag.extensionFields.access = "public";
|
|
}
|
|
makeTagEntry (&tag);
|
|
}
|
|
|
|
/* Given a string with the contents of the line directly after the "class"
|
|
* keyword, extract all necessary information and create a tag.
|
|
*/
|
|
static void makeClassTag (vString *const class, vString *const inheritance,
|
|
vString *const parent, int is_class_parent)
|
|
{
|
|
tagEntryInfo tag;
|
|
initTagEntry (&tag, vStringValue (class));
|
|
tag.kindName = "class";
|
|
tag.kind = 'c';
|
|
if (vStringLength (parent) > 0)
|
|
{
|
|
if (is_class_parent)
|
|
{
|
|
tag.extensionFields.scope [0] = "class";
|
|
tag.extensionFields.scope [1] = vStringValue (parent);
|
|
}
|
|
else
|
|
{
|
|
tag.extensionFields.scope [0] = "function";
|
|
tag.extensionFields.scope [1] = vStringValue (parent);
|
|
}
|
|
}
|
|
tag.extensionFields.inheritance = vStringValue (inheritance);
|
|
makeTagEntry (&tag);
|
|
}
|
|
|
|
/* Create the tag entry for the variable named ''var''. A non-empty
 * ''parent'' is recorded as the enclosing "class" scope. */
static void makeVariableTag (vString *const var, vString *const parent)
{
	const boolean hasParent = (boolean) (vStringLength (parent) > 0);
	tagEntryInfo tag;

	initTagEntry (&tag, vStringValue (var));
	tag.kindName = "variable";
	tag.kind = 'v';

	if (hasParent)
	{
		tag.extensionFields.scope [0] = "class";
		tag.extensionFields.scope [1] = vStringValue (parent);
	}

	makeTagEntry (&tag);
}
|
|
|
|
/* Skip a single or double quoted string.
 * ''cp'' must point at the opening quote. Returns a pointer just past the
 * matching closing quote, or to the terminating NUL when the string is not
 * closed on this line. A backslash protects the character following it. */
static const char *skipString (const char *cp)
{
	const char quote = *cp;
	const char *p = cp + 1;

	while (*p != '\0')
	{
		if (*p == '\\')
		{
			/* step over the backslash; the escaped character itself is
			 * skipped by the increment at the bottom of the loop */
			p++;
			if (*p == '\0')
				break;
		}
		else if (*p == quote)
		{
			return p + 1;
		}
		p++;
	}
	return p;
}
|
|
|
|
/* Skip everything up to an identifier start.
 * Quoted strings are stepped over as a whole, so identifier-like text inside
 * them is ignored. Returns a pointer to the first character that can start
 * an identifier, or to the terminating NUL when there is none. */
static const char *skipEverything (const char *cp)
{
	while (*cp != '\0')
	{
		if (*cp == '"' || *cp == '\'')
		{
			/* skipString() lands just past the closing quote. Examine that
			 * character on the next pass instead of stepping over it:
			 * it may open another string (as in 'a''b') and must not be
			 * skipped unseen. */
			cp = skipString (cp);
			continue;
		}
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		cp++;
	}
	return cp;
}
|
|
|
|
/* Skip an identifier.
 * Returns a pointer to the first character at or after ''cp'' that is not an
 * identifier character (''cp'' itself when it is not on an identifier). */
static const char *skipIdentifier (const char *cp)
{
	const char *end;

	for (end = cp; isIdentifierCharacter ((int) *end); end++)
		;
	return end;
}
|
|
|
|
/* Scan ''cp'' identifier by identifier (strings are skipped) and return a
 * pointer to the first one that begins with "def", "class", "cdef" or
 * "cpdef", or NULL when the line holds none. Only the prefix is compared
 * here; the caller checks that the keyword is followed by a space. */
static const char *findDefinitionOrClass (const char *cp)
{
	static const char *const keywords[] = { "def", "class", "cdef", "cpdef" };
	const size_t keywordCount = sizeof keywords / sizeof keywords[0];

	while (*cp != '\0')
	{
		size_t k;

		cp = skipEverything (cp);
		for (k = 0; k < keywordCount; k++)
		{
			if (strncmp (cp, keywords[k], strlen (keywords[k])) == 0)
				return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}
|
|
|
|
/* Return a pointer to the first character at or after ''cp'' that is not
 * white space (possibly the terminating NUL). */
static const char *skipSpace (const char *cp)
{
	/* Plain char may be signed; isspace() is only defined for unsigned char
	 * values (and EOF), so bytes >= 0x80 must be converted first. */
	while (isspace ((unsigned char) *cp))
		++cp;
	return cp;
}
|
|
|
|
/* Starting at ''cp'', parse an identifier into ''identifier''. */
|
|
static const char *parseIdentifier (const char *cp, vString *const identifier)
|
|
{
|
|
vStringClear (identifier);
|
|
while (isIdentifierCharacter ((int) *cp))
|
|
{
|
|
vStringPut (identifier, (int) *cp);
|
|
++cp;
|
|
}
|
|
vStringTerminate (identifier);
|
|
return cp;
|
|
}
|
|
|
|
/* Parse a class statement and create its tag.
 *
 * cp:              text directly after the "class" keyword
 * class:           receives the class name
 * parent:          combined name of the enclosing scope, empty if none
 * is_class_parent: non-zero when the enclosing scope is a class
 *
 * When the name is followed by '(' everything up to the first ')' is
 * collected as the inheritance text. Further input lines are read if the
 * parenthesis is not closed on this one; each line break becomes one space.
 */
static void parseClass (const char *cp, vString *const class,
	vString *const parent, int is_class_parent)
{
	vString *const inheritance = vStringNew ();

	vStringClear (inheritance);
	cp = skipSpace (parseIdentifier (cp, class));
	if (*cp == '(')
	{
		++cp;
		for (;;)
		{
			if (*cp == ')')
				break;
			if (*cp != '\0')
			{
				vStringPut (inheritance, *cp);
				++cp;
				continue;
			}
			/* Closing parenthesis can be in follow up line. */
			cp = (const char *) fileReadLine ();
			if (cp == NULL)
				break;	/* end of input before the list was closed */
			vStringPut (inheritance, ' ');
		}
		vStringTerminate (inheritance);
	}
	makeClassTag (class, inheritance, parent, is_class_parent);
	vStringDelete (inheritance);
}
|
|
|
|
static void parseImports (const char *cp)
|
|
{
|
|
const char *pos;
|
|
vString *name, *name_next;
|
|
|
|
cp = skipEverything (cp);
|
|
|
|
if ((pos = strstr (cp, "import")) == NULL)
|
|
return;
|
|
|
|
cp = pos + 6;
|
|
|
|
/* continue only if there is some space between the keyword and the identifier */
|
|
if (! isspace (*cp))
|
|
return;
|
|
|
|
cp++;
|
|
cp = skipSpace (cp);
|
|
|
|
name = vStringNew ();
|
|
name_next = vStringNew ();
|
|
|
|
cp = skipEverything (cp);
|
|
while (*cp)
|
|
{
|
|
cp = parseIdentifier (cp, name);
|
|
|
|
cp = skipEverything (cp);
|
|
/* we parse the next possible import statement as well to be able to ignore 'foo' in
|
|
* 'import foo as bar' */
|
|
parseIdentifier (cp, name_next);
|
|
|
|
/* take the current tag only if the next one is not "as" */
|
|
if (strcmp (vStringValue (name_next), "as") != 0 &&
|
|
strcmp (vStringValue (name), "as") != 0)
|
|
{
|
|
makeSimpleTag (name, PythonKinds, K_IMPORT);
|
|
}
|
|
}
|
|
vStringDelete (name);
|
|
vStringDelete (name_next);
|
|
}
|
|
|
|
/* modified from get.c getArglistFromStr().
 * Return a newly allocated copy of the argument list found in ''buf'': the
 * text from the first '(' up to and including its matching ')', or up to the
 * end of ''buf'' when the parentheses are not balanced on this line.
 * Returns NULL when ''buf'' is NULL, holds no '(' or memory runs out.
 * ''buf'' itself is left untouched. The result comes from malloc() and must
 * be released by the caller.
 * note: does not ignore brackets inside strings! */
static char *parseArglist(const char *buf)
{
	const char *start, *end;
	char *arglist;
	size_t length;
	int level;

	if (NULL == buf)
		return NULL;
	start = strchr (buf, '(');
	if (NULL == start)
		return NULL;

	/* advance ''end'' to just past the parenthesis that closes ''start'' */
	level = 1;
	for (end = start + 1; level > 0 && *end != '\0'; ++end)
	{
		if ('(' == *end)
			++level;
		else if (')' == *end)
			--level;
	}

	/* Copy only the list instead of writing a NUL into the caller's
	 * (const) line, which other parsing steps still read afterwards. */
	length = (size_t) (end - start);
	arglist = malloc (length + 1);
	if (NULL == arglist)
		return NULL;
	memcpy (arglist, start, length);
	arglist[length] = '\0';
	return arglist;
}
|
|
|
|
/* Parse a function definition and create its tag.
 *
 * cp:              text directly after the "def" keyword (or after the type
 *                  declaration of a cdef/cpdef)
 * def:             receives the function name
 * parent:          combined name of the enclosing scope, empty if none
 * is_class_parent: non-zero when the enclosing scope is a class
 */
static void parseFunction (const char *cp, vString *const def,
	vString *const parent, int is_class_parent)
{
	char *arglist;

	cp = parseIdentifier (cp, def);
	arglist = parseArglist (cp);
	makeFunctionTag (def, parent, is_class_parent, arglist);
	/* parseArglist() yields NULL when the line has no '('; do not hand a
	 * NULL pointer to eFree(). */
	if (arglist != NULL)
		eFree (arglist);
}
|
|
|
|
/* Get the combined name of a nested symbol. Classes are separated with ".",
|
|
* functions with "/". For example this code:
|
|
* class MyClass:
|
|
* def myFunction:
|
|
* def SubFunction:
|
|
* class SubClass:
|
|
* def Method:
|
|
* pass
|
|
* Would produce this string:
|
|
* MyClass.MyFunction/SubFunction/SubClass.Method
|
|
*/
|
|
static boolean constructParentString(NestingLevels *nls, int indent,
|
|
vString *result)
|
|
{
|
|
int i;
|
|
NestingLevel *prev = NULL;
|
|
int is_class = FALSE;
|
|
vStringClear (result);
|
|
for (i = 0; i < nls->n; i++)
|
|
{
|
|
NestingLevel *nl = nls->levels + i;
|
|
if (indent <= nl->indentation)
|
|
break;
|
|
if (prev)
|
|
{
|
|
if (prev->type == K_CLASS)
|
|
vStringCatS(result, ".");
|
|
else
|
|
vStringCatS(result, "/");
|
|
}
|
|
vStringCat(result, nl->name);
|
|
is_class = (nl->type == K_CLASS);
|
|
prev = nl;
|
|
}
|
|
return is_class;
|
|
}
|
|
|
|
/* Check whether parent's indentation level is higher than the current level and
|
|
* if so, remove it.
|
|
*/
|
|
static void checkParent(NestingLevels *nls, int indent, vString *parent)
|
|
{
|
|
int i;
|
|
NestingLevel *n;
|
|
|
|
for (i = 0; i < nls->n; i++)
|
|
{
|
|
n = nls->levels + i;
|
|
/* is there a better way to compare two vStrings? */
|
|
if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
|
|
{
|
|
if (n && indent <= n->indentation)
|
|
{
|
|
/* remove this level by clearing its name */
|
|
vStringClear(n->name);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void addNestingLevel(NestingLevels *nls, int indentation,
|
|
const vString *name, boolean is_class)
|
|
{
|
|
int i;
|
|
NestingLevel *nl = NULL;
|
|
|
|
for (i = 0; i < nls->n; i++)
|
|
{
|
|
nl = nls->levels + i;
|
|
if (indentation <= nl->indentation) break;
|
|
}
|
|
if (i == nls->n)
|
|
{
|
|
nestingLevelsPush(nls, name, 0);
|
|
nl = nls->levels + i;
|
|
}
|
|
else
|
|
{ /* reuse existing slot */
|
|
nls->n = i + 1;
|
|
vStringCopy(nl->name, name);
|
|
}
|
|
nl->indentation = indentation;
|
|
nl->type = is_class ? K_CLASS : !K_CLASS;
|
|
}
|
|
|
|
/* Return a pointer to the start of the next triple string, or NULL. Store
|
|
* the kind of triple string in "which" if the return is not NULL.
|
|
*/
|
|
static char const *find_triple_start(char const *string, char const **which)
|
|
{
|
|
char const *cp = string;
|
|
|
|
for (; *cp; cp++)
|
|
{
|
|
if (*cp == '"' || *cp == '\'')
|
|
{
|
|
if (strncmp(cp, doubletriple, 3) == 0)
|
|
{
|
|
*which = doubletriple;
|
|
return cp;
|
|
}
|
|
if (strncmp(cp, singletriple, 3) == 0)
|
|
{
|
|
*which = singletriple;
|
|
return cp;
|
|
}
|
|
cp = skipString(cp);
|
|
if (!*cp) break;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* Find the end of a triple string as pointed to by "which", and update "which"
 * with any other triple strings following in the given string.
 * On return "which" is NULL when no triple string is left open at the end of
 * ''string''; otherwise it names the delimiter of the one still open.
 */
static void find_triple_end(char const *string, char const **which)
{
	/* Check if the string ends in the same line. */
	char const *s = strstr (string, *which);

	while (s != NULL)
	{
		*which = NULL;
		/* It does; check if another one starts in the same line. */
		s = find_triple_start (s + 3, which);
		if (s != NULL)
			s = strstr (s + 3, *which);	/* and whether that one ends, too */
	}
}
|
|
|
|
/* Parse global and class variable names (C.x) from assignment statements.
 * Object attributes (obj.x) are ignored.
 * Assignment to a tuple 'x, y = 2, 3' not supported.
 * TODO: ignore duplicate tags from reassignment statements.
 *
 * Returns a pointer to the start of the variable name inside ''line'', or
 * NULL when the line is not a simple '<name> = ...' assignment whose name is
 * the first thing on the line.
 */
static const char *findVariable(const char *line)
{
	const char *assign, *scan, *nameStart, *lead;

	assign = strchr (line, '=');
	if (assign == NULL)
		return NULL;

	for (scan = assign + 1; *scan != '\0'; scan++)
	{
		if (*scan == '=')
			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
		if (*scan == '(' || *scan == '#')
			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
	}

	/* Walk backwards from the '=' over white space and then over the name.
	 * The pointers always address the character after the one being
	 * tested, so they never move in front of ''line''. */
	nameStart = assign;
	while (nameStart > line && isspace ((unsigned char) nameStart[-1]))
		--nameStart;
	while (nameStart > line && isIdentifierCharacter ((int) nameStart[-1]))
		--nameStart;
	if (!isIdentifierFirstCharacter ((int) *nameStart))
		return NULL;

	/* only indentation may precede the name */
	lead = nameStart;
	while (lead > line && isspace ((unsigned char) lead[-1]))
		--lead;
	if (lead != line)	/* the line isn't a simple variable assignment */
		return NULL;

	/* the line is valid, hand back the variable name */
	return nameStart;
}
|
|
|
|
/* Skip type declaration that optionally follows a cdef/cpdef */
|
|
static const char *skipTypeDecl (const char *cp, boolean *is_class)
|
|
{
|
|
const char *lastStart = cp, *ptr = cp;
|
|
int loopCount = 0;
|
|
ptr = skipSpace(cp);
|
|
if (!strncmp("extern", ptr, 6)) {
|
|
ptr += 6;
|
|
ptr = skipSpace(ptr);
|
|
if (!strncmp("from", ptr, 4)) { return NULL; }
|
|
}
|
|
if (!strncmp("class", ptr, 5)) {
|
|
ptr += 5 ;
|
|
*is_class = TRUE;
|
|
ptr = skipSpace(ptr);
|
|
return ptr;
|
|
}
|
|
/* limit so that we don't pick off "int item=obj()" */
|
|
while (*ptr && loopCount++ < 2) {
|
|
while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++;
|
|
if (!*ptr || *ptr == '=') return NULL;
|
|
if (*ptr == '(') {
|
|
return lastStart; /* if we stopped on a '(' we are done */
|
|
}
|
|
ptr = skipSpace(ptr);
|
|
lastStart = ptr;
|
|
while (*lastStart == '*') lastStart++; /* cdef int *identifier */
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void findPythonTags (void)
|
|
{
|
|
vString *const continuation = vStringNew ();
|
|
vString *const name = vStringNew ();
|
|
vString *const parent = vStringNew();
|
|
|
|
NestingLevels *const nesting_levels = nestingLevelsNew();
|
|
|
|
const char *line;
|
|
int line_skip = 0;
|
|
char const *longStringLiteral = NULL;
|
|
|
|
while ((line = (const char *) fileReadLine ()) != NULL)
|
|
{
|
|
const char *cp = line, *candidate;
|
|
char const *longstring;
|
|
char const *keyword, *variable;
|
|
int indent;
|
|
|
|
cp = skipSpace (cp);
|
|
|
|
if (*cp == '\0') /* skip blank line */
|
|
continue;
|
|
|
|
/* Skip comment if we are not inside a multi-line string. */
|
|
if (*cp == '#' && !longStringLiteral)
|
|
continue;
|
|
|
|
/* Deal with line continuation. */
|
|
if (!line_skip) vStringClear(continuation);
|
|
vStringCatS(continuation, line);
|
|
vStringStripTrailing(continuation);
|
|
if (vStringLast(continuation) == '\\')
|
|
{
|
|
vStringChop(continuation);
|
|
vStringCatS(continuation, " ");
|
|
line_skip = 1;
|
|
continue;
|
|
}
|
|
cp = line = vStringValue(continuation);
|
|
cp = skipSpace (cp);
|
|
indent = cp - line;
|
|
line_skip = 0;
|
|
|
|
checkParent(nesting_levels, indent, parent);
|
|
|
|
/* Deal with multiline string ending. */
|
|
if (longStringLiteral)
|
|
{
|
|
find_triple_end(cp, &longStringLiteral);
|
|
continue;
|
|
}
|
|
|
|
/* Deal with multiline string start. */
|
|
longstring = find_triple_start(cp, &longStringLiteral);
|
|
if (longstring)
|
|
{
|
|
longstring += 3;
|
|
find_triple_end(longstring, &longStringLiteral);
|
|
/* We don't parse for any tags in the rest of the line. */
|
|
continue;
|
|
}
|
|
|
|
/* Deal with def and class keywords. */
|
|
keyword = findDefinitionOrClass (cp);
|
|
if (keyword)
|
|
{
|
|
boolean found = FALSE;
|
|
boolean is_class = FALSE;
|
|
if (!strncmp (keyword, "def ", 4))
|
|
{
|
|
cp = skipSpace (keyword + 3);
|
|
found = TRUE;
|
|
}
|
|
else if (!strncmp (keyword, "class ", 6))
|
|
{
|
|
cp = skipSpace (keyword + 5);
|
|
found = TRUE;
|
|
is_class = TRUE;
|
|
}
|
|
else if (!strncmp (keyword, "cdef ", 5))
|
|
{
|
|
cp = skipSpace(keyword + 4);
|
|
candidate = skipTypeDecl (cp, &is_class);
|
|
if (candidate)
|
|
{
|
|
found = TRUE;
|
|
cp = candidate;
|
|
}
|
|
|
|
}
|
|
else if (!strncmp (keyword, "cpdef ", 6))
|
|
{
|
|
cp = skipSpace(keyword + 5);
|
|
candidate = skipTypeDecl (cp, &is_class);
|
|
if (candidate)
|
|
{
|
|
found = TRUE;
|
|
cp = candidate;
|
|
}
|
|
}
|
|
|
|
if (found)
|
|
{
|
|
boolean is_parent_class;
|
|
|
|
is_parent_class =
|
|
constructParentString(nesting_levels, indent, parent);
|
|
|
|
if (is_class)
|
|
parseClass (cp, name, parent, is_parent_class);
|
|
else
|
|
parseFunction(cp, name, parent, is_parent_class);
|
|
|
|
addNestingLevel(nesting_levels, indent, name, is_class);
|
|
}
|
|
}
|
|
/* Find global and class variables */
|
|
variable = findVariable(line);
|
|
if (variable)
|
|
{
|
|
const char *start = variable;
|
|
boolean parent_is_class;
|
|
|
|
vStringClear (name);
|
|
while (isIdentifierCharacter ((int) *start))
|
|
{
|
|
vStringPut (name, (int) *start);
|
|
++start;
|
|
}
|
|
vStringTerminate (name);
|
|
|
|
parent_is_class = constructParentString(nesting_levels, indent, parent);
|
|
/* skip variables in methods */
|
|
if (! parent_is_class && vStringLength(parent) > 0)
|
|
continue;
|
|
|
|
makeVariableTag (name, parent);
|
|
}
|
|
/* Find and parse imports */
|
|
parseImports(line);
|
|
}
|
|
/* Clean up all memory we allocated. */
|
|
vStringDelete (parent);
|
|
vStringDelete (name);
|
|
vStringDelete (continuation);
|
|
nestingLevelsFree (nesting_levels);
|
|
}
|
|
|
|
extern parserDefinition *PythonParser (void)
|
|
{
|
|
static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
|
|
parserDefinition *def = parserNew ("Python");
|
|
def->kinds = PythonKinds;
|
|
def->kindCount = KIND_COUNT (PythonKinds);
|
|
def->extensions = extensions;
|
|
def->parser = findPythonTags;
|
|
return def;
|
|
}
|
|
|
|
/* vi:set tabstop=4 shiftwidth=4: */
|