/*
*   $Id$
*
*   Copyright (c) 2000-2003, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Python language
*   files.
*/
/*
*   INCLUDE FILES
*/
#include "general.h"  /* must always come first */

#include <ctype.h>    /* isalpha, isalnum, isspace */
#include <string.h>   /* strcmp, strncmp, strstr, strchr, strrchr, strdup */

#include "entry.h"
#include "options.h"
#include "read.h"
#include "main.h"
#include "vstring.h"
#include "nestlevel.h"

/*
*   DATA DEFINITIONS
*/

/* Tag kinds. The enumerators are listed in the same order as the rows of
 * PythonKinds below (presumably the value is used as an index into that
 * table by makeSimpleTag() -- keep both in sync). */
typedef enum {
	K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE, K_IMPORT
} pythonKind;

static kindOption PythonKinds[] = {
	{TRUE, 'c', "class",     "classes"},
	{TRUE, 'f', "function",  "functions"},
	{TRUE, 'm', "member",    "class members"},
	{TRUE, 'v', "variable",  "variables"},
	{TRUE, 'i', "namespace", "imports"}
};

/* The two delimiters that open/close a multi-line (triple-quoted) string. */
static char const * const singletriple = "'''";
static char const * const doubletriple = "\"\"\"";

/*
*   FUNCTION DEFINITIONS
*/

/* Last character stored in a vString. The string must not be empty. */
#define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])

/* Tell whether c may start an identifier: alphabetic (per isalpha()) or '_'.
 * The value is converted to unsigned char before it reaches <ctype.h>, so a
 * caller may pass a plain char that happens to be negative (e.g. a byte of a
 * multi-byte sequence) without invoking undefined behaviour. */
static boolean isIdentifierFirstCharacter (int c)
{
	return (boolean) (isalpha ((unsigned char) c) || c == '_');
}

/* Tell whether c may appear inside an identifier: alphanumeric (per
 * isalnum()) or '_'. Same unsigned char conversion as above. */
static boolean isIdentifierCharacter (int c)
{
	return (boolean) (isalnum ((unsigned char) c) || c == '_');
}

/* Return the last component of a parent scope string: the text following the
 * last '.', and within that the text following the last '/'. The returned
 * pointer aliases "parent"; NULL is returned only when "parent" is NULL. */
static const char *get_class_name_from_parent (const char *parent)
{
	const char *sep;

	if (parent == NULL)
		return NULL;

	sep = strrchr (parent, '.');
	if (sep != NULL)
		parent = sep + 1;

	sep = strrchr (parent, '/');
	return (sep != NULL) ? sep + 1 : parent;
}

/* Given a string with the contents of a line directly after the "def" keyword,
 * extract all relevant information and create a tag.
*/
/* function:        name of the function being tagged.
 * parent:          dotted name of the enclosing scope; may be empty (or NULL)
 *                  when there is none.
 * is_class_parent: non-zero when the enclosing scope is a class, in which
 *                  case the tag is emitted as a "member".
 * arglist:         argument list text stored in the tag; may be NULL. The
 *                  caller keeps ownership.
 */
static void makeFunctionTag (vString *const function, vString *const parent,
	int is_class_parent, const char *arglist)
{
	const char *const func_name = vStringValue (function);
	const boolean is_init = (boolean) (strcmp (func_name, "__init__") == 0);
	/* "parent" is tested against NULL before every use so that the NULL case
	 * is handled uniformly throughout the function. */
	const boolean has_parent =
		(boolean) (parent != NULL && vStringLength (parent) > 0);
	tagEntryInfo tag;

	initTagEntry (&tag, func_name);

	tag.kindName = "function";
	tag.kind = 'f';
	tag.extensionFields.arglist = arglist;

	/* add argument list of __init__() methods to the class tag */
	if (is_init && parent != NULL)
	{
		const char *parent_tag_name =
			get_class_name_from_parent (vStringValue (parent));
		if (parent_tag_name != NULL)
			setTagArglistByName (parent_tag_name, arglist);
	}

	if (has_parent)
	{
		if (is_class_parent)
		{
			/* a function defined directly in a class is a member */
			tag.kindName = "member";
			tag.kind = 'm';
		}
		tag.extensionFields.scope [0] = is_class_parent ? "class" : "function";
		tag.extensionFields.scope [1] = vStringValue (parent);
	}

	/* If a function starts with __, we mark it as file scope.
	 * FIXME: What is the proper way to signal such attributes?
	 * TODO: What does functions/classes starting with _ and __ mean in python?
	 */
	if (strncmp (func_name, "__", 2) == 0 && !is_init)
	{
		tag.extensionFields.access = "private";
		tag.isFileScope = TRUE;
	}
	else
	{
		tag.extensionFields.access = "public";
	}

	makeTagEntry (&tag);
}

/* Given a string with the contents of the line directly after the "class"
 * keyword, extract all necessary information and create a tag.
*/ static void makeClassTag (vString *const class, vString *const inheritance, vString *const parent, int is_class_parent) { tagEntryInfo tag; initTagEntry (&tag, vStringValue (class)); tag.kindName = "class"; tag.kind = 'c'; if (vStringLength (parent) > 0) { if (is_class_parent) { tag.extensionFields.scope [0] = "class"; tag.extensionFields.scope [1] = vStringValue (parent); } else { tag.extensionFields.scope [0] = "function"; tag.extensionFields.scope [1] = vStringValue (parent); } } tag.extensionFields.inheritance = vStringValue (inheritance); makeTagEntry (&tag); } static void makeVariableTag (vString *const var, vString *const parent) { tagEntryInfo tag; initTagEntry (&tag, vStringValue (var)); tag.kindName = "variable"; tag.kind = 'v'; if (vStringLength (parent) > 0) { tag.extensionFields.scope [0] = "class"; tag.extensionFields.scope [1] = vStringValue (parent); } makeTagEntry (&tag); } /* Skip a single or double quoted string. */ static const char *skipString (const char *cp) { const char *start = cp; int escaped = 0; for (cp++; *cp; cp++) { if (escaped) escaped--; else if (*cp == '\\') escaped++; else if (*cp == *start) return cp + 1; } return cp; } /* Skip everything up to an identifier start. */ static const char *skipEverything (const char *cp) { int match; for (; *cp; cp++) { match = 0; if (*cp == '"' || *cp == '\'' || *cp == '#') match = 1; /* these checks find unicode, binary (Python 3) and raw strings */ if (!match && ( !strncasecmp(cp, "u'", 2) || !strncasecmp(cp, "u\"", 2) || !strncasecmp(cp, "r'", 2) || !strncasecmp(cp, "r\"", 2) || !strncasecmp(cp, "b'", 2) || !strncasecmp(cp, "b\"", 2))) { match = 1; cp += 1; } if (!match && ( !strncasecmp(cp, "ur'", 3) || !strncasecmp(cp, "ur\"", 3) || !strncasecmp(cp, "br'", 3) || !strncasecmp(cp, "br\"", 3))) { match = 1; cp += 2; } if (match) { cp = skipString(cp); if (!*cp) break; } if (isIdentifierFirstCharacter ((int) *cp)) return cp; } return cp; } /* Skip an identifier. 
*/ static const char *skipIdentifier (const char *cp) { while (isIdentifierCharacter ((int) *cp)) cp++; return cp; } static const char *findDefinitionOrClass (const char *cp) { while (*cp) { cp = skipEverything (cp); if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5) || !strncmp(cp, "cdef", 4) || !strncmp(cp, "cpdef", 5)) { return cp; } cp = skipIdentifier (cp); } return NULL; } static const char *skipSpace (const char *cp) { while (isspace ((int) *cp)) ++cp; return cp; } /* Starting at ''cp'', parse an identifier into ''identifier''. */ static const char *parseIdentifier (const char *cp, vString *const identifier) { vStringClear (identifier); while (isIdentifierCharacter ((int) *cp)) { vStringPut (identifier, (int) *cp); ++cp; } vStringTerminate (identifier); return cp; } static void parseClass (const char *cp, vString *const class, vString *const parent, int is_class_parent) { vString *const inheritance = vStringNew (); vStringClear (inheritance); cp = parseIdentifier (cp, class); cp = skipSpace (cp); if (*cp == '(') { ++cp; while (*cp != ')') { if (*cp == '\0') { /* Closing parenthesis can be in follow up line. */ cp = (const char *) fileReadLine (); if (!cp) break; vStringPut (inheritance, ' '); continue; } vStringPut (inheritance, *cp); ++cp; } vStringTerminate (inheritance); } makeClassTag (class, inheritance, parent, is_class_parent); vStringDelete (inheritance); } static void parseImports (const char *cp) { const char *pos; vString *name, *name_next; cp = skipEverything (cp); if ((pos = strstr (cp, "import")) == NULL) return; cp = pos + 6; /* continue only if there is some space between the keyword and the identifier */ if (! 
isspace (*cp)) return; cp++; cp = skipSpace (cp); name = vStringNew (); name_next = vStringNew (); cp = skipEverything (cp); while (*cp) { cp = parseIdentifier (cp, name); cp = skipEverything (cp); /* we parse the next possible import statement as well to be able to ignore 'foo' in * 'import foo as bar' */ parseIdentifier (cp, name_next); /* take the current tag only if the next one is not "as" */ if (strcmp (vStringValue (name_next), "as") != 0 && strcmp (vStringValue (name), "as") != 0) { makeSimpleTag (name, PythonKinds, K_IMPORT); } } vStringDelete (name); vStringDelete (name_next); } /* modified from get.c getArglistFromStr(). * warning: terminates rest of string past arglist! * note: does not ignore brackets inside strings! */ static char *parseArglist(const char *buf) { char *start, *end; int level; if (NULL == buf) return NULL; if (NULL == (start = strchr(buf, '('))) return NULL; for (level = 1, end = start + 1; level > 0; ++end) { if ('\0' == *end) break; else if ('(' == *end) ++ level; else if (')' == *end) -- level; } *end = '\0'; return strdup(start); } static void parseFunction (const char *cp, vString *const def, vString *const parent, int is_class_parent) { char *arglist; cp = parseIdentifier (cp, def); arglist = parseArglist (cp); makeFunctionTag (def, parent, is_class_parent, arglist); if (arglist != NULL) eFree (arglist); } /* Get the combined name of a nested symbol. Classes are separated with ".", * functions with "/". 
For example this code: * class MyClass: * def myFunction: * def SubFunction: * class SubClass: * def Method: * pass * Would produce this string: * MyClass.MyFunction/SubFunction/SubClass.Method */ static boolean constructParentString(NestingLevels *nls, int indent, vString *result) { int i; NestingLevel *prev = NULL; int is_class = FALSE; vStringClear (result); for (i = 0; i < nls->n; i++) { NestingLevel *nl = nls->levels + i; if (indent <= nl->indentation) break; if (prev) { vStringCatS(result, "."); /* make Geany symbol list grouping work properly */ /* if (prev->type == K_CLASS) vStringCatS(result, "."); else vStringCatS(result, "/"); */ } vStringCat(result, nl->name); is_class = (nl->type == K_CLASS); prev = nl; } return is_class; } /* Check whether parent's indentation level is higher than the current level and * if so, remove it. */ static void checkParent(NestingLevels *nls, int indent, vString *parent) { int i; NestingLevel *n; for (i = 0; i < nls->n; i++) { n = nls->levels + i; /* is there a better way to compare two vStrings? */ if (n && strcmp(vStringValue(parent), vStringValue(n->name)) == 0) { if (indent <= n->indentation) { /* remove this level by clearing its name */ vStringClear(n->name); } break; } } } static void addNestingLevel(NestingLevels *nls, int indentation, const vString *name, boolean is_class) { int i; NestingLevel *nl = NULL; for (i = 0; i < nls->n; i++) { nl = nls->levels + i; if (indentation <= nl->indentation) break; } if (i == nls->n) { nestingLevelsPush(nls, name, 0); nl = nls->levels + i; } else { /* reuse existing slot */ nls->n = i + 1; vStringCopy(nl->name, name); } nl->indentation = indentation; nl->type = is_class ? K_CLASS : !K_CLASS; } /* Return a pointer to the start of the next triple string, or NULL. Store * the kind of triple string in "which" if the return is not NULL. 
*/ static char const *find_triple_start(char const *string, char const **which) { char const *cp = string; for (; *cp; cp++) { if (*cp == '"' || *cp == '\'') { if (strncmp(cp, doubletriple, 3) == 0) { *which = doubletriple; return cp; } if (strncmp(cp, singletriple, 3) == 0) { *which = singletriple; return cp; } cp = skipString(cp); if (!*cp) break; } } return NULL; } /* Find the end of a triple string as pointed to by "which", and update "which" * with any other triple strings following in the given string. */ static void find_triple_end(char const *string, char const **which) { char const *s = string; while (1) { /* Check if the string ends in the same line. */ s = strstr (s, *which); if (!s) break; s += 3; *which = NULL; /* If yes, check if another one starts in the same line. */ s = find_triple_start(s, which); if (!s) break; s += 3; } } static const char *findVariable(const char *line) { /* Parse global and class variable names (C.x) from assignment statements. * Object attributes (obj.x) are ignored. * Assignment to a tuple 'x, y = 2, 3' not supported. * TODO: ignore duplicate tags from reassignment statements. 
*/ const char *cp, *sp, *eq, *start; cp = strstr(line, "="); if (!cp) return NULL; eq = cp + 1; while (*eq) { if (*eq == '=') return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */ if (*eq == '(' || *eq == '#') break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */ eq++; } /* go backwards to the start of the line, checking we have valid chars */ start = cp - 1; while (start >= line && isspace ((int) *start)) --start; while (start >= line && isIdentifierCharacter ((int) *start)) --start; if (!isIdentifierFirstCharacter(*(start + 1))) return NULL; sp = start; while (sp >= line && isspace ((int) *sp)) --sp; if ((sp + 1) != line) /* the line isn't a simple variable assignment */ return NULL; /* the line is valid, parse the variable name */ ++start; return start; } /* Skip type declaration that optionally follows a cdef/cpdef */ static const char *skipTypeDecl (const char *cp, boolean *is_class) { const char *lastStart = cp, *ptr = cp; int loopCount = 0; ptr = skipSpace(cp); if (!strncmp("extern", ptr, 6)) { ptr += 6; ptr = skipSpace(ptr); if (!strncmp("from", ptr, 4)) { return NULL; } } if (!strncmp("class", ptr, 5)) { ptr += 5 ; *is_class = TRUE; ptr = skipSpace(ptr); return ptr; } /* limit so that we don't pick off "int item=obj()" */ while (*ptr && loopCount++ < 2) { while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++; if (!*ptr || *ptr == '=') return NULL; if (*ptr == '(') { return lastStart; /* if we stopped on a '(' we are done */ } ptr = skipSpace(ptr); lastStart = ptr; while (*lastStart == '*') lastStart++; /* cdef int *identifier */ } return NULL; } /* checks if there is a lambda at position of cp, and return its argument list * if so. * We don't return the lambda name since it is useless for now since we already * know it when we call this function, and it would be a little slower. 
*/ static boolean varIsLambda (const char *cp, char **arglist) { boolean is_lambda = FALSE; cp = skipSpace (cp); cp = skipIdentifier (cp); /* skip the lambda's name */ cp = skipSpace (cp); if (*cp == '=') { cp++; cp = skipSpace (cp); if (strncmp (cp, "lambda", 6) == 0) { const char *tmp; cp += 6; /* skip the lambda */ tmp = skipSpace (cp); /* check if there is a space after lambda to detect assignations * starting with 'lambdaXXX' */ if (tmp != cp) { vString *args = vStringNew (); cp = tmp; vStringPut (args, '('); for (; *cp != 0 && *cp != ':'; cp++) vStringPut (args, *cp); vStringPut (args, ')'); vStringTerminate (args); if (arglist) *arglist = strdup (vStringValue (args)); vStringDelete (args); is_lambda = TRUE; } } } return is_lambda; } static void findPythonTags (void) { vString *const continuation = vStringNew (); vString *const name = vStringNew (); vString *const parent = vStringNew(); NestingLevels *const nesting_levels = nestingLevelsNew(); const char *line; int line_skip = 0; char const *longStringLiteral = NULL; while ((line = (const char *) fileReadLine ()) != NULL) { const char *cp = line, *candidate; char const *longstring; char const *keyword, *variable; int indent; cp = skipSpace (cp); if (*cp == '\0') /* skip blank line */ continue; /* Skip comment if we are not inside a multi-line string. */ if (*cp == '#' && !longStringLiteral) continue; /* Deal with line continuation. */ if (!line_skip) vStringClear(continuation); vStringCatS(continuation, line); vStringStripTrailing(continuation); if (vStringLast(continuation) == '\\') { vStringChop(continuation); vStringCatS(continuation, " "); line_skip = 1; continue; } cp = line = vStringValue(continuation); cp = skipSpace (cp); indent = cp - line; line_skip = 0; checkParent(nesting_levels, indent, parent); /* Deal with multiline string ending. */ if (longStringLiteral) { find_triple_end(cp, &longStringLiteral); continue; } /* Deal with multiline string start. 
*/ longstring = find_triple_start(cp, &longStringLiteral); if (longstring) { longstring += 3; find_triple_end(longstring, &longStringLiteral); /* We don't parse for any tags in the rest of the line. */ continue; } /* Deal with def and class keywords. */ keyword = findDefinitionOrClass (cp); if (keyword) { boolean found = FALSE; boolean is_class = FALSE; if (!strncmp (keyword, "def ", 4)) { cp = skipSpace (keyword + 3); found = TRUE; } else if (!strncmp (keyword, "class ", 6)) { cp = skipSpace (keyword + 5); found = TRUE; is_class = TRUE; } else if (!strncmp (keyword, "cdef ", 5)) { cp = skipSpace(keyword + 4); candidate = skipTypeDecl (cp, &is_class); if (candidate) { found = TRUE; cp = candidate; } } else if (!strncmp (keyword, "cpdef ", 6)) { cp = skipSpace(keyword + 5); candidate = skipTypeDecl (cp, &is_class); if (candidate) { found = TRUE; cp = candidate; } } if (found) { boolean is_parent_class; is_parent_class = constructParentString(nesting_levels, indent, parent); if (is_class) parseClass (cp, name, parent, is_parent_class); else parseFunction(cp, name, parent, is_parent_class); addNestingLevel(nesting_levels, indent, name, is_class); } } /* Find global and class variables */ variable = findVariable(line); if (variable) { const char *start = variable; char *arglist; boolean parent_is_class; vStringClear (name); while (isIdentifierCharacter ((int) *start)) { vStringPut (name, (int) *start); ++start; } vStringTerminate (name); parent_is_class = constructParentString(nesting_levels, indent, parent); if (varIsLambda (variable, &arglist)) { /* show class members or top-level script lambdas only */ if (parent_is_class || vStringLength(parent) == 0) makeFunctionTag (name, parent, parent_is_class, arglist); if (arglist != NULL) eFree (arglist); } else { /* skip variables in methods */ if (! 
parent_is_class && vStringLength(parent) > 0) continue; makeVariableTag (name, parent); } } /* Find and parse imports */ parseImports(line); } /* Clean up all memory we allocated. */ vStringDelete (parent); vStringDelete (name); vStringDelete (continuation); nestingLevelsFree (nesting_levels); } extern parserDefinition *PythonParser (void) { static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL }; parserDefinition *def = parserNew ("Python"); def->kinds = PythonKinds; def->kindCount = KIND_COUNT (PythonKinds); def->extensions = extensions; def->parser = findPythonTags; return def; } /* vi:set tabstop=4 shiftwidth=4: */