diff --git a/ChangeLog b/ChangeLog index 5ff62531..5b070c6a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,13 @@ from this feature. * src/document.c: Don't open zero byte sized files read-only (e.g. files in /proc). + * src/symbols.c, tagmanager/fortran.c, tagmanager/keyword.c, + tagmanager/js.c, tagmanager/asm.c, tagmanager/c.c: + Backport js.c and asm.c from CTags SVN. + Fix Assembler tagmanager support and add appropriate symbol types. + Fix JavaScript parse bug (#1895242). + Change default return value of lookupKeyword() in keyword.c as it was + done in CTags SVN(r339) and adjust affected parsers. 2008-03-03 Enrico Tröger diff --git a/src/symbols.c b/src/symbols.c index 97b1daeb..f04c28b2 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -718,6 +718,16 @@ static void init_tag_list(gint idx) NULL); break; } + case GEANY_FILETYPES_ASM: + { + tag_list_add_groups(tag_store, + &(tv_iters.tag_namespace), _("Labels"), "classviewer-namespace", + &(tv_iters.tag_function), _("Macros"), "classviewer-method", + &(tv_iters.tag_macro), _("Defines"), "classviewer-macro", + &(tv_iters.tag_struct), _("Types"), "classviewer-struct", + NULL); + break; + } case GEANY_FILETYPES_D: default: { diff --git a/tagmanager/asm.c b/tagmanager/asm.c index a820c3b7..f1dc39bc 100644 --- a/tagmanager/asm.c +++ b/tagmanager/asm.c @@ -1,6 +1,7 @@ /* +* $Id$ * -* Copyright (c) 2000-2001, Darren Hiebert +* Copyright (c) 2000-2003, Darren Hiebert * * This source code is released for free distribution under the terms of the * GNU General Public License. @@ -12,94 +13,374 @@ /* * INCLUDE FILES */ -#include "general.h" /* must always come first */ +#include "general.h" /* must always come first */ #include +#include "keyword.h" #include "parse.h" #include "read.h" +#include "main.h" #include "vstring.h" +/* +* DATA DECLARATIONS +*/ +typedef enum { + K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE +} AsmKind; + +typedef enum { + OP_UNDEFINED = -1, + OP_ALIGN, + OP_COLON_EQUAL, + OP_END, + OP_ENDM, + OP_ENDMACRO, + OP_ENDP, + OP_ENDS, + OP_EQU, + OP_EQUAL, + OP_LABEL, + OP_MACRO, + OP_PROC, + OP_RECORD, + OP_SECTIONS, + OP_SET, + OP_STRUCT, + OP_LAST +} opKeyword; + +typedef struct { + const char *operator; + opKeyword keyword; +} asmKeyword; + +typedef struct { + opKeyword keyword; + AsmKind kind; +} opKind; + /* * DATA DEFINITIONS */ -typedef enum { - K_DEFINE, K_LABEL, K_MACRO -} asmKind; +static langType Lang_asm; -/* indexed by asmKind */ static kindOption AsmKinds [] = { - { TRUE, 'd', "define", "defines (names assigned a specified value)"}, - { TRUE, 'l', "label", "labels (names assigned an address)"}, - { TRUE, 'm', "macro", "macros"} + { TRUE, 'd', "macro", "defines" }, + { TRUE, 'l', "namespace", "labels" }, + { TRUE, 'm', "function", "macros" }, + { TRUE, 't', "struct", "types (structs and records)" } +}; + +static const asmKeyword AsmKeywords [] = { + { "align", OP_ALIGN }, + { "endmacro", OP_ENDMACRO }, + { "endm", OP_ENDM }, + { "end", OP_END }, + { "endp", OP_ENDP }, + { "ends", OP_ENDS }, + { "equ", OP_EQU }, + { "label", OP_LABEL }, + { "macro", OP_MACRO }, + { ":=", OP_COLON_EQUAL }, + { "=", OP_EQUAL }, + { "proc", OP_PROC }, + { "record", OP_RECORD }, + { "sections", OP_SECTIONS }, + { "set", OP_SET }, + { "struct", OP_STRUCT } +}; + +static const opKind OpKinds [] = { + /* must be ordered same as opKeyword enumeration */ + { OP_ALIGN, K_NONE }, + { OP_COLON_EQUAL, K_DEFINE }, + { OP_END, K_NONE }, + { OP_ENDM, K_NONE }, + { OP_ENDMACRO, K_NONE }, + { OP_ENDP, K_NONE }, + { OP_ENDS, K_NONE }, + { OP_EQU, K_DEFINE }, + { OP_EQUAL, K_DEFINE }, + { OP_LABEL, K_LABEL }, + { OP_MACRO, K_MACRO }, + { OP_PROC, K_LABEL }, + { OP_RECORD, K_TYPE }, + { OP_SECTIONS, K_NONE }, + { OP_SET, K_DEFINE }, + { OP_STRUCT, K_TYPE } }; /* * FUNCTION DEFINITIONS */ - -/* Algorithm adapted from from GNU etags. - * By Bob Weiner, Motorola Inc., 4/3/94 - * Unix and microcontroller assembly tag handling - * look for '^ [a-zA-Z_.$] [a-zA_Z0-9_.$]*[: ^I^J]' - */ -static void findAsmTags (void) +static void buildAsmKeywordHash (void) { - vString *name = vStringNew (); - const unsigned char *line; - - while ((line = fileReadLine ()) != NULL) - { - const unsigned char *cp = line; - int c = *cp; - - /* If first char is alphabetic or one of [_.$], test for colon - * following identifier. - */ - if (isalpha (c) || c == '_' || c == '.' || c == '$') + const size_t count = sizeof (AsmKeywords) / sizeof (AsmKeywords [0]); + size_t i; + for (i = 0 ; i < count ; ++i) { - vStringPut (name, c); - c = *++cp; - while (isalnum (c) || c == '_' || c == '.' || c == '$') - { - vStringPut (name, c); - c = *++cp; - } - vStringTerminate (name); - while (isspace (c)) - c = *++cp; - if (c == ':') - makeSimpleTag (name, AsmKinds, K_LABEL); - else if (c == '=' || - strncmp ((const char*) cp, "equ", (size_t) 3) == 0) - makeSimpleTag (name, AsmKinds, K_DEFINE); - else if (strcmp (vStringValue (name), ".macro") == 0) - { + const asmKeyword* const p = AsmKeywords + i; + addKeyword (p->operator, Lang_asm, (int) p->keyword); + } +} + +static opKeyword analyzeOperator (const vString *const op) +{ + vString *keyword = vStringNew (); + opKeyword result; + + vStringCopyToLower (keyword, op); + result = (opKeyword) lookupKeyword (vStringValue (keyword), Lang_asm); + vStringDelete (keyword); + return result; +} + +static boolean isInitialSymbolCharacter (int c) +{ + return (boolean) (c != '\0' && (isalpha (c) || strchr ("_$", c) != NULL)); +} + +static boolean isSymbolCharacter (int c) +{ + /* '?' character is allowed in AMD 29K family */ + return (boolean) (c != '\0' && (isalnum (c) || strchr ("_$?", c) != NULL)); +} + +static boolean readPreProc (const unsigned char *const line) +{ + boolean result; + const unsigned char *cp = line; + vString *name = vStringNew (); + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (name, *cp); + ++cp; + } + vStringTerminate (name); + result = (boolean) (strcmp (vStringValue (name), "define") == 0); + if (result) + { + while (isspace ((int) *cp)) + ++cp; vStringClear (name); - while (isalnum (c) || c == '_') + while (isSymbolCharacter ((int) *cp)) { - vStringPut (name, c); - c = *++cp; + vStringPut (name, *cp); + ++cp; } vStringTerminate (name); - if (vStringLength (name) > 0) - makeSimpleTag (name, AsmKinds, K_MACRO); - } - vStringClear (name); + makeSimpleTag (name, AsmKinds, K_DEFINE); } - } - vStringDelete (name); + vStringDelete (name); + return result; +} + +static AsmKind operatorKind ( + const vString *const operator, + boolean *const found) +{ + AsmKind result = K_NONE; + const opKeyword kw = analyzeOperator (operator); + *found = (boolean) (kw != OP_UNDEFINED); + if (*found) + { + result = OpKinds [kw].kind; + Assert (OpKinds [kw].keyword == kw); + } + return result; +} + +/* We must check for "DB", "DB.L", "DCB.W" (68000) + */ +static boolean isDefineOperator (const vString *const operator) +{ + const unsigned char *const op = + (unsigned char*) vStringValue (operator); + const size_t length = vStringLength (operator); + const boolean result = (boolean) (length > 0 && + toupper ((int) *op) == 'D' && + (length == 2 || + (length == 4 && (int) op [2] == '.') || + (length == 5 && (int) op [3] == '.'))); + return result; +} + +static void makeAsmTag ( + const vString *const name, + const vString *const operator, + const boolean labelCandidate, + const boolean nameFollows) +{ + if (vStringLength (name) > 0) + { + boolean found; + const AsmKind kind = operatorKind (operator, &found); + if (found) + { + if (kind != K_NONE) + makeSimpleTag (name, AsmKinds, kind); + } + else if (isDefineOperator (operator)) + { + if (! nameFollows) + makeSimpleTag (name, AsmKinds, K_DEFINE); + } + else if (labelCandidate) + { + operatorKind (name, &found); + if (! found) + makeSimpleTag (name, AsmKinds, K_LABEL); + } + } +} + +static const unsigned char *readSymbol ( + const unsigned char *const start, + vString *const sym) +{ + const unsigned char *cp = start; + vStringClear (sym); + if (isInitialSymbolCharacter ((int) *cp)) + { + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (sym, *cp); + ++cp; + } + vStringTerminate (sym); + } + return cp; +} + +static const unsigned char *readOperator ( + const unsigned char *const start, + vString *const operator) +{ + const unsigned char *cp = start; + vStringClear (operator); + while (*cp != '\0' && ! isspace ((int) *cp)) + { + vStringPut (operator, *cp); + ++cp; + } + vStringTerminate (operator); + return cp; +} + +static void findAsmTags (void) +{ + vString *name = vStringNew (); + vString *operator = vStringNew (); + const unsigned char *line; + boolean inCComment = FALSE; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + boolean labelCandidate = (boolean) (! isspace ((int) *cp)); + boolean nameFollows = FALSE; + const boolean isComment = (boolean) + (*cp != '\0' && strchr (";*@", *cp) != NULL); + + /* skip comments */ + if (strncmp ((const char*) cp, "/*", (size_t) 2) == 0) + { + inCComment = TRUE; + cp += 2; + } + if (inCComment) + { + do + { + if (strncmp ((const char*) cp, "*/", (size_t) 2) == 0) + { + inCComment = FALSE; + cp += 2; + break; + } + ++cp; + } while (*cp != '\0'); + } + if (isComment || inCComment) + continue; + + /* read preprocessor defines */ + if (*cp == '#') + { + ++cp; + readPreProc (cp); + continue; + } + + /* skip white space */ + while (isspace ((int) *cp)) + ++cp; + + /* read symbol */ + cp = readSymbol (cp, name); + if (vStringLength (name) > 0 && *cp == ':') + { + labelCandidate = TRUE; + ++cp; + } + + if (! isspace ((int) *cp) && *cp != '\0') + continue; + + /* skip white space */ + while (isspace ((int) *cp)) + ++cp; + + /* skip leading dot */ +#if 0 + if (*cp == '.') + ++cp; +#endif + + cp = readOperator (cp, operator); + + /* attempt second read of symbol */ + if (vStringLength (name) == 0) + { + while (isspace ((int) *cp)) + ++cp; + cp = readSymbol (cp, name); + nameFollows = TRUE; + } + makeAsmTag (name, operator, labelCandidate, nameFollows); + } + vStringDelete (name); + vStringDelete (operator); +} + +static void initialize (const langType language) +{ + Lang_asm = language; + buildAsmKeywordHash (); } extern parserDefinition* AsmParser (void) { - static const char *const extensions [] = { "asm", "s", "S", NULL }; - parserDefinition* def = parserNew ("ASM"); - def->kinds = AsmKinds; - def->kindCount = KIND_COUNT (AsmKinds); - def->extensions = extensions; - def->parser = findAsmTags; - return def; + static const char *const extensions [] = { + "asm", "ASM", "s", "S", NULL + }; + static const char *const patterns [] = { + "*.A51", + "*.29[kK]", + "*.[68][68][kKsSxX]", + "*.[xX][68][68]", + NULL + }; + parserDefinition* def = parserNew ("Asm"); + def->kinds = AsmKinds; + def->kindCount = KIND_COUNT (AsmKinds); + def->extensions = extensions; + def->patterns = patterns; + def->parser = findAsmTags; + def->initialize = initialize; + return def; } -/* vi:set tabstop=8 shiftwidth=4: */ +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/tagmanager/c.c b/tagmanager/c.c index d752ca7c..6c0d511e 100644 --- a/tagmanager/c.c +++ b/tagmanager/c.c @@ -54,7 +54,7 @@ typedef enum eException { /* Used to specify type of keyword. */ typedef enum eKeywordId { - KEYWORD_NONE, + KEYWORD_NONE = -1, KEYWORD_ATTRIBUTE, KEYWORD_ABSTRACT, KEYWORD_BOOLEAN, KEYWORD_BYTE, KEYWORD_BAD_STATE, KEYWORD_BAD_TRANS, KEYWORD_BIND, KEYWORD_BIND_VAR, KEYWORD_BIT, diff --git a/tagmanager/fortran.c b/tagmanager/fortran.c index 10c3f640..f49d7f98 100644 --- a/tagmanager/fortran.c +++ b/tagmanager/fortran.c @@ -58,7 +58,7 @@ typedef enum eFortranLineType { /* Used to specify type of keyword. */ typedef enum eKeywordId { - KEYWORD_NONE, + KEYWORD_NONE = -1, KEYWORD_allocatable, KEYWORD_assignment, KEYWORD_block, diff --git a/tagmanager/js.c b/tagmanager/js.c index 4a308f1b..443c7a96 100644 --- a/tagmanager/js.c +++ b/tagmanager/js.c @@ -1,4 +1,6 @@ /* + * $Id$ + * * Copyright (c) 2003, Darren Hiebert * * This source code is released for free distribution under the terms of the @@ -9,6 +11,8 @@ * * This is a good reference for different forms of the function statement: * http://www.permadi.com/tutorial/jsFunc/ + * Another good reference: + * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide */ /* @@ -21,11 +25,10 @@ #include #endif -#include "main.h" -#include "entry.h" #include "keyword.h" #include "parse.h" #include "read.h" +#include "main.h" #include "vstring.h" /* @@ -52,6 +55,8 @@ typedef enum eKeywordId { KEYWORD_NONE = -1, KEYWORD_function, KEYWORD_capital_function, + KEYWORD_object, + KEYWORD_capital_object, KEYWORD_prototype, KEYWORD_var, KEYWORD_new, @@ -61,7 +66,10 @@ typedef enum eKeywordId { KEYWORD_do, KEYWORD_if, KEYWORD_else, - KEYWORD_switch + KEYWORD_switch, + KEYWORD_try, + KEYWORD_catch, + KEYWORD_finally } keywordId; /* Used to determine whether keyword is valid for the token language and @@ -88,17 +96,20 @@ typedef enum eTokenType { TOKEN_OPEN_CURLY, TOKEN_CLOSE_CURLY, TOKEN_EQUAL_SIGN, - TOKEN_FORWARD_SLASH + TOKEN_FORWARD_SLASH, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE } tokenType; typedef struct sTokenInfo { - tokenType type; - keywordId keyword; - vString * string; - vString * scope; - unsigned long lineNumber; - fpos_t filePosition; - int nestLevel; + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; + int nestLevel; + boolean ignoreTag; } tokenInfo; /* @@ -113,21 +124,25 @@ typedef enum { JSTAG_FUNCTION, JSTAG_CLASS, JSTAG_METHOD, + JSTAG_PROPERTY, JSTAG_VARIABLE, JSTAG_COUNT } jsKind; static kindOption JsKinds [] = { - { TRUE, 'f', "function", "functions" }, + { TRUE, 'f', "function", "functions" }, { TRUE, 'c', "class", "classes" }, { TRUE, 'm', "method", "methods" }, - { TRUE, 'v', "variable", "global variables" } + { TRUE, 'p', "property", "properties" }, + { TRUE, 'v', "variable", "global variables" } }; static const keywordDesc JsKeywordTable [] = { /* keyword keyword ID */ { "function", KEYWORD_function }, { "Function", KEYWORD_capital_function }, + { "object", KEYWORD_object }, + { "Object", KEYWORD_capital_object }, { "prototype", KEYWORD_prototype }, { "var", KEYWORD_var }, { "new", KEYWORD_new }, @@ -137,7 +152,10 @@ static const keywordDesc JsKeywordTable [] = { { "do", KEYWORD_do }, { "if", KEYWORD_if }, { "else", KEYWORD_else }, - { "switch", KEYWORD_switch } + { "switch", KEYWORD_switch }, + { "try", KEYWORD_try }, + { "catch", KEYWORD_catch }, + { "finally", KEYWORD_finally } }; /* @@ -149,18 +167,6 @@ static void parseFunction (tokenInfo *const token); static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent); static boolean parseLine (tokenInfo *const token, boolean is_inside_class); -static boolean isIdentChar1 (const int c) -{ - /* - * Other databases are less restrictive on the first character of - * an identifier. - * isIdentChar1 is used to identify the first character of an - * identifier, so we are removing some restrictions. - */ - return (boolean) - (isalpha (c) || c == '@' || c == '_' ); -} - static boolean isIdentChar (const int c) { return (boolean) @@ -189,6 +195,9 @@ static tokenInfo *newToken (void) token->string = vStringNew (); token->scope = vStringNew (); token->nestLevel = 0; + token->ignoreTag = FALSE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); return token; } @@ -206,7 +215,7 @@ static void deleteToken (tokenInfo *const token) static void makeConstTag (tokenInfo *const token, const jsKind kind) { - if (JsKinds [kind].enabled) + if (JsKinds [kind].enabled && ! token->ignoreTag ) { const char *const name = vStringValue (token->string); tagEntryInfo e; @@ -225,7 +234,7 @@ static void makeJsTag (tokenInfo *const token, const jsKind kind) { vString * fulltag; - if (JsKinds [kind].enabled) + if (JsKinds [kind].enabled && ! token->ignoreTag ) { /* * If a scope has been added to the token, change the token @@ -247,19 +256,25 @@ static void makeJsTag (tokenInfo *const token, const jsKind kind) static void makeClassTag (tokenInfo *const token) { - if ( ! stringListHas(ClassNames, vStringValue (token->string)) ) + if ( ! token->ignoreTag ) { - stringListAdd (ClassNames, vStringNewCopy (token->string)); - makeJsTag (token, JSTAG_CLASS); + if ( ! stringListHas(ClassNames, vStringValue (token->string)) ) + { + stringListAdd (ClassNames, vStringNewCopy (token->string)); + makeJsTag (token, JSTAG_CLASS); + } } } static void makeFunctionTag (tokenInfo *const token) { - if ( ! stringListHas(FunctionNames, vStringValue (token->string)) ) + if ( ! token->ignoreTag ) { - stringListAdd (FunctionNames, vStringNewCopy (token->string)); - makeJsTag (token, JSTAG_FUNCTION); + if ( ! stringListHas(FunctionNames, vStringValue (token->string)) ) + { + stringListAdd (FunctionNames, vStringNewCopy (token->string)); + makeJsTag (token, JSTAG_FUNCTION); + } } } @@ -280,12 +295,16 @@ static int skipToCharacter (const int c) static void parseString (vString *const string, const int delimiter) { boolean end = FALSE; - int c; while (! end) { - c = fileGetc (); + int c = fileGetc (); if (c == EOF) end = TRUE; + else if (c == '\\') + { + c = fileGetc(); /* This maybe a ' or ". */ + vStringPut(string, c); + } else if (c == delimiter) end = TRUE; else @@ -300,7 +319,7 @@ static void parseString (vString *const string, const int delimiter) static void parseIdentifier (vString *const string, const int firstChar) { int c = firstChar; - Assert (isIdentChar1 (c)); + Assert (isIdentChar (c)); do { vStringPut (string, c); @@ -313,11 +332,12 @@ static void parseIdentifier (vString *const string, const int firstChar) static keywordId analyzeToken (vString *const name) { - static vString *keyword = NULL; - if (keyword == NULL) - keyword = vStringNew (); + vString *keyword = vStringNew (); + keywordId result; vStringCopyToLower (keyword, name); - return (keywordId) lookupKeyword (vStringValue (keyword), Lang_js); + result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js); + vStringDelete (keyword); + return result; } static void readToken (tokenInfo *const token) @@ -332,26 +352,25 @@ getNextChar: do { c = fileGetc (); - /* - * Added " to the list of ignores, not sure what this - * might break but it gets by this issue: - * create table "t1" (...) - */ + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); } while (c == '\t' || c == ' ' || c == '\n'); switch (c) { case EOF: longjmp (Exception, (int)ExceptionEOF); break; - case '(': token->type = TOKEN_OPEN_PAREN; break; - case ')': token->type = TOKEN_CLOSE_PAREN; break; - case ';': token->type = TOKEN_SEMICOLON; break; - case ',': token->type = TOKEN_COMMA; break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case ',': token->type = TOKEN_COMMA; break; case '.': token->type = TOKEN_PERIOD; break; - case ':': token->type = TOKEN_COLON; break; - case '{': token->type = TOKEN_OPEN_CURLY; break; - case '}': token->type = TOKEN_CLOSE_CURLY; break; + case ':': token->type = TOKEN_COLON; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; case '=': token->type = TOKEN_EQUAL_SIGN; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '\'': case '"': @@ -361,6 +380,15 @@ getNextChar: token->filePosition = getInputFilePosition (); break; + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + case '/': { int d = fileGetc (); @@ -395,7 +423,7 @@ getNextChar: } default: - if (! isIdentChar1 (c)) + if (! isIdentChar (c)) token->type = TOKEN_UNDEFINED; else { @@ -460,6 +488,38 @@ static void skipArgumentList (tokenInfo *const token) } } +static void skipArrayList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Handle square brackets + * var name[1] + * So we must check for nested open and closing square brackets + */ + + if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */ + { + nest_level++; + while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + static void addContext (tokenInfo* const parent, const tokenInfo* const child) { if (vStringLength (parent->string) > 0) @@ -498,6 +558,10 @@ static void findCmdTerm (tokenInfo *const token) { parseBlock (token, token); } + else if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } else { readToken (token); @@ -632,21 +696,39 @@ static void parseLoop (tokenInfo *const token) } } -static void parseIf (tokenInfo *const token) +static boolean parseIf (tokenInfo *const token) { + boolean read_next_token = TRUE; /* * If statements have two forms * if ( ... ) * one line; * + * if ( ... ) + * statement; + * else + * statement + * * if ( ... ) { * multiple; * statements; * } * + * * if ( ... ) { * return elem * } + * + * This example if correctly written, but the + * else contains only 1 statement without a terminator + * since the function finishes with the closing brace. + * + * function a(flag){ + * if(flag) + * test(1); + * else + * test(2) + * } * * TODO: Deal with statements that can optional end * without a semi-colon. Currently this messes up @@ -681,7 +763,39 @@ static void parseIf (tokenInfo *const token) else { findCmdTerm (token); + + /* + * The IF could be followed by an ELSE statement. + * This too could have two formats, a curly braced + * multiline section, or another single line. + */ + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. + */ + read_next_token = FALSE; + } + else + { + readToken (token); + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. + */ + read_next_token = FALSE; + } + else + { + if (isKeyword (token, KEYWORD_else)) + read_next_token = parseIf (token); + } + } } + return read_next_token; } static void parseFunction (tokenInfo *const token) @@ -756,20 +870,19 @@ static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent) if (isKeyword (token, KEYWORD_this)) { /* - * Then we are inside a class and we have found + * Means we are inside a class and have found * a class, not a function */ is_class = TRUE; vStringCopy(saveScope, token->scope); addToScope (token, parent->string); - /* Move past this */ - readToken(token); - - /* Move past a potential . */ - if ( isType (token, TOKEN_PERIOD) ) - readToken(token); + /* + * Ignore the remainder of the line + * findCmdTerm(token); + */ parseLine (token, is_class); + vStringCopy(token->scope, saveScope); } else if (isKeyword (token, KEYWORD_var)) @@ -832,13 +945,15 @@ static void parseMethods (tokenInfo *const token, tokenInfo *const class) /* * This deals with these formats - * 'validMethod' : function(a,b) {} + * validProperty : 2, + * validMethod : function(a,b) {} + * 'validMethod2' : function(a,b) {} */ do { readToken (token); - if (isType (token, TOKEN_STRING)) + if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE)) { copyToken(name, token); @@ -862,11 +977,22 @@ static void parseMethods (tokenInfo *const token, tokenInfo *const class) /* * Read to the closing curly, check next - * token, if comma, we must loop again + * token, if a comma, we must loop again */ readToken (token); } } + else + { + addToScope (name, class->string); + makeJsTag (name, JSTAG_PROPERTY); + + /* + * Read the next token, if a comma + * we must loop again + */ + readToken (token); + } } } } while ( isType(token, TOKEN_COMMA) ); @@ -884,6 +1010,7 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) boolean is_class = FALSE; boolean is_terminated = TRUE; boolean is_global = FALSE; + boolean is_prototype = FALSE; vStringClear(saveScope); /* @@ -905,7 +1032,13 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) * 'validMethodOne' : function(a,b) {}, * 'validMethodTwo' : function(a,b) {} * } - * Database.prototype.getTodaysDate = Database_getTodaysDate; + * ValidClassTwo = function () + * { + * this.validMethodThree = function() {} + * // unnamed method + * this.validMethodFour = () {} + * } + * Database.prototype.validMethodThree = Database_getTodaysDate; */ if ( is_inside_class ) @@ -925,41 +1058,176 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) readToken(token); } + if ( isKeyword(token, KEYWORD_this) ) + { + readToken(token); + if (isType (token, TOKEN_PERIOD)) + { + readToken(token); + } + } + copyToken(name, token); - /* Potentially the name of the function */ - readToken (token); - if (isType (token, TOKEN_PERIOD)) + while (! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_SEMICOLON) && + ! isType (token, TOKEN_EQUAL_SIGN) ) + { + /* Potentially the name of the function */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Cannot be a global variable is it has dot references in the name + */ + is_global = FALSE; + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + if ( is_class ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + } + else + addContext (name, token); + } + else if ( isKeyword(token, KEYWORD_prototype) ) + { + /* + * When we reach the "prototype" tag, we infer: + * "BindAgent" is a class + * "build" is a method + * + * function BindAgent( repeatableIdName, newParentIdName ) { + * } + * + * CASE 1 + * Specified function name: "build" + * BindAgent.prototype.build = function( mode ) { + * ignore everything within this function + * } + * + * CASE 2 + * Prototype listing + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * + */ + makeClassTag (name); + is_class = TRUE; + is_prototype = TRUE; + + /* + * There should a ".function_name" next. + */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Handle CASE 1 + */ + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + + makeJsTag (token, JSTAG_METHOD); + /* + * We can read until the end of the block / statement. + * We need to correctly parse any nested blocks, but + * we do NOT want to create any tags based on what is + * within the blocks. + */ + token->ignoreTag = TRUE; + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + else if (isType (token, TOKEN_EQUAL_SIGN)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Handle CASE 2 + * + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + } + readToken (token); + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_OPEN_SQUARE) ) + skipArrayList(token); + + /* + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + is_class = parseBlock (token, name); + } + */ + } + + if ( isType (token, TOKEN_CLOSE_CURLY) ) { /* - * Cannot be a global variable is it has dot references in the name + * Reaching this section without having + * processed an open curly brace indicates + * the statement is most likely not terminated. */ - is_global = FALSE; - do - { - readToken (token); - if ( isKeyword(token, KEYWORD_NONE) ) - { - if ( is_class ) - { - vStringCopy(saveScope, token->scope); - addToScope(token, name->string); - makeJsTag (token, JSTAG_METHOD); + is_terminated = FALSE; + goto cleanUp; + } - /* Find to the end of the statement */ - findCmdTerm (token); - goto cleanUp; - } - else - addContext (name, token); - } - else if ( isKeyword(token, KEYWORD_prototype) ) - { - makeClassTag (name); - is_class = TRUE; - } - readToken (token); - } while (isType (token, TOKEN_PERIOD)); + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * Handles this syntax: + * var g_var2; + */ + if (isType (token, TOKEN_SEMICOLON)) + makeJsTag (name, JSTAG_VARIABLE); + } + /* + * Statement has ended. + * This deals with calls to functions, like: + * alert(..); + */ + goto cleanUp; } if ( isType (token, TOKEN_EQUAL_SIGN) ) @@ -975,14 +1243,14 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) { /* * Functions of this format: - * var D2A=function theAdd(a, b) + * var D2A = function theAdd(a, b) * { * return a+b; * } - * Are really two separately defined functions and + * Are really two separate defined functions and * can be referenced in two ways: - * alert(D2A(1,2)); // produces 3 - * alert(theAdd(1,2)); // also produces 3 + * alert( D2A(1,2) ); // produces 3 + * alert( theAdd(1,2) ); // also produces 3 * So it must have two tags: * D2A * theAdd @@ -1022,24 +1290,26 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) if ( vStringLength(secondary_name->string) > 0 ) makeFunctionTag (secondary_name); + + /* + * Find to the end of the statement + */ + goto cleanUp; } } } else if (isType (token, TOKEN_OPEN_PAREN)) { /* - * Handle nameless functions, these will only - * be considered methods. + * Handle nameless functions + * this.method_name = () {} */ skipArgumentList(token); if (isType (token, TOKEN_OPEN_CURLY)) { /* - * This will be either a function or a class. - * We can only determine this by checking the body - * of the function. If we find a "this." we know - * it is a class, otherwise it is a function. + * Nameless functions are only setup as methods. */ makeJsTag (name, JSTAG_METHOD); parseBlock (token, name); @@ -1047,20 +1317,40 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) } else if (isType (token, TOKEN_OPEN_CURLY)) { + /* + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ parseMethods(token, name); } else if (isKeyword (token, KEYWORD_new)) { readToken (token); if ( isKeyword (token, KEYWORD_function) || - isKeyword (token, KEYWORD_capital_function) ) + isKeyword (token, KEYWORD_capital_function) || + isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) { + if ( isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + is_class = TRUE; + readToken (token); if ( isType (token, TOKEN_OPEN_PAREN) ) skipArgumentList(token); if (isType (token, TOKEN_SEMICOLON)) - makeFunctionTag (name); + { + if ( is_class ) + { + makeClassTag (name); + } else { + makeFunctionTag (name); + } + } } } else if (isKeyword (token, KEYWORD_NONE)) @@ -1086,28 +1376,13 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) if ( ! stringListHas(FunctionNames, vStringValue (token->string)) && ! stringListHas(ClassNames, vStringValue (token->string)) ) { - readToken (token); + findCmdTerm (token); if (isType (token, TOKEN_SEMICOLON)) makeJsTag (name, JSTAG_VARIABLE); } } } } - else - { - /* - * Only create variables for global scope - */ - if ( token->nestLevel == 0 && is_global ) - { - /* - * Handles this syntax: - * var g_var2; - */ - if (isType (token, TOKEN_SEMICOLON)) - makeJsTag (name, JSTAG_VARIABLE); - } - } findCmdTerm (token); /* @@ -1159,7 +1434,11 @@ static boolean parseLine (tokenInfo *const token, boolean is_inside_class) break; case KEYWORD_if: case KEYWORD_else: - parseIf (token); + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); break; case KEYWORD_switch: parseSwitch (token); @@ -1173,7 +1452,7 @@ static boolean parseLine (tokenInfo *const token, boolean is_inside_class) { /* * Special case where single line statements may not be - * SEMICOLON termianted. parseBlock needs to know this + * SEMICOLON terminated. parseBlock needs to know this * so that it does not read the next token. */ is_terminated = parseStatement (token, is_inside_class); @@ -1212,10 +1491,12 @@ static void initialize (const langType language) static void findJsTags (void) { tokenInfo *const token = newToken (); - exception_t exception = (exception_t) (setjmp (Exception)); + exception_t exception; + ClassNames = stringListNew (); FunctionNames = stringListNew (); + exception = (exception_t) (setjmp (Exception)); while (exception == ExceptionNone) parseJsFile (token); @@ -1230,8 +1511,7 @@ static void findJsTags (void) extern parserDefinition* JavaScriptParser (void) { static const char *const extensions [] = { "js", NULL }; - - parserDefinition *const def = parserNew ("Javascript"); + parserDefinition *const def = parserNew ("JavaScript"); def->extensions = extensions; /* * New definitions for parsing instead of regex @@ -1243,5 +1523,4 @@ extern parserDefinition* JavaScriptParser (void) return def; } - /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/tagmanager/keyword.c b/tagmanager/keyword.c index 60884990..b59478b4 100644 --- a/tagmanager/keyword.c +++ b/tagmanager/keyword.c @@ -162,7 +162,7 @@ extern int lookupKeyword (const char *const string, langType language) { const unsigned long hashedValue = hashValue (string); hashEntry *entry = getHashTableEntry (hashedValue); - int value = 0; + int value = -1; while (entry != NULL) {