Backport js.c and asm.c from CTags SVN.

Fix Assembler tagmanager support and add appropriate symbol types.
Fix JavaScript parse bug (#1895242).
Change default return value of lookupKeyword() in keyword.c as it was done in CTags SVN(r339) and adjust affected parsers.


git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@2308 ea778897-0a13-0410-b9d1-a72fbfd435f5
This commit is contained in:
Enrico Tröger 2008-03-05 18:18:19 +00:00
parent 076066c7db
commit 9dd67b2669
7 changed files with 771 additions and 194 deletions

View File

@ -6,6 +6,13 @@
from this feature. from this feature.
* src/document.c: * src/document.c:
Don't open zero byte sized files read-only (e.g. files in /proc). Don't open zero byte sized files read-only (e.g. files in /proc).
* src/symbols.c, tagmanager/fortran.c, tagmanager/keyword.c,
tagmanager/js.c, tagmanager/asm.c, tagmanager/c.c:
Backport js.c and asm.c from CTags SVN.
Fix Assembler tagmanager support and add appropriate symbol types.
Fix JavaScript parse bug (#1895242).
Change default return value of lookupKeyword() in keyword.c as it was
done in CTags SVN(r339) and adjust affected parsers.
2008-03-03 Enrico Tröger <enrico(dot)troeger(at)uvena(dot)de> 2008-03-03 Enrico Tröger <enrico(dot)troeger(at)uvena(dot)de>

View File

@ -718,6 +718,16 @@ static void init_tag_list(gint idx)
NULL); NULL);
break; break;
} }
case GEANY_FILETYPES_ASM:
{
tag_list_add_groups(tag_store,
&(tv_iters.tag_namespace), _("Labels"), "classviewer-namespace",
&(tv_iters.tag_function), _("Macros"), "classviewer-method",
&(tv_iters.tag_macro), _("Defines"), "classviewer-macro",
&(tv_iters.tag_struct), _("Types"), "classviewer-struct",
NULL);
break;
}
case GEANY_FILETYPES_D: case GEANY_FILETYPES_D:
default: default:
{ {

View File

@ -1,6 +1,7 @@
/* /*
* $Id$
* *
* Copyright (c) 2000-2001, Darren Hiebert * Copyright (c) 2000-2003, Darren Hiebert
* *
* This source code is released for free distribution under the terms of the * This source code is released for free distribution under the terms of the
* GNU General Public License. * GNU General Public License.
@ -12,94 +13,374 @@
/* /*
* INCLUDE FILES * INCLUDE FILES
*/ */
#include "general.h" /* must always come first */ #include "general.h" /* must always come first */
#include <string.h> #include <string.h>
#include "keyword.h"
#include "parse.h" #include "parse.h"
#include "read.h" #include "read.h"
#include "main.h"
#include "vstring.h" #include "vstring.h"
/*
* DATA DECLARATIONS
*/
/* Tag kinds the assembler parser can emit.
 * K_NONE (-1) marks an operator that produces no tag; makeAsmTag() skips
 * the tag when it sees this value.  The other values are handed to
 * makeSimpleTag() together with the AsmKinds table.
 */
typedef enum {
K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE
} AsmKind;
/* Identifiers for the assembler operators the parser knows about.
 * operatorKind() treats OP_UNDEFINED (-1) as "not a known operator".
 * The non-negative values are used directly as indices into OpKinds, so
 * the order of the enumerators must stay in step with that table.
 */
typedef enum {
OP_UNDEFINED = -1,
OP_ALIGN,
OP_COLON_EQUAL,
OP_END,
OP_ENDM,
OP_ENDMACRO,
OP_ENDP,
OP_ENDS,
OP_EQU,
OP_EQUAL,
OP_LABEL,
OP_MACRO,
OP_PROC,
OP_RECORD,
OP_SECTIONS,
OP_SET,
OP_STRUCT,
OP_LAST
} opKeyword;
/* One row of the AsmKeywords table: the textual spelling of an operator
 * and the opKeyword value it is registered under.
 */
typedef struct {
const char *operator;
opKeyword keyword;
} asmKeyword;
/* One row of the OpKinds table: an operator and the tag kind it produces
 * (K_NONE when the operator does not define a symbol).
 */
typedef struct {
opKeyword keyword;
AsmKind kind;
} opKind;
/* /*
* DATA DEFINITIONS * DATA DEFINITIONS
*/ */
typedef enum { static langType Lang_asm;
K_DEFINE, K_LABEL, K_MACRO
} asmKind;
/* indexed by asmKind */
static kindOption AsmKinds [] = { static kindOption AsmKinds [] = {
{ TRUE, 'd', "define", "defines (names assigned a specified value)"}, { TRUE, 'd', "macro", "defines" },
{ TRUE, 'l', "label", "labels (names assigned an address)"}, { TRUE, 'l', "namespace", "labels" },
{ TRUE, 'm', "macro", "macros"} { TRUE, 'm', "function", "macros" },
{ TRUE, 't', "struct", "types (structs and records)" }
};
/* Operator spellings that buildAsmKeywordHash() registers as keywords for
 * the assembler language.  All entries are lower case: analyzeOperator()
 * lower-cases the operator text before looking it up, so matching is
 * effectively case-insensitive.
 */
static const asmKeyword AsmKeywords [] = {
{ "align", OP_ALIGN },
{ "endmacro", OP_ENDMACRO },
{ "endm", OP_ENDM },
{ "end", OP_END },
{ "endp", OP_ENDP },
{ "ends", OP_ENDS },
{ "equ", OP_EQU },
{ "label", OP_LABEL },
{ "macro", OP_MACRO },
{ ":=", OP_COLON_EQUAL },
{ "=", OP_EQUAL },
{ "proc", OP_PROC },
{ "record", OP_RECORD },
{ "sections", OP_SECTIONS },
{ "set", OP_SET },
{ "struct", OP_STRUCT }
};
static const opKind OpKinds [] = {
/* must be ordered same as opKeyword enumeration */
{ OP_ALIGN, K_NONE },
{ OP_COLON_EQUAL, K_DEFINE },
{ OP_END, K_NONE },
{ OP_ENDM, K_NONE },
{ OP_ENDMACRO, K_NONE },
{ OP_ENDP, K_NONE },
{ OP_ENDS, K_NONE },
{ OP_EQU, K_DEFINE },
{ OP_EQUAL, K_DEFINE },
{ OP_LABEL, K_LABEL },
{ OP_MACRO, K_MACRO },
{ OP_PROC, K_LABEL },
{ OP_RECORD, K_TYPE },
{ OP_SECTIONS, K_NONE },
{ OP_SET, K_DEFINE },
{ OP_STRUCT, K_TYPE }
}; };
/* /*
* FUNCTION DEFINITIONS * FUNCTION DEFINITIONS
*/ */
static void buildAsmKeywordHash (void)
/* Algorithm adapted from from GNU etags.
* By Bob Weiner, Motorola Inc., 4/3/94
* Unix and microcontroller assembly tag handling
* look for '^ [a-zA-Z_.$] [a-zA_Z0-9_.$]*[: ^I^J]'
*/
static void findAsmTags (void)
{ {
vString *name = vStringNew (); const size_t count = sizeof (AsmKeywords) / sizeof (AsmKeywords [0]);
const unsigned char *line; size_t i;
for (i = 0 ; i < count ; ++i)
while ((line = fileReadLine ()) != NULL)
{
const unsigned char *cp = line;
int c = *cp;
/* If first char is alphabetic or one of [_.$], test for colon
* following identifier.
*/
if (isalpha (c) || c == '_' || c == '.' || c == '$')
{ {
vStringPut (name, c); const asmKeyword* const p = AsmKeywords + i;
c = *++cp; addKeyword (p->operator, Lang_asm, (int) p->keyword);
while (isalnum (c) || c == '_' || c == '.' || c == '$') }
{ }
vStringPut (name, c);
c = *++cp; static opKeyword analyzeOperator (const vString *const op)
} {
vStringTerminate (name); vString *keyword = vStringNew ();
while (isspace (c)) opKeyword result;
c = *++cp;
if (c == ':') vStringCopyToLower (keyword, op);
makeSimpleTag (name, AsmKinds, K_LABEL); result = (opKeyword) lookupKeyword (vStringValue (keyword), Lang_asm);
else if (c == '=' || vStringDelete (keyword);
strncmp ((const char*) cp, "equ", (size_t) 3) == 0) return result;
makeSimpleTag (name, AsmKinds, K_DEFINE); }
else if (strcmp (vStringValue (name), ".macro") == 0)
{ static boolean isInitialSymbolCharacter (int c)
{
return (boolean) (c != '\0' && (isalpha (c) || strchr ("_$", c) != NULL));
}
static boolean isSymbolCharacter (int c)
{
/* '?' character is allowed in AMD 29K family */
return (boolean) (c != '\0' && (isalnum (c) || strchr ("_$?", c) != NULL));
}
static boolean readPreProc (const unsigned char *const line)
{
boolean result;
const unsigned char *cp = line;
vString *name = vStringNew ();
while (isSymbolCharacter ((int) *cp))
{
vStringPut (name, *cp);
++cp;
}
vStringTerminate (name);
result = (boolean) (strcmp (vStringValue (name), "define") == 0);
if (result)
{
while (isspace ((int) *cp))
++cp;
vStringClear (name); vStringClear (name);
while (isalnum (c) || c == '_') while (isSymbolCharacter ((int) *cp))
{ {
vStringPut (name, c); vStringPut (name, *cp);
c = *++cp; ++cp;
} }
vStringTerminate (name); vStringTerminate (name);
if (vStringLength (name) > 0) makeSimpleTag (name, AsmKinds, K_DEFINE);
makeSimpleTag (name, AsmKinds, K_MACRO);
}
vStringClear (name);
} }
} vStringDelete (name);
vStringDelete (name); return result;
}
/* Maps an operator to the tag kind it produces.
 *
 * operator: text of the operator to classify.
 * found:    set to true when the operator is a registered keyword,
 *           false otherwise.
 *
 * Returns the kind recorded in OpKinds for a known operator (which may
 * itself be K_NONE), or K_NONE when the operator is unknown.
 */
static AsmKind operatorKind (
		const vString *const operator,
		boolean *const found)
{
	const opKeyword kw = analyzeOperator (operator);

	*found = (boolean) (kw != OP_UNDEFINED);
	if (! *found)
		return K_NONE;

	/* OpKinds is indexed by opKeyword; verify the table row lines up */
	Assert (OpKinds [kw].keyword == kw);
	return OpKinds [kw].kind;
}
/* We must check for "DB", "DB.L", "DCB.W" (68000)
*/
static boolean isDefineOperator (const vString *const operator)
{
const unsigned char *const op =
(unsigned char*) vStringValue (operator);
const size_t length = vStringLength (operator);
const boolean result = (boolean) (length > 0 &&
toupper ((int) *op) == 'D' &&
(length == 2 ||
(length == 4 && (int) op [2] == '.') ||
(length == 5 && (int) op [3] == '.')));
return result;
}
/* Decides whether a (name, operator) pair read from one source line
 * should become a tag, and emits it.
 *
 * name:           candidate symbol; nothing happens when it is empty.
 * operator:       operator text read from the same line.
 * labelCandidate: true when the symbol may be a label.
 * nameFollows:    true when the symbol was read after the operator
 *                 rather than before it.
 *
 * Precedence:
 *   1. a registered operator decides the kind (K_NONE means no tag);
 *   2. a data-definition operator yields K_DEFINE, but only when the name
 *      preceded it;
 *   3. otherwise a label candidate becomes K_LABEL unless the name is
 *      itself a registered operator.
 */
static void makeAsmTag (
		const vString *const name,
		const vString *const operator,
		const boolean labelCandidate,
		const boolean nameFollows)
{
	boolean operatorKnown;
	AsmKind kind;

	if (vStringLength (name) == 0)
		return;

	kind = operatorKind (operator, &operatorKnown);
	if (operatorKnown)
	{
		if (kind != K_NONE)
			makeSimpleTag (name, AsmKinds, kind);
		return;
	}

	if (isDefineOperator (operator))
	{
		if (! nameFollows)
			makeSimpleTag (name, AsmKinds, K_DEFINE);
		return;
	}

	if (labelCandidate)
	{
		boolean nameIsOperator;

		/* only the "found" flag matters here; the kind is ignored */
		operatorKind (name, &nameIsOperator);
		if (! nameIsOperator)
			makeSimpleTag (name, AsmKinds, K_LABEL);
	}
}
/* Reads a symbol name beginning at `start` into `sym`.
 *
 * `sym` is always cleared first.  If the first character cannot begin a
 * symbol, `sym` stays empty and `start` is returned unchanged; otherwise
 * every following symbol character is copied.
 *
 * Returns a pointer to the first character that was not consumed.
 */
static const unsigned char *readSymbol (
		const unsigned char *const start,
		vString *const sym)
{
	const unsigned char *cursor = start;

	vStringClear (sym);
	if (! isInitialSymbolCharacter ((int) *cursor))
		return cursor;

	for ( ; isSymbolCharacter ((int) *cursor) ; ++cursor)
		vStringPut (sym, *cursor);
	vStringTerminate (sym);
	return cursor;
}
/* Copies the run of non-whitespace characters beginning at `start` into
 * `operator` (cleared first) and returns a pointer to the character that
 * ended the run: either whitespace or the string terminator.
 */
static const unsigned char *readOperator (
		const unsigned char *const start,
		vString *const operator)
{
	const unsigned char *cursor;

	vStringClear (operator);
	for (cursor = start ;
		 *cursor != '\0' && ! isspace ((int) *cursor) ;
		 ++cursor)
	{
		vStringPut (operator, *cursor);
	}
	vStringTerminate (operator);
	return cursor;
}
/* Parser entry point: reads the input one line at a time and hands every
 * (symbol, operator) pair it finds to makeAsmTag().
 *
 * Per line:
 *   - lines whose first character is ';', '*' or '@' are comments;
 *   - a C-style comment is recognized when it opens at the very start of
 *     a line; it may span several lines, and any line on which the
 *     comment is still open at its end is skipped;
 *   - a line starting with '#' is passed to readPreProc() and then
 *     skipped;
 *   - otherwise an optional symbol is read, followed by an operator.  If
 *     no symbol came before the operator, one is looked for after it and
 *     `nameFollows` is set.
 *
 * A symbol is a label candidate when the line does not start with white
 * space, or when the symbol is immediately followed by ':'.
 */
static void findAsmTags (void)
{
	vString *name = vStringNew ();
	vString *operator = vStringNew ();
	const unsigned char *line;
	boolean inCComment = FALSE;

	while ((line = fileReadLine ()) != NULL)
	{
		const unsigned char *cp = line;
		boolean labelCandidate = (boolean) (! isspace ((int) *cp));
		boolean nameFollows = FALSE;
		const boolean isComment = (boolean)
			(*cp != '\0' && strchr (";*@", *cp) != NULL);

		/* skip comments */
		if (strncmp ((const char*) cp, "/*", (size_t) 2) == 0)
		{
			inCComment = TRUE;
			cp += 2;
		}
		if (inCComment)
		{
			/* Test for the terminator BEFORE advancing: cp may already sit
			 * on the NUL (a line that is just the comment opener, or an
			 * empty line inside a comment), and stepping past it would
			 * read beyond the end of the line buffer.
			 */
			while (*cp != '\0')
			{
				if (strncmp ((const char*) cp, "*/", (size_t) 2) == 0)
				{
					inCComment = FALSE;
					cp += 2;
					break;
				}
				++cp;
			}
		}
		if (isComment || inCComment)
			continue;

		/* read preprocessor defines */
		if (*cp == '#')
		{
			++cp;
			readPreProc (cp);
			continue;
		}

		/* skip white space */
		while (isspace ((int) *cp))
			++cp;

		/* read symbol */
		cp = readSymbol (cp, name);
		if (vStringLength (name) > 0 && *cp == ':')
		{
			labelCandidate = TRUE;
			++cp;
		}

		/* the symbol must be followed by white space or end of line */
		if (! isspace ((int) *cp) && *cp != '\0')
			continue;

		/* skip white space */
		while (isspace ((int) *cp))
			++cp;

		/* skip leading dot */
#if 0
		if (*cp == '.')
			++cp;
#endif

		cp = readOperator (cp, operator);

		/* attempt second read of symbol */
		if (vStringLength (name) == 0)
		{
			while (isspace ((int) *cp))
				++cp;
			cp = readSymbol (cp, name);
			nameFollows = TRUE;
		}
		makeAsmTag (name, operator, labelCandidate, nameFollows);
	}
	vStringDelete (name);
	vStringDelete (operator);
}
static void initialize (const langType language)
{
Lang_asm = language;
buildAsmKeywordHash ();
} }
extern parserDefinition* AsmParser (void) extern parserDefinition* AsmParser (void)
{ {
static const char *const extensions [] = { "asm", "s", "S", NULL }; static const char *const extensions [] = {
parserDefinition* def = parserNew ("ASM"); "asm", "ASM", "s", "S", NULL
def->kinds = AsmKinds; };
def->kindCount = KIND_COUNT (AsmKinds); static const char *const patterns [] = {
def->extensions = extensions; "*.A51",
def->parser = findAsmTags; "*.29[kK]",
return def; "*.[68][68][kKsSxX]",
"*.[xX][68][68]",
NULL
};
parserDefinition* def = parserNew ("Asm");
def->kinds = AsmKinds;
def->kindCount = KIND_COUNT (AsmKinds);
def->extensions = extensions;
def->patterns = patterns;
def->parser = findAsmTags;
def->initialize = initialize;
return def;
} }
/* vi:set tabstop=8 shiftwidth=4: */ /* vi:set tabstop=4 shiftwidth=4: */

View File

@ -54,7 +54,7 @@ typedef enum eException {
/* Used to specify type of keyword. /* Used to specify type of keyword.
*/ */
typedef enum eKeywordId { typedef enum eKeywordId {
KEYWORD_NONE, KEYWORD_NONE = -1,
KEYWORD_ATTRIBUTE, KEYWORD_ABSTRACT, KEYWORD_ATTRIBUTE, KEYWORD_ABSTRACT,
KEYWORD_BOOLEAN, KEYWORD_BYTE, KEYWORD_BAD_STATE, KEYWORD_BAD_TRANS, KEYWORD_BOOLEAN, KEYWORD_BYTE, KEYWORD_BAD_STATE, KEYWORD_BAD_TRANS,
KEYWORD_BIND, KEYWORD_BIND_VAR, KEYWORD_BIT, KEYWORD_BIND, KEYWORD_BIND_VAR, KEYWORD_BIT,

View File

@ -58,7 +58,7 @@ typedef enum eFortranLineType {
/* Used to specify type of keyword. /* Used to specify type of keyword.
*/ */
typedef enum eKeywordId { typedef enum eKeywordId {
KEYWORD_NONE, KEYWORD_NONE = -1,
KEYWORD_allocatable, KEYWORD_allocatable,
KEYWORD_assignment, KEYWORD_assignment,
KEYWORD_block, KEYWORD_block,

View File

@ -1,4 +1,6 @@
/* /*
* $Id$
*
* Copyright (c) 2003, Darren Hiebert * Copyright (c) 2003, Darren Hiebert
* *
* This source code is released for free distribution under the terms of the * This source code is released for free distribution under the terms of the
@ -9,6 +11,8 @@
* *
* This is a good reference for different forms of the function statement: * This is a good reference for different forms of the function statement:
* http://www.permadi.com/tutorial/jsFunc/ * http://www.permadi.com/tutorial/jsFunc/
* Another good reference:
* http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
*/ */
/* /*
@ -21,11 +25,10 @@
#include <stdio.h> #include <stdio.h>
#endif #endif
#include "main.h"
#include "entry.h"
#include "keyword.h" #include "keyword.h"
#include "parse.h" #include "parse.h"
#include "read.h" #include "read.h"
#include "main.h"
#include "vstring.h" #include "vstring.h"
/* /*
@ -52,6 +55,8 @@ typedef enum eKeywordId {
KEYWORD_NONE = -1, KEYWORD_NONE = -1,
KEYWORD_function, KEYWORD_function,
KEYWORD_capital_function, KEYWORD_capital_function,
KEYWORD_object,
KEYWORD_capital_object,
KEYWORD_prototype, KEYWORD_prototype,
KEYWORD_var, KEYWORD_var,
KEYWORD_new, KEYWORD_new,
@ -61,7 +66,10 @@ typedef enum eKeywordId {
KEYWORD_do, KEYWORD_do,
KEYWORD_if, KEYWORD_if,
KEYWORD_else, KEYWORD_else,
KEYWORD_switch KEYWORD_switch,
KEYWORD_try,
KEYWORD_catch,
KEYWORD_finally
} keywordId; } keywordId;
/* Used to determine whether keyword is valid for the token language and /* Used to determine whether keyword is valid for the token language and
@ -88,17 +96,20 @@ typedef enum eTokenType {
TOKEN_OPEN_CURLY, TOKEN_OPEN_CURLY,
TOKEN_CLOSE_CURLY, TOKEN_CLOSE_CURLY,
TOKEN_EQUAL_SIGN, TOKEN_EQUAL_SIGN,
TOKEN_FORWARD_SLASH TOKEN_FORWARD_SLASH,
TOKEN_OPEN_SQUARE,
TOKEN_CLOSE_SQUARE
} tokenType; } tokenType;
typedef struct sTokenInfo { typedef struct sTokenInfo {
tokenType type; tokenType type;
keywordId keyword; keywordId keyword;
vString * string; vString * string;
vString * scope; vString * scope;
unsigned long lineNumber; unsigned long lineNumber;
fpos_t filePosition; fpos_t filePosition;
int nestLevel; int nestLevel;
boolean ignoreTag;
} tokenInfo; } tokenInfo;
/* /*
@ -113,21 +124,25 @@ typedef enum {
JSTAG_FUNCTION, JSTAG_FUNCTION,
JSTAG_CLASS, JSTAG_CLASS,
JSTAG_METHOD, JSTAG_METHOD,
JSTAG_PROPERTY,
JSTAG_VARIABLE, JSTAG_VARIABLE,
JSTAG_COUNT JSTAG_COUNT
} jsKind; } jsKind;
static kindOption JsKinds [] = { static kindOption JsKinds [] = {
{ TRUE, 'f', "function", "functions" }, { TRUE, 'f', "function", "functions" },
{ TRUE, 'c', "class", "classes" }, { TRUE, 'c', "class", "classes" },
{ TRUE, 'm', "method", "methods" }, { TRUE, 'm', "method", "methods" },
{ TRUE, 'v', "variable", "global variables" } { TRUE, 'p', "property", "properties" },
{ TRUE, 'v', "variable", "global variables" }
}; };
static const keywordDesc JsKeywordTable [] = { static const keywordDesc JsKeywordTable [] = {
/* keyword keyword ID */ /* keyword keyword ID */
{ "function", KEYWORD_function }, { "function", KEYWORD_function },
{ "Function", KEYWORD_capital_function }, { "Function", KEYWORD_capital_function },
{ "object", KEYWORD_object },
{ "Object", KEYWORD_capital_object },
{ "prototype", KEYWORD_prototype }, { "prototype", KEYWORD_prototype },
{ "var", KEYWORD_var }, { "var", KEYWORD_var },
{ "new", KEYWORD_new }, { "new", KEYWORD_new },
@ -137,7 +152,10 @@ static const keywordDesc JsKeywordTable [] = {
{ "do", KEYWORD_do }, { "do", KEYWORD_do },
{ "if", KEYWORD_if }, { "if", KEYWORD_if },
{ "else", KEYWORD_else }, { "else", KEYWORD_else },
{ "switch", KEYWORD_switch } { "switch", KEYWORD_switch },
{ "try", KEYWORD_try },
{ "catch", KEYWORD_catch },
{ "finally", KEYWORD_finally }
}; };
/* /*
@ -149,18 +167,6 @@ static void parseFunction (tokenInfo *const token);
static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent); static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent);
static boolean parseLine (tokenInfo *const token, boolean is_inside_class); static boolean parseLine (tokenInfo *const token, boolean is_inside_class);
static boolean isIdentChar1 (const int c)
{
/*
* Other databases are less restrictive on the first character of
* an identifier.
* isIdentChar1 is used to identify the first character of an
* identifier, so we are removing some restrictions.
*/
return (boolean)
(isalpha (c) || c == '@' || c == '_' );
}
static boolean isIdentChar (const int c) static boolean isIdentChar (const int c)
{ {
return (boolean) return (boolean)
@ -189,6 +195,9 @@ static tokenInfo *newToken (void)
token->string = vStringNew (); token->string = vStringNew ();
token->scope = vStringNew (); token->scope = vStringNew ();
token->nestLevel = 0; token->nestLevel = 0;
token->ignoreTag = FALSE;
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
return token; return token;
} }
@ -206,7 +215,7 @@ static void deleteToken (tokenInfo *const token)
static void makeConstTag (tokenInfo *const token, const jsKind kind) static void makeConstTag (tokenInfo *const token, const jsKind kind)
{ {
if (JsKinds [kind].enabled) if (JsKinds [kind].enabled && ! token->ignoreTag )
{ {
const char *const name = vStringValue (token->string); const char *const name = vStringValue (token->string);
tagEntryInfo e; tagEntryInfo e;
@ -225,7 +234,7 @@ static void makeJsTag (tokenInfo *const token, const jsKind kind)
{ {
vString * fulltag; vString * fulltag;
if (JsKinds [kind].enabled) if (JsKinds [kind].enabled && ! token->ignoreTag )
{ {
/* /*
* If a scope has been added to the token, change the token * If a scope has been added to the token, change the token
@ -247,19 +256,25 @@ static void makeJsTag (tokenInfo *const token, const jsKind kind)
static void makeClassTag (tokenInfo *const token) static void makeClassTag (tokenInfo *const token)
{ {
if ( ! stringListHas(ClassNames, vStringValue (token->string)) ) if ( ! token->ignoreTag )
{ {
stringListAdd (ClassNames, vStringNewCopy (token->string)); if ( ! stringListHas(ClassNames, vStringValue (token->string)) )
makeJsTag (token, JSTAG_CLASS); {
stringListAdd (ClassNames, vStringNewCopy (token->string));
makeJsTag (token, JSTAG_CLASS);
}
} }
} }
static void makeFunctionTag (tokenInfo *const token) static void makeFunctionTag (tokenInfo *const token)
{ {
if ( ! stringListHas(FunctionNames, vStringValue (token->string)) ) if ( ! token->ignoreTag )
{ {
stringListAdd (FunctionNames, vStringNewCopy (token->string)); if ( ! stringListHas(FunctionNames, vStringValue (token->string)) )
makeJsTag (token, JSTAG_FUNCTION); {
stringListAdd (FunctionNames, vStringNewCopy (token->string));
makeJsTag (token, JSTAG_FUNCTION);
}
} }
} }
@ -280,12 +295,16 @@ static int skipToCharacter (const int c)
static void parseString (vString *const string, const int delimiter) static void parseString (vString *const string, const int delimiter)
{ {
boolean end = FALSE; boolean end = FALSE;
int c;
while (! end) while (! end)
{ {
c = fileGetc (); int c = fileGetc ();
if (c == EOF) if (c == EOF)
end = TRUE; end = TRUE;
else if (c == '\\')
{
c = fileGetc(); /* This maybe a ' or ". */
vStringPut(string, c);
}
else if (c == delimiter) else if (c == delimiter)
end = TRUE; end = TRUE;
else else
@ -300,7 +319,7 @@ static void parseString (vString *const string, const int delimiter)
static void parseIdentifier (vString *const string, const int firstChar) static void parseIdentifier (vString *const string, const int firstChar)
{ {
int c = firstChar; int c = firstChar;
Assert (isIdentChar1 (c)); Assert (isIdentChar (c));
do do
{ {
vStringPut (string, c); vStringPut (string, c);
@ -313,11 +332,12 @@ static void parseIdentifier (vString *const string, const int firstChar)
static keywordId analyzeToken (vString *const name) static keywordId analyzeToken (vString *const name)
{ {
static vString *keyword = NULL; vString *keyword = vStringNew ();
if (keyword == NULL) keywordId result;
keyword = vStringNew ();
vStringCopyToLower (keyword, name); vStringCopyToLower (keyword, name);
return (keywordId) lookupKeyword (vStringValue (keyword), Lang_js); result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
vStringDelete (keyword);
return result;
} }
static void readToken (tokenInfo *const token) static void readToken (tokenInfo *const token)
@ -332,26 +352,25 @@ getNextChar:
do do
{ {
c = fileGetc (); c = fileGetc ();
/* token->lineNumber = getSourceLineNumber ();
* Added " to the list of ignores, not sure what this token->filePosition = getInputFilePosition ();
* might break but it gets by this issue:
* create table "t1" (...)
*/
} }
while (c == '\t' || c == ' ' || c == '\n'); while (c == '\t' || c == ' ' || c == '\n');
switch (c) switch (c)
{ {
case EOF: longjmp (Exception, (int)ExceptionEOF); break; case EOF: longjmp (Exception, (int)ExceptionEOF); break;
case '(': token->type = TOKEN_OPEN_PAREN; break; case '(': token->type = TOKEN_OPEN_PAREN; break;
case ')': token->type = TOKEN_CLOSE_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break;
case ';': token->type = TOKEN_SEMICOLON; break; case ';': token->type = TOKEN_SEMICOLON; break;
case ',': token->type = TOKEN_COMMA; break; case ',': token->type = TOKEN_COMMA; break;
case '.': token->type = TOKEN_PERIOD; break; case '.': token->type = TOKEN_PERIOD; break;
case ':': token->type = TOKEN_COLON; break; case ':': token->type = TOKEN_COLON; break;
case '{': token->type = TOKEN_OPEN_CURLY; break; case '{': token->type = TOKEN_OPEN_CURLY; break;
case '}': token->type = TOKEN_CLOSE_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break;
case '=': token->type = TOKEN_EQUAL_SIGN; break; case '=': token->type = TOKEN_EQUAL_SIGN; break;
case '[': token->type = TOKEN_OPEN_SQUARE; break;
case ']': token->type = TOKEN_CLOSE_SQUARE; break;
case '\'': case '\'':
case '"': case '"':
@ -361,6 +380,15 @@ getNextChar:
token->filePosition = getInputFilePosition (); token->filePosition = getInputFilePosition ();
break; break;
case '\\':
c = fileGetc ();
if (c != '\\' && c != '"' && !isspace (c))
fileUngetc (c);
token->type = TOKEN_CHARACTER;
token->lineNumber = getSourceLineNumber ();
token->filePosition = getInputFilePosition ();
break;
case '/': case '/':
{ {
int d = fileGetc (); int d = fileGetc ();
@ -395,7 +423,7 @@ getNextChar:
} }
default: default:
if (! isIdentChar1 (c)) if (! isIdentChar (c))
token->type = TOKEN_UNDEFINED; token->type = TOKEN_UNDEFINED;
else else
{ {
@ -460,6 +488,38 @@ static void skipArgumentList (tokenInfo *const token)
} }
} }
/* Skips a bracketed index/array expression such as
 *     var name[1]
 * If `token` is an opening square bracket, tokens are consumed up to the
 * matching closing bracket (nested brackets are tracked) and then one
 * further token is read, so that on return `token` is the first token
 * after the bracketed expression.  Any other token is left untouched.
 */
static void skipArrayList (tokenInfo *const token)
{
	int depth;

	if (! isType (token, TOKEN_OPEN_SQUARE))
		return;

	depth = 1;
	do
	{
		readToken (token);
		if (isType (token, TOKEN_OPEN_SQUARE))
			++depth;
		else if (isType (token, TOKEN_CLOSE_SQUARE) && depth > 0)
			--depth;
	}
	while (! (isType (token, TOKEN_CLOSE_SQUARE) && depth == 0));

	/* step past the closing bracket */
	readToken (token);
}
static void addContext (tokenInfo* const parent, const tokenInfo* const child) static void addContext (tokenInfo* const parent, const tokenInfo* const child)
{ {
if (vStringLength (parent->string) > 0) if (vStringLength (parent->string) > 0)
@ -498,6 +558,10 @@ static void findCmdTerm (tokenInfo *const token)
{ {
parseBlock (token, token); parseBlock (token, token);
} }
else if ( isType (token, TOKEN_OPEN_PAREN) )
{
skipArgumentList(token);
}
else else
{ {
readToken (token); readToken (token);
@ -632,21 +696,39 @@ static void parseLoop (tokenInfo *const token)
} }
} }
static void parseIf (tokenInfo *const token) static boolean parseIf (tokenInfo *const token)
{ {
boolean read_next_token = TRUE;
/* /*
* If statements have two forms * If statements have two forms
* if ( ... ) * if ( ... )
* one line; * one line;
* *
* if ( ... )
* statement;
* else
* statement
*
* if ( ... ) { * if ( ... ) {
* multiple; * multiple;
* statements; * statements;
* } * }
* *
*
* if ( ... ) { * if ( ... ) {
* return elem * return elem
* } * }
*
* This example is correctly written, but the
* else contains only 1 statement without a terminator
* since the function finishes with the closing brace.
*
* function a(flag){
* if(flag)
* test(1);
* else
* test(2)
* }
* *
* TODO: Deal with statements that can optional end * TODO: Deal with statements that can optional end
* without a semi-colon. Currently this messes up * without a semi-colon. Currently this messes up
@ -681,7 +763,39 @@ static void parseIf (tokenInfo *const token)
else else
{ {
findCmdTerm (token); findCmdTerm (token);
/*
* The IF could be followed by an ELSE statement.
* This too could have two formats, a curly braced
* multiline section, or another single line.
*/
if (isType (token, TOKEN_CLOSE_CURLY))
{
/*
* This statement did not have a line terminator.
*/
read_next_token = FALSE;
}
else
{
readToken (token);
if (isType (token, TOKEN_CLOSE_CURLY))
{
/*
* This statement did not have a line terminator.
*/
read_next_token = FALSE;
}
else
{
if (isKeyword (token, KEYWORD_else))
read_next_token = parseIf (token);
}
}
} }
return read_next_token;
} }
static void parseFunction (tokenInfo *const token) static void parseFunction (tokenInfo *const token)
@ -756,20 +870,19 @@ static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
if (isKeyword (token, KEYWORD_this)) if (isKeyword (token, KEYWORD_this))
{ {
/* /*
* Then we are inside a class and we have found * Means we are inside a class and have found
* a class, not a function * a class, not a function
*/ */
is_class = TRUE; is_class = TRUE;
vStringCopy(saveScope, token->scope); vStringCopy(saveScope, token->scope);
addToScope (token, parent->string); addToScope (token, parent->string);
/* Move past this */
readToken(token);
/* Move past a potential . */
if ( isType (token, TOKEN_PERIOD) )
readToken(token);
/*
* Ignore the remainder of the line
* findCmdTerm(token);
*/
parseLine (token, is_class); parseLine (token, is_class);
vStringCopy(token->scope, saveScope); vStringCopy(token->scope, saveScope);
} }
else if (isKeyword (token, KEYWORD_var)) else if (isKeyword (token, KEYWORD_var))
@ -832,13 +945,15 @@ static void parseMethods (tokenInfo *const token, tokenInfo *const class)
/* /*
* This deals with these formats * This deals with these formats
* 'validMethod' : function(a,b) {} * validProperty : 2,
* validMethod : function(a,b) {}
* 'validMethod2' : function(a,b) {}
*/ */
do do
{ {
readToken (token); readToken (token);
if (isType (token, TOKEN_STRING)) if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
{ {
copyToken(name, token); copyToken(name, token);
@ -862,11 +977,22 @@ static void parseMethods (tokenInfo *const token, tokenInfo *const class)
/* /*
* Read to the closing curly, check next * Read to the closing curly, check next
* token, if comma, we must loop again * token, if a comma, we must loop again
*/ */
readToken (token); readToken (token);
} }
} }
else
{
addToScope (name, class->string);
makeJsTag (name, JSTAG_PROPERTY);
/*
* Read the next token, if a comma
* we must loop again
*/
readToken (token);
}
} }
} }
} while ( isType(token, TOKEN_COMMA) ); } while ( isType(token, TOKEN_COMMA) );
@ -884,6 +1010,7 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
boolean is_class = FALSE; boolean is_class = FALSE;
boolean is_terminated = TRUE; boolean is_terminated = TRUE;
boolean is_global = FALSE; boolean is_global = FALSE;
boolean is_prototype = FALSE;
vStringClear(saveScope); vStringClear(saveScope);
/* /*
@ -905,7 +1032,13 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
* 'validMethodOne' : function(a,b) {}, * 'validMethodOne' : function(a,b) {},
* 'validMethodTwo' : function(a,b) {} * 'validMethodTwo' : function(a,b) {}
* } * }
* Database.prototype.getTodaysDate = Database_getTodaysDate; * ValidClassTwo = function ()
* {
* this.validMethodThree = function() {}
* // unnamed method
* this.validMethodFour = () {}
* }
* Database.prototype.validMethodThree = Database_getTodaysDate;
*/ */
if ( is_inside_class ) if ( is_inside_class )
@ -925,41 +1058,176 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
readToken(token); readToken(token);
} }
if ( isKeyword(token, KEYWORD_this) )
{
readToken(token);
if (isType (token, TOKEN_PERIOD))
{
readToken(token);
}
}
copyToken(name, token); copyToken(name, token);
/* Potentially the name of the function */ while (! isType (token, TOKEN_CLOSE_CURLY) &&
readToken (token); ! isType (token, TOKEN_SEMICOLON) &&
if (isType (token, TOKEN_PERIOD)) ! isType (token, TOKEN_EQUAL_SIGN) )
{
/* Potentially the name of the function */
readToken (token);
if (isType (token, TOKEN_PERIOD))
{
/*
* Cannot be a global variable if it has dot references in the name
*/
is_global = FALSE;
do
{
readToken (token);
if ( isKeyword(token, KEYWORD_NONE) )
{
if ( is_class )
{
vStringCopy(saveScope, token->scope);
addToScope(token, name->string);
}
else
addContext (name, token);
}
else if ( isKeyword(token, KEYWORD_prototype) )
{
/*
* When we reach the "prototype" tag, we infer:
* "BindAgent" is a class
* "build" is a method
*
* function BindAgent( repeatableIdName, newParentIdName ) {
* }
*
* CASE 1
* Specified function name: "build"
* BindAgent.prototype.build = function( mode ) {
* ignore everything within this function
* }
*
* CASE 2
* Prototype listing
* ValidClassOne.prototype = {
* 'validMethodOne' : function(a,b) {},
* 'validMethodTwo' : function(a,b) {}
* }
*
*/
makeClassTag (name);
is_class = TRUE;
is_prototype = TRUE;
/*
* There should be a ".function_name" next.
*/
readToken (token);
if (isType (token, TOKEN_PERIOD))
{
/*
* Handle CASE 1
*/
readToken (token);
if ( isKeyword(token, KEYWORD_NONE) )
{
vStringCopy(saveScope, token->scope);
addToScope(token, name->string);
makeJsTag (token, JSTAG_METHOD);
/*
* We can read until the end of the block / statement.
* We need to correctly parse any nested blocks, but
* we do NOT want to create any tags based on what is
* within the blocks.
*/
token->ignoreTag = TRUE;
/*
* Find to the end of the statement
*/
findCmdTerm (token);
token->ignoreTag = FALSE;
is_terminated = TRUE;
goto cleanUp;
}
}
else if (isType (token, TOKEN_EQUAL_SIGN))
{
readToken (token);
if (isType (token, TOKEN_OPEN_CURLY))
{
/*
* Handle CASE 2
*
* Creates tags for each of these class methods
* ValidClassOne.prototype = {
* 'validMethodOne' : function(a,b) {},
* 'validMethodTwo' : function(a,b) {}
* }
*/
parseMethods(token, name);
/*
* Find to the end of the statement
*/
findCmdTerm (token);
token->ignoreTag = FALSE;
is_terminated = TRUE;
goto cleanUp;
}
}
}
readToken (token);
} while (isType (token, TOKEN_PERIOD));
}
if ( isType (token, TOKEN_OPEN_PAREN) )
skipArgumentList(token);
if ( isType (token, TOKEN_OPEN_SQUARE) )
skipArrayList(token);
/*
if ( isType (token, TOKEN_OPEN_CURLY) )
{
is_class = parseBlock (token, name);
}
*/
}
if ( isType (token, TOKEN_CLOSE_CURLY) )
{ {
/* /*
* Cannot be a global variable is it has dot references in the name * Reaching this section without having
* processed an open curly brace indicates
* the statement is most likely not terminated.
*/ */
is_global = FALSE; is_terminated = FALSE;
do goto cleanUp;
{ }
readToken (token);
if ( isKeyword(token, KEYWORD_NONE) )
{
if ( is_class )
{
vStringCopy(saveScope, token->scope);
addToScope(token, name->string);
makeJsTag (token, JSTAG_METHOD);
/* Find to the end of the statement */ if ( isType (token, TOKEN_SEMICOLON) )
findCmdTerm (token); {
goto cleanUp; /*
} * Only create variables for global scope
else */
addContext (name, token); if ( token->nestLevel == 0 && is_global )
} {
else if ( isKeyword(token, KEYWORD_prototype) ) /*
{ * Handles this syntax:
makeClassTag (name); * var g_var2;
is_class = TRUE; */
} if (isType (token, TOKEN_SEMICOLON))
readToken (token); makeJsTag (name, JSTAG_VARIABLE);
} while (isType (token, TOKEN_PERIOD)); }
/*
* Statement has ended.
* This deals with calls to functions, like:
* alert(..);
*/
goto cleanUp;
} }
if ( isType (token, TOKEN_EQUAL_SIGN) ) if ( isType (token, TOKEN_EQUAL_SIGN) )
@ -975,14 +1243,14 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
{ {
/* /*
* Functions of this format: * Functions of this format:
* var D2A=function theAdd(a, b) * var D2A = function theAdd(a, b)
* { * {
* return a+b; * return a+b;
* } * }
* Are really two separately defined functions and * Are really two separate defined functions and
* can be referenced in two ways: * can be referenced in two ways:
* alert(D2A(1,2)); // produces 3 * alert( D2A(1,2) ); // produces 3
* alert(theAdd(1,2)); // also produces 3 * alert( theAdd(1,2) ); // also produces 3
* So it must have two tags: * So it must have two tags:
* D2A * D2A
* theAdd * theAdd
@ -1022,24 +1290,26 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
if ( vStringLength(secondary_name->string) > 0 ) if ( vStringLength(secondary_name->string) > 0 )
makeFunctionTag (secondary_name); makeFunctionTag (secondary_name);
/*
* Find to the end of the statement
*/
goto cleanUp;
} }
} }
} }
else if (isType (token, TOKEN_OPEN_PAREN)) else if (isType (token, TOKEN_OPEN_PAREN))
{ {
/* /*
* Handle nameless functions, these will only * Handle nameless functions
* be considered methods. * this.method_name = () {}
*/ */
skipArgumentList(token); skipArgumentList(token);
if (isType (token, TOKEN_OPEN_CURLY)) if (isType (token, TOKEN_OPEN_CURLY))
{ {
/* /*
* This will be either a function or a class. * Nameless functions are only setup as methods.
* We can only determine this by checking the body
* of the function. If we find a "this." we know
* it is a class, otherwise it is a function.
*/ */
makeJsTag (name, JSTAG_METHOD); makeJsTag (name, JSTAG_METHOD);
parseBlock (token, name); parseBlock (token, name);
@ -1047,20 +1317,40 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
} }
else if (isType (token, TOKEN_OPEN_CURLY)) else if (isType (token, TOKEN_OPEN_CURLY))
{ {
/*
* Creates tags for each of these class methods
* ValidClassOne.prototype = {
* 'validMethodOne' : function(a,b) {},
* 'validMethodTwo' : function(a,b) {}
* }
*/
parseMethods(token, name); parseMethods(token, name);
} }
else if (isKeyword (token, KEYWORD_new)) else if (isKeyword (token, KEYWORD_new))
{ {
readToken (token); readToken (token);
if ( isKeyword (token, KEYWORD_function) || if ( isKeyword (token, KEYWORD_function) ||
isKeyword (token, KEYWORD_capital_function) ) isKeyword (token, KEYWORD_capital_function) ||
isKeyword (token, KEYWORD_object) ||
isKeyword (token, KEYWORD_capital_object) )
{ {
if ( isKeyword (token, KEYWORD_object) ||
isKeyword (token, KEYWORD_capital_object) )
is_class = TRUE;
readToken (token); readToken (token);
if ( isType (token, TOKEN_OPEN_PAREN) ) if ( isType (token, TOKEN_OPEN_PAREN) )
skipArgumentList(token); skipArgumentList(token);
if (isType (token, TOKEN_SEMICOLON)) if (isType (token, TOKEN_SEMICOLON))
makeFunctionTag (name); {
if ( is_class )
{
makeClassTag (name);
} else {
makeFunctionTag (name);
}
}
} }
} }
else if (isKeyword (token, KEYWORD_NONE)) else if (isKeyword (token, KEYWORD_NONE))
@ -1086,28 +1376,13 @@ static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
if ( ! stringListHas(FunctionNames, vStringValue (token->string)) && if ( ! stringListHas(FunctionNames, vStringValue (token->string)) &&
! stringListHas(ClassNames, vStringValue (token->string)) ) ! stringListHas(ClassNames, vStringValue (token->string)) )
{ {
readToken (token); findCmdTerm (token);
if (isType (token, TOKEN_SEMICOLON)) if (isType (token, TOKEN_SEMICOLON))
makeJsTag (name, JSTAG_VARIABLE); makeJsTag (name, JSTAG_VARIABLE);
} }
} }
} }
} }
else
{
/*
* Only create variables for global scope
*/
if ( token->nestLevel == 0 && is_global )
{
/*
* Handles this syntax:
* var g_var2;
*/
if (isType (token, TOKEN_SEMICOLON))
makeJsTag (name, JSTAG_VARIABLE);
}
}
findCmdTerm (token); findCmdTerm (token);
/* /*
@ -1159,7 +1434,11 @@ static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
break; break;
case KEYWORD_if: case KEYWORD_if:
case KEYWORD_else: case KEYWORD_else:
parseIf (token); case KEYWORD_try:
case KEYWORD_catch:
case KEYWORD_finally:
/* Common semantics */
is_terminated = parseIf (token);
break; break;
case KEYWORD_switch: case KEYWORD_switch:
parseSwitch (token); parseSwitch (token);
@ -1173,7 +1452,7 @@ static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
{ {
/* /*
* Special case where single line statements may not be * Special case where single line statements may not be
* SEMICOLON termianted. parseBlock needs to know this * SEMICOLON terminated. parseBlock needs to know this
* so that it does not read the next token. * so that it does not read the next token.
*/ */
is_terminated = parseStatement (token, is_inside_class); is_terminated = parseStatement (token, is_inside_class);
@ -1212,10 +1491,12 @@ static void initialize (const langType language)
static void findJsTags (void) static void findJsTags (void)
{ {
tokenInfo *const token = newToken (); tokenInfo *const token = newToken ();
exception_t exception = (exception_t) (setjmp (Exception)); exception_t exception;
ClassNames = stringListNew (); ClassNames = stringListNew ();
FunctionNames = stringListNew (); FunctionNames = stringListNew ();
exception = (exception_t) (setjmp (Exception));
while (exception == ExceptionNone) while (exception == ExceptionNone)
parseJsFile (token); parseJsFile (token);
@ -1230,8 +1511,7 @@ static void findJsTags (void)
extern parserDefinition* JavaScriptParser (void) extern parserDefinition* JavaScriptParser (void)
{ {
static const char *const extensions [] = { "js", NULL }; static const char *const extensions [] = { "js", NULL };
parserDefinition *const def = parserNew ("JavaScript");
parserDefinition *const def = parserNew ("Javascript");
def->extensions = extensions; def->extensions = extensions;
/* /*
* New definitions for parsing instead of regex * New definitions for parsing instead of regex
@ -1243,5 +1523,4 @@ extern parserDefinition* JavaScriptParser (void)
return def; return def;
} }
/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */

View File

@ -162,7 +162,7 @@ extern int lookupKeyword (const char *const string, langType language)
{ {
const unsigned long hashedValue = hashValue (string); const unsigned long hashedValue = hashValue (string);
hashEntry *entry = getHashTableEntry (hashedValue); hashEntry *entry = getHashTableEntry (hashedValue);
int value = 0; int value = -1;
while (entry != NULL) while (entry != NULL)
{ {