Colomban Wendling 569670b771 Move most of TagManager's I/O to MIO
Moved the parsers to MIO, which allows both in-memory and file parsing.


git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@5555 ea778897-0a13-0410-b9d1-a72fbfd435f5
2011-03-05 22:40:50 +00:00

1528 lines
33 KiB
C

/*
* $Id$
*
* Copyright (c) 2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for JavaScript language
* files.
*
* This is a good reference for different forms of the function statement:
* http://www.permadi.com/tutorial/jsFunc/
* Another good reference:
* http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
*/
/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include <ctype.h> /* to define isalpha () */
#include <setjmp.h>
#include <mio/mio.h>
#ifdef DEBUG
#include <stdio.h>
#endif
#include "keyword.h"
#include "parse.h"
#include "read.h"
#include "main.h"
#include "vstring.h"
/*
* MACROS
*/
#define isType(token,t) (boolean) ((token)->type == (t))
#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
/*
* DATA DECLARATIONS
*/
/*
 * Values passed through setjmp()/longjmp() on the "Exception" jump buffer.
 * readToken() raises ExceptionEOF when the input is exhausted.
 */
typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
/*
* Tracks class and function names already created
*/
static stringList *ClassNames;
static stringList *FunctionNames;
/* Used to specify type of keyword.
*/
typedef enum eKeywordId {
KEYWORD_NONE = -1,
KEYWORD_function,
KEYWORD_capital_function,
KEYWORD_object,
KEYWORD_capital_object,
KEYWORD_prototype,
KEYWORD_var,
KEYWORD_new,
KEYWORD_this,
KEYWORD_for,
KEYWORD_while,
KEYWORD_do,
KEYWORD_if,
KEYWORD_else,
KEYWORD_switch,
KEYWORD_try,
KEYWORD_catch,
KEYWORD_finally
} keywordId;
/* Used to determine whether keyword is valid for the token language and
* what its ID is.
*/
/* One row of JsKeywordTable: a keyword spelling and the ID it maps to. */
typedef struct sKeywordDesc {
const char *name; /* spelling handed to addKeyword() */
keywordId id; /* value stored for that spelling */
} keywordDesc;
/*
 * Lexical categories assigned by readToken(). Several of them
 * (TOKEN_OPERATOR for instance) are never produced by the readToken()
 * visible in this file.
 */
typedef enum eTokenType {
TOKEN_UNDEFINED, /* initial state, and any character readToken() does not classify */
TOKEN_CHARACTER, /* produced for a backslash outside of a string */
TOKEN_CLOSE_PAREN,
TOKEN_SEMICOLON,
TOKEN_COLON,
TOKEN_COMMA,
TOKEN_KEYWORD, /* identifier found in the keyword table */
TOKEN_OPEN_PAREN,
TOKEN_OPERATOR,
TOKEN_IDENTIFIER, /* identifier that is not a keyword */
TOKEN_STRING, /* single- or double-quoted string; text is stored without the quotes */
TOKEN_PERIOD,
TOKEN_OPEN_CURLY,
TOKEN_CLOSE_CURLY,
TOKEN_EQUAL_SIGN,
TOKEN_FORWARD_SLASH, /* a '/' that does not start a comment */
TOKEN_OPEN_SQUARE,
TOKEN_CLOSE_SQUARE
} tokenType;
/*
 * State of the token currently being examined. Allocated with newToken()
 * and released with deleteToken().
 */
typedef struct sTokenInfo {
tokenType type; /* lexical category set by readToken() */
keywordId keyword; /* keyword ID, or KEYWORD_NONE */
vString * string; /* token text (identifier name or string contents) */
vString * scope; /* dotted prefix that makeJsTag() prepends to the tag name */
unsigned long lineNumber; /* line number recorded while reading the token */
MIOPos filePosition; /* input position recorded while reading the token */
int nestLevel; /* incremented on entry to parseBlock(), decremented on exit */
boolean ignoreTag; /* when TRUE the make*Tag() helpers emit nothing for this token */
} tokenInfo;
/*
* DATA DEFINITIONS
*/
static langType Lang_js;
static jmp_buf Exception;
/*
 * Indices into JsKinds. JSTAG_COUNT is not a kind: it is the number of
 * kinds, and initialize() asserts that JsKinds has exactly that many entries.
 */
typedef enum {
JSTAG_FUNCTION,
JSTAG_CLASS,
JSTAG_METHOD,
JSTAG_PROPERTY,
JSTAG_VARIABLE,
JSTAG_COUNT
} jsKind;
/*
 * Tag kinds produced by this parser, in jsKind order. This file reads the
 * "enabled", "letter" and "name" members of each entry; the last string is
 * presumably a human-readable description (kindOption is declared
 * elsewhere -- confirm there).
 */
static kindOption JsKinds [] = {
{ TRUE, 'f', "function", "functions" },
{ TRUE, 'c', "class", "classes" },
{ TRUE, 'm', "method", "methods" },
{ TRUE, 'p', "member", "properties" },
{ TRUE, 'v', "variable", "global variables" }
};
/*
 * Spellings recognised as keywords and the ID each one maps to.
 * buildJsKeywordHash() registers every row for Lang_js.
 */
static const keywordDesc JsKeywordTable [] = {
/* keyword keyword ID */
{ "function", KEYWORD_function },
{ "Function", KEYWORD_capital_function },
{ "object", KEYWORD_object },
{ "Object", KEYWORD_capital_object },
{ "prototype", KEYWORD_prototype },
{ "var", KEYWORD_var },
{ "new", KEYWORD_new },
{ "this", KEYWORD_this },
{ "for", KEYWORD_for },
{ "while", KEYWORD_while },
{ "do", KEYWORD_do },
{ "if", KEYWORD_if },
{ "else", KEYWORD_else },
{ "switch", KEYWORD_switch },
{ "try", KEYWORD_try },
{ "catch", KEYWORD_catch },
{ "finally", KEYWORD_finally }
};
/*
* FUNCTION DEFINITIONS
*/
/* Recursive functions */
static void parseFunction (tokenInfo *const token);
static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent);
static boolean parseLine (tokenInfo *const token, boolean is_inside_class);
static boolean isIdentChar (const int c)
{
return (boolean)
(isalpha (c) || isdigit (c) || c == '$' ||
c == '@' || c == '_' || c == '#');
}
/* Registers every row of JsKeywordTable as a keyword of Lang_js. */
static void buildJsKeywordHash (void)
{
	const keywordDesc *entry = JsKeywordTable;
	const keywordDesc *const end =
		JsKeywordTable + sizeof (JsKeywordTable) / sizeof (JsKeywordTable [0]);

	for ( ; entry != end ; ++entry)
		addKeyword (entry->name, Lang_js, (int) entry->id);
}
/*
 * Allocates a token in its initial state: undefined type, no keyword, empty
 * text and scope, nesting level 0, tags enabled, and positioned at the
 * current source line / input position. Release it with deleteToken().
 */
static tokenInfo *newToken (void)
{
	tokenInfo *const fresh = xMalloc (1, tokenInfo);

	fresh->type         = TOKEN_UNDEFINED;
	fresh->keyword      = KEYWORD_NONE;
	fresh->string       = vStringNew ();
	fresh->scope        = vStringNew ();
	fresh->nestLevel    = 0;
	fresh->ignoreTag    = FALSE;
	fresh->lineNumber   = getSourceLineNumber ();
	fresh->filePosition = getInputFilePosition ();

	return fresh;
}
/* Releases a token obtained from newToken(), including both of its strings. */
static void deleteToken (tokenInfo *const token)
{
	vString *const text  = token->string;
	vString *const scope = token->scope;

	vStringDelete (text);
	vStringDelete (scope);
	eFree (token);
}
/*
* Tag generation functions
*/
/*
 * Emits a tag named after the token's text, located at the token's recorded
 * line and file position, with the name and letter of "kind". Nothing is
 * emitted when the kind is disabled or the token is flagged ignoreTag.
 */
static void makeConstTag (tokenInfo *const token, const jsKind kind)
{
	tagEntryInfo entry;

	if (! JsKinds [kind].enabled || token->ignoreTag)
		return;

	initTagEntry (&entry, vStringValue (token->string));
	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;
	entry.kindName     = JsKinds [kind].name;
	entry.kind         = JsKinds [kind].letter;
	makeTagEntry (&entry);
}
/*
 * Emits a tag of the given kind for "token". When the token carries a
 * non-empty scope, the token's text is first rewritten in place to
 * "<scope>.<text>", so the caller's token is modified. Does nothing when the
 * kind is disabled or the token is flagged ignoreTag.
 */
static void makeJsTag (tokenInfo *const token, const jsKind kind)
{
	if (! JsKinds [kind].enabled || token->ignoreTag)
		return;

	if (vStringLength (token->scope) > 0)
	{
		vString *const qualified = vStringNew ();

		vStringCopy (qualified, token->scope);
		vStringCatS (qualified, ".");
		vStringCatS (qualified, vStringValue (token->string));
		vStringTerminate (qualified);
		vStringCopy (token->string, qualified);
		vStringDelete (qualified);
	}
	makeConstTag (token, kind);
}
/*
 * Emits a class tag for "token" unless one with the same (unscoped) text was
 * already recorded in ClassNames; the text is recorded before the tag is made.
 */
static void makeClassTag (tokenInfo *const token)
{
	if (token->ignoreTag)
		return;
	if (stringListHas (ClassNames, vStringValue (token->string)))
		return;

	stringListAdd (ClassNames, vStringNewCopy (token->string));
	makeJsTag (token, JSTAG_CLASS);
}
/*
 * Emits a function tag for "token" unless one with the same (unscoped) text
 * was already recorded in FunctionNames; the text is recorded before the tag
 * is made.
 */
static void makeFunctionTag (tokenInfo *const token)
{
	if (token->ignoreTag)
		return;
	if (stringListHas (FunctionNames, vStringValue (token->string)))
		return;

	stringListAdd (FunctionNames, vStringNewCopy (token->string));
	makeJsTag (token, JSTAG_FUNCTION);
}
/*
* Parsing functions
*/
/*
 * Consumes input up to and including the next occurrence of "c".
 * Returns "c", or EOF when the input ends first.
 */
static int skipToCharacter (const int c)
{
	int ch = fileGetc ();

	while (ch != EOF && ch != c)
		ch = fileGetc ();

	return ch;
}
/*
 * Reads the body of a quoted string into "string". The opening quote has
 * already been consumed by the caller; reading stops after the matching
 * "delimiter" (which is not stored) or at end of input.
 *
 * A backslash is dropped and the character following it is stored verbatim,
 * so an escaped quote does not end the string. If the input ends right after
 * a backslash the string is simply closed: EOF is never stored as a
 * character.
 */
static void parseString (vString *const string, const int delimiter)
{
	for (;;)
	{
		int c = fileGetc ();

		if (c == EOF || c == delimiter)
			break;
		if (c == '\\')
		{
			c = fileGetc (); /* This may be a ' or ". */
			if (c == EOF)
				break;
		}
		vStringPut (string, c);
	}
	vStringTerminate (string);
}
/* Reads an identifier whose first character, "firstChar", has already been
 * consumed, and appends it to "string". The character that ends the
 * identifier is pushed back onto the input unless it is whitespace.
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int ch = firstChar;

	Assert (isIdentChar (ch));
	do
	{
		vStringPut (string, ch);
		ch = fileGetc ();
	} while (isIdentChar (ch));
	vStringTerminate (string);

	if (isspace (ch))
		return; /* whitespace terminator is simply dropped */
	fileUngetc (ch);
}
/*
 * Returns the keyword ID registered for "name" in Lang_js, or whatever
 * lookupKeyword() reports for an unknown word (callers compare the result
 * against KEYWORD_NONE).
 *
 * The lookup uses the name exactly as written. JavaScript is case-sensitive,
 * so identifiers such as "This", "Var" or "Do" must not be classified as
 * keywords, and the capitalised table entries "Function" and "Object" can
 * only match when the spelling is not folded to lower case first.
 * NOTE(review): this assumes lookupKeyword() compares case-sensitively --
 * confirm against its implementation.
 */
static keywordId analyzeToken (vString *const name)
{
	return (keywordId) lookupKeyword (vStringValue (name), Lang_js);
}
/*
 * Scans the next token from the input into "token".
 *
 * Tabs, spaces and newlines are skipped, as are block and line comments.
 * Type and keyword are reset first, the text is cleared, and the
 * line number / file position are refreshed while scanning. Strings are
 * stored without their quotes; identifiers are looked up in the keyword
 * table and become TOKEN_KEYWORD or TOKEN_IDENTIFIER. Any character that is
 * not handled leaves the token as TOKEN_UNDEFINED.
 *
 * At end of input this function does not return: it longjmp()s to
 * "Exception" with ExceptionEOF.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* Skip blanks, keeping the recorded position up to date. */
	do
	{
		c = fileGetc ();
		token->lineNumber   = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
	} while (c == '\t' || c == ' ' || c == '\n');

	switch (c)
	{
		case EOF:
			longjmp (Exception, (int) ExceptionEOF);
			break;

		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/* The following character is consumed only when it is a
			 * backslash, a double quote or whitespace. */
			c = fileGetc ();
			if (! (c == '\\' || c == '"' || isspace (c)))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			const int next = fileGetc ();

			if (next == '*')
			{
				/* Block comment: look for a '*' directly followed by '/'. */
				do
				{
					skipToCharacter ('*');
					c = fileGetc ();
					if (c == '/')
						break;
					fileUngetc (c);
				} while (c != EOF && c != '\0');
				goto getNextChar;
			}
			if (next == '/')
			{
				/* Line comment: discard the rest of the line. */
				skipToCharacter ('\n');
				goto getNextChar;
			}
			/* A plain slash; give the look-ahead character back. */
			token->type = TOKEN_FORWARD_SLASH;
			fileUngetc (next);
			break;
		}

		default:
			if (isIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword      = analyzeToken (token->string);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}
/*
 * Copies type, keyword, text, scope, position and nesting level from "src"
 * to "dest". The ignoreTag flag of "dest" is left untouched.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	/* lexical classification */
	dest->type    = src->type;
	dest->keyword = src->keyword;

	/* location and nesting */
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	dest->nestLevel    = src->nestLevel;

	/* text */
	vStringCopy (dest->string, src->string);
	vStringCopy (dest->scope, src->scope);
}
/*
* Token parsing functions
*/
/*
 * If "token" is an opening parenthesis, reads tokens until the matching
 * closing parenthesis (nested parentheses are balanced) and then reads one
 * more, leaving "token" on the first token after the list. Otherwise does
 * nothing.
 */
static void skipArgumentList (tokenInfo *const token)
{
	int depth;

	if (! isType (token, TOKEN_OPEN_PAREN))
		return;

	depth = 1;
	do
	{
		readToken (token);
		if (isType (token, TOKEN_OPEN_PAREN))
			++depth;
		else if (isType (token, TOKEN_CLOSE_PAREN) && depth > 0)
			--depth;
	} while (! (isType (token, TOKEN_CLOSE_PAREN) && depth == 0));

	readToken (token);
}
/*
 * If "token" is an opening square bracket, reads tokens until the matching
 * closing bracket (nested brackets are balanced) and then reads one more,
 * leaving "token" on the first token after the bracketed part, as in
 *     name[1]
 * Otherwise does nothing.
 */
static void skipArrayList (tokenInfo *const token)
{
	int depth;

	if (! isType (token, TOKEN_OPEN_SQUARE))
		return;

	depth = 1;
	do
	{
		readToken (token);
		if (isType (token, TOKEN_OPEN_SQUARE))
			++depth;
		else if (isType (token, TOKEN_CLOSE_SQUARE) && depth > 0)
			--depth;
	} while (! (isType (token, TOKEN_CLOSE_SQUARE) && depth == 0));

	readToken (token);
}
/*
 * Appends the text of "child" to the text of "parent", separated by a '.'
 * when "parent" already has text: "a" + "b" gives "a.b".
 */
static void addContext (tokenInfo* const parent, const tokenInfo* const child)
{
	vString *const target = parent->string;

	if (vStringLength (target) > 0)
		vStringCatS (target, ".");
	vStringCatS (target, vStringValue (child->string));
	vStringTerminate (target);
}
/*
 * Appends "extra" to the scope of "token", separated by a '.' when the
 * scope is not empty.
 */
static void addToScope (tokenInfo* const token, vString* const extra)
{
	vString *const scope = token->scope;

	if (vStringLength (scope) > 0)
		vStringCatS (scope, ".");
	vStringCatS (scope, vStringValue (extra));
	vStringTerminate (scope);
}
/*
* Scanning functions
*/
/*
 * Advances "token" until it is a semicolon or a closing curly brace.
 * A block met on the way is handed to parseBlock() and a parenthesised list
 * to skipArgumentList(); every other token is simply read over. Returns
 * immediately when "token" already is a terminator.
 */
static void findCmdTerm (tokenInfo *const token)
{
	for (;;)
	{
		switch (token->type)
		{
			case TOKEN_SEMICOLON:
			case TOKEN_CLOSE_CURLY:
				return;

			case TOKEN_OPEN_CURLY:
				/* nested block */
				parseBlock (token, token);
				break;

			case TOKEN_OPEN_PAREN:
				skipArgumentList (token);
				break;

			default:
				readToken (token);
				break;
		}
	}
}
/*
 * Parses a switch statement; "token" is the "switch" keyword on entry.
 *
 *     switch (expression) {
 *         case value1:
 *             statement;
 *             break;
 *         default : statement;
 *     }
 *
 * The parenthesised expression is skipped and the body, if it starts with
 * a curly brace, is parsed as a block.
 */
static void parseSwitch (tokenInfo *const token)
{
	readToken (token);

	/* the controlling expression */
	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token);

	/* the body holding the case labels */
	if (isType (token, TOKEN_OPEN_CURLY))
		parseBlock (token, token);
}
/*
 * Parses a loop; "token" is the "for", "while" or "do" keyword on entry
 * (any other token is ignored).
 *
 *     for (x=0; x<3; x++)
 *         statement;
 *
 *     while (number<5) {
 *         statements;
 *     }
 *
 *     do {
 *         statements;
 *     }
 *     while (number<5);
 *
 * The loop header in parentheses is skipped; the body is parsed as a block
 * when it is braced and as a single line otherwise. For "do" the body comes
 * first, followed by an optional "while (...)".
 */
static void parseLoop (tokenInfo *const token)
{
	const keywordId loopKind = token->keyword;

	if (loopKind == KEYWORD_do)
	{
		/* body first ... */
		readToken (token);
		if (isType (token, TOKEN_OPEN_CURLY))
			parseBlock (token, token);
		else
			parseLine (token, FALSE);

		/* ... then the trailing condition */
		readToken (token);
		if (isKeyword (token, KEYWORD_while))
		{
			readToken (token);
			if (isType (token, TOKEN_OPEN_PAREN))
				skipArgumentList (token);
		}
		return;
	}

	if (loopKind != KEYWORD_for && loopKind != KEYWORD_while)
		return;

	/* header first ... */
	readToken (token);
	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token);

	/* ... then the body */
	if (isType (token, TOKEN_OPEN_CURLY))
		parseBlock (token, token);
	else
		parseLine (token, FALSE);
}
/*
 * Parses an "if" statement; parseLine() also routes "else", "try", "catch"
 * and "finally" here since they share the same shape: a keyword, an
 * optional parenthesised part, then either a block or a single statement.
 *
 *     if ( ... )
 *         statement;
 *     else
 *         statement
 *
 *     if ( ... ) {
 *         statements;
 *     }
 *
 * The last statement before a closing brace may lack its semicolon:
 *
 *     function a(flag){
 *         if(flag)
 *             test(1);
 *         else
 *             test(2)
 *     }
 *
 * Returns FALSE when the closing brace of the enclosing block has been
 * reached, so that the caller must not read another token; TRUE otherwise.
 *
 * TODO: Deal properly with statements that end without a semicolon.
 *       Currently this can upset the parsing of blocks.
 */
static boolean parseIf (tokenInfo *const token)
{
	readToken (token);

	/* the condition, when there is one */
	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token);

	/* braced body */
	if (isType (token, TOKEN_OPEN_CURLY))
	{
		parseBlock (token, token);
		return TRUE;
	}

	/* single statement body */
	findCmdTerm (token);
	if (isType (token, TOKEN_CLOSE_CURLY))
		return FALSE; /* statement had no terminator */

	/* Look at what follows: end of the enclosing block, or an "else". */
	readToken (token);
	if (isType (token, TOKEN_CLOSE_CURLY))
		return FALSE;
	if (isKeyword (token, KEYWORD_else))
		return parseIf (token);

	return TRUE;
}
/*
 * Parses a function declaration; "token" is the "function" keyword on entry.
 *
 *     function validFunctionTwo(a,b) {}
 *
 * The name may be dotted (a.b.c). The scope carried by "token" is applied
 * to the name so that inner functions are tagged with their enclosing scope.
 * When a body follows, a class tag is made if parseBlock() reports the body
 * as a class, a function tag otherwise.
 */
static void parseFunction (tokenInfo *const token)
{
	tokenInfo *const name = newToken ();

	readToken (name);
	/* Add scope in case this is an INNER function */
	addToScope (name, token->scope);

	/* Collect the remaining components of a dotted name. */
	readToken (token);
	while (isType (token, TOKEN_PERIOD))
	{
		readToken (token);
		if (isKeyword (token, KEYWORD_NONE))
		{
			addContext (name, token);
			readToken (token);
		}
	}

	if (isType (token, TOKEN_OPEN_PAREN))
		skipArgumentList (token);

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		const boolean body_is_class = parseBlock (token, name);

		if (body_is_class)
			makeClassTag (name);
		else
			makeFunctionTag (name);
	}

	findCmdTerm (token);
	deleteToken (name);
}
/*
 * Parses a brace-delimited block. "token" may be the opening brace (it is
 * then skipped) or the first token inside the block. "parent" names the
 * construct owning the block; its text is added to the scope while
 * statements starting with "this", "var" or "function" are parsed, and the
 * previous scope is put back afterwards.
 *
 * The nesting level of "token" is raised for the duration of the call.
 * Parsing stops at the closing brace, or as soon as a statement reports that
 * it had no terminator (the closing brace has then already been consumed as
 * the end of that statement).
 *
 * Returns TRUE when a statement starting with "this" was seen directly in
 * the block, which callers take to mean the block is a class body.
 */
static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
{
	boolean found_this = FALSE;
	boolean keep_reading = TRUE;
	vString *const outerScope = vStringNew ();

	token->nestLevel++;

	/* Be forgiving: when called on the opening brace, step over it. */
	if (isType (token, TOKEN_OPEN_CURLY) && isKeyword (token, KEYWORD_NONE))
		readToken (token);

	if (! isType (token, TOKEN_CLOSE_CURLY))
	{
		do
		{
			const keywordId kw = token->keyword;

			keep_reading = TRUE;

			if (kw == KEYWORD_this || kw == KEYWORD_var ||
				kw == KEYWORD_function)
			{
				/* "this" marks the block as a class body; "var" and
				 * "function" may introduce an inner function. All three
				 * are parsed with the parent's name added to the scope. */
				if (kw == KEYWORD_this)
					found_this = TRUE;

				vStringCopy (outerScope, token->scope);
				addToScope (token, parent->string);
				if (kw == KEYWORD_function)
					parseFunction (token);
				else
					parseLine (token, found_this);
				vStringCopy (token->scope, outerScope);
			}
			else if (isType (token, TOKEN_OPEN_CURLY))
			{
				/* nested block */
				parseBlock (token, parent);
			}
			else
			{
				/* parseLine tells us whether the statement was terminated;
				 * if it was not, no further token must be read. */
				keep_reading = parseLine (token, found_this);
			}

			if (keep_reading)
				readToken (token);

			/* An unterminated statement ends the block, otherwise the
			 * brace matching would be off by one. */
		} while (! isType (token, TOKEN_CLOSE_CURLY) && keep_reading);
	}

	vStringDelete (outerScope);
	token->nestLevel--;

	return found_this;
}
/*
 * Parses the members of an object literal; "token" is the opening brace on
 * entry and "class" names the object the members belong to.
 *
 *     validProperty  : 2,
 *     validMethod    : function(a,b) {},
 *     'validMethod2' : function(a,b) {}
 *
 * A member whose value is a function with a body is tagged as a method and
 * its body parsed; any other "name : value" member is tagged as a property.
 * Members are read for as long as they are separated by commas.
 */
static void parseMethods (tokenInfo *const token, tokenInfo *const class)
{
	tokenInfo *const name = newToken ();

	do
	{
		/* member name: a string or any non-keyword token */
		readToken (token);
		if (! (isType (token, TOKEN_STRING) ||
			   isKeyword (token, KEYWORD_NONE)))
			continue;
		copyToken (name, token);

		readToken (token);
		if (! isType (token, TOKEN_COLON))
			continue;

		readToken (token);
		if (! isKeyword (token, KEYWORD_function))
		{
			/* plain property; the next token decides whether we loop */
			addToScope (name, class->string);
			makeJsTag (name, JSTAG_PROPERTY);
			readToken (token);
			continue;
		}

		/* method: skip the parameters, then parse the body */
		readToken (token);
		if (isType (token, TOKEN_OPEN_PAREN))
			skipArgumentList (token);

		if (isType (token, TOKEN_OPEN_CURLY))
		{
			addToScope (name, class->string);
			makeJsTag (name, JSTAG_METHOD);
			parseBlock (token, name);
			/* we are on the closing brace; a comma next means loop again */
			readToken (token);
		}
	} while (isType (token, TOKEN_COMMA));

	findCmdTerm (token);
	deleteToken (name);
}
/*
 * Parses one statement starting at "token" and emits the tags it defines.
 * "is_inside_class" tells whether the statement sits in a block already
 * recognised as a class body.
 *
 * Handled forms:
 *   Functions
 *     validFunctionOne = function(a,b) {}
 *     testlib.validFunctionFive = function(a,b) {}
 *     var innerThree = function(a,b) {}
 *     var innerFour = (a,b) {}
 *     var D3 = new Function("a", "b", "return a+b;");
 *   Classes
 *     testlib.extras.ValidClassOne = function(a,b) {
 *         this.a = a;
 *     }
 *   Class methods
 *     testlib.extras.ValidClassOne.prototype = {
 *         'validMethodOne' : function(a,b) {},
 *         'validMethodTwo' : function(a,b) {}
 *     }
 *     ValidClassTwo = function ()
 *     {
 *         this.validMethodThree = function() {}
 *         // unnamed method
 *         this.validMethodFour = () {}
 *     }
 *     Database.prototype.validMethodThree = Database_getTodaysDate;
 *   Global variables (only at nesting level 0)
 *     var g_var1 = 1;
 *     var g_var2;
 *
 * The scope of "token" may be extended while the statement is parsed; it is
 * always put back to the value it had on entry before returning, so the
 * caller's scope survives every kind of statement.
 *
 * Returns FALSE when the statement ended on a closing curly brace instead of
 * a semicolon (the caller must then not read another token), TRUE otherwise.
 */
static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
{
	tokenInfo *const name = newToken ();
	tokenInfo *const secondary_name = newToken ();
	vString * saveScope = vStringNew ();
	boolean is_class = FALSE;
	boolean is_terminated = TRUE;
	boolean is_global = FALSE;

	/* Remember the caller's scope once; it is restored at cleanUp. */
	vStringCopy (saveScope, token->scope);

	if ( is_inside_class )
		is_class = TRUE;

	/*
	 * var can precede an inner function
	 */
	if ( isKeyword (token, KEYWORD_var) )
	{
		/*
		 * Only create variables for global scope
		 */
		if ( token->nestLevel == 0 )
			is_global = TRUE;
		readToken (token);
	}

	if ( isKeyword (token, KEYWORD_this) )
	{
		readToken (token);
		if (isType (token, TOKEN_PERIOD))
			readToken (token);
	}

	copyToken (name, token);

	while (! isType (token, TOKEN_CLOSE_CURLY) &&
		   ! isType (token, TOKEN_SEMICOLON) &&
		   ! isType (token, TOKEN_EQUAL_SIGN) )
	{
		/* Potentially the name of the function */
		readToken (token);
		if (isType (token, TOKEN_PERIOD))
		{
			/*
			 * Cannot be a global variable if it has dot references in the
			 * name
			 */
			is_global = FALSE;
			do
			{
				readToken (token);
				if ( isKeyword (token, KEYWORD_NONE) )
				{
					if ( is_class )
						addToScope (token, name->string);
					else
						addContext (name, token);
				}
				else if ( isKeyword (token, KEYWORD_prototype) )
				{
					/*
					 * When we reach the "prototype" tag, we infer:
					 *     "BindAgent" is a class
					 *     "build"     is a method
					 *
					 * function BindAgent( repeatableIdName, newParentIdName ) {
					 * }
					 *
					 * CASE 1
					 * Specified function name: "build"
					 *     BindAgent.prototype.build = function( mode ) {
					 *         ignore everything within this function
					 *     }
					 *
					 * CASE 2
					 * Prototype listing
					 *     ValidClassOne.prototype = {
					 *         'validMethodOne' : function(a,b) {},
					 *         'validMethodTwo' : function(a,b) {}
					 *     }
					 */
					makeClassTag (name);
					is_class = TRUE;

					/*
					 * There should be a ".function_name" next.
					 */
					readToken (token);
					if (isType (token, TOKEN_PERIOD))
					{
						/*
						 * Handle CASE 1
						 */
						readToken (token);
						if ( isKeyword (token, KEYWORD_NONE) )
						{
							addToScope (token, name->string);
							makeJsTag (token, JSTAG_METHOD);
							/*
							 * Read until the end of the block / statement.
							 * Nested blocks must be parsed correctly, but
							 * the token is flagged so that it produces no
							 * tags meanwhile.
							 */
							token->ignoreTag = TRUE;
							findCmdTerm (token);
							token->ignoreTag = FALSE;
							is_terminated = TRUE;
							goto cleanUp;
						}
					}
					else if (isType (token, TOKEN_EQUAL_SIGN))
					{
						readToken (token);
						if (isType (token, TOKEN_OPEN_CURLY))
						{
							/*
							 * Handle CASE 2: create tags for each of the
							 * listed class methods.
							 */
							parseMethods (token, name);
							/*
							 * Find to the end of the statement
							 */
							findCmdTerm (token);
							token->ignoreTag = FALSE;
							is_terminated = TRUE;
							goto cleanUp;
						}
					}
				}
				readToken (token);
			} while (isType (token, TOKEN_PERIOD));
		}

		if ( isType (token, TOKEN_OPEN_PAREN) )
			skipArgumentList (token);

		if ( isType (token, TOKEN_OPEN_SQUARE) )
			skipArrayList (token);
	}

	if ( isType (token, TOKEN_CLOSE_CURLY) )
	{
		/*
		 * Reaching this section without having processed an open curly
		 * brace indicates the statement is most likely not terminated.
		 */
		is_terminated = FALSE;
		goto cleanUp;
	}

	if ( isType (token, TOKEN_SEMICOLON) )
	{
		/*
		 * Only create variables for global scope. Handles this syntax:
		 *     var g_var2;
		 */
		if ( token->nestLevel == 0 && is_global )
			makeJsTag (name, JSTAG_VARIABLE);
		/*
		 * Statement has ended.
		 * This deals with calls to functions, like:
		 *     alert(..);
		 */
		goto cleanUp;
	}

	if ( isType (token, TOKEN_EQUAL_SIGN) )
	{
		readToken (token);

		if ( isKeyword (token, KEYWORD_function) )
		{
			readToken (token);

			if ( isKeyword (token, KEYWORD_NONE) &&
				 ! isType (token, TOKEN_OPEN_PAREN) )
			{
				/*
				 * Functions of this format:
				 *     var D2A = function theAdd(a, b)
				 *     {
				 *         return a+b;
				 *     }
				 * are really two separately defined functions and can be
				 * referenced in two ways:
				 *     alert( D2A(1,2) );     // produces 3
				 *     alert( theAdd(1,2) );  // also produces 3
				 * So it must have two tags: D2A and theAdd.
				 * Save the second name for later use; once we have
				 * established this is a valid function we create the
				 * secondary reference to it.
				 */
				copyToken (secondary_name, token);
				readToken (token);
			}

			if ( isType (token, TOKEN_OPEN_PAREN) )
				skipArgumentList (token);

			if (isType (token, TOKEN_OPEN_CURLY))
			{
				/*
				 * Outside of a class this is either a function or a class,
				 * which only the body can tell: parseBlock() reports a
				 * class when it meets "this".
				 */
				if ( is_inside_class )
				{
					makeJsTag (name, JSTAG_METHOD);
					if ( vStringLength (secondary_name->string) > 0 )
						makeFunctionTag (secondary_name);
					parseBlock (token, name);
				}
				else
				{
					is_class = parseBlock (token, name);
					if ( is_class )
						makeClassTag (name);
					else
						makeFunctionTag (name);

					if ( vStringLength (secondary_name->string) > 0 )
						makeFunctionTag (secondary_name);

					goto cleanUp;
				}
			}
		}
		else if (isType (token, TOKEN_OPEN_PAREN))
		{
			/*
			 * Handle nameless functions
			 *     this.method_name = () {}
			 */
			skipArgumentList (token);

			if (isType (token, TOKEN_OPEN_CURLY))
			{
				/*
				 * Nameless functions are only set up as methods.
				 */
				makeJsTag (name, JSTAG_METHOD);
				parseBlock (token, name);
			}
		}
		else if (isType (token, TOKEN_OPEN_CURLY))
		{
			/*
			 * Creates tags for each of these class methods
			 *     ValidClassOne.prototype = {
			 *         'validMethodOne' : function(a,b) {},
			 *         'validMethodTwo' : function(a,b) {}
			 *     }
			 */
			parseMethods (token, name);
		}
		else if (isKeyword (token, KEYWORD_new))
		{
			readToken (token);
			if ( isKeyword (token, KEYWORD_function) ||
				 isKeyword (token, KEYWORD_capital_function) ||
				 isKeyword (token, KEYWORD_object) ||
				 isKeyword (token, KEYWORD_capital_object) )
			{
				if ( isKeyword (token, KEYWORD_object) ||
					 isKeyword (token, KEYWORD_capital_object) )
					is_class = TRUE;

				readToken (token);
				if ( isType (token, TOKEN_OPEN_PAREN) )
					skipArgumentList (token);

				if (isType (token, TOKEN_SEMICOLON))
				{
					if ( is_class )
						makeClassTag (name);
					else
						makeFunctionTag (name);
				}
			}
		}
		else if (isKeyword (token, KEYWORD_NONE))
		{
			/*
			 * Only create variables for global scope
			 */
			if ( token->nestLevel == 0 && is_global )
			{
				/*
				 * A pointer can be created to the function.
				 * If we recognize the function/class name ignore the
				 * variable. This format looks identical to a variable
				 * definition. A variable defined outside of a block is
				 * considered a global variable:
				 *     var g_var1 = 1;
				 *     var g_var2;
				 * This is not a global variable:
				 *     var g_var = function;
				 * This is a global variable:
				 *     var g_var = different_var_name;
				 */
				if ( ! stringListHas (FunctionNames, vStringValue (token->string)) &&
					 ! stringListHas (ClassNames, vStringValue (token->string)) )
				{
					findCmdTerm (token);
					if (isType (token, TOKEN_SEMICOLON))
						makeJsTag (name, JSTAG_VARIABLE);
				}
			}
		}
	}
	findCmdTerm (token);

	/*
	 * Statements can be optionally terminated in the case of a statement
	 * prior to a close curly brace, as in the document.write line below:
	 *
	 * function checkForUpdate() {
	 *     if( 1==1 ) {
	 *         document.write("hello from checkForUpdate")
	 *     }
	 *     return 1;
	 * }
	 */
	if (isType (token, TOKEN_CLOSE_CURLY))
		is_terminated = FALSE;

cleanUp:
	/* Hand the scope back exactly as the caller passed it in. */
	vStringCopy (token->scope, saveScope);
	deleteToken (name);
	deleteToken (secondary_name);
	vStringDelete (saveScope);

	return is_terminated;
}
/*
 * Parses one statement, dispatching on its leading keyword: loops go to
 * parseLoop(), "if"/"else"/"try"/"catch"/"finally" to parseIf(), "switch"
 * to parseSwitch(), everything else to parseStatement().
 *
 * The last statement of a "{}" block may lack its terminator. Returns FALSE
 * in that case, so that the caller does not read another token looking for
 * the end of the statement, and TRUE otherwise. Only parseIf() and a
 * non-keyword statement can report FALSE; for the other keyword cases the
 * result is always TRUE.
 */
static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
{
	boolean terminated = TRUE;

	if (! isType (token, TOKEN_KEYWORD))
	{
		/* Plain statement: pass on whether it was terminated. */
		return parseStatement (token, is_inside_class);
	}

	switch (token->keyword)
	{
		case KEYWORD_for:
		case KEYWORD_while:
		case KEYWORD_do:
			parseLoop (token);
			break;

		case KEYWORD_if:
		case KEYWORD_else:
		case KEYWORD_try:
		case KEYWORD_catch:
		case KEYWORD_finally:
			/* all of these share the shape of an "if" */
			terminated = parseIf (token);
			break;

		case KEYWORD_switch:
			parseSwitch (token);
			break;

		default:
			/* the result is deliberately not propagated here */
			parseStatement (token, is_inside_class);
			break;
	}

	return terminated;
}
/*
 * Top-level parsing loop: reads a token and parses the construct it starts,
 * "function" declarations through parseFunction() and everything else
 * through parseLine(). The loop has no exit of its own; it is left through
 * the longjmp() that readToken() performs at end of input.
 */
static void parseJsFile (tokenInfo *const token)
{
	for (;;)
	{
		readToken (token);

		if (isType (token, TOKEN_KEYWORD) &&
			token->keyword == KEYWORD_function)
			parseFunction (token);
		else
			parseLine (token, FALSE);
	}
}
/*
 * Parser initialisation hook: remembers the language ID assigned to
 * JavaScript and registers the keyword table for it.
 */
static void initialize (const langType language)
{
	/* JsKinds must have one entry per jsKind value. */
	Assert (JSTAG_COUNT == sizeof (JsKinds) / sizeof (JsKinds [0]));

	Lang_js = language;
	buildJsKeywordHash ();
}
static void findJsTags (void)
{
tokenInfo *const token = newToken ();
exception_t exception;
ClassNames = stringListNew ();
FunctionNames = stringListNew ();
exception = (exception_t) (setjmp (Exception));
while (exception == ExceptionNone)
parseJsFile (token);
stringListDelete (ClassNames);
stringListDelete (FunctionNames);
ClassNames = NULL;
FunctionNames = NULL;
deleteToken (token);
}
/* Create the parser definition structure for JavaScript: files with the
 * "js" extension, the kinds listed in JsKinds, findJsTags() as the parser
 * and initialize() as its set-up hook.
 */
extern parserDefinition* JavaScriptParser (void)
{
	static const char *const extensions [] = { "js", NULL };
	parserDefinition *const parser = parserNew ("JavaScript");

	parser->extensions = extensions;

	/* Hand-written parser (not regex based) and the kinds it produces. */
	parser->kinds      = JsKinds;
	parser->kindCount  = KIND_COUNT (JsKinds);
	parser->parser     = findJsTags;
	parser->initialize = initialize;

	return parser;
}
/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */