geany/tagmanager/objc.c

1143 lines
21 KiB
C
Raw Normal View History

/*
* Copyright (c) 2010, Vincent Berthoux
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
*
* This module contains functions for generating tags for Objective C
* language files.
*/
/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include <string.h>
#include "keyword.h"
#include "entry.h"
#include "options.h"
#include "read.h"
#include "vstring.h"
/* To get rid of unused parameter warning in
* -Wextra */
#ifdef UNUSED
#elif defined(__GNUC__)
# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
#elif defined(__LCLINT__)
# define UNUSED(x) /*@unused@*/ x
#else
# define UNUSED(x) x
#endif
typedef enum {
K_INTERFACE,
K_IMPLEMENTATION,
K_PROTOCOL,
K_METHOD,
K_CLASSMETHOD,
K_VAR,
K_FIELD,
K_FUNCTION,
K_PROPERTY,
K_TYPEDEF,
K_STRUCT,
K_ENUM,
K_MACRO
} objcKind;
static kindOption ObjcKinds[] = {
{TRUE, 'i', "interface", "class interface"},
{TRUE, 'I', "implementation", "class implementation"},
{TRUE, 'p', "protocol", "Protocol"},
{TRUE, 'm', "method", "Object's method"},
{TRUE, 'c', "class", "Class' method"},
{TRUE, 'v', "var", "Global variable"},
{TRUE, 'F', "field", "Object field"},
{TRUE, 'f', "function", "A function"},
{TRUE, 'p', "property", "A property"},
{TRUE, 't', "typedef", "A type alias"},
{TRUE, 's', "struct", "A type structure"},
{TRUE, 'e', "enum", "An enumeration"},
{TRUE, 'M', "macro", "A preprocessor macro"},
};
typedef enum {
ObjcTYPEDEF,
ObjcSTRUCT,
ObjcENUM,
ObjcIMPLEMENTATION,
ObjcINTERFACE,
ObjcPROTOCOL,
ObjcENCODE,
ObjcSYNCHRONIZED,
ObjcSELECTOR,
ObjcPROPERTY,
ObjcEND,
ObjcDEFS,
ObjcCLASS,
ObjcPRIVATE,
ObjcPACKAGE,
ObjcPUBLIC,
ObjcPROTECTED,
ObjcSYNTHESIZE,
ObjcDYNAMIC,
ObjcOPTIONAL,
ObjcREQUIRED,
ObjcSTRING,
ObjcIDENTIFIER,
Tok_COMA, /* ',' */
Tok_PLUS, /* '+' */
Tok_MINUS, /* '-' */
Tok_PARL, /* '(' */
Tok_PARR, /* ')' */
Tok_CurlL, /* '{' */
Tok_CurlR, /* '}' */
Tok_SQUAREL, /* '[' */
Tok_SQUARER, /* ']' */
Tok_semi, /* ';' */
Tok_dpoint, /* ':' */
Tok_Sharp, /* '#' */
Tok_Backslash, /* '\\' */
Tok_EOL, /* '\r''\n' */
Tok_any,
Tok_EOF /* END of file */
} objcKeyword;
typedef objcKeyword objcToken;
typedef struct sOBjcKeywordDesc {
const char *name;
objcKeyword id;
} objcKeywordDesc;
static const objcKeywordDesc objcKeywordTable[] = {
{"typedef", ObjcTYPEDEF},
{"struct", ObjcSTRUCT},
{"enum", ObjcENUM},
{"@implementation", ObjcIMPLEMENTATION},
{"@interface", ObjcINTERFACE},
{"@protocol", ObjcPROTOCOL},
{"@encode", ObjcENCODE},
{"@property", ObjcPROPERTY},
{"@synchronized", ObjcSYNCHRONIZED},
{"@selector", ObjcSELECTOR},
{"@end", ObjcEND},
{"@defs", ObjcDEFS},
{"@class", ObjcCLASS},
{"@private", ObjcPRIVATE},
{"@package", ObjcPACKAGE},
{"@public", ObjcPUBLIC},
{"@protected", ObjcPROTECTED},
{"@synthesize", ObjcSYNTHESIZE},
{"@dynamic", ObjcDYNAMIC},
{"@optional", ObjcOPTIONAL},
{"@required", ObjcREQUIRED},
};
static langType Lang_ObjectiveC;
/*//////////////////////////////////////////////////////////////////
//// lexingInit */
typedef struct _lexingState {
vString *name; /* current parsed identifier/operator */
const unsigned char *cp; /* position in stream */
} lexingState;
static void initKeywordHash (void)
{
const size_t count = sizeof (objcKeywordTable) / sizeof (objcKeywordDesc);
size_t i;
for (i = 0; i < count; ++i)
{
addKeyword (objcKeywordTable[i].name, Lang_ObjectiveC,
(int) objcKeywordTable[i].id);
}
}
/*//////////////////////////////////////////////////////////////////////
//// Lexing */
static boolean isNum (char c)
{
return c >= '0' && c <= '9';
}
static boolean isLowerAlpha (char c)
{
return c >= 'a' && c <= 'z';
}
static boolean isUpperAlpha (char c)
{
return c >= 'A' && c <= 'Z';
}
static boolean isAlpha (char c)
{
return isLowerAlpha (c) || isUpperAlpha (c);
}
static boolean isIdent (char c)
{
return isNum (c) || isAlpha (c) || c == '_';
}
static boolean isSpace (char c)
{
return c == ' ' || c == '\t';
}
/* return true if it end with an end of line */
static void eatWhiteSpace (lexingState * st)
{
const unsigned char *cp = st->cp;
while (isSpace (*cp))
cp++;
st->cp = cp;
}
static void eatString (lexingState * st)
{
boolean lastIsBackSlash = FALSE;
boolean unfinished = TRUE;
const unsigned char *c = st->cp + 1;
while (unfinished)
{
/* end of line should never happen.
* we tolerate it */
if (c == NULL || c[0] == '\0')
break;
else if (*c == '"' && !lastIsBackSlash)
unfinished = FALSE;
else
lastIsBackSlash = *c == '\\';
c++;
}
st->cp = c;
}
static void eatComment (lexingState * st)
{
boolean unfinished = TRUE;
boolean lastIsStar = FALSE;
const unsigned char *c = st->cp + 2;
while (unfinished)
{
/* we've reached the end of the line..
* so we have to reload a line... */
if (c == NULL || *c == '\0')
{
st->cp = fileReadLine ();
/* WOOPS... no more input...
* we return, next lexing read
* will be null and ok */
if (st->cp == NULL)
return;
c = st->cp;
}
/* we've reached the end of the comment */
else if (*c == '/' && lastIsStar)
unfinished = FALSE;
else
{
lastIsStar = '*' == *c;
c++;
}
}
st->cp = c;
}
static void readIdentifier (lexingState * st)
{
const unsigned char *p;
vStringClear (st->name);
/* first char is a simple letter */
if (isAlpha (*st->cp) || *st->cp == '_')
vStringPut (st->name, (int) *st->cp);
/* Go till you get identifier chars */
for (p = st->cp + 1; isIdent (*p); p++)
vStringPut (st->name, (int) *p);
st->cp = p;
vStringTerminate (st->name);
}
/* read the @something directives */
static void readIdentifierObjcDirective (lexingState * st)
{
const unsigned char *p;
vStringClear (st->name);
/* first char is a simple letter */
if (*st->cp == '@')
vStringPut (st->name, (int) *st->cp);
/* Go till you get identifier chars */
for (p = st->cp + 1; isIdent (*p); p++)
vStringPut (st->name, (int) *p);
st->cp = p;
vStringTerminate (st->name);
}
/* The lexer is in charge of reading the file.
* Some of sub-lexer (like eatComment) also read file.
* lexing is finished when the lexer return Tok_EOF */
static objcKeyword lex (lexingState * st)
{
int retType;
/* handling data input here */
while (st->cp == NULL || st->cp[0] == '\0')
{
st->cp = fileReadLine ();
if (st->cp == NULL)
return Tok_EOF;
return Tok_EOL;
}
if (isAlpha (*st->cp))
{
readIdentifier (st);
retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
if (retType == -1) /* If it's not a keyword */
{
return ObjcIDENTIFIER;
}
else
{
return retType;
}
}
else if (*st->cp == '@')
{
readIdentifierObjcDirective (st);
retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
if (retType == -1) /* If it's not a keyword */
{
return Tok_any;
}
else
{
return retType;
}
}
else if (isSpace (*st->cp))
{
eatWhiteSpace (st);
return lex (st);
}
else
switch (*st->cp)
{
case '(':
st->cp++;
return Tok_PARL;
case '\\':
st->cp++;
return Tok_Backslash;
case '#':
st->cp++;
return Tok_Sharp;
case '/':
if (st->cp[1] == '*') /* ergl, a comment */
{
eatComment (st);
return lex (st);
}
else if (st->cp[1] == '/')
{
st->cp = NULL;
return lex (st);
}
else
{
st->cp++;
return Tok_any;
}
break;
case ')':
st->cp++;
return Tok_PARR;
case '{':
st->cp++;
return Tok_CurlL;
case '}':
st->cp++;
return Tok_CurlR;
case '[':
st->cp++;
return Tok_SQUAREL;
case ']':
st->cp++;
return Tok_SQUARER;
case ',':
st->cp++;
return Tok_COMA;
case ';':
st->cp++;
return Tok_semi;
case ':':
st->cp++;
return Tok_dpoint;
case '"':
eatString (st);
return Tok_any;
case '+':
st->cp++;
return Tok_PLUS;
case '-':
st->cp++;
return Tok_MINUS;
default:
st->cp++;
break;
}
/* default return if nothing is recognized,
* shouldn't happen, but at least, it will
* be handled without destroying the parsing. */
return Tok_any;
}
/*//////////////////////////////////////////////////////////////////////
//// Parsing */
typedef void (*parseNext) (vString * const ident, objcToken what);
/********** Helpers */
/* This variable hold the 'parser' which is going to
* handle the next token */
parseNext toDoNext;
/* Special variable used by parser eater to
* determine which action to put after their
* job is finished. */
parseNext comeAfter;
/* Used by some parsers detecting certain token
* to revert to previous parser. */
parseNext fallback;
/********** Grammar */
static void globalScope (vString * const ident, objcToken what);
static void parseMethods (vString * const ident, objcToken what);
static void parseImplemMethods (vString * const ident, objcToken what);
static vString *tempName = NULL;
static vString *parentName = NULL;
static objcKind parentType = K_INTERFACE;
/* used to prepare tag for OCaml, just in case their is a need to
* add additional information to the tag. */
static void prepareTag (tagEntryInfo * tag, vString const *name, objcKind kind)
{
initTagEntry (tag, vStringValue (name));
tag->kindName = ObjcKinds[kind].name;
tag->kind = ObjcKinds[kind].letter;
if (parentName != NULL)
{
tag->extensionFields.scope[0] = ObjcKinds[parentType].name;
tag->extensionFields.scope[1] = vStringValue (parentName);
}
}
void pushEnclosingContext (const vString * parent, objcKind type)
{
vStringCopy (parentName, parent);
parentType = type;
}
void popEnclosingContext (void)
{
vStringClear (parentName);
}
/* Used to centralise tag creation, and be able to add
* more information to it in the future */
static void addTag (vString * const ident, int kind)
{
tagEntryInfo toCreate;
prepareTag (&toCreate, ident, kind);
makeTagEntry (&toCreate);
}
objcToken waitedToken, fallBackToken;
/* Ignore everything till waitedToken and jump to comeAfter.
* If the "end" keyword is encountered break, doesn't remember
* why though. */
static void tillToken (vString * const UNUSED (ident), objcToken what)
{
if (what == waitedToken)
toDoNext = comeAfter;
}
static void tillTokenOrFallBack (vString * const UNUSED (ident), objcToken what)
{
if (what == waitedToken)
toDoNext = comeAfter;
else if (what == fallBackToken)
{
toDoNext = fallback;
}
}
static void ignoreBalanced (vString * const UNUSED (ident), objcToken what)
{
static int count = 0;
switch (what)
{
case Tok_PARL:
case Tok_CurlL:
case Tok_SQUAREL:
count++;
break;
case Tok_PARR:
case Tok_CurlR:
case Tok_SQUARER:
count--;
break;
default:
/* don't care */
break;
}
if (count == 0)
toDoNext = comeAfter;
}
static void parseFields (vString * const ident, objcToken what)
{
switch (what)
{
case Tok_CurlR:
toDoNext = &parseMethods;
break;
case Tok_SQUAREL:
case Tok_PARL:
toDoNext = &ignoreBalanced;
comeAfter = &parseFields;
break;
/* we got an identifier, keep track of it */
case ObjcIDENTIFIER:
vStringCopy (tempName, ident);
break;
/* our last kept identifier must be our variable name =) */
case Tok_semi:
addTag (tempName, K_FIELD);
vStringClear (tempName);
break;
default:
/* NOTHING */
break;
}
}
objcKind methodKind;
static vString *fullMethodName;
static vString *prevIdent;
static void parseMethodsName (vString * const ident, objcToken what)
{
switch (what)
{
case Tok_PARL:
toDoNext = &tillToken;
comeAfter = &parseMethodsName;
waitedToken = Tok_PARR;
break;
case Tok_dpoint:
vStringCat (fullMethodName, prevIdent);
vStringCatS (fullMethodName, ":");
vStringClear (prevIdent);
break;
case ObjcIDENTIFIER:
vStringCopy (prevIdent, ident);
break;
case Tok_CurlL:
case Tok_semi:
/* method name is not simple */
if (vStringLength (fullMethodName) != '\0')
{
addTag (fullMethodName, methodKind);
vStringClear (fullMethodName);
}
else
addTag (prevIdent, methodKind);
toDoNext = &parseMethods;
parseImplemMethods (ident, what);
vStringClear (prevIdent);
break;
default:
break;
}
}
static void parseMethodsImplemName (vString * const ident, objcToken what)
{
switch (what)
{
case Tok_PARL:
toDoNext = &tillToken;
comeAfter = &parseMethodsImplemName;
waitedToken = Tok_PARR;
break;
case Tok_dpoint:
vStringCat (fullMethodName, prevIdent);
vStringCatS (fullMethodName, ":");
vStringClear (prevIdent);
break;
case ObjcIDENTIFIER:
vStringCopy (prevIdent, ident);
break;
case Tok_CurlL:
case Tok_semi:
/* method name is not simple */
if (vStringLength (fullMethodName) != '\0')
{
addTag (fullMethodName, methodKind);
vStringClear (fullMethodName);
}
else
addTag (prevIdent, methodKind);
toDoNext = &parseImplemMethods;
parseImplemMethods (ident, what);
vStringClear (prevIdent);
break;
default:
break;
}
}
static void parseImplemMethods (vString * const ident, objcToken what)
{
switch (what)
{
case Tok_PLUS: /* + */
toDoNext = &parseMethodsImplemName;
methodKind = K_CLASSMETHOD;
break;
case Tok_MINUS: /* - */
toDoNext = &parseMethodsImplemName;
methodKind = K_METHOD;
break;
case ObjcEND: /* @end */
popEnclosingContext ();
toDoNext = &globalScope;
break;
case Tok_CurlL: /* { */
toDoNext = &ignoreBalanced;
ignoreBalanced (ident, what);
comeAfter = &parseImplemMethods;
break;
default:
break;
}
}
static void parseProperty (vString * const ident, objcToken what)
{
switch (what)
{
case Tok_PARL:
toDoNext = &tillToken;
comeAfter = &parseProperty;
waitedToken = Tok_PARR;
break;
/* we got an identifier, keep track of it */
case ObjcIDENTIFIER:
vStringCopy (tempName, ident);
break;
/* our last kept identifier must be our variable name =) */
case Tok_semi:
addTag (tempName, K_PROPERTY);
vStringClear (tempName);
break;
default:
break;
}
}
static void parseMethods (vString * const UNUSED (ident), objcToken what)
{
switch (what)
{
case Tok_PLUS: /* + */
toDoNext = &parseMethodsName;
methodKind = K_CLASSMETHOD;
break;
case Tok_MINUS: /* - */
toDoNext = &parseMethodsName;
methodKind = K_METHOD;
break;
case ObjcPROPERTY:
toDoNext = &parseProperty;
break;
case ObjcEND: /* @end */
popEnclosingContext ();
toDoNext = &globalScope;
break;
case Tok_CurlL: /* { */
toDoNext = &parseFields;
break;
default:
break;
}
}
static void parseProtocol (vString * const ident, objcToken what)
{
if (what == ObjcIDENTIFIER)
{
pushEnclosingContext (ident, K_PROTOCOL);
addTag (ident, K_PROTOCOL);
}
toDoNext = &parseMethods;
}
static void parseImplementation (vString * const ident, objcToken what)
{
if (what == ObjcIDENTIFIER)
{
addTag (ident, K_IMPLEMENTATION);
pushEnclosingContext (ident, K_IMPLEMENTATION);
}
toDoNext = &parseImplemMethods;
}
static void parseInterface (vString * const ident, objcToken what)
{
if (what == ObjcIDENTIFIER)
{
addTag (ident, K_INTERFACE);
pushEnclosingContext (ident, K_INTERFACE);
}
toDoNext = &parseMethods;
}
static void parseStructMembers (vString * const ident, objcToken what)
{
static parseNext prev = NULL;
if (prev != NULL)
{
comeAfter = prev;
prev = NULL;
}
switch (what)
{
case ObjcIDENTIFIER:
vStringCopy (tempName, ident);
break;
case Tok_semi: /* ';' */
addTag (tempName, K_FIELD);
vStringClear (tempName);
break;
/* some types are complex, the only one
* we will loose is the function type. */
case Tok_CurlL: /* '{' */
case Tok_PARL: /* '(' */
case Tok_SQUAREL: /* '[' */
toDoNext = &ignoreBalanced;
prev = comeAfter;
comeAfter = &parseStructMembers;
ignoreBalanced (ident, what);
break;
case Tok_CurlR:
toDoNext = comeAfter;
break;
default:
/* don't care */
break;
}
}
/* Called just after the struct keyword */
static void parseStruct (vString * const ident, objcToken what)
{
static boolean gotName = FALSE;
switch (what)
{
case ObjcIDENTIFIER:
if (!gotName)
{
addTag (ident, K_STRUCT);
pushEnclosingContext (ident, K_STRUCT);
gotName = TRUE;
}
else
{
gotName = FALSE;
popEnclosingContext ();
toDoNext = comeAfter;
comeAfter (ident, what);
}
break;
case Tok_CurlL:
toDoNext = &parseStructMembers;
break;
/* maybe it was just a forward declaration
* in which case, we pop the context */
case Tok_semi:
if (gotName)
popEnclosingContext ();
toDoNext = comeAfter;
comeAfter (ident, what);
break;
default:
/* we don't care */
break;
}
}
/* Parse enumeration members, ignoring potential initialization */
static void parseEnumFields (vString * const ident, objcToken what)
{
static parseNext prev = NULL;
if (prev != NULL)
{
comeAfter = prev;
prev = NULL;
}
switch (what)
{
case ObjcIDENTIFIER:
addTag (ident, K_ENUM);
prev = comeAfter;
waitedToken = Tok_COMA;
/* last item might not have a coma */
fallBackToken = Tok_CurlR;
fallback = comeAfter;
comeAfter = parseEnumFields;
toDoNext = &tillTokenOrFallBack;
break;
case Tok_CurlR:
toDoNext = comeAfter;
popEnclosingContext ();
break;
default:
/* don't care */
break;
}
}
/* parse enum ... { ... */
static void parseEnum (vString * const ident, objcToken what)
{
static boolean named = FALSE;
switch (what)
{
case ObjcIDENTIFIER:
if (!named)
{
addTag (ident, K_ENUM);
pushEnclosingContext (ident, K_ENUM);
named = TRUE;
}
else
{
named = FALSE;
popEnclosingContext ();
toDoNext = comeAfter;
comeAfter (ident, what);
}
break;
case Tok_CurlL: /* '{' */
toDoNext = &parseEnumFields;
named = FALSE;
break;
case Tok_semi: /* ';' */
if (named)
popEnclosingContext ();
toDoNext = comeAfter;
comeAfter (ident, what);
break;
default:
/* don't care */
break;
}
}
/* Parse something like
* typedef .... ident ;
* ignoring the defined type but in the case of struct,
* in which case struct are parsed.
*/
static void parseTypedef (vString * const ident, objcToken what)
{
switch (what)
{
case ObjcSTRUCT:
toDoNext = &parseStruct;
comeAfter = &parseTypedef;
break;
case ObjcENUM:
toDoNext = &parseEnum;
comeAfter = &parseTypedef;
break;
case ObjcIDENTIFIER:
vStringCopy (tempName, ident);
break;
case Tok_semi: /* ';' */
addTag (tempName, K_TYPEDEF);
vStringClear (tempName);
toDoNext = &globalScope;
break;
default:
/* we don't care */
break;
}
}
static void ignorePreprocStuff (vString * const UNUSED (ident), objcToken what)
{
static boolean escaped = FALSE;
switch (what)
{
case Tok_Backslash:
escaped = TRUE;
break;
case Tok_EOL:
if (escaped)
{
escaped = FALSE;
}
else
{
toDoNext = &globalScope;
}
break;
default:
escaped = FALSE;
break;
}
}
static void parseMacroName (vString * const ident, objcToken what)
{
if (what == ObjcIDENTIFIER)
addTag (ident, K_MACRO);
toDoNext = &ignorePreprocStuff;
}
static void parsePreproc (vString * const ident, objcToken what)
{
switch (what)
{
case ObjcIDENTIFIER:
if (strcmp (vStringValue (ident), "define") == 0)
toDoNext = &parseMacroName;
else
toDoNext = &ignorePreprocStuff;
break;
default:
toDoNext = &ignorePreprocStuff;
break;
}
}
/* Handle the "strong" top levels, all 'big' declarations
* happen here */
static void globalScope (vString * const ident, objcToken what)
{
switch (what)
{
case Tok_Sharp:
toDoNext = &parsePreproc;
break;
case ObjcSTRUCT:
toDoNext = &parseStruct;
comeAfter = &globalScope;
break;
case ObjcIDENTIFIER:
/* we keep track of the identifier if we
* come across a function. */
vStringCopy (tempName, ident);
break;
case Tok_PARL:
/* if we find an opening parenthesis it means we
* found a function (or a macro...) */
addTag (tempName, K_FUNCTION);
vStringClear (tempName);
comeAfter = &globalScope;
toDoNext = &ignoreBalanced;
ignoreBalanced (ident, what);
break;
case ObjcINTERFACE:
toDoNext = &parseInterface;
break;
case ObjcIMPLEMENTATION:
toDoNext = &parseImplementation;
break;
case ObjcPROTOCOL:
toDoNext = &parseProtocol;
break;
case ObjcTYPEDEF:
toDoNext = parseTypedef;
comeAfter = &globalScope;
break;
case Tok_CurlL:
comeAfter = &globalScope;
toDoNext = &ignoreBalanced;
ignoreBalanced (ident, what);
break;
case ObjcEND:
case ObjcPUBLIC:
case ObjcPROTECTED:
case ObjcPRIVATE:
default:
/* we don't care */
break;
}
}
/*////////////////////////////////////////////////////////////////
//// Deal with the system */
static void findObjcTags (void)
{
vString *name = vStringNew ();
lexingState st;
objcToken tok;
parentName = vStringNew ();
tempName = vStringNew ();
fullMethodName = vStringNew ();
prevIdent = vStringNew ();
st.name = vStringNew ();
st.cp = fileReadLine ();
toDoNext = &globalScope;
tok = lex (&st);
while (tok != Tok_EOF)
{
(*toDoNext) (st.name, tok);
tok = lex (&st);
}
vStringDelete (name);
vStringDelete (parentName);
vStringDelete (tempName);
vStringDelete (fullMethodName);
vStringDelete (prevIdent);
parentName = NULL;
tempName = NULL;
prevIdent = NULL;
fullMethodName = NULL;
}
static void objcInitialize (const langType language)
{
Lang_ObjectiveC = language;
initKeywordHash ();
}
extern parserDefinition *ObjcParser (void)
{
static const char *const extensions[] = { "m", "h", NULL };
parserDefinition *def = parserNew ("ObjectiveC");
def->kinds = ObjcKinds;
def->kindCount = KIND_COUNT (ObjcKinds);
def->extensions = extensions;
def->parser = findObjcTags;
def->initialize = objcInitialize;
return def;
}