tokenizing assignment operators

This commit is contained in:
Josh Wolfe 2015-12-12 18:17:27 -07:00
parent 2082588201
commit eb1542c102
3 changed files with 231 additions and 66 deletions

View File

@ -88,7 +88,9 @@ AsmInputItem : token(LBracket) token(Symbol) token(RBracket) token(String) token
AsmClobbers: token(Colon) list(token(String), token(Comma)) AsmClobbers: token(Colon) list(token(String), token(Comma))
AssignmentExpression : BoolOrExpression token(Equal) BoolOrExpression | BoolOrExpression AssignmentExpression : BoolOrExpression AssignmentOperator BoolOrExpression | BoolOrExpression
AssignmentOperator : token(Eq) | token(TimesEq) | token(DivEq) | token(ModEq) | token(PlusEq) | token(MinusEq) | token(BitShiftLeftEq) | token(BitShiftRightEq) | token(BitAndEq) | token(BitXorEq) | token(BitOrEq) | token(BoolAndEq) | token(BoolOrEq)
BlockExpression : IfExpression | Block BlockExpression : IfExpression | Block
@ -164,7 +166,7 @@ as
== != < > <= >= == != < > <= >=
&& &&
|| ||
= += -= = *= /= %= += -= <<= >>= &= ^= |= &&= ||=
``` ```
## Literals ## Literals

View File

@ -96,20 +96,28 @@ enum TokenizeState {
TokenizeStateSymbolFirst, TokenizeStateSymbolFirst,
TokenizeStateNumber, TokenizeStateNumber,
TokenizeStateString, TokenizeStateString,
TokenizeStateSawDash, TokenizeStateSawStar,
TokenizeStateSawSlash, TokenizeStateSawSlash,
TokenizeStateSawPercent,
TokenizeStateSawPlus,
TokenizeStateSawDash,
TokenizeStateSawAmpersand,
TokenizeStateSawAmpersandAmpersand,
TokenizeStateSawCaret,
TokenizeStateSawPipe,
TokenizeStateSawPipePipe,
TokenizeStateLineComment, TokenizeStateLineComment,
TokenizeStateMultiLineComment, TokenizeStateMultiLineComment,
TokenizeStateMultiLineCommentSlash, TokenizeStateMultiLineCommentSlash,
TokenizeStateMultiLineCommentStar, TokenizeStateMultiLineCommentStar,
TokenizeStatePipe, TokenizeStateSawEq,
TokenizeStateAmpersand, TokenizeStateSawBang,
TokenizeStateEq, TokenizeStateSawLessThan,
TokenizeStateBang, TokenizeStateSawLessThanLessThan,
TokenizeStateLessThan, TokenizeStateSawGreaterThan,
TokenizeStateGreaterThan, TokenizeStateSawGreaterThanGreaterThan,
TokenizeStateDot, TokenizeStateSawDot,
TokenizeStateDotDot, TokenizeStateSawDotDot,
TokenizeStateError, TokenizeStateError,
}; };
@ -259,14 +267,6 @@ void tokenize(Buf *buf, Tokenization *out) {
begin_token(&t, TokenIdComma); begin_token(&t, TokenIdComma);
end_token(&t); end_token(&t);
break; break;
case '*':
begin_token(&t, TokenIdStar);
end_token(&t);
break;
case '%':
begin_token(&t, TokenIdPercent);
end_token(&t);
break;
case '{': case '{':
begin_token(&t, TokenIdLBrace); begin_token(&t, TokenIdLBrace);
end_token(&t); end_token(&t);
@ -291,9 +291,25 @@ void tokenize(Buf *buf, Tokenization *out) {
begin_token(&t, TokenIdColon); begin_token(&t, TokenIdColon);
end_token(&t); end_token(&t);
break; break;
case '#':
begin_token(&t, TokenIdNumberSign);
end_token(&t);
break;
case '*':
begin_token(&t, TokenIdStar);
t.state = TokenizeStateSawStar;
break;
case '/':
begin_token(&t, TokenIdSlash);
t.state = TokenizeStateSawSlash;
break;
case '%':
begin_token(&t, TokenIdPercent);
t.state = TokenizeStateSawPercent;
break;
case '+': case '+':
begin_token(&t, TokenIdPlus); begin_token(&t, TokenIdPlus);
end_token(&t); t.state = TokenizeStateSawPlus;
break; break;
case '~': case '~':
begin_token(&t, TokenIdTilde); begin_token(&t, TokenIdTilde);
@ -303,54 +319,46 @@ void tokenize(Buf *buf, Tokenization *out) {
begin_token(&t, TokenIdDash); begin_token(&t, TokenIdDash);
t.state = TokenizeStateSawDash; t.state = TokenizeStateSawDash;
break; break;
case '#': case '&':
begin_token(&t, TokenIdNumberSign); begin_token(&t, TokenIdBinAnd);
end_token(&t); t.state = TokenizeStateSawAmpersand;
break; break;
case '^': case '^':
begin_token(&t, TokenIdBinXor); begin_token(&t, TokenIdBinXor);
end_token(&t); t.state = TokenizeStateSawCaret;
break;
case '/':
begin_token(&t, TokenIdSlash);
t.state = TokenizeStateSawSlash;
break; break;
case '|': case '|':
begin_token(&t, TokenIdBinOr); begin_token(&t, TokenIdBinOr);
t.state = TokenizeStatePipe; t.state = TokenizeStateSawPipe;
break;
case '&':
begin_token(&t, TokenIdBinAnd);
t.state = TokenizeStateAmpersand;
break; break;
case '=': case '=':
begin_token(&t, TokenIdEq); begin_token(&t, TokenIdEq);
t.state = TokenizeStateEq; t.state = TokenizeStateSawEq;
break; break;
case '!': case '!':
begin_token(&t, TokenIdBang); begin_token(&t, TokenIdBang);
t.state = TokenizeStateBang; t.state = TokenizeStateSawBang;
break; break;
case '<': case '<':
begin_token(&t, TokenIdCmpLessThan); begin_token(&t, TokenIdCmpLessThan);
t.state = TokenizeStateLessThan; t.state = TokenizeStateSawLessThan;
break; break;
case '>': case '>':
begin_token(&t, TokenIdCmpGreaterThan); begin_token(&t, TokenIdCmpGreaterThan);
t.state = TokenizeStateGreaterThan; t.state = TokenizeStateSawGreaterThan;
break; break;
case '.': case '.':
begin_token(&t, TokenIdDot); begin_token(&t, TokenIdDot);
t.state = TokenizeStateDot; t.state = TokenizeStateSawDot;
break; break;
default: default:
tokenize_error(&t, "invalid character: '%c'", c); tokenize_error(&t, "invalid character: '%c'", c);
} }
break; break;
case TokenizeStateDot: case TokenizeStateSawDot:
switch (c) { switch (c) {
case '.': case '.':
t.state = TokenizeStateDotDot; t.state = TokenizeStateSawDotDot;
t.cur_tok->id = TokenIdEllipsis; t.cur_tok->id = TokenIdEllipsis;
break; break;
default: default:
@ -360,20 +368,17 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStateDotDot: case TokenizeStateSawDotDot:
switch (c) { switch (c) {
case '.': case '.':
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
end_token(&t); end_token(&t);
break; break;
default: default:
t.pos -= 1; tokenize_error(&t, "invalid character: '%c'", c);
end_token(&t);
t.state = TokenizeStateStart;
continue;
} }
break; break;
case TokenizeStateGreaterThan: case TokenizeStateSawGreaterThan:
switch (c) { switch (c) {
case '=': case '=':
t.cur_tok->id = TokenIdCmpGreaterOrEq; t.cur_tok->id = TokenIdCmpGreaterOrEq;
@ -382,8 +387,7 @@ void tokenize(Buf *buf, Tokenization *out) {
break; break;
case '>': case '>':
t.cur_tok->id = TokenIdBitShiftRight; t.cur_tok->id = TokenIdBitShiftRight;
end_token(&t); t.state = TokenizeStateSawGreaterThanGreaterThan;
t.state = TokenizeStateStart;
break; break;
default: default:
t.pos -= 1; t.pos -= 1;
@ -392,7 +396,20 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStateLessThan: case TokenizeStateSawGreaterThanGreaterThan:
// Reached after ">>" has been read; the current token id is TokenIdBitShiftRight.
switch (c) {
    case '=':
        // ">>=": upgrade the token to the compound assignment and consume '='.
        t.cur_tok->id = TokenIdBitShiftRightEq;
        end_token(&t);
        t.state = TokenizeStateStart;
        // Stop here: falling into `default` would rewind t.pos and call
        // end_token() a second time for the same token.
        break;
    default:
        // Any other character is not part of this token: step back one
        // position and finish the ">>" token. The `continue` presumably lets
        // the enclosing loop re-examine this character from the start state
        // (loop header not visible here — confirm).
        t.pos -= 1;
        end_token(&t);
        t.state = TokenizeStateStart;
        continue;
}
break;
case TokenizeStateSawLessThan:
switch (c) { switch (c) {
case '=': case '=':
t.cur_tok->id = TokenIdCmpLessOrEq; t.cur_tok->id = TokenIdCmpLessOrEq;
@ -400,8 +417,7 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
case '<': case '<':
t.cur_tok->id = TokenIdBitShiftLeft; t.cur_tok->id = TokenIdBitShiftLeft;
end_token(&t); t.state = TokenizeStateSawLessThanLessThan;
t.state = TokenizeStateStart;
break; break;
default: default:
t.pos -= 1; t.pos -= 1;
@ -410,7 +426,20 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStateBang: case TokenizeStateSawLessThanLessThan:
// Reached after "<<" has been read; the current token id is TokenIdBitShiftLeft.
switch (c) {
    case '=':
        // "<<=": upgrade the token to the compound assignment and consume '='.
        t.cur_tok->id = TokenIdBitShiftLeftEq;
        end_token(&t);
        t.state = TokenizeStateStart;
        // Stop here: falling into `default` would rewind t.pos and call
        // end_token() a second time for the same token.
        break;
    default:
        // Any other character is not part of this token: step back one
        // position and finish the "<<" token. The `continue` presumably lets
        // the enclosing loop re-examine this character from the start state
        // (loop header not visible here — confirm).
        t.pos -= 1;
        end_token(&t);
        t.state = TokenizeStateStart;
        continue;
}
break;
case TokenizeStateSawBang:
switch (c) { switch (c) {
case '=': case '=':
t.cur_tok->id = TokenIdCmpNotEq; t.cur_tok->id = TokenIdCmpNotEq;
@ -424,7 +453,7 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStateEq: case TokenizeStateSawEq:
switch (c) { switch (c) {
case '=': case '=':
t.cur_tok->id = TokenIdCmpEq; t.cur_tok->id = TokenIdCmpEq;
@ -438,10 +467,10 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStateAmpersand: case TokenizeStateSawStar:
switch (c) { switch (c) {
case '&': case '=':
t.cur_tok->id = TokenIdBoolAnd; t.cur_tok->id = TokenIdTimesEq;
end_token(&t); end_token(&t);
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
break; break;
@ -452,10 +481,102 @@ void tokenize(Buf *buf, Tokenization *out) {
continue; continue;
} }
break; break;
case TokenizeStatePipe: case TokenizeStateSawPercent:
switch (c) {
case '=':
t.cur_tok->id = TokenIdModEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawPlus:
switch (c) {
case '=':
t.cur_tok->id = TokenIdPlusEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawAmpersand:
switch (c) {
case '&':
t.cur_tok->id = TokenIdBoolAnd;
t.state = TokenizeStateSawAmpersandAmpersand;
break;
case '=':
t.cur_tok->id = TokenIdBitAndEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawAmpersandAmpersand:
switch (c) {
case '=':
t.cur_tok->id = TokenIdBoolAndEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawCaret:
switch (c) {
case '=':
t.cur_tok->id = TokenIdBitXorEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawPipe:
switch (c) { switch (c) {
case '|': case '|':
t.cur_tok->id = TokenIdBoolOr; t.cur_tok->id = TokenIdBoolOr;
t.state = TokenizeStateSawPipePipe;
break;
case '=':
t.cur_tok->id = TokenIdBitOrEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawPipePipe:
switch (c) {
case '=':
t.cur_tok->id = TokenIdBoolOrEq;
end_token(&t); end_token(&t);
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
break; break;
@ -477,6 +598,11 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateMultiLineComment; t.state = TokenizeStateMultiLineComment;
t.multi_line_comment_count = 1; t.multi_line_comment_count = 1;
break; break;
case '=':
t.cur_tok->id = TokenIdDivEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default: default:
t.pos -= 1; t.pos -= 1;
end_token(&t); end_token(&t);
@ -592,6 +718,11 @@ void tokenize(Buf *buf, Tokenization *out) {
end_token(&t); end_token(&t);
t.state = TokenizeStateStart; t.state = TokenizeStateStart;
break; break;
case '=':
t.cur_tok->id = TokenIdMinusEq;
end_token(&t);
t.state = TokenizeStateStart;
break;
default: default:
t.pos -= 1; t.pos -= 1;
end_token(&t); end_token(&t);
@ -619,18 +750,26 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSymbol: case TokenizeStateSymbol:
case TokenizeStateSymbolFirst: case TokenizeStateSymbolFirst:
case TokenizeStateNumber: case TokenizeStateNumber:
case TokenizeStateSawStar:
case TokenizeStateSawSlash:
case TokenizeStateSawPercent:
case TokenizeStateSawPlus:
case TokenizeStateSawDash: case TokenizeStateSawDash:
case TokenizeStatePipe: case TokenizeStateSawAmpersand:
case TokenizeStateAmpersand: case TokenizeStateSawAmpersandAmpersand:
case TokenizeStateEq: case TokenizeStateSawCaret:
case TokenizeStateBang: case TokenizeStateSawPipe:
case TokenizeStateLessThan: case TokenizeStateSawPipePipe:
case TokenizeStateGreaterThan: case TokenizeStateSawEq:
case TokenizeStateDot: case TokenizeStateSawBang:
case TokenizeStateSawLessThan:
case TokenizeStateSawLessThanLessThan:
case TokenizeStateSawGreaterThan:
case TokenizeStateSawGreaterThanGreaterThan:
case TokenizeStateSawDot:
end_token(&t); end_token(&t);
break; break;
case TokenizeStateSawSlash: case TokenizeStateSawDotDot:
case TokenizeStateDotDot:
tokenize_error(&t, "unexpected EOF"); tokenize_error(&t, "unexpected EOF");
break; break;
case TokenizeStateLineComment: case TokenizeStateLineComment:
@ -695,6 +834,18 @@ static const char * token_name(Token *token) {
case TokenIdBoolOr: return "BoolOr"; case TokenIdBoolOr: return "BoolOr";
case TokenIdBoolAnd: return "BoolAnd"; case TokenIdBoolAnd: return "BoolAnd";
case TokenIdEq: return "Eq"; case TokenIdEq: return "Eq";
case TokenIdTimesEq: return "TimesEq";
case TokenIdDivEq: return "DivEq";
case TokenIdModEq: return "ModEq";
case TokenIdPlusEq: return "PlusEq";
case TokenIdMinusEq: return "MinusEq";
case TokenIdBitShiftLeftEq: return "BitShiftLeftEq";
case TokenIdBitShiftRightEq: return "BitShiftRightEq";
case TokenIdBitAndEq: return "BitAndEq";
case TokenIdBitXorEq: return "BitXorEq";
case TokenIdBitOrEq: return "BitOrEq";
case TokenIdBoolAndEq: return "BoolAndEq";
case TokenIdBoolOrEq: return "BoolOrEq";
case TokenIdBang: return "Bang"; case TokenIdBang: return "Bang";
case TokenIdTilde: return "Tilde"; case TokenIdTilde: return "Tilde";
case TokenIdCmpEq: return "CmpEq"; case TokenIdCmpEq: return "CmpEq";

View File

@ -55,6 +55,18 @@ enum TokenId {
TokenIdBinAnd, TokenIdBinAnd,
TokenIdBinXor, TokenIdBinXor,
TokenIdEq, TokenIdEq,
TokenIdTimesEq,
TokenIdDivEq,
TokenIdModEq,
TokenIdPlusEq,
TokenIdMinusEq,
TokenIdBitShiftLeftEq,
TokenIdBitShiftRightEq,
TokenIdBitAndEq,
TokenIdBitXorEq,
TokenIdBitOrEq,
TokenIdBoolAndEq,
TokenIdBoolOrEq,
TokenIdCmpEq, TokenIdCmpEq,
TokenIdBang, TokenIdBang,
TokenIdTilde, TokenIdTilde,