diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index 8438ff7c5..186b7a8ab 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -8,6 +8,7 @@ #include "frontend/TokenStream.h" +#include "mozilla/ArrayUtils.h" #include "mozilla/IntegerTypeTraits.h" #include "mozilla/PodOperations.h" @@ -33,6 +34,7 @@ using namespace js; using namespace js::frontend; +using mozilla::ArrayLength; using mozilla::Maybe; using mozilla::PodAssign; using mozilla::PodCopy; @@ -223,8 +225,13 @@ TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset) // only if lineStartOffsets_.append succeeds, to keep sentinel. // Otherwise return false to tell TokenStream about OOM. uint32_t maxPtr = MAX_PTR; - if (!lineStartOffsets_.append(maxPtr)) + if (!lineStartOffsets_.append(maxPtr)) { + static_assert(mozilla::IsSame::value, + "this function's caller depends on it reporting an " + "error on failure, as TempAllocPolicy ensures"); return false; + } lineStartOffsets_[lineIndex] = lineStartOffset; } else { @@ -554,8 +561,9 @@ TokenStream::advance(size_t position) MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type)); lookahead = 0; - if (flags.hitOOM) - return reportError(JSMSG_OUT_OF_MEMORY); + if (flags.hitOOM) { + return false; + } return true; } @@ -775,7 +783,7 @@ TokenStream::reportErrorNoOffset(unsigned errorNumber, ...) } bool -TokenStream::reportWarning(unsigned errorNumber, ...) +TokenStream::warning(unsigned errorNumber, ...) { va_list args; va_start(args, errorNumber); @@ -806,6 +814,32 @@ TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...) va_end(args); } +void +TokenStream::error(unsigned errorNumber, ...) +{ + va_list args; + va_start(args, errorNumber); +#ifdef DEBUG + bool result = +#endif + reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber, args); + MOZ_ASSERT(!result, "reporting an error returned true?"); + va_end(args); +} + +void +TokenStream::errorAt(uint32_t offset, unsigned errorNumber, ...) +{ + va_list args; + va_start(args, errorNumber); +#ifdef DEBUG + bool result = +#endif + reportCompileErrorNumberVA(offset, JSREPORT_ERROR, errorNumber, args); + MOZ_ASSERT(!result, "reporting an error returned true?"); + va_end(args); +} + // We have encountered a '\': check for a Unicode escape sequence after it. // Return the length of the escape sequence and the character code point (by // value) if we found a Unicode escape sequence. Otherwise, return 0. In both @@ -934,34 +968,49 @@ TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated) bool TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated, - const char* directive, int directiveLength, + const char* directive, uint8_t directiveLength, const char* errorMsgPragma, UniqueTwoByteChars* destination) { MOZ_ASSERT(directiveLength <= 18); char16_t peeked[18]; - int32_t c; if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) { - if (shouldWarnDeprecated && - !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) - return false; + if (shouldWarnDeprecated) { + if (!warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) + return false; + } skipChars(directiveLength); tokenbuf.clear(); - while ((c = peekChar()) && c != EOF && !unicode::IsSpaceOrBOM2(c)) { - getChar(); + do { + int32_t c; + if (!peekChar(&c)) + return false; + + if (c == EOF || unicode::IsSpaceOrBOM2(c)) + break; + + consumeKnownChar(c); + // Debugging directives can occur in both single- and multi-line // comments. If we're currently inside a multi-line comment, we also // need to recognize multi-line comment terminators. - if (isMultiline && c == '*' && peekChar() == '/') { - ungetChar('*'); - break; + if (isMultiline && c == '*') { + int32_t c2; + if (!peekChar(&c2)) + return false; + + if (c2 == '/') { + ungetChar('*'); + break; + } } + if (!tokenbuf.append(c)) return false; - } + } while (true); if (tokenbuf.empty()) { // The directive's URL was missing, but this is not quite an @@ -993,7 +1042,10 @@ TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated) // developer would like to refer to the source as from the source's actual // URL. - return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11, + static const char sourceURLDirective[] = " sourceURL="; + constexpr uint8_t sourceURLDirectiveLength = ArrayLength(sourceURLDirective) - 1; + return getDirective(isMultiline, shouldWarnDeprecated, + sourceURLDirective, sourceURLDirectiveLength, "sourceURL", &displayURL_); } @@ -1003,7 +1055,10 @@ TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated) // Match comments of the form "//# sourceMappingURL=" or // "/\* //# sourceMappingURL= *\/" - return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18, + static const char sourceMappingURLDirective[] = " sourceMappingURL="; + constexpr uint8_t sourceMappingURLDirectiveLength = ArrayLength(sourceMappingURLDirective) - 1; + return getDirective(isMultiline, shouldWarnDeprecated, + sourceMappingURLDirective, sourceMappingURLDirectiveLength, "sourceMappingURL", &sourceMapURL_); } @@ -1119,8 +1174,10 @@ TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp) return true; } - if (kw->tokentype == TOK_RESERVED) - return reportError(JSMSG_RESERVED_ID, kw->chars); + if (kw->tokentype == TOK_RESERVED) { + error(JSMSG_RESERVED_ID, kw->chars); + return false; + } if (kw->tokentype == TOK_STRICT_RESERVED) return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars); @@ -1538,10 +1595,11 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) // grammar. We might not always be so permissive, so we warn // about it. if (c >= '8') { - if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) { + if (!warning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) goto error; - } - goto decimal; // use the decimal scanner for the rest of the number + + // Use the decimal scanner for the rest of the number. + goto decimal; } c = getCharIgnoreEOL(); } @@ -1690,7 +1748,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) case '/': // Look for a single-line comment. if (matchChar('/')) { - c = peekChar(); + if (!peekChar(&c)) + goto error; if (c == '@' || c == '#') { bool shouldWarn = getChar() == '@'; if (!getDirectives(false, shouldWarn)) @@ -1757,7 +1816,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) RegExpFlag reflags = NoFlags; unsigned length = tokenbuf.length() + 1; while (true) { - c = peekChar(); + if (!peekChar(&c)) + goto error; if (c == 'g' && !(reflags & GlobalFlag)) reflags = RegExpFlag(reflags | GlobalFlag); else if (c == 'i' && !(reflags & IgnoreCaseFlag)) @@ -1774,7 +1834,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) length++; } - c = peekChar(); + if (!peekChar(&c)) + goto error; if (JS7_ISLET(c)) { char buf[2] = { '\0', '\0' }; tp->pos.begin += length + 1; @@ -1797,8 +1858,13 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) case '-': if (matchChar('-')) { - if (peekChar() == '>' && !flags.isDirtyLine) + int32_t c2; + if (!peekChar(&c2)) + goto error; + + if (c2 == '>' && !flags.isDirtyLine) goto skipline; + tp->type = TOK_DEC; } else { tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB; @@ -1814,8 +1880,9 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) MOZ_CRASH("should have jumped to |out| or |error|"); out: - if (flags.hitOOM) - return reportError(JSMSG_OUT_OF_MEMORY); + if (flags.hitOOM) { + return false; + } flags.isDirtyLine = true; tp->pos.end = userbuf.offset(); @@ -1831,8 +1898,9 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) return true; error: - if (flags.hitOOM) - return reportError(JSMSG_OUT_OF_MEMORY); + if (flags.hitOOM) { + return false; + } flags.isDirtyLine = true; tp->pos.end = userbuf.offset(); @@ -1850,32 +1918,50 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) } bool -TokenStream::getBracedUnicode(uint32_t* cp) +TokenStream::matchBracedUnicode(bool* matched, uint32_t* cp) { + int32_t c; + if (!peekChar(&c)) + return false; + if (c != '{') { + *matched = false; + return true; + } + consumeKnownChar('{'); + uint32_t start = userbuf.offset(); + bool first = true; - int32_t c; uint32_t code = 0; - while (true) { - c = getCharIgnoreEOL(); - if (c == EOF) + do { + int32_t c = getCharIgnoreEOL(); + if (c == EOF) { + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; + } if (c == '}') { - if (first) + if (first) { + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; + } break; } - if (!JS7_ISHEX(c)) + if (!JS7_ISHEX(c)) { + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; + } code = (code << 4) | JS7_UNHEX(c); - if (code > unicode::NonBMPMax) + if (code > unicode::NonBMPMax) { + errorAt(start, JSMSG_UNICODE_OVERFLOW, "escape sequence"); return false; + } first = false; - } + } while (true); + *matched = true; *cp = code; return true; } @@ -1897,7 +1983,7 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) while ((c = getCharIgnoreEOL()) != untilChar) { if (c == EOF) { ungetCharIgnoreEOL(c); - reportError(JSMSG_UNTERMINATED_STRING); + error(JSMSG_UNTERMINATED_STRING); return false; } @@ -1917,12 +2003,11 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) // Unicode character specification. case 'u': { - if (peekChar() == '{') { - uint32_t code; - if (!getBracedUnicode(&code)) { - reportError(JSMSG_MALFORMED_ESCAPE, "Unicode"); - return false; - } + bool matched; + uint32_t code; + if (!matchBracedUnicode(&matched, &code)) + return false; + if (matched) { MOZ_ASSERT(code <= unicode::NonBMPMax); if (code < unicode::NonBMPMin) { @@ -1945,7 +2030,7 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) c = (c << 4) + JS7_UNHEX(cp[3]); skipChars(4); } else { - reportError(JSMSG_MALFORMED_ESCAPE, "Unicode"); + error(JSMSG_MALFORMED_ESCAPE, "Unicode"); return false; } break; @@ -1958,7 +2043,7 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]); skipChars(2); } else { - reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal"); + error(JSMSG_MALFORMED_ESCAPE, "hexadecimal"); return false; } break; @@ -1969,12 +2054,13 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) if (JS7_ISOCT(c)) { int32_t val = JS7_UNOCT(c); - c = peekChar(); + if (!peekChar(&c)) + return false; // Strict mode code allows only \0, then a non-digit. if (val != 0 || JS7_ISDEC(c)) { if (parsingTemplate) { - reportError(JSMSG_DEPRECATED_OCTAL); + error(JSMSG_DEPRECATED_OCTAL); return false; } if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL)) @@ -1985,7 +2071,8 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) if (JS7_ISOCT(c)) { val = 8 * val + JS7_UNOCT(c); getChar(); - c = peekChar(); + if (!peekChar(&c)) + return false; if (JS7_ISOCT(c)) { int32_t save = val; val = 8 * val + JS7_UNOCT(c); @@ -2003,7 +2090,7 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) } else if (TokenBuf::isRawEOLChar(c)) { if (!parsingTemplate) { ungetCharIgnoreEOL(c); - reportError(JSMSG_UNTERMINATED_STRING); + error(JSMSG_UNTERMINATED_STRING); return false; } if (c == '\r') { diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 1705dd021..77be45578 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -364,13 +364,21 @@ class MOZ_STACK_CLASS TokenStream // TokenStream-specific error reporters. bool reportError(unsigned errorNumber, ...); bool reportErrorNoOffset(unsigned errorNumber, ...); - bool reportWarning(unsigned errorNumber, ...); + + // Report the given error at the current offset. + void error(unsigned errorNumber, ...); + + // Report the given error at the given offset. + void errorAt(uint32_t offset, unsigned errorNumber, ...); + + // Warn at the current offset. + MOZ_MUST_USE bool warning(unsigned errorNumber, ...); static const uint32_t NoOffset = UINT32_MAX; // General-purpose error reporters. You should avoid calling these - // directly, and instead use the more succinct alternatives (e.g. - // reportError()) in TokenStream, Parser, and BytecodeEmitter. + // directly, and instead use the more succinct alternatives (error(), + // warning(), &c.) in TokenStream, Parser, and BytecodeEmitter. bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, va_list args); bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, @@ -946,7 +954,7 @@ class MOZ_STACK_CLASS TokenStream MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier); - MOZ_MUST_USE bool getBracedUnicode(uint32_t* code); + MOZ_MUST_USE bool matchBracedUnicode(bool* matched, uint32_t* code); MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp); int32_t getChar(); @@ -963,7 +971,7 @@ class MOZ_STACK_CLASS TokenStream MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); MOZ_MUST_USE bool getDirective(bool isMultiline, bool shouldWarnDeprecated, - const char* directive, int directiveLength, + const char* directive, uint8_t directiveLength, const char* errorMsgPragma, UniquePtr* destination); MOZ_MUST_USE bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated); @@ -981,20 +989,25 @@ class MOZ_STACK_CLASS TokenStream MOZ_ASSERT(c == expect); } - int32_t peekChar() { - int32_t c = getChar(); - ungetChar(c); - return c; + MOZ_MUST_USE bool peekChar(int32_t* c) { + *c = getChar(); + ungetChar(*c); + return true; } - void skipChars(int n) { - while (--n >= 0) - getChar(); + void skipChars(uint8_t n) { + while (n-- > 0) { + MOZ_ASSERT(userbuf.hasRawChars()); + mozilla::DebugOnly c = getCharIgnoreEOL(); + MOZ_ASSERT(c != '\n'); + } } - void skipCharsIgnoreEOL(int n) { - while (--n >= 0) + void skipCharsIgnoreEOL(uint8_t n) { + while (n-- > 0) { + MOZ_ASSERT(userbuf.hasRawChars()); getCharIgnoreEOL(); + } } void updateLineInfoForEOL(); diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index ccc6ae3eb..8bd88047a 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -243,10 +243,10 @@ RegExpParser::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, template RegExpTree* -RegExpParser::ReportError(unsigned errorNumber) +RegExpParser::ReportError(unsigned errorNumber, const char* param /* = nullptr */) { gc::AutoSuppressGC suppressGC(ts.context()); - ts.reportError(errorNumber); + ts.reportError(errorNumber, param); return nullptr; } @@ -350,7 +350,7 @@ RegExpParser::ParseBracedHexEscape(widechar* value) } code = (code << 4) | d; if (code > unicode::NonBMPMax) { - ReportError(JSMSG_UNICODE_OVERFLOW); + ReportError(JSMSG_UNICODE_OVERFLOW, "regular expression"); return false; } Advance(); diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index b5228a86f..0a7e61858 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -211,7 +211,7 @@ class RegExpParser bool ParseBackReferenceIndex(int* index_out); bool ParseClassAtom(char16_t* char_class, widechar *value); - RegExpTree* ReportError(unsigned errorNumber); + RegExpTree* ReportError(unsigned errorNumber, const char* param = nullptr); void Advance(); void Advance(int dist) { next_pos_ += dist - 1; diff --git a/js/src/js.msg b/js/src/js.msg index 50817f50f..495e3bfad 100644 --- a/js/src/js.msg +++ b/js/src/js.msg @@ -502,7 +502,7 @@ MSG_DEF(JSMSG_RANGE_WITH_CLASS_ESCAPE, 0, JSEXN_SYNTAXERR, "character class esca MSG_DEF(JSMSG_RAW_BRACE_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw brace is not allowed in regular expression with unicode flag") MSG_DEF(JSMSG_RAW_BRACKET_IN_REGEP, 0, JSEXN_SYNTAXERR, "raw bracket is not allowed in regular expression with unicode flag") MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parentheses in regular expression") -MSG_DEF(JSMSG_UNICODE_OVERFLOW, 0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression") +MSG_DEF(JSMSG_UNICODE_OVERFLOW, 1, JSEXN_SYNTAXERR, "Unicode codepoint must not be greater than 0x10FFFF in {0}") MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression") MSG_DEF(JSMSG_UNTERM_CLASS, 0, JSEXN_SYNTAXERR, "unterminated character class")