Fix an issue with the html5 tokenizer and tree builder.

master
Fedor 2019-09-20 13:10:37 +03:00
parent fdc0a37db5
commit 1743745986
6 changed files with 103 additions and 39 deletions

View File

@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2008-2015 Mozilla Foundation * Copyright (c) 2008-2015 Mozilla Foundation
* Copyright (c) 2019 Moonchild Productions
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@ -44,32 +45,66 @@ struct staticJArray {
} }
}; };
template<class T, class L> template <class T, class L>
struct jArray { class autoJArray;
template <class T, class L>
class jArray {
friend class autoJArray<T, L>;
private:
T* arr; T* arr;
public:
L length; L length;
static jArray<T,L> newJArray(L const len) { static jArray<T,L> newJArray(L const len) {
MOZ_ASSERT(len >= 0, "Negative length."); MOZ_ASSERT(len >= 0, "Negative length.");
jArray<T,L> newArray = { new T[size_t(len)], len }; jArray<T,L> newArray = { new T[size_t(len)], len };
return newArray; return newArray;
} }
static jArray<T,L> newFallibleJArray(L const len) { static jArray<T,L> newFallibleJArray(L const len) {
MOZ_ASSERT(len >= 0, "Negative length."); MOZ_ASSERT(len >= 0, "Negative length.");
T* a = new (mozilla::fallible) T[size_t(len)]; T* a = new (mozilla::fallible) T[size_t(len)];
jArray<T,L> newArray = { a, a ? len : 0 }; jArray<T,L> newArray = { a, a ? len : 0 };
return newArray; return newArray;
} }
operator T*() { return arr; }
operator T*() {
return arr;
}
T& operator[] (L const index) { T& operator[] (L const index) {
MOZ_ASSERT(index >= 0, "Array access with negative index."); MOZ_ASSERT(index >= 0, "Array access with negative index.");
MOZ_ASSERT(index < length, "Array index out of bounds."); MOZ_ASSERT(index < length, "Array index out of bounds.");
return arr[index]; return arr[index];
} }
void operator=(staticJArray<T,L>& other) { void operator=(staticJArray<T,L>& other) {
arr = (T*)other.arr; arr = (T*)other.arr;
length = other.length; length = other.length;
} }
};
// Constructs an empty jArray from a nullptr literal: arr is set to nullptr
// and length to 0. Marked MOZ_IMPLICIT, so the conversion from nullptr is
// intentionally allowed without an explicit constructor call.
MOZ_IMPLICIT jArray(decltype(nullptr))
: arr(nullptr)
, length(0)
{
}
// Default constructor: produces the same empty state as construction from
// nullptr (arr == nullptr, length == 0).
jArray()
: arr(nullptr)
, length(0)
{
}
private:
// Member-wise constructor: stores the given pointer and length as-is; it
// performs no allocation and no copy of the pointed-to elements.
// Declared in the private section, so it is reachable only from jArray
// itself and from its friend autoJArray<T, L>.
// NOTE(review): presumably this is what the `{ ptr, len }` initializers in
// newJArray/newFallibleJArray resolve to now that jArray declares
// constructors -- confirm.
jArray(T* aArr, L aLength)
: arr(aArr)
, length(aLength)
{
}
}; // class jArray
template<class T, class L> template<class T, class L>
class autoJArray { class autoJArray {

View File

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2005-2007 Henri Sivonen * Copyright (c) 2005-2007 Henri Sivonen
* Copyright (c) 2007-2015 Mozilla Foundation * Copyright (c) 2007-2015 Mozilla Foundation
* Copyright (c) 2019 Moonchild Productions
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
* Foundation, and Opera Software ASA. * Foundation, and Opera Software ASA.
* *
@ -680,6 +681,22 @@ public class Tokenizer implements Locator {
* *
* @param specialTokenizerState * @param specialTokenizerState
* the tokenizer state to set * the tokenizer state to set
*/
public void setState(int specialTokenizerState) {
// Store the requested tokenizer state in stateSave.
this.stateSave = specialTokenizerState;
// This overload carries no end tag expectation, so clear both the
// expectation and its char-array form. The end-tag-name handling checks
// endTagExpectationAsArray against null before indexing into it.
this.endTagExpectation = null;
this.endTagExpectationAsArray = null;
}
// [NOCPP[
/**
* Sets the tokenizer state and the associated element name. This should
* only ever be used to put the tokenizer into one of the states that have
* a special end tag expectation. For use from the tokenizer test harness.
*
* @param specialTokenizerState
* the tokenizer state to set
* @param endTagExpectation * @param endTagExpectation
* the expected end tag for transitioning back to normal * the expected end tag for transitioning back to normal
*/ */
@ -695,6 +712,8 @@ public class Tokenizer implements Locator {
endTagExpectationToArray(); endTagExpectationToArray();
} }
// ]NOCPP]
/** /**
* Sets the tokenizer state and the associated element name. This should * Sets the tokenizer state and the associated element name. This should
* only ever be used to put the tokenizer into one of the states that have * only ever be used to put the tokenizer into one of the states that have
@ -3759,11 +3778,17 @@ public class Tokenizer implements Locator {
c = checkChar(buf, pos); c = checkChar(buf, pos);
/* /*
* ASSERT! when entering this state, set index to 0 and * ASSERT! when entering this state, set index to 0 and
* call clearStrBufBeforeUse() assert (contentModelElement != * call clearStrBufBeforeUse(); Let's implement the above
* null); Let's implement the above without lookahead. * without lookahead. strBuf is the 'temporary buffer'.
* strBuf is the 'temporary buffer'.
*/ */
if (index < endTagExpectationAsArray.length) { if (endTagExpectationAsArray == null) {
tokenHandler.characters(Tokenizer.LT_SOLIDUS,
0, 2);
cstart = pos;
reconsume = true;
state = transition(state, returnState, reconsume, pos);
continue stateloop;
} else if (index < endTagExpectationAsArray.length) {
char e = endTagExpectationAsArray[index]; char e = endTagExpectationAsArray[index];
char folded = c; char folded = c;
if (c >= 'A' && c <= 'Z') { if (c >= 'A' && c <= 'Z') {

View File

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2007 Henri Sivonen * Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007-2015 Mozilla Foundation * Copyright (c) 2007-2015 Mozilla Foundation
* Copyright (c) 2018-2019 Moonchild Productions
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
* Foundation, and Opera Software ASA. * Foundation, and Opera Software ASA.
* *
@ -640,8 +641,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
); );
currentPtr++; currentPtr++;
stack[currentPtr] = node; stack[currentPtr] = node;
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, tokenizer.setState(Tokenizer.DATA);
contextName);
// The frameset-ok flag is set even though <frameset> never // The frameset-ok flag is set even though <frameset> never
// ends up being allowed as HTML frameset in the fragment case. // ends up being allowed as HTML frameset in the fragment case.
mode = FRAMESET_OK; mode = FRAMESET_OK;
@ -671,8 +671,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
); );
currentPtr++; currentPtr++;
stack[currentPtr] = node; stack[currentPtr] = node;
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, tokenizer.setState(Tokenizer.DATA);
contextName);
// The frameset-ok flag is set even though <frameset> never // The frameset-ok flag is set even though <frameset> never
// ends up being allowed as HTML frameset in the fragment case. // ends up being allowed as HTML frameset in the fragment case.
mode = FRAMESET_OK; mode = FRAMESET_OK;
@ -691,23 +690,18 @@ public abstract class TreeBuilder<T> implements TokenHandler,
resetTheInsertionMode(); resetTheInsertionMode();
formPointer = getFormPointerForContext(contextNode); formPointer = getFormPointerForContext(contextNode);
if ("title" == contextName || "textarea" == contextName) { if ("title" == contextName || "textarea" == contextName) {
tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, tokenizer.setState(Tokenizer.RCDATA);
contextName);
} else if ("style" == contextName || "xmp" == contextName } else if ("style" == contextName || "xmp" == contextName
|| "iframe" == contextName || "noembed" == contextName || "iframe" == contextName || "noembed" == contextName
|| "noframes" == contextName || "noframes" == contextName
|| (scriptingEnabled && "noscript" == contextName)) { || (scriptingEnabled && "noscript" == contextName)) {
tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, tokenizer.setState(Tokenizer.RAWTEXT);
contextName);
} else if ("plaintext" == contextName) { } else if ("plaintext" == contextName) {
tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT, tokenizer.setState(Tokenizer.PLAINTEXT);
contextName);
} else if ("script" == contextName) { } else if ("script" == contextName) {
tokenizer.setStateAndEndTagExpectation( tokenizer.setState(Tokenizer.SCRIPT_DATA);
Tokenizer.SCRIPT_DATA, contextName);
} else { } else {
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, tokenizer.setState(Tokenizer.DATA);
contextName);
} }
} }
contextName = null; contextName = null;

View File

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2005-2007 Henri Sivonen * Copyright (c) 2005-2007 Henri Sivonen
* Copyright (c) 2007-2015 Mozilla Foundation * Copyright (c) 2007-2015 Mozilla Foundation
* Copyright (c) 2019 Moonchild Productions
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
* Foundation, and Opera Software ASA. * Foundation, and Opera Software ASA.
* *
@ -127,15 +128,11 @@ nsHtml5Tokenizer::isViewingXmlSource()
} }
void void
nsHtml5Tokenizer::setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation) nsHtml5Tokenizer::setState(int32_t specialTokenizerState)
{ {
this->stateSave = specialTokenizerState; this->stateSave = specialTokenizerState;
if (specialTokenizerState == NS_HTML5TOKENIZER_DATA) { this->endTagExpectation = nullptr;
return; this->endTagExpectationAsArray = nullptr;
}
autoJArray<char16_t,int32_t> asArray = nsHtml5Portability::newCharArrayFromLocal(endTagExpectation);
this->endTagExpectation = nsHtml5ElementName::elementNameByBuffer(asArray, 0, asArray.length, interner);
endTagExpectationToArray();
} }
void void
@ -2040,7 +2037,13 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
NS_HTML5_BREAK(stateloop); NS_HTML5_BREAK(stateloop);
} }
c = checkChar(buf, pos); c = checkChar(buf, pos);
if (index < endTagExpectationAsArray.length) { if (!endTagExpectationAsArray) {
tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
cstart = pos;
reconsume = true;
state = P::transition(mViewSource, returnState, reconsume, pos);
NS_HTML5_CONTINUE(stateloop);
} else if (index < endTagExpectationAsArray.length) {
char16_t e = endTagExpectationAsArray[index]; char16_t e = endTagExpectationAsArray[index];
char16_t folded = c; char16_t folded = c;
if (c >= 'A' && c <= 'Z') { if (c >= 'A' && c <= 'Z') {

View File

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2005-2007 Henri Sivonen * Copyright (c) 2005-2007 Henri Sivonen
* Copyright (c) 2007-2015 Mozilla Foundation * Copyright (c) 2007-2015 Mozilla Foundation
* Copyright (c) 2019 Moonchild Productions
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
* Foundation, and Opera Software ASA. * Foundation, and Opera Software ASA.
* *
@ -143,7 +144,7 @@ class nsHtml5Tokenizer
void setInterner(nsHtml5AtomTable* interner); void setInterner(nsHtml5AtomTable* interner);
void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId); void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
bool isViewingXmlSource(); bool isViewingXmlSource();
void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation); void setState(int32_t specialTokenizerState);
void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation); void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
private: private:
void endTagExpectationToArray(); void endTagExpectationToArray();

View File

@ -1,6 +1,7 @@
/* /*
* Copyright (c) 2007 Henri Sivonen * Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007-2015 Mozilla Foundation * Copyright (c) 2007-2015 Mozilla Foundation
* Copyright (c) 2019 Moonchild Productions
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
* Foundation, and Opera Software ASA. * Foundation, and Opera Software ASA.
* *
@ -105,7 +106,7 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elementName->camelCaseName, elt); nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elementName->camelCaseName, elt);
currentPtr++; currentPtr++;
stack[currentPtr] = node; stack[currentPtr] = node;
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName); tokenizer->setState(NS_HTML5TOKENIZER_DATA);
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK; mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;
} else if (contextNamespace == kNameSpaceID_MathML) { } else if (contextNamespace == kNameSpaceID_MathML) {
nsHtml5ElementName* elementName = nsHtml5ElementName::ELT_MATH; nsHtml5ElementName* elementName = nsHtml5ElementName::ELT_MATH;
@ -117,7 +118,7 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elt, elementName->name, false); nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elt, elementName->name, false);
currentPtr++; currentPtr++;
stack[currentPtr] = node; stack[currentPtr] = node;
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName); tokenizer->setState(NS_HTML5TOKENIZER_DATA);
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK; mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;
} else { } else {
nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_HTML, elt); nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_HTML, elt);
@ -129,15 +130,20 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
resetTheInsertionMode(); resetTheInsertionMode();
formPointer = getFormPointerForContext(contextNode); formPointer = getFormPointerForContext(contextNode);
if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) { if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) {
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, contextName); tokenizer->setState(NS_HTML5TOKENIZER_RCDATA);
} else if (nsHtml5Atoms::style == contextName || nsHtml5Atoms::xmp == contextName || nsHtml5Atoms::iframe == contextName || nsHtml5Atoms::noembed == contextName || nsHtml5Atoms::noframes == contextName || (scriptingEnabled && nsHtml5Atoms::noscript == contextName)) { } else if (nsHtml5Atoms::style == contextName ||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, contextName); nsHtml5Atoms::xmp == contextName ||
nsHtml5Atoms::iframe == contextName ||
nsHtml5Atoms::noembed == contextName ||
nsHtml5Atoms::noframes == contextName ||
(scriptingEnabled && nsHtml5Atoms::noscript == contextName)) {
tokenizer->setState(NS_HTML5TOKENIZER_RAWTEXT);
} else if (nsHtml5Atoms::plaintext == contextName) { } else if (nsHtml5Atoms::plaintext == contextName) {
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_PLAINTEXT, contextName); tokenizer->setState(NS_HTML5TOKENIZER_PLAINTEXT);
} else if (nsHtml5Atoms::script == contextName) { } else if (nsHtml5Atoms::script == contextName) {
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName); tokenizer->setState(NS_HTML5TOKENIZER_SCRIPT_DATA);
} else { } else {
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName); tokenizer->setState(NS_HTML5TOKENIZER_DATA);
} }
} }
contextName = nullptr; contextName = nullptr;