Fix an issue with the html5 tokenizer and tree builder.
parent
fdc0a37db5
commit
1743745986
|
@ -1,5 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2015 Mozilla Foundation
|
* Copyright (c) 2008-2015 Mozilla Foundation
|
||||||
|
* Copyright (c) 2019 Moonchild Productions
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -44,32 +45,66 @@ struct staticJArray {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class T, class L>
|
template <class T, class L>
|
||||||
struct jArray {
|
class autoJArray;
|
||||||
|
|
||||||
|
template <class T, class L>
|
||||||
|
class jArray {
|
||||||
|
friend class autoJArray<T, L>;
|
||||||
|
|
||||||
|
private:
|
||||||
T* arr;
|
T* arr;
|
||||||
|
|
||||||
|
public:
|
||||||
L length;
|
L length;
|
||||||
|
|
||||||
static jArray<T,L> newJArray(L const len) {
|
static jArray<T,L> newJArray(L const len) {
|
||||||
MOZ_ASSERT(len >= 0, "Negative length.");
|
MOZ_ASSERT(len >= 0, "Negative length.");
|
||||||
jArray<T,L> newArray = { new T[size_t(len)], len };
|
jArray<T,L> newArray = { new T[size_t(len)], len };
|
||||||
return newArray;
|
return newArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
static jArray<T,L> newFallibleJArray(L const len) {
|
static jArray<T,L> newFallibleJArray(L const len) {
|
||||||
MOZ_ASSERT(len >= 0, "Negative length.");
|
MOZ_ASSERT(len >= 0, "Negative length.");
|
||||||
T* a = new (mozilla::fallible) T[size_t(len)];
|
T* a = new (mozilla::fallible) T[size_t(len)];
|
||||||
jArray<T,L> newArray = { a, a ? len : 0 };
|
jArray<T,L> newArray = { a, a ? len : 0 };
|
||||||
return newArray;
|
return newArray;
|
||||||
}
|
}
|
||||||
operator T*() { return arr; }
|
|
||||||
|
operator T*() {
|
||||||
|
return arr;
|
||||||
|
}
|
||||||
|
|
||||||
T& operator[] (L const index) {
|
T& operator[] (L const index) {
|
||||||
MOZ_ASSERT(index >= 0, "Array access with negative index.");
|
MOZ_ASSERT(index >= 0, "Array access with negative index.");
|
||||||
MOZ_ASSERT(index < length, "Array index out of bounds.");
|
MOZ_ASSERT(index < length, "Array index out of bounds.");
|
||||||
return arr[index];
|
return arr[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
void operator=(staticJArray<T,L>& other) {
|
void operator=(staticJArray<T,L>& other) {
|
||||||
arr = (T*)other.arr;
|
arr = (T*)other.arr;
|
||||||
length = other.length;
|
length = other.length;
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
MOZ_IMPLICIT jArray(decltype(nullptr))
|
||||||
|
: arr(nullptr)
|
||||||
|
, length(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
jArray()
|
||||||
|
: arr(nullptr)
|
||||||
|
, length(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
jArray(T* aArr, L aLength)
|
||||||
|
: arr(aArr)
|
||||||
|
, length(aLength)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}; // class jArray
|
||||||
|
|
||||||
template<class T, class L>
|
template<class T, class L>
|
||||||
class autoJArray {
|
class autoJArray {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005-2007 Henri Sivonen
|
* Copyright (c) 2005-2007 Henri Sivonen
|
||||||
* Copyright (c) 2007-2015 Mozilla Foundation
|
* Copyright (c) 2007-2015 Mozilla Foundation
|
||||||
|
* Copyright (c) 2019 Moonchild Productions
|
||||||
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
|
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
|
||||||
* Foundation, and Opera Software ASA.
|
* Foundation, and Opera Software ASA.
|
||||||
*
|
*
|
||||||
|
@ -680,6 +681,22 @@ public class Tokenizer implements Locator {
|
||||||
*
|
*
|
||||||
* @param specialTokenizerState
|
* @param specialTokenizerState
|
||||||
* the tokenizer state to set
|
* the tokenizer state to set
|
||||||
|
*/
|
||||||
|
public void setState(int specialTokenizerState) {
|
||||||
|
this.stateSave = specialTokenizerState;
|
||||||
|
this.endTagExpectation = null;
|
||||||
|
this.endTagExpectationAsArray = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// [NOCPP[
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the tokenizer state and the associated element name. This should
|
||||||
|
* only ever be used to put the tokenizer into one of the states that have
|
||||||
|
* a special end tag expectation. For use from the tokenizer test harness.
|
||||||
|
*
|
||||||
|
* @param specialTokenizerState
|
||||||
|
* the tokenizer state to set
|
||||||
* @param endTagExpectation
|
* @param endTagExpectation
|
||||||
* the expected end tag for transitioning back to normal
|
* the expected end tag for transitioning back to normal
|
||||||
*/
|
*/
|
||||||
|
@ -695,6 +712,8 @@ public class Tokenizer implements Locator {
|
||||||
endTagExpectationToArray();
|
endTagExpectationToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ]NOCPP]
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the tokenizer state and the associated element name. This should
|
* Sets the tokenizer state and the associated element name. This should
|
||||||
* only ever be used to put the tokenizer into one of the states that have
|
* only ever be used to put the tokenizer into one of the states that have
|
||||||
|
@ -3759,11 +3778,17 @@ public class Tokenizer implements Locator {
|
||||||
c = checkChar(buf, pos);
|
c = checkChar(buf, pos);
|
||||||
/*
|
/*
|
||||||
* ASSERT! when entering this state, set index to 0 and
|
* ASSERT! when entering this state, set index to 0 and
|
||||||
* call clearStrBufBeforeUse() assert (contentModelElement !=
|
* call clearStrBufBeforeUse(); Let's implement the above
|
||||||
* null); Let's implement the above without lookahead.
|
* without lookahead. strBuf is the 'temporary buffer'.
|
||||||
* strBuf is the 'temporary buffer'.
|
|
||||||
*/
|
*/
|
||||||
if (index < endTagExpectationAsArray.length) {
|
if (endTagExpectationAsArray == null) {
|
||||||
|
tokenHandler.characters(Tokenizer.LT_SOLIDUS,
|
||||||
|
0, 2);
|
||||||
|
cstart = pos;
|
||||||
|
reconsume = true;
|
||||||
|
state = transition(state, returnState, reconsume, pos);
|
||||||
|
continue stateloop;
|
||||||
|
} else if (index < endTagExpectationAsArray.length) {
|
||||||
char e = endTagExpectationAsArray[index];
|
char e = endTagExpectationAsArray[index];
|
||||||
char folded = c;
|
char folded = c;
|
||||||
if (c >= 'A' && c <= 'Z') {
|
if (c >= 'A' && c <= 'Z') {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007 Henri Sivonen
|
* Copyright (c) 2007 Henri Sivonen
|
||||||
* Copyright (c) 2007-2015 Mozilla Foundation
|
* Copyright (c) 2007-2015 Mozilla Foundation
|
||||||
|
* Copyright (c) 2018-2019 Moonchild Productions
|
||||||
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
|
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
|
||||||
* Foundation, and Opera Software ASA.
|
* Foundation, and Opera Software ASA.
|
||||||
*
|
*
|
||||||
|
@ -640,8 +641,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
|
||||||
);
|
);
|
||||||
currentPtr++;
|
currentPtr++;
|
||||||
stack[currentPtr] = node;
|
stack[currentPtr] = node;
|
||||||
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
|
tokenizer.setState(Tokenizer.DATA);
|
||||||
contextName);
|
|
||||||
// The frameset-ok flag is set even though <frameset> never
|
// The frameset-ok flag is set even though <frameset> never
|
||||||
// ends up being allowed as HTML frameset in the fragment case.
|
// ends up being allowed as HTML frameset in the fragment case.
|
||||||
mode = FRAMESET_OK;
|
mode = FRAMESET_OK;
|
||||||
|
@ -671,8 +671,7 @@ public abstract class TreeBuilder<T> implements TokenHandler,
|
||||||
);
|
);
|
||||||
currentPtr++;
|
currentPtr++;
|
||||||
stack[currentPtr] = node;
|
stack[currentPtr] = node;
|
||||||
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
|
tokenizer.setState(Tokenizer.DATA);
|
||||||
contextName);
|
|
||||||
// The frameset-ok flag is set even though <frameset> never
|
// The frameset-ok flag is set even though <frameset> never
|
||||||
// ends up being allowed as HTML frameset in the fragment case.
|
// ends up being allowed as HTML frameset in the fragment case.
|
||||||
mode = FRAMESET_OK;
|
mode = FRAMESET_OK;
|
||||||
|
@ -691,23 +690,18 @@ public abstract class TreeBuilder<T> implements TokenHandler,
|
||||||
resetTheInsertionMode();
|
resetTheInsertionMode();
|
||||||
formPointer = getFormPointerForContext(contextNode);
|
formPointer = getFormPointerForContext(contextNode);
|
||||||
if ("title" == contextName || "textarea" == contextName) {
|
if ("title" == contextName || "textarea" == contextName) {
|
||||||
tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA,
|
tokenizer.setState(Tokenizer.RCDATA);
|
||||||
contextName);
|
|
||||||
} else if ("style" == contextName || "xmp" == contextName
|
} else if ("style" == contextName || "xmp" == contextName
|
||||||
|| "iframe" == contextName || "noembed" == contextName
|
|| "iframe" == contextName || "noembed" == contextName
|
||||||
|| "noframes" == contextName
|
|| "noframes" == contextName
|
||||||
|| (scriptingEnabled && "noscript" == contextName)) {
|
|| (scriptingEnabled && "noscript" == contextName)) {
|
||||||
tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT,
|
tokenizer.setState(Tokenizer.RAWTEXT);
|
||||||
contextName);
|
|
||||||
} else if ("plaintext" == contextName) {
|
} else if ("plaintext" == contextName) {
|
||||||
tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT,
|
tokenizer.setState(Tokenizer.PLAINTEXT);
|
||||||
contextName);
|
|
||||||
} else if ("script" == contextName) {
|
} else if ("script" == contextName) {
|
||||||
tokenizer.setStateAndEndTagExpectation(
|
tokenizer.setState(Tokenizer.SCRIPT_DATA);
|
||||||
Tokenizer.SCRIPT_DATA, contextName);
|
|
||||||
} else {
|
} else {
|
||||||
tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA,
|
tokenizer.setState(Tokenizer.DATA);
|
||||||
contextName);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
contextName = null;
|
contextName = null;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005-2007 Henri Sivonen
|
* Copyright (c) 2005-2007 Henri Sivonen
|
||||||
* Copyright (c) 2007-2015 Mozilla Foundation
|
* Copyright (c) 2007-2015 Mozilla Foundation
|
||||||
|
* Copyright (c) 2019 Moonchild Productions
|
||||||
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
|
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
|
||||||
* Foundation, and Opera Software ASA.
|
* Foundation, and Opera Software ASA.
|
||||||
*
|
*
|
||||||
|
@ -127,15 +128,11 @@ nsHtml5Tokenizer::isViewingXmlSource()
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
nsHtml5Tokenizer::setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation)
|
nsHtml5Tokenizer::setState(int32_t specialTokenizerState)
|
||||||
{
|
{
|
||||||
this->stateSave = specialTokenizerState;
|
this->stateSave = specialTokenizerState;
|
||||||
if (specialTokenizerState == NS_HTML5TOKENIZER_DATA) {
|
this->endTagExpectation = nullptr;
|
||||||
return;
|
this->endTagExpectationAsArray = nullptr;
|
||||||
}
|
|
||||||
autoJArray<char16_t,int32_t> asArray = nsHtml5Portability::newCharArrayFromLocal(endTagExpectation);
|
|
||||||
this->endTagExpectation = nsHtml5ElementName::elementNameByBuffer(asArray, 0, asArray.length, interner);
|
|
||||||
endTagExpectationToArray();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -2040,7 +2037,13 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
|
||||||
NS_HTML5_BREAK(stateloop);
|
NS_HTML5_BREAK(stateloop);
|
||||||
}
|
}
|
||||||
c = checkChar(buf, pos);
|
c = checkChar(buf, pos);
|
||||||
if (index < endTagExpectationAsArray.length) {
|
if (!endTagExpectationAsArray) {
|
||||||
|
tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
|
||||||
|
cstart = pos;
|
||||||
|
reconsume = true;
|
||||||
|
state = P::transition(mViewSource, returnState, reconsume, pos);
|
||||||
|
NS_HTML5_CONTINUE(stateloop);
|
||||||
|
} else if (index < endTagExpectationAsArray.length) {
|
||||||
char16_t e = endTagExpectationAsArray[index];
|
char16_t e = endTagExpectationAsArray[index];
|
||||||
char16_t folded = c;
|
char16_t folded = c;
|
||||||
if (c >= 'A' && c <= 'Z') {
|
if (c >= 'A' && c <= 'Z') {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005-2007 Henri Sivonen
|
* Copyright (c) 2005-2007 Henri Sivonen
|
||||||
* Copyright (c) 2007-2015 Mozilla Foundation
|
* Copyright (c) 2007-2015 Mozilla Foundation
|
||||||
|
* Copyright (c) 2019 Moonchild Productions
|
||||||
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
|
* Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
|
||||||
* Foundation, and Opera Software ASA.
|
* Foundation, and Opera Software ASA.
|
||||||
*
|
*
|
||||||
|
@ -143,7 +144,7 @@ class nsHtml5Tokenizer
|
||||||
void setInterner(nsHtml5AtomTable* interner);
|
void setInterner(nsHtml5AtomTable* interner);
|
||||||
void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
|
void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
|
||||||
bool isViewingXmlSource();
|
bool isViewingXmlSource();
|
||||||
void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
|
void setState(int32_t specialTokenizerState);
|
||||||
void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
|
void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
|
||||||
private:
|
private:
|
||||||
void endTagExpectationToArray();
|
void endTagExpectationToArray();
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007 Henri Sivonen
|
* Copyright (c) 2007 Henri Sivonen
|
||||||
* Copyright (c) 2007-2015 Mozilla Foundation
|
* Copyright (c) 2007-2015 Mozilla Foundation
|
||||||
|
* Copyright (c) 2019 Moonchild Productions
|
||||||
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
|
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
|
||||||
* Foundation, and Opera Software ASA.
|
* Foundation, and Opera Software ASA.
|
||||||
*
|
*
|
||||||
|
@ -105,7 +106,7 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
|
||||||
nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elementName->camelCaseName, elt);
|
nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elementName->camelCaseName, elt);
|
||||||
currentPtr++;
|
currentPtr++;
|
||||||
stack[currentPtr] = node;
|
stack[currentPtr] = node;
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName);
|
tokenizer->setState(NS_HTML5TOKENIZER_DATA);
|
||||||
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;
|
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;
|
||||||
} else if (contextNamespace == kNameSpaceID_MathML) {
|
} else if (contextNamespace == kNameSpaceID_MathML) {
|
||||||
nsHtml5ElementName* elementName = nsHtml5ElementName::ELT_MATH;
|
nsHtml5ElementName* elementName = nsHtml5ElementName::ELT_MATH;
|
||||||
|
@ -117,7 +118,7 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
|
||||||
nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elt, elementName->name, false);
|
nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elt, elementName->name, false);
|
||||||
currentPtr++;
|
currentPtr++;
|
||||||
stack[currentPtr] = node;
|
stack[currentPtr] = node;
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName);
|
tokenizer->setState(NS_HTML5TOKENIZER_DATA);
|
||||||
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;
|
mode = NS_HTML5TREE_BUILDER_FRAMESET_OK;
|
||||||
} else {
|
} else {
|
||||||
nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_HTML, elt);
|
nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_HTML, elt);
|
||||||
|
@ -129,15 +130,20 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self)
|
||||||
resetTheInsertionMode();
|
resetTheInsertionMode();
|
||||||
formPointer = getFormPointerForContext(contextNode);
|
formPointer = getFormPointerForContext(contextNode);
|
||||||
if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) {
|
if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) {
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, contextName);
|
tokenizer->setState(NS_HTML5TOKENIZER_RCDATA);
|
||||||
} else if (nsHtml5Atoms::style == contextName || nsHtml5Atoms::xmp == contextName || nsHtml5Atoms::iframe == contextName || nsHtml5Atoms::noembed == contextName || nsHtml5Atoms::noframes == contextName || (scriptingEnabled && nsHtml5Atoms::noscript == contextName)) {
|
} else if (nsHtml5Atoms::style == contextName ||
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, contextName);
|
nsHtml5Atoms::xmp == contextName ||
|
||||||
|
nsHtml5Atoms::iframe == contextName ||
|
||||||
|
nsHtml5Atoms::noembed == contextName ||
|
||||||
|
nsHtml5Atoms::noframes == contextName ||
|
||||||
|
(scriptingEnabled && nsHtml5Atoms::noscript == contextName)) {
|
||||||
|
tokenizer->setState(NS_HTML5TOKENIZER_RAWTEXT);
|
||||||
} else if (nsHtml5Atoms::plaintext == contextName) {
|
} else if (nsHtml5Atoms::plaintext == contextName) {
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_PLAINTEXT, contextName);
|
tokenizer->setState(NS_HTML5TOKENIZER_PLAINTEXT);
|
||||||
} else if (nsHtml5Atoms::script == contextName) {
|
} else if (nsHtml5Atoms::script == contextName) {
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName);
|
tokenizer->setState(NS_HTML5TOKENIZER_SCRIPT_DATA);
|
||||||
} else {
|
} else {
|
||||||
tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName);
|
tokenizer->setState(NS_HTML5TOKENIZER_DATA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
contextName = nullptr;
|
contextName = nullptr;
|
||||||
|
|
Loading…
Reference in New Issue