616 lines
21 KiB
C++
616 lines
21 KiB
C++
// Copyright 2016 the V8 project authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#ifndef V8_REGEXP_REGEXP_AST_H_
|
|
#define V8_REGEXP_REGEXP_AST_H_
|
|
|
|
#include "regexp/regexp-shim.h"
|
|
|
|
namespace v8 {
|
|
namespace internal {
|
|
|
|
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
|
|
VISIT(Disjunction) \
|
|
VISIT(Alternative) \
|
|
VISIT(Assertion) \
|
|
VISIT(CharacterClass) \
|
|
VISIT(Atom) \
|
|
VISIT(Quantifier) \
|
|
VISIT(Capture) \
|
|
VISIT(Group) \
|
|
VISIT(Lookaround) \
|
|
VISIT(BackReference) \
|
|
VISIT(Empty) \
|
|
VISIT(Text)
|
|
|
|
#define FORWARD_DECLARE(Name) class RegExp##Name;
|
|
FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
|
|
#undef FORWARD_DECLARE
|
|
|
|
class RegExpCompiler;
|
|
class RegExpNode;
|
|
class RegExpTree;
|
|
|
|
class RegExpVisitor {
|
|
public:
|
|
virtual ~RegExpVisitor() = default;
|
|
#define MAKE_CASE(Name) \
|
|
virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
|
|
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
|
|
#undef MAKE_CASE
|
|
};
|
|
|
|
|
|
// A simple closed interval.
|
|
class Interval {
|
|
public:
|
|
Interval() : from_(kNone), to_(kNone - 1) {} // '- 1' for branchless size().
|
|
Interval(int from, int to) : from_(from), to_(to) {}
|
|
Interval Union(Interval that) {
|
|
if (that.from_ == kNone)
|
|
return *this;
|
|
else if (from_ == kNone)
|
|
return that;
|
|
else
|
|
return Interval(Min(from_, that.from_), Max(to_, that.to_));
|
|
}
|
|
|
|
bool Contains(int value) { return (from_ <= value) && (value <= to_); }
|
|
bool is_empty() { return from_ == kNone; }
|
|
int from() const { return from_; }
|
|
int to() const { return to_; }
|
|
int size() const { return to_ - from_ + 1; }
|
|
|
|
static Interval Empty() { return Interval(); }
|
|
|
|
static constexpr int kNone = -1;
|
|
|
|
private:
|
|
int from_;
|
|
int to_;
|
|
};
|
|
|
|
|
|
// Represents code units in the range from from_ to to_, both ends are
|
|
// inclusive.
|
|
class CharacterRange {
|
|
public:
|
|
CharacterRange() : from_(0), to_(0) {}
|
|
// For compatibility with the CHECK_OK macro
|
|
CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
|
|
V8_EXPORT_PRIVATE static void AddClassEscape(char type,
|
|
ZoneList<CharacterRange>* ranges,
|
|
Zone* zone);
|
|
// Add class escapes. Add case equivalent closure for \w and \W if necessary.
|
|
V8_EXPORT_PRIVATE static void AddClassEscape(
|
|
char type, ZoneList<CharacterRange>* ranges,
|
|
bool add_unicode_case_equivalents, Zone* zone);
|
|
static Vector<const int> GetWordBounds();
|
|
static inline CharacterRange Singleton(uc32 value) {
|
|
return CharacterRange(value, value);
|
|
}
|
|
static inline CharacterRange Range(uc32 from, uc32 to) {
|
|
DCHECK(0 <= from && to <= String::kMaxCodePoint);
|
|
DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to));
|
|
return CharacterRange(from, to);
|
|
}
|
|
static inline CharacterRange Everything() {
|
|
return CharacterRange(0, String::kMaxCodePoint);
|
|
}
|
|
static inline ZoneList<CharacterRange>* List(Zone* zone,
|
|
CharacterRange range) {
|
|
ZoneList<CharacterRange>* list =
|
|
new (zone) ZoneList<CharacterRange>(1, zone);
|
|
list->Add(range, zone);
|
|
return list;
|
|
}
|
|
bool Contains(uc32 i) { return from_ <= i && i <= to_; }
|
|
uc32 from() const { return from_; }
|
|
void set_from(uc32 value) { from_ = value; }
|
|
uc32 to() const { return to_; }
|
|
void set_to(uc32 value) { to_ = value; }
|
|
bool is_valid() { return from_ <= to_; }
|
|
bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; }
|
|
bool IsSingleton() { return (from_ == to_); }
|
|
V8_EXPORT_PRIVATE static void AddCaseEquivalents(
|
|
Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
|
|
bool is_one_byte);
|
|
// Whether a range list is in canonical form: Ranges ordered by from value,
|
|
// and ranges non-overlapping and non-adjacent.
|
|
V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges);
|
|
// Convert range list to canonical form. The characters covered by the ranges
|
|
// will still be the same, but no character is in more than one range, and
|
|
// adjacent ranges are merged. The resulting list may be shorter than the
|
|
// original, but cannot be longer.
|
|
static void Canonicalize(ZoneList<CharacterRange>* ranges);
|
|
// Negate the contents of a character range in canonical form.
|
|
static void Negate(ZoneList<CharacterRange>* src,
|
|
ZoneList<CharacterRange>* dst, Zone* zone);
|
|
static const int kStartMarker = (1 << 24);
|
|
static const int kPayloadMask = (1 << 24) - 1;
|
|
|
|
private:
|
|
CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {}
|
|
|
|
uc32 from_;
|
|
uc32 to_;
|
|
};
|
|
|
|
class CharacterSet final {
|
|
public:
|
|
explicit CharacterSet(uc16 standard_set_type)
|
|
: ranges_(nullptr), standard_set_type_(standard_set_type) {}
|
|
explicit CharacterSet(ZoneList<CharacterRange>* ranges)
|
|
: ranges_(ranges), standard_set_type_(0) {}
|
|
ZoneList<CharacterRange>* ranges(Zone* zone);
|
|
uc16 standard_set_type() const { return standard_set_type_; }
|
|
void set_standard_set_type(uc16 special_set_type) {
|
|
standard_set_type_ = special_set_type;
|
|
}
|
|
bool is_standard() { return standard_set_type_ != 0; }
|
|
V8_EXPORT_PRIVATE void Canonicalize();
|
|
|
|
private:
|
|
ZoneList<CharacterRange>* ranges_;
|
|
// If non-zero, the value represents a standard set (e.g., all whitespace
|
|
// characters) without having to expand the ranges.
|
|
uc16 standard_set_type_;
|
|
};
|
|
|
|
class TextElement final {
|
|
public:
|
|
enum TextType { ATOM, CHAR_CLASS };
|
|
|
|
static TextElement Atom(RegExpAtom* atom);
|
|
static TextElement CharClass(RegExpCharacterClass* char_class);
|
|
|
|
int cp_offset() const { return cp_offset_; }
|
|
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
|
|
int length() const;
|
|
|
|
TextType text_type() const { return text_type_; }
|
|
|
|
RegExpTree* tree() const { return tree_; }
|
|
|
|
RegExpAtom* atom() const {
|
|
DCHECK(text_type() == ATOM);
|
|
return reinterpret_cast<RegExpAtom*>(tree());
|
|
}
|
|
|
|
RegExpCharacterClass* char_class() const {
|
|
DCHECK(text_type() == CHAR_CLASS);
|
|
return reinterpret_cast<RegExpCharacterClass*>(tree());
|
|
}
|
|
|
|
private:
|
|
TextElement(TextType text_type, RegExpTree* tree)
|
|
: cp_offset_(-1), text_type_(text_type), tree_(tree) {}
|
|
|
|
int cp_offset_;
|
|
TextType text_type_;
|
|
RegExpTree* tree_;
|
|
};
|
|
|
|
|
|
class RegExpTree : public ZoneObject {
|
|
public:
|
|
static const int kInfinity = kMaxInt;
|
|
virtual ~RegExpTree() = default;
|
|
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
|
|
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success) = 0;
|
|
virtual bool IsTextElement() { return false; }
|
|
virtual bool IsAnchoredAtStart() { return false; }
|
|
virtual bool IsAnchoredAtEnd() { return false; }
|
|
virtual int min_match() = 0;
|
|
virtual int max_match() = 0;
|
|
// Returns the interval of registers used for captures within this
|
|
// expression.
|
|
virtual Interval CaptureRegisters() { return Interval::Empty(); }
|
|
virtual void AppendToText(RegExpText* text, Zone* zone);
|
|
V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os,
|
|
Zone* zone); // NOLINT
|
|
#define MAKE_ASTYPE(Name) \
|
|
virtual RegExp##Name* As##Name(); \
|
|
virtual bool Is##Name();
|
|
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
|
|
#undef MAKE_ASTYPE
|
|
};
|
|
|
|
|
|
class RegExpDisjunction final : public RegExpTree {
|
|
public:
|
|
explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpDisjunction* AsDisjunction() override;
|
|
Interval CaptureRegisters() override;
|
|
bool IsDisjunction() override;
|
|
bool IsAnchoredAtStart() override;
|
|
bool IsAnchoredAtEnd() override;
|
|
int min_match() override { return min_match_; }
|
|
int max_match() override { return max_match_; }
|
|
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
|
|
|
|
private:
|
|
bool SortConsecutiveAtoms(RegExpCompiler* compiler);
|
|
void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
|
|
void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
|
|
ZoneList<RegExpTree*>* alternatives_;
|
|
int min_match_;
|
|
int max_match_;
|
|
};
|
|
|
|
|
|
class RegExpAlternative final : public RegExpTree {
|
|
public:
|
|
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpAlternative* AsAlternative() override;
|
|
Interval CaptureRegisters() override;
|
|
bool IsAlternative() override;
|
|
bool IsAnchoredAtStart() override;
|
|
bool IsAnchoredAtEnd() override;
|
|
int min_match() override { return min_match_; }
|
|
int max_match() override { return max_match_; }
|
|
ZoneList<RegExpTree*>* nodes() { return nodes_; }
|
|
|
|
private:
|
|
ZoneList<RegExpTree*>* nodes_;
|
|
int min_match_;
|
|
int max_match_;
|
|
};
|
|
|
|
|
|
class RegExpAssertion final : public RegExpTree {
|
|
public:
|
|
enum AssertionType {
|
|
START_OF_LINE = 0,
|
|
START_OF_INPUT = 1,
|
|
END_OF_LINE = 2,
|
|
END_OF_INPUT = 3,
|
|
BOUNDARY = 4,
|
|
NON_BOUNDARY = 5,
|
|
LAST_TYPE = NON_BOUNDARY,
|
|
};
|
|
RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
|
|
: assertion_type_(type), flags_(flags) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpAssertion* AsAssertion() override;
|
|
bool IsAssertion() override;
|
|
bool IsAnchoredAtStart() override;
|
|
bool IsAnchoredAtEnd() override;
|
|
int min_match() override { return 0; }
|
|
int max_match() override { return 0; }
|
|
AssertionType assertion_type() const { return assertion_type_; }
|
|
JSRegExp::Flags flags() const { return flags_; }
|
|
|
|
private:
|
|
const AssertionType assertion_type_;
|
|
const JSRegExp::Flags flags_;
|
|
};
|
|
|
|
|
|
class RegExpCharacterClass final : public RegExpTree {
|
|
public:
|
|
// NEGATED: The character class is negated and should match everything but
|
|
// the specified ranges.
|
|
// CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
|
|
// surrogate and should not be unicode-desugared (crbug.com/641091).
|
|
enum Flag {
|
|
NEGATED = 1 << 0,
|
|
CONTAINS_SPLIT_SURROGATE = 1 << 1,
|
|
};
|
|
using CharacterClassFlags = base::Flags<Flag>;
|
|
|
|
RegExpCharacterClass(
|
|
Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
|
|
CharacterClassFlags character_class_flags = CharacterClassFlags())
|
|
: set_(ranges),
|
|
flags_(flags),
|
|
character_class_flags_(character_class_flags) {
|
|
// Convert the empty set of ranges to the negated Everything() range.
|
|
if (ranges->is_empty()) {
|
|
ranges->Add(CharacterRange::Everything(), zone);
|
|
character_class_flags_ ^= NEGATED;
|
|
}
|
|
}
|
|
RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
|
|
: set_(type),
|
|
flags_(flags),
|
|
character_class_flags_(CharacterClassFlags()) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpCharacterClass* AsCharacterClass() override;
|
|
bool IsCharacterClass() override;
|
|
bool IsTextElement() override { return true; }
|
|
int min_match() override { return 1; }
|
|
// The character class may match two code units for unicode regexps.
|
|
// TODO(yangguo): we should split this class for usage in TextElement, and
|
|
// make max_match() dependent on the character class content.
|
|
int max_match() override { return 2; }
|
|
void AppendToText(RegExpText* text, Zone* zone) override;
|
|
CharacterSet character_set() { return set_; }
|
|
// TODO(lrn): Remove need for complex version if is_standard that
|
|
// recognizes a mangled standard set and just do { return set_.is_special(); }
|
|
bool is_standard(Zone* zone);
|
|
// Returns a value representing the standard character set if is_standard()
|
|
// returns true.
|
|
// Currently used values are:
|
|
// s : unicode whitespace
|
|
// S : unicode non-whitespace
|
|
// w : ASCII word character (digit, letter, underscore)
|
|
// W : non-ASCII word character
|
|
// d : ASCII digit
|
|
// D : non-ASCII digit
|
|
// . : non-newline
|
|
// * : All characters, for advancing unanchored regexp
|
|
uc16 standard_type() const { return set_.standard_set_type(); }
|
|
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
|
|
bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
|
|
JSRegExp::Flags flags() const { return flags_; }
|
|
bool contains_split_surrogate() const {
|
|
return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
|
|
}
|
|
|
|
private:
|
|
CharacterSet set_;
|
|
const JSRegExp::Flags flags_;
|
|
CharacterClassFlags character_class_flags_;
|
|
};
|
|
|
|
|
|
class RegExpAtom final : public RegExpTree {
|
|
public:
|
|
explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
|
|
: data_(data), flags_(flags) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpAtom* AsAtom() override;
|
|
bool IsAtom() override;
|
|
bool IsTextElement() override { return true; }
|
|
int min_match() override { return data_.length(); }
|
|
int max_match() override { return data_.length(); }
|
|
void AppendToText(RegExpText* text, Zone* zone) override;
|
|
Vector<const uc16> data() { return data_; }
|
|
int length() { return data_.length(); }
|
|
JSRegExp::Flags flags() const { return flags_; }
|
|
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
|
|
|
private:
|
|
Vector<const uc16> data_;
|
|
const JSRegExp::Flags flags_;
|
|
};
|
|
|
|
|
|
class RegExpText final : public RegExpTree {
|
|
public:
|
|
explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpText* AsText() override;
|
|
bool IsText() override;
|
|
bool IsTextElement() override { return true; }
|
|
int min_match() override { return length_; }
|
|
int max_match() override { return length_; }
|
|
void AppendToText(RegExpText* text, Zone* zone) override;
|
|
void AddElement(TextElement elm, Zone* zone) {
|
|
elements_.Add(elm, zone);
|
|
length_ += elm.length();
|
|
}
|
|
ZoneList<TextElement>* elements() { return &elements_; }
|
|
|
|
private:
|
|
ZoneList<TextElement> elements_;
|
|
int length_;
|
|
};
|
|
|
|
|
|
class RegExpQuantifier final : public RegExpTree {
|
|
public:
|
|
enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
|
|
RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
|
|
: body_(body),
|
|
min_(min),
|
|
max_(max),
|
|
quantifier_type_(type) {
|
|
if (min > 0 && body->min_match() > kInfinity / min) {
|
|
min_match_ = kInfinity;
|
|
} else {
|
|
min_match_ = min * body->min_match();
|
|
}
|
|
if (max > 0 && body->max_match() > kInfinity / max) {
|
|
max_match_ = kInfinity;
|
|
} else {
|
|
max_match_ = max * body->max_match();
|
|
}
|
|
}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
|
|
RegExpCompiler* compiler, RegExpNode* on_success,
|
|
bool not_at_start = false);
|
|
RegExpQuantifier* AsQuantifier() override;
|
|
Interval CaptureRegisters() override;
|
|
bool IsQuantifier() override;
|
|
int min_match() override { return min_match_; }
|
|
int max_match() override { return max_match_; }
|
|
int min() { return min_; }
|
|
int max() { return max_; }
|
|
bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
|
|
bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
|
|
bool is_greedy() { return quantifier_type_ == GREEDY; }
|
|
RegExpTree* body() { return body_; }
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
int min_;
|
|
int max_;
|
|
int min_match_;
|
|
int max_match_;
|
|
QuantifierType quantifier_type_;
|
|
};
|
|
|
|
|
|
class RegExpCapture final : public RegExpTree {
|
|
public:
|
|
explicit RegExpCapture(int index)
|
|
: body_(nullptr),
|
|
index_(index),
|
|
min_match_(0),
|
|
max_match_(0),
|
|
name_(nullptr) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
static RegExpNode* ToNode(RegExpTree* body, int index,
|
|
RegExpCompiler* compiler, RegExpNode* on_success);
|
|
RegExpCapture* AsCapture() override;
|
|
bool IsAnchoredAtStart() override;
|
|
bool IsAnchoredAtEnd() override;
|
|
Interval CaptureRegisters() override;
|
|
bool IsCapture() override;
|
|
int min_match() override { return min_match_; }
|
|
int max_match() override { return max_match_; }
|
|
RegExpTree* body() { return body_; }
|
|
void set_body(RegExpTree* body) {
|
|
body_ = body;
|
|
min_match_ = body->min_match();
|
|
max_match_ = body->max_match();
|
|
}
|
|
int index() const { return index_; }
|
|
const ZoneVector<uc16>* name() const { return name_; }
|
|
void set_name(const ZoneVector<uc16>* name) { name_ = name; }
|
|
static int StartRegister(int index) { return index * 2; }
|
|
static int EndRegister(int index) { return index * 2 + 1; }
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
int index_;
|
|
int min_match_;
|
|
int max_match_;
|
|
const ZoneVector<uc16>* name_;
|
|
};
|
|
|
|
class RegExpGroup final : public RegExpTree {
|
|
public:
|
|
explicit RegExpGroup(RegExpTree* body)
|
|
: body_(body),
|
|
min_match_(body->min_match()),
|
|
max_match_(body->max_match()) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler,
|
|
RegExpNode* on_success) override {
|
|
return body_->ToNode(compiler, on_success);
|
|
}
|
|
RegExpGroup* AsGroup() override;
|
|
bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); }
|
|
bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); }
|
|
bool IsGroup() override;
|
|
int min_match() override { return min_match_; }
|
|
int max_match() override { return max_match_; }
|
|
Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
|
|
RegExpTree* body() { return body_; }
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
int min_match_;
|
|
int max_match_;
|
|
};
|
|
|
|
class RegExpLookaround final : public RegExpTree {
|
|
public:
|
|
enum Type { LOOKAHEAD, LOOKBEHIND };
|
|
|
|
RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
|
|
int capture_from, Type type)
|
|
: body_(body),
|
|
is_positive_(is_positive),
|
|
capture_count_(capture_count),
|
|
capture_from_(capture_from),
|
|
type_(type) {}
|
|
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpLookaround* AsLookaround() override;
|
|
Interval CaptureRegisters() override;
|
|
bool IsLookaround() override;
|
|
bool IsAnchoredAtStart() override;
|
|
int min_match() override { return 0; }
|
|
int max_match() override { return 0; }
|
|
RegExpTree* body() { return body_; }
|
|
bool is_positive() { return is_positive_; }
|
|
int capture_count() { return capture_count_; }
|
|
int capture_from() { return capture_from_; }
|
|
Type type() { return type_; }
|
|
|
|
class Builder {
|
|
public:
|
|
Builder(bool is_positive, RegExpNode* on_success,
|
|
int stack_pointer_register, int position_register,
|
|
int capture_register_count = 0, int capture_register_start = 0);
|
|
RegExpNode* on_match_success() { return on_match_success_; }
|
|
RegExpNode* ForMatch(RegExpNode* match);
|
|
|
|
private:
|
|
bool is_positive_;
|
|
RegExpNode* on_match_success_;
|
|
RegExpNode* on_success_;
|
|
int stack_pointer_register_;
|
|
int position_register_;
|
|
};
|
|
|
|
private:
|
|
RegExpTree* body_;
|
|
bool is_positive_;
|
|
int capture_count_;
|
|
int capture_from_;
|
|
Type type_;
|
|
};
|
|
|
|
|
|
class RegExpBackReference final : public RegExpTree {
|
|
public:
|
|
explicit RegExpBackReference(JSRegExp::Flags flags)
|
|
: capture_(nullptr), name_(nullptr), flags_(flags) {}
|
|
RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
|
|
: capture_(capture), name_(nullptr), flags_(flags) {}
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpBackReference* AsBackReference() override;
|
|
bool IsBackReference() override;
|
|
int min_match() override { return 0; }
|
|
// The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
|
|
// recursion, we give up. Ignorance is bliss.
|
|
int max_match() override { return kInfinity; }
|
|
int index() { return capture_->index(); }
|
|
RegExpCapture* capture() { return capture_; }
|
|
void set_capture(RegExpCapture* capture) { capture_ = capture; }
|
|
const ZoneVector<uc16>* name() const { return name_; }
|
|
void set_name(const ZoneVector<uc16>* name) { name_ = name; }
|
|
|
|
private:
|
|
RegExpCapture* capture_;
|
|
const ZoneVector<uc16>* name_;
|
|
const JSRegExp::Flags flags_;
|
|
};
|
|
|
|
|
|
class RegExpEmpty final : public RegExpTree {
|
|
public:
|
|
RegExpEmpty() = default;
|
|
void* Accept(RegExpVisitor* visitor, void* data) override;
|
|
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
|
RegExpEmpty* AsEmpty() override;
|
|
bool IsEmpty() override;
|
|
int min_match() override { return 0; }
|
|
int max_match() override { return 0; }
|
|
};
|
|
|
|
} // namespace internal
|
|
} // namespace v8
|
|
|
|
#endif // V8_REGEXP_REGEXP_AST_H_
|