/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
|
|
|
|
// This file implements the NativeRegExpMacroAssembler interface for
// SpiderMonkey. It provides the same interface as each of V8's
// architecture-specific implementations.
|
|
|
|
#ifndef RegexpMacroAssemblerArch_h
|
|
#define RegexpMacroAssemblerArch_h
|
|
|
|
#include "jit/MacroAssembler.h"
|
|
#include "regexp/regexp-macro-assembler.h"
|
|
|
|
namespace v8 {
|
|
namespace internal {
|
|
|
|
// Local variables of the generated regexp code. SMRegExpMacroAssembler
// addresses each field relative to the native stack pointer using
// offsetof(FrameData, <field>), and places irregexp's virtual registers
// immediately after this struct (at offset sizeof(FrameData)). The order and
// types of the fields are therefore part of the frame layout.
struct FrameData {
  // Character position at the start of the input, stored as a
  // negative offset from the end of the string (input_end_pointer_).
  size_t inputStart;

  // The backtrack_stack_pointer_ register points to the top of the stack.
  // This points to the bottom of the backtrack stack.
  void* backtrackStackBase;

  // Copy of the input MatchPairs.
  int32_t* matches;    // pointer to capture array
  int32_t numMatches;  // size of capture array
};
|
|
|
|
class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
|
|
public:
|
|
SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,
|
|
js::jit::StackMacroAssembler& masm, Zone* zone,
|
|
Mode mode, uint32_t num_capture_registers);
|
|
virtual ~SMRegExpMacroAssembler() {} // Nothing to do here
|
|
|
|
virtual int stack_limit_slack();
|
|
virtual IrregexpImplementation Implementation();
|
|
|
|
virtual bool Succeed();
|
|
virtual void Fail();
|
|
|
|
virtual void AdvanceCurrentPosition(int by);
|
|
virtual void PopCurrentPosition();
|
|
virtual void PushCurrentPosition();
|
|
virtual void SetCurrentPositionFromEnd(int by);
|
|
|
|
virtual void Backtrack();
|
|
virtual void Bind(Label* label);
|
|
virtual void GoTo(Label* label);
|
|
virtual void PushBacktrack(Label* label);
|
|
|
|
virtual void CheckCharacter(uint32_t c, Label* on_equal);
|
|
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
|
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
|
|
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
|
|
virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
|
|
Label* on_equal);
|
|
virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
|
|
Label* on_not_equal);
|
|
virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask,
|
|
Label* on_not_equal);
|
|
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
|
|
virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range);
|
|
virtual void CheckCharacterNotInRange(uc16 from, uc16 to,
|
|
Label* on_not_in_range);
|
|
virtual void CheckAtStart(int cp_offset, Label* on_at_start);
|
|
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
|
|
virtual void CheckPosition(int cp_offset, Label* on_outside_input);
|
|
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
|
|
virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
|
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
|
Label* on_no_match);
|
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
|
bool read_backward,
|
|
Label* on_no_match);
|
|
|
|
virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
|
|
bool check_bounds, int characters,
|
|
int eats_at_least);
|
|
|
|
virtual void AdvanceRegister(int reg, int by);
|
|
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
|
|
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
|
|
virtual void IfRegisterEqPos(int reg, Label* if_eq);
|
|
virtual void PopRegister(int register_index);
|
|
virtual void PushRegister(int register_index,
|
|
StackCheckFlag check_stack_limit);
|
|
virtual void ReadCurrentPositionFromRegister(int reg);
|
|
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
|
|
virtual void ReadStackPointerFromRegister(int reg);
|
|
virtual void WriteStackPointerToRegister(int reg);
|
|
virtual void SetRegister(int register_index, int to);
|
|
virtual void ClearRegisters(int reg_from, int reg_to);
|
|
|
|
virtual Handle<HeapObject> GetCode(Handle<String> source);
|
|
|
|
private:
|
|
size_t frameSize_ = 0;
|
|
|
|
void createStackFrame();
|
|
void initFrameAndRegs();
|
|
void successHandler();
|
|
void exitHandler();
|
|
void backtrackHandler();
|
|
void stackOverflowHandler();
|
|
|
|
// Push a register on the backtrack stack.
|
|
void Push(js::jit::Register value);
|
|
|
|
// Pop a value from the backtrack stack.
|
|
void Pop(js::jit::Register target);
|
|
|
|
void CheckAtStartImpl(int cp_offset, Label* on_cond,
|
|
js::jit::Assembler::Condition cond);
|
|
void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
|
|
js::jit::Assembler::Condition cond);
|
|
void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
|
|
bool negate);
|
|
void CheckCharacterInRangeImpl(uc16 from, uc16 to, Label* on_cond,
|
|
js::jit::Assembler::Condition cond);
|
|
void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
|
|
Label* on_no_match, bool ignore_case);
|
|
|
|
void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
|
|
|
|
void JumpOrBacktrack(Label* to);
|
|
|
|
// MacroAssembler methods that take a Label can be called with a
|
|
// null label, which means that we should backtrack if we would jump
|
|
// to that label. This is a helper to avoid writing out the same
|
|
// logic a dozen times.
|
|
inline js::jit::Label* LabelOrBacktrack(Label* to) {
|
|
return to ? to->inner() : &backtrack_label_;
|
|
}
|
|
|
|
void CheckBacktrackStackLimit();
|
|
|
|
static bool GrowBacktrackStack(RegExpStack* regexp_stack);
|
|
|
|
static uint32_t CaseInsensitiveCompareStrings(const char16_t* substring1,
|
|
const char16_t* substring2,
|
|
size_t byteLength);
|
|
static uint32_t CaseInsensitiveCompareUCStrings(const char16_t* substring1,
|
|
const char16_t* substring2,
|
|
size_t byteLength);
|
|
|
|
inline int char_size() { return static_cast<int>(mode_); }
|
|
inline js::jit::Scale factor() {
|
|
return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
|
|
}
|
|
|
|
js::jit::Address inputStart() {
|
|
return js::jit::Address(masm_.getStackPointer(),
|
|
offsetof(FrameData, inputStart));
|
|
}
|
|
js::jit::Address backtrackStackBase() {
|
|
return js::jit::Address(masm_.getStackPointer(),
|
|
offsetof(FrameData, backtrackStackBase));
|
|
}
|
|
js::jit::Address matches() {
|
|
return js::jit::Address(masm_.getStackPointer(),
|
|
offsetof(FrameData, matches));
|
|
}
|
|
js::jit::Address numMatches() {
|
|
return js::jit::Address(masm_.getStackPointer(),
|
|
offsetof(FrameData, numMatches));
|
|
}
|
|
|
|
// The stack-pointer-relative location of a regexp register.
|
|
js::jit::Address register_location(int register_index) {
|
|
return js::jit::Address(masm_.getStackPointer(),
|
|
register_offset(register_index));
|
|
}
|
|
|
|
int32_t register_offset(int register_index) {
|
|
MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
|
|
if (num_registers_ <= register_index) {
|
|
num_registers_ = register_index + 1;
|
|
}
|
|
static_assert(alignof(uintptr_t) <= alignof(FrameData));
|
|
return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
|
|
}
|
|
|
|
JSContext* cx_;
|
|
js::jit::StackMacroAssembler& masm_;
|
|
|
|
/*
|
|
* This assembler uses the following registers:
|
|
*
|
|
* - current_character_:
|
|
* Contains the character (or characters) currently being examined.
|
|
* Must be loaded using LoadCurrentCharacter before using any of the
|
|
* dispatch methods. After a matching pass for a global regexp,
|
|
* temporarily stores the index of capture start.
|
|
* - current_position_:
|
|
* Current position in input *as negative byte offset from end of string*.
|
|
* - input_end_pointer_:
|
|
* Points to byte after last character in the input. current_position_ is
|
|
* relative to this.
|
|
* - backtrack_stack_pointer_:
|
|
* Points to tip of the (heap-allocated) backtrack stack. The stack grows
|
|
* downward (like the native stack).
|
|
* - temp0_, temp1_, temp2_:
|
|
* Scratch registers.
|
|
*
|
|
* The native stack pointer is used to access arguments (InputOutputData),
|
|
* local variables (FrameData), and irregexp's internal virtual registers
|
|
* (see register_location).
|
|
*/
|
|
|
|
js::jit::Register current_character_;
|
|
js::jit::Register current_position_;
|
|
js::jit::Register input_end_pointer_;
|
|
js::jit::Register backtrack_stack_pointer_;
|
|
js::jit::Register temp0_, temp1_, temp2_;
|
|
|
|
js::jit::Label entry_label_;
|
|
js::jit::Label start_label_;
|
|
js::jit::Label backtrack_label_;
|
|
js::jit::Label success_label_;
|
|
js::jit::Label exit_label_;
|
|
js::jit::Label stack_overflow_label_;
|
|
js::jit::Label exit_with_exception_label_;
|
|
|
|
// When we generate the code to push a backtrack label's address
|
|
// onto the backtrack stack, we don't know its final address. We
|
|
// have to patch it after linking. This is slightly delicate, as the
|
|
// Label itself (which is allocated on the stack) may not exist by
|
|
// the time we link. The approach is as follows:
|
|
//
|
|
// 1. When we push a label on the backtrack stack (PushBacktrack),
|
|
// we bind the label's patchOffset_ field to the offset within
|
|
// the code that should be overwritten. This works because each
|
|
// label is only pushed by a single instruction.
|
|
//
|
|
// 2. When we bind a label (Bind), we check to see if it has a
|
|
// bound patchOffset_. If it does, we create a LabelPatch mapping
|
|
// its patch offset to the offset of the label itself.
|
|
//
|
|
// 3. While linking the code, we walk the list of label patches
|
|
// and patch the code accordingly.
|
|
class LabelPatch {
|
|
public:
|
|
LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
|
|
: patchOffset_(patchOffset), labelOffset_(labelOffset) {}
|
|
|
|
js::jit::CodeOffset patchOffset_;
|
|
size_t labelOffset_ = 0;
|
|
};
|
|
|
|
js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
|
|
void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
|
|
js::AutoEnterOOMUnsafeRegion oomUnsafe;
|
|
if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
|
|
oomUnsafe.crash("Irregexp label patch");
|
|
}
|
|
}
|
|
|
|
Mode mode_;
|
|
int num_registers_;
|
|
int num_capture_registers_;
|
|
js::jit::LiveGeneralRegisterSet savedRegisters_;
|
|
|
|
public:
|
|
using TableVector =
|
|
js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>;
|
|
TableVector& tables() { return tables_; }
|
|
|
|
private:
|
|
TableVector tables_;
|
|
void AddTable(PseudoHandle<ByteArrayData> table) {
|
|
js::AutoEnterOOMUnsafeRegion oomUnsafe;
|
|
if (!tables_.append(std::move(table))) {
|
|
oomUnsafe.crash("Irregexp table append");
|
|
}
|
|
}
|
|
};
|
|
|
|
} // namespace internal
|
|
} // namespace v8
|
|
|
|
#endif // RegexpMacroAssemblerArch_h
|