/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "jit/ProcessExecutableMemory.h"
#include "mozilla/Array.h"
#include "mozilla/Atomics.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/Maybe.h"
#include "mozilla/TaggedAnonymousMemory.h"
#include "mozilla/XorShift128PlusRNG.h"
#include "jsfriendapi.h"
#include "jsmath.h"
#include "jsutil.h"
#include "jswin.h"
#include <errno.h>
#include "gc/Memory.h"
#include "threading/LockGuard.h"
#include "threading/Mutex.h"
#include "vm/MutexIDs.h"
#ifdef XP_WIN
# include "mozilla/StackWalk_windows.h"
# include "mozilla/WindowsVersion.h"
#else
# include <sys/mman.h>
# include <unistd.h>
#endif
using namespace js;
using namespace js::jit;
#ifdef XP_WIN
static void*
ComputeRandomAllocationAddress()
{
    /*
     * Inspiration is V8's OS::Allocate in platform-win32.cc.
     *
     * VirtualAlloc takes 64 KiB chunks out of the virtual address space, so
     * we keep the low 16 bits of the address zero (64 KiB alignment).
     *
     * x86: V8 comments say that keeping addresses in the [64MiB, 1GiB) range
     * tries to avoid system default DLL mapping space. In the end, we get 13
     * bits of randomness in our selection.
     * x64: [2GiB, 4TiB), with 25 bits of randomness.
     */
# ifdef HAVE_64BIT_BUILD
    static const uintptr_t base = 0x0000000080000000;
    static const uintptr_t mask = 0x000003ffffff0000;
# elif defined(_M_IX86) || defined(__i386__)
    static const uintptr_t base = 0x04000000;
    static const uintptr_t mask = 0x3fff0000;
# else
#  error "Unsupported architecture"
# endif

    uint64_t rand = js::GenerateRandomSeed();
    return (void*) (base | (rand & mask));
}
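
// Worked example for the x64 constants above: rand = 0 yields the base
// address 0x0000000080000000 (2 GiB); all mask bits set yields
// 0x000003ffffff0000 (just under 4 TiB). Every candidate keeps its low
// 16 bits zero, matching VirtualAlloc's 64 KiB allocation granularity.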

# ifdef HAVE_64BIT_BUILD
static js::JitExceptionHandler sJitExceptionHandler;

JS_FRIEND_API(void)
js::SetJitExceptionHandler(JitExceptionHandler handler)
{
    MOZ_ASSERT(!sJitExceptionHandler);
    sJitExceptionHandler = handler;
}

// From documentation for UNWIND_INFO on
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
struct UnwindInfo
{
    uint8_t version : 3;
    uint8_t flags : 5;
    uint8_t sizeOfPrologue;
    uint8_t countOfUnwindCodes;
    uint8_t frameRegister : 4;
    uint8_t frameOffset : 4;
    ULONG exceptionHandler;
};

static const unsigned ThunkLength = 12;

struct ExceptionHandlerRecord
{
    RUNTIME_FUNCTION runtimeFunction;
    UnwindInfo unwindInfo;
    uint8_t thunk[ThunkLength];
};
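
// Shape of the record RegisterExecutableMemory writes at the start of the
// reservation (offsets assume the usual MSVC layout of the structs above):
//
//   p + 0x00  RUNTIME_FUNCTION  begin/end of the code region as 32-bit RVAs
//   p + 0x0c  UnwindInfo        UNW_FLAG_EHANDLER, no prologue/unwind codes
//   p + 0x14  thunk[12]         mov rax, <&ExceptionHandler>; jmp rax
//   ...                         rest of the guard page, then the code pages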

// This function must match the function pointer type PEXCEPTION_HANDLER
// mentioned in:
// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx.
// This type is rather elusive in documentation; Wine is the best I've found:
// http://source.winehq.org/source/include/winnt.h
static DWORD
ExceptionHandler(PEXCEPTION_RECORD exceptionRecord, _EXCEPTION_REGISTRATION_RECORD*,
                 PCONTEXT context, _EXCEPTION_REGISTRATION_RECORD**)
{
    return sJitExceptionHandler(exceptionRecord, context);
}

// For an explanation of the problem being solved here, see
// SetJitExceptionFilter in jsfriendapi.h.
static bool
RegisterExecutableMemory(void* p, size_t bytes, size_t pageSize)
{
    if (!VirtualAlloc(p, pageSize, MEM_COMMIT, PAGE_READWRITE))
        MOZ_CRASH();

    ExceptionHandlerRecord* r = reinterpret_cast<ExceptionHandlerRecord*>(p);

    // All these fields are specified to be offsets from the base of the
    // executable code (which is 'p'), even if they have 'Address' in their
    // names. In particular, exceptionHandler is a ULONG offset which is a
    // 32-bit integer. Since 'p' can be farther than INT32_MAX away from
    // sJitExceptionHandler, we must generate a little thunk inside the
    // record. The record is put on its own page so that we can take away write
    // access to protect against accidental clobbering.

    r->runtimeFunction.BeginAddress = pageSize;
    r->runtimeFunction.EndAddress = (DWORD)bytes;
    r->runtimeFunction.UnwindData = offsetof(ExceptionHandlerRecord, unwindInfo);

    r->unwindInfo.version = 1;
    r->unwindInfo.flags = UNW_FLAG_EHANDLER;
    r->unwindInfo.sizeOfPrologue = 0;
    r->unwindInfo.countOfUnwindCodes = 0;
    r->unwindInfo.frameRegister = 0;
    r->unwindInfo.frameOffset = 0;
    r->unwindInfo.exceptionHandler = offsetof(ExceptionHandlerRecord, thunk);

    // mov imm64, rax
    r->thunk[0] = 0x48;
    r->thunk[1] = 0xb8;
    void* handler = JS_FUNC_TO_DATA_PTR(void*, ExceptionHandler);
    memcpy(&r->thunk[2], &handler, 8);

    // jmp rax
    r->thunk[10] = 0xff;
    r->thunk[11] = 0xe0;

    DWORD oldProtect;
    if (!VirtualProtect(p, pageSize, PAGE_EXECUTE_READ, &oldProtect))
        MOZ_CRASH();

    // XXX NB: The profiler believes this function is only called from the main
    // thread. If that ever becomes untrue, the profiler must be updated
    // immediately.
    AcquireStackWalkWorkaroundLock();
    bool success = RtlAddFunctionTable(&r->runtimeFunction, 1, reinterpret_cast<DWORD64>(p));
    ReleaseStackWalkWorkaroundLock();

    return success;
}
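
// The 12-byte thunk written above disassembles (x64) as:
//
//   48 B8 <8-byte little-endian imm>   mov rax, &ExceptionHandler
//   FF E0                              jmp rax
//
// which lets the 32-bit UnwindInfo::exceptionHandler offset reach a handler
// living arbitrarily far from the JIT reservation.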

static void
UnregisterExecutableMemory(void* p, size_t bytes, size_t pageSize)
{
    ExceptionHandlerRecord* r = reinterpret_cast<ExceptionHandlerRecord*>(p);

    // XXX NB: The profiler believes this function is only called from the main
    // thread. If that ever becomes untrue, the profiler must be updated
    // immediately.
    AcquireStackWalkWorkaroundLock();
    RtlDeleteFunctionTable(&r->runtimeFunction);
    ReleaseStackWalkWorkaroundLock();
}
# endif

static void*
ReserveProcessExecutableMemory(size_t bytes)
{
# ifdef HAVE_64BIT_BUILD
    size_t pageSize = gc::SystemPageSize();
    if (sJitExceptionHandler)
        bytes += pageSize;
# endif

    void* p = nullptr;
    for (size_t i = 0; i < 10; i++) {
        void* randomAddr = ComputeRandomAllocationAddress();
        p = VirtualAlloc(randomAddr, bytes, MEM_RESERVE, PAGE_NOACCESS);
        if (p)
            break;
    }

    if (!p) {
        // Try again without randomization.
        p = VirtualAlloc(nullptr, bytes, MEM_RESERVE, PAGE_NOACCESS);
        if (!p)
            return nullptr;
    }

# ifdef HAVE_64BIT_BUILD
    if (sJitExceptionHandler) {
        if (!RegisterExecutableMemory(p, bytes, pageSize)) {
            VirtualFree(p, 0, MEM_RELEASE);
            return nullptr;
        }
        p = (uint8_t*)p + pageSize;
    }
# endif

    return p;
}
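
// With sJitExceptionHandler set on Win64, the pointer returned above is one
// system page past the reservation base: that first page holds the
// ExceptionHandlerRecord, and DeallocateProcessExecutableMemory below steps
// the pointer back by the same page before unregistering and releasing.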

static void
DeallocateProcessExecutableMemory(void* addr, size_t bytes)
{
# ifdef HAVE_64BIT_BUILD
    if (sJitExceptionHandler) {
        size_t pageSize = gc::SystemPageSize();
        addr = (uint8_t*)addr - pageSize;
        UnregisterExecutableMemory(addr, bytes, pageSize);
    }
# endif

    VirtualFree(addr, 0, MEM_RELEASE);
}

static DWORD
ProtectionSettingToFlags(ProtectionSetting protection)
{
    switch (protection) {
      case ProtectionSetting::Protected:  return PAGE_NOACCESS;
      case ProtectionSetting::Writable:   return PAGE_READWRITE;
      case ProtectionSetting::Executable: return PAGE_EXECUTE_READ;
    }
    MOZ_CRASH();
}

static void
CommitPages(void* addr, size_t bytes, ProtectionSetting protection)
{
    if (!VirtualAlloc(addr, bytes, MEM_COMMIT, ProtectionSettingToFlags(protection)))
        MOZ_CRASH("CommitPages failed");
}

static void
DecommitPages(void* addr, size_t bytes)
{
    if (!VirtualFree(addr, bytes, MEM_DECOMMIT))
        MOZ_CRASH("DecommitPages failed");
}

#else // !XP_WIN

static void*
ComputeRandomAllocationAddress()
{
    uint64_t rand = js::GenerateRandomSeed();

# ifdef HAVE_64BIT_BUILD
    // x64 CPUs have a 48-bit address space and on some platforms the OS will
    // give us access to 47 bits, so to be safe we right shift by 18 to leave
    // 46 bits.
    rand >>= 18;
# else
    // On 32-bit, right shift by 34 to leave 30 bits, range [0, 1GiB). Then add
    // 512MiB to get range [512MiB, 1.5GiB), or [0x20000000, 0x60000000). This
    // is based on V8 comments in platform-posix.cc saying this range is
    // relatively unpopulated across a variety of kernels.
    rand >>= 34;
    rand += 512 * 1024 * 1024;
# endif

    // Ensure page alignment.
    uintptr_t mask = ~uintptr_t(gc::SystemPageSize() - 1);
    return (void*) uintptr_t(rand & mask);
}
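
// Worked example for the 64-bit path above: a seed of 0xffffffffffffffff
// shifted right by 18 leaves 0x00003fffffffffff, so every hint stays within
// the low 46 bits of the address space; the page mask then clears the low
// 12 bits on a typical 4 KiB-page system.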

static void*
ReserveProcessExecutableMemory(size_t bytes)
{
    // Note that randomAddr is just a hint: if the address is not available
    // mmap will pick a different address.
    void* randomAddr = ComputeRandomAllocationAddress();
    void* p = MozTaggedAnonymousMmap(randomAddr, bytes, PROT_NONE, MAP_PRIVATE | MAP_ANON,
                                     -1, 0, "js-executable-memory");
    if (p == MAP_FAILED)
        return nullptr;
    return p;
}

static void
DeallocateProcessExecutableMemory(void* addr, size_t bytes)
{
    mozilla::DebugOnly<int> result = munmap(addr, bytes);
    MOZ_ASSERT(!result || errno == ENOMEM);
}

static unsigned
ProtectionSettingToFlags(ProtectionSetting protection)
{
    switch (protection) {
      case ProtectionSetting::Protected:  return PROT_NONE;
      case ProtectionSetting::Writable:   return PROT_READ | PROT_WRITE;
      case ProtectionSetting::Executable: return PROT_READ | PROT_EXEC;
    }
    MOZ_CRASH();
}

static void
CommitPages(void* addr, size_t bytes, ProtectionSetting protection)
{
    void* p = MozTaggedAnonymousMmap(addr, bytes, ProtectionSettingToFlags(protection),
                                     MAP_FIXED | MAP_PRIVATE | MAP_ANON,
                                     -1, 0, "js-executable-memory");
    MOZ_RELEASE_ASSERT(addr == p);
}

static void
DecommitPages(void* addr, size_t bytes)
{
    // Use mmap with MAP_FIXED and PROT_NONE. Inspired by jemalloc's
    // pages_decommit.
    void* p = MozTaggedAnonymousMmap(addr, bytes, PROT_NONE,
                                     MAP_FIXED | MAP_PRIVATE | MAP_ANON,
                                     -1, 0, "js-executable-memory");
    MOZ_RELEASE_ASSERT(addr == p);
}
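
// Note on the MAP_FIXED + PROT_NONE trick above: mapping over an existing
// anonymous mapping atomically discards its backing pages, so the physical
// memory goes back to the OS while the address range itself stays reserved;
// a later CommitPages over the same range sees fresh zero-filled pages.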
#endif

template <size_t NumBits>
class PageBitSet
{
    using WordType = uint32_t;
    static const size_t BitsPerWord = sizeof(WordType) * 8;

    static_assert((NumBits % BitsPerWord) == 0,
                  "NumBits must be a multiple of BitsPerWord");
    static const size_t NumWords = NumBits / BitsPerWord;

    mozilla::Array<WordType, NumWords> words_;

    uint32_t indexToWord(uint32_t index) const {
        MOZ_ASSERT(index < NumBits);
        return index / BitsPerWord;
    }
    WordType indexToBit(uint32_t index) const {
        MOZ_ASSERT(index < NumBits);
        return WordType(1) << (index % BitsPerWord);
    }

  public:
    void init() {
        mozilla::PodArrayZero(words_);
    }
    bool contains(size_t index) const {
        uint32_t word = indexToWord(index);
        return words_[word] & indexToBit(index);
    }
    void insert(size_t index) {
        MOZ_ASSERT(!contains(index));
        uint32_t word = indexToWord(index);
        words_[word] |= indexToBit(index);
    }
    void remove(size_t index) {
        MOZ_ASSERT(contains(index));
        uint32_t word = indexToWord(index);
        words_[word] &= ~indexToBit(index);
    }

#ifdef DEBUG
    bool empty() const {
        for (size_t i = 0; i < NumWords; i++) {
            if (words_[i] != 0)
                return false;
        }
        return true;
    }
#endif
};

// Limit on the number of bytes of executable memory to prevent JIT spraying
// attacks.
#if JS_BITS_PER_WORD == 32
static const size_t MaxCodeBytesPerProcess = 128 * 1024 * 1024;
#else
static const size_t MaxCodeBytesPerProcess = 1 * 1024 * 1024 * 1024;
#endif
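
// A quick size sketch (assuming ExecutableCodePageSize is 64 KiB, its value
// in ProcessExecutableMemory.h): the 1 GiB 64-bit limit above is 16384 pages
// and the 128 MiB 32-bit limit is 2048 pages, so the PageBitSet tracking
// them below costs at most 16384 / 32 = 512 words, i.e. 2 KiB of state.
static_assert(MaxCodeBytesPerProcess / ExecutableCodePageSize <= 16 * 1024,
              "page bookkeeping (PageBitSet) should stay within 2 KiB");
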
// Per-process executable memory allocator. It reserves a block of memory of
// MaxCodeBytesPerProcess bytes, then allocates/deallocates pages from that.
//
// This has a number of benefits compared to raw mmap/VirtualAlloc:
//
// * More resilient against certain attacks.
//
// * Behaves more consistently across platforms: it avoids the 64K granularity
// issues on Windows, for instance.
//
// * On x64, near jumps can be used for jumps to other JIT pages.
//
// * On Win64, we have to register the exception handler only once (at process
// startup). This saves some memory and avoids RtlAddFunctionTable profiler
// deadlocks.
class ProcessExecutableMemory
{
    static_assert((MaxCodeBytesPerProcess % ExecutableCodePageSize) == 0,
                  "MaxCodeBytesPerProcess must be a multiple of ExecutableCodePageSize");

    static const size_t MaxCodePages = MaxCodeBytesPerProcess / ExecutableCodePageSize;

    // Start of the MaxCodeBytesPerProcess memory block or nullptr if
    // uninitialized. Note that this is NOT guaranteed to be aligned to
    // ExecutableCodePageSize.
    uint8_t* base_;

    // The fields below should only be accessed while we hold the lock.
    Mutex lock_;

    // pagesAllocated_ is an Atomic so that bytesAllocated does not have to
    // take the lock.
    mozilla::Atomic<size_t, mozilla::ReleaseAcquire> pagesAllocated_;

    // Page where we should try to allocate next.
    size_t cursor_;

    mozilla::Maybe<mozilla::non_crypto::XorShift128PlusRNG> rng_;
    PageBitSet<MaxCodePages> pages_;

  public:
    ProcessExecutableMemory()
      : base_(nullptr),
        lock_(mutexid::ProcessExecutableRegion),
        pagesAllocated_(0),
        cursor_(0),
        rng_(),
        pages_()
    {}

    MOZ_MUST_USE bool init() {
        pages_.init();

        MOZ_RELEASE_ASSERT(!initialized());
        MOZ_RELEASE_ASSERT(gc::SystemPageSize() <= ExecutableCodePageSize);

        void* p = ReserveProcessExecutableMemory(MaxCodeBytesPerProcess);
        if (!p)
            return false;

        base_ = static_cast<uint8_t*>(p);

        mozilla::Array<uint64_t, 2> seed;
        GenerateXorShift128PlusSeed(seed);
        rng_.emplace(seed[0], seed[1]);
        return true;
    }

    bool initialized() const {
        return base_ != nullptr;
    }

    size_t bytesAllocated() const {
        MOZ_ASSERT(pagesAllocated_ <= MaxCodePages);
        return pagesAllocated_ * ExecutableCodePageSize;
    }

    void release() {
        MOZ_ASSERT(initialized());
        MOZ_ASSERT(pages_.empty());
        MOZ_ASSERT(pagesAllocated_ == 0);
        DeallocateProcessExecutableMemory(base_, MaxCodeBytesPerProcess);
        base_ = nullptr;
        rng_.reset();
        MOZ_ASSERT(!initialized());
    }

    void assertValidAddress(void* p, size_t bytes) const {
        MOZ_RELEASE_ASSERT(p >= base_ &&
                           uintptr_t(p) + bytes <= uintptr_t(base_) + MaxCodeBytesPerProcess);
    }

    void* allocate(size_t bytes, ProtectionSetting protection);
    void deallocate(void* addr, size_t bytes);
};

void*
ProcessExecutableMemory::allocate(size_t bytes, ProtectionSetting protection)
{
    MOZ_ASSERT(initialized());
    MOZ_ASSERT(bytes > 0);
    MOZ_ASSERT((bytes % ExecutableCodePageSize) == 0);

    size_t numPages = bytes / ExecutableCodePageSize;

    // Take the lock and try to allocate.
    void* p = nullptr;
    {
        LockGuard<Mutex> guard(lock_);
        MOZ_ASSERT(pagesAllocated_ <= MaxCodePages);

        // Check if we have enough pages available.
        if (pagesAllocated_ + numPages >= MaxCodePages)
            return nullptr;

        MOZ_ASSERT(bytes <= MaxCodeBytesPerProcess);

        // Maybe skip a page to make allocations less predictable.
        size_t page = cursor_ + (rng_.ref().next() % 2);

        for (size_t i = 0; i < MaxCodePages; i++) {
            // Make sure page + numPages - 1 is a valid index.
            if (page + numPages > MaxCodePages)
                page = 0;

            bool available = true;
            for (size_t j = 0; j < numPages; j++) {
                if (pages_.contains(page + j)) {
                    available = false;
                    break;
                }
            }
            if (!available) {
                page++;
                continue;
            }

            // Mark the pages as unavailable.
            for (size_t j = 0; j < numPages; j++)
                pages_.insert(page + j);

            pagesAllocated_ += numPages;
            MOZ_ASSERT(pagesAllocated_ <= MaxCodePages);

            // If we allocated a small number of pages, move cursor_ to the
            // next page. We don't do this for larger allocations to avoid
            // skipping a large number of small holes.
            if (numPages <= 2)
                cursor_ = page + numPages;

            p = base_ + page * ExecutableCodePageSize;
            break;
        }
        if (!p)
            return nullptr;
    }

    // Commit the pages after releasing the lock.
    CommitPages(p, bytes, protection);
    return p;
}
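
// Allocation sketch: with cursor_ == 5 and a one-page request, the scan above
// starts at page 5 or 6 (one bit of rng_ jitter), walks forward past pages
// already set in pages_, wraps to page 0 whenever the candidate run would
// cross MaxCodePages, and gives up after MaxCodePages probes, so heavy
// fragmentation degrades to one linear scan rather than an infinite loop.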

void
ProcessExecutableMemory::deallocate(void* addr, size_t bytes)
{
    MOZ_ASSERT(initialized());
    MOZ_ASSERT(addr);
    MOZ_ASSERT((uintptr_t(addr) % gc::SystemPageSize()) == 0);
    MOZ_ASSERT(bytes > 0);
    MOZ_ASSERT((bytes % ExecutableCodePageSize) == 0);

    assertValidAddress(addr, bytes);

    size_t firstPage = (static_cast<uint8_t*>(addr) - base_) / ExecutableCodePageSize;
    size_t numPages = bytes / ExecutableCodePageSize;

    // Decommit before taking the lock.
    DecommitPages(addr, bytes);

    LockGuard<Mutex> guard(lock_);
    MOZ_ASSERT(numPages <= pagesAllocated_);
    pagesAllocated_ -= numPages;

    for (size_t i = 0; i < numPages; i++)
        pages_.remove(firstPage + i);

    // Move the cursor back so we can reuse pages instead of fragmenting the
    // whole region.
    if (firstPage < cursor_)
        cursor_ = firstPage;
}

static ProcessExecutableMemory execMemory;

void*
js::jit::AllocateExecutableMemory(size_t bytes, ProtectionSetting protection)
{
#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    // On MIPS, j/jal instructions can only jump within the current 256 MB
    // aligned region, so retry until we get an allocation that does not
    // cross a 256 MB boundary. (uintptr_t arithmetic below avoids arithmetic
    // on void*, which is non-standard.)
    void* allocation = nullptr;
    js::Vector<void*, 8, SystemAllocPolicy> unused_maps;
    for (;;) {
        allocation = execMemory.allocate(bytes, protection);
        if (!allocation)
            break;
        if ((uintptr_t(allocation) >> 28) == ((uintptr_t(allocation) + bytes) >> 28))
            break;
        if (!unused_maps.append(allocation)) {
            // OOM while recording the rejected block: release it and fail.
            DeallocateExecutableMemory(allocation, bytes);
            allocation = nullptr;
            break;
        }
    }
    for (size_t i = 0; i < unused_maps.length(); i++)
        DeallocateExecutableMemory(unused_maps[i], bytes);
    return allocation;
#else
    return execMemory.allocate(bytes, protection);
#endif
}

void
js::jit::DeallocateExecutableMemory(void* addr, size_t bytes)
{
    execMemory.deallocate(addr, bytes);
}

bool
js::jit::InitProcessExecutableMemory()
{
    return execMemory.init();
}

void
js::jit::ReleaseProcessExecutableMemory()
{
    execMemory.release();
}
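
// Typical embedder flow through this API (an illustrative sketch, not code
// from this tree; 'len' is hypothetical):
//
//   js::jit::InitProcessExecutableMemory();
//   void* code = js::jit::AllocateExecutableMemory(ExecutableCodePageSize,
//                                                  ProtectionSetting::Writable);
//   // ... emit len bytes of machine code into 'code' ...
//   js::jit::ReprotectRegion(code, len, ProtectionSetting::Executable);
//   // ... execute ...
//   js::jit::DeallocateExecutableMemory(code, ExecutableCodePageSize);
//   js::jit::ReleaseProcessExecutableMemory();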

bool
js::jit::CanLikelyAllocateMoreExecutableMemory()
{
    // Use a 16 MB buffer.
    static const size_t BufferSize = 16 * 1024 * 1024;

    MOZ_ASSERT(execMemory.bytesAllocated() <= MaxCodeBytesPerProcess);
    return execMemory.bytesAllocated() + BufferSize <= MaxCodeBytesPerProcess;
}

bool
js::jit::ReprotectRegion(void* start, size_t size, ProtectionSetting protection)
{
    // Calculate the start of the page containing this region,
    // and account for this extra memory within size.
    size_t pageSize = gc::SystemPageSize();
    intptr_t startPtr = reinterpret_cast<intptr_t>(start);
    intptr_t pageStartPtr = startPtr & ~(pageSize - 1);
    void* pageStart = reinterpret_cast<void*>(pageStartPtr);
    size += (startPtr - pageStartPtr);

    // Round size up.
    size += (pageSize - 1);
    size &= ~(pageSize - 1);

    MOZ_ASSERT((uintptr_t(pageStart) % pageSize) == 0);
    execMemory.assertValidAddress(pageStart, size);

#ifdef XP_WIN
    DWORD oldProtect;
    DWORD flags = ProtectionSettingToFlags(protection);
    if (!VirtualProtect(pageStart, size, flags, &oldProtect))
        return false;
#else
    unsigned flags = ProtectionSettingToFlags(protection);
    if (mprotect(pageStart, size, flags))
        return false;
#endif

    execMemory.assertValidAddress(pageStart, size);
    return true;
}
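
// Rounding example for ReprotectRegion (assuming 4 KiB system pages):
// start = 0x7f0000001234 with size = 0x100 gives pageStart = 0x7f0000001000
// and size = 0x100 + 0x234 = 0x334, which the round-up then widens to one
// full page, 0x1000.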