/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "vm/Stopwatch.h"

#include "mozilla/ArrayUtils.h"
#include "mozilla/IntegerTypeTraits.h"
#include "mozilla/Unused.h"

#if defined(XP_WIN)
#include <processthreadsapi.h>
#include <windows.h>
#endif // defined(XP_WIN)

#include "jscompartment.h"

#include "gc/Zone.h"
#include "vm/Runtime.h"

namespace js {

bool
|
|
PerformanceMonitoring::addRecentGroup(PerformanceGroup* group)
|
|
{
|
|
if (group->isUsedInThisIteration())
|
|
return true;
|
|
|
|
group->setIsUsedInThisIteration(true);
|
|
return recentGroups_.append(group);
|
|
}
|
|
|
|
// Invalidate all ongoing measures and start a fresh iteration.
void
PerformanceMonitoring::reset()
{
    // All ongoing measures are dependent on the current iteration#.
    // By incrementing it, we mark all data as stale. Stale data will
    // be overwritten progressively during the execution.
    ++iteration_;
    recentGroups_.clear();

    // Every so often, we will be rescheduled to another CPU. If this
    // happens, we may end up with an entirely unsynchronized
    // timestamp counter. If we do not reset
    // `highestTimestampCounter_`, we could end up ignoring entirely
    // valid sets of measures just because we are on a CPU that has a
    // lower RDTSC.
    highestTimestampCounter_ = 0;
}

void
|
|
PerformanceMonitoring::start()
|
|
{
|
|
if (!isMonitoringJank_)
|
|
return;
|
|
|
|
if (iteration_ == startedAtIteration_) {
|
|
// The stopwatch is already started for this iteration.
|
|
return;
|
|
}
|
|
|
|
startedAtIteration_ = iteration_;
|
|
if (stopwatchStartCallback)
|
|
stopwatchStartCallback(iteration_, stopwatchStartClosure);
|
|
}
|
|
|
|
// Commit the data that has been collected during the iteration
// into the actual `PerformanceData`.
//
// We use the proportion of cycles-spent-in-group over
// cycles-spent-in-toplevel-group as an approximation to allocate
// system (kernel) time and user (CPU) time to each group. Note
// that cycles are not an exact measure:
//
// 1. if the computer has gone to sleep, the clock may be reset to 0;
// 2. if the process is moved between CPUs/cores, it may end up on a CPU
//    or core with an unsynchronized clock;
// 3. the mapping between clock cycles and walltime varies with the current
//    frequency of the CPU;
// 4. other threads/processes using the same CPU will also increment
//    the counter.
//
// ** Effect of 1. (computer going to sleep)
//
// We assume that this will happen very seldom. Since the final numbers
// are bounded by the CPU time and Kernel time reported by `getresources`,
// the effect will be contained to a single iteration of the event loop.
//
// ** Effect of 2. (moving between CPUs/cores)
//
// On platforms that support it, we only measure the number of cycles
// if we start and end execution of a group on the same
// CPU/core. While there is a small window (a few cycles) during which
// the thread can be migrated without us noticing, we expect that this
// will happen rarely enough that this won't affect the statistics
// meaningfully.
//
// On other platforms, assuming that the probability of jumping
// between CPUs/cores during a given (real) cycle is constant, and
// that the distribution of differences between clocks is even, the
// probability that the number of cycles reported by a measure is
// modified by X cycles should be a gaussian distribution, with groups
// with longer execution having a larger amplitude than groups with
// shorter execution. Since we discard measures that result in a
// negative number of cycles, this distribution is actually skewed
// towards over-estimating the number of cycles of groups that already
// have many cycles and under-estimating the number of cycles that
// already have fewer cycles.
//
// Since the final numbers are bounded by the CPU time and Kernel time
// reported by `getresources`, we accept this bias.
//
// ** Effect of 3. (mapping between clock cycles and walltime)
//
// Assuming that this is evenly distributed, we expect that this will
// eventually balance out.
//
// ** Effect of 4. (cycles increase with system activity)
//
// Assuming that, within an iteration of the event loop, this happens
// uniformly over time, this will skew towards over-estimating the number
// of cycles of groups that already have many cycles and under-estimating
// the number of cycles that already have fewer cycles.
//
// Since the final numbers are bounded by the CPU time and Kernel time
// reported by `getresources`, we accept this bias.
//
// ** Big picture
//
// Computing the number of cycles is fast and should be accurate
// enough in practice. Alternatives (such as calling `getresources`
// all the time or sampling from another thread) are very expensive
// in system calls and/or battery and not necessarily more accurate.
bool
|
|
PerformanceMonitoring::commit()
|
|
{
|
|
// Maximal initialization size, in elements for the vector of groups.
|
|
static const size_t MAX_GROUPS_INIT_CAPACITY = 1024;
|
|
|
|
#if !defined(MOZ_HAVE_RDTSC)
|
|
// The AutoStopwatch is only executed if `MOZ_HAVE_RDTSC`.
|
|
return false;
|
|
#endif // !defined(MOZ_HAVE_RDTSC)
|
|
|
|
if (!isMonitoringJank_) {
|
|
// Either we have not started monitoring or monitoring has
|
|
// been cancelled during the iteration.
|
|
return true;
|
|
}
|
|
|
|
if (startedAtIteration_ != iteration_) {
|
|
// No JS code has been monitored during this iteration.
|
|
return true;
|
|
}
|
|
|
|
// The move operation is generally constant time, unless
|
|
// `recentGroups_.length()` is very small, in which case
|
|
// it's fast just because it's small.
|
|
PerformanceGroupVector recentGroups(Move(recentGroups_));
|
|
recentGroups_ = PerformanceGroupVector(); // Reconstruct after `Move`.
|
|
|
|
bool success = true;
|
|
if (stopwatchCommitCallback)
|
|
success = stopwatchCommitCallback(iteration_, recentGroups, stopwatchCommitClosure);
|
|
|
|
// Heuristic: we expect to have roughly the same number of groups as in
|
|
// the previous iteration.
|
|
const size_t capacity = recentGroups.capacity() < MAX_GROUPS_INIT_CAPACITY ?
|
|
recentGroups.capacity() :
|
|
MAX_GROUPS_INIT_CAPACITY;
|
|
success = recentGroups_.reserve(capacity)
|
|
&& success;
|
|
|
|
// Reset immediately, to make sure that we're not hit by the end
|
|
// of a nested event loop (which would cause `commit` to be called
|
|
// twice in succession).
|
|
reset();
|
|
return success;
|
|
}
|
|
|
|
// Read the hardware timestamp counter, monotonized across calls: a
// hardware value lower than a previously observed one (e.g. after a
// migration to a CPU whose TSC lags behind) is clamped to the highest
// value seen so far. Returns 0 on platforms without RDTSC.
uint64_t
PerformanceMonitoring::monotonicReadTimestampCounter()
{
#if defined(MOZ_HAVE_RDTSC)
    const uint64_t hardware = ReadTimestampCounter();
    if (highestTimestampCounter_ < hardware)
        highestTimestampCounter_ = hardware;
    return highestTimestampCounter_;
#else
    // No RDTSC support: cycle measures degrade to 0.
    return 0;
#endif // defined(MOZ_HAVE_RDTSC)
}

// Drop all references to performance groups: the recent groups held
// by this object (via `reset()`) and each compartment's cached group
// vector. Used during teardown to break reference cycles.
void
PerformanceMonitoring::dispose(JSRuntime* rt)
{
    reset();
    for (CompartmentsIter c(rt, SkipAtoms); !c.done(); c.next()) {
        c->performanceMonitoring.unlink();
    }
}

// Release the groups held by this holder.
PerformanceGroupHolder::~PerformanceGroupHolder()
{
    unlink();
}

void
|
|
PerformanceGroupHolder::unlink()
|
|
{
|
|
initialized_ = false;
|
|
groups_.clear();
|
|
}
|
|
|
|
// Return the vector of performance groups for the compartment that
// owns this holder, built lazily via the embedding's callback.
// Returns nullptr when no callback was provided or the callback
// failed; in that case no caching occurs and a later call may retry.
const PerformanceGroupVector*
PerformanceGroupHolder::getGroups(JSContext* cx)
{
    if (initialized_)
        return &groups_;

    auto& monitoring = runtime_->performanceMonitoring;
    if (!monitoring.getGroupsCallback)
        return nullptr;

    if (!monitoring.getGroupsCallback(cx, groups_, monitoring.getGroupsClosure))
        return nullptr;

    initialized_ = true;
    return &groups_;
}

// Start measuring the current compartment: acquire every performance
// group attached to the compartment that is active and not already
// owned by an enclosing AutoStopwatch, then record starting values.
AutoStopwatch::AutoStopwatch(JSContext* cx MOZ_GUARD_OBJECT_NOTIFIER_PARAM_IN_IMPL)
  : cx_(cx)
  , iteration_(0)
  , isMonitoringJank_(false)
  , isMonitoringCPOW_(false)
  , cyclesStart_(0)
  , CPOWTimeStart_(0)
{
    MOZ_GUARD_OBJECT_NOTIFIER_INIT;

    JSCompartment* compartment = cx_->compartment();
    if (MOZ_UNLIKELY(compartment->scheduledForDestruction))
        return;

    JSRuntime* runtime = cx_->runtime();
    iteration_ = runtime->performanceMonitoring.iteration();

    const PerformanceGroupVector* groups = compartment->performanceMonitoring.getGroups(cx);
    if (!groups) {
        // Either the embedding has not provided any performance
        // monitoring logistics or there was an error that prevents
        // performance monitoring.
        return;
    }
    for (auto group = groups->begin(); group < groups->end(); group++) {
        // `acquireGroup` yields nullptr for groups that are inactive
        // or already owned by an enclosing stopwatch; skip those.
        auto acquired = acquireGroup(*group);
        if (acquired) {
            if (!groups_.append(acquired))
                MOZ_CRASH();
        }
    }
    if (groups_.length() == 0) {
        // We are not in charge of monitoring anything.
        return;
    }

    // Now that we are sure that JS code is being executed,
    // initialize the stopwatch for this iteration, lazily.
    runtime->performanceMonitoring.start();
    enter();
}

// Stop measuring: if we are still within the same iteration (no
// nested event loop has committed/reset in the meantime), record the
// deltas and release every group acquired by the constructor.
AutoStopwatch::~AutoStopwatch()
{
    if (groups_.length() == 0) {
        // We are not in charge of monitoring anything.
        return;
    }

    JSCompartment* compartment = cx_->compartment();
    if (MOZ_UNLIKELY(compartment->scheduledForDestruction))
        return;

    JSRuntime* runtime = cx_->runtime();
    if (MOZ_UNLIKELY(iteration_ != runtime->performanceMonitoring.iteration())) {
        // We have entered a nested event loop at some point.
        // Any information we may have is obsolete.
        return;
    }

    mozilla::Unused << exit(); // Sadly, there is nothing we can do about an error at this point.

    for (auto group = groups_.begin(); group < groups_.end(); group++)
        releaseGroup(*group);
}

void
|
|
AutoStopwatch::enter()
|
|
{
|
|
JSRuntime* runtime = cx_->runtime();
|
|
|
|
if (runtime->performanceMonitoring.isMonitoringCPOW()) {
|
|
CPOWTimeStart_ = runtime->performanceMonitoring.totalCPOWTime;
|
|
isMonitoringCPOW_ = true;
|
|
}
|
|
|
|
if (runtime->performanceMonitoring.isMonitoringJank()) {
|
|
cyclesStart_ = this->getCycles(runtime);
|
|
cpuStart_ = this->getCPU();
|
|
isMonitoringJank_ = true;
|
|
}
|
|
}
|
|
|
|
// Compute the deltas accumulated since `enter()` for each monitoring
// mode that was active both at entry and now, and distribute them to
// the acquired groups. Returns false on OOM while recording.
bool
AutoStopwatch::exit()
{
    JSRuntime* runtime = cx_->runtime();

    uint64_t cyclesDelta = 0;
    if (isMonitoringJank_ && runtime->performanceMonitoring.isMonitoringJank()) {
        // We were monitoring jank when we entered and we still are.

        // If possible, discard results when we don't end on the
        // same CPU as we started. Note that we can be
        // rescheduled to another CPU between `getCycles()` and
        // `getCPU()`. We hope that this will happen rarely
        // enough that the impact on our statistics will remain
        // limited.
        const cpuid_t cpuEnd = this->getCPU();
        if (isSameCPU(cpuStart_, cpuEnd)) {
            const uint64_t cyclesEnd = getCycles(runtime);
            cyclesDelta = cyclesEnd - cyclesStart_; // Always >= 0 by definition of `getCycles`.
        }
    }

    uint64_t CPOWTimeDelta = 0;
    if (isMonitoringCPOW_ && runtime->performanceMonitoring.isMonitoringCPOW()) {
        // We were monitoring CPOW when we entered and we still are.
        const uint64_t CPOWTimeEnd = runtime->performanceMonitoring.totalCPOWTime;
        CPOWTimeDelta = getDelta(CPOWTimeEnd, CPOWTimeStart_);
    }
    return addToGroups(cyclesDelta, CPOWTimeDelta);
}

// Try to take ownership of `group` for this iteration. Returns the
// group on success, or nullptr when it is already owned by another
// (enclosing) stopwatch or is not active.
PerformanceGroup*
AutoStopwatch::acquireGroup(PerformanceGroup* group)
{
    MOZ_ASSERT(group);

    if (group->isAcquired(iteration_) || !group->isActive())
        return nullptr;

    group->acquire(iteration_, this);
    return group;
}

// Relinquish ownership of a group previously obtained via `acquireGroup`.
void
AutoStopwatch::releaseGroup(PerformanceGroup* group)
{
    MOZ_ASSERT(group);
    group->release(iteration_, this);
}

bool
|
|
AutoStopwatch::addToGroups(uint64_t cyclesDelta, uint64_t CPOWTimeDelta)
|
|
{
|
|
JSRuntime* runtime = cx_->runtime();
|
|
|
|
for (auto group = groups_.begin(); group < groups_.end(); ++group) {
|
|
if (!addToGroup(runtime, cyclesDelta, CPOWTimeDelta, *group))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
AutoStopwatch::addToGroup(JSRuntime* runtime, uint64_t cyclesDelta, uint64_t CPOWTimeDelta, PerformanceGroup* group)
|
|
{
|
|
MOZ_ASSERT(group);
|
|
MOZ_ASSERT(group->isAcquired(iteration_, this));
|
|
|
|
if (!runtime->performanceMonitoring.addRecentGroup(group))
|
|
return false;
|
|
group->addRecentTicks(iteration_, 1);
|
|
group->addRecentCycles(iteration_, cyclesDelta);
|
|
group->addRecentCPOW(iteration_, CPOWTimeDelta);
|
|
return true;
|
|
}
|
|
|
|
uint64_t
|
|
AutoStopwatch::getDelta(const uint64_t end, const uint64_t start) const
|
|
{
|
|
if (start >= end)
|
|
return 0;
|
|
return end - start;
|
|
}
|
|
|
|
// Read the runtime's monotonized timestamp counter.
uint64_t
AutoStopwatch::getCycles(JSRuntime* runtime) const
{
    return runtime->performanceMonitoring.monotonicReadTimestampCounter();
}

// Identify the CPU/core currently executing this thread, so measures
// spanning a CPU migration can be discarded. Only implemented on
// Windows Vista and later; elsewhere a default id is returned, which
// makes every CPU compare equal in `isSameCPU`.
cpuid_t inline
AutoStopwatch::getCPU() const
{
#if defined(XP_WIN) && WINVER >= _WIN32_WINNT_VISTA
    PROCESSOR_NUMBER proc;
    GetCurrentProcessorNumberEx(&proc);

    cpuid_t result(proc.Group, proc.Number);
    return result;
#else
    // Other platforms: value-initialized cpuid_t, i.e. "same CPU".
    return {};
#endif // defined(XP_WIN) && WINVER >= _WIN32_WINNT_VISTA
}

// Compare two CPU identifiers. On platforms where `getCPU` is not
// implemented this always returns true, so migrations go undetected
// (see the commentary above `PerformanceMonitoring::commit`).
bool inline
AutoStopwatch::isSameCPU(const cpuid_t& a, const cpuid_t& b) const
{
#if defined(XP_WIN) && WINVER >= _WIN32_WINNT_VISTA
    return a.group_ == b.group_ && a.number_ == b.number_;
#else
    return true;
#endif // defined(XP_WIN) && WINVER >= _WIN32_WINNT_VISTA
}

// A PerformanceGroup starts inactive, unowned, with no recent data
// and a zero refcount.
PerformanceGroup::PerformanceGroup()
  : recentCycles_(0)
  , recentTicks_(0)
  , recentCPOW_(0)
  , iteration_(0)
  , isActive_(false)
  , isUsedInThisIteration_(false)
  , owner_(nullptr)
  , refCount_(0)
{ }

// The iteration during which this group was last measured.
uint64_t
PerformanceGroup::iteration() const
{
    return iteration_;
}

bool
|
|
PerformanceGroup::isAcquired(uint64_t it) const
|
|
{
|
|
return owner_ != nullptr && iteration_ == it;
|
|
}
|
|
|
|
bool
|
|
PerformanceGroup::isAcquired(uint64_t it, const AutoStopwatch* owner) const
|
|
{
|
|
return owner_ == owner && iteration_ == it;
|
|
}
|
|
|
|
void
|
|
PerformanceGroup::acquire(uint64_t it, const AutoStopwatch* owner)
|
|
{
|
|
if (iteration_ != it) {
|
|
// Any data that pretends to be recent is actually bound
|
|
// to an older iteration and therefore stale.
|
|
resetRecentData();
|
|
}
|
|
iteration_ = it;
|
|
owner_ = owner;
|
|
}
|
|
|
|
// Relinquish ownership of this group, but only if `it` is still the
// group's current iteration — a stale release is silently ignored.
void
PerformanceGroup::release(uint64_t it, const AutoStopwatch* owner)
{
    if (iteration_ != it)
        return;

    MOZ_ASSERT(owner == owner_ || owner_ == nullptr);
    owner_ = nullptr;
}

void
|
|
PerformanceGroup::resetRecentData()
|
|
{
|
|
recentCycles_ = 0;
|
|
recentTicks_ = 0;
|
|
recentCPOW_ = 0;
|
|
isUsedInThisIteration_ = false;
|
|
}
|
|
|
|
|
|
// Cycles accumulated during `iteration`, which must be the group's
// current iteration.
uint64_t
PerformanceGroup::recentCycles(uint64_t iteration) const
{
    MOZ_ASSERT(iteration == iteration_);
    return recentCycles_;
}

// Accumulate `cycles` into the current iteration's cycle total.
void
PerformanceGroup::addRecentCycles(uint64_t iteration, uint64_t cycles)
{
    MOZ_ASSERT(iteration == iteration_);
    recentCycles_ += cycles;
}

// Ticks (stopwatch enter/exit pairs) accumulated during `iteration`,
// which must be the group's current iteration.
uint64_t
PerformanceGroup::recentTicks(uint64_t iteration) const
{
    MOZ_ASSERT(iteration == iteration_);
    return recentTicks_;
}

// Accumulate `ticks` into the current iteration's tick total.
void
PerformanceGroup::addRecentTicks(uint64_t iteration, uint64_t ticks)
{
    MOZ_ASSERT(iteration == iteration_);
    recentTicks_ += ticks;
}

// CPOW time accumulated during `iteration`, which must be the
// group's current iteration.
uint64_t
PerformanceGroup::recentCPOW(uint64_t iteration) const
{
    MOZ_ASSERT(iteration == iteration_);
    return recentCPOW_;
}

// Accumulate `CPOW` into the current iteration's CPOW total.
void
PerformanceGroup::addRecentCPOW(uint64_t iteration, uint64_t CPOW)
{
    MOZ_ASSERT(iteration == iteration_);
    recentCPOW_ += CPOW;
}

// Whether this group should currently be measured (set by the embedding).
bool
PerformanceGroup::isActive() const
{
    return isActive_;
}

// Enable or disable measuring of this group.
void
PerformanceGroup::setIsActive(bool value)
{
    isActive_ = value;
}

// Mark/unmark this group as already queued for commit this iteration.
void
PerformanceGroup::setIsUsedInThisIteration(bool value)
{
    isUsedInThisIteration_ = value;
}

// Whether this group has already been queued for commit this iteration.
bool
PerformanceGroup::isUsedInThisIteration() const
{
    return isUsedInThisIteration_;
}

// Increment the reference count.
void
PerformanceGroup::AddRef()
{
    ++refCount_;
}

void
|
|
PerformanceGroup::Release()
|
|
{
|
|
MOZ_ASSERT(refCount_ > 0);
|
|
--refCount_;
|
|
if (refCount_ > 0)
|
|
return;
|
|
|
|
this->Delete();
|
|
}
|
|
|
|
// Install the embedding callback invoked when the stopwatch starts an
// iteration. Always succeeds.
JS_PUBLIC_API(bool)
SetStopwatchStartCallback(JSContext* cx, StopwatchStartCallback cb, void* closure)
{
    cx->performanceMonitoring.setStopwatchStartCallback(cb, closure);
    return true;
}

// Install the embedding callback invoked when an iteration's measures
// are committed. Always succeeds.
JS_PUBLIC_API(bool)
SetStopwatchCommitCallback(JSContext* cx, StopwatchCommitCallback cb, void* closure)
{
    cx->performanceMonitoring.setStopwatchCommitCallback(cb, closure);
    return true;
}

// Install the embedding callback that maps a compartment to its
// performance groups. Always succeeds.
JS_PUBLIC_API(bool)
SetGetPerformanceGroupsCallback(JSContext* cx, GetGroupsCallback cb, void* closure)
{
    cx->performanceMonitoring.setGetGroupsCallback(cb, closure);
    return true;
}

// Commit the current iteration's measures to the embedding.
JS_PUBLIC_API(bool)
FlushPerformanceMonitoring(JSContext* cx)
{
    return cx->performanceMonitoring.commit();
}

JS_PUBLIC_API(void)
|
|
ResetPerformanceMonitoring(JSContext* cx)
|
|
{
|
|
return cx->performanceMonitoring.reset();
|
|
}
|
|
JS_PUBLIC_API(void)
|
|
DisposePerformanceMonitoring(JSContext* cx)
|
|
{
|
|
return cx->performanceMonitoring.dispose(cx);
|
|
}
|
|
|
|
// Turn jank (cycle) monitoring on or off; returns whether the change
// was accepted.
JS_PUBLIC_API(bool)
SetStopwatchIsMonitoringJank(JSContext* cx, bool value)
{
    return cx->performanceMonitoring.setIsMonitoringJank(value);
}

// Whether jank (cycle) monitoring is currently enabled.
JS_PUBLIC_API(bool)
GetStopwatchIsMonitoringJank(JSContext* cx)
{
    return cx->performanceMonitoring.isMonitoringJank();
}

// Turn CPOW-time monitoring on or off; returns whether the change
// was accepted.
JS_PUBLIC_API(bool)
SetStopwatchIsMonitoringCPOW(JSContext* cx, bool value)
{
    return cx->performanceMonitoring.setIsMonitoringCPOW(value);
}

// Whether CPOW-time monitoring is currently enabled.
JS_PUBLIC_API(bool)
GetStopwatchIsMonitoringCPOW(JSContext* cx)
{
    return cx->performanceMonitoring.isMonitoringCPOW();
}

// Add `delta` to the runtime-wide CPOW time total, sampled by the
// stopwatch at enter/exit to attribute CPOW time to groups.
JS_PUBLIC_API(void)
AddCPOWPerformanceDelta(JSContext* cx, uint64_t delta)
{
    cx->performanceMonitoring.totalCPOWTime += delta;
}

} // namespace js
|
|
|