Mypal/xpcom/string/nsReadableUtilsSSE2.cpp

70 lines
2.0 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <emmintrin.h>
#include "nsReadableUtilsImpl.h"
namespace mozilla {
namespace SSE2 {
// Returns true iff all 128 bits of |x| are zero.
static inline bool
is_zero (__m128i x)
{
  // Byte-wise comparison against zero: each byte lane that equals zero
  // becomes 0xff, every other lane becomes 0x00.
  const __m128i zeroLanes = _mm_cmpeq_epi8(x, _mm_setzero_si128());

  // Gather the top bit of each of the 16 byte lanes into the low 16 bits of
  // an int; the vector is entirely zero exactly when all 16 bits are set.
  const int laneBits = _mm_movemask_epi8(zeroLanes);
  return laneBits == 0xffff;
}
/**
 * Scans the UTF-16 range [aBegin, aEnd) for code units rejected by IsASCII().
 *
 * The scan runs in four phases: single code units until the cursor reaches a
 * 16-byte boundary, whole 16-byte vectors, whole size_t words, and finally the
 * remaining code units one at a time.
 *
 * @param aBegin First code unit of the range.
 * @param aEnd   One past the last code unit of the range.
 * @return -1 if no non-ASCII code unit was found. Otherwise an offset relative
 *         to aBegin: in the single-unit phases this is the offset of the
 *         offending code unit itself; in the vector and word phases it is the
 *         offset of the first code unit of the vector/word that contains it
 *         (callers needing the exact position must rescan from there).
 */
int32_t
FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
{
  const size_t kNumUnicharsPerVector = sizeof(__m128i) / sizeof(char16_t);
  typedef NonASCIIParameters<sizeof(size_t)> p;
  // NOTE(review): presumably p::mask() selects the non-ASCII bits of every
  // char16_t packed in a size_t, mirroring kShortMask below - confirm against
  // nsReadableUtilsImpl.h.
  const size_t kMask = p::mask();
  const uintptr_t kXmmAlignMask = 0xf;
  // Bits that are set in a char16_t only when its value is above 0x7f.
  const uint16_t kShortMask = 0xff80;
  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();

  const char16_t* idx = aBegin;

  // Align ourselves to a 16-byte boundary as required by _mm_load_si128.
  for (; idx != aEnd && ((uintptr_t(idx) & kXmmAlignMask) != 0); idx++) {
    if (!IsASCII(*idx)) {
      return idx - aBegin;
    }
  }

  // The range may end before the first 16-byte boundary (this includes an
  // empty range at an unaligned address). In that case idx is not aligned and
  // the rounded-down end pointers computed below would lie *before* idx, so
  // the bulk loops must not run at all.
  if (idx == aEnd) {
    return -1;
  }

  // Check one XMM register (16 bytes) at a time. The bound is aEnd rounded
  // down to a 16-byte boundary; use '<' rather than '!=' so that a bound at or
  // below idx can never turn into an out-of-bounds walk.
  const char16_t* vectWalkEnd = aligned(aEnd, kXmmAlignMask);
  const __m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(kShortMask));
  for (; idx < vectWalkEnd; idx += kNumUnicharsPerVector) {
    const __m128i vect = _mm_load_si128(reinterpret_cast<const __m128i*>(idx));
    if (!is_zero(_mm_and_si128(vect, vectmask))) {
      return idx - aBegin;
    }
  }

  // Check one word at a time. idx is still 16-byte aligned here, which also
  // satisfies the alignment of size_t.
  const char16_t* wordWalkEnd = aligned(aEnd, p::alignMask());
  for (; idx < wordWalkEnd; idx += kNumUnicharsPerWord) {
    const size_t word = *reinterpret_cast<const size_t*>(idx);
    if (word & kMask) {
      return idx - aBegin;
    }
  }

  // Take care of the remainder one character at a time.
  for (; idx != aEnd; idx++) {
    if (!IsASCII(*idx)) {
      return idx - aBegin;
    }
  }

  return -1;
}
} // namespace SSE2
} // namespace mozilla