/**************************************************************************/ /* */ /* OCaml */ /* */ /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ /* */ /* Copyright 1996 Institut National de Recherche en Informatique et */ /* en Automatique. */ /* */ /* All rights reserved. This file is distributed under the terms of */ /* the GNU Lesser General Public License version 2.1, with the */ /* special exception on linking described in the file LICENSE. */ /* */ /**************************************************************************/ #define CAML_INTERNALS /* The generic hashing primitive */ /* The interface of this file is in "mlvalues.h" (for [caml_hash_variant]) and in "hash.h" (for the other exported functions). */ #include "caml/mlvalues.h" #include "caml/custom.h" #include "caml/memory.h" #include "caml/hash.h" /* The implementation based on MurmurHash 3, https://github.com/aappleby/smhasher/ */ #define ROTL32(x,n) ((x) << n | (x) >> (32-n)) #define MIX(h,d) \ d *= 0xcc9e2d51; \ d = ROTL32(d, 15); \ d *= 0x1b873593; \ h ^= d; \ h = ROTL32(h, 13); \ h = h * 5 + 0xe6546b64; #define FINAL_MIX(h) \ h ^= h >> 16; \ h *= 0x85ebca6b; \ h ^= h >> 13; \ h *= 0xc2b2ae35; \ h ^= h >> 16; CAMLexport uint32_t caml_hash_mix_uint32(uint32_t h, uint32_t d) { MIX(h, d); return h; } /* Mix a platform-native integer. */ CAMLexport uint32_t caml_hash_mix_intnat(uint32_t h, intnat d) { uint32_t n; #ifdef ARCH_SIXTYFOUR /* Mix the low 32 bits and the high 32 bits, in a way that preserves 32/64 compatibility: we want n = (uint32_t) d if d is in the range [-2^31, 2^31-1]. */ n = (d >> 32) ^ (d >> 63) ^ d; /* If 0 <= d < 2^31: d >> 32 = 0 d >> 63 = 0 If -2^31 <= d < 0: d >> 32 = -1 d >> 63 = -1 In both cases, n = (uint32_t) d. */ #else n = d; #endif MIX(h, n); return h; } /* Mix a 64-bit integer. */ CAMLexport uint32_t caml_hash_mix_int64(uint32_t h, int64_t d) { uint32_t hi = (uint32_t) (d >> 32), lo = (uint32_t) d; MIX(h, lo); MIX(h, hi); return h; } /* Mix a double-precision float. Treats +0.0 and -0.0 identically. Treats all NaNs identically. */ CAMLexport uint32_t caml_hash_mix_double(uint32_t hash, double d) { union { double d; #if defined(ARCH_BIG_ENDIAN) || (defined(__arm__) && !defined(__ARM_EABI__)) struct { uint32_t h; uint32_t l; } i; #else struct { uint32_t l; uint32_t h; } i; #endif } u; uint32_t h, l; /* Convert to two 32-bit halves */ u.d = d; h = u.i.h; l = u.i.l; /* Normalize NaNs */ if ((h & 0x7FF00000) == 0x7FF00000 && (l | (h & 0xFFFFF)) != 0) { h = 0x7FF00000; l = 0x00000001; } /* Normalize -0 into +0 */ else if (h == 0x80000000 && l == 0) { h = 0; } MIX(hash, l); MIX(hash, h); return hash; } /* Mix a single-precision float. Treats +0.0 and -0.0 identically. Treats all NaNs identically. */ CAMLexport uint32_t caml_hash_mix_float(uint32_t hash, float d) { union { float f; uint32_t i; } u; uint32_t n; /* Convert to int32_t */ u.f = d; n = u.i; /* Normalize NaNs */ if ((n & 0x7F800000) == 0x7F800000 && (n & 0x007FFFFF) != 0) { n = 0x7F800001; } /* Normalize -0 into +0 */ else if (n == 0x80000000) { n = 0; } MIX(hash, n); return hash; } /* Mix an OCaml string */ CAMLexport uint32_t caml_hash_mix_string(uint32_t h, value s) { mlsize_t len = caml_string_length(s); mlsize_t i; uint32_t w; /* Mix by 32-bit blocks (little-endian) */ for (i = 0; i + 4 <= len; i += 4) { #ifdef ARCH_BIG_ENDIAN w = Byte_u(s, i) | (Byte_u(s, i+1) << 8) | (Byte_u(s, i+2) << 16) | (Byte_u(s, i+3) << 24); #else w = *((uint32_t *) &Byte_u(s, i)); #endif MIX(h, w); } /* Finish with up to 3 bytes */ w = 0; switch (len & 3) { case 3: w = Byte_u(s, i+2) << 16; /* fallthrough */ case 2: w |= Byte_u(s, i+1) << 8; /* fallthrough */ case 1: w |= Byte_u(s, i); MIX(h, w); default: /*skip*/; /* len & 3 == 0, no extra bytes, do nothing */ } /* Finally, mix in the length. Ignore the upper 32 bits, generally 0. */ h ^= (uint32_t) len; return h; } /* Maximal size of the queue used for breadth-first traversal. */ #define HASH_QUEUE_SIZE 256 /* Maximal number of Forward_tag links followed in one step */ #define MAX_FORWARD_DEREFERENCE 1000 /* The generic hash function */ CAMLprim value caml_hash(value count, value limit, value seed, value obj) { value queue[HASH_QUEUE_SIZE]; /* Queue of values to examine */ intnat rd; /* Position of first value in queue */ intnat wr; /* One past position of last value in queue */ intnat sz; /* Max number of values to put in queue */ intnat num; /* Max number of meaningful values to see */ uint32_t h; /* Rolling hash */ value v; mlsize_t i, len; sz = Long_val(limit); if (sz < 0 || sz > HASH_QUEUE_SIZE) sz = HASH_QUEUE_SIZE; num = Long_val(count); h = Int_val(seed); queue[0] = obj; rd = 0; wr = 1; while (rd < wr && num > 0) { v = queue[rd++]; again: if (Is_long(v)) { h = caml_hash_mix_intnat(h, v); num--; } else if (!Is_in_value_area(v)) { /* v is a pointer outside the heap, probably a code pointer. Shall we count it? Let's say yes by compatibility with old code. */ h = caml_hash_mix_intnat(h, v); num--; } else { switch (Tag_val(v)) { case String_tag: h = caml_hash_mix_string(h, v); num--; break; case Double_tag: h = caml_hash_mix_double(h, Double_val(v)); num--; break; case Double_array_tag: for (i = 0, len = Wosize_val(v) / Double_wosize; i < len; i++) { h = caml_hash_mix_double(h, Double_flat_field(v, i)); num--; if (num <= 0) break; } break; case Abstract_tag: /* Block contents unknown. Do nothing. */ break; case Infix_tag: /* Mix in the offset to distinguish different functions from the same mutually-recursive definition */ h = caml_hash_mix_uint32(h, Infix_offset_val(v)); v = v - Infix_offset_val(v); goto again; case Forward_tag: /* PR#6361: we can have a loop here, so limit the number of Forward_tag links being followed */ for (i = MAX_FORWARD_DEREFERENCE; i > 0; i--) { v = Forward_val(v); if (Is_long(v) || !Is_in_value_area(v) || Tag_val(v) != Forward_tag) goto again; } /* Give up on this object and move to the next */ break; case Object_tag: h = caml_hash_mix_intnat(h, Oid_val(v)); num--; break; case Custom_tag: /* If no hashing function provided, do nothing. */ /* Only use low 32 bits of custom hash, for 32/64 compatibility */ if (Custom_ops_val(v)->hash != NULL) { uint32_t n = (uint32_t) Custom_ops_val(v)->hash(v); h = caml_hash_mix_uint32(h, n); num--; } break; #ifdef NO_NAKED_POINTERS case Closure_tag: { mlsize_t startenv; len = Wosize_val(v); startenv = Start_env_closinfo(Closinfo_val(v)); CAMLassert (startenv <= len); /* Mix in the tag and size, but do not count this towards [num] */ h = caml_hash_mix_uint32(h, Whitehd_hd(Hd_val(v))); /* Mix the code pointers, closure info fields, and infix headers */ for (i = 0; i < startenv; i++) { h = caml_hash_mix_intnat(h, Field(v, i)); num--; } /* Copy environment fields into queue, not exceeding the total size [sz] */ for (/*nothing*/; i < len; i++) { if (wr >= sz) break; queue[wr++] = Field(v, i); } break; } #endif default: /* Mix in the tag and size, but do not count this towards [num] */ h = caml_hash_mix_uint32(h, Whitehd_hd(Hd_val(v))); /* Copy fields into queue, not exceeding the total size [sz] */ for (i = 0, len = Wosize_val(v); i < len; i++) { if (wr >= sz) break; queue[wr++] = Field(v, i); } break; } } } /* Final mixing of bits */ FINAL_MIX(h); /* Fold result to the range [0, 2^30-1] so that it is a nonnegative OCaml integer both on 32 and 64-bit platforms. */ return Val_int(h & 0x3FFFFFFFU); } /* Hashing variant tags */ CAMLexport value caml_hash_variant(char const * tag) { value accu; /* Same hashing algorithm as in ../typing/btype.ml, function hash_variant */ for (accu = Val_int(0); *tag != 0; tag++) accu = Val_int(223 * Int_val(accu) + *((unsigned char *) tag)); #ifdef ARCH_SIXTYFOUR accu = accu & Val_long(0x7FFFFFFFL); #endif /* Force sign extension of bit 31 for compatibility between 32 and 64-bit platforms */ return (int32_t) accu; }