diff --git a/doc/langref.html.in b/doc/langref.html.in
index f019ec6b2..56db72386 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -5864,7 +5864,7 @@ volatile (
     : [number] "{rax}" (number),
         [arg1] "{rdi}" (arg1)
 // Next is the list of clobbers. These declare a set of registers whose
-// values will not be preserved by the execution of this assembly code. 
+// values will not be preserved by the execution of this assembly code.
 // These do not include output or input registers. The special clobber
 // value of "memory" means that the assembly writes to arbitrary undeclared
 // memory locations - not only the memory pointed to by a declared indirect
@@ -5885,7 +5885,7 @@ volatile (
       </p>
       {#header_open|Output Constraints#}
       <p>
-      Output constraints are still considered to be unstable in Zig, and 
+      Output constraints are still considered to be unstable in Zig, and
       so
       <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
       and
@@ -5900,7 +5900,7 @@ volatile (
 
       {#header_open|Input Constraints#}
       <p>
-      Input constraints are still considered to be unstable in Zig, and 
+      Input constraints are still considered to be unstable in Zig, and
       so
       <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
       and
@@ -5919,7 +5919,7 @@ volatile (
       the assembly code. These do not include output or input registers. The special clobber
       value of {#syntax#}"memory"{#endsyntax#} means that the assembly causes writes to
       arbitrary undeclared memory locations - not only the memory pointed to by a declared
-      indirect output. 
+      indirect output.
       </p>
       <p>
       Failure to declare the full set of clobbers for a given inline assembly
@@ -6542,12 +6542,21 @@ async fn func(y: *i32) void {
       {#header_close#}
 
       {#header_open|@byteSwap#}
-      <pre>{#syntax#}@byteSwap(comptime T: type, integer: T) T{#endsyntax#}</pre>
+      <pre>{#syntax#}@byteSwap(comptime T: type, operand: T) T{#endsyntax#}</pre>
       <p>{#syntax#}T{#endsyntax#} must be an integer type with bit count evenly divisible by 8.</p>
+      <p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
       <p>
       Swaps the byte order of the integer. This converts a big endian integer to a little endian integer,
       and converts a little endian integer to a big endian integer.
       </p>
+      <p>
+      Note that for the purposes of memory layout with respect to endianness, the integer type should be
+      related to the number of bytes reported by {#link|@sizeOf#}. This is demonstrated with
+      {#syntax#}u24{#endsyntax#}. {#syntax#}@sizeOf(u24) == 4{#endsyntax#}, which means that a
+      {#syntax#}u24{#endsyntax#} stored in memory takes 4 bytes, and those 4 bytes are what are swapped on
+      a little vs big endian system. On the other hand, if {#syntax#}T{#endsyntax#} is specified to
+      be {#syntax#}u24{#endsyntax#}, then only 3 bytes are reversed.
+      </p>
       {#header_close#}
 
       {#header_open|@bitReverse#}
@@ -6641,7 +6650,7 @@ async fn func(y: *i32) void {
       {#header_open|@clz#}
       <pre>{#syntax#}@clz(comptime T: type, integer: T){#endsyntax#}</pre>
       <p>
-      This function counts the number of leading zeroes in {#syntax#}integer{#endsyntax#}.
+      This function counts the number of most-significant (leading in a big-endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
       </p>
       <p>
       If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
@@ -6783,7 +6792,7 @@ test "main" {
       {#header_open|@ctz#}
       <pre>{#syntax#}@ctz(comptime T: type, integer: T){#endsyntax#}</pre>
       <p>
-      This function counts the number of trailing zeroes in {#syntax#}integer{#endsyntax#}.
+      This function counts the number of least-significant (trailing in a big-endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
       </p>
       <p>
       If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
@@ -7673,6 +7682,43 @@ test "@setRuntimeSafety" {
       {#see_also|@shlExact|@shlWithOverflow#}
       {#header_close#}
 
+      {#header_open|@shuffle#}
+      <pre>{#syntax#}@shuffle(comptime E: type, a: @Vector(a_len, E), b: @Vector(b_len, E), comptime mask: @Vector(mask_len, i32)) @Vector(mask_len, E){#endsyntax#}</pre>
+      <p>
+      Constructs a new {#link|vector|Vectors#} by selecting elements from {#syntax#}a{#endsyntax#} and
+      {#syntax#}b{#endsyntax#} based on {#syntax#}mask{#endsyntax#}.
+      </p>
+      <p>
+      Each element in {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or
+      {#syntax#}b{#endsyntax#}. Non-negative numbers select from {#syntax#}a{#endsyntax#}, starting at 0.
+      Negative values select from {#syntax#}b{#endsyntax#}, starting at {#syntax#}-1{#endsyntax#} and going down.
+      It is recommended to use the {#syntax#}~{#endsyntax#} operator for indexes from {#syntax#}b{#endsyntax#}
+      so that both indexes can start from {#syntax#}0{#endsyntax#} (i.e. {#syntax#}~i32(0){#endsyntax#} is
+      {#syntax#}-1{#endsyntax#}).
+      </p>
+      <p>
+      For each element of {#syntax#}mask{#endsyntax#}, if it or the selected value from
+      {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#},
+      then the resulting element is {#syntax#}undefined{#endsyntax#}.
+      </p>
+      <p>
+      {#syntax#}a_len{#endsyntax#} and {#syntax#}b_len{#endsyntax#} may differ. Out-of-bounds element
+      indexes in {#syntax#}mask{#endsyntax#} result in compile errors.
+      </p>
+      <p>
+      If {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#}, it
+      is equivalent to a vector of all {#syntax#}undefined{#endsyntax#} with the same length as the other vector.
+      If both vectors are {#syntax#}undefined{#endsyntax#}, {#syntax#}@shuffle{#endsyntax#} returns
+      a vector with all elements {#syntax#}undefined{#endsyntax#}.
+      </p>
+      <p>
+      {#syntax#}E{#endsyntax#} must be an {#link|integer|Integers#}, {#link|float|Floats#},
+      {#link|pointer|Pointers#}, or {#syntax#}bool{#endsyntax#}. The mask may be any vector length, and its
+      length determines the result length.
+      </p>
+      {#see_also|SIMD#}
+      {#header_close#}
+
       {#header_open|@sizeOf#}
       <pre>{#syntax#}@sizeOf(comptime T: type) comptime_int{#endsyntax#}</pre>
       <p>
@@ -7700,6 +7746,30 @@ test "@setRuntimeSafety" {
       </p>
       {#header_close#}
 
+      {#header_open|@splat#}
+      <pre>{#syntax#}@splat(comptime len: u32, scalar: var) @Vector(len, @typeOf(scalar)){#endsyntax#}</pre>
+      <p>
+      Produces a vector of length {#syntax#}len{#endsyntax#} where each element is the value
+      {#syntax#}scalar{#endsyntax#}:
+      </p>
+      {#code_begin|test#}
+const std = @import("std");
+const assert = std.debug.assert;
+
+test "vector @splat" {
+    const scalar: u32 = 5;
+    const result = @splat(4, scalar);
+    comptime assert(@typeOf(result) == @Vector(4, u32));
+    assert(std.mem.eql(u32, ([4]u32)(result), [_]u32{ 5, 5, 5, 5 }));
+}
+      {#code_end#}
+      <p>
+      {#syntax#}scalar{#endsyntax#} must be an {#link|integer|Integers#}, {#link|bool|Primitive Types#},
+      {#link|float|Floats#}, or {#link|pointer|Pointers#}.
+      </p>
+      {#see_also|Vectors|@shuffle#}
+      {#header_close#}
+
       {#header_open|@sqrt#}
       <pre>{#syntax#}@sqrt(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
@@ -9411,8 +9481,8 @@ const c = @cImport({
         <li>Does not support Zig-only pointer attributes such as alignment. Use normal {#link|Pointers#}
         please!</li>
       </ul>
-      <p>When a C pointer is pointing to a single struct (not an array), deference the C pointer to 
-        access to the struct's fields or member data. That syntax looks like 
+      <p>When a C pointer is pointing to a single struct (not an array), dereference the C pointer to
+        access the struct's fields or member data. That syntax looks like
         this: </p>
         <p>{#syntax#}ptr_to_struct.*.struct_member{#endsyntax#}</p>
         <p>This is comparable to doing {#syntax#}->{#endsyntax#} in C.</p>
diff --git a/src/all_types.hpp b/src/all_types.hpp
index 60b292662..695f22ac9 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1351,7 +1351,7 @@ struct ZigTypeBoundFn {
 };
 
 struct ZigTypeVector {
-    // The type must be a pointer, integer, or float
+    // The type must be a pointer, integer, bool, or float
     ZigType *elem_type;
     uint32_t len;
 };
@@ -1611,6 +1611,8 @@ enum BuiltinFnId {
     BuiltinFnIdIntToEnum,
     BuiltinFnIdIntType,
     BuiltinFnIdVectorType,
+    BuiltinFnIdShuffle,
+    BuiltinFnIdSplat,
     BuiltinFnIdSetCold,
     BuiltinFnIdSetRuntimeSafety,
     BuiltinFnIdSetFloatMode,
@@ -1770,6 +1772,7 @@ struct ZigLLVMFnKey {
         } overflow_arithmetic;
         struct {
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } bswap;
         struct {
             uint32_t bit_count;
@@ -2428,6 +2431,9 @@ enum IrInstructionId {
     IrInstructionIdBoolToInt,
     IrInstructionIdIntType,
     IrInstructionIdVectorType,
+    IrInstructionIdShuffleVector,
+    IrInstructionIdSplatSrc,
+    IrInstructionIdSplatGen,
     IrInstructionIdBoolNot,
     IrInstructionIdMemset,
     IrInstructionIdMemcpy,
@@ -3669,6 +3675,28 @@ struct IrInstructionVectorToArray {
     IrInstruction *result_loc;
 };
 
+struct IrInstructionShuffleVector {
+    IrInstruction base;
+
+    IrInstruction *scalar_type;
+    IrInstruction *a;
+    IrInstruction *b;
+    IrInstruction *mask; // This is in zig-format, not llvm format
+};
+
+struct IrInstructionSplatSrc {
+    IrInstruction base;
+
+    IrInstruction *len;
+    IrInstruction *scalar;
+};
+
+struct IrInstructionSplatGen {
+    IrInstruction base;
+
+    IrInstruction *scalar;
+};
+
 struct IrInstructionAssertZero {
     IrInstruction base;
 
diff --git a/src/analyze.cpp b/src/analyze.cpp
index d5d874501..66b72b935 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -4708,6 +4708,7 @@ ZigType *get_int_type(CodeGen *g, bool is_signed, uint32_t size_in_bits) {
 bool is_valid_vector_elem_type(ZigType *elem_type) {
     return elem_type->id == ZigTypeIdInt ||
         elem_type->id == ZigTypeIdFloat ||
+        elem_type->id == ZigTypeIdBool ||
         get_codegen_ptr_type(elem_type) != nullptr;
 }
 
@@ -4727,7 +4728,7 @@ ZigType *get_vector_type(CodeGen *g, uint32_t len, ZigType *elem_type) {
 
     ZigType *entry = new_type_table_entry(ZigTypeIdVector);
     if ((len != 0) && type_has_bits(elem_type)) {
-        // Vectors can only be ints, floats, or pointers. ints and floats have trivially resolvable
+        // Vectors can only be ints, floats, bools, or pointers. ints (inc. bools) and floats have trivially resolvable
         // llvm type refs. pointers we will use usize instead.
         LLVMTypeRef example_vector_llvm_type;
         if (elem_type->id == ZigTypeIdPointer) {
@@ -6895,7 +6896,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
             return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
                    (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBswap:
-            return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
+            return (uint32_t)(x.data.bswap.bit_count) * ((uint32_t)3661994335) +
+                   (uint32_t)(x.data.bswap.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBitReverse:
             return (uint32_t)(x.data.bit_reverse.bit_count) * (uint32_t)2621398431;
         case ZigLLVMFnIdOverflowArithmetic:
@@ -6918,7 +6920,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
         case ZigLLVMFnIdPopCount:
             return a.data.pop_count.bit_count == b.data.pop_count.bit_count;
         case ZigLLVMFnIdBswap:
-            return a.data.bswap.bit_count == b.data.bswap.bit_count;
+            return a.data.bswap.bit_count == b.data.bswap.bit_count &&
+                   a.data.bswap.vector_len == b.data.bswap.vector_len;
         case ZigLLVMFnIdBitReverse:
             return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
         case ZigLLVMFnIdFloatOp:
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 5caf72cd3..aed45e74d 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4505,7 +4505,11 @@ static LLVMValueRef ir_render_optional_unwrap_ptr(CodeGen *g, IrExecutable *exec
     }
 }
 
-static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnId fn_id) {
+static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFnId fn_id) {
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
+    assert(int_type->id == ZigTypeIdInt);
+    uint32_t vector_len = is_vector ? expr_type->data.vector.len : 0;
     ZigLLVMFnKey key = {};
     const char *fn_name;
     uint32_t n_args;
@@ -4529,6 +4533,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
         n_args = 1;
         key.id = ZigLLVMFnIdBswap;
         key.data.bswap.bit_count = (uint32_t)int_type->data.integral.bit_count;
+        key.data.bswap.vector_len = vector_len;
     } else if (fn_id == BuiltinFnIdBitReverse) {
         fn_name = "bitreverse";
         n_args = 1;
@@ -4543,12 +4548,15 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
         return existing_entry->value;
 
     char llvm_name[64];
-    sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
+    if (is_vector)
+        sprintf(llvm_name, "llvm.%s.v%" PRIu32 "i%" PRIu32, fn_name, vector_len, int_type->data.integral.bit_count);
+    else
+        sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
     LLVMTypeRef param_types[] = {
-        get_llvm_type(g, int_type),
+        get_llvm_type(g, expr_type),
         LLVMInt1Type(),
     };
-    LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, int_type), param_types, n_args, false);
+    LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, expr_type), param_types, n_args, false);
     LLVMValueRef fn_val = LLVMAddFunction(g->module, llvm_name, fn_type);
     assert(LLVMGetIntrinsicID(fn_val));
 
@@ -4581,6 +4589,48 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
     return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int);
 }
 
+static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
+    uint64_t len_a = instruction->a->value.type->data.vector.len;
+    uint64_t len_mask = instruction->mask->value.type->data.vector.len;
+
+    // LLVM uses integers larger than the length of the first array to
+    // index into the second array. This was deemed unnecessarily fragile
+    // when changing code, so Zig uses negative numbers to index the
+    // second vector. These start at -1 and go down, and are easiest to use
+    // with the ~ operator. Here we convert between the two formats.
+    IrInstruction *mask = instruction->mask;
+    LLVMValueRef *values = allocate<LLVMValueRef>(len_mask);
+    for (uint64_t i = 0; i < len_mask; i++) {
+        if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
+            values[i] = LLVMGetUndef(LLVMInt32Type());
+        } else {
+            int32_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+            uint32_t index_val = (v >= 0) ? (uint32_t)v : (uint32_t)~v + (uint32_t)len_a;
+            values[i] = LLVMConstInt(LLVMInt32Type(), index_val, false);
+        }
+    }
+
+    LLVMValueRef llvm_mask_value = LLVMConstVector(values, len_mask);
+    free(values);
+
+    return LLVMBuildShuffleVector(g->builder,
+        ir_llvm_value(g, instruction->a),
+        ir_llvm_value(g, instruction->b),
+        llvm_mask_value, "");
+}
+
+static LLVMValueRef ir_render_splat(CodeGen *g, IrExecutable *executable, IrInstructionSplatGen *instruction) {
+    ZigType *result_type = instruction->base.value.type;
+    src_assert(result_type->id == ZigTypeIdVector, instruction->base.source_node);
+    uint32_t len = result_type->data.vector.len;
+    LLVMTypeRef op_llvm_type = LLVMVectorType(get_llvm_type(g, instruction->scalar->value.type), 1);
+    LLVMTypeRef mask_llvm_type = LLVMVectorType(LLVMInt32Type(), len);
+    LLVMValueRef undef_vector = LLVMGetUndef(op_llvm_type);
+    LLVMValueRef op_vector = LLVMBuildInsertElement(g->builder, undef_vector,
+            ir_llvm_value(g, instruction->scalar), LLVMConstInt(LLVMInt32Type(), 0, false), "");
+    return LLVMBuildShuffleVector(g->builder, op_vector, undef_vector, LLVMConstNull(mask_llvm_type), "");
+}
+
 static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
     ZigType *int_type = instruction->op->value.type;
     LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount);
@@ -5512,25 +5562,36 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrIn
 
 static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInstructionBswap *instruction) {
     LLVMValueRef op = ir_llvm_value(g, instruction->op);
-    ZigType *int_type = instruction->base.value.type;
+    ZigType *expr_type = instruction->base.value.type;
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
     assert(int_type->id == ZigTypeIdInt);
     if (int_type->data.integral.bit_count % 16 == 0) {
-        LLVMValueRef fn_val = get_int_builtin_fn(g, instruction->base.value.type, BuiltinFnIdBswap);
+        LLVMValueRef fn_val = get_int_builtin_fn(g, expr_type, BuiltinFnIdBswap);
         return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
     }
     // Not an even number of bytes, so we zext 1 byte, then bswap, shift right 1 byte, truncate
     ZigType *extended_type = get_int_type(g, int_type->data.integral.is_signed,
             int_type->data.integral.bit_count + 8);
+    LLVMValueRef shift_amt = LLVMConstInt(get_llvm_type(g, extended_type), 8, false);
+    if (is_vector) {
+        extended_type = get_vector_type(g, expr_type->data.vector.len, extended_type);
+        LLVMValueRef *values = allocate_nonzero<LLVMValueRef>(expr_type->data.vector.len);
+        for (uint32_t i = 0; i < expr_type->data.vector.len; i += 1) {
+            values[i] = shift_amt;
+        }
+        shift_amt = LLVMConstVector(values, expr_type->data.vector.len);
+        free(values);
+    }
     // aabbcc
     LLVMValueRef extended = LLVMBuildZExt(g->builder, op, get_llvm_type(g, extended_type), "");
     // 00aabbcc
     LLVMValueRef fn_val = get_int_builtin_fn(g, extended_type, BuiltinFnIdBswap);
     LLVMValueRef swapped = LLVMBuildCall(g->builder, fn_val, &extended, 1, "");
     // ccbbaa00
-    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped,
-            LLVMConstInt(get_llvm_type(g, extended_type), 8, false), "");
+    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped, shift_amt, "");
     // 00ccbbaa
-    return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, int_type), "");
+    return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, expr_type), "");
 }
 
 static LLVMValueRef ir_render_bit_reverse(CodeGen *g, IrExecutable *executable, IrInstructionBitReverse *instruction) {
@@ -5549,10 +5610,29 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
     assert(handle_is_ptr(array_type));
     LLVMValueRef result_loc = ir_llvm_value(g, instruction->result_loc);
     LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
-    LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
-            LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
-    uint32_t alignment = get_ptr_align(g, instruction->result_loc->value.type);
-    gen_store_untyped(g, vector, casted_ptr, alignment, false);
+
+    ZigType *elem_type = array_type->data.array.child_type;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
+    if (bitcast_ok) {
+        LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
+                LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
+        uint32_t alignment = get_ptr_align(g, instruction->result_loc->value.type);
+        gen_store_untyped(g, vector, casted_ptr, alignment, false);
+    } else {
+        // If the element type's size_in_bits differs from its ABI size in bits (abi_size * 8),
+        // a simple bitcast will not work, and we fall back to extractelement.
+        LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+        LLVMTypeRef u32_type_ref = LLVMInt32Type();
+        LLVMValueRef zero = LLVMConstInt(usize_type_ref, 0, false);
+        for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
+            LLVMValueRef index_usize = LLVMConstInt(usize_type_ref, i, false);
+            LLVMValueRef index_u32 = LLVMConstInt(u32_type_ref, i, false);
+            LLVMValueRef indexes[] = { zero, index_usize };
+            LLVMValueRef elem_ptr = LLVMBuildInBoundsGEP(g->builder, result_loc, indexes, 2, "");
+            LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector, index_u32, "");
+            LLVMBuildStore(g->builder, elem, elem_ptr);
+        }
+    }
     return result_loc;
 }
 
@@ -5563,12 +5643,34 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
     assert(vector_type->id == ZigTypeIdVector);
     assert(!handle_is_ptr(vector_type));
     LLVMValueRef array_ptr = ir_llvm_value(g, instruction->array);
-    LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
-            LLVMPointerType(get_llvm_type(g, vector_type), 0), "");
-    ZigType *array_type = instruction->array->value.type;
-    assert(array_type->id == ZigTypeIdArray);
-    uint32_t alignment = get_abi_alignment(g, array_type->data.array.child_type);
-    return gen_load_untyped(g, casted_ptr, alignment, false, "");
+    LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
+
+    ZigType *elem_type = vector_type->data.vector.elem_type;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
+    if (bitcast_ok) {
+        LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
+                LLVMPointerType(vector_type_ref, 0), "");
+        ZigType *array_type = instruction->array->value.type;
+        assert(array_type->id == ZigTypeIdArray);
+        uint32_t alignment = get_abi_alignment(g, array_type->data.array.child_type);
+        return gen_load_untyped(g, casted_ptr, alignment, false, "");
+    } else {
+        // If the element type's size_in_bits differs from its ABI size in bits (abi_size * 8),
+        // a simple bitcast will not work, and we fall back to insertelement.
+        LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+        LLVMTypeRef u32_type_ref = LLVMInt32Type();
+        LLVMValueRef zero = LLVMConstInt(usize_type_ref, 0, false);
+        LLVMValueRef vector = LLVMGetUndef(vector_type_ref);
+        for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
+            LLVMValueRef index_usize = LLVMConstInt(usize_type_ref, i, false);
+            LLVMValueRef index_u32 = LLVMConstInt(u32_type_ref, i, false);
+            LLVMValueRef indexes[] = { zero, index_usize };
+            LLVMValueRef elem_ptr = LLVMBuildInBoundsGEP(g->builder, array_ptr, indexes, 2, "");
+            LLVMValueRef elem = LLVMBuildLoad(g->builder, elem_ptr, "");
+            vector = LLVMBuildInsertElement(g->builder, vector, elem, index_u32, "");
+        }
+        return vector;
+    }
 }
 
 static LLVMValueRef ir_render_assert_zero(CodeGen *g, IrExecutable *executable,
@@ -5896,6 +5998,7 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
         case IrInstructionIdFrameSizeSrc:
         case IrInstructionIdAllocaGen:
         case IrInstructionIdAwaitSrc:
+        case IrInstructionIdSplatSrc:
             zig_unreachable();
 
         case IrInstructionIdDeclVarGen:
@@ -6054,6 +6157,10 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_spill_begin(g, executable, (IrInstructionSpillBegin *)instruction);
         case IrInstructionIdSpillEnd:
             return ir_render_spill_end(g, executable, (IrInstructionSpillEnd *)instruction);
+        case IrInstructionIdShuffleVector:
+            return ir_render_shuffle_vector(g, executable, (IrInstructionShuffleVector *) instruction);
+        case IrInstructionIdSplatGen:
+            return ir_render_splat(g, executable, (IrInstructionSplatGen *) instruction);
     }
     zig_unreachable();
 }
@@ -7419,7 +7526,9 @@ static void do_code_gen(CodeGen *g) {
     }
 
     char *error = nullptr;
-    LLVMVerifyModule(g->module, LLVMAbortProcessAction, &error);
+    if (LLVMVerifyModule(g->module, LLVMReturnStatusAction, &error)) {
+        zig_panic("broken LLVM module found: %s", error);
+    }
 }
 
 static void zig_llvm_emit_output(CodeGen *g) {
@@ -7744,6 +7853,8 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdCompileLog, "compileLog", SIZE_MAX);
     create_builtin_fn(g, BuiltinFnIdIntType, "IntType", 2); // TODO rename to Int
     create_builtin_fn(g, BuiltinFnIdVectorType, "Vector", 2);
+    create_builtin_fn(g, BuiltinFnIdShuffle, "shuffle", 4);
+    create_builtin_fn(g, BuiltinFnIdSplat, "splat", 2);
     create_builtin_fn(g, BuiltinFnIdSetCold, "setCold", 1);
     create_builtin_fn(g, BuiltinFnIdSetRuntimeSafety, "setRuntimeSafety", 1);
     create_builtin_fn(g, BuiltinFnIdSetFloatMode, "setFloatMode", 1);
diff --git a/src/ir.cpp b/src/ir.cpp
index f29afdcf7..0c48a2f98 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -717,6 +717,18 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionVectorType *) {
     return IrInstructionIdVectorType;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionShuffleVector *) {
+    return IrInstructionIdShuffleVector;
+}
+
+static constexpr IrInstructionId ir_instruction_id(IrInstructionSplatSrc *) {
+    return IrInstructionIdSplatSrc;
+}
+
+static constexpr IrInstructionId ir_instruction_id(IrInstructionSplatGen *) {
+    return IrInstructionIdSplatGen;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionBoolNot *) {
     return IrInstructionIdBoolNot;
 }
@@ -2277,6 +2289,38 @@ static IrInstruction *ir_build_vector_type(IrBuilder *irb, Scope *scope, AstNode
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_shuffle_vector(IrBuilder *irb, Scope *scope, AstNode *source_node,
+    IrInstruction *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+    IrInstructionShuffleVector *instruction = ir_build_instruction<IrInstructionShuffleVector>(irb, scope, source_node);
+    instruction->scalar_type = scalar_type;
+    instruction->a = a;
+    instruction->b = b;
+    instruction->mask = mask;
+
+    if (scalar_type != nullptr) {
+        ir_ref_instruction(scalar_type, irb->current_basic_block);
+    }
+    ir_ref_instruction(a, irb->current_basic_block);
+    ir_ref_instruction(b, irb->current_basic_block);
+    ir_ref_instruction(mask, irb->current_basic_block);
+
+    return &instruction->base;
+}
+
+static IrInstruction *ir_build_splat_src(IrBuilder *irb, Scope *scope, AstNode *source_node,
+    IrInstruction *len, IrInstruction *scalar)
+{
+    IrInstructionSplatSrc *instruction = ir_build_instruction<IrInstructionSplatSrc>(irb, scope, source_node);
+    instruction->len = len;
+    instruction->scalar = scalar;
+
+    ir_ref_instruction(len, irb->current_basic_block);
+    ir_ref_instruction(scalar, irb->current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_bool_not(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) {
     IrInstructionBoolNot *instruction = ir_build_instruction<IrInstructionBoolNot>(irb, scope, source_node);
     instruction->value = value;
@@ -2333,6 +2377,19 @@ static IrInstruction *ir_build_slice_src(IrBuilder *irb, Scope *scope, AstNode *
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_splat_gen(IrAnalyze *ira, IrInstruction *source_instruction, ZigType *result_type,
+    IrInstruction *scalar)
+{
+    IrInstructionSplatGen *instruction = ir_build_instruction<IrInstructionSplatGen>(
+            &ira->new_irb, source_instruction->scope, source_instruction->source_node);
+    instruction->base.value.type = result_type;
+    instruction->scalar = scalar;
+
+    ir_ref_instruction(scalar, ira->new_irb.current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_slice_gen(IrAnalyze *ira, IrInstruction *source_instruction, ZigType *slice_type,
     IrInstruction *ptr, IrInstruction *start, IrInstruction *end, bool safety_check_on, IrInstruction *result_loc)
 {
@@ -4936,6 +4993,48 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                 IrInstruction *vector_type = ir_build_vector_type(irb, scope, node, arg0_value, arg1_value);
                 return ir_lval_wrap(irb, scope, vector_type, lval, result_loc);
             }
+        case BuiltinFnIdShuffle:
+            {
+                AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
+                IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
+                if (arg0_value == irb->codegen->invalid_instruction)
+                    return arg0_value;
+
+                AstNode *arg1_node = node->data.fn_call_expr.params.at(1);
+                IrInstruction *arg1_value = ir_gen_node(irb, arg1_node, scope);
+                if (arg1_value == irb->codegen->invalid_instruction)
+                    return arg1_value;
+
+                AstNode *arg2_node = node->data.fn_call_expr.params.at(2);
+                IrInstruction *arg2_value = ir_gen_node(irb, arg2_node, scope);
+                if (arg2_value == irb->codegen->invalid_instruction)
+                    return arg2_value;
+
+                AstNode *arg3_node = node->data.fn_call_expr.params.at(3);
+                IrInstruction *arg3_value = ir_gen_node(irb, arg3_node, scope);
+                if (arg3_value == irb->codegen->invalid_instruction)
+                    return arg3_value;
+
+                IrInstruction *shuffle_vector = ir_build_shuffle_vector(irb, scope, node,
+                    arg0_value, arg1_value, arg2_value, arg3_value);
+                return ir_lval_wrap(irb, scope, shuffle_vector, lval, result_loc);
+            }
+        case BuiltinFnIdSplat:
+            {
+                AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
+                IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
+                if (arg0_value == irb->codegen->invalid_instruction)
+                    return arg0_value;
+
+                AstNode *arg1_node = node->data.fn_call_expr.params.at(1);
+                IrInstruction *arg1_value = ir_gen_node(irb, arg1_node, scope);
+                if (arg1_value == irb->codegen->invalid_instruction)
+                    return arg1_value;
+
+                IrInstruction *splat = ir_build_splat_src(irb, scope, node,
+                    arg0_value, arg1_value);
+                return ir_lval_wrap(irb, scope, splat, lval, result_loc);
+            }
         case BuiltinFnIdMemcpy:
             {
                 AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@@ -11000,14 +11099,41 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
     return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
 }
 
+static Error ir_validate_vector_elem_type(IrAnalyze *ira, IrInstruction *source_instr, ZigType *elem_type) {
+    // Accepted element types need no diagnostic.
+    if (is_valid_vector_elem_type(elem_type))
+        return ErrorNone;
+    // Otherwise report the offending type name at source_instr and fail analysis.
+    ir_add_error(ira, source_instr, buf_sprintf(
+        "vector element type must be integer, float, bool, or pointer; '%s' is invalid", buf_ptr(&elem_type->name)));
+    return ErrorSemanticAnalyzeFail;
+}
+
+static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
+    // Resolve the type operand, then reject types that cannot be vector elements.
+    ZigType *invalid_type = ira->codegen->builtin_types.entry_invalid;
+    ZigType *resolved = ir_resolve_type(ira, elem_type_value);
+    if (type_is_invalid(resolved))
+        return invalid_type;
+    bool ok = ir_validate_vector_elem_type(ira, elem_type_value, resolved) == ErrorNone;
+    return ok ? resolved : invalid_type;
+}
+
 static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
     ZigType *ty = ir_resolve_type(ira, type_value);
     if (type_is_invalid(ty))
         return ira->codegen->builtin_types.entry_invalid;
 
     if (ty->id != ZigTypeIdInt) {
-        ir_add_error(ira, type_value,
+        ErrorMsg *msg = ir_add_error(ira, type_value,
             buf_sprintf("expected integer type, found '%s'", buf_ptr(&ty->name)));
+        if (ty->id == ZigTypeIdVector &&
+            ty->data.vector.elem_type->id == ZigTypeIdInt)
+        {
+            add_error_note(ira->codegen, msg, type_value->source_node,
+                buf_sprintf("represent vectors with their element types, i.e. '%s'",
+                    buf_ptr(&ty->data.vector.elem_type->name)));
+        }
         return ira->codegen->builtin_types.entry_invalid;
     }
 
@@ -13092,6 +13218,59 @@ static bool optional_value_is_null(ConstExprValue *val) {
     }
 }
 
+// Evaluates a comparison of two comptime-known scalar operands of type
+// `resolved_type` and returns the constant result. Vector comparisons call
+// this once per element. If either operand is undefined, the result is an
+// undefined value of type bool: a comparison never yields the operand type.
+static IrInstruction *ir_evaluate_bin_op_cmp(IrAnalyze *ira, ZigType *resolved_type,
+    ConstExprValue *op1_val, ConstExprValue *op2_val, IrInstructionBinOp *bin_op_instruction, IrBinOp op_id,
+    bool one_possible_value) {
+    IrInstruction *source_instr = &bin_op_instruction->base;
+    if (op1_val->special == ConstValSpecialUndef || op2_val->special == ConstValSpecialUndef)
+        return ir_const_undef(ira, source_instr, ira->codegen->builtin_types.entry_bool);
+    if (resolved_type->id == ZigTypeIdComptimeFloat || resolved_type->id == ZigTypeIdFloat) {
+        // Every comparison involving NaN is false, except `!=` which is true.
+        if (float_is_nan(op1_val) || float_is_nan(op2_val))
+            return ir_const_bool(ira, source_instr, op_id == IrBinOpCmpNotEq);
+        Cmp cmp_result = float_cmp(op1_val, op2_val);
+        return ir_const_bool(ira, source_instr, resolve_cmp_op_id(op_id, cmp_result));
+    } else if (resolved_type->id == ZigTypeIdComptimeInt || resolved_type->id == ZigTypeIdInt) {
+        Cmp cmp_result = bigint_cmp(&op1_val->data.x_bigint, &op2_val->data.x_bigint);
+        return ir_const_bool(ira, source_instr, resolve_cmp_op_id(op_id, cmp_result));
+    } else if (resolved_type->id == ZigTypeIdPointer && op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq) {
+        // Ordering comparisons on pointers are only folded when both sides are
+        // null or hard-coded addresses; null is treated as address 0.
+        if ((op1_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
+                op1_val->data.x_ptr.special == ConstPtrSpecialNull) &&
+            (op2_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
+                op2_val->data.x_ptr.special == ConstPtrSpecialNull))
+        {
+            uint64_t op1_addr = op1_val->data.x_ptr.special == ConstPtrSpecialNull ?
+                0 : op1_val->data.x_ptr.data.hard_coded_addr.addr;
+            uint64_t op2_addr = op2_val->data.x_ptr.special == ConstPtrSpecialNull ?
+                0 : op2_val->data.x_ptr.data.hard_coded_addr.addr;
+            Cmp cmp_result;
+            if (op1_addr > op2_addr) {
+                cmp_result = CmpGT;
+            } else if (op1_addr < op2_addr) {
+                cmp_result = CmpLT;
+            } else {
+                cmp_result = CmpEQ;
+            }
+            return ir_const_bool(ira, source_instr, resolve_cmp_op_id(op_id, cmp_result));
+        }
+        // NOTE(review): any other comptime pointer falls through to the
+        // zig_unreachable() below; confirm callers never reach this.
+    } else {
+        // Everything else (including pointer equality) supports only `==` and `!=`.
+        bool are_equal = one_possible_value || const_values_equal(ira->codegen, op1_val, op2_val);
+        if (op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq)
+            zig_unreachable();
+        return ir_const_bool(ira, source_instr, (op_id == IrBinOpCmpEq) ? are_equal : !are_equal);
+    }
+    zig_unreachable();
+}
+
 // Returns ErrorNotLazy when the value cannot be determined
 static Error lazy_cmp_zero(AstNode *source_node, ConstExprValue *val, Cmp *result) {
     Error err;
@@ -13477,51 +13656,22 @@ never_mind_just_calculate_it_normally:
         ConstExprValue *op2_val = one_possible_value ? &casted_op2->value : ir_resolve_const(ira, casted_op2, UndefBad);
         if (op2_val == nullptr)
             return ira->codegen->invalid_instruction;
+        if (resolved_type->id != ZigTypeIdVector)
+            return ir_evaluate_bin_op_cmp(ira, resolved_type, op1_val, op2_val, bin_op_instruction, op_id, one_possible_value);
+        IrInstruction *result = ir_const(ira, &bin_op_instruction->base,
+            get_vector_type(ira->codegen, resolved_type->data.vector.len, ira->codegen->builtin_types.entry_bool));
+        result->value.data.x_array.data.s_none.elements =
+            create_const_vals(resolved_type->data.vector.len);
 
-        if (resolved_type->id == ZigTypeIdComptimeFloat || resolved_type->id == ZigTypeIdFloat) {
-            if (float_is_nan(op1_val) || float_is_nan(op2_val)) {
-                return ir_const_bool(ira, &bin_op_instruction->base, op_id == IrBinOpCmpNotEq);
-            }
-            Cmp cmp_result = float_cmp(op1_val, op2_val);
-            bool answer = resolve_cmp_op_id(op_id, cmp_result);
-            return ir_const_bool(ira, &bin_op_instruction->base, answer);
-        } else if (resolved_type->id == ZigTypeIdComptimeInt || resolved_type->id == ZigTypeIdInt) {
-            Cmp cmp_result = bigint_cmp(&op1_val->data.x_bigint, &op2_val->data.x_bigint);
-            bool answer = resolve_cmp_op_id(op_id, cmp_result);
-            return ir_const_bool(ira, &bin_op_instruction->base, answer);
-        } else if (resolved_type->id == ZigTypeIdPointer && op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq) {
-            if ((op1_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
-                    op1_val->data.x_ptr.special == ConstPtrSpecialNull) &&
-                (op2_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
-                    op2_val->data.x_ptr.special == ConstPtrSpecialNull))
-            {
-                uint64_t op1_addr = op1_val->data.x_ptr.special == ConstPtrSpecialNull ?
-                    0 : op1_val->data.x_ptr.data.hard_coded_addr.addr;
-                uint64_t op2_addr = op2_val->data.x_ptr.special == ConstPtrSpecialNull ?
-                    0 : op2_val->data.x_ptr.data.hard_coded_addr.addr;
-                Cmp cmp_result;
-                if (op1_addr > op2_addr) {
-                    cmp_result = CmpGT;
-                } else if (op1_addr < op2_addr) {
-                    cmp_result = CmpLT;
-                } else {
-                    cmp_result = CmpEQ;
-                }
-                bool answer = resolve_cmp_op_id(op_id, cmp_result);
-                return ir_const_bool(ira, &bin_op_instruction->base, answer);
-            }
-        } else {
-            bool are_equal = one_possible_value || const_values_equal(ira->codegen, op1_val, op2_val);
-            bool answer;
-            if (op_id == IrBinOpCmpEq) {
-                answer = are_equal;
-            } else if (op_id == IrBinOpCmpNotEq) {
-                answer = !are_equal;
-            } else {
-                zig_unreachable();
-            }
-            return ir_const_bool(ira, &bin_op_instruction->base, answer);
+        expand_undef_array(ira->codegen, &result->value);
+        for (size_t i = 0;i < resolved_type->data.vector.len;i++) {
+            IrInstruction *cur_res = ir_evaluate_bin_op_cmp(ira, resolved_type->data.vector.elem_type,
+                &op1_val->data.x_array.data.s_none.elements[i],
+                &op2_val->data.x_array.data.s_none.elements[i],
+                bin_op_instruction, op_id, one_possible_value);
+            copy_const_val(&result->value.data.x_array.data.s_none.elements[i], &cur_res->value, false);
         }
+        return result;
     }
 
     // some comparisons with unsigned numbers can be evaluated
@@ -13564,7 +13714,12 @@ never_mind_just_calculate_it_normally:
     IrInstruction *result = ir_build_bin_op(&ira->new_irb,
             bin_op_instruction->base.scope, bin_op_instruction->base.source_node,
             op_id, casted_op1, casted_op2, bin_op_instruction->safety_check_on);
-    result->value.type = ira->codegen->builtin_types.entry_bool;
+    if (resolved_type->id == ZigTypeIdVector) {
+        result->value.type = get_vector_type(ira->codegen, resolved_type->data.vector.len,
+            ira->codegen->builtin_types.entry_bool);
+    } else {
+        result->value.type = ira->codegen->builtin_types.entry_bool;
+    }
     return result;
 }
 
@@ -15198,7 +15353,7 @@ static IrInstruction *ir_resolve_result_raw(IrAnalyze *ira, IrInstruction *suspe
                 }
                 peer_parent->skipped = true;
                 return ir_resolve_result(ira, suspend_source_instr, peer_parent->parent,
-                        value_type, value, force_runtime, true, true);
+                        value_type, value, force_runtime || !is_comptime, true, true);
             }
 
             if (peer_parent->resolved_type == nullptr) {
@@ -22018,22 +22173,253 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
     if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len))
         return ira->codegen->invalid_instruction;
 
-    ZigType *elem_type = ir_resolve_type(ira, instruction->elem_type->child);
+    ZigType *elem_type = ir_resolve_vector_elem_type(ira, instruction->elem_type->child);
     if (type_is_invalid(elem_type))
         return ira->codegen->invalid_instruction;
 
-    if (!is_valid_vector_elem_type(elem_type)) {
-        ir_add_error(ira, instruction->elem_type,
-            buf_sprintf("vector element type must be integer, float, or pointer; '%s' is invalid",
-                buf_ptr(&elem_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
     ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type);
 
     return ir_const_type(ira, &instruction->base, vector_type);
 }
 
+static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
+    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{ // Analyzes a @shuffle of a and b by mask: validates, folds at comptime, or emits a ShuffleVector.
+    ir_assert(source_instr && scalar_type && a && b && mask, source_instr);
+    ir_assert(is_valid_vector_elem_type(scalar_type), source_instr);
+    // The mask must be a vector or an array; it is then cast to a vector of i32 of equal length.
+    uint32_t len_mask;
+    if (mask->value.type->id == ZigTypeIdVector) {
+        len_mask = mask->value.type->data.vector.len;
+    } else if (mask->value.type->id == ZigTypeIdArray) {
+        len_mask = mask->value.type->data.array.len;
+    } else {
+        ir_add_error(ira, mask,
+            buf_sprintf("expected vector or array, found '%s'",
+                buf_ptr(&mask->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+    mask = ir_implicit_cast(ira, mask, get_vector_type(ira->codegen, len_mask,
+                ira->codegen->builtin_types.entry_i32));
+    if (type_is_invalid(mask->value.type))
+        return ira->codegen->invalid_instruction;
+    // Length of `a`; UINT32_MAX is a sentinel meaning the operand has type `undefined`.
+    uint32_t len_a;
+    if (a->value.type->id == ZigTypeIdVector) {
+        len_a = a->value.type->data.vector.len;
+    } else if (a->value.type->id == ZigTypeIdArray) {
+        len_a = a->value.type->data.array.len;
+    } else if (a->value.type->id == ZigTypeIdUndefined) {
+        len_a = UINT32_MAX;
+    } else {
+        ir_add_error(ira, a,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&a->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+    // Same classification for `b`.
+    uint32_t len_b;
+    if (b->value.type->id == ZigTypeIdVector) {
+        len_b = b->value.type->data.vector.len;
+    } else if (b->value.type->id == ZigTypeIdArray) {
+        len_b = b->value.type->data.array.len;
+    } else if (b->value.type->id == ZigTypeIdUndefined) {
+        len_b = UINT32_MAX;
+    } else {
+        ir_add_error(ira, b,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&b->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+    // Both operands are `undefined`: the result is an undefined vector of the mask's length.
+    if (len_a == UINT32_MAX && len_b == UINT32_MAX) {
+        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_mask, scalar_type));
+    }
+    // An `undefined` a takes b's length and becomes an undefined vector; otherwise a is cast to a vector of scalar_type.
+    if (len_a == UINT32_MAX) {
+        len_a = len_b;
+        a = ir_const_undef(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+    } else {
+        a = ir_implicit_cast(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+        if (type_is_invalid(a->value.type))
+            return ira->codegen->invalid_instruction;
+    }
+    // Symmetric handling for b.
+    if (len_b == UINT32_MAX) {
+        len_b = len_a;
+        b = ir_const_undef(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+    } else {
+        b = ir_implicit_cast(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+        if (type_is_invalid(b->value.type))
+            return ira->codegen->invalid_instruction;
+    }
+    // Resolve the mask to a constant value (UndefOk); a null result aborts analysis.
+    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+    if (mask_val == nullptr)
+        return ira->codegen->invalid_instruction;
+
+    expand_undef_array(ira->codegen, mask_val);
+    // Bounds-check each defined mask element: v >= 0 selects a[v], a negative v selects b[~v].
+    for (uint32_t i = 0; i < len_mask; i += 1) {
+        ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+        if (mask_elem_val->special == ConstValSpecialUndef)
+            continue;
+        int32_t v_i32 = bigint_as_signed(&mask_elem_val->data.x_bigint);
+        uint32_t v;
+        IrInstruction *chosen_operand;
+        if (v_i32 >= 0) {
+            v = (uint32_t)v_i32;
+            chosen_operand = a;
+        } else {
+            v = (uint32_t)~v_i32;
+            chosen_operand = b;
+        }
+        if (v >= chosen_operand->value.type->data.vector.len) {
+            ErrorMsg *msg = ir_add_error(ira, mask,
+                buf_sprintf("mask index '%u' has out-of-bounds selection", i));
+            add_error_note(ira->codegen, msg, chosen_operand->source_node,
+                buf_sprintf("selected index '%u' out of bounds of %s", v,
+                    buf_ptr(&chosen_operand->value.type->name)));
+            if (chosen_operand == a && v < len_a + len_b) { // index would fit if a and b were concatenated
+                add_error_note(ira->codegen, msg, b->source_node,
+                    buf_create_from_str("selections from the second vector are specified with negative numbers"));
+            }
+            return ira->codegen->invalid_instruction;
+        }
+    }
+    // Comptime path: when both operands are comptime, build the result vector element by element.
+    ZigType *result_type = get_vector_type(ira->codegen, len_mask, scalar_type);
+    if (instr_is_comptime(a) && instr_is_comptime(b)) {
+        ConstExprValue *a_val = ir_resolve_const(ira, a, UndefOk);
+        if (a_val == nullptr)
+            return ira->codegen->invalid_instruction;
+
+        ConstExprValue *b_val = ir_resolve_const(ira, b, UndefOk);
+        if (b_val == nullptr)
+            return ira->codegen->invalid_instruction;
+
+        expand_undef_array(ira->codegen, a_val);
+        expand_undef_array(ira->codegen, b_val);
+
+        IrInstruction *result = ir_const(ira, source_instr, result_type);
+        result->value.data.x_array.data.s_none.elements = create_const_vals(len_mask);
+        for (uint32_t i = 0; i < mask_val->type->data.vector.len; i += 1) {
+            ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+            ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+            if (mask_elem_val->special == ConstValSpecialUndef) { // undefined mask element => undefined result element
+                result_elem_val->special = ConstValSpecialUndef;
+                continue;
+            }
+            int32_t v = bigint_as_signed(&mask_elem_val->data.x_bigint);
+            // We've already checked for and emitted compile errors for index out of bounds here.
+            ConstExprValue *src_elem_val = (v >= 0) ?
+                &a->value.data.x_array.data.s_none.elements[v] :
+                &b->value.data.x_array.data.s_none.elements[~v];
+            copy_const_val(result_elem_val, src_elem_val, false);
+            // NOTE(review): an undefined source element would presumably trip the assert below; confirm.
+            ir_assert(result_elem_val->special == ConstValSpecialStatic, source_instr);
+        }
+        result->value.special = ConstValSpecialStatic;
+        return result;
+    }
+
+    // All static analysis passed, and not comptime.
+    // For runtime codegen, vectors a and b must be the same length. Here we
+    // recursively @shuffle the smaller vector to append undefined elements
+    // to it up to the length of the longer vector. This recursion terminates
+    // in 1 call because these calls to ir_analyze_shuffle_vector guarantee
+    // len_a == len_b.
+    if (len_a != len_b) {
+        uint32_t len_min = min(len_a, len_b);
+        uint32_t len_max = max(len_a, len_b);
+
+        IrInstruction *expand_mask = ir_const(ira, mask,
+            get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
+        expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
+        uint32_t i = 0;
+        for (; i < len_min; i += 1) // identity selection of the shorter vector's own elements
+            bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
+        for (; i < len_max; i += 1) // -1 selects element ~(-1) == 0 of the second (undefined) operand
+            bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+
+        IrInstruction *undef = ir_const_undef(ira, source_instr,
+            get_vector_type(ira->codegen, len_min, scalar_type));
+
+        if (len_b < len_a) {
+            b = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, b, undef, expand_mask);
+        } else {
+            a = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, a, undef, expand_mask);
+        }
+    }
+    // Emit the runtime instruction; its scalar_type operand is nullptr and the result type is set explicitly.
+    IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
+        source_instr->scope, source_instr->source_node,
+        nullptr, a, b, mask);
+    result->value.type = result_type;
+    return result;
+}
+
+// Analyzes @shuffle(scalar_type, a, b, mask). Every operand, including the element
+// type, is read through ->child (as a, b and mask already were); the element type
+// must be a valid vector element type. Returns invalid_instruction on any error.
+static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
+    ZigType *scalar_type = ir_resolve_vector_elem_type(ira, instruction->scalar_type->child);
+    if (type_is_invalid(scalar_type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *a = instruction->a->child;
+    if (type_is_invalid(a->value.type))
+        return ira->codegen->invalid_instruction;
+    IrInstruction *b = instruction->b->child;
+    if (type_is_invalid(b->value.type))
+        return ira->codegen->invalid_instruction;
+    IrInstruction *mask = instruction->mask->child;
+    if (type_is_invalid(mask->value.type))
+        return ira->codegen->invalid_instruction;
+    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
+}
+
+// Analyzes @splat(len, scalar): produces a vector holding `len` copies of `scalar`.
+// A comptime scalar is folded into a constant (or undefined) vector; otherwise
+// a SplatGen instruction is built for the runtime value.
+static IrInstruction *ir_analyze_instruction_splat(IrAnalyze *ira, IrInstructionSplatSrc *instruction) {
+    IrInstruction *invalid = ira->codegen->invalid_instruction;
+    IrInstruction *len_operand = instruction->len->child;
+    if (type_is_invalid(len_operand->value.type))
+        return invalid;
+    IrInstruction *scalar_operand = instruction->scalar->child;
+    if (type_is_invalid(scalar_operand->value.type))
+        return invalid;
+    // Resolve the length as an unsigned value of type u32.
+    uint64_t wide_len;
+    if (!ir_resolve_unsigned(ira, len_operand, ira->codegen->builtin_types.entry_u32, &wide_len))
+        return invalid;
+    uint32_t elem_count = wide_len;
+
+    ZigType *elem_type = scalar_operand->value.type;
+    if (ir_validate_vector_elem_type(ira, scalar_operand, elem_type) != ErrorNone)
+        return invalid;
+    ZigType *vector_type = get_vector_type(ira->codegen, elem_count, elem_type);
+
+    if (!instr_is_comptime(scalar_operand))
+        return ir_build_splat_gen(ira, &instruction->base, vector_type, scalar_operand);
+
+    ConstExprValue *scalar_val = ir_resolve_const(ira, scalar_operand, UndefOk);
+    if (scalar_val == nullptr)
+        return invalid;
+    if (scalar_val->special == ConstValSpecialUndef)
+        return ir_const_undef(ira, &instruction->base, vector_type);
+
+    IrInstruction *folded = ir_const(ira, &instruction->base, vector_type);
+    ConstExprValue *elems = create_const_vals(elem_count);
+    folded->value.data.x_array.data.s_none.elements = elems;
+    for (uint32_t idx = 0; idx < elem_count; idx += 1)
+        copy_const_val(&elems[idx], scalar_val, false);
+    return folded;
+}
+
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
     IrInstruction *value = instruction->value->child;
     if (type_is_invalid(value->value.type))
@@ -24970,21 +25356,35 @@ static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstruct
 }
 
 static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstructionBswap *instruction) {
+    Error err;
+
     ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
     if (type_is_invalid(int_type))
         return ira->codegen->invalid_instruction;
 
-    IrInstruction *op = ir_implicit_cast(ira, instruction->op->child, int_type);
+    IrInstruction *uncasted_op = instruction->op->child;
+    if (type_is_invalid(uncasted_op->value.type))
+        return ira->codegen->invalid_instruction;
+
+    uint32_t vector_len; // UINT32_MAX means not a vector
+    if (uncasted_op->value.type->id == ZigTypeIdArray &&
+        is_valid_vector_elem_type(uncasted_op->value.type->data.array.child_type))
+    {
+        vector_len = uncasted_op->value.type->data.array.len;
+    } else if (uncasted_op->value.type->id == ZigTypeIdVector) {
+        vector_len = uncasted_op->value.type->data.vector.len;
+    } else {
+        vector_len = UINT32_MAX;
+    }
+
+    bool is_vector = (vector_len != UINT32_MAX);
+    ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
+
+    IrInstruction *op = ir_implicit_cast(ira, uncasted_op, op_type);
     if (type_is_invalid(op->value.type))
         return ira->codegen->invalid_instruction;
 
-    if (int_type->data.integral.bit_count == 0) {
-        IrInstruction *result = ir_const(ira, &instruction->base, int_type);
-        bigint_init_unsigned(&result->value.data.x_bigint, 0);
-        return result;
-    }
-
-    if (int_type->data.integral.bit_count == 8)
+    if (int_type->data.integral.bit_count == 8 || int_type->data.integral.bit_count == 0)
         return op;
 
     if (int_type->data.integral.bit_count % 8 != 0) {
@@ -24999,20 +25399,44 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
         if (val == nullptr)
             return ira->codegen->invalid_instruction;
         if (val->special == ConstValSpecialUndef)
-            return ir_const_undef(ira, &instruction->base, int_type);
+            return ir_const_undef(ira, &instruction->base, op_type);
 
-        IrInstruction *result = ir_const(ira, &instruction->base, int_type);
+        IrInstruction *result = ir_const(ira, &instruction->base, op_type);
         size_t buf_size = int_type->data.integral.bit_count / 8;
         uint8_t *buf = allocate_nonzero<uint8_t>(buf_size);
-        bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
-        bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
-                int_type->data.integral.is_signed);
+        if (is_vector) {
+            expand_undef_array(ira->codegen, val);
+            result->value.data.x_array.data.s_none.elements = create_const_vals(op_type->data.vector.len);
+            for (unsigned i = 0; i < op_type->data.vector.len; i += 1) {
+                ConstExprValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
+                if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
+                    op_elem_val, UndefOk)))
+                {
+                    return ira->codegen->invalid_instruction;
+                }
+                ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+                result_elem_val->type = int_type;
+                result_elem_val->special = op_elem_val->special;
+                if (op_elem_val->special == ConstValSpecialUndef)
+                    continue;
+
+                bigint_write_twos_complement(&op_elem_val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+                bigint_read_twos_complement(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint,
+                        buf, int_type->data.integral.bit_count, false,
+                        int_type->data.integral.is_signed);
+            }
+        } else {
+            bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+            bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
+                    int_type->data.integral.is_signed);
+        }
+        free(buf);
         return result;
     }
 
     IrInstruction *result = ir_build_bswap(&ira->new_irb, instruction->base.scope,
             instruction->base.source_node, nullptr, op);
-    result->value.type = int_type;
+    result->value.type = op_type;
     return result;
 }
 
@@ -25450,6 +25874,7 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
         case IrInstructionIdTestErrGen:
         case IrInstructionIdFrameSizeGen:
         case IrInstructionIdAwaitGen:
+        case IrInstructionIdSplatGen:
             zig_unreachable();
 
         case IrInstructionIdReturn:
@@ -25578,6 +26003,10 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
             return ir_analyze_instruction_int_type(ira, (IrInstructionIntType *)instruction);
         case IrInstructionIdVectorType:
             return ir_analyze_instruction_vector_type(ira, (IrInstructionVectorType *)instruction);
+        case IrInstructionIdShuffleVector:
+            return ir_analyze_instruction_shuffle_vector(ira, (IrInstructionShuffleVector *)instruction);
+         case IrInstructionIdSplatSrc:
+            return ir_analyze_instruction_splat(ira, (IrInstructionSplatSrc *)instruction);
         case IrInstructionIdBoolNot:
             return ir_analyze_instruction_bool_not(ira, (IrInstructionBoolNot *)instruction);
         case IrInstructionIdMemset:
@@ -25913,6 +26342,9 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdTruncate:
         case IrInstructionIdIntType:
         case IrInstructionIdVectorType:
+        case IrInstructionIdShuffleVector:
+        case IrInstructionIdSplatSrc:
+        case IrInstructionIdSplatGen:
         case IrInstructionIdBoolNot:
         case IrInstructionIdSliceSrc:
         case IrInstructionIdMemberCount:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index f2877b46e..aae65d50a 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -42,6 +42,12 @@ static const char* ir_instruction_type_str(IrInstruction* instruction) {
     switch (instruction->id) {
         case IrInstructionIdInvalid:
             return "Invalid";
+        case IrInstructionIdShuffleVector:
+            return "Shuffle";
+        case IrInstructionIdSplatSrc:
+            return "SplatSrc";
+        case IrInstructionIdSplatGen:
+            return "SplatGen";
         case IrInstructionIdDeclVarSrc:
             return "DeclVarSrc";
         case IrInstructionIdDeclVarGen:
@@ -1208,6 +1214,32 @@ static void ir_print_vector_type(IrPrint *irp, IrInstructionVectorType *instruct
     fprintf(irp->f, ")");
 }
 
+// Prints a ShuffleVector instruction as "@shuffle(scalar_type, a, b, mask)".
+static void ir_print_shuffle_vector(IrPrint *irp, IrInstructionShuffleVector *instruction) {
+    IrInstruction *operands[] = {instruction->scalar_type, instruction->a, instruction->b, instruction->mask};
+    fprintf(irp->f, "@shuffle(");
+    for (size_t idx = 0; idx < 4; idx += 1) {
+        if (idx != 0)
+            fprintf(irp->f, ", ");
+        ir_print_other_instruction(irp, operands[idx]);
+    }
+    fprintf(irp->f, ")");
+}
+
+static void ir_print_splat_src(IrPrint *irp, IrInstructionSplatSrc *instruction) { // "@splat(len, scalar)"
+    fputs("@splat(", irp->f);
+    ir_print_other_instruction(irp, instruction->len);
+    fputs(", ", irp->f);
+    ir_print_other_instruction(irp, instruction->scalar);
+    fputs(")", irp->f);
+}
+
+static void ir_print_splat_gen(IrPrint *irp, IrInstructionSplatGen *instruction) { // "@splat(scalar)"
+    fputs("@splat(", irp->f);
+    ir_print_other_instruction(irp, instruction->scalar);
+    fputs(")", irp->f);
+}
+
 static void ir_print_bool_not(IrPrint *irp, IrInstructionBoolNot *instruction) {
     fprintf(irp->f, "! ");
     ir_print_other_instruction(irp, instruction->value);
@@ -2143,6 +2175,15 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction, bool
         case IrInstructionIdVectorType:
             ir_print_vector_type(irp, (IrInstructionVectorType *)instruction);
             break;
+        case IrInstructionIdShuffleVector:
+            ir_print_shuffle_vector(irp, (IrInstructionShuffleVector *)instruction);
+            break;
+        case IrInstructionIdSplatSrc:
+            ir_print_splat_src(irp, (IrInstructionSplatSrc *)instruction);
+            break;
+        case IrInstructionIdSplatGen:
+            ir_print_splat_gen(irp, (IrInstructionSplatGen *)instruction);
+            break;
         case IrInstructionIdBoolNot:
             ir_print_bool_not(irp, (IrInstructionBoolNot *)instruction);
             break;
diff --git a/src/list.hpp b/src/list.hpp
index 8dce75f2b..59782b46a 100644
--- a/src/list.hpp
+++ b/src/list.hpp
@@ -15,7 +15,7 @@ struct ZigList {
     void deinit() {
         free(items);
     }
-    void append(T item) {
+    void append(const T& item) { // NOTE(review): item is now a reference; if it aliases an element of this list and ensure_capacity moves storage, it may dangle — confirm callers
         ensure_capacity(length + 1);
         items[length++] = item;
     }
diff --git a/src/main.cpp b/src/main.cpp
index 006d62dfa..03709745f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -90,7 +90,7 @@ static int print_full_usage(const char *arg0, FILE *file, int return_code) {
         "  -mllvm [arg]                 (unsupported) forward an arg to LLVM's option processing\n"
         "  --override-std-dir [arg]     override path to Zig standard library\n"
         "  --override-lib-dir [arg]     override path to Zig lib library\n"
-        "  -ffunction-sections          places each function in a seperate section\n"
+        "  -ffunction-sections          places each function in a separate section\n"
         "\n"
         "Link Options:\n"
         "  --bundle-compiler-rt         for static libraries, include compiler-rt symbols\n"
diff --git a/std/event/channel.zig b/std/event/channel.zig
index a397d280d..2f211d21e 100644
--- a/std/event/channel.zig
+++ b/std/event/channel.zig
@@ -306,6 +306,8 @@ pub fn Channel(comptime T: type) type {
 test "std.event.Channel" {
     // https://github.com/ziglang/zig/issues/1908
     if (builtin.single_threaded) return error.SkipZigTest;
+    // https://github.com/ziglang/zig/issues/3251
+    if (std.os.freebsd.is_the_target) return error.SkipZigTest;
 
     var loop: Loop = undefined;
     // TODO make a multi threaded test
diff --git a/std/event/future.zig b/std/event/future.zig
index b55b795de..1e3508de4 100644
--- a/std/event/future.zig
+++ b/std/event/future.zig
@@ -85,6 +85,8 @@ pub fn Future(comptime T: type) type {
 test "std.event.Future" {
     // https://github.com/ziglang/zig/issues/1908
     if (builtin.single_threaded) return error.SkipZigTest;
+    // https://github.com/ziglang/zig/issues/3251
+    if (std.os.freebsd.is_the_target) return error.SkipZigTest;
 
     const allocator = std.heap.direct_allocator;
 
diff --git a/std/event/lock.zig b/std/event/lock.zig
index 0fa65f031..a0b1fd3e5 100644
--- a/std/event/lock.zig
+++ b/std/event/lock.zig
@@ -118,6 +118,8 @@ pub const Lock = struct {
 test "std.event.Lock" {
     // TODO https://github.com/ziglang/zig/issues/1908
     if (builtin.single_threaded) return error.SkipZigTest;
+    // TODO https://github.com/ziglang/zig/issues/3251
+    if (std.os.freebsd.is_the_target) return error.SkipZigTest;
 
     const allocator = std.heap.direct_allocator;
 
diff --git a/std/hash/auto_hash.zig b/std/hash/auto_hash.zig
index d34fc2719..8a22788e5 100644
--- a/std/hash/auto_hash.zig
+++ b/std/hash/auto_hash.zig
@@ -116,7 +116,7 @@ pub fn hash(hasher: var, key: var, comptime strat: HashStrategy) void {
                 // Otherwise, hash every element.
                 // TODO remove the copy to an array once field access is done.
                 const array: [info.len]info.child = key;
-                comptime var i: u32 = 0;
+                comptime var i = 0;
                 inline while (i < info.len) : (i += 1) {
                     hash(hasher, array[i], strat);
                 }
@@ -357,10 +357,13 @@ test "testHash union" {
 test "testHash vector" {
     const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
     const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
-    const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
     testing.expect(testHash(a) == testHash(a));
     testing.expect(testHash(a) != testHash(b));
-    testing.expect(testHash(a) != testHash(c));
+
+    const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
+    const d: @Vector(4, u31) = [_]u31{ 1, 2, 3, 5 };
+    testing.expect(testHash(c) == testHash(c));
+    testing.expect(testHash(c) != testHash(d));
 }
 
 test "testHash error union" {
diff --git a/std/http/headers.zig b/std/http/headers.zig
index 67624b5b1..a8dfa6862 100644
--- a/std/http/headers.zig
+++ b/std/http/headers.zig
@@ -299,7 +299,7 @@ pub const Headers = struct {
         return buf;
     }
 
-    /// Returns all headers with the given name as a comma seperated string.
+    /// Returns all headers with the given name as a comma separated string.
     ///
     /// Useful for HTTP headers that follow RFC-7230 section 3.2.2:
     ///   A recipient MAY combine multiple header fields with the same field
diff --git a/std/special/c.zig b/std/special/c.zig
index 0254ed15d..4d566a001 100644
--- a/std/special/c.zig
+++ b/std/special/c.zig
@@ -269,16 +269,11 @@ nakedcc fn clone() void {
             \\    bx lr
             \\
             \\1:  mov r0,r6
-            \\    tst r5,#1
-            \\    bne 1f
-            \\    mov lr,pc
-            \\    mov pc,r5
+            \\    bl 3f
             \\2:  mov r7,#1
             \\    svc 0
-            \\
-            \\1:  mov lr,pc
-            \\    bx r5
             \\    b 2b
+            \\3:  bx r5
         );
     } else {
         @compileError("Implement clone() for this arch.");
diff --git a/std/special/start.zig b/std/special/start.zig
index 956e4b6b6..c5017507e 100644
--- a/std/special/start.zig
+++ b/std/special/start.zig
@@ -23,7 +23,7 @@ comptime {
     } else if (builtin.os == .uefi) {
         @export("EfiMain", EfiMain, .Strong);
     } else {
-        @export("_start", _start, .Strong);
+        if (!@hasDecl(root, "_start")) @export("_start", _start, .Strong);
     }
 }
 
diff --git a/std/zig/ast.zig b/std/zig/ast.zig
index c2f369ff4..79e664714 100644
--- a/std/zig/ast.zig
+++ b/std/zig/ast.zig
@@ -255,39 +255,39 @@ pub const Error = union(enum) {
         }
     }
 
-    pub const InvalidToken = SingleTokenError("Invalid token {}");
-    pub const ExpectedContainerMembers = SingleTokenError("Expected test, comptime, var decl, or container field, found {}");
-    pub const ExpectedStringLiteral = SingleTokenError("Expected string literal, found {}");
-    pub const ExpectedIntegerLiteral = SingleTokenError("Expected integer literal, found {}");
-    pub const ExpectedIdentifier = SingleTokenError("Expected identifier, found {}");
-    pub const ExpectedStatement = SingleTokenError("Expected statement, found {}");
-    pub const ExpectedVarDeclOrFn = SingleTokenError("Expected variable declaration or function, found {}");
-    pub const ExpectedVarDecl = SingleTokenError("Expected variable declaration, found {}");
-    pub const ExpectedReturnType = SingleTokenError("Expected 'var' or return type expression, found {}");
-    pub const ExpectedAggregateKw = SingleTokenError("Expected " ++ @tagName(Token.Id.Keyword_struct) ++ ", " ++ @tagName(Token.Id.Keyword_union) ++ ", or " ++ @tagName(Token.Id.Keyword_enum) ++ ", found {}");
-    pub const ExpectedEqOrSemi = SingleTokenError("Expected '=' or ';', found {}");
-    pub const ExpectedSemiOrLBrace = SingleTokenError("Expected ';' or '{{', found {}");
-    pub const ExpectedSemiOrElse = SingleTokenError("Expected ';' or 'else', found {}");
-    pub const ExpectedLBrace = SingleTokenError("Expected '{{', found {}");
-    pub const ExpectedLabelOrLBrace = SingleTokenError("Expected label or '{{', found {}");
-    pub const ExpectedColonOrRParen = SingleTokenError("Expected ':' or ')', found {}");
-    pub const ExpectedLabelable = SingleTokenError("Expected 'while', 'for', 'inline', 'suspend', or '{{', found {}");
-    pub const ExpectedInlinable = SingleTokenError("Expected 'while' or 'for', found {}");
-    pub const ExpectedAsmOutputReturnOrType = SingleTokenError("Expected '->' or " ++ @tagName(Token.Id.Identifier) ++ ", found {}");
-    pub const ExpectedSliceOrRBracket = SingleTokenError("Expected ']' or '..', found {}");
-    pub const ExpectedTypeExpr = SingleTokenError("Expected type expression, found {}");
-    pub const ExpectedPrimaryTypeExpr = SingleTokenError("Expected primary type expression, found {}");
-    pub const ExpectedExpr = SingleTokenError("Expected expression, found {}");
-    pub const ExpectedPrimaryExpr = SingleTokenError("Expected primary expression, found {}");
-    pub const ExpectedParamList = SingleTokenError("Expected parameter list, found {}");
-    pub const ExpectedPayload = SingleTokenError("Expected loop payload, found {}");
-    pub const ExpectedBlockOrAssignment = SingleTokenError("Expected block or assignment, found {}");
-    pub const ExpectedBlockOrExpression = SingleTokenError("Expected block or expression, found {}");
-    pub const ExpectedExprOrAssignment = SingleTokenError("Expected expression or assignment, found {}");
-    pub const ExpectedPrefixExpr = SingleTokenError("Expected prefix expression, found {}");
-    pub const ExpectedLoopExpr = SingleTokenError("Expected loop expression, found {}");
-    pub const ExpectedDerefOrUnwrap = SingleTokenError("Expected pointer dereference or optional unwrap, found {}");
-    pub const ExpectedSuffixOp = SingleTokenError("Expected pointer dereference, optional unwrap, or field access, found {}");
+    pub const InvalidToken = SingleTokenError("Invalid token '{}'");
+    pub const ExpectedContainerMembers = SingleTokenError("Expected test, comptime, var decl, or container field, found '{}'");
+    pub const ExpectedStringLiteral = SingleTokenError("Expected string literal, found '{}'");
+    pub const ExpectedIntegerLiteral = SingleTokenError("Expected integer literal, found '{}'");
+    pub const ExpectedIdentifier = SingleTokenError("Expected identifier, found '{}'");
+    pub const ExpectedStatement = SingleTokenError("Expected statement, found '{}'");
+    pub const ExpectedVarDeclOrFn = SingleTokenError("Expected variable declaration or function, found '{}'");
+    pub const ExpectedVarDecl = SingleTokenError("Expected variable declaration, found '{}'");
+    pub const ExpectedReturnType = SingleTokenError("Expected 'var' or return type expression, found '{}'");
+    pub const ExpectedAggregateKw = SingleTokenError("Expected '" ++ Token.Id.Keyword_struct.symbol() ++ "', '" ++ Token.Id.Keyword_union.symbol() ++ "', or '" ++ Token.Id.Keyword_enum.symbol() ++ "', found '{}'");
+    pub const ExpectedEqOrSemi = SingleTokenError("Expected '=' or ';', found '{}'");
+    pub const ExpectedSemiOrLBrace = SingleTokenError("Expected ';' or '{{', found '{}'");
+    pub const ExpectedSemiOrElse = SingleTokenError("Expected ';' or 'else', found '{}'");
+    pub const ExpectedLBrace = SingleTokenError("Expected '{{', found '{}'");
+    pub const ExpectedLabelOrLBrace = SingleTokenError("Expected label or '{{', found '{}'");
+    pub const ExpectedColonOrRParen = SingleTokenError("Expected ':' or ')', found '{}'");
+    pub const ExpectedLabelable = SingleTokenError("Expected 'while', 'for', 'inline', 'suspend', or '{{', found '{}'");
+    pub const ExpectedInlinable = SingleTokenError("Expected 'while' or 'for', found '{}'");
+    pub const ExpectedAsmOutputReturnOrType = SingleTokenError("Expected '->' or '" ++ Token.Id.Identifier.symbol() ++ "', found '{}'");
+    pub const ExpectedSliceOrRBracket = SingleTokenError("Expected ']' or '..', found '{}'");
+    pub const ExpectedTypeExpr = SingleTokenError("Expected type expression, found '{}'");
+    pub const ExpectedPrimaryTypeExpr = SingleTokenError("Expected primary type expression, found '{}'");
+    pub const ExpectedExpr = SingleTokenError("Expected expression, found '{}'");
+    pub const ExpectedPrimaryExpr = SingleTokenError("Expected primary expression, found '{}'");
+    pub const ExpectedParamList = SingleTokenError("Expected parameter list, found '{}'");
+    pub const ExpectedPayload = SingleTokenError("Expected loop payload, found '{}'");
+    pub const ExpectedBlockOrAssignment = SingleTokenError("Expected block or assignment, found '{}'");
+    pub const ExpectedBlockOrExpression = SingleTokenError("Expected block or expression, found '{}'");
+    pub const ExpectedExprOrAssignment = SingleTokenError("Expected expression or assignment, found '{}'");
+    pub const ExpectedPrefixExpr = SingleTokenError("Expected prefix expression, found '{}'");
+    pub const ExpectedLoopExpr = SingleTokenError("Expected loop expression, found '{}'");
+    pub const ExpectedDerefOrUnwrap = SingleTokenError("Expected pointer dereference or optional unwrap, found '{}'");
+    pub const ExpectedSuffixOp = SingleTokenError("Expected pointer dereference, optional unwrap, or field access, found '{}'");
 
     pub const ExpectedParamType = SimpleError("Expected parameter type");
     pub const ExpectedPubItem = SimpleError("Pub must be followed by fn decl, var decl, or container member");
@@ -324,11 +324,11 @@ pub const Error = union(enum) {
                     return stream.print("`&&` is invalid. Note that `and` is boolean AND.");
                 },
                 .Invalid => {
-                    return stream.print("expected {}, found invalid bytes", @tagName(self.expected_id));
+                    return stream.print("expected '{}', found invalid bytes", self.expected_id.symbol());
                 },
                 else => {
-                    const token_name = @tagName(found_token.id);
-                    return stream.print("expected {}, found {}", @tagName(self.expected_id), token_name);
+                    const token_name = found_token.id.symbol();
+                    return stream.print("expected '{}', found '{}'", self.expected_id.symbol(), token_name);
                 },
             }
         }
@@ -339,8 +339,8 @@ pub const Error = union(enum) {
         end_id: Token.Id,
 
         pub fn render(self: *const ExpectedCommaOrEnd, tokens: *Tree.TokenList, stream: var) !void {
-            const token_name = @tagName(tokens.at(self.token).id);
-            return stream.print("expected ',' or {}, found {}", @tagName(self.end_id), token_name);
+            const actual_token = tokens.at(self.token);
+            return stream.print("expected ',' or '{}', found '{}'", self.end_id.symbol(), actual_token.id.symbol());
         }
     };
 
@@ -351,8 +351,8 @@ pub const Error = union(enum) {
             token: TokenIndex,
 
             pub fn render(self: *const ThisError, tokens: *Tree.TokenList, stream: var) !void {
-                const token_name = @tagName(tokens.at(self.token).id);
-                return stream.print(msg, token_name);
+                const actual_token = tokens.at(self.token);
+                return stream.print(msg, actual_token.id.symbol());
             }
         };
     }
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index f25da12a9..b0a6cd112 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -196,6 +196,131 @@ pub const Token = struct {
         Keyword_var,
         Keyword_volatile,
         Keyword_while,
+        /// Returns the token's fixed source text (e.g. "&&", "while"), or a name such as "Identifier" when it has none.
+        pub fn symbol(id: Id) []const u8 {
+            return switch (id) {
+                .Invalid => "Invalid",
+                .Invalid_ampersands => "&&",
+                .Identifier => "Identifier",
+                .StringLiteral => "StringLiteral",
+                .MultilineStringLiteralLine => "MultilineStringLiteralLine",
+                .CharLiteral => "CharLiteral",
+                .Eof => "Eof",
+                .Builtin => "Builtin",
+                .IntegerLiteral => "IntegerLiteral",
+                .FloatLiteral => "FloatLiteral",
+                .LineComment => "LineComment",
+                .DocComment => "DocComment",
+                .ShebangLine => "ShebangLine",
+
+                .Bang => "!",
+                .Pipe => "|",
+                .PipePipe => "||",
+                .PipeEqual => "|=",
+                .Equal => "=",
+                .EqualEqual => "==",
+                .EqualAngleBracketRight => "=>",
+                .BangEqual => "!=",
+                .LParen => "(",
+                .RParen => ")",
+                .Semicolon => ";",
+                .Percent => "%",
+                .PercentEqual => "%=",
+                .LBrace => "{",
+                .RBrace => "}",
+                .LBracket => "[",
+                .RBracket => "]",
+                .Period => ".",
+                .Ellipsis2 => "..",
+                .Ellipsis3 => "...",
+                .Caret => "^",
+                .CaretEqual => "^=",
+                .Plus => "+",
+                .PlusPlus => "++",
+                .PlusEqual => "+=",
+                .PlusPercent => "+%",
+                .PlusPercentEqual => "+%=",
+                .Minus => "-",
+                .MinusEqual => "-=",
+                .MinusPercent => "-%",
+                .MinusPercentEqual => "-%=",
+                .Asterisk => "*",
+                .AsteriskEqual => "*=",
+                .AsteriskAsterisk => "**",
+                .AsteriskPercent => "*%",
+                .AsteriskPercentEqual => "*%=",
+                .Arrow => "->",
+                .Colon => ":",
+                .Slash => "/",
+                .SlashEqual => "/=",
+                .Comma => ",",
+                .Ampersand => "&",
+                .AmpersandEqual => "&=",
+                .QuestionMark => "?",
+                .AngleBracketLeft => "<",
+                .AngleBracketLeftEqual => "<=",
+                .AngleBracketAngleBracketLeft => "<<",
+                .AngleBracketAngleBracketLeftEqual => "<<=",
+                .AngleBracketRight => ">",
+                .AngleBracketRightEqual => ">=",
+                .AngleBracketAngleBracketRight => ">>",
+                .AngleBracketAngleBracketRightEqual => ">>=",
+                .Tilde => "~",
+                .BracketStarBracket => "[*]",
+                .BracketStarCBracket => "[*c]",
+                .Keyword_align => "align",
+                .Keyword_allowzero => "allowzero",
+                .Keyword_and => "and",
+                .Keyword_anyframe => "anyframe",
+                .Keyword_asm => "asm",
+                .Keyword_async => "async",
+                .Keyword_await => "await",
+                .Keyword_break => "break",
+                .Keyword_catch => "catch",
+                .Keyword_comptime => "comptime",
+                .Keyword_const => "const",
+                .Keyword_continue => "continue",
+                .Keyword_defer => "defer",
+                .Keyword_else => "else",
+                .Keyword_enum => "enum",
+                .Keyword_errdefer => "errdefer",
+                .Keyword_error => "error",
+                .Keyword_export => "export",
+                .Keyword_extern => "extern",
+                .Keyword_false => "false",
+                .Keyword_fn => "fn",
+                .Keyword_for => "for",
+                .Keyword_if => "if",
+                .Keyword_inline => "inline",
+                .Keyword_nakedcc => "nakedcc",
+                .Keyword_noalias => "noalias",
+                .Keyword_noasync => "noasync",
+                .Keyword_noinline => "noinline",
+                .Keyword_null => "null",
+                .Keyword_or => "or",
+                .Keyword_orelse => "orelse",
+                .Keyword_packed => "packed",
+                .Keyword_pub => "pub",
+                .Keyword_resume => "resume",
+                .Keyword_return => "return",
+                .Keyword_linksection => "linksection",
+                .Keyword_stdcallcc => "stdcallcc",
+                .Keyword_struct => "struct",
+                .Keyword_suspend => "suspend",
+                .Keyword_switch => "switch",
+                .Keyword_test => "test",
+                .Keyword_threadlocal => "threadlocal",
+                .Keyword_true => "true",
+                .Keyword_try => "try",
+                .Keyword_undefined => "undefined",
+                .Keyword_union => "union",
+                .Keyword_unreachable => "unreachable",
+                .Keyword_usingnamespace => "usingnamespace",
+                .Keyword_var => "var",
+                .Keyword_volatile => "volatile",
+                .Keyword_while => "while",
+            };
+        }
     };
 };
 
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 6365ca64c..034800fd4 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6484,6 +6484,19 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
         "tmp.zig:7:23: error: unable to evaluate constant expression",
     );
 
+    cases.addTest(
+        "@shuffle with selected index past first vector length",
+        \\export fn entry() void {
+        \\    const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
+        \\    const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
+        \\    var z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
+        \\}
+    ,
+        "tmp.zig:4:39: error: mask index '4' has out-of-bounds selection",
+        "tmp.zig:4:27: note: selected index '7' out of bounds of @Vector(4, u32)",
+        "tmp.zig:4:30: note: selections from the second vector are specified with negative numbers",
+    );
+
     cases.addTest(
         "nested vectors",
         \\export fn entry() void {
@@ -6491,7 +6504,17 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
         \\    var v: V = undefined;
         \\}
     ,
-        "tmp.zig:2:26: error: vector element type must be integer, float, or pointer; '@Vector(4, u8)' is invalid",
+        "tmp.zig:2:26: error: vector element type must be integer, float, bool, or pointer; '@Vector(4, u8)' is invalid",
+    );
+
+    cases.addTest(
+        "bad @splat type",
+        \\export fn entry() void {
+        \\    const c = 4;
+        \\    var v = @splat(4, c);
+        \\}
+    ,
+        "tmp.zig:3:23: error: vector element type must be integer, float, bool, or pointer; 'comptime_int' is invalid",
     );
 
     cases.add("compileLog of tagged enum doesn't crash the compiler",
diff --git a/test/stage1/behavior.zig b/test/stage1/behavior.zig
index db6cdad3b..e56fc7ba7 100644
--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@@ -80,6 +80,7 @@ comptime {
     _ = @import("behavior/pub_enum.zig");
     _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
     _ = @import("behavior/reflection.zig");
+    _ = @import("behavior/shuffle.zig");
     _ = @import("behavior/sizeof_and_typeof.zig");
     _ = @import("behavior/slice.zig");
     _ = @import("behavior/slicetobytes.zig");
diff --git a/test/stage1/behavior/byteswap.zig b/test/stage1/behavior/byteswap.zig
index 3e7c34cb8..d8fc55480 100644
--- a/test/stage1/behavior/byteswap.zig
+++ b/test/stage1/behavior/byteswap.zig
@@ -1,32 +1,62 @@
 const std = @import("std");
 const expect = std.testing.expect;
 
-test "@byteSwap" {
-    comptime testByteSwap();
-    testByteSwap();
+test "@byteSwap integers" {
+    const ByteSwapIntTest = struct {
+        fn run() void {
+            t(u0, 0, 0);
+            t(u8, 0x12, 0x12);
+            t(u16, 0x1234, 0x3412);
+            t(u24, 0x123456, 0x563412);
+            t(u32, 0x12345678, 0x78563412);
+            t(u40, 0x123456789a, 0x9a78563412);
+            t(i48, 0x123456789abc, @bitCast(i48, u48(0xbc9a78563412)));
+            t(u56, 0x123456789abcde, 0xdebc9a78563412);
+            t(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
+            t(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
+            // Explicitly typed inputs; signed values are built from their bit patterns.
+            t(u0, u0(0), 0);
+            t(i8, i8(-50), -50);
+            t(i16, @bitCast(i16, u16(0x1234)), @bitCast(i16, u16(0x3412)));
+            t(i24, @bitCast(i24, u24(0x123456)), @bitCast(i24, u24(0x563412)));
+            t(i32, @bitCast(i32, u32(0x12345678)), @bitCast(i32, u32(0x78563412)));
+            t(i40, @bitCast(i40, u40(0x123456789a)), @bitCast(i40, u40(0x9a78563412)));
+            t(i48, @bitCast(i48, u48(0x123456789abc)), @bitCast(i48, u48(0xbc9a78563412)));
+            t(i56, @bitCast(i56, u56(0x123456789abcde)), @bitCast(i56, u56(0xdebc9a78563412)));
+            t(i64, @bitCast(i64, u64(0x123456789abcdef1)), @bitCast(i64, u64(0xf1debc9a78563412)));
+            t(
+                i128,
+                @bitCast(i128, u128(0x123456789abcdef11121314151617181)),
+                @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)),
+            );
+        }
+        fn t(comptime I: type, input: I, expected_output: I) void {
+            std.testing.expectEqual(expected_output, @byteSwap(I, input));
+        }
+    };
+    comptime ByteSwapIntTest.run();
+    ByteSwapIntTest.run();
 }
 
-fn testByteSwap() void {
-    expect(@byteSwap(u0, 0) == 0);
-    expect(@byteSwap(u8, 0x12) == 0x12);
-    expect(@byteSwap(u16, 0x1234) == 0x3412);
-    expect(@byteSwap(u24, 0x123456) == 0x563412);
-    expect(@byteSwap(u32, 0x12345678) == 0x78563412);
-    expect(@byteSwap(u40, 0x123456789a) == 0x9a78563412);
-    expect(@byteSwap(i48, 0x123456789abc) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(u56, 0x123456789abcde) == 0xdebc9a78563412);
-    expect(@byteSwap(u64, 0x123456789abcdef1) == 0xf1debc9a78563412);
-    expect(@byteSwap(u128, 0x123456789abcdef11121314151617181) == 0x8171615141312111f1debc9a78563412);
+test "@byteSwap vectors" {
+    const ByteSwapVectorTest = struct {
+        fn run() void {
+            t(u8, 2, [_]u8{ 0x12, 0x13 }, [_]u8{ 0x12, 0x13 }); // single-byte elements come back unchanged
+            t(u16, 2, [_]u16{ 0x1234, 0x2345 }, [_]u16{ 0x3412, 0x4523 }); // each element is swapped on its own
+            t(u24, 2, [_]u24{ 0x123456, 0x234567 }, [_]u24{ 0x563412, 0x674523 });
+        }
 
-    expect(@byteSwap(u0, u0(0)) == 0);
-    expect(@byteSwap(i8, i8(-50)) == -50);
-    expect(@byteSwap(i16, @bitCast(i16, u16(0x1234))) == @bitCast(i16, u16(0x3412)));
-    expect(@byteSwap(i24, @bitCast(i24, u24(0x123456))) == @bitCast(i24, u24(0x563412)));
-    expect(@byteSwap(i32, @bitCast(i32, u32(0x12345678))) == @bitCast(i32, u32(0x78563412)));
-    expect(@byteSwap(u40, @bitCast(i40, u40(0x123456789a))) == u40(0x9a78563412));
-    expect(@byteSwap(i48, @bitCast(i48, u48(0x123456789abc))) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(i56, @bitCast(i56, u56(0x123456789abcde))) == @bitCast(i56, u56(0xdebc9a78563412)));
-    expect(@byteSwap(i64, @bitCast(i64, u64(0x123456789abcdef1))) == @bitCast(i64, u64(0xf1debc9a78563412)));
-    expect(@byteSwap(i128, @bitCast(i128, u128(0x123456789abcdef11121314151617181))) ==
-        @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)));
+        fn t(
+            comptime I: type,
+            comptime n: comptime_int,
+            input: @Vector(n, I),
+            expected_vector: @Vector(n, I),
+        ) void {
+            const actual_output: [n]I = @byteSwap(I, input); // vector result stored as an array
+            const expected_output: [n]I = expected_vector; // same conversion, so two arrays are compared
+            std.testing.expectEqual(expected_output, actual_output);
+        }
+    };
+    comptime ByteSwapVectorTest.run();
+    ByteSwapVectorTest.run();
 }
diff --git a/test/stage1/behavior/misc.zig b/test/stage1/behavior/misc.zig
index c122b18e0..613bb9ac5 100644
--- a/test/stage1/behavior/misc.zig
+++ b/test/stage1/behavior/misc.zig
@@ -721,3 +721,23 @@ test "global variable assignment with optional unwrapping with var initialized t
     };
     expect(global_foo.* == 1234);
 }
+
+test "peer result location with typed parent, runtime condition, comptime prongs" {
+    const S = struct {
+        fn doTheTest(arg: i32) i32 {
+            const st = Structy{
+                .bleh = if (arg == 1) 1 else 1,
+            };
+            // Both prongs are the comptime-known value 1, so bleh is 1 for any arg.
+            if (st.bleh == 1)
+                return 1234;
+            return 0;
+        }
+
+        const Structy = struct {
+            bleh: i32,
+        };
+    };
+    expect(S.doTheTest(0) == 1234);
+    expect(S.doTheTest(1) == 1234);
+}
diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig
new file mode 100644
index 000000000..2029ec582
--- /dev/null
+++ b/test/stage1/behavior/shuffle.zig
@@ -0,0 +1,57 @@
+const std = @import("std");
+const mem = std.mem;
+const expect = std.testing.expect;
+
+test "@shuffle" {
+    const S = struct {
+        fn doTheTest() void {
+            var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
+            var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 };
+            const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3) }; // ~i32(n) is negative: picks element n of the second operand
+            var res = @shuffle(i32, v, x, mask);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
+
+            // Same shuffle with the mask passed as an array literal (implicitly cast)
+            res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3) });
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
+
+            // Second operand is undefined; mask2 has no negative entries, so only v is selected
+            const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0 };
+            res = @shuffle(i32, v, undefined, mask2);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647 }));
+
+            // Upcasting of b: b (v2) has 2 elements while a (x) has 4
+            var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined };
+            const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3 };
+            res = @shuffle(i32, x, v2, mask3);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 }));
+
+            // Upcasting of a: a (v3) has 2 elements while b (x) has 4
+            var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2 };
+            const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3) };
+            res = @shuffle(i32, v3, x, mask4);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 }));
+
+            // bool element type; operands have 4 and 2 elements
+            {
+                var x2: @Vector(4, bool) = [4]bool{ false, true, false, true };
+                var v4: @Vector(2, bool) = [2]bool{ true, false };
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
+                var res2 = @shuffle(bool, x2, v4, mask5);
+                expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
+            }
+
+            // TODO re-enable when LLVM codegen is fixed
+            // https://github.com/ziglang/zig/issues/3246
+            if (false) {
+                var x2: @Vector(3, bool) = [3]bool{ false, true, false };
+                var v4: @Vector(2, bool) = [2]bool{ true, false };
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
+                var res2 = @shuffle(bool, x2, v4, mask5);
+                expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
+            }
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 431e3fe27..d3a771fca 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -2,6 +2,18 @@ const std = @import("std");
 const mem = std.mem;
 const expect = std.testing.expect;
 
+test "implicit cast vector to array - bool" {
+    const S = struct {
+        fn doTheTest() void {
+            const a: @Vector(4, bool) = [_]bool{ true, false, true, false };
+            const result_array: [4]bool = a; // the implicit vector-to-array cast under test
+            expect(mem.eql(bool, result_array, [4]bool{ true, false, true, false }));
+        }
+    };
+    S.doTheTest(); // runtime call
+    comptime S.doTheTest(); // same body evaluated at compile time
+}
+
 test "vector wrap operators" {
     const S = struct {
         fn doTheTest() void {
@@ -18,6 +30,23 @@ test "vector wrap operators" {
     comptime S.doTheTest();
 }
 
+test "vector bin compares with mem.eql" {
+    const S = struct {
+        fn doTheTest() void {
+            var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
+            var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 30, 4 }; // equal to v only at index 2
+            expect(mem.eql(bool, ([4]bool)(v == x), [4]bool{ false, false, true, false })); // one bool per element
+            expect(mem.eql(bool, ([4]bool)(v != x), [4]bool{ true, true, false, true }));
+            expect(mem.eql(bool, ([4]bool)(v < x), [4]bool{ false, true, false, false }));
+            expect(mem.eql(bool, ([4]bool)(v > x), [4]bool{ true, false, false, true }));
+            expect(mem.eql(bool, ([4]bool)(v <= x), [4]bool{ false, true, true, false }));
+            expect(mem.eql(bool, ([4]bool)(v >= x), [4]bool{ true, false, true, true }));
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}
+
 test "vector int operators" {
     const S = struct {
         fn doTheTest() void {
@@ -80,3 +109,49 @@ test "array to vector" {
     var arr = [4]f32{ foo, 1.5, 0.0, 0.0 };
     var vec: @Vector(4, f32) = arr;
 }
+
+test "vector casts of sizes not divisible by 8" {
+    const S = struct {
+        fn doTheTest() void {
+            {
+                var v: @Vector(4, u3) = [4]u3{ 5, 2, 3, 0 };
+                var x: [4]u3 = v; // implicit cast; compared below against the explicit cast
+                expect(mem.eql(u3, x, ([4]u3)(v)));
+            }
+            {
+                var v: @Vector(4, u2) = [4]u2{ 1, 2, 3, 0 };
+                var x: [4]u2 = v;
+                expect(mem.eql(u2, x, ([4]u2)(v)));
+            }
+            {
+                var v: @Vector(4, u1) = [4]u1{ 1, 0, 1, 0 };
+                var x: [4]u1 = v;
+                expect(mem.eql(u1, x, ([4]u1)(v)));
+            }
+            {
+                var v: @Vector(4, bool) = [4]bool{ false, false, true, false };
+                var x: [4]bool = v;
+                expect(mem.eql(bool, x, ([4]bool)(v)));
+            }
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}
+
+test "vector @splat" {
+    const S = struct {
+        fn doTheTest() void {
+            var v: u32 = 5;
+            var x = @splat(4, v); // result type is checked on the next line
+            expect(@typeOf(x) == @Vector(4, u32));
+            var array_x: [4]u32 = x; // read the elements back through an array
+            expect(array_x[0] == 5);
+            expect(array_x[1] == 5);
+            expect(array_x[2] == 5);
+            expect(array_x[3] == 5);
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}