Now that they support vectors, use math.rot{l,r}

master
Frank Denis 2020-11-02 21:56:45 +01:00 committed by Andrew Kelley
parent 34502b9c4d
commit 4417206230
4 changed files with 26 additions and 41 deletions

View File

@ -66,17 +66,13 @@ const CompressVectorized = struct {
const Lane = Vector(4, u32);
const Rows = [4]Lane;
/// Rotates every 32-bit element of `x` right by `n` bits.
///
/// The complementary left-shift amount is computed as `1 +% ~n`, which in
/// `u5` arithmetic equals `(32 - n) mod 32`, so `n == 0` yields `x` unchanged.
inline fn rot(x: Lane, comptime n: u5) Lane {
    const right_amount = @splat(4, @as(u5, n));
    const left_amount = @splat(4, @as(u5, 1 +% ~n));
    const low_part = x >> right_amount;
    const high_part = x << left_amount;
    return low_part | high_part;
}
/// Applies one mixing step to the four rows in place, folding in the message lane `m`.
///
/// `even` selects the rotation amounts at compile time: 8 and 7 when true,
/// 16 and 12 when false.
///
/// NOTE(review): each rotation below appears twice, once through the local
/// `rot` helper and once through `math.rotr` with the same arguments. This
/// looks like the before/after lines of a change shown together — confirm
/// that only one of each pair is meant to be live.
inline fn g(comptime even: bool, rows: *Rows, m: Lane) void {
// Wrapping add of rows[1] and the message lane into rows[0].
rows[0] +%= rows[1] +% m;
// Mix rows[0] into rows[3], then rotate rows[3].
rows[3] ^= rows[0];
rows[3] = rot(rows[3], if (even) 8 else 16);
rows[3] = math.rotr(Lane, rows[3], if (even) 8 else 16);
// Wrapping add of the rotated rows[3] into rows[2].
rows[2] +%= rows[3];
// Mix rows[2] into rows[1], then rotate rows[1].
rows[1] ^= rows[2];
rows[1] = rot(rows[1], if (even) 7 else 12);
rows[1] = math.rotr(Lane, rows[1], if (even) 7 else 12);
}
inline fn diagonalize(rows: *Rows) void {

View File

@ -6,10 +6,11 @@
// Based on public domain Supercop by Daniel J. Bernstein
const std = @import("../std.zig");
const math = std.math;
const mem = std.mem;
const assert = std.debug.assert;
const testing = std.testing;
const maxInt = std.math.maxInt;
const maxInt = math.maxInt;
const Vector = std.meta.Vector;
const Poly1305 = std.crypto.onetimeauth.Poly1305;
@ -34,10 +35,6 @@ const ChaCha20VecImpl = struct {
};
}
/// Rotates every element of `x` left by `n` bits.
///
/// Both shift amounts are cast to `u5`, so `n` must be in 1..31: `n == 0`
/// would make the complement 32, which does not fit in `u5`.
/// Assumes `Lane` has 32-bit elements, as the `32 - n` complement implies.
inline fn rot(x: Lane, comptime n: comptime_int) Lane {
    const shifted_up = x << @splat(4, @as(u5, n));
    const wrapped_around = x >> @splat(4, @as(u5, 32 - n));
    return shifted_up | wrapped_around;
}
inline fn chacha20Core(x: *BlockVec, input: BlockVec) void {
x.* = input;
@ -45,41 +42,41 @@ const ChaCha20VecImpl = struct {
while (r < 20) : (r += 2) {
x[0] +%= x[1];
x[3] ^= x[0];
x[3] = rot(x[3], 16);
x[3] = math.rotl(Lane, x[3], 16);
x[2] +%= x[3];
x[1] ^= x[2];
x[1] = rot(x[1], 12);
x[1] = math.rotl(Lane, x[1], 12);
x[0] +%= x[1];
x[3] ^= x[0];
x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 3, 0, 1, 2 });
x[3] = rot(x[3], 8);
x[3] = math.rotl(Lane, x[3], 8);
x[2] +%= x[3];
x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 });
x[1] ^= x[2];
x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 1, 2, 3, 0 });
x[1] = rot(x[1], 7);
x[1] = math.rotl(Lane, x[1], 7);
x[0] +%= x[1];
x[3] ^= x[0];
x[3] = rot(x[3], 16);
x[3] = math.rotl(Lane, x[3], 16);
x[2] +%= x[3];
x[1] ^= x[2];
x[1] = rot(x[1], 12);
x[1] = math.rotl(Lane, x[1], 12);
x[0] +%= x[1];
x[3] ^= x[0];
x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 1, 2, 3, 0 });
x[3] = rot(x[3], 8);
x[3] = math.rotl(Lane, x[3], 8);
x[2] +%= x[3];
x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 });
x[1] ^= x[2];
x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 3, 0, 1, 2 });
x[1] = rot(x[1], 7);
x[1] = math.rotl(Lane, x[1], 7);
}
}
@ -211,13 +208,13 @@ const ChaCha20NonVecImpl = struct {
inline while (j < 20) : (j += 2) {
inline for (rounds) |r| {
x[r.a] +%= x[r.b];
x[r.d] = std.math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 16));
x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 16));
x[r.c] +%= x[r.d];
x[r.b] = std.math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 12));
x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 12));
x[r.a] +%= x[r.b];
x[r.d] = std.math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 8));
x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 8));
x[r.c] +%= x[r.d];
x[r.b] = std.math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 7));
x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 7));
}
}
}

View File

@ -120,10 +120,6 @@ pub const State = struct {
return x << @splat(4, @as(u5, n));
}
/// Left-rotates each element of `x` by the compile-time constant `n`.
///
/// The bits shifted out on the left are recovered by a right shift of
/// `32 - n`. Both amounts are cast to `u5`, so `n` must lie in 1..31.
/// Assumes `Lane` has 32-bit elements, as the `32 - n` complement implies.
inline fn rot(x: Lane, comptime n: comptime_int) Lane {
    const left_by = @splat(4, @as(u5, n));
    const right_by = @splat(4, @as(u5, 32 - n));
    return (x << left_by) | (x >> right_by);
}
fn permute_vectorized(self: *Self) void {
self.endianSwap();
const state = &self.data;
@ -132,8 +128,8 @@ pub const State = struct {
var z = Lane{ state[8], state[9], state[10], state[11] };
var round = @as(u32, 24);
while (round > 0) : (round -= 1) {
x = rot(x, 24);
y = rot(y, 9);
x = math.rotl(Lane, x, 24);
y = math.rotl(Lane, y, 9);
const newz = x ^ shift(z, 1) ^ shift(y & z, 2);
const newy = y ^ x ^ shift(x | z, 1);
const newx = z ^ y ^ shift(x & y, 3);

View File

@ -36,10 +36,6 @@ const Salsa20VecImpl = struct {
};
}
/// Rotates every element of `x` left by `n` bits.
///
/// The complementary right-shift amount is `1 +% ~n`, which in `u5`
/// arithmetic equals `(32 - n) mod 32`, so `n == 0` yields `x` unchanged.
/// Assumes `Lane` has 32-bit elements, as the mod-32 complement implies.
inline fn rot(x: Lane, comptime n: u5) Lane {
    const left_amount = @splat(4, @as(u5, n));
    const right_amount = @splat(4, @as(u5, 1 +% ~n));
    const high_part = x << left_amount;
    const low_part = x >> right_amount;
    return high_part | low_part;
}
inline fn salsa20Core(x: *BlockVec, input: BlockVec, comptime feedback: bool) void {
const n1n2n3n0 = Lane{ input[3][1], input[3][2], input[3][3], input[3][0] };
const n1n2 = Half{ n1n2n3n0[0], n1n2n3n0[1] };
@ -71,13 +67,13 @@ const Salsa20VecImpl = struct {
var i: usize = 0;
while (i < 20) : (i += 2) {
var a0 = diag1 +% diag0;
diag3 ^= rot(a0, 7);
diag3 ^= math.rotl(Lane, a0, 7);
var a1 = diag0 +% diag3;
diag2 ^= rot(a1, 9);
diag2 ^= math.rotl(Lane, a1, 9);
var a2 = diag3 +% diag2;
diag1 ^= rot(a2, 13);
diag1 ^= math.rotl(Lane, a2, 13);
var a3 = diag2 +% diag1;
diag0 ^= rot(a3, 18);
diag0 ^= math.rotl(Lane, a3, 18);
var diag3_shift = @shuffle(u32, diag3, undefined, [_]i32{ 3, 0, 1, 2 });
var diag2_shift = @shuffle(u32, diag2, undefined, [_]i32{ 2, 3, 0, 1 });
@ -87,13 +83,13 @@ const Salsa20VecImpl = struct {
diag1 = diag1_shift;
a0 = diag3 +% diag0;
diag1 ^= rot(a0, 7);
diag1 ^= math.rotl(Lane, a0, 7);
a1 = diag0 +% diag1;
diag2 ^= rot(a1, 9);
diag2 ^= math.rotl(Lane, a1, 9);
a2 = diag1 +% diag2;
diag3 ^= rot(a2, 13);
diag3 ^= math.rotl(Lane, a2, 13);
a3 = diag2 +% diag3;
diag0 ^= rot(a3, 18);
diag0 ^= math.rotl(Lane, a3, 18);
diag1_shift = @shuffle(u32, diag1, undefined, [_]i32{ 3, 0, 1, 2 });
diag2_shift = @shuffle(u32, diag2, undefined, [_]i32{ 2, 3, 0, 1 });