std/crypto: Clean up poly1305/x25519

2018-09-04 20:16:12 +12:00 · 2018-09-04 20:16:12 +12:00 · 8b50d10a84
commit 8b50d10a84
parent 763845f95c
4 changed files with 485 additions and 500 deletions
--- a/std/crypto/index.zig
+++ b/std/crypto/index.zig
@ -32,13 +32,13 @@ pub const Poly1305 = @import("poly1305.zig").Poly1305;
 pub const X25519 = @import("x25519.zig").X25519;

 test "crypto" {
+    // References every crypto submodule, listed alphabetically. Presumably
+    // this is what pulls their test blocks into this test run — confirm.
+    _ = @import("blake2.zig");
+    _ = @import("chacha20.zig");
+    _ = @import("hmac.zig");
    _ = @import("md5.zig");
+    _ = @import("poly1305.zig");
    _ = @import("sha1.zig");
    _ = @import("sha2.zig");
    _ = @import("sha3.zig");
-    _ = @import("blake2.zig");
-    _ = @import("hmac.zig");
-    _ = @import("chacha20.zig");
-    _ = @import("poly1305.zig");
    _ = @import("x25519.zig");
 }
--- a/std/crypto/poly1305.zig
+++ b/std/crypto/poly1305.zig
@ -26,7 +26,7 @@ pub const Poly1305 = struct {
    // How many bytes are there in the chunk.
    c_idx: usize,

-    fn secure_zero(self: *Poly1305) void {
-        std.mem.secureZero(u8, @ptrCast([*]u8, self)[0..@sizeOf(Poly1305)]);
+    // Overwrites the whole context with zeroes by viewing it as a byte slice
+    // of @sizeOf(Self) bytes and passing that to std.mem.secureZero.
+    // The length uses Self so it stays in step with the receiver type.
+    fn secureZero(self: *Self) void {
+        std.mem.secureZero(u8, @ptrCast([*]u8, self)[0..@sizeOf(Self)]);
    }

@ -53,7 +53,7 @@ pub const Poly1305 = struct {
        }
        // add 2^130 to every input block
        ctx.c[4] = 1;
-        poly_clear_c(&ctx);
+        polyClearC(&ctx);

        // load r and pad (r has some of its bits cleared)
        {
@ -85,7 +85,7 @@ pub const Poly1305 = struct {
    //   ctx->r <=   0ffffffc_0ffffffc_0ffffffc_0fffffff
    // Postcondition:
    //   ctx->h <= 4_ffffffff_ffffffff_ffffffff_ffffffff
-    fn poly_block(ctx: *Poly1305) void {
+    fn polyBlock(ctx: *Self) void {
        // s = h + c, without carry propagation
        const s0 = u64(ctx.h[0]) + ctx.c[0]; // s0 <= 1_fffffffe
        const s1 = u64(ctx.h[1]) + ctx.c[1]; // s1 <= 1_fffffffe
@ -127,7 +127,7 @@ pub const Poly1305 = struct {
    }

    // (re-)initializes the input counter and input buffer
-    fn poly_clear_c(ctx: *Poly1305) void {
+    fn polyClearC(ctx: *Self) void {
        ctx.c[0] = 0;
        ctx.c[1] = 0;
        ctx.c[2] = 0;
@ -135,32 +135,32 @@ pub const Poly1305 = struct {
        ctx.c_idx = 0;
    }

-    fn poly_take_input(ctx: *Poly1305, input: u8) void {
+    // ORs one input byte into the chunk ctx.c at byte offset ctx.c_idx
+    // (word = c_idx >> 2, byte lane = c_idx & 3, lowest lane first), then
+    // advances c_idx by one. Because it ORs, the byte is stored exactly only
+    // if those bits of ctx.c are currently zero.
+    fn polyTakeInput(ctx: *Self, input: u8) void {
        const word = ctx.c_idx >> 2;
        const byte = ctx.c_idx & 3;
        ctx.c[word] |= std.math.shl(u32, input, byte * 8);
        ctx.c_idx += 1;
    }

-    fn poly_update(ctx: *Poly1305, msg: []const u8) void {
+    // Byte-at-a-time absorb: feeds each byte of msg through polyTakeInput and,
+    // whenever the chunk reaches 16 bytes, runs polyBlock followed by
+    // polyClearC. A partial chunk is left pending in ctx.c / ctx.c_idx.
+    fn polyUpdate(ctx: *Self, msg: []const u8) void {
        for (msg) |b| {
-            poly_take_input(ctx, b);
+            polyTakeInput(ctx, b);
            if (ctx.c_idx == 16) {
-                poly_block(ctx);
-                poly_clear_c(ctx);
+                polyBlock(ctx);
+                polyClearC(ctx);
            }
        }
    }

-    inline fn alignto(x: usize, block_size: usize) usize {
+    // Returns (-x) masked with (block_size - 1), using wrapping negation:
+    // the number of bytes needed to advance x to the next multiple of
+    // block_size, or 0 if x is already a multiple. The mask form only gives
+    // that result when block_size is a power of two.
+    fn alignTo(x: usize, block_size: usize) usize {
        return ((~x) +% 1) & (block_size - 1);
    }

    // Feed data into the MAC context.
    pub fn update(ctx: *Self, msg: []const u8) void {
        // Align ourselves with block boundaries
-        const alignm = std.math.min(alignto(ctx.c_idx, 16), msg.len);
-        poly_update(ctx, msg[0..alignm]);
+        const alignm = std.math.min(alignTo(ctx.c_idx, 16), msg.len);
+        polyUpdate(ctx, msg[0..alignm]);

        var nmsg = msg[alignm..];

@ -172,15 +172,15 @@ pub const Poly1305 = struct {
            ctx.c[1] = readInt(nmsg[4..8], u32, Endian.Little);
            ctx.c[2] = readInt(nmsg[8..12], u32, Endian.Little);
            ctx.c[3] = readInt(nmsg[12..16], u32, Endian.Little);
-            poly_block(ctx);
+            polyBlock(ctx);
            nmsg = nmsg[16..];
        }
        if (nb_blocks > 0) {
-            poly_clear_c(ctx);
+            polyClearC(ctx);
        }

        // remaining bytes
-        poly_update(ctx, nmsg[0..]);
+        polyUpdate(ctx, nmsg[0..]);
    }

    // Finalize the MAC and output into buffer provided by caller.
@ -190,9 +190,9 @@ pub const Poly1305 = struct {
            // move the final 1 according to remaining input length
            // (We may add less than 2^130 to the last input block)
            ctx.c[4] = 0;
-            poly_take_input(ctx, 1);
+            polyTakeInput(ctx, 1);
            // one last hash update
-            poly_block(ctx);
+            polyBlock(ctx);
        }

        // check if we should subtract 2^130-5 by performing the
@ -215,7 +215,7 @@ pub const Poly1305 = struct {
        writeInt(out[8..], @truncate(u32, uu2), Endian.Little);
        writeInt(out[12..], @truncate(u32, uu3), Endian.Little);

-        ctx.secure_zero();
+        ctx.secureZero();
    }
 };

--- a/std/crypto/throughput_test.zig
+++ b/std/crypto/throughput_test.zig
@ -168,11 +168,6 @@ pub fn main() !void {
    }

    inline for (hashes) |H| {
-        // TODO: Inverted early continue case here segfaults compiler. Create reduced test case.
-        //
-        // if (filter != null and std.mem.indexOf(u8, H.name, filter.?) == null) {
-        //     continue;
-        // }
        if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) {
            const throughput = try benchmarkHash(H.ty, mode(32 * MiB));
            try printPad(stdout, H.name);
--- a/std/crypto/x25519.zig
+++ b/std/crypto/x25519.zig
@ -14,13 +14,13 @@ pub const X25519 = struct {
    pub const secret_length = 32;
    pub const minimum_key_length = 32;

-    fn trim_scalar(s: []u8) void {
+    // Restricts the scalar in place: clears the low 3 bits of s[0], clears
+    // the top bit of s[31] and sets bit 6 of s[31]. Indexes s[31], so s must
+    // hold at least 32 bytes.
+    fn trimScalar(s: []u8) void {
        s[0] &= 248;
        s[31] &= 127;
        s[31] |= 64;
    }

-    fn scalar_bit(s: []const u8, i: usize) i32 {
+    // Returns bit i of s as 0 or 1. Bit 0 is the least significant bit of
+    // s[0]: the byte index is i >> 3 and the bit within that byte is i & 7.
+    fn scalarBit(s: []const u8, i: usize) i32 {
        return (s[i >> 3] >> @intCast(u3, i & 7)) & 1;
    }

@ -30,7 +30,6 @@ pub const X25519 = struct {
        std.debug.assert(public_key.len >= minimum_key_length);

        var storage: [7]Fe = undefined;
-
        var x1 = &storage[0];
        var x2 = &storage[1];
        var z2 = &storage[2];
@ -40,74 +39,74 @@ pub const X25519 = struct {
        var t1 = &storage[6];

        // computes the scalar product
-        fe_frombytes(x1, public_key);
+        Fe.fromBytes(x1, public_key);

        // restrict the possible scalar values
        var e: [32]u8 = undefined;
        for (e[0..]) |_, i| {
            e[i] = private_key[i];
        }
-        trim_scalar(e[0..]);
+        trimScalar(e[0..]);

        // computes the actual scalar product (the result is in x2 and z2)

        // Montgomery ladder
        // In projective coordinates, to avoid divisons: x = X / Z
        // We don't care about the y coordinate, it's only 1 bit of information
-        fe_1(x2);
-        fe_0(z2); // "zero" point
-        fe_copy(x3, x1);
-        fe_1(z3);
+        Fe.init1(x2);
+        Fe.init0(z2); // "zero" point
+        Fe.copy(x3, x1);
+        Fe.init1(z3);

        var swap: i32 = 0;
        var pos: isize = 254;
        while (pos >= 0) : (pos -= 1) {
            // constant time conditional swap before ladder step
-            const b = scalar_bit(e, @intCast(usize, pos));
+            const b = scalarBit(e, @intCast(usize, pos));
            swap ^= b; // xor trick avoids swapping at the end of the loop
-            fe_cswap(x2, x3, swap);
-            fe_cswap(z2, z3, swap);
+            Fe.cswap(x2, x3, swap);
+            Fe.cswap(z2, z3, swap);
            swap = b; // anticipates one last swap after the loop

            // Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3)
            // with differential addition
-            fe_sub(t0, x3, z3);
-            fe_sub(t1, x2, z2);
-            fe_add(x2, x2, z2);
-            fe_add(z2, x3, z3);
-            fe_mul(z3, t0, x2);
-            fe_mul(z2, z2, t1);
-            fe_sq(t0, t1);
-            fe_sq(t1, x2);
-            fe_add(x3, z3, z2);
-            fe_sub(z2, z3, z2);
-            fe_mul(x2, t1, t0);
-            fe_sub(t1, t1, t0);
-            fe_sq(z2, z2);
-            fe_mul121666(z3, t1);
-            fe_sq(x3, x3);
-            fe_add(t0, t0, z3);
-            fe_mul(z3, x1, z2);
-            fe_mul(z2, t1, t0);
+            Fe.sub(t0, x3, z3);
+            Fe.sub(t1, x2, z2);
+            Fe.add(x2, x2, z2);
+            Fe.add(z2, x3, z3);
+            Fe.mul(z3, t0, x2);
+            Fe.mul(z2, z2, t1);
+            Fe.sq(t0, t1);
+            Fe.sq(t1, x2);
+            Fe.add(x3, z3, z2);
+            Fe.sub(z2, z3, z2);
+            Fe.mul(x2, t1, t0);
+            Fe.sub(t1, t1, t0);
+            Fe.sq(z2, z2);
+            Fe.mulSmall(z3, t1, 121666);
+            Fe.sq(x3, x3);
+            Fe.add(t0, t0, z3);
+            Fe.mul(z3, x1, z2);
+            Fe.mul(z2, t1, t0);
        }

        // last swap is necessary to compensate for the xor trick
        // Note: after this swap, P3 == P2 + P1.
-        fe_cswap(x2, x3, swap);
-        fe_cswap(z2, z3, swap);
+        Fe.cswap(x2, x3, swap);
+        Fe.cswap(z2, z3, swap);

        // normalises the coordinates: x == X / Z
-        fe_invert(z2, z2);
-        fe_mul(x2, x2, z2);
-        fe_tobytes(out, x2);
+        Fe.invert(z2, z2);
+        Fe.mul(x2, x2, z2);
+        Fe.toBytes(out, x2);

-        x1.secure_zero();
-        x2.secure_zero();
-        x3.secure_zero();
-        t0.secure_zero();
-        t1.secure_zero();
-        z2.secure_zero();
-        z3.secure_zero();
+        x1.secureZero();
+        x2.secureZero();
+        x3.secureZero();
+        t0.secureZero();
+        t1.secureZero();
+        z2.secureZero();
+        z3.secureZero();
        std.mem.secureZero(u8, e[0..]);

        // Returns false if the output is all zero
@ -140,49 +139,48 @@ fn zerocmp(comptime T: type, a: []const T) bool {
 const Fe = struct {
    b: [10]i32,

-    fn secure_zero(self: *Fe) void {
+    // Overwrites this field element with zeroes by viewing it as a byte
+    // slice of @sizeOf(Fe) bytes and passing that to std.mem.secureZero.
+    fn secureZero(self: *Fe) void {
        std.mem.secureZero(u8, @ptrCast([*]u8, self)[0..@sizeOf(Fe)]);
    }
-};

-fn fe_0(h: *Fe) void {
+    // Sets every limb of h to 0.
+    fn init0(h: *Fe) void {
        for (h.b) |*e| {
            e.* = 0;
        }
    }

-fn fe_1(h: *Fe) void {
+    // Sets h to the value 1: limbs 1..9 are zeroed and limb 0 is set to 1.
+    fn init1(h: *Fe) void {
        for (h.b[1..]) |*e| {
            e.* = 0;
        }
        h.b[0] = 1;
    }

-fn fe_copy(h: *Fe, f: *const Fe) void {
+    // Copies every limb of f into h.
+    fn copy(h: *Fe, f: *const Fe) void {
        for (h.b) |_, i| {
            h.b[i] = f.b[i];
        }
    }

-fn fe_neg(h: *Fe, f: *const Fe) void {
+    // Stores the limb-wise negation of f into h (h.b[i] = -f.b[i]).
+    // No carry or reduction step is performed here.
+    fn neg(h: *Fe, f: *const Fe) void {
        for (h.b) |_, i| {
            h.b[i] = -f.b[i];
        }
    }

-fn fe_add(h: *Fe, f: *const Fe, g: *const Fe) void {
+    // Stores the limb-wise sum f + g into h. No carry or reduction step is
+    // performed here. Each limb is read before it is written, so h may be
+    // the same element as f or g.
+    fn add(h: *Fe, f: *const Fe, g: *const Fe) void {
        for (h.b) |_, i| {
            h.b[i] = f.b[i] + g.b[i];
        }
    }

-fn fe_sub(h: *Fe, f: *const Fe, g: *const Fe) void {
+    // Stores the limb-wise difference f - g into h. No carry or reduction
+    // step is performed here. Each limb is read before it is written, so h
+    // may be the same element as f or g.
+    fn sub(h: *Fe, f: *const Fe, g: *const Fe) void {
        for (h.b) |_, i| {
            h.b[i] = f.b[i] - g.b[i];
        }
    }

-fn fe_cswap(f: *Fe, g: *Fe, b: i32) void {
+    fn cswap(f: *Fe, g: *Fe, b: i32) void {
        for (f.b) |_, i| {
            const x = (f.b[i] ^ g.b[i]) & -b;
            f.b[i] ^= x;
@ -190,14 +188,14 @@ fn fe_cswap(f: *Fe, g: *Fe, b: i32) void {
        }
    }

-fn fe_ccopy(f: *Fe, g: *const Fe, b: i32) void {
+    // Branch-free conditional copy. With b == 1 the mask -b is all ones and
+    // every limb of f becomes the matching limb of g; with b == 0 the mask is
+    // zero and f is left unchanged. Other values of b are not meaningful.
+    fn ccopy(f: *Fe, g: *const Fe, b: i32) void {
        for (f.b) |_, i| {
            const x = (f.b[i] ^ g.b[i]) & -b;
            f.b[i] ^= x;
        }
    }

-inline fn carryround(c: []i64, t: []i64, comptime i: comptime_int, comptime shift: comptime_int, comptime mult: comptime_int) void {
+    inline fn carryRound(c: []i64, t: []i64, comptime i: comptime_int, comptime shift: comptime_int, comptime mult: comptime_int) void {
        const j = (i + 1) % 10;

        c[i] = (t[i] + (i64(1) << shift)) >> (shift + 1);
@ -205,91 +203,82 @@ inline fn carryround(c: []i64, t: []i64, comptime i: comptime_int, comptime shif
        t[i] -= c[i] * (i64(1) << (shift + 1));
    }

-fn feCarry1(h: *Fe, t: []i64) void {
+    // Runs a fixed carryRound schedule over the ten 64-bit limbs in t:
+    // limb 9 first (shift 24, multiplier 19), then limbs 1, 3, 5, 7
+    // (shift 24) and limbs 0, 2, 4, 6, 8 (shift 25), all with multiplier 1.
+    // Each resulting t[i] is then narrowed into h.b[i] with @intCast(i32).
+    // t is modified in place; it is indexed 0..9, so it must hold 10 limbs.
+    fn carry1(h: *Fe, t: []i64) void {
        var c: [10]i64 = undefined;

        var sc = c[0..];
        var st = t[0..];

-    carryround(sc, st, 9, 24, 19);
-    carryround(sc, st, 1, 24, 1);
-    carryround(sc, st, 3, 24, 1);
-    carryround(sc, st, 5, 24, 1);
-    carryround(sc, st, 7, 24, 1);
-    carryround(sc, st, 0, 25, 1);
-    carryround(sc, st, 2, 25, 1);
-    carryround(sc, st, 4, 25, 1);
-    carryround(sc, st, 6, 25, 1);
-    carryround(sc, st, 8, 25, 1);
+        carryRound(sc, st, 9, 24, 19);
+        carryRound(sc, st, 1, 24, 1);
+        carryRound(sc, st, 3, 24, 1);
+        carryRound(sc, st, 5, 24, 1);
+        carryRound(sc, st, 7, 24, 1);
+        carryRound(sc, st, 0, 25, 1);
+        carryRound(sc, st, 2, 25, 1);
+        carryRound(sc, st, 4, 25, 1);
+        carryRound(sc, st, 6, 25, 1);
+        carryRound(sc, st, 8, 25, 1);

        for (h.b) |_, i| {
            h.b[i] = @intCast(i32, t[i]);
        }
    }

-fn feCarry2(h: *Fe, t: []i64) void {
+    // Runs a fixed, interleaved carryRound schedule over the ten 64-bit limbs
+    // in t (limb order 0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 9, 0). Limbs 0 and 4 are
+    // each carried twice, and limb 9 uses multiplier 19. Each resulting t[i]
+    // is then narrowed into h.b[i] with @intCast(i32). t is modified in place;
+    // it is indexed 0..9, so it must hold 10 limbs.
+    fn carry2(h: *Fe, t: []i64) void {
        var c: [10]i64 = undefined;

        var sc = c[0..];
        var st = t[0..];

-    carryround(sc, st, 0, 25, 1);
-    carryround(sc, st, 4, 25, 1);
-    carryround(sc, st, 1, 24, 1);
-    carryround(sc, st, 5, 24, 1);
-    carryround(sc, st, 2, 25, 1);
-    carryround(sc, st, 6, 25, 1);
-    carryround(sc, st, 3, 24, 1);
-    carryround(sc, st, 7, 24, 1);
-    carryround(sc, st, 4, 25, 1);
-    carryround(sc, st, 8, 25, 1);
-    carryround(sc, st, 9, 24, 19);
-    carryround(sc, st, 0, 25, 1);
+        carryRound(sc, st, 0, 25, 1);
+        carryRound(sc, st, 4, 25, 1);
+        carryRound(sc, st, 1, 24, 1);
+        carryRound(sc, st, 5, 24, 1);
+        carryRound(sc, st, 2, 25, 1);
+        carryRound(sc, st, 6, 25, 1);
+        carryRound(sc, st, 3, 24, 1);
+        carryRound(sc, st, 7, 24, 1);
+        carryRound(sc, st, 4, 25, 1);
+        carryRound(sc, st, 8, 25, 1);
+        carryRound(sc, st, 9, 24, 19);
+        carryRound(sc, st, 0, 25, 1);

        for (h.b) |_, i| {
            h.b[i] = @intCast(i32, t[i]);
        }
    }

-// TODO: Use readInt(u24) but double check alignment since currently it produces different values.
-fn load24_le(s: []const u8) u32 {
-    return s[0] | (u32(s[1]) << 8) | (u32(s[2]) << 16);
-}
-
-fn fe_frombytes(h: *Fe, s: []const u8) void {
+    // Loads the first 32 bytes of s (little-endian) into ten limbs and
+    // carries them into h via carry1. Limbs 0 and 5 are read from 4-byte
+    // windows; the others are read from 3-byte windows (as u32) and shifted
+    // left by a per-limb amount. The mask 0x7fffff on the last window drops
+    // the top bit of s[31]. Asserts s.len >= 32.
+    fn fromBytes(h: *Fe, s: []const u8) void {
        std.debug.assert(s.len >= 32);

        var t: [10]i64 = undefined;

        t[0] = readInt(s[0..4], u32, Endian.Little);
-    t[1] = load24_le(s[4..7]) << 6;
-    t[2] = load24_le(s[7..10]) << 5;
-    t[3] = load24_le(s[10..13]) << 3;
-    t[4] = load24_le(s[13..16]) << 2;
+        t[1] = readInt(s[4..7], u32, Endian.Little) << 6;
+        t[2] = readInt(s[7..10], u32, Endian.Little) << 5;
+        t[3] = readInt(s[10..13], u32, Endian.Little) << 3;
+        t[4] = readInt(s[13..16], u32, Endian.Little) << 2;
        t[5] = readInt(s[16..20], u32, Endian.Little);
-    t[6] = load24_le(s[20..23]) << 7;
-    t[7] = load24_le(s[23..26]) << 5;
-    t[8] = load24_le(s[26..29]) << 4;
-    t[9] = (load24_le(s[29..32]) & 0x7fffff) << 2;
+        t[6] = readInt(s[20..23], u32, Endian.Little) << 7;
+        t[7] = readInt(s[23..26], u32, Endian.Little) << 5;
+        t[8] = readInt(s[26..29], u32, Endian.Little) << 4;
+        t[9] = (readInt(s[29..32], u32, Endian.Little) & 0x7fffff) << 2;

-    feCarry1(h, t[0..]);
+        carry1(h, t[0..]);
    }

-fn fe_mul_small(h: *Fe, f: *const Fe, comptime g: comptime_int) void {
+    // Multiplies every limb of f by the compile-time constant g in 64-bit
+    // arithmetic, then carries the products into h via carry1. All limbs of
+    // f are read before h is written, so h may be the same element as f.
+    fn mulSmall(h: *Fe, f: *const Fe, comptime g: comptime_int) void {
        var t: [10]i64 = undefined;

        for (t[0..]) |_, i| {
            t[i] = i64(f.b[i]) * g;
        }

-    feCarry1(h, t[0..]);
+        carry1(h, t[0..]);
    }

-fn fe_mul121666(h: *Fe, f: *const Fe) void {
-    fe_mul_small(h, f, 121666);
-}
-
-fn fe_mul(h: *Fe, f1: *const Fe, g1: *const Fe) void {
+    fn mul(h: *Fe, f1: *const Fe, g1: *const Fe) void {
        const f = f1.b;
        const g = g1.b;

@ -326,11 +315,11 @@ fn fe_mul(h: *Fe, f1: *const Fe, g1: *const Fe) void {
        t[8] = f[0] * i64(g[8]) + F[1] * i64(g[7]) + f[2] * i64(g[6]) + F[3] * i64(g[5]) + f[4] * i64(g[4]) + F[5] * i64(g[3]) + f[6] * i64(g[2]) + F[7] * i64(g[1]) + f[8] * i64(g[0]) + F[9] * i64(G[9]);
        t[9] = f[0] * i64(g[9]) + f[1] * i64(g[8]) + f[2] * i64(g[7]) + f[3] * i64(g[6]) + f[4] * i64(g[5]) + f[5] * i64(g[4]) + f[6] * i64(g[3]) + f[7] * i64(g[2]) + f[8] * i64(g[1]) + f[9] * i64(g[0]);

-    feCarry2(h, t[0..]);
+        carry2(h, t[0..]);
    }

-// we could use fe_mul() for this, but this is significantly faster
-fn fe_sq(h: *Fe, fz: *const Fe) void {
+    // we could use Fe.mul() for this, but this is significantly faster
+    fn sq(h: *Fe, fz: *const Fe) void {
        const f0 = fz.b[0];
        const f1 = fz.b[1];
        const f2 = fz.b[2];
@ -369,16 +358,16 @@ fn fe_sq(h: *Fe, fz: *const Fe) void {
        t[8] = f0_2 * i64(f8) + f1_2 * i64(f7_2) + f2_2 * i64(f6) + f3_2 * i64(f5_2) + f4 * i64(f4) + f9 * i64(f9_38);
        t[9] = f0_2 * i64(f9) + f1_2 * i64(f8) + f2_2 * i64(f7) + f3_2 * i64(f6) + f4 * i64(f5_2);

-    feCarry2(h, t[0..]);
+        carry2(h, t[0..]);
    }

-fn fe_sq2(h: *Fe, f: *const Fe) void {
-    fe_sq(h, f);
-    fe_mul_small(h, h, 2);
+    // Sets h = 2 * f^2: squares f into h, then doubles h in place.
+    fn sq2(h: *Fe, f: *const Fe) void {
+        Fe.sq(h, f);
+        // The small-constant multiply is named mulSmall on Fe; there is no
+        // Fe.mul_small, so that spelling would not resolve.
+        Fe.mulSmall(h, h, 2);
    }

    // This could be simplified, but it would be slower
-fn fe_invert(out: *Fe, z: *const Fe) void {
+    fn invert(out: *Fe, z: *const Fe) void {
        var i: usize = undefined;

        var t: [4]Fe = undefined;
@ -387,63 +376,63 @@ fn fe_invert(out: *Fe, z: *const Fe) void {
        var t2 = &t[2];
        var t3 = &t[3];

-    fe_sq(t0, z);
-    fe_sq(t1, t0);
-    fe_sq(t1, t1);
-    fe_mul(t1, z, t1);
-    fe_mul(t0, t0, t1);
+        Fe.sq(t0, z);
+        Fe.sq(t1, t0);
+        Fe.sq(t1, t1);
+        Fe.mul(t1, z, t1);
+        Fe.mul(t0, t0, t1);

-    fe_sq(t2, t0);
-    fe_mul(t1, t1, t2);
+        Fe.sq(t2, t0);
+        Fe.mul(t1, t1, t2);

-    fe_sq(t2, t1);
+        Fe.sq(t2, t1);
        i = 1;
-    while (i < 5) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t1, t2, t1);
+        while (i < 5) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t1, t2, t1);

-    fe_sq(t2, t1);
+        Fe.sq(t2, t1);
        i = 1;
-    while (i < 10) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t2, t2, t1);
+        while (i < 10) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t2, t2, t1);

-    fe_sq(t3, t2);
+        Fe.sq(t3, t2);
        i = 1;
-    while (i < 20) : (i += 1) fe_sq(t3, t3);
-    fe_mul(t2, t3, t2);
+        while (i < 20) : (i += 1) Fe.sq(t3, t3);
+        Fe.mul(t2, t3, t2);

-    fe_sq(t2, t2);
+        Fe.sq(t2, t2);
        i = 1;
-    while (i < 10) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t1, t2, t1);
+        while (i < 10) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t1, t2, t1);

-    fe_sq(t2, t1);
+        Fe.sq(t2, t1);
        i = 1;
-    while (i < 50) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t2, t2, t1);
+        while (i < 50) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t2, t2, t1);

-    fe_sq(t3, t2);
+        Fe.sq(t3, t2);
        i = 1;
-    while (i < 100) : (i += 1) fe_sq(t3, t3);
-    fe_mul(t2, t3, t2);
+        while (i < 100) : (i += 1) Fe.sq(t3, t3);
+        Fe.mul(t2, t3, t2);

-    fe_sq(t2, t2);
+        Fe.sq(t2, t2);
        i = 1;
-    while (i < 50) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t1, t2, t1);
+        while (i < 50) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t1, t2, t1);

-    fe_sq(t1, t1);
+        Fe.sq(t1, t1);
        i = 1;
-    while (i < 5) : (i += 1) fe_sq(t1, t1);
-    fe_mul(out, t1, t0);
+        while (i < 5) : (i += 1) Fe.sq(t1, t1);
+        Fe.mul(out, t1, t0);

-    t0.secure_zero();
-    t1.secure_zero();
-    t2.secure_zero();
-    t3.secure_zero();
+        t0.secureZero();
+        t1.secureZero();
+        t2.secureZero();
+        t3.secureZero();
    }

    // This could be simplified, but it would be slower
-fn fe_pow22523(out: *Fe, z: *const Fe) void {
+    fn pow22523(out: *Fe, z: *const Fe) void {
        var i: usize = undefined;

        var t: [3]Fe = undefined;
@ -451,61 +440,61 @@ fn fe_pow22523(out: *Fe, z: *const Fe) void {
        var t1 = &t[1];
        var t2 = &t[2];

-    fe_sq(t0, z);
-    fe_sq(t1, t0);
-    fe_sq(t1, t1);
-    fe_mul(t1, z, t1);
-    fe_mul(t0, t0, t1);
+        Fe.sq(t0, z);
+        Fe.sq(t1, t0);
+        Fe.sq(t1, t1);
+        Fe.mul(t1, z, t1);
+        Fe.mul(t0, t0, t1);

-    fe_sq(t0, t0);
-    fe_mul(t0, t1, t0);
+        Fe.sq(t0, t0);
+        Fe.mul(t0, t1, t0);

-    fe_sq(t1, t0);
+        Fe.sq(t1, t0);
        i = 1;
-    while (i < 5) : (i += 1) fe_sq(t1, t1);
-    fe_mul(t0, t1, t0);
+        while (i < 5) : (i += 1) Fe.sq(t1, t1);
+        Fe.mul(t0, t1, t0);

-    fe_sq(t1, t0);
+        Fe.sq(t1, t0);
        i = 1;
-    while (i < 10) : (i += 1) fe_sq(t1, t1);
-    fe_mul(t1, t1, t0);
+        while (i < 10) : (i += 1) Fe.sq(t1, t1);
+        Fe.mul(t1, t1, t0);

-    fe_sq(t2, t1);
+        Fe.sq(t2, t1);
        i = 1;
-    while (i < 20) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t1, t2, t1);
+        while (i < 20) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t1, t2, t1);

-    fe_sq(t1, t1);
+        Fe.sq(t1, t1);
        i = 1;
-    while (i < 10) : (i += 1) fe_sq(t1, t1);
-    fe_mul(t0, t1, t0);
+        while (i < 10) : (i += 1) Fe.sq(t1, t1);
+        Fe.mul(t0, t1, t0);

-    fe_sq(t1, t0);
+        Fe.sq(t1, t0);
        i = 1;
-    while (i < 50) : (i += 1) fe_sq(t1, t1);
-    fe_mul(t1, t1, t0);
+        while (i < 50) : (i += 1) Fe.sq(t1, t1);
+        Fe.mul(t1, t1, t0);

-    fe_sq(t2, t1);
+        Fe.sq(t2, t1);
        i = 1;
-    while (i < 100) : (i += 1) fe_sq(t2, t2);
-    fe_mul(t1, t2, t1);
+        while (i < 100) : (i += 1) Fe.sq(t2, t2);
+        Fe.mul(t1, t2, t1);

-    fe_sq(t1, t1);
+        Fe.sq(t1, t1);
        i = 1;
-    while (i < 50) : (i += 1) fe_sq(t1, t1);
-    fe_mul(t0, t1, t0);
+        while (i < 50) : (i += 1) Fe.sq(t1, t1);
+        Fe.mul(t0, t1, t0);

-    fe_sq(t0, t0);
+        Fe.sq(t0, t0);
        i = 1;
-    while (i < 2) : (i += 1) fe_sq(t0, t0);
-    fe_mul(out, t0, z);
+        while (i < 2) : (i += 1) Fe.sq(t0, t0);
+        Fe.mul(out, t0, z);

-    t0.secure_zero();
-    t1.secure_zero();
-    t2.secure_zero();
+        t0.secureZero();
+        t1.secureZero();
+        t2.secureZero();
    }

-inline fn tobytesround(c: []i64, t: []i64, comptime i: comptime_int, comptime shift: comptime_int) void {
+    inline fn toBytesRound(c: []i64, t: []i64, comptime i: comptime_int, comptime shift: comptime_int) void {
        c[i] = t[i] >> shift;
        if (i + 1 < 10) {
            t[i + 1] += c[i];
@ -513,7 +502,7 @@ inline fn tobytesround(c: []i64, t: []i64, comptime i: comptime_int, comptime sh
        t[i] -= c[i] * (i32(1) << shift);
    }

-fn fe_tobytes(s: []u8, h: *const Fe) void {
+    fn toBytes(s: []u8, h: *const Fe) void {
        std.debug.assert(s.len >= 32);

        var t: [10]i64 = undefined;
@ -538,16 +527,16 @@ fn fe_tobytes(s: []u8, h: *const Fe) void {
        var st = t[0..];
        var sc = c[0..];

-    tobytesround(sc, st, 0, 26);
-    tobytesround(sc, st, 1, 25);
-    tobytesround(sc, st, 2, 26);
-    tobytesround(sc, st, 3, 25);
-    tobytesround(sc, st, 4, 26);
-    tobytesround(sc, st, 5, 25);
-    tobytesround(sc, st, 6, 26);
-    tobytesround(sc, st, 7, 25);
-    tobytesround(sc, st, 8, 26);
-    tobytesround(sc, st, 9, 25);
+        toBytesRound(sc, st, 0, 26);
+        toBytesRound(sc, st, 1, 25);
+        toBytesRound(sc, st, 2, 26);
+        toBytesRound(sc, st, 3, 25);
+        toBytesRound(sc, st, 4, 26);
+        toBytesRound(sc, st, 5, 25);
+        toBytesRound(sc, st, 6, 26);
+        toBytesRound(sc, st, 7, 25);
+        toBytesRound(sc, st, 8, 26);
+        toBytesRound(sc, st, 9, 25);

        var ut: [10]u32 = undefined;
        for (ut[0..]) |_, i| {
@ -567,21 +556,22 @@ fn fe_tobytes(s: []u8, h: *const Fe) void {
    }

    //  Parity check.  Returns 0 if even, 1 if odd
-fn fe_isnegative(f: *const Fe) bool {
+    fn isNegative(f: *const Fe) bool {
        var s: [32]u8 = undefined;
-    fe_tobytes(s[0..], f);
+        Fe.toBytes(s[0..], f);
        const isneg = s[0] & 1;
-    s.secure_zero();
+        s.secureZero();
        return isneg;
    }

-fn fe_isnonzero(f: *const Fe) bool {
+    // Serializes f to 32 bytes, runs zerocmp over them, wipes the temporary
+    // buffer and returns the zerocmp result.
+    // NOTE(review): zerocmp's body is not visible here. If it returns true
+    // for an all-zero slice, this result is inverted relative to the
+    // function name and needs a `!` — confirm against zerocmp.
+    fn isNonZero(f: *const Fe) bool {
        var s: [32]u8 = undefined;
-    fe_tobytes(s[0..], f);
+        Fe.toBytes(s[0..], f);
        const isnonzero = zerocmp(u8, s[0..]);
-    s.secure_zero();
-    return isneg;
+        // s is a plain [32]u8 with no methods; wipe it through std.mem.
+        std.mem.secureZero(u8, s[0..]);
+        // Return the value computed above (`isneg` is not declared here).
+        return isnonzero;
    }
+};

 test "x25519 rfc7748 vector1" {
    const secret_key = "\xa5\x46\xe3\x6b\xf0\x52\x7c\x9d\x3b\x16\x15\x4b\x82\x46\x5e\xdd\x62\x14\x4c\x0a\xc1\xfc\x5a\x18\x50\x6a\x22\x44\xba\x44\x9a\xc4";