std/crypto: adjust aesni parallelism to CPU models

Intel keeps changing the latency & throughput of the aes* and clmul instructions every time they release a new model. Adjust `optimal_parallel_blocks` for each known CPU model accordingly, keeping 8 as a safe default for models with no known data.
2020-10-27 13:33:08 +01:00 · 2020-10-27 13:33:08 +01:00 · 0adc144f88
commit 0adc144f88
parent ea45897fcc
1 changed file with 11 additions and 1 deletion
--- a/lib/std/crypto/aes/aesni.zig
+++ b/lib/std/crypto/aes/aesni.zig
@@ -100,8 +100,18 @@ pub const Block = struct {

    /// Perform operations on multiple blocks in parallel.
    pub const parallel = struct {
+        const cpu = std.Target.x86.cpu;
+
        /// The recommended number of AES encryption/decryption to perform in parallel for the chosen implementation.
-        pub const optimal_parallel_blocks = 8;
+        pub const optimal_parallel_blocks = switch (std.Target.current.cpu.model) {
+            &cpu.westmere => 6,
+            &cpu.sandybridge, &cpu.ivybridge => 8,
+            &cpu.haswell, &cpu.broadwell => 7,
+            &cpu.cannonlake, &cpu.skylake, &cpu.skylake_avx512 => 4,
+            &cpu.icelake_client, &cpu.icelake_server => 6,
+            &cpu.znver1, &cpu.znver2 => 8,
+            else => 8,
+        };

        /// Encrypt multiple blocks in parallel, each their own round key.
        pub inline fn encryptParallel(comptime count: usize, blocks: [count]Block, round_keys: [count]Block) [count]Block {