zig/std/base64.zig

465 lines
19 KiB
Zig

const assert = @import("debug.zig").assert;
const mem = @import("mem.zig");
/// The 64 alphabet chars listed in index order: 'A' stands for the value 0 and '/' for 63.
pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// The pad char that goes together with standard_alphabet_chars.
pub const standard_pad_char = '=';
/// Returns how many chars ::encode produces for source_len bytes, padding included.
/// Every started group of 3 source bytes takes exactly 4 chars: ceil(source_len / 3) * 4.
pub fn calcEncodedSize(source_len: usize) -> usize {
    // adding 2 before the truncating division rounds the group count up.
    const group_count = @divTrunc(source_len + 2, 3);
    return group_count * 4;
}
/// Encodes source as base64 text into dest.
/// dest.len must be what you get from ::calcEncodedSize, and alphabet_chars.len must be 64.
/// Both requirements are asserted.
/// It is assumed that alphabet_chars and pad_char are all unique characters; this is not checked here.
pub fn encode(dest: []u8, source: []const u8, alphabet_chars: []const u8, pad_char: u8) {
    assert(alphabet_chars.len == 64);
    assert(dest.len == calcEncodedSize(source.len));

    // bytes that do not fill a whole group of 3 are handled after the main loop.
    const tail_len = source.len % 3;
    const full_len = source.len - tail_len;
    var src_pos: usize = 0;
    var dest_pos: usize = 0;

    // every whole group of 3 bytes (24 bits) becomes four 6-bit alphabet indexes.
    while (src_pos < full_len) : (src_pos += 3) {
        const b0 = source[src_pos + 0];
        const b1 = source[src_pos + 1];
        const b2 = source[src_pos + 2];
        dest[dest_pos + 0] = alphabet_chars[(b0 >> 2) & 0x3f];
        dest[dest_pos + 1] = alphabet_chars[((b0 & 0x3) << 4) |
            ((b1 & 0xf0) >> 4)];
        dest[dest_pos + 2] = alphabet_chars[((b1 & 0xf) << 2) |
            ((b2 & 0xc0) >> 6)];
        dest[dest_pos + 3] = alphabet_chars[b2 & 0x3f];
        dest_pos += 4;
    }

    if (tail_len == 1) {
        // 1 leftover byte: 2 chars (the second one holds only 2 data bits) and 2 pad chars.
        const last = source[src_pos];
        dest[dest_pos + 0] = alphabet_chars[(last >> 2) & 0x3f];
        dest[dest_pos + 1] = alphabet_chars[(last & 0x3) << 4];
        dest[dest_pos + 2] = pad_char;
        dest[dest_pos + 3] = pad_char;
    } else if (tail_len == 2) {
        // 2 leftover bytes: 3 chars (the third one holds only 4 data bits) and 1 pad char.
        const first = source[src_pos + 0];
        const second = source[src_pos + 1];
        dest[dest_pos + 0] = alphabet_chars[(first >> 2) & 0x3f];
        dest[dest_pos + 1] = alphabet_chars[((first & 0x3) << 4) |
            ((second & 0xf0) >> 4)];
        dest[dest_pos + 2] = alphabet_chars[(second & 0xf) << 2];
        dest[dest_pos + 3] = pad_char;
    }
}
/// Decoding tables built from standard_alphabet_chars and standard_pad_char.
/// For use with ::decodeExact.
pub const standard_alphabet = Base64Alphabet.init(standard_alphabet_chars, standard_pad_char);
/// Lookup tables that ::decodeExact uses to validate and translate chars.
pub const Base64Alphabet = struct {
    /// maps an alphabet char to its 6-bit value, e.g. 'A' => 0.
    /// entries for chars outside of the 64 alphabet chars are left undefined.
    char_to_index: [256]u8,
    /// true for exactly the 64 alphabet chars. the pad char is not one of them.
    char_in_alphabet: [256]bool,
    pad_char: u8,

    /// Builds the tables from the 64 chars of alphabet_chars, given in index order.
    /// Asserts that there are 64 chars, that no char occurs twice, and that none equals pad_char.
    pub fn init(alphabet_chars: []const u8, pad_char: u8) -> Base64Alphabet {
        assert(alphabet_chars.len == 64);
        var table = Base64Alphabet {
            .char_to_index = undefined,
            .char_in_alphabet = []bool{false} ** 256,
            .pad_char = pad_char,
        };
        var index: usize = 0;
        while (index < alphabet_chars.len) : (index += 1) {
            const ch = alphabet_chars[index];
            // a char that is already marked would be a duplicate.
            assert(!table.char_in_alphabet[ch]);
            assert(ch != pad_char);
            table.char_to_index[ch] = u8(index);
            table.char_in_alphabet[ch] = true;
        }
        return table;
    }
};
error InvalidPadding;
/// For use with ::decodeExact.
/// Returns error.InvalidPadding if encoded.len is not a multiple of 4.
/// No other validation happens here; the size itself comes from ::calcDecodedSizeExactUnsafe.
pub fn calcDecodedSizeExact(encoded: []const u8, pad_char: u8) -> %usize {
    const is_whole_groups = encoded.len % 4 == 0;
    if (!is_whole_groups) return error.InvalidPadding;
    return calcDecodedSizeExactUnsafe(encoded, pad_char);
}
error InvalidCharacter;
/// Decodes source into dest, validating every char and the padding.
/// dest.len must be what you get from ::calcDecodedSizeExact (asserted), which also means
/// that source.len must be a multiple of 4.
/// invalid characters result in error.InvalidCharacter. a pad char anywhere other than the
/// last 1 or 2 positions of the final group counts as an invalid character.
/// invalid padding results in error.InvalidPadding: the bits of the last char before the
/// padding that do not fit into a whole byte must be zero.
/// dest may already be partially written when an error is returned.
pub fn decodeExact(dest: []u8, source: []const u8, alphabet: &const Base64Alphabet) -> %void {
    assert(dest.len == %%calcDecodedSizeExact(source, alphabet.pad_char));
    assert(source.len % 4 == 0);

    var src_pos: usize = 0;
    var dest_pos: usize = 0;
    while (src_pos < source.len) : (src_pos += 4) {
        const c0 = source[src_pos + 0];
        const c1 = source[src_pos + 1];
        const c2 = source[src_pos + 2];
        const c3 = source[src_pos + 3];

        // the first two chars of a group can never be padding.
        if (!alphabet.char_in_alphabet[c0]) return error.InvalidCharacter;
        if (!alphabet.char_in_alphabet[c1]) return error.InvalidCharacter;
        const v0 = alphabet.char_to_index[c0];
        const v1 = alphabet.char_to_index[c1];

        // padding is only recognized in the final group.
        const is_final_group = src_pos + 4 == source.len;
        if (!is_final_group or c3 != alphabet.pad_char) {
            // common case: 4 chars => 3 bytes
            if (!alphabet.char_in_alphabet[c2]) return error.InvalidCharacter;
            if (!alphabet.char_in_alphabet[c3]) return error.InvalidCharacter;
            const v2 = alphabet.char_to_index[c2];
            const v3 = alphabet.char_to_index[c3];
            dest[dest_pos + 0] = v0 << 2 | v1 >> 4;
            dest[dest_pos + 1] = v1 << 4 | v2 >> 2;
            dest[dest_pos + 2] = v2 << 6 | v3;
            dest_pos += 3;
        } else if (c2 != alphabet.pad_char) {
            // one pad char: 3 chars => 2 bytes
            if (!alphabet.char_in_alphabet[c2]) return error.InvalidCharacter;
            const v2 = alphabet.char_to_index[c2];
            dest[dest_pos + 0] = v0 << 2 | v1 >> 4;
            dest[dest_pos + 1] = v1 << 4 | v2 >> 2;
            // the low 2 bits of the third char have no byte to go into.
            if (v2 << 6 != 0) return error.InvalidPadding;
            dest_pos += 2;
        } else {
            // two pad chars: 2 chars => 1 byte
            dest[dest_pos + 0] = v0 << 2 | v1 >> 4;
            // the low 4 bits of the second char have no byte to go into.
            if (v1 << 4 != 0) return error.InvalidPadding;
            dest_pos += 1;
        }
    }

    assert(src_pos == source.len);
    assert(dest_pos == dest.len);
}
/// A Base64Alphabet plus a set of chars to skip while decoding.
/// For use with ::decodeWithIgnore.
pub const Base64AlphabetWithIgnore = struct {
    alphabet: Base64Alphabet,
    /// true for every char listed in ignore_chars at init.
    char_is_ignored: [256]bool,

    /// Builds the alphabet tables with Base64Alphabet.init and marks every char of ignore_chars.
    /// Asserts that no ignored char is an alphabet char, is listed twice, or equals pad_char.
    pub fn init(alphabet_chars: []const u8, pad_char: u8, ignore_chars: []const u8) -> Base64AlphabetWithIgnore {
        const base = Base64Alphabet.init(alphabet_chars, pad_char);
        var ignored = []bool{false} ** 256;
        var index: usize = 0;
        while (index < ignore_chars.len) : (index += 1) {
            const ch = ignore_chars[index];
            assert(!base.char_in_alphabet[ch]);
            assert(!ignored[ch]);
            assert(base.pad_char != ch);
            ignored[ch] = true;
        }
        return Base64AlphabetWithIgnore {
            .alphabet = base,
            .char_is_ignored = ignored,
        };
    }
};
/// For use with ::decodeWithIgnore.
/// Counts 3 bytes for every complete group of 4 encoded chars; leftover chars count as nothing.
/// Ignored chars and padding both make the real decoded size smaller than this value.
/// The return type is an error union, but nothing in this function returns an error.
pub fn calcDecodedSizeUpperBound(encoded_len: usize) -> %usize {
    const whole_groups = @divTrunc(encoded_len, 4);
    return whole_groups * 3;
}
error OutputTooSmall;
/// Decodes source into dest, skipping every char that alphabet_with_ignore marks as ignored.
/// Invalid characters that are not ignored result in error.InvalidCharacter. Once the padding
/// has begun, only pad chars and ignored chars may follow, and a third pad char is also an
/// invalid character.
/// Invalid padding results in error.InvalidPadding: the number of pad chars must match the
/// final group, and the unused bits of the last char before the padding must be zero.
/// Decoding more data than can fit in dest results in error.OutputTooSmall. See also ::calcDecodedSizeUpperBound.
/// Returns the number of bytes written to dest.
pub fn decodeWithIgnore(dest: []u8, source: []const u8, alphabet_with_ignore: &const Base64AlphabetWithIgnore) -> %usize {
    const alphabet = &const alphabet_with_ignore.alphabet;

    var src_pos: usize = 0;
    var dest_pos: usize = 0;

    while (true) {
        // collect the 6-bit values of the next (up to) 4 alphabet chars.
        var sextets: [4]u8 = undefined;
        var sextet_count: usize = 0;
        var pad_count: usize = 0;

        while (sextet_count < 4 and src_pos < source.len) {
            const c = source[src_pos];
            src_pos += 1;

            if (alphabet.char_in_alphabet[c]) {
                // normal char
                sextets[sextet_count] = alphabet.char_to_index[c];
                sextet_count += 1;
                continue;
            }
            // we're told to skip this one
            if (alphabet_with_ignore.char_is_ignored[c]) continue;
            if (c != alphabet.pad_char) return error.InvalidCharacter;

            // the padding has begun. consume the rest of the input, counting the pad chars.
            pad_count += 1;
            while (src_pos < source.len) {
                const tail_char = source[src_pos];
                src_pos += 1;
                if (tail_char == alphabet.pad_char) {
                    pad_count += 1;
                    if (pad_count > 2) return error.InvalidCharacter;
                } else if (!alphabet_with_ignore.char_is_ignored[tail_char]) {
                    // chars can be ignored during the padding too, but nothing else may show up.
                    return error.InvalidCharacter;
                }
            }
            break;
        }

        switch (sextet_count) {
            4 => {
                // common case: a whole group. keep going with the next one.
                if (dest_pos + 3 > dest.len) return error.OutputTooSmall;
                assert(pad_count == 0);
                dest[dest_pos + 0] = sextets[0] << 2 | sextets[1] >> 4;
                dest[dest_pos + 1] = sextets[1] << 4 | sextets[2] >> 2;
                dest[dest_pos + 2] = sextets[2] << 6 | sextets[3];
                dest_pos += 3;
                continue;
            },
            3 => {
                // final group of 2 bytes, which needs exactly one pad char.
                if (dest_pos + 2 > dest.len) return error.OutputTooSmall;
                if (pad_count != 1) return error.InvalidPadding;
                dest[dest_pos + 0] = sextets[0] << 2 | sextets[1] >> 4;
                dest[dest_pos + 1] = sextets[1] << 4 | sextets[2] >> 2;
                if (sextets[2] << 6 != 0) return error.InvalidPadding;
                dest_pos += 2;
                break;
            },
            2 => {
                // final group of 1 byte, which needs exactly two pad chars.
                if (dest_pos + 1 > dest.len) return error.OutputTooSmall;
                if (pad_count != 2) return error.InvalidPadding;
                dest[dest_pos + 0] = sextets[0] << 2 | sextets[1] >> 4;
                if (sextets[1] << 4 != 0) return error.InvalidPadding;
                dest_pos += 1;
                break;
            },
            1 => {
                // a single char cannot make up even one byte.
                return error.InvalidPadding;
            },
            0 => {
                // end of input on a group boundary. padding has nothing to belong to here.
                if (pad_count != 0) return error.InvalidPadding;
                break;
            },
            else => unreachable,
        }
    }

    assert(src_pos == source.len);
    return dest_pos;
}
/// Decoding table built from standard_alphabet_chars and standard_pad_char.
/// For use with ::decodeExactUnsafe.
pub const standard_alphabet_unsafe = Base64AlphabetUnsafe.init(standard_alphabet_chars, standard_pad_char);
/// Lookup table that ::decodeExactUnsafe uses to translate chars. It carries no validity information.
pub const Base64AlphabetUnsafe = struct {
    /// maps an alphabet char to its 6-bit value, e.g. 'A' => 0.
    /// entries for chars outside of the 64 alphabet chars are left undefined.
    char_to_index: [256]u8,
    pad_char: u8,

    /// Builds the table from the 64 chars of alphabet_chars, given in index order.
    /// Asserts that there are 64 chars and that none equals pad_char. Duplicates are not detected.
    pub fn init(alphabet_chars: []const u8, pad_char: u8) -> Base64AlphabetUnsafe {
        assert(alphabet_chars.len == 64);
        var table = Base64AlphabetUnsafe {
            .char_to_index = undefined,
            .pad_char = pad_char,
        };
        var index: usize = 0;
        while (index < alphabet_chars.len) : (index += 1) {
            const ch = alphabet_chars[index];
            assert(ch != pad_char);
            table.char_to_index[ch] = u8(index);
        }
        return table;
    }
};
/// For use with ::decodeExactUnsafe.
/// The encoded buffer must be valid; in particular encoded.len must be a multiple of 4.
/// Returns 3 bytes per group of 4 chars, minus 1 for each of the up to 2 trailing pad chars.
pub fn calcDecodedSizeExactUnsafe(encoded: []const u8, pad_char: u8) -> usize {
    if (encoded.len == 0) return 0;

    const unpadded_size = @divExact(encoded.len, 4) * 3;
    const last = encoded.len - 1;
    if (encoded[last] != pad_char) return unpadded_size;
    // the second to last char only counts as padding when the last char is padding too.
    if (encoded[last - 1] != pad_char) return unpadded_size - 1;
    return unpadded_size - 2;
}
/// Decodes source into dest without validating anything.
/// dest.len must be what you get from ::calcDecodedSizeExactUnsafe (asserted).
/// invalid characters or padding will result in undefined values.
pub fn decodeExactUnsafe(dest: []u8, source: []const u8, alphabet: &const Base64AlphabetUnsafe) {
    assert(dest.len == calcDecodedSizeExactUnsafe(source, alphabet.pad_char));

    // drop all trailing pad chars; what remains is the chars that carry data.
    var remaining: usize = source.len;
    while (remaining > 0 and source[remaining - 1] == alphabet.pad_char) {
        remaining -= 1;
    }

    var src_pos: usize = 0;
    var dest_pos: usize = 0;

    // whole groups, except for the last (possibly partial) one.
    while (remaining > 4) : (remaining -= 4) {
        const v0 = alphabet.char_to_index[source[src_pos + 0]];
        const v1 = alphabet.char_to_index[source[src_pos + 1]];
        const v2 = alphabet.char_to_index[source[src_pos + 2]];
        const v3 = alphabet.char_to_index[source[src_pos + 3]];
        dest[dest_pos + 0] = v0 << 2 | v1 >> 4;
        dest[dest_pos + 1] = v1 << 4 | v2 >> 2;
        dest[dest_pos + 2] = v2 << 6 | v3;
        dest_pos += 3;
        src_pos += 4;
    }

    // the last group: every char beyond the first one completes one more byte.
    if (remaining > 1) {
        const first = alphabet.char_to_index[source[src_pos + 0]];
        const second = alphabet.char_to_index[source[src_pos + 1]];
        dest[dest_pos] = first << 2 | second >> 4;
        dest_pos += 1;
        if (remaining > 2) {
            const third = alphabet.char_to_index[source[src_pos + 2]];
            dest[dest_pos] = second << 4 | third >> 2;
            dest_pos += 1;
            if (remaining > 3) {
                const fourth = alphabet.char_to_index[source[src_pos + 3]];
                dest[dest_pos] = third << 6 | fourth;
                dest_pos += 1;
            }
        }
    }
}
// Runs the whole suite twice: once normally and once inside a comptime expression.
test "base64" {
// raises the branch quota for compile-time evaluation; presumably needed by the comptime run below.
@setEvalBranchQuota(5000);
%%testBase64();
comptime %%testBase64();
}
// Runs every test vector and returns the first unexpected error.
fn testBase64() -> %void {
// round trips of "foobar" prefixes through every encode/decode api.
%return testAllApis("", "");
%return testAllApis("f", "Zg==");
%return testAllApis("fo", "Zm8=");
%return testAllApis("foo", "Zm9v");
%return testAllApis("foob", "Zm9vYg==");
%return testAllApis("fooba", "Zm9vYmE=");
%return testAllApis("foobar", "Zm9vYmFy");
// the same values with spaces sprinkled in, decoded while ignoring ' '.
%return testDecodeIgnoreSpace("", " ");
%return testDecodeIgnoreSpace("f", "Z g= =");
%return testDecodeIgnoreSpace("fo", " Zm8=");
%return testDecodeIgnoreSpace("foo", "Zm9v ");
%return testDecodeIgnoreSpace("foob", "Zm9vYg = = ");
%return testDecodeIgnoreSpace("fooba", "Zm9v YmE=");
%return testDecodeIgnoreSpace("foobar", " Z m 9 v Y m F y ");
// test getting some api errors
%return testError("A", error.InvalidPadding);
%return testError("AA", error.InvalidPadding);
%return testError("AAA", error.InvalidPadding);
%return testError("A..A", error.InvalidCharacter);
%return testError("AA=A", error.InvalidCharacter);
%return testError("AA/=", error.InvalidPadding);
%return testError("A/==", error.InvalidPadding);
%return testError("A===", error.InvalidCharacter);
%return testError("====", error.InvalidCharacter);
// decoding into a buffer that is one byte too short.
%return testOutputTooSmallError("AA==");
%return testOutputTooSmallError("AAA=");
%return testOutputTooSmallError("AAAA");
%return testOutputTooSmallError("AAAAAA==");
}
/// Checks that encoding expected_decoded gives expected_encoded, and that every decode api
/// turns expected_encoded back into expected_decoded.
fn testAllApis(expected_decoded: []const u8, expected_encoded: []const u8) -> %void {
    // encode
    {
        var storage: [0x100]u8 = undefined;
        const encoded_size = calcEncodedSize(expected_decoded.len);
        var actual = storage[0..encoded_size];
        encode(actual, expected_decoded, standard_alphabet_chars, standard_pad_char);
        assert(mem.eql(u8, actual, expected_encoded));
    }

    // decodeExact
    {
        var storage: [0x100]u8 = undefined;
        const decoded_size = %return calcDecodedSizeExact(expected_encoded, standard_pad_char);
        var actual = storage[0..decoded_size];
        %return decodeExact(actual, expected_encoded, standard_alphabet);
        assert(mem.eql(u8, actual, expected_decoded));
    }

    // decodeWithIgnore
    {
        const standard_alphabet_ignore_nothing = Base64AlphabetWithIgnore.init(
            standard_alphabet_chars, standard_pad_char, "");
        var storage: [0x100]u8 = undefined;
        const size_bound = %return calcDecodedSizeUpperBound(expected_encoded.len);
        var actual = storage[0..size_bound];
        const written = %return decodeWithIgnore(actual, expected_encoded, standard_alphabet_ignore_nothing);
        // the upper bound may be larger than what actually gets decoded.
        assert(written <= actual.len);
        assert(mem.eql(u8, actual[0..written], expected_decoded));
    }

    // decodeExactUnsafe
    {
        var storage: [0x100]u8 = undefined;
        const decoded_size = calcDecodedSizeExactUnsafe(expected_encoded, standard_pad_char);
        var actual = storage[0..decoded_size];
        decodeExactUnsafe(actual, expected_encoded, standard_alphabet_unsafe);
        assert(mem.eql(u8, actual, expected_decoded));
    }
}
/// Decodes encoded with ::decodeWithIgnore while ignoring ' ', and asserts that the
/// written bytes equal expected_decoded.
fn testDecodeIgnoreSpace(expected_decoded: []const u8, encoded: []const u8) -> %void {
    const ignore_space = Base64AlphabetWithIgnore.init(
        standard_alphabet_chars, standard_pad_char, " ");
    var storage: [0x100]u8 = undefined;
    const size_bound = %return calcDecodedSizeUpperBound(encoded.len);
    var actual = storage[0..size_bound];
    const written = %return decodeWithIgnore(actual, encoded, ignore_space);
    assert(mem.eql(u8, actual[0..written], expected_decoded));
}
error ExpectedError;
/// Checks that the validating decoders reject encoded with expected_err.
/// Returns error.ExpectedError if a decode succeeds, or the actual error if it is a different one.
fn testError(encoded: []const u8, expected_err: error) -> %void {
    const ignore_space = Base64AlphabetWithIgnore.init(
        standard_alphabet_chars, standard_pad_char, " ");
    var storage: [0x100]u8 = undefined;

    // exact api: the failure may come from the size calculation or from the decode itself.
    if (calcDecodedSizeExact(encoded, standard_pad_char)) |decoded_size| {
        var actual = storage[0..decoded_size];
        if (decodeExact(actual, encoded, standard_alphabet)) |_| {
            return error.ExpectedError;
        } else |err| {
            if (err != expected_err) return err;
        }
    } else |err| {
        if (err != expected_err) return err;
    }

    // ignore api, with the whole buffer available as output.
    if (decodeWithIgnore(storage[0..], encoded, ignore_space)) |_| {
        return error.ExpectedError;
    } else |err| {
        if (err != expected_err) return err;
    }
}
/// Checks that ::decodeWithIgnore returns error.OutputTooSmall when dest is one byte
/// shorter than the exact decoded size of encoded.
fn testOutputTooSmallError(encoded: []const u8) -> %void {
    const ignore_space = Base64AlphabetWithIgnore.init(
        standard_alphabet_chars, standard_pad_char, " ");
    var storage: [0x100]u8 = undefined;
    const exact_size = calcDecodedSizeExactUnsafe(encoded, standard_pad_char);
    var too_small = storage[0..exact_size - 1];
    if (decodeWithIgnore(too_small, encoded, ignore_space)) |_| {
        return error.ExpectedError;
    } else |err| {
        if (err != error.OutputTooSmall) return err;
    }
}