Added compression header to make it reusable for other formats and more future proof

This commit is contained in:
Marc Gilleron 2021-01-31 15:47:36 +00:00
parent 0526c4e860
commit a0e7a3229c
3 changed files with 133 additions and 57 deletions

View File

@ -5,6 +5,17 @@ Version: 2
This page describes the binary format used by default in this module to serialize voxel blocks to files, network or databases.
### Changes from version 1
Overall the format can be more standalone than before, where information had to be known up-front.
- Made compressed container a bit more independent with a header
- Added version header
- Added 3D size
- Added depth information on each channel
There is no migration available from version 1.
Specification
----------------
@ -12,24 +23,23 @@ Specification
### Compressed container
A block is usually serialized as compressed data.
This is the format provided by the `VoxelBlockSerializer` utility class. If you don't use compression, the layout will correspond to `BlockData` described in the next listing.
This is the format provided by the `VoxelBlockSerializer` utility class. If you don't use compression, the layout will correspond to `BlockData` described in the next listing, and won't have this wrapper.
```
CompressedBlockData
- decompressed_data_size: uint32_t
- compressed_data
```
Compressed data starts with one byte. Depending on its value, what follows is different.
`compressed_data` must be decompressed using the LZ4 algorithm (without header), into a buffer big enough to contain `decompressed_data_size` bytes. Knowing that size is also important later on.
- 0: no compression. Following bytes can be read as block format directly. This is rarely used and could be for debugging.
- 1: LZ4 compression. The next 32-bit unsigned integer is the size of the decompressed data, and following bytes are compressed data using LZ4 default parameters. This mode is used by default.
### Top levels
Knowing the size of the decompressed data may be important when parsing the block later.
### Block format
The obtained data then contains the actual block.
It starts with version number `2`, then the 3D size of the block.
It starts with version number `2` in one byte, then some metadata and the actual voxels.
!!! note
The size and formats are be present to make the format standalone. When used within a chunked container like region files, it is recommended to check if they match the format expected for the volume as a whole.
The size and formats are present to make the format standalone. When used within a chunked container like region files, it is recommended to check if they match the format expected for the volume as a whole.
```
BlockData
@ -108,4 +118,4 @@ Although this format is currently implemented and usable, it has known issues.
Godot's `encode_variant` doesn't seem to care about endianness across architectures, so it's possible it becomes a problem in the future and gets changed to a custom format.
The rest of this spec is not affected by this and assumes we use little-endian, however the implementation of block channels with depth greater than 8-bit currently doesn't consider this either. This might be refined in a later iteration.
This will become important to adress if voxel games require communication between mobile and desktop, most likely.
This will become important to address if voxel games require communication between mobile and desktop.

View File

@ -11,6 +11,112 @@
//#include <core/map.h>
#include <core/os/file_access.h>
namespace VoxelCompressedData {
// Identifies how the payload of a compressed container is encoded.
// Compressed data starts with a single byte holding one of these values.
// What follows that byte depends on it.
enum Compression {
// No compression. All following bytes can be read as-is.
// Could be used for debugging.
COMPRESSION_NONE = 0,
// The next uint32_t will be the size of decompressed data.
// All following bytes are compressed data using LZ4 defaults.
// This is the fastest compression format.
COMPRESSION_LZ4 = 1,
// Number of valid modes above. Not a mode itself: it is the exclusive upper bound
// used when validating the header byte read from a container.
COMPRESSION_COUNT = 2
};
// Unpacks a compressed container into `dst`.
//
// `src` must begin with a one-byte `Compression` tag:
// - COMPRESSION_NONE: the remaining bytes are copied to `dst` unchanged.
// - COMPRESSION_LZ4: a uint32_t holding the decompressed size follows the tag, then the LZ4 payload.
//
// `dst` is resized to hold the decompressed data.
// Returns true on success. On failure an error is reported and false is returned;
// the contents of `dst` are then unspecified.
bool decompress(ArraySlice<const uint8_t> src, std::vector<uint8_t> &dst) {
	VOXEL_PROFILE_SCOPE();

	// The tag byte must be present, otherwise `src.size() - 1` below would wrap around.
	ERR_FAIL_COND_V_MSG(src.size() < sizeof(uint8_t), false, "Compressed data is empty");

	FileAccessMemory f;
	f.open_custom(src.data(), src.size());

	const Compression comp = static_cast<Compression>(f.get_8());
	ERR_FAIL_INDEX_V(comp, COMPRESSION_COUNT, false);

	switch (comp) {
		case COMPRESSION_NONE: {
			// We still have to do a copy. The point of this container is compression,
			// so we don't worry too much about the performance impact of not using `src` directly.
			dst.resize(src.size() - 1);
			memcpy(dst.data(), src.data() + 1, dst.size());
		} break;

		case COMPRESSION_LZ4: {
			const uint32_t header_size = sizeof(uint8_t) + sizeof(uint32_t);
			// Reject truncated input before reading the size field,
			// and so that `src.size() - header_size` cannot wrap around.
			ERR_FAIL_COND_V_MSG(src.size() < header_size, false, "Compressed data is too small to contain a LZ4 header");

			const uint32_t decompressed_size = f.get_32();
			dst.resize(decompressed_size);

			// LZ4 returns a signed value: negative means the stream is malformed.
			// It must stay signed for the error check below to be meaningful.
			const int actually_decompressed_size = LZ4_decompress_safe(
					reinterpret_cast<const char *>(src.data()) + header_size,
					reinterpret_cast<char *>(dst.data()),
					src.size() - header_size,
					dst.size());

			ERR_FAIL_COND_V_MSG(actually_decompressed_size < 0, false,
					String("LZ4 decompression error {0}").format(varray(actually_decompressed_size)));

			// Non-negative at this point, so the cast is lossless.
			ERR_FAIL_COND_V_MSG(static_cast<uint32_t>(actually_decompressed_size) != decompressed_size, false,
					String("Expected {0} bytes, obtained {1}")
							.format(varray(decompressed_size, actually_decompressed_size)));
		} break;

		default:
			ERR_PRINT("Invalid compression header");
			return false;
	}

	return true;
}
// Packs `src` into a compressed container written to `dst`.
//
// The output begins with a one-byte tag holding `comp`:
// - COMPRESSION_NONE: the tag is followed by a verbatim copy of `src`.
// - COMPRESSION_LZ4: the tag is followed by a uint32_t holding `src.size()`, then the LZ4 payload.
//
// `dst` is resized to exactly fit the container.
// Returns true on success. On failure an error is reported and false is returned;
// the contents of `dst` are then unspecified.
bool compress(ArraySlice<const uint8_t> src, std::vector<uint8_t> &dst, Compression comp) {
	VOXEL_PROFILE_SCOPE();

	switch (comp) {
		case COMPRESSION_NONE: {
			dst.resize(src.size() + 1);
			dst[0] = comp;
			memcpy(dst.data() + 1, src.data(), src.size());
		} break;

		case COMPRESSION_LZ4: {
			const uint32_t header_size = sizeof(uint8_t) + sizeof(uint32_t);

			// LZ4_compressBound returns 0 when the input size is not supported (too large).
			const int max_compressed_size = LZ4_compressBound(src.size());
			ERR_FAIL_COND_V(max_compressed_size <= 0, false);

			// Allocate for the worst case; the buffer is shrunk to the real size afterwards.
			dst.resize(header_size + max_compressed_size);

			// Write header
			FileAccessMemory f;
			f.open_custom(dst.data(), dst.size());
			f.store_8(comp);
			f.store_32(src.size());
			f.close();

			// LZ4 returns a signed value: zero or negative means compression failed.
			// It must stay signed for the error check below to be meaningful.
			const int compressed_size = LZ4_compress_default(
					reinterpret_cast<const char *>(src.data()),
					reinterpret_cast<char *>(dst.data()) + header_size,
					src.size(),
					dst.size() - header_size);

			ERR_FAIL_COND_V(compressed_size < 0, false);
			ERR_FAIL_COND_V(compressed_size == 0, false);

			dst.resize(header_size + compressed_size);
		} break;

		default:
			ERR_PRINT("Invalid compression header");
			return false;
	}

	return true;
}
} // namespace VoxelCompressedData
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace {
const uint8_t BLOCK_VERSION = 2;
const unsigned int BLOCK_TRAILING_MAGIC = 0x900df00d;
@ -369,24 +475,11 @@ VoxelBlockSerializerInternal::SerializeResult VoxelBlockSerializerInternal::seri
ERR_FAIL_COND_V(!res.success, SerializeResult(_compressed_data, false));
const std::vector<uint8_t> &data = res.data;
const unsigned int header_size = sizeof(unsigned int);
_compressed_data.resize(header_size + LZ4_compressBound(data.size()));
res.success = VoxelCompressedData::compress(
ArraySlice<const uint8_t>(data.data(), 0, data.size()), _compressed_data,
VoxelCompressedData::COMPRESSION_LZ4);
ERR_FAIL_COND_V(!res.success, SerializeResult(_compressed_data, false));
// Write header
CRASH_COND(_file_access_memory.open_custom(_compressed_data.data(), _compressed_data.size()) != OK);
_file_access_memory.store_32(data.size());
_file_access_memory.close();
int compressed_size = LZ4_compress_default(
(const char *)data.data(),
(char *)_compressed_data.data() + header_size,
data.size(),
_compressed_data.size() - header_size);
CRASH_COND(compressed_size < 0);
CRASH_COND(compressed_size == 0);
_compressed_data.resize(header_size + compressed_size);
return SerializeResult(_compressed_data, true);
}
@ -395,25 +488,8 @@ bool VoxelBlockSerializerInternal::decompress_and_deserialize(
VOXEL_PROFILE_SCOPE();
// Read header
const unsigned int header_size = sizeof(unsigned int);
ERR_FAIL_COND_V(_file_access_memory.open_custom(p_data.data(), p_data.size()) != OK, false);
const unsigned int decompressed_size = _file_access_memory.get_32();
_file_access_memory.close();
_data.resize(decompressed_size);
const unsigned int actually_decompressed_size = LZ4_decompress_safe(
(const char *)p_data.data() + header_size,
(char *)_data.data(),
p_data.size() - header_size,
_data.size());
ERR_FAIL_COND_V_MSG(actually_decompressed_size < 0, false,
String("LZ4 decompression error {0}").format(varray(actually_decompressed_size)));
ERR_FAIL_COND_V_MSG(actually_decompressed_size != decompressed_size, false,
String("Expected {0} bytes, obtained {1}").format(varray(decompressed_size, actually_decompressed_size)));
const bool res = VoxelCompressedData::decompress(ArraySlice<const uint8_t>(p_data.data(), 0, p_data.size()), _data);
ERR_FAIL_COND_V(!res, false);
return deserialize(_data, out_voxel_buffer);
}

View File

@ -58,16 +58,6 @@ public:
return ArraySlice<T>(_ptr + from, _size - from);
}
// const ArraySlice<T> sub_const(size_t from, size_t len) const {
// CRASH_COND(from + len >= _size);
// return ArraySlice{ _ptr + from, len };
// }
// const ArraySlice<T> sub_const(size_t from) const {
// CRASH_COND(from >= _size);
// return ArraySlice{ _ptr + from, _size - from };
// }
template <typename U>
ArraySlice<U> reinterpret_cast_to() const {
const size_t size_in_bytes = _size * sizeof(T);