From d1624a552151bcb152b7abf63df6501b63458d78 Mon Sep 17 00:00:00 2001 From: lhofhansl Date: Tue, 31 Aug 2021 17:32:31 -0700 Subject: [PATCH] Switch MapBlock compression to zstd (#10788) * Add zstd support. * Rearrange serialization order * Compress entire mapblock Co-authored-by: sfan5 --- .github/workflows/build.yml | 2 +- .github/workflows/macos.yml | 2 +- .gitlab-ci.yml | 4 +- Dockerfile | 2 +- android/native/jni/Android.mk | 10 +- builtin/settingtypes.txt | 16 ++- cmake/Modules/FindZstd.cmake | 9 ++ doc/world_format.txt | 91 +++++++++------- misc/debpkg-control | 2 +- src/CMakeLists.txt | 11 ++ src/defaultsettings.cpp | 4 +- src/main.cpp | 73 ++++++++++++- src/mapblock.cpp | 166 ++++++++++++++++++++---------- src/mapblock.h | 2 +- src/mapgen/mg_schematic.cpp | 9 +- src/mapgen/mg_schematic.h | 1 + src/mapnode.cpp | 30 ++---- src/mapnode.h | 5 +- src/serialization.cpp | 137 ++++++++++++++++++++++-- src/serialization.h | 14 ++- src/unittest/test_compression.cpp | 42 +++++++- util/buildbot/buildwin32.sh | 6 ++ util/buildbot/buildwin64.sh | 6 ++ util/ci/common.sh | 2 +- 24 files changed, 494 insertions(+), 152 deletions(-) create mode 100644 cmake/Modules/FindZstd.cmake diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d268aa0cb..98b1ffe8a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -227,7 +227,7 @@ jobs: env: VCPKG_VERSION: 0bf3923f9fab4001c00f0f429682a0853b5749e0 # 2020.11 - vcpkg_packages: irrlicht zlib curl[winssl] openal-soft libvorbis libogg sqlite3 freetype luajit + vcpkg_packages: irrlicht zlib zstd curl[winssl] openal-soft libvorbis libogg sqlite3 freetype luajit strategy: fail-fast: false matrix: diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 3ec157d0e..d97cff1aa 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -34,7 +34,7 @@ jobs: - uses: actions/checkout@v2 - name: Install deps run: | - pkgs=(cmake freetype gettext gmp hiredis jpeg jsoncpp leveldb libogg libpng libvorbis luajit) + pkgs=(cmake freetype gettext gmp hiredis jpeg jsoncpp leveldb libogg libpng libvorbis luajit zstd) brew update brew install ${pkgs[@]} brew unlink $(brew ls --formula) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cabce627f..252ed8a5b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,7 +17,7 @@ variables: stage: build before_script: - apt-get update - - apt-get -y install build-essential git cmake libpng-dev libjpeg-dev libxxf86vm-dev libgl1-mesa-dev libsqlite3-dev libleveldb-dev libogg-dev libvorbis-dev libopenal-dev libcurl4-gnutls-dev libfreetype6-dev zlib1g-dev libgmp-dev libjsoncpp-dev + - apt-get -y install build-essential git cmake libpng-dev libjpeg-dev libxxf86vm-dev libgl1-mesa-dev libsqlite3-dev libleveldb-dev libogg-dev libvorbis-dev libopenal-dev libcurl4-gnutls-dev libfreetype6-dev zlib1g-dev libgmp-dev libjsoncpp-dev libzstd-dev script: - git clone https://github.com/minetest/irrlicht -b $IRRLICHT_TAG lib/irrlichtmt - mkdir cmakebuild @@ -187,7 +187,7 @@ build:fedora-28: extends: .build_template image: fedora:28 before_script: - - dnf -y install make git gcc gcc-c++ kernel-devel cmake libjpeg-devel libpng-devel libcurl-devel openal-soft-devel libvorbis-devel libXxf86vm-devel libogg-devel freetype-devel mesa-libGL-devel zlib-devel jsoncpp-devel gmp-devel sqlite-devel luajit-devel leveldb-devel ncurses-devel spatialindex-devel + - dnf -y install make git gcc gcc-c++ kernel-devel cmake libjpeg-devel libpng-devel libcurl-devel openal-soft-devel libvorbis-devel libXxf86vm-devel libogg-devel freetype-devel mesa-libGL-devel zlib-devel jsoncpp-devel gmp-devel sqlite-devel luajit-devel leveldb-devel ncurses-devel spatialindex-devel libzstd-devel ## ## MinGW for Windows diff --git a/Dockerfile b/Dockerfile index 8843e4bbc..481dab237 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ COPY textures /usr/src/minetest/textures WORKDIR /usr/src/minetest -RUN apk add --no-cache git build-base cmake sqlite-dev curl-dev zlib-dev \ +RUN apk add --no-cache git build-base cmake sqlite-dev curl-dev zlib-dev zstd-dev \ gmp-dev jsoncpp-dev postgresql-dev ninja luajit-dev ca-certificates && \ git clone --depth=1 -b ${MINETEST_GAME_VERSION} https://github.com/minetest/minetest_game.git ./games/minetest_game && \ rm -fr ./games/minetest_game/.git diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk index f92ac1d60..26e9b058b 100644 --- a/android/native/jni/Android.mk +++ b/android/native/jni/Android.mk @@ -57,6 +57,11 @@ LOCAL_MODULE := Vorbis LOCAL_SRC_FILES := deps/Android/Vorbis/${NDK_TOOLCHAIN_VERSION}/$(APP_ABI)/libvorbis.a include $(PREBUILT_STATIC_LIBRARY) +include $(CLEAR_VARS) +LOCAL_MODULE := Zstd +LOCAL_SRC_FILES := deps/Android/Zstd/${NDK_TOOLCHAIN_VERSION}/$(APP_ABI)/libzstd.a +include $(PREBUILT_STATIC_LIBRARY) + include $(CLEAR_VARS) LOCAL_MODULE := Minetest @@ -101,7 +106,8 @@ LOCAL_C_INCLUDES := \ deps/Android/LuaJIT/src \ deps/Android/OpenAL-Soft/include \ deps/Android/sqlite \ - deps/Android/Vorbis/include + deps/Android/Vorbis/include \ + deps/Android/Zstd/include LOCAL_SRC_FILES := \ $(wildcard ../../src/client/*.cpp) \ @@ -201,7 +207,7 @@ LOCAL_SRC_FILES += \ # SQLite3 LOCAL_SRC_FILES += deps/Android/sqlite/sqlite3.c -LOCAL_STATIC_LIBRARIES += Curl Freetype Irrlicht OpenAL mbedTLS mbedx509 mbedcrypto Vorbis LuaJIT GetText android_native_app_glue $(PROFILER_LIBS) #LevelDB +LOCAL_STATIC_LIBRARIES += Curl Freetype Irrlicht OpenAL mbedTLS mbedx509 mbedcrypto Vorbis LuaJIT GetText Zstd android_native_app_glue $(PROFILER_LIBS) #LevelDB LOCAL_LDLIBS := -lEGL -lGLESv1_CM -lGLESv2 -landroid -lOpenSLES diff --git a/builtin/settingtypes.txt b/builtin/settingtypes.txt index 25a51b888..43e70e052 100644 --- a/builtin/settingtypes.txt +++ b/builtin/settingtypes.txt @@ -1100,11 +1100,10 @@ full_block_send_enable_min_time_from_building (Delay in sending blocks after bui # client number. max_packets_per_iteration (Max. packets per iteration) int 1024 -# ZLib compression level to use when sending mapblocks to the client. -# -1 - Zlib's default compression level -# 0 - no compresson, fastest +# Compression level to use when sending mapblocks to the client. +# -1 - use default compression level +# 0 - least compresson, fastest # 9 - best compression, slowest -# (levels 1-3 use Zlib's "fast" method, 4-9 use the normal method) map_compression_level_net (Map Compression Level for Network Transfer) int -1 -1 9 [*Game] @@ -1303,12 +1302,11 @@ max_objects_per_block (Maximum objects per block) int 64 # See https://www.sqlite.org/pragma.html#pragma_synchronous sqlite_synchronous (Synchronous SQLite) enum 2 0,1,2 -# ZLib compression level to use when saving mapblocks to disk. -# -1 - Zlib's default compression level -# 0 - no compresson, fastest +# Compression level to use when saving mapblocks to disk. +# -1 - use default compression level +# 0 - least compresson, fastest # 9 - best compression, slowest -# (levels 1-3 use Zlib's "fast" method, 4-9 use the normal method) -map_compression_level_disk (Map Compression Level for Disk Storage) int 3 -1 9 +map_compression_level_disk (Map Compression Level for Disk Storage) int -1 -1 9 # Length of a server tick and the interval at which objects are generally updated over # network. diff --git a/cmake/Modules/FindZstd.cmake b/cmake/Modules/FindZstd.cmake new file mode 100644 index 000000000..461e4d5a6 --- /dev/null +++ b/cmake/Modules/FindZstd.cmake @@ -0,0 +1,9 @@ +mark_as_advanced(ZSTD_LIBRARY ZSTD_INCLUDE_DIR) + +find_path(ZSTD_INCLUDE_DIR NAMES zstd.h) + +find_library(ZSTD_LIBRARY NAMES zstd) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Zstd DEFAULT_MSG ZSTD_LIBRARY ZSTD_INCLUDE_DIR) + diff --git a/doc/world_format.txt b/doc/world_format.txt index a8a9e463e..eb1d7f728 100644 --- a/doc/world_format.txt +++ b/doc/world_format.txt @@ -1,5 +1,5 @@ ============================= -Minetest World Format 22...27 +Minetest World Format 22...29 ============================= This applies to a world format carrying the block serialization version @@ -8,6 +8,7 @@ This applies to a world format carrying the block serialization version - 0.4.0 (23) - 24 was never released as stable and existed for ~2 days - 27 was added in 0.4.15-dev +- 29 was added in 5.5.0-dev The block serialization version does not fully specify every aspect of this format; if compliance with this format is to be checked, it needs to be @@ -281,6 +282,8 @@ MapBlock serialization format NOTE: Byte order is MSB first (big-endian). NOTE: Zlib data is in such a format that Python's zlib at least can directly decompress. +NOTE: Since version 29 zstd is used instead of zlib. In addition the entire + block is first serialized and then compressed (except the version byte). u8 version - map format version number, see serialisation.h for the latest number @@ -324,6 +327,20 @@ u16 lighting_complete then Minetest will correct lighting in the day light bank when the block at (1, 0, 0) is also loaded. +if map format version >= 29: + u32 timestamp + - Timestamp when last saved, as seconds from starting the game. + - 0xffffffff = invalid/unknown timestamp, nothing should be done with the time + difference when loaded + + u16 num_name_id_mappings + foreach num_name_id_mappings + u16 id + u16 name_len + u8[name_len] name +if map format version < 29: + -- Nothing right here, timpstamp and node id mappings are serialized later + u8 content_width - Number of bytes in the content (param0) fields of nodes if map format version <= 23: @@ -335,7 +352,7 @@ u8 params_width - Number of bytes used for parameters per node - Always 2 -zlib-compressed node data: +node data (zlib-compressed if version < 29): if content_width == 1: - content: u8[4096]: param0 fields @@ -348,31 +365,31 @@ if content_width == 2: u8[4096]: param2 fields - The location of a node in each of those arrays is (z*16*16 + y*16 + x). -zlib-compressed node metadata list +node metadata list (zlib-compressed if version < 29): - content: -if map format version <= 22: - u16 version (=1) - u16 count of metadata - foreach count: - u16 position (p.Z*MAP_BLOCKSIZE*MAP_BLOCKSIZE + p.Y*MAP_BLOCKSIZE + p.X) - u16 type_id - u16 content_size - u8[content_size] content of metadata. Format depends on type_id, see below. -if map format version >= 23: - u8 version -- Note: type was u16 for map format version <= 22 - -- = 1 for map format version < 28 - -- = 2 since map format version 28 - u16 count of metadata - foreach count: - u16 position (p.Z*MAP_BLOCKSIZE*MAP_BLOCKSIZE + p.Y*MAP_BLOCKSIZE + p.X) - u32 num_vars - foreach num_vars: - u16 key_len - u8[key_len] key - u32 val_len - u8[val_len] value - u8 is_private -- only for version >= 2. 0 = not private, 1 = private - serialized inventory + if map format version <= 22: + u16 version (=1) + u16 count of metadata + foreach count: + u16 position (p.Z*MAP_BLOCKSIZE*MAP_BLOCKSIZE + p.Y*MAP_BLOCKSIZE + p.X) + u16 type_id + u16 content_size + u8[content_size] content of metadata. Format depends on type_id, see below. + if map format version >= 23: + u8 version -- Note: type was u16 for map format version <= 22 + -- = 1 for map format version < 28 + -- = 2 since map format version 28 + u16 count of metadata + foreach count: + u16 position (p.Z*MAP_BLOCKSIZE*MAP_BLOCKSIZE + p.Y*MAP_BLOCKSIZE + p.X) + u32 num_vars + foreach num_vars: + u16 key_len + u8[key_len] key + u32 val_len + u8[val_len] value + u8 is_private -- only for version >= 2. 0 = not private, 1 = private + serialized inventory - Node timers if map format version == 23: @@ -403,20 +420,18 @@ foreach static_object_count: u16 data_size u8[data_size] data -u32 timestamp -- Timestamp when last saved, as seconds from starting the game. -- 0xffffffff = invalid/unknown timestamp, nothing should be done with the time - difference when loaded +if map format version < 29: + u32 timestamp + - Same meaning as the timestamp further up -u8 name-id-mapping version -- Always 0 + u8 name-id-mapping version + - Always 0 -u16 num_name_id_mappings - -foreach num_name_id_mappings - u16 id - u16 name_len - u8[name_len] name + u16 num_name_id_mappings + foreach num_name_id_mappings + u16 id + u16 name_len + u8[name_len] name - Node timers if map format version == 25: diff --git a/misc/debpkg-control b/misc/debpkg-control index 7c0134bb0..e867f3eb9 100644 --- a/misc/debpkg-control +++ b/misc/debpkg-control @@ -3,7 +3,7 @@ Priority: extra Standards-Version: 3.6.2 Package: minetest-staging Version: 5.4.0-DATEPLACEHOLDER -Depends: libc6, libcurl3-gnutls, libfreetype6, libgl1, JPEG_PLACEHOLDER, JSONCPP_PLACEHOLDER, LEVELDB_PLACEHOLDER, libopenal1, libpng16-16, libsqlite3-0, libstdc++6, libvorbisfile3, libx11-6, libxxf86vm1, zlib1g +Depends: libc6, libcurl3-gnutls, libfreetype6, libgl1, JPEG_PLACEHOLDER, JSONCPP_PLACEHOLDER, LEVELDB_PLACEHOLDER, libopenal1, libpng16-16, libsqlite3-0, libstdc++6, libvorbisfile3, libx11-6, libxxf86vm1, libzstd1, zlib1g Maintainer: Loic Blot Homepage: https://www.minetest.net/ Vcs-Git: https://github.com/minetest/minetest.git diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a5e48b49..addb0af3f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -271,9 +271,13 @@ if(WIN32) find_path(ZLIB_INCLUDE_DIR "zlib.h" DOC "Zlib include directory") find_library(ZLIB_LIBRARIES "zlib" DOC "Path to zlib library") + find_path(ZSTD_INCLUDE_DIR "zstd.h" DOC "Zstd include directory") + find_library(ZSTD_LIBRARY "zstd" DOC "Path to zstd library") + # Dll's are automatically copied to the output directory by vcpkg when VCPKG_APPLOCAL_DEPS=ON if(NOT VCPKG_APPLOCAL_DEPS) find_file(ZLIB_DLL NAMES "zlib.dll" "zlib1.dll" DOC "Path to zlib.dll for installation (optional)") + find_file(ZSTD_DLL NAMES "zstd.dll" DOC "Path to zstd.dll for installation (optional)") if(ENABLE_SOUND) set(OPENAL_DLL "" CACHE FILEPATH "Path to OpenAL32.dll for installation (optional)") set(OGG_DLL "" CACHE FILEPATH "Path to libogg.dll for installation (optional)") @@ -296,6 +300,7 @@ else() endif() find_package(ZLIB REQUIRED) + find_package(Zstd REQUIRED) set(PLATFORM_LIBS -lpthread ${CMAKE_DL_LIBS}) if(APPLE) set(PLATFORM_LIBS "-framework CoreFoundation" ${PLATFORM_LIBS}) @@ -486,6 +491,7 @@ include_directories( ${PROJECT_BINARY_DIR} ${PROJECT_SOURCE_DIR} ${ZLIB_INCLUDE_DIR} + ${ZSTD_INCLUDE_DIR} ${SOUND_INCLUDE_DIRS} ${SQLITE3_INCLUDE_DIR} ${LUA_INCLUDE_DIR} @@ -521,6 +527,7 @@ if(BUILD_CLIENT) ${PROJECT_NAME} ${ZLIB_LIBRARIES} IrrlichtMt::IrrlichtMt + ${ZSTD_LIBRARY} ${X11_LIBRARIES} ${SOUND_LIBRARIES} ${SQLITE3_LIBRARY} @@ -605,6 +612,7 @@ if(BUILD_SERVER) target_link_libraries( ${PROJECT_NAME}server ${ZLIB_LIBRARIES} + ${ZSTD_LIBRARY} ${SQLITE3_LIBRARY} ${JSON_LIBRARY} ${LUA_LIBRARY} @@ -821,6 +829,9 @@ if(WIN32) if(ZLIB_DLL) install(FILES ${ZLIB_DLL} DESTINATION ${BINDIR}) endif() + if(ZSTD_DLL) + install(FILES ${ZSTD_DLL} DESTINATION ${BINDIR}) + endif() if(BUILD_CLIENT AND FREETYPE_DLL) install(FILES ${FREETYPE_DLL} DESTINATION ${BINDIR}) endif() diff --git a/src/defaultsettings.cpp b/src/defaultsettings.cpp index faf839b3a..2cb345ba7 100644 --- a/src/defaultsettings.cpp +++ b/src/defaultsettings.cpp @@ -398,7 +398,7 @@ void set_default_settings() settings->setDefault("chat_message_limit_per_10sec", "8.0"); settings->setDefault("chat_message_limit_trigger_kick", "50"); settings->setDefault("sqlite_synchronous", "2"); - settings->setDefault("map_compression_level_disk", "3"); + settings->setDefault("map_compression_level_disk", "-1"); settings->setDefault("map_compression_level_net", "-1"); settings->setDefault("full_block_send_enable_min_time_from_building", "2.0"); settings->setDefault("dedicated_server_step", "0.09"); @@ -484,7 +484,7 @@ void set_default_settings() settings->setDefault("max_objects_per_block", "20"); settings->setDefault("sqlite_synchronous", "1"); settings->setDefault("map_compression_level_disk", "-1"); - settings->setDefault("map_compression_level_net", "3"); + settings->setDefault("map_compression_level_net", "-1"); settings->setDefault("server_map_save_interval", "15"); settings->setDefault("client_mapblock_limit", "1000"); settings->setDefault("active_block_range", "2"); diff --git a/src/main.cpp b/src/main.cpp index ffbdb7b5b..543b70333 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -38,6 +38,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "player.h" #include "porting.h" #include "network/socket.h" +#include "mapblock.h" #if USE_CURSES #include "terminal_chat_console.h" #endif @@ -111,6 +112,7 @@ static bool determine_subgame(GameParams *game_params); static bool run_dedicated_server(const GameParams &game_params, const Settings &cmd_args); static bool migrate_map_database(const GameParams &game_params, const Settings &cmd_args); +static bool recompress_map_database(const GameParams &game_params, const Settings &cmd_args, const Address &addr); /**********************************************************************/ @@ -302,6 +304,8 @@ static void set_allowed_options(OptionList *allowed_options) _("Migrate from current auth backend to another (Only works when using minetestserver or with --server)")))); allowed_options->insert(std::make_pair("terminal", ValueSpec(VALUETYPE_FLAG, _("Feature an interactive terminal (Only works when using minetestserver or with --server)")))); + allowed_options->insert(std::make_pair("recompress", ValueSpec(VALUETYPE_FLAG, + _("Recompress the blocks of the given map database.")))); #ifndef SERVER allowed_options->insert(std::make_pair("speedtests", ValueSpec(VALUETYPE_FLAG, _("Run speed tests")))); @@ -875,7 +879,7 @@ static bool run_dedicated_server(const GameParams &game_params, const Settings & return false; } - // Database migration + // Database migration/compression if (cmd_args.exists("migrate")) return migrate_map_database(game_params, cmd_args); @@ -885,6 +889,9 @@ static bool run_dedicated_server(const GameParams &game_params, const Settings & if (cmd_args.exists("migrate-auth")) return ServerEnvironment::migrateAuthDatabase(game_params, cmd_args); + if (cmd_args.getFlag("recompress")) + return recompress_map_database(game_params, cmd_args, bind_addr); + if (cmd_args.exists("terminal")) { #if USE_CURSES bool name_ok = true; @@ -1034,3 +1041,67 @@ static bool migrate_map_database(const GameParams &game_params, const Settings & return true; } + +static bool recompress_map_database(const GameParams &game_params, const Settings &cmd_args, const Address &addr) +{ + Settings world_mt; + const std::string world_mt_path = game_params.world_path + DIR_DELIM + "world.mt"; + + if (!world_mt.readConfigFile(world_mt_path.c_str())) { + errorstream << "Cannot read world.mt at " << world_mt_path << std::endl; + return false; + } + const std::string &backend = world_mt.get("backend"); + Server server(game_params.world_path, game_params.game_spec, false, addr, false); + MapDatabase *db = ServerMap::createDatabase(backend, game_params.world_path, world_mt); + + u32 count = 0; + u64 last_update_time = 0; + bool &kill = *porting::signal_handler_killstatus(); + const u8 serialize_as_ver = SER_FMT_VER_HIGHEST_WRITE; + + // This is ok because the server doesn't actually run + std::vector blocks; + db->listAllLoadableBlocks(blocks); + db->beginSave(); + std::istringstream iss(std::ios_base::binary); + std::ostringstream oss(std::ios_base::binary); + for (auto it = blocks.begin(); it != blocks.end(); ++it) { + if (kill) return false; + + std::string data; + db->loadBlock(*it, &data); + if (data.empty()) { + errorstream << "Failed to load block " << PP(*it) << std::endl; + return false; + } + + iss.str(data); + iss.clear(); + + MapBlock mb(nullptr, v3s16(0,0,0), &server); + u8 ver = readU8(iss); + mb.deSerialize(iss, ver, true); + + oss.str(""); + oss.clear(); + writeU8(oss, serialize_as_ver); + mb.serialize(oss, serialize_as_ver, true, -1); + + db->saveBlock(*it, oss.str()); + + count++; + if (count % 0xFF == 0 && porting::getTimeS() - last_update_time >= 1) { + std::cerr << " Recompressed " << count << " blocks, " + << (100.0f * count / blocks.size()) << "% completed.\r"; + db->endSave(); + db->beginSave(); + last_update_time = porting::getTimeS(); + } + } + std::cerr << std::endl; + db->endSave(); + + actionstream << "Done, " << count << " blocks were recompressed." << std::endl; + return true; +} diff --git a/src/mapblock.cpp b/src/mapblock.cpp index 0ca71e643..4958d3a65 100644 --- a/src/mapblock.cpp +++ b/src/mapblock.cpp @@ -355,7 +355,7 @@ static void correctBlockNodeIds(const NameIdMapping *nimap, MapNode *nodes, } } -void MapBlock::serialize(std::ostream &os, u8 version, bool disk, int compression_level) +void MapBlock::serialize(std::ostream &os_compressed, u8 version, bool disk, int compression_level) { if(!ser_ver_supported(version)) throw VersionMismatchException("ERROR: MapBlock format not supported"); @@ -365,6 +365,9 @@ void MapBlock::serialize(std::ostream &os, u8 version, bool disk, int compressio FATAL_ERROR_IF(version < SER_FMT_VER_LOWEST_WRITE, "Serialisation version error"); + std::ostringstream os_raw(std::ios_base::binary); + std::ostream &os = version >= 29 ? os_raw : os_compressed; + // First byte u8 flags = 0; if(is_underground) @@ -382,37 +385,52 @@ void MapBlock::serialize(std::ostream &os, u8 version, bool disk, int compressio Bulk node data */ NameIdMapping nimap; - if(disk) + SharedBuffer buf; + const u8 content_width = 2; + const u8 params_width = 2; + if(disk) { MapNode *tmp_nodes = new MapNode[nodecount]; - for(u32 i=0; indef()); - u8 content_width = 2; - u8 params_width = 2; - writeU8(os, content_width); - writeU8(os, params_width); - MapNode::serializeBulk(os, version, tmp_nodes, nodecount, - content_width, params_width, compression_level); + buf = MapNode::serializeBulk(version, tmp_nodes, nodecount, + content_width, params_width); delete[] tmp_nodes; + + // write timestamp and node/id mapping first + if (version >= 29) { + writeU32(os, getTimestamp()); + + nimap.serialize(os); + } } else { - u8 content_width = 2; - u8 params_width = 2; - writeU8(os, content_width); - writeU8(os, params_width); - MapNode::serializeBulk(os, version, data, nodecount, - content_width, params_width, compression_level); + buf = MapNode::serializeBulk(version, data, nodecount, + content_width, params_width); + } + + writeU8(os, content_width); + writeU8(os, params_width); + if (version >= 29) { + os.write(reinterpret_cast(*buf), buf.getSize()); + } else { + // prior to 29 node data was compressed individually + compress(buf, os, version, compression_level); } /* Node metadata */ - std::ostringstream oss(std::ios_base::binary); - m_node_metadata.serialize(oss, version, disk); - compressZlib(oss.str(), os, compression_level); + if (version >= 29) { + m_node_metadata.serialize(os, version, disk); + } else { + // use os_raw from above to avoid allocating another stream object + m_node_metadata.serialize(os_raw, version, disk); + // prior to 29 node data was compressed individually + compress(os_raw.str(), os, version, compression_level); + } /* Data that goes to disk, but not the network @@ -427,17 +445,24 @@ void MapBlock::serialize(std::ostream &os, u8 version, bool disk, int compressio // Static objects m_static_objects.serialize(os); - // Timestamp - writeU32(os, getTimestamp()); + if(version < 29){ + // Timestamp + writeU32(os, getTimestamp()); - // Write block-specific node definition id mapping - nimap.serialize(os); + // Write block-specific node definition id mapping + nimap.serialize(os); + } if(version >= 25){ // Node timers m_node_timers.serialize(os, version); } } + + if (version >= 29) { + // now compress the whole thing + compress(os_raw.str(), os_compressed, version, compression_level); + } } void MapBlock::serializeNetworkSpecific(std::ostream &os) @@ -449,7 +474,7 @@ void MapBlock::serializeNetworkSpecific(std::ostream &os) writeU8(os, 2); // version } -void MapBlock::deSerialize(std::istream &is, u8 version, bool disk) +void MapBlock::deSerialize(std::istream &in_compressed, u8 version, bool disk) { if(!ser_ver_supported(version)) throw VersionMismatchException("ERROR: MapBlock format not supported"); @@ -460,10 +485,16 @@ void MapBlock::deSerialize(std::istream &is, u8 version, bool disk) if(version <= 21) { - deSerialize_pre22(is, version, disk); + deSerialize_pre22(in_compressed, version, disk); return; } + // Decompress the whole block (version >= 29) + std::stringstream in_raw(std::ios_base::binary | std::ios_base::in | std::ios_base::out); + if (version >= 29) + decompress(in_compressed, in_raw, version); + std::istream &is = version >= 29 ? in_raw : in_compressed; + u8 flags = readU8(is); is_underground = (flags & 0x01) != 0; m_day_night_differs = (flags & 0x02) != 0; @@ -473,9 +504,20 @@ void MapBlock::deSerialize(std::istream &is, u8 version, bool disk) m_lighting_complete = readU16(is); m_generated = (flags & 0x08) == 0; - /* - Bulk node data - */ + NameIdMapping nimap; + if (disk && version >= 29) { + // Timestamp + TRACESTREAM(<<"MapBlock::deSerialize "<= 29) { + MapNode::deSerializeBulk(is, version, data, nodecount, content_width, params_width); + } else { + // use in_raw from above to avoid allocating another stream object + decompress(is, in_raw, version); + MapNode::deSerializeBulk(in_raw, version, data, nodecount, + content_width, params_width); + } /* NodeMetadata */ TRACESTREAM(<<"MapBlock::deSerialize "<= 23) - m_node_metadata.deSerialize(iss, m_gamedef->idef()); - else - content_nodemeta_deserialize_legacy(iss, - &m_node_metadata, &m_node_timers, - m_gamedef->idef()); - } catch(SerializationError &e) { - warningstream<<"MapBlock::deSerialize(): Ignoring an error" - <<" while deserializing node metadata at (" - <= 29) { + m_node_metadata.deSerialize(is, m_gamedef->idef()); + } else { + try { + // reuse in_raw + in_raw.str(""); + in_raw.clear(); + decompress(is, in_raw, version); + if (version >= 23) + m_node_metadata.deSerialize(in_raw, m_gamedef->idef()); + else + content_nodemeta_deserialize_legacy(in_raw, + &m_node_metadata, &m_node_timers, + m_gamedef->idef()); + } catch(SerializationError &e) { + warningstream<<"MapBlock::deSerialize(): Ignoring an error" + <<" while deserializing node metadata at (" + <= 25){ diff --git a/src/mapblock.h b/src/mapblock.h index 2e3eb0d76..8de631a29 100644 --- a/src/mapblock.h +++ b/src/mapblock.h @@ -473,7 +473,7 @@ public: // These don't write or read version by itself // Set disk to true for on-disk format, false for over-the-network format // Precondition: version >= SER_FMT_VER_LOWEST_WRITE - void serialize(std::ostream &os, u8 version, bool disk, int compression_level); + void serialize(std::ostream &result, u8 version, bool disk, int compression_level); // If disk == true: In addition to doing other things, will add // unknown blocks from id-name mapping to wndef void deSerialize(std::istream &is, u8 version, bool disk); diff --git a/src/mapgen/mg_schematic.cpp b/src/mapgen/mg_schematic.cpp index 848a43626..b9ba70302 100644 --- a/src/mapgen/mg_schematic.cpp +++ b/src/mapgen/mg_schematic.cpp @@ -339,7 +339,9 @@ bool Schematic::deserializeFromMts(std::istream *is) delete []schemdata; schemdata = new MapNode[nodecount]; - MapNode::deSerializeBulk(ss, SER_FMT_VER_HIGHEST_READ, schemdata, + std::stringstream d_ss(std::ios_base::binary | std::ios_base::in | std::ios_base::out); + decompress(ss, d_ss, MTSCHEM_MAPNODE_SER_FMT_VER); + MapNode::deSerializeBulk(d_ss, MTSCHEM_MAPNODE_SER_FMT_VER, schemdata, nodecount, 2, 2); // Fix probability values for nodes that were ignore; removed in v2 @@ -384,8 +386,9 @@ bool Schematic::serializeToMts(std::ostream *os) const } // compressed bulk node data - MapNode::serializeBulk(ss, SER_FMT_VER_HIGHEST_WRITE, - schemdata, size.X * size.Y * size.Z, 2, 2, -1); + SharedBuffer buf = MapNode::serializeBulk(MTSCHEM_MAPNODE_SER_FMT_VER, + schemdata, size.X * size.Y * size.Z, 2, 2); + compress(buf, ss, MTSCHEM_MAPNODE_SER_FMT_VER); return true; } diff --git a/src/mapgen/mg_schematic.h b/src/mapgen/mg_schematic.h index 5f64ea280..9189bb3a7 100644 --- a/src/mapgen/mg_schematic.h +++ b/src/mapgen/mg_schematic.h @@ -70,6 +70,7 @@ class Server; #define MTSCHEM_FILE_SIGNATURE 0x4d54534d // 'MTSM' #define MTSCHEM_FILE_VER_HIGHEST_READ 4 #define MTSCHEM_FILE_VER_HIGHEST_WRITE 4 +#define MTSCHEM_MAPNODE_SER_FMT_VER 28 // Fixed serialization version for schematics since these still need to use Zlib #define MTSCHEM_PROB_MASK 0x7F diff --git a/src/mapnode.cpp b/src/mapnode.cpp index f212ea8c9..73bd620fb 100644 --- a/src/mapnode.cpp +++ b/src/mapnode.cpp @@ -730,9 +730,10 @@ void MapNode::deSerialize(u8 *source, u8 version) } } } -void MapNode::serializeBulk(std::ostream &os, int version, + +SharedBuffer MapNode::serializeBulk(int version, const MapNode *nodes, u32 nodecount, - u8 content_width, u8 params_width, int compression_level) + u8 content_width, u8 params_width) { if (!ser_ver_supported(version)) throw VersionMismatchException("ERROR: MapNode format not supported"); @@ -746,8 +747,7 @@ void MapNode::serializeBulk(std::ostream &os, int version, throw SerializationError("MapNode::serializeBulk: serialization to " "version < 24 not possible"); - size_t databuf_size = nodecount * (content_width + params_width); - u8 *databuf = new u8[databuf_size]; + SharedBuffer databuf(nodecount * (content_width + params_width)); u32 start1 = content_width * nodecount; u32 start2 = (content_width + 1) * nodecount; @@ -758,14 +758,7 @@ void MapNode::serializeBulk(std::ostream &os, int version, writeU8(&databuf[start1 + i], nodes[i].param1); writeU8(&databuf[start2 + i], nodes[i].param2); } - - /* - Compress data to output stream - */ - - compressZlib(databuf, databuf_size, os, compression_level); - - delete [] databuf; + return databuf; } // Deserialize bulk node data @@ -781,15 +774,10 @@ void MapNode::deSerializeBulk(std::istream &is, int version, || params_width != 2) FATAL_ERROR("Deserialize bulk node data error"); - // Uncompress or read data - u32 len = nodecount * (content_width + params_width); - std::ostringstream os(std::ios_base::binary); - decompressZlib(is, os); - std::string s = os.str(); - if(s.size() != len) - throw SerializationError("deSerializeBulkNodes: " - "decompress resulted in invalid size"); - const u8 *databuf = reinterpret_cast(s.c_str()); + // read data + const u32 len = nodecount * (content_width + params_width); + Buffer databuf(len); + is.read(reinterpret_cast(*databuf), len); // Deserialize content if(content_width == 1) diff --git a/src/mapnode.h b/src/mapnode.h index 28ff9e43d..afd3a96be 100644 --- a/src/mapnode.h +++ b/src/mapnode.h @@ -21,6 +21,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "irrlichttypes_bloated.h" #include "light.h" +#include "util/pointer.h" #include #include @@ -293,9 +294,9 @@ struct MapNode // content_width = the number of bytes of content per node // params_width = the number of bytes of params per node // compressed = true to zlib-compress output - static void serializeBulk(std::ostream &os, int version, + static SharedBuffer serializeBulk(int version, const MapNode *nodes, u32 nodecount, - u8 content_width, u8 params_width, int compression_level); + u8 content_width, u8 params_width); static void deSerializeBulk(std::istream &is, int version, MapNode *nodes, u32 nodecount, u8 content_width, u8 params_width); diff --git a/src/serialization.cpp b/src/serialization.cpp index 310604f54..b6ce3b37f 100644 --- a/src/serialization.cpp +++ b/src/serialization.cpp @@ -21,7 +21,8 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "util/serialize.h" -#include "zlib.h" +#include +#include /* report a zlib or i/o error */ void zerr(int ret) @@ -197,27 +198,133 @@ void decompressZlib(std::istream &is, std::ostream &os, size_t limit) inflateEnd(&z); } -void compress(const SharedBuffer &data, std::ostream &os, u8 version) +struct ZSTD_Deleter { + void operator() (ZSTD_CStream* cstream) { + ZSTD_freeCStream(cstream); + } + + void operator() (ZSTD_DStream* dstream) { + ZSTD_freeDStream(dstream); + } +}; + +void compressZstd(const u8 *data, size_t data_size, std::ostream &os, int level) { - if(version >= 11) + // reusing the context is recommended for performance + // it will destroyed when the thread ends + thread_local std::unique_ptr stream(ZSTD_createCStream()); + + ZSTD_initCStream(stream.get(), level); + + const size_t bufsize = 16384; + char output_buffer[bufsize]; + + ZSTD_inBuffer input = { data, data_size, 0 }; + ZSTD_outBuffer output = { output_buffer, bufsize, 0 }; + + while (input.pos < input.size) { + size_t ret = ZSTD_compressStream(stream.get(), &output, &input); + if (ZSTD_isError(ret)) { + dstream << ZSTD_getErrorName(ret) << std::endl; + throw SerializationError("compressZstd: failed"); + } + if (output.pos) { + os.write(output_buffer, output.pos); + output.pos = 0; + } + } + + size_t ret; + do { + ret = ZSTD_endStream(stream.get(), &output); + if (ZSTD_isError(ret)) { + dstream << ZSTD_getErrorName(ret) << std::endl; + throw SerializationError("compressZstd: failed"); + } + if (output.pos) { + os.write(output_buffer, output.pos); + output.pos = 0; + } + } while (ret != 0); + +} + +void compressZstd(const std::string &data, std::ostream &os, int level) +{ + compressZstd((u8*)data.c_str(), data.size(), os, level); +} + +void decompressZstd(std::istream &is, std::ostream &os) +{ + // reusing the context is recommended for performance + // it will destroyed when the thread ends + thread_local std::unique_ptr stream(ZSTD_createDStream()); + + ZSTD_initDStream(stream.get()); + + const size_t bufsize = 16384; + char output_buffer[bufsize]; + char input_buffer[bufsize]; + + ZSTD_outBuffer output = { output_buffer, bufsize, 0 }; + ZSTD_inBuffer input = { input_buffer, 0, 0 }; + size_t ret; + do { - compressZlib(*data ,data.getSize(), os); + if (input.size == input.pos) { + is.read(input_buffer, bufsize); + input.size = is.gcount(); + input.pos = 0; + } + + ret = ZSTD_decompressStream(stream.get(), &output, &input); + if (ZSTD_isError(ret)) { + dstream << ZSTD_getErrorName(ret) << std::endl; + throw SerializationError("decompressZstd: failed"); + } + if (output.pos) { + os.write(output_buffer, output.pos); + output.pos = 0; + } + } while (ret != 0); + + // Unget all the data that ZSTD_decompressStream didn't take + is.clear(); // Just in case EOF is set + for (u32 i = 0; i < input.size - input.pos; i++) { + is.unget(); + if (is.fail() || is.bad()) + throw SerializationError("decompressZstd: unget failed"); + } +} + +void compress(u8 *data, u32 size, std::ostream &os, u8 version, int level) +{ + if(version >= 29) + { + // map the zlib levels [0,9] to [1,10]. -1 becomes 0 which indicates the default (currently 3) + compressZstd(data, size, os, level + 1); return; } - if(data.getSize() == 0) + if(version >= 11) + { + compressZlib(data, size, os, level); + return; + } + + if(size == 0) return; // Write length (u32) u8 tmp[4]; - writeU32(tmp, data.getSize()); + writeU32(tmp, size); os.write((char*)tmp, 4); // We will be writing 8-bit pairs of more_count and byte u8 more_count = 0; u8 current_byte = data[0]; - for(u32 i=1; i &data, std::ostream &os, u8 version) os.write((char*)¤t_byte, 1); } +void compress(const SharedBuffer &data, std::ostream &os, u8 version, int level) +{ + compress(*data, data.getSize(), os, version, level); +} + +void compress(const std::string &data, std::ostream &os, u8 version, int level) +{ + compress((u8*)data.c_str(), data.size(), os, version, level); +} + void decompress(std::istream &is, std::ostream &os, u8 version) { + if(version >= 29) + { + decompressZstd(is, os); + return; + } + if(version >= 11) { decompressZlib(is, os); diff --git a/src/serialization.h b/src/serialization.h index f399983c4..e83a8c179 100644 --- a/src/serialization.h +++ b/src/serialization.h @@ -63,13 +63,14 @@ with this program; if not, write to the Free Software Foundation, Inc., 26: Never written; read the same as 25 27: Added light spreading flags to blocks 28: Added "private" flag to NodeMetadata + 29: Switched compression to zstd, a bit of reorganization */ // This represents an uninitialized or invalid format #define SER_FMT_VER_INVALID 255 // Highest supported serialization version -#define SER_FMT_VER_HIGHEST_READ 28 +#define SER_FMT_VER_HIGHEST_READ 29 // Saved on disk version -#define SER_FMT_VER_HIGHEST_WRITE 28 +#define SER_FMT_VER_HIGHEST_WRITE 29 // Lowest supported serialization version #define SER_FMT_VER_LOWEST_READ 0 // Lowest serialization version for writing @@ -89,7 +90,12 @@ void compressZlib(const u8 *data, size_t data_size, std::ostream &os, int level void compressZlib(const std::string &data, std::ostream &os, int level = -1); void decompressZlib(std::istream &is, std::ostream &os, size_t limit = 0); +void compressZstd(const u8 *data, size_t data_size, std::ostream &os, int level = 0); +void compressZstd(const std::string &data, std::ostream &os, int level = 0); +void decompressZstd(std::istream &is, std::ostream &os); + // These choose between zlib and a self-made one according to version -void compress(const SharedBuffer &data, std::ostream &os, u8 version); -//void compress(const std::string &data, std::ostream &os, u8 version); +void compress(const SharedBuffer &data, std::ostream &os, u8 version, int level = -1); +void compress(const std::string &data, std::ostream &os, u8 version, int level = -1); +void compress(u8 *data, u32 size, std::ostream &os, u8 version, int level = -1); void decompress(std::istream &is, std::ostream &os, u8 version); diff --git a/src/unittest/test_compression.cpp b/src/unittest/test_compression.cpp index dfcadd4b2..a96282f58 100644 --- a/src/unittest/test_compression.cpp +++ b/src/unittest/test_compression.cpp @@ -37,6 +37,7 @@ public: void testRLECompression(); void testZlibCompression(); void testZlibLargeData(); + void testZstdLargeData(); void testZlibLimit(); void _testZlibLimit(u32 size, u32 limit); }; @@ -48,6 +49,7 @@ void TestCompression::runTests(IGameDef *gamedef) TEST(testRLECompression); TEST(testZlibCompression); TEST(testZlibLargeData); + TEST(testZstdLargeData); TEST(testZlibLimit); } @@ -111,7 +113,7 @@ void TestCompression::testZlibCompression() fromdata[3]=1; std::ostringstream os(std::ios_base::binary); - compress(fromdata, os, SER_FMT_VER_HIGHEST_READ); + compressZlib(*fromdata, fromdata.getSize(), os); std::string str_out = os.str(); @@ -124,7 +126,7 @@ void TestCompression::testZlibCompression() std::istringstream is(str_out, std::ios_base::binary); std::ostringstream os2(std::ios_base::binary); - decompress(is, os2, SER_FMT_VER_HIGHEST_READ); + decompressZlib(is, os2); std::string str_out2 = os2.str(); infostream << "decompress: "; @@ -174,6 +176,42 @@ void TestCompression::testZlibLargeData() } } +void TestCompression::testZstdLargeData() +{ + infostream << "Test: Testing zstd wrappers with a large amount " + "of pseudorandom data" << std::endl; + + u32 size = 500000; + infostream << "Test: Input size of large compressZstd is " + << size << std::endl; + + std::string data_in; + data_in.resize(size); + PseudoRandom pseudorandom(9420); + for (u32 i = 0; i < size; i++) + data_in[i] = pseudorandom.range(0, 255); + + std::ostringstream os_compressed(std::ios::binary); + compressZstd(data_in, os_compressed, 0); + infostream << "Test: Output size of large compressZstd is " + << os_compressed.str().size()<