From b45d22c85145ec6420987bba067a2d162c7c9a84 Mon Sep 17 00:00:00 2001
From: Nick Terrell
Date: Fri, 15 Jan 2021 08:41:42 -0800
Subject: [PATCH] [contrib][recovery] Add recover_directory program

This program takes a file with concatenated zstd frames and splits the
file up by frame.

E.g. if `dir.zst` has 4 frames:

```
> ./recover_directory dir.zst recovery/file
Recovering 4 files...
Recovered recovery/file0
Recovered recovery/file1
Recovered recovery/file2
Recovered recovery/file3
Complete
```
---
 contrib/recovery/Makefile            |  35 +++++
 contrib/recovery/recover_directory.c | 191 +++++++++++++++++++++++++++
 2 files changed, 226 insertions(+)
 create mode 100644 contrib/recovery/Makefile
 create mode 100644 contrib/recovery/recover_directory.c

diff --git a/contrib/recovery/Makefile b/contrib/recovery/Makefile
new file mode 100644
index 00000000..9a9f4f2e
--- /dev/null
+++ b/contrib/recovery/Makefile
@@ -0,0 +1,35 @@
+# ################################################################
+# Copyright (c) 2019-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+.PHONY: all
+all: recover_directory
+
+ZSTDLIBDIR ?= ../../lib
+PROGRAMDIR ?= ../../programs
+
+CFLAGS ?= -O3
+CFLAGS += -I$(ZSTDLIBDIR) -I$(PROGRAMDIR)
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+          -Wstrict-aliasing=1 -Wswitch-enum \
+          -Wstrict-prototypes -Wundef \
+          -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+          -Wredundant-decls -Wmissing-prototypes
+CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
+FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+.PHONY: $(ZSTDLIBDIR)/libzstd.a
+$(ZSTDLIBDIR)/libzstd.a:
+	$(MAKE) -C $(ZSTDLIBDIR) libzstd.a
+
+recover_directory: recover_directory.c $(ZSTDLIBDIR)/libzstd.a $(PROGRAMDIR)/util.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
+.PHONY: clean
+clean:
+	rm -f recover_directory
diff --git a/contrib/recovery/recover_directory.c b/contrib/recovery/recover_directory.c
new file mode 100644
index 00000000..13f83fd1
--- /dev/null
+++ b/contrib/recovery/recover_directory.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016-2021, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "util.h"
+#include "zstd.h"
+
+/* Prints the failed condition plus an optional printf-style message to
+ * stderr, then exits with status 1. */
+#define CHECK(cond, ...)                                                       \
+  do {                                                                         \
+    if (!(cond)) {                                                             \
+      fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__, #cond);  \
+      fprintf(stderr, "" __VA_ARGS__);                                         \
+      fprintf(stderr, "\n");                                                   \
+      exit(1);                                                                 \
+    }                                                                          \
+  } while (0)
+
+/* Prints the command line help to stderr. */
+static void usage(char const *program) {
+  fprintf(stderr, "USAGE: %s FILE.zst PREFIX\n", program);
+  fprintf(stderr, "FILE.zst: A zstd compressed file with multiple frames\n");
+  fprintf(stderr, "PREFIX: The output prefix. Uncompressed files will be "
+                  "created named ${PREFIX}0 ${PREFIX}1...\n\n");
+  fprintf(stderr, "This program takes concatenated zstd frames and "
+                  "decompresses them into individual files.\n");
+  fprintf(stderr, "E.g. files created with a command like: zstd -r directory "
+                  "-o file.zst\n");
+}
+
+/* The input file held in memory, together with its frame statistics. */
+typedef struct {
+  char *data;          /* malloc()ed file contents, freed by the caller */
+  size_t size;         /* number of bytes in data */
+  size_t frames;       /* number of zstd frames found in data */
+  size_t maxFrameSize; /* compressed size of the largest frame */
+} ZstdFrames;
+
+/*
+ * Loads fileName into memory and walks it frame by frame to count the frames.
+ * Exits through CHECK() if the file cannot be read or is not a clean
+ * concatenation of zstd frames. The caller must free() the returned data.
+ */
+static ZstdFrames readFile(char const *fileName) {
+  U64 const fileSize = UTIL_getFileSize(fileName);
+  CHECK(fileSize != UTIL_FILESIZE_UNKNOWN, "Unknown file size!");
+
+  /* The file is loaded in one piece, so its size must fit in a size_t. */
+  size_t const dataSize = (size_t)fileSize;
+  CHECK((U64)dataSize == fileSize, "File too large");
+
+  /* malloc(0) may return NULL; always request at least one byte. */
+  char *const data = (char *)malloc(dataSize > 0 ? dataSize : 1);
+  CHECK(data != NULL, "Allocation failed");
+
+  FILE *file = fopen(fileName, "rb");
+  CHECK(file != NULL, "fopen failed");
+
+  size_t const readSize = fread(data, 1, dataSize, file);
+  CHECK(readSize == dataSize, "fread failed");
+
+  fclose(file);
+  ZstdFrames frames;
+  frames.data = data;
+  frames.size = dataSize;
+  frames.frames = 0;
+
+  size_t index;
+  size_t maxFrameSize = 0;
+  for (index = 0; index < dataSize;) {
+    size_t const frameSize =
+        ZSTD_findFrameCompressedSize(data + index, dataSize - index);
+    CHECK(!ZSTD_isError(frameSize), "Bad zstd frame: %s",
+          ZSTD_getErrorName(frameSize));
+    if (frameSize > maxFrameSize)
+      maxFrameSize = frameSize;
+    frames.frames += 1;
+    index += frameSize;
+  }
+  CHECK(index == dataSize, "Zstd file corrupt!");
+  frames.maxFrameSize = maxFrameSize;
+
+  return frames;
+}
+
+/* Returns the number of decimal digits needed to print numFrames. */
+static int computePadding(size_t numFrames) {
+  return snprintf(NULL, 0, "%u", (unsigned)numFrames);
+}
+
+/*
+ * Splits argv[1] into its zstd frames and decompresses frame N into the file
+ * named argv[2] followed by the zero-padded frame number N.
+ */
+int main(int argc, char **argv) {
+  if (argc != 3) {
+    usage(argv[0]);
+    exit(1);
+  }
+  char const *const zstdFile = argv[1];
+  char const *const prefix = argv[2];
+
+  ZstdFrames frames = readFile(zstdFile);
+
+  if (frames.frames <= 1) {
+    fprintf(
+        stderr,
+        "%s only has %u zstd frame. Simply use `zstd -d` to decompress it.\n",
+        zstdFile, (unsigned)frames.frames);
+    exit(1);
+  }
+
+  /* Every index is zero-padded to the width of the largest one. */
+  int const padding = computePadding(frames.frames - 1);
+  CHECK(padding > 0, "snprintf failed!");
+
+  size_t const outFileNameSize = strlen(prefix) + (size_t)padding + 1;
+  char *outFileName = malloc(outFileNameSize);
+  CHECK(outFileName != NULL, "Allocation failure");
+
+  size_t const bufferSize = 128 * 1024;
+  void *buffer = malloc(bufferSize);
+  CHECK(buffer != NULL, "Allocation failure");
+
+  ZSTD_DCtx *dctx = ZSTD_createDCtx();
+  CHECK(dctx != NULL, "Allocation failure");
+
+  fprintf(stderr, "Recovering %u files...\n", (unsigned)frames.frames);
+
+  size_t index;
+  size_t frame = 0;
+  for (index = 0; index < frames.size; ++frame) {
+    size_t const frameSize =
+        ZSTD_findFrameCompressedSize(frames.data + index, frames.size - index);
+    CHECK(!ZSTD_isError(frameSize), "Bad zstd frame: %s",
+          ZSTD_getErrorName(frameSize));
+
+    /* A return value >= the buffer size means the name was truncated. */
+    int const ret = snprintf(outFileName, outFileNameSize, "%s%0*u", prefix,
+                             padding, (unsigned)frame);
+    CHECK(ret >= 0 && (size_t)ret < outFileNameSize, "snprintf failed!");
+
+    FILE *outFile = fopen(outFileName, "wb");
+    CHECK(outFile != NULL, "fopen failed");
+
+    CHECK(!ZSTD_isError(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)),
+          "reset failed");
+    ZSTD_inBuffer in = {frames.data + index, frameSize, 0};
+    /*
+     * ZSTD_decompressStream() returns 0 once the frame is fully decoded and
+     * flushed. Input can run out while decoded data is still buffered inside
+     * the context, so loop on the return value, not on the input position.
+     */
+    size_t remaining;
+    do {
+      size_t const inPos = in.pos;
+      ZSTD_outBuffer out = {buffer, bufferSize, 0};
+      remaining = ZSTD_decompressStream(dctx, &out, &in);
+      CHECK(!ZSTD_isError(remaining), "decompression failed: %s",
+            ZSTD_getErrorName(remaining));
+      /* A truncated frame would otherwise spin here forever. */
+      CHECK(remaining == 0 || in.pos > inPos || out.pos > 0,
+            "decompression stalled");
+      size_t const writeSize = fwrite(out.dst, 1, out.pos, outFile);
+      CHECK(writeSize == out.pos, "fwrite failed");
+    } while (remaining != 0);
+    /* fclose() flushes buffered output, so write errors can surface here. */
+    CHECK(fclose(outFile) == 0, "fclose failed");
+    fprintf(stderr, "Recovered %s\n", outFileName);
+    index += frameSize;
+  }
+  fprintf(stderr, "Complete\n");
+
+  free(outFileName);
+  ZSTD_freeDCtx(dctx);
+  free(buffer);
+  free(frames.data);
+  return 0;
+}