Merge remote-tracking branch 'refs/remotes/facebook/dev' into dev11
commit
cdf5a7bd9f
4
NEWS
4
NEWS
|
@ -1,7 +1,7 @@
|
|||
v1.1.3
|
||||
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`)
|
||||
cli : new : experimental target `make zstdmt`, with multi-threading support
|
||||
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell
|
||||
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano.
|
||||
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
|
||||
cli : fix zstdless on Mac OS-X, by Andrew Janke
|
||||
cli : fix #232 "compress non-files"
|
||||
|
@ -12,7 +12,7 @@ API : new : ZDICT_finalizeDictionary()
|
|||
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
|
||||
API : fix : all symbols properly exposed in libzstd, by Nick Terrell
|
||||
build : support for Solaris target, by Przemyslaw Skibinski
|
||||
doc : clarified specification, by Andrew Purcell
|
||||
doc : clarified specification, by Sean Purcell
|
||||
|
||||
v1.1.2
|
||||
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init
|
||||
|
|
|
@ -55,7 +55,7 @@ To solve this situation, Zstd offers a __training mode__, which can be used to t
|
|||
Training Zstandard is achieved by providing it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression.
|
||||
Using this dictionary, the compression ratio achievable on small data improves dramatically.
|
||||
|
||||
The following example uses the `github-users` [sample set](https://www.dropbox.com/s/mnktkomhkjbf1i2/github_users.tar.zst?dl=0), created from [github public API](https://developer.github.com/v3/users/#get-all-users).
|
||||
The following example uses the `github-users` [sample set](https://github.com/facebook/zstd/releases/tag/v1.1.3), created from [github public API](https://developer.github.com/v3/users/#get-all-users).
|
||||
It consists of roughly 10K records weighing about 1KB each.
|
||||
|
||||
Compression Ratio | Compression Speed | Decompression Speed
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
Educational Decoder
|
||||
===================
|
||||
|
||||
`zstd_decompress.c` is a self-contained implementation in C99 of a decoder,
|
||||
according to the [Zstandard format specification].
|
||||
While it does not implement as many features as the reference decoder,
|
||||
such as the streaming API or content checksums, it is written to be easy to
|
||||
follow and understand, to help understand how the Zstandard format works.
|
||||
It's laid out to match the [format specification],
|
||||
so it can be used to understand how complex segments could be implemented.
|
||||
It also contains implementations of Huffman and FSE table decoding.
|
||||
|
||||
[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
|
||||
[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
|
||||
|
||||
`harness.c` provides a simple test harness around the decoder:
|
||||
|
||||
harness <input-file> <output-file> [dictionary]
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the BSD-style license found in the
|
||||
* LICENSE file in the root directory of this source tree. An additional grant
|
||||
* of patent rights can be found in the PATENTS file in the same directory.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "zstd_decompress.h"
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
// If the data doesn't have decompressed size with it, fallback on assuming the
|
||||
// compression ratio is at most 16
|
||||
#define MAX_COMPRESSION_RATIO (16)
|
||||
|
||||
// Protect against allocating too much memory for output
|
||||
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
|
||||
|
||||
u8 *input;
|
||||
u8 *output;
|
||||
u8 *dict;
|
||||
|
||||
/*
 * Reads the whole file at `path` into a freshly allocated buffer.
 *
 * On success, `*ptr` points to the buffer (owned by the caller, to be released
 * with free()) and the number of bytes actually read is returned.
 * On any failure a message is printed to stderr and the process exits with
 * status 1.
 */
size_t read_file(const char *path, u8 **ptr) {
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "failed to open file %s\n", path);
        exit(1);
    }

    // Find the file size by seeking to the end; ftell() reports -1 on failure
    long end = -1;
    if (fseek(f, 0L, SEEK_END) == 0) {
        end = ftell(f);
    }
    if (end < 0) {
        fprintf(stderr, "failed to determine size of file %s\n", path);
        exit(1);
    }
    rewind(f);
    size_t size = (size_t)end;

    // malloc(0) is allowed to return NULL, so always request at least one
    // byte to keep "empty file" distinguishable from "out of memory"
    *ptr = malloc(size ? size : 1);
    if (!*ptr) {
        fprintf(stderr, "failed to allocate memory to hold %s\n", path);
        exit(1);
    }

    size_t pos = 0;
    while (pos < size) {
        // Never ask for more than the space remaining in the buffer
        size_t got = fread(&(*ptr)[pos], 1, size - pos, f);
        if (ferror(f)) {
            fprintf(stderr, "error while reading file %s\n", path);
            exit(1);
        }
        if (got == 0) {
            // End of file reached earlier than the measured size
            break;
        }
        pos += got;
    }

    fclose(f);

    return pos;
}
|
||||
|
||||
/*
 * Writes the `size` bytes starting at `ptr` to the file at `path`, creating
 * or truncating it.
 *
 * On any failure (open, write, or close/flush) a message is printed to stderr
 * and the process exits with status 1.
 */
void write_file(const char *path, const u8 *ptr, size_t size) {
    FILE *f = fopen(path, "wb");
    if (!f) {
        fprintf(stderr, "failed to open file %s\n", path);
        exit(1);
    }

    size_t written = 0;
    while (written < size) {
        // Only the bytes not yet written may be handed to fwrite(); passing
        // the full `size` again would read past the end of `ptr`
        size_t n = fwrite(&ptr[written], 1, size - written, f);
        written += n;
        if (n == 0 || ferror(f)) {
            fprintf(stderr, "error while writing file %s\n", path);
            exit(1);
        }
    }

    // fclose() flushes buffered data, so its result matters for output files
    if (fclose(f) != 0) {
        fprintf(stderr, "error while writing file %s\n", path);
        exit(1);
    }
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
|
||||
argv[0]);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t input_size = read_file(argv[1], &input);
|
||||
size_t dict_size = 0;
|
||||
if (argc >= 4) {
|
||||
dict_size = read_file(argv[3], &dict);
|
||||
}
|
||||
|
||||
size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
|
||||
if (decompressed_size == -1) {
|
||||
decompressed_size = MAX_COMPRESSION_RATIO * input_size;
|
||||
fprintf(stderr, "WARNING: Compressed data does not contain "
|
||||
"decompressed size, going to assume the compression "
|
||||
"ratio is at most %d (decompressed size of at most "
|
||||
"%zu)\n",
|
||||
MAX_COMPRESSION_RATIO, decompressed_size);
|
||||
}
|
||||
if (decompressed_size > MAX_OUTPUT_SIZE) {
|
||||
fprintf(stderr,
|
||||
"Required output size too large for this implementation\n");
|
||||
return 1;
|
||||
}
|
||||
output = malloc(decompressed_size);
|
||||
if (!output) {
|
||||
fprintf(stderr, "failed to allocate memory\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t decompressed =
|
||||
ZSTD_decompress_with_dict(output, decompressed_size,
|
||||
input, input_size, dict, dict_size);
|
||||
|
||||
write_file(argv[2], output, decompressed);
|
||||
|
||||
free(input);
|
||||
free(output);
|
||||
free(dict);
|
||||
input = output = dict = NULL;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under the BSD-style license found in the
|
||||
* LICENSE file in the root directory of this source tree. An additional grant
|
||||
* of patent rights can be found in the PATENTS file in the same directory.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_DECOMPRESS_H
#define ZSTD_DECOMPRESS_H

#include <stddef.h> /* size_t */

/*
 * Decompresses the `src_len` bytes at `src` into the buffer `dst`, which has
 * room for `dst_len` bytes.
 * NOTE(review): the return value appears to be the number of bytes written to
 * `dst` (the harness uses it as the output length) -- confirm against the
 * implementation.
 */
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
                       const void *const src, const size_t src_len);

/*
 * Same as ZSTD_decompress(), additionally taking a dictionary of `dict_len`
 * bytes at `dict`.
 * NOTE(review): the harness passes dict == NULL with dict_len == 0 when no
 * dictionary is given; presumably that means "no dictionary" -- confirm.
 */
size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
                                 const void *const src, const size_t src_len,
                                 const void *const dict, const size_t dict_len);

/*
 * Reports the decompressed size recorded in the compressed data at `src`.
 * NOTE(review): the harness treats a result of (size_t)-1 as "size not
 * present in the data" -- confirm against the implementation.
 */
size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);

#endif /* ZSTD_DECOMPRESS_H */
|
||||
|
|
@ -7,6 +7,16 @@
|
|||
* of patent rights can be found in the PATENTS file in the same directory.
|
||||
*/
|
||||
|
||||
/* *****************************************************************************
|
||||
* Constructs a dictionary using a heuristic based on the following paper:
|
||||
*
|
||||
* Liao, Petri, Moffat, Wirth
|
||||
* Effective Construction of Relative Lempel-Ziv Dictionaries
|
||||
* Published in WWW 2016.
|
||||
*
|
||||
* Adapted from code originally written by @ot (Giuseppe Ottaviano).
|
||||
******************************************************************************/
|
||||
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
|
@ -621,13 +631,6 @@ static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
|
|||
return zdictParams;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a dictionary using a heuristic based on the following paper:
|
||||
*
|
||||
* Liao, Petri, Moffat, Wirth
|
||||
* Effective Construction of Relative Lempel-Ziv Dictionaries
|
||||
* Published in WWW 2016.
|
||||
*/
|
||||
ZDICTLIB_API size_t COVER_trainFromBuffer(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
|
||||
|
|
|
@ -67,7 +67,7 @@ endif
|
|||
|
||||
# zlib detection
|
||||
VOID = /dev/null
|
||||
HAVE_ZLIB := $(shell echo "int main(){}" | $(CC) -o $(VOID) -x c - -lz 2> $(VOID) && echo 1 || echo 0)
|
||||
HAVE_ZLIB := $(shell echo "\#include <zlib.h>\nint main(){}" | $(CC) -o $(VOID) -x c - -lz 2> $(VOID) && echo 1 || echo 0)
|
||||
ifeq ($(HAVE_ZLIB), 1)
|
||||
ZLIBCPP = -DZSTD_GZDECOMPRESS
|
||||
ZLIBLD = -lz
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
# include "zstdmt_compress.h"
|
||||
#endif
|
||||
#ifdef ZSTD_GZDECOMPRESS
|
||||
# include "zlib.h"
|
||||
# include <zlib.h>
|
||||
# if !defined(z_const)
|
||||
# define z_const
|
||||
# endif
|
||||
|
|
Loading…
Reference in New Issue