Merge remote-tracking branch 'refs/remotes/facebook/dev' into dev11

dev
Przemyslaw Skibinski 2017-02-08 13:49:35 +01:00
commit cdf5a7bd9f
9 changed files with 2515 additions and 12 deletions

4
NEWS
View File

@ -1,7 +1,7 @@
v1.1.3
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`)
cli : new : experimental target `make zstdmt`, with multi-threading support
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano.
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
cli : fix zstdless on Mac OS-X, by Andrew Janke
cli : fix #232 "compress non-files"
@ -12,7 +12,7 @@ API : new : ZDICT_finalizeDictionary()
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
API : fix : all symbols properly exposed in libzstd, by Nick Terrell
build : support for Solaris target, by Przemyslaw Skibinski
doc : clarified specification, by Andrew Purcell
doc : clarified specification, by Sean Purcell
v1.1.2
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init

View File

@ -55,7 +55,7 @@ To solve this situation, Zstd offers a __training mode__, which can be used to t
Training Zstandard is achieved by providing it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression.
Using this dictionary, the compression ratio achievable on small data improves dramatically.
The following example uses the `github-users` [sample set](https://www.dropbox.com/s/mnktkomhkjbf1i2/github_users.tar.zst?dl=0), created from [github public API](https://developer.github.com/v3/users/#get-all-users).
The following example uses the `github-users` [sample set](https://github.com/facebook/zstd/releases/tag/v1.1.3), created from [github public API](https://developer.github.com/v3/users/#get-all-users).
It consists of roughly 10K records weighing about 1KB each.
Compression Ratio | Compression Speed | Decompression Speed

View File

@ -0,0 +1,19 @@
Educational Decoder
===================
`zstd_decompress.c` is a self-contained implementation in C99 of a decoder,
according to the [Zstandard format specification].
While it does not implement as many features as the reference decoder,
such as the streaming API or content checksums, it is written to be easy to
follow and understand, to help understand how the Zstandard format works.
It's laid out to match the [format specification],
so it can be used to understand how complex segments could be implemented.
It also contains implementations of Huffman and FSE table decoding.
[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
`harness.c` provides a simple test harness around the decoder:
harness <input-file> <output-file> [dictionary]

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#include <stdio.h>
#include <stdlib.h>
#include "zstd_decompress.h"
// Byte type used for all raw file and decompression buffers in this harness
typedef unsigned char u8;
// If the data doesn't have decompressed size with it, fallback on assuming the
// compression ratio is at most 16
#define MAX_COMPRESSION_RATIO (16)
// Protect against allocating too much memory for output
// (the limit is 1024 * 1024 * 1024 bytes, i.e. 1 GiB)
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
// Buffers shared by the harness.  As file-scope objects they start out NULL.
// Contents of the compressed input file (argv[1]), filled in by read_file()
u8 *input;
// Destination buffer for the decompressed data, allocated in main()
u8 *output;
// Contents of the optional dictionary file (argv[3]); stays NULL when no
// dictionary argument is given
u8 *dict;
// Reads the whole file at `path` into a newly malloc'd buffer.
//
// path: file to read (opened in binary mode)
// ptr:  out-parameter; on return `*ptr` points to the buffer holding the file
//       contents.  The caller owns the buffer and releases it with free().
//
// Returns the number of bytes actually stored in `*ptr`.
// Any failure (open, size query, allocation, read error) prints a message to
// stderr and terminates the process with exit(1).
size_t read_file(const char *path, u8 **ptr) {
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "failed to open file %s\n", path);
        exit(1);
    }

    // Find the file size by seeking to the end.  ftell() reports failure as
    // -1L, which must not be converted blindly into a huge size_t.
    if (fseek(f, 0L, SEEK_END) != 0) {
        fprintf(stderr, "failed to determine size of file %s\n", path);
        exit(1);
    }
    const long end = ftell(f);
    if (end < 0) {
        fprintf(stderr, "failed to determine size of file %s\n", path);
        exit(1);
    }
    rewind(f);
    const size_t size = (size_t)end;

    // malloc(0) is allowed to return NULL, so always request at least one
    // byte; that way a NULL result unambiguously means allocation failure.
    *ptr = malloc(size ? size : 1);
    if (!*ptr) {
        fprintf(stderr, "failed to allocate memory to hold %s\n", path);
        exit(1);
    }

    size_t pos = 0;
    while (pos < size) {
        // Only ask for the bytes that still fit in the buffer
        const size_t nread = fread(*ptr + pos, 1, size - pos, f);
        if (ferror(f)) {
            fprintf(stderr, "error while reading file %s\n", path);
            exit(1);
        }
        if (nread == 0) {
            // End of file reached earlier than the size query suggested
            break;
        }
        pos += nread;
    }

    fclose(f);
    return pos;
}
// Writes `size` bytes starting at `ptr` to the file at `path`, creating or
// truncating it (binary mode).
//
// path: destination file
// ptr:  data to write
// size: number of bytes to write
//
// Any failure (open, write, close) prints a message to stderr and terminates
// the process with exit(1).
void write_file(const char *path, const u8 *ptr, size_t size) {
    FILE *f = fopen(path, "wb");
    if (!f) {
        fprintf(stderr, "failed to open file %s\n", path);
        exit(1);
    }

    size_t written = 0;
    while (written < size) {
        // Only hand fwrite() the bytes that have not been written yet, so a
        // short write never makes it read past the end of the buffer
        written += fwrite(ptr + written, 1, size - written, f);
        if (ferror(f)) {
            fprintf(stderr, "error while writing file %s\n", path);
            exit(1);
        }
    }

    // fclose() flushes buffered data, so a failure here means lost output
    if (fclose(f) != 0) {
        fprintf(stderr, "error while writing file %s\n", path);
        exit(1);
    }
}
// Test harness entry point.
//
// usage: harness <file.zst> <out_path> [dictionary]
//
// Reads the compressed input (and the optional dictionary), sizes an output
// buffer from the decompressed size reported for the input, decompresses
// into it and writes the result to <out_path>.
//
// Returns 0 on success and 1 on a usage error, an output size above
// MAX_OUTPUT_SIZE, or an allocation failure.
int main(int argc, char **argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
                argv[0]);
        return 1;
    }

    int status = 1;

    size_t input_size = read_file(argv[1], &input);
    size_t dict_size = 0;
    if (argc >= 4) {
        dict_size = read_file(argv[3], &dict);
    }

    size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
    // (size_t)-1 is the "size not stored in the data" sentinel
    if (decompressed_size == (size_t)-1) {
        // Refuse inputs for which the estimate below would wrap around
        if (input_size > (size_t)-1 / MAX_COMPRESSION_RATIO) {
            fprintf(stderr,
                    "Required output size too large for this implementation\n");
            goto cleanup;
        }
        decompressed_size = MAX_COMPRESSION_RATIO * input_size;
        fprintf(stderr, "WARNING: Compressed data does not contain "
                        "decompressed size, going to assume the compression "
                        "ratio is at most %d (decompressed size of at most "
                        "%zu)\n",
                MAX_COMPRESSION_RATIO, decompressed_size);
    }
    if (decompressed_size > MAX_OUTPUT_SIZE) {
        fprintf(stderr,
                "Required output size too large for this implementation\n");
        goto cleanup;
    }

    // malloc(0) may return NULL; request at least one byte so that an empty
    // decompressed payload is not misreported as an allocation failure
    output = malloc(decompressed_size ? decompressed_size : 1);
    if (!output) {
        fprintf(stderr, "failed to allocate memory\n");
        goto cleanup;
    }

    {
        const size_t decompressed =
            ZSTD_decompress_with_dict(output, decompressed_size,
                                      input, input_size, dict, dict_size);
        write_file(argv[2], output, decompressed);
    }
    status = 0;

cleanup:
    // free(NULL) is a no-op, so buffers that were never allocated are fine
    free(input);
    free(output);
    free(dict);
    input = output = dict = NULL;
    return status;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#ifndef ZSTD_DECOMPRESS_H
#define ZSTD_DECOMPRESS_H

#include <stddef.h> /* size_t */

/*
 * Decompression entry point without a dictionary.
 *
 * dst / dst_len: output buffer and its capacity in bytes
 * src / src_len: input buffer and its length in bytes
 *
 * NOTE(review): presumably returns the number of bytes written to dst, like
 * ZSTD_decompress_with_dict() below -- confirm against the implementation.
 */
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
                       const void *const src, const size_t src_len);

/*
 * Same as ZSTD_decompress(), with an additional dictionary buffer.
 *
 * dict / dict_len: dictionary buffer and its length in bytes.  The test
 *                  harness passes a NULL dict with dict_len == 0 when no
 *                  dictionary is supplied.
 *
 * The test harness uses the return value as the number of bytes of dst to
 * write to the output file.
 */
size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
                                 const void *const src, const size_t src_len,
                                 const void *const dict, const size_t dict_len);

/*
 * Reports the decompressed size for the data in src.
 *
 * The test harness treats a return value of (size_t)-1 as "the compressed
 * data does not contain its decompressed size".
 */
size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);

#endif /* ZSTD_DECOMPRESS_H */

View File

@ -7,6 +7,16 @@
* of patent rights can be found in the PATENTS file in the same directory.
*/
/* *****************************************************************************
* Constructs a dictionary using a heuristic based on the following paper:
*
* Liao, Petri, Moffat, Wirth
* Effective Construction of Relative Lempel-Ziv Dictionaries
* Published in WWW 2016.
*
* Adapted from code originally written by @ot (Giuseppe Ottaviano).
******************************************************************************/
/*-*************************************
* Dependencies
***************************************/
@ -621,13 +631,6 @@ static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
return zdictParams;
}
/**
* Constructs a dictionary using a heuristic based on the following paper:
*
* Liao, Petri, Moffat, Wirth
* Effective Construction of Relative Lempel-Ziv Dictionaries
* Published in WWW 2016.
*/
ZDICTLIB_API size_t COVER_trainFromBuffer(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {

View File

@ -67,7 +67,7 @@ endif
# zlib detection
VOID = /dev/null
HAVE_ZLIB := $(shell echo "int main(){}" | $(CC) -o $(VOID) -x c - -lz 2> $(VOID) && echo 1 || echo 0)
HAVE_ZLIB := $(shell echo "\#include <zlib.h>\nint main(){}" | $(CC) -o $(VOID) -x c - -lz 2> $(VOID) && echo 1 || echo 0)
ifeq ($(HAVE_ZLIB), 1)
ZLIBCPP = -DZSTD_GZDECOMPRESS
ZLIBLD = -lz

View File

@ -39,7 +39,7 @@
# include "zstdmt_compress.h"
#endif
#ifdef ZSTD_GZDECOMPRESS
# include "zlib.h"
# include <zlib.h>
# if !defined(z_const)
# define z_const
# endif