Merge pull request #1434 from terrelln/regression
[regression] Add dictionary support
This commit is contained in:
commit
c631fdec9f
@ -19,6 +19,12 @@
|
|||||||
.name = "level -" #x, \
|
.name = "level -" #x, \
|
||||||
.cli_args = "--fast=" #x, \
|
.cli_args = "--fast=" #x, \
|
||||||
.param_values = PARAM_VALUES(level_fast##x##_param_values), \
|
.param_values = PARAM_VALUES(level_fast##x##_param_values), \
|
||||||
|
}; \
|
||||||
|
config_t const level_fast##x##_dict = { \
|
||||||
|
.name = "level -" #x " with dict", \
|
||||||
|
.cli_args = "--fast=" #x, \
|
||||||
|
.param_values = PARAM_VALUES(level_fast##x##_param_values), \
|
||||||
|
.use_dictionary = 1, \
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Define a config for each level we want to test with. */
|
/* Define a config for each level we want to test with. */
|
||||||
@ -30,6 +36,12 @@
|
|||||||
.name = "level " #x, \
|
.name = "level " #x, \
|
||||||
.cli_args = "-" #x, \
|
.cli_args = "-" #x, \
|
||||||
.param_values = PARAM_VALUES(level_##x##_param_values), \
|
.param_values = PARAM_VALUES(level_##x##_param_values), \
|
||||||
|
}; \
|
||||||
|
config_t const level_##x##_dict = { \
|
||||||
|
.name = "level " #x " with dict", \
|
||||||
|
.cli_args = "-" #x, \
|
||||||
|
.param_values = PARAM_VALUES(level_##x##_param_values), \
|
||||||
|
.use_dictionary = 1, \
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -41,17 +53,31 @@
|
|||||||
#undef LEVEL
|
#undef LEVEL
|
||||||
#undef FAST_LEVEL
|
#undef FAST_LEVEL
|
||||||
|
|
||||||
|
static config_t no_pledged_src_size = {
|
||||||
|
.name = "no source size",
|
||||||
|
.cli_args = "",
|
||||||
|
.param_values = {.data = NULL, .size = 0},
|
||||||
|
.no_pledged_src_size = 1,
|
||||||
|
};
|
||||||
|
|
||||||
static config_t const* g_configs[] = {
|
static config_t const* g_configs[] = {
|
||||||
#define FAST_LEVEL(x) &level_fast##x,
|
|
||||||
#define LEVEL(x) &level_##x,
|
#define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict,
|
||||||
|
#define LEVEL(x) &level_##x, &level_##x##_dict,
|
||||||
#include "levels.h"
|
#include "levels.h"
|
||||||
#undef LEVEL
|
#undef LEVEL
|
||||||
#undef FAST_LEVEL
|
#undef FAST_LEVEL
|
||||||
|
|
||||||
|
&no_pledged_src_size,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
config_t const* const* configs = g_configs;
|
config_t const* const* configs = g_configs;
|
||||||
|
|
||||||
|
/**
 * Reports whether `config` must be skipped for `data`.
 *
 * A config that requests a dictionary is skipped when the data has none;
 * a config that does not request a dictionary is never skipped here.
 *
 * @returns 1 to skip, 0 to run.
 */
int config_skip_data(config_t const* config, data_t const* data) {
    /* Configs without a dictionary requirement apply to every data set. */
    if (!config->use_dictionary)
        return 0;

    return data_has_dict(data) ? 0 : 1;
}
|
||||||
|
|
||||||
int config_get_level(config_t const* config) {
|
int config_get_level(config_t const* config) {
|
||||||
param_values_t const params = config->param_values;
|
param_values_t const params = config->param_values;
|
||||||
size_t i;
|
size_t i;
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
#define ZSTD_STATIC_LINKING_ONLY
|
#define ZSTD_STATIC_LINKING_ONLY
|
||||||
#include <zstd.h>
|
#include <zstd.h>
|
||||||
|
|
||||||
|
#include "data.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
ZSTD_cParameter param;
|
ZSTD_cParameter param;
|
||||||
unsigned value;
|
unsigned value;
|
||||||
@ -41,8 +43,25 @@ typedef struct {
|
|||||||
* the parameters will be derived from these.
|
* the parameters will be derived from these.
|
||||||
*/
|
*/
|
||||||
param_values_t param_values;
|
param_values_t param_values;
|
||||||
|
/**
|
||||||
|
* Boolean parameter that says if we should use a dictionary. If the data
|
||||||
|
* doesn't have a dictionary, this config is skipped. Defaults to no.
|
||||||
|
*/
|
||||||
|
int use_dictionary;
|
||||||
|
/**
|
||||||
|
* Boolean parameter that says if we should pass the pledged source size
|
||||||
|
* when the method allows it. Defaults to yes.
|
||||||
|
*/
|
||||||
|
int no_pledged_src_size;
|
||||||
} config_t;
|
} config_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the config should skip this data.
|
||||||
|
* For instance, if the config requires a dictionary but the data doesn't have
|
||||||
|
* one.
|
||||||
|
*/
|
||||||
|
int config_skip_data(config_t const* config, data_t const* data);
|
||||||
|
|
||||||
#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1)
|
#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1)
|
||||||
/**
|
/**
|
||||||
* Returns the compression level specified by the config, or CONFIG_NO_LEVEL if
|
* Returns the compression level specified by the config, or CONFIG_NO_LEVEL if
|
||||||
|
@ -32,27 +32,58 @@
|
|||||||
"https://github.com/facebook/zstd/releases/download/regression-data/" x
|
"https://github.com/facebook/zstd/releases/download/regression-data/" x
|
||||||
|
|
||||||
data_t silesia = {
|
data_t silesia = {
|
||||||
.url = REGRESSION_RELEASE("silesia.tar.zst"),
|
|
||||||
.name = "silesia",
|
.name = "silesia",
|
||||||
.type = data_type_dir,
|
.type = data_type_dir,
|
||||||
.xxhash64 = 0x67558ee5506918b4LL,
|
.data =
|
||||||
|
{
|
||||||
|
.url = REGRESSION_RELEASE("silesia.tar.zst"),
|
||||||
|
.xxhash64 = 0x48a199f92f93e977LL,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
data_t silesia_tar = {
|
data_t silesia_tar = {
|
||||||
.url = REGRESSION_RELEASE("silesia.tar.zst"),
|
|
||||||
.name = "silesia.tar",
|
.name = "silesia.tar",
|
||||||
.type = data_type_file,
|
.type = data_type_file,
|
||||||
.xxhash64 = 0x67558ee5506918b4LL,
|
.data =
|
||||||
|
{
|
||||||
|
.url = REGRESSION_RELEASE("silesia.tar.zst"),
|
||||||
|
.xxhash64 = 0x48a199f92f93e977LL,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
data_t github = {
|
||||||
|
.name = "github",
|
||||||
|
.type = data_type_dir,
|
||||||
|
.data =
|
||||||
|
{
|
||||||
|
.url = REGRESSION_RELEASE("github.tar.zst"),
|
||||||
|
.xxhash64 = 0xa9b1b44b020df292LL,
|
||||||
|
},
|
||||||
|
.dict =
|
||||||
|
{
|
||||||
|
.url = REGRESSION_RELEASE("github.dict.zst"),
|
||||||
|
.xxhash64 = 0x1eddc6f737d3cb53LL,
|
||||||
|
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static data_t* g_data[] = {
|
static data_t* g_data[] = {
|
||||||
&silesia,
|
&silesia,
|
||||||
&silesia_tar,
|
&silesia_tar,
|
||||||
|
&github,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
data_t const* const* data = (data_t const* const*)g_data;
|
data_t const* const* data = (data_t const* const*)g_data;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* data helpers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Tells whether the data object carries a dictionary resource.
 * The dictionary is considered present when its URL is set.
 *
 * @returns 1 if `data->dict.url` is non-NULL, 0 otherwise.
 */
int data_has_dict(data_t const* data) {
    char const* const dict_url = data->dict.url;

    return dict_url == NULL ? 0 : 1;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* data buffer helper functions (documented in header).
|
* data buffer helper functions (documented in header).
|
||||||
*/
|
*/
|
||||||
@ -100,16 +131,24 @@ err:
|
|||||||
free(buffer.data);
|
free(buffer.data);
|
||||||
memset(&buffer, 0, sizeof(buffer));
|
memset(&buffer, 0, sizeof(buffer));
|
||||||
return buffer;
|
return buffer;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
data_buffer_t data_buffer_get(data_t const* data) {
|
data_buffer_t data_buffer_get_data(data_t const* data) {
|
||||||
data_buffer_t const kEmptyBuffer = {};
|
data_buffer_t const kEmptyBuffer = {};
|
||||||
|
|
||||||
if (data->type != data_type_file)
|
if (data->type != data_type_file)
|
||||||
return kEmptyBuffer;
|
return kEmptyBuffer;
|
||||||
|
|
||||||
return data_buffer_read(data->path);
|
return data_buffer_read(data->data.path);
|
||||||
|
}
|
||||||
|
|
||||||
|
data_buffer_t data_buffer_get_dict(data_t const* data) {
|
||||||
|
data_buffer_t const kEmptyBuffer = {};
|
||||||
|
|
||||||
|
if (!data_has_dict(data))
|
||||||
|
return kEmptyBuffer;
|
||||||
|
|
||||||
|
return data_buffer_read(data->dict.path);
|
||||||
}
|
}
|
||||||
|
|
||||||
int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
|
int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
|
||||||
@ -124,13 +163,69 @@ int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
|
|||||||
return 0;
|
return 0;
|
||||||
assert(buffer1.size > buffer2.size);
|
assert(buffer1.size > buffer2.size);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void data_buffer_free(data_buffer_t buffer) {
|
void data_buffer_free(data_buffer_t buffer) {
|
||||||
free(buffer.data);
|
free(buffer.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* data filenames helpers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Builds the list of file names for the data's path by handing that single
 * path to UTIL_createFileList() (symbolic links are not followed).
 *
 * @returns The table returned by UTIL_createFileList() together with the
 *          backing buffer and entry count it filled in.
 */
data_filenames_t data_filenames_get(data_t const* data) {
    data_filenames_t result = {.buffer = NULL, .size = 0};
    char const* roots[1];

    /* Only one root is scanned: the unpacked data path. */
    roots[0] = data->data.path;

    result.filenames = UTIL_createFileList(
        roots, 1, &result.buffer, &result.size, /* followLinks */ 0);
    return result;
}
|
||||||
|
|
||||||
|
/**
 * Releases a file name list by passing its table and backing buffer to
 * UTIL_freeFileList().
 */
void data_filenames_free(data_filenames_t filenames) {
    char const** const table = filenames.filenames;
    char* const storage = filenames.buffer;

    UTIL_freeFileList(table, storage);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* data buffers helpers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Reads every file listed for `data` into its own buffer.
 *
 * The file list is obtained with data_filenames_get() and each entry is
 * loaded with data_buffer_read(). The temporary file list is released on
 * every exit path, and if any file cannot be read the buffers loaded so far
 * are released too, so a failed call leaves nothing allocated.
 *
 * @returns One buffer per file. The result has size 0 and a NULL array when
 *          there are no files, when allocation fails, or when a file cannot
 *          be read.
 */
data_buffers_t data_buffers_get(data_t const* data) {
    data_buffers_t buffers = {.size = 0};
    data_filenames_t filenames = data_filenames_get(data);
    data_buffer_t* buffersPtr = NULL;

    if (filenames.size == 0)
        goto out;

    /* calloc() guards the count * size multiplication against overflow. */
    buffersPtr = (data_buffer_t*)calloc(filenames.size, sizeof(data_buffer_t));
    if (buffersPtr == NULL)
        goto out;

    for (size_t i = 0; i < filenames.size; ++i) {
        buffersPtr[i] = data_buffer_read(filenames.filenames[i]);
        if (buffersPtr[i].data == NULL) {
            /* Undo the partial work: free the files already read, then the
             * array itself. The array is freed directly so that only entries
             * that were actually filled in are ever touched. */
            while (i > 0)
                data_buffer_free(buffersPtr[--i]);
            free(buffersPtr);
            goto out;
        }
    }

    /* Publish the result only once every file has been read. */
    buffers.buffers = (data_buffer_t const*)buffersPtr;
    buffers.size = filenames.size;

out:
    /* The name list is only needed while reading. */
    data_filenames_free(filenames);
    return buffers;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Frees the data buffers.
|
||||||
|
*/
|
||||||
|
void data_buffers_free(data_buffers_t buffers) {
    /* The array is stored behind a pointer-to-const, so the qualifier is
     * cast away before handing it to free(). Only the array itself is
     * released here. */
    data_buffer_t* const array = (data_buffer_t*)buffers.buffers;

    free(array);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialization and download functions.
|
* Initialization and download functions.
|
||||||
*/
|
*/
|
||||||
@ -174,18 +269,23 @@ out:
|
|||||||
static char* cat3(char const* str1, char const* str2, char const* str3) {
|
static char* cat3(char const* str1, char const* str2, char const* str3) {
|
||||||
size_t const size1 = strlen(str1);
|
size_t const size1 = strlen(str1);
|
||||||
size_t const size2 = strlen(str2);
|
size_t const size2 = strlen(str2);
|
||||||
size_t const size3 = strlen(str3);
|
size_t const size3 = str3 == NULL ? 0 : strlen(str3);
|
||||||
size_t const size = size1 + size2 + size3 + 1;
|
size_t const size = size1 + size2 + size3 + 1;
|
||||||
char* const dst = (char*)malloc(size);
|
char* const dst = (char*)malloc(size);
|
||||||
if (dst == NULL)
|
if (dst == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
strcpy(dst, str1);
|
strcpy(dst, str1);
|
||||||
strcpy(dst + size1, str2);
|
strcpy(dst + size1, str2);
|
||||||
strcpy(dst + size1 + size2, str3);
|
if (str3 != NULL)
|
||||||
|
strcpy(dst + size1 + size2, str3);
|
||||||
assert(strlen(dst) == size1 + size2 + size3);
|
assert(strlen(dst) == size1 + size2 + size3);
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Concatenates two strings by delegating to cat3() with a NULL third part.
 *
 * @returns Whatever cat3() returns for (str1, str2, NULL).
 */
static char* cat2(char const* str1, char const* str2) {
    char* const joined = cat3(str1, str2, NULL);

    return joined;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* State needed by the curl callback.
|
* State needed by the curl callback.
|
||||||
* It takes data from curl, hashes it, and writes it to the file.
|
* It takes data from curl, hashes it, and writes it to the file.
|
||||||
@ -197,16 +297,18 @@ typedef struct {
|
|||||||
} curl_data_t;
|
} curl_data_t;
|
||||||
|
|
||||||
/** Create the curl state. */
|
/** Create the curl state. */
|
||||||
static curl_data_t curl_data_create(data_t const* data) {
|
static curl_data_t curl_data_create(
|
||||||
|
data_resource_t const* resource,
|
||||||
|
data_type_t type) {
|
||||||
curl_data_t cdata = {};
|
curl_data_t cdata = {};
|
||||||
|
|
||||||
XXH64_reset(&cdata.xxhash64, 0);
|
XXH64_reset(&cdata.xxhash64, 0);
|
||||||
|
|
||||||
assert(UTIL_isDirectory(g_data_dir));
|
assert(UTIL_isDirectory(g_data_dir));
|
||||||
|
|
||||||
if (data->type == data_type_file) {
|
if (type == data_type_file) {
|
||||||
/* Decompress the resource and store to the path. */
|
/* Decompress the resource and store to the path. */
|
||||||
char* cmd = cat3("zstd -dqfo '", data->path, "'");
|
char* cmd = cat3("zstd -dqfo '", resource->path, "'");
|
||||||
if (cmd == NULL) {
|
if (cmd == NULL) {
|
||||||
cdata.error = ENOMEM;
|
cdata.error = ENOMEM;
|
||||||
return cdata;
|
return cdata;
|
||||||
@ -243,54 +345,68 @@ static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
|
|||||||
return written;
|
return written;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Download a single data object. */
|
static int curl_download_resource(
|
||||||
static int curl_download_datum(CURL* curl, data_t const* data) {
|
CURL* curl,
|
||||||
curl_data_t cdata = curl_data_create(data);
|
data_resource_t const* resource,
|
||||||
int err = EFAULT;
|
data_type_t type) {
|
||||||
|
curl_data_t cdata;
|
||||||
if (cdata.error != 0) {
|
|
||||||
err = cdata.error;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Download the data. */
|
/* Download the data. */
|
||||||
if (curl_easy_setopt(curl, CURLOPT_URL, data->url) != 0)
|
if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0)
|
||||||
goto out;
|
return EINVAL;
|
||||||
if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
|
if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
|
||||||
goto out;
|
return EINVAL;
|
||||||
if (curl_easy_perform(curl) != 0) {
|
cdata = curl_data_create(resource, type);
|
||||||
fprintf(stderr, "downloading '%s' failed\n", data->url);
|
if (cdata.error != 0)
|
||||||
goto out;
|
return cdata.error;
|
||||||
}
|
int const curl_err = curl_easy_perform(curl);
|
||||||
/* check that the file exists. */
|
int const close_err = curl_data_free(cdata);
|
||||||
if (data->type == data_type_file && !UTIL_isRegularFile(data->path)) {
|
if (curl_err) {
|
||||||
fprintf(stderr, "output file '%s' does not exist\n", data->path);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
if (data->type == data_type_dir && !UTIL_isDirectory(data->path)) {
|
|
||||||
fprintf(stderr, "output directory '%s' does not exist\n", data->path);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
/* Check that the hash matches. */
|
|
||||||
if (XXH64_digest(&cdata.xxhash64) != data->xxhash64) {
|
|
||||||
fprintf(
|
fprintf(
|
||||||
stderr,
|
stderr,
|
||||||
"checksum does not match: %llx != %llx\n",
|
"downloading '%s' for '%s' failed\n",
|
||||||
|
resource->url,
|
||||||
|
resource->path);
|
||||||
|
return EIO;
|
||||||
|
}
|
||||||
|
if (close_err) {
|
||||||
|
fprintf(stderr, "writing data to '%s' failed\n", resource->path);
|
||||||
|
return EIO;
|
||||||
|
}
|
||||||
|
/* check that the file exists. */
|
||||||
|
if (type == data_type_file && !UTIL_isRegularFile(resource->path)) {
|
||||||
|
fprintf(stderr, "output file '%s' does not exist\n", resource->path);
|
||||||
|
return EIO;
|
||||||
|
}
|
||||||
|
if (type == data_type_dir && !UTIL_isDirectory(resource->path)) {
|
||||||
|
fprintf(
|
||||||
|
stderr, "output directory '%s' does not exist\n", resource->path);
|
||||||
|
return EIO;
|
||||||
|
}
|
||||||
|
/* Check that the hash matches. */
|
||||||
|
if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) {
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"checksum does not match: 0x%llxLL != 0x%llxLL\n",
|
||||||
(unsigned long long)XXH64_digest(&cdata.xxhash64),
|
(unsigned long long)XXH64_digest(&cdata.xxhash64),
|
||||||
(unsigned long long)data->xxhash64);
|
(unsigned long long)resource->xxhash64);
|
||||||
goto out;
|
return EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = 0;
|
return 0;
|
||||||
out:
|
}
|
||||||
if (err != 0)
|
|
||||||
fprintf(stderr, "downloading '%s' failed\n", data->name);
|
/** Download a single data object. */
|
||||||
int const close_err = curl_data_free(cdata);
|
static int curl_download_datum(CURL* curl, data_t const* data) {
|
||||||
if (close_err != 0 && err == 0) {
|
int ret;
|
||||||
fprintf(stderr, "failed to write data for '%s'\n", data->name);
|
ret = curl_download_resource(curl, &data->data, data->type);
|
||||||
err = close_err;
|
if (ret != 0)
|
||||||
|
return ret;
|
||||||
|
if (data_has_dict(data)) {
|
||||||
|
ret = curl_download_resource(curl, &data->dict, data_type_file);
|
||||||
|
if (ret != 0)
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
return err;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Download all the data. */
|
/** Download all the data. */
|
||||||
@ -331,9 +447,14 @@ static int data_create_paths(data_t* const* data, char const* dir) {
|
|||||||
assert(data != NULL);
|
assert(data != NULL);
|
||||||
for (; *data != NULL; ++data) {
|
for (; *data != NULL; ++data) {
|
||||||
data_t* const datum = *data;
|
data_t* const datum = *data;
|
||||||
datum->path = cat3(dir, "/", datum->name);
|
datum->data.path = cat3(dir, "/", datum->name);
|
||||||
if (datum->path == NULL)
|
if (datum->data.path == NULL)
|
||||||
return ENOMEM;
|
return ENOMEM;
|
||||||
|
if (data_has_dict(datum)) {
|
||||||
|
datum->dict.path = cat2(datum->data.path, ".dict");
|
||||||
|
if (datum->dict.path == NULL)
|
||||||
|
return ENOMEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -343,8 +464,10 @@ static void data_free_paths(data_t* const* data) {
|
|||||||
assert(data != NULL);
|
assert(data != NULL);
|
||||||
for (; *data != NULL; ++data) {
|
for (; *data != NULL; ++data) {
|
||||||
data_t* datum = *data;
|
data_t* datum = *data;
|
||||||
free((void*)datum->path);
|
free((void*)datum->data.path);
|
||||||
datum->path = NULL;
|
free((void*)datum->dict.path);
|
||||||
|
datum->data.path = NULL;
|
||||||
|
datum->dict.path = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -367,7 +490,8 @@ static uint64_t stamp_hash(data_t const* const* data) {
|
|||||||
/* We don't care about the URL that we fetch from. */
|
/* We don't care about the URL that we fetch from. */
|
||||||
/* The path is derived from the name. */
|
/* The path is derived from the name. */
|
||||||
XXH64_update(&state, datum->name, strlen(datum->name));
|
XXH64_update(&state, datum->name, strlen(datum->name));
|
||||||
xxh_update_le(&state, datum->xxhash64);
|
xxh_update_le(&state, datum->data.xxhash64);
|
||||||
|
xxh_update_le(&state, datum->dict.xxhash64);
|
||||||
xxh_update_le(&state, datum->type);
|
xxh_update_le(&state, datum->type);
|
||||||
}
|
}
|
||||||
return XXH64_digest(&state);
|
return XXH64_digest(&state);
|
||||||
|
@ -22,10 +22,14 @@ typedef enum {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
char const* url; /**< Where to get this resource. */
|
char const* url; /**< Where to get this resource. */
|
||||||
uint64_t xxhash64; /**< Hash of the url contents. */
|
uint64_t xxhash64; /**< Hash of the url contents. */
|
||||||
char const* name; /**< The logical name of the resource (no extension). */
|
|
||||||
data_type_t type; /**< The type of this resource. */
|
|
||||||
char const* path; /**< The path of the unpacked resource (derived). */
|
char const* path; /**< The path of the unpacked resource (derived). */
|
||||||
size_t size;
|
} data_resource_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
data_resource_t data;
|
||||||
|
data_resource_t dict;
|
||||||
|
data_type_t type; /**< The type of the data. */
|
||||||
|
char const* name; /**< The logical name of the data (no extension). */
|
||||||
} data_t;
|
} data_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -33,6 +37,9 @@ typedef struct {
|
|||||||
*/
|
*/
|
||||||
extern data_t const* const* data;
|
extern data_t const* const* data;
|
||||||
|
|
||||||
|
|
||||||
|
int data_has_dict(data_t const* data);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the data module and downloads the data necessary.
|
* Initializes the data module and downloads the data necessary.
|
||||||
* Caches the downloads in dir. We add a stamp file in the directory after
|
* Caches the downloads in dir. We add a stamp file in the directory after
|
||||||
@ -62,7 +69,14 @@ typedef struct {
|
|||||||
*
|
*
|
||||||
* @returns The buffer, which is NULL on failure.
|
* @returns The buffer, which is NULL on failure.
|
||||||
*/
|
*/
|
||||||
data_buffer_t data_buffer_get(data_t const* data);
|
data_buffer_t data_buffer_get_data(data_t const* data);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the dictionary that the data points to into a buffer.
|
||||||
|
*
|
||||||
|
* @returns The buffer, which is NULL on failure.
|
||||||
|
*/
|
||||||
|
data_buffer_t data_buffer_get_dict(data_t const* data);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read the contents of filename into a buffer.
|
* Read the contents of filename into a buffer.
|
||||||
@ -88,5 +102,39 @@ int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
|
|||||||
*/
|
*/
|
||||||
void data_buffer_free(data_buffer_t buffer);
|
void data_buffer_free(data_buffer_t buffer);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char* buffer;
|
||||||
|
char const** filenames;
|
||||||
|
unsigned size;
|
||||||
|
} data_filenames_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a recursive list of filenames in the data object. If it is a file, it
|
||||||
|
* will only contain one entry. If it is a directory, it will recursively walk
|
||||||
|
* the directory.
|
||||||
|
*
|
||||||
|
* @returns The list of filenames, which has size 0 and NULL pointers on error.
|
||||||
|
*/
|
||||||
|
data_filenames_t data_filenames_get(data_t const* data);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Frees the filenames table.
|
||||||
|
*/
|
||||||
|
void data_filenames_free(data_filenames_t filenames);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
data_buffer_t const* buffers;
|
||||||
|
size_t size;
|
||||||
|
} data_buffers_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @returns a list of buffers for every file in data. It is zero sized on error.
|
||||||
|
*/
|
||||||
|
data_buffers_t data_buffers_get(data_t const* data);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Frees the data buffers.
|
||||||
|
*/
|
||||||
|
void data_buffers_free(data_buffers_t buffers);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -26,62 +26,88 @@ void method_set_zstdcli(char const* zstdcli) {
|
|||||||
* the given name, member.
|
* the given name, member.
|
||||||
*
|
*
|
||||||
* method_state_t* base = ...;
|
* method_state_t* base = ...;
|
||||||
* simple_state_t* state = container_of(base, simple_state_t, base);
|
* buffer_state_t* state = container_of(base, buffer_state_t, base);
|
||||||
*/
|
*/
|
||||||
#define container_of(ptr, type, member) \
|
#define container_of(ptr, type, member) \
|
||||||
((type*)(char*)(ptr)-offsetof(type, member))
|
((type*)(ptr == NULL ? NULL : (char*)(ptr)-offsetof(type, member)))
|
||||||
|
|
||||||
/** State to reuse the same buffers between compression calls. */
|
/** State to reuse the same buffers between compression calls. */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
method_state_t base;
|
method_state_t base;
|
||||||
data_buffer_t buffer; /**< The constant input data buffer. */
|
data_buffers_t inputs; /**< The input buffer for each file. */
|
||||||
data_buffer_t compressed; /**< The compressed data buffer. */
|
data_buffer_t compressed; /**< The compressed data buffer. */
|
||||||
data_buffer_t decompressed; /**< The decompressed data buffer. */
|
data_buffer_t decompressed; /**< The decompressed data buffer. */
|
||||||
} simple_state_t;
|
} buffer_state_t;
|
||||||
|
|
||||||
static method_state_t* simple_create(data_t const* data) {
|
static size_t buffers_max_size(data_buffers_t buffers) {
|
||||||
simple_state_t* state = (simple_state_t*)calloc(1, sizeof(simple_state_t));
|
size_t max = 0;
|
||||||
|
for (size_t i = 0; i < buffers.size; ++i) {
|
||||||
|
if (buffers.buffers[i].size > max)
|
||||||
|
max = buffers.buffers[i].size;
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
static method_state_t* buffer_state_create(data_t const* data) {
|
||||||
|
buffer_state_t* state = (buffer_state_t*)calloc(1, sizeof(buffer_state_t));
|
||||||
if (state == NULL)
|
if (state == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
state->base.data = data;
|
state->base.data = data;
|
||||||
state->buffer = data_buffer_get(data);
|
state->inputs = data_buffers_get(data);
|
||||||
state->compressed =
|
size_t const max_size = buffers_max_size(state->inputs);
|
||||||
data_buffer_create(ZSTD_compressBound(state->buffer.size));
|
state->compressed = data_buffer_create(ZSTD_compressBound(max_size));
|
||||||
state->decompressed = data_buffer_create(state->buffer.size);
|
state->decompressed = data_buffer_create(max_size);
|
||||||
return &state->base;
|
return &state->base;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void simple_destroy(method_state_t* base) {
|
static void buffer_state_destroy(method_state_t* base) {
|
||||||
if (base == NULL)
|
if (base == NULL)
|
||||||
return;
|
return;
|
||||||
simple_state_t* state = container_of(base, simple_state_t, base);
|
buffer_state_t* state = container_of(base, buffer_state_t, base);
|
||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static result_t simple_compress(method_state_t* base, config_t const* config) {
|
static int buffer_state_bad(buffer_state_t const* state) {
|
||||||
if (base == NULL)
|
if (state == NULL) {
|
||||||
return result_error(result_error_system_error);
|
fprintf(stderr, "buffer_state_t is NULL\n");
|
||||||
simple_state_t* state = container_of(base, simple_state_t, base);
|
return 1;
|
||||||
|
}
|
||||||
|
if (state->inputs.size == 0 || state->compressed.data == NULL ||
|
||||||
|
state->decompressed.data == NULL) {
|
||||||
|
fprintf(stderr, "buffer state allocation failure\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static result_t simple_compress(method_state_t* base, config_t const* config) {
|
||||||
|
buffer_state_t* state = container_of(base, buffer_state_t, base);
|
||||||
|
|
||||||
|
if (buffer_state_bad(state))
|
||||||
|
return result_error(result_error_system_error);
|
||||||
|
|
||||||
|
/* Keep the tests short by skipping directories, since behavior shouldn't
|
||||||
|
* change.
|
||||||
|
*/
|
||||||
if (base->data->type != data_type_file)
|
if (base->data->type != data_type_file)
|
||||||
return result_error(result_error_skip);
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
if (state->buffer.data == NULL || state->compressed.data == NULL ||
|
if (config->use_dictionary || config->no_pledged_src_size)
|
||||||
state->decompressed.data == NULL) {
|
return result_error(result_error_skip);
|
||||||
return result_error(result_error_system_error);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If the config doesn't specify a level, skip. */
|
/* If the config doesn't specify a level, skip. */
|
||||||
int const level = config_get_level(config);
|
int const level = config_get_level(config);
|
||||||
if (level == CONFIG_NO_LEVEL)
|
if (level == CONFIG_NO_LEVEL)
|
||||||
return result_error(result_error_skip);
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
|
data_buffer_t const input = state->inputs.buffers[0];
|
||||||
|
|
||||||
/* Compress, decompress, and check the result. */
|
/* Compress, decompress, and check the result. */
|
||||||
state->compressed.size = ZSTD_compress(
|
state->compressed.size = ZSTD_compress(
|
||||||
state->compressed.data,
|
state->compressed.data,
|
||||||
state->compressed.capacity,
|
state->compressed.capacity,
|
||||||
state->buffer.data,
|
input.data,
|
||||||
state->buffer.size,
|
input.size,
|
||||||
level);
|
level);
|
||||||
if (ZSTD_isError(state->compressed.size))
|
if (ZSTD_isError(state->compressed.size))
|
||||||
return result_error(result_error_compression_error);
|
return result_error(result_error_compression_error);
|
||||||
@ -93,7 +119,7 @@ static result_t simple_compress(method_state_t* base, config_t const* config) {
|
|||||||
state->compressed.size);
|
state->compressed.size);
|
||||||
if (ZSTD_isError(state->decompressed.size))
|
if (ZSTD_isError(state->decompressed.size))
|
||||||
return result_error(result_error_decompression_error);
|
return result_error(result_error_decompression_error);
|
||||||
if (data_buffer_compare(state->buffer, state->decompressed))
|
if (data_buffer_compare(input, state->decompressed))
|
||||||
return result_error(result_error_round_trip_error);
|
return result_error(result_error_round_trip_error);
|
||||||
|
|
||||||
result_data_t data;
|
result_data_t data;
|
||||||
@ -101,6 +127,70 @@ static result_t simple_compress(method_state_t* base, config_t const* config) {
|
|||||||
return result_data(data);
|
return result_data(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static result_t compress_cctx_compress(
|
||||||
|
method_state_t* base,
|
||||||
|
config_t const* config) {
|
||||||
|
buffer_state_t* state = container_of(base, buffer_state_t, base);
|
||||||
|
|
||||||
|
if (buffer_state_bad(state))
|
||||||
|
return result_error(result_error_system_error);
|
||||||
|
|
||||||
|
if (config->use_dictionary || config->no_pledged_src_size)
|
||||||
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
|
if (base->data->type != data_type_dir)
|
||||||
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
|
int const level = config_get_level(config);
|
||||||
|
if (level == CONFIG_NO_LEVEL)
|
||||||
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
|
ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||||
|
if (cctx == NULL) {
|
||||||
|
fprintf(stderr, "ZSTD_createCCtx() failed\n");
|
||||||
|
return result_error(result_error_system_error);
|
||||||
|
}
|
||||||
|
|
||||||
|
result_t result;
|
||||||
|
result_data_t data = {.total_size = 0};
|
||||||
|
for (size_t i = 0; i < state->inputs.size; ++i) {
|
||||||
|
data_buffer_t const input = state->inputs.buffers[i];
|
||||||
|
|
||||||
|
state->compressed.size = ZSTD_compressCCtx(
|
||||||
|
cctx,
|
||||||
|
state->compressed.data,
|
||||||
|
state->compressed.capacity,
|
||||||
|
input.data,
|
||||||
|
input.size,
|
||||||
|
level);
|
||||||
|
if (ZSTD_isError(state->compressed.size)) {
|
||||||
|
result = result_error(result_error_compression_error);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
state->decompressed.size = ZSTD_decompress(
|
||||||
|
state->decompressed.data,
|
||||||
|
state->decompressed.capacity,
|
||||||
|
state->compressed.data,
|
||||||
|
state->compressed.size);
|
||||||
|
if (ZSTD_isError(state->decompressed.size)) {
|
||||||
|
result = result_error(result_error_decompression_error);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (data_buffer_compare(input, state->decompressed)) {
|
||||||
|
result = result_error(result_error_round_trip_error);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
data.total_size += state->compressed.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = result_data(data);
|
||||||
|
out:
|
||||||
|
ZSTD_freeCCtx(cctx);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/** Generic state creation function. */
|
/** Generic state creation function. */
|
||||||
static method_state_t* method_state_create(data_t const* data) {
|
static method_state_t* method_state_create(data_t const* data) {
|
||||||
method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t));
|
method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t));
|
||||||
@ -114,26 +204,32 @@ static void method_state_destroy(method_state_t* state) {
|
|||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_OUT 32
|
static result_t cli_compress(
|
||||||
|
|
||||||
static result_t cli_file_compress(
|
|
||||||
method_state_t* state,
|
method_state_t* state,
|
||||||
config_t const* config) {
|
config_t const* config) {
|
||||||
if (config->cli_args == NULL)
|
if (config->cli_args == NULL)
|
||||||
return result_error(result_error_skip);
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
|
/* We don't support no pledged source size with directories. Too slow. */
|
||||||
|
if (state->data->type == data_type_dir && config->no_pledged_src_size)
|
||||||
|
return result_error(result_error_skip);
|
||||||
|
|
||||||
if (g_zstdcli == NULL)
|
if (g_zstdcli == NULL)
|
||||||
return result_error(result_error_system_error);
|
return result_error(result_error_system_error);
|
||||||
|
|
||||||
/* '<zstd>' -r <args> '<file/dir>' | wc -c */
|
/* '<zstd>' -cqr <args> [-D '<dict>'] '<file/dir>' */
|
||||||
char cmd[1024];
|
char cmd[1024];
|
||||||
size_t const cmd_size = snprintf(
|
size_t const cmd_size = snprintf(
|
||||||
cmd,
|
cmd,
|
||||||
sizeof(cmd),
|
sizeof(cmd),
|
||||||
"'%s' -cqr %s '%s' | wc -c",
|
"'%s' -cqr %s %s%s%s %s '%s'",
|
||||||
g_zstdcli,
|
g_zstdcli,
|
||||||
config->cli_args,
|
config->cli_args,
|
||||||
state->data->path);
|
config->use_dictionary ? "-D '" : "",
|
||||||
|
config->use_dictionary ? state->data->dict.path : "",
|
||||||
|
config->use_dictionary ? "'" : "",
|
||||||
|
config->no_pledged_src_size ? "<" : "",
|
||||||
|
state->data->data.path);
|
||||||
if (cmd_size >= sizeof(cmd)) {
|
if (cmd_size >= sizeof(cmd)) {
|
||||||
fprintf(stderr, "command too large: %s\n", cmd);
|
fprintf(stderr, "command too large: %s\n", cmd);
|
||||||
return result_error(result_error_system_error);
|
return result_error(result_error_system_error);
|
||||||
@ -144,42 +240,48 @@ static result_t cli_file_compress(
|
|||||||
return result_error(result_error_system_error);
|
return result_error(result_error_system_error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read the total compressed size. */
|
char out[4096];
|
||||||
char out[MAX_OUT + 1];
|
size_t total_size = 0;
|
||||||
size_t const out_size = fread(out, 1, MAX_OUT, zstd);
|
while (1) {
|
||||||
out[out_size] = '\0';
|
size_t const size = fread(out, 1, sizeof(out), zstd);
|
||||||
int const zstd_ret = pclose(zstd);
|
total_size += size;
|
||||||
if (zstd_ret != 0) {
|
if (size != sizeof(out))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ferror(zstd) || pclose(zstd) != 0) {
|
||||||
fprintf(stderr, "zstd failed with command: %s\n", cmd);
|
fprintf(stderr, "zstd failed with command: %s\n", cmd);
|
||||||
return result_error(result_error_compression_error);
|
return result_error(result_error_compression_error);
|
||||||
}
|
}
|
||||||
if (out_size == MAX_OUT) {
|
|
||||||
fprintf(stderr, "wc -c produced more bytes than expected: %s\n", out);
|
|
||||||
return result_error(result_error_system_error);
|
|
||||||
}
|
|
||||||
|
|
||||||
result_data_t data;
|
result_data_t const data = {.total_size = total_size};
|
||||||
data.total_size = atoll(out);
|
|
||||||
return result_data(data);
|
return result_data(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
method_t const simple = {
|
method_t const simple = {
|
||||||
.name = "simple",
|
.name = "ZSTD_compress",
|
||||||
.create = simple_create,
|
.create = buffer_state_create,
|
||||||
.compress = simple_compress,
|
.compress = simple_compress,
|
||||||
.destroy = simple_destroy,
|
.destroy = buffer_state_destroy,
|
||||||
};
|
};
|
||||||
|
|
||||||
method_t const cli_file = {
|
method_t const compress_cctx = {
|
||||||
.name = "cli file",
|
.name = "ZSTD_compressCCtx",
|
||||||
|
.create = buffer_state_create,
|
||||||
|
.compress = compress_cctx_compress,
|
||||||
|
.destroy = buffer_state_destroy,
|
||||||
|
};
|
||||||
|
|
||||||
|
method_t const cli = {
|
||||||
|
.name = "zstdcli",
|
||||||
.create = method_state_create,
|
.create = method_state_create,
|
||||||
.compress = cli_file_compress,
|
.compress = cli_compress,
|
||||||
.destroy = method_state_destroy,
|
.destroy = method_state_destroy,
|
||||||
};
|
};
|
||||||
|
|
||||||
static method_t const* g_methods[] = {
|
static method_t const* g_methods[] = {
|
||||||
&simple,
|
&simple,
|
||||||
&cli_file,
|
&compress_cctx,
|
||||||
|
&cli,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,43 +1,101 @@
|
|||||||
Data, Config, Method, Total compressed size
|
Data, Config, Method, Total compressed size
|
||||||
silesia.tar, level -5, simple, 106176430
|
This line is intentionally added to see how the nightly job reports failures
|
||||||
silesia.tar, level -3, simple, 98476550
|
silesia.tar, level -5, ZSTD_compress, 7160438
|
||||||
silesia.tar, level -1, simple, 87206767
|
silesia.tar, level -3, ZSTD_compress, 6789024
|
||||||
silesia.tar, level 0, simple, 66996953
|
silesia.tar, level -1, ZSTD_compress, 6195462
|
||||||
silesia.tar, level 1, simple, 73658303
|
silesia.tar, level 0, ZSTD_compress, 4875071
|
||||||
silesia.tar, level 3, simple, 66996953
|
silesia.tar, level 1, ZSTD_compress, 5339697
|
||||||
silesia.tar, level 4, simple, 65996020
|
silesia.tar, level 3, ZSTD_compress, 4875071
|
||||||
silesia.tar, level 5, simple, 64421326
|
silesia.tar, level 4, ZSTD_compress, 4813104
|
||||||
silesia.tar, level 6, simple, 62388673
|
silesia.tar, level 5, ZSTD_compress, 4726961
|
||||||
silesia.tar, level 7, simple, 61159525
|
silesia.tar, level 6, ZSTD_compress, 4654401
|
||||||
silesia.tar, level 9, simple, 60214921
|
silesia.tar, level 7, ZSTD_compress, 4591933
|
||||||
silesia.tar, level 13, simple, 58428642
|
silesia.tar, level 9, ZSTD_compress, 4554098
|
||||||
silesia.tar, level 16, simple, 56363759
|
silesia.tar, level 13, ZSTD_compress, 4503496
|
||||||
silesia.tar, level 19, simple, 53274173
|
silesia.tar, level 16, ZSTD_compress, 4387233
|
||||||
silesia, level -5, cli file, 106202112
|
silesia.tar, level 19, ZSTD_compress, 4283123
|
||||||
silesia, level -3, cli file, 98518660
|
silesia, level -5, ZSTD_compressCCtx, 7152294
|
||||||
silesia, level -1, cli file, 87226203
|
silesia, level -3, ZSTD_compressCCtx, 6789969
|
||||||
silesia, level 0, cli file, 67049190
|
silesia, level -1, ZSTD_compressCCtx, 6191548
|
||||||
silesia, level 1, cli file, 73676282
|
silesia, level 0, ZSTD_compressCCtx, 4862377
|
||||||
silesia, level 3, cli file, 67049190
|
silesia, level 1, ZSTD_compressCCtx, 5318036
|
||||||
silesia, level 4, cli file, 66090040
|
silesia, level 3, ZSTD_compressCCtx, 4862377
|
||||||
silesia, level 5, cli file, 64503721
|
silesia, level 4, ZSTD_compressCCtx, 4800629
|
||||||
silesia, level 6, cli file, 62446177
|
silesia, level 5, ZSTD_compressCCtx, 4715005
|
||||||
silesia, level 7, cli file, 61217029
|
silesia, level 6, ZSTD_compressCCtx, 4644055
|
||||||
silesia, level 9, cli file, 60282841
|
silesia, level 7, ZSTD_compressCCtx, 4581559
|
||||||
silesia, level 13, cli file, 58480658
|
silesia, level 9, ZSTD_compressCCtx, 4543862
|
||||||
silesia, level 16, cli file, 56414170
|
silesia, level 13, ZSTD_compressCCtx, 4493931
|
||||||
silesia, level 19, cli file, 53365292
|
silesia, level 16, ZSTD_compressCCtx, 4381885
|
||||||
silesia.tar, level -5, cli file, 106250113
|
silesia, level 19, ZSTD_compressCCtx, 4296899
|
||||||
silesia.tar, level -3, cli file, 98550747
|
github, level -5, ZSTD_compressCCtx, 232744
|
||||||
silesia.tar, level -1, cli file, 87227322
|
github, level -3, ZSTD_compressCCtx, 220611
|
||||||
silesia.tar, level 0, cli file, 67111168
|
github, level -1, ZSTD_compressCCtx, 176575
|
||||||
silesia.tar, level 1, cli file, 73694374
|
github, level 0, ZSTD_compressCCtx, 136397
|
||||||
silesia.tar, level 3, cli file, 67111168
|
github, level 1, ZSTD_compressCCtx, 143457
|
||||||
silesia.tar, level 4, cli file, 66154079
|
github, level 3, ZSTD_compressCCtx, 136397
|
||||||
silesia.tar, level 5, cli file, 64546998
|
github, level 4, ZSTD_compressCCtx, 136144
|
||||||
silesia.tar, level 6, cli file, 62458454
|
github, level 5, ZSTD_compressCCtx, 135106
|
||||||
silesia.tar, level 7, cli file, 61231085
|
github, level 6, ZSTD_compressCCtx, 135108
|
||||||
silesia.tar, level 9, cli file, 60310313
|
github, level 7, ZSTD_compressCCtx, 135108
|
||||||
silesia.tar, level 13, cli file, 58517476
|
github, level 9, ZSTD_compressCCtx, 135108
|
||||||
silesia.tar, level 16, cli file, 56448694
|
github, level 13, ZSTD_compressCCtx, 133741
|
||||||
silesia.tar, level 19, cli file, 53444920
|
github, level 16, ZSTD_compressCCtx, 133741
|
||||||
|
github, level 19, ZSTD_compressCCtx, 133717
|
||||||
|
silesia, level -5, zstdcli, 7152342
|
||||||
|
silesia, level -3, zstdcli, 6790021
|
||||||
|
silesia, level -1, zstdcli, 6191597
|
||||||
|
silesia, level 0, zstdcli, 4862425
|
||||||
|
silesia, level 1, zstdcli, 5318084
|
||||||
|
silesia, level 3, zstdcli, 4862425
|
||||||
|
silesia, level 4, zstdcli, 4800677
|
||||||
|
silesia, level 5, zstdcli, 4715053
|
||||||
|
silesia, level 6, zstdcli, 4644103
|
||||||
|
silesia, level 7, zstdcli, 4581607
|
||||||
|
silesia, level 9, zstdcli, 4543910
|
||||||
|
silesia, level 13, zstdcli, 4493979
|
||||||
|
silesia, level 16, zstdcli, 4381933
|
||||||
|
silesia, level 19, zstdcli, 4296947
|
||||||
|
silesia.tar, level -5, zstdcli, 7159586
|
||||||
|
silesia.tar, level -3, zstdcli, 6791018
|
||||||
|
silesia.tar, level -1, zstdcli, 6196283
|
||||||
|
silesia.tar, level 0, zstdcli, 4876730
|
||||||
|
silesia.tar, level 1, zstdcli, 5340312
|
||||||
|
silesia.tar, level 3, zstdcli, 4876730
|
||||||
|
silesia.tar, level 4, zstdcli, 4817723
|
||||||
|
silesia.tar, level 5, zstdcli, 4730389
|
||||||
|
silesia.tar, level 6, zstdcli, 4655708
|
||||||
|
silesia.tar, level 7, zstdcli, 4593407
|
||||||
|
silesia.tar, level 9, zstdcli, 4556135
|
||||||
|
silesia.tar, level 13, zstdcli, 4503500
|
||||||
|
silesia.tar, level 16, zstdcli, 4387237
|
||||||
|
silesia.tar, level 19, zstdcli, 4283127
|
||||||
|
silesia.tar, no source size, zstdcli, 4876726
|
||||||
|
github, level -5, zstdcli, 234744
|
||||||
|
github, level -5 with dict, zstdcli, 47528
|
||||||
|
github, level -3, zstdcli, 222611
|
||||||
|
github, level -3 with dict, zstdcli, 46394
|
||||||
|
github, level -1, zstdcli, 178575
|
||||||
|
github, level -1 with dict, zstdcli, 43401
|
||||||
|
github, level 0, zstdcli, 138397
|
||||||
|
github, level 0 with dict, zstdcli, 40316
|
||||||
|
github, level 1, zstdcli, 145457
|
||||||
|
github, level 1 with dict, zstdcli, 43242
|
||||||
|
github, level 3, zstdcli, 138397
|
||||||
|
github, level 3 with dict, zstdcli, 40316
|
||||||
|
github, level 4, zstdcli, 138144
|
||||||
|
github, level 4 with dict, zstdcli, 40292
|
||||||
|
github, level 5, zstdcli, 137106
|
||||||
|
github, level 5 with dict, zstdcli, 40938
|
||||||
|
github, level 6, zstdcli, 137108
|
||||||
|
github, level 6 with dict, zstdcli, 40632
|
||||||
|
github, level 7, zstdcli, 137108
|
||||||
|
github, level 7 with dict, zstdcli, 40766
|
||||||
|
github, level 9, zstdcli, 137108
|
||||||
|
github, level 9 with dict, zstdcli, 41326
|
||||||
|
github, level 13, zstdcli, 135741
|
||||||
|
github, level 13 with dict, zstdcli, 41670
|
||||||
|
github, level 16, zstdcli, 135741
|
||||||
|
github, level 16 with dict, zstdcli, 39940
|
||||||
|
github, level 19, zstdcli, 135717
|
||||||
|
github, level 19 with dict, zstdcli, 39576
|
||||||
|
|
@ -17,10 +17,15 @@
|
|||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "method.h"
|
#include "method.h"
|
||||||
|
|
||||||
/** Check if a name contains a comma. */
|
static int g_max_name_len = 0;
|
||||||
|
|
||||||
|
/** Check if a name contains a comma or is too long. */
|
||||||
static int is_name_bad(char const* name) {
|
static int is_name_bad(char const* name) {
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
|
int const len = strlen(name);
|
||||||
|
if (len > g_max_name_len)
|
||||||
|
g_max_name_len = len;
|
||||||
for (; *name != '\0'; ++name)
|
for (; *name != '\0'; ++name)
|
||||||
if (*name == ',')
|
if (*name == ',')
|
||||||
return 1;
|
return 1;
|
||||||
@ -47,57 +52,6 @@ static int are_names_bad() {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Helper macro to print to stderr and a file. */
|
|
||||||
#define tprintf(file, ...) \
|
|
||||||
do { \
|
|
||||||
fprintf(file, __VA_ARGS__); \
|
|
||||||
fprintf(stderr, __VA_ARGS__); \
|
|
||||||
} while (0)
|
|
||||||
/** Helper macro to flush stderr and a file. */
|
|
||||||
#define tflush(file) \
|
|
||||||
do { \
|
|
||||||
fflush(file); \
|
|
||||||
fflush(stderr); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run all the regression tests and record the results table to results and
|
|
||||||
* stderr progressively.
|
|
||||||
*/
|
|
||||||
static int run_all(FILE* results) {
|
|
||||||
tprintf(results, "Data,\tConfig,\tMethod,\tTotal compressed size\n");
|
|
||||||
for (size_t method = 0; methods[method] != NULL; ++method) {
|
|
||||||
for (size_t datum = 0; data[datum] != NULL; ++datum) {
|
|
||||||
/* Create the state common to all configs */
|
|
||||||
method_state_t* state = methods[method]->create(data[datum]);
|
|
||||||
for (size_t config = 0; configs[config] != NULL; ++config) {
|
|
||||||
/* Print the result for the (method, data, config) tuple. */
|
|
||||||
result_t const result =
|
|
||||||
methods[method]->compress(state, configs[config]);
|
|
||||||
if (result_is_skip(result))
|
|
||||||
continue;
|
|
||||||
tprintf(
|
|
||||||
results,
|
|
||||||
"%s,\t%s,\t%s,\t",
|
|
||||||
data[datum]->name,
|
|
||||||
configs[config]->name,
|
|
||||||
methods[method]->name);
|
|
||||||
if (result_is_error(result)) {
|
|
||||||
tprintf(results, "%s\n", result_get_error_string(result));
|
|
||||||
} else {
|
|
||||||
tprintf(
|
|
||||||
results,
|
|
||||||
"%llu\n",
|
|
||||||
(unsigned long long)result_get_data(result).total_size);
|
|
||||||
}
|
|
||||||
tflush(results);
|
|
||||||
}
|
|
||||||
methods[method]->destroy(state);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Option parsing using getopt.
|
* Option parsing using getopt.
|
||||||
* When you add a new option update: long_options, long_extras, and
|
* When you add a new option update: long_options, long_extras, and
|
||||||
@ -109,6 +63,9 @@ static char const* g_output = NULL;
|
|||||||
static char const* g_diff = NULL;
|
static char const* g_diff = NULL;
|
||||||
static char const* g_cache = NULL;
|
static char const* g_cache = NULL;
|
||||||
static char const* g_zstdcli = NULL;
|
static char const* g_zstdcli = NULL;
|
||||||
|
static char const* g_config = NULL;
|
||||||
|
static char const* g_data = NULL;
|
||||||
|
static char const* g_method = NULL;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
required_option,
|
required_option,
|
||||||
@ -120,19 +77,22 @@ typedef enum {
|
|||||||
* Extra state that we need to keep per-option that we can't store in getopt.
|
* Extra state that we need to keep per-option that we can't store in getopt.
|
||||||
*/
|
*/
|
||||||
struct option_extra {
|
struct option_extra {
|
||||||
int id; /**< The short option name, used as an id. */
|
int id; /**< The short option name, used as an id. */
|
||||||
char const* help; /**< The help message. */
|
char const* help; /**< The help message. */
|
||||||
option_type opt_type; /**< The option type: required, optional, or help. */
|
option_type opt_type; /**< The option type: required, optional, or help. */
|
||||||
char const** value; /**< The value to set or NULL if no_argument. */
|
char const** value; /**< The value to set or NULL if no_argument. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/** The options. */
|
/** The options. */
|
||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
{"cache", required_argument, NULL, 'c'},
|
{"cache", required_argument, NULL, 'c'},
|
||||||
{"diff", required_argument, NULL, 'd'},
|
|
||||||
{"help", no_argument, NULL, 'h'},
|
|
||||||
{"output", required_argument, NULL, 'o'},
|
{"output", required_argument, NULL, 'o'},
|
||||||
{"zstd", required_argument, NULL, 'z'},
|
{"zstd", required_argument, NULL, 'z'},
|
||||||
|
{"config", required_argument, NULL, 128},
|
||||||
|
{"data", required_argument, NULL, 129},
|
||||||
|
{"method", required_argument, NULL, 130},
|
||||||
|
{"diff", required_argument, NULL, 'd'},
|
||||||
|
{"help", no_argument, NULL, 'h'},
|
||||||
};
|
};
|
||||||
|
|
||||||
static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
|
static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
|
||||||
@ -140,10 +100,13 @@ static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
|
|||||||
/** The extra info for the options. Must be in the same order as the options. */
|
/** The extra info for the options. Must be in the same order as the options. */
|
||||||
static struct option_extra long_extras[] = {
|
static struct option_extra long_extras[] = {
|
||||||
{'c', "the cache directory", required_option, &g_cache},
|
{'c', "the cache directory", required_option, &g_cache},
|
||||||
{'d', "compare the results to this file", optional_option, &g_diff},
|
|
||||||
{'h', "display this message", help_option, NULL},
|
|
||||||
{'o', "write the results here", required_option, &g_output},
|
{'o', "write the results here", required_option, &g_output},
|
||||||
{'z', "zstd cli tool", required_option, &g_zstdcli},
|
{'z', "zstd cli tool", required_option, &g_zstdcli},
|
||||||
|
{128, "use this config", optional_option, &g_config},
|
||||||
|
{129, "use this data", optional_option, &g_data},
|
||||||
|
{130, "use this method", optional_option, &g_method},
|
||||||
|
{'d', "compare the results to this file", optional_option, &g_diff},
|
||||||
|
{'h', "display this message", help_option, NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
/** The short options. Must correspond to the options. */
|
/** The short options. Must correspond to the options. */
|
||||||
@ -169,14 +132,24 @@ static void print_help(void) {
|
|||||||
fprintf(stderr, "regression test runner\n");
|
fprintf(stderr, "regression test runner\n");
|
||||||
size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
|
size_t const nargs = sizeof(long_options) / sizeof(long_options[0]);
|
||||||
for (size_t i = 0; i < nargs; ++i) {
|
for (size_t i = 0; i < nargs; ++i) {
|
||||||
/* Short / long - help [option type] */
|
if (long_options[i].val < 128) {
|
||||||
fprintf(
|
/* Long / short - help [option type] */
|
||||||
stderr,
|
fprintf(
|
||||||
"-%c / --%s \t- %s %s\n",
|
stderr,
|
||||||
long_options[i].val,
|
"--%s / -%c \t- %s %s\n",
|
||||||
long_options[i].name,
|
long_options[i].name,
|
||||||
long_extras[i].help,
|
long_options[i].val,
|
||||||
required_message(long_extras[i].opt_type));
|
long_extras[i].help,
|
||||||
|
required_message(long_extras[i].opt_type));
|
||||||
|
} else {
|
||||||
|
/* Short / long - help [option type] */
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"--%s \t- %s %s\n",
|
||||||
|
long_options[i].name,
|
||||||
|
long_extras[i].help,
|
||||||
|
required_message(long_extras[i].opt_type));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -220,8 +193,7 @@ static int parse_args(int argc, char** argv) {
|
|||||||
continue;
|
continue;
|
||||||
fprintf(
|
fprintf(
|
||||||
stderr,
|
stderr,
|
||||||
"-%c / --%s is a required argument but is not set\n",
|
"--%s is a required argument but is not set\n",
|
||||||
long_options[i].val,
|
|
||||||
long_options[i].name);
|
long_options[i].name);
|
||||||
bad = 1;
|
bad = 1;
|
||||||
}
|
}
|
||||||
@ -234,6 +206,88 @@ static int parse_args(int argc, char** argv) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Helper macro to print to stderr and a file. */
|
||||||
|
#define tprintf(file, ...) \
|
||||||
|
do { \
|
||||||
|
fprintf(file, __VA_ARGS__); \
|
||||||
|
fprintf(stderr, __VA_ARGS__); \
|
||||||
|
} while (0)
|
||||||
|
/** Helper macro to flush stderr and a file. */
|
||||||
|
#define tflush(file) \
|
||||||
|
do { \
|
||||||
|
fflush(file); \
|
||||||
|
fflush(stderr); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
void tprint_names(
|
||||||
|
FILE* results,
|
||||||
|
char const* data_name,
|
||||||
|
char const* config_name,
|
||||||
|
char const* method_name) {
|
||||||
|
int const data_padding = g_max_name_len - strlen(data_name);
|
||||||
|
int const config_padding = g_max_name_len - strlen(config_name);
|
||||||
|
int const method_padding = g_max_name_len - strlen(method_name);
|
||||||
|
|
||||||
|
tprintf(
|
||||||
|
results,
|
||||||
|
"%s, %*s%s, %*s%s, %*s",
|
||||||
|
data_name,
|
||||||
|
data_padding,
|
||||||
|
"",
|
||||||
|
config_name,
|
||||||
|
config_padding,
|
||||||
|
"",
|
||||||
|
method_name,
|
||||||
|
method_padding,
|
||||||
|
"");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run all the regression tests and record the results table to results and
|
||||||
|
* stderr progressively.
|
||||||
|
*/
|
||||||
|
static int run_all(FILE* results) {
|
||||||
|
tprint_names(results, "Data", "Config", "Method");
|
||||||
|
tprintf(results, "Total compressed size\n");
|
||||||
|
for (size_t method = 0; methods[method] != NULL; ++method) {
|
||||||
|
if (g_method != NULL && strcmp(methods[method]->name, g_method))
|
||||||
|
continue;
|
||||||
|
for (size_t datum = 0; data[datum] != NULL; ++datum) {
|
||||||
|
if (g_data != NULL && strcmp(data[datum]->name, g_data))
|
||||||
|
continue;
|
||||||
|
/* Create the state common to all configs */
|
||||||
|
method_state_t* state = methods[method]->create(data[datum]);
|
||||||
|
for (size_t config = 0; configs[config] != NULL; ++config) {
|
||||||
|
if (g_config != NULL && strcmp(configs[config]->name, g_config))
|
||||||
|
continue;
|
||||||
|
if (config_skip_data(configs[config], data[datum]))
|
||||||
|
continue;
|
||||||
|
/* Print the result for the (method, data, config) tuple. */
|
||||||
|
result_t const result =
|
||||||
|
methods[method]->compress(state, configs[config]);
|
||||||
|
if (result_is_skip(result))
|
||||||
|
continue;
|
||||||
|
tprint_names(
|
||||||
|
results,
|
||||||
|
data[datum]->name,
|
||||||
|
configs[config]->name,
|
||||||
|
methods[method]->name);
|
||||||
|
if (result_is_error(result)) {
|
||||||
|
tprintf(results, "%s\n", result_get_error_string(result));
|
||||||
|
} else {
|
||||||
|
tprintf(
|
||||||
|
results,
|
||||||
|
"%llu\n",
|
||||||
|
(unsigned long long)result_get_data(result).total_size);
|
||||||
|
}
|
||||||
|
tflush(results);
|
||||||
|
}
|
||||||
|
methods[method]->destroy(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/** memcmp() the old results file and the new results file. */
|
/** memcmp() the old results file and the new results file. */
|
||||||
static int diff_results(char const* actual_file, char const* expected_file) {
|
static int diff_results(char const* actual_file, char const* expected_file) {
|
||||||
data_buffer_t const actual = data_buffer_read(actual_file);
|
data_buffer_t const actual = data_buffer_read(actual_file);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user