Merge pull request #128 from Cyan4973/v051

V051
dev
Yann Collet 2016-02-17 17:18:28 +01:00
commit 0112ebf124
59 changed files with 4839 additions and 4859 deletions

6
.gitignore vendored
View File

@ -44,3 +44,9 @@ ipch/
# Other files
.directory
_codelite
_zstdbench
lib/zstd_opt_LZ5.c
lib/zstd_opt_llen.c
lib/zstd_opt_nollen.c

View File

@ -2,7 +2,6 @@ language: c
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq gcc-arm-linux-gnueabi
- sudo apt-get install -qq clang
- sudo apt-get install -qq g++-multilib
- sudo apt-get install -qq gcc-multilib
@ -13,7 +12,7 @@ env:
- ZSTD_TRAVIS_CI_ENV=cmaketest
- ZSTD_TRAVIS_CI_ENV=clangtest
- ZSTD_TRAVIS_CI_ENV=gpptest
- ZSTD_TRAVIS_CI_ENV=armtest
- ZSTD_TRAVIS_CI_ENV=armtest-w-install
- ZSTD_TRAVIS_CI_ENV=test
- ZSTD_TRAVIS_CI_ENV="-C programs test32"
- ZSTD_TRAVIS_CI_ENV="-C programs test-zstd_nolegacy"

View File

@ -1,6 +1,6 @@
# ################################################################
# zstd - Makefile
# Copyright (C) Yann Collet 2014-2015
# Copyright (C) Yann Collet 2014-2016
# All rights reserved.
#
# BSD license
@ -27,16 +27,14 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# You can contact the author at :
# - zstd source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# - zstd homepage : http://www.zstd.net/
# ################################################################
# force a version number : uncomment the export below (otherwise, defaults to the one declared in zstd.h)
#export VERSION := 0.4.6
#export VERSION := 0.5.1
PRGDIR = programs
ZSTDDIR = lib
DICTDIR = dictBuilder
# Define nul output
ifneq (,$(filter Windows%,$(OS)))
@ -52,7 +50,6 @@ default: zstdprogram
all:
$(MAKE) -C $(ZSTDDIR) $@
$(MAKE) -C $(PRGDIR) $@
$(MAKE) -C $(DICTDIR) $@
zstdprogram:
$(MAKE) -C $(PRGDIR)
@ -60,7 +57,6 @@ zstdprogram:
clean:
@$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
@$(MAKE) -C $(DICTDIR) $@ > $(VOID)
@echo Cleaning completed
@ -81,7 +77,6 @@ travis-install:
test:
$(MAKE) -C $(PRGDIR) $@
$(MAKE) -C $(DICTDIR) $@
cmaketest:
cd contrib/cmake ; cmake . ; $(MAKE)
@ -94,8 +89,34 @@ gpptest: clean
$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
armtest: clean
$(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
$(MAKE) -C $(PRGDIR) CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static"
# $(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
$(MAKE) -C $(PRGDIR) datagen # use native, faster
$(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static"
# for Travis CI
arminstall: clean
sudo apt-get install -q qemu
sudo apt-get install -q binfmt-support
sudo apt-get install -q qemu-user-static
sudo apt-get install -q gcc-arm-linux-gnueabi
# for Travis CI
armtest-w-install: clean arminstall armtest
ppctest: clean
$(MAKE) -C $(PRGDIR) datagen # use native, faster
$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static"
# for Travis CI
ppcinstall: clean
sudo apt-get install -q qemu
sudo apt-get install -q binfmt-support
sudo apt-get install -q qemu-user-static
sudo apt-get update -q
sudo apt-get install -q gcc-powerpc-linux-gnu # unfortunately, doesn't work on Travis CI (package not available)
# for Travis CI
ppctest-w-install: clean ppcinstall ppctest
usan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"

7
NEWS
View File

@ -1,3 +1,10 @@
v0.5.1
New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
Changed : Dictionary builder integrated into libzstd and zstd cli
Changed : zstd cli now uses "multiple input files" as default mode
Fix : high compression modes for big-endian platforms
New : zstd cli : `-t` | `--test` command
v0.5.0
New : dictionary builder utility
Changed : streaming & dictionary API

View File

@ -16,7 +16,7 @@ As a reference, several fast compression algorithms were tested and compared to
|Name | Ratio | C.speed | D.speed |
|-----------------|-------|--------:|--------:|
| | | MB/s | MB/s |
|**zstd 0.4.7 -1**|**2.875**|**330**| **890** |
|**zstd 0.5.1 -1**|**2.876**|**330**| **890** |
| [zlib] 1.2.8 -1 | 2.730 | 95 | 360 |
| brotli -0 | 2.708 | 220 | 430 |
| QuickLZ 1.5 | 2.237 | 510 | 605 |
@ -40,33 +40,80 @@ Compression Speed vs Ratio | Decompression Speed
### The case for Small Data compression
The above chart is applicable to large files or large streams scenarios (200 MB in this case).
The above chart provides results applicable to large-file or large-stream scenarios (200 MB in this case).
Small data (< 64 KB) come with different perspectives.
The smaller the amount of data to compress, the more difficult it is to achieve any significant compression.
On reaching the 1 KB region, it becomes almost impossible to compress anything.
This problem is common to all compression algorithms, and throwing CPU power at it achieves no significant gains.
This problem is common to all compression algorithms, and throwing CPU power at it achieves little gain.
The reason is, compression algorithms learn from past data how to compress future data.
But at the beginning of a new file, there is no "past" to build upon.
[Starting with 0.5](https://github.com/Cyan4973/zstd/releases), Zstd now offers [a _Dictionary Builder_ tool](https://github.com/Cyan4973/zstd/tree/master/dictBuilder).
It can be used to train the algorithm to fit a selected type of data, by providing it with some samples.
The result is a file (or a byte buffer) called "dictionary", which can be loaded before compression and decompression.
By using this dictionary, the compression ratio achievable on small data improves dramatically :
To solve this situation, Zstd now offers a __training mode__,
which can be used to make the algorithm fit a selected type of data, by providing it with some samples.
The result of the training is a file called "dictionary", which can be loaded before compression and decompression.
Using this dictionary, the compression ratio achievable on small data improves dramatically :
| Collection Name | Direct compression | Dictionary Compression | Gains | Average unit | Range |
| --------------- | ------------------ | ---------------------- | ----- | ------------:| ----- |
| Small JSON records | x1.331 - x1.366 | x5.860 - x6.830 | ~ x4.7 | 300 | 200 - 400 |
| Mercurial events | x2.322 - x2.538 | x3.377 - x4.462 | ~ x1.5 | 1.5 KB | 20 - 200 KB |
| Large JSON docs | x3.813 - x4.043 | x8.935 - x13.366 | ~ x2.8 | 6 KB | 800 - 20 KB |
| Collection Name | Direct compression | Dictionary Compression | Gains | Average unit | Range |
| --------------- | ------------------ | ---------------------- | --------- | ------------:| ----- |
| Small JSON records | x1.331 - x1.366 | x5.860 - x6.830 | ~ __x4.7__ | 300 | 200 - 400 |
| Mercurial events | x2.322 - x2.538 | x3.377 - x4.462 | ~ __x1.5__ | 1.5 KB | 20 - 200 KB |
| Large JSON docs | x3.813 - x4.043 | x8.935 - x13.366 | ~ __x2.8__ | 6 KB | 800 - 20 KB |
It has to be noted that these compression gains are achieved without any speed loss, and even some faster decompression processing.
These compression gains are achieved without any speed loss; in general, compression and decompression even prove a bit faster.
Dictionaries work if there is some correlation within a family of small data (there is no _universal dictionary_).
Hence, deploying one dictionary per type of data will provide the greatest benefits.
Large documents will benefit proportionally less, since dictionary gains are mostly effective in the first few KB.
Then there is enough history to build upon, and the compression algorithm can rely on it to compress the rest of the file.
Then, the compression algorithm will rely more and more on already decoded content to compress the rest of the file.
#### Dictionary compression How To :
##### _Using the Command Line Utility_ :
1) Create the dictionary
`zstd --train FullPathToTrainingSet/* -o dictionaryName`
2) Compression with dictionary
`zstd FILE -D dictionaryName`
3) Decompress with dictionary
`zstd --decompress FILE.zst -D dictionaryName`
##### _Using API_ :
1) Create dictionary
```
#include "zdict.h"
(...)
/* Train a dictionary from a memory buffer `samplesBuffer`,
where `nbSamples` samples have been stored concatenated. */
size_t dictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity,
samplesBuffer, samplesSizes, nbSamples);
```
2) Compression with dictionary
```
#include "zstd.h"
(...)
ZSTD_CCtx* context = ZSTD_createCCtx();
size_t compressedSize = ZSTD_compress_usingDict(context, dst, dstCapacity, src, srcSize, dict, dictSize, compressionLevel);
```
3) Decompress with dictionary
```
#include "zstd.h"
(...)
ZSTD_DCtx* context = ZSTD_createDCtx();
size_t regeneratedSize = ZSTD_decompress_usingDict(context, dst, dstCapacity, cSrc, cSrcSize, dict, dictSize);
```
### Status

View File

@ -1,6 +1,6 @@
# ################################################################
# zstd - Makefile
# Copyright (C) Yann Collet 2014-2015
# Copyright (C) Yann Collet 2014-2016
# All rights reserved.
#
# BSD license

View File

@ -1,6 +1,6 @@
# ################################################################
# zstd - Makefile
# Copyright (C) Yann Collet 2014-2015
# Copyright (C) Yann Collet 2014-2016
# All rights reserved.
#
# BSD license
@ -27,8 +27,7 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# You can contact the author at :
# - zstd source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# - zstd homepage : http://www.zstd.net/
# ################################################################
# Get library version based on information from input content (use regular exp)
@ -58,9 +57,11 @@ GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
MESSAGE("ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
SET(Sources
${LIBRARY_DIR}/divsufsort.c
${LIBRARY_DIR}/fse.c
${LIBRARY_DIR}/huff0.c
${LIBRARY_DIR}/zstd_buffered.c
${LIBRARY_DIR}/zbuff.c
${LIBRARY_DIR}/zdict.c
${LIBRARY_DIR}/zstd_compress.c
${LIBRARY_DIR}/zstd_decompress.c)
@ -73,8 +74,10 @@ SET(Headers
${LIBRARY_DIR}/huff0.h
${LIBRARY_DIR}/huff0_static.h
${LIBRARY_DIR}/mem.h
${LIBRARY_DIR}/zstd_buffered_static.h
${LIBRARY_DIR}/zstd_buffered.h
${LIBRARY_DIR}/zbuff.h
${LIBRARY_DIR}/zbuff_static.h
${LIBRARY_DIR}/zdict.h
${LIBRARY_DIR}/zdict_static.h
${LIBRARY_DIR}/zstd_internal.h
${LIBRARY_DIR}/zstd_static.h
${LIBRARY_DIR}/zstd.h)
@ -86,13 +89,15 @@ IF (ZSTD_LEGACY_SUPPORT)
SET(Sources ${Sources}
${LIBRARY_LEGACY_DIR}/zstd_v01.c
${LIBRARY_LEGACY_DIR}/zstd_v02.c
${LIBRARY_LEGACY_DIR}/zstd_v03.c)
${LIBRARY_LEGACY_DIR}/zstd_v03.c
${LIBRARY_LEGACY_DIR}/zstd_v04.c)
SET(Headers ${Headers}
${LIBRARY_LEGACY_DIR}/zstd_legacy.h
${LIBRARY_LEGACY_DIR}/zstd_v01.h
${LIBRARY_LEGACY_DIR}/zstd_v02.h
${LIBRARY_LEGACY_DIR}/zstd_v03.h)
${LIBRARY_LEGACY_DIR}/zstd_v03.h
${LIBRARY_LEGACY_DIR}/zstd_v04.h)
ENDIF (ZSTD_LEGACY_SUPPORT)
IF (MSVC)
@ -161,7 +166,7 @@ IF (UNIX)
SET(INSTALL_INCLUDE_DIR ${PREFIX}/include)
# install target
INSTALL(FILES ${LIBRARY_DIR}/zstd.h DESTINATION ${INSTALL_INCLUDE_DIR})
# Install the public headers; the names must match entries in the Headers list (zbuff.h, zdict.h).
INSTALL(FILES ${LIBRARY_DIR}/zstd.h ${LIBRARY_DIR}/zbuff.h ${LIBRARY_DIR}/zdict.h DESTINATION ${INSTALL_INCLUDE_DIR})
INSTALL(TARGETS libzstd_static DESTINATION ${INSTALL_LIBRARY_DIR})
INSTALL(TARGETS libzstd_shared LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR})

View File

@ -1,6 +1,6 @@
# ################################################################
# zstd - Makefile
# Copyright (C) Yann Collet 2014-2015
# Copyright (C) Yann Collet 2014-2016
# All rights reserved.
#
# BSD license
@ -27,8 +27,7 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# You can contact the author at :
# - zstd source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# - zstd homepage : http://www.zstd.net/
# ################################################################
PROJECT(programs)
@ -59,7 +58,7 @@ IF (ZSTD_LEGACY_SUPPORT)
SET(ZSTD_FILEIO_LEGACY ${PROGRAMS_LEGACY_DIR}/fileio_legacy.c)
ENDIF (ZSTD_LEGACY_SUPPORT)
ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/datagen.c ${ZSTD_FILEIO_LEGACY})
ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${ZSTD_FILEIO_LEGACY})
TARGET_LINK_LIBRARIES(zstd libzstd_static)
ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/fullbench.c)
@ -69,9 +68,9 @@ ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGR
TARGET_LINK_LIBRARIES(fuzzer libzstd_static)
IF (UNIX)
ADD_EXECUTABLE(zstd-noBench ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${ZSTD_FILEIO_LEGACY})
TARGET_LINK_LIBRARIES(zstd-noBench libzstd_static)
SET_TARGET_PROPERTIES(zstd-noBench PROPERTIES COMPILE_DEFINITIONS "ZSTD_NOBENCH")
ADD_EXECUTABLE(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c)
TARGET_LINK_LIBRARIES(zstd-frugal libzstd_static)
SET_TARGET_PROPERTIES(zstd-frugal PROPERTIES COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT")
ADD_EXECUTABLE(zbufftest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/zbufftest.c)
TARGET_LINK_LIBRARIES(zbufftest libzstd_static)

View File

@ -1,339 +0,0 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View File

@ -1,69 +0,0 @@
# ##########################################################################
# Dict Builder - Makefile
# Copyright (C) Yann Collet 2015
#
# GPL v2 License
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# You can contact the author at :
# - ZSTD source repository : https://github.com/Cyan4973/zstd
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# ##########################################################################
# Directory holding the zstd library sources that dictBuilder links against.
ZSTDDIR = ../lib

# Preprocessor, compiler and aggregate command-line flags.
# CFLAGS defaults to -O3 only when the caller has not provided a value;
# the language standard and the warning set are appended in either case.
# FLAGS is expanded lazily, so LDFLAGS / MOREFLAGS may be supplied by the caller.
CPPFLAGS = -I../lib
CFLAGS  ?= -O3
CFLAGS  += -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Wcast-align \
           -Wundef -Wstrict-prototypes -Wstrict-aliasing=1
FLAGS    = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)

# Host-dependent settings : executable suffix and null device.
# Anything whose OS variable does not start with "Windows" gets the Unix values.
ifeq (,$(filter Windows%,$(OS)))
EXT  =
VOID = /dev/null
else
EXT  = .exe
VOID = nul
endif
# Targets that name actions rather than files. Every one of them is declared,
# so that a stray file called e.g. `clean` or `gpptest` cannot mask its rule.
.PHONY: default all clean test clangtest gpptest

default: dictBuilder

all: dictBuilder

# Build the command-line tool in a single compile+link step from its own
# sources, the bundled suffix-sorting sources and the zstd library files it
# needs. The produced file is dictBuilder$(EXT).
dictBuilder: dictBuilder.c dibcli.c divsufsort.c sssort.c trsort.c $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_decompress.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

# Remove build products and scratch files created in this directory.
clean:
	@rm -f core *.o tmp* result* *.gcda \
        dictBuilder$(EXT)
	@echo Cleaning completed

# Smoke test : train a dictionary from every file of this directory,
# then delete the resulting `dictionary` file (the tool's default output name).
test: dictBuilder
	./dictBuilder *
	@rm dictionary

# Rebuild from scratch with clang, treating warnings as errors.
clangtest: CC = clang
clangtest: CFLAGS += -Werror
clangtest: clean dictBuilder

# Rebuild from scratch with a C++ compiler, treating warnings as errors.
# CFLAGS is replaced (not appended) because -std=c99 and -Wstrict-prototypes
# do not apply to g++.
gpptest: CC = g++
gpptest: CFLAGS=-O3 -Wall -Wextra -Wshadow -Wcast-align -Wcast-qual -Wundef -Werror
gpptest: clean dictBuilder

View File

@ -1,263 +0,0 @@
/*
dibcli - Command Line Interface (cli) for Dictionary Builder
Copyright (C) Yann Collet 2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
*/
/* **************************************
* Compiler Specifics
****************************************/
/* Disable some Visual warning messages */
/* Only applies when building with Microsoft Visual C (_MSC_VER defined).
 * C4127 fires on conditions that are compile-time constants; this file contains
 * such a test in DEBUGOUTPUT(), which expands to `if (DEBUG)` with DEBUG fixed to 0. */
#ifdef _MSC_VER
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/*-************************************
* Includes
**************************************/
#include <stdlib.h> /* exit, calloc, free */
#include <string.h> /* strcmp, strlen */
#include <stdio.h> /* fprintf, getchar */
#include "dictBuilder.h"
/*-************************************
* Constants
**************************************/
/* Short program description, shown as first field of the welcome banner. */
#define PROGRAM_DESCRIPTION "Dictionary builder"
/* PROGRAM_VERSION may be provided by the build system. Otherwise it is assembled as
 * "v<major>.<minor>.<release>" from the DiB_VERSION_* macros of dictBuilder.h.
 * QUOTE/EXP_Q form the usual two-step stringification pair :
 * EXP_Q lets its argument be macro-expanded first, QUOTE then turns the result into a string literal. */
#ifndef PROGRAM_VERSION
# define QUOTE(str) #str
# define EXP_Q(str) QUOTE(str)
# define PROGRAM_VERSION "v" EXP_Q(DiB_VERSION_MAJOR) "." EXP_Q(DiB_VERSION_MINOR) "." EXP_Q(DiB_VERSION_RELEASE)
#endif
#define AUTHOR "Yann Collet"
/* WELCOME_MESSAGE expands to a format string FOLLOWED BY its arguments :
 * it is meant to be handed as-is to a printf-style macro, e.g. DISPLAY(WELCOME_MESSAGE). */
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, PROGRAM_VERSION, (int)(sizeof(void*)*8), AUTHOR
/* Postfix size multipliers : `110 KB` expands to `110 *(1 <<10)`. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/* Default values of the command-line parameters (reported by usage() / usage_advanced()). */
static const unsigned compressionLevelDefault = 5;
static const unsigned selectionLevelDefault = 9; /* determined experimentally */
static const unsigned maxDictSizeDefault = 110 KB;
static const char* dictFileNameDefault = "dictionary";
/*-************************************
* Display Macros
**************************************/
/* DISPLAY() : printf-style output, sent to the stream currently selected by g_displayOut. */
#define DISPLAY(...)         fprintf(g_displayOut, __VA_ARGS__)
/* DISPLAYLEVEL() : same as DISPLAY(), but only when g_displayLevel is at least `l`.
 * The do { } while (0) wrapper makes the macro behave as one single statement,
 * so it stays correct when used as the body of an un-braced if / else. */
#define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)
static FILE* g_displayOut;   /* destination of every message; must be assigned at run time before first use */
static unsigned g_displayLevel = 2;   /* 0 : no display;  1 : errors;  2 : + result + interaction + warnings;  3 : + progression;  4 : + information */
/*-************************************
* Exceptions
**************************************/
/* DEBUG : set to a non-zero value to also report where an error was raised. */
#define DEBUG 0
/* DEBUGOUTPUT() : DISPLAY() variant that only prints when DEBUG is non-zero.
 * Wrapped in do { } while (0), and without embedded semicolon,
 * so that it behaves as exactly one statement wherever it is used. */
#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
/* EXM_THROW() : report a fatal error, then terminate the process.
 * `error` is printed as "Error <n> : ", followed by the printf-style message
 * given in the remaining arguments, and is also used as the process exit code.
 * Messages are shown only when the display level is >= 1; exit() is always called.
 * Wrapped in do { } while (0) so that `if (cond) EXM_THROW(...); else ...` is valid. */
#define EXM_THROW(error, ...)                                             \
do {                                                                      \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, "\n");                                                \
    exit(error);                                                          \
} while (0)
/*-************************************
* Command Line
**************************************/
/* usage() :
 * Prints the short help (synopsis and main arguments) on the current display stream.
 * @programName : name shown in the synopsis line.
 * @return : 0, always. */
static int usage(const char* programName)
{
    /* synopsis */
    DISPLAY( "Usage :\n"
             " %s [arg] [filenames]\n"
             "\n",
             programName);
    /* main arguments, with their default values */
    DISPLAY( "Arguments :\n"
             " -o : name of dictionary file (default: %s) \n"
             "--maxdict : limit dictionary to specified size (default : %u) \n"
             " -h/-H : display help/long help and exit\n",
             dictFileNameDefault, maxDictSizeDefault);
    return 0;
}
/* usage_advanced() :
 * Prints the long help : welcome banner, the short help of usage(),
 * then the advanced arguments with their default values.
 * @programName : forwarded to usage().
 * @return : 0, always. */
static int usage_advanced(const char* programName)
{
    DISPLAY(WELCOME_MESSAGE);
    usage(programName);
    DISPLAY( "\n"
             "Advanced arguments :\n"
             " -V : display Version number and exit\n"
             "--fast : fast sampling mode\n"
             " -L# : target compression level (default: %u)\n"
             " -S# : dictionary selectivity level (default: %u)\n"
             " -v : verbose mode\n"
             " -q : suppress notifications; specify twice to suppress errors too\n",
             compressionLevelDefault, selectionLevelDefault);
    return 0;
}
/* badusage() :
 * Reports an invalid command line. Unless errors are silenced (display level 0),
 * prints a one-line diagnostic followed by the short help.
 * @return : 1, always (meant to be used as the program's exit code). */
static int badusage(const char* programName)
{
    if (g_displayLevel >= 1) {
        DISPLAY("Incorrect parameters\n");
        usage(programName);
    }
    return 1;
}
/* waitEnter() :
 * Prompts the user, then blocks until one character can be read from stdin. */
static void waitEnter(void)
{
    DISPLAY("Press enter to continue...\n");
    {   int const key = getchar();   /* the value read is irrelevant */
        (void)key;
    }
}
/*! main() :
 *  Command-line entry point of the dictionary builder.
 *  Parses the arguments, collects every non-option argument as a sample file name,
 *  then trains a dictionary from these files with DiB_trainFromFiles().
 *
 *  Recognized arguments :
 *   --version, -V       : print the welcome banner on stdout and exit
 *   --help, -h, -H      : print the long help on stdout and exit
 *   --verbose, -v       : raise the display level (to 3 at least)
 *   --quiet, -q         : lower the display level
 *   --maxdict SIZE      : maximum dictionary size; a 'k' or 'K' suffix multiplies by 1024
 *   --fast              : selectivity level 1 and compression level 1
 *   -S#, -L#            : selectivity level / compression level
 *   -o NAME             : name of the dictionary file to write
 *   -p                  : wait for a key press before exiting (hidden option)
 *  Single-letter commands can be aggregated (ex : -vq).
 *
 *  @return : 0 after help/version; 1 on incorrect parameters or when no file is provided;
 *            otherwise the result of DiB_trainFromFiles().
 *  note : every exit path goes through `cleanup`, so that filenameTable is always released.
 */
int main(int argCount, const char** argv)
{
    int i,
        main_pause=0,
        operationResult=0,
        nextArgumentIsMaxDict=0,
        nextArgumentIsDictFileName=0;
    unsigned cLevel = compressionLevelDefault;
    unsigned maxDictSize = maxDictSizeDefault;
    unsigned selectionLevel = selectionLevelDefault;
    const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
    unsigned filenameIdx = 0;
    const char* programName = argv[0];
    const char* dictFileName = dictFileNameDefault;

    /* init */
    g_displayOut = stderr;   /* unfortunately, cannot be set at declaration */
    if (filenameTable==NULL) EXM_THROW(1, "not enough memory\n");

    /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior.
     * Every character is examined, first one included, so that a separator
     * located at the very beginning of the path (ex : "/prog") is skipped too. */
    {   const char* p;
        for (p = programName; *p != 0; p++) {
            if ((*p == '/') || (*p == '\\')) programName = p + 1;
        }
    }

    /* command switches */
    for(i=1; i<argCount; i++) {
        const char* argument = argv[i];

        if(!argument) continue;   /* Protection if argument empty */

        /* value expected by a previous `-o` */
        if (nextArgumentIsDictFileName) {
            nextArgumentIsDictFileName=0;
            dictFileName = argument;
            continue;
        }

        /* value expected by a previous `--maxdict` : decimal number, optional k/K suffix */
        if (nextArgumentIsMaxDict) {
            nextArgumentIsMaxDict = 0;
            maxDictSize = 0;
            while ((*argument>='0') && (*argument<='9'))
                maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
            if (*argument=='k' || *argument=='K')
                maxDictSize <<= 10;
            continue;
        }

        /* long commands (--long-word) */
        if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); operationResult = 0; goto cleanup; }
        if (!strcmp(argument, "--help")) { g_displayOut=stdout; operationResult = usage_advanced(programName); goto cleanup; }
        if (!strcmp(argument, "--verbose")) { g_displayLevel++; if (g_displayLevel<3) g_displayLevel=3; continue; }
        if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
        if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
        if (!strcmp(argument, "--fast")) { selectionLevel=1; cLevel=1; continue; }

        /* Decode commands (note : aggregated commands are allowed) */
        if (argument[0]=='-') {
            argument++;

            while (argument[0]!=0) {
                switch(argument[0])
                {
                    /* Display help */
                case 'V': g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); operationResult = 0; goto cleanup;   /* Version Only */
                case 'H':
                case 'h': g_displayOut=stdout; operationResult = usage_advanced(programName); goto cleanup;

                    /* Selection level */
                case 'S': argument++;
                    selectionLevel = 0;
                    while ((*argument >= '0') && (*argument <= '9'))
                        selectionLevel *= 10, selectionLevel += *argument++ - '0';
                    break;

                    /* Compression level */
                case 'L': argument++;
                    cLevel = 0;
                    while ((*argument >= '0') && (*argument <= '9'))
                        cLevel *= 10, cLevel += *argument++ - '0';
                    break;

                    /* Verbose mode */
                case 'v': g_displayLevel++; if (g_displayLevel<3) g_displayLevel=3; argument++; break;

                    /* Quiet mode */
                case 'q': g_displayLevel--; argument++; break;

                    /* dictionary name */
                case 'o': nextArgumentIsDictFileName=1; argument++; break;

                    /* Pause at the end (hidden option) */
                case 'p': main_pause=1; argument++; break;

                    /* unknown command */
                default : operationResult = badusage(programName); goto cleanup;
                }
            }
            continue;
        }

        /* add filename to list */
        filenameTable[filenameIdx++] = argument;
    }

    /* Welcome message (if verbose) */
    DISPLAYLEVEL(3, WELCOME_MESSAGE);

    /* check nb files */
    if (filenameIdx==0) { operationResult = badusage(programName); goto cleanup; }
    if (filenameIdx < 100) {
        DISPLAYLEVEL(2, "Warning : set contains only %u files ... \n", filenameIdx);
        DISPLAYLEVEL(3, "!! For better results, consider providing > 1.000 samples !!\n");
        DISPLAYLEVEL(3, "!! Each sample should preferably be stored as a separate file !!\n");
    }

    /* building ... */
    {   DiB_params_t param;
        param.selectivityLevel = selectionLevel;
        param.compressionLevel = cLevel;
        DiB_setNotificationLevel(g_displayLevel);
        operationResult = DiB_trainFromFiles(dictFileName, maxDictSize,
                                             filenameTable, filenameIdx,
                                             param);
    }

    /* only the regular (training) path honors the pause option, as before */
    if (main_pause) waitEnter();

cleanup:
    free((void*)filenameTable);
    return operationResult;
}

View File

@ -1,94 +0,0 @@
/*
dictBuilder.h
Copyright (C) Yann Collet 2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/* This library is designed for a single-threaded console application.
* It calls exit() and prints a message to stderr when it encounters an error condition. */
#ifndef DICTBUILDER_H_001
#define DICTBUILDER_H_001

/*-*************************************
*  Dependencies
***************************************/
#include <stddef.h>   /* size_t : used by the prototypes below, so the header can be included first */


/*-*************************************
*  Version
***************************************/
#define DiB_VERSION_MAJOR    0    /* for breaking interface changes  */
#define DiB_VERSION_MINOR    0    /* for new (non-breaking) interface capabilities */
#define DiB_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
/* Single-number form : major*10000 + minor*100 + release */
#define DiB_VERSION_NUMBER  (DiB_VERSION_MAJOR *100*100 + DiB_VERSION_MINOR *100 + DiB_VERSION_RELEASE)
unsigned DiB_versionNumber (void);


/*-*************************************
*  Public type
***************************************/
typedef struct {
    unsigned selectivityLevel;   /* 0 means default; larger => bigger selection => larger dictionary */
    unsigned compressionLevel;   /* 0 means default; target a specific zstd compression level */
} DiB_params_t;


/*-*************************************
*  Public functions
***************************************/
/*! DiB_trainFromBuffer
    Train a dictionary from a memory buffer @samplesBuffer
    where @nbSamples samples have been stored concatenated.
    Each sample size is provided into an orderly table @sampleSizes.
    Resulting dictionary will be saved into @dictBuffer.
    @parameters is optional and can be provided with 0 values to mean "default".
    @result : size of dictionary stored into @dictBuffer (<= @dictBufferSize)
              or an error code, which can be tested by DiB_isError().
    note : DiB_trainFromBuffer() will send notifications into stderr if instructed to, using DiB_setNotificationLevel()
*/
size_t DiB_trainFromBuffer(void* dictBuffer, size_t dictBufferSize,
                           const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
                           DiB_params_t parameters);


/*! DiB_trainFromFiles
    Train a dictionary from a set of files provided by @fileNamesTable
    Resulting dictionary is written into file @dictFileName.
    @parameters is optional and can be provided with 0 values.
    @result : 0 == ok. Any other : error.
*/
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                       const char** fileNamesTable, unsigned nbFiles,
                       DiB_params_t parameters);


/*-*************************************
*  Helper functions
***************************************/
/* DiB_isError() : tells if a size_t result, such as the one of DiB_trainFromBuffer(), is an error code.
 * DiB_getErrorName() : provides a readable string for such a code. */
unsigned DiB_isError(size_t errorCode);
const char* DiB_getErrorName(size_t errorCode);

/*! DiB_setNotificationLevel
    Set amount of notification to be displayed on the console.
    default initial value : 0 = no console notification.
    Note : not thread-safe (use a global constant)
*/
void DiB_setNotificationLevel(unsigned l);


#endif

View File

@ -1,404 +0,0 @@
/*
* divsufsort.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*- Compiler specifics -*/
/* clang only : silence -Wshorten-64-to-32 for this file.
 * The code below stores pointer differences (e.g. `k - SA`) into saidx_t entries,
 * which presumably is what triggers the warning when saidx_t is 32-bit. */
#ifdef __clang__
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#endif
/*- Dependencies -*/
#include "divsufsort_private.h"
#ifdef _OPENMP
# include <omp.h>
#endif
/*- Private Functions -*/
/* Sorts suffixes of type B*. */
/* First stage shared by divsufsort() and divbwt().
 *  T        : input text, n characters.
 *  SA       : work array, used here both as scratch space and to hold the result.
 *  bucket_A : BUCKET_A_SIZE counters, zeroed then filled by this function.
 *  bucket_B : BUCKET_B_SIZE counters, zeroed then filled by this function.
 *  n        : length of T. Both callers only get here with n >= 2.
 * Returns m, the number of type B* suffixes found in T.
 * On return, the bucket arrays hold the bucket boundaries consumed afterwards
 * by construct_SA() / construct_BWT().
 * Suffixes are classified while scanning T from right to left : a position stays
 * "type A" while T[i] >= T[i+1]; the first position where T[i] < T[i+1] is "type B*";
 * positions further left stay "type B" while T[i] <= T[i+1].
 * NOTE(review): BUCKET_A / BUCKET_B / BUCKET_BSTAR accessors, sssort() and trsort()
 * are defined outside of this function; their exact layout/contract is not restated here. */
static
saidx_t
sort_typeBstar(const sauchar_t *T, saidx_t *SA,
saidx_t *bucket_A, saidx_t *bucket_B,
saidx_t n) {
saidx_t *PAb, *ISAb, *buf;
#ifdef _OPENMP
saidx_t *curbuf;
saidx_t l;
#endif
saidx_t i, j, k, t, m, bufsize;
saint_t c0, c1;
#ifdef _OPENMP
saint_t d0, d1;
int tmp;
#endif
/* Initialize bucket arrays. */
for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
/* Count the number of occurrences of the first one or two characters of each
type A, B and B* suffix. Moreover, store the beginning position of all
type B* suffixes into the array SA. */
/* Right-to-left scan; B* positions are stacked downwards from the end of SA (SA[--m]). */
for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
/* type A suffix. */
do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
if(0 <= i) {
/* type B* suffix. */
++BUCKET_BSTAR(c0, c1);
SA[--m] = i;
/* type B suffix. */
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
++BUCKET_B(c0, c1);
}
}
}
/* m was the index of the first stored position; it becomes the number of B* suffixes. */
m = n - m;
/*
note:
A type B* suffix is lexicographically smaller than a type B suffix that
begins with the same first two characters.
*/
/* Calculate the index of start/end point of each bucket. */
/* Counters are turned into running totals : i accumulates type A and B counts, j accumulates B* counts. */
for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
t = i + BUCKET_A(c0);
BUCKET_A(c0) = i + j; /* start point */
i = t + BUCKET_B(c0, c0);
for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
j += BUCKET_BSTAR(c0, c1);
BUCKET_BSTAR(c0, c1) = j; /* end point */
i += BUCKET_B(c0, c1);
}
}
if(0 < m) {
/* Sort the type B* suffixes by their first two characters. */
/* PAb : the m stored B* positions (last m entries of SA); ISAb : area starting at SA + m. */
PAb = SA + n - m; ISAb = SA + m;
for(i = m - 2; 0 <= i; --i) {
t = PAb[i], c0 = T[t], c1 = T[t + 1];
SA[--BUCKET_BSTAR(c0, c1)] = i;
}
t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
/* Sort the type B* substrings using sssort. */
#ifdef _OPENMP
/* Parallel variant : the free area SA[m .. n-m-1] is split into one equal slice per thread (curbuf).
 * Inside the critical section, each thread picks the next bucket [k, l) holding more than
 * one entry, walking (c0, c1) downwards; shared progress is kept in c0, c1 and j. */
tmp = omp_get_max_threads();
buf = SA + m, bufsize = (n - (2 * m)) / tmp;
c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
{
tmp = omp_get_thread_num();
curbuf = buf + tmp * bufsize;
k = 0;
for(;;) {
#pragma omp critical(sssort_lock)
{
if(0 < (l = j)) {
d0 = c0, d1 = c1;
do {
k = BUCKET_BSTAR(d0, d1);
if(--d1 <= d0) {
d1 = ALPHABET_SIZE - 1;
if(--d0 < 0) { break; }
}
} while(((l - k) <= 1) && (0 < (l = k)));
c0 = d0, c1 = d1, j = k;
}
}
if(l == 0) { break; }
sssort(T, PAb, SA + k, SA + l,
curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
}
}
#else
/* Sequential variant : visit every (c0, c1) bucket, c0 < c1, from the highest one downwards;
 * only buckets holding more than one entry are handed to sssort().
 * The whole free area SA[m .. n-m-1] serves as its work buffer. */
buf = SA + m, bufsize = n - (2 * m);
for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
i = BUCKET_BSTAR(c0, c1);
if(1 < (j - i)) {
sssort(T, PAb, SA + i, SA + j,
buf, bufsize, 2, n, *(SA + i) == (m - 1));
}
}
}
#endif
/* Compute ranks of type B* substrings. */
/* Walks SA[0..m-1] backwards : runs of non-negative entries each get their own index as rank
 * (the run is then summarized by a negative length), while runs of negative entries
 * are complemented back and all share the rank j. */
for(i = m - 1; 0 <= i; --i) {
if(0 <= SA[i]) {
j = i;
do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
SA[i + 1] = i - j;
if(i <= 0) { break; }
}
j = i;
do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
ISAb[SA[i]] = j;
}
/* Construct the inverse suffix array of type B* suffixes using trsort. */
trsort(ISAb, SA, m, 1);
/* Set the sorted order of type B* suffixes. */
/* Same right-to-left classification scan as the counting pass above.
 * Each B* position t is written at its final rank ISAb[--j]; it is stored
 * complemented (~t) unless t == 0 or the type B run on its left is non-empty. */
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
if(0 <= i) {
t = i;
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
}
}
/* Calculate the index of start/end point of each bucket. */
BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
i = BUCKET_A(c0 + 1) - 1;
for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
t = i - BUCKET_B(c0, c1);
BUCKET_B(c0, c1) = i; /* end point */
/* Move all type B* suffixes to the correct position. */
for(i = t, j = BUCKET_BSTAR(c0, c1);
j <= k;
--i, --k) { SA[i] = SA[k]; }
}
BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
BUCKET_B(c0, c0) = i; /* end point */
}
}
return m;
}
/* Constructs the suffix array by using the sorted order of type B* suffixes. */
/* Second stage of divsufsort(), run on the state left by sort_typeBstar().
 *  T, n               : input text and its length. T[n - 2] is read, hence n >= 2 is required.
 *  SA                 : in : B* suffixes already in place; out : the n entries of the suffix array.
 *  bucket_A, bucket_B : bucket boundaries produced by sort_typeBstar(); updated while scanning.
 *  m                  : number of type B* suffixes, as returned by sort_typeBstar().
 * Entries are temporarily stored bit-complemented (~s, hence negative) as in-place markers;
 * both scans complement such entries back when they meet them.
 * The assert() calls only check internal invariants, and vanish when NDEBUG is defined. */
static
void
construct_SA(const sauchar_t *T, saidx_t *SA,
saidx_t *bucket_A, saidx_t *bucket_B,
saidx_t n, saidx_t m) {
saidx_t *i, *j, *k;
saidx_t s;
saint_t c0, c1, c2;
if(0 < m) {
/* Construct the sorted order of type B suffixes by using
the sorted order of type B* suffixes. */
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
/* Scan the suffix array from right to left. */
/* i : left bound of the scan, j : read cursor, k : write cursor inside bucket (c2, c1);
 * c2 == -1 means that no write cursor has been loaded yet. */
for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
i <= j;
--j) {
if(0 < (s = *j)) {
assert(T[s] == c1);
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
assert(T[s - 1] <= T[s]);
*j = ~s;
/* Induce the suffix starting one position earlier (s - 1), whose first character is c0. */
c0 = T[--s];
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
if(c0 != c2) {
/* Bucket change : save the current write cursor, then load the one of bucket (c0, c1). */
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
*k-- = s;
} else {
assert(((s == 0) && (T[s] == c1)) || (s < 0));
*j = ~s;
}
}
}
}
/* Construct the suffix array by using
the sorted order of type B suffixes. */
/* Seed the scan with the last suffix (position n - 1), placed at the start of the bucket of its first character. */
k = SA + BUCKET_A(c2 = T[n - 1]);
*k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
/* Scan the suffix array from left to right. */
for(i = SA, j = SA + n; i < j; ++i) {
if(0 < (s = *i)) {
assert(T[s - 1] >= T[s]);
c0 = T[--s];
if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
if(c0 != c2) {
/* Bucket change : save the write cursor of bucket c2, load the one of bucket c0. */
BUCKET_A(c2) = k - SA;
k = SA + BUCKET_A(c2 = c0);
}
assert(i < k);
*k++ = s;
} else {
assert(s < 0);
*i = ~s;
}
}
}
/* Constructs the burrows-wheeler transformed string directly
by using the sorted order of type B* suffixes. */
/* Second stage of divbwt(), run on the state left by sort_typeBstar().
 * Same two scans as construct_SA(), except that visited entries of SA are overwritten
 * with characters of T (the one preceding the suffix) instead of suffix positions.
 *  T, n               : input text and its length. T[n - 2] is read, hence n >= 2 is required.
 *  SA                 : in : B* suffixes already in place; out : transformed characters, one per entry.
 *  bucket_A, bucket_B : bucket boundaries produced by sort_typeBstar(); updated while scanning.
 *  m                  : number of type B* suffixes, as returned by sort_typeBstar().
 * Returns the index, within SA, of the last entry found equal to 0 during the
 * left-to-right scan (0 when there is none); divbwt() uses it as primary index.
 * The assert() calls only check internal invariants, and vanish when NDEBUG is defined. */
static
saidx_t
construct_BWT(const sauchar_t *T, saidx_t *SA,
saidx_t *bucket_A, saidx_t *bucket_B,
saidx_t n, saidx_t m) {
saidx_t *i, *j, *k, *orig;
saidx_t s;
saint_t c0, c1, c2;
if(0 < m) {
/* Construct the sorted order of type B suffixes by using
the sorted order of type B* suffixes. */
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
/* Scan the suffix array from right to left. */
/* i : left bound of the scan, j : read cursor, k : write cursor inside bucket (c2, c1);
 * c2 == -1 means that no write cursor has been loaded yet. */
for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
i <= j;
--j) {
if(0 < (s = *j)) {
assert(T[s] == c1);
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
assert(T[s - 1] <= T[s]);
c0 = T[--s];
/* The visited entry now keeps the preceding character, stored complemented. */
*j = ~((saidx_t)c0);
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
if(c0 != c2) {
/* Bucket change : save the current write cursor, then load the one of bucket (c0, c1). */
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
*k-- = s;
} else if(s != 0) {
*j = ~s;
#ifndef NDEBUG
} else {
assert(T[s] == c1);
#endif
}
}
}
}
/* Construct the BWTed string by using
the sorted order of type B suffixes. */
/* Seed the scan with the last suffix : either its position (n - 1),
 * or directly the complemented preceding character when T[n - 2] < T[n - 1]. */
k = SA + BUCKET_A(c2 = T[n - 1]);
*k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
/* Scan the suffix array from left to right. */
for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
if(0 < (s = *i)) {
assert(T[s - 1] >= T[s]);
c0 = T[--s];
/* The visited entry is replaced by the preceding character. */
*i = c0;
if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
if(c0 != c2) {
/* Bucket change : save the write cursor of bucket c2, load the one of bucket c0. */
BUCKET_A(c2) = k - SA;
k = SA + BUCKET_A(c2 = c0);
}
assert(i < k);
*k++ = s;
} else if(s != 0) {
*i = ~s;
} else {
/* Entry equal to 0 : remember where it stands. */
orig = i;
}
}
return orig - SA;
}
/*---------------------------------------------------------------------------*/
/*- Function -*/
/* divsufsort() :
 * Builds the suffix array of T[0..n-1] into SA[0..n-1].
 * Lengths 0, 1 and 2 are answered directly; longer inputs go through
 * sort_typeBstar() then construct_SA(), using two temporary bucket tables.
 * Returns 0 on success, -1 on invalid argument (NULL pointer or negative n),
 * -2 when a bucket table cannot be allocated. */
saint_t
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
  saidx_t *bucket_A, *bucket_B;
  saint_t status;

  /* Check arguments. */
  if(T == NULL) { return -1; }
  if(SA == NULL) { return -1; }
  if(n < 0) { return -1; }

  /* Trivial lengths. */
  if(n == 0) { return 0; }
  if(n == 1) { SA[0] = 0; return 0; }
  if(n == 2) {
    if(T[0] < T[1]) { SA[0] = 0; SA[1] = 1; }
    else            { SA[0] = 1; SA[1] = 0; }
    return 0;
  }

  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));

  /* Suffixsort. */
  if((bucket_A == NULL) || (bucket_B == NULL)) {
    status = -2;
  } else {
    saidx_t const nbBstar = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
    construct_SA(T, SA, bucket_A, bucket_B, n, nbBstar);
    status = 0;
  }

  free(bucket_B);
  free(bucket_A);

  return status;
}
/* divbwt() :
 * Writes the Burrows-Wheeler transform of T[0..n-1] into U[0..n-1].
 * A is an optional work array; when NULL, a temporary array of n + 1 entries
 * is allocated here and released before returning.
 * Returns n for n <= 1, otherwise the value of construct_BWT() plus one;
 * -1 on invalid argument (NULL T or U, negative n), -2 on allocation failure. */
saidx_t
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
  saidx_t *work;
  saidx_t *bucket_A, *bucket_B;
  saidx_t result;

  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
  if(n == 0) { return 0; }
  if(n == 1) { U[0] = T[0]; return 1; }

  work = A;
  if(work == NULL) { work = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));

  /* Burrows-Wheeler Transform. */
  if((work == NULL) || (bucket_A == NULL) || (bucket_B == NULL)) {
    result = -2;
  } else {
    saidx_t const nbBstar = sort_typeBstar(T, work, bucket_A, bucket_B, n);
    saidx_t const pidx = construct_BWT(T, work, bucket_A, bucket_B, n, nbBstar);
    saidx_t pos = 0;

    /* Copy to output string : last character first, then the entries located
       before pidx shifted by one slot, then the entries located after pidx. */
    U[0] = T[n - 1];
    while(pos < pidx) { U[pos + 1] = (sauchar_t)work[pos]; ++pos; }
    ++pos;
    while(pos < n) { U[pos] = (sauchar_t)work[pos]; ++pos; }
    result = pidx + 1;
  }

  free(bucket_B);
  free(bucket_A);
  if(A == NULL) { free(work); }

  return result;
}
/* divsufsort_version() :
 * Returns the library version string, i.e. the PROJECT_VERSION_FULL build setting. */
const char *
divsufsort_version(void) {
  const char *version = PROJECT_VERSION_FULL;
  return version;
}

View File

@ -1,180 +0,0 @@
/*
* divsufsort.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _DIVSUFSORT_H
#define _DIVSUFSORT_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#include <inttypes.h>
/* DIVSUFSORT_API : decoration placed in front of every public prototype.
 * It can be pre-defined by the build; both branches below define it as empty. */
#ifndef DIVSUFSORT_API
# ifdef DIVSUFSORT_BUILD_DLL
# define DIVSUFSORT_API
# else
# define DIVSUFSORT_API
# endif
#endif
/*- Datatypes -*/
/* Each type can be pre-defined by the includer (guarded by the matching macro). */
/* sauchar_t : one character of the input/output strings (unsigned 8-bit). */
#ifndef SAUCHAR_T
#define SAUCHAR_T
typedef uint8_t sauchar_t;
#endif /* SAUCHAR_T */
/* saint_t : status codes and character values (signed 32-bit). */
#ifndef SAINT_T
#define SAINT_T
typedef int32_t saint_t;
#endif /* SAINT_T */
/* saidx_t : positions and lengths (signed 32-bit). */
#ifndef SAIDX_T
#define SAIDX_T
typedef int32_t saidx_t;
#endif /* SAIDX_T */
/* printf conversion specifiers matching saint_t and saidx_t. */
#ifndef PRIdSAINT_T
#define PRIdSAINT_T PRId32
#endif /* PRIdSAINT_T */
#ifndef PRIdSAIDX_T
#define PRIdSAIDX_T PRId32
#endif /* PRIdSAIDX_T */
/*- Prototypes -*/
/**
* Constructs the suffix array of a given string.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 (invalid argument) or -2 (allocation failure) otherwise.
*/
DIVSUFSORT_API
saint_t
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
/**
* Constructs the burrows-wheeler transformed string of a given string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @return The primary index if no error occurred, -1 (invalid argument) or -2 (allocation failure) otherwise.
*/
DIVSUFSORT_API
saidx_t
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
/**
* Returns the version of the divsufsort library.
* @return The version number string.
*/
DIVSUFSORT_API
const char *
divsufsort_version(void);
/* NOTE(review): the five functions declared below are not defined in divsufsort.c;
 * confirm that a source file providing them is part of the build before calling them. */
/**
* Constructs the burrows-wheeler transformed string of a given string and suffix array.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param SA[0..n-1] The suffix array. (can be NULL)
* @param n The length of the given string.
* @param idx The output primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
bw_transform(const sauchar_t *T, sauchar_t *U,
saidx_t *SA /* can be NULL */,
saidx_t n, saidx_t *idx);
/**
* Inverse BW-transforms a given BWTed string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @param idx The primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
saidx_t *A /* can be NULL */,
saidx_t n, saidx_t idx);
/**
* Checks the correctness of a given suffix array.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The input suffix array.
* @param n The length of the given string.
* @param verbose The verbose mode.
* @return 0 if no error occurred.
*/
DIVSUFSORT_API
saint_t
sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
/**
* Search for the pattern P in the string T.
* @param T[0..Tsize-1] The input string.
* @param Tsize The length of the given string.
* @param P[0..Psize-1] The input pattern string.
* @param Psize The length of the given pattern string.
* @param SA[0..SAsize-1] The input suffix array.
* @param SAsize The length of the given suffix array.
* @param left The output index.
* @return The count of matches if no error occurred, -1 otherwise.
*/
DIVSUFSORT_API
saidx_t
sa_search(const sauchar_t *T, saidx_t Tsize,
const sauchar_t *P, saidx_t Psize,
const saidx_t *SA, saidx_t SAsize,
saidx_t *left);
/**
* Search for the character c in the string T.
* @param T[0..Tsize-1] The input string.
* @param Tsize The length of the given string.
* @param SA[0..SAsize-1] The input suffix array.
* @param SAsize The length of the given suffix array.
* @param c The input character.
* @param left The output index.
* @return The count of matches if no error occurred, -1 otherwise.
*/
DIVSUFSORT_API
saidx_t
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
const saidx_t *SA, saidx_t SAsize,
saint_t c, saidx_t *left);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _DIVSUFSORT_H */

View File

@ -1,212 +0,0 @@
/*
* divsufsort_private.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _DIVSUFSORT_PRIVATE_H
#define _DIVSUFSORT_PRIVATE_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* *************************
* Includes
***************************/
#include <assert.h>
#include <stdlib.h> /* unconditional */
#include <stdio.h>
#include "config.h" /* unconditional */
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# ifdef _WIN32
# include <string.h>
# else
# include <strings.h>
# endif
#endif
#if HAVE_INTTYPES_H
# include <inttypes.h>
#else
# if HAVE_STDINT_H
# include <stdint.h>
# endif
#endif
/* Variant selection.
 * When BUILD_DIVSUFSORT64 is defined, divsufsort64.h is used instead of divsufsort.h,
 * saidx_t is mapped onto saidx64_t (unless SAIDX_T is already defined), and every public
 * entry point, as well as the internal sssort / trsort, is renamed to its "64" counterpart,
 * so that the same sources can be compiled as a separate variant.
 * Otherwise, the regular divsufsort.h declarations are used unchanged. */
#if defined(BUILD_DIVSUFSORT64)
# include "divsufsort64.h"
# ifndef SAIDX_T
# define SAIDX_T
# define saidx_t saidx64_t
# endif /* SAIDX_T */
# ifndef PRIdSAIDX_T
# define PRIdSAIDX_T PRIdSAIDX64_T
# endif /* PRIdSAIDX_T */
# define divsufsort divsufsort64
# define divbwt divbwt64
# define divsufsort_version divsufsort64_version
# define bw_transform bw_transform64
# define inverse_bw_transform inverse_bw_transform64
# define sufcheck sufcheck64
# define sa_search sa_search64
# define sa_simplesearch sa_simplesearch64
# define sssort sssort64
# define trsort trsort64
#else
# include "divsufsort.h"
#endif
/*- Constants -*/
#if !defined(UINT8_MAX)
# define UINT8_MAX (255)
#endif /* UINT8_MAX */
/* ALPHABET_SIZE : number of distinct character values.
 * A user-provided value below 1 is discarded; the default is UINT8_MAX + 1 (256). */
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
# undef ALPHABET_SIZE
#endif
#if !defined(ALPHABET_SIZE)
# define ALPHABET_SIZE (UINT8_MAX + 1)
#endif
/* for divsufsort.c */
/* Number of entries of the two bucket tables : one per character, and one per pair of characters. */
#define BUCKET_A_SIZE (ALPHABET_SIZE)
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
/* for sssort.c */
/* SS_INSERTIONSORT_THRESHOLD : user value is raised to 1 when below 1; default is 8. */
#if defined(SS_INSERTIONSORT_THRESHOLD)
# if SS_INSERTIONSORT_THRESHOLD < 1
# undef SS_INSERTIONSORT_THRESHOLD
# define SS_INSERTIONSORT_THRESHOLD (1)
# endif
#else
# define SS_INSERTIONSORT_THRESHOLD (8)
#endif
/* SS_BLOCKSIZE : user value is clamped into [0, 32767]; default is 1024. */
#if defined(SS_BLOCKSIZE)
# if SS_BLOCKSIZE < 0
# undef SS_BLOCKSIZE
# define SS_BLOCKSIZE (0)
# elif 32768 <= SS_BLOCKSIZE
# undef SS_BLOCKSIZE
# define SS_BLOCKSIZE (32767)
# endif
#else
# define SS_BLOCKSIZE (1024)
#endif
/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
/* SS_MISORT_STACKSIZE depends on SS_BLOCKSIZE :
 * a block size of 0 selects the largest stack (96 entries for the 64-bit build, 64 otherwise),
 * block sizes up to 4096 use 16 entries, larger ones use 24. */
#if SS_BLOCKSIZE == 0
# if defined(BUILD_DIVSUFSORT64)
# define SS_MISORT_STACKSIZE (96)
# else
# define SS_MISORT_STACKSIZE (64)
# endif
#elif SS_BLOCKSIZE <= 4096
# define SS_MISORT_STACKSIZE (16)
#else
# define SS_MISORT_STACKSIZE (24)
#endif
#if defined(BUILD_DIVSUFSORT64)
# define SS_SMERGE_STACKSIZE (64)
#else
# define SS_SMERGE_STACKSIZE (32)
#endif
/* for trsort.c */
#define TR_INSERTIONSORT_THRESHOLD (8)
#if defined(BUILD_DIVSUFSORT64)
# define TR_STACKSIZE (96)
#else
# define TR_STACKSIZE (64)
#endif
/*- Macros -*/
/* SWAP(x, y): exchanges two lvalues through a temporary named `t`.
 * The macro does not declare `t`; the caller must have a variable `t` of a
 * compatible type in scope. Both arguments are evaluated more than once. */
#ifndef SWAP
#  define SWAP(lhs_, rhs_) do { t = (lhs_); (lhs_) = (rhs_); (rhs_) = t; } while(0)
#endif /* SWAP */

/* MIN(x, y): the smaller of the two arguments (the second one on ties).
 * Arguments are evaluated more than once. */
#ifndef MIN
#  define MIN(lhs_, rhs_) (((lhs_) < (rhs_)) ? (lhs_) : (rhs_))
#endif /* MIN */

/* MAX(x, y): the larger of the two arguments (the second one on ties).
 * Arguments are evaluated more than once. */
#ifndef MAX
#  define MAX(lhs_, rhs_) (((lhs_) > (rhs_)) ? (lhs_) : (rhs_))
#endif /* MAX */
/* Explicit-stack helpers shared by the iterative sort routines.
 * All four macros operate on names taken from the calling scope: an array
 * `stack` of structs with fields a, b, c, d (and e for the 5-field variants),
 * an integer `ssize` holding the current depth, and a macro `STACK_SIZE`
 * giving the capacity (checked only through assert). */
/* Pushes a 4-field frame and increments ssize. */
#define STACK_PUSH(_a, _b, _c, _d)\
do {\
assert(ssize < STACK_SIZE);\
stack[ssize].a = (_a), stack[ssize].b = (_b),\
stack[ssize].c = (_c), stack[ssize++].d = (_d);\
} while(0)
/* Pushes a 5-field frame and increments ssize. */
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
do {\
assert(ssize < STACK_SIZE);\
stack[ssize].a = (_a), stack[ssize].b = (_b),\
stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
} while(0)
/* Pops a 4-field frame into the given lvalues.
 * When the stack is empty this executes a bare `return;` in the CALLING
 * function, so it is only usable inside functions returning void. */
#define STACK_POP(_a, _b, _c, _d)\
do {\
assert(0 <= ssize);\
if(ssize == 0) { return; }\
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
(_c) = stack[ssize].c, (_d) = stack[ssize].d;\
} while(0)
/* 5-field variant of STACK_POP; same hidden `return;` on an empty stack. */
#define STACK_POP5(_a, _b, _c, _d, _e)\
do {\
assert(0 <= ssize);\
if(ssize == 0) { return; }\
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
(_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
} while(0)
/* for divsufsort.c */
/* Accessors for the bucket arrays; they expand to lvalues on the caller-scope
 * arrays `bucket_A` and `bucket_B`.
 * BUCKET_B and BUCKET_BSTAR index the same array `bucket_B` with the roles of
 * the two symbols exchanged: (_c1, _c0) for BUCKET_B, (_c0, _c1) for BUCKET_BSTAR.
 * For a 256-symbol alphabet the row offset is computed with a shift and OR
 * instead of a multiplication and addition. */
#define BUCKET_A(_c0) bucket_A[(_c0)]
#if ALPHABET_SIZE == 256
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
#else
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
#endif
/*- Private Prototypes -*/
/* sssort.c */
/* Substring sort over the index range [first, last).
 * `buf`/`bufsize` describe a caller-provided work buffer, `depth` is the
 * character offset at which comparison starts, and a non-zero `lastsuffix`
 * makes the routine treat the first element of the range specially.
 * In a BUILD_DIVSUFSORT64 build this name is mapped to sssort64. */
void
sssort(const sauchar_t *Td, const saidx_t *PA,
saidx_t *first, saidx_t *last,
saidx_t *buf, saidx_t bufsize,
saidx_t depth, saidx_t n, saint_t lastsuffix);
/* trsort.c */
/* Second-stage sort implemented in trsort.c (mapped to trsort64 in 64-bit builds).
 * NOTE(review): presumably ISA is the inverse suffix array, SA the suffix array
 * of n entries, and depth the starting depth - confirm against trsort.c. */
void
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _DIVSUFSORT_PRIVATE_H */

View File

@ -1,56 +0,0 @@
/*
* lfs.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* Large-file-support shim: maps LFS_* names onto either the off_t based
 * stdio variants or the plain ISO C `long` based ones.
 * NOTE(review): this header includes nothing itself; off_t, ftello/fseeko and
 * PRIdMAX must already be declared by the including file. */
#ifndef _LFS_H
#define _LFS_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* __STRICT_ANSI__ not defined: use the off_t based functions.
 * NOTE(review): LFS_PRId is PRIdMAX here, so a value of type LFS_OFF_T
 * presumably has to be cast to intmax_t before printing - confirm at call sites. */
#ifndef __STRICT_ANSI__
# define LFS_OFF_T off_t
# define LFS_FOPEN fopen
# define LFS_FTELL ftello
# define LFS_FSEEK fseeko
# define LFS_PRId PRIdMAX
#else
/* Strict ANSI mode: fall back to ftell/fseek with `long` offsets. */
# define LFS_OFF_T long
# define LFS_FOPEN fopen
# define LFS_FTELL ftell
# define LFS_FSEEK fseek
# define LFS_PRId "ld"
#endif
/* printf format for file offsets, unless the including file already chose one. */
#ifndef PRIdOFF_T
# define PRIdOFF_T LFS_PRId
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _LFS_H */

View File

@ -1,844 +0,0 @@
/*
* sssort.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*- Compiler specifics -*/
/* Silence clang's 64-to-32 bit narrowing warning for this file. */
#ifdef __clang__
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#endif
/* Make the `inline` keyword usable everywhere: native in C++ and C99+,
 * mapped to __inline on MSVC, and defined away otherwise. */
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
/* inline is defined */
#elif defined(_MSC_VER)
# define inline __inline
#else
# define inline /* disable inline */
#endif
/* FORCE_INLINE: strongest available inlining request for the compiler in use.
 * NOTE(review): the functions in this file are declared with `INLINE`
 * (upper case), which is not defined here - presumably it comes from config.h;
 * FORCE_INLINE itself is not used anywhere in this file. */
#ifdef _MSC_VER /* Visual Studio */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# define FORCE_INLINE static __forceinline
#else
# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# ifdef __GNUC__
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
# else
# define FORCE_INLINE static
# endif /* __STDC_VERSION__ */
#endif
/*- Dependencies -*/
#include "divsufsort_private.h"
/*- Private Functions -*/
/* Byte-wise base-2 logarithm lookup: lg_table[i] is floor(log2(i)) for
 * i in 1..255, and -1 for i == 0. Used to locate the highest set bit of a
 * value one byte at a time. */
static const saint_t lg_table[256]= {
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
/* Integer base-2 logarithm of n, computed by finding the most significant
 * non-zero byte and looking it up in lg_table. How many bytes are examined
 * depends on the configuration: the full index width when SS_BLOCKSIZE is 0,
 * one byte when SS_BLOCKSIZE < 256, two bytes otherwise.
 * For n == 0 the result is lg_table[0], i.e. -1. */
static INLINE
saint_t
ss_ilg(saidx_t n) {
#if SS_BLOCKSIZE == 0
# if defined(BUILD_DIVSUFSORT64)
  /* Upper 32 bits first; fall through to the shared lower-half code if clear. */
  if(n >> 32) {
    if(n >> 48) {
      if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
      return 48 + lg_table[(n >> 48) & 0xff];
    }
    if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
    return 32 + lg_table[(n >> 32) & 0xff];
  }
# endif
  if(n & 0xffff0000) {
    if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return 0 + lg_table[(n >> 0) & 0xff];
#elif SS_BLOCKSIZE < 256
  return lg_table[n];
#else
  if(n & 0xff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return 0 + lg_table[(n >> 0) & 0xff];
#endif
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
#if SS_BLOCKSIZE != 0
/* Square-root seed table used by ss_isqrt.
 * Spot checks (sqq_table[1] == 16, [4] == 32, [16] == 64, [64] == 128,
 * [255] == 255) are consistent with sqq_table[i] == floor(16 * sqrt(i)). */
static const saint_t sqq_table[256] = {
0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
/* Integer square root of x, saturated at SS_BLOCKSIZE.
 * Returns SS_BLOCKSIZE for any x >= SS_BLOCKSIZE * SS_BLOCKSIZE; otherwise an
 * estimate is taken from sqq_table, refined for larger inputs, and finally
 * corrected downwards by one if its square exceeds x. */
static INLINE
saidx_t
ss_isqrt(saidx_t x) {
saidx_t y, e;
if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
/* e = index of the highest set bit among the low 32 bits of x (-1 when x == 0) */
e = (x & 0xffff0000) ?
((x & 0xff000000) ?
24 + lg_table[(x >> 24) & 0xff] :
16 + lg_table[(x >> 16) & 0xff]) :
((x & 0x0000ff00) ?
8 + lg_table[(x >> 8) & 0xff] :
0 + lg_table[(x >> 0) & 0xff]);
if(e >= 16) {
/* table seed scaled up, then one or two averaging steps y = (y + 1 + x / y) / 2 */
y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
if(e >= 24) { y = (y + 1 + x / y) >> 1; }
y = (y + 1 + x / y) >> 1;
} else if(e >= 8) {
/* table seed scaled down, plus one; fixed up by the final check below */
y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
} else {
/* x < 256: direct lookup, dropping the table's factor of 16 */
return sqq_table[x] >> 4;
}
return (x < (y * y)) ? y - 1 : y;
}
#endif /* SS_BLOCKSIZE != 0 */
/*---------------------------------------------------------------------------*/
/* Three-way comparison of two substrings of T.
 * The first runs from T + depth + p1[0] up to (not including) T + p1[1] + 2,
 * the second likewise for p2. Returns a negative, zero or positive value when
 * the first substring is respectively less than, equal to or greater than the
 * second; a substring that is a proper prefix of the other compares as less. */
static INLINE
saint_t
ss_compare(const sauchar_t *T,
           const saidx_t *p1, const saidx_t *p2,
           saidx_t depth) {
  const sauchar_t *lhs     = T + depth + *p1;
  const sauchar_t *rhs     = T + depth + *p2;
  const sauchar_t *lhs_end = T + *(p1 + 1) + 2;
  const sauchar_t *rhs_end = T + *(p2 + 1) + 2;

  /* advance over the common prefix */
  while((lhs < lhs_end) && (rhs < rhs_end) && (*lhs == *rhs)) {
    ++lhs;
    ++rhs;
  }

  if(lhs < lhs_end) {
    /* left has characters remaining */
    return (rhs < rhs_end) ? (*lhs - *rhs) : 1;
  }
  /* left is exhausted */
  return (rhs < rhs_end) ? -1 : 0;
}
/*---------------------------------------------------------------------------*/
#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
/* Insertion sort for small groups.
 * Sorts the indices in [first, last) by ss_compare() at the given depth,
 * working from the right end: the suffix [i + 1, last) is already sorted and
 * *i is inserted into it.
 * When the inserted element compares equal to the element it stops in front
 * of, that element is stored bit-complemented (hence negative); entries that
 * are already negative are moved along with their predecessor and never
 * compared themselves. */
static
void
ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *last, saidx_t depth) {
saidx_t *i, *j;
saidx_t t;
saint_t r;
for(i = last - 2; first <= i; --i) {
/* shift smaller elements left while t is greater than *j */
for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
/* the inner do/while also carries along any complemented (negative) followers */
do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
if(last <= j) { break; }
}
/* equal to the element at j: flag that element by complementing it */
if(r == 0) { *j = ~*j; }
*(j - 1) = t;
}
}
#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
/*---------------------------------------------------------------------------*/
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
/* Sift-down step of a max-heap keyed on Td[PA[SA[.]]].
 * The element at position i is moved down until neither child has a larger
 * key. Both children (2i+1 and 2i+2) are read whenever 2i+1 < size. */
static INLINE
void
ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *SA, saidx_t i, saidx_t size) {
  saidx_t child, pick;
  saidx_t moving;
  saint_t key, best, right;

  moving = SA[i];
  key = Td[PA[moving]];
  while((child = 2 * i + 1) < size) {
    pick = child;
    best = Td[PA[SA[child]]];
    right = Td[PA[SA[child + 1]]];
    if(best < right) {
      pick = child + 1;
      best = right;
    }
    if(best <= key) { break; }
    SA[i] = SA[pick];
    i = pick;
  }
  SA[i] = moving;
}
/* Heapsort of SA[0 .. size) by the key Td[PA[SA[.]]].
 * The heap is always built over an odd number of elements so that every
 * internal node has two children; for an even size the last element is set
 * aside first and swapped back in once the heap is built. */
static
void
ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
  saidx_t heap_len;
  saidx_t idx;
  saidx_t held;

  heap_len = size;
  if((size % 2) == 0) {
    heap_len -= 1;
    if(Td[PA[SA[heap_len / 2]]] < Td[PA[SA[heap_len]]]) {
      held = SA[heap_len];
      SA[heap_len] = SA[heap_len / 2];
      SA[heap_len / 2] = held;
    }
  }

  /* build the heap bottom-up */
  idx = heap_len / 2 - 1;
  while(0 <= idx) {
    ss_fixdown(Td, PA, SA, idx, heap_len);
    --idx;
  }

  if((size % 2) == 0) {
    held = SA[0];
    SA[0] = SA[heap_len];
    SA[heap_len] = held;
    ss_fixdown(Td, PA, SA, 0, heap_len);
  }

  /* repeatedly move the current maximum to the end of the shrinking heap */
  for(idx = heap_len - 1; 0 < idx; --idx) {
    held = SA[0];
    SA[0] = SA[idx];
    ss_fixdown(Td, PA, SA, 0, idx);
    SA[idx] = held;
  }
}
/*---------------------------------------------------------------------------*/
/* Returns whichever of v1, v2, v3 points at the element whose key
 * Td[PA[*v]] is the median of the three. */
static INLINE
saidx_t *
ss_median3(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
  saidx_t *hold;

  /* order v1 <= v2 */
  if(Td[PA[*v1]] > Td[PA[*v2]]) {
    hold = v1; v1 = v2; v2 = hold;
  }
  if(Td[PA[*v2]] > Td[PA[*v3]]) {
    return (Td[PA[*v1]] > Td[PA[*v3]]) ? v1 : v3;
  }
  return v2;
}
/* Returns whichever of v1..v5 points at the element whose key Td[PA[*v]]
 * is the median of the five, using a fixed comparison network on the
 * pointers (the referenced array elements are not modified). */
static INLINE
saidx_t *
ss_median5(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
  saidx_t *hold;

  if(Td[PA[*v2]] > Td[PA[*v3]]) {
    hold = v2; v2 = v3; v3 = hold;
  }
  if(Td[PA[*v4]] > Td[PA[*v5]]) {
    hold = v4; v4 = v5; v5 = hold;
  }
  if(Td[PA[*v2]] > Td[PA[*v4]]) {
    hold = v2; v2 = v4; v4 = hold;
    hold = v3; v3 = v5; v5 = hold;
  }
  if(Td[PA[*v1]] > Td[PA[*v3]]) {
    hold = v1; v1 = v3; v3 = hold;
  }
  if(Td[PA[*v1]] > Td[PA[*v4]]) {
    hold = v1; v1 = v4; v4 = hold;
    hold = v3; v3 = v5; v5 = hold;
  }
  return (Td[PA[*v3]] > Td[PA[*v4]]) ? v4 : v3;
}
/* Chooses a pivot position within [first, last).
 * Up to 32 elements: median of first / middle / last.
 * Up to 512 elements: median of five evenly spread samples.
 * Larger ranges: median of three medians-of-three taken near the start,
 * the middle and the end. */
static INLINE
saidx_t *
ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
  saidx_t span;
  saidx_t step;
  saidx_t *mid, *lo, *hi;

  span = last - first;
  mid = first + span / 2;

  if(span <= 32) {
    return ss_median3(Td, PA, first, mid, last - 1);
  }
  if(span <= 512) {
    step = span >> 2;
    return ss_median5(Td, PA, first, first + step, mid, last - 1 - step, last - 1);
  }

  step = span >> 3;
  lo = ss_median3(Td, PA, first, first + step, first + (step << 1));
  mid = ss_median3(Td, PA, mid - step, mid, mid + step);
  hi = ss_median3(Td, PA, last - 1 - (step << 1), last - 1 - step, last - 1);
  return ss_median3(Td, PA, lo, mid, hi);
}
/*---------------------------------------------------------------------------*/
/* Binary partition for substrings. */
/* Splits [first, last) in place into two groups according to the predicate
 *   PA[x] + depth >= PA[x + 1] + 1
 * evaluated on each stored index x: elements satisfying it end up in front,
 * the others behind. Returns a pointer to the first element of the second
 * group.
 * Every element placed in the first group is stored bit-complemented; if that
 * group is non-empty, *first is complemented once more at the end, which
 * restores its original value.
 * NOTE(review): the predicate presumably means "the substring has no
 * characters left at this depth" - confirm against the PA layout. */
static INLINE
saidx_t *
ss_partition(const saidx_t *PA,
saidx_t *first, saidx_t *last, saidx_t depth) {
saidx_t *a, *b;
saidx_t t;
for(a = first - 1, b = last;;) {
/* advance a over elements that already belong to the first group, complementing them */
for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
/* retreat b over elements that already belong to the second group */
for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
if(b <= a) { break; }
/* exchange the misplaced pair; the one moving to the front is complemented */
t = ~*b;
*b = *a;
*a = t;
}
if(first < a) { *first = ~*first; }
return a;
}
/* Multikey introsort for medium size groups. */
/* Sorts the indices in [first, last) by the substrings they refer to,
 * comparing one character (Td[PA[x]], with Td = T + depth) at a time.
 * The routine is iterative: pending sub-ranges are kept on a local explicit
 * stack as (first, last, depth, limit) frames, and the function returns from
 * inside STACK_POP once that stack is empty.
 * `limit` is a per-range budget initialised from ss_ilg(range length); when
 * it reaches zero the range is sorted on the current character with
 * ss_heapsort, and a negative limit marks a range that is already ordered on
 * the current character and only needs to be split into runs.
 * Ranges of at most SS_INSERTIONSORT_THRESHOLD elements are finished with
 * ss_insertionsort. */
static
void
ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *last,
saidx_t depth) {
#define STACK_SIZE SS_MISORT_STACKSIZE
struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
const sauchar_t *Td;
saidx_t *a, *b, *c, *d, *e, *f;
saidx_t s, t;
saint_t ssize;
saint_t limit;
saint_t v, x = 0;
for(ssize = 0, limit = ss_ilg(last - first);;) {
/* small range: finish it directly and fetch the next pending range */
if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
#if 1 < SS_INSERTIONSORT_THRESHOLD
if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
#endif
STACK_POP(first, last, depth, limit);
continue;
}
Td = T + depth;
/* budget exhausted: order the whole range on the current character */
if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
if(limit < 0) {
/* range is ordered on the current character: find the first run [first, a)
   of equal characters that is longer than one element */
for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
if((x = Td[PA[*a]]) != v) {
if(1 < (a - first)) { break; }
v = x;
first = a;
}
}
if(Td[PA[*first] - 1] < v) {
first = ss_partition(PA, first, a, depth);
}
/* continue with the smaller part, stack the larger one; the run [first, a)
   is continued one character deeper, the remainder [a, last) keeps limit -1 */
if((a - first) <= (last - a)) {
if(1 < (a - first)) {
STACK_PUSH(a, last, depth, -1);
last = a, depth += 1, limit = ss_ilg(a - first);
} else {
first = a, limit = -1;
}
} else {
if(1 < (last - a)) {
STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
first = a, limit = -1;
} else {
last = a, depth += 1, limit = ss_ilg(a - first);
}
}
continue;
}
/* choose pivot */
a = ss_pivot(Td, PA, first, last);
v = Td[PA[*a]];
SWAP(*first, *a);
/* partition */
/* elements equal to v are collected at both ends ([first, a) and (d, last))
   while b and c scan inwards over the smaller and larger elements */
for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
if(((a = b) < last) && (x < v)) {
for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
}
for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
if((b < (d = c)) && (x > v)) {
for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
for(; b < c;) {
SWAP(*b, *c);
for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
if(a <= d) {
/* move the equal elements from both ends into the middle, giving
   [first, a) < v, [a, c) == v, [c, last) > v */
c = b - 1;
if((s = a - first) > (t = b - a)) { s = t; }
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
if((s = d - c) > (t = last - d - 1)) { s = t; }
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
a = first + (b - a), c = last - (d - c);
b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
/* three parts remain: [first, a) and [c, last) at this depth, [b, c) one
   character deeper; the smallest is processed next, the others are stacked */
if((a - first) <= (last - c)) {
if((last - c) <= (c - b)) {
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
STACK_PUSH(c, last, depth, limit);
last = a;
} else if((a - first) <= (c - b)) {
STACK_PUSH(c, last, depth, limit);
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
last = a;
} else {
STACK_PUSH(c, last, depth, limit);
STACK_PUSH(first, a, depth, limit);
first = b, last = c, depth += 1, limit = ss_ilg(c - b);
}
} else {
if((a - first) <= (c - b)) {
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
STACK_PUSH(first, a, depth, limit);
first = c;
} else if((last - c) <= (c - b)) {
STACK_PUSH(first, a, depth, limit);
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
first = c;
} else {
STACK_PUSH(first, a, depth, limit);
STACK_PUSH(c, last, depth, limit);
first = b, last = c, depth += 1, limit = ss_ilg(c - b);
}
}
} else {
/* every element has character v: give back the budget consumed above and
   continue one character deeper */
limit += 1;
if(Td[PA[*first] - 1] < v) {
first = ss_partition(PA, first, last, depth);
limit = ss_ilg(last - first);
}
depth += 1;
}
}
#undef STACK_SIZE
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
/*---------------------------------------------------------------------------*/
#if SS_BLOCKSIZE != 0
/* Exchanges the n elements starting at a with the n elements starting at b,
 * element by element in increasing address order. Does nothing for n <= 0. */
static INLINE
void
ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
  saidx_t held;

  while(0 < n) {
    held = *a;
    *a = *b;
    *b = held;
    ++a;
    ++b;
    --n;
  }
}
/* In-place exchange of the two adjacent ranges [first, middle) and
 * [middle, last), without auxiliary storage.
 * Equal-length ranges are exchanged with a single ss_blockswap; otherwise the
 * shorter side is repeatedly cycled through the longer one and the loop
 * continues on what is left, until one side becomes empty.
 * NOTE(review): presumably equivalent to a left rotation by (middle - first)
 * positions - confirm with a unit test before relying on it. */
static INLINE
void
ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
saidx_t *a, *b, t;
saidx_t l, r;
/* l, r: current lengths of the left and right range */
l = middle - first, r = last - middle;
for(; (0 < l) && (0 < r);) {
if(l == r) { ss_blockswap(first, middle, l); break; }
if(l < r) {
/* left side is shorter: work backwards from the end */
a = last - 1, b = middle - 1;
t = *a;
do {
*a-- = *b, *b-- = *a;
if(b < first) {
*a = t;
last = a;
if((r -= l + 1) <= l) { break; }
a -= 1, b = middle - 1;
t = *a;
}
} while(1);
} else {
/* right side is shorter (or equal handled above): work forwards from the start */
a = first, b = middle;
t = *a;
do {
*a++ = *b, *b++ = *a;
if(last <= b) {
*a = t;
first = a + 1;
if((l -= r + 1) <= r) { break; }
a += 1, b = middle;
t = *a;
}
} while(1);
}
}
}
/*---------------------------------------------------------------------------*/
/* Merges the two sorted runs [first, middle) and [middle, last) without a
 * buffer. Working from the right end, the last element of the right run is
 * located in the left run by binary search (using ss_compare at `depth`) and
 * the tail of the left run is rotated behind it with ss_rotate.
 * Stored indices may be bit-complemented (negative); such values are decoded
 * with ~ before being used as an index into PA. When the search finds an
 * equal element in the left run, the element at the insertion point is
 * complemented. */
static
void
ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t depth) {
const saidx_t *p;
saidx_t *a, *b;
saidx_t len, half;
saint_t q, r;
saint_t x;
for(;;) {
/* p: PA entry of the last element of the right run; x records whether it was complemented */
if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
else { x = 0; p = PA + *(last - 1); }
/* binary search in [first, middle) for the first element not less than *p;
   r keeps the result of the last comparison that was not "less" */
for(a = first, len = middle - first, half = len >> 1, r = -1;
0 < len;
len = half, half >>= 1) {
b = a + half;
q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
if(q < 0) {
a = b + 1;
half -= (len & 1) ^ 1;
} else {
r = q;
}
}
if(a < middle) {
if(r == 0) { *a = ~*a; }
ss_rotate(a, middle, last);
last -= middle - a;
middle = a;
if(first == middle) { break; }
}
--last;
/* when the element just placed was complemented, also step back over the
   complemented elements in front of it */
if(x != 0) { while(*--last < 0) { } }
if(middle == last) { break; }
}
}
/*---------------------------------------------------------------------------*/
/* Merge-forward with internal buffer. */
/* Merges the sorted runs [first, middle) and [middle, last) from left to
 * right. The left run is first exchanged with `buf` (via ss_blockswap), so
 * `buf` must provide room for (middle - first) elements; the merge then reads
 * from buf and from the right run and writes to the output position a,
 * moving the displaced output elements back into the slot just read so that
 * the original buffer contents are preserved.
 * Elements with a negative (bit-complemented) value are copied together with
 * the element preceding them and are not compared. When the two heads compare
 * equal, the element from the right run is complemented before both are
 * emitted. */
static
void
ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t *buf, saidx_t depth) {
saidx_t *a, *b, *c, *bufend;
saidx_t t;
saint_t r;
/* bufend points at the LAST buffered element (inclusive bound) */
bufend = buf + (middle - first) - 1;
ss_blockswap(buf, first, middle - first);
/* a: output cursor, b: buffer (left run) cursor, c: right-run cursor,
   t: the displaced value that finally goes back into the buffer */
for(t = *(a = first), b = buf, c = middle;;) {
r = ss_compare(T, PA + *b, PA + *c, depth);
if(r < 0) {
/* take from the buffer */
do {
*a++ = *b;
if(bufend <= b) { *bufend = t; return; }
*b++ = *a;
} while(*b < 0);
} else if(r > 0) {
/* take from the right run; when it runs out, flush the rest of the buffer */
do {
*a++ = *c, *c++ = *a;
if(last <= c) {
while(b < bufend) { *a++ = *b, *b++ = *a; }
*a = *b, *b = t;
return;
}
} while(*c < 0);
} else {
/* equal heads: flag the right-run element, then emit buffer group followed by right group */
*c = ~*c;
do {
*a++ = *b;
if(bufend <= b) { *bufend = t; return; }
*b++ = *a;
} while(*b < 0);
do {
*a++ = *c, *c++ = *a;
if(last <= c) {
while(b < bufend) { *a++ = *b, *b++ = *a; }
*a = *b, *b = t;
return;
}
} while(*c < 0);
}
}
}
/* Merge-backward with internal buffer. */
/* Mirror image of ss_mergeforward: the RIGHT run [middle, last) is exchanged
 * with `buf` (which must hold last - middle elements) and the two runs are
 * merged from right to left, writing at the output cursor a.
 * p1 / p2 point at the PA entries of the current buffer / left-run element,
 * decoded with ~ when the stored value is negative. The flag word x remembers
 * that decoding: bit 1 is set when the current buffer element was
 * complemented, bit 2 when the current left-run element was. A set bit causes
 * the run of elements up to and including the next non-negative one to be
 * copied as a group before the flag is cleared. When the two heads compare
 * equal, the buffer element is written complemented. */
static
void
ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t *buf, saidx_t depth) {
const saidx_t *p1, *p2;
saidx_t *a, *b, *c, *bufend;
saidx_t t;
saint_t r;
saint_t x;
/* bufend points at the LAST buffered element (inclusive bound) */
bufend = buf + (last - middle) - 1;
ss_blockswap(buf, middle, last - middle);
x = 0;
if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
else { p1 = PA + *bufend; }
if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
else { p2 = PA + *(middle - 1); }
/* a: output cursor, b: buffer (right run) cursor, c: left-run cursor,
   t: the displaced value that finally goes back into the buffer */
for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
r = ss_compare(T, p1, p2, depth);
if(0 < r) {
/* buffer element is larger: emit it */
if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
*a-- = *b;
if(b <= buf) { *buf = t; break; }
*b-- = *a;
if(*b < 0) { p1 = PA + ~*b; x |= 1; }
else { p1 = PA + *b; }
} else if(r < 0) {
/* left-run element is larger: emit it; when the left run is used up, flush the buffer */
if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
*a-- = *c, *c-- = *a;
if(c < first) {
while(buf < b) { *a-- = *b, *b-- = *a; }
*a = *b, *b = t;
break;
}
if(*c < 0) { p2 = PA + ~*c; x |= 2; }
else { p2 = PA + *c; }
} else {
/* equal heads: emit the buffer element complemented, then the left-run element */
if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
*a-- = ~*b;
if(b <= buf) { *buf = t; break; }
*b-- = *a;
if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
*a-- = *c, *c-- = *a;
if(c < first) {
while(buf < b) { *a-- = *b, *b-- = *a; }
*a = *b, *b = t;
break;
}
if(*b < 0) { p1 = PA + ~*b; x |= 1; }
else { p1 = PA + *b; }
if(*c < 0) { p2 = PA + ~*c; x |= 2; }
else { p2 = PA + *c; }
}
}
}
/* D&C based merge. */
/* Merges the sorted runs [first, middle) and [middle, last) using a buffer of
 * `bufsize` elements that may be smaller than either run.
 * While neither run fits into the buffer, a binary search finds the largest m
 * such that the last m elements of the left run and the first m elements of
 * the right run have to change sides; those two blocks are exchanged with
 * ss_blockswap, which leaves two smaller independent merge problems. One is
 * continued immediately and the other is pushed on a local explicit stack.
 * As soon as a run fits into the buffer it is finished with ss_mergebackward
 * or ss_mergeforward. The function returns from inside STACK_POP when the
 * stack is empty.
 * `check` / `next` are bit sets consumed by MERGE_CHECK after a sub-merge:
 *   bit 1: complement *first unconditionally,
 *   bit 2: complement *first if it compares equal to its predecessor,
 *   bit 4: complement *last  if it compares equal to its predecessor. */
static
void
ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t *buf, saidx_t bufsize, saidx_t depth) {
#define STACK_SIZE SS_SMERGE_STACKSIZE
/* GETIDX: decode a possibly bit-complemented (negative) stored index. */
#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
#define MERGE_CHECK(a, b, c)\
do {\
if(((c) & 1) ||\
(((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
*(a) = ~*(a);\
}\
if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
*(b) = ~*(b);\
}\
} while(0)
struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
saidx_t *l, *r, *lm, *rm;
saidx_t m, len, half;
saint_t ssize;
saint_t check, next;
for(check = 0, ssize = 0;;) {
/* right run fits into the buffer: merge backward and fetch the next problem */
if((last - middle) <= bufsize) {
if((first < middle) && (middle < last)) {
ss_mergebackward(T, PA, first, middle, last, buf, depth);
}
MERGE_CHECK(first, last, check);
STACK_POP(first, middle, last, check);
continue;
}
/* left run fits into the buffer: merge forward and fetch the next problem */
if((middle - first) <= bufsize) {
if(first < middle) {
ss_mergeforward(T, PA, first, middle, last, buf, depth);
}
MERGE_CHECK(first, last, check);
STACK_POP(first, middle, last, check);
continue;
}
/* binary search for m, the number of elements to exchange around middle */
for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
0 < len;
len = half, half >>= 1) {
if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
m += half + 1;
half -= (len & 1) ^ 1;
}
}
if(0 < m) {
lm = middle - m, rm = middle + m;
ss_blockswap(lm, middle, m);
l = r = middle, next = 0;
if(rm < last) {
if(*rm < 0) {
*rm = ~*rm;
if(first < lm) { for(; *--l < 0;) { } next |= 4; }
next |= 1;
} else if(first < lm) {
for(; *r < 0; ++r) { }
next |= 2;
}
}
/* continue with the smaller half, stack the larger one */
if((l - first) <= (last - r)) {
STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
middle = lm, last = l, check = (check & 3) | (next & 4);
} else {
if((next & 2) && (r == middle)) { next ^= 6; }
STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
first = r, middle = rm, check = (next & 3) | (check & 4);
}
} else {
/* nothing to exchange: the runs are already in order; only the boundary
   element may need to be flagged as equal to its predecessor */
if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
*middle = ~*middle;
}
MERGE_CHECK(first, last, check);
STACK_POP(first, middle, last, check);
}
}
/* Drop all function-local helper macros so they do not leak into the rest of
   the translation unit. */
#undef MERGE_CHECK
#undef GETIDX
#undef STACK_SIZE
}
#endif /* SS_BLOCKSIZE != 0 */
/*---------------------------------------------------------------------------*/
/*- Function -*/
/* Substring sort */
/* Sorts the indices in [first, last) by the substrings of T they refer to
 * through PA, starting the comparison at character offset `depth`.
 *
 * With SS_BLOCKSIZE == 0 the whole range is handed to ss_mintrosort.
 * Otherwise the range is cut into blocks of SS_BLOCKSIZE elements; each block
 * is sorted on its own and blocks are merged pairwise with ss_swapmerge in a
 * binary-counter pattern (block i is merged once for every trailing 1 bit of
 * i). If the caller's buffer is smaller than SS_BLOCKSIZE, smaller than the
 * range and smaller than ss_isqrt(range length), the last `limit` elements of
 * the range itself serve as the merge buffer; they are sorted separately at
 * the end and joined with ss_inplacemerge.
 *
 * When `lastsuffix` is non-zero the first element of the range is excluded
 * from the sort and inserted into the sorted result afterwards, comparing it
 * through a temporary two-entry PA whose second entry is n - 2. */
void
sssort(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *last,
saidx_t *buf, saidx_t bufsize,
saidx_t depth, saidx_t n, saint_t lastsuffix) {
saidx_t *a;
#if SS_BLOCKSIZE != 0
saidx_t *b, *middle, *curbuf;
saidx_t j, k, curbufsize, limit;
#endif
saidx_t i;
/* set the first element aside; it is re-inserted at the end */
if(lastsuffix != 0) { ++first; }
#if SS_BLOCKSIZE == 0
ss_mintrosort(T, PA, first, last, depth);
#else
/* decide whether the tail of the range has to double as merge buffer */
if((bufsize < SS_BLOCKSIZE) &&
(bufsize < (last - first)) &&
(bufsize < (limit = ss_isqrt(last - first)))) {
if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
buf = middle = last - limit, bufsize = limit;
} else {
middle = last, limit = 0;
}
/* sort each full block and merge it with its already-sorted predecessors */
for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
#elif 1 < SS_BLOCKSIZE
ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
#endif
/* use the not-yet-sorted remainder of the range as buffer when it is larger
   than the dedicated one */
curbufsize = last - (a + SS_BLOCKSIZE);
curbuf = a + SS_BLOCKSIZE;
if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
}
}
/* sort the final (possibly short) block */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
ss_mintrosort(T, PA, a, middle, depth);
#elif 1 < SS_BLOCKSIZE
ss_insertionsort(T, PA, a, middle, depth);
#endif
/* merge the remaining sorted runs, one per set bit of i */
for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
if(i & 1) {
ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
a -= k;
}
}
/* the tail that served as buffer still has to be sorted and merged in */
if(limit != 0) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
ss_mintrosort(T, PA, middle, last, depth);
#elif 1 < SS_BLOCKSIZE
ss_insertionsort(T, PA, middle, last, depth);
#endif
ss_inplacemerge(T, PA, first, middle, last, depth);
}
#endif
if(lastsuffix != 0) {
/* Insert last type B* suffix. */
/* shift elements left over the slot at first - 1 until the saved element's
   position is found; negative (complemented) entries are skipped over */
saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
for(a = first, i = *(first - 1);
(a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
++a) {
*(a - 1) = *a;
}
*(a - 1) = i;
}
}

View File

@ -1,615 +0,0 @@
/*
* trsort.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*- Compiler specifics -*/
/* Silence clang's 64-to-32 bit narrowing warning for this file. */
#ifdef __clang__
#pragma clang diagnostic ignored "-Wshorten-64-to-32"
#endif
/* Make the `inline` keyword usable everywhere: native in C++ and C99+,
 * mapped to __inline on MSVC, and defined away otherwise. */
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
/* inline is defined */
#elif defined(_MSC_VER)
# define inline __inline
#else
# define inline /* disable inline */
#endif
/* FORCE_INLINE: strongest available inlining request for the compiler in use.
 * NOTE(review): the functions following this block are declared with `INLINE`
 * (upper case), which is not defined here - presumably it comes from config.h. */
#ifdef _MSC_VER /* Visual Studio */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# define FORCE_INLINE static __forceinline
#else
# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# ifdef __GNUC__
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
# else
# define FORCE_INLINE static
# endif /* __STDC_VERSION__ */
#endif
/*- Dependencies -*/
#include "divsufsort_private.h"
/*- Private Functions -*/
/* Byte-wise base-2 logarithm lookup: lg_table[i] is floor(log2(i)) for
 * i in 1..255, and -1 for i == 0. Used by tr_ilg to locate the highest set
 * bit of a value one byte at a time. */
static const saint_t lg_table[256]= {
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/* Integer base-2 logarithm of n: finds the most significant non-zero byte
 * (among 8 bytes in a BUILD_DIVSUFSORT64 build, 4 otherwise) and looks it up
 * in lg_table. For n == 0 the result is lg_table[0], i.e. -1. */
static INLINE
saint_t
tr_ilg(saidx_t n) {
#if defined(BUILD_DIVSUFSORT64)
  /* Upper 32 bits first; fall through to the shared lower-half code if clear. */
  if(n >> 32) {
    if(n >> 48) {
      if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
      return 48 + lg_table[(n >> 48) & 0xff];
    }
    if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
    return 32 + lg_table[(n >> 32) & 0xff];
  }
#endif
  if(n & 0xffff0000) {
    if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return 0 + lg_table[(n >> 0) & 0xff];
}
/*---------------------------------------------------------------------------*/
/* Simple insertionsort for small size groups. */
/* Sorts the indices in [first, last) in ascending order of the key ISAd[x],
 * working from the left: [first, a) is already sorted and *a is inserted
 * into it.
 * When the inserted element has the same key as the element it stops behind,
 * that element is stored bit-complemented (hence negative); entries that are
 * already negative are shifted along with their neighbour and are never used
 * as a key index themselves. */
static
void
tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
saidx_t *a, *b;
saidx_t t, r;
for(a = first + 1; a < last; ++a) {
/* shift larger elements right while t's key is smaller than *b's */
for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
/* the inner do/while also carries along any complemented (negative) predecessors */
do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
if(b < first) { break; }
}
/* same key as the element at b: flag that element by complementing it */
if(r == 0) { *b = ~*b; }
*(b + 1) = t;
}
}
/*---------------------------------------------------------------------------*/
/* Sift-down step of a max-heap keyed on ISAd[SA[.]].
 * The element at position i is moved down until neither child has a larger
 * key. Both children (2i+1 and 2i+2) are read whenever 2i+1 < size. */
static INLINE
void
tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
  saidx_t child, pick;
  saidx_t moving;
  saidx_t key, best, right;

  moving = SA[i];
  key = ISAd[moving];
  while((child = 2 * i + 1) < size) {
    pick = child;
    best = ISAd[SA[child]];
    right = ISAd[SA[child + 1]];
    if(best < right) {
      pick = child + 1;
      best = right;
    }
    if(best <= key) { break; }
    SA[i] = SA[pick];
    i = pick;
  }
  SA[i] = moving;
}
/* Heapsort of SA[0 .. size) by the key ISAd[SA[.]].
 * The heap is always built over an odd number of elements so that every
 * internal node has two children; for an even size the last element is set
 * aside first and swapped back in once the heap is built. */
static
void
tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
  saidx_t heap_len;
  saidx_t idx;
  saidx_t held;

  heap_len = size;
  if((size % 2) == 0) {
    heap_len -= 1;
    if(ISAd[SA[heap_len / 2]] < ISAd[SA[heap_len]]) {
      held = SA[heap_len];
      SA[heap_len] = SA[heap_len / 2];
      SA[heap_len / 2] = held;
    }
  }

  /* build the heap bottom-up */
  idx = heap_len / 2 - 1;
  while(0 <= idx) {
    tr_fixdown(ISAd, SA, idx, heap_len);
    --idx;
  }

  if((size % 2) == 0) {
    held = SA[0];
    SA[0] = SA[heap_len];
    SA[heap_len] = held;
    tr_fixdown(ISAd, SA, 0, heap_len);
  }

  /* repeatedly move the current maximum to the end of the shrinking heap */
  for(idx = heap_len - 1; 0 < idx; --idx) {
    held = SA[0];
    SA[0] = SA[idx];
    tr_fixdown(ISAd, SA, 0, idx);
    SA[idx] = held;
  }
}
/*---------------------------------------------------------------------------*/
/* Returns whichever of v1, v2, v3 points at the element whose key
 * ISAd[*v] is the median of the three. */
static INLINE
saidx_t *
tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
  saidx_t *hold;

  /* order v1 <= v2 */
  if(ISAd[*v1] > ISAd[*v2]) {
    hold = v1; v1 = v2; v2 = hold;
  }
  if(ISAd[*v2] > ISAd[*v3]) {
    return (ISAd[*v1] > ISAd[*v3]) ? v1 : v3;
  }
  return v2;
}
/* Returns whichever of v1..v5 points at the element whose key ISAd[*v] is
 * the median of the five, using a fixed comparison network on the pointers
 * (the referenced array elements are not modified). */
static INLINE
saidx_t *
tr_median5(const saidx_t *ISAd,
           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
  saidx_t *hold;

  if(ISAd[*v2] > ISAd[*v3]) {
    hold = v2; v2 = v3; v3 = hold;
  }
  if(ISAd[*v4] > ISAd[*v5]) {
    hold = v4; v4 = v5; v5 = hold;
  }
  if(ISAd[*v2] > ISAd[*v4]) {
    hold = v2; v2 = v4; v4 = hold;
    hold = v3; v3 = v5; v5 = hold;
  }
  if(ISAd[*v1] > ISAd[*v3]) {
    hold = v1; v1 = v3; v3 = hold;
  }
  if(ISAd[*v1] > ISAd[*v4]) {
    hold = v1; v1 = v4; v4 = hold;
    hold = v3; v3 = v5; v5 = hold;
  }
  return (ISAd[*v3] > ISAd[*v4]) ? v4 : v3;
}
/* Chooses a pivot position within [first, last).
 * Up to 32 elements: median of first / middle / last.
 * Up to 512 elements: median of five evenly spread samples.
 * Larger ranges: median of three medians-of-three taken near the start,
 * the middle and the end. */
static INLINE
saidx_t *
tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
  saidx_t span;
  saidx_t step;
  saidx_t *mid, *lo, *hi;

  span = last - first;
  mid = first + span / 2;

  if(span <= 32) {
    return tr_median3(ISAd, first, mid, last - 1);
  }
  if(span <= 512) {
    step = span >> 2;
    return tr_median5(ISAd, first, first + step, mid, last - 1 - step, last - 1);
  }

  step = span >> 3;
  lo = tr_median3(ISAd, first, first + step, first + (step << 1));
  mid = tr_median3(ISAd, mid - step, mid, mid + step);
  hi = tr_median3(ISAd, last - 1 - (step << 1), last - 1 - step, last - 1);
  return tr_median3(ISAd, lo, mid, hi);
}
/*---------------------------------------------------------------------------*/
/* Work budget consulted through trbudget_check() before a group is processed. */
typedef struct _trbudget_t trbudget_t;
struct _trbudget_t {
/* number of times the budget may still be refilled */
saidx_t chance;
/* amount of work still allowed before the next refill is needed */
saidx_t remain;
/* amount added to `remain` on each refill */
saidx_t incval;
/* running total of the sizes that trbudget_check() rejected;
   not set by trbudget_init() */
saidx_t count;
};
/* Prepares a budget that allows `chance` refills of `incval` units each;
   the first `incval` units are available immediately.
   The count field is not touched here. */
static INLINE
void
trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
  budget->incval = incval;
  budget->remain = incval;
  budget->chance = chance;
}
/* Asks the budget for `size` units of work.
   Returns 1 when the request is granted (directly, or by spending one
   refill), and 0 when it is refused; a refused size is added to
   budget->count. */
static INLINE
saint_t
trbudget_check(trbudget_t *budget, saidx_t size) {
  saint_t granted = 1;

  if(budget->remain >= size) {
    /* enough budget left */
    budget->remain -= size;
  } else if(budget->chance != 0) {
    /* spend one refill and charge the request against it */
    budget->remain += budget->incval - size;
    budget->chance -= 1;
  } else {
    /* nothing left: record how much work was turned down */
    budget->count += size;
    granted = 0;
  }
  return granted;
}
/*---------------------------------------------------------------------------*/
/* Three-way partition of [first, last) around the key value v, where the
   key of an element p is ISAd[*p].
   Scanning starts at middle: elements in [first, middle) are not examined
   and are treated as equal to v.
   While scanning, elements equal to v are parked at both ends of the range;
   they are swapped into the centre at the end.
   On return [*pa, *pb) holds the elements whose key equals v, smaller keys
   lie in [first, *pa) and larger keys in [*pb, last). */
static INLINE
void
tr_partition(const saidx_t *ISAd,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t **pa, saidx_t **pb, saidx_t v) {
saidx_t *a, *b, *c, *d, *e, *f;
/* t also serves as the temporary used by the SWAP macro */
saidx_t t, s;
saidx_t x = 0;
/* skip the leading run of keys equal to v; [first, a) is the left "equal" block */
for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
if(((a = b) < last) && (x < v)) {
/* advance over keys <= v, moving the equal ones to the left block */
for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
}
/* same from the right end; the right "equal" block is (d, last) */
for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
if((b < (d = c)) && (x > v)) {
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
/* main loop: *b is > v and *c is < v here, so exchange them and keep scanning */
for(; b < c;) {
SWAP(*b, *c);
for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
/* a > d occurs e.g. when every scanned key equals v; the whole range is then returned */
if(a <= d) {
c = b - 1;
/* swap the left "equal" block with the tail of the "smaller" block */
if((s = a - first) > (t = b - a)) { s = t; }
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
/* swap the right "equal" block with the head of the "larger" block */
if((s = d - c) > (t = last - d - 1)) { s = t; }
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
/* b - a elements are smaller than v, d - c elements are larger */
first += (b - a), last -= (d - c);
}
*pa = first, *pb = last;
}
/* Sorts the suffixes of the middle partition [a, b) by using the sorted
   order of the suffixes of the left partition [first, a) and of the right
   partition [b, last).
   For every entry x that is scanned, the suffix x - depth is appended to the
   middle partition when it currently carries the rank of that partition
   (b - SA - 1); its rank in ISA becomes its new position in SA. */
static
void
tr_copy(saidx_t *ISA, const saidx_t *SA,
        saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
        saidx_t depth) {
  saidx_t *scan, *fill, *stop;
  saidx_t pred, target;

  target = b - SA - 1;

  /* left to right: fill the middle partition from its front; the scan also
     walks over the entries it has just written */
  fill = a - 1;
  for(scan = first; scan <= fill; ++scan) {
    pred = *scan - depth;
    if((pred < 0) || (ISA[pred] != target)) { continue; }
    ++fill;
    *fill = pred;
    ISA[pred] = fill - SA;
  }

  /* right to left: fill the remaining slots from the back of the partition */
  stop = fill + 1;
  fill = b;
  for(scan = last - 1; stop < fill; --scan) {
    pred = *scan - depth;
    if((pred < 0) || (ISA[pred] != target)) { continue; }
    --fill;
    *fill = pred;
    ISA[pred] = fill - SA;
  }
}
/* Variant of tr_copy() that does not give every copied suffix its own rank:
   consecutive copied suffixes whose source entries share the same rank
   (ISA[pred + depth]) receive one common rank, namely the SA position at
   which that run was first met in the scan direction.
   Three passes are made:
     1. left to right, filling the middle partition from its front;
     2. right to left over [first, end of pass 1], re-ranking each run of
        equal ranks to the position of its last element;
     3. right to left from last - 1, filling the rest of the middle
        partition from its back. */
static
void
tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
               saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
               saidx_t depth) {
  saidx_t *scan, *fill, *stop;
  saidx_t pred, target;
  saidx_t cur, prev, assigned = -1;

  target = b - SA - 1;

  /* pass 1 */
  prev = -1;
  fill = a - 1;
  for(scan = first; scan <= fill; ++scan) {
    pred = *scan - depth;
    if((pred < 0) || (ISA[pred] != target)) { continue; }
    ++fill;
    *fill = pred;
    cur = ISA[pred + depth];
    if(cur != prev) { prev = cur; assigned = fill - SA; }
    ISA[pred] = assigned;
  }

  /* pass 2 */
  prev = -1;
  for(scan = fill; first <= scan; --scan) {
    cur = ISA[*scan];
    if(cur != prev) { prev = cur; assigned = scan - SA; }
    if(assigned != cur) { ISA[*scan] = assigned; }
  }

  /* pass 3 */
  prev = -1;
  stop = fill + 1;
  fill = b;
  for(scan = last - 1; stop < fill; --scan) {
    pred = *scan - depth;
    if((pred < 0) || (ISA[pred] != target)) { continue; }
    --fill;
    *fill = pred;
    cur = ISA[pred + depth];
    if(cur != prev) { prev = cur; assigned = fill - SA; }
    ISA[pred] = assigned;
  }
}
/* Sorts the group [first, last) of SA by the keys ISAd[*p] and updates the
   ranks stored in ISA, without recursion: pending sub-ranges are kept on an
   explicit stack.
   `limit` drives a small state machine:
     limit >= 0 : ordinary range. Ranges of at most
                  TR_INSERTIONSORT_THRESHOLD elements go to tr_insertionsort;
                  when the allowance reaches 0 the range goes to tr_heapsort;
                  otherwise it is split around a pivot with tr_partition.
     limit == -1: tandem repeat partition.
     limit == -2: tandem repeat copy (tr_copy / tr_partialcopy).
     limit == -3: sorted partition, only the ranks remain to be updated.
   Before the equal-key part of a range is refined at the next depth
   (ISAd + incr) the budget is consulted with trbudget_check(); when it
   refuses, that part is not refined further here and the enclosing tandem
   repeat entry (stack[trlink]) is flagged with d = -1.
   NOTE(review): STACK_PUSH5 / STACK_POP5 are defined outside this chunk;
   STACK_POP5 is presumably what leaves the function once the stack is
   empty -- confirm against the macro definition. */
static
void
tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
saidx_t *SA, saidx_t *first, saidx_t *last,
trbudget_t *budget) {
#define STACK_SIZE TR_STACKSIZE
/* stack entries are pushed as (ISAd, first, last, limit, trlink) */
struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
saidx_t *a, *b, *c;
/* t is the temporary used by the SWAP macro */
saidx_t t;
saidx_t v, x = 0;
/* offset of the key array from ISA; added to ISAd to move to the next depth */
saidx_t incr = ISAd - ISA;
saint_t limit, next;
saint_t ssize, trlink = -1;
for(ssize = 0, limit = tr_ilg(last - first);;) {
if(limit < 0) {
if(limit == -1) {
/* tandem repeat partition */
tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
/* update ranks */
if(a < last) {
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
}
if(b < last) {
for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
}
/* push */
if(1 < (b - a)) {
/* two entries: the bounds [a, b) with a flag in d (read back in the -2 state),
   then the copy request itself; trlink points at the first of the two */
STACK_PUSH5(NULL, a, b, 0, 0);
STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
trlink = ssize - 2;
}
/* continue with the smaller side first, the larger one is pushed */
if((a - first) <= (last - b)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
last = a, limit = tr_ilg(a - first);
} else if(1 < (last - b)) {
first = b, limit = tr_ilg(last - b);
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
} else {
if(1 < (last - b)) {
STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
first = b, limit = tr_ilg(last - b);
} else if(1 < (a - first)) {
last = a, limit = tr_ilg(a - first);
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
} else if(limit == -2) {
/* tandem repeat copy */
/* fetch the bounds and the flag pushed together with this request */
a = stack[--ssize].b, b = stack[ssize].c;
if(stack[ssize].d == 0) {
tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
} else {
/* flag was set to -1: use the partial copy and propagate the flag outwards */
if(0 <= trlink) { stack[trlink].d = -1; }
tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
}
STACK_POP5(ISAd, first, last, limit, trlink);
} else {
/* sorted partition */
/* leading non-negative entries: each gets its own position as rank */
if(0 <= *first) {
a = first;
do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
first = a;
}
if(first < last) {
/* negative entries are complemented values marking elements tied with their
   successor (see the heapsort path below); restore them up to the first
   non-negative entry, which closes the tie group */
a = first; do { *a = ~*a; } while(*++a < 0);
next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
/* push */
if(trbudget_check(budget, a - first)) {
if((a - first) <= (last - a)) {
STACK_PUSH5(ISAd, a, last, -3, trlink);
ISAd += incr, last = a, limit = next;
} else {
if(1 < (last - a)) {
STACK_PUSH5(ISAd + incr, first, a, next, trlink);
first = a, limit = -3;
} else {
ISAd += incr, last = a, limit = next;
}
}
} else {
/* budget refused: skip the tie group and flag the enclosing tandem repeat entry */
if(0 <= trlink) { stack[trlink].d = -1; }
if(1 < (last - a)) {
first = a, limit = -3;
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
continue;
}
if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
tr_insertionsort(ISAd, first, last);
limit = -3;
continue;
}
if(limit-- == 0) {
/* depth allowance used up: heap sort, then complement every element whose key
   equals that of its successor so the -3 state can find the tie groups */
tr_heapsort(ISAd, first, last - first);
for(a = last - 1; first < a; a = b) {
for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
}
limit = -3;
continue;
}
/* choose pivot */
a = tr_pivot(ISAd, first, last);
SWAP(*first, *a);
v = ISAd[*first];
/* partition */
tr_partition(ISAd, first, first + 1, last, &a, &b, v);
if((last - first) != (b - a)) {
/* [a, b) is the equal-key part; it is refined at the next depth unless its
   rank already equals v, in which case the tandem repeat state (-1) is used */
next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
/* update ranks */
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
/* push */
/* three parts remain: [first, a), [a, b) at the next depth, and [b, last);
   the branches below differ only in which parts are pushed and which one is
   continued with, chosen from the relative sizes of the parts */
if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
if((a - first) <= (last - b)) {
if((last - b) <= (b - a)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
STACK_PUSH5(ISAd, b, last, limit, trlink);
last = a;
} else if(1 < (last - b)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
first = b;
} else {
ISAd += incr, first = a, last = b, limit = next;
}
} else if((a - first) <= (b - a)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd, b, last, limit, trlink);
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
last = a;
} else {
STACK_PUSH5(ISAd, b, last, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
} else {
STACK_PUSH5(ISAd, b, last, limit, trlink);
STACK_PUSH5(ISAd, first, a, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
} else {
if((a - first) <= (b - a)) {
if(1 < (last - b)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
STACK_PUSH5(ISAd, first, a, limit, trlink);
first = b;
} else if(1 < (a - first)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
last = a;
} else {
ISAd += incr, first = a, last = b, limit = next;
}
} else if((last - b) <= (b - a)) {
if(1 < (last - b)) {
STACK_PUSH5(ISAd, first, a, limit, trlink);
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
first = b;
} else {
STACK_PUSH5(ISAd, first, a, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
} else {
STACK_PUSH5(ISAd, first, a, limit, trlink);
STACK_PUSH5(ISAd, b, last, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
}
} else {
/* the equal-key part is not refined (single element, or budget refused);
   only the two outer parts are processed */
if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
if((a - first) <= (last - b)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd, b, last, limit, trlink);
last = a;
} else if(1 < (last - b)) {
first = b;
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
} else {
if(1 < (last - b)) {
STACK_PUSH5(ISAd, first, a, limit, trlink);
first = b;
} else if(1 < (a - first)) {
last = a;
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
}
} else {
/* every key in the range equals the pivot key: retry the whole range at the
   next depth if the budget allows it */
if(trbudget_check(budget, last - first)) {
limit = tr_ilg(last - first), ISAd += incr;
} else {
if(0 <= trlink) { stack[trlink].d = -1; }
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
}
#undef STACK_SIZE
}
/*---------------------------------------------------------------------------*/
/*- Function -*/
/* Tandem repeat sort.
   Repeatedly walks SA and refines every group that is not sorted yet with
   tr_introsort(), using ISA + depth as the key array; the key offset is
   doubled after each pass.
   A negative entry -k in SA marks the start of a block of k finished
   elements; the walk stops once SA[0] == -n, or after a pass in which the
   budget refused no work. */
void
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
  saidx_t *ISAd;
  saidx_t *cursor, *group_end;
  trbudget_t budget;
  saidx_t head, run, unsorted;

  /* a chance of tr_ilg(n) * 3 / 4 is the alternative setting that was left
     commented out in the original code */
  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);

  ISAd = ISA + depth;
  while(-n < *SA) {
    cursor = SA;
    run = 0;       /* negated length of the finished block being accumulated */
    unsorted = 0;  /* amount of work the budget refused during this pass */
    do {
      head = *cursor;
      if(head < 0) {
        /* finished block of -head elements: step over it and merge it into
           the block being accumulated */
        cursor -= head;
        run += head;
        continue;
      }
      /* an unfinished group starts here: close the accumulated block by
         writing its (negative) length at its first element */
      if(run != 0) { *(cursor + run) = run; run = 0; }
      group_end = SA + ISA[head] + 1;
      if(1 < (group_end - cursor)) {
        budget.count = 0;
        tr_introsort(ISA, ISAd, SA, cursor, group_end, &budget);
        if(budget.count == 0) { run = cursor - group_end; }
        else { unsorted += budget.count; }
      } else if((group_end - cursor) == 1) {
        run = -1;
      }
      cursor = group_end;
    } while(cursor < (SA + n));
    if(run != 0) { *(cursor + run) = run; }
    if(unsorted == 0) { break; }
    ISAd += ISAd - ISA;
  }
}

View File

@ -1,381 +0,0 @@
/*
* utils.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "divsufsort_private.h"
/*- Private Function -*/
/* Binary search used by the inverse BWT.
   Returns the number of leading elements of A[0..size-1] that are smaller
   than value, i.e. the index of the first element that is not smaller
   (A is expected to be sorted in ascending order). */
static
saidx_t
binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
  saidx_t lo = 0;
  saidx_t half = size >> 1;

  while(0 < size) {
    if(A[lo + half] < value) {
      /* answer lies to the right of the probe */
      lo += half + 1;
      /* the right part is one element shorter when size is even */
      if((size & 1) == 0) { --half; }
    }
    size = half;
    half >>= 1;
  }
  return lo;
}
/*- Functions -*/
/* Burrows-Wheeler transform.
   T[0..n-1] : input string.
   U[0..n-1] : output string; may be the same buffer as T.
   SA        : suffix array of T, or NULL to let divbwt() compute the
               transform directly. When T == U the contents of SA are
               overwritten (it is used as scratch space).
   n         : length of T.
   idx       : receives the primary index (position of suffix 0 in SA, plus
               one).
   Returns 0 on success, -1 on invalid arguments, or the negative error code
   reported by divbwt(). */
saint_t
bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
             saidx_t n, saidx_t *idx) {
  saidx_t *A, i, j, p, t;
  saint_t c;

  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
  if(n <= 1) {
    if(n == 1) { U[0] = T[0]; }
    *idx = n;
    return 0;
  }

  /* No suffix array supplied: delegate the whole job. */
  if(SA == NULL) {
    i = divbwt(T, U, NULL, n);
    if(0 <= i) { *idx = i; i = 0; }
    return (saint_t)i;
  }
  A = SA;

  /* BW transform. */
  if(T == U) {
    /* In-place variant: U[j] overwrites T[j], so the original byte T[j] is
       saved in A[j] (that slot has already been consumed). A byte at
       position p is then read from T while it is still intact (j <= p) and
       from A otherwise. */
    t = n;
    for(i = 0, j = 0; i < n; ++i) {
      p = t - 1;
      t = A[i];
      if(0 <= p) {
        c = T[j];
        U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
        A[j] = c;
        j++;
      } else {
        /* the previous entry was suffix 0: this is the primary index */
        *idx = i;
      }
    }
    /* handle the last suffix array entry */
    p = t - 1;
    if(0 <= p) {
      c = T[j];
      U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
      A[j] = c;
    } else {
      *idx = i;
    }
  } else {
    /* Separate buffers: emit the byte preceding each suffix; the slot where
       suffix 0 would go is skipped and recorded as the primary index. */
    U[0] = T[n - 1];
    for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
    *idx = i + 1;
    for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
  }

  /* A always aliases the caller's SA here (the SA == NULL case returned
     above), so there is nothing to release. */
  return 0;
}
/* Inverse Burrows-Wheeler transform.
   T[0..n-1] : transformed string.
   U[0..n-1] : output (original) string.
   A         : temporary array of n elements, or NULL to have one allocated
               and released internally.
   n         : length of T.
   idx       : primary index, 1 <= idx <= n when n > 0.
   Returns 0 on success, -1 on invalid arguments, -2 when the temporary
   array cannot be allocated. */
saint_t
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
                     saidx_t n, saidx_t idx) {
  saidx_t C[ALPHABET_SIZE];
  sauchar_t D[ALPHABET_SIZE];
  saidx_t *B;
  saidx_t i, p;
  saint_t c, d;

  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
     (n < idx) || ((0 < n) && (idx == 0))) {
    return -1;
  }
  if(n <= 1) {
    /* A one-byte string is its own transform: copy it so that U is valid
       on return, as bw_transform() does for n == 1. */
    if(n == 1) { U[0] = T[0]; }
    return 0;
  }

  if((B = A) == NULL) {
    /* Allocate n*sizeof(saidx_t) bytes of memory. */
    if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
  }

  /* Inverse BW transform. */
  /* count the occurrences of every symbol */
  for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
  for(i = 0; i < n; ++i) { ++C[T[i]]; }
  /* turn the counts into bucket start positions; D lists the d symbols that
     actually occur, in ascending order */
  for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
    p = C[c];
    if(0 < p) {
      C[c] = i;
      D[d++] = (sauchar_t)c;
      i += p;
    }
  }
  /* distribute the positions into their buckets; positions at or after the
     primary index are shifted by one */
  for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
  for( ; i < n; ++i)       { B[C[T[i]]++] = i + 1; }
  /* C[k] now holds the end of the bucket of symbol D[k] */
  for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
  /* follow the links, looking up the symbol of each position by bucket */
  for(i = 0, p = idx; i < n; ++i) {
    U[i] = D[binarysearch_lower(C, d, p)];
    p = B[p - 1];
  }

  if(A == NULL) {
    /* Deallocate memory. */
    free(B);
  }

  return 0;
}
/* Checks the suffix array SA of the string T.
   T[0..n-1]  : the string.
   SA[0..n-1] : the suffix array to verify.
   n          : length of T and SA.
   verbose    : when non-zero, progress and diagnostics go to stderr.
   Returns 0 when no error is found, -1 on invalid arguments, -2 when an
   entry lies outside [0, n-1], -3 when the first characters of two adjacent
   suffixes are out of order, -4 when a suffix is found at a wrong position. */
saint_t
sufcheck(const sauchar_t *T, const saidx_t *SA,
         saidx_t n, saint_t verbose) {
  saidx_t C[ALPHABET_SIZE];
  saidx_t i, p, q, t;
  saint_t c;

  if(verbose) { fprintf(stderr, "sufcheck: "); }

  /* Check arguments. */
  if((T == NULL) || (SA == NULL) || (n < 0)) {
    if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
    return -1;
  }
  if(n == 0) {
    if(verbose) { fprintf(stderr, "Done.\n"); }
    return 0;
  }

  /* check range: [0..n-1] */
  for(i = 0; i < n; ++i) {
    if((SA[i] < 0) || (n <= SA[i])) {
      if(verbose) {
        fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
                        " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
                        n - 1, i, SA[i]);
      }
      return -2;
    }
  }

  /* check first characters. */
  for(i = 1; i < n; ++i) {
    if(T[SA[i - 1]] > T[SA[i]]) {
      if(verbose) {
        fprintf(stderr, "Suffixes in wrong order.\n"
                        " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
                        " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
                        i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
      }
      return -3;
    }
  }

  /* check suffixes. */
  /* C[ch] = start of the bucket of symbol ch in SA */
  for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
  for(i = 0; i < n; ++i) { ++C[T[i]]; }
  for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
    t = C[i];
    C[i] = p;
    p += t;
  }

  /* q = expected position of the last suffix (n - 1), which opens its
     bucket; the bucket cursor is advanced past it */
  q = C[T[n - 1]];
  C[T[n - 1]] += 1;
  /* walking SA in order, the suffix that precedes SA[i] in T must sit at the
     current cursor of the bucket of its first character */
  for(i = 0; i < n; ++i) {
    p = SA[i];
    if(0 < p) {
      c = T[--p];
      t = C[c];
    } else {
      /* suffix 0 has no predecessor: check suffix n - 1 at position q */
      c = T[p = n - 1];
      t = q;
    }
    if((t < 0) || (p != SA[t])) {
      if(verbose) {
        fprintf(stderr, "Suffix in wrong position.\n"
                        " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
                        " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
                        t, (0 <= t) ? SA[t] : -1, i, SA[i]);
      }
      return -4;
    }
    if(t != q) {
      /* advance the cursor; -1 marks a bucket that has been used up */
      ++C[c];
      if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
    }
  }

  /* same condition as every other message, so that any non-zero verbose
     value also gets the terminating "Done." line */
  if(verbose) { fprintf(stderr, "Done.\n"); }
  return 0;
}
/* Compares the suffix of T that starts at suf with the pattern P, given
   that their first *match bytes are already known to be equal.
   On return *match holds the number of pattern bytes that matched.
   Returns 0 when the whole pattern matched, the difference of the first
   mismatching bytes otherwise, or -1 when T ended before the pattern did. */
static
int
_compare(const sauchar_t *T, saidx_t Tsize,
         const sauchar_t *P, saidx_t Psize,
         saidx_t suf, saidx_t *match) {
  saidx_t tpos = suf + *match;
  saidx_t ppos = *match;
  saint_t diff = 0;

  while((tpos < Tsize) && (ppos < Psize)) {
    diff = T[tpos] - P[ppos];
    if(diff != 0) { break; }
    ++tpos;
    ++ppos;
  }

  *match = ppos;
  if(diff != 0) { return diff; }
  return (ppos != Psize) ? -1 : 0;
}
/* Search for the pattern P in the string T. */
/* T[0..Tsize-1]   : the text.
   P[0..Psize-1]   : the pattern.
   SA[0..SAsize-1] : (part of) the suffix array of T.
   idx             : optional; receives the index in SA of the first
                     matching suffix, or the insertion position when there
                     is no match; it is set to -1 on entry, so it stays -1 on
                     invalid arguments and when Tsize or SAsize is 0.
   Returns the number of suffixes in SA that start with P, -1 on invalid
   arguments, 0 when Tsize or SAsize is 0, and SAsize (with *idx = 0) when
   the pattern is empty. */
saidx_t
sa_search(const sauchar_t *T, saidx_t Tsize,
const sauchar_t *P, saidx_t Psize,
const saidx_t *SA, saidx_t SAsize,
saidx_t *idx) {
saidx_t size, lsize, rsize, half;
saidx_t match, lmatch, rmatch;
saidx_t llmatch, lrmatch, rlmatch, rrmatch;
saidx_t i, j, k;
saint_t r;
if(idx != NULL) { *idx = -1; }
if((T == NULL) || (P == NULL) || (SA == NULL) ||
(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
if((Tsize == 0) || (SAsize == 0)) { return 0; }
if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
/* binary search over SA[i .. i+size-1]; lmatch / rmatch are the numbers of
   pattern bytes known to match at the left / right boundary, so each
   comparison can start at their minimum */
for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
0 < size;
size = half, half >>= 1) {
match = MIN(lmatch, rmatch);
r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
if(r < 0) {
i += half + 1;
half -= (size & 1) ^ 1;
lmatch = match;
} else if(r > 0) {
rmatch = match;
} else {
/* a match was found at i + half: look for the first match in the left half
   (j) and for the position after the last match in the right half (k) */
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
/* left part */
for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
0 < lsize;
lsize = half, half >>= 1) {
lmatch = MIN(llmatch, lrmatch);
r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
if(r < 0) {
j += half + 1;
half -= (lsize & 1) ^ 1;
llmatch = lmatch;
} else {
lrmatch = lmatch;
}
}
/* right part */
for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
0 < rsize;
rsize = half, half >>= 1) {
rmatch = MIN(rlmatch, rrmatch);
r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
if(r <= 0) {
k += half + 1;
half -= (rsize & 1) ^ 1;
rlmatch = rmatch;
} else {
rrmatch = rmatch;
}
}
/* half was reused by the inner searches, so the outer loop must stop here */
break;
}
}
/* no match: j == k == 0, report the insertion position i instead */
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
return k - j;
}
/* Search for the character c in the string T.
   SA[0..SAsize-1] is (part of) the suffix array of T[0..Tsize-1].
   idx is optional; it receives the index in SA of the first suffix that
   starts with c, or the insertion position when there is none. It is set
   to -1 on entry, so it stays -1 on the early returns.
   Returns the number of suffixes in SA starting with c, -1 on invalid
   arguments, 0 when Tsize or SAsize is 0. */
saidx_t
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
                const saidx_t *SA, saidx_t SAsize,
                saint_t c, saidx_t *idx) {
  saidx_t width, lwidth, rwidth, half;
  saidx_t base, lo, hi, pos;
  saint_t diff;

  if(idx != NULL) { *idx = -1; }
  if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
  if((Tsize == 0) || (SAsize == 0)) { return 0; }

  base = lo = hi = 0;
  width = SAsize;
  half = width >> 1;
  while(0 < width) {
    pos = SA[base + half];
    /* a suffix starting at or beyond Tsize sorts before any character */
    diff = (pos < Tsize) ? T[pos] - c : -1;

    if(diff == 0) {
      /* hit: find the first occurrence in the left half and the position
         after the last occurrence in the right half */
      lo = base;
      lwidth = half;
      hi = base + half + 1;
      rwidth = width - half - 1;

      /* left part */
      half = lwidth >> 1;
      while(0 < lwidth) {
        pos = SA[lo + half];
        diff = (pos < Tsize) ? T[pos] - c : -1;
        if(diff < 0) {
          lo += half + 1;
          half -= (lwidth & 1) ^ 1;
        }
        lwidth = half;
        half >>= 1;
      }

      /* right part */
      half = rwidth >> 1;
      while(0 < rwidth) {
        pos = SA[hi + half];
        diff = (pos < Tsize) ? T[pos] - c : -1;
        if(diff <= 0) {
          hi += half + 1;
          half -= (rwidth & 1) ^ 1;
        }
        rwidth = half;
        half >>= 1;
      }
      break;
    }

    if(diff < 0) {
      base += half + 1;
      half -= (width & 1) ^ 1;
    }
    width = half;
    half >>= 1;
  }

  if(idx != NULL) { *idx = (0 < (hi - lo)) ? lo : base; }
  return hi - lo;
}

View File

@ -1,6 +1,6 @@
# ################################################################
# ZSTD library - Makefile
# Copyright (C) Yann Collet 2015
# Copyright (C) Yann Collet 2015-2016
# All rights reserved.
#
# BSD license
@ -28,7 +28,6 @@
#
# You can contact the author at :
# - ZSTD homepage : http://www.zstd.net
# - ZSTD source repository : https://github.com/Cyan4973/zstd
# ################################################################
# Version numbers
@ -52,7 +51,7 @@ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
LIBDIR ?= $(PREFIX)/lib
INCLUDEDIR=$(PREFIX)/include
ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c
ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c zdict.c divsufsort.c
ZSTD_LEGACY:= legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c legacy/zstd_v04.c
ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
@ -119,6 +118,8 @@ install: libzstd libzstd.pc
@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
# each public header is installed from its own source file
@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
@install -m 644 zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
@install -m 644 zdict.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
@echo zstd static and shared library installed
uninstall:
@ -128,6 +129,8 @@ uninstall:
@[ -x $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
@[ -f $(DESTDIR)$(LIBDIR)/libzstd.a ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
# each guard tests the file it removes (zstd.h is already gone after its own line)
@[ -f $(DESTDIR)$(INCLUDEDIR)/zstd.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
@[ -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h
@[ -f $(DESTDIR)$(INCLUDEDIR)/zdict.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zdict.h
@echo zstd libraries successfully uninstalled
endif

1913
lib/divsufsort.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* config.h for libdivsufsort
* divsufsort.h for libdivsufsort-lite
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
@ -24,60 +24,44 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _CONFIG_H
#define _CONFIG_H 1
#ifndef _DIVSUFSORT_H
#define _DIVSUFSORT_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** Define to the version of this package. **/
#define PROJECT_VERSION_FULL "2.0.1"
/** Define to 1 if you have the header files. **/
#define HAVE_INTTYPES_H 1
#define HAVE_STDDEF_H 1
#define HAVE_STDINT_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_STRINGS_H 1
#define HAVE_MEMORY_H 1
#define HAVE_SYS_TYPES_H 1
/*- Prototypes -*/
/** for WinIO **/
/* #undef HAVE_IO_H */
/* #undef HAVE_FCNTL_H */
/* #undef HAVE__SETMODE */
/* #undef HAVE_SETMODE */
/* #undef HAVE__FILENO */
/* #undef HAVE_FOPEN_S */
/* #undef HAVE__O_BINARY */
/*
#ifndef HAVE__SETMODE
# if HAVE_SETMODE
# define _setmode setmode
# define HAVE__SETMODE 1
# endif
# if HAVE__SETMODE && !HAVE__O_BINARY
# define _O_BINARY 0
# define HAVE__O_BINARY 1
# endif
#endif
*/
/**
* Constructs the suffix array of a given string.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @param openMP enables OpenMP optimization.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
int
divsufsort(const unsigned char *T, int *SA, int n, int openMP);
/** for inline **/
#ifndef INLINE
# define INLINE inline
#endif
/** for VC++ warning **/
#ifdef _MSC_VER
#pragma warning(disable: 4127)
#endif
/**
* Constructs the burrows-wheeler transformed string of a given string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @param num_indexes The length of secondary indexes array. (can be NULL)
* @param indexes The secondary indexes array. (can be NULL)
* @param openMP enables OpenMP optimization.
* @return The primary index if no error occurred, -1 or -2 otherwise.
*/
int
divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _CONFIG_H */
#endif /* _DIVSUFSORT_H */

View File

@ -2133,7 +2133,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; rankStats[maxW]==0; maxW--)
{if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
/* Get start index of each weight */
{
@ -2465,7 +2466,9 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; rankStats[maxW]==0; maxW--)
{ if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
/* Get start index of each weight */
{

View File

@ -2133,7 +2133,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; rankStats[maxW]==0; maxW--)
{ if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
/* Get start index of each weight */
{
@ -2465,7 +2466,8 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; rankStats[maxW]==0; maxW--)
{ if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
/* Get start index of each weight */
{

View File

@ -1342,9 +1342,9 @@ typedef struct
MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
{
const void* ptr = dt;
const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
FSE_DTableHeader DTableH;
memcpy(&DTableH, dt, sizeof(DTableH));
DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
BIT_reloadDStream(bitD);
DStatePtr->table = dt + 1;
}
@ -1465,7 +1465,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
/* **************************************************************
* Includes
* Dependencies
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
@ -1499,7 +1499,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
/* **************************************************************
/*-**************************************************************
* Templates
****************************************************************/
/*
@ -1841,9 +1841,11 @@ static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize,
const FSE_DTable* dt)
{
const void* ptr = dt;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
FSE_DTableHeader DTableH;
U32 fastMode;
memcpy(&DTableH, dt, sizeof(DTableH));
fastMode = DTableH.fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
@ -2561,7 +2563,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; rankStats[maxW]==0; maxW--)
{ if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
/* Get start index of each weight */
{
@ -2889,7 +2892,8 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; rankStats[maxW]==0; maxW--)
{ if (!maxW) return ERROR(GENERIC); } /* necessarily finds a solution before maxW==0 */
/* Get start index of each weight */
{
@ -4245,39 +4249,32 @@ static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDs
ip += headerSize;
headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
if (ZSTD_isError(headerSize)) return headerSize;
if (headerSize)
{
if (headerSize) {
/* not enough input to decode header : tell how many bytes would be necessary */
*maxDstSizePtr = 0;
return headerSize - zbc->hPos;
}
// zbc->stage = ZBUFFds_decodeHeader; break; /* useless : stage follows */
}
} }
case ZBUFFds_decodeHeader:
/* apply header to create / resize buffers */
{
size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
size_t neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */
if (zbc->inBuffSize < neededInSize)
{
if (zbc->inBuffSize < neededInSize) {
free(zbc->inBuff);
zbc->inBuffSize = neededInSize;
zbc->inBuff = (char*)malloc(neededInSize);
if (zbc->inBuff == NULL) return ERROR(memory_allocation);
}
if (zbc->outBuffSize < neededOutSize)
{
if (zbc->outBuffSize < neededOutSize) {
free(zbc->outBuff);
zbc->outBuffSize = neededOutSize;
zbc->outBuff = (char*)malloc(neededOutSize);
if (zbc->outBuff == NULL) return ERROR(memory_allocation);
}
}
} }
if (zbc->dictSize)
ZSTD_decompress_insertDictionary(zbc->zc, zbc->dict, zbc->dictSize);
if (zbc->hPos)
{
if (zbc->hPos) {
/* some data already loaded into headerBuffer : transfer into inBuff */
memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
zbc->inPos = zbc->hPos;

View File

@ -86,7 +86,7 @@ extern "C" {
/*-**************************************************************
* Memory I/O
*****************************************************************/
/*!MEM_FORCE_MEMORY_ACCESS
/* MEM_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The below switch allow to select different access method for improved performance.
@ -119,11 +119,12 @@ MEM_STATIC unsigned MEM_isLittleEndian(void)
#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
/* violates C standard on structure alignment.
/* violates C standard, by lying on structure alignment.
Only use if no other choice to achieve best performance on target platform */
MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }  /* returns size_t, like the memcpy-based variant */
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@ -133,11 +134,12 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }  /* returns size_t, like the memcpy-based variant */
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
@ -163,6 +165,11 @@ MEM_STATIC U64 MEM_read64(const void* memPtr)
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC size_t MEM_readST(const void* memPtr)
{
size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC void MEM_write16(void* memPtr, U16 value)
{
memcpy(memPtr, &value, sizeof(value));
@ -178,7 +185,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value)
memcpy(memPtr, &value, sizeof(value));
}
#endif // MEM_FORCE_MEMORY_ACCESS
#endif /* MEM_FORCE_MEMORY_ACCESS */
MEM_STATIC U16 MEM_readLE16(const void* memPtr)

View File

@ -41,7 +41,7 @@
#include <stdlib.h>
#include "error_private.h"
#include "zstd_static.h"
#include "zstd_buffered_static.h"
#include "zbuff_static.h"
/* *************************************

View File

@ -1,6 +1,6 @@
/*
Buffered version of Zstd compression library
Copyright (C) 2015, Yann Collet.
Copyright (C) 2015-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -26,14 +26,13 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
- zstd homepage : http://www.zstd.net/
*/
#ifndef ZSTD_BUFFERED_H
#define ZSTD_BUFFERED_H
/* The objects defined into this file should be considered experimental.
* They are not labelled stable, as their prototype may change in the future.
* They are not considered stable, as their prototype may change in the future.
* You can use them for tests, provide feedback, or if you can endure risk of future changes.
*/
@ -42,7 +41,7 @@ extern "C" {
#endif
/* *************************************
* Includes
* Dependencies
***************************************/
#include <stddef.h> /* size_t */
@ -75,7 +74,7 @@ ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* d
ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
/** ************************************************
/*-*************************************************
* Streaming compression
*
* A ZBUFF_CCtx object is required to track streaming operation.
@ -123,12 +122,14 @@ ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
void* dst, size_t* dstCapacityPtr,
const void* src, size_t* srcSizePtr);
/** ************************************************
/*-***************************************************************************
* Streaming decompression
*
* A ZBUFF_DCtx object is required to track streaming operation.
* A ZBUFF_DCtx object is required to track streaming operations.
* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
* Use ZBUFF_decompressInit() to start a new decompression operation,
* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
@ -143,10 +144,10 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t*
* or 0 when a frame is completely decoded
* or an error code, which can be tested using ZBUFF_isError().
*
* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize
* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
* **************************************************/
* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() / ZBUFF_recommendedDOutSize()
* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
* input : ZBUFF_recommendedDInSize==128 KB + 3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3.
* *******************************************************************************/
/* *************************************
@ -155,7 +156,7 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t*
ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
/** The below functions provide recommended buffer sizes for Compression or Decompression operations.
/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
* These sizes are just hints, and tend to offer better latency */
ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);

View File

@ -1,7 +1,7 @@
/*
zstd - buffered version of compression library
experimental complementary API, for static linking only
Copyright (C) 2015, Yann Collet.
Copyright (C) 2015-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -27,8 +27,7 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
- zstd homepage : http://www.zstd.net
*/
#ifndef ZSTD_BUFFERED_STATIC_H
#define ZSTD_BUFFERED_STATIC_H
@ -46,7 +45,7 @@ extern "C" {
* Includes
***************************************/
#include "zstd_static.h" /* ZSTD_parameters */
#include "zstd_buffered.h"
#include "zbuff.h"
/* *************************************

View File

@ -1,33 +1,41 @@
/*
dictBuilder - dictionary builder for LZ algorithms
dictBuilder - dictionary builder for zstd
Copyright (C) Yann Collet 2016
GPL v2 License
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- Zstd homepage : https://www.zstd.net
*/
/* **************************************
/*-**************************************
* Compiler Options
****************************************/
/* Disable some Visual warning messages */
#ifdef _MSC_VER
# define _CRT_SECURE_NO_WARNINGS /* fopen */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
@ -41,7 +49,7 @@
/*-*************************************
* Includes
* Dependencies
***************************************/
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
@ -52,10 +60,11 @@
#include "mem.h" /* read */
#include "error_private.h"
#include "divsufsort.h"
#include "dictBuilder.h"
#include "zstd_compress.c"
#include "fse.h"
#include "huff0_static.h"
#include "zstd_internal.h"
#include "divsufsort.h"
#include "zdict_static.h"
/*-*************************************
@ -74,8 +83,6 @@
#define GB *(1U<<30)
#define DICTLISTSIZE 10000
#define MEMMULT 11
static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
#define NOISELENGTH 32
#define PRIME1 2654435761U
@ -94,16 +101,15 @@ static const size_t g_min_fast_dictContent = 192;
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
void DiB_setNotificationLevel(unsigned l) { g_displayLevel=l; }
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if (DiB_GetMilliSpan(g_time) > refreshRate) \
if (ZDICT_GetMilliSpan(g_time) > refreshRate) \
{ g_time = clock(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stdout); } }
static const unsigned refreshRate = 300;
static clock_t g_time = 0;
void DiB_printHex(U32 dlevel, const void* ptr, size_t length)
static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
{
const BYTE* const b = (const BYTE*)ptr;
size_t u;
@ -116,98 +122,25 @@ void DiB_printHex(U32 dlevel, const void* ptr, size_t length)
}
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", error); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, "\n"); \
exit(error); \
}
/* ********************************************************
/*-********************************************************
* Helper functions
**********************************************************/
unsigned DiB_versionNumber (void) { return DiB_VERSION_NUMBER; }
static unsigned DiB_GetMilliSpan(clock_t nPrevious)
/* ZDICT_GetMilliSpan() :
 * @return : processor time elapsed since `nPrevious` (a value previously obtained from clock()),
 *           expressed in milliseconds.
 * The tick difference is widened to unsigned long long before being scaled by 1000,
 * so the multiplication cannot overflow on targets where clock_t is a 32-bit type. */
static unsigned ZDICT_GetMilliSpan(clock_t nPrevious)
{
    clock_t const nCurrent = clock();
    unsigned long long const nbTicks = (unsigned long long)(nCurrent - nPrevious);
    return (unsigned)((nbTicks * 1000) / CLOCKS_PER_SEC);
}
unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }
/* ZDICT_isError() : forwards `errorCode` to ERR_isError() and returns its result */
unsigned ZDICT_isError(size_t errorCode)
{
    return ERR_isError(errorCode);
}
const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
/* ********************************************************
* File related operations
**********************************************************/
static unsigned long long DiB_getFileSize(const char* infilename)
{
int r;
#if defined(_MSC_VER)
struct _stat64 statbuf;
r = _stat64(infilename, &statbuf);
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
#endif
if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
return (unsigned long long)statbuf.st_size;
}
static unsigned long long DiB_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
{
unsigned long long total = 0;
unsigned n;
for (n=0; n<nbFiles; n++)
total += DiB_getFileSize(fileNamesTable[n]);
return total;
}
static void DiB_loadFiles(void* buffer, size_t bufferSize,
size_t* fileSizes,
const char** fileNamesTable, unsigned nbFiles)
{
char* buff = (char*)buffer;
size_t pos = 0;
unsigned n;
for (n=0; n<nbFiles; n++) {
size_t readSize;
unsigned long long fileSize = DiB_getFileSize(fileNamesTable[n]);
FILE* f = fopen(fileNamesTable[n], "rb");
if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
if (fileSize > bufferSize-pos) fileSize = 0; /* stop there, not enough memory to load all files */
readSize = fread(buff+pos, 1, (size_t)fileSize, f);
if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
pos += readSize;
fileSizes[n] = (size_t)fileSize;
fclose(f);
}
}
/* ZDICT_getErrorName() : forwards `errorCode` to ERR_getErrorName() and returns the string it provides */
const char* ZDICT_getErrorName(size_t errorCode)
{
    return ERR_getErrorName(errorCode);
}
/*-********************************************************
* Dictionary training functions
**********************************************************/
static size_t DiB_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
static unsigned DiB_NbCommonBytes (register size_t val)
static unsigned ZDICT_NbCommonBytes (register size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
@ -266,17 +199,17 @@ static unsigned DiB_NbCommonBytes (register size_t val)
}
/*! DiB_count() :
/*! ZDICT_count() :
Count the nb of common bytes between 2 pointers.
Note : this function presumes end of buffer followed by noisy guard band.
*/
static size_t DiB_count(const void* pIn, const void* pMatch)
static size_t ZDICT_count(const void* pIn, const void* pMatch)
{
const char* const pStart = (const char*)pIn;
for (;;) {
size_t diff = DiB_read_ARCH(pMatch) ^ DiB_read_ARCH(pIn);
size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; }
pIn = (const char*)pIn+DiB_NbCommonBytes(diff);
pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
return (size_t)((const char*)pIn - pStart);
}
}
@ -288,7 +221,7 @@ typedef struct {
U32 savings;
} dictItem;
void DiB_initDictItem(dictItem* d)
static void ZDICT_initDictItem(dictItem* d)
{
d->pos = 1;
d->length = 0;
@ -298,9 +231,9 @@ void DiB_initDictItem(dictItem* d)
#define LLIMIT 64 /* heuristic determined experimentally */
#define MINMATCHLENGTH 7 /* heuristic determined experimentally */
static dictItem DiB_analyzePos(
static dictItem ZDICT_analyzePos(
BYTE* doneMarks,
const saidx_t* suffix, U32 start,
const int* suffix, U32 start,
const void* buffer, U32 minRatio)
{
U32 lengthList[LLIMIT] = {0};
@ -334,12 +267,12 @@ static dictItem DiB_analyzePos(
/* look forward */
do {
end++;
length = DiB_count(b + pos, b + suffix[end]);
length = ZDICT_count(b + pos, b + suffix[end]);
} while (length >=MINMATCHLENGTH);
/* look backward */
do {
length = DiB_count(b + pos, b + *(suffix+start-1));
length = ZDICT_count(b + pos, b + *(suffix+start-1));
if (length >=MINMATCHLENGTH) start--;
} while(length >= MINMATCHLENGTH);
@ -400,14 +333,14 @@ static dictItem DiB_analyzePos(
/* look forward */
do {
end++;
length = DiB_count(b + pos, b + suffix[end]);
length = ZDICT_count(b + pos, b + suffix[end]);
if (length >= LLIMIT) length = LLIMIT-1;
lengthList[length]++;
} while (length >=MINMATCHLENGTH);
/* look backward */
do {
length = DiB_count(b + pos, b + suffix[start-1]);
length = ZDICT_count(b + pos, b + suffix[start-1]);
if (length >= LLIMIT) length = LLIMIT-1;
lengthList[length]++;
if (length >=MINMATCHLENGTH) start--;
@ -453,7 +386,7 @@ static dictItem DiB_analyzePos(
if (testedPos == pos)
length = solution.length;
else {
length = DiB_count(b+pos, b+testedPos);
length = ZDICT_count(b+pos, b+testedPos);
if (length > solution.length) length = solution.length;
}
pEnd = (U32)(testedPos + length);
@ -465,11 +398,11 @@ static dictItem DiB_analyzePos(
}
/*! DiB_checkMerge
/*! ZDICT_checkMerge
check if dictItem can be merged, do it if possible
@return : id of destination elt, 0 if not merged
*/
static U32 DiB_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
{
const U32 tableSize = table->pos;
const U32 max = elt.pos + (elt.length-1);
@ -513,7 +446,7 @@ static U32 DiB_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
}
static void DiB_removeDictItem(dictItem* table, U32 id)
static void ZDICT_removeDictItem(dictItem* table, U32 id)
{
/* convention : first element is nb of elts */
U32 max = table->pos;
@ -525,15 +458,15 @@ static void DiB_removeDictItem(dictItem* table, U32 id)
}
static void DiB_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
{
/* merge if possible */
U32 mergeId = DiB_checkMerge(table, elt, 0);
U32 mergeId = ZDICT_checkMerge(table, elt, 0);
if (mergeId) {
U32 newMerge = 1;
while (newMerge) {
newMerge = DiB_checkMerge(table, table[mergeId], mergeId);
if (newMerge) DiB_removeDictItem(table, mergeId);
newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
if (newMerge) ZDICT_removeDictItem(table, mergeId);
mergeId = newMerge;
}
return;
@ -555,7 +488,7 @@ static void DiB_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
}
static U32 DiB_dictSize(const dictItem* dictList)
static U32 ZDICT_dictSize(const dictItem* dictList)
{
U32 u, dictSize = 0;
for (u=1; u<dictList[0].pos; u++)
@ -564,32 +497,35 @@ static U32 DiB_dictSize(const dictItem* dictList)
}
static void DiB_trainBuffer(dictItem* dictList, U32 dictListSize,
static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
const void* const buffer, const size_t bufferSize, /* buffer must end with noisy guard band */
const size_t* fileSizes, unsigned nbFiles,
U32 shiftRatio, unsigned maxDictSize)
{
saidx_t* const suffix0 = (saidx_t*)malloc((bufferSize+2)*sizeof(*suffix0));
saidx_t* const suffix = suffix0+1;
int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
int* const suffix = suffix0+1;
U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */
U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
U32 minRatio = nbFiles >> shiftRatio;
saint_t errorCode;
int divSuftSortResult;
size_t result = 0;
/* init */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
if (!suffix0 || !reverseSuffix || !doneMarks || !filePos)
EXM_THROW(1, "not enough memory for DiB_trainBuffer");
if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
result = ERROR(memory_allocation);
goto _cleanup;
}
if (minRatio < MINRATIO) minRatio = MINRATIO;
memset(doneMarks, 0, bufferSize+16);
/* sort */
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
errorCode = divsufsort((const sauchar_t*)buffer, suffix, (saidx_t)bufferSize);
if (errorCode != 0) EXM_THROW(2, "sort failed");
suffix[bufferSize] = (saidx_t)bufferSize; /* leads into noise */
suffix0[0] = (saidx_t)bufferSize; /* leads into noise */
divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
suffix[bufferSize] = (int)bufferSize; /* leads into noise */
suffix0[0] = (int)bufferSize; /* leads into noise */
{
/* build reverse suffix sort */
size_t pos;
@ -608,9 +544,9 @@ static void DiB_trainBuffer(dictItem* dictList, U32 dictListSize,
U32 cursor; for (cursor=0; cursor < bufferSize; ) {
dictItem solution;
if (doneMarks[cursor]) { cursor++; continue; }
solution = DiB_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
if (solution.length==0) { cursor++; continue; }
DiB_insertDictItem(dictList, dictListSize, solution);
ZDICT_insertDictItem(dictList, dictListSize, solution);
cursor += solution.length;
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
} }
@ -626,33 +562,16 @@ static void DiB_trainBuffer(dictItem* dictList, U32 dictListSize,
dictList->pos = n;
}
_cleanup:
free(suffix0);
free(reverseSuffix);
free(doneMarks);
free(filePos);
return result;
}
static size_t DiB_findMaxMem(unsigned long long requiredMem)
{
size_t step = 8 MB;
void* testmem = NULL;
requiredMem = (((requiredMem >> 23) + 1) << 23);
requiredMem += 2 * step;
if (requiredMem > maxMemory) requiredMem = maxMemory;
while (!testmem) {
requiredMem -= step;
testmem = malloc((size_t)requiredMem);
}
free(testmem);
return (size_t)(requiredMem - step);
}
static void DiB_fillNoise(void* buffer, size_t length)
static void ZDICT_fillNoise(void* buffer, size_t length)
{
unsigned acc = PRIME1;
size_t p=0;;
@ -672,34 +591,36 @@ typedef struct
} EStats_ress_t;
static void DiB_countEStats(EStats_ress_t esr,
static void ZDICT_countEStats(EStats_ress_t esr,
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
const void* src, size_t srcSize)
{
const BYTE* bytePtr;
const U32* u32Ptr;
seqStore_t seqStore;
if (srcSize > BLOCKSIZE) srcSize = BLOCKSIZE; /* protection vs large samples */
ZSTD_copyCCtx(esr.zc, esr.ref);
ZSTD_compressBlock(esr.zc, esr.workPlace, BLOCKSIZE, src, srcSize);
seqStore = ZSTD_copySeqStore(esr.zc);
/* count stats */
for(bytePtr = esr.zc->seqStore.litStart; bytePtr < esr.zc->seqStore.lit; bytePtr++)
for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++)
countLit[*bytePtr]++;
for(u32Ptr = esr.zc->seqStore.offsetStart; u32Ptr < esr.zc->seqStore.offset; u32Ptr++) {
for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) {
BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1;
if (*u32Ptr==0) offcode=0;
offsetcodeCount[offcode]++;
}
for(bytePtr = esr.zc->seqStore.matchLengthStart; bytePtr < esr.zc->seqStore.matchLength; bytePtr++)
for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
matchlengthCount[*bytePtr]++;
for(bytePtr = esr.zc->seqStore.litLengthStart; bytePtr < esr.zc->seqStore.litLength; bytePtr++)
for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
litlengthCount[*bytePtr]++;
}
#define OFFCODE_MAX 18
static size_t DiB_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
#define OFFCODE_MAX 18 /* only applicable to first block */
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
unsigned compressionLevel,
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
const void* dictBuffer, size_t dictBufferSize)
@ -726,7 +647,11 @@ static size_t DiB_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
esr.ref = ZSTD_createCCtx();
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(BLOCKSIZE);
if (!esr.ref || !esr.zc || !esr.workPlace) EXM_THROW(30, "Not enough memory");
if (!esr.ref || !esr.zc || !esr.workPlace) {
eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory");
goto _cleanup;
}
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
params = ZSTD_getParams(compressionLevel, dictBufferSize + 15 KB);
params.strategy = ZSTD_greedy;
@ -734,7 +659,7 @@ static size_t DiB_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* collect stats on all files */
for (u=0; u<nbFiles; u++) {
DiB_countEStats(esr,
ZDICT_countEStats(esr,
countLit, offcodeCount, matchLengthCount, litlengthCount,
(const char*)srcBuffer + pos, fileSizes[u]);
pos += fileSizes[u];
@ -742,50 +667,82 @@ static size_t DiB_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* analyze */
errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
if (HUF_isError(errorCode)) EXM_THROW(31, "HUF_buildCTable error");
if (HUF_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "HUF_buildCTable error");
goto _cleanup;
}
huffLog = (U32)errorCode;
total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
if (FSE_isError(errorCode)) EXM_THROW(32, "FSE_normalizeCount error with offcodeCount");
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
goto _cleanup;
}
Offlog = (U32)errorCode;
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
if (FSE_isError(errorCode)) EXM_THROW(33, "FSE_normalizeCount error with matchLengthCount");
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
goto _cleanup;
}
mlLog = (U32)errorCode;
total=0; for (u=0; u<=MaxLL; u++) total+=litlengthCount[u];
errorCode = FSE_normalizeCount(litlengthNCount, llLog, litlengthCount, total, MaxLL);
if (FSE_isError(errorCode)) EXM_THROW(34, "FSE_normalizeCount error with litlengthCount");
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "FSE_normalizeCount error with litlengthCount");
goto _cleanup;
}
llLog = (U32)errorCode;
/* write result to buffer */
errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
if (HUF_isError(errorCode)) EXM_THROW(41, "HUF_writeCTable error");
if (HUF_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "HUF_writeCTable error");
goto _cleanup;
}
dstBuffer = (char*)dstBuffer + errorCode;
maxDstSize -= errorCode;
eSize += errorCode;
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
if (FSE_isError(errorCode)) EXM_THROW(42, "FSE_writeNCount error with offcodeNCount");
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
goto _cleanup;
}
dstBuffer = (char*)dstBuffer + errorCode;
maxDstSize -= errorCode;
eSize += errorCode;
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
if (FSE_isError(errorCode)) EXM_THROW(43, "FSE_writeNCount error with matchLengthNCount");
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
goto _cleanup;
}
dstBuffer = (char*)dstBuffer + errorCode;
maxDstSize -= errorCode;
eSize += errorCode;
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litlengthNCount, MaxLL, llLog);
if (FSE_isError(errorCode)) EXM_THROW(43, "FSE_writeNCount error with litlengthNCount");
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
goto _cleanup;
}
dstBuffer = (char*)dstBuffer + errorCode;
maxDstSize -= errorCode;
eSize += errorCode;
/* clean */
_cleanup:
ZSTD_freeCCtx(esr.ref);
ZSTD_freeCCtx(esr.zc);
free(esr.workPlace);
@ -794,33 +751,16 @@ static size_t DiB_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
}
static void DiB_saveDict(const char* dictFileName,
const void* buff, size_t buffSize)
{
FILE* f;
size_t n;
f = fopen(dictFileName, "wb");
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
n = fwrite(buff, 1, buffSize, f);
if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName)
n = (size_t)fclose(f);
if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName)
}
#define DIB_FASTSEGMENTSIZE 64
/*! DiB_fastSampling (based on an idea by Giuseppe Ottaviano)
Fill @dictBuffer with stripes of size DIB_FASTSEGMENTSIZE from @samplesBuffer
up to @dictSize.
Filling starts from the end of @dictBuffer, down to maximum possible.
if @dictSize is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of @dictBuffer won't be used.
@return : amount of data written into @dictBuffer
or an error Code (if @dictSize or @samplesSize too small)
/*! ZDICT_fastSampling() (based on an idea proposed by Giuseppe Ottaviano) :
Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
up to `dictSize`.
Filling starts from the end of `dictBuffer`, down to maximum possible.
if `dictSize` is not a multiple of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
@return : amount of data written into `dictBuffer`,
or an error code
*/
static size_t DiB_fastSampling(void* dictBuffer, size_t dictSize,
static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
const void* samplesBuffer, size_t samplesSize)
{
char* dstPtr = (char*)dictBuffer + dictSize;
@ -851,10 +791,10 @@ static size_t DiB_fastSampling(void* dictBuffer, size_t dictSize,
}
static size_t DiB_trainFromBuffer_internal(
size_t ZDICT_trainFromBuffer_unsafe(
void* dictBuffer, size_t maxDictSize,
const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
DiB_params_t params)
ZDICT_params_t params)
{
const U32 dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@ -869,14 +809,15 @@ static size_t DiB_trainFromBuffer_internal(
/* init */
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
if (!dictList) { DISPLAYLEVEL(1, "not enough memory for DiB_trainFromBuffer"); return ERROR(memory_allocation); }
DiB_initDictItem(dictList);
if (!dictList) return ERROR(memory_allocation);
ZDICT_initDictItem(dictList);
g_displayLevel = params.notificationLevel;
if (selectivity==0) selectivity = g_selectivity_default;
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
/* select stripes */
if (selectivity>1) {
DiB_trainBuffer(dictList, dictListSize,
/* build dictionary */
if (selectivity>1) { /* selectivity == 1 => fast mode */
ZDICT_trainBuffer(dictList, dictListSize,
samplesBuffer, sBuffSize,
sampleSizes, nbSamples,
selectivity, (U32)targetDictSize);
@ -885,7 +826,7 @@ static size_t DiB_trainFromBuffer_internal(
if (g_displayLevel>= 3) {
const U32 nb = 25;
U32 u;
U32 dictContentSize = DiB_dictSize(dictList);
U32 dictContentSize = ZDICT_dictSize(dictList);
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
DISPLAYLEVEL(3, "list %u best segments \n", nb);
for (u=1; u<=nb; u++) {
@ -894,13 +835,13 @@ static size_t DiB_trainFromBuffer_internal(
U32 d = MIN(40, l);
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
u, l, p, dictList[u].savings);
DiB_printHex(3, (const char*)samplesBuffer+p, d);
ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
DISPLAYLEVEL(3, "| \n");
} } }
/* create dictionary */
{
U32 dictContentSize = DiB_dictSize(dictList);
U32 dictContentSize = ZDICT_dictSize(dictList);
size_t hSize;
BYTE* ptr;
U32 u;
@ -918,7 +859,7 @@ static size_t DiB_trainFromBuffer_internal(
if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
dictContentSize = (U32)DiB_fastSampling((char*)dictBuffer + g_provision_entropySize,
dictContentSize = (U32)ZDICT_fastSampling((char*)dictBuffer + g_provision_entropySize,
targetDictSize, samplesBuffer, sBuffSize);
}
@ -929,7 +870,7 @@ static size_t DiB_trainFromBuffer_internal(
/* entropic tables */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
DISPLAYLEVEL(2, "statistics ... \n");
hSize += DiB_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4,
hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4,
compressionLevel,
samplesBuffer, sampleSizes, nbSamples,
(char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize);
@ -945,76 +886,38 @@ static size_t DiB_trainFromBuffer_internal(
}
/* issue : samplesBuffer need to be followed by a noisy guard band.
* work around : duplicate the buffer, and add the noise ? */
size_t DiB_trainFromBuffer(void* dictBuffer, size_t maxDictSize,
const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
DiB_params_t params)
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
{
size_t sBuffSize;
void* newBuff;
size_t result;
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
newBuff = malloc(sBuffSize + NOISELENGTH);
if (!newBuff) return ERROR(memory_allocation);
memcpy(newBuff, samplesBuffer, sBuffSize);
DiB_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
result = DiB_trainFromBuffer_internal(dictBuffer, maxDictSize,
newBuff, sampleSizes, nbSamples,
result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity,
newBuff, samplesSizes, nbSamples,
params);
free(newBuff);
return result;
}
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
DiB_params_t params)
/* issue : samplesBuffer needs to be followed by a noisy guard band.
* work around : duplicate the buffer, and add the noise ? */
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
{
void* srcBuffer;
size_t benchedSize;
size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
unsigned long long totalSizeToLoad = DiB_getTotalFileSize(fileNamesTable, nbFiles);
void* dictBuffer = malloc(maxDictSize);
size_t dictSize;
int result = 0;
/* init */
benchedSize = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
if ((unsigned long long)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
if (benchedSize < totalSizeToLoad)
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
/* Memory allocation & restrictions */
srcBuffer = malloc(benchedSize+NOISELENGTH); /* + noise */
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
/* Load input buffer */
DiB_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
/* call buffer version */
dictSize = DiB_trainFromBuffer_internal(dictBuffer, maxDictSize,
srcBuffer, fileSizes, nbFiles,
params);
if (DiB_isError(dictSize))
{
DISPLAYLEVEL(1, "dictionary training failed : %s", DiB_getErrorName(dictSize)); /* should not happen */
result = 1;
goto _cleanup;
}
/* save dict */
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
DiB_saveDict(dictFileName, dictBuffer, dictSize);
/* clean up */
_cleanup:
free(srcBuffer);
free(dictBuffer);
free(fileSizes);
return result;
ZDICT_params_t params;
memset(&params, 0, sizeof(params));
return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
samplesBuffer, samplesSizes, nbSamples,
params);
}

67
lib/zdict.h Normal file
View File

@ -0,0 +1,67 @@
/*
dictBuilder header file
Copyright (C) Yann Collet 2016
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Zstd homepage : https://www.zstd.net
*/
#ifndef DICTBUILDER_H_001
#define DICTBUILDER_H_001
#if defined (__cplusplus)
extern "C" {
#endif
/*-*************************************
* Public functions
***************************************/
/*! ZDICT_trainFromBuffer() :
Train a dictionary from a memory buffer `samplesBuffer`,
where `nbSamples` samples have been stored concatenated.
The size of each sample is provided, in the same order, by the table `samplesSizes`
(the sum of its `nbSamples` entries is the amount of data read from `samplesBuffer`).
The resulting dictionary is written into `dictBuffer`.
Uses default parameters : same as calling ZDICT_trainFromBuffer_advanced() with all parameters set to 0.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested by ZDICT_isError().
*/
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
/*-*************************************
* Helper functions
***************************************/
/*! ZDICT_isError() :
tells whether a `size_t` result returned by a ZDICT_*() training function is an error code
(exact non-zero value returned for errors is not visible from this header) */
unsigned ZDICT_isError(size_t errorCode);
/*! ZDICT_getErrorName() :
presumably provides a readable string describing `errorCode` -- implementation not visible here, to be confirmed */
const char* ZDICT_getErrorName(size_t errorCode);
#if defined (__cplusplus)
}
#endif
#endif

80
lib/zdict_static.h Normal file
View File

@ -0,0 +1,80 @@
/*
dictBuilder header file
for static linking only
Copyright (C) Yann Collet 2016
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Zstd homepage : https://www.zstd.net
*/
/* This library is EXPERIMENTAL, below API is not yet stable */
#ifndef DICTBUILDER_STATIC_H_002
#define DICTBUILDER_STATIC_H_002
#if defined (__cplusplus)
extern "C" {
#endif
/*-*************************************
* Dependencies
***************************************/
#include "zdict.h"
/*-*************************************
* Public type
***************************************/
/*! ZDICT_params_t :
tuning parameters for ZDICT_trainFromBuffer_advanced().
A structure fully set to 0 (for example with memset()) requests default behavior for every field. */
typedef struct {
unsigned selectivityLevel; /* 0 means default; larger => bigger selection => larger dictionary */
unsigned compressionLevel; /* 0 means default; target a specific zstd compression level */
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned reserved[3]; /* space for future parameters */
} ZDICT_params_t;
/*-*************************************
* Public functions
***************************************/
/*! ZDICT_trainFromBuffer_advanced() :
Same as ZDICT_trainFromBuffer() with control over more parameters.
`parameters` is passed by value; any of its fields can be set to 0 to mean "default".
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested by ZDICT_isError().
note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using `parameters.notificationLevel`
*/
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
#if defined (__cplusplus)
}
#endif
#endif /* DICTBUILDER_STATIC_H_002 */

View File

@ -61,7 +61,7 @@ extern "C" {
***************************************/
#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
#define ZSTD_VERSION_MINOR 5 /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber (void);

View File

@ -48,7 +48,7 @@
#endif
/* *************************************
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc */
@ -59,36 +59,39 @@
#include "zstd_internal.h"
/* *************************************
/*-*************************************
* Constants
***************************************/
static const U32 g_searchStrength = 8;
/* *************************************
/*-*************************************
* Helper functions
***************************************/
size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
/* *************************************
/*-*************************************
* Sequence storage
***************************************/
typedef struct {
void* buffer;
U32* offsetStart;
U32* offset;
BYTE* offCodeStart;
BYTE* offCode;
BYTE* litStart;
BYTE* lit;
BYTE* litLengthStart;
BYTE* litLength;
BYTE* matchLengthStart;
BYTE* matchLength;
BYTE* dumpsStart;
BYTE* dumps;
} seqStore_t;
/** ZSTD_resetFreqs() : for opt variants.
 *  Re-initializes the statistics stored in `ssPtr` :
 *  the four running totals are set to fixed starting values,
 *  then every entry of the four frequency tables is set to 1. */
static void ZSTD_resetFreqs(seqStore_t* ssPtr)
{
    unsigned sym;

    /* running totals */
    /* NOTE(review): the two literal values below are kept as-is; the expressions
     * in the trailing comments are what the original author left next to them. */
    ssPtr->matchLengthSum = 512;   /* (1<<MLbits); */
    ssPtr->litLengthSum = 256;   /* (1<<LLbits); */
    ssPtr->litSum = (1<<Litbits);
    ssPtr->offCodeSum = (1<<Offbits);

    /* per-symbol counters : each table covers indexes 0..Max (included) */
    for (sym = 0; sym <= MaxLit; sym++) {
        ssPtr->litFreq[sym] = 1;
    }
    for (sym = 0; sym <= MaxLL; sym++) {
        ssPtr->litLengthFreq[sym] = 1;
    }
    for (sym = 0; sym <= MaxML; sym++) {
        ssPtr->matchLengthFreq[sym] = 1;
    }
    for (sym = 0; sym <= MaxOff; sym++) {
        ssPtr->offCodeFreq[sym] = 1;
    }
}
static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
@ -100,7 +103,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
}
/* *************************************
/*-*************************************
* Context memory management
***************************************/
struct ZSTD_CCtx_s
@ -130,7 +133,6 @@ struct ZSTD_CCtx_s
FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
};
ZSTD_CCtx* ZSTD_createCCtx(void)
{
return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
@ -143,37 +145,40 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
return 0; /* reserved as a potential error code in the future */
}
/** ZSTD_copySeqStore() :
 *  returns, by value, the `seqStore` member of `ctx`.
 *  This is a plain structure copy : any pointer member of the returned structure
 *  still refers to memory owned by `ctx`.
 *  NOTE(review): not declared `static`; presumably called from another module -- confirm. */
seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx)
{
return ctx->seqStore;
}
static unsigned ZSTD_highbit(U32 val);
/** ZSTD_validateParams
correct params value to remain within authorized range
optimize for srcSize if srcSize > 0 */
/* CLAMP() : force `val` (a modifiable lvalue) into the range [min, max].
 * Every argument is parenthesized, so expressions can be passed safely,
 * and the body is wrapped into do { } while (0), so that `CLAMP(v, lo, hi);`
 * is exactly one statement (safe inside an unbraced if / else).
 * note : `val`, `min` and `max` can be evaluated more than once : avoid side effects. */
#define CLAMP(val,min,max) do { if ((val)<(min)) (val)=(min); else if ((val)>(max)) (val)=(max); } while (0)
/** ZSTD_validateParams() :
correct params value to remain within authorized range,
optimize for `srcSize` if srcSize > 0 */
void ZSTD_validateParams(ZSTD_parameters* params)
{
const U32 btPlus = (params->strategy == ZSTD_btlazy2);
const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
/* validate params */
if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */
if (params->windowLog > ZSTD_WINDOWLOG_MAX) params->windowLog = ZSTD_WINDOWLOG_MAX;
if (params->windowLog < ZSTD_WINDOWLOG_MIN) params->windowLog = ZSTD_WINDOWLOG_MIN;
CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX);
CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMP(params->searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt;
/* correct params, to use less memory */
if ((params->srcSize > 0) && (params->srcSize < (1<<ZSTD_WINDOWLOG_MAX))) {
U32 srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1;
if (params->windowLog > srcLog) params->windowLog = srcLog;
}
if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */
if (params->contentLog < ZSTD_CONTENTLOG_MIN) params->contentLog = ZSTD_CONTENTLOG_MIN;
if (params->hashLog > ZSTD_HASHLOG_MAX) params->hashLog = ZSTD_HASHLOG_MAX;
if (params->hashLog < ZSTD_HASHLOG_MIN) params->hashLog = ZSTD_HASHLOG_MIN;
if (params->searchLog > ZSTD_SEARCHLOG_MAX) params->searchLog = ZSTD_SEARCHLOG_MAX;
if (params->searchLog < ZSTD_SEARCHLOG_MIN) params->searchLog = ZSTD_SEARCHLOG_MIN;
if (params->searchLength> ZSTD_SEARCHLENGTH_MAX) params->searchLength = ZSTD_SEARCHLENGTH_MAX;
if (params->searchLength< ZSTD_SEARCHLENGTH_MIN) params->searchLength = ZSTD_SEARCHLENGTH_MIN;
if ((U32)params->strategy>(U32)ZSTD_btlazy2) params->strategy = ZSTD_btlazy2;
}
@ -184,7 +189,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
/* reserve table memory */
const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize);
const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) + ((1<<MLbits) + (1<<LLbits) + (1<<Offbits) + (1<<Litbits))*sizeof(U32);
if (zc->workSpaceSize < neededSpace) {
free(zc->workSpace);
zc->workSpace = malloc(neededSpace);
@ -207,12 +212,20 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
zc->lowLimit = 0;
zc->params = params;
zc->blockSize = blockSize;
zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
zc->seqStore.litFreq = (U32*) (zc->seqStore.buffer);
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<<LLbits);
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<<MLbits);
zc->seqStore.offsetStart = zc->seqStore.offCodeFreq + (1<<Offbits);
zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2));
zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2);
zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize;
zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2);
zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2);
// zc->seqStore.XXX = zc->seqStore.dumpsStart + (blockSize>>4);
zc->hbSize = 0;
zc->stage = 0;
zc->loadedDictEnd = 0;
@ -528,7 +541,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
const size_t maxCSize = srcSize - minGain;
BYTE* seqHead;
/* Compress literals */
{
size_t cSize;
@ -766,33 +778,9 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B
}
/* *************************************
/*-*************************************
* Match length counter
***************************************/
static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
/** ZSTD_highbit() :
 *  @return : position of the highest bit set in `val` (0 for the lowest bit, 31 for the highest).
 *  Three implementations, selected at compile time : MSVC intrinsic, GCC builtin, portable fallback.
 *  note : `val` must be != 0 ; the GCC path relies on __builtin_clz(), whose result is undefined for 0. */
static unsigned ZSTD_highbit(U32 val)
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
# else /* Software version */
/* lookup table indexed by the top 5 bits of (v * 0x07C4ACDDU) */
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
int r;
/* propagate the highest set bit into every lower position */
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
return r;
# endif
}
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
if (MEM_isLittleEndian()) {
@ -857,20 +845,19 @@ static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLim
const BYTE* const pStart = pIn;
while ((pIn<pInLimit-(sizeof(size_t)-1))) {
size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
pIn += ZSTD_NbCommonBytes(diff);
return (size_t)(pIn - pStart);
}
if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
return (size_t)(pIn - pStart);
}
/** ZSTD_count_2segments
* can count match length with ip & match in potentially 2 different segments.
/** ZSTD_count_2segments() :
* can count match length with `ip` & `match` in 2 different segments.
* convention : on reaching mEnd, match count continue starting from iStart
*/
static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
@ -894,15 +881,15 @@ static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read
static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
@ -1009,8 +996,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
continue; /* faster when present ... (?) */
} } }
/* Last Literals */
{
{ /* Last Literals */
size_t lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
@ -1018,7 +1004,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
}
void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
const void* src, size_t srcSize)
{
const U32 mls = ctx->params.searchLength;
@ -1037,8 +1023,7 @@ void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
}
//FORCE_INLINE
void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
const U32 mls)
{
@ -1138,7 +1123,7 @@ void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
}
void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
const void* src, size_t srcSize)
{
const U32 mls = ctx->params.searchLength;
@ -1157,11 +1142,11 @@ void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
}
/* *************************************
/*-*************************************
* Binary Tree search
***************************************/
/** ZSTD_insertBt1 : add one or multiple positions to tree
* @ip : assumed <= iend-8
/** ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
* @return : nb of positions added */
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
U32 extDict)
@ -1187,6 +1172,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
U32 dummy32; /* to be nullified at the end */
const U32 windowLow = zc->lowLimit;
U32 matchEndIdx = current+8;
size_t bestLength = 8;
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
predictedSmall += (predictedSmall>0);
@ -1196,9 +1182,9 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
while (nbCompares-- && (matchIndex > windowLow)) {
U32* nextPtr = bt + 2*(matchIndex & btMask);
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
#if 1 /* note : can create issues when hlog small <= 11 */
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
if (matchIndex == predictedSmall) {
/* no need to check length, result known */
*smallerPtr = matchIndex;
@ -1208,7 +1194,6 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
predictedSmall = predictPtr[1] + (predictPtr[1]>0);
continue;
}
if (matchIndex == predictedLarge) {
*largerPtr = matchIndex;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
@ -1217,7 +1202,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
predictedLarge = predictPtr[0] + (predictPtr[0]>0);
continue;
}
#endif
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex;
if (match[matchLength] == ip[matchLength])
@ -1229,8 +1214,11 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
if (matchLength > bestLength) {
bestLength = matchLength;
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
}
if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
@ -1252,22 +1240,13 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
} }
*smallerPtr = *largerPtr = 0;
return (matchEndIdx > current + 8) ? matchEndIdx - current - 8 : 1;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));
if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
return 1;
}
static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
for( ; idx < target ; )
idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
}
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_insertBtAndFindBestMatch (
static size_t ZSTD_insertBtAndFindBestMatch (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
@ -1337,8 +1316,7 @@ size_t ZSTD_insertBtAndFindBestMatch (
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
largerPtr = nextPtr;
matchIndex = nextPtr[0];
}
}
} }
*smallerPtr = *largerPtr = 0;
@ -1347,9 +1325,18 @@ size_t ZSTD_insertBtAndFindBestMatch (
}
/** ZSTD_updateTree() :
 *  inserts positions into the binary tree, starting from index `zc->nextToUpdate`
 *  and stopping once the index reaches `ip` (excluded).
 *  Each step calls ZSTD_insertBt1() with `extDict` set to 0 and advances by the
 *  nb of positions it reports as added, so the stride is variable.
 *  note : `zc->nextToUpdate` is only read here; this function does not write it back. */
static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base); /* index of `ip`, relative to `base` */
U32 idx = zc->nextToUpdate;
while(idx < target)
idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
}
/** Tree updater, providing best match */
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_BtFindBestMatch (
static size_t ZSTD_BtFindBestMatch (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
@ -1361,7 +1348,7 @@ size_t ZSTD_BtFindBestMatch (
}
FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS (
static size_t ZSTD_BtFindBestMatch_selectMLS (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
@ -1383,14 +1370,12 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
for( ; idx < target ; )
idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
}
/** Tree updater, providing best match */
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_BtFindBestMatch_extDict (
static size_t ZSTD_BtFindBestMatch_extDict (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
@ -1402,7 +1387,7 @@ size_t ZSTD_BtFindBestMatch_extDict (
}
FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
@ -1426,7 +1411,8 @@ FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
/* Update chains up to ip (excluded)
Assumption : always within prefix (ie. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
FORCE_INLINE
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
{
U32* const hashTable = zc->hashTable;
const U32 hashLog = zc->params.hashLog;
@ -1665,6 +1651,18 @@ _storeSequence:
}
}
#include "zstd_opt.h"
/** ZSTD_compressBlock_opt_bt() :
 *  block compressor entry : forwards to ZSTD_compressBlock_opt_generic() with trailing arguments (1, 2).
 *  NOTE(review): the 4th argument presumably selects the binary-tree match finder
 *  (it is 0 in ZSTD_compressBlock_opt()) -- confirm against zstd_opt.h. */
static void ZSTD_compressBlock_opt_bt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1, 2);
}
/** ZSTD_compressBlock_opt() :
 *  block compressor entry : forwards to ZSTD_compressBlock_opt_generic() with trailing arguments (0, 2).
 *  NOTE(review): the 4th argument presumably selects the non-binary-tree match finder
 *  (it is 1 in ZSTD_compressBlock_opt_bt()) -- confirm against zstd_opt.h. */
static void ZSTD_compressBlock_opt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0, 2);
}
static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
@ -1879,14 +1877,24 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
}
/** ZSTD_compressBlock_opt_extDict() :
 *  external-dictionary counterpart of ZSTD_compressBlock_opt() :
 *  forwards to ZSTD_compressBlock_opt_extDict_generic() with trailing arguments (0, 2).
 *  NOTE(review): meaning of the two trailing arguments is defined by the generic function, not visible here. */
static void ZSTD_compressBlock_opt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0, 2);
}
/** ZSTD_compressBlock_opt_bt_extDict() :
 *  external-dictionary counterpart of ZSTD_compressBlock_opt_bt() :
 *  forwards to ZSTD_compressBlock_opt_extDict_generic() with trailing arguments (1, 2).
 *  NOTE(review): meaning of the two trailing arguments is defined by the generic function, not visible here. */
static void ZSTD_compressBlock_opt_bt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1, 2);
}
typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
{
static const ZSTD_blockCompressor blockCompressor[2][5] = {
{ ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2 },
{ ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict }
static const ZSTD_blockCompressor blockCompressor[2][7] = {
{ ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_opt, ZSTD_compressBlock_opt_bt },
{ ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_opt_extDict, ZSTD_compressBlock_opt_bt_extDict }
};
return blockCompressor[extDict][(U32)strat];
@ -1980,7 +1988,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
/* preemptive overflow correction */
if (zc->lowLimit > (1<<30)) {
U32 btplus = (zc->params.strategy == ZSTD_btlazy2);
U32 btplus = (zc->params.strategy == ZSTD_btlazy2) || (zc->params.strategy == ZSTD_btopt);
U32 contentMask = (1 << (zc->params.contentLog - btplus)) - 1;
U32 newLowLimit = zc->lowLimit & contentMask; /* preserve position % contentSize */
U32 correction = zc->lowLimit - newLowLimit;
@ -2050,10 +2058,12 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
case ZSTD_opt:
ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.searchLength);
break;
case ZSTD_btlazy2:
case ZSTD_btopt:
ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.searchLog, zc->params.searchLength);
break;
@ -2256,106 +2266,112 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi
}
/*- Pre-defined compression levels -*/
/*-===== Pre-defined compression levels =====-*/
#define ZSTD_MAX_CLEVEL 21
unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" */
/* W, C, H, S, L, strat */
{ 0, 18, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */
{ 0, 19, 13, 14, 1, 7, ZSTD_fast }, /* level 1 */
{ 0, 19, 15, 16, 1, 6, ZSTD_fast }, /* level 2 */
{ 0, 20, 18, 20, 1, 6, ZSTD_fast }, /* level 3 */
{ 0, 21, 19, 21, 1, 6, ZSTD_fast }, /* level 4 */
{ 0, 20, 14, 18, 3, 5, ZSTD_greedy }, /* level 5 */
{ 0, 20, 18, 19, 3, 5, ZSTD_greedy }, /* level 6 */
{ 0, 21, 17, 20, 3, 5, ZSTD_lazy }, /* level 7 */
{ 0, 21, 19, 20, 3, 5, ZSTD_lazy }, /* level 8 */
{ 0, 21, 20, 20, 3, 5, ZSTD_lazy2 }, /* level 9 */
{ 0, 21, 19, 21, 4, 5, ZSTD_lazy2 }, /* level 10 */
{ 0, 22, 20, 22, 4, 5, ZSTD_lazy2 }, /* level 11 */
{ 0, 22, 20, 22, 5, 5, ZSTD_lazy2 }, /* level 12 */
{ 0, 22, 21, 22, 5, 5, ZSTD_lazy2 }, /* level 13 */
{ 0, 22, 22, 23, 5, 5, ZSTD_lazy2 }, /* level 14 */
{ 0, 23, 23, 23, 5, 5, ZSTD_lazy2 }, /* level 15 */
{ 0, 23, 21, 22, 5, 5, ZSTD_btlazy2 }, /* level 16 */
{ 0, 23, 24, 23, 4, 5, ZSTD_btlazy2 }, /* level 17 */
{ 0, 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */
{ 0, 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */
{ 0, 26, 27, 25, 9, 5, ZSTD_btlazy2 }, /* level 20 */
/* l, W, C, H, S, L, SL, strat */
{ 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
{ 0, 19, 13, 14, 1, 7, 4, ZSTD_fast }, /* level 1 */
{ 0, 19, 15, 16, 1, 6, 4, ZSTD_fast }, /* level 2 */
{ 0, 20, 18, 20, 1, 6, 4, ZSTD_fast }, /* level 3 */
{ 0, 21, 19, 21, 1, 6, 4, ZSTD_fast }, /* level 4 */
{ 0, 20, 14, 18, 3, 5, 4, ZSTD_greedy }, /* level 5 */
{ 0, 20, 18, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
{ 0, 21, 17, 20, 3, 5, 4, ZSTD_lazy }, /* level 7 */
{ 0, 21, 19, 20, 3, 5, 4, ZSTD_lazy }, /* level 8 */
{ 0, 21, 20, 20, 3, 5, 4, ZSTD_lazy2 }, /* level 9 */
{ 0, 21, 19, 21, 4, 5, 4, ZSTD_lazy2 }, /* level 10 */
{ 0, 22, 20, 22, 4, 5, 4, ZSTD_lazy2 }, /* level 11 */
{ 0, 22, 20, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 12 */
{ 0, 22, 21, 22, 5, 5, 4, ZSTD_lazy2 }, /* level 13 */
{ 0, 22, 22, 23, 5, 5, 4, ZSTD_lazy2 }, /* level 14 */
{ 0, 23, 23, 23, 5, 5, 4, ZSTD_lazy2 }, /* level 15 */
{ 0, 23, 22, 22, 5, 5, 4, ZSTD_btlazy2 }, /* level 16 */
{ 0, 24, 24, 23, 4, 5, 4, ZSTD_btlazy2 }, /* level 17 */
{ 0, 24, 25, 24, 4, 4, 24, ZSTD_btopt }, /* level 18 */
{ 0, 25, 25, 24, 5, 4, 40, ZSTD_btopt }, /* level 19 */
{ 0, 26, 26, 25, 8, 4,256, ZSTD_btopt }, /* level 20 */
{ 0, 26, 27, 25, 10, 4,256, ZSTD_btopt }, /* level 21 */
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, strat */
{ 0, 18, 13, 14, 1, 7, ZSTD_fast }, /* level 0 - never used */
{ 0, 18, 14, 15, 1, 6, ZSTD_fast }, /* level 1 */
{ 0, 18, 14, 15, 1, 5, ZSTD_fast }, /* level 2 */
{ 0, 18, 12, 15, 3, 4, ZSTD_greedy }, /* level 3 */
{ 0, 18, 13, 15, 4, 4, ZSTD_greedy }, /* level 4 */
{ 0, 18, 14, 15, 5, 4, ZSTD_greedy }, /* level 5 */
{ 0, 18, 13, 15, 4, 4, ZSTD_lazy }, /* level 6 */
{ 0, 18, 14, 16, 5, 4, ZSTD_lazy }, /* level 7 */
{ 0, 18, 15, 16, 6, 4, ZSTD_lazy }, /* level 8 */
{ 0, 18, 15, 15, 7, 4, ZSTD_lazy }, /* level 9 */
{ 0, 18, 16, 16, 7, 4, ZSTD_lazy }, /* level 10 */
{ 0, 18, 16, 16, 8, 4, ZSTD_lazy }, /* level 11 */
{ 0, 18, 17, 16, 8, 4, ZSTD_lazy }, /* level 12 */
{ 0, 18, 17, 16, 9, 4, ZSTD_lazy }, /* level 13 */
{ 0, 18, 18, 16, 9, 4, ZSTD_lazy }, /* level 14 */
{ 0, 18, 17, 17, 9, 4, ZSTD_lazy2 }, /* level 15 */
{ 0, 18, 18, 18, 9, 4, ZSTD_lazy2 }, /* level 16 */
{ 0, 18, 18, 18, 10, 4, ZSTD_lazy2 }, /* level 17 */
{ 0, 18, 18, 18, 11, 4, ZSTD_lazy2 }, /* level 18 */
{ 0, 18, 18, 18, 12, 4, ZSTD_lazy2 }, /* level 19 */
{ 0, 18, 18, 18, 13, 4, ZSTD_lazy2 }, /* level 20 */
/* l, W, C, H, S, L, T, strat */
{ 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 */
{ 0, 18, 14, 15, 1, 6, 4, ZSTD_fast }, /* level 1 */
{ 0, 18, 14, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */
{ 0, 18, 14, 17, 1, 5, 4, ZSTD_fast }, /* level 3.*/
{ 0, 18, 14, 15, 4, 4, 4, ZSTD_greedy }, /* level 4 */
{ 0, 18, 16, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */
{ 0, 18, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 0, 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */
{ 0, 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */
{ 0, 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */
{ 0, 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */
{ 0, 18, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */
{ 0, 18, 18, 17, 4, 4, 4, ZSTD_btlazy2 }, /* level 12 */
{ 0, 18, 19, 17, 7, 4, 4, ZSTD_btlazy2 }, /* level 13.*/
{ 0, 18, 17, 19, 8, 4, 24, ZSTD_btopt }, /* level 14.*/
{ 0, 18, 19, 19, 8, 4, 48, ZSTD_btopt }, /* level 15.*/
{ 0, 18, 19, 18, 9, 4,128, ZSTD_btopt }, /* level 16.*/
{ 0, 18, 19, 18, 9, 4,192, ZSTD_btopt }, /* level 17.*/
{ 0, 18, 19, 18, 9, 4,256, ZSTD_btopt }, /* level 18.*/
{ 0, 18, 19, 18, 10, 4,256, ZSTD_btopt }, /* level 19.*/
{ 0, 18, 19, 18, 11, 4,256, ZSTD_btopt }, /* level 20.*/
{ 0, 18, 19, 18, 12, 4,256, ZSTD_btopt }, /* level 21.*/
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, strat */
{ 0, 17, 12, 12, 1, 4, ZSTD_fast }, /* level 0 - never used */
{ 0, 17, 12, 13, 1, 6, ZSTD_fast }, /* level 1 */
{ 0, 17, 14, 16, 1, 5, ZSTD_fast }, /* level 2 */
{ 0, 17, 15, 17, 1, 5, ZSTD_fast }, /* level 3 */
{ 0, 17, 13, 15, 2, 4, ZSTD_greedy }, /* level 4 */
{ 0, 17, 15, 17, 3, 4, ZSTD_greedy }, /* level 5 */
{ 0, 17, 14, 17, 3, 4, ZSTD_lazy }, /* level 6 */
{ 0, 17, 16, 17, 4, 4, ZSTD_lazy }, /* level 7 */
{ 0, 17, 16, 17, 4, 4, ZSTD_lazy2 }, /* level 8 */
{ 0, 17, 17, 16, 5, 4, ZSTD_lazy2 }, /* level 9 */
{ 0, 17, 17, 16, 6, 4, ZSTD_lazy2 }, /* level 10 */
{ 0, 17, 17, 16, 7, 4, ZSTD_lazy2 }, /* level 11 */
{ 0, 17, 17, 16, 8, 4, ZSTD_lazy2 }, /* level 12 */
{ 0, 17, 18, 16, 4, 4, ZSTD_btlazy2 }, /* level 13 */
{ 0, 17, 18, 16, 5, 4, ZSTD_btlazy2 }, /* level 14 */
{ 0, 17, 18, 16, 6, 4, ZSTD_btlazy2 }, /* level 15 */
{ 0, 17, 18, 16, 7, 4, ZSTD_btlazy2 }, /* level 16 */
{ 0, 17, 18, 16, 8, 4, ZSTD_btlazy2 }, /* level 17 */
{ 0, 17, 18, 16, 9, 4, ZSTD_btlazy2 }, /* level 18 */
{ 0, 17, 18, 16, 10, 4, ZSTD_btlazy2 }, /* level 19 */
{ 0, 17, 18, 18, 12, 4, ZSTD_btlazy2 }, /* level 20 */
/* l, W, C, H, S, L, T, strat */
{ 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
{ 0, 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */
{ 0, 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */
{ 0, 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */
{ 0, 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */
{ 0, 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */
{ 0, 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 0, 17, 16, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */
{ 0, 17, 17, 16, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */
{ 0, 17, 17, 16, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */
{ 0, 17, 17, 16, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */
{ 0, 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */
{ 0, 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */
{ 0, 17, 17, 17, 9, 4, 4, ZSTD_lazy2 }, /* level 13 */
{ 0, 17, 18, 16, 5, 4, 20, ZSTD_btopt }, /* level 14 */
{ 0, 17, 18, 16, 9, 4, 48, ZSTD_btopt }, /* level 15 */
{ 0, 17, 18, 17, 7, 4,128, ZSTD_btopt }, /* level 16 */
{ 0, 17, 18, 17, 8, 4,128, ZSTD_btopt }, /* level 17 */
{ 0, 17, 18, 17, 8, 4,256, ZSTD_btopt }, /* level 18 */
{ 0, 17, 18, 17, 9, 4,256, ZSTD_btopt }, /* level 19 */
{ 0, 17, 18, 17, 10, 4,512, ZSTD_btopt }, /* level 20 */
{ 0, 17, 18, 17, 11, 4,512, ZSTD_btopt }, /* level 21 */
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, strat */
{ 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */
{ 0, 14, 14, 14, 1, 4, ZSTD_fast }, /* level 1 */
{ 0, 14, 14, 16, 1, 4, ZSTD_fast }, /* level 2 */
{ 0, 14, 14, 14, 5, 4, ZSTD_greedy }, /* level 3 */
{ 0, 14, 14, 14, 8, 4, ZSTD_greedy }, /* level 4 */
{ 0, 14, 11, 14, 6, 4, ZSTD_lazy }, /* level 5 */
{ 0, 14, 14, 13, 6, 5, ZSTD_lazy }, /* level 6 */
{ 0, 14, 14, 14, 7, 6, ZSTD_lazy }, /* level 7 */
{ 0, 14, 14, 14, 8, 4, ZSTD_lazy }, /* level 8 */
{ 0, 14, 14, 15, 9, 4, ZSTD_lazy }, /* level 9 */
{ 0, 14, 14, 15, 10, 4, ZSTD_lazy }, /* level 10 */
{ 0, 14, 15, 15, 6, 4, ZSTD_btlazy2 }, /* level 11 */
{ 0, 14, 15, 15, 7, 4, ZSTD_btlazy2 }, /* level 12 */
{ 0, 14, 15, 15, 8, 4, ZSTD_btlazy2 }, /* level 13 */
{ 0, 14, 15, 15, 9, 4, ZSTD_btlazy2 }, /* level 14 */
{ 0, 14, 15, 15, 10, 4, ZSTD_btlazy2 }, /* level 15 */
{ 0, 14, 15, 15, 11, 4, ZSTD_btlazy2 }, /* level 16 */
{ 0, 14, 15, 15, 12, 4, ZSTD_btlazy2 }, /* level 17 */
{ 0, 14, 15, 15, 13, 4, ZSTD_btlazy2 }, /* level 18 */
{ 0, 14, 15, 15, 14, 4, ZSTD_btlazy2 }, /* level 19 */
{ 0, 14, 15, 15, 15, 4, ZSTD_btlazy2 }, /* level 20 */
/* l, W, C, H, S, L, T, strat */
{ 0, 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */
{ 0, 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */
{ 0, 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */
{ 0, 14, 13, 15, 4, 4, 4, ZSTD_greedy }, /* level 3 */
{ 0, 14, 14, 15, 3, 4, 4, ZSTD_lazy }, /* level 4 */
{ 0, 14, 14, 14, 6, 4, 4, ZSTD_lazy }, /* level 5 */
{ 0, 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */
{ 0, 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 7 */
{ 0, 14, 14, 14, 8, 4, 4, ZSTD_lazy2 }, /* level 8 */
{ 0, 14, 14, 14, 9, 4, 4, ZSTD_lazy2 }, /* level 9 */
{ 0, 14, 14, 14, 10, 4, 4, ZSTD_lazy2 }, /* level 10 */
{ 0, 14, 14, 14, 11, 4, 4, ZSTD_lazy2 }, /* level 11 */
{ 0, 14, 15, 15, 12, 4, 32, ZSTD_btopt }, /* level 12 */
{ 0, 14, 15, 15, 12, 4, 64, ZSTD_btopt }, /* level 13 */
{ 0, 14, 15, 15, 12, 4, 96, ZSTD_btopt }, /* level 14 */
{ 0, 14, 15, 15, 12, 4,128, ZSTD_btopt }, /* level 15 */
{ 0, 14, 15, 15, 12, 4,256, ZSTD_btopt }, /* level 16 */
{ 0, 14, 15, 15, 13, 4,256, ZSTD_btopt }, /* level 17 */
{ 0, 14, 15, 15, 14, 4,256, ZSTD_btopt }, /* level 18 */
{ 0, 14, 15, 15, 15, 4,256, ZSTD_btopt }, /* level 19 */
{ 0, 14, 15, 15, 16, 4,256, ZSTD_btopt }, /* level 20 */
{ 0, 14, 15, 15, 17, 4,256, ZSTD_btopt }, /* level 21 */
},
};
@ -2368,6 +2384,9 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
int tableID = ((srcSizeHint-1) <= 256 KB) + ((srcSizeHint-1) <= 128 KB) + ((srcSizeHint-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
if (compressionLevel<=0) compressionLevel = 1;
if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
#if ZSTD_OPT_DEBUG >= 1
tableID=0;
#endif
result = ZSTD_defaultParameters[tableID][compressionLevel];
result.srcSize = srcSizeHint;
return result;

View File

@ -612,6 +612,8 @@ typedef struct {
const BYTE* dumpsEnd;
} seqState_t;
static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
{
size_t litLength;

View File

@ -32,11 +32,7 @@
#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* *************************************
/*-*************************************
* Dependencies
***************************************/
#include "mem.h"
@ -44,17 +40,16 @@ extern "C" {
#include "zstd_static.h"
/* *************************************
/*-*************************************
* Common macros
***************************************/
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
/* *************************************
/*-*************************************
* Common constants
***************************************/
#define ZSTD_MAGICNUMBER 0xFD2FB525 /* v0.5 */
#define ZSTD_DICT_MAGIC 0xEC30A435
#define KB *(1 <<10)
@ -82,9 +77,11 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
#define MINMATCH 4
#define REPCODE_STARTVALUE 1
#define Litbits 8
#define MLbits 7
#define LLbits 6
#define Offbits 5
#define MaxLit ((1<<Litbits) - 1)
#define MaxML ((1<<MLbits) - 1)
#define MaxLL ((1<<LLbits) - 1)
#define MaxOff ((1<<Offbits)- 1)
@ -128,9 +125,58 @@ MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
while (op < oend);
}
#if defined (__cplusplus)
/*! ZSTD_highbit() :
 *  @return : position of the highest bit set in `val` (0 == least significant bit),
 *            using a compiler intrinsic when one is available,
 *            and a portable De Bruijn lookup otherwise.
 *  NOTE(review): `val == 0` looks unsupported on the GCC path
 *  (__builtin_clz(0) is documented as undefined) - confirm that callers never pass 0. */
MEM_STATIC unsigned ZSTD_highbit(U32 val)
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
# else /* Software version */
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
int r;
/* propagate the highest set bit into every lower position */
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
/* multiply + shift selects a 5-bit index into the lookup table */
r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
return r;
# endif
}
#endif
/*-*******************************************
* Private interfaces
*********************************************/
/* seqStore_t :
 * Storage shared through this private header; ZSTD_copySeqStore() returns one by value.
 * Most members come as a `xxxStart` / `xxx` pointer pair.
 * NOTE(review): presumably `xxxStart` is the base of a stream and `xxx` the current
 * write position within it - confirm against zstd_compress.c. */
typedef struct {
void* buffer;
U32* offsetStart;
U32* offset;
BYTE* offCodeStart;
BYTE* offCode;
BYTE* litStart;
BYTE* lit;
BYTE* litLengthStart;
BYTE* litLength;
BYTE* matchLengthStart;
BYTE* matchLength;
BYTE* dumpsStart;
BYTE* dumps;
/* opt */
/* NOTE(review): the members below look like per-symbol frequency tables and their
 * totals, presumably statistics for the optimal parser (zstd_opt.h) - confirm. */
U32* matchLengthFreq;
U32* litLengthFreq;
U32* litFreq;
U32* offCodeFreq;
U32 matchLengthSum;
U32 litLengthSum;
U32 litSum;
U32 offCodeSum;
} seqStore_t;
seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx);
#endif /* ZSTD_CCOMMON_H_MODULE */

1125
lib/zstd_opt.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -27,14 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- zstd homepage : http://www.zstd.net
*/
#ifndef ZSTD_STATIC_H
#define ZSTD_STATIC_H
/* The objects defined into this file shall be considered experimental.
* They are not considered stable, as their prototype may change in the future.
* You can use them for tests, provide feedback, or if you can endure risks of future changes.
/* The prototypes defined within this file are considered experimental.
* They should not be used through a DLL, as they may change in the future.
* Prefer static linking if you need them, to control breaking version changes issues.
*/
#if defined (__cplusplus)
@ -48,6 +48,12 @@ extern "C" {
#include "mem.h"
/*-*************************************
* Constants
***************************************/
#define ZSTD_MAGICNUMBER 0xFD2FB525 /* v0.5 */
/*-*************************************
* Types
***************************************/
@ -57,31 +63,33 @@ extern "C" {
#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
#define ZSTD_CONTENTLOG_MIN 4
#define ZSTD_HASHLOG_MAX 28
#define ZSTD_HASHLOG_MIN 4
#define ZSTD_HASHLOG_MIN 12
#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_SEARCHLENGTH_MAX 7
#define ZSTD_SEARCHLENGTH_MIN 4
#define ZSTD_TARGETLENGTH_MIN 4
#define ZSTD_TARGETLENGTH_MAX 999
/** from faster to stronger */
typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
/* from faster to stronger */
typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_opt, ZSTD_btopt } ZSTD_strategy;
typedef struct
{
U64 srcSize; /* optional : tells how much bytes are present in the frame. Use 0 if not known. */
U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */
U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */
U32 hashLog; /* dispatch table : larger == more memory, faster */
U32 hashLog; /* dispatch table : larger == faster, more memory */
U32 searchLog; /* nb of searches : larger == more compression, slower */
U32 searchLength; /* size of matches : larger == faster decompression, sometimes less compression */
U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */
U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */
ZSTD_strategy strategy;
} ZSTD_parameters;
/* *************************************
/*-*************************************
* Advanced functions
***************************************/
#define ZSTD_MAX_CLEVEL 20
ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
/*! ZSTD_getParams() :
@ -203,7 +211,7 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
* Block functions
****************************************/
/*! Block functions produce and decode raw zstd blocks, without frame metadata.
User will have to save and regenerate necessary information to regenerate data, such as block sizes.
The user is responsible for saving the information required to regenerate data, such as block sizes.
A few rules to respect :
- Uncompressed block size must be <= 128 KB
@ -224,13 +232,13 @@ size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, cons
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* *************************************
/*-*************************************
* Error management
***************************************/
#include "error_public.h"
/*! ZSTD_getErrorCode() :
convert a `size_t` function result into a `ZSTD_error_code` enum type,
which can be used to compare directly with enum list within "error_public.h" */
which can be used to compare directly with enum list published into "error_public.h" */
ZSTD_ErrorCode ZSTD_getError(size_t code);

View File

@ -1,6 +1,6 @@
# ##########################################################################
# ZSTD programs - Makefile
# Copyright (C) Yann Collet 2015
# Copyright (C) Yann Collet 2015-2016
#
# GPL v2 License
#
@ -19,13 +19,14 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# You can contact the author at :
# - ZSTD source repository : http://code.google.com/p/zstd/
# - Public forum : https://groups.google.com/forum/#!forum/lz4c
# - zstd homepage : http://www.zstd.net/
# ##########################################################################
# zstd : Command Line Utility, supporting gzip-like arguments
# datagen : Synthetic and parametrable data generator, for tests
# fuzzer : Test tool, to check zstd integrity on target platform
# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode
# zbufftest : Test tool, to check ZBUFF integrity on target platform
# zbufftest32: Same as zbufftest, but forced to compile in 32-bits mode
# fullbench : Precisely measure speed for each zstd inner function
# fullbench32: Same as fullbench, but forced to compile in 32-bits mode
# ##########################################################################
@ -52,15 +53,15 @@ BINDIR = $(PREFIX)/bin
MANDIR = $(PREFIX)/share/man/man1
ZSTDDIR = ../lib
ZSTD_FILES := $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c
ZSTD_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c $(ZSTDDIR)/legacy/zstd_v04.c
ZSTD_FILES := $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c
ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=0
ZSTD_FILES_LEGACY:=
else
ZSTD_FILES+= $(ZSTD_LEGACY)
CPPFLAGS += -I../lib/legacy -I./legacy -DZSTD_LEGACY_SUPPORT=1
ZSTD_FILEIO_LEGACY = legacy/fileio_legacy.c
ZSTD_LEGACY_SUPPORT:=1
CPPFLAGS += -I../lib/legacy -I./legacy
ZSTD_FILES_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c $(ZSTDDIR)/legacy/zstd_v04.c legacy/fileio_legacy.c
endif
@ -75,6 +76,7 @@ endif
ZBUFFTEST = -T2mn
FUZZERTEST= -T5mn
ZSTDRTTEST= --test-large-data
.PHONY: default all clean install uninstall test test32 test-all
@ -82,29 +84,33 @@ default: zstd
all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 paramgrill datagen
zstd : $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY) bench.c xxhash.c datagen.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
zstd : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \
zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c
$(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
zstd32: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY) bench.c xxhash.c datagen.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
zstd32: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \
zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c
$(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
zstd_nolegacy :
$(MAKE) zstd ZSTD_LEGACY_SUPPORT=0
zstd-pgo : MOREFLAGS = -fprofile-generate
zstd-pgo : clean zstd
./zstd -b19i1 $(PROFILE_WITH)
./zstd -b16i1 $(PROFILE_WITH)
./zstd -b9i2 $(PROFILE_WITH)
./zstd -b $(PROFILE_WITH)
./zstd -b7i2 $(PROFILE_WITH)
./zstd -b5 $(PROFILE_WITH)
rm zstd
$(MAKE) zstd MOREFLAGS=-fprofile-use
zstd-noBench: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY)
$(CC) $(FLAGS) -DZSTD_NOBENCH $^ -o zstd$(EXT)
zstd-frugal: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c zstdcli.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_LEGACY_SUPPORT=0 $^ -o zstd$(EXT)
zstd-frugal: clean
$(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0
zstd-small: clean
CFLAGS="-Os -s" $(MAKE) zstd-frugal
fullbench : $(ZSTD_FILES) \
datagen.c fullbench.c
@ -122,11 +128,11 @@ fuzzer32: $(ZSTD_FILES) \
datagen.c xxhash.c fuzzer.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
zbufftest : $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
zbufftest : $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
datagen.c xxhash.c zbufftest.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
zbufftest32: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
zbufftest32: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
datagen.c xxhash.c zbufftest.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
@ -138,7 +144,7 @@ datagen : datagen.c datagencli.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
clean:
@rm -f core *.o tmp* result* *.gcda \
@rm -f core *.o tmp* result* *.gcda dictionary *.zst \
zstd$(EXT) zstd32$(EXT) \
fullbench$(EXT) fullbench32$(EXT) \
fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \
@ -178,7 +184,7 @@ test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zbuff32
test-all: test test32 valgrindTest
zstd-playTests: datagen
ZSTD=$(ZSTD) ./playTests.sh --test-large-data
ZSTD=$(ZSTD) ./playTests.sh $(ZSTDRTTEST)
test-zstd: ZSTD = ./zstd
test-zstd: zstd zstd-playTests
@ -213,12 +219,12 @@ valgrindTest: zstd datagen fuzzer fullbench zbufftest
@echo "\n ---- valgrind tests : memory analyzer ----"
valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)
./datagen -g16KB > tmp
valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID)
./datagen -g2930KB > tmp
valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp tmp2
valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 $(VOID)
valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp -o tmp2
valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 -o $(VOID)
./datagen -g64MB > tmp
valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID)
@rm tmp
valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -T1mn -t1
valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1

277
programs/dibio.c Normal file
View File

@ -0,0 +1,277 @@
/*
dibio - I/O API for dictionary builder
Copyright (C) Yann Collet 2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd homepage : http://www.zstd.net/
*/
/*-**************************************
* Compiler Options
****************************************/
/* Disable some Visual warning messages */
#ifdef _MSC_VER
# define _CRT_SECURE_NO_WARNINGS /* fopen */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/* Unix Large Files support (>4GB) */
#define _FILE_OFFSET_BITS 64
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
# define _LARGEFILE_SOURCE
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
# define _LARGEFILE64_SOURCE
#endif
/*-*************************************
* Includes
***************************************/
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <sys/types.h> /* stat64 */
#include <sys/stat.h> /* stat64 */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "error_private.h"
#include "zdict_static.h"
/*-*************************************
* Compiler specifics
***************************************/
#if !defined(S_ISREG)
# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#endif
/*-*************************************
* Constants
***************************************/
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define DICTLISTSIZE 10000
#define MEMMULT 11
static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
#define NOISELENGTH 32
#define PRIME1 2654435761U
#define PRIME2 2246822519U
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* EXM_THROW() :
 * Report a fatal error, then terminate the program with exit(error).
 * The source location is printed only when DEBUG is non-zero;
 * the error code and the printf-style message are printed when g_displayLevel >= 1.
 * Note : this expands to a bare `{ ... }` block (not `do { } while (0)`),
 * and some call sites in this file invoke it without a trailing semicolon,
 * so changing the wrapper shape would break them. */
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", error); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, "\n"); \
exit(error); \
}
/* ********************************************************
* Helper functions
**********************************************************/
/* DiB_isError() :
 * Forwards `errorCode` to ERR_isError() and returns its result. */
unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }
/* DiB_getErrorName() :
 * Forwards `errorCode` to ERR_getErrorName() and returns the string it provides. */
const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
/* ********************************************************
* File related operations
**********************************************************/
/* DiB_getFileSize() :
 * @return : size of `infilename` in bytes,
 *           or 0 when the file cannot be stat'ed or is not a regular file. */
static unsigned long long DiB_getFileSize(const char* infilename)
{
#if defined(_MSC_VER)
    struct _stat64 fileInfo;
    int const statFailed = _stat64(infilename, &fileInfo);
#else
    struct stat fileInfo;
    int const statFailed = stat(infilename, &fileInfo);
#endif

    if (statFailed) return 0;                    /* path missing or not accessible */
    if (!S_ISREG(fileInfo.st_mode)) return 0;    /* directory, device, pipe, ... */

    return (unsigned long long)fileInfo.st_size;
}
/* DiB_getTotalFileSize() :
 * @return : sum of the sizes reported by DiB_getFileSize()
 *           for the first `nbFiles` entries of `fileNamesTable`.
 *           Entries that are not readable regular files contribute 0. */
static unsigned long long DiB_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
{
    unsigned long long sum = 0;
    unsigned fileIdx = 0;

    while (fileIdx < nbFiles) {
        sum += DiB_getFileSize(fileNamesTable[fileIdx]);
        fileIdx++;
    }

    return sum;
}
/* DiB_loadFiles() :
 * Read the content of each file of `fileNamesTable` back-to-back into `buffer`.
 * `fileSizes[n]` receives the number of bytes loaded from file n.
 * A file which does not fit into the space remaining in `buffer` is recorded with size 0
 * (nothing is read from it); the following files are still examined.
 * Exits the program (EXM_THROW) when a file cannot be opened or fully read. */
static void DiB_loadFiles(void* buffer, size_t bufferSize,
                          size_t* fileSizes,
                          const char** fileNamesTable, unsigned nbFiles)
{
    char* const dst = (char*)buffer;
    size_t filled = 0;       /* nb of bytes already stored into dst */
    unsigned fileIdx = 0;

    while (fileIdx < nbFiles) {
        const char* const name = fileNamesTable[fileIdx];
        unsigned long long toLoad = DiB_getFileSize(name);
        size_t loaded;
        FILE* const src = fopen(name, "rb");

        if (src==NULL) EXM_THROW(10, "impossible to open file %s", name);
        DISPLAYLEVEL(2, "Loading %s... \r", name);

        /* not enough room left : record this file as empty */
        if (toLoad > bufferSize-filled) toLoad = 0;

        loaded = fread(dst+filled, 1, (size_t)toLoad, src);
        if (loaded != (size_t)toLoad) EXM_THROW(11, "could not read %s", name);

        filled += loaded;
        fileSizes[fileIdx] = (size_t)toLoad;
        fclose(src);
        fileIdx++;
    }
}
/*-********************************************************
* Dictionary training functions
**********************************************************/
/* DiB_findMaxMem() :
 * Probe how much memory can actually be allocated, up to `requiredMem`.
 * The request is rounded up to the next 8 MB boundary, padded by 2 steps,
 * capped to `maxMemory`, then lowered by 8 MB steps until malloc() succeeds.
 * @return : the size that was successfully allocated, minus one step of margin;
 *           0 when no allocation larger than one step could be obtained
 *           (the loop previously had no lower bound and could wrap around). */
static size_t DiB_findMaxMem(unsigned long long requiredMem)
{
    size_t const step = 8 MB;
    void* testmem = NULL;

    requiredMem = (((requiredMem >> 23) + 1) << 23);   /* next 8 MB boundary (1<<23 == 8 MB) */
    requiredMem += 2 * step;
    if (requiredMem > maxMemory) requiredMem = maxMemory;

    while (!testmem) {
        /* nothing left to try : stop instead of wrapping below zero */
        if (requiredMem <= step) return 0;
        requiredMem -= step;
        testmem = malloc((size_t)requiredMem);
    }

    free(testmem);
    return (size_t)(requiredMem - step);   /* keep one step of margin */
}
/* DiB_fillNoise() :
 * Fill `length` bytes of `buffer` with a deterministic pseudo-random sequence :
 * a state seeded with PRIME1 is multiplied by PRIME2 for each byte,
 * and bits 21+ of the state (truncated to 8 bits) are stored. */
static void DiB_fillNoise(void* buffer, size_t length)
{
    unsigned char* out = (unsigned char*)buffer;
    unsigned char* const outEnd = out + length;
    unsigned state = PRIME1;

    while (out < outEnd) {
        state *= PRIME2;
        *out++ = (unsigned char)(state >> 21);
    }
}
/* DiB_saveDict() :
 * Write the `buffSize` bytes of `buff` into file `dictFileName` (created or truncated).
 * Exits the program (EXM_THROW) when the file cannot be opened,
 * when the write is incomplete, or when closing the file reports an error. */
static void DiB_saveDict(const char* dictFileName,
                         const void* buff, size_t buffSize)
{
    FILE* const out = fopen(dictFileName, "wb");
    if (out==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);

    {   size_t const written = fwrite(buff, 1, buffSize, out);
        if (written!=buffSize) EXM_THROW(4, "%s : write error", dictFileName);
    }

    {   int const closeStatus = fclose(out);
        if (closeStatus!=0) EXM_THROW(5, "%s : flush error", dictFileName);
    }
}
/*! ZDICT_trainFromBuffer_unsafe() :
Strictly Internal use only !!
Same as ZDICT_trainFromBuffer_advanced(), but does not control `samplesBuffer`.
`samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code.
*/
size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
/*! DiB_trainFromFiles() :
 *  Train a dictionary from the `nbFiles` sample files listed in `fileNamesTable`,
 *  and save the result into file `dictFileName`.
 *  `maxDictSize` is the capacity, in bytes, of the buffer the dictionary is built into.
 *  `params` is forwarded to the trainer; `params.notificationLevel` also sets
 *  the verbosity of this module.
 *  When the samples do not fit into the memory that could be obtained,
 *  only the files that fit are loaded (a warning is displayed).
 *  @return : 0 on success, 1 when dictionary training fails.
 *  Allocation and file I/O failures terminate the program through EXM_THROW(). */
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                       const char** fileNamesTable, unsigned nbFiles,
                       ZDICT_params_t params)
{
    void* srcBuffer;
    size_t benchedSize;
    size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
    unsigned long long totalSizeToLoad = DiB_getTotalFileSize(fileNamesTable, nbFiles);
    void* dictBuffer = malloc(maxDictSize);
    size_t dictSize;
    int result = 0;

    /* init */
    g_displayLevel = params.notificationLevel;
    /* the memory probe is scaled by MEMMULT, then scaled back to get the sample budget */
    benchedSize = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
    if ((unsigned long long)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
    if (benchedSize < totalSizeToLoad)
        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));

    /* Memory allocation & restrictions */
    srcBuffer = malloc(benchedSize+NOISELENGTH);   /* + noise */
    if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles");   /* should not happen */

    /* Load input buffer */
    DiB_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
    DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */

    /* call buffer version */
    dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
                                            srcBuffer, fileSizes, nbFiles,
                                            params);
    if (ZDICT_isError(dictSize)) {
        /* message now ends with a newline, so it does not run into the next console output */
        DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
        result = 1;
        goto _cleanup;
    }

    /* save dict */
    DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
    DiB_saveDict(dictFileName, dictBuffer, dictSize);

    /* clean up */
_cleanup:
    free(srcBuffer);
    free(dictBuffer);
    free(fileSizes);
    return result;
}

52
programs/dibio.h Normal file
View File

@ -0,0 +1,52 @@
/*
dibio.h - I/O API for dictionary builder
Copyright (C) Yann Collet 2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd homepage : http://www.zstd.net/
*/
/* This library is designed for a single-threaded console application.
* It calls exit() and prints a message to stderr when it encounters an error condition. */
#ifndef DIBIO_H_003
#define DIBIO_H_003
/*-*************************************
* Dependencies
***************************************/
#include "zdict_static.h" /* ZDICT_params_t */
/*-*************************************
* Public functions
***************************************/
/*! DiB_trainFromFiles() :
Train a dictionary from a set of sample files listed in `fileNamesTable`
(`nbFiles` entries), then write the resulting dictionary into file `dictFileName`.
`maxDictSize` : presumably the maximum size, in bytes, of the generated dictionary
(NOTE(review): confirm against the implementation in dibio.c).
`parameters` is passed by value; its fields can be set to 0, meaning "default".
@return : 0 == ok. Any other : error.
*/
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
ZDICT_params_t parameters);
#endif

View File

@ -1,6 +1,6 @@
/*
fileio.c - File i/o handler
Copyright (C) Yann Collet 2013-2015
fileio.c - File i/o handler for zstd
Copyright (C) Yann Collet 2013-2016
GPL v2 License
@ -19,8 +19,7 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
- zstd homepage : http://www.zstd.net
*/
/*
Note : this is stand-alone program.
@ -33,7 +32,7 @@
* Tuning options
***************************************/
#ifndef ZSTD_LEGACY_SUPPORT
/**LEGACY_SUPPORT :
/* LEGACY_SUPPORT :
* decompressor can decode older formats (starting from Zstd 0.1+) */
# define ZSTD_LEGACY_SUPPORT 1
#endif
@ -53,7 +52,7 @@
#define _POSIX_SOURCE 1 /* enable fileno() within <stdio.h> on unix */
/* *************************************
/*-*************************************
* Includes
***************************************/
#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
@ -66,23 +65,20 @@
#include "mem.h"
#include "fileio.h"
#include "zstd_static.h" /* ZSTD_magicNumber */
#include "zstd_buffered_static.h"
#include "zbuff_static.h"
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
# include "zstd_legacy.h" /* legacy */
# include "fileio_legacy.h" /* legacy */
# include "zstd_legacy.h" /* ZSTD_isLegacy */
# include "fileio_legacy.h" /* FIO_decompressLegacyFrame */
#endif
/* *************************************
/*-*************************************
* OS-specific Includes
***************************************/
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _isatty */
# ifdef __MINGW32__
// int _fileno(FILE *stream); /* seems no longer useful /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
# endif
# define SET_BINARY_MODE(file) { int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; }
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#else
@ -96,7 +92,7 @@
#endif
/* *************************************
/*-*************************************
* Constants
***************************************/
#define KB *(1U<<10)
@ -116,14 +112,15 @@
#define BLOCKSIZE (128 KB)
#define ROLLBUFFERSIZE (BLOCKSIZE*8*64)
#define FIO_FRAMEHEADERSIZE 5 /* as a define, because needed to allocated table on stack */
#define FSE_CHECKSUM_SEED 0
#define FIO_FRAMEHEADERSIZE 5 /* as a define, because needed to allocated table on stack */
#define FSE_CHECKSUM_SEED 0
#define CACHELINE 64
#define MAX_DICT_SIZE (512 KB)
#define MAX_DICT_SIZE (1 MB) /* protection against large input (attack scenario) ; can be changed */
/* *************************************
/*-*************************************
* Macros
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
@ -137,17 +134,18 @@ static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result
static const unsigned refreshRate = 150;
static clock_t g_time = 0;
#define MAX(a,b) ((a)>(b)?(a):(b))
/* *************************************
/*-*************************************
* Local Parameters
***************************************/
static U32 g_overwrite = 0;
void FIO_overwriteMode(void) { g_overwrite=1; }
void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
/* *************************************
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
@ -164,7 +162,7 @@ void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
}
/* *************************************
/*-*************************************
* Functions
***************************************/
static unsigned FIO_GetMilliSpan(clock_t nPrevious)
@ -190,54 +188,57 @@ static U64 FIO_getFileSize(const char* infilename)
}
static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr,
const char* dstFileName, const char* srcFileName)
static FILE* FIO_openSrcFile(const char* srcFileName)
{
FILE* f;
if (!strcmp (srcFileName, stdinmark)) {
DISPLAYLEVEL(4,"Using stdin for input\n");
*fileInPtr = stdin;
f = stdin;
SET_BINARY_MODE(stdin);
} else {
*fileInPtr = fopen(srcFileName, "rb");
f = fopen(srcFileName, "rb");
}
if ( *fileInPtr==0 ) {
DISPLAYLEVEL(1, "Unable to access file for processing: %s\n", srcFileName);
return 1;
}
if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: No such file\n", srcFileName);
return f;
}
static FILE* FIO_openDstFile(const char* dstFileName)
{
FILE* f;
if (!strcmp (dstFileName, stdoutmark)) {
DISPLAYLEVEL(4,"Using stdout for output\n");
*fileOutPtr = stdout;
f = stdout;
SET_BINARY_MODE(stdout);
} else {
if (!g_overwrite) { /* Check if destination file already exists */
*fileOutPtr = fopen( dstFileName, "rb" );
if (*fileOutPtr != 0) { /* dest file exists, prompt for overwrite authorization */
fclose(*fileOutPtr);
DISPLAY("Warning : %s already exists \n", dstFileName);
if ((g_displayLevel <= 1) || (*fileInPtr == stdin)) {
f = fopen( dstFileName, "rb" );
if (f != 0) { /* dest file exists, prompt for overwrite authorization */
fclose(f);
if (g_displayLevel <= 1) {
/* No interaction possible */
DISPLAY("Operation aborted : %s already exists \n", dstFileName);
return 1;
DISPLAY("zstd: %s already exists; not overwritten \n", dstFileName);
return 0;
}
DISPLAY("Overwrite ? (y/N) : ");
DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
{
int ch = getchar();
if ((ch!='Y') && (ch!='y')) {
DISPLAY("No. Operation aborted : %s already exists \n", dstFileName);
return 1;
DISPLAY(" not overwritten \n");
return 0;
}
while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */
} } }
*fileOutPtr = fopen( dstFileName, "wb" );
f = fopen( dstFileName, "wb" );
}
if (*fileOutPtr==0) EXM_THROW(13, "Pb opening %s", dstFileName);
return 0;
return f;
}
/*!FIO_loadFile
* creates a buffer, pointed by *bufferPtr,
* loads "filename" content into it
@ -284,6 +285,8 @@ typedef struct {
void* dictBuffer;
size_t dictBufferSize;
ZBUFF_CCtx* ctx;
FILE* dstFile;
FILE* srcFile;
} cRess_t;
static cRess_t FIO_createCResources(const char* dictFileName)
@ -317,27 +320,24 @@ static void FIO_freeCResources(cRess_t ress)
}
/*
* FIO_compressFilename_extRess()
* result : 0 : compression completed correctly
* 1 : missing or pb opening srcFileName
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.srcFile` and `ress.dstFile` already opened
* @return : 0 : compression completed correctly,
* 1 : missing or pb opening srcFileName
*/
static int FIO_compressFilename_extRess(cRess_t ress,
const char* dstFileName, const char* srcFileName,
int cLevel)
static int FIO_compressFilename_internal(cRess_t ress,
const char* dstFileName, const char* srcFileName,
int cLevel)
{
FILE* srcFile;
FILE* dstFile;
FILE* srcFile = ress.srcFile;
FILE* dstFile = ress.dstFile;
U64 filesize = 0;
U64 compressedfilesize = 0;
size_t dictSize = ress.dictBufferSize;
size_t sizeCheck, errorCode;
/* File check */
if (FIO_getFiles(&dstFile, &srcFile, dstFileName, srcFileName)) return 1;
/* init */
filesize = FIO_getFileSize(srcFileName) + dictSize;
filesize = MAX(FIO_getFileSize(srcFileName),dictSize);
errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, ZSTD_getParams(cLevel, filesize));
if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
@ -350,8 +350,7 @@ static int FIO_compressFilename_extRess(cRess_t ress,
filesize += inSize;
DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20));
{
/* Compress (buffered streaming ensures appropriate formatting) */
{ /* Compress using buffered streaming */
size_t usedInSize = inSize;
size_t cSize = ress.dstBufferSize;
size_t result = ZBUFF_compressContinue(ress.ctx, ress.dstBuffer, &cSize, ress.srcBuffer, &usedInSize);
@ -366,7 +365,6 @@ static int FIO_compressFilename_extRess(cRess_t ress,
if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
compressedfilesize += cSize;
}
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
}
@ -386,14 +384,56 @@ static int FIO_compressFilename_extRess(cRess_t ress,
DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
(unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
/* clean */
fclose(srcFile);
if (fclose(dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
return 0;
}
/*! FIO_compressFilename_srcFile() :
* opens `srcFileName` and compresses it into `ress.dstFile`,
* which must already be opened by the caller and is left open on return.
* `dstFileName` is only forwarded to FIO_compressFilename_internal().
* @return : 1 if `srcFileName` could not be opened,
* otherwise the value returned by FIO_compressFilename_internal()
*/
static int FIO_compressFilename_srcFile(cRess_t ress,
                                        const char* dstFileName, const char* srcFileName,
                                        int cLevel)
{
    FILE* const input = FIO_openSrcFile(srcFileName);
    int status = 1;   /* stays 1 when the source cannot be opened */

    if (input != NULL) {
        ress.srcFile = input;   /* `ress` is a by-value copy : the caller's struct is untouched */
        status = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);
        fclose(input);          /* read-only stream : close result is not checked */
    }
    return status;
}
/*! FIO_compressFilename_extRess() :
* opens both `srcFileName` and `dstFileName`, compresses the former into the latter
* using the caller-provided resources `ress`, then closes both files.
* A failure to close the destination is reported through EXM_THROW.
* @return : 1 if either file could not be opened (nothing is compressed),
* otherwise the value returned by FIO_compressFilename_internal()
*/
static int FIO_compressFilename_extRess(cRess_t ress,
                                        const char* dstFileName, const char* srcFileName,
                                        int cLevel)
{
    int status;

    /* source first : no destination is created when the source is missing */
    ress.srcFile = FIO_openSrcFile(srcFileName);
    if (ress.srcFile == NULL) return 1;

    ress.dstFile = FIO_openDstFile(dstFileName);
    if (ress.dstFile == NULL) {
        fclose(ress.srcFile);
        return 1;
    }

    status = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);

    fclose(ress.srcFile);   /* read-only stream : close result is not checked */
    if (fclose(ress.dstFile) != 0)
        EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
    return status;
}
int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
const char* dictFileName, int compressionLevel)
{
@ -431,21 +471,28 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
int missed_files = 0;
char* dstFileName = (char*)malloc(FNSPACE);
size_t dfnSize = FNSPACE;
const size_t suffixSize = strlen(suffix);
const size_t suffixSize = suffix ? strlen(suffix) : 0;
cRess_t ress;
/* init */
ress = FIO_createCResources(dictFileName);
/* loop on each file */
for (u=0; u<nbFiles; u++) {
size_t ifnSize = strlen(inFileNamesTable[u]);
if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); }
strcpy(dstFileName, inFileNamesTable[u]);
strcat(dstFileName, suffix);
missed_files += FIO_compressFilename_extRess(ress, dstFileName, inFileNamesTable[u], compressionLevel);
}
if (!strcmp(suffix, stdoutmark)) {
ress.dstFile = stdout;
for (u=0; u<nbFiles; u++)
missed_files += FIO_compressFilename_srcFile(ress, stdoutmark,
inFileNamesTable[u], compressionLevel);
if (fclose(ress.dstFile)) EXM_THROW(29, "Write error : cannot properly close %s", stdoutmark);
} else {
for (u=0; u<nbFiles; u++) {
size_t ifnSize = strlen(inFileNamesTable[u]);
if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); }
strcpy(dstFileName, inFileNamesTable[u]);
strcat(dstFileName, suffix);
missed_files += FIO_compressFilename_extRess(ress, dstFileName,
inFileNamesTable[u], compressionLevel);
} }
/* Close & Free */
FIO_freeCResources(ress);
@ -466,6 +513,7 @@ typedef struct {
void* dictBuffer;
size_t dictBufferSize;
ZBUFF_DCtx* dctx;
FILE* dstFile;
} dRess_t;
static dRess_t FIO_createDResources(const char* dictFileName)
@ -534,15 +582,17 @@ unsigned long long FIO_decompressFrame(dRess_t ress,
}
static int FIO_decompressFile_extRess(dRess_t ress,
const char* dstFileName, const char* srcFileName)
/** FIO_decompressSrcFile() :
Decompress `srcFileName` into `ress.dstFile`
@return : 0 : OK
1 : operation not started
*/
static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
{
unsigned long long filesize = 0;
FILE* srcFile;
FILE* dstFile;
/* Init */
if (FIO_getFiles(&dstFile, &srcFile, dstFileName, srcFileName)) return 1;
FILE* dstFile = ress.dstFile;
FILE* srcFile = FIO_openSrcFile(srcFileName);
if (srcFile==0) return 1;
/* for each frame */
for ( ; ; ) {
@ -551,14 +601,17 @@ static int FIO_decompressFile_extRess(dRess_t ress,
size_t toRead = 4;
sizeCheck = fread(ress.srcBuffer, (size_t)1, toRead, srcFile);
if (sizeCheck==0) break; /* no more input */
if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header");
if (sizeCheck != toRead) EXM_THROW(31, "zstd: %s read error : cannot read header", srcFileName);
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
if (ZSTD_isLegacy(MEM_readLE32(ress.srcBuffer))) {
filesize += FIO_decompressLegacyFrame(dstFile, srcFile, MEM_readLE32(ress.srcBuffer));
continue;
}
#endif /* ZSTD_LEGACY_SUPPORT */
if (MEM_readLE32(ress.srcBuffer) != ZSTD_MAGICNUMBER) {
DISPLAYLEVEL(1, "zstd: %s: not in zstd format \n", srcFileName);
return 1;
}
filesize += FIO_decompressFrame(ress, dstFile, srcFile, toRead);
}
@ -568,8 +621,24 @@ static int FIO_decompressFile_extRess(dRess_t ress,
/* Close */
fclose(srcFile);
if (fclose(dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
return 0;
}
/** FIO_decompressFile_extRess() :
decompress `srcFileName` into `dstFileName`, using caller-provided resources `ress`.
The destination is opened first, then closed once decoding has been attempted;
a failure to close it is reported through EXM_THROW.
@return : 0 : OK
1 : operation aborted (src not available, dst already taken, etc.)
Note : the destination is opened before the source is checked, so a
`dstFileName` file may remain on disk even when 1 is returned.
*/
static int FIO_decompressFile_extRess(dRess_t ress,
                                      const char* dstFileName, const char* srcFileName)
{
    int result;

    ress.dstFile = FIO_openDstFile(dstFileName);
    if (ress.dstFile==0) return 1;

    /* keep the decoder status : it was previously discarded, so an unreadable
     * or non-zstd source was reported to the caller as a success */
    result = FIO_decompressSrcFile(ress, srcFileName);

    if (fclose(ress.dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
    return result;
}
@ -597,29 +666,42 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
int missingFiles = 0;
char* dstFileName = (char*)malloc(FNSPACE);
size_t dfnSize = FNSPACE;
const size_t suffixSize = strlen(suffix);
const size_t suffixSize = suffix ? strlen(suffix) : 0;
dRess_t ress;
if (dstFileName==NULL) EXM_THROW(70, "not enough memory for dstFileName");
ress = FIO_createDResources(dictFileName);
for (u=0; u<nbFiles; u++) {
const char* srcFileName = srcNamesTable[u];
size_t sfnSize = strlen(srcFileName);
const char* suffixPtr = srcFileName + sfnSize - suffixSize;
if (dfnSize <= sfnSize-suffixSize+1) { free(dstFileName); dfnSize = sfnSize + 20; dstFileName = (char*)malloc(dfnSize); if (dstFileName==NULL) EXM_THROW(71, "not enough memory for dstFileName"); }
if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
DISPLAYLEVEL(1, "File extension doesn't match expected extension (%4s); will not process file: %s\n", suffix, srcFileName);
skippedFiles++;
continue;
}
memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
dstFileName[sfnSize-suffixSize] = '\0';
if (!strcmp(suffix, stdoutmark) || !strcmp(suffix, nulmark)) {
ress.dstFile = FIO_openDstFile(suffix);
if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", suffix);
for (u=0; u<nbFiles; u++)
missingFiles += FIO_decompressSrcFile(ress, srcNamesTable[u]);
if (fclose(ress.dstFile)) EXM_THROW(39, "Write error : cannot properly close %s", stdoutmark);
} else {
for (u=0; u<nbFiles; u++) { /* create dstFileName */
const char* srcFileName = srcNamesTable[u];
size_t sfnSize = strlen(srcFileName);
const char* suffixPtr = srcFileName + sfnSize - suffixSize;
if (dfnSize+suffixSize <= sfnSize+1) {
free(dstFileName);
dfnSize = sfnSize + 20;
dstFileName = (char*)malloc(dfnSize);
if (dstFileName==NULL) EXM_THROW(71, "not enough memory for dstFileName");
}
if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%4s expected) -- ignored \n", srcFileName, suffix);
skippedFiles++;
continue;
}
memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
dstFileName[sfnSize-suffixSize] = '\0';
missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName);
}
missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName);
} }
FIO_freeDResources(ress);
free(dstFileName);
return missingFiles + skippedFiles;
}

View File

@ -1,6 +1,6 @@
/*
fileio.h - file i/o handler
Copyright (C) Yann Collet 2013-2015
Copyright (C) Yann Collet 2013-2016
GPL v2 License
@ -19,8 +19,7 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- ZSTD source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
- ZSTD homepage : http://www.zstd.net/
*/
#pragma once
@ -33,8 +32,8 @@ extern "C" {
* Special i/o constants
**************************************/
#define nullString "null"
#define stdinmark "-"
#define stdoutmark "-"
#define stdinmark "stdin"
#define stdoutmark "stdout"
#ifdef _WIN32
# define nulmark "nul"
#else
@ -52,32 +51,29 @@ void FIO_setNotificationLevel(unsigned level);
/* *************************************
* Single File functions
***************************************/
/** FIO_compressFilename() :
@return : 0 == ok; 1 == pb with src file. */
int FIO_compressFilename (const char* outfilename, const char* infilename, const char* dictFileName, int compressionLevel);
int FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName);
/**
FIO_compressFilename :
@result : 0 == ok; 1 == pb with src file.
FIO_decompressFilename :
@result : 0 == ok; 1 == pb with src file.
*/
/** FIO_decompressFilename() :
@return : 0 == ok; 1 == pb with src file. */
int FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName);
/* *************************************
* Multiple File functions
***************************************/
/** FIO_compressMultipleFilenames() :
@return : nb of missing files */
int FIO_compressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles,
const char* suffix,
const char* dictFileName, int compressionLevel);
/** FIO_decompressMultipleFilenames() :
@return : nb of missing or skipped files */
int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles,
const char* suffix,
const char* dictFileName);
/**
FIO_compressMultipleFilenames :
@result : nb of missing files
FIO_decompressMultipleFilenames :
@result : nb of missing or skipped files
*/
#if defined (__cplusplus)

View File

@ -91,6 +91,7 @@ static U32 g_testTime = 0;
/*********************************************************
* Fuzzer functions
*********************************************************/
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))
static U32 FUZ_GetMilliStart(void)
@ -452,7 +453,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
crcOrig = XXH64(sampleBuffer, sampleSize, 0);
/* compression test */
cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2)); /* use high compression levels with small samples, for speed */
//cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2)); /* high levels only for small samples, for manageable speed */
cLevelMod = MIN( ZSTD_maxCLevel(), (U32)MAX(1, 55 - 3*(int)sampleSizeLog) ); /* high levels only for small samples, for manageable speed */
cLevel = (FUZ_rand(&lseed) % cLevelMod) +1;
cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed");

View File

@ -1,6 +1,6 @@
/*
paramgrill.c - parameter tester for zstd_hc
Copyright (C) Yann Collet 2015
paramgrill.c - parameter tester for zstd
Copyright (C) Yann Collet 2015-2016
GPL v2 License
@ -19,11 +19,10 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
- zstd homepage : http://www.zstd.net/
*/
/**************************************
/*-************************************
* Compiler Options
**************************************/
/* Disable some Visual warning messages */
@ -48,8 +47,8 @@
#endif
/**************************************
* Includes
/*-************************************
* Dependencies
**************************************/
#include <stdlib.h> /* malloc */
#include <stdio.h> /* fprintf, fopen, ftello64 */
@ -71,7 +70,7 @@
#include "xxhash.h"
/**************************************
/*-************************************
* Compiler Options
**************************************/
/* S_ISREG & gettimeofday() are not supported by MSVC */
@ -80,7 +79,7 @@
#endif
/**************************************
/*-************************************
* Constants
**************************************/
#define PROGRAM_DESCRIPTION "ZSTD_HC parameters tester"
@ -98,6 +97,8 @@
#define NBLOOPS 2
#define TIMELOOP 2000
#define NB_LEVELS_TRACKED 30
static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
#define DEFAULT_CHUNKSIZE (4<<20)
@ -110,13 +111,13 @@ static const int g_maxVariationTime = 60000; /* 60 sec */
static const int g_maxNbVariations = 64;
/**************************************
/*-************************************
* Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/**************************************
/*-************************************
* Benchmark Parameters
**************************************/
static U32 g_nbIterations = NBLOOPS;
@ -126,7 +127,7 @@ static U32 g_rand = 1;
static U32 g_singleRun = 0;
static U32 g_target = 0;
static U32 g_noSeed = 0;
static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, ZSTD_greedy };
static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, 0, ZSTD_greedy };
void BMK_SetNbIterations(int nbLoops)
{
@ -135,7 +136,7 @@ void BMK_SetNbIterations(int nbLoops)
}
/*********************************************************
/*-*******************************************************
* Private functions
*********************************************************/
@ -187,8 +188,7 @@ static size_t BMK_findMaxMem(U64 requiredMem)
if (requiredMem > maxMemory) requiredMem = maxMemory;
requiredMem += 2*step;
while (!testmem)
{
while (!testmem) {
requiredMem -= step;
testmem = (BYTE*) malloc ((size_t)requiredMem);
}
@ -226,7 +226,7 @@ U32 FUZ_rand(U32* src)
}
/*********************************************************
/*-*******************************************************
* Bench functions
*********************************************************/
typedef struct {
@ -265,14 +265,14 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
U32 Hlog = params.hashLog;
U32 Slog = params.searchLog;
U32 Slength = params.searchLength;
U32 Tlength = params.targetLength;
ZSTD_strategy strat = params.strategy;
char name[30] = { 0 };
U64 crcOrig;
/* Memory allocation & restrictions */
snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%1u", Wlog, Clog, Hlog, Slog, Slength, strat);
if (!compressedBuffer || !resultBuffer || !blockTable)
{
snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%03uS%1u", Wlog, Clog, Hlog, Slog, Slength, Tlength, strat);
if (!compressedBuffer || !resultBuffer || !blockTable) {
DISPLAY("\nError: not enough memory!\n");
free(compressedBuffer);
free(resultBuffer);
@ -290,8 +290,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
const char* srcPtr = (const char*)srcBuffer;
char* cPtr = (char*)compressedBuffer;
char* resPtr = (char*)resultBuffer;
for (i=0; i<nbBlocks; i++)
{
for (i=0; i<nbBlocks; i++) {
size_t thisBlockSize = MIN(remaining, blockSize);
blockTable[i].srcPtr = srcPtr;
blockTable[i].cPtr = cPtr;
@ -302,8 +301,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
cPtr += blockTable[i].cRoom;
resPtr += thisBlockSize;
remaining -= thisBlockSize;
}
}
} }
/* warming up memory */
RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.10, 1);
@ -318,8 +316,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
const int startTime =BMK_GetMilliStart();
DISPLAY("\r%79s\r", "");
for (loopNb = 1; loopNb <= g_nbIterations; loopNb++)
{
for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
int nbLoops;
int milliTime;
U32 blockNb;
@ -336,8 +333,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
milliTime = BMK_GetMilliStart();
while (BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
while (BMK_GetMilliSpan(milliTime) < TIMELOOP) {
for (blockNb=0; blockNb<nbBlocks; blockNb++)
blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx,
blockTable[blockNb].cPtr, blockTable[blockNb].cRoom,
@ -367,8 +363,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
milliTime = BMK_GetMilliStart();
while (BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++)
{
for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++) {
for (blockNb=0; blockNb<nbBlocks; blockNb++)
blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
@ -384,24 +379,19 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
/* CRC Checking */
crcCheck = XXH64(resultBuffer, srcSize, 0);
if (crcOrig!=crcCheck)
{
if (crcOrig!=crcCheck) {
unsigned u;
unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize));
DISPLAY("\n!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcOrig, (unsigned)crcCheck);
for (u=0; u<srcSize; u++)
{
if (((const BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u])
{
for (u=0; u<srcSize; u++) {
if (((const BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u]) {
printf("Decoding error at pos %u (block %u, pos %u) \n", u, u / eBlockSize, u % eBlockSize);
break;
}
}
} }
break;
}
#endif
}
}
} }
/* End cleaning */
DISPLAY("\r");
@ -415,21 +405,23 @@ const char* g_stratName[] = { "ZSTD_fast ",
"ZSTD_greedy ",
"ZSTD_lazy ",
"ZSTD_lazy2 ",
"ZSTD_btlazy2" };
"ZSTD_btlazy2",
"ZSTD_opt ",
"ZSTD_btopt " };
static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_parameters params, size_t srcSize)
{
DISPLAY("\r%79s\r", "");
fprintf(f," {%3u,%3u,%3u,%3u,%3u,%3u, %s }, ",
fprintf(f," {%3u,%3u,%3u,%3u,%3u,%3u,%3u, %s }, ",
0, params.windowLog, params.contentLog, params.hashLog, params.searchLog, params.searchLength,
g_stratName[(U32)(params.strategy)]);
params.targetLength, g_stratName[(U32)(params.strategy)]);
fprintf(f,
"/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.);
}
static U32 g_cSpeedTarget[ZSTD_MAX_CLEVEL+1] = { 0 };
static U32 g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0 }; /* NB_LEVELS_TRACKED : checked at main() */
typedef struct {
BMK_result_t result;
@ -438,14 +430,12 @@ typedef struct {
static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize)
{
int cLevel;
unsigned cLevel;
fprintf(f, "\n /* Proposed configurations : */ \n");
fprintf(f, "#define ZSTD_MAX_CLEVEL %2u \n", ZSTD_MAX_CLEVEL);
fprintf(f, "static const ZSTD_parameters ZSTD_defaultParameters[ZSTD_MAX_CLEVEL+1] = {\n");
fprintf(f, " /* l, W, C, H, S, L, strat */ \n");
fprintf(f, " /* l, W, C, H, S, L, T, strat */ \n");
for (cLevel=0; cLevel <= ZSTD_MAX_CLEVEL; cLevel++)
for (cLevel=0; cLevel <= ZSTD_maxCLevel(); cLevel++)
BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize);
}
@ -465,16 +455,14 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
{
BMK_result_t testResult;
int better = 0;
int cLevel;
unsigned cLevel;
BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);
for (cLevel = 1; cLevel <= ZSTD_MAX_CLEVEL; cLevel++)
{
for (cLevel = 1; cLevel <= ZSTD_maxCLevel(); cLevel++) {
if (testResult.cSpeed < g_cSpeedTarget[cLevel])
continue; /* not fast enough for this level */
if (winners[cLevel].result.cSize==0)
{
if (winners[cLevel].result.cSize==0) {
/* first solution for this cLevel */
winners[cLevel].result = testResult;
winners[cLevel].params = params;
@ -483,8 +471,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
continue;
}
if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) )
{
if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
/* Validate solution is "good enough" */
double W_ratio = (double)srcSize / testResult.cSize;
double O_ratio = (double)srcSize / winners[cLevel].result.cSize;
@ -509,8 +496,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
if (W_DMemUsed_note < O_DMemUsed_note)
{
if (W_DMemUsed_note < O_DMemUsed_note) {
/* uses too much Decompression memory for too little benefit */
if (W_ratio > O_ratio)
DISPLAY ("Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n",
@ -518,8 +504,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel);
continue;
}
if (W_CMemUsed_note < O_CMemUsed_note)
{
if (W_CMemUsed_note < O_CMemUsed_note) {
/* uses too much memory for compression for too little benefit */
if (W_ratio > O_ratio)
DISPLAY ("Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n",
@ -527,8 +512,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
O_ratio, (double)(O_CMemUsed) / 1024 / 1024, cLevel);
continue;
}
if (W_CSpeed_note < O_CSpeed_note )
{
if (W_CSpeed_note < O_CSpeed_note ) {
/* too large compression speed difference for the compression benefit */
if (W_ratio > O_ratio)
DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
@ -536,8 +520,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
O_ratio, (double)(winners[cLevel].result.cSpeed) / 1000., cLevel);
continue;
}
if (W_DSpeed_note < O_DSpeed_note )
{
if (W_DSpeed_note < O_DSpeed_note ) {
/* too large decompression speed difference for the compression benefit */
if (W_ratio > O_ratio)
DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
@ -554,9 +537,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
BMK_printWinner(stdout, cLevel, testResult, params, srcSize);
better = 1;
}
}
} }
return better;
}
@ -567,10 +548,9 @@ static ZSTD_parameters* sanitizeParams(ZSTD_parameters params)
{
g_params = params;
if (params.strategy == ZSTD_fast)
{
g_params.contentLog = 0;
g_params.searchLog = 0;
}
g_params.contentLog = 0, g_params.searchLog = 0;
if ((params.strategy != ZSTD_opt) && (params.strategy != ZSTD_btopt ))
g_params.targetLength = 0;
return &g_params;
}
@ -578,9 +558,8 @@ static ZSTD_parameters* sanitizeParams(ZSTD_parameters params)
static void paramVariation(ZSTD_parameters* p)
{
U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1;
for (; nbChanges; nbChanges--)
{
const U32 changeID = FUZ_rand(&g_rand) % 12;
for (; nbChanges; nbChanges--) {
const U32 changeID = FUZ_rand(&g_rand) % 14;
switch(changeID)
{
case 0:
@ -607,6 +586,10 @@ static void paramVariation(ZSTD_parameters* p)
p->strategy = (ZSTD_strategy)(((U32)p->strategy)+1); break;
case 11:
p->strategy = (ZSTD_strategy)(((U32)p->strategy)-1); break;
case 12:
p->targetLength *= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
case 13:
p->targetLength /= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
}
}
ZSTD_validateParams(p);
@ -632,8 +615,7 @@ static void playAround(FILE* f, winnerInfo_t* winners,
int nbVariations = 0;
const int startTime = BMK_GetMilliStart();
while (BMK_GetMilliSpan(startTime) < g_maxVariationTime)
{
while (BMK_GetMilliSpan(startTime) < g_maxVariationTime) {
ZSTD_parameters p = params;
if (nbVariations++ > g_maxNbVariations) break;
@ -658,15 +640,15 @@ static void playAround(FILE* f, winnerInfo_t* winners,
static void potentialRandomParams(ZSTD_parameters* p, U32 inverseChance)
{
U32 chance = (FUZ_rand(&g_rand) % (inverseChance+1));
if (!chance)
{
if (!chance) {
/* totally random entry */
p->contentLog = FUZ_rand(&g_rand) % (ZSTD_CONTENTLOG_MAX+1 - ZSTD_CONTENTLOG_MIN) + ZSTD_CONTENTLOG_MIN;
p->hashLog = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN;
p->searchLog = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN;
p->windowLog = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN;
p->searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN;
p->strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btlazy2+1));
p->targetLength=FUZ_rand(&g_rand) % (ZSTD_TARGETLENGTH_MAX+1 - ZSTD_TARGETLENGTH_MIN) + ZSTD_TARGETLENGTH_MIN;
p->strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btopt +1));
ZSTD_validateParams(p);
}
}
@ -676,9 +658,8 @@ static void BMK_selectRandomStart(
const void* srcBuffer, size_t srcSize,
ZSTD_CCtx* ctx)
{
U32 id = (FUZ_rand(&g_rand) % (ZSTD_MAX_CLEVEL+1));
if ((id==0) || (winners[id].params.windowLog==0))
{
U32 id = (FUZ_rand(&g_rand) % (ZSTD_maxCLevel()+1));
if ((id==0) || (winners[id].params.windowLog==0)) {
/* totally random entry */
ZSTD_parameters p;
potentialRandomParams(&p, 1);
@ -695,14 +676,14 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
{
ZSTD_CCtx* ctx = ZSTD_createCCtx();
ZSTD_parameters params;
winnerInfo_t winners[ZSTD_MAX_CLEVEL+1];
winnerInfo_t winners[NB_LEVELS_TRACKED];
int i;
unsigned u;
const char* rfName = "grillResults.txt";
FILE* f;
const size_t blockSize = g_blockSize ? g_blockSize : srcSize;
if (g_singleRun)
{
if (g_singleRun) {
BMK_result_t testResult;
g_params.srcSize = blockSize;
ZSTD_validateParams(&g_params);
@ -718,8 +699,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
if (g_target)
g_cSpeedTarget[1] = g_target * 1000;
else
{
else {
/* baseline config for level 1 */
BMK_result_t testResult;
params = ZSTD_getParams(1, blockSize);
@ -728,14 +708,13 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
}
/* establish speed objectives (relative to level 1) */
for (i=2; i<=ZSTD_MAX_CLEVEL; i++)
g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) >> 5;
for (u=2; u<=ZSTD_maxCLevel(); u++)
g_cSpeedTarget[u] = (g_cSpeedTarget[u-1] * 25) >> 5;
/* populate initial solution */
{
const int maxSeeds = g_noSeed ? 1 : ZSTD_MAX_CLEVEL;
for (i=1; i<=maxSeeds; i++)
{
const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
for (i=1; i<=maxSeeds; i++) {
params = ZSTD_getParams(i, blockSize);
ZSTD_validateParams(&params);
BMK_seed(winners, params, srcBuffer, srcSize, ctx);
@ -746,8 +725,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
/* start tests */
{
const int milliStart = BMK_GetMilliStart();
do
{
do {
BMK_selectRandomStart(f, winners, srcBuffer, srcSize, ctx);
} while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
}
@ -764,17 +742,13 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
static int benchSample(void)
{
char* origBuff;
void* origBuff;
size_t benchedSize = sampleSize;
const char* name = "Sample 10MiB";
/* Allocation */
origBuff = (char*) malloc((size_t)benchedSize);
if(!origBuff)
{
DISPLAY("\nError: not enough memory!\n");
return 12;
}
origBuff = malloc(benchedSize);
if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; }
/* Fill buffer */
RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
@ -794,8 +768,7 @@ int benchFiles(char** fileNamesTable, int nbFiles)
int fileIdx=0;
/* Loop for each file */
while (fileIdx<nbFiles)
{
while (fileIdx<nbFiles) {
FILE* inFile;
char* inFileName;
U64 inFileSize;
@ -806,25 +779,21 @@ int benchFiles(char** fileNamesTable, int nbFiles)
/* Check file existence */
inFileName = fileNamesTable[fileIdx++];
inFile = fopen( inFileName, "rb" );
if (inFile==NULL)
{
if (inFile==NULL) {
DISPLAY( "Pb opening %s\n", inFileName);
return 11;
}
/* Memory allocation & restrictions */
inFileSize = BMK_GetFileSize(inFileName);
benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
if (benchedSize < inFileSize)
{
DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
}
/* Alloc */
origBuff = (char*) malloc((size_t)benchedSize);
if(!origBuff)
{
if(!origBuff) {
DISPLAY("\nError: not enough memory!\n");
fclose(inFile);
return 12;
@ -835,8 +804,7 @@ int benchFiles(char** fileNamesTable, int nbFiles)
readSize = fread(origBuff, 1, benchedSize, inFile);
fclose(inFile);
if(readSize != benchedSize)
{
if(readSize != benchedSize) {
DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
free(origBuff);
return 13;
@ -862,8 +830,7 @@ int optimizeForSize(char* inFileName)
/* Check file existence */
inFile = fopen( inFileName, "rb" );
if (inFile==NULL)
{
if (inFile==NULL) {
DISPLAY( "Pb opening %s\n", inFileName);
return 11;
}
@ -873,14 +840,11 @@ int optimizeForSize(char* inFileName)
benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
if (benchedSize < inFileSize)
{
DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
}
/* Alloc */
origBuff = (char*) malloc((size_t)benchedSize);
if(!origBuff)
{
if(!origBuff) {
DISPLAY("\nError: not enough memory!\n");
fclose(inFile);
return 12;
@ -891,8 +855,7 @@ int optimizeForSize(char* inFileName)
readSize = fread(origBuff, 1, benchedSize, inFile);
fclose(inFile);
if(readSize != benchedSize)
{
if(readSize != benchedSize) {
DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
free(origBuff);
return 13;
@ -916,9 +879,8 @@ int optimizeForSize(char* inFileName)
/* find best solution from default params */
{
const int maxSeeds = g_noSeed ? 1 : ZSTD_MAX_CLEVEL;
for (i=1; i<=maxSeeds; i++)
{
const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
for (i=1; i<=maxSeeds; i++) {
params = ZSTD_getParams(i, blockSize);
BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
if ( (candidate.cSize < winner.result.cSize)
@ -927,16 +889,14 @@ int optimizeForSize(char* inFileName)
winner.params = params;
winner.result = candidate;
BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize);
}
}
} }
}
BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
/* start tests */
{
const int milliStart = BMK_GetMilliStart();
do
{
do {
params = winner.params;
paramVariation(&params);
potentialRandomParams(&params, 16);
@ -950,13 +910,11 @@ int optimizeForSize(char* inFileName)
/* improvement found => new winner */
if ( (candidate.cSize < winner.result.cSize)
||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) )
{
||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) ) {
winner.params = params;
winner.result = candidate;
BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
}
} while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
}
@ -972,7 +930,7 @@ int optimizeForSize(char* inFileName)
}
int usage(char* exename)
static int usage(char* exename)
{
DISPLAY( "Usage :\n");
DISPLAY( " %s [arg] file\n", exename);
@ -982,16 +940,17 @@ int usage(char* exename)
return 0;
}
int usage_advanced(void)
static int usage_advanced(void)
{
DISPLAY( "\nAdvanced options :\n");
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
DISPLAY( " -B# : cut input into blocks of size # (default : single block)\n");
DISPLAY( " -P# : generated sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
DISPLAY( " -S : Single run\n");
return 0;
}
int badusage(char* exename)
static int badusage(char* exename)
{
DISPLAY("Wrong parameters\n");
usage(exename);
@ -1008,6 +967,12 @@ int main(int argc, char** argv)
U32 optimizer = 0;
U32 main_pause = 0;
/* checks */
if (NB_LEVELS_TRACKED <= ZSTD_maxCLevel()) {
DISPLAY("Error : NB_LEVELS_TRACKED <= ZSTD_maxCLevel() \n");
exit(1);
}
/* Welcome message */
DISPLAY(WELCOME_MESSAGE);
@ -1022,12 +987,10 @@ int main(int argc, char** argv)
if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
/* Decode command (note : aggregated commands are allowed) */
if (argument[0]=='-')
{
if (argument[0]=='-') {
argument++;
while (argument[0]!=0)
{
while (argument[0]!=0) {
switch(argument[0])
{
@ -1050,8 +1013,7 @@ int main(int argc, char** argv)
argument++;
{
U32 proba32 = 0;
while ((argument[0]>= '0') && (argument[0]<= '9'))
{
while ((argument[0]>= '0') && (argument[0]<= '9')) {
proba32 *= 10;
proba32 += argument[0] - '0';
argument++;
@ -1070,8 +1032,7 @@ int main(int argc, char** argv)
g_singleRun = 1;
argument++;
g_params = ZSTD_getParams(2, g_blockSize);
for ( ; ; )
{
for ( ; ; ) {
switch(*argument)
{
case 'w':
@ -1104,14 +1065,16 @@ int main(int argc, char** argv)
while ((*argument>= '0') && (*argument<='9'))
g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0';
continue;
case 't': /* strategy */
g_params.strategy = (ZSTD_strategy)0;
case 't': /* target length */
g_params.targetLength = 0;
argument++;
while ((*argument>= '0') && (*argument<='9'))
{
g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy *10);
g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy + *argument++ - '0');
}
g_params.targetLength *= 10, g_params.targetLength += *argument++ - '0';
continue;
case 'S': /* strategy */
argument++;
while ((*argument>= '0') && (*argument<='9'))
g_params.strategy = (ZSTD_strategy)(*argument++ - '0');
continue;
case 'L':
{
@ -1132,8 +1095,7 @@ int main(int argc, char** argv)
case 'T':
argument++;
g_target = 0;
while ((*argument >= '0') && (*argument <= '9'))
{
while ((*argument >= '0') && (*argument <= '9')) {
g_target *= 10;
g_target += *argument - '0';
argument++;
@ -1167,8 +1129,7 @@ int main(int argc, char** argv)
if (filenamesStart==0)
result = benchSample();
else
{
else {
if (optimizer)
result = optimizeForSize(input_filename);
else
@ -1179,4 +1140,3 @@ int main(int argc, char** argv)
return result;
}

View File

@ -16,28 +16,45 @@ roundTripTest() {
rm -f tmp1 tmp2
echo "roundTripTest: ./datagen $1 $p | $ZSTD -v$c | $ZSTD -d"
./datagen $1 $p | md5sum > tmp1
./datagen $1 $p | $ZSTD -v$c | $ZSTD -d | md5sum > tmp2
./datagen $1 $p | $ZSTD -vq$c | $ZSTD -d | md5sum > tmp2
diff -q tmp1 tmp2
}
[ -n "$ZSTD" ] || die "ZSTD variable must be defined!"
printf "\n**** frame concatenation **** "
echo "\n**** simple tests **** "
./datagen > tmp
$ZSTD tmp
$ZSTD -99 tmp && die "too large compression level undetected"
$ZSTD tmp -c > tmpCompressed
$ZSTD tmp --stdout > tmpCompressed
$ZSTD -d tmpCompressed && die "wrong suffix error not detected!"
$ZSTD -d tmpCompressed -c > tmpResult
$ZSTD --decompress tmpCompressed -c > tmpResult
$ZSTD --decompress tmpCompressed --stdout > tmpResult
$ZSTD -q tmp && die "overwrite check failed!"
$ZSTD -q -f tmp
$ZSTD -q --force tmp
echo "\n**** frame concatenation **** "
echo "hello " > hello.tmp
echo "world!" > world.tmp
cat hello.tmp world.tmp > helloworld.tmp
$ZSTD hello.tmp > hello.zstd
$ZSTD world.tmp > world.zstd
$ZSTD -c hello.tmp > hello.zstd
$ZSTD -c world.tmp > world.zstd
cat hello.zstd world.zstd > helloworld.zstd
$ZSTD -df helloworld.zstd > result.tmp
$ZSTD -dc helloworld.zstd > result.tmp
cat result.tmp
sdiff helloworld.tmp result.tmp
rm ./*.tmp ./*.zstd
echo frame concatenation test completed
echo "**** flush write error test **** "
echo "\n**** flush write error test **** "
echo "echo foo | $ZSTD > /dev/full"
echo foo | $ZSTD > /dev/full && die "write error not detected!"
@ -45,30 +62,52 @@ echo "echo foo | $ZSTD | $ZSTD -d > /dev/full"
echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
echo "*** dictionary tests *** "
echo "\n**** dictionary tests **** "
./datagen > tmpDict
./datagen -g1M | md5sum > tmp1
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dv | md5sum > tmp2
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2
diff -q tmp1 tmp2
echo "*** multiple files tests *** "
echo "\n**** multiple files tests **** "
./datagen -s1 > tmp1 2> /dev/null
./datagen -s2 -g100K > tmp2 2> /dev/null
./datagen -s3 -g1M > tmp3 2> /dev/null
$ZSTD -f -m tmp*
$ZSTD -f tmp*
echo "compress tmp* : "
ls -ls tmp*
rm tmp1 tmp2 tmp3
$ZSTD -df -m *.zst
echo "decompress tmp* : "
$ZSTD -df *.zst
ls -ls tmp*
$ZSTD -f -m tmp1 notHere tmp2 && die "missing file not detected!"
rm tmp*
echo "compress tmp* into stdout > tmpall : "
$ZSTD -c tmp1 tmp2 tmp3 > tmpall
ls -ls tmp*
echo "decompress tmpall* into stdout > tmpdec : "
cp tmpall tmpall2
$ZSTD -dc tmpall* > tmpdec
ls -ls tmp*
echo "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
echo "**** zstd round-trip tests **** "
echo "\n**** integrity tests **** "
echo "test one file (tmp1.zst) "
$ZSTD -t tmp1.zst
$ZSTD --test tmp1.zst
echo "test multiple files (*.zst) "
$ZSTD -t *.zst
echo "test good and bad files (*) "
$ZSTD -t * && die "bad files not detected !"
echo "\n**** zstd round-trip tests **** "
roundTripTest
roundTripTest '' 6
roundTripTest -g512K 6 # greedy, hash chain
roundTripTest -g512K 16 # btlazy2
roundTripTest -g512K 19 # btopt
rm tmp*
if [ "$1" != "--test-large-data" ]; then
echo "Skipping large data tests"
@ -102,3 +141,6 @@ roundTripTest -g50000000 -P94 19
roundTripTest -g99000000 -P99 20
roundTripTest -g6000000000 -P99 q
rm tmp*

View File

@ -175,7 +175,7 @@ static U64 XXH_read64(const void* memPtr)
return val;
}
#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
/* ****************************************

View File

@ -41,7 +41,7 @@
#include <sys/timeb.h> /* timeb */
#include <string.h> /* strcmp */
#include "mem.h"
#include "zstd_buffered.h"
#include "zbuff.h"
#include "zstd.h" /* ZSTD_compressBound() */
#include "datagen.h" /* RDG_genBuffer */
#include "xxhash.h" /* XXH64 */

View File

@ -14,7 +14,7 @@
.SH SYNOPSIS
.TP 5
\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] <OUTPUT-FILE>
\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] [-o <OUTPUT-FILE>]
.PP
.B unzstd
is equivalent to
@ -28,15 +28,13 @@ is equivalent to
.SH DESCRIPTION
.PP
\fBzstd\fR is a fast lossless compression algorithm.
It is based on the \fBLZ77\fR family, with FSE & huff0 entropy stage.
zstd offers compression speed > 200 MB/s per core.
It also features a fast decoder, with speed > 500 MB/s per core.
It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages.
\fBzstd\fR offers configurable compression speed, with fast modes at > 200 MB/s per core.
It also features a very fast decoder, with speed > 500 MB/s per core.
\fBzstd\fR command line is generally similar to gzip, but features the following differences :
- Original files are preserved
- By default, \fBzstd file1 file2\fR means : compress file1 \fBinto\fR file2.
Use \fB-m\fR command if you want : compress file1 into file1.zstd and file2 into file2.zst
- By default, when compressing files, \fBzstd\fR displays advancement notification and result summary.
- By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary.
Use \fB-q\fR to turn them off
@ -45,22 +43,20 @@ It also features a fast decoder, with speed > 500 MB/s per core.
.SH OPTIONS
.TP
.B \-#
# compression level [1-19](default:1)
# compression level [1-21] (default:1)
.TP
.B \-d
.BR \-d ", " --decompress
decompression
.TP
.B \-f
.B \-D file
use `file` as Dictionary to compress or decompress FILE(s)
.TP
.B \-o file
save result into `file` (only possible with a single input FILE)
.TP
.BR \-f ", " --force
overwrite output without prompting
.TP
.BR \-m ", " --multiple
multiple files mode
In this mode, multiple files on the command line means compression or decompression of each named file
Notifications are also turned off by default
.TP
.B \-D
Use next file as dictionary content for compress / decompression
.TP
.BR \-h/\-H ", " --help
display help/long help and exit
.TP
@ -73,17 +69,47 @@ It also features a fast decoder, with speed > 500 MB/s per core.
.BR \-q ", " --quiet
suppress warnings and notifications; specify twice to suppress errors too
.TP
.B \-c
.BR \-c ", " --stdout
force write to standard output, even if it is the console
.SH DICTIONARY
.PP
\fBzstd\fR offers \fIdictionary\fR compression, useful for very small files and messages.
It's possible to train \fBzstd\fR with some samples, the result of which is saved into a file called `dictionary`.
Then during compression and decompression, make reference to the same dictionary.
It will improve compression ratio of small files.
Typical gains range from ~10% (at 64KB) to x5 better (at <1KB).
.TP
.B \-z
force compression
.B \--train FILEs
use FILEs as training set to create a dictionary.
The training set should contain a lot of small files (> 100),
and weigh typically 100x the target dictionary size
(for example, 10 MB for a 100 KB dictionary)
.TP
.B \-o file
dictionary saved into `file` (default: dictionary)
.TP
.B \--maxdict #
limit dictionary to specified size (default : 112640)
.TP
.B \-s#
dictionary selectivity level (default: 9)
the smaller the value, the denser the dictionary, improving its efficiency but reducing its possible maximum size.
.SH BENCHMARK
.TP
.B \-b#
benchmark file(s) using compression level #
.TP
.B \-i#
iteration loops [1-9](default : 3), benchmark mode only
.TP
.B \-B#
cut file into independent blocks of size # (default: no block)
.TP
.B \-r#
test all compression levels from 1 to # (default: disabled)
.SH BUGS
Report bugs at:- https://github.com/Cyan4973/zstd/issues

View File

@ -1,6 +1,6 @@
/*
zstdcli - Command Line Interface (cli) for zstd
Copyright (C) Yann Collet 2014-2015
Copyright (C) Yann Collet 2014-2016
GPL v2 License
@ -19,25 +19,23 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- zstd source repository : https://github.com/Cyan4973/zstd
- ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
- zstd homepage : http://www.zstd.net/
*/
/*
Note : this is user program.
It is not part of zstd compression library.
The license of this compression CLI program is GPLv2.
The license of zstd library is BSD.
Note : this is a user program, not part of libzstd.
The license of this command line program is GPLv2.
The license of libzstd is BSD.
*/
/**************************************
/*-************************************
* Compiler Options
**************************************/
#define _CRT_SECURE_NO_WARNINGS /* Visual : removes warning from strcpy */
#define _POSIX_SOURCE 1 /* triggers fileno() within <stdio.h> on unix */
/**************************************
/*-************************************
* Includes
**************************************/
#include <stdio.h> /* fprintf, getchar */
@ -47,18 +45,18 @@
#ifndef ZSTD_NOBENCH
# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */
#endif
#include "zstd.h" /* ZSTD version numbers */
#include "zstd_static.h" /* ZSTD_maxCLevel, ZSTD version numbers */
#ifndef ZSTD_NODICT
# include "dibio.h" /* DiB_trainFromFiles */
#endif
/**************************************
/*-************************************
* OS-specific Includes
**************************************/
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _isatty */
# ifdef __MINGW32__
/* int _fileno(FILE *stream); // seems no longer useful // MINGW somehow forgets to include this windows declaration into <stdio.h> */
# endif
# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#else
@ -68,7 +66,7 @@
#endif
/**************************************
/*-************************************
* Constants
**************************************/
#define COMPRESSOR_NAME "zstd command line interface"
@ -78,7 +76,8 @@
# define ZSTD_VERSION "v" EXPAND_AND_QUOTE(ZSTD_VERSION_MAJOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_MINOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_RELEASE)
#endif
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR
#define ZSTD_EXTENSION ".zst"
#define ZSTD_CAT "zstdcat"
#define ZSTD_UNZSTD "unzstd"
@ -87,46 +86,36 @@
#define MB *(1 <<20)
#define GB *(1U<<30)
static const char* g_defaultDictName = "dictionary";
static const unsigned g_defaultMaxDictSize = 110 KB;
static const unsigned g_defaultDictCLevel = 5;
static const unsigned g_defaultSelectivityLevel = 9;
/**************************************
/*-************************************
* Display Macros
**************************************/
#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static FILE* displayOut;
static unsigned displayLevel = 2; // 0 : no display // 1: errors // 2 : + result + interaction + warnings ; // 3 : + progression; // 4 : + information
static unsigned displayLevel = 2; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
/**************************************
* Exceptions
**************************************/
#define DEBUG 0
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", error); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, "\n"); \
exit(error); \
}
/**************************************
/*-************************************
* Command Line
**************************************/
static int usage(const char* programName)
{
DISPLAY( "Usage :\n");
DISPLAY( " %s [arg] [input] [output]\n", programName);
DISPLAY( " %s [args] [FILE(s)] [-o file]\n", programName);
DISPLAY( "\n");
DISPLAY( "input : a filename\n");
DISPLAY( "FILE : a filename\n");
DISPLAY( " with no FILE, or when FILE is - , read standard input\n");
DISPLAY( "Arguments :\n");
DISPLAY( " -# : # compression level (1-19, default:1) \n");
DISPLAY( " -# : # compression level (1-%u, default:1) \n", ZSTD_maxCLevel());
DISPLAY( " -d : decompression \n");
DISPLAY( " -D file: use `file` as Dictionary \n");
//DISPLAY( " -z : force compression\n");
DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
DISPLAY( " -f : overwrite output without prompting \n");
DISPLAY( " -h/-H : display help/long help and exit\n");
return 0;
@ -139,16 +128,23 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Advanced arguments :\n");
DISPLAY( " -V : display Version number and exit\n");
DISPLAY( " -t : test compressed file integrity \n");
DISPLAY( " -v : verbose mode\n");
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -m : multiple input filenames mode \n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
#ifndef ZSTD_NODICT
DISPLAY( "Dictionary builder :\n");
DISPLAY( "--train : create a dictionary from a training set of files \n");
DISPLAY( " -o file: `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict:limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
#endif
#ifndef ZSTD_NOBENCH
DISPLAY( "Benchmark arguments :\n");
DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n");
DISPLAY( " -B# : cut file into independent blocks of size # (default : no block)\n");
DISPLAY( " -i# : iteration loops [1-9](default : 3)\n");
DISPLAY( " -r# : test all compression levels from 1 to # (default : disabled)\n");
DISPLAY( " -B# : cut file into independent blocks of size # (default: no block)\n");
DISPLAY( " -r# : test all compression levels from 1 to # (default: disabled)\n");
#endif
return 0;
}
@ -178,8 +174,10 @@ int main(int argCount, const char** argv)
forceStdout=0,
main_pause=0,
nextEntryIsDictionary=0,
multiple=0,
operationResult=0;
operationResult=0,
dictBuild=0,
nextArgumentIsOutFileName=0,
nextArgumentIsMaxDict=0;
unsigned cLevel = 1;
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */
unsigned filenameIdx = 0;
@ -187,11 +185,13 @@ int main(int argCount, const char** argv)
const char* outFileName = NULL;
const char* dictFileName = NULL;
char* dynNameSpace = NULL;
const char extension[] = ZSTD_EXTENSION;
int rangeBench = 1;
unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel;
/* init */
(void)rangeBench; /* not used when ZSTD_NOBENCH set */
(void)rangeBench; (void)dictCLevel; /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */
if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); }
displayOut = stderr;
/* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
@ -203,43 +203,46 @@ int main(int argCount, const char** argv)
if (!strcmp(programName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
/* command switches */
for(i=1; i<argCount; i++)
{
for(i=1; i<argCount; i++) {
const char* argument = argv[i];
if(!argument) continue; /* Protection if argument empty */
/* long commands (--long-word) */
if (!strcmp(argument, "--decompress")) { decode=1; continue; }
if (!strcmp(argument, "--force")) { FIO_overwriteMode(); continue; }
if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; }
if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); }
if (!strcmp(argument, "--multiple")) { multiple=1; continue; }
if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
if (!strcmp(argument, "--test")) { decode=1; outFileName=nulmark; FIO_overwriteMode(); continue; }
if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
if (!strcmp(argument, "--keep")) { continue; } /* does nothing, since preserving input is default; for gzip/xz compatibility */
/* '-' means stdin/stdout */
if (!strcmp(argument, "-")){
if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; continue; }
outFileName=stdoutmark; continue;
}
/* Decode commands (note : aggregated commands are allowed) */
if (argument[0]=='-')
{
/* '-' means stdin/stdout */
if (argument[1]==0)
{
if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; continue; }
outFileName=stdoutmark; continue;
}
if (argument[0]=='-') {
argument++;
while (argument[0]!=0)
{
while (argument[0]!=0) {
/* compression Level */
if ((*argument>='0') && (*argument<='9'))
{
if ((*argument>='0') && (*argument<='9')) {
cLevel = 0;
while ((*argument >= '0') && (*argument <= '9'))
{
while ((*argument >= '0') && (*argument <= '9')) {
cLevel *= 10;
cLevel += *argument - '0';
argument++;
}
dictCLevel = cLevel;
if (dictCLevel > ZSTD_maxCLevel())
return badusage(programName);
continue;
}
@ -250,24 +253,15 @@ int main(int argCount, const char** argv)
case 'H':
case 'h': displayOut=stdout; return usage_advanced(programName);
/* Compression (default) */
//case 'z': forceCompress = 1; break;
/* Decoding */
/* Decoding */
case 'd': decode=1; argument++; break;
/* Multiple input files */
case 'm': multiple=1; argument++; break;
/* Force stdout, even if stdout==console */
case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
/* Use file content as dictionary */
case 'D': nextEntryIsDictionary = 1; argument++; break;
/* Test -- not implemented */
/* case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break; */
/* Overwrite */
case 'f': FIO_overwriteMode(); argument++; break;
@ -280,6 +274,12 @@ int main(int argCount, const char** argv)
/* keep source file (default anyway, so useless; for gzip/xz compatibility) */
case 'k': argument++; break;
/* test compressed file */
case 't': decode=1; outFileName=nulmark; FIO_overwriteMode(); argument++; break;
/* output file name (dictionary name when used with --train) */
case 'o': nextArgumentIsOutFileName=1; argument++; break;
#ifndef ZSTD_NOBENCH
/* Benchmark */
case 'b': bench=1; argument++; break;
@ -316,6 +316,13 @@ int main(int argCount, const char** argv)
break;
#endif /* ZSTD_NOBENCH */
/* Selection level */
case 's': argument++;
dictSelect = 0;
while ((*argument >= '0') && (*argument <= '9'))
dictSelect *= 10, dictSelect += *argument++ - '0';
break;
/* Pause at the end (hidden option) */
case 'p': main_pause=1; argument++; break;
@ -326,14 +333,29 @@ int main(int argCount, const char** argv)
continue;
}
/* dictionary */
if (nextEntryIsDictionary)
{
if (nextEntryIsDictionary) {
nextEntryIsDictionary = 0;
dictFileName = argument;
continue;
}
if (nextArgumentIsOutFileName) {
nextArgumentIsOutFileName = 0;
outFileName = argument;
if (!strcmp(outFileName, "-")) outFileName = stdoutmark;
continue;
}
if (nextArgumentIsMaxDict) {
nextArgumentIsMaxDict = 0;
maxDictSize = 0;
while ((*argument>='0') && (*argument<='9'))
maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
if (*argument=='k' || *argument=='K')
maxDictSize <<= 10;
continue;
}
/* add filename to list */
filenameTable[filenameIdx++] = argument;
}
@ -342,81 +364,54 @@ int main(int argCount, const char** argv)
DISPLAYLEVEL(3, WELCOME_MESSAGE);
/* Check if benchmark is selected */
if (bench)
{
if (bench) {
#ifndef ZSTD_NOBENCH
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel*rangeBench);
#endif
goto _end;
}
/* No input filename ==> use stdin */
if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark;
/* Check if input defined as console; trigger an error in this case */
if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
/* No output filename ==> try to select one automatically (when possible) */
if (filenameIdx>=2) outFileName = filenameTable[1];
while (!outFileName) /* while : just to allow break statement */
{
if (!IS_CONSOLE(stdout)) { outFileName=stdoutmark; break; } /* Default to stdout whenever possible (i.e. not a console) */
if (!decode) /* compression to file */
{
size_t l = strlen(filenameTable[0]);
dynNameSpace = (char*)calloc(1,l+5);
if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); }
strcpy(dynNameSpace, filenameTable[0]);
strcpy(dynNameSpace+l, ZSTD_EXTENSION);
outFileName = dynNameSpace;
DISPLAYLEVEL(2, "Compressed filename will be : %s \n", outFileName);
break;
}
/* decompression to file (automatic name will work only if input filename has correct format extension) */
{
size_t filenameSize = strlen(filenameTable[0]);
if (strcmp(filenameTable[0] + (filenameSize-4), extension))
{
DISPLAYLEVEL(1, "unknown suffix - cannot determine destination filename\n");
return badusage(programName);
}
dynNameSpace = (char*)calloc(1,filenameSize+1);
if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); }
outFileName = dynNameSpace;
strcpy(dynNameSpace, filenameTable[0]);
dynNameSpace[filenameSize-4]=0;
DISPLAYLEVEL(2, "Decoding file %s \n", outFileName);
}
/* Check if dictionary builder is selected */
if (dictBuild) {
#ifndef ZSTD_NODICT
ZDICT_params_t dictParams;
dictParams.compressionLevel = dictCLevel;
dictParams.selectivityLevel = dictSelect;
dictParams.notificationLevel = displayLevel;
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
#endif
goto _end;
}
/* Check if output is defined as console; trigger an error in this case */
if (!strcmp(outFileName,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName);
/* No input filename ==> use stdin and stdout */
if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark, outFileName=stdoutmark;
/* No warning message in pure pipe mode (stdin + stdout) or multiple mode */
if (!strcmp(filenameTable[0], stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
if (multiple && (displayLevel==2)) displayLevel=1;
/* Check if input/output defined as console; trigger an error in this case */
if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName);
if ((!multiple) && (filenameIdx>2))
{
DISPLAY("Too many files on the command line (%u > 2). Do you mean -m ? \n", filenameIdx);
/* user-selected output filename, only possible with a single file */
if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) {
DISPLAY("Too many files (%u) on the command line. \n", filenameIdx);
return filenameIdx;
}
/* No warning message in pipe mode (stdin + stdout) or multiple mode */
if (!strcmp(filenameTable[0], stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
if ((filenameIdx>1) && (displayLevel==2)) displayLevel=1;
/* IO Stream/File */
FIO_setNotificationLevel(displayLevel);
if (decode)
{
if (multiple)
operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, ZSTD_EXTENSION, dictFileName);
else
if (decode) {
if (filenameIdx==1 && outFileName)
operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName);
}
else
{
if (multiple)
operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, ZSTD_EXTENSION, dictFileName, cLevel);
else
else
operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName);
} else { /* compression */
if (filenameIdx==1 && outFileName)
operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel);
else
operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName, cLevel);
}
_end:

View File

@ -1,183 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Declares the four build configurations of this project:
     Debug and Release, each for the Win32 and x64 platforms. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project-wide identity: the GUID used to reference this project, the
     Win32Proj keyword and the root namespace.
     NOTE(review): RootNamespace is "fuzzer" although the sources listed in
     this project are dictBuilder files; presumably a leftover from copying
     another project file. Confirm before reusing. -->
<PropertyGroup Label="Globals">
<ProjectGuid>{D4C01A3D-F609-4DA6-B53F-88D063CCE993}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>fuzzer</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<!-- Per-configuration build type. Every configuration produces an
     Application with the v120 platform toolset and the Unicode character set. -->
<!-- Debug configurations: link against the debug libraries. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<!-- Release configurations: no debug libraries, whole-program optimization enabled. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<!-- Extension settings: intentionally empty. -->
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<!-- Property sheets: for each configuration, import the per-user
     Microsoft.Cpp.<Platform>.user.props file, but only when it exists
     (guarded by the exists() condition on each Import). -->
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<!-- Per-configuration link mode, include search path and code analysis.
     Every configuration adds the repository's lib and lib\legacy directories
     (relative to the solution directory) ahead of the toolchain include
     directories and turns RunCodeAnalysis on. Incremental linking is enabled
     for Debug and disabled for Release. -->
<!-- Note: Debug|Win32 lists lib before lib\legacy; the other three
     configurations list lib\legacy first. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IncludePath>$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<RunCodeAnalysis>true</RunCodeAnalysis>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<RunCodeAnalysis>true</RunCodeAnalysis>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<RunCodeAnalysis>true</RunCodeAnalysis>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
<RunCodeAnalysis>true</RunCodeAnalysis>
</PropertyGroup>
<!-- Compiler (ClCompile) and linker (Link) settings, one group per
     configuration. Common to all four: warning level 4, no precompiled
     header, PREfast enabled with the /analyze:stacksize25000 option,
     Console subsystem, debug information generated at link time. -->
<!-- Debug|Win32: optimization disabled, _DEBUG defined. This is the only
     configuration that sets AdditionalDependencies explicitly (setargv.obj
     plus the listed system libraries). -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnablePREfast>true</EnablePREfast>
<AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>setargv.obj;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<!-- Debug|x64: optimization disabled, _DEBUG defined. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnablePREfast>true</EnablePREfast>
<AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<!-- Release|Win32: MaxSpeed optimization, function-level linking and
     intrinsics, NDEBUG defined; the linker folds identical COMDATs and
     removes unreferenced code. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnablePREfast>true</EnablePREfast>
<AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<!-- Release|x64: same settings as Release|Win32. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnablePREfast>true</EnablePREfast>
<AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<!-- C translation units compiled into this project: six files from the
     dictBuilder directory and three from lib (fse, huff0, zstd_decompress).
     Paths are relative to the project file location. -->
<ItemGroup>
<ClCompile Include="..\..\..\dictBuilder\dibcli.c" />
<ClCompile Include="..\..\..\dictBuilder\dictBuilder.c" />
<ClCompile Include="..\..\..\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\dictBuilder\sssort.c" />
<ClCompile Include="..\..\..\dictBuilder\trsort.c" />
<ClCompile Include="..\..\..\dictBuilder\utils.c" />
<ClCompile Include="..\..\..\lib\fse.c" />
<ClCompile Include="..\..\..\lib\huff0.c" />
<ClCompile Include="..\..\..\lib\zstd_decompress.c" />
</ItemGroup>
<!-- Header files listed in the project: five from the dictBuilder directory
     and four from lib. Paths are relative to the project file location. -->
<ItemGroup>
<ClInclude Include="..\..\..\dictBuilder\config.h" />
<ClInclude Include="..\..\..\dictBuilder\dictBuilder.h" />
<ClInclude Include="..\..\..\dictBuilder\divsufsort.h" />
<ClInclude Include="..\..\..\dictBuilder\divsufsort_private.h" />
<ClInclude Include="..\..\..\dictBuilder\lfs.h" />
<ClInclude Include="..\..\..\lib\fse.h" />
<ClInclude Include="..\..\..\lib\huff0.h" />
<ClInclude Include="..\..\..\lib\huff0_static.h" />
<ClInclude Include="..\..\..\lib\zstd.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -1,75 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Filter definitions. The filter names are in French and are referenced
     verbatim by the Filter elements below, so they must not be renamed in
     one place only:
       "Fichiers sources"       = source files   (c, cpp, asm, ...)
       "Fichiers d%27en-tête"   = header files   (h, hpp, inl, ...); %27 is an escaped apostrophe
       "Fichiers de ressources" = resource files (rc, ico, bmp, ...) -->
<ItemGroup>
<Filter Include="Fichiers sources">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Fichiers d%27en-tête">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Fichiers de ressources">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<!-- Assigns every compiled .c file to the "Fichiers sources" (source files) filter. -->
<ItemGroup>
<ClCompile Include="..\..\..\dictBuilder\dibcli.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\dictBuilder\dictBuilder.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\dictBuilder\divsufsort.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\dictBuilder\sssort.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\dictBuilder\trsort.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\dictBuilder\utils.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\fse.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\huff0.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_decompress.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
</ItemGroup>
<!-- Assigns every header to the "Fichiers d%27en-tête" (header files) filter. -->
<ItemGroup>
<ClInclude Include="..\..\..\dictBuilder\config.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\dictBuilder\dictBuilder.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\dictBuilder\divsufsort.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\dictBuilder\divsufsort_private.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\dictBuilder\lfs.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\fse.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\huff0.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\huff0_static.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zstd.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@ -161,9 +161,6 @@
<ItemGroup>
<ClCompile Include="..\..\..\lib\fse.c" />
<ClCompile Include="..\..\..\lib\huff0.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
<ClCompile Include="..\..\..\lib\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\zstd_decompress.c" />
<ClCompile Include="..\..\..\programs\datagen.c" />

View File

@ -24,24 +24,15 @@
<ClCompile Include="..\..\..\programs\datagen.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\huff0.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_compress.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_decompress.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\lib\fse.h">

View File

@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.24720.0
# Visual Studio 2013
VisualStudioVersion = 12.0.40629.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "zstd\zstd.vcxproj", "{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}"
EndProject
@ -11,8 +11,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\full
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdlib", "zstdlib\zstdlib.vcxproj", "{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dictBuilder", "dictBuilder\dictBuilder.vcxproj", "{D4C01A3D-F609-4DA6-B53F-88D063CCE993}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
@ -53,14 +51,6 @@ Global
{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}.Release|Win32.Build.0 = Release|Win32
{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}.Release|x64.ActiveCfg = Release|x64
{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}.Release|x64.Build.0 = Release|x64
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|Win32.ActiveCfg = Debug|Win32
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|Win32.Build.0 = Debug|Win32
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|x64.ActiveCfg = Debug|x64
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|x64.Build.0 = Debug|x64
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|Win32.ActiveCfg = Release|Win32
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|Win32.Build.0 = Release|Win32
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|x64.ActiveCfg = Release|x64
{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -19,23 +19,27 @@
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\lib\divsufsort.c" />
<ClCompile Include="..\..\..\lib\fse.c" />
<ClCompile Include="..\..\..\lib\huff0.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v04.c" />
<ClCompile Include="..\..\..\lib\zstd_buffered.c" />
<ClCompile Include="..\..\..\lib\zbuff.c" />
<ClCompile Include="..\..\..\lib\zdict.c" />
<ClCompile Include="..\..\..\lib\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\zstd_decompress.c" />
<ClCompile Include="..\..\..\programs\bench.c" />
<ClCompile Include="..\..\..\programs\datagen.c" />
<ClCompile Include="..\..\..\programs\dibio.c" />
<ClCompile Include="..\..\..\programs\fileio.c" />
<ClCompile Include="..\..\..\programs\legacy\fileio_legacy.c" />
<ClCompile Include="..\..\..\programs\xxhash.c" />
<ClCompile Include="..\..\..\programs\zstdcli.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\lib\divsufsort.h" />
<ClInclude Include="..\..\..\lib\fse.h" />
<ClInclude Include="..\..\..\lib\fse_static.h" />
<ClInclude Include="..\..\..\lib\huff0.h" />
@ -45,6 +49,10 @@
<ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
<ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
<ClInclude Include="..\..\..\lib\legacy\zstd_v04.h" />
<ClInclude Include="..\..\..\lib\zbuff.h" />
<ClInclude Include="..\..\..\lib\zbuff_static.h" />
<ClInclude Include="..\..\..\lib\zdict.h" />
<ClInclude Include="..\..\..\lib\zdict_static.h" />
<ClInclude Include="..\..\..\lib\zstd.h" />
<ClInclude Include="..\..\..\lib\zstd_buffered.h" />
<ClInclude Include="..\..\..\lib\zstd_buffered_static.h" />
@ -52,6 +60,7 @@
<ClInclude Include="..\..\..\lib\zstd_static.h" />
<ClInclude Include="..\..\..\programs\bench.h" />
<ClInclude Include="..\..\..\programs\datagen.h" />
<ClInclude Include="..\..\..\programs\dibio.h" />
<ClInclude Include="..\..\..\programs\fileio.h" />
<ClInclude Include="..\..\..\programs\legacy\fileio_legacy.h" />
<ClInclude Include="..\..\..\programs\xxhash.h" />

View File

@ -48,9 +48,6 @@
<ClCompile Include="..\..\..\lib\zstd_decompress.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_buffered.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
@ -60,6 +57,18 @@
<ClCompile Include="..\..\..\lib\legacy\zstd_v04.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\divsufsort.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zbuff.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zdict.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\..\..\programs\dibio.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\lib\fse.h">
@ -119,5 +128,23 @@
<ClInclude Include="..\..\..\lib\legacy\zstd_v04.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<!-- Headers for the suffix sorter, buffered streaming API and dictionary
     builder. They belong in the header filter ("Fichiers d%27en-tête"),
     like every other ClInclude entry in this file, not in the source-file
     filter. -->
<ClInclude Include="..\..\..\lib\divsufsort.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zbuff.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zbuff_static.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zdict.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zdict_static.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\..\..\programs\dibio.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@ -21,7 +21,7 @@
<ItemGroup>
<ClCompile Include="..\..\..\lib\fse.c" />
<ClCompile Include="..\..\..\lib\huff0.c" />
<ClCompile Include="..\..\..\lib\zstd_buffered.c" />
<ClCompile Include="..\..\..\lib\zbuff.c" />
<ClCompile Include="..\..\..\lib\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\zstd_decompress.c" />
</ItemGroup>
@ -34,9 +34,9 @@
<ClInclude Include="..\..\..\lib\huff0.h" />
<ClInclude Include="..\..\..\lib\huff0_static.h" />
<ClInclude Include="..\..\..\lib\mem.h" />
<ClInclude Include="..\..\..\lib\zbuff.h" />
<ClInclude Include="..\..\..\lib\zbuff_static.h" />
<ClInclude Include="..\..\..\lib\zstd.h" />
<ClInclude Include="..\..\..\lib\zstd_buffered.h" />
<ClInclude Include="..\..\..\lib\zstd_buffered_static.h" />
<ClInclude Include="..\..\..\lib\zstd_internal.h" />
<ClInclude Include="..\..\..\lib\zstd_static.h" />
<ClInclude Include="resource.h" />

View File

@ -21,15 +21,15 @@
<ClCompile Include="..\..\..\lib\huff0.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_buffered.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_compress.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zstd_decompress.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\lib\zbuff.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\lib\fse.h">
@ -59,12 +59,6 @@
<ClInclude Include="..\..\..\lib\zstd_internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zstd_buffered.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zstd_buffered_static.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\mem.h">
<Filter>Header Files</Filter>
</ClInclude>
@ -74,6 +68,12 @@
<ClInclude Include="..\..\..\lib\error_public.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zbuff.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\..\lib\zbuff_static.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="zstdlib.rc">