all: add zstd compression library

This commit is contained in:
Markus F.X.J. Oberhumer 2023-01-13 22:07:24 +01:00
parent c52d302f07
commit 9f830e0c41
10 changed files with 334 additions and 10 deletions

View File

@ -259,7 +259,7 @@ jobs:
git config --global core.autocrlf input
git --version && bash --version
git clone --depth=1 https://github.com/upx/upx-testsuite ../deps/upx-testsuite
mkdir -p -v build/$C/$B/{ucl,upx,zlib}
mkdir -p -v build/$C/$B/{ucl,upx,zlib,zstd}
- name: 'Set up Developer Command Prompt'
uses: ilammy/msvc-dev-cmd@v1
with:
@ -281,12 +281,17 @@ jobs:
cd %BDIR%\zlib
cl -MT -J -O2 -W3 -WX %DEFS% -c %H%\vendor\zlib\*.c
link -lib -out:zlib.lib *.obj
@REM ===== build zstd =====
cd %BDIR%\zstd
set s=%H%\vendor\zstd\lib
cl -MT -J -O2 -W4 -WX -DDYNAMIC_BMI2=0 -DZSTD_DISABLE_ASM %DEFS% -c %s%\common\*.c %s%\compress\*.c %s%\decompress\*.c
link -lib -out:zstd.lib *.obj
@REM ===== build UPX =====
cd %BDIR%\upx
set s=%H%\src
cat .GITREV.txt
set /p GITREV=<.GITREV.txt
cl -std:c++17 -Zc:__cplusplus -EHsc -J -O2 -W4 -WX -DUPX_VERSION_GITREV="""%GITREV%""" %DEFS% -I%H%\vendor -I%H%\vendor\boost-pfr\include -Feupx.exe %s%\*.cpp %s%\util\*.cpp %BDIR%\ucl\ucl.lib %BDIR%\zlib\zlib.lib /link setargv.obj
cl -std:c++17 -Zc:__cplusplus -EHsc -J -O2 -W4 -WX -DUPX_VERSION_GITREV="""%GITREV%""" -DWITH_ZSTD %DEFS% -I%H%\vendor -I%H%\vendor\boost-pfr\include -Feupx.exe %s%\*.cpp %s%\util\*.cpp %BDIR%\ucl\ucl.lib %BDIR%\zlib\zlib.lib %BDIR%\zstd\zstd.lib /link setargv.obj
- name: 'Make artifact'
shell: bash
run: |

View File

@ -6,12 +6,14 @@ if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.git")
option(UPX_CONFIG_DISABLE_GITREV "Do not compile with default Git version info." ON)
option(UPX_CONFIG_DISABLE_SANITIZE "Do not compile with default sanitize options." ON)
option(UPX_CONFIG_DISABLE_WERROR "Do not compile with default -Werror option." ON)
option(UPX_CONFIG_DISABLE_ZSTD "Do not compile with zstd; NOTE: zstd is WIP." ON)
else()
# strict config defaults for devel builds
message(STATUS "upx info: strict config defaults enabled")
option(UPX_CONFIG_DISABLE_GITREV "Do not compile with default Git version info." OFF)
option(UPX_CONFIG_DISABLE_SANITIZE "Do not compile with default sanitize options." OFF)
option(UPX_CONFIG_DISABLE_WERROR "Do not compile with default -Werror option." OFF)
option(UPX_CONFIG_DISABLE_ZSTD "Do not compile with zstd; NOTE: zstd is WIP." OFF)
endif()
# test config options (see below)
@ -109,6 +111,13 @@ list(SORT zlib_SOURCES)
add_library(upx_vendor_zlib STATIC ${zlib_SOURCES})
set_property(TARGET upx_vendor_zlib PROPERTY C_STANDARD 11)
# Vendored zstd: a static library built from every C file found exactly one
# directory level below vendor/zstd/lib. Skipped when zstd is disabled.
if(NOT UPX_CONFIG_DISABLE_ZSTD)
    file(GLOB zstd_SOURCES "vendor/zstd/lib/*/*.c")
    # hand the sources to add_library() in sorted order
    list(SORT zstd_SOURCES)
    add_library(upx_vendor_zstd STATIC ${zstd_SOURCES})
    set_target_properties(upx_vendor_zstd PROPERTIES C_STANDARD 11)
endif()
file(GLOB upx_SOURCES "src/*.cpp" "src/util/*.cpp")
list(SORT upx_SOURCES)
add_executable(upx ${upx_SOURCES})
@ -181,6 +190,17 @@ else()
target_compile_options(${t} PRIVATE -Wall -Wextra -Wvla -Wno-strict-prototypes ${warn_Werror})
endif()
# Compile settings for the vendored zstd library (only when zstd is enabled).
if(NOT UPX_CONFIG_DISABLE_ZSTD)
    set(t upx_vendor_zstd)
    # project helper defined elsewhere in this file
    upx_sanitize_target(${t})
    # Preprocessor macros go through target_compile_definitions() so that CMake
    # emits the define flag itself instead of receiving raw -D options.
    target_compile_definitions(${t} PRIVATE DYNAMIC_BMI2=0 ZSTD_DISABLE_ASM)
    if(MSVC)
        target_compile_options(${t} PRIVATE -J -W4 ${warn_WX})
    else()
        target_compile_options(${t} PRIVATE
            -Wall -Wextra -Wcast-align -Wcast-qual -Wpointer-arith -Wvla -Wwrite-strings
            ${warn_Werror}
        )
    endif()
endif()
set(t upx)
target_include_directories(${t} PRIVATE vendor vendor/boost-pfr/include)
target_compile_definitions(${t} PRIVATE $<$<CONFIG:Debug>:DEBUG=1>)
@ -199,6 +219,10 @@ else()
-Wshadow -Wvla -Wwrite-strings ${warn_Werror}
)
endif()
# When zstd is enabled: define WITH_ZSTD=1 for the target named by ${t} and
# link the vendored zstd static library into upx.
# NOTE(review): ${t} is presumably still "upx" at this point -- confirm.
if(NOT UPX_CONFIG_DISABLE_ZSTD)
target_compile_definitions(${t} PRIVATE WITH_ZSTD=1)
# NOTE(review): plain (keyword-less) signature; CMake does not allow mixing it
# with the PRIVATE/PUBLIC form on one target, so check the other
# target_link_libraries(upx ...) calls before adding a keyword here.
target_link_libraries(upx upx_vendor_zstd)
endif()
#***********************************************************************
# "make test"

View File

@ -150,3 +150,6 @@ endif
ifeq ($(wildcard ./vendor/zlib/crc32.c),)
$(error ERROR: missing git submodule; run 'git submodule update --init')
endif
# Stop right away when the vendored zstd sources are absent: $(wildcard) yields
# an empty string if ./vendor/zstd/lib/. does not exist.
ifeq ($(wildcard ./vendor/zstd/lib/.),)
$(error ERROR: missing git submodule; run 'git submodule update --init')
endif

View File

@ -117,6 +117,11 @@ int upx_compress( const upx_bytep src, unsigned src_len,
else if (M_IS_NRV2B(method) || M_IS_NRV2D(method) || M_IS_NRV2E(method))
r = upx_ucl_compress(src, src_len, dst, dst_len,
cb, method, level, cconf, cresult);
#endif
#if (WITH_ZSTD)
else if (M_IS_ZSTD(method))
r = upx_zstd_compress(src, src_len, dst, dst_len,
cb, method, level, cconf, cresult);
#endif
else {
throwInternalError("unknown compression method");
@ -164,6 +169,10 @@ int upx_decompress(const upx_bytep src, unsigned src_len,
#if (WITH_ZLIB)
else if (M_IS_DEFLATE(method))
r = upx_zlib_decompress(src, src_len, dst, dst_len, method, cresult);
#endif
#if (WITH_ZSTD)
else if (M_IS_ZSTD(method))
r = upx_zstd_decompress(src, src_len, dst, dst_len, method, cresult);
#endif
else {
throwInternalError("unknown decompression method");
@ -207,6 +216,10 @@ int upx_test_overlap( const upx_bytep buf,
#if (WITH_UCL)
else if (M_IS_NRV2B(method) || M_IS_NRV2D(method) || M_IS_NRV2E(method))
r = upx_ucl_test_overlap(buf, tbuf, src_off, src_len, dst_len, method, cresult);
#endif
#if (WITH_ZSTD)
else if (M_IS_ZSTD(method))
r = upx_zstd_test_overlap(buf, tbuf, src_off, src_len, dst_len, method, cresult);
#endif
else {
throwInternalError("unknown decompression method");

View File

@ -126,6 +126,28 @@ unsigned upx_zlib_crc32 (const void *buf, unsigned len, unsigned crc);
#endif
#if (WITH_ZSTD)
// zstd backend; these declarations exist only when WITH_ZSTD is enabled.

// Self-check of the zstd library; returns 0 on success.
int upx_zstd_init(void);
// Returns the zstd version string.
const char *upx_zstd_version_string(void);
// Compresses src[0..src_len) into dst. On entry *dst_len is the capacity of
// dst; on return it holds the compressed size (0 on failure).
// Returns UPX_E_OK or another UPX_E_* error code.
int upx_zstd_compress ( const upx_bytep src, unsigned src_len,
upx_bytep dst, unsigned* dst_len,
upx_callback_p cb,
int method, int level,
const upx_compress_config_t *cconf,
upx_compress_result_t *cresult );
// Decompresses src[0..src_len) into dst. On entry *dst_len is the capacity of
// dst; on return it holds the decompressed size (0 on failure).
// Returns UPX_E_OK or another UPX_E_* error code.
int upx_zstd_decompress ( const upx_bytep src, unsigned src_len,
upx_bytep dst, unsigned* dst_len,
int method,
const upx_compress_result_t *cresult );
// Checks that the compressed data at buf + src_off can be decompressed in
// place into the start of the same buffer; tbuf, when not null, is the
// expected decompressed data. Returns UPX_E_OK or another UPX_E_* error code.
int upx_zstd_test_overlap ( const upx_bytep buf,
const upx_bytep tbuf,
unsigned src_off, unsigned src_len,
unsigned* dst_len,
int method,
const upx_compress_result_t *cresult );
#endif
#endif /* already included */
/* vim:set ts=4 sw=4 et: */

231
src/compress_zstd.cpp Normal file
View File

@ -0,0 +1,231 @@
/* compress_zstd.cpp --
This file is part of the UPX executable compressor.
Copyright (C) 1996-2023 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
UPX and the UCL library are free software; you can redistribute them
and/or modify them under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Markus F.X.J. Oberhumer
<markus@oberhumer.com>
*/
#include "conf.h"
// Reset the zstd compression settings by clearing the whole object.
void zstd_compress_config_t::reset() {
    mem_clear(this, sizeof(*this));
}
#if WITH_ZSTD
#include "compress.h"
#include "util/membuffer.h"
#include <zstd/lib/zstd.h>
#include <zstd/lib/zstd_errors.h>
#include <zstd/lib/compress/hist.h>
static int convert_errno_from_zstd(size_t zr) {
const ZSTD_ErrorCode ze = ZSTD_getErrorCode(zr);
switch (ze) {
case ZSTD_error_memory_allocation:
return UPX_E_OUT_OF_MEMORY;
case ZSTD_error_srcSize_wrong:
return UPX_E_INPUT_OVERRUN;
case ZSTD_error_dstSize_tooSmall:
return UPX_E_OUTPUT_OVERRUN;
default:
break;
}
return UPX_E_ERROR;
}
/*************************************************************************
// TODO later: use advanced compression API for compression finetuning
**************************************************************************/
int upx_zstd_compress(const upx_bytep src, unsigned src_len, upx_bytep dst, unsigned *dst_len,
upx_callback_p cb_parm, int method, int level,
const upx_compress_config_t *cconf_parm, upx_compress_result_t *cresult) {
assert(method == M_ZSTD);
assert(level > 0);
assert(cresult != nullptr);
UNUSED(cb_parm);
int r = UPX_E_ERROR;
size_t zr;
const zstd_compress_config_t *const lcconf = cconf_parm ? &cconf_parm->conf_zstd : nullptr;
zstd_compress_result_t *const res = &cresult->result_zstd;
// TODO later: map level 1..10 to zstd-level 1..22
if (level == 10)
level = 22;
// cconf overrides
if (lcconf) {
UNUSED(lcconf);
}
res->dummy = 0;
zr = ZSTD_compress(dst, *dst_len, src, src_len, level);
if (ZSTD_isError(zr)) {
*dst_len = 0; // TODO ???
r = convert_errno_from_zstd(zr);
assert(r != UPX_E_OK);
} else {
assert(zr <= *dst_len);
*dst_len = (unsigned) zr;
r = UPX_E_OK;
}
return r;
}
/*************************************************************************
//
**************************************************************************/
// Decompress src[0..src_len) into dst with the one-shot ZSTD_decompress() call.
//
//   dst_len  in: capacity of dst; out: decompressed size, or 0 on failure
//   method   must be M_ZSTD
//   cresult  not used
//
// Returns UPX_E_OK on success, otherwise a UPX_E_* error code.
int upx_zstd_decompress(const upx_bytep src, unsigned src_len, upx_bytep dst, unsigned *dst_len,
                        int method, const upx_compress_result_t *cresult) {
    assert(method == M_ZSTD);
    UNUSED(method);
    UNUSED(cresult);

    const size_t zr = ZSTD_decompress(dst, *dst_len, src, src_len);
    if (ZSTD_isError(zr)) {
        *dst_len = 0; // TODO ???
        const int err = convert_errno_from_zstd(zr);
        assert(err != UPX_E_OK);
        return err;
    }
    // on success zr is the number of bytes written to dst
    assert(zr <= *dst_len);
    *dst_len = (unsigned) zr;
    return UPX_E_OK;
}
/*************************************************************************
// test_overlap - see <ucl/ucl.h> for semantics
**************************************************************************/
int upx_zstd_test_overlap(const upx_bytep buf, const upx_bytep tbuf, unsigned src_off,
unsigned src_len, unsigned *dst_len, int method,
const upx_compress_result_t *cresult) {
assert(method == M_ZSTD);
MemBuffer b(src_off + src_len);
memcpy(b + src_off, buf + src_off, src_len);
unsigned saved_dst_len = *dst_len;
int r = upx_zstd_decompress(raw_index_bytes(b, src_off, src_len), src_len,
raw_bytes(b, *dst_len), dst_len, method, cresult);
if (r != UPX_E_OK)
return r;
if (*dst_len != saved_dst_len)
return UPX_E_ERROR;
// NOTE: there is a very tiny possibility that decompression has
// succeeded but the data is not restored correctly because of
// in-place buffer overlapping, so we use an extra memcmp().
if (tbuf != nullptr && memcmp(tbuf, b, *dst_len) != 0)
return UPX_E_ERROR;
return UPX_E_OK;
}
/*************************************************************************
// misc
**************************************************************************/
// Verify that the zstd library in use reports the same version string as the
// zstd header this file was compiled against.
// Returns 0 when they match and -2 otherwise.
int upx_zstd_init(void) {
    const bool same_version = (strcmp(ZSTD_VERSION_STRING, ZSTD_versionString()) == 0);
    return same_version ? 0 : -2;
}
// Version string of the zstd header this file was compiled against.
const char *upx_zstd_version_string(void) {
    return ZSTD_VERSION_STRING;
}
/*************************************************************************
// doctest checks
**************************************************************************/
#if DEBUG && !defined(DOCTEST_CONFIG_DISABLE) && 1
#include "util/membuffer.h"
static bool check_zstd(const int method, const int level, const unsigned expected_c_len) {
const unsigned u_len = 16384;
const unsigned c_extra = 4096;
MemBuffer u_buf, c_buf, d_buf;
unsigned c_len, d_len;
upx_compress_result_t cresult;
int r;
u_buf.alloc(u_len);
memset(u_buf, 0, u_len);
c_buf.allocForCompression(u_len, c_extra);
d_buf.allocForDecompression(u_len);
c_len = c_buf.getSize() - c_extra;
r = upx_zstd_compress(raw_bytes(u_buf, u_len), u_len, raw_index_bytes(c_buf, c_extra, c_len),
&c_len, nullptr, method, level, NULL_cconf, &cresult);
if (r != 0 || c_len != expected_c_len)
return false;
d_len = d_buf.getSize();
r = upx_zstd_decompress(raw_index_bytes(c_buf, c_extra, c_len), c_len, raw_bytes(d_buf, d_len),
&d_len, method, nullptr);
if (r != 0 || d_len != u_len || memcmp(u_buf, d_buf, u_len) != 0)
return false;
d_len = u_len - 1;
r = upx_zstd_decompress(raw_index_bytes(c_buf, c_extra, c_len), c_len, raw_bytes(d_buf, d_len),
&d_len, method, nullptr);
if (r == 0)
return false;
// TODO: rewrite Packer::findOverlapOverhead() so that we can test it here
// unsigned x_len = d_len;
// r = upx_zstd_test_overlap(c_buf, u_buf, c_extra, c_len, &x_len, method, nullptr);
return true;
}
// Round-trip check at levels 1, 3 and 5; each level is expected to compress
// the 16384 zero bytes used by check_zstd() into exactly 19 bytes.
TEST_CASE("compress_zstd") {
CHECK(check_zstd(M_ZSTD, 1, 19));
CHECK(check_zstd(M_ZSTD, 3, 19));
CHECK(check_zstd(M_ZSTD, 5, 19));
}
#endif // DEBUG
// Decompress a small hard-coded block of compressed data, then check the
// error codes reported for truncated input and for a too-small output buffer.
TEST_CASE("upx_zstd_decompress") {
typedef const upx_byte C;
C *c_data;
upx_byte d_buf[32];
unsigned d_len;
int r;
// 16 bytes of compressed data; the first CHECK expects 32 bytes of output
c_data = (C *) "\x28\xb5\x2f\xfd\x20\x20\x3d\x00\x00\x08\xff\x01\x00\x34\x4e\x08";
d_len = 32;
r = upx_zstd_decompress(c_data, 16, d_buf, &d_len, M_ZSTD, nullptr);
CHECK((r == 0 && d_len == 32));
// last input byte missing; d_len is not reset and keeps the value left by
// the previous call (32 when that call succeeded)
r = upx_zstd_decompress(c_data, 15, d_buf, &d_len, M_ZSTD, nullptr);
CHECK(r == UPX_E_INPUT_OVERRUN);
// complete input, but room for only 31 of the 32 output bytes
d_len = 31;
r = upx_zstd_decompress(c_data, 16, d_buf, &d_len, M_ZSTD, nullptr);
CHECK(r == UPX_E_OUTPUT_OVERRUN);
}
#endif // WITH_ZSTD
/* vim:set ts=4 sw=4 et: */

View File

@ -563,6 +563,7 @@ constexpr bool string_ge(const char *a, const char *b) {
//#define M_CL1B_LE16 13
#define M_LZMA 14
#define M_DEFLATE 15 /* zlib */
#define M_ZSTD 16
// compression methods internal usage
#define M_ALL (-1)
#define M_END (-2)
@ -576,6 +577,7 @@ constexpr bool string_ge(const char *a, const char *b) {
//#define M_IS_CL1B(x) ((x) >= M_CL1B_LE32 && (x) <= M_CL1B_LE16)
#define M_IS_LZMA(x) (((x) & 255) == M_LZMA)
#define M_IS_DEFLATE(x) ((x) == M_DEFLATE)
#define M_IS_ZSTD(x) ((x) == M_ZSTD)
// filters
@ -672,13 +674,11 @@ struct lzma_compress_config_t
void reset();
};
struct ucl_compress_config_t : public REAL_ucl_compress_config_t
{
void reset() { memset(this, 0xff, sizeof(*this)); }
};
struct zlib_compress_config_t
{
typedef OptVar<unsigned, 8u, 1u, 9u> mem_level_t; // ml
@ -692,13 +692,20 @@ struct zlib_compress_config_t
void reset();
};
// zstd-specific compression settings; stored as conf_zstd in upx_compress_config_t.
struct zstd_compress_config_t
{
unsigned dummy; // placeholder member; upx_zstd_compress() reads no settings yet
void reset(); // declared here, defined out of line
};
struct upx_compress_config_t
{
lzma_compress_config_t conf_lzma;
ucl_compress_config_t conf_ucl;
zlib_compress_config_t conf_zlib;
void reset() { conf_lzma.reset(); conf_ucl.reset(); conf_zlib.reset(); }
zstd_compress_config_t conf_zstd;
void reset() { conf_lzma.reset(); conf_ucl.reset(); conf_zlib.reset(); conf_zstd.reset(); }
};
#define NULL_cconf ((upx_compress_config_t *) nullptr)
@ -722,7 +729,6 @@ struct lzma_compress_result_t
void reset() { memset(this, 0, sizeof(*this)); }
};
struct ucl_compress_result_t
{
ucl_uint result[16];
@ -730,7 +736,6 @@ struct ucl_compress_result_t
void reset() { memset(this, 0, sizeof(*this)); }
};
struct zlib_compress_result_t
{
unsigned dummy;
@ -738,6 +743,12 @@ struct zlib_compress_result_t
void reset() { memset(this, 0, sizeof(*this)); }
};
// zstd-specific compression result; stored as result_zstd in upx_compress_result_t.
struct zstd_compress_result_t
{
unsigned dummy; // placeholder member; upx_zstd_compress() sets it to 0
// zero-fills the whole object
void reset() { memset(this, 0, sizeof(*this)); }
};
struct upx_compress_result_t
{
@ -748,10 +759,11 @@ struct upx_compress_result_t
lzma_compress_result_t result_lzma;
ucl_compress_result_t result_ucl;
zlib_compress_result_t result_zlib;
zstd_compress_result_t result_zstd;
void reset() {
memset(this, 0, sizeof(*this));
result_lzma.reset(); result_ucl.reset(); result_zlib.reset();
result_lzma.reset(); result_ucl.reset(); result_zlib.reset(); result_zstd.reset();
}
};

View File

@ -411,6 +411,11 @@ void show_version(bool one_line)
if (v != nullptr && v[0])
fprintf(fp, "LZMA SDK version %s\n", v);
#endif
#if (WITH_ZSTD)
v = upx_zstd_version_string();
if (v != nullptr && v[0])
fprintf(fp, "zstd data compression library %s\n", v);
#endif
#if !defined(DOCTEST_CONFIG_DISABLE)
fprintf(fp, "doctest C++ testing framework version %s\n", DOCTEST_VERSION_STR);
#endif
@ -424,6 +429,10 @@ void show_version(bool one_line)
#if (WITH_LZMA)
fprintf(fp, "Copyright (C) 1999" "-2006 Igor Pavlov\n");
#endif
#if (WITH_ZSTD)
// see vendor/zstd/LICENSE; main author is Yann Collet
fprintf(fp, "Copyright (C) 2015" "-2023 Meta Platforms, Inc. and affiliates\n");
#endif
#if !defined(DOCTEST_CONFIG_DISABLE)
fprintf(fp, "Copyright (C) 2016" "-2021 Viktor Kirilov\n");
#endif

View File

@ -1208,11 +1208,14 @@ int upx_main(int argc, char *argv[]) {
set_term(stderr);
assert(upx_lzma_init() == 0);
assert(upx_ucl_init() == 0);
assert(upx_zlib_init() == 0);
#if (WITH_NRV)
assert(upx_nrv_init() == 0);
#endif
assert(upx_ucl_init() == 0);
assert(upx_zlib_init() == 0);
#if (WITH_ZSTD)
assert(upx_zstd_init() == 0);
#endif
/* get options */
first_options(argc, argv);

View File

@ -129,6 +129,8 @@ unsigned MemBuffer::getSizeForCompression(unsigned uncompressed_size, unsigned e
bytes = umax(bytes, (z / 3 * (8 + 2 * (w - 8) / 1)) / 8);
// NRV2E: 1 byte plus 3 bits per pair of width exceeding 7 ("ss12")
bytes = umax(bytes, (z / 3 * (8 + 3 * (w - 7) / 2)) / 8);
// zstd: ZSTD_COMPRESSBOUND
bytes = umax(bytes, z + (z >> 8) + ((z < (128 << 10)) ? (((128 << 10) - z) >> 11) : 0));
// extra + 256 safety for rounding
bytes = mem_size(1, bytes, extra, 256);
return bytes;