Add LZMA compression support.
Addresses #198.
diff --git a/INSTALL.md b/INSTALL.md
index 65b9378..512768c 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -8,7 +8,7 @@
For supporting bzip2-compressed zip archives, you need
[bzip2](http://bzip.org/).
-For supporting xz-compressed zip archives, you need
+For supporting lzma- and xz-compressed zip archives, you need
[liblzma](https://tukaani.org/xz/) which is part of xz, at least version 5.2.
For supporting zstd-compressed zip archives, you need
diff --git a/NEWS.md b/NEWS.md
index b95ee36..ea1dd9f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,7 @@
==================
* Add support for zstd (Zstandard) compression.
+* Add support for lzma (ID 14) compression.
1.7.3 [2020-07-15]
==================
diff --git a/THANKS b/THANKS
index fcb469d..ffdb478 100644
--- a/THANKS
+++ b/THANKS
@@ -44,6 +44,7 @@
Info-ZIP group
Jan Weiß <jan@geheimwerk.de>
Jay Freeman (saurik) <saurik@saurik.com>
+jloqfjgk@github
Joachim Reichel <joachim.reichel@gmx.de>
João Custódio <joao_custodio@symantec.com>
Joel Ebrahimi <joel.ebrahimi@gmail.com>
diff --git a/lib/zip_algorithm_xz.c b/lib/zip_algorithm_xz.c
index e8f7328..ee6ea38 100644
--- a/lib/zip_algorithm_xz.c
+++ b/lib/zip_algorithm_xz.c
@@ -1,7 +1,7 @@
/*
- zip_algorithm_xz.c -- XZ (de)compression routines
+ zip_algorithm_xz.c -- LZMA/XZ (de)compression routines
Bazed on zip_algorithm_deflate.c -- deflate (de)compression routines
- Copyright (C) 2017-2019 Dieter Baron and Thomas Klausner
+ Copyright (C) 2017-2020 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -39,6 +39,17 @@
#include <stdlib.h>
#include <zlib.h>
+enum header_state { INCOMPLETE, OUTPUT, DONE };
+
+#define HEADER_BYTES_ZIP 9
+#define HEADER_MAGIC_LENGTH 4
+#define HEADER_MAGIC1_OFFSET 0
+#define HEADER_MAGIC2_OFFSET 2
+#define HEADER_SIZE_OFFSET 9
+#define HEADER_SIZE_LENGTH 8
+#define HEADER_PARAMETERS_LENGTH 5
+#define HEADER_LZMA_ALONE_LENGTH (HEADER_PARAMETERS_LENGTH + HEADER_SIZE_LENGTH)
+
struct ctx {
zip_error_t *error;
bool compress;
@@ -46,6 +57,27 @@
bool end_of_input;
lzma_stream zstr;
zip_uint16_t method;
+ /* header member is used for converting from zip to "lzma alone"
+ * format
+ *
+ * "lzma alone" file format starts with:
+ * 5 bytes lzma parameters
+ * 8 bytes uncompressed size
+ * compressed data
+ *
+ * zip archive on-disk format starts with
+ * 4 bytes magic (first two bytes vary, e.g. 0x0914 or 0x1002, next bytes are 0x0500)
+ * 5 bytes lzma parameters
+ * compressed data
+ *
+ * we read the data into a header of the form
+ * 4 bytes magic
+ * 5 bytes lzma parameters
+ * 8 bytes uncompressed size (always 0xffffffffffffffff)
+ */
+ zip_uint8_t header[HEADER_MAGIC_LENGTH + HEADER_LZMA_ALONE_LENGTH];
+ zip_uint8_t header_bytes_offset;
+ enum header_state header_state;
};
@@ -60,7 +92,8 @@
2) You can try LZMA2, where
outSize can be = 1.001 * originalSize + 1 KB.
*/
- zip_uint64_t compressed_size = (zip_uint64_t)((double)uncompressed_size * 1.1) + 64 * 1024;
+ /* 13 bytes added for lzma alone header */
+ zip_uint64_t compressed_size = (zip_uint64_t)((double)uncompressed_size * 1.1) + 64 * 1024 + 13;
if (compressed_size < uncompressed_size) {
return ZIP_UINT64_MAX;
@@ -88,6 +121,13 @@
ctx->compression_flags = (zip_uint32_t)compression_flags;
ctx->compression_flags |= LZMA_PRESET_EXTREME;
ctx->end_of_input = false;
+ memset(ctx->header, 0, sizeof(ctx->header));
+ ctx->header_bytes_offset = 0;
+ if (ZIP_CM_LZMA) {
+ ctx->header_state = INCOMPLETE;
+ } else {
+ ctx->header_state = DONE;
+ }
memset(&ctx->zstr, 0, sizeof(ctx->zstr));
ctx->method = method;
return ctx;
@@ -122,6 +162,7 @@
static int
map_error(lzma_ret ret) {
switch (ret) {
+ case LZMA_DATA_ERROR:
case LZMA_UNSUPPORTED_CHECK:
return ZIP_ER_COMPRESSED_DATA;
@@ -194,6 +235,41 @@
return false;
}
+ /* For decompression of LZMA1: Have we read the full "lzma alone" header yet? */
+ if (ctx->method == ZIP_CM_LZMA && !ctx->compress && ctx->header_state == INCOMPLETE) {
+ /* if not, get more of the data */
+ zip_uint8_t got = ZIP_MIN(HEADER_BYTES_ZIP - ctx->header_bytes_offset, length);
+ memcpy(ctx->header + ctx->header_bytes_offset, data, got);
+ ctx->header_bytes_offset += got;
+ length -= got;
+ data += got;
+ /* Do we have a complete header now? */
+ if (ctx->header_bytes_offset == HEADER_BYTES_ZIP) {
+ Bytef buffer[1];
+ /* check magic */
+ if (ctx->header[HEADER_MAGIC2_OFFSET] != 0x05 || ctx->header[HEADER_MAGIC2_OFFSET+1] != 0x00) {
+ /* magic does not match */
+ zip_error_set(ctx->error, ZIP_ER_COMPRESSED_DATA, 0);
+ return false;
+ }
+ /* set size of uncompressed data in "lzma alone" header to "unknown" */
+ memset(ctx->header + HEADER_SIZE_OFFSET, 0xff, HEADER_SIZE_LENGTH);
+ /* Feed header into "lzma alone" decoder, for
+ * initialization; this should not produce output. */
+ ctx->zstr.next_in = (void*)(ctx->header + HEADER_MAGIC_LENGTH);
+ ctx->zstr.avail_in = HEADER_LZMA_ALONE_LENGTH;
+ ctx->zstr.total_in = 0;
+ ctx->zstr.next_out = buffer;
+ ctx->zstr.avail_out = sizeof(*buffer);
+ ctx->zstr.total_out = 0;
+ /* this just initializes the decoder and does not produce output, so it consumes the complete header */
+ if (lzma_code(&ctx->zstr, LZMA_RUN) != LZMA_OK || ctx->zstr.total_out > 0) {
+ zip_error_set(ctx->error, ZIP_ER_COMPRESSED_DATA, 0);
+ return false;
+ }
+ ctx->header_state = DONE;
+ }
+ }
ctx->zstr.avail_in = (uInt)length;
ctx->zstr.next_in = (Bytef *)data;
@@ -213,6 +289,36 @@
process(void *ud, zip_uint8_t *data, zip_uint64_t *length) {
struct ctx *ctx = (struct ctx *)ud;
lzma_ret ret;
+ /* for compression of LZMA1 */
+ if (ctx->method == ZIP_CM_LZMA && ctx->compress) {
+ if (ctx->header_state == INCOMPLETE) {
+ /* write magic to output buffer */
+ ctx->header[0] = 0x09;
+ ctx->header[1] = 0x14;
+ ctx->header[2] = 0x05;
+ ctx->header[3] = 0x00;
+ /* generate lzma parameters into output buffer */
+ ctx->zstr.avail_out = HEADER_LZMA_ALONE_LENGTH;
+ ctx->zstr.next_out = ctx->header + HEADER_MAGIC_LENGTH;
+ ret = lzma_code(&ctx->zstr, LZMA_RUN);
+ if (ret != LZMA_OK || ctx->zstr.avail_out != 0) {
+ /* assume that the whole header will be provided with the first call to lzma_code */
+ return ZIP_COMPRESSION_ERROR;
+ }
+ ctx->header_state = OUTPUT;
+ }
+ if (ctx->header_state == OUTPUT) {
+ /* write header */
+ zip_uint8_t write_len = ZIP_MIN(HEADER_BYTES_ZIP - ctx->header_bytes_offset, *length);
+ memcpy(data, ctx->header + ctx->header_bytes_offset, write_len);
+ ctx->header_bytes_offset += write_len;
+ *length = write_len;
+ if (ctx->header_bytes_offset == HEADER_BYTES_ZIP) {
+ ctx->header_state = DONE;
+ }
+ return ZIP_COMPRESSION_OK;
+ }
+ }
ctx->zstr.avail_out = (uInt)ZIP_MIN(UINT_MAX, *length);
ctx->zstr.next_out = (Bytef *)data;
diff --git a/lib/zip_source_compress.c b/lib/zip_source_compress.c
index 1baa5f1..c98a194 100644
--- a/lib/zip_source_compress.c
+++ b/lib/zip_source_compress.c
@@ -67,10 +67,10 @@
{ZIP_CM_BZIP2, &zip_algorithm_bzip2_compress, &zip_algorithm_bzip2_decompress},
#endif
#if defined(HAVE_LIBLZMA)
- /* Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2
+ {ZIP_CM_LZMA, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress},
+ /* Disabled - because 7z isn't able to unpack ZIP+LZMA2
archives made this way - and vice versa.
- {ZIP_CM_LZMA, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress},
{ZIP_CM_LZMA2, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress},
*/
{ZIP_CM_XZ, &zip_algorithm_xz_compress, &zip_algorithm_xz_decompress},
diff --git a/regress/CMakeLists.txt b/regress/CMakeLists.txt
index 15ca3fa..c75fe10 100644
--- a/regress/CMakeLists.txt
+++ b/regress/CMakeLists.txt
@@ -189,8 +189,10 @@
set_compression_deflate_to_bzip2.test
set_compression_deflate_to_deflate.test
set_compression_deflate_to_store.test
+ set_compression_lzma_to_store.test
set_compression_store_to_bzip2.test
set_compression_store_to_deflate.test
+ set_compression_store_to_lzma.test
set_compression_store_to_store.test
set_compression_store_to_xz.test
set_compression_store_to_zstd.test
diff --git a/src/ziptool.c b/src/ziptool.c
index c6681cf..149381b 100644
--- a/src/ziptool.c
+++ b/src/ziptool.c
@@ -645,14 +645,13 @@
/* Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2
archives made this way - and vice versa.
- else if (strcasecmp(arg, "lzma") == 0)
- return ZIP_CM_LZMA;
else if (strcasecmp(arg, "lzma2") == 0)
return ZIP_CM_LZMA2;
*/
+ else if (strcasecmp(arg, "lzma") == 0)
+ return ZIP_CM_LZMA;
else if (strcasecmp(arg, "xz") == 0)
return ZIP_CM_XZ;
-
#endif
#if defined(HAVE_LIBZSTD)
else if (strcasecmp(arg, "zstd") == 0)