blob: 03521141a01a36a80b783396a38bdfa969f0123c [file] [log] [blame]
/*
source_hole.c -- source for handling huge files that are mostly NULs
Copyright (C) 2014-2021 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. The names of the authors may not be used to endorse or promote
products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zip.h"
/* public API */
/*
  Create a zip source whose contents live in the "hole" file named by the
  first argument.  `flags` is tested against ZIP_CREATE and ZIP_TRUNCATE
  when the file is loaded.  Returns NULL on failure.
*/
zip_source_t *source_hole_create(const char *, int flags, zip_error_t *);
/* Fall back to EINVAL when <errno.h> does not define EFTYPE; used as the system error for a malformed file. */
#ifndef EFTYPE
#define EFTYPE EINVAL
#endif
/* Smaller of two values.  NOTE: each argument is evaluated twice, so pass side-effect free expressions only. */
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Fragment size (in bytes) given to buffers created by buffer_new(). */
#define FRAGMENT_SIZE (8 * 1024)
/* 4-byte marker that starts the file; followed by fragment size and total size as 64-bit big-endian values. */
#define MARK_BEGIN "NiH0"
/* 4-byte marker that precedes the raw bytes of one fragment. */
#define MARK_DATA "NiH1"
/* 4-byte marker that precedes a 64-bit big-endian count of consecutive all-NUL fragments. */
#define MARK_NUL "NiH2"
/*
  In-memory image of the data, split into equally sized fragments.
  A fragment whose pointer is NULL has no storage and reads back as NULs.
*/
typedef struct buffer {
/* size of every fragment in bytes */
zip_uint64_t fragment_size;
/* table of fragment pointers; entries may be NULL */
zip_uint8_t **fragment;
/* number of entries in the fragment table */
zip_uint64_t nfragments;
/* length of the data in bytes */
zip_uint64_t size;
/* current read/write position in bytes */
zip_uint64_t offset;
} buffer_t;
/* Forward declarations of the file-local helpers. */

/* buffer life cycle */
static void buffer_free(buffer_t *buffer);
static buffer_t *buffer_from_file(const char *fname, int flags, zip_error_t *error);
static buffer_t *buffer_new(void);
/* buffer I/O; read, seek and write operate at buffer->offset */
static zip_int64_t buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
static int buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error);
static zip_int64_t buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error);
static int buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error);
static zip_int64_t buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
/* decode / encode 64-bit big-endian values and scan for NUL bytes */
static zip_uint64_t get_u64(const zip_uint8_t *b);
static int only_nul(const zip_uint8_t *data, zip_uint64_t length);
static int write_nuls(zip_uint64_t n, FILE *f);
static int write_u64(zip_uint64_t u64, FILE *f);
/* Per-source state handed to source_hole_cb() as user data. */
typedef struct hole {
/* error reported for ZIP_SOURCE_ERROR */
zip_error_t error;
/* heap copy of the backing file name */
char *fname;
/* buffer that read, seek, tell and stat commands operate on */
buffer_t *in;
/* buffer that collects writes; NULL when no write is in progress */
buffer_t *out;
} hole_t;
/* Allocate the per-source state and load the backing file into its input buffer. */
static hole_t *hole_new(const char *fname, int flags, zip_error_t *error);
/* Callback passed to zip_source_function_create(); dispatches on `command`. */
static zip_int64_t source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command);
zip_source_t *
source_hole_create(const char *fname, int flags, zip_error_t *error) {
hole_t *ud = hole_new(fname, flags, error);
if (ud == NULL) {
return NULL;
}
return zip_source_function_create(source_hole_cb, ud, error);
}
/*
  Release a buffer together with its fragment table and every fragment
  that has storage.  Passing NULL is a no-op.
*/
static void
buffer_free(buffer_t *buffer) {
    if (buffer != NULL) {
        zip_uint8_t **table = buffer->fragment;

        /* the table may be missing even when nfragments is non-zero, so test it first */
        if (table != NULL) {
            zip_uint64_t remaining = buffer->nfragments;

            while (remaining > 0) {
                remaining -= 1;
                free(table[remaining]);
            }
            free(table);
        }

        free(buffer);
    }
}
/*
  Create a buffer and, unless ZIP_TRUNCATE is set in `flags`, fill it
  from the file `fname`.

  A missing file (ENOENT) is accepted when ZIP_CREATE is set; the result
  is then an empty buffer.  Any other open failure is reported as
  ZIP_ER_OPEN with the errno of fopen().

  Returns the buffer, or NULL with `error` set.
*/
static buffer_t *
buffer_from_file(const char *fname, int flags, zip_error_t *error) {
    buffer_t *buffer;
    FILE *f;

    if ((buffer = buffer_new()) == NULL) {
        zip_error_set(error, ZIP_ER_MEMORY, 0);
        return NULL;
    }

    if ((flags & ZIP_TRUNCATE) != 0) {
        /* existing contents are to be discarded: start empty */
        return buffer;
    }

    if ((f = fopen(fname, "rb")) == NULL) {
        /* save errno before any other library call can overwrite it */
        int open_errno = errno;

        if (open_errno == ENOENT && (flags & ZIP_CREATE)) {
            return buffer;
        }

        buffer_free(buffer);
        zip_error_set(error, ZIP_ER_OPEN, open_errno);
        return NULL;
    }

    if (buffer_read_file(buffer, f, error) < 0) {
        buffer_free(buffer);
        buffer = NULL;
    }
    fclose(f);

    return buffer;
}
static buffer_t *
buffer_new(void) {
buffer_t *buffer;
if ((buffer = (buffer_t *)malloc(sizeof(*buffer))) == NULL) {
return NULL;
}
buffer->fragment = NULL;
buffer->nfragments = 0;
buffer->fragment_size = FRAGMENT_SIZE;
buffer->size = 0;
buffer->offset = 0;
return buffer;
}
/*
  Copy up to `length` bytes starting at buffer->offset into `data` and
  advance the offset by the number of bytes copied.

  The request is clipped to the bytes remaining before buffer->size.
  Fragments without storage are delivered as NUL bytes.

  Returns the number of bytes copied (0 at end of data), or -1 with
  `error` set to ZIP_ER_INVAL when the clipped length cannot be
  represented in the return type.
*/
static zip_int64_t
buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error) {
    zip_uint64_t n, i, fragment_offset;

    length = MY_MIN(length, buffer->size - buffer->offset);

    if (length == 0) {
        return 0;
    }
    if (length > ZIP_INT64_MAX) {
        /* report why the read failed instead of leaving a stale error behind */
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return -1;
    }

    i = buffer->offset / buffer->fragment_size;
    fragment_offset = buffer->offset % buffer->fragment_size;
    n = 0;
    while (n < length) {
        /* never cross a fragment boundary within one copy */
        zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);

        if (buffer->fragment[i]) {
            memcpy(data + n, buffer->fragment[i] + fragment_offset, left);
        }
        else {
            memset(data + n, 0, left);
        }

        n += left;
        i++;
        /* only the first fragment is entered in the middle */
        fragment_offset = 0;
    }

    buffer->offset += n;
    return (zip_int64_t)n;
}
/*
  Fill `buffer` from the open stream `f`.

  Expected layout: MARK_BEGIN, fragment size and data size (64-bit
  big-endian each), then a sequence of records that are either MARK_DATA
  followed by one full fragment of bytes, or MARK_NUL followed by a
  64-bit big-endian count of fragments without storage.

  `buffer` is expected to be freshly created (no fragment table yet).
  On failure the partially filled buffer stays consistent enough to be
  passed to buffer_free().

  Returns 0 on success, -1 with `error` set otherwise.
*/
static int
buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error) {
    zip_uint8_t b[20];
    zip_uint64_t i;

    if (fread(b, 20, 1, f) != 1) {
        zip_error_set(error, ZIP_ER_READ, errno);
        return -1;
    }

    if (memcmp(b, MARK_BEGIN, 4) != 0) {
        zip_error_set(error, ZIP_ER_READ, EFTYPE);
        return -1;
    }

    buffer->fragment_size = get_u64(b + 4);
    buffer->size = get_u64(b + 12);

    if (buffer->fragment_size == 0) {
        /* would divide by zero below */
        zip_error_set(error, ZIP_ER_INCONS, 0);
        return -1;
    }

    /* number of fragments needed to hold `size` bytes, rounded up */
    buffer->nfragments = buffer->size / buffer->fragment_size;
    if (buffer->size % buffer->fragment_size != 0) {
        buffer->nfragments += 1;
    }

    if (buffer->nfragments == 0) {
        /* empty data: nothing to allocate; malloc(0) may return NULL, which is not an error */
        return 0;
    }

    if ((buffer->nfragments > SIZE_MAX / sizeof(buffer->fragment[0])) || ((buffer->fragment = (zip_uint8_t **)malloc(sizeof(buffer->fragment[0]) * buffer->nfragments)) == NULL)) {
        zip_error_set(error, ZIP_ER_MEMORY, 0);
        return -1;
    }

    for (i = 0; i < buffer->nfragments; i++) {
        buffer->fragment[i] = NULL;
    }

    i = 0;
    while (i < buffer->nfragments) {
        if (fread(b, 4, 1, f) != 1) {
            zip_error_set(error, ZIP_ER_READ, errno);
            return -1;
        }

        if (memcmp(b, MARK_DATA, 4) == 0) {
            if (buffer->fragment_size > SIZE_MAX) {
                zip_error_set(error, ZIP_ER_MEMORY, 0);
                return -1;
            }
            if ((buffer->fragment[i] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
                zip_error_set(error, ZIP_ER_MEMORY, 0);
                return -1;
            }
            if (fread(buffer->fragment[i], buffer->fragment_size, 1, f) != 1) {
                zip_error_set(error, ZIP_ER_READ, errno);
                return -1;
            }
            i++;
        }
        else if (memcmp(b, MARK_NUL, 4) == 0) {
            zip_uint64_t run;

            if (fread(b, 8, 1, f) != 1) {
                zip_error_set(error, ZIP_ER_READ, errno);
                return -1;
            }
            run = get_u64(b);
            /*
              Clamp the run to the fragments that are left.  Without this a
              bogus count could wrap `i` around, and a later data record
              would overwrite (and leak) an already loaded fragment.
            */
            if (run > buffer->nfragments - i) {
                run = buffer->nfragments - i;
            }
            i += run;
        }
        else {
            zip_error_set(error, ZIP_ER_READ, EFTYPE);
            return -1;
        }
    }

    return 0;
}
/*
  Move buffer->offset as requested by the seek arguments in
  `data`/`length`, which are handed to zip_source_seek_compute_offset()
  together with the current offset and size.

  Returns 0 on success, -1 if the computed offset is negative.
*/
static zip_int64_t
buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error) {
    zip_int64_t target;

    target = zip_source_seek_compute_offset(buffer->offset, buffer->size, data, length, error);
    if (target >= 0) {
        buffer->offset = (zip_uint64_t)target;
        return 0;
    }

    return -1;
}
/*
  Write `buffer` to the file `fname`, replacing previous contents.

  Layout: MARK_BEGIN, fragment size, data size, then one record per run:
  consecutive fragments that have no storage or contain only NUL bytes
  are collapsed into a single MARK_NUL record with their count; every
  other fragment is written as MARK_DATA followed by its full contents.

  Returns 0 on success.  On failure returns -1 and sets `error` to
  ZIP_ER_OPEN (file could not be opened) or ZIP_ER_WRITE (any write or
  the final close failed).
*/
static int
buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error) {
    FILE *f = fopen(fname, "wb");
    zip_uint64_t i;
    zip_uint64_t nul_run;
    int ok;

    if (f == NULL) {
        zip_error_set(error, ZIP_ER_OPEN, errno);
        return -1;
    }

    ok = fwrite(MARK_BEGIN, 4, 1, f) == 1 && write_u64(buffer->fragment_size, f) == 0 && write_u64(buffer->size, f) == 0;

    nul_run = 0;
    for (i = 0; ok && i * buffer->fragment_size < buffer->size; i++) {
        if (buffer->fragment[i] == NULL || only_nul(buffer->fragment[i], buffer->fragment_size)) {
            nul_run++;
            continue;
        }

        /* flush the pending run of empty fragments before the data record */
        if (nul_run > 0) {
            ok = write_nuls(nul_run, f) == 0;
            nul_run = 0;
        }
        ok = ok && fwrite(MARK_DATA, 4, 1, f) == 1 && fwrite(buffer->fragment[i], 1, buffer->fragment_size, f) == buffer->fragment_size;
    }

    if (ok && nul_run > 0) {
        ok = write_nuls(nul_run, f) == 0;
    }

    if (!ok) {
        /* keep the errno of the failed write; fclose() may change it */
        int write_errno = errno;

        fclose(f);
        zip_error_set(error, ZIP_ER_WRITE, write_errno);
        return -1;
    }

    /* fclose() flushes, so a failure here is a write failure too */
    if (fclose(f) != 0) {
        zip_error_set(error, ZIP_ER_WRITE, errno);
        return -1;
    }

    return 0;
}
/*
  Store `length` bytes from `data` at buffer->offset, growing the
  fragment table as needed, then advance the offset and extend
  buffer->size if the write went past the previous end.

  Storage is only allocated for a fragment when non-NUL bytes land in
  it.  A fragment that already has storage is always overwritten, so
  writing NULs over earlier data really clears that data.

  Returns the number of bytes written (always `length`), or -1 with
  `error` set to ZIP_ER_INVAL (range not representable) or
  ZIP_ER_MEMORY (allocation failed).
*/
static zip_int64_t
buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error) {
    zip_uint64_t end, needed_fragments, idx, n, fragment_offset;

    /* the return value and the new offset must both fit into zip_int64_t */
    if (length > (zip_uint64_t)ZIP_INT64_MAX || buffer->offset > (zip_uint64_t)ZIP_INT64_MAX - length) {
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return -1;
    }
    end = buffer->offset + length;

    /* fragments needed to cover [0, end), rounded up without overflowing */
    needed_fragments = end / buffer->fragment_size;
    if (end % buffer->fragment_size != 0) {
        needed_fragments += 1;
    }

    if (needed_fragments > buffer->nfragments) {
        zip_uint8_t **fragment;
        zip_uint64_t new_capacity = buffer->nfragments;
        zip_uint64_t i;

        if (new_capacity == 0) {
            new_capacity = 4;
        }
        /* cannot overflow: new_capacity < needed_fragments <= 2^63 before each doubling */
        while (new_capacity < needed_fragments) {
            new_capacity *= 2;
        }

        if (new_capacity > SIZE_MAX / sizeof(*fragment)) {
            zip_error_set(error, ZIP_ER_MEMORY, 0);
            return -1;
        }
        fragment = (zip_uint8_t **)realloc(buffer->fragment, (size_t)new_capacity * sizeof(*fragment));
        if (fragment == NULL) {
            /* the old table is still valid and still owned by buffer */
            zip_error_set(error, ZIP_ER_MEMORY, 0);
            return -1;
        }

        for (i = buffer->nfragments; i < new_capacity; i++) {
            fragment[i] = NULL;
        }

        buffer->fragment = fragment;
        buffer->nfragments = new_capacity;
    }

    idx = buffer->offset / buffer->fragment_size;
    fragment_offset = buffer->offset % buffer->fragment_size;
    n = 0;
    while (n < length) {
        /* never cross a fragment boundary within one copy */
        zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);

        if (buffer->fragment[idx] == NULL && !only_nul(data + n, left)) {
            if ((buffer->fragment[idx] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
                zip_error_set(error, ZIP_ER_MEMORY, 0);
                return -1;
            }
            memset(buffer->fragment[idx], 0, buffer->fragment_size);
        }

        /* a fragment still without storage is receiving only NULs: keep it as a hole */
        if (buffer->fragment[idx] != NULL) {
            memcpy(buffer->fragment[idx] + fragment_offset, data + n, left);
        }

        n += left;
        idx++;
        fragment_offset = 0;
    }

    buffer->offset = end;
    if (buffer->offset > buffer->size) {
        buffer->size = buffer->offset;
    }

    return (zip_int64_t)length;
}
/* Decode the 8 bytes at `b` as a big-endian (most significant byte first) 64-bit value. */
static zip_uint64_t
get_u64(const zip_uint8_t *b) {
    zip_uint64_t value = 0;
    int k;

    for (k = 0; k < 8; k++) {
        value = (value << 8) | (zip_uint64_t)b[k];
    }

    return value;
}
/* Return 1 if the first `length` bytes of `data` are all NUL (or length is 0), 0 otherwise. */
static int
only_nul(const zip_uint8_t *data, zip_uint64_t length) {
    const zip_uint8_t *cursor = data;
    zip_uint64_t remaining = length;

    while (remaining > 0) {
        if (*cursor != '\0') {
            return 0;
        }
        cursor++;
        remaining--;
    }

    return 1;
}
/*
  Write a MARK_NUL record to `f`: the 4-byte marker followed by `n` as
  written by write_u64().  Returns 0 on success, -1 on a failed write.
*/
static int
write_nuls(zip_uint64_t n, FILE *f) {
    size_t markers_written = fwrite(MARK_NUL, 4, 1, f);

    if (markers_written == 1) {
        return write_u64(n, f);
    }

    return -1;
}
/*
  Write `u64` to `f` as 8 bytes, most significant byte first.
  Returns 0 on success, -1 if the write failed.
*/
static int
write_u64(zip_uint64_t u64, FILE *f) {
    zip_uint8_t bytes[8];
    int k;

    /* fill from the last byte backwards, peeling off the low 8 bits each round */
    for (k = 7; k >= 0; k--) {
        bytes[k] = (zip_uint8_t)(u64 & 0xff);
        u64 >>= 8;
    }

    if (fwrite(bytes, 8, 1, f) != 1) {
        return -1;
    }
    return 0;
}
/*
  Release the per-source state: its error, both buffers, the file name
  copy and the structure itself.  Passing NULL is a no-op.
*/
static void
hole_free(hole_t *hole) {
    if (hole != NULL) {
        zip_error_fini(&hole->error);

        buffer_free(hole->in);
        buffer_free(hole->out);

        free(hole->fname);
        free(hole);
    }
}
/*
  Allocate the per-source state for the file `fname` and load the file
  into the input buffer via buffer_from_file().

  fname: name of the backing file; copied, the caller keeps ownership
  flags: passed on to buffer_from_file()
  error: receives the reason on failure

  Returns the new state with an initialised error and no output buffer,
  or NULL on failure.  Everything allocated so far is released on
  failure.
*/
static hole_t *
hole_new(const char *fname, int flags, zip_error_t *error) {
    hole_t *ctx;

    if (fname == NULL) {
        /* strdup(NULL) is undefined; reject it up front */
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return NULL;
    }

    if ((ctx = (hole_t *)malloc(sizeof(*ctx))) == NULL) {
        zip_error_set(error, ZIP_ER_MEMORY, 0);
        return NULL;
    }

    if ((ctx->fname = strdup(fname)) == NULL) {
        free(ctx);
        zip_error_set(error, ZIP_ER_MEMORY, 0);
        return NULL;
    }

    if ((ctx->in = buffer_from_file(fname, flags, error)) == NULL) {
        /* the name copy belongs to ctx and must go with it */
        free(ctx->fname);
        free(ctx);
        return NULL;
    }

    zip_error_init(&ctx->error);
    ctx->out = NULL;

    return ctx;
}
/*
  Source callback registered with zip_source_function_create().

  ud:      the hole_t created by hole_new()
  data:    command specific argument buffer
  length:  size of `data`
  command: operation to perform

  Read-side commands (OPEN, READ, SEEK, TELL, STAT) operate on ctx->in.
  Write-side commands (WRITE, SEEK_WRITE, TELL_WRITE) operate on
  ctx->out, which is created by BEGIN_WRITE.  COMMIT_WRITE saves ctx->out
  to the file and makes it the new ctx->in; ROLLBACK_WRITE discards it.

  Returns the command specific result, or -1 with ctx->error set.
*/
static zip_int64_t
source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command) {
    hole_t *ctx = (hole_t *)ud;

    switch (command) {
    case ZIP_SOURCE_BEGIN_WRITE: {
        buffer_t *fresh = buffer_new();

        if (fresh == NULL) {
            /* without this check later write commands would dereference NULL */
            zip_error_set(&ctx->error, ZIP_ER_MEMORY, 0);
            return -1;
        }
        /* normally NULL already; freeing guards against a leak on a repeated begin */
        buffer_free(ctx->out);
        ctx->out = fresh;
        return 0;
    }

    case ZIP_SOURCE_CLOSE:
        return 0;

    case ZIP_SOURCE_COMMIT_WRITE:
        if (buffer_to_file(ctx->out, ctx->fname, &ctx->error) < 0) {
            return -1;
        }
        /* the written buffer becomes the data served to readers */
        buffer_free(ctx->in);
        ctx->in = ctx->out;
        ctx->out = NULL;
        return 0;

    case ZIP_SOURCE_ERROR:
        return zip_error_to_data(&ctx->error, data, length);

    case ZIP_SOURCE_FREE:
        hole_free(ctx);
        return 0;

    case ZIP_SOURCE_OPEN:
        ctx->in->offset = 0;
        return 0;

    case ZIP_SOURCE_READ:
        return buffer_read(ctx->in, data, length, &ctx->error);

    case ZIP_SOURCE_REMOVE: {
        /* get the replacement first so ctx->in is never left NULL */
        buffer_t *empty = buffer_new();

        if (empty == NULL) {
            zip_error_set(&ctx->error, ZIP_ER_MEMORY, 0);
            return -1;
        }
        buffer_free(ctx->in);
        ctx->in = empty;
        buffer_free(ctx->out);
        ctx->out = NULL;
        /* best effort: the result of remove() is deliberately ignored */
        (void)remove(ctx->fname);
        return 0;
    }

    case ZIP_SOURCE_ROLLBACK_WRITE:
        buffer_free(ctx->out);
        ctx->out = NULL;
        return 0;

    case ZIP_SOURCE_SEEK:
        return buffer_seek(ctx->in, data, length, &ctx->error);

    case ZIP_SOURCE_SEEK_WRITE:
        return buffer_seek(ctx->out, data, length, &ctx->error);

    case ZIP_SOURCE_STAT: {
        zip_stat_t *st = ZIP_SOURCE_GET_ARGS(zip_stat_t, data, length, &ctx->error);

        if (st == NULL) {
            return -1;
        }

        /* TODO: return ENOENT if fname doesn't exist */

        st->valid |= ZIP_STAT_SIZE;
        st->size = ctx->in->size;
        return 0;
    }

    case ZIP_SOURCE_TELL:
        return (zip_int64_t)ctx->in->offset;

    case ZIP_SOURCE_TELL_WRITE:
        return (zip_int64_t)ctx->out->offset;

    case ZIP_SOURCE_WRITE:
        return buffer_write(ctx->out, data, length, &ctx->error);

    case ZIP_SOURCE_SUPPORTS:
        return zip_source_make_command_bitmap(ZIP_SOURCE_BEGIN_WRITE, ZIP_SOURCE_COMMIT_WRITE, ZIP_SOURCE_CLOSE, ZIP_SOURCE_ERROR, ZIP_SOURCE_FREE, ZIP_SOURCE_OPEN, ZIP_SOURCE_READ, ZIP_SOURCE_REMOVE, ZIP_SOURCE_ROLLBACK_WRITE, ZIP_SOURCE_SEEK, ZIP_SOURCE_SEEK_WRITE, ZIP_SOURCE_STAT, ZIP_SOURCE_TELL, ZIP_SOURCE_TELL_WRITE, ZIP_SOURCE_WRITE, -1);

    default:
        zip_error_set(&ctx->error, ZIP_ER_OPNOTSUPP, 0);
        return -1;
    }
}