blob: c2d7b926920b162c5891aa3eaa1e13509d22a8cf [file] [log] [blame]
Dieter Baron91374c72014-10-09 22:14:55 +02001/*
2 modify_hole.c -- source for handling huge files that are mostly NULs
3 Copyright (C) 2014 Dieter Baron and Thomas Klausner
4
5 This file is part of libzip, a library to manipulate ZIP archives.
6 The authors can be contacted at <libzip@nih.at>
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in
15 the documentation and/or other materials provided with the
16 distribution.
17 3. The names of the authors may not be used to endorse or promote
18 products derived from this software without specific prior
19 written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
22 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
25 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29 IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
31 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <errno.h>
35#include <stdlib.h>
36#include <stdio.h>
37#include <string.h>
38
39#include "zip.h"
40
41/* public API */
42
43zip_source_t *source_hole_create(const char *, int flags, zip_error_t *);
44
45
/* Not every <errno.h> provides EFTYPE; substitute EINVAL where it is missing. */
#if !defined(EFTYPE)
#define EFTYPE EINVAL
#endif


/* Smaller of two values.  Each argument may be evaluated twice. */
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))

/* Bytes per fragment for buffers created with buffer_new(). */
#define FRAGMENT_SIZE (8 * 1024)

/* Four-byte record markers of the on-disk format:
   file header, one fragment of literal data, a run of all-NUL fragments. */
#define MARK_BEGIN "NiH0"
#define MARK_DATA "NiH1"
#define MARK_NUL "NiH2"
58
59
60typedef struct buffer {
61 zip_uint64_t fragment_size;
62 zip_uint8_t **fragment;
63 zip_uint64_t nfragments;
64 zip_uint64_t size;
65 zip_uint64_t offset;
66} buffer_t;
67
68static void buffer_free(buffer_t *buffer);
69static buffer_t *buffer_from_file(const char *fname, int flags, zip_error_t *error);
70static buffer_t *buffer_new(void);
71static zip_int64_t buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
72static int buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error);
73static zip_int64_t buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error);
74static int buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error);
75static zip_int64_t buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
76static zip_uint64_t get_u64(const zip_uint8_t *b);
77static int only_nul(const zip_uint8_t *data, zip_uint64_t length);
78static int write_nuls(zip_uint64_t n, FILE *f);
79static int write_u64(zip_uint64_t u64, FILE *f);
80
81
82typedef struct hole {
83 zip_error_t error;
84 char *fname;
85 buffer_t *in;
86 buffer_t *out;
87} hole_t;
88
89static hole_t *hole_new(const char *fname, int flags, zip_error_t *error);
90static zip_int64_t source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command);
91
92
93zip_source_t *source_hole_create(const char *fname, int flags, zip_error_t *error)
94{
95 hole_t *ud = hole_new(fname, flags, error);
96
97 if (ud == NULL) {
98 return NULL;
99 }
100 return zip_source_function_create(source_hole_cb, ud, error);
101}
102
103
104static void
105buffer_free(buffer_t *buffer)
106{
107 zip_uint64_t i;
108
109 if (buffer == NULL) {
110 return;
111 }
112
113 if (buffer->fragment) {
114 for (i=0; i<buffer->nfragments; i++) {
115 free(buffer->fragment[i]);
116 }
117 free(buffer->fragment);
118 }
119 free(buffer);
120}
121
122
123static buffer_t *
124buffer_from_file(const char *fname, int flags, zip_error_t *error)
125{
126 buffer_t *buffer;
127 FILE *f;
128
129 if ((buffer = buffer_new()) == NULL) {
130 zip_error_set(error, ZIP_ER_MEMORY, 0);
131 return NULL;
132
133 }
134
135 if ((flags & ZIP_TRUNCATE) == 0) {
136 if ((f = fopen(fname, "rb")) == NULL) {
137 if (!(errno == ENOENT && (flags & ZIP_CREATE))) {
138 buffer_free(buffer);
139 return NULL;
140 }
141 }
142 else {
143 if (buffer_read_file(buffer, f, error) < 0) {
144 buffer_free(buffer);
145 return NULL;
146 }
147 }
148 }
149
150 return buffer;
151}
152
153
154static buffer_t *
155buffer_new(void)
156{
157 buffer_t *buffer;
158
159 if ((buffer = (buffer_t *)malloc(sizeof(*buffer))) == NULL) {
160 return NULL;
161 }
162
163 buffer->fragment = NULL;
164 buffer->nfragments = 0;
165 buffer->fragment_size = FRAGMENT_SIZE;
166 buffer->size = 0;
167 buffer->offset = 0;
168
169 return buffer;
170}
171
172
173static zip_int64_t
174buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error)
175{
176 zip_uint64_t n, i, fragment_offset;
177
178 length = MY_MIN(length, buffer->size - buffer->offset);
179
180 if (length == 0) {
181 return 0;
182 }
183 if (length > ZIP_INT64_MAX) {
184 return -1;
185 }
186
187 i = buffer->offset / buffer->fragment_size;
188 fragment_offset = buffer->offset % buffer->fragment_size;
189 n = 0;
190 while (n < length) {
191 zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);
192
193 if (buffer->fragment[i]) {
194 memcpy(data + n, buffer->fragment[i] + fragment_offset, left);
195 }
196 else {
197 memset(data + n, 0, left);
198 }
199
200 n += left;
201 i++;
202 fragment_offset = 0;
203 }
204
205 buffer->offset += n;
206 return (zip_int64_t)n;
207}
208
209
210static int
211buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error)
212{
213 zip_uint8_t b[20];
214 zip_uint64_t i;
215
216 if (fread(b, 20, 1, f) != 1) {
217 zip_error_set(error, ZIP_ER_READ, errno);
218 return -1;
219 }
220
221 if (memcmp(b, MARK_BEGIN, 4) != 0) {
222 zip_error_set(error, ZIP_ER_READ, EFTYPE);
223 return -1;
224 }
225
226 buffer->fragment_size = get_u64(b+4);
227 buffer->size = get_u64(b+12);
228
229 buffer->nfragments = (buffer->size + buffer->fragment_size - 1) / buffer->fragment_size;
230 if ((buffer->fragment = (zip_uint8_t **)malloc(sizeof(buffer->fragment[0]) * buffer->nfragments)) == NULL) {
231 zip_error_set(error, ZIP_ER_MEMORY, 0);
232 return -1;
233 }
234
235 for (i = 0; i < buffer->nfragments; i++) {
236 buffer->fragment[i] = NULL;
237 }
238
239 i = 0;
240 while (i < buffer->nfragments) {
241 if (fread(b, 4, 1, f) != 1) {
242 zip_error_set(error, ZIP_ER_READ, errno);
243 return -1;
244 }
245
246 if (memcmp(b, MARK_DATA, 4) == 0) {
247 if ((buffer->fragment[i] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
248 zip_error_set(error, ZIP_ER_MEMORY, 0);
249 return -1;
250 }
251 if (fread(buffer->fragment[i], buffer->fragment_size, 1, f) != 1) {
252 zip_error_set(error, ZIP_ER_READ, errno);
253 return -1;
254 }
255 i++;
256 }
257 else if (memcmp(b, MARK_NUL, 4) == 0) {
258 if (fread(b, 8, 1, f) != 1) {
259 zip_error_set(error, ZIP_ER_READ, errno);
260 return -1;
261 }
262 i += get_u64(b);
263 }
264 else {
265 zip_error_set(error, ZIP_ER_READ, EFTYPE);
266 return -1;
267 }
268 }
269
270 return 0;
271}
272
273static zip_int64_t
274buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error)
275{
Dieter Baronc548a182014-10-10 18:27:50 +0200276 zip_int64_t new_offset = zip_source_seek_compute_offset(buffer->offset, buffer->size, data, length, error);
277
278 if (new_offset < 0) {
Dieter Baron91374c72014-10-09 22:14:55 +0200279 return -1;
280 }
281
Dieter Baronc548a182014-10-10 18:27:50 +0200282 buffer->offset = (zip_uint64_t)new_offset;
Dieter Baron91374c72014-10-09 22:14:55 +0200283 return 0;
284}
285
286
287static int
288buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error)
289{
290 FILE *f = fopen(fname, "wb");
291 zip_uint64_t i;
292 zip_uint64_t nul_run;
293
294 if (f == NULL) {
295 zip_error_set(error, ZIP_ER_OPEN, errno);
296 return -1;
297 }
298
299 fwrite(MARK_BEGIN, 4, 1, f);
300 write_u64(buffer->fragment_size, f);
301 write_u64(buffer->size, f);
302
303 nul_run = 0;
304 for (i=0; i * buffer->fragment_size <buffer->size; i++) {
305 if (buffer->fragment[i] == NULL || only_nul(buffer->fragment[i], buffer->fragment_size)) {
306 nul_run++;
307 }
308 else {
309 if (nul_run > 0) {
310 write_nuls(nul_run, f);
311 nul_run = 0;
312 }
313 fwrite(MARK_DATA, 4, 1, f);
314
315 fwrite(buffer->fragment[i], 1, buffer->fragment_size, f);
316 }
317 }
318
319 if (nul_run > 0) {
320 write_nuls(nul_run, f);
321 }
322
323 if (fclose(f) != 0) {
324 zip_error_set(error, ZIP_ER_WRITE, errno);
325 return -1;
326 }
327
328 return 0;
329}
330
331
332static zip_int64_t
333buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error)
334{
335 if (buffer->offset + length > buffer->nfragments * buffer->fragment_size) {
336 zip_uint64_t needed_fragments = (buffer->offset + length + buffer->fragment_size - 1) / buffer->fragment_size;
337 zip_uint64_t new_capacity = buffer->nfragments;
338 zip_uint64_t i;
339
340 if (new_capacity == 0) {
341 new_capacity = 4;
342 }
343 while (new_capacity < needed_fragments) {
344 new_capacity *= 2;
345 }
346
347 zip_uint8_t **fragment = realloc(buffer->fragment, new_capacity * sizeof(*fragment));
348
349 if (fragment == NULL) {
350 zip_error_set(error, ZIP_ER_MEMORY, 0);
351 return -1;
352 }
353
354 for (i = buffer->nfragments; i < new_capacity; i++) {
355 fragment[i] = NULL;
356 }
357
358 buffer->fragment = fragment;
359 buffer->nfragments = new_capacity;
360 }
361
362 if (!only_nul(data, length)) {
363 zip_uint64_t idx, n, fragment_offset;
364
365 idx = buffer->offset / buffer->fragment_size;
366 fragment_offset = buffer->offset % buffer->fragment_size;
367 n = 0;
368
369 while (n < length) {
370 zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);
371
372 if (buffer->fragment[idx] == NULL) {
373 if ((buffer->fragment[idx] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
374 zip_error_set(error, ZIP_ER_MEMORY, 0);
375 return -1;
376 }
377 memset(buffer->fragment[idx], 0, buffer->fragment_size);
378 }
379 memcpy(buffer->fragment[idx] + fragment_offset, data + n, left);
380
381 n += left;
382 idx++;
383 fragment_offset = 0;
384 }
385 }
386
387 buffer->offset += length;
388 if (buffer->offset > buffer->size) {
389 buffer->size = buffer->offset;
390 }
391
392 return (zip_int64_t)length;
393}
394
395
396static zip_uint64_t
397get_u64(const zip_uint8_t *b)
398{
399 zip_uint64_t i;
400
401 i = (zip_uint64_t)b[0] << 56 | (zip_uint64_t)b[1] << 48 | (zip_uint64_t)b[2] << 40 | (zip_uint64_t)b[3] << 32 | (zip_uint64_t)b[4] << 24 | (zip_uint64_t)b[5] << 16 | (zip_uint64_t)b[6] << 8 | (zip_uint64_t)b[7];
402
403 return i;
404}
405
406
407static int
408only_nul(const zip_uint8_t *data, zip_uint64_t length)
409{
410 zip_uint64_t i;
411
412 for (i=0; i< length; i++) {
413 if (data[i] != '\0') {
414 return 0;
415 }
416 }
417
418 return 1;
419}
420
421
422static int
423write_nuls(zip_uint64_t n, FILE *f)
424{
425 if (fwrite(MARK_NUL, 4, 1, f) != 1) {
426 return -1;
427 }
428 return write_u64(n, f);
429}
430
431
432static int
433write_u64(zip_uint64_t u64, FILE *f)
434{
435 zip_uint8_t b[8];
436
Thomas Klausner8cff4bd2014-12-02 12:17:39 +0100437 b[0] = (zip_uint8_t)((u64 >> 56) & 0xff);
438 b[1] = (zip_uint8_t)((u64 >> 48) & 0xff);
439 b[2] = (zip_uint8_t)((u64 >> 40) & 0xff);
440 b[3] = (zip_uint8_t)((u64 >> 32) & 0xff);
441 b[4] = (zip_uint8_t)((u64 >> 24) & 0xff);
442 b[5] = (zip_uint8_t)((u64 >> 16) & 0xff);
443 b[6] = (zip_uint8_t)((u64 >> 8) & 0xff);
444 b[7] = (zip_uint8_t)(u64 & 0xff);
Dieter Baron91374c72014-10-09 22:14:55 +0200445
446 return fwrite(b, 8, 1, f) == 1 ? 0 : -1;
447}
448
449
450static void
451hole_free(hole_t *hole) {
452 if (hole == NULL) {
453 return;
454 }
455 zip_error_fini(&hole->error);
456 buffer_free(hole->in);
457 buffer_free(hole->out);
458 free(hole->fname);
459 free(hole);
460}
461
462
463static hole_t *
464hole_new(const char *fname, int flags, zip_error_t *error)
465{
466 hole_t *ctx = (hole_t *)malloc(sizeof(*ctx));
467
468 if (ctx == NULL) {
469 zip_error_set(error, ZIP_ER_MEMORY, 0);
470 return NULL;
471 }
472
473 if ((ctx->fname = strdup(fname)) == NULL) {
474 free(ctx);
475 zip_error_set(error, ZIP_ER_MEMORY, 0);
476 return NULL;
477 }
478
479 if ((ctx->in = buffer_from_file(fname, flags, error)) == NULL) {
480 free(ctx);
481 return NULL;
482 }
483
484 zip_error_init(&ctx->error);
485 ctx->out = NULL;
486
487 return ctx;
488}
489
490
491static zip_int64_t
492source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command)
493{
494 hole_t *ctx = (hole_t *)ud;
495
496 switch (command) {
497 case ZIP_SOURCE_BEGIN_WRITE:
498 ctx->out = buffer_new();
499 return 0;
500
501 case ZIP_SOURCE_CLOSE:
502 return 0;
503
504 case ZIP_SOURCE_COMMIT_WRITE:
505 if (buffer_to_file(ctx->out, ctx->fname, &ctx->error) < 0) {
506 return -1;
507 }
508 buffer_free(ctx->in);
509 ctx->in = ctx->out;
510 ctx->out = NULL;
511 return 0;
512
513 case ZIP_SOURCE_ERROR:
514 return zip_error_to_data(&ctx->error, data, length);
515
516 case ZIP_SOURCE_FREE:
517 hole_free(ctx);
518 return 0;
519
520 case ZIP_SOURCE_OPEN:
521 ctx->in->offset = 0;
522 return 0;
523
524 case ZIP_SOURCE_READ:
525 return buffer_read(ctx->in, data, length, &ctx->error);
526
527 case ZIP_SOURCE_REMOVE:
528 buffer_free(ctx->in);
529 ctx->in = buffer_new();
530 buffer_free(ctx->out);
531 ctx->out = NULL;
532 remove(ctx->fname);
533 return 0;
534
535 case ZIP_SOURCE_ROLLBACK_WRITE:
536 buffer_free(ctx->out);
537 ctx->out = NULL;
538 return 0;
539
540 case ZIP_SOURCE_SEEK:
541 return buffer_seek(ctx->in, data, length, &ctx->error);
542
543 case ZIP_SOURCE_SEEK_WRITE:
544 return buffer_seek(ctx->out, data, length, &ctx->error);
545
546 case ZIP_SOURCE_STAT: {
547 zip_stat_t *st = ZIP_SOURCE_GET_ARGS(zip_stat_t, data, length, &ctx->error);
548
549 if (st == NULL) {
550 return -1;
551 }
552
553 /* TODO: return ENOENT if fname doesn't exist */
554
555 st->valid |= ZIP_STAT_SIZE;
556 st->size = ctx->in->size;
557 return 0;
558 }
559
560 case ZIP_SOURCE_TELL:
561 return (zip_int64_t)ctx->in->offset;
562
563 case ZIP_SOURCE_TELL_WRITE:
564 return (zip_int64_t)ctx->out->offset;
565
566 case ZIP_SOURCE_WRITE:
567 return buffer_write(ctx->out, data, length, &ctx->error);
568
569 case ZIP_SOURCE_SUPPORTS:
570 return zip_source_make_command_bitmap(ZIP_SOURCE_BEGIN_WRITE, ZIP_SOURCE_COMMIT_WRITE, ZIP_SOURCE_CLOSE, ZIP_SOURCE_ERROR, ZIP_SOURCE_FREE, ZIP_SOURCE_OPEN, ZIP_SOURCE_READ, ZIP_SOURCE_REMOVE, ZIP_SOURCE_ROLLBACK_WRITE, ZIP_SOURCE_SEEK, ZIP_SOURCE_SEEK_WRITE, ZIP_SOURCE_STAT, ZIP_SOURCE_TELL, ZIP_SOURCE_TELL_WRITE, ZIP_SOURCE_WRITE, -1);
571
572 default:
573 zip_error_set(&ctx->error, ZIP_ER_OPNOTSUPP, 0);
574 return -1;
575 }
576}