/*
  modify_hole.c -- source for handling huge files that are mostly NULs
  Copyright (C) 2014-2015 Dieter Baron and Thomas Klausner

  This file is part of libzip, a library to manipulate ZIP archives.
  The authors can be contacted at <libzip@nih.at>

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.
  3. The names of the authors may not be used to endorse or promote
     products derived from this software without specific prior
     written permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
33
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "zip.h"
40
41/* public API */
42
43zip_source_t *source_hole_create(const char *, int flags, zip_error_t *);
44
45
/* EFTYPE is not defined on every platform; fall back to EINVAL where it is missing. */
#ifndef EFTYPE
#define EFTYPE EINVAL
#endif


/* Smaller of two values.  Each argument may be evaluated twice. */
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))

/* Fragment size, in bytes, that buffer_new() gives a fresh buffer. */
#define FRAGMENT_SIZE (8*1024)

/*
 * Four-byte markers of the on-disk format:
 *   MARK_BEGIN  starts the file, followed by fragment size and data size
 *   MARK_DATA   is followed by the bytes of one fragment
 *   MARK_NUL    is followed by a 64-bit count of all-NUL fragments
 */
#define MARK_BEGIN "NiH0"
#define MARK_DATA "NiH1"
#define MARK_NUL "NiH2"
58
59
60typedef struct buffer {
61 zip_uint64_t fragment_size;
62 zip_uint8_t **fragment;
63 zip_uint64_t nfragments;
64 zip_uint64_t size;
65 zip_uint64_t offset;
66} buffer_t;
67
68static void buffer_free(buffer_t *buffer);
69static buffer_t *buffer_from_file(const char *fname, int flags, zip_error_t *error);
70static buffer_t *buffer_new(void);
71static zip_int64_t buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
72static int buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error);
73static zip_int64_t buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error);
74static int buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error);
75static zip_int64_t buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
76static zip_uint64_t get_u64(const zip_uint8_t *b);
77static int only_nul(const zip_uint8_t *data, zip_uint64_t length);
78static int write_nuls(zip_uint64_t n, FILE *f);
79static int write_u64(zip_uint64_t u64, FILE *f);
80
81
82typedef struct hole {
83 zip_error_t error;
84 char *fname;
85 buffer_t *in;
86 buffer_t *out;
87} hole_t;
88
89static hole_t *hole_new(const char *fname, int flags, zip_error_t *error);
90static zip_int64_t source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command);
91
92
93zip_source_t *source_hole_create(const char *fname, int flags, zip_error_t *error)
94{
95 hole_t *ud = hole_new(fname, flags, error);
96
97 if (ud == NULL) {
98 return NULL;
99 }
100 return zip_source_function_create(source_hole_cb, ud, error);
101}
102
103
104static void
105buffer_free(buffer_t *buffer)
106{
107 zip_uint64_t i;
108
109 if (buffer == NULL) {
110 return;
111 }
112
113 if (buffer->fragment) {
114 for (i=0; i<buffer->nfragments; i++) {
115 free(buffer->fragment[i]);
116 }
117 free(buffer->fragment);
118 }
119 free(buffer);
120}
121
122
123static buffer_t *
124buffer_from_file(const char *fname, int flags, zip_error_t *error)
125{
126 buffer_t *buffer;
127 FILE *f;
128
129 if ((buffer = buffer_new()) == NULL) {
130 zip_error_set(error, ZIP_ER_MEMORY, 0);
131 return NULL;
132
133 }
134
135 if ((flags & ZIP_TRUNCATE) == 0) {
136 if ((f = fopen(fname, "rb")) == NULL) {
137 if (!(errno == ENOENT && (flags & ZIP_CREATE))) {
138 buffer_free(buffer);
139 return NULL;
140 }
141 }
142 else {
143 if (buffer_read_file(buffer, f, error) < 0) {
144 buffer_free(buffer);
Thomas Klausner068665c2015-04-28 00:58:30 +0200145 fclose(f);
Dieter Baron91374c72014-10-09 22:14:55 +0200146 return NULL;
147 }
Thomas Klausnera0702aa2015-04-22 21:29:00 +0200148 fclose(f);
Dieter Baron91374c72014-10-09 22:14:55 +0200149 }
150 }
151
152 return buffer;
153}
154
155
156static buffer_t *
157buffer_new(void)
158{
159 buffer_t *buffer;
160
161 if ((buffer = (buffer_t *)malloc(sizeof(*buffer))) == NULL) {
162 return NULL;
163 }
164
165 buffer->fragment = NULL;
166 buffer->nfragments = 0;
167 buffer->fragment_size = FRAGMENT_SIZE;
168 buffer->size = 0;
169 buffer->offset = 0;
170
171 return buffer;
172}
173
174
175static zip_int64_t
176buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error)
177{
178 zip_uint64_t n, i, fragment_offset;
179
180 length = MY_MIN(length, buffer->size - buffer->offset);
181
182 if (length == 0) {
183 return 0;
184 }
185 if (length > ZIP_INT64_MAX) {
186 return -1;
187 }
188
189 i = buffer->offset / buffer->fragment_size;
190 fragment_offset = buffer->offset % buffer->fragment_size;
191 n = 0;
192 while (n < length) {
193 zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);
194
195 if (buffer->fragment[i]) {
196 memcpy(data + n, buffer->fragment[i] + fragment_offset, left);
197 }
198 else {
199 memset(data + n, 0, left);
200 }
201
202 n += left;
203 i++;
204 fragment_offset = 0;
205 }
206
207 buffer->offset += n;
208 return (zip_int64_t)n;
209}
210
211
212static int
213buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error)
214{
215 zip_uint8_t b[20];
216 zip_uint64_t i;
217
218 if (fread(b, 20, 1, f) != 1) {
219 zip_error_set(error, ZIP_ER_READ, errno);
220 return -1;
221 }
222
223 if (memcmp(b, MARK_BEGIN, 4) != 0) {
224 zip_error_set(error, ZIP_ER_READ, EFTYPE);
225 return -1;
226 }
227
228 buffer->fragment_size = get_u64(b+4);
229 buffer->size = get_u64(b+12);
Thomas Klausner9cc8f112015-04-28 01:02:03 +0200230
231 if (buffer->size + buffer->fragment_size < buffer->size) {
232 zip_error_set(error, ZIP_ER_MEMORY, 0);
233 return -1;
234 }
Dieter Baron91374c72014-10-09 22:14:55 +0200235 buffer->nfragments = (buffer->size + buffer->fragment_size - 1) / buffer->fragment_size;
Thomas Klausner2e127402015-04-27 15:12:04 +0200236 if ((buffer->nfragments > SIZE_MAX/sizeof(buffer->fragment[0]))
237 || ((buffer->fragment = (zip_uint8_t **)malloc(sizeof(buffer->fragment[0]) * buffer->nfragments)) == NULL)) {
Dieter Baron91374c72014-10-09 22:14:55 +0200238 zip_error_set(error, ZIP_ER_MEMORY, 0);
239 return -1;
240 }
241
242 for (i = 0; i < buffer->nfragments; i++) {
243 buffer->fragment[i] = NULL;
244 }
245
246 i = 0;
247 while (i < buffer->nfragments) {
248 if (fread(b, 4, 1, f) != 1) {
249 zip_error_set(error, ZIP_ER_READ, errno);
250 return -1;
251 }
252
253 if (memcmp(b, MARK_DATA, 4) == 0) {
Thomas Klausner3cc58fa2015-04-29 18:04:25 +0200254 if (buffer->fragment_size > SIZE_MAX) {
255 zip_error_set(error, ZIP_ER_MEMORY, 0);
256 return -1;
257 }
Dieter Baron91374c72014-10-09 22:14:55 +0200258 if ((buffer->fragment[i] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
259 zip_error_set(error, ZIP_ER_MEMORY, 0);
260 return -1;
261 }
262 if (fread(buffer->fragment[i], buffer->fragment_size, 1, f) != 1) {
263 zip_error_set(error, ZIP_ER_READ, errno);
264 return -1;
265 }
266 i++;
267 }
268 else if (memcmp(b, MARK_NUL, 4) == 0) {
269 if (fread(b, 8, 1, f) != 1) {
270 zip_error_set(error, ZIP_ER_READ, errno);
271 return -1;
272 }
273 i += get_u64(b);
274 }
275 else {
276 zip_error_set(error, ZIP_ER_READ, EFTYPE);
277 return -1;
278 }
279 }
280
281 return 0;
282}
283
284static zip_int64_t
285buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error)
286{
Dieter Baronc548a182014-10-10 18:27:50 +0200287 zip_int64_t new_offset = zip_source_seek_compute_offset(buffer->offset, buffer->size, data, length, error);
288
289 if (new_offset < 0) {
Dieter Baron91374c72014-10-09 22:14:55 +0200290 return -1;
291 }
292
Dieter Baronc548a182014-10-10 18:27:50 +0200293 buffer->offset = (zip_uint64_t)new_offset;
Dieter Baron91374c72014-10-09 22:14:55 +0200294 return 0;
295}
296
297
298static int
299buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error)
300{
301 FILE *f = fopen(fname, "wb");
302 zip_uint64_t i;
303 zip_uint64_t nul_run;
304
305 if (f == NULL) {
306 zip_error_set(error, ZIP_ER_OPEN, errno);
307 return -1;
308 }
309
310 fwrite(MARK_BEGIN, 4, 1, f);
311 write_u64(buffer->fragment_size, f);
312 write_u64(buffer->size, f);
313
314 nul_run = 0;
315 for (i=0; i * buffer->fragment_size <buffer->size; i++) {
316 if (buffer->fragment[i] == NULL || only_nul(buffer->fragment[i], buffer->fragment_size)) {
317 nul_run++;
318 }
319 else {
320 if (nul_run > 0) {
321 write_nuls(nul_run, f);
322 nul_run = 0;
323 }
324 fwrite(MARK_DATA, 4, 1, f);
325
326 fwrite(buffer->fragment[i], 1, buffer->fragment_size, f);
327 }
328 }
329
330 if (nul_run > 0) {
331 write_nuls(nul_run, f);
332 }
333
334 if (fclose(f) != 0) {
335 zip_error_set(error, ZIP_ER_WRITE, errno);
336 return -1;
337 }
338
339 return 0;
340}
341
342
343static zip_int64_t
344buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error)
345{
346 if (buffer->offset + length > buffer->nfragments * buffer->fragment_size) {
347 zip_uint64_t needed_fragments = (buffer->offset + length + buffer->fragment_size - 1) / buffer->fragment_size;
348 zip_uint64_t new_capacity = buffer->nfragments;
349 zip_uint64_t i;
350
351 if (new_capacity == 0) {
352 new_capacity = 4;
353 }
354 while (new_capacity < needed_fragments) {
355 new_capacity *= 2;
356 }
357
358 zip_uint8_t **fragment = realloc(buffer->fragment, new_capacity * sizeof(*fragment));
359
360 if (fragment == NULL) {
361 zip_error_set(error, ZIP_ER_MEMORY, 0);
362 return -1;
363 }
364
365 for (i = buffer->nfragments; i < new_capacity; i++) {
366 fragment[i] = NULL;
367 }
368
369 buffer->fragment = fragment;
370 buffer->nfragments = new_capacity;
371 }
372
373 if (!only_nul(data, length)) {
374 zip_uint64_t idx, n, fragment_offset;
375
376 idx = buffer->offset / buffer->fragment_size;
377 fragment_offset = buffer->offset % buffer->fragment_size;
378 n = 0;
379
380 while (n < length) {
381 zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);
382
383 if (buffer->fragment[idx] == NULL) {
384 if ((buffer->fragment[idx] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
385 zip_error_set(error, ZIP_ER_MEMORY, 0);
386 return -1;
387 }
388 memset(buffer->fragment[idx], 0, buffer->fragment_size);
389 }
390 memcpy(buffer->fragment[idx] + fragment_offset, data + n, left);
391
392 n += left;
393 idx++;
394 fragment_offset = 0;
395 }
396 }
397
398 buffer->offset += length;
399 if (buffer->offset > buffer->size) {
400 buffer->size = buffer->offset;
401 }
402
403 return (zip_int64_t)length;
404}
405
406
407static zip_uint64_t
408get_u64(const zip_uint8_t *b)
409{
410 zip_uint64_t i;
411
412 i = (zip_uint64_t)b[0] << 56 | (zip_uint64_t)b[1] << 48 | (zip_uint64_t)b[2] << 40 | (zip_uint64_t)b[3] << 32 | (zip_uint64_t)b[4] << 24 | (zip_uint64_t)b[5] << 16 | (zip_uint64_t)b[6] << 8 | (zip_uint64_t)b[7];
413
414 return i;
415}
416
417
418static int
419only_nul(const zip_uint8_t *data, zip_uint64_t length)
420{
421 zip_uint64_t i;
422
423 for (i=0; i< length; i++) {
424 if (data[i] != '\0') {
425 return 0;
426 }
427 }
428
429 return 1;
430}
431
432
433static int
434write_nuls(zip_uint64_t n, FILE *f)
435{
436 if (fwrite(MARK_NUL, 4, 1, f) != 1) {
437 return -1;
438 }
439 return write_u64(n, f);
440}
441
442
443static int
444write_u64(zip_uint64_t u64, FILE *f)
445{
446 zip_uint8_t b[8];
447
Thomas Klausner8cff4bd2014-12-02 12:17:39 +0100448 b[0] = (zip_uint8_t)((u64 >> 56) & 0xff);
449 b[1] = (zip_uint8_t)((u64 >> 48) & 0xff);
450 b[2] = (zip_uint8_t)((u64 >> 40) & 0xff);
451 b[3] = (zip_uint8_t)((u64 >> 32) & 0xff);
452 b[4] = (zip_uint8_t)((u64 >> 24) & 0xff);
453 b[5] = (zip_uint8_t)((u64 >> 16) & 0xff);
454 b[6] = (zip_uint8_t)((u64 >> 8) & 0xff);
455 b[7] = (zip_uint8_t)(u64 & 0xff);
Dieter Baron91374c72014-10-09 22:14:55 +0200456
457 return fwrite(b, 8, 1, f) == 1 ? 0 : -1;
458}
459
460
461static void
462hole_free(hole_t *hole) {
463 if (hole == NULL) {
464 return;
465 }
466 zip_error_fini(&hole->error);
467 buffer_free(hole->in);
468 buffer_free(hole->out);
469 free(hole->fname);
470 free(hole);
471}
472
473
474static hole_t *
475hole_new(const char *fname, int flags, zip_error_t *error)
476{
477 hole_t *ctx = (hole_t *)malloc(sizeof(*ctx));
478
479 if (ctx == NULL) {
480 zip_error_set(error, ZIP_ER_MEMORY, 0);
481 return NULL;
482 }
483
484 if ((ctx->fname = strdup(fname)) == NULL) {
485 free(ctx);
486 zip_error_set(error, ZIP_ER_MEMORY, 0);
487 return NULL;
488 }
489
490 if ((ctx->in = buffer_from_file(fname, flags, error)) == NULL) {
491 free(ctx);
492 return NULL;
493 }
494
495 zip_error_init(&ctx->error);
496 ctx->out = NULL;
497
498 return ctx;
499}
500
501
502static zip_int64_t
503source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command)
504{
505 hole_t *ctx = (hole_t *)ud;
506
507 switch (command) {
508 case ZIP_SOURCE_BEGIN_WRITE:
509 ctx->out = buffer_new();
510 return 0;
511
512 case ZIP_SOURCE_CLOSE:
513 return 0;
514
515 case ZIP_SOURCE_COMMIT_WRITE:
516 if (buffer_to_file(ctx->out, ctx->fname, &ctx->error) < 0) {
517 return -1;
518 }
519 buffer_free(ctx->in);
520 ctx->in = ctx->out;
521 ctx->out = NULL;
522 return 0;
523
524 case ZIP_SOURCE_ERROR:
525 return zip_error_to_data(&ctx->error, data, length);
526
527 case ZIP_SOURCE_FREE:
528 hole_free(ctx);
529 return 0;
530
531 case ZIP_SOURCE_OPEN:
532 ctx->in->offset = 0;
533 return 0;
534
535 case ZIP_SOURCE_READ:
536 return buffer_read(ctx->in, data, length, &ctx->error);
537
538 case ZIP_SOURCE_REMOVE:
539 buffer_free(ctx->in);
540 ctx->in = buffer_new();
541 buffer_free(ctx->out);
542 ctx->out = NULL;
Thomas Klausner1f6d0152015-04-27 14:59:55 +0200543 (void)remove(ctx->fname);
Dieter Baron91374c72014-10-09 22:14:55 +0200544 return 0;
545
546 case ZIP_SOURCE_ROLLBACK_WRITE:
547 buffer_free(ctx->out);
548 ctx->out = NULL;
549 return 0;
550
551 case ZIP_SOURCE_SEEK:
552 return buffer_seek(ctx->in, data, length, &ctx->error);
553
554 case ZIP_SOURCE_SEEK_WRITE:
555 return buffer_seek(ctx->out, data, length, &ctx->error);
556
557 case ZIP_SOURCE_STAT: {
558 zip_stat_t *st = ZIP_SOURCE_GET_ARGS(zip_stat_t, data, length, &ctx->error);
559
560 if (st == NULL) {
561 return -1;
562 }
563
564 /* TODO: return ENOENT if fname doesn't exist */
565
566 st->valid |= ZIP_STAT_SIZE;
567 st->size = ctx->in->size;
568 return 0;
569 }
570
571 case ZIP_SOURCE_TELL:
572 return (zip_int64_t)ctx->in->offset;
573
574 case ZIP_SOURCE_TELL_WRITE:
575 return (zip_int64_t)ctx->out->offset;
576
577 case ZIP_SOURCE_WRITE:
578 return buffer_write(ctx->out, data, length, &ctx->error);
579
580 case ZIP_SOURCE_SUPPORTS:
581 return zip_source_make_command_bitmap(ZIP_SOURCE_BEGIN_WRITE, ZIP_SOURCE_COMMIT_WRITE, ZIP_SOURCE_CLOSE, ZIP_SOURCE_ERROR, ZIP_SOURCE_FREE, ZIP_SOURCE_OPEN, ZIP_SOURCE_READ, ZIP_SOURCE_REMOVE, ZIP_SOURCE_ROLLBACK_WRITE, ZIP_SOURCE_SEEK, ZIP_SOURCE_SEEK_WRITE, ZIP_SOURCE_STAT, ZIP_SOURCE_TELL, ZIP_SOURCE_TELL_WRITE, ZIP_SOURCE_WRITE, -1);
582
583 default:
584 zip_error_set(&ctx->error, ZIP_ER_OPNOTSUPP, 0);
585 return -1;
586 }
587}