Add encoding support to zip_{get,set}_{file,archive}_comment,
and handle interaction in zip_{get,set}_name.
Set general purpose bit flag 11 when writing out UTF-8 names or comments.
Add new error type ZIP_ER_ENCMISMATCH for the case where the
encodings of file name and file comment don't match.
Document changes and workaround for ZIP_ER_ENCMISMATCH.
Update copyright years.
--HG--
branch : HEAD
diff --git a/lib/zip.h b/lib/zip.h
index 58fd0cd..205d9e0 100644
--- a/lib/zip.h
+++ b/lib/zip.h
@@ -3,7 +3,7 @@
/*
zip.h -- exported declarations.
- Copyright (C) 1999-2011 Dieter Baron and Thomas Klausner
+ Copyright (C) 1999-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -119,6 +119,7 @@
#define ZIP_ER_RDONLY 25 /* N Read-only archive */
#define ZIP_ER_NOPASSWD 26 /* N No password provided */
#define ZIP_ER_WRONGPASSWD 27 /* N Wrong password provided */
+#define ZIP_ER_ENCMISMATCH 28 /* N Encoding of name and comment do not match */
/* type of system error value */
diff --git a/lib/zip_close.c b/lib/zip_close.c
index 323d4ce..c6ea92b 100644
--- a/lib/zip_close.c
+++ b/lib/zip_close.c
@@ -82,6 +82,7 @@
struct filelist *filelist;
int reopen_on_error;
int new_torrentzip;
+ enum zip_encoding_type com_enc, enc;
reopen_on_error = 0;
@@ -185,15 +186,8 @@
/* set/update file name */
if ((za->entry[i].changes.valid & ZIP_DIRENT_FILENAME) == 0) {
- if (za->entry[i].state == ZIP_ST_ADDED) {
- /* XXX: this can't happen, remove code */
- de.settable.filename = strdup("-");
- cd->entry[j].settable.filename = "-";
- }
- else {
- de.settable.filename = strdup(za->cdir->entry[i].settable.filename);
- cd->entry[j].settable.filename = za->cdir->entry[i].settable.filename;
- }
+ de.settable.filename = strdup(za->cdir->entry[i].settable.filename);
+ cd->entry[j].settable.filename = za->cdir->entry[i].settable.filename;
de.settable.valid |= ZIP_DIRENT_FILENAME;
cd->entry[j].settable.valid |= ZIP_DIRENT_FILENAME;
}
@@ -256,6 +250,17 @@
cd->entry[j].settable.comment_len = za->entry[i].changes.comment_len;
}
+ /* set general purpose bit flag for file name/comment encoding */
+ enc = _zip_guess_encoding(de.settable.filename, strlen(de.settable.filename));
+ com_enc = _zip_guess_encoding(cd->entry[i].settable.comment, cd->entry[i].settable.comment_len);
+ if ((enc == ZIP_ENCODING_UTF8 && com_enc == ZIP_ENCODING_ASCII) ||
+ (enc == ZIP_ENCODING_ASCII && com_enc == ZIP_ENCODING_UTF8 ) ||
+ (enc == ZIP_ENCODING_UTF8 && com_enc == ZIP_ENCODING_UTF8 ))
+ de.bitflags |= ZIP_GPBF_ENCODING_UTF_8;
+ else
+ de.bitflags &= ~ZIP_GPBF_ENCODING_UTF_8;
+ cd->entry[i].bitflags = de.bitflags;
+
cd->entry[j].offset = ftello(out);
if (new_data) {
diff --git a/lib/zip_dirent.c b/lib/zip_dirent.c
index 1ec11d8..4006a5b 100644
--- a/lib/zip_dirent.c
+++ b/lib/zip_dirent.c
@@ -61,6 +61,7 @@
for (i=0; i<cd->nentry; i++)
_zip_dirent_finalize(cd->entry+i);
free(cd->comment);
+ free(cd->comment_converted);
free(cd->entry);
free(cd);
}
@@ -119,6 +120,8 @@
cd->size = cd->offset = 0;
cd->comment = NULL;
cd->comment_len = 0;
+ cd->comment_type = ZIP_ENCODING_UNKNOWN;
+ cd->comment_converted = NULL;
return cd;
}
@@ -164,6 +167,8 @@
{
free(zde->filename_converted);
zde->filename_converted = NULL;
+ free(zde->comment_converted);
+ zde->comment_converted = NULL;
free(zde->settable.filename);
zde->settable.filename = NULL;
free(zde->settable.extrafield);
@@ -197,6 +202,8 @@
de->offset = 0;
de->fn_type = ZIP_ENCODING_UNKNOWN;
de->filename_converted = NULL;
+ de->fc_type = ZIP_ENCODING_UNKNOWN;
+ de->comment_converted = NULL;
}
@@ -270,6 +277,7 @@
zde->version_needed = _zip_read2(&cur);
zde->bitflags = _zip_read2(&cur);
zde->settable.comp_method = _zip_read2(&cur);
+ zde->settable.valid |= ZIP_DIRENT_COMP_METHOD;
/* convert to time_t */
dostime = _zip_read2(&cur);
@@ -355,6 +363,12 @@
}
if (strlen(zde->settable.filename) != filename_len)
return -1;
+ zde->settable.valid |= ZIP_DIRENT_FILENAME;
+
+ if (zde->settable.comment_len)
+ zde->settable.valid |= ZIP_DIRENT_COMMENT;
+ if (zde->settable.extrafield_len)
+ zde->settable.valid |= ZIP_DIRENT_EXTRAFIELD;
if (bufp)
*bufp = cur;
@@ -406,6 +420,7 @@
de->version_needed = 20; /* 2.0 */
de->bitflags = 2; /* maximum compression */
de->settable.comp_method = ZIP_CM_DEFLATE;
+ de->settable.valid |= ZIP_DIRENT_COMP_METHOD;
de->last_mod = last_mod;
de->disk_number = 0;
@@ -419,6 +434,7 @@
free(de->settable.comment);
de->settable.comment = NULL;
de->settable.comment_len = 0;
+ de->settable.valid &= ~(ZIP_DIRENT_COMMENT|ZIP_DIRENT_EXTRAFIELD);
}
diff --git a/lib/zip_err_str.c b/lib/zip_err_str.c
index 8fb6003..7cf9b42 100644
--- a/lib/zip_err_str.c
+++ b/lib/zip_err_str.c
@@ -36,6 +36,7 @@
"Read-only archive",
"No password provided",
"Wrong password provided",
+ "Encoding of name and comment do not match",
};
const int _zip_nerr_str = sizeof(_zip_err_str)/sizeof(_zip_err_str[0]);
@@ -73,4 +74,5 @@
N,
N,
N,
+ N,
};
diff --git a/lib/zip_get_archive_comment.c b/lib/zip_get_archive_comment.c
index 669eb70..a2e219b 100644
--- a/lib/zip_get_archive_comment.c
+++ b/lib/zip_get_archive_comment.c
@@ -1,6 +1,6 @@
/*
zip_get_archive_comment.c -- get archive comment
- Copyright (C) 2006-2007 Dieter Baron and Thomas Klausner
+ Copyright (C) 2006-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -33,6 +33,8 @@
+#include <string.h>
+
#include "zipint.h"
@@ -40,12 +42,31 @@
ZIP_EXTERN const char *
zip_get_archive_comment(struct zip *za, int *lenp, int flags)
{
+ const char *ret;
if ((flags & ZIP_FL_UNCHANGED)
|| (za->ch_comment_len == -1)) {
if (za->cdir) {
if (lenp != NULL)
*lenp = za->cdir->comment_len;
- return za->cdir->comment;
+ ret = za->cdir->comment;
+
+ if (flags & ZIP_FL_NAME_RAW)
+ return ret;
+
+ /* start guessing */
+ if (za->cdir->comment_type == ZIP_ENCODING_UNKNOWN)
+ za->cdir->comment_type = _zip_guess_encoding(ret, za->cdir->comment_len);
+
+ if (((flags & ZIP_FL_NAME_STRICT) && (za->cdir->comment_type != ZIP_ENCODING_ASCII))
+ || (za->cdir->comment_type == ZIP_ENCODING_CP437)) {
+ if (za->cdir->comment_converted == NULL)
+ za->cdir->comment_converted = _zip_cp437_to_utf8(ret, za->cdir->comment_len, &za->error);
+ ret = za->cdir->comment_converted;
+ if (lenp != NULL)
+ *lenp = strlen(ret);
+ }
+
+ return ret;
}
else {
if (lenp != NULL)
@@ -53,7 +74,8 @@
return NULL;
}
}
-
+
+ /* already UTF-8, no conversion needed */
if (lenp != NULL)
*lenp = za->ch_comment_len;
return za->ch_comment;
diff --git a/lib/zip_get_file_comment.c b/lib/zip_get_file_comment.c
index aef0a7a..fc8c1ae 100644
--- a/lib/zip_get_file_comment.c
+++ b/lib/zip_get_file_comment.c
@@ -1,6 +1,6 @@
/*
zip_get_file_comment.c -- get file comment
- Copyright (C) 2006-2007 Dieter Baron and Thomas Klausner
+ Copyright (C) 2006-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -33,6 +33,8 @@
+#include <string.h>
+
#include "zipint.h"
@@ -40,12 +42,18 @@
ZIP_EXTERN const char *
zip_get_file_comment(struct zip *za, zip_uint64_t idx, int *lenp, int flags)
{
+ const char *ret;
+
if (idx >= za->nentry) {
_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
return NULL;
}
if ((flags & ZIP_FL_UNCHANGED) || (za->entry[idx].changes.valid & ZIP_DIRENT_COMMENT) == 0) {
+ if (za->cdir == NULL) {
+ _zip_error_set(&za->error, ZIP_ER_NOENT, 0);
+ return NULL;
+ }
if (idx >= za->cdir->nentry) {
_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
return NULL;
@@ -53,9 +61,32 @@
if (lenp != NULL)
*lenp = za->cdir->entry[idx].settable.comment_len;
- return za->cdir->entry[idx].settable.comment;
+ ret = za->cdir->entry[idx].settable.comment;
+
+ if (flags & ZIP_FL_NAME_RAW)
+ return ret;
+
+ /* file comment already is UTF-8? */
+ if (za->cdir->entry[idx].bitflags & ZIP_GPBF_ENCODING_UTF_8)
+ return ret;
+
+ /* undeclared, start guessing */
+ if (za->cdir->entry[idx].fc_type == ZIP_ENCODING_UNKNOWN)
+ za->cdir->entry[idx].fc_type = _zip_guess_encoding(ret, za->cdir->entry[idx].settable.comment_len);
+
+ if (((flags & ZIP_FL_NAME_STRICT) && (za->cdir->entry[idx].fc_type != ZIP_ENCODING_ASCII))
+ || (za->cdir->entry[idx].fc_type == ZIP_ENCODING_CP437)) {
+ if (za->cdir->entry[idx].comment_converted == NULL)
+ za->cdir->entry[idx].comment_converted = _zip_cp437_to_utf8(ret, za->cdir->entry[idx].settable.comment_len, &za->error);
+ ret = za->cdir->entry[idx].comment_converted;
+ if (lenp != NULL)
+ *lenp = strlen(ret);
+ }
+
+ return ret;
}
+ /* already UTF-8, no conversion necessary */
if (lenp != NULL)
*lenp = za->entry[idx].changes.comment_len;
return za->entry[idx].changes.comment;
diff --git a/lib/zip_get_name.c b/lib/zip_get_name.c
index 9e9e0ee..4cef50d 100644
--- a/lib/zip_get_name.c
+++ b/lib/zip_get_name.c
@@ -33,6 +33,8 @@
+#include <string.h>
+
#include "zipint.h"
diff --git a/lib/zip_set_archive_comment.c b/lib/zip_set_archive_comment.c
index 3bb85d1..bbb9a73 100644
--- a/lib/zip_set_archive_comment.c
+++ b/lib/zip_set_archive_comment.c
@@ -1,6 +1,6 @@
/*
zip_set_archive_comment.c -- set archive comment
- Copyright (C) 2006-2009 Dieter Baron and Thomas Klausner
+ Copyright (C) 2006-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -55,6 +55,11 @@
return -1;
}
+ if (_zip_guess_encoding(comment, len) == ZIP_ENCODING_CP437) {
+ _zip_error_set(&za->error, ZIP_ER_INVAL, 0);
+ return -1;
+ }
+
if (len > 0) {
if ((tmpcom=(char *)_zip_memdup(comment, len, &za->error)) == NULL)
return -1;
diff --git a/lib/zip_set_file_comment.c b/lib/zip_set_file_comment.c
index 11b4504..bc869af 100644
--- a/lib/zip_set_file_comment.c
+++ b/lib/zip_set_file_comment.c
@@ -1,6 +1,6 @@
/*
zip_set_file_comment.c -- set comment for file in archive
- Copyright (C) 2006-2009 Dieter Baron and Thomas Klausner
+ Copyright (C) 2006-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -34,6 +34,7 @@
#include <stdlib.h>
+#include <string.h>
#include "zipint.h"
@@ -44,6 +45,8 @@
const char *comment, int len)
{
char *tmpcom;
+ const char *name;
+ enum zip_encoding_type com_enc, enc;
if (idx >= za->nentry
|| len < 0 || len > MAXCOMLEN
@@ -57,6 +60,20 @@
return -1;
}
+ if ((com_enc=_zip_guess_encoding(comment, len)) == ZIP_ENCODING_CP437) {
+ _zip_error_set(&za->error, ZIP_ER_INVAL, 0);
+ return -1;
+ }
+
+ if ((name=zip_get_name(za, idx, 0)) == NULL)
+ return -1;
+ enc = _zip_guess_encoding(name, strlen(name));
+
+ if (enc == ZIP_ENCODING_CP437 && com_enc == ZIP_ENCODING_UTF8) {
+ _zip_error_set(&za->error, ZIP_ER_ENCMISMATCH, 0);
+ return -1;
+ }
+
if (len > 0) {
if ((tmpcom=(char *)_zip_memdup(comment, len, &za->error)) == NULL)
return -1;
diff --git a/lib/zip_set_name.c b/lib/zip_set_name.c
index 2a50f9e..d60d48e 100644
--- a/lib/zip_set_name.c
+++ b/lib/zip_set_name.c
@@ -1,6 +1,6 @@
/*
zip_set_name.c -- rename helper function
- Copyright (C) 1999-2007 Dieter Baron and Thomas Klausner
+ Copyright (C) 1999-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -44,14 +44,17 @@
_zip_set_name(struct zip *za, zip_uint64_t idx, const char *name)
{
char *s;
+ const char *com;
+ int comlen;
zip_int64_t i;
+ enum zip_encoding_type enc, com_enc;
if (idx >= za->nentry || name == NULL) {
_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
return -1;
}
- if (_zip_guess_encoding(name, strlen(name)) == ZIP_ENCODING_CP437) {
+ if ((enc=_zip_guess_encoding(name, strlen(name))) == ZIP_ENCODING_CP437) {
_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
return -1;
}
@@ -64,7 +67,17 @@
/* no effective name change */
if (i == idx)
return 0;
-
+
+ com = zip_get_file_comment(za, idx, &comlen, 0);
+ if (com == NULL)
+ com_enc = ZIP_ENCODING_ASCII;
+ else
+ com_enc = _zip_guess_encoding(com, comlen);
+ if (com_enc == ZIP_ENCODING_CP437 && enc == ZIP_ENCODING_UTF8) {
+ _zip_error_set(&za->error, ZIP_ER_ENCMISMATCH, 0);
+ return -1;
+ }
+
if ((s=strdup(name)) == NULL) {
_zip_error_set(&za->error, ZIP_ER_MEMORY, 0);
return -1;
diff --git a/lib/zipint.h b/lib/zipint.h
index dbf8a62..2118e6d 100644
--- a/lib/zipint.h
+++ b/lib/zipint.h
@@ -3,7 +3,7 @@
/*
zipint.h -- internal declarations.
- Copyright (C) 1999-2011 Dieter Baron and Thomas Klausner
+ Copyright (C) 1999-2012 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
@@ -259,21 +259,25 @@
unsigned short int_attrib; /* (c) internal file attributes */
unsigned int ext_attrib; /* (c) external file attributes */
unsigned int offset; /* (c) offset of local header */
- unsigned short fn_type; /* encoding (autorecognition) */
+ unsigned short fn_type; /* file name encoding (autorecognition) */
char *filename_converted; /* file name (autoconverted) */
+ unsigned short fc_type; /* file comment encoding (autorecognition) */
+ char *comment_converted; /* file comment (autoconverted) */
struct zip_dirent_settable settable;
};
/* zip archive central directory */
struct zip_cdir {
- struct zip_dirent *entry; /* directory entries */
- int nentry; /* number of entries */
+ struct zip_dirent *entry; /* directory entries */
+ int nentry; /* number of entries */
- unsigned int size; /* size of central direcotry */
- unsigned int offset; /* offset of central directory in file */
- char *comment; /* zip archive comment */
- unsigned short comment_len; /* length of zip archive comment */
+ unsigned int size; /* size of central directory */
+ unsigned int offset; /* offset of central directory in file */
+ char *comment; /* zip archive comment */
+ unsigned short comment_len; /* length of zip archive comment */
+ unsigned short comment_type; /* archive comment encoding (autorecognition) */
+ char *comment_converted; /* archive comment (autoconverted) */
};