Fix encoding handling in `zip_name_locate()`.
diff --git a/NEWS.md b/NEWS.md
index 0bc9fec..c0528a3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
* Add `zip_source_is_seekable()`.
* Improve compatibility with WinAES.
+* Fix encoding handling in `zip_name_locate()`.
1.8.0 [2021-06-18]
==================
diff --git a/lib/zip_name_locate.c b/lib/zip_name_locate.c
index 8a88404..5bfb41c 100644
--- a/lib/zip_name_locate.c
+++ b/lib/zip_name_locate.c
@@ -49,6 +49,7 @@
zip_int64_t
_zip_name_locate(zip_t *za, const char *fname, zip_flags_t flags, zip_error_t *error) {
int (*cmp)(const char *, const char *);
+ zip_string_t *str = NULL;
const char *fn, *p;
zip_uint64_t i;
@@ -60,7 +61,17 @@
return -1;
}
- if (flags & (ZIP_FL_NOCASE | ZIP_FL_NODIR | ZIP_FL_ENC_CP437)) {
+ if ((flags & (ZIP_FL_ENC_UTF_8 | ZIP_FL_ENC_RAW)) == 0 && fname[0] != '\0') {
+ if ((str = _zip_string_new((const zip_uint8_t *)fname, strlen(fname), flags, error)) == NULL) {
+ return -1;
+ }
+ if ((fname = (const char *)_zip_string_get(str, NULL, 0, error)) == NULL) {
+ _zip_string_free(str);
+ return -1;
+ }
+ }
+
+ if (flags & (ZIP_FL_NOCASE | ZIP_FL_NODIR | ZIP_FL_ENC_RAW | ZIP_FL_ENC_STRICT)) {
/* can't use hash table */
cmp = (flags & ZIP_FL_NOCASE) ? strcasecmp : strcmp;
@@ -79,14 +90,18 @@
if (cmp(fname, fn) == 0) {
_zip_error_clear(error);
+ _zip_string_free(str);
return (zip_int64_t)i;
}
}
zip_error_set(error, ZIP_ER_NOENT, 0);
+ _zip_string_free(str);
return -1;
}
else {
- return _zip_hash_lookup(za->names, (const zip_uint8_t *)fname, flags, error);
+ zip_int64_t ret = _zip_hash_lookup(za->names, (const zip_uint8_t *)fname, flags, error);
+ _zip_string_free(str);
+ return ret;
}
}
diff --git a/lib/zipint.h b/lib/zipint.h
index 6a26dc3..dc4ec2f 100644
--- a/lib/zipint.h
+++ b/lib/zipint.h
@@ -611,12 +611,12 @@
zip_source_t *_zip_source_zip_new(zip_t *, zip_uint64_t, zip_flags_t, zip_uint64_t, zip_uint64_t, const char *, zip_error_t *error);
int _zip_stat_merge(zip_stat_t *dst, const zip_stat_t *src, zip_error_t *error);
-int _zip_string_equal(const zip_string_t *, const zip_string_t *);
-void _zip_string_free(zip_string_t *);
-zip_uint32_t _zip_string_crc32(const zip_string_t *);
-const zip_uint8_t *_zip_string_get(zip_string_t *, zip_uint32_t *, zip_flags_t, zip_error_t *);
-zip_uint16_t _zip_string_length(const zip_string_t *);
-zip_string_t *_zip_string_new(const zip_uint8_t *, zip_uint16_t, zip_flags_t, zip_error_t *);
+int _zip_string_equal(const zip_string_t *a, const zip_string_t *b);
+void _zip_string_free(zip_string_t *string);
+zip_uint32_t _zip_string_crc32(const zip_string_t *string);
+const zip_uint8_t *_zip_string_get(zip_string_t *string, zip_uint32_t *lenp, zip_flags_t flags, zip_error_t *error);
+zip_uint16_t _zip_string_length(const zip_string_t *string);
+zip_string_t *_zip_string_new(const zip_uint8_t *raw, zip_uint16_t length, zip_flags_t flags, zip_error_t *error);
int _zip_string_write(zip_t *za, const zip_string_t *string);
bool _zip_winzip_aes_decrypt(zip_winzip_aes_t *ctx, zip_uint8_t *data, zip_uint64_t length);
bool _zip_winzip_aes_encrypt(zip_winzip_aes_t *ctx, zip_uint8_t *data, zip_uint64_t length);
diff --git a/man/zip_name_locate.mdoc b/man/zip_name_locate.mdoc
index 6ffe5c3..98c1dce 100644
--- a/man/zip_name_locate.mdoc
+++ b/man/zip_name_locate.mdoc
@@ -29,7 +29,7 @@
.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
.\" IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd September 22, 2020
+.Dd March 15, 2022
.Dt ZIP_NAME_LOCATE 3
.Os
.Sh NAME
@@ -51,12 +51,29 @@
If
.Ar archive
does not contain a file with that name, \-1 is returned.
+.Pp
+If neither
+.Dv ZIP_FL_ENC_RAW
+nor
+.Dv ZIP_FL_ENC_STRICT
+is specified, guess the encoding of the name in the ZIP archive and convert it
+to UTF-8, if necessary, before comparing.
+.Pp
+If neither
+.Dv ZIP_FL_ENC_CP437
+nor
+.Dv ZIP_FL_ENC_UTF_8
+is specified, guess the encoding of
+.Ar fname .
+.Pp
+Only CP-437 and UTF-8 are recognized.
+.Pp
The
.Fa flags
are specified by
.Em or Ns No 'ing
the following values, or 0 for none of them.
-.Bl -tag -offset indent -width ZIP_FL_NOCASE
+.Bl -tag -offset indent -width ZIP_FL_ENC_STRICT
.It Dv ZIP_FL_NOCASE
Ignore case distinctions.
(Will only work well if the file names are ASCII.)
@@ -68,18 +85,24 @@
With this flag,
.Fn zip_name_locate
will be slow for archives with many files.
-.It Dv ZIP_FL_ENC_RAW
-Compare against the unmodified names as it is in the ZIP archive.
.It Dv ZIP_FL_ENC_GUESS
-(Default.)
-Guess the encoding of the name in the ZIP archive and convert it
-to UTF-8, if necessary, before comparing.
-(Only CP-437 and UTF-8 are recognized.)
+This flag has no effect (its value is 0); it can be used to explicitly denote the absence of encoding flags.
+.It Dv ZIP_FL_ENC_RAW
+Compare
+.Ar fname
+against the unmodified names as they are in the ZIP archive, without converting them to UTF-8.
.It Dv ZIP_FL_ENC_STRICT
Follow the ZIP specification and expect CP-437 encoded names in
the ZIP archive (except if they are explicitly marked as UTF-8).
-Convert it to UTF-8 before comparing.
+Convert them to UTF-8 before comparing.
+.It Dv ZIP_FL_ENC_CP437
+.Ar fname
+is encoded as CP-437.
+.It Dv ZIP_FL_ENC_UTF_8
+.Ar fname
+is encoded as UTF-8.
.El
+.Pp
.Em Note :
ASCII is a subset of both CP-437 and UTF-8.
.Sh RETURN VALUES
diff --git a/man/ziptool.mdoc b/man/ziptool.mdoc
index 751eeb0..d7ee340 100644
--- a/man/ziptool.mdoc
+++ b/man/ziptool.mdoc
@@ -29,7 +29,7 @@
.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
.\" IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd December 18, 2017
+.Dd March 15, 2022
.Dt ZIPTOOL 1
.Os
.Sh NAME
@@ -257,9 +257,14 @@
.Ar index .
.El
.Ss Flags
-Some commands take flag arguments.
+Some commands take flag arguments. Each character in the argument sets the corresponding flag. Use 0 or the empty string for no flags.
+.Pp
Supported flags are:
.Bl -tag -width MMM -compact -offset indent
+.It Ar 4
+.Dv ZIP_FL_ENC_CP437
+.It Ar 8
+.Dv ZIP_FL_ENC_UTF_8
.It Ar C
.Dv ZIP_FL_NOCASE
.It Ar c
@@ -268,6 +273,10 @@
.Dv ZIP_FL_NODIR
.It Ar l
.Dv ZIP_FL_LOCAL
+.It Ar r
+.Dv ZIP_FL_ENC_RAW
+.It Ar s
+.Dv ZIP_FL_ENC_STRICT
.It Ar u
.Dv ZIP_FL_UNCHANGED
.El
diff --git a/regress/name_locate-cp437.test b/regress/name_locate-cp437.test
new file mode 100644
index 0000000..9c15e04
--- /dev/null
+++ b/regress/name_locate-cp437.test
@@ -0,0 +1,9 @@
+description tests for various encoding flags for zip_name_locate
+args test.zip name_locate " " 0 name_locate " " 4 name_locate " " 8 name_locate " " r name_locate " " s
+return 0
+file test.zip test-cp437.zip test-cp437.zip
+stdout name ' ' using flags '0' found at index 9
+stdout name ' ' using flags '4' found at index 9
+stdout name ' ' using flags 'r' found at index 9
+stdout name ' ' using flags 's' found at index 9
+stderr can't find entry with name ' ' using flags '8'
diff --git a/regress/name_locate-utf8.test b/regress/name_locate-utf8.test
new file mode 100644
index 0000000..730f709
--- /dev/null
+++ b/regress/name_locate-utf8.test
@@ -0,0 +1,9 @@
+description tests for various encoding flags for zip_name_locate
+args test.zip name_locate "æÆôöòûùÿÖÜ¢£¥₧ƒá" 0 name_locate "æÆôöòûùÿÖÜ¢£¥₧ƒá" 4 name_locate "æÆôöòûùÿÖÜ¢£¥₧ƒá" 8 name_locate "æÆôöòûùÿÖÜ¢£¥₧ƒá" r name_locate "æÆôöòûùÿÖÜ¢£¥₧ƒá" s
+return 0
+file test.zip test-cp437.zip test-cp437.zip
+stdout name 'æÆôöòûùÿÖÜ¢£¥₧ƒá' using flags '0' found at index 9
+stdout name 'æÆôöòûùÿÖÜ¢£¥₧ƒá' using flags '8' found at index 9
+stdout name 'æÆôöòûùÿÖÜ¢£¥₧ƒá' using flags 's' found at index 9
+stderr can't find entry with name 'æÆôöòûùÿÖÜ¢£¥₧ƒá' using flags '4'
+stderr can't find entry with name 'æÆôöòûùÿÖÜ¢£¥₧ƒá' using flags 'r'
diff --git a/src/ziptool.c b/src/ziptool.c
index db6f3e3..9861da3 100644
--- a/src/ziptool.c
+++ b/src/ziptool.c
@@ -626,6 +626,14 @@
flags |= ZIP_FL_LOCAL;
if (strchr(arg, 'u') != NULL)
flags |= ZIP_FL_UNCHANGED;
+ if (strchr(arg, '8') != NULL)
+ flags |= ZIP_FL_ENC_UTF_8;
+ if (strchr(arg, '4') != NULL)
+ flags |= ZIP_FL_ENC_CP437;
+ if (strchr(arg, 'r') != NULL)
+ flags |= ZIP_FL_ENC_RAW;
+ if (strchr(arg, 's') != NULL)
+ flags |= ZIP_FL_ENC_STRICT;
return flags;
}
@@ -824,10 +832,14 @@
}
fprintf(out, "\nSupported flags are:\n"
"\t0\t(no flags)\n"
+ "\t4\tZIP_FL_ENC_CP437\n"
+ "\t8\tZIP_FL_ENC_UTF_8\n"
"\tC\tZIP_FL_NOCASE\n"
"\tc\tZIP_FL_CENTRAL\n"
"\td\tZIP_FL_NODIR\n"
"\tl\tZIP_FL_LOCAL\n"
+ "\tr\tZIP_FL_ENC_RAW\n"
+ "\ts\tZIP_FL_ENC_STRICT\n"
"\tu\tZIP_FL_UNCHANGED\n");
fprintf(out, "\nSupported compression methods are:\n"
"\tdefault\n");