blob: 0d9e9961c264366de2ca33b7d272b0b900f2a637 [file] [log] [blame]
/*
zip_hash.c -- hash table string -> uint64
Copyright (C) 2015-2017 Dieter Baron and Thomas Klausner
This file is part of libzip, a library to manipulate ZIP archives.
The authors can be contacted at <libzip@nih.at>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. The names of the authors may not be used to endorse or promote
products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "zipint.h"
#include <stdlib.h>
#include <string.h>
/* parameters for the string hash function: the hash starts at HASH_START and each
   byte is folded in as hash = hash * HASH_MULTIPLIER + byte */
#define HASH_MULTIPLIER 33
#define HASH_START 5381
/* hash table's fill ratio (entries / buckets) is kept between these by doubling/halving its size as necessary */
#define HASH_MAX_FILL .75
#define HASH_MIN_FILL .01
/* but hash table size (number of buckets) is kept between these */
#define HASH_MIN_SIZE 256
#define HASH_MAX_SIZE 0x80000000ul
/* one name stored in the hash table; entries that fall into the same bucket are chained through next */
struct zip_hash_entry {
const zip_uint8_t *name; /* NUL-terminated key; the pointer is stored as passed to _zip_hash_add, not copied */
zip_int64_t orig_index; /* index recorded by an add with ZIP_FL_UNCHANGED, -1 if there is none */
zip_int64_t current_index; /* index set by the latest add; -1 after a delete, reset to orig_index by _zip_hash_revert */
struct zip_hash_entry *next; /* next entry in the same bucket, NULL at the end of the chain */
zip_uint32_t hash_value; /* full 32 bit hash of name, kept so resizing does not have to rehash the string */
};
typedef struct zip_hash_entry zip_hash_entry_t;
/* hash table with chained buckets, mapping names to indices */
struct zip_hash {
zip_uint32_t table_size; /* number of buckets in table; 0 until the bucket array is first allocated */
zip_uint64_t nentries; /* number of entries currently linked into the table */
zip_hash_entry_t **table; /* array of table_size chain heads; NULL in a freshly created hash */
};
/* release every entry of a bucket chain, starting at entry; a NULL chain is a no-op */
static void
free_list(zip_hash_entry_t *entry) {
    zip_hash_entry_t *following;

    for (; entry != NULL; entry = following) {
        /* fetch the successor first, the node is gone after free() */
        following = entry->next;
        free(entry);
    }
}
/* compute hash of string, full 32 bit value */
static zip_uint32_t
hash_string(const zip_uint8_t *name) {
zip_uint64_t value = HASH_START;
if (name == NULL) {
return 0;
}
while (*name != 0) {
value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul);
name++;
}
return (zip_uint32_t)value;
}
/* move the hash to a bucket array of new_size buckets (a power of 2, larger or
   smaller than the current size). All existing entries are relinked into the new
   array using their stored hash value. Returns true on success (including when the
   size is already new_size); on allocation failure sets ZIP_ER_MEMORY, leaves the
   hash untouched and returns false. */
static bool
hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error) {
    zip_hash_entry_t **buckets;

    if (hash->table_size == new_size) {
        return true;
    }

    buckets = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *));
    if (buckets == NULL) {
        zip_error_set(error, ZIP_ER_MEMORY, 0);
        return false;
    }

    /* an empty hash has nothing to relink */
    if (hash->nentries != 0) {
        zip_uint32_t slot;

        for (slot = 0; slot < hash->table_size; slot++) {
            zip_hash_entry_t *node = hash->table[slot];

            while (node != NULL) {
                zip_hash_entry_t *rest = node->next;
                zip_hash_entry_t **head = &buckets[node->hash_value % new_size];

                /* push the node onto the front of its new chain */
                node->next = *head;
                *head = node;
                node = rest;
            }
        }
    }

    free(hash->table);
    hash->table_size = new_size;
    hash->table = buckets;

    return true;
}
/* compute the number of buckets needed to hold capacity entries without exceeding
   HASH_MAX_FILL: capacity / HASH_MAX_FILL rounded up to a power of 2 and kept
   within [HASH_MIN_SIZE, HASH_MAX_SIZE], the bounds the table size is kept between.
   In particular a small (or zero) capacity yields HASH_MIN_SIZE rather than a
   tiny or zero-sized table. */
static zip_uint32_t
size_for_capacity(zip_uint64_t capacity) {
    double needed_size = capacity / HASH_MAX_FILL;
    zip_uint32_t v;

    /* apply both bounds while the value is still a double, so that only values
       that fit into 32 bits are ever converted */
    if (needed_size >= HASH_MAX_SIZE) {
        return HASH_MAX_SIZE;
    }
    if (needed_size <= HASH_MIN_SIZE) {
        return HASH_MIN_SIZE;
    }

    v = (zip_uint32_t)needed_size;

    /* round up to the next power of 2.
       From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu>
       (http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;

    return v;
}
/* create an empty hash. No bucket array is allocated here: table is NULL and
   table_size is 0. Returns NULL and sets ZIP_ER_MEMORY if allocation fails. */
zip_hash_t *
_zip_hash_new(zip_error_t *error) {
    zip_hash_t *fresh = (zip_hash_t *)malloc(sizeof(zip_hash_t));

    if (fresh == NULL) {
        zip_error_set(error, ZIP_ER_MEMORY, 0);
        return NULL;
    }

    fresh->table = NULL;
    fresh->table_size = 0;
    fresh->nentries = 0;

    return fresh;
}
/* free the hash, its bucket array and all entries; the name strings the entries
   point to are not freed. Passing NULL is allowed and does nothing. */
void
_zip_hash_free(zip_hash_t *hash) {
    if (hash == NULL) {
        return;
    }

    if (hash->table != NULL) {
        zip_hash_entry_t **bucket = hash->table;
        zip_hash_entry_t **end = hash->table + hash->table_size;

        /* free_list() copes with empty (NULL) chains itself */
        for (; bucket != end; bucket++) {
            free_list(*bucket);
        }
        free(hash->table);
    }

    free(hash);
}
/* insert name into the hash and map it to index.

   hash   table to insert into
   name   NUL-terminated key; only the pointer is stored, so the string must stay
          valid for as long as the entry is in the hash
   index  value to store, must not exceed ZIP_INT64_MAX
   flags  with ZIP_FL_UNCHANGED, index is also recorded as the original index
   error  receives the error code on failure

   Returns true on success. Returns false and sets
     ZIP_ER_INVAL  if hash or name is NULL or index is too large,
     ZIP_ER_EXISTS if name already has a current index, or ZIP_FL_UNCHANGED is
                   given and name already has an original index,
     ZIP_ER_MEMORY if allocating the entry or growing the table fails.
   On failure the hash contents are left as they were before the call. */
bool
_zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) {
    zip_uint32_t hash_value, table_index;
    zip_hash_entry_t *entry;

    if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) {
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return false;
    }

    /* the bucket array is allocated on first use */
    if (hash->table_size == 0) {
        if (!hash_resize(hash, HASH_MIN_SIZE, error)) {
            return false;
        }
    }

    hash_value = hash_string(name);
    table_index = hash_value % hash->table_size;

    for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) {
        if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
            if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) {
                zip_error_set(error, ZIP_ER_EXISTS, 0);
                return false;
            }
            /* name is known but currently unused (e.g. deleted): reuse its entry */
            break;
        }
    }

    if (entry == NULL) {
        if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) {
            zip_error_set(error, ZIP_ER_MEMORY, 0);
            return false;
        }
        entry->name = name;
        entry->next = hash->table[table_index];
        hash->table[table_index] = entry;
        entry->hash_value = hash_value;
        entry->orig_index = -1;
        entry->current_index = -1;
        hash->nentries++;
        if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) {
            if (!hash_resize(hash, hash->table_size * 2, error)) {
                /* hash_resize() changes nothing when it fails, so the new entry is
                   still the head of its bucket: unlink it again so a failed add
                   does not leave a half-initialized entry behind */
                hash->table[table_index] = entry->next;
                free(entry);
                hash->nentries--;
                return false;
            }
        }
    }

    if (flags & ZIP_FL_UNCHANGED) {
        entry->orig_index = (zip_int64_t)index;
    }
    entry->current_index = (zip_int64_t)index;

    return true;
}
/* remove name from the hash.

   An entry without an original index is unlinked and freed; the table is halved
   when the fill ratio drops below HASH_MIN_FILL and it is larger than
   HASH_MIN_SIZE. An entry that has an original index is kept and only its current
   index is set to -1.

   Returns true if name was found. Returns false and sets ZIP_ER_INVAL if hash or
   name is NULL, or ZIP_ER_NOENT if name is not in the hash. Note that false is also
   returned when shrinking the table fails, although the entry has already been
   removed at that point. */
bool
_zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) {
    zip_uint32_t wanted;
    zip_hash_entry_t **link, *victim;

    if (hash == NULL || name == NULL) {
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return false;
    }

    if (hash->nentries == 0) {
        zip_error_set(error, ZIP_ER_NOENT, 0);
        return false;
    }

    wanted = hash_string(name);

    /* link always points at the pointer that refers to the entry under inspection,
       either the bucket head or the next field of the preceding entry */
    link = &hash->table[wanted % hash->table_size];
    while ((victim = *link) != NULL) {
        if (victim->hash_value != wanted || strcmp((const char *)name, (const char *)victim->name) != 0) {
            link = &victim->next;
            continue;
        }

        if (victim->orig_index != -1) {
            /* still needed for lookups of the original index: only mark as deleted */
            victim->current_index = -1;
            return true;
        }

        *link = victim->next;
        free(victim);
        hash->nentries--;

        if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
            if (!hash_resize(hash, hash->table_size / 2, error)) {
                return false;
            }
        }
        return true;
    }

    zip_error_set(error, ZIP_ER_NOENT, 0);
    return false;
}
/* find the index stored for name.

   With ZIP_FL_UNCHANGED in flags the original index is returned, otherwise the
   current index. Returns -1 and sets ZIP_ER_INVAL if hash or name is NULL, or
   ZIP_ER_NOENT if name is not in the hash or has no index of the requested kind. */
zip_int64_t
_zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) {
    zip_uint32_t hash_value, index;
    zip_hash_entry_t *entry;

    if (hash == NULL || name == NULL) {
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return -1;
    }

    if (hash->nentries > 0) {
        hash_value = hash_string(name);
        index = hash_value % hash->table_size;
        for (entry = hash->table[index]; entry != NULL; entry = entry->next) {
            /* compare the stored hash first, as add and delete do: equal names always
               have equal hashes, so this only skips string compares that cannot match */
            if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) {
                if (flags & ZIP_FL_UNCHANGED) {
                    if (entry->orig_index != -1) {
                        return entry->orig_index;
                    }
                }
                else {
                    if (entry->current_index != -1) {
                        return entry->current_index;
                    }
                }
                /* a name occurs at most once per chain, no need to look further */
                break;
            }
        }
    }

    zip_error_set(error, ZIP_ER_NOENT, 0);
    return -1;
}
/* make sure the table has enough buckets to hold capacity entries in total
   without exceeding the maximum fill ratio. The table is only ever grown here,
   never shrunk; a capacity of 0 is a no-op.

   Returns true on success. Returns false and sets ZIP_ER_INVAL if hash is NULL
   (and capacity is not 0), or ZIP_ER_MEMORY if the larger table cannot be
   allocated. */
bool
_zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error) {
    zip_uint32_t new_size;

    if (capacity == 0) {
        return true;
    }

    /* same argument check as the other entry points instead of dereferencing NULL */
    if (hash == NULL) {
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return false;
    }

    new_size = size_for_capacity(capacity);

    if (new_size <= hash->table_size) {
        return true;
    }

    return hash_resize(hash, new_size, error);
}
/* revert the hash to its original state: entries without an original index are
   removed, all others get their current index reset to the original index.
   Afterwards the table is shrunk, by repeated halving of the target size, until
   the fill ratio is at least HASH_MIN_FILL or the size reaches HASH_MIN_SIZE.

   Returns true on success. Returns false and sets ZIP_ER_INVAL if hash is NULL, or
   ZIP_ER_MEMORY if shrinking the table fails (the entries have been reverted
   nevertheless). */
bool
_zip_hash_revert(zip_hash_t *hash, zip_error_t *error) {
    zip_uint32_t i;

    /* same argument check as the other entry points instead of dereferencing NULL */
    if (hash == NULL) {
        zip_error_set(error, ZIP_ER_INVAL, 0);
        return false;
    }

    for (i = 0; i < hash->table_size; i++) {
        /* link points at the pointer referring to the entry under inspection: the
           bucket head or the next field of the last entry that was kept */
        zip_hash_entry_t **link = &hash->table[i];
        zip_hash_entry_t *entry;

        while ((entry = *link) != NULL) {
            if (entry->orig_index == -1) {
                /* added after the original state: unlink and free, link stays put */
                *link = entry->next;
                free(entry);
                hash->nentries--;
            }
            else {
                entry->current_index = entry->orig_index;
                link = &entry->next;
            }
        }
    }

    if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) {
        zip_uint32_t new_size = hash->table_size / 2;

        /* find the final size first so the entries are relinked only once */
        while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) {
            new_size /= 2;
        }
        if (!hash_resize(hash, new_size, error)) {
            return false;
        }
    }

    return true;
}