summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgraham.gower <graham.gower@e8e0d7a0-c8d9-11dd-a880-a1081c7ac358>2009-11-18 19:24:08 (EST)
committer graham.gower <graham.gower@e8e0d7a0-c8d9-11dd-a880-a1081c7ac358>2009-11-18 19:24:08 (EST)
commitc9fc602ad5c318d50707393caf62d1468697f500 (patch)
treed120cc65e972a48cb2ee51349e12834715595868
parent8d9e0ecb10ca5fd8b3ae1bdcf244291e75654740 (diff)
Simplify hash_table somewhat.
- Use djb2 hash http://www.cse.yorku.ca/~oz/hash.html Performs similarly to the existing function, but removes the need for a prime number of buckets. Doesn't need to do an strlen every insert either. - Add some more heuristics. Collected in realtime (cheap), no postprocessing. git-svn-id: http://opkg.googlecode.com/svn/trunk@336 e8e0d7a0-c8d9-11dd-a880-a1081c7ac358
-rw-r--r--libopkg/hash_table.c95
-rw-r--r--libopkg/hash_table.h12
-rw-r--r--libopkg/opkg_conf.c30
3 files changed, 69 insertions, 68 deletions
diff --git a/libopkg/hash_table.c b/libopkg/hash_table.c
index 9e42da0..c98296e 100644
--- a/libopkg/hash_table.c
+++ b/libopkg/hash_table.c
@@ -24,54 +24,61 @@
#include "libbb/libbb.h"
-static int hash_index(hash_table_t *hash, const char *pkg_name);
-static int rotating(const char *key, int len, int prime);
-
-static int hash_index(hash_table_t *hash, const char *pkg_name)
+static unsigned long
+djb2_hash(const unsigned char *str)
{
- return rotating(pkg_name, strlen(pkg_name), hash->n_entries);
+ unsigned long hash = 5381;
+ int c;
+ while ((c = *str++))
+ hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+ return hash;
}
-
-static int rotating(const char *key, int len, int prime)
+
+static int
+hash_index(hash_table_t *hash, const char *key)
{
- unsigned int hash, i;
- for (hash=len, i=0; i<len; ++i)
- hash = (hash<<4)^(hash>>28)^key[i];
- return (hash % prime);
+ return djb2_hash((const unsigned char *)key) % hash->n_buckets;
}
-
/*
* this is an open table keyed by strings
*/
-int hash_table_init(const char *name, hash_table_t *hash, int len)
+int
+hash_table_init(const char *name, hash_table_t *hash, int len)
{
- static int primes_table[] = {
- 379, 761, 983, 1423, 2711, 3361, 3931, 4679, 5519, 6701, 9587,
- 19471, 23143, 33961, 46499, 49727, 99529, 0
- };
- int *picker;
-
- if (hash->entries != NULL) {
- /* we have been here already */
- return 0;
- }
+ if (hash->entries != NULL) {
+ fprintf(stderr, "ERROR: %s called on a non empty hash table\n",
+ __FUNCTION__);
+ return -1;
+ }
- hash->name = name;
- hash->entries = NULL;
- hash->n_entries = 0;
- hash->hash_entry_key = NULL;
+ memset(hash, 0, sizeof(hash_table_t));
- picker = primes_table;
- while(*picker && (*picker++ < len));
- if(!*picker)
- fprintf(stderr, "%s: primes table might not be big enough (! << %d)\n", __FUNCTION__, len);
- --picker;
+ hash->name = name;
+ hash->n_buckets = len;
+ hash->entries = xcalloc(hash->n_buckets, sizeof(hash_entry_t));
- hash->n_entries = *picker;
- hash->entries = xcalloc(hash->n_entries, sizeof(hash_entry_t));
+ return 0;
+}
- return 0;
+void
+hash_print_stats(hash_table_t *hash)
+{
+ printf("hash_table: %s, %d bytes\n"
+ "\tn_buckets=%d, n_elements=%d, n_collisions=%d\n"
+ "\tmax_bucket_len=%d, n_used_buckets=%d, ave_bucket_len=%.2f\n"
+ "\tn_hits=%d, n_misses=%d\n",
+ hash->name,
+ hash->n_buckets*sizeof(hash_entry_t),
+ hash->n_buckets,
+ hash->n_elements,
+ hash->n_collisions,
+ hash->max_bucket_len,
+ hash->n_used_buckets,
+ (hash->n_used_buckets ?
+ ((float)hash->n_elements)/hash->n_used_buckets : 0.0f),
+ hash->n_hits,
+ hash->n_misses);
}
void hash_table_deinit(hash_table_t *hash)
@@ -81,7 +88,7 @@ void hash_table_deinit(hash_table_t *hash)
return;
/* free the reminaing entries */
- for (i = 0; i < hash->n_entries; i++) {
+ for (i = 0; i < hash->n_buckets; i++) {
hash_entry_t *hash_entry = (hash->entries + i);
free (hash_entry->key);
/* skip the first entry as this is part of the array */
@@ -98,7 +105,7 @@ void hash_table_deinit(hash_table_t *hash)
free (hash->entries);
hash->entries = NULL;
- hash->n_entries = 0;
+ hash->n_buckets = 0;
}
void *hash_table_get(hash_table_t *hash, const char *key)
@@ -111,16 +118,19 @@ void *hash_table_get(hash_table_t *hash, const char *key)
{
if (strcmp(key, hash_entry->key) == 0) {
// opkg_message(NULL, OPKG_DEBUG, "Function: %s. Key found for '%s' \n", __FUNCTION__, key);
+ hash->n_hits++;
return hash_entry->data;
}
}
hash_entry = hash_entry->next;
}
+ hash->n_misses++;
return NULL;
}
int hash_table_insert(hash_table_t *hash, const char *key, void *value)
{
+ int bucket_len = 0;
int ndx= hash_index(hash, key);
hash_entry_t *hash_entry = hash->entries + ndx;
if (hash_entry->key) {
@@ -140,12 +150,19 @@ int hash_table_insert(hash_table_t *hash, const char *key, void *value)
hash_entry->data = value;
return 0;
}
+ bucket_len++;
}
hash_entry->next = xcalloc(1, sizeof(hash_entry_t));
hash_entry = hash_entry->next;
hash_entry->next = NULL;
+
+ hash->n_collisions++;
+ if (++bucket_len > hash->max_bucket_len)
+ hash->max_bucket_len = bucket_len;
}
- }
+ } else
+ hash->n_used_buckets++;
+
hash->n_elements++;
hash_entry->key = xstrdup(key);
hash_entry->data = value;
@@ -191,7 +208,7 @@ void hash_table_foreach(hash_table_t *hash, void (*f)(const char *key, void *ent
if (!hash || !f)
return;
- for (i = 0; i < hash->n_entries; i++) {
+ for (i = 0; i < hash->n_buckets; i++) {
hash_entry_t *hash_entry = (hash->entries + i);
do {
if(hash_entry->key) {
diff --git a/libopkg/hash_table.h b/libopkg/hash_table.h
index c065609..52b44d9 100644
--- a/libopkg/hash_table.h
+++ b/libopkg/hash_table.h
@@ -30,13 +30,19 @@ struct hash_entry {
struct hash_table {
const char *name;
hash_entry_t * entries;
- int n_entries; /* number of buckets */
- int n_elements;
- const char * (*hash_entry_key)(void * data);
+ unsigned int n_buckets;
+ unsigned int n_elements;
+
+ /* useful stats */
+ unsigned int n_used_buckets;
+ unsigned int n_collisions;
+ unsigned int max_bucket_len;
+ unsigned int n_hits, n_misses;
};
int hash_table_init(const char *name, hash_table_t *hash, int len);
void hash_table_deinit(hash_table_t *hash);
+void hash_print_stats(hash_table_t *hash);
void *hash_table_get(hash_table_t *hash, const char *key);
int hash_table_insert(hash_table_t *hash, const char *key, void *value);
int hash_table_remove(hash_table_t *has, const char *key);
diff --git a/libopkg/opkg_conf.c b/libopkg/opkg_conf.c
index c69c3d1..04be3df 100644
--- a/libopkg/opkg_conf.c
+++ b/libopkg/opkg_conf.c
@@ -235,7 +235,7 @@ int opkg_conf_init(opkg_conf_t *conf, const args_t *args)
pkg_hash_init("pkg-hash", &conf->pkg_hash, OPKG_CONF_DEFAULT_HASH_LEN);
hash_table_init("file-hash", &conf->file_hash, OPKG_CONF_DEFAULT_HASH_LEN);
- hash_table_init("obs-file-hash", &conf->obs_file_hash, OPKG_CONF_DEFAULT_HASH_LEN);
+ hash_table_init("obs-file-hash", &conf->obs_file_hash, OPKG_CONF_DEFAULT_HASH_LEN/16);
if (conf->lists_dir == NULL)
conf->lists_dir = xstrdup(OPKG_CONF_LISTS_DIR);
@@ -381,31 +381,9 @@ void opkg_conf_deinit(opkg_conf_t *conf)
#endif
if (conf->verbosity >= OPKG_DEBUG) {
- int i;
- hash_table_t *hashes[] = {
- &conf->pkg_hash,
- &conf->file_hash,
- &conf->obs_file_hash };
- for (i = 0; i < 3; i++) {
- hash_table_t *hash = hashes[i];
- int c = 0;
- int n_conflicts = 0;
- int j;
- for (j = 0; j < hash->n_entries; j++) {
- int len = 0;
- hash_entry_t *e = &hash->entries[j];
- if (e->next)
- n_conflicts++;
- while (e && e->key) {
- len++;
- e = e->next;
- }
- if (len > c)
- c = len;
- }
- opkg_message(conf, OPKG_DEBUG, "hash_table[%s] n_buckets=%d n_elements=%d max_conflicts=%d n_conflicts=%d\n",
- hash->name, hash->n_entries, hash->n_elements, c, n_conflicts);
- }
+ hash_print_stats(&conf->pkg_hash);
+ hash_print_stats(&conf->file_hash);
+ hash_print_stats(&conf->obs_file_hash);
}
if (&conf->pkg_hash)