From c9fc602ad5c318d50707393caf62d1468697f500 Mon Sep 17 00:00:00 2001 From: graham.gower Date: Wed, 18 Nov 2009 19:24:08 -0500 Subject: Simplify hash_table somewhat. - Use djb2 hash http://www.cse.yorku.ca/~oz/hash.html Performs similarly to the existing function, but removes the need for a prime number of buckets. Doesn't need to do an strlen every insert either. - Add some more heuristics. Collected in realtime (cheap), no postprocessing. git-svn-id: http://opkg.googlecode.com/svn/trunk@336 e8e0d7a0-c8d9-11dd-a880-a1081c7ac358 --- diff --git a/libopkg/hash_table.c b/libopkg/hash_table.c index 9e42da0..c98296e 100644 --- a/libopkg/hash_table.c +++ b/libopkg/hash_table.c @@ -24,54 +24,61 @@ #include "libbb/libbb.h" -static int hash_index(hash_table_t *hash, const char *pkg_name); -static int rotating(const char *key, int len, int prime); - -static int hash_index(hash_table_t *hash, const char *pkg_name) +static unsigned long +djb2_hash(const unsigned char *str) { - return rotating(pkg_name, strlen(pkg_name), hash->n_entries); + unsigned long hash = 5381; + int c; + while ((c = *str++)) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + return hash; } - -static int rotating(const char *key, int len, int prime) + +static int +hash_index(hash_table_t *hash, const char *key) { - unsigned int hash, i; - for (hash=len, i=0; i>28)^key[i]; - return (hash % prime); + return djb2_hash((const unsigned char *)key) % hash->n_buckets; } - /* * this is an open table keyed by strings */ -int hash_table_init(const char *name, hash_table_t *hash, int len) +int +hash_table_init(const char *name, hash_table_t *hash, int len) { - static int primes_table[] = { - 379, 761, 983, 1423, 2711, 3361, 3931, 4679, 5519, 6701, 9587, - 19471, 23143, 33961, 46499, 49727, 99529, 0 - }; - int *picker; - - if (hash->entries != NULL) { - /* we have been here already */ - return 0; - } + if (hash->entries != NULL) { + fprintf(stderr, "ERROR: %s called on a non empty hash table\n", + __FUNCTION__); + return -1; + } - hash->name = name; - hash->entries = NULL; - hash->n_entries = 0; - hash->hash_entry_key = NULL; + memset(hash, 0, sizeof(hash_table_t)); - picker = primes_table; - while(*picker && (*picker++ < len)); - if(!*picker) - fprintf(stderr, "%s: primes table might not be big enough (! << %d)\n", __FUNCTION__, len); - --picker; + hash->name = name; + hash->n_buckets = len; + hash->entries = xcalloc(hash->n_buckets, sizeof(hash_entry_t)); - hash->n_entries = *picker; - hash->entries = xcalloc(hash->n_entries, sizeof(hash_entry_t)); + return 0; +} - return 0; +void +hash_print_stats(hash_table_t *hash) +{ + printf("hash_table: %s, %d bytes\n" + "\tn_buckets=%d, n_elements=%d, n_collisions=%d\n" + "\tmax_bucket_len=%d, n_used_buckets=%d, ave_bucket_len=%.2f\n" + "\tn_hits=%d, n_misses=%d\n", + hash->name, + hash->n_buckets*sizeof(hash_entry_t), + hash->n_buckets, + hash->n_elements, + hash->n_collisions, + hash->max_bucket_len, + hash->n_used_buckets, + (hash->n_used_buckets ? + ((float)hash->n_elements)/hash->n_used_buckets : 0.0f), + hash->n_hits, + hash->n_misses); } void hash_table_deinit(hash_table_t *hash) @@ -81,7 +88,7 @@ void hash_table_deinit(hash_table_t *hash) return; /* free the reminaing entries */ - for (i = 0; i < hash->n_entries; i++) { + for (i = 0; i < hash->n_buckets; i++) { hash_entry_t *hash_entry = (hash->entries + i); free (hash_entry->key); /* skip the first entry as this is part of the array */ @@ -98,7 +105,7 @@ void hash_table_deinit(hash_table_t *hash) free (hash->entries); hash->entries = NULL; - hash->n_entries = 0; + hash->n_buckets = 0; } void *hash_table_get(hash_table_t *hash, const char *key) @@ -111,16 +118,19 @@ void *hash_table_get(hash_table_t *hash, const char *key) { if (strcmp(key, hash_entry->key) == 0) { // opkg_message(NULL, OPKG_DEBUG, "Function: %s. Key found for '%s' \n", __FUNCTION__, key); + hash->n_hits++; return hash_entry->data; } } hash_entry = hash_entry->next; } + hash->n_misses++; return NULL; } int hash_table_insert(hash_table_t *hash, const char *key, void *value) { + int bucket_len = 0; int ndx= hash_index(hash, key); hash_entry_t *hash_entry = hash->entries + ndx; if (hash_entry->key) { @@ -140,12 +150,19 @@ int hash_table_insert(hash_table_t *hash, const char *key, void *value) hash_entry->data = value; return 0; } + bucket_len++; } hash_entry->next = xcalloc(1, sizeof(hash_entry_t)); hash_entry = hash_entry->next; hash_entry->next = NULL; + + hash->n_collisions++; + if (++bucket_len > hash->max_bucket_len) + hash->max_bucket_len = bucket_len; } - } + } else + hash->n_used_buckets++; + hash->n_elements++; hash_entry->key = xstrdup(key); hash_entry->data = value; @@ -191,7 +208,7 @@ void hash_table_foreach(hash_table_t *hash, void (*f)(const char *key, void *ent if (!hash || !f) return; - for (i = 0; i < hash->n_entries; i++) { + for (i = 0; i < hash->n_buckets; i++) { hash_entry_t *hash_entry = (hash->entries + i); do { if(hash_entry->key) { diff --git a/libopkg/hash_table.h b/libopkg/hash_table.h index c065609..52b44d9 100644 --- a/libopkg/hash_table.h +++ b/libopkg/hash_table.h @@ -30,13 +30,19 @@ struct hash_entry { struct hash_table { const char *name; hash_entry_t * entries; - int n_entries; /* number of buckets */ - int n_elements; - const char * (*hash_entry_key)(void * data); + unsigned int n_buckets; + unsigned int n_elements; + + /* useful stats */ + unsigned int n_used_buckets; + unsigned int n_collisions; + unsigned int max_bucket_len; + unsigned int n_hits, n_misses; }; int hash_table_init(const char *name, hash_table_t *hash, int len); void hash_table_deinit(hash_table_t *hash); +void hash_print_stats(hash_table_t *hash); void *hash_table_get(hash_table_t *hash, const char *key); int hash_table_insert(hash_table_t *hash, const char *key, void *value); int hash_table_remove(hash_table_t *has, const char *key); diff --git a/libopkg/opkg_conf.c b/libopkg/opkg_conf.c index c69c3d1..04be3df 100644 --- a/libopkg/opkg_conf.c +++ b/libopkg/opkg_conf.c @@ -235,7 +235,7 @@ int opkg_conf_init(opkg_conf_t *conf, const args_t *args) pkg_hash_init("pkg-hash", &conf->pkg_hash, OPKG_CONF_DEFAULT_HASH_LEN); hash_table_init("file-hash", &conf->file_hash, OPKG_CONF_DEFAULT_HASH_LEN); - hash_table_init("obs-file-hash", &conf->obs_file_hash, OPKG_CONF_DEFAULT_HASH_LEN); + hash_table_init("obs-file-hash", &conf->obs_file_hash, OPKG_CONF_DEFAULT_HASH_LEN/16); if (conf->lists_dir == NULL) conf->lists_dir = xstrdup(OPKG_CONF_LISTS_DIR); @@ -381,31 +381,9 @@ void opkg_conf_deinit(opkg_conf_t *conf) #endif if (conf->verbosity >= OPKG_DEBUG) { - int i; - hash_table_t *hashes[] = { - &conf->pkg_hash, - &conf->file_hash, - &conf->obs_file_hash }; - for (i = 0; i < 3; i++) { - hash_table_t *hash = hashes[i]; - int c = 0; - int n_conflicts = 0; - int j; - for (j = 0; j < hash->n_entries; j++) { - int len = 0; - hash_entry_t *e = &hash->entries[j]; - if (e->next) - n_conflicts++; - while (e && e->key) { - len++; - e = e->next; - } - if (len > c) - c = len; - } - opkg_message(conf, OPKG_DEBUG, "hash_table[%s] n_buckets=%d n_elements=%d max_conflicts=%d n_conflicts=%d\n", - hash->name, hash->n_entries, hash->n_elements, c, n_conflicts); - } + hash_print_stats(&conf->pkg_hash); + hash_print_stats(&conf->file_hash); + hash_print_stats(&conf->obs_file_hash); } if (&conf->pkg_hash) -- cgit v0.9.1