From 2bd12a392af360f944e785f2285b8dfc45e58319 Mon Sep 17 00:00:00 2001
From: Julian Daube
Date: Wed, 25 Oct 2017 18:35:04 +0200
Subject: [PATCH] add string hashmap

---
 fs.cpp => fs_old.cpp     |   0
 hashtable.c              | 223 +++++++++++++++++++++++++++++++++++++++
 hashtable.h              | 118 +++++++++++++++++++++
 main.cpp => main_old.cpp |   0
 strhash.c                |  27 +++++
 strhash.h                |  23 ++++
 test_hashtable.c         | 137 ++++++++++++++++++++++++
 7 files changed, 528 insertions(+)
 rename fs.cpp => fs_old.cpp (100%)
 create mode 100644 hashtable.c
 create mode 100644 hashtable.h
 rename main.cpp => main_old.cpp (100%)
 create mode 100644 strhash.c
 create mode 100644 strhash.h
 create mode 100644 test_hashtable.c

diff --git a/fs.cpp b/fs_old.cpp
similarity index 100%
rename from fs.cpp
rename to fs_old.cpp
diff --git a/hashtable.c b/hashtable.c
new file mode 100644
index 0000000..2eb87b5
--- /dev/null
+++ b/hashtable.c
@@ -0,0 +1,223 @@
+/*
+ * hashtable.c
+ *
+ *  Created on: 24.10.2017
+ *      Author: julian
+ */
+
+#include "strhash.h"
+#include "hashtable.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <stddef.h>
+#include <stdint.h>
+
+int key_compare(entry_key_t one, entry_key_t two) {
+	return strcmp(one, two) == 0;
+}
+
+entry_hash_t key_hash(entry_key_t key) {
+	entry_hash_t hash = hash_str(key) + 1; // make hash always nonzero
+
+	// the +1 wraps around to 0 if hash_str() returned the maximum value
+	return hash != 0 ? hash : 1;
+}
+
+hashtable_iterator_t hashtable_end(struct hashtable * table) {
+	return table->data + table->len;
+}
+
+
+hashtable_iterator_t hashtable_next(struct hashtable * table, hashtable_iterator_t current) {
+	if (table == NULL) {
+		return NULL;
+	}
+
+	if (current == NULL) {
+		current = table->data;
+	} else if (current != hashtable_end(table)) {
+		// we want the NEXT pointer :)
+		current++;
+	}
+
+	while(current != hashtable_end(table)) {
+		if (current->hash != 0)
+			break;
+
+		current++;
+	}
+
+	return current;
+}
+
+hashtable_iterator_t hashtable_get_hash(struct hashtable * table, entry_hash_t hash) {
+	if (table == NULL) {
+		return NULL;
+	}
+
+	if (table->len == 0) {
+		// nothing stored yet, and hash % 0 is undefined
+		return hashtable_end(table);
+	}
+
+	size_t index = hash % table->len;
+
+	// an empty slot (hash 0) or another hash sharing this slot is no match
+	if (table->data[index].hash == 0 || table->data[index].hash != hash) {
+		return hashtable_end(table);
+	}
+
+	return table->data + index;
+}
+
+hashtable_iterator_t hashtable_get(struct hashtable * table, entry_key_t key) {
+	hashtable_iterator_t it = hashtable_get_hash(table, key_hash(key));
+
+	if (it == NULL || it == hashtable_end(table)) {
+		return it;
+	}
+
+	// equal hashes do not imply equal keys
+	if (!key_compare(it->key, key)) {
+		return hashtable_end(table);
+	}
+
+	return it;
+}
+
+void hashtable_clear(struct hashtable * table) {
+	if (table->dealloc_data) {
+		hashtable_iterator_t it = hashtable_next(table, NULL);
+		for(;it != hashtable_end(table); it = hashtable_next(table, it)){
+			table->dealloc_data(it->data);
+		}
+	}
+
+	free(table->data);
+	table->data = NULL;
+	table->count = table->len = 0;
+}
+
+int hashtable_resize(struct hashtable * table, size_t newsize) {
+	if (table == NULL) {
+		return 0;
+	}
+
+	if (newsize == 0) {
+		// will fail, correct to 1
+		newsize = 1;
+	}
+
+	struct hashtable temp = *table;
+	table->data = calloc(newsize, sizeof(struct entry));
+
+	if (table->data == NULL) {
+		table->data = temp.data;
+
+		errno = ENOMEM;
+		return -1;
+	}
+
+	// try to reinsert old data
+	// hashtable_add() counts every reinserted entry again, so start at 0
+	table->len = newsize;
+	table->count = 0;
+
+	for (size_t i = 0; i < temp.len; i++) {
+		if (temp.data[i].hash && hashtable_add(table, temp.data[i]) < 0) {
+			// abort mission, restore old table
+			free(table->data);
+			*table = temp;
+
+			return -2;
+		}
+	}
+
+	// delete old table
+	free(temp.data);
+
+	return 0;
+}
+
+int hashtable_add(struct hashtable * table, struct entry entry) {
+	if (table == NULL) {
+		return 0;
+	}
+
+	if (table->len == 0) {
+		// initial alloc
+		int err = hashtable_resize(table, 1);
+
+		if (err < 0) {
+			return err;
+		}
+		return hashtable_add(table, entry);
+	}
+
+
+	// try to insert into table
+	struct entry * slot = table->data + entry.hash % table->len;
+
+	if (slot->hash == 0) {
+		// free slot, insert new entry
+		*slot = entry;
+		table->count++;
+		return 0;
+	}
+
+	if (key_compare(slot->key, entry.key)) {
+		// same key: replace in place, the entry is already counted
+		// (the previous data pointer is not passed to dealloc_data)
+		*slot = entry;
+		return 0;
+	}
+
+	if (slot->hash == entry.hash) {
+		// equal hashes share a slot at every table size, growing cannot help
+		return -1;
+	}
+
+	// key collision
+	// make table bigger
+	if (hashtable_resize(table, table->len*2) != 0) {
+		return -1;
+	}
+
+	return hashtable_add(table, entry);
+}
+
+struct entry hashtable_make_entry(entry_key_t key, void * data) {
+	return (struct entry){
+		.data = data,
+		.key = key,
+		.hash = key_hash(key),
+	};
+}
+
+int hashtable_remove(struct hashtable * table, hashtable_iterator_t it) {
+	if (!it) {
+		return 0;
+	}
+
+	if (table == NULL) {
+		return -1;
+	}
+
+	// hashtable_end() is what a failed lookup returns; it is not an entry
+	if (it == hashtable_end(table) || it->hash == 0) {
+		return 0;
+	}
+
+	if (table->dealloc_data) {
+		table->dealloc_data(it->data);
+	}
+
+	it->data = NULL;
+	it->key = NULL;
+	it->hash = 0;
+	table->count--;
+	return 1;
+}
diff --git a/hashtable.h b/hashtable.h
new file mode 100644
index 0000000..b354521
--- /dev/null
+++ b/hashtable.h
@@ -0,0 +1,118 @@
+/*
+ * hashtable.h
+ *
+ *  Created on: 21.10.2017
+ *      Author: julian
+ */
+
+#ifndef HASHTABLE_H_
+#define HASHTABLE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "strhash.h"
+#include <string.h>
+#include <errno.h>
+
+
+typedef strhash_t entry_hash_t;
+typedef const char * entry_key_t;
+typedef struct entry * hashtable_iterator_t;
+
+// data deallocator
+typedef void(*hashtable_dealloc)(void*);
+// data copy and allocator
+typedef void(*hashtable_alloc)(void*);
+
+extern int key_compare(entry_key_t one, entry_key_t two);
+extern entry_hash_t key_hash(entry_key_t key);
+
+struct entry {
+	entry_hash_t hash;
+	entry_key_t key;
+	void * data;
+};
+
+struct hashtable {
+	struct entry * data;
+	size_t len, count;
+
+	hashtable_alloc alloc_data;
+	hashtable_dealloc dealloc_data;
+};
+
+extern hashtable_iterator_t hashtable_end(struct hashtable * table);
+
+/**
+ * Iterate over the hashmap.
+ *
+ * pass the return value to current to iterate over the entire map
+ * pass NULL to get the beginning
+ *
+ * returns the next set entry in the table from current
+ * returns hashtable_end() when there is none
+ * returns NULL if table is NULL
+ * example loop:
+ * 	hashtable_iterator_t current = hashtable_next(table, NULL);
+ * 	for(;current != hashtable_end(table); current = hashtable_next(table, current)) {}
+ *
+ */
+hashtable_iterator_t hashtable_next(struct hashtable * table, hashtable_iterator_t current);
+
+/**
+ * returns the entry identified by the given hash
+ * returns hashtable_end() if no entry matched
+ * returns NULL if table is NULL
+ */
+hashtable_iterator_t hashtable_get_hash(struct hashtable * table, entry_hash_t hash);
+
+/**
+ * returns the entry identified by the given key
+ * returns hashtable_end() if no entry matched
+ * returns NULL if table is NULL
+ */
+hashtable_iterator_t hashtable_get(struct hashtable * table, entry_key_t key);
+
+/**
+ * delete all entries of the given table
+ *
+ * calls dealloc of the table for every data element, if it was set.
+ */
+void hashtable_clear(struct hashtable * table);
+
+/**
+ * resize the hashtable to a new physical size
+ * can also try to make the hashtable smaller
+ *
+ * returns 0 on success
+ * returns -1 on error (sets errno)
+ * returns -2 if hashtable does not fit in new size.
+ */
+int hashtable_resize(struct hashtable * table, size_t newsize);
+
+/**
+ * add new element in hashtable
+ *
+ * an entry whose key is already stored replaces the old entry
+ * the table grows when two different keys map to the same slot
+ *
+ * returns 0 on success
+ * returns a negative value on error
+ */
+int hashtable_add(struct hashtable * table, struct entry entry);
+
+/**
+ * make a new entry that can be added out of key and data
+ */
+struct entry hashtable_make_entry(entry_key_t key, void * data);
+
+/**
+ * remove the entry the iterator points to
+ * @return -1 on error
+ * @return 1 on success
+ * @return 0 on not found
+ */
+int hashtable_remove(struct hashtable * table, hashtable_iterator_t it);
+
+#endif /* HASHTABLE_H_ */
diff --git a/main.cpp b/main_old.cpp
similarity index 100%
rename from main.cpp
rename to main_old.cpp
diff --git a/strhash.c b/strhash.c
new file mode 100644
index 0000000..53f338d
--- /dev/null
+++ b/strhash.c
@@ -0,0 +1,27 @@
+/*
+ * strhash.c
+ *
+ *  Created on: 21.10.2017
+ *      Author: julian
+ */
+
+#include "strhash.h"
+
+// use rule:
+// hash(i) = hash(i - 1) * 33 ^ str[i]
+// see: http://www.cse.yorku.ca/~oz/hash.html
+strhash_t hash_add(strhash_t hash, char c) {
+	// plain char may be signed; keep the xor from sign-extending into the high bits
+	return (hash * 33) ^ (unsigned char)c;
+}
+
+
+strhash_t hash_str(const char * str) {
+	strhash_t hash = 0;
+	while(*str) {
+		hash = hash_add(hash, *str);
+		str++;
+	}
+
+	return hash;
+}
diff --git a/strhash.h b/strhash.h
new file mode 100644
index 0000000..7c67f78
--- /dev/null
+++ b/strhash.h
@@ -0,0 +1,23 @@
+/*
+ * strhash.h
+ *
+ *  Created on: 21.10.2017
+ *      Author: julian
+ */
+
+#ifndef STRHASH_H_
+#define STRHASH_H_
+
+#include <stddef.h>
+
+typedef size_t strhash_t;
+
+// "rehash" string with new char added to end
+strhash_t hash_add(strhash_t hash, char c);
+
+/// hash a given string
+strhash_t hash_str(const char * str);
+
+
+
+#endif /* STRHASH_H_ */
diff --git a/test_hashtable.c b/test_hashtable.c
new file mode 100644
index 0000000..bc4c351
--- /dev/null
+++ b/test_hashtable.c
@@ -0,0 +1,137 @@
+/*
+ * test_hashtable.c
+ *
+ *  Created on: 21.10.2017
+ *      Author: julian
+ */
+
+#include "tests.h"
+#include "hashtable.h"
+
+struct hashtable table = {0};
+struct entry nEntry;
+
+
+void testresize() {
+	init("resize");
+	int err = hashtable_resize(&table, 20);
+	if (err != 0) {
+		fail("resize return code was error %d", err);
+	}
+	if (table.len != 20) {
+		fail("table has wrong size %zu", table.len); // len is a size_t
+	}
+
+	pass();
+}
+
+void testadd() {
+	init("add");
+	nEntry = hashtable_make_entry("hi", "20");
+	size_t count = table.count;
+
+	int err = hashtable_add(&table, nEntry);
+
+	if (err != 0) {
+		fail("add gave error %d", err);
+	}
+	if (table.count != count + 1) {
+		fail("table has wrong count (has %zu, needs %zu)", table.count, count + 1);
+	}
+
+	pass();
+}
+
+void testget() {
+	init("get");
+
+	hashtable_iterator_t elem = hashtable_get(&table, "hi");
+
+	if (elem == hashtable_end(&table)) {
+		fail("element was not found");
+	}
+
+	if (elem->data != nEntry.data) {
+		fail("returned wrong element");
+	}
+
+	pass();
+}
+
+void testiterate() {
+	init("iterate");
+
+	hashtable_iterator_t it = hashtable_next(&table, NULL);
+
+	if (it == hashtable_end(&table)) {
+		fail("table seems empty?");
+	}
+
+	if (strcmp(it->key, "hi")) {
+		fail("wrong entry (smh)");
+	}
+
+	pass();
+}
+
+
+volatile int dealloc_called = 0; // number of test_dealloc() calls since the last reset
+
+void test_dealloc(void* data) {
+	dealloc_called++;
+}
+
+void testremove() {
+	init("remove");
+	dealloc_called = 0;
+	table.dealloc_data = test_dealloc;
+
+	if (hashtable_add(&table, hashtable_make_entry("woop", NULL)) < 0) {
+		fail("could not add");
+	}
+
+	hashtable_iterator_t it = hashtable_get(&table, "woop");
+	int ret = hashtable_remove(&table, it);
+
+	if (ret < 0) {
+		fail("negative return code");
+	}
+	if (!ret) {
+		fail("could not remove");
+	}
+	if (!dealloc_called) {
+		fail("deallocator not called");
+	}
+
+	pass();
+}
+
+void testclear() {
+	init("clear");
+
+	size_t count = table.count;
+	dealloc_called = 0; // only count the calls made by hashtable_clear()
+	table.dealloc_data = test_dealloc;
+
+	hashtable_clear(&table);
+	if (table.count != 0 || table.len != 0 || table.data != NULL) {
+		fail("memory was not freed");
+	}
+
+	if ((size_t)dealloc_called != count) {
+		fail("dealloc was not called for all the data");
+	}
+
+	pass();
+}
+
+int main() {
+	init("hashtable");
+	testresize();
+	testadd();
+	testget();
+	testiterate();
+	testremove(); // needs the filled table, so it has to run before testclear()
+	testclear();
+	pass();
+}