summaryrefslogtreecommitdiff
path: root/utils/bloom.h
diff options
context:
space:
mode:
authorRob Kendrick (humdrum) <rob.kendrick@codethink.co.uk>2013-05-16 17:15:29 +0100
committerRob Kendrick (humdrum) <rob.kendrick@codethink.co.uk>2013-05-16 17:15:29 +0100
commit7d60132816341e15a853f2f66f06d0755d0f2daa (patch)
treedb50ed8306258dcd4b242561d76e85cac4263772 /utils/bloom.h
parent023c014ac439b03de4e45dd4cf567fe3a393bbf7 (diff)
downloadnetsurf-7d60132816341e15a853f2f66f06d0755d0f2daa.tar.gz
netsurf-7d60132816341e15a853f2f66f06d0755d0f2daa.tar.bz2
Painfully trivial bloom filter implementation to experiment with optimising urldb lookups
Diffstat (limited to 'utils/bloom.h')
-rw-r--r--utils/bloom.h99
1 files changed, 99 insertions, 0 deletions
diff --git a/utils/bloom.h b/utils/bloom.h
new file mode 100644
index 000000000..4a7bd3800
--- /dev/null
+++ b/utils/bloom.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2013 Rob Kendrick <rjek@netsurf-browser.org>
+ *
+ * This file is part of NetSurf, http://www.netsurf-browser.org/
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** \file
+ * Trivial bloom filter */
+
+#ifndef _NETSURF_UTILS_BLOOM_H_
+#define _NETSURF_UTILS_BLOOM_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct bloom_filter;
+
+/**
+ * Create a new bloom filter; dispose of it with bloom_destroy().
+ *
+ * \param size Size of bloom filter in bytes
+ * \return Handle for newly-created bloom filter, or NULL if none was created
+ */
+struct bloom_filter *bloom_create(size_t size);
+
+/**
+ * Destroy a bloom filter previously created with bloom_create().
+ *
+ * \param b Bloom filter to destroy (NULL handling unspecified -- TODO confirm)
+ */
+void bloom_destroy(struct bloom_filter *b);
+
+/**
+ * Insert a length-delimited string (may include NULs) into the filter,
+ * using an internal hash function; query it with bloom_search_str().
+ *
+ * \param b Bloom filter to add to
+ * \param s Pointer to the data to insert
+ * \param z Length of the data pointed to by s
+ */
+void bloom_insert_str(struct bloom_filter *b, const char *s, size_t z);
+
+/**
+ * Insert a precomputed hash value into the filter, for callers that
+ * already have one to hand; query it with bloom_search_hash().
+ *
+ * \param b Bloom filter to add to
+ * \param hash Hash value to add
+ */
+void bloom_insert_hash(struct bloom_filter *b, uint32_t hash);
+
+/**
+ * Search the filter for the given string, assuming it was added by
+ * bloom_insert_str(). May return false positives.
+ *
+ * \param b Bloom filter to search
+ * \param s Pointer to data to search for
+ * \param z Length of the data pointed to by s
+ *
+ * \return false if the string was never added, true if it might have been.
+ */
+bool bloom_search_str(struct bloom_filter *b, const char *s, size_t z);
+
+/**
+ * Search the filter for the given hash value, assuming it was added by
+ * bloom_insert_hash(). May return false positives.
+ *
+ * \param b Bloom filter to search
+ * \param hash Hash value to search for
+ *
+ * \return false if the hash was never added, true if it might have been.
+ */
+bool bloom_search_hash(struct bloom_filter *b, uint32_t hash);
+
+/**
+ * Report how many items have been added to this bloom filter, e.g. to
+ * choose the size of a replacement filter should you need to rehash it.
+ * NOTE(review): handling of repeated insertions is not stated -- confirm.
+ *
+ * \param b Bloom filter to examine
+ *
+ * \return Number of items that have been added
+ */
+uint32_t bloom_items(struct bloom_filter *b);
+
+#endif