summaryrefslogtreecommitdiff
path: root/src/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils')
-rw-r--r--src/utils/Makefile3
-rw-r--r--src/utils/character_valid.c217
-rw-r--r--src/utils/character_valid.h54
-rw-r--r--src/utils/hashtable.c492
-rw-r--r--src/utils/hashtable.h42
-rw-r--r--src/utils/list.h61
-rw-r--r--src/utils/namespace.c71
-rw-r--r--src/utils/namespace.h7
-rw-r--r--src/utils/resource_mgr.c105
-rw-r--r--src/utils/resource_mgr.h45
-rw-r--r--src/utils/validate.c177
-rw-r--r--src/utils/validate.h26
12 files changed, 1291 insertions, 9 deletions
diff --git a/src/utils/Makefile b/src/utils/Makefile
index c80f261..428a9cf 100644
--- a/src/utils/Makefile
+++ b/src/utils/Makefile
@@ -1,4 +1,5 @@
# Sources
-DIR_SOURCES := namespace.c
+DIR_SOURCES := namespace.c hashtable.c resource_mgr.c character_valid.c \
+ validate.c
include build/makefiles/Makefile.subdir
diff --git a/src/utils/character_valid.c b/src/utils/character_valid.c
new file mode 100644
index 0000000..2251075
--- /dev/null
+++ b/src/utils/character_valid.c
@@ -0,0 +1,217 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+
+#include "utils/character_valid.h"
+
+#include <assert.h>
+
+static const struct xml_char_range base_char_range[] = { {0x41, 0x5a},
+ {0x61, 0x7a}, {0xc0, 0xd6}, {0xd8, 0xf6}, {0x00f8, 0x00ff},
+ {0x100, 0x131}, {0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e},
+ {0x180, 0x1c3}, {0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217},
+ {0x250, 0x2a8}, {0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a},
+ {0x38c, 0x38c}, {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6},
+ {0x3da, 0x3da}, {0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0},
+ {0x3e2, 0x3f3}, {0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c},
+ {0x45e, 0x481}, {0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc},
+ {0x4d0, 0x4eb}, {0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556},
+ {0x559, 0x559}, {0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2},
+ {0x621, 0x63a}, {0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be},
+ {0x6c0, 0x6ce}, {0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6},
+ {0x905, 0x939}, {0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c},
+ {0x98f, 0x990}, {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2},
+ {0x9b6, 0x9b9}, {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1},
+ {0xa05, 0xa0a}, {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30},
+ {0xa32, 0xa33}, {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c},
+ {0xa5e, 0xa5e}, {0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d},
+ {0xa8f, 0xa91}, {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3},
+ {0xab5, 0xab9}, {0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c},
+ {0xb0f, 0xb10}, {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33},
+ {0xb36, 0xb39}, {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61},
+ {0xb85, 0xb8a}, {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a},
+ {0xb9c, 0xb9c}, {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa},
+ {0xbae, 0xbb5}, {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10},
+ {0xc12, 0xc28}, {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61},
+ {0xc85, 0xc8c}, {0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3},
+ {0xcb5, 0xcb9}, {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c},
+ {0xd0e, 0xd10}, {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61},
+ {0xe01, 0xe2e}, {0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45},
+ {0xe81, 0xe82}, {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a},
+ {0xe8d, 0xe8d}, {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3},
+ {0xea5, 0xea5}, {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae},
+ {0xeb0, 0xeb0}, {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4},
+ {0xf40, 0xf47}, {0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6},
+ {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
+ {0x110b, 0x110c}, {0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e},
+ {0x1140, 0x1140}, {0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150},
+ {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163},
+ {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e},
+ {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8},
+ {0x11ab, 0x11ab}, {0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba},
+ {0x11bc, 0x11c2}, {0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9},
+ {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d},
+ {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59},
+ {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
+ {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc},
+ {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4},
+ {0x1ff6, 0x1ffc}, {0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e},
+ {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c},
+ {0xac00, 0xd7a3}
+};
+
+const struct xml_char_group base_char_group = {
+ sizeof(base_char_range) / sizeof(base_char_range[0]),
+ base_char_range};
+
+/*
+ * The XML 1.0 "Char" production:
+ *
+ *   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ *
+ * The characters below 0x100 (TAB, LF, CR and 0x20-0xFF) belong to the
+ * production and must be listed here, because _dom_is_character_in_group
+ * consults nothing but this table.
+ *
+ * The entries must stay sorted in ascending order and must not overlap: the
+ * lookup is a binary search.
+ */
+static const struct xml_char_range char_range[] = { {0x9, 0xa},
+	{0xd, 0xd}, {0x20, 0xd7ff}, {0xe000, 0xfffd}, {0x10000, 0x10ffff}
+};
+
+const struct xml_char_group char_group = {
+	sizeof(char_range) / sizeof(char_range[0]), char_range};
+
+static const struct xml_char_range combining_char_range[] = { {0x300, 0x345},
+ {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
+ {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
+ {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df},
+ {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903},
+ {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954},
+ {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be},
+ {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
+ {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
+ {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48},
+ {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc},
+ {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03},
+ {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d},
+ {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8},
+ {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44},
+ {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83},
+ {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
+ {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
+ {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
+ {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
+ {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
+ {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b},
+ {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7},
+ {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f},
+ {0x3099, 0x3099}, {0x309a, 0x309a}
+};
+
+const struct xml_char_group combining_char_group = {
+ sizeof(combining_char_range) / sizeof(combining_char_range[0]),
+ combining_char_range };
+
+static const struct xml_char_range digit_char_range[] = { {0x30, 0x39},
+ {0x660, 0x669}, {0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef},
+ {0xa66, 0xa6f}, {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef},
+ {0xc66, 0xc6f}, {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59},
+ {0xed0, 0xed9}, {0xf20, 0xf29}
+};
+
+const struct xml_char_group digit_char_group = {
+ sizeof(digit_char_range) / sizeof(digit_char_range[0]),
+ digit_char_range };
+
+static const struct xml_char_range extender_range[] = { {0xb7, 0xb7},
+ {0x2d0, 0x2d0}, {0x2d1, 0x2d1}, {0x387, 0x387}, {0x640, 0x640},
+ {0xe46, 0xe46}, {0xec6, 0xec6}, {0x3005, 0x3005}, {0x3031, 0x3035},
+ {0x309d, 0x309e}, {0x30fc, 0x30fe}
+};
+
+const struct xml_char_group extender_group = {
+ sizeof(extender_range) / sizeof(extender_range[0]),
+ extender_range };
+
+static const struct xml_char_range ideographic_range[] = { {0x3007, 0x3007},
+ {0x3021, 0x3029}, {0x4e00, 0x9fa5}
+};
+
+const struct xml_char_group ideographic_group = {
+ sizeof(ideographic_range) / sizeof(ideographic_range[0]),
+ ideographic_range };
+
+/*
+ * Binary search helper: report whether ch falls inside one of the inclusive
+ * ranges range[left] .. range[right]. The lookup only works when the ranges
+ * are sorted in ascending order and do not overlap.
+ */
+static bool binary_search(unsigned int ch, int left, int right,
+		const struct xml_char_range *range)
+{
+	int lo = left;
+	int hi = right;
+
+	while (lo <= hi) {
+		int probe = (lo + hi) / 2;
+
+		if (ch < range[probe].start) {
+			/* ch can only live in a range before the probe */
+			hi = probe - 1;
+		} else if (ch > range[probe].end) {
+			/* ch can only live in a range after the probe */
+			lo = probe + 1;
+		} else {
+			/* start <= ch <= end */
+			return true;
+		}
+	}
+
+	/* The window became empty without a hit */
+	return false;
+}
+
+/**
+ * Test whether a character belongs to an XML character group
+ *
+ * \param ch     The character being tested
+ * \param group  The character group
+ * \return true if the character belongs to the group, false otherwise.
+ *
+ * An empty group (no ranges, or a NULL range table) contains no characters,
+ * so false is returned without touching the table.
+ *
+ * The ranges of the group are binary searched, so a lookup costs about
+ * lg(n) comparisons, where n is the number of ranges in the group.
+ */
+bool _dom_is_character_in_group(unsigned int ch,
+		const struct xml_char_group *group)
+{
+	const struct xml_char_range *range = group->range;
+	size_t len = group->len;
+
+	/* Without this guard range[len - 1] would be read out of bounds */
+	if (len == 0 || range == NULL)
+		return false;
+
+	/* Quick reject for anything outside the span covered by the table */
+	if (ch < range[0].start || ch > range[len - 1].end)
+		return false;
+
+	return binary_search(ch, 0, (int) (len - 1), range);
+}
+
+#ifdef CHVALID_DEBUG
+/* The following are the test cases for this file.
+ * Compile this file :
+ *
+ * gcc -o test -DCHVALID_DEBUG character_valid.c
+ *
+ */
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+ unsigned int ch = 0x666; /* inside the digit range {0x660, 0x669} */
+
+ assert(is_digit(ch) == true);
+ assert(is_base_char(ch) == false);
+ assert(is_char(ch) == true);
+ assert(is_extender(ch) == false);
+ assert(is_combining_char(ch) == false);
+ assert(is_ideographic(ch) == false);
+
+ ch = 0xf40; /* inside the base character range {0xf40, 0xf47} */
+
+ assert(is_digit(ch) == false);
+ assert(is_base_char(ch) == true);
+ assert(is_char(ch) == true);
+ assert(is_extender(ch) == false);
+ assert(is_combining_char(ch) == false);
+ assert(is_ideographic(ch) == false);
+
+ printf("The test pass.\n");
+ return 0;
+}
+
+#endif
diff --git a/src/utils/character_valid.h b/src/utils/character_valid.h
new file mode 100644
index 0000000..5094e7c
--- /dev/null
+++ b/src/utils/character_valid.h
@@ -0,0 +1,54 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ *
+ * This file contains the API used to validate whether a given character in
+ * a name/value is legal according to the XML 1.0 standard. See
+ *
+ * http://www.w3.org/TR/2004/REC-xml-20040204/
+ * http://www.w3.org/TR/REC-xml/
+ *
+ * for detail.
+ */
+
+#ifndef dom_utils_character_valid_h_
+#define dom_utils_character_valid_h_
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+struct xml_char_range { /* One contiguous run of characters */
+ unsigned int start; /* First character of the run (inclusive) */
+ unsigned int end; /* Last character of the run (inclusive) */
+};
+
+struct xml_char_group { /* A character class expressed as a table of ranges */
+ size_t len; /* Number of entries in range[] */
+ const struct xml_char_range *range; /* The range table */
+};
+
+/* The groups */
+extern const struct xml_char_group base_char_group;
+extern const struct xml_char_group char_group;
+extern const struct xml_char_group combining_char_group;
+extern const struct xml_char_group digit_char_group;
+extern const struct xml_char_group extender_group;
+extern const struct xml_char_group ideographic_group;
+
+bool _dom_is_character_in_group(unsigned int ch,
+ const struct xml_char_group *group);
+
+#define is_base_char(ch) _dom_is_character_in_group((ch), &base_char_group)
+#define is_char(ch) _dom_is_character_in_group((ch), &char_group)
+#define is_combining_char(ch) _dom_is_character_in_group((ch), \
+ &combining_char_group)
+#define is_digit(ch) _dom_is_character_in_group((ch), &digit_char_group)
+#define is_extender(ch) _dom_is_character_in_group((ch), &extender_group)
+#define is_ideographic(ch) _dom_is_character_in_group((ch), &ideographic_group)
+
+#define is_letter(ch) (is_base_char(ch) || is_ideographic(ch))
+
+#endif
+
diff --git a/src/utils/hashtable.c b/src/utils/hashtable.c
new file mode 100644
index 0000000..c2ff8ce
--- /dev/null
+++ b/src/utils/hashtable.c
@@ -0,0 +1,492 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2006 Rob Kendrick <rjek@rjek.com>
+ * Copyright 2006 Richard Wilson <info@tinct.net>
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef TEST_RIG
+#include <stdio.h>
+#endif
+#include "utils/hashtable.h"
+
+/* The hash table entry; entries that share a chain are singly linked */
+struct _dom_hash_entry {
+ void *key; /**< The key pointer */
+ void *value; /**< The value pointer */
+ struct _dom_hash_entry *next; /**< Next entry in the same chain */
+};
+
+/* The hash table */
+struct dom_hash_table {
+ unsigned int nchains; /**< The number of chains */
+ dom_hash_func hash; /**< The hash function */
+ struct _dom_hash_entry **chain; /**< The chain heads, one per chain */
+ unsigned int number; /**< The number of entries in this table */
+
+ dom_alloc alloc; /**< Memory allocation function */
+ void *ptr; /**< The private data for the memory allocator */
+};
+
+
+/**
+ * Create a new hash table, and return a context for it. An empty table costs
+ * one struct dom_hash_table plus one entry pointer per chain.
+ *
+ * \param chains  Number of chains/buckets this hash table will have. This
+ *                should be a prime number, and ideally a prime number just
+ *                over a power of two, for best performance and distribution.
+ *                Must not be zero.
+ * \param hash    The hash function
+ * \param alloc   The memory allocator
+ * \param ptr     The private pointer for the allocator
+ * \return struct dom_hash_table containing the context of this hash table or
+ *         NULL if chains is zero or there is insufficient memory to create
+ *         the table and its chains.
+ */
+struct dom_hash_table *_dom_hash_create(unsigned int chains, dom_hash_func hash,
+		dom_alloc alloc, void *ptr)
+{
+	struct dom_hash_table *r;
+	unsigned int i;
+
+	/* Every lookup computes "h % nchains"; zero chains would divide by 0 */
+	if (chains == 0)
+		return NULL;
+
+	r = alloc(NULL, sizeof(struct dom_hash_table), ptr);
+	if (r == NULL)
+		return NULL;
+
+	r->chain = alloc(NULL, chains * sizeof(struct _dom_hash_entry *), ptr);
+	if (r->chain == NULL) {
+		/* Check BEFORE touching the chains, then release the table */
+		alloc(r, 0, ptr);
+		return NULL;
+	}
+
+	for (i = 0; i < chains; i++)
+		r->chain[i] = NULL;
+
+	r->nchains = chains;
+	r->hash = hash;
+	r->alloc = alloc;
+	r->ptr = ptr;
+	r->number = 0;
+
+	return r;
+}
+
+/**
+ * Clone a hash table.
+ *
+ * \param ht        Hash table to clone.
+ * \param alloc     The allocator.
+ * \param pw        The private data for the allocator.
+ * \param kf        The function pointer used to copy the key. It is called
+ *                  unconditionally, so it must not be NULL.
+ * \param key_pw    The private data for the key cloner.
+ * \param vf        The function pointer used to copy the value. It is called
+ *                  unconditionally, so it must not be NULL.
+ * \param value_pw  The private data for the value cloner.
+ *
+ * \return The cloned hash table, or NULL if ht is NULL or any allocation or
+ *         copy fails. On failure everything built so far is released.
+ */
+struct dom_hash_table *_dom_hash_clone(struct dom_hash_table *ht,
+		dom_alloc alloc, void *pw, dom_key_func kf, void *key_pw,
+		dom_value_func vf, void *value_pw)
+{
+	struct dom_hash_table *ret;
+	void *key;
+	unsigned int c1 = 0, *c2 = NULL;
+
+	if (ht == NULL)
+		return NULL;
+
+	ret = _dom_hash_create(ht->nchains, ht->hash, alloc, pw);
+	if (ret == NULL)
+		return NULL;
+
+	while ((key = _dom_hash_iterate(ht, &c1, &c2)) != NULL) {
+		void *nkey, *nvalue;
+
+		nkey = kf(key, key_pw, alloc, pw, true);
+		if (nkey == NULL)
+			goto fail;
+
+		nvalue = vf(_dom_hash_get(ht, key), value_pw, alloc, pw, true);
+		if (nvalue == NULL) {
+			kf(nkey, key_pw, alloc, pw, false);
+			goto fail;
+		}
+
+		if (_dom_hash_add(ret, nkey, nvalue, false) == false) {
+			/* ret never took ownership of the copies, so
+			 * _dom_hash_destroy will not see them: free here */
+			kf(nkey, key_pw, alloc, pw, false);
+			vf(nvalue, value_pw, alloc, pw, false);
+			goto fail;
+		}
+	}
+
+	return ret;
+
+fail:
+	_dom_hash_destroy(ret, kf, key_pw, vf, value_pw);
+	return NULL;
+}
+
+/**
+ * Destroy a hash table, releasing every entry, the chain array and the
+ * table itself.
+ *
+ * \param ht        Hash table to destroy (NULL is accepted and ignored).
+ *                  After the function returns, this is no longer valid.
+ * \param kf        The key destroy function, or NULL to leave keys alone
+ * \param key_pw    The key destroy function private data
+ * \param vf        The value destroy function, or NULL to leave values alone
+ * \param value_pw  The value destroy function private data
+ */
+void _dom_hash_destroy(struct dom_hash_table *ht, dom_key_func kf,
+		void *key_pw, dom_value_func vf, void *value_pw)
+{
+	unsigned int bucket;
+
+	if (ht == NULL)
+		return;
+
+	assert(ht->alloc != NULL);
+
+	for (bucket = 0; bucket < ht->nchains; bucket++) {
+		struct _dom_hash_entry *cur = ht->chain[bucket];
+
+		while (cur != NULL) {
+			/* Remember the successor before cur is released */
+			struct _dom_hash_entry *following = cur->next;
+
+			if (kf != NULL)
+				kf(cur->key, key_pw, ht->alloc, ht->ptr, false);
+			if (vf != NULL)
+				vf(cur->value, value_pw, ht->alloc, ht->ptr,
+						false);
+
+			ht->alloc(cur, 0, ht->ptr);
+			cur = following;
+		}
+	}
+
+	ht->alloc(ht->chain, 0, ht->ptr);
+	ht->alloc(ht, 0, ht->ptr);
+}
+
+/**
+ * Add a key/value pair to a hash table
+ *
+ * \param ht       The hash table context to add the key/value pair to.
+ * \param key      The key to associate the value with. Keys are compared by
+ *                 pointer identity; the pointer is stored, not copied.
+ * \param value    The value to associate the key with. The pointer is
+ *                 stored, not copied.
+ * \param replace  What to do when the key is already present: true
+ *                 overwrites the stored value, false makes the call fail.
+ * \return true if the add succeeded, false otherwise. Failure means a NULL
+ *         argument, an existing key with replace == false, or insufficient
+ *         memory for the new entry.
+ */
+bool _dom_hash_add(struct dom_hash_table *ht, void *key, void *value,
+		bool replace)
+{
+	struct _dom_hash_entry *entry;
+	unsigned int bucket;
+
+	if (ht == NULL || key == NULL || value == NULL)
+		return false;
+
+	bucket = ht->hash(key) % ht->nchains;
+
+	/* Is the key already in its chain? */
+	for (entry = ht->chain[bucket]; entry != NULL; entry = entry->next) {
+		if (entry->key != key)
+			continue;
+
+		if (!replace)
+			return false;
+
+		entry->value = value;
+		return true;
+	}
+
+	assert(ht->alloc != NULL);
+
+	entry = ht->alloc(NULL, sizeof(struct _dom_hash_entry), ht->ptr);
+	if (entry == NULL)
+		return false;
+
+	/* Push the new entry onto the front of the chain */
+	entry->key = key;
+	entry->value = value;
+	entry->next = ht->chain[bucket];
+	ht->chain[bucket] = entry;
+	ht->number++;
+
+	return true;
+}
+
+/**
+ * Look up the value associated with a key in a specific hash table.
+ *
+ * \param ht   The hash table context to look up
+ * \param key  The key to search for (compared by pointer identity)
+ * \return The value associated with the key, or NULL if it was not found
+ *         or if ht or key is NULL.
+ */
+void *_dom_hash_get(struct dom_hash_table *ht, void *key)
+{
+	struct _dom_hash_entry *entry;
+
+	if (ht == NULL || key == NULL)
+		return NULL;
+
+	/* Walk the key's chain until the key or the end of chain is hit */
+	entry = ht->chain[ht->hash(key) % ht->nchains];
+	while (entry != NULL && entry->key != key)
+		entry = entry->next;
+
+	return (entry != NULL) ? entry->value : NULL;
+}
+
+/**
+ * Delete the key from the hashtable.
+ *
+ * Only the table's own entry is released; the key and the value are left
+ * for the caller to dispose of.
+ *
+ * \param ht   The hashtable object
+ * \param key  The key to delete (compared by pointer identity)
+ * \return The value that was stored under the key, or NULL if the key was
+ *         not found or if ht or key is NULL.
+ */
+void *_dom_hash_del(struct dom_hash_table *ht, void *key)
+{
+	struct _dom_hash_entry **link;
+	unsigned int bucket;
+
+	if (ht == NULL || key == NULL)
+		return NULL;
+
+	bucket = ht->hash(key) % ht->nchains;
+
+	assert(ht->alloc != NULL);
+
+	/* link always addresses the pointer that leads to the current entry:
+	 * first the chain head, afterwards the previous entry's next field */
+	for (link = &ht->chain[bucket]; *link != NULL; link = &(*link)->next) {
+		struct _dom_hash_entry *victim = *link;
+		void *value;
+
+		if (victim->key != key)
+			continue;
+
+		/* Unlink, then release the entry itself */
+		*link = victim->next;
+		value = victim->value;
+		ht->alloc(victim, 0, ht->ptr);
+		ht->number--;
+
+		return value;
+	}
+
+	return NULL;
+}
+
+/**
+ * Iterate through all available hash keys.
+ *
+ * \param ht The hash table context to iterate.
+ * \param c1 Pointer to first context. It holds the index of the chain being
+ *           walked and is written by this function whenever *c2 is NULL on
+ *           entry, so it need not be initialised before the first call.
+ * \param c2 Pointer to second context (set *c2 to NULL on first call). It
+ *           is used, through a cast, to store the current entry pointer;
+ *           callers should treat the stored value as opaque.
+ * \return The next hash key, or NULL for no more keys (or if ht is NULL)
+ *
+ * When the end of the table is reached *c2 is NULL again, so a further call
+ * starts a new iteration from the first chain.
+ */
+void *_dom_hash_iterate(struct dom_hash_table *ht, unsigned int *c1,
+ unsigned int **c2)
+{
+ struct _dom_hash_entry **he = (struct _dom_hash_entry **)c2;
+
+ if (ht == NULL)
+ return NULL;
+
+ if (!*he)
+ *c1 = -1; /* wraps to UINT_MAX; the first (*c1)++ below gives chain 0 */
+ else
+ *he = (*he)->next; /* step to the next entry of the current chain */
+
+ if (*he)
+ return (*he)->key;
+
+ while (!*he) { /* current chain exhausted: find the next non-empty one */
+ (*c1)++;
+ if (*c1 >= ht->nchains)
+ return NULL;
+ *he = ht->chain[*c1];
+ }
+ return (*he)->key;
+}
+
+/**
+ * Get the number of elements in this hash table
+ *
+ * \param ht The hash table; it is dereferenced without a NULL check
+ *
+ * \return the number of elements (the table's running entry counter)
+ */
+unsigned int _dom_hash_get_length(struct dom_hash_table *ht)
+{
+ return ht->number;
+}
+
+/**
+ * Get the number of chains of this hash table
+ *
+ * \param ht The hash table; it is dereferenced without a NULL check
+ *
+ * \return the number of chains
+ */
+unsigned int _dom_hash_get_chains(struct dom_hash_table *ht)
+{
+ return ht->nchains;
+}
+
+/**
+ * Get the hash function of this hash table
+ *
+ * \param ht The hash table; it is dereferenced without a NULL check
+ *
+ * \return the hash function stored in the table
+ */
+dom_hash_func _dom_hash_get_func(struct dom_hash_table *ht)
+{
+ return ht->hash;
+}
+
+/* A simple test rig. To compile, use:
+ * gcc -g -o hashtest -I../ -I../../include -DTEST_RIG hashtable.c
+ *
+ * If you make changes to this hash table implementation, please rerun this
+ * test, and if possible, through valgrind to make sure there are no memory
+ * leaks or invalid memory accesses. If you add new functionality, please
+ * include a test for it that has good coverage along side the other tests.
+ */
+
+#ifdef TEST_RIG
+
+
+/**
+ * Hash a pointer, returning an unsigned int.
+ *
+ * Despite the name no FNV mixing is done: the hash is simply the pointer's
+ * integer value, reduced to the width of unsigned int.
+ *
+ * \param ptr The pointer to hash.
+ * \return the calculated hash value for the pointer.
+ */
+
+static inline unsigned int _dom_hash_pointer_fnv(void *ptr)
+{
+	/* Go through uintptr_t: a direct pointer -> unsigned int cast is a
+	 * size-mismatched conversion where pointers are wider than int */
+	return (unsigned int) (uintptr_t) ptr;
+}
+
+/*
+ * Minimal allocator for the test rig: a NULL p requests a fresh block of
+ * size bytes from malloc; a non-NULL p is freed (size is ignored) and NULL
+ * is returned. The private pointer is unused.
+ */
+static void *test_alloc(void *p, size_t size, void *ptr)
+{
+	(void) ptr;
+
+	if (p == NULL)
+		return malloc(size);
+
+	free(p);
+	return NULL;
+}
+
+/*
+ * Test rig entry point. First exercises add/get/del/length on two small
+ * tables, then replays /usr/share/dict/words through two larger tables.
+ */
+int main(int argc, char *argv[])
+{
+	struct dom_hash_table *a, *b;
+	FILE *dict;
+	char keybuf[256], valbuf[256];
+	int i;
+	char *cow="cow", *moo="moo", *pig="pig", *oink="oink",
+			*chicken="chikcken", *cluck="cluck",
+			*dog="dog", *woof="woof", *cat="cat",
+			*meow="meow";
+
+	(void) argc;
+	(void) argv;
+
+	a = _dom_hash_create(79, _dom_hash_pointer_fnv, test_alloc, NULL);
+	assert(a != NULL);
+
+	b = _dom_hash_create(103, _dom_hash_pointer_fnv, test_alloc, NULL);
+	assert(b != NULL);
+
+	_dom_hash_add(a, cow, moo ,true);
+	_dom_hash_add(b, moo, cow ,true);
+
+	_dom_hash_add(a, pig, oink ,true);
+	_dom_hash_add(b, oink, pig ,true);
+
+	_dom_hash_add(a, chicken, cluck ,true);
+	_dom_hash_add(b, cluck, chicken ,true);
+
+	_dom_hash_add(a, dog, woof ,true);
+	_dom_hash_add(b, woof, dog ,true);
+
+	_dom_hash_add(a, cat, meow ,true);
+	_dom_hash_add(b, meow, cat ,true);
+
+#define MATCH(x,y) assert(!strcmp((char *)_dom_hash_get(a, x), (char *)y)); \
+	assert(!strcmp((char *)_dom_hash_get(b, y), (char *)x))
+	MATCH(cow, moo);
+	MATCH(pig, oink);
+	MATCH(chicken, cluck);
+	MATCH(dog, woof);
+	MATCH(cat, meow);
+
+	assert(_dom_hash_get_length(a) == 5);
+	assert(_dom_hash_get_length(b) == 5);
+
+	_dom_hash_del(a, cat);
+	_dom_hash_del(b, meow);
+	assert(_dom_hash_get(a, cat) == NULL);
+	assert(_dom_hash_get(b, meow) == NULL);
+
+	assert(_dom_hash_get_length(a) == 4);
+	assert(_dom_hash_get_length(b) == 4);
+
+	/* Keys and values are string literals: no destroy callbacks needed */
+	_dom_hash_destroy(a, NULL, NULL, NULL, NULL);
+	_dom_hash_destroy(b, NULL, NULL, NULL, NULL);
+
+	/* This test requires /usr/share/dict/words - a large list of English
+	 * words. We load the entire file - odd lines are used as keys, and
+	 * even lines are used as the values for the previous line. we then
+	 * work through it again making sure everything matches.
+	 *
+	 * We do this twice - once in a hash table with many chains, and once
+	 * with a hash table with fewer chains.
+	 *
+	 * NOTE(review): the table compares keys by pointer identity and
+	 * keybuf is the same pointer for every word, so each add replaces
+	 * the single keybuf -> valbuf entry; the lookups below therefore
+	 * cannot fail. Per-word key copies would be needed for real coverage.
+	 */
+
+	a = _dom_hash_create(1031, _dom_hash_pointer_fnv, test_alloc, NULL);
+	assert(a != NULL);
+	b = _dom_hash_create(7919, _dom_hash_pointer_fnv, test_alloc, NULL);
+	assert(b != NULL);
+
+	dict = fopen("/usr/share/dict/words", "r");
+	if (dict == NULL) {
+		fprintf(stderr, "Unable to open /usr/share/dict/words - "
+				"extensive testing skipped.\n");
+		_dom_hash_destroy(a, NULL, NULL, NULL, NULL);
+		_dom_hash_destroy(b, NULL, NULL, NULL, NULL);
+		exit(0);
+	}
+
+	/* Stop on the first failed read instead of polling feof(), and bound
+	 * each word to the buffer size */
+	while (fscanf(dict, "%255s", keybuf) == 1 &&
+			fscanf(dict, "%255s", valbuf) == 1) {
+		_dom_hash_add(a, keybuf, valbuf, true);
+		_dom_hash_add(b, keybuf, valbuf, true);
+	}
+
+	for (i = 0; i < 5; i++) {
+		/* A successful fseek also clears the end-of-file indicator */
+		fseek(dict, 0, SEEK_SET);
+
+		while (fscanf(dict, "%255s", keybuf) == 1 &&
+				fscanf(dict, "%255s", valbuf) == 1) {
+			assert(strcmp(_dom_hash_get(a, keybuf), valbuf) == 0);
+			assert(strcmp(_dom_hash_get(b, keybuf), valbuf) == 0);
+		}
+	}
+
+	_dom_hash_destroy(a, NULL, NULL, NULL, NULL);
+	_dom_hash_destroy(b, NULL, NULL, NULL, NULL);
+
+	fclose(dict);
+
+	return 0;
+}
+
+#endif
diff --git a/src/utils/hashtable.h b/src/utils/hashtable.h
new file mode 100644
index 0000000..3cfe95d
--- /dev/null
+++ b/src/utils/hashtable.h
@@ -0,0 +1,42 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2006 Rob Kendrick <rjek@rjek.com>
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+
+#ifndef dom_utils_hashtable_h_
+#define dom_utils_hashtable_h_
+
+#include <stdbool.h>
+#include <dom/functypes.h>
+
+typedef struct dom_hash_table dom_hash_table;
+/* The hash function */
+typedef unsigned int (*dom_hash_func)(void *key);
+/* Function to clone/delete key */
+typedef void *(*dom_key_func)(void *key, void *pw, dom_alloc alloc,
+ void *alloc_pw, bool clone);
+/* Function to clone/delete value */
+typedef void *(*dom_value_func)(void *value, void *pw, dom_alloc alloc,
+ void *alloc_pw, bool clone);
+
+struct dom_hash_table *_dom_hash_create(unsigned int chains, dom_hash_func hash,
+ dom_alloc alloc, void *ptr);
+struct dom_hash_table *_dom_hash_clone(struct dom_hash_table *ht,
+ dom_alloc alloc, void *pw, dom_key_func kf, void *key_pw,
+ dom_value_func vf, void *value_pw);
+void _dom_hash_destroy(struct dom_hash_table *ht, dom_key_func kf, void *key_pw,
+ dom_value_func vf, void *value_pw);
+bool _dom_hash_add(struct dom_hash_table *ht, void *key, void *value,
+ bool replace);
+void *_dom_hash_get(struct dom_hash_table *ht, void *key);
+void *_dom_hash_del(struct dom_hash_table *ht, void *key);
+void *_dom_hash_iterate(struct dom_hash_table *ht, unsigned int *c1,
+ unsigned int **c2);
+unsigned int _dom_hash_get_length(struct dom_hash_table *ht);
+unsigned int _dom_hash_get_chains(struct dom_hash_table *ht);
+dom_hash_func _dom_hash_get_func(struct dom_hash_table *ht);
+
+#endif
diff --git a/src/utils/list.h b/src/utils/list.h
new file mode 100644
index 0000000..6e3ba20
--- /dev/null
+++ b/src/utils/list.h
@@ -0,0 +1,61 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ *
+ * This file contains the list structure used to compose lists.
+ *
+ * Note: This is an implementation of a doubly-linked circular list.
+ */
+
+#ifndef dom_utils_list_h_
+#define dom_utils_list_h_
+
+#include <stddef.h>
+
+struct list_entry { /* Link node; also serves as the list head */
+ struct list_entry *prev; /* Previous entry (the entry itself when unlinked) */
+ struct list_entry *next; /* Next entry (the entry itself when unlinked) */
+};
+
+/**
+ * Initialise a list_entry structure so that it links to itself, i.e. it
+ * forms an empty circular list.
+ *
+ * \param ent  The entry to initialise
+ */
+static inline void list_init(struct list_entry *ent)
+{
+	ent->next = ent->prev = ent;
+}
+
+/**
+ * Append a new list_entry at the end of the list, i.e. link it in between
+ * the current last entry and the head.
+ *
+ * \param head  The list header
+ * \param new   The new entry
+ */
+static inline void list_append(struct list_entry *head, struct list_entry *new)
+{
+	/* In a circular list the head's predecessor is the last entry */
+	struct list_entry *tail = head->prev;
+
+	new->next = head;
+	new->prev = tail;
+	tail->next = new;
+	head->prev = new;
+}
+
+/**
+ * Delete a list_entry from the list. The entry is left linked to itself,
+ * the same state list_init produces.
+ *
+ * \param ent  The entry that needs to be deleted from the list
+ */
+static inline void list_del(struct list_entry *ent)
+{
+	struct list_entry *before = ent->prev;
+	struct list_entry *after = ent->next;
+
+	/* Make the neighbours point past the entry */
+	before->next = after;
+	after->prev = before;
+
+	/* Detach the entry itself */
+	ent->next = ent;
+	ent->prev = ent;
+}
+
+#endif
diff --git a/src/utils/namespace.c b/src/utils/namespace.c
index ca5b01d..8d109ae 100644
--- a/src/utils/namespace.c
+++ b/src/utils/namespace.c
@@ -1,8 +1,9 @@
/*
* This file is part of libdom.
* Licensed under the MIT License,
- * http://www.opensource.org/licenses/mit-license.php
+ * http://www.opensource.org/licenses/mit-license.php
* Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
*/
#include <string.h>
@@ -10,6 +11,7 @@
#include <dom/dom.h>
#include "utils/namespace.h"
+#include "utils/validate.h"
#include "utils/utils.h"
@@ -18,7 +20,7 @@ static struct dom_string *xml;
/** XMLNS prefix */
static struct dom_string *xmlns;
-/** The namespace strings */
+/* The namespace strings */
static const char *namespaces[DOM_NAMESPACE_COUNT] = {
NULL,
"http://www.w3.org/1999/xhtml",
@@ -37,7 +39,7 @@ struct dom_string *dom_namespaces[DOM_NAMESPACE_COUNT] = {
* Initialise the namespace component
*
* \param alloc Pointer to memory (de)allocation function
- * \param pw Pointer to client-specific private data
+ * \param pw Pointer to client-specific private data
* \return DOM_NO_ERR on success.
*/
dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
@@ -130,10 +132,19 @@ dom_exception _dom_namespace_finalise(void)
dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
struct dom_string *namespace)
{
- uint32_t colon;
+ uint32_t colon, len;
+
+ if (qname == NULL){
+ if (namespace != NULL)
+ return DOM_NAMESPACE_ERR;
+ if (namespace == NULL)
+ return DOM_NO_ERR;
+ }
+
+ if (_dom_validate_name(qname) == false)
+ return DOM_NAMESPACE_ERR;
- /** \todo search qname for invalid characters */
- /** \todo ensure qname is not malformed */
+ len = dom_string_length(qname);
/* Find colon */
colon = dom_string_index(qname, ':');
@@ -147,9 +158,14 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
dom_string_cmp(qname, xmlns) != 0) {
return DOM_NAMESPACE_ERR;
}
+ } else if (colon == 0) {
+ /* Some name like ":name" */
+ if (namespace != NULL)
+ return DOM_NAMESPACE_ERR;
} else {
/* Prefix */
struct dom_string *prefix;
+ struct dom_string *lname;
dom_exception err;
/* Ensure there is a namespace URI */
@@ -157,11 +173,21 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
return DOM_NAMESPACE_ERR;
}
- err = dom_string_substr(qname, 0, colon - 1, &prefix);
+ err = dom_string_substr(qname, 0, colon, &prefix);
if (err != DOM_NO_ERR) {
return err;
}
+ err = dom_string_substr(qname, colon + 1, len, &lname);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
+
+ if (_dom_validate_ncname(prefix) == false ||
+ _dom_validate_ncname(lname) == false) {
+ return DOM_NAMESPACE_ERR;
+ }
+
/* Test for invalid XML namespace */
if (dom_string_cmp(prefix, xml) == 0 &&
dom_string_cmp(namespace,
@@ -223,7 +249,7 @@ dom_exception _dom_namespace_split_qname(struct dom_string *qname,
}
} else {
/* Found one => prefix */
- err = dom_string_substr(qname, 0, colon - 1, prefix);
+ err = dom_string_substr(qname, 0, colon, prefix);
if (err != DOM_NO_ERR) {
return err;
}
@@ -240,3 +266,32 @@ dom_exception _dom_namespace_split_qname(struct dom_string *qname,
return DOM_NO_ERR;
}
+/**
+ * Get the XML prefix dom_string
+ *
+ * \return the xml prefix dom_string, exactly as held in the file-scope
+ * variable; no reference is taken on behalf of the caller.
+ *
+ * Note: The client of this function may or may not call dom_string_ref
+ * on the returned dom_string, because this string will only be destroyed when
+ * dom_finalise is called. But if the client calls dom_string_ref, it must
+ * call dom_string_unref to maintain a correct ref count of the dom_string.
+ */
+dom_string *_dom_namespace_get_xml_prefix(void)
+{
+ return xml;
+}
+
+/**
+ * Get the XMLNS prefix dom_string.
+ *
+ * \return the xmlns prefix dom_string, exactly as held in the file-scope
+ * variable; no reference is taken on behalf of the caller.
+ *
+ * Note: The client of this function may or may not call dom_string_ref
+ * on the returned dom_string, because this string will only be destroyed when
+ * dom_finalise is called. But if the client calls dom_string_ref, it must
+ * call dom_string_unref to maintain a correct ref count of the dom_string.
+ */
+dom_string *_dom_namespace_get_xmlns_prefix(void)
+{
+ return xmlns;
+}
diff --git a/src/utils/namespace.h b/src/utils/namespace.h
index ec69035..900c9ee 100644
--- a/src/utils/namespace.h
+++ b/src/utils/namespace.h
@@ -14,6 +14,7 @@
struct dom_document;
struct dom_string;
+
/* Initialise the namespace component */
dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw);
@@ -28,5 +29,11 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
dom_exception _dom_namespace_split_qname(struct dom_string *qname,
struct dom_string **prefix, struct dom_string **localname);
+/* Get the XML prefix dom_string (no reference is taken for the caller) */
+struct dom_string *_dom_namespace_get_xml_prefix(void);
+
+/* Get the XMLNS prefix dom_string (no reference is taken for the caller) */
+struct dom_string *_dom_namespace_get_xmlns_prefix(void);
+
#endif
diff --git a/src/utils/resource_mgr.c b/src/utils/resource_mgr.c
new file mode 100644
index 0000000..c9c86d3
--- /dev/null
+++ b/src/utils/resource_mgr.c
@@ -0,0 +1,105 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+
+#include "resource_mgr.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include <libwapcaplet/libwapcaplet.h>
+#include "core/string.h"
+
+/**
+ * Allocate some memory with this allocator
+ *
+ * Forwards the request to the manager's allocation callback, passing the
+ * manager's private word along.
+ *
+ * \param res   The resource manager
+ * \param ptr   Pointer handed unchanged to the allocation callback
+ *              (presumably an existing block to resize, or NULL for a fresh
+ *              allocation -- TODO confirm against the dom_alloc contract)
+ * \param size  The size of memory to allocate
+ * \return the pointer returned by the allocation callback.
+ */
+void *_dom_resource_mgr_alloc(struct dom_resource_mgr *res, void *ptr,
+		size_t size)
+{
+	return res->alloc(ptr, size, res->pw);
+}
+
+/**
+ * Create a dom_string using this resource manager
+ *
+ * The string is created through dom_string_create, using the manager's
+ * allocation callback and private word.
+ *
+ * \param res     The resource manager
+ * \param data    The data pointer
+ * \param len     The length of data
+ * \param result  The returned dom_string
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ */
+dom_exception _dom_resource_mgr_create_string(struct dom_resource_mgr *res,
+		const uint8_t *data, size_t len, struct dom_string **result)
+{
+	return dom_string_create(res->alloc, res->pw, data, len, result);
+}
+
+/**
+ * Intern a run of bytes as a lwc_string through this resource manager
+ *
+ * The manager must carry a lwc context; this is asserted.
+ *
+ * \param res     The resource manager
+ * \param data    The bytes to intern
+ * \param len     The number of bytes in data
+ * \param result  Receives the interned lwc_string
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ */
+dom_exception _dom_resource_mgr_create_lwcstring(struct dom_resource_mgr *res,
+		const uint8_t *data, size_t len, struct lwc_string_s **result)
+{
+	/* Interning needs a lwc context to intern into */
+	assert(res->ctx != NULL);
+
+	/* Intern the bytes, then map the lwc status onto a dom_exception */
+	return _dom_exception_from_lwc_error(lwc_context_intern(res->ctx,
+			(const char *) data, len, result));
+}
+
+/**
+ * Create a dom_string from a lwc_string using this resource manager
+ *
+ * The manager must carry a lwc context; this is asserted.
+ *
+ * \param res     The resource manager
+ * \param str     The lwc_string to create the dom_string from
+ * \param result  The returned dom_string
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ */
+dom_exception _dom_resource_mgr_create_string_from_lwcstring(
+		struct dom_resource_mgr *res, struct lwc_string_s *str,
+		struct dom_string **result)
+{
+	assert(res->ctx != NULL);
+
+	return _dom_string_create_from_lwcstring(res->alloc, res->pw, res->ctx,
+			str, result);
+}
+
+/**
+ * Build a hash table whose memory comes from this resource manager
+ *
+ * \param res     The resource manager
+ * \param chains  The number of buckets of the hash table
+ * \param f       The hash function
+ * \param ht      Receives the new hash table; left untouched on failure
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR if the table could not be
+ *         created.
+ */
+dom_exception _dom_resource_mgr_create_hashtable(struct dom_resource_mgr *res,
+		size_t chains, dom_hash_func f, struct dom_hash_table **ht)
+{
+	struct dom_hash_table *table =
+			_dom_hash_create(chains, f, res->alloc, res->pw);
+
+	/* Only publish the table once creation has succeeded */
+	if (table != NULL) {
+		*ht = table;
+		return DOM_NO_ERR;
+	}
+
+	return DOM_NO_MEM_ERR;
+}
+
diff --git a/src/utils/resource_mgr.h b/src/utils/resource_mgr.h
new file mode 100644
index 0000000..b58f665
--- /dev/null
+++ b/src/utils/resource_mgr.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+
+#ifndef dom_utils_resource_mgr_h_
+#define dom_utils_resource_mgr_h_
+
+#include <dom/functypes.h>
+#include <dom/core/exceptions.h>
+
+#include "hashtable.h"
+
+struct lwc_context_s;
+struct lwc_string_s;
+struct dom_string;
+
+/**
+ * Resource manager: bundles the allocation callback, its private word and
+ * the lwc context used by the _dom_resource_mgr_* helpers.
+ */
+typedef struct dom_resource_mgr {
+	dom_alloc alloc;		/**< Allocation callback */
+	void *pw;			/**< Private word passed to ::alloc */
+	struct lwc_context_s *ctx;	/**< Context used to intern lwc_strings */
+} dom_resource_mgr;
+
+void *_dom_resource_mgr_alloc(struct dom_resource_mgr *res, void *ptr,
+ size_t size);
+
+dom_exception _dom_resource_mgr_create_string(struct dom_resource_mgr *res,
+ const uint8_t *data, size_t len, struct dom_string **result);
+
+dom_exception _dom_resource_mgr_create_lwcstring(struct dom_resource_mgr *res,
+ const uint8_t *data, size_t len, struct lwc_string_s **result);
+
+dom_exception _dom_resource_mgr_create_string_from_lwcstring(
+ struct dom_resource_mgr *res, struct lwc_string_s *str,
+ struct dom_string **result);
+
+dom_exception _dom_resource_mgr_create_hashtable(struct dom_resource_mgr *res,
+ size_t chains, dom_hash_func f, struct dom_hash_table **ht);
+
+#endif
diff --git a/src/utils/validate.c b/src/utils/validate.c
new file mode 100644
index 0000000..eb6cb22
--- /dev/null
+++ b/src/utils/validate.c
@@ -0,0 +1,177 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "utils/validate.h"
+
+#include <dom/core/string.h>
+
+#include "utils/character_valid.h"
+#include "utils/namespace.h"
+#include "utils/utils.h"
+
+/* A combination of various tests */
+static bool is_first_char(uint32_t ch);
+static bool is_name_char(uint32_t ch);
+
+/* Test whether the character can be the first character of a Name.
+ * A colon is accepted here, so this is not sufficient for an NCName. */
+static bool is_first_char(uint32_t ch)
+{
+	/* Refer http://www.w3.org/TR/REC-xml/ for detail */
+
+	/* ASCII letters, underscore and colon */
+	if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
+			ch == '_' || ch == ':')
+		return true;
+
+	/* Name-start ranges above ASCII, within the BMP */
+	if ((ch >= 0xC0 && ch <= 0xD6) ||
+			(ch >= 0xD8 && ch <= 0xF6) ||
+			(ch >= 0xF8 && ch <= 0x2FF) ||
+			(ch >= 0x370 && ch <= 0x37D) ||
+			(ch >= 0x37F && ch <= 0x1FFF) ||
+			(ch >= 0x200C && ch <= 0x200D) ||
+			(ch >= 0x2070 && ch <= 0x218F) ||
+			(ch >= 0x2C00 && ch <= 0x2FEF) ||
+			(ch >= 0x3001 && ch <= 0xD7FF) ||
+			(ch >= 0xF900 && ch <= 0xFDCF) ||
+			(ch >= 0xFDF0 && ch <= 0xFFFD))
+		return true;
+
+	/* Supplementary planes */
+	if (ch >= 0x10000 && ch <= 0xEFFFF)
+		return true;
+
+	/* Anything else qualifies only if it is classified as a letter */
+	if (is_letter(ch))
+		return true;
+
+	return false;
+}
+
+/* Test whether the character may appear after the first character of a
+ * Name. A colon is accepted here; NCName callers have to reject it. */
+static bool is_name_char(uint32_t ch)
+{
+	/* Refer http://www.w3.org/TR/REC-xml/ for detail */
+
+	/* ASCII: letters, digits and the punctuation allowed inside a name */
+	if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
+			(ch >= '0' && ch <= '9'))
+		return true;
+	if (ch == '_' || ch == ':' || ch == '-' || ch == '.')
+		return true;
+
+	/* Continuation-only characters (absent from the name-start ranges) */
+	if (ch == 0xB7 || (ch >= 0x300 && ch <= 0x36F) ||
+			(ch >= 0x203F && ch <= 0x2040))
+		return true;
+
+	/* Ranges that are also valid at the start of a name */
+	if ((ch >= 0xC0 && ch <= 0xD6) ||
+			(ch >= 0xD8 && ch <= 0xF6) ||
+			(ch >= 0xF8 && ch <= 0x2FF) ||
+			(ch >= 0x370 && ch <= 0x37D) ||
+			(ch >= 0x37F && ch <= 0x1FFF) ||
+			(ch >= 0x200C && ch <= 0x200D) ||
+			(ch >= 0x2070 && ch <= 0x218F) ||
+			(ch >= 0x2C00 && ch <= 0x2FEF) ||
+			(ch >= 0x3001 && ch <= 0xD7FF) ||
+			(ch >= 0xF900 && ch <= 0xFDCF) ||
+			(ch >= 0xFDF0 && ch <= 0xFFFD) ||
+			(ch >= 0x10000 && ch <= 0xEFFFF))
+		return true;
+
+	/* Finally, consult the character classification helpers */
+	if (is_letter(ch) == true)
+		return true;
+	if (is_digit(ch) == true)
+		return true;
+	if (is_combining_char(ch) == true)
+		return true;
+	if (is_extender(ch) == true)
+		return true;
+
+	return false;
+}
+
+/**
+ * Check that a string is a well-formed XML Name.
+ * For the standard please refer:
+ *
+ * http://www.w3.org/TR/2004/REC-xml-20040204/
+ *
+ * A NULL or empty string is never valid, and a failure to read any
+ * character of the string is treated as invalid.
+ *
+ * \param name  The name need to be tested
+ * \return true if ::name is valid, false otherwise.
+ */
+bool _dom_validate_name(struct dom_string *name)
+{
+	uint32_t length, pos, c;
+
+	if (name == NULL)
+		return false;
+
+	length = dom_string_length(name);
+	if (length == 0)
+		return false;
+
+	for (pos = 0; pos < length; pos++) {
+		/* A character that cannot be fetched invalidates the name */
+		if (dom_string_at(name, pos, &c) != DOM_NO_ERR)
+			return false;
+
+		if (pos == 0) {
+			/* The leading character has its own rule */
+			if (is_first_char(c) == false)
+				return false;
+		} else if (is_name_char(c) != true) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Check that a string is a well-formed NCName (a Name without any colon).
+ * Refer http://www.w3.org/TR/REC-xml-names/ for detail.
+ *
+ * A NULL or empty string is never valid, and a failure to read any
+ * character of the string is treated as invalid.
+ *
+ * \param name  The name to validate
+ * \return true if ::name is valid, false otherwise.
+ */
+bool _dom_validate_ncname(struct dom_string *name)
+{
+	uint32_t c, count, idx;
+
+	if (name == NULL)
+		return false;
+
+	count = dom_string_length(name);
+	if (count == 0)
+		return false;
+
+	/* Leading character: must be a letter or an underscore */
+	if (dom_string_at(name, 0, &c) != DOM_NO_ERR)
+		return false;
+	if (is_letter(c) == false && c != (uint32_t) '_')
+		return false;
+
+	/* Remaining characters: name characters, but never a colon */
+	idx = 1;
+	while (idx < count) {
+		if (dom_string_at(name, idx, &c) != DOM_NO_ERR)
+			return false;
+
+		if (is_name_char(c) == false || c == (uint32_t) ':')
+			return false;
+
+		idx++;
+	}
+
+	return true;
+}
+
diff --git a/src/utils/validate.h b/src/utils/validate.h
new file mode 100644
index 0000000..5d375e7
--- /dev/null
+++ b/src/utils/validate.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ *
+ * This file contains the API used to validate whether an element's
+ * name/namespace is legal according to the XML 1.0 standard. See
+ *
+ * http://www.w3.org/TR/2004/REC-xml-20040204/
+ *
+ * for detail.
+ */
+
+#ifndef dom_utils_valid_h_
+#define dom_utils_valid_h_
+
+#include <stdbool.h>
+
+struct dom_string;
+
+bool _dom_validate_name(struct dom_string *name);
+bool _dom_validate_ncname(struct dom_string *name);
+
+#endif
+