From 399da01ae4eb5c5e3e9349bacc2063c946c3d4a1 Mon Sep 17 00:00:00 2001 From: Bo Yang Date: Tue, 11 Aug 2009 11:17:23 +0000 Subject: Merge the branches/struggleyb/libdom-remain back to trunk. svn path=/trunk/dom/; revision=9191 --- src/utils/Makefile | 3 +- src/utils/character_valid.c | 217 +++++++++++++++++++ src/utils/character_valid.h | 54 +++++ src/utils/hashtable.c | 492 ++++++++++++++++++++++++++++++++++++++++++++ src/utils/hashtable.h | 42 ++++ src/utils/list.h | 61 ++++++ src/utils/namespace.c | 71 ++++++- src/utils/namespace.h | 7 + src/utils/resource_mgr.c | 105 ++++++++++ src/utils/resource_mgr.h | 45 ++++ src/utils/validate.c | 177 ++++++++++++++++ src/utils/validate.h | 26 +++ 12 files changed, 1291 insertions(+), 9 deletions(-) create mode 100644 src/utils/character_valid.c create mode 100644 src/utils/character_valid.h create mode 100644 src/utils/hashtable.c create mode 100644 src/utils/hashtable.h create mode 100644 src/utils/list.h create mode 100644 src/utils/resource_mgr.c create mode 100644 src/utils/resource_mgr.h create mode 100644 src/utils/validate.c create mode 100644 src/utils/validate.h (limited to 'src/utils') diff --git a/src/utils/Makefile b/src/utils/Makefile index c80f261..428a9cf 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -1,4 +1,5 @@ # Sources -DIR_SOURCES := namespace.c +DIR_SOURCES := namespace.c hashtable.c resource_mgr.c character_valid.c \ + validate.c include build/makefiles/Makefile.subdir diff --git a/src/utils/character_valid.c b/src/utils/character_valid.c new file mode 100644 index 0000000..2251075 --- /dev/null +++ b/src/utils/character_valid.c @@ -0,0 +1,217 @@ +/* + * This file is part of libdom. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + */ + +#include "utils/character_valid.h" + +#include + +static const struct xml_char_range base_char_range[] = { {0x41, 0x5a}, + {0x61, 0x7a}, {0xc0, 0xd6}, {0xd8, 0xf6}, {0x00f8, 0x00ff}, + {0x100, 0x131}, {0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e}, + {0x180, 0x1c3}, {0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217}, + {0x250, 0x2a8}, {0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a}, + {0x38c, 0x38c}, {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, + {0x3da, 0x3da}, {0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0}, + {0x3e2, 0x3f3}, {0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c}, + {0x45e, 0x481}, {0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, + {0x4d0, 0x4eb}, {0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556}, + {0x559, 0x559}, {0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, + {0x621, 0x63a}, {0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be}, + {0x6c0, 0x6ce}, {0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, + {0x905, 0x939}, {0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c}, + {0x98f, 0x990}, {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, + {0x9b6, 0x9b9}, {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, + {0xa05, 0xa0a}, {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, + {0xa32, 0xa33}, {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, + {0xa5e, 0xa5e}, {0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d}, + {0xa8f, 0xa91}, {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, + {0xab5, 0xab9}, {0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c}, + {0xb0f, 0xb10}, {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, + {0xb36, 0xb39}, {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, + {0xb85, 0xb8a}, {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, + {0xb9c, 0xb9c}, {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, + {0xbae, 0xbb5}, {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, + {0xc12, 0xc28}, {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, + {0xc85, 0xc8c}, {0xc8e, 0xc90}, 
{0xc92, 0xca8}, {0xcaa, 0xcb3}, + {0xcb5, 0xcb9}, {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, + {0xd0e, 0xd10}, {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, + {0xe01, 0xe2e}, {0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, + {0xe81, 0xe82}, {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, + {0xe8d, 0xe8d}, {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, + {0xea5, 0xea5}, {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae}, + {0xeb0, 0xeb0}, {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, + {0xf40, 0xf47}, {0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, + {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109}, + {0x110b, 0x110c}, {0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e}, + {0x1140, 0x1140}, {0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150}, + {0x1154, 0x1155}, {0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163}, + {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e}, + {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8}, + {0x11ab, 0x11ab}, {0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, + {0x11bc, 0x11c2}, {0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, + {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, + {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, + {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, + {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, + {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, + {0x1ff6, 0x1ffc}, {0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e}, + {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c}, + {0xac00, 0xd7a3} +}; + +const struct xml_char_group base_char_group = { + sizeof(base_char_range) / sizeof(base_char_range[0]), + base_char_range}; + +static const struct xml_char_range char_range[] = { {0x100, 0xd7ff}, + {0xe000, 0xfffd}, {0x10000, 0x10ffff} +}; + +const struct xml_char_group char_group = { + 
sizeof(char_range) / sizeof(char_range[0]), char_range}; + +static const struct xml_char_range combining_char_range[] = { {0x300, 0x345}, + {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, + {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, + {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df}, + {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903}, + {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954}, + {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be}, + {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd}, + {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c}, + {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48}, + {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc}, + {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03}, + {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d}, + {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, + {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44}, + {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83}, + {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6}, + {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d}, + {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e}, + {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd}, + {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39}, + {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b}, + {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7}, + {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f}, + {0x3099, 0x3099}, {0x309a, 0x309a} +}; + +const struct xml_char_group combining_char_group = { + sizeof(combining_char_range) / sizeof(combining_char_range[0]), + combining_char_range }; + +static const struct xml_char_range digit_char_range[] = { {0x30, 0x39}, + {0x660, 0x669}, {0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, + 
{0xa66, 0xa6f}, {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, + {0xc66, 0xc6f}, {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, + {0xed0, 0xed9}, {0xf20, 0xf29} +}; + +const struct xml_char_group digit_char_group = { + sizeof(digit_char_range) / sizeof(digit_char_range[0]), + digit_char_range }; + +static const struct xml_char_range extender_range[] = { {0xb7, 0xb7}, + {0x2d0, 0x2d0}, {0x2d1, 0x2d1}, {0x387, 0x387}, {0x640, 0x640}, + {0xe46, 0xe46}, {0xec6, 0xec6}, {0x3005, 0x3005}, {0x3031, 0x3035}, + {0x309d, 0x309e}, {0x30fc, 0x30fe} +}; + +const struct xml_char_group extender_group = { + sizeof(extender_range) / sizeof(extender_range[0]), + extender_range }; + +static const struct xml_char_range ideographic_range[] = { {0x3007, 0x3007}, + {0x3021, 0x3029}, {0x4e00, 0x9fa5} +}; + +const struct xml_char_group ideographic_group = { + sizeof(ideographic_range) / sizeof(ideographic_range[0]), + ideographic_range }; + +/* The binary search helper function */ +static bool binary_search(unsigned int ch, int left, int right, + const struct xml_char_range *range); + +/* Search for ch in range[left, right] */ +bool binary_search(unsigned int ch, int left, int right, + const struct xml_char_range *range) +{ + if (left > right) + return false; + + int mid = (left + right) / 2; + if (ch >= range[mid].start && ch <= range[mid].end) + return true; + + if (ch < range[mid].start) + return binary_search(ch, left, mid - 1, range); + + if (ch > range[mid].end) + return binary_search(ch, mid + 1, right, range); + + return false; +} + +/** + * Test whether certain character belongs to some XML character group + * + * \param ch The character being tested + * \param group The character group + * \return true if the character belongs to the group, false otherwise. + * + * Generally, we use an algorithm like binary search to find the desired + * character in the group. 
The time complexity is about lg(n) and here n is + * at most 180, so, I think the algorithm is fast enough for name validation. + */ +bool _dom_is_character_in_group(unsigned int ch, + const struct xml_char_group *group) +{ + int len = group->len; + const struct xml_char_range *range = group->range; + + if (ch < range[0].start || ch > range[len-1].end) + return false; + + return binary_search(ch, 0, len - 1, range); +} + +#ifdef CHVALID_DEBUG +/* The following is the testcases for this file. + * Compile this file : + * + * gcc -o test -DCHVALID_DEBUG character_valid.c + * + */ +#include + +int main(int argc, char **argv) +{ + unsigned int ch = 0x666; + + assert(is_digit(ch) == true); + assert(is_base_char(ch) == false); + assert(is_char(ch) == true); + assert(is_extender(ch) == false); + assert(is_combining_char(ch) == false); + assert(is_ideographic(ch) == false); + + ch = 0xf40; + + assert(is_digit(ch) == false); + assert(is_base_char(ch) == true); + assert(is_char(ch) == true); + assert(is_extender(ch) == false); + assert(is_combining_char(ch) == false); + assert(is_ideographic(ch) == false); + + printf("The test pass.\n"); + return 0; +} + +#endif diff --git a/src/utils/character_valid.h b/src/utils/character_valid.h new file mode 100644 index 0000000..5094e7c --- /dev/null +++ b/src/utils/character_valid.h @@ -0,0 +1,54 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + * + * This file contains the API used to validate whether certain character in + * name/value is legal according the XML 1.0 standard. See + * + * http://www.w3.org/TR/2004/REC-xml-20040204/ + * http://www.w3.org/TR/REC-xml/ + * + * for detail. 
+ */ + +#ifndef dom_utils_character_valid_h_ +#define dom_utils_character_valid_h_ + +#include +#include + +struct xml_char_range { + unsigned int start; + unsigned int end; +}; + +struct xml_char_group { + size_t len; + const struct xml_char_range *range; +}; + +/* The groups */ +extern const struct xml_char_group base_char_group; +extern const struct xml_char_group char_group; +extern const struct xml_char_group combining_char_group; +extern const struct xml_char_group digit_char_group; +extern const struct xml_char_group extender_group; +extern const struct xml_char_group ideographic_group; + +bool _dom_is_character_in_group(unsigned int ch, + const struct xml_char_group *group); + +#define is_base_char(ch) _dom_is_character_in_group((ch), &base_char_group) +#define is_char(ch) _dom_is_character_in_group((ch), &char_group) +#define is_combining_char(ch) _dom_is_character_in_group((ch), \ + &combining_char_group) +#define is_digit(ch) _dom_is_character_in_group((ch), &digit_char_group) +#define is_extender(ch) _dom_is_character_in_group((ch), &extender_group) +#define is_ideographic(ch) _dom_is_character_in_group((ch), &ideographic_group) + +#define is_letter(ch) (is_base_char(ch) || is_ideographic(ch)) + +#endif + diff --git a/src/utils/hashtable.c b/src/utils/hashtable.c new file mode 100644 index 0000000..c2ff8ce --- /dev/null +++ b/src/utils/hashtable.c @@ -0,0 +1,492 @@ +/* + * This file is part of libdom. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2006 Rob Kendrick + * Copyright 2006 Richard Wilson + * Copyright 2009 Bo Yang + */ + +#include +#include +#include +#include +#ifdef TEST_RIG +#include +#endif +#include "utils/hashtable.h" + +/* The hash table entry */ +struct _dom_hash_entry { + void *key; /**< The key pointer */ + void *value; /**< The value pointer */ + struct _dom_hash_entry *next; /**< Next entry */ +}; + +/* The hash table */ +struct dom_hash_table { + unsigned int nchains; /**< The chains number */ + dom_hash_func hash; /**< The hash function */ + struct _dom_hash_entry **chain; /**< The chain head */ + unsigned int number; /**< The enries in this table */ + + dom_alloc alloc; /**< Memory allocation function */ + void *ptr; /**< The private data for the memory allocator */ +}; + + +/** + * Create a new hash table, and return a context for it. The memory consumption + * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty. + * + * \param chains Number of chains/buckets this hash table will have. This + * should be a prime number, and ideally a prime number just + * over a power of two, for best performance and distribution + * \param hash The hash function + * \param alloc The memory allocator + * \param ptr The private pointer for the allocator + * \return struct dom_hash_table containing the context of this hash table or + * NULL if there is insufficent memory to create it and its chains. 
+ */ +struct dom_hash_table *_dom_hash_create(unsigned int chains, dom_hash_func hash, + dom_alloc alloc, void *ptr) +{ + struct dom_hash_table *r = alloc(NULL, sizeof(struct dom_hash_table), + ptr); + + if (r == NULL) { + return NULL; + } + + r->nchains = chains; + r->hash = hash; + r->alloc = alloc; + r->ptr = ptr; + r->chain = (struct _dom_hash_entry **)alloc(NULL, + chains*sizeof(struct _dom_hash_entry *), ptr); + r->number = 0; + + unsigned int i; + for (i = 0; i < chains; i++) + r->chain[i] = NULL; + + if (r->chain == NULL) { + alloc(r, 0, ptr); + return NULL; + } + + return r; +} + +/** + * Clone a hash table. + * + * \param ht Hash table to clone. + * \param alloc The allocator. + * \param pw The private data for the allocator. + * \param kf The function pointer used to copy the key. + * \param key_pw The private data for the key cloner. + * \param vf The function pointer used to copy the value. + * \param value_pw The private data for the value cloner. + * + * \return The cloned hash table. 
+ */ +struct dom_hash_table *_dom_hash_clone(struct dom_hash_table *ht, + dom_alloc alloc, void *pw, dom_key_func kf, void *key_pw, + dom_value_func vf, void *value_pw) +{ + struct dom_hash_table *ret; + + ret = _dom_hash_create(ht->nchains, ht->hash, alloc, pw); + if (ret == NULL) + return NULL; + + void *key = NULL, *nkey = NULL; + void *value = NULL, *nvalue = NULL; + unsigned int c1, *c2 = NULL; + while ( (key = _dom_hash_iterate(ht, &c1, &c2)) != NULL) { + nkey = kf(key, key_pw, alloc, pw, true); + if (nkey == NULL) { + _dom_hash_destroy(ret, kf, key_pw, vf, value_pw); + return NULL; + } + + value = _dom_hash_get(ht, key); + nvalue = vf(value, value_pw, alloc, pw, true); + if (nvalue == NULL) { + kf(nkey, key_pw, alloc, pw, false); + _dom_hash_destroy(ret, kf, key_pw, vf, value_pw); + return NULL; + } + + if (_dom_hash_add(ret, nkey, nvalue, false) == false) { + _dom_hash_destroy(ret, kf, key_pw, vf, value_pw); + return NULL; + } + } + + return ret; +} + +/** + * Destroys a hash table, freeing all memory associated with it. + * + * \param ht Hash table to destroy. 
After the function returns, this + * will nolonger be valid + * \param kf The key destroy function + * \param key_pw The key destroy function private data + * \param vf The value destroy function + * \param value_pw The value destroy function private data + */ +void _dom_hash_destroy(struct dom_hash_table *ht, dom_key_func kf, + void *key_pw, dom_value_func vf, void *value_pw) +{ + unsigned int i; + + if (ht == NULL) + return; + + assert(ht->alloc != NULL); + + for (i = 0; i < ht->nchains; i++) { + if (ht->chain[i] != NULL) { + struct _dom_hash_entry *e = ht->chain[i]; + while (e) { + struct _dom_hash_entry *n = e->next; + if (kf != NULL) { + kf(e->key, key_pw, ht->alloc, + ht->ptr, false); + } + if (vf != NULL) { + vf(e->value, value_pw, ht->alloc, + ht->ptr, false); + } + ht->alloc(e, 0, ht->ptr); + e = n; + } + } + } + + ht->alloc(ht->chain, 0, ht->ptr); + ht->alloc(ht, 0, ht->ptr); +} + +/** + * Adds a key/value pair to a hash table + * + * \param ht The hash table context to add the key/value pair to. + * \param key The key to associate the value with. + * \param value The value to associate the key with. + * \return true if the add succeeded, false otherwise. (Failure most likely + * indicates insufficent memory to make copies of the key and value. 
+ */ +bool _dom_hash_add(struct dom_hash_table *ht, void *key, void *value, + bool replace) +{ + unsigned int h, c; + struct _dom_hash_entry *e; + + if (ht == NULL || key == NULL || value == NULL) + return false; + + h = ht->hash(key); + c = h % ht->nchains; + + for (e = ht->chain[c]; e; e = e->next) + if (key == e->key) { + if (replace == true) { + e->value = value; + return true; + } else { + return false; + } + } + + assert(ht->alloc != NULL); + + e = ht->alloc(NULL, sizeof(struct _dom_hash_entry), ht->ptr); + if (e == NULL) { + return false; + } + + e->key = key; + e->value = value; + + e->next = ht->chain[c]; + ht->chain[c] = e; + ht->number ++; + + return true; +} + +/** + * Looks up a the value associated with with a key from a specific hash table. + * + * \param ht The hash table context to look up + * \param key The key to search for + * \return The value associated with the key, or NULL if it was not found. + */ +void *_dom_hash_get(struct dom_hash_table *ht, void *key) +{ + unsigned int h, c; + struct _dom_hash_entry *e; + + if (ht == NULL || key == NULL) + return NULL; + + h = ht->hash(key); + c = h % ht->nchains; + + for (e = ht->chain[c]; e; e = e->next) + if (key == e->key) + return e->value; + + return NULL; +} + +/** + * Delete the key from the hashtable. 
+ * + * \param ht The hashtable object + * \param key The key to delete + * \return The deleted value + */ +void *_dom_hash_del(struct dom_hash_table *ht, void *key) +{ + unsigned int h, c; + struct _dom_hash_entry *e, *p; + void *ret; + + if (ht == NULL || key == NULL) + return NULL; + + h = ht->hash(key); + c = h % ht->nchains; + + assert(ht->alloc != NULL); + + p = ht->chain[c]; + for (e = p; e; p = e, e = e->next) + if (key == e->key) { + if (p != e) { + p->next = e->next; + } else { + /* The first item in this chain is target*/ + ht->chain[c] = e->next; + } + + ret = e->value; + ht->alloc(e, 0, ht->ptr); + ht->number --; + return ret; + } + + return NULL; +} + +/** + * Iterate through all available hash keys. + * + * \param ht The hash table context to iterate. + * \param c1 Pointer to first context + * \param c2 Pointer to second context (set to 0 on first call) + * \return The next hash key, or NULL for no more keys + */ +void *_dom_hash_iterate(struct dom_hash_table *ht, unsigned int *c1, + unsigned int **c2) +{ + struct _dom_hash_entry **he = (struct _dom_hash_entry **)c2; + + if (ht == NULL) + return NULL; + + if (!*he) + *c1 = -1; + else + *he = (*he)->next; + + if (*he) + return (*he)->key; + + while (!*he) { + (*c1)++; + if (*c1 >= ht->nchains) + return NULL; + *he = ht->chain[*c1]; + } + return (*he)->key; +} + +/** + * Get the number of elements in this hash table + * + * \param ht The hash table + * + * \return the number of elements + */ +unsigned int _dom_hash_get_length(struct dom_hash_table *ht) +{ + return ht->number; +} + +/** + * Get the chain number of this hash table + * + * \param ht The hash table + * + * \return the number of chains + */ +unsigned int _dom_hash_get_chains(struct dom_hash_table *ht) +{ + return ht->nchains; +} + +/** + * Get the hash function of this hash table + * + * \param ht The hash table + * + * \return the hash function + */ +dom_hash_func _dom_hash_get_func(struct dom_hash_table *ht) +{ + return ht->hash; +} + +/* 
A simple test rig. To compile, use: + * gcc -g -o hashtest -I../ -I../../include -DTEST_RIG hashtable.c + * + * If you make changes to this hash table implementation, please rerun this + * test, and if possible, through valgrind to make sure there are no memory + * leaks or invalid memory accesses. If you add new functionality, please + * include a test for it that has good coverage along side the other tests. + */ + +#ifdef TEST_RIG + + +/** + * Hash a pointer, returning a 32bit value. + * + * \param ptr The pointer to hash. + * \return the calculated hash value for the pointer. + */ + +static inline unsigned int _dom_hash_pointer_fnv(void *ptr) +{ + return (unsigned int) ptr; +} + +static void *test_alloc(void *p, size_t size, void *ptr) +{ + if (p != NULL) { + free(p); + return NULL; + } + + if (p == NULL) { + return malloc(size); + } +} + +int main(int argc, char *argv[]) +{ + struct dom_hash_table *a, *b; + FILE *dict; + char keybuf[BUFSIZ], valbuf[BUFSIZ]; + int i; + char *cow="cow", *moo="moo", *pig="pig", *oink="oink", + *chicken="chikcken", *cluck="cluck", + *dog="dog", *woof="woof", *cat="cat", + *meow="meow"; + void *ret; + + a = _dom_hash_create(79, _dom_hash_pointer_fnv, test_alloc, NULL); + assert(a != NULL); + + b = _dom_hash_create(103, _dom_hash_pointer_fnv, test_alloc, NULL); + assert(b != NULL); + + _dom_hash_add(a, cow, moo ,true); + _dom_hash_add(b, moo, cow ,true); + + _dom_hash_add(a, pig, oink ,true); + _dom_hash_add(b, oink, pig ,true); + + _dom_hash_add(a, chicken, cluck ,true); + _dom_hash_add(b, cluck, chicken ,true); + + _dom_hash_add(a, dog, woof ,true); + _dom_hash_add(b, woof, dog ,true); + + _dom_hash_add(a, cat, meow ,true); + _dom_hash_add(b, meow, cat ,true); + +#define MATCH(x,y) assert(!strcmp((char *)hash_get(a, x), (char *)y)); \ + assert(!strcmp((char *)hash_get(b, y), (char *)x)) + MATCH(cow, moo); + MATCH(pig, oink); + MATCH(chicken, cluck); + MATCH(dog, woof); + MATCH(cat, meow); + + assert(hash_get_length(a) == 5); + 
assert(hash_get_length(b) == 5); + + _dom_hash_del(a, cat); + _dom_hash_del(b, meow); + assert(hash_get(a, cat) == NULL); + assert(hash_get(b, meow) == NULL); + + assert(hash_get_length(a) == 4); + assert(hash_get_length(b) == 4); + + _dom_hash_destroy(a, NULL, NULL); + _dom_hash_destroy(b, NULL, NULL); + + /* This test requires /usr/share/dict/words - a large list of English + * words. We load the entire file - odd lines are used as keys, and + * even lines are used as the values for the previous line. we then + * work through it again making sure everything matches. + * + * We do this twice - once in a hash table with many chains, and once + * with a hash table with fewer chains. + */ + + a = _dom_hash_create(1031, _dom_hash_pointer_fnv, test_alloc, NULL); + b = _dom_hash_create(7919, _dom_hash_pointer_fnv, test_alloc, NULL); + + dict = fopen("/usr/share/dict/words", "r"); + if (dict == NULL) { + fprintf(stderr, "Unable to open /usr/share/dict/words - \ + extensive testing skipped.\n"); + exit(0); + } + + while (!feof(dict)) { + fscanf(dict, "%s", keybuf); + fscanf(dict, "%s", valbuf); + _dom_hash_add(a, keybuf, valbuf, true); + _dom_hash_add(b, keybuf, valbuf, true); + } + + for (i = 0; i < 5; i++) { + fseek(dict, 0, SEEK_SET); + + while (!feof(dict)) { + fscanf(dict, "%s", keybuf); + fscanf(dict, "%s", valbuf); + assert(strcmp(hash_get(a, keybuf), valbuf) == 0); + assert(strcmp(hash_get(b, keybuf), valbuf) == 0); + } + } + + _dom_hash_destroy(a, NULL, NULL); + _dom_hash_destroy(b, NULL, NULL); + + fclose(dict); + + return 0; +} + +#endif diff --git a/src/utils/hashtable.h b/src/utils/hashtable.h new file mode 100644 index 0000000..3cfe95d --- /dev/null +++ b/src/utils/hashtable.h @@ -0,0 +1,42 @@ +/* + * This file is part of libdom. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2006 Rob Kendrick + * Copyright 2009 Bo Yang + */ + +#ifndef dom_utils_hashtable_h_ +#define dom_utils_hashtable_h_ + +#include +#include + +typedef struct dom_hash_table dom_hash_table; +/* The hash function */ +typedef unsigned int (*dom_hash_func)(void *key); +/* Function to clone/delete key */ +typedef void *(*dom_key_func)(void *key, void *pw, dom_alloc alloc, + void *alloc_pw, bool clone); +/* Function to clone/delete value */ +typedef void *(*dom_value_func)(void *value, void *pw, dom_alloc alloc, + void *alloc_pw, bool clone); + +struct dom_hash_table *_dom_hash_create(unsigned int chains, dom_hash_func hash, + dom_alloc alloc, void *ptr); +struct dom_hash_table *_dom_hash_clone(struct dom_hash_table *ht, + dom_alloc alloc, void *pw, dom_key_func kf, void *key_pw, + dom_value_func vf, void *value_pw); +void _dom_hash_destroy(struct dom_hash_table *ht, dom_key_func kf, void *key_pw, + dom_value_func vf, void *value_pw); +bool _dom_hash_add(struct dom_hash_table *ht, void *key, void *value, + bool replace); +void *_dom_hash_get(struct dom_hash_table *ht, void *key); +void *_dom_hash_del(struct dom_hash_table *ht, void *key); +void *_dom_hash_iterate(struct dom_hash_table *ht, unsigned int *c1, + unsigned int **c2); +unsigned int _dom_hash_get_length(struct dom_hash_table *ht); +unsigned int _dom_hash_get_chains(struct dom_hash_table *ht); +dom_hash_func _dom_hash_get_func(struct dom_hash_table *ht); + +#endif diff --git a/src/utils/list.h b/src/utils/list.h new file mode 100644 index 0000000..6e3ba20 --- /dev/null +++ b/src/utils/list.h @@ -0,0 +1,61 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + * + * This file contains the list structure used to compose lists. + * + * Note: This is a implementation of a doubld-linked cyclar list. 
+ */ + +#ifndef dom_utils_list_h_ +#define dom_utils_list_h_ + +#include + +struct list_entry { + struct list_entry *prev; + struct list_entry *next; +}; + +/** + * Initialise a list_entry structure + * + * \param ent The entry to initialise + */ +static inline void list_init(struct list_entry *ent) +{ + ent->prev = ent; + ent->next = ent; +} + +/** + * Append a new list_entry after the list + * + * \param head The list header + * \param new The new entry + */ +static inline void list_append(struct list_entry *head, struct list_entry *new) +{ + new->next = head; + new->prev = head->prev; + head->prev->next = new; + head->prev = new; +} + +/** + * Delete a list_entry from the list + * + * \param entry The entry need to be deleted from the list + */ +static inline void list_del(struct list_entry *ent) +{ + ent->prev->next = ent->next; + ent->next->prev = ent->prev; + + ent->prev = ent; + ent->next = ent; +} + +#endif diff --git a/src/utils/namespace.c b/src/utils/namespace.c index ca5b01d..8d109ae 100644 --- a/src/utils/namespace.c +++ b/src/utils/namespace.c @@ -1,8 +1,9 @@ /* * This file is part of libdom. 
* Licensed under the MIT License, - * http://www.opensource.org/licenses/mit-license.php + * http://www.opensource.org/licenses/mit-license.php * Copyright 2007 John-Mark Bell + * Copyright 2009 Bo Yang */ #include @@ -10,6 +11,7 @@ #include #include "utils/namespace.h" +#include "utils/validate.h" #include "utils/utils.h" @@ -18,7 +20,7 @@ static struct dom_string *xml; /** XMLNS prefix */ static struct dom_string *xmlns; -/** The namespace strings */ +/* The namespace strings */ static const char *namespaces[DOM_NAMESPACE_COUNT] = { NULL, "http://www.w3.org/1999/xhtml", @@ -37,7 +39,7 @@ struct dom_string *dom_namespaces[DOM_NAMESPACE_COUNT] = { * Initialise the namespace component * * \param alloc Pointer to memory (de)allocation function - * \param pw Pointer to client-specific private data + * \param pw Pointer to client-specific private data * \return DOM_NO_ERR on success. */ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw) @@ -130,10 +132,19 @@ dom_exception _dom_namespace_finalise(void) dom_exception _dom_namespace_validate_qname(struct dom_string *qname, struct dom_string *namespace) { - uint32_t colon; + uint32_t colon, len; + + if (qname == NULL){ + if (namespace != NULL) + return DOM_NAMESPACE_ERR; + if (namespace == NULL) + return DOM_NO_ERR; + } + + if (_dom_validate_name(qname) == false) + return DOM_NAMESPACE_ERR; - /** \todo search qname for invalid characters */ - /** \todo ensure qname is not malformed */ + len = dom_string_length(qname); /* Find colon */ colon = dom_string_index(qname, ':'); @@ -147,9 +158,14 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname, dom_string_cmp(qname, xmlns) != 0) { return DOM_NAMESPACE_ERR; } + } else if (colon == 0) { + /* Some name like ":name" */ + if (namespace != NULL) + return DOM_NAMESPACE_ERR; } else { /* Prefix */ struct dom_string *prefix; + struct dom_string *lname; dom_exception err; /* Ensure there is a namespace URI */ @@ -157,11 +173,21 @@ dom_exception 
_dom_namespace_validate_qname(struct dom_string *qname, return DOM_NAMESPACE_ERR; } - err = dom_string_substr(qname, 0, colon - 1, &prefix); + err = dom_string_substr(qname, 0, colon, &prefix); if (err != DOM_NO_ERR) { return err; } + err = dom_string_substr(qname, colon + 1, len, &lname); + if (err != DOM_NO_ERR) { + return err; + } + + if (_dom_validate_ncname(prefix) == false || + _dom_validate_ncname(lname) == false) { + return DOM_NAMESPACE_ERR; + } + /* Test for invalid XML namespace */ if (dom_string_cmp(prefix, xml) == 0 && dom_string_cmp(namespace, @@ -223,7 +249,7 @@ dom_exception _dom_namespace_split_qname(struct dom_string *qname, } } else { /* Found one => prefix */ - err = dom_string_substr(qname, 0, colon - 1, prefix); + err = dom_string_substr(qname, 0, colon, prefix); if (err != DOM_NO_ERR) { return err; } @@ -240,3 +266,32 @@ dom_exception _dom_namespace_split_qname(struct dom_string *qname, return DOM_NO_ERR; } +/** + * Get the XML prefix dom_string + * + * \return the xml prefix dom_string. + * + * Note: The client of this function may or may not call the dom_string_ref + * on the returned dom_string, because this string will only be destroyed when + * the dom_finalise is called. But if the client call dom_string_ref, it must + * call dom_string_unref to maintain a correct ref count of the dom_string. + */ +dom_string *_dom_namespace_get_xml_prefix(void) +{ + return xml; +} + +/** + * Get the XMLNS prefix dom_string. + * + * \return the xmlns prefix dom_string + * + * Note: The client of this function may or may not call the dom_string_ref + * on the returned dom_string, because this string will only be destroyed when + * the dom_finalise is called. But if the client call dom_string_ref, it must + * call dom_string_unref to maintain a correct ref count of the dom_string. 
+ */ +dom_string *_dom_namespace_get_xmlns_prefix(void) +{ + return xmlns; +} diff --git a/src/utils/namespace.h b/src/utils/namespace.h index ec69035..900c9ee 100644 --- a/src/utils/namespace.h +++ b/src/utils/namespace.h @@ -14,6 +14,7 @@ struct dom_document; struct dom_string; + /* Initialise the namespace component */ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw); @@ -28,5 +29,11 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname, dom_exception _dom_namespace_split_qname(struct dom_string *qname, struct dom_string **prefix, struct dom_string **localname); +/* Get the XML prefix dom_string */ +struct dom_string *_dom_namespace_get_xml_prefix(void); + +/* Get the XMLNS prefix dom_string */ +struct dom_string *_dom_namespace_get_xmlns_prefix(void); + #endif diff --git a/src/utils/resource_mgr.c b/src/utils/resource_mgr.c new file mode 100644 index 0000000..c9c86d3 --- /dev/null +++ b/src/utils/resource_mgr.c @@ -0,0 +1,105 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + */ + +#include "resource_mgr.h" + +#include +#include + +#include +#include "core/string.h" + +/** + * Allocate some memory with this allocator + * + * \param res The resource manager + * \param size The size of memory to allocate + * \return the allocated memory pointer. + */ +void *_dom_resource_mgr_alloc(struct dom_resource_mgr *res, void *ptr, + size_t size) +{ + return res->alloc(ptr, size, res->pw); +} + +/** + * Create a dom_string using this resource manager + * + * \param res The resource manager + * \param data The data pointer + * \param len The length of data + * \param result The returned dom_string + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. 
+ */ +dom_exception _dom_resource_mgr_create_string(struct dom_resource_mgr *res, + const uint8_t *data, size_t len, struct dom_string **result) +{ + return dom_string_create(res->alloc, res->pw, data, len, result); +} + +/** + * Create a lwc_string using this resource manager + * + * \param res The resource manager + * \param data The data pointer + * \param len The length of the data + * \param result The returned lwc_string + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. + */ +dom_exception _dom_resource_mgr_create_lwcstring(struct dom_resource_mgr *res, + const uint8_t *data, size_t len, struct lwc_string_s **result) +{ + lwc_error lerr; + + assert(res->ctx != NULL); + + lerr = lwc_context_intern(res->ctx, (const char *) data, len, + result); + + return _dom_exception_from_lwc_error(lerr); +} + +/** + * Create a dom_string from a lwc_string using this resource manager + * + * \param res The resource manager + * \param str The lwc_string to create the dom_string from + * \param result The returned dom_string + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. + */ +dom_exception _dom_resource_mgr_create_string_from_lwcstring( + struct dom_resource_mgr *res, struct lwc_string_s *str, + struct dom_string **result) +{ + assert(res->ctx != NULL); + + return _dom_string_create_from_lwcstring(res->alloc, res->pw, res->ctx, + str, result); +} + +/** + * Create a hash table using this resource manager + * + * \param res The resource manager + * \param chains The number of buckets of the hash table + * \param f The hash function + * \param ht The returned hash table + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. 
+ */ +dom_exception _dom_resource_mgr_create_hashtable(struct dom_resource_mgr *res, + size_t chains, dom_hash_func f, struct dom_hash_table **ht) +{ + struct dom_hash_table *ret; + + ret = _dom_hash_create(chains, f, res->alloc, res->pw); + if (ret == NULL) + return DOM_NO_MEM_ERR; + + *ht = ret; + return DOM_NO_ERR; +} + diff --git a/src/utils/resource_mgr.h b/src/utils/resource_mgr.h new file mode 100644 index 0000000..b58f665 --- /dev/null +++ b/src/utils/resource_mgr.h @@ -0,0 +1,45 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + */ + +#ifndef dom_utils_resource_mgr_h_ +#define dom_utils_resource_mgr_h_ + +#include +#include + +#include "hashtable.h" + +struct lwc_context_s; +struct lwc_string_s; +struct dom_string; + +/** + * Resource manager + */ +typedef struct dom_resource_mgr { + dom_alloc alloc; + void *pw; + struct lwc_context_s *ctx; +} dom_resource_mgr; + +void *_dom_resource_mgr_alloc(struct dom_resource_mgr *res, void *ptr, + size_t size); + +dom_exception _dom_resource_mgr_create_string(struct dom_resource_mgr *res, + const uint8_t *data, size_t len, struct dom_string **result); + +dom_exception _dom_resource_mgr_create_lwcstring(struct dom_resource_mgr *res, + const uint8_t *data, size_t len, struct lwc_string_s **result); + +dom_exception _dom_resource_mgr_create_string_from_lwcstring( + struct dom_resource_mgr *res, struct lwc_string_s *str, + struct dom_string **result); + +dom_exception _dom_resource_mgr_create_hashtable(struct dom_resource_mgr *res, + size_t chains, dom_hash_func f, struct dom_hash_table **ht); + +#endif diff --git a/src/utils/validate.c b/src/utils/validate.c new file mode 100644 index 0000000..eb6cb22 --- /dev/null +++ b/src/utils/validate.c @@ -0,0 +1,177 @@ +/* + * This file is part of libdom. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + */ + +#include +#include + +#include "utils/validate.h" + +#include + +#include "utils/character_valid.h" +#include "utils/namespace.h" +#include "utils/utils.h" + +/* A combination of various tests */ +static bool is_first_char(uint32_t ch); +static bool is_name_char(uint32_t ch); + +/* Test whether the character can be the first character of + * a Name. */ +static bool is_first_char(uint32_t ch) +{ + /* Refer to http://www.w3.org/TR/REC-xml/ for details */ + if (((ch >= 'a') && (ch <= 'z')) || + ((ch >= 'A') && (ch <= 'Z')) || + (ch == '_') || (ch == ':') || + ((ch >= 0xC0) && (ch <= 0xD6)) || + ((ch >= 0xD8) && (ch <= 0xF6)) || + ((ch >= 0xF8) && (ch <= 0x2FF)) || + ((ch >= 0x370) && (ch <= 0x37D)) || + ((ch >= 0x37F) && (ch <= 0x1FFF)) || + ((ch >= 0x200C) && (ch <= 0x200D)) || + ((ch >= 0x2070) && (ch <= 0x218F)) || + ((ch >= 0x2C00) && (ch <= 0x2FEF)) || + ((ch >= 0x3001) && (ch <= 0xD7FF)) || + ((ch >= 0xF900) && (ch <= 0xFDCF)) || + ((ch >= 0xFDF0) && (ch <= 0xFFFD)) || + ((ch >= 0x10000) && (ch <= 0xEFFFF))) + return true; + + if (is_letter(ch) || ch == (uint32_t) '_' || ch == (uint32_t) ':') { + return true; + } + + return false; +} + +/* Test whether the character can be a part of a Name */ +static bool is_name_char(uint32_t ch) +{ + /* Refer to http://www.w3.org/TR/REC-xml/ for details */ + if (((ch >= 'a') && (ch <= 'z')) || + ((ch >= 'A') && (ch <= 'Z')) || + ((ch >= '0') && (ch <= '9')) || /* !start */ + (ch == '_') || (ch == ':') || + (ch == '-') || (ch == '.') || (ch == 0xB7) || /* !start */ + ((ch >= 0xC0) && (ch <= 0xD6)) || + ((ch >= 0xD8) && (ch <= 0xF6)) || + ((ch >= 0xF8) && (ch <= 0x2FF)) || + ((ch >= 0x300) && (ch <= 0x36F)) || /* !start */ + ((ch >= 0x370) && (ch <= 0x37D)) || + ((ch >= 0x37F) && (ch <= 0x1FFF)) || + ((ch >= 0x200C) && (ch <= 0x200D)) || + ((ch >= 0x203F) && (ch <= 0x2040)) || /* !start */ + ((ch >= 0x2070) 
&& (ch <= 0x218F)) || + ((ch >= 0x2C00) && (ch <= 0x2FEF)) || + ((ch >= 0x3001) && (ch <= 0xD7FF)) || + ((ch >= 0xF900) && (ch <= 0xFDCF)) || + ((ch >= 0xFDF0) && (ch <= 0xFFFD)) || + ((ch >= 0x10000) && (ch <= 0xEFFFF))) + return true; + + if (is_letter(ch) == true) + return true; + if (is_digit(ch) == true) + return true; + if (is_combining_char(ch) == true) + return true; + if (is_extender(ch) == true) + return true; + + if (ch == (uint32_t) '.' || ch == (uint32_t) '-' || + ch == (uint32_t) '_' || ch == (uint32_t) ':') + return true; + + return false; +} + +/** + * Test whether the name is a valid one according to the XML 1.0 standard. + * For the standard please refer to: + * + * http://www.w3.org/TR/2004/REC-xml-20040204/ + * + * \param name The name to be tested + * \return true if ::name is valid, false otherwise. + */ +bool _dom_validate_name(struct dom_string *name) +{ + uint32_t ch, len, i; + dom_exception err; + + if (name == NULL) + return false; + + len = dom_string_length(name); + if (len == 0) + return false; + + /* Test the first character of this string */ + err = dom_string_at(name, 0, &ch); + if (err != DOM_NO_ERR) + return false; + + if (is_first_char(ch) == false) + return false; + + /* Test all remaining characters in this string */ + for(i = 1; i < len; i++) { + err = dom_string_at(name, i, &ch); + if (err != DOM_NO_ERR) + return false; + + if (is_name_char(ch) != true) + return false; + } + + return true; +} + +/** + * Validate whether the string is a legal NCName. + * Refer to http://www.w3.org/TR/REC-xml-names/ for details. + * + * \param name The name to validate + * \return true if ::name is valid, false otherwise. 
+ */ +bool _dom_validate_ncname(struct dom_string *name) +{ + uint32_t ch, len, i; + dom_exception err; + + if (name == NULL) + return false; + + len = dom_string_length(name); + if (len == 0) + return false; + + /* Test the first character of this string */ + err = dom_string_at(name, 0, &ch); + if (err != DOM_NO_ERR) + return false; + + if (is_letter(ch) == false && ch != (uint32_t) '_') + return false; + + /* Test all remaining characters in this string */ + for(i = 1; i < len; i++) { + err = dom_string_at(name, i, &ch); + if (err != DOM_NO_ERR) + return false; + + if (is_name_char(ch) == false) + return false; + + if (ch == (uint32_t) ':') + return false; + } + + return true; +} + diff --git a/src/utils/validate.h b/src/utils/validate.h new file mode 100644 index 0000000..5d375e7 --- /dev/null +++ b/src/utils/validate.h @@ -0,0 +1,26 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2009 Bo Yang + * + * This file contains the API used to validate whether certain element's + * name/namespace are legal according to the XML 1.0 standard. See + * + * http://www.w3.org/TR/2004/REC-xml-20040204/ + * + * for details. + */ + +#ifndef dom_utils_valid_h_ +#define dom_utils_valid_h_ + +#include + +struct dom_string; + +bool _dom_validate_name(struct dom_string *name); +bool _dom_validate_ncname(struct dom_string *name); + +#endif + -- cgit v1.2.3