From af381631e45d4cb8c3be18cd7784fc3e5cf8ffd7 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Wed, 28 Mar 2012 20:23:29 +0000 Subject: Replace algorithm used by _dom_validate_{nc,}name so that instead of O(n^2) it's O(n) by unwinding dom_string_at into the validate functions. svn path=/trunk/libdom/; revision=13763 --- src/utils/validate.c | 76 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/src/utils/validate.c b/src/utils/validate.c index ac0bcd1..26af690 100644 --- a/src/utils/validate.c +++ b/src/utils/validate.c @@ -16,6 +16,8 @@ #include "utils/namespace.h" #include "utils/utils.h" +#include <parserutils/charset/utf8.h> + /* An combination of various tests */ static bool is_first_char(uint32_t ch); static bool is_name_char(uint32_t ch); @@ -101,32 +103,43 @@ static bool is_name_char(uint32_t ch) */ bool _dom_validate_name(dom_string *name) { - uint32_t ch, len, i; - dom_exception err; + uint32_t ch; + size_t clen, slen; + parserutils_error err; + const uint8_t *s; if (name == NULL) return false; - len = dom_string_length(name); - if (len == 0) + slen = dom_string_length(name); + if (slen == 0) return false; - /* Test the first character of this string */ - err = dom_string_at(name, 0, &ch); - if (err != DOM_NO_ERR) + s = (const uint8_t *) dom_string_data(name); + slen = dom_string_byte_length(name); + + err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); + if (err != PARSERUTILS_OK) { return false; - + } + if (is_first_char(ch) == false) return false; - - /* Test all remain characters in this string */ - for(i = 1; i < len; i++) { - err = dom_string_at(name, i, &ch); - if (err != DOM_NO_ERR) + + s += clen; + slen -= clen; + + while (slen > 0) { + err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); + if (err != PARSERUTILS_OK) { return false; + } - if (is_name_char(ch) != true) + if (is_name_char(ch) == false) return false; + + s += clen; + slen -= clen; } return true; @@ -141,35 +154,46 @@ bool 
_dom_validate_name(dom_string *name) */ bool _dom_validate_ncname(dom_string *name) { - uint32_t ch, len, i; - dom_exception err; + uint32_t ch; + size_t clen, slen; + parserutils_error err; + const uint8_t *s; if (name == NULL) return false; - len = dom_string_length(name); - if (len == 0) + slen = dom_string_length(name); + if (slen == 0) return false; - /* Test the first character of this string */ - err = dom_string_at(name, 0, &ch); - if (err != DOM_NO_ERR) + s = (const uint8_t *) dom_string_data(name); + slen = dom_string_byte_length(name); + + err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); + if (err != PARSERUTILS_OK) { return false; - + } + if (is_letter(ch) == false && ch != (uint32_t) '_') return false; - /* Test all remain characters in this string */ - for(i = 1; i < len; i++) { - err = dom_string_at(name, i, &ch); - if (err != DOM_NO_ERR) + s += clen; + slen -= clen; + + while (slen > 0) { + err = parserutils_charset_utf8_to_ucs4(s, slen, &ch, &clen); + if (err != PARSERUTILS_OK) { return false; + } if (is_name_char(ch) == false) return false; if (ch == (uint32_t) ':') return false; + + s += clen; + slen -= clen; } return true; -- cgit v1.2.3