From 23a0d42db30ac6ea2e60f442735ee5199b7acc0b Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Thu, 23 Jan 2014 23:41:58 +0000 Subject: Strip and collapse whitespace when gathering html option values. --- src/core/document.c | 17 +++++++++++ src/core/document.h | 2 ++ src/core/string.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++-- src/core/string.h | 23 +++++++++++++++ 4 files changed, 124 insertions(+), 2 deletions(-) (limited to 'src/core') diff --git a/src/core/document.c b/src/core/document.c index 22f08f1..a78cde9 100644 --- a/src/core/document.c +++ b/src/core/document.c @@ -147,6 +147,14 @@ dom_exception _dom_document_initialise(dom_document *doc, return err; } + err = dom_string_create_interned((const uint8_t *) "script", + SLEN("script"), &doc->script_string); + if (err != DOM_NO_ERR) { + dom_string_unref(doc->id_name); + dom_string_unref(doc->class_string); + return err; + } + /* Intern the empty string. The use of a space in the constant * is to prevent the compiler warning about an empty string. 
*/ @@ -155,6 +163,7 @@ dom_exception _dom_document_initialise(dom_document *doc, if (err != DOM_NO_ERR) { dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -165,6 +174,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -176,6 +186,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -188,6 +199,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -201,6 +213,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -215,6 +228,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -230,6 +244,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -246,6 +261,7 @@ dom_exception _dom_document_initialise(dom_document *doc, dom_string_unref(doc->_memo_empty); dom_string_unref(doc->id_name); dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); return err; } @@ -283,6 +299,7 @@ bool _dom_document_finalise(dom_document *doc) dom_string_unref(doc->id_name); 
dom_string_unref(doc->class_string); + dom_string_unref(doc->script_string); dom_string_unref(doc->_memo_empty); dom_string_unref(doc->_memo_domnodeinserted); dom_string_unref(doc->_memo_domnoderemoved); diff --git a/src/core/document.h b/src/core/document.h index de49cf2..2837893 100644 --- a/src/core/document.h +++ b/src/core/document.h @@ -54,6 +54,8 @@ struct dom_document { dom_string *class_string; /**< The string "class". */ + dom_string *script_string; /**< The string "script". */ + dom_document_event_internal dei; /**< The DocumentEvent interface */ dom_document_quirks_mode quirks; diff --git a/src/core/string.c b/src/core/string.c index 9ba3576..9df2cd3 100644 --- a/src/core/string.c +++ b/src/core/string.c @@ -271,8 +271,8 @@ bool dom_string_caseless_isequal(const dom_string *s1, const dom_string *s2) is2->type == DOM_STRING_INTERNED) { bool match; - if (lwc_string_caseless_isequal(is1->data.intern, is2->data.intern, - &match) != lwc_error_ok) + if (lwc_string_caseless_isequal(is1->data.intern, + is2->data.intern, &match) != lwc_error_ok) return false; return match; @@ -1018,3 +1018,94 @@ dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower) return exc; }
+/**
+ * Test whether a byte is whitespace for the purposes of stripping and
+ * collapsing: space, tab, line feed, carriage return or form feed.
+ *
+ * \param c  Byte to test
+ * \return true if \a c is whitespace, false otherwise
+ */
+static inline bool dom_string_is_whitespace_byte(uint8_t c)
+{
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
+}
+
+/* exported function documented in string.h */
+dom_exception dom_string_whitespace_op(dom_string *s,
+		enum dom_whitespace_op op, dom_string **ret)
+{
+	const uint8_t *src_text = (const uint8_t *) dom_string_data(s);
+	size_t len = dom_string_byte_length(s);
+	const uint8_t *src_pos;
+	const uint8_t *src_end;
+	dom_exception exc;
+	uint8_t *temp_pos;
+	uint8_t *temp;
+
+	if (len == 0) {
+		/* Nothing to do: give the caller a new ref to the input.
+		 * Must return here, or the ref would be overwritten (and
+		 * leaked) by the string created below. */
+		*ret = dom_string_ref(s);
+		return DOM_NO_ERR;
+	}
+
+	/* Output is never longer than the input */
+	temp = malloc(len);
+	if (temp == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	src_pos = src_text;
+	src_end = src_text + len;
+	temp_pos = temp;
+
+	if (op & DOM_WHITESPACE_STRIP_LEADING) {
+		while (src_pos < src_end &&
+				dom_string_is_whitespace_byte(*src_pos)) {
+			src_pos++;
+		}
+	}
+
+	while (src_pos < src_end) {
+		if ((op & DOM_WHITESPACE_COLLAPSE) &&
+				dom_string_is_whitespace_byte(*src_pos)) {
+			/* Got a whitespace character */
+			do {
+				/* Skip all adjacent whitespace */
+				src_pos++;
+			} while (src_pos < src_end &&
+				dom_string_is_whitespace_byte(*src_pos));
+			/* Gets replaced with single space in output */
+			*temp_pos++ = ' ';
+		} else {
+			/* Otherwise, copy to output */
+			*temp_pos++ = *src_pos++;
+		}
+	}
+
+	if (op & DOM_WHITESPACE_STRIP_TRAILING) {
+		/* Without COLLAPSE the output may end in a run of any
+		 * whitespace bytes, so remove them all rather than just
+		 * a single trailing space. */
+		while (temp_pos > temp &&
+				dom_string_is_whitespace_byte(temp_pos[-1])) {
+			temp_pos--;
+		}
+	}
+
+	/* New length */
+	len = temp_pos - temp;
+
+	/* Make new string of the same kind (CDATA or interned) as input */
+	if (((dom_string_internal *) s)->type == DOM_STRING_CDATA) {
+		exc = dom_string_create(temp, len, ret);
+	} else {
+		exc = dom_string_create_interned(temp, len, ret);
+	}
+
+	free(temp);
+
+	return exc;
+}
+
diff --git a/src/core/string.h b/src/core/string.h index cbf7d36..9fca1fa 100644 --- a/src/core/string.h +++ b/src/core/string.h @@ -14,5 +14,33 @@ /* Map the lwc_error to dom_exception */ dom_exception _dom_exception_from_lwc_error(lwc_error err);
+/** Whitespace operations for dom_string_whitespace_op(); flags may be ORed */
+enum dom_whitespace_op {
+	DOM_WHITESPACE_STRIP_LEADING = (1 << 0),
+	DOM_WHITESPACE_STRIP_TRAILING = (1 << 1),
+	DOM_WHITESPACE_STRIP = DOM_WHITESPACE_STRIP_LEADING |
+			DOM_WHITESPACE_STRIP_TRAILING,
+	DOM_WHITESPACE_COLLAPSE = (1 << 2),
+	DOM_WHITESPACE_STRIP_COLLAPSE = DOM_WHITESPACE_STRIP |
+			DOM_WHITESPACE_COLLAPSE
+};
+
+/** Perform whitespace operations on given string
+ *
+ * Whitespace is any of: space, tab, line feed, carriage return, form feed.
+ * Collapsing replaces each run of whitespace with a single space.
+ *
+ * \param s	Given string
+ * \param op	Whitespace operation(s) to perform
+ * \param ret	New string with whitespace ops performed.  Caller owns ref
+ *
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR if the working buffer
+ *         cannot be allocated, else the error from creating the new string.
+ *
+ * \note If \a s is empty, \a ret receives a new reference to \a s itself.
+ */
+dom_exception dom_string_whitespace_op(dom_string *s,
+		enum dom_whitespace_op op, dom_string **ret);
+
#endif -- cgit v1.2.3