summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Drake <tlsa@netsurf-browser.org>2014-01-23 23:41:58 (GMT)
committer Michael Drake <tlsa@netsurf-browser.org>2014-01-23 23:41:58 (GMT)
commit91bee91db8eb8d9a62afb31b3be5a834e8f2135d (patch)
treedec17afa252b302448e5e7c3d583c553967420d4
parent7cf749f4a5d167e924d5ea2fc3d1772ff697d6e1 (diff)
downloadlibdom-91bee91db8eb8d9a62afb31b3be5a834e8f2135d.tar.gz
libdom-91bee91db8eb8d9a62afb31b3be5a834e8f2135d.tar.bz2
Strip and collapse whitespace when gathering html option values.
-rw-r--r--src/core/document.c17
-rw-r--r--src/core/document.h2
-rw-r--r--src/core/string.c84
-rw-r--r--src/core/string.h23
-rw-r--r--src/html/html_option_element.c84
5 files changed, 207 insertions, 3 deletions
diff --git a/src/core/document.c b/src/core/document.c
index 22f08f1..a78cde9 100644
--- a/src/core/document.c
+++ b/src/core/document.c
@@ -147,6 +147,14 @@ dom_exception _dom_document_initialise(dom_document *doc,
return err;
}
+ err = dom_string_create_interned((const uint8_t *) "script",
+ SLEN("script"), &doc->script_string);
+ if (err != DOM_NO_ERR) {
+ dom_string_unref(doc->id_name);
+ dom_string_unref(doc->class_string);
+ return err;
+ }
+
/* Intern the empty string. The use of a space in the constant
* is to prevent the compiler warning about an empty string.
*/
@@ -155,6 +163,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
if (err != DOM_NO_ERR) {
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -165,6 +174,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -176,6 +186,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -188,6 +199,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -201,6 +213,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -215,6 +228,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -230,6 +244,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -246,6 +261,7 @@ dom_exception _dom_document_initialise(dom_document *doc,
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
return err;
}
@@ -283,6 +299,7 @@ bool _dom_document_finalise(dom_document *doc)
dom_string_unref(doc->id_name);
dom_string_unref(doc->class_string);
+ dom_string_unref(doc->script_string);
dom_string_unref(doc->_memo_empty);
dom_string_unref(doc->_memo_domnodeinserted);
dom_string_unref(doc->_memo_domnoderemoved);
diff --git a/src/core/document.h b/src/core/document.h
index de49cf2..2837893 100644
--- a/src/core/document.h
+++ b/src/core/document.h
@@ -54,6 +54,8 @@ struct dom_document {
dom_string *class_string; /**< The string "class". */
+ dom_string *script_string; /**< The string "script". */
+
dom_document_event_internal dei;
/**< The DocumentEvent interface */
dom_document_quirks_mode quirks;
diff --git a/src/core/string.c b/src/core/string.c
index 9ba3576..9df2cd3 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -271,8 +271,8 @@ bool dom_string_caseless_isequal(const dom_string *s1, const dom_string *s2)
is2->type == DOM_STRING_INTERNED) {
bool match;
- if (lwc_string_caseless_isequal(is1->data.intern, is2->data.intern,
- &match) != lwc_error_ok)
+ if (lwc_string_caseless_isequal(is1->data.intern,
+ is2->data.intern, &match) != lwc_error_ok)
return false;
return match;
@@ -1018,3 +1018,83 @@ dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower)
return exc;
}
+/**
+ * Test whether a byte is one of the whitespace characters handled by
+ * dom_string_whitespace_op(): space, tab, LF, CR or FF.
+ *
+ * \param c  Byte to test
+ * \return true if c is whitespace, false otherwise
+ */
+static inline bool dom_string_is_whitespace_byte(uint8_t c)
+{
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
+}
+
+/* exported function documented in string.h */
+dom_exception dom_string_whitespace_op(dom_string *s,
+		enum dom_whitespace_op op, dom_string **ret)
+{
+	const uint8_t *src_text = (const uint8_t *) dom_string_data(s);
+	size_t len = dom_string_byte_length(s);
+	const uint8_t *src_pos;
+	const uint8_t *src_end;
+	dom_exception exc;
+	uint8_t *temp_pos;
+	uint8_t *temp;
+
+	/* An empty string is already stripped and collapsed: hand back a
+	 * new reference and stop, rather than falling through to malloc(0)
+	 * and overwriting (leaking) the reference just taken. */
+	if (len == 0) {
+		*ret = dom_string_ref(s);
+		return DOM_NO_ERR;
+	}
+
+	/* Every operation only removes bytes or replaces a run with a single
+	 * space, so the output never exceeds the input length. */
+	temp = malloc(len);
+	if (temp == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	src_pos = src_text;
+	src_end = src_text + len;
+	temp_pos = temp;
+
+	if (op & DOM_WHITESPACE_STRIP_LEADING) {
+		/* Skip everything up to the first non-whitespace byte */
+		while (src_pos < src_end &&
+				dom_string_is_whitespace_byte(*src_pos)) {
+			src_pos++;
+		}
+	}
+
+	while (src_pos < src_end) {
+		if ((op & DOM_WHITESPACE_COLLAPSE) &&
+				dom_string_is_whitespace_byte(*src_pos)) {
+			/* Skip the whole run of adjacent whitespace */
+			do {
+				src_pos++;
+			} while (src_pos < src_end &&
+					dom_string_is_whitespace_byte(*src_pos));
+			/* The run is replaced with a single space in output */
+			*temp_pos++ = ' ';
+		} else {
+			/* Otherwise, copy to output */
+			*temp_pos++ = *src_pos++;
+		}
+	}
+
+	if (op & DOM_WHITESPACE_STRIP_TRAILING) {
+		/* Drop every trailing whitespace byte.  When collapsing, that
+		 * is at most one space; without collapsing there may be a run
+		 * of arbitrary whitespace bytes left at the end. */
+		while (temp_pos > temp &&
+				dom_string_is_whitespace_byte(temp_pos[-1])) {
+			temp_pos--;
+		}
+	}
+
+	/* New length */
+	len = (size_t) (temp_pos - temp);
+
+	/* Make new string of the same kind (plain or interned) as the input */
+	if (((dom_string_internal *) s)->type == DOM_STRING_CDATA) {
+		exc = dom_string_create(temp, len, ret);
+	} else {
+		exc = dom_string_create_interned(temp, len, ret);
+	}
+
+	free(temp);
+
+	return exc;
+}
+
diff --git a/src/core/string.h b/src/core/string.h
index cbf7d36..9fca1fa 100644
--- a/src/core/string.h
+++ b/src/core/string.h
@@ -14,5 +14,28 @@
/* Map the lwc_error to dom_exception */
dom_exception _dom_exception_from_lwc_error(lwc_error err);
+/**
+ * Bit flags selecting the operations dom_string_whitespace_op() applies.
+ * Each basic operation occupies its own bit, so values may be OR'd together;
+ * the combined enumerators below are provided for convenience.
+ */
+enum dom_whitespace_op {
+ /* Request stripping of whitespace at the start of the string */
+ DOM_WHITESPACE_STRIP_LEADING = (1 << 0),
+ /* Request stripping of whitespace at the end of the string */
+ DOM_WHITESPACE_STRIP_TRAILING = (1 << 1),
+ /* Both of the strip operations above */
+ DOM_WHITESPACE_STRIP = DOM_WHITESPACE_STRIP_LEADING |
+ DOM_WHITESPACE_STRIP_TRAILING,
+ /* Replace each run of adjacent whitespace with a single space */
+ DOM_WHITESPACE_COLLAPSE = (1 << 2),
+ /* Strip both ends and collapse interior runs */
+ DOM_WHITESPACE_STRIP_COLLAPSE = DOM_WHITESPACE_STRIP |
+ DOM_WHITESPACE_COLLAPSE
+};
+
+/** Perform whitespace operations on given string
+ *
+ * The bytes treated as whitespace are space, tab, LF, CR and FF.
+ *
+ * \param s Given string
+ * \param op Whitespace operation(s) to perform; a combination of
+ * enum dom_whitespace_op flags
+ * \param ret New string with whitespace ops performed. Caller owns ref
+ *
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR if the temporary buffer
+ * cannot be allocated, otherwise the error reported when creating
+ * the result string.
+ *
+ * \note The result is created as a plain string when s is a
+ * DOM_STRING_CDATA string, and as an interned string otherwise.
+ */
+dom_exception dom_string_whitespace_op(dom_string *s,
+ enum dom_whitespace_op op, dom_string **ret);
+
#endif
diff --git a/src/html/html_option_element.c b/src/html/html_option_element.c
index 1584bac..b133deb 100644
--- a/src/html/html_option_element.c
+++ b/src/html/html_option_element.c
@@ -8,6 +8,7 @@
#include <assert.h>
#include <stdlib.h>
+#include <dom/dom.h>
#include <dom/html/html_option_element.h>
#include <dom/html/html_select_element.h>
@@ -153,6 +154,86 @@ dom_exception dom_html_option_element_set_default_selected(
}
/**
+ * Append the raw (unstripped) text found beneath a node to an accumulator.
+ *
+ * Comment and processing instruction children are skipped, as are script
+ * elements in the HTML or SVG namespace.  Other element children are
+ * descended into; every remaining child contributes its text content.
+ *
+ * \param n      Node whose children are walked
+ * \param owner  Document owning n; supplies the interned "script" string
+ * \param acc    In/out: text gathered so far (may be NULL on entry).  The
+ *               caller owns the reference, including after a failure.
+ * \return DOM_NO_ERR on success, otherwise the first error reported by a
+ *         callee.
+ */
+static dom_exception dom_html_option_element_gather_text(
+		dom_node_internal *n, dom_document *owner, dom_string **acc)
+{
+	dom_node_internal *child;
+	dom_exception exc;
+
+	for (child = n->first_child; child != NULL; child = child->next) {
+		dom_string *piece = NULL;
+		dom_string *joined = NULL;
+
+		/* Skip irrelevant node types */
+		if (child->type == DOM_COMMENT_NODE ||
+				child->type == DOM_PROCESSING_INSTRUCTION_NODE)
+			continue;
+
+		if (child->type == DOM_ELEMENT_NODE) {
+			dom_string *node_name = NULL;
+			dom_string *node_ns = NULL;
+			bool is_script;
+
+			/* Skip script elements with html or svg namespace */
+			exc = dom_node_get_local_name(child, &node_name);
+			if (exc != DOM_NO_ERR)
+				return exc;
+			is_script = dom_string_caseless_isequal(node_name,
+					owner->script_string);
+			dom_string_unref(node_name);
+
+			if (is_script) {
+				exc = dom_node_get_namespace(child, &node_ns);
+				if (exc != DOM_NO_ERR)
+					return exc;
+				is_script = dom_string_caseless_isequal(node_ns,
+						dom_namespaces[
+							DOM_NAMESPACE_HTML]) ||
+					dom_string_caseless_isequal(node_ns,
+						dom_namespaces[
+							DOM_NAMESPACE_SVG]);
+				dom_string_unref(node_ns);
+			}
+
+			if (is_script)
+				continue;
+
+			/* Gather text inside the child element; it is appended
+			 * straight onto the shared accumulator. */
+			exc = dom_html_option_element_gather_text(child,
+					owner, acc);
+			if (exc != DOM_NO_ERR)
+				return exc;
+			continue;
+		}
+
+		/* Handle other nodes with their get_text_content
+		 * specialisation */
+		exc = dom_node_get_text_content(child, &piece);
+		if (exc != DOM_NO_ERR)
+			return exc;
+		if (piece == NULL)
+			continue;
+
+		if (*acc == NULL) {
+			/* First piece of text: adopt its reference */
+			*acc = piece;
+			continue;
+		}
+
+		/* We already have text, so concatenate */
+		exc = dom_string_concat(*acc, piece, &joined);
+		dom_string_unref(piece);
+		if (exc != DOM_NO_ERR)
+			return exc;
+		dom_string_unref(*acc);
+		*acc = joined;
+	}
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Helper for dom_html_option_element_get_text
+ *
+ * Gathers the text beneath a node (skipping comments, processing
+ * instructions and HTML/SVG script elements), then strips and collapses
+ * whitespace once over the concatenated result, so that whitespace at the
+ * boundaries of nested elements still separates the surrounding words.
+ *
+ * \param n     Node to gather text from; must have an owner document
+ * \param text  Updated to the resulting string, or NULL if no text was
+ *              found or an error occurred.  Caller owns the reference.
+ * \return DOM_NO_ERR on success, otherwise the first error encountered.
+ */
+static dom_exception dom_html_option_element_get_text_node(
+		dom_node_internal *n, dom_string **text)
+{
+	dom_string *raw = NULL;
+	dom_exception exc;
+
+	*text = NULL;
+
+	assert(n->owner != NULL);
+
+	exc = dom_html_option_element_gather_text(n, n->owner, &raw);
+
+	/* Strip and collapse whitespace */
+	if (exc == DOM_NO_ERR && raw != NULL) {
+		exc = dom_string_whitespace_op(raw,
+				DOM_WHITESPACE_STRIP_COLLAPSE, text);
+	}
+
+	/* Release the intermediate string on success and failure alike */
+	if (raw != NULL) {
+		dom_string_unref(raw);
+	}
+
+	return exc;
+}
+
+/**
* Get the text contained in the option
*
* \param option The dom_html_option_element object
@@ -162,7 +243,8 @@ dom_exception dom_html_option_element_set_default_selected(
dom_exception dom_html_option_element_get_text(
dom_html_option_element *option, dom_string **text)
{
- return dom_node_get_text_content(option, text);
+ return dom_html_option_element_get_text_node(
+ (dom_node_internal *) option, text);
}
/**