summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRupinder Singh Khokhar <rsk1coder99@gmail.com>2014-07-11 05:26:59 +0530
committerRupinder Singh Khokhar <rsk1coder99@gmail.com>2014-08-01 21:44:32 +0530
commitd4f37723baada4e546dc6451d08b1e636dbeae3d (patch)
tree0110be8dbb8d348b3fe6b69e72f28430dc3b978d
parent66dd49ca9edc7f34cf889fed63c589335aebad95 (diff)
downloadlibhubbub-d4f37723baada4e546dc6451d08b1e636dbeae3d.tar.gz
libhubbub-d4f37723baada4e546dc6451d08b1e636dbeae3d.tar.bz2
Added support for getting attribute names and values from the context. This slows the library down very slightly; optimizations will be done later.
-rw-r--r--src/treebuilder/after_head.c3
-rw-r--r--src/treebuilder/before_html.c19
-rw-r--r--src/treebuilder/in_body.c53
-rw-r--r--src/treebuilder/in_row.c2
-rw-r--r--src/treebuilder/in_table.c2
-rw-r--r--src/treebuilder/internal.h18
-rw-r--r--src/treebuilder/treebuilder.c81
-rw-r--r--test/data/tree-construction/INDEX2
8 files changed, 162 insertions, 18 deletions
diff --git a/src/treebuilder/after_head.c b/src/treebuilder/after_head.c
index 40955a8..c2444bf 100644
--- a/src/treebuilder/after_head.c
+++ b/src/treebuilder/after_head.c
@@ -67,7 +67,8 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder,
err = element_stack_push(treebuilder,
HUBBUB_NS_HTML,
HEAD,
- treebuilder->context.head_element);
+ treebuilder->context.head_element,
+ NULL, 0);
if (err != HUBBUB_OK)
return err;
diff --git a/src/treebuilder/before_html.c b/src/treebuilder/before_html.c
index 1ed3717..069696f 100644
--- a/src/treebuilder/before_html.c
+++ b/src/treebuilder/before_html.c
@@ -61,6 +61,7 @@ hubbub_error handle_before_html(hubbub_treebuilder *treebuilder,
if (handled || err == HUBBUB_REPROCESS) {
hubbub_error e;
void *html, *appended;
+ size_t i;
/* We can't use insert_element() here, as it assumes
* that we're inserting into current_node. There is
@@ -110,7 +111,23 @@ hubbub_error handle_before_html(hubbub_treebuilder *treebuilder,
treebuilder->context.element_stack[0].type = HTML;
treebuilder->context.element_stack[0].node = appended;
treebuilder->context.current_node = 0;
-
+ if(err != HUBBUB_REPROCESS) {
+ if(token->data.tag.n_attributes > 0){
+ treebuilder->context.element_stack[0].attributes =
+ (hubbub_attribute *)malloc(sizeof(hubbub_attribute) *
+ token->data.tag.n_attributes);
+ }
+
+ for(i = 0; i < token->data.tag.n_attributes; i++) {
+ copy_attribute(&token->data.tag.attributes[i],
+ &treebuilder->context.element_stack[0].attributes[i]);
+ }
+ treebuilder->context.element_stack[0].n_attributes =
+ token->data.tag.n_attributes;
+ } else {
+ treebuilder->context.element_stack[0].n_attributes =
+ 0;
+ }
/** \todo cache selection algorithm */
treebuilder->context.mode = BEFORE_HEAD;
diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c
index eb82db0..f4ac2ad 100644
--- a/src/treebuilder/in_body.c
+++ b/src/treebuilder/in_body.c
@@ -523,12 +523,49 @@ hubbub_error process_html_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
/** \todo parse error */
+	/*
+	 * Merge the token's attributes into element_stack[0]: every token
+	 * attribute whose name is not already recorded there is deep-copied
+	 * onto the end of that entry's attribute array, and only those newly
+	 * appended attributes are handed to the tree handler.
+	 *
+	 * Returns HUBBUB_NOMEM if the attribute array cannot be grown (in
+	 * which case nothing has been modified), otherwise whatever the
+	 * handler's add_attributes() callback returns.
+	 */
+	const hubbub_tag *tag = &token->data.tag;
+	element_context *root = &treebuilder->context.element_stack[0];
+	/* Number of attributes recorded before this token was processed;
+	 * only these are searched for a matching name. */
+	const size_t n_existing = root->n_attributes;
+	hubbub_attribute *added = NULL;
+	size_t n_added = 0;
+	size_t i, j;
+
+	if (tag->n_attributes > 0) {
+		/* Grow once, for the worst case in which every token
+		 * attribute is new. Going through a temporary keeps the old
+		 * array reachable if realloc() fails. */
+		hubbub_attribute *grown = realloc(root->attributes,
+				(n_existing + tag->n_attributes) *
+				sizeof(root->attributes[0]));
+		if (grown == NULL)
+			return HUBBUB_NOMEM;
+		root->attributes = grown;
+	}
+
+	/** \todo This is O(n * m); a name lookup structure would make it
+	 * cheaper if it ever shows up in profiles. */
+	for (j = 0; j < tag->n_attributes; j++) {
+		bool found = false;
+
+		/* Always index through root->attributes: a pointer cached
+		 * before the realloc() above may no longer be valid. */
+		for (i = 0; i < n_existing; i++) {
+			if (hubbub_string_match(
+					root->attributes[i].name.ptr,
+					root->attributes[i].name.len,
+					tag->attributes[j].name.ptr,
+					tag->attributes[j].name.len)) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			copy_attribute(&tag->attributes[j],
+					&root->attributes[root->n_attributes]);
+			root->n_attributes += 1;
+			n_added += 1;
+		}
+	}
+
+	/* The new attributes occupy the tail of the array, starting where
+	 * the previously recorded ones end. */
+	if (n_added > 0)
+		added = &root->attributes[n_existing];
+
return treebuilder->tree_handler->add_attributes(
treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[0].node,
- token->data.tag.attributes,
- token->data.tag.n_attributes);
+	added,
+	n_added);
}
/**
@@ -876,6 +913,7 @@ hubbub_error process_a_in_body(hubbub_treebuilder *treebuilder,
err = formatting_list_append(treebuilder, token->data.tag.ns, A,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
+ token->data.tag.attributes, token->data.tag.n_attributes,
treebuilder->context.current_node);
if (err != HUBBUB_OK) {
hubbub_ns ns;
@@ -929,6 +967,7 @@ hubbub_error process_presentational_in_body(hubbub_treebuilder *treebuilder,
err = formatting_list_append(treebuilder, token->data.tag.ns, type,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
+ token->data.tag.attributes, token->data.tag.n_attributes,
treebuilder->context.current_node);
if (err != HUBBUB_OK) {
hubbub_ns ns;
@@ -995,6 +1034,7 @@ hubbub_error process_nobr_in_body(hubbub_treebuilder *treebuilder,
err = formatting_list_append(treebuilder, token->data.tag.ns, NOBR,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
+ token->data.tag.attributes, token->data.tag.n_attributes,
treebuilder->context.current_node);
if (err != HUBBUB_OK) {
hubbub_ns ns;
@@ -1082,6 +1122,7 @@ hubbub_error process_applet_marquee_object_in_body(
err = formatting_list_append(treebuilder, token->data.tag.ns, type,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
+ token->data.tag.attributes, token->data.tag.n_attributes,
treebuilder->context.current_node);
if (err != HUBBUB_OK) {
hubbub_ns ns;
@@ -1877,6 +1918,8 @@ hubbub_error process_0presentational_in_body(hubbub_treebuilder *treebuilder,
* we insert an entry for clone */
stack[furthest_block + 1].type = entry->details.type;
stack[furthest_block + 1].node = clone_appended;
+ hubbub_attribute *attrs = entry->details.attributes;
+ size_t n_attrs = entry->details.n_attributes;
/* 11 */
err = formatting_list_remove(treebuilder, entry,
@@ -1888,7 +1931,8 @@ hubbub_error process_0presentational_in_body(hubbub_treebuilder *treebuilder,
err = formatting_list_insert(treebuilder,
bookmark.prev, bookmark.next,
- ons, otype, clone_appended, furthest_block + 1);
+ ons, otype, clone_appended, attrs, n_attrs,
+ furthest_block + 1);
if (err != HUBBUB_OK) {
treebuilder->tree_handler->unref_node(
treebuilder->tree_handler->ctx,
@@ -2268,7 +2312,8 @@ hubbub_error aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder,
/* Replace formatting list entry for node with clone */
err = formatting_list_replace(treebuilder, element,
element->details.ns, element->details.type,
- clone, element->stack_index,
+ clone, element->details.attributes,
+ element->details.n_attributes, element->stack_index,
&ons, &otype, &onode, &oindex);
assert(err == HUBBUB_OK);
diff --git a/src/treebuilder/in_row.c b/src/treebuilder/in_row.c
index 3bf9161..0446e6f 100644
--- a/src/treebuilder/in_row.c
+++ b/src/treebuilder/in_row.c
@@ -106,6 +106,8 @@ hubbub_error handle_in_row(hubbub_treebuilder *treebuilder,
token->data.tag.ns, type,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
+ token->data.tag.attributes,
+ token->data.tag.n_attributes,
treebuilder->context.current_node);
if (err != HUBBUB_OK) {
hubbub_ns ns;
diff --git a/src/treebuilder/in_table.c b/src/treebuilder/in_table.c
index 875e8ae..cf61827 100644
--- a/src/treebuilder/in_table.c
+++ b/src/treebuilder/in_table.c
@@ -121,6 +121,8 @@ hubbub_error handle_in_table(hubbub_treebuilder *treebuilder,
token->data.tag.ns, type,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
+ token->data.tag.attributes,
+ token->data.tag.n_attributes,
treebuilder->context.current_node);
if (err != HUBBUB_OK) {
treebuilder->tree_handler->unref_node(
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index 941f94a..f1314bb 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -57,6 +57,10 @@ typedef struct element_context
* instead of the current node." */
void *node; /**< Node pointer */
+ hubbub_attribute *attributes; /**< The attributes associated with
+ * element*/
+ size_t n_attributes; /**< Number of attributes associated
+ * with the element*/
} element_context;
/**
@@ -170,7 +174,8 @@ bool is_formatting_element(element_type type);
bool is_phrasing_element(element_type type);
hubbub_error element_stack_push(hubbub_treebuilder *treebuilder,
- hubbub_ns ns, element_type type, void *node);
+ hubbub_ns ns, element_type type, void *node,
+ hubbub_attribute *attrs, size_t n_attrs);
hubbub_error element_stack_pop(hubbub_treebuilder *treebuilder,
hubbub_ns *ns, element_type *type, void **node);
hubbub_error element_stack_pop_until(hubbub_treebuilder *treebuilder,
@@ -183,12 +188,12 @@ element_type current_node(hubbub_treebuilder *treebuilder);
element_type prev_node(hubbub_treebuilder *treebuilder);
hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder,
- hubbub_ns ns, element_type type, void *node,
- uint32_t stack_index);
+ hubbub_ns ns, element_type type, void *node, hubbub_attribute *attrs,
+ size_t n_attrs, uint32_t stack_index);
hubbub_error formatting_list_insert(hubbub_treebuilder *treebuilder,
formatting_list_entry *prev, formatting_list_entry *next,
- hubbub_ns ns, element_type type, void *node,
- uint32_t stack_index);
+ hubbub_ns ns, element_type type, void *node, hubbub_attribute *attrs,
+ size_t n_attrs, uint32_t stack_index);
hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder,
formatting_list_entry *entry,
hubbub_ns *ns, element_type *type, void **node,
@@ -196,9 +201,12 @@ hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder,
hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder,
formatting_list_entry *entry,
hubbub_ns ns, element_type type, void *node,
+ hubbub_attribute *attrs, size_t n_attrs,
uint32_t stack_index,
hubbub_ns *ons, element_type *otype, void **onode,
uint32_t *ostack_index);
+void copy_attribute(hubbub_attribute *source,
+ hubbub_attribute *sink);
/* in_foreign_content.c */
void adjust_mathml_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag);
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 11ff2a2..32d7932 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -5,6 +5,8 @@
* Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
*/
+#define _GNU_SOURCE
+
#include <assert.h>
#include <string.h>
@@ -137,6 +139,8 @@ hubbub_error hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
* if the first item in the stack is in use. Assert this here. */
assert(HTML != 0);
tb->context.element_stack[0].type = (element_type) 0;
+ tb->context.element_stack[0].attributes = NULL;
+ tb->context.element_stack[0].n_attributes = 0;
tb->context.strip_leading_lr = false;
tb->context.frameset_ok = true;
@@ -687,7 +691,9 @@ hubbub_error reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
goto cleanup;
error = element_stack_push(treebuilder, entry->details.ns,
- entry->details.type, appended);
+ entry->details.type, appended,
+ entry->details.attributes,
+ entry->details.n_attributes);
if (error != HUBBUB_OK) {
remove_node_from_dom(treebuilder, appended);
@@ -716,8 +722,9 @@ hubbub_error reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
error = formatting_list_replace(treebuilder, entry,
entry->details.ns, entry->details.type,
- node, sp,
- &prev_ns, &prev_type, &prev_node,
+ node, entry->details.attributes,
+ entry->details.n_attributes,
+ sp, &prev_ns, &prev_type, &prev_node,
&prev_stack_index);
/* Cannot fail. Ensure this. */
assert(error == HUBBUB_OK);
@@ -880,7 +887,8 @@ hubbub_error insert_element(hubbub_treebuilder *treebuilder,
if (push) {
error = element_stack_push(treebuilder,
- tag->ns, type, appended);
+ tag->ns, type, appended,
+ tag->attributes, tag->n_attributes);
if (error != HUBBUB_OK) {
remove_node_from_dom(treebuilder, appended);
@@ -1154,9 +1162,11 @@ bool is_form_associated(element_type type)
* \return HUBBUB_OK on success, appropriate error otherwise.
*/
hubbub_error element_stack_push(hubbub_treebuilder *treebuilder,
- hubbub_ns ns, element_type type, void *node)
+ hubbub_ns ns, element_type type, void *node,
+ hubbub_attribute *attrs, size_t n_attrs)
{
uint32_t slot = treebuilder->context.current_node + 1;
+ size_t i;
if (slot >= treebuilder->context.stack_alloc) {
element_context *temp = realloc(
@@ -1176,6 +1186,16 @@ hubbub_error element_stack_push(hubbub_treebuilder *treebuilder,
treebuilder->context.element_stack[slot].type = type;
treebuilder->context.element_stack[slot].node = node;
+ if(n_attrs > 0) {
+ treebuilder->context.element_stack[slot].attributes =
+ (hubbub_attribute *)malloc(n_attrs * sizeof(hubbub_attribute));
+ }
+ for (i = 0; i < n_attrs; i++) {
+ copy_attribute(&attrs[i],
+ &treebuilder->context.element_stack[slot].attributes[i]);
+ }
+ treebuilder->context.element_stack[slot].n_attributes = n_attrs;
+
treebuilder->context.current_node = slot;
return HUBBUB_OK;
@@ -1371,11 +1391,14 @@ element_type prev_node(hubbub_treebuilder *treebuilder)
*/
hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder,
hubbub_ns ns, element_type type, void *node,
+ hubbub_attribute *attrs, size_t n_attrs,
uint32_t stack_index)
{
formatting_list_entry *entry;
uint32_t n_elements = 0;
formatting_list_entry *remove_entry;
+ size_t i;
+
for (entry = treebuilder->context.formatting_list_end;
entry != NULL; entry = entry->prev) {
/* Assumption: HTML and TABLE elements are not in the list */
@@ -1410,6 +1433,15 @@ hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder,
entry->details.type = type;
entry->details.node = node;
entry->stack_index = stack_index;
+ if(n_attrs > 0) {
+ entry->details.attributes =
+ (hubbub_attribute *)malloc(n_attrs * sizeof(hubbub_attribute));
+ }
+ for(i = 0; i < n_attrs; i++) {
+ copy_attribute(&attrs[i],
+ &entry->details.attributes[i]);
+ }
+ entry->details.n_attributes = n_attrs;
entry->prev = treebuilder->context.formatting_list_end;
entry->next = NULL;
@@ -1439,9 +1471,11 @@ hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder,
hubbub_error formatting_list_insert(hubbub_treebuilder *treebuilder,
formatting_list_entry *prev, formatting_list_entry *next,
hubbub_ns ns, element_type type, void *node,
+ hubbub_attribute *attrs, size_t n_attrs,
uint32_t stack_index)
{
formatting_list_entry *entry;
+ size_t i;
if (prev != NULL) {
assert(prev->next == next);
@@ -1459,6 +1493,15 @@ hubbub_error formatting_list_insert(hubbub_treebuilder *treebuilder,
entry->details.type = type;
entry->details.node = node;
entry->stack_index = stack_index;
+ entry->details.n_attributes = n_attrs;
+ if(n_attrs > 0) {
+ entry->details.attributes = (hubbub_attribute *)
+ malloc(n_attrs * sizeof(hubbub_attribute));
+ }
+ for(i = 0;i < n_attrs; i++) {
+ copy_attribute(&attrs[i],
+ &entry->details.attributes[i]);
+ }
entry->prev = prev;
entry->next = next;
@@ -1530,12 +1573,13 @@ hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder,
hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder,
formatting_list_entry *entry,
hubbub_ns ns, element_type type, void *node,
+ hubbub_attribute *attrs, size_t n_attrs,
uint32_t stack_index,
hubbub_ns *ons, element_type *otype, void **onode,
uint32_t *ostack_index)
{
UNUSED(treebuilder);
-
+ size_t i = 0;
*ons = entry->details.ns;
*otype = entry->details.type;
*onode = entry->details.node;
@@ -1544,11 +1588,36 @@ hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder,
entry->details.ns = ns;
entry->details.type = type;
entry->details.node = node;
+ entry->details.n_attributes = n_attrs;
+ if(n_attrs > 0) {
+ entry->details.attributes =
+ (hubbub_attribute *)malloc(n_attrs * sizeof(hubbub_attribute));
+ }
+ for(i = 0;i < n_attrs;i++) {
+ copy_attribute(&attrs[i],
+ &entry->details.attributes[i]);
+ }
entry->stack_index = stack_index;
return HUBBUB_OK;
}
+/**
+ * Duplicate a (pointer, length) byte string into a fresh heap buffer.
+ *
+ * Exactly \p len bytes are copied, so data containing NUL bytes is preserved,
+ * and one extra NUL byte is appended after them.
+ *
+ * \param src  Bytes to copy (not read when \p len is 0)
+ * \param len  Number of bytes to copy
+ * \return malloc()ed buffer of len + 1 bytes, or NULL on allocation failure
+ */
+static uint8_t *duplicate_bytes(const uint8_t *src, size_t len)
+{
+	uint8_t *copy = malloc(len + 1);
+
+	if (copy == NULL)
+		return NULL;
+
+	if (len > 0)
+		memcpy(copy, src, len);
+	copy[len] = '\0';
+
+	return copy;
+}
+
+/**
+ * Deep-copy an attribute: the namespace is copied by value and the name and
+ * value strings are duplicated into newly malloc()ed buffers.
+ *
+ * The function cannot report failure, so if a buffer cannot be allocated the
+ * corresponding string in \p sink is left empty (NULL pointer, zero length)
+ * rather than carrying a length that its pointer does not back.
+ *
+ * \param source  Attribute to copy from
+ * \param sink    Attribute to fill in; any previous contents are overwritten
+ *                without being freed
+ */
+void copy_attribute(hubbub_attribute *source,
+		hubbub_attribute *sink)
+{
+	uint8_t *name = duplicate_bytes(source->name.ptr, source->name.len);
+	uint8_t *value = duplicate_bytes(source->value.ptr, source->value.len);
+
+	sink->ns = source->ns;
+
+	sink->name.ptr = name;
+	sink->name.len = (name != NULL) ? source->name.len : 0;
+
+	sink->value.ptr = value;
+	sink->value.len = (value != NULL) ? source->value.len : 0;
+}
#ifndef NDEBUG
diff --git a/test/data/tree-construction/INDEX b/test/data/tree-construction/INDEX
index c8412aa..8a677b8 100644
--- a/test/data/tree-construction/INDEX
+++ b/test/data/tree-construction/INDEX
@@ -30,7 +30,7 @@ isindex.dat html5lib treebuilder tests for isindex element
#tests10.dat NA
#tests11.dat NA
#tests12.dat NA
-#tests14.dat NA
+tests14.dat html5lib treebuilder tests
#tests15.dat NA
#tests16.dat NA
#tests17.dat NA