/* * Copyright 2005 James Bursa * Copyright 2003 Phil Mellor * Copyright 2005 John M Bell * Copyright 2006 Richard Wilson * Copyright 2008 Michael Drake * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** \file * Conversion of XML tree to box tree (implementation). */ #include #include #include #include #include #include #include #include #include #include "utils/config.h" #include "content/content_protected.h" #include "css/css.h" #include "css/utils.h" #include "css/select.h" #include "desktop/options.h" #include "render/box.h" #include "render/form.h" #include "render/html_internal.h" #include "utils/locale.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/schedule.h" #include "utils/talloc.h" #include "utils/url.h" #include "utils/utils.h" /** * Context for box tree construction */ struct box_construct_ctx { html_content *content; /**< Content we're constructing for */ xmlNode *n; /**< Current node to process */ struct box *root_box; /**< Root box in the tree */ box_construct_complete_cb cb; /**< Callback to invoke on completion */ }; /** * Transient properties for construction of current node */ struct box_construct_props { /** Style from which to inherit, or NULL if none */ const css_computed_style *parent_style; /** Current link target, or NULL if none */ nsurl *href; /** Current frame target, or NULL if none */ const char *target; /** Current title attribute, or NULL if none */ 
const char *title; /** Identity of the current block-level container */ struct box *containing_block; /** Current container for inlines, or NULL if none * \note If non-NULL, will be the last child of containing_block */ struct box *inline_container; /** Whether the current node is the root of the DOM tree */ bool node_is_root; }; static const content_type image_types = CONTENT_IMAGE; /* the strings are not important, since we just compare the pointers */ const char *TARGET_SELF = "_self"; const char *TARGET_PARENT = "_parent"; const char *TARGET_TOP = "_top"; const char *TARGET_BLANK = "_blank"; static void convert_xml_to_box(struct box_construct_ctx *ctx); static bool box_construct_element(struct box_construct_ctx *ctx, bool *convert_children); static void box_construct_element_after(xmlNode *n, html_content *content); static bool box_construct_text(struct box_construct_ctx *ctx); static css_select_results * box_get_style(html_content *c, const css_computed_style *parent_style, xmlNode *n); static void box_text_transform(char *s, unsigned int len, enum css_text_transform_e tt); #define BOX_SPECIAL_PARAMS xmlNode *n, html_content *content, \ struct box *box, bool *convert_children static bool box_a(BOX_SPECIAL_PARAMS); static bool box_body(BOX_SPECIAL_PARAMS); static bool box_br(BOX_SPECIAL_PARAMS); static bool box_image(BOX_SPECIAL_PARAMS); static bool box_textarea(BOX_SPECIAL_PARAMS); static bool box_select(BOX_SPECIAL_PARAMS); static bool box_input(BOX_SPECIAL_PARAMS); static bool box_input_text(BOX_SPECIAL_PARAMS, bool password); static bool box_button(BOX_SPECIAL_PARAMS); static bool box_frameset(BOX_SPECIAL_PARAMS); static bool box_create_frameset(struct content_html_frames *f, xmlNode *n, html_content *content); static bool box_select_add_option(struct form_control *control, xmlNode *n); static bool box_object(BOX_SPECIAL_PARAMS); static bool box_embed(BOX_SPECIAL_PARAMS); static bool box_pre(BOX_SPECIAL_PARAMS); static bool box_iframe(BOX_SPECIAL_PARAMS); 
static bool box_get_attribute(xmlNode *n, const char *attribute, void *context, char **value); static struct frame_dimension *box_parse_multi_lengths(const char *s, unsigned int *count); /* element_table must be sorted by name */ struct element_entry { char name[10]; /* element type */ bool (*convert)(BOX_SPECIAL_PARAMS); }; static const struct element_entry element_table[] = { {"a", box_a}, {"body", box_body}, {"br", box_br}, {"button", box_button}, {"embed", box_embed}, {"frameset", box_frameset}, {"iframe", box_iframe}, {"image", box_image}, {"img", box_image}, {"input", box_input}, {"object", box_object}, {"pre", box_pre}, {"select", box_select}, {"textarea", box_textarea} }; #define ELEMENT_TABLE_COUNT (sizeof(element_table) / sizeof(element_table[0])) /** * Construct a box tree from an xml tree and stylesheets. * * \param n xml tree * \param c content of type CONTENT_HTML to construct box tree in * \param cb callback to report conversion completion * \return true on success, false on memory exhaustion */ bool xml_to_box(xmlNode *n, html_content *c, box_construct_complete_cb cb) { struct box_construct_ctx *ctx; ctx = malloc(sizeof(*ctx)); if (ctx == NULL) return false; ctx->content = c; ctx->n = n; ctx->root_box = NULL; ctx->cb = cb; schedule(0, (schedule_callback_fn) convert_xml_to_box, ctx); return true; } /* mapping from CSS display to box type * this table must be in sync with libcss' css_display enum */ static const box_type box_map[] = { 0, /*CSS_DISPLAY_INHERIT,*/ BOX_INLINE, /*CSS_DISPLAY_INLINE,*/ BOX_BLOCK, /*CSS_DISPLAY_BLOCK,*/ BOX_BLOCK, /*CSS_DISPLAY_LIST_ITEM,*/ BOX_INLINE, /*CSS_DISPLAY_RUN_IN,*/ BOX_INLINE_BLOCK, /*CSS_DISPLAY_INLINE_BLOCK,*/ BOX_TABLE, /*CSS_DISPLAY_TABLE,*/ BOX_TABLE, /*CSS_DISPLAY_INLINE_TABLE,*/ BOX_TABLE_ROW_GROUP, /*CSS_DISPLAY_TABLE_ROW_GROUP,*/ BOX_TABLE_ROW_GROUP, /*CSS_DISPLAY_TABLE_HEADER_GROUP,*/ BOX_TABLE_ROW_GROUP, /*CSS_DISPLAY_TABLE_FOOTER_GROUP,*/ BOX_TABLE_ROW, /*CSS_DISPLAY_TABLE_ROW,*/ BOX_NONE, 
/*CSS_DISPLAY_TABLE_COLUMN_GROUP,*/ BOX_NONE, /*CSS_DISPLAY_TABLE_COLUMN,*/ BOX_TABLE_CELL, /*CSS_DISPLAY_TABLE_CELL,*/ BOX_INLINE, /*CSS_DISPLAY_TABLE_CAPTION,*/ BOX_NONE /*CSS_DISPLAY_NONE*/ }; static inline struct box *box_for_node(const xmlNode *n) { return ((binding_private *) n->_private)->box; } static inline bool box_is_root(const xmlNode *n) { return n->parent == NULL || n->parent->type == XML_HTML_DOCUMENT_NODE; } /** * Find the next node in the DOM tree, completing * element construction where appropriate. * * \param n Current node * \param content Containing content * \param convert_children Whether to consider children of \a n * \return Next node to process, or NULL if complete */ static xmlNode *next_node(xmlNode *n, html_content *content, bool convert_children) { xmlNode *next = NULL; if (convert_children && n->children != NULL) { next = n->children; } else if (n->next != NULL) { if (box_for_node(n) != NULL) box_construct_element_after(n, content); next = n->next; } else { if (box_for_node(n) != NULL) box_construct_element_after(n, content); while (box_is_root(n) == false && n->parent->next == NULL) { n = n->parent; if (box_for_node(n) != NULL) box_construct_element_after(n, content); } if (box_is_root(n) == false) { if (box_for_node(n->parent) != NULL) { box_construct_element_after(n->parent, content); } next = n->parent->next; } } return next; } /** * Convert an ELEMENT node to a box tree fragment, * then schedule conversion of the next ELEMENT node */ void convert_xml_to_box(struct box_construct_ctx *ctx) { xmlNode *next; bool convert_children = true; assert(ctx->n != NULL); assert(ctx->n->type == XML_ELEMENT_NODE); if (box_construct_element(ctx, &convert_children) == false) { ctx->cb(ctx->content, false); free(ctx); return; } /* Find next element to process, converting text nodes as we go */ next = next_node(ctx->n, ctx->content, convert_children); while (next != NULL && next->type != XML_ELEMENT_NODE) { if (next->type == XML_TEXT_NODE) { ctx->n 
= next; if (box_construct_text(ctx) == false) { ctx->cb(ctx->content, false); free(ctx); return; } } next = next_node(next, ctx->content, true); } ctx->n = next; if (next != NULL) { /* More work to do: schedule a continuation */ schedule(0, (schedule_callback_fn) convert_xml_to_box, ctx); } else { /* Conversion complete */ struct box root; memset(&root, 0, sizeof(root)); root.type = BOX_BLOCK; root.children = root.last = ctx->root_box; root.children->parent = &root; /** \todo Remove box_normalise_block */ if (box_normalise_block(&root, ctx->content) == false) { ctx->cb(ctx->content, false); } else { ctx->content->layout = root.children; ctx->content->layout->parent = NULL; ctx->cb(ctx->content, true); } free(ctx); } } /** * Construct a list marker box * * \param box Box to attach marker to * \param title Current title attribute * \param content Containing content * \param parent Current block-level container * \return True on success, false on memory exhaustion */ static bool box_construct_marker(struct box *box, const char *title, html_content *content, struct box *parent) { lwc_string *image_uri; struct box *marker; marker = box_create(NULL, box->style, false, NULL, NULL, title, NULL, content); if (marker == false) return false; marker->type = BOX_BLOCK; /** \todo marker content (list-style-type) */ switch (css_computed_list_style_type(box->style)) { case CSS_LIST_STYLE_TYPE_DISC: /* 2022 BULLET */ marker->text = (char *) "\342\200\242"; marker->length = 3; break; case CSS_LIST_STYLE_TYPE_CIRCLE: /* 25CB WHITE CIRCLE */ marker->text = (char *) "\342\227\213"; marker->length = 3; break; case CSS_LIST_STYLE_TYPE_SQUARE: /* 25AA BLACK SMALL SQUARE */ marker->text = (char *) "\342\226\252"; marker->length = 3; break; case CSS_LIST_STYLE_TYPE_DECIMAL: case CSS_LIST_STYLE_TYPE_LOWER_ALPHA: case CSS_LIST_STYLE_TYPE_LOWER_ROMAN: case CSS_LIST_STYLE_TYPE_UPPER_ALPHA: case CSS_LIST_STYLE_TYPE_UPPER_ROMAN: default: if (parent->last) { struct box *last = parent->last; /* 
Drill down into last child of parent * to find the list marker (if any) * * Floated list boxes end up as: * * parent * BOX_INLINE_CONTAINER * BOX_FLOAT_{LEFT,RIGHT} * BOX_BLOCK <-- list box * ... */ while (last != NULL) { if (last->list_marker != NULL) break; last = last->last; } if (last && last->list_marker) { marker->rows = last->list_marker->rows + 1; } } marker->text = talloc_array(content, char, 20); if (marker->text == NULL) return false; snprintf(marker->text, 20, "%u.", marker->rows); marker->length = strlen(marker->text); break; case CSS_LIST_STYLE_TYPE_NONE: marker->text = 0; marker->length = 0; break; } if (css_computed_list_style_image(box->style, &image_uri) == CSS_LIST_STYLE_IMAGE_URI && image_uri != NULL) { nsurl *url; nserror error; /* TODO: we get a url out of libcss as a lwc string, but * earlier we already had it as a nsurl after we * nsurl_joined it. Can this be improved? * For now, just making another nsurl. */ error = nsurl_create(lwc_string_data(image_uri), &url); if (error != NSERROR_OK) return false; if (html_fetch_object(content, url, marker, image_types, content->base.available_width, 1000, false) == false) { nsurl_unref(url); return false; } nsurl_unref(url); } box->list_marker = marker; marker->parent = box; return true; } /** * Construct the box required for a generated element. * * \param n XML node of type XML_ELEMENT_NODE * \param content Content of type CONTENT_HTML that is being processed * \param box Box which may have generated content * \param style Complete computed style for pseudo element, or NULL * * TODO: * This is currently incomplete. It just does enough to support the clearfix * hack. 
( http://www.positioniseverything.net/easyclearing.html ) */ static void box_construct_generate(xmlNode *n, html_content *content, struct box *box, const css_computed_style *style) { struct box *gen = NULL; const css_computed_content_item *c_item; /* Nothing to generate if the parent box is not a block */ if (box->type != BOX_BLOCK) return; /* To determine if an element has a pseudo element, we select * for it and test to see if the returned style's content * property is set to normal. */ if (style == NULL || css_computed_content(style, &c_item) == CSS_CONTENT_NORMAL) { /* No pseudo element */ return; } /* create box for this element */ if (css_computed_display(style, box_is_root(n)) == CSS_DISPLAY_BLOCK) { /* currently only support block level elements */ /** \todo Not wise to drop const from the computed style */ gen = box_create(NULL, (css_computed_style *) style, false, NULL, NULL, NULL, NULL, content); if (gen == NULL) { return; } /* set box type from computed display */ gen->type = box_map[css_computed_display( style, box_is_root(n))]; box_add_child(box, gen); } } /** * Extract transient construction properties * * \param n Current DOM node to convert * \param props Property object to populate */ static void box_extract_properties(const xmlNode *n, struct box_construct_props *props) { memset(props, 0, sizeof(*props)); props->node_is_root = box_is_root(n); /* Extract properties from containing DOM node */ if (props->node_is_root == false) { struct box *parent_box; /* Find ancestor node containing parent box */ while (n->parent != NULL && box_for_node(n->parent) == NULL) n = n->parent; parent_box = box_for_node(n->parent); props->parent_style = parent_box->style; props->href = parent_box->href; props->target = parent_box->target; props->title = parent_box->title; /* Find containing block (may be parent) */ for (n = n->parent; n != NULL; n = n->parent) { struct box *b = box_for_node(n); /* Children of nodes that created an inline box * will generate boxes which 
are attached as * _siblings_ of the box generated for their * parent node. Note, however, that we'll still * use the parent node's styling as the parent * style, above. */ if (b != NULL && b->type != BOX_INLINE && b->type != BOX_BR) { props->containing_block = b; break; } } } /* Compute current inline container, if any */ if (props->containing_block != NULL && props->containing_block->last != NULL && props->containing_block->last->type == BOX_INLINE_CONTAINER) props->inline_container = props->containing_block->last; } /** * Construct the box tree for an XML element. * * \param ctx Tree construction context * \param convert_children Whether to convert children * \return true on success, false on memory exhaustion */ bool box_construct_element(struct box_construct_ctx *ctx, bool *convert_children) { xmlChar *title0, *s; char *id = NULL; struct box *box = NULL; css_select_results *styles = NULL; struct element_entry *element; lwc_string *bgimage_uri; struct box_construct_props props; assert(ctx->n != NULL); assert(ctx->n->type == XML_ELEMENT_NODE); box_extract_properties(ctx->n, &props); if (props.containing_block != NULL) { /* In case the containing block is a pre block, we clear * the PRE_STRIP flag since it is not used if we follow * the pre with a tag */ props.containing_block->flags &= ~PRE_STRIP; } styles = box_get_style(ctx->content, props.parent_style, ctx->n); if (styles == NULL) return false; /* Extract title attribute, if present */ if ((title0 = xmlGetProp(ctx->n, (const xmlChar *) "title")) != NULL) { char *t = squash_whitespace((char *) title0); xmlFree(title0); if (t == NULL) return false; props.title = talloc_strdup(ctx->content, t); free(t); if (props.title == NULL) return false; } /* Extract id attribute, if present */ if (box_get_attribute(ctx->n, "id", ctx->content, &id) == false) return false; box = box_create(styles, styles->styles[CSS_PSEUDO_ELEMENT_NONE], false, props.href, props.target, props.title, id, ctx->content); if (box == NULL) return 
false; /* If this is the root box, add it to the context */ if (props.node_is_root) ctx->root_box = box; /* Deal with colspan/rowspan */ if ((s = xmlGetProp(ctx->n, (const xmlChar *) "colspan")) != NULL) { if ('0' <= s[0] && s[0] <= '9') box->columns = strtol((char *) s, NULL, 10); xmlFree(s); } if ((s = xmlGetProp(ctx->n, (const xmlChar *) "rowspan")) != NULL) { if ('0' <= s[0] && s[0] <= '9') box->rows = strtol((char *) s, NULL, 10); xmlFree(s); } /* Set box type from computed display */ if ((css_computed_position(box->style) == CSS_POSITION_ABSOLUTE || css_computed_position(box->style) == CSS_POSITION_FIXED) && (css_computed_display_static(box->style) == CSS_DISPLAY_INLINE || css_computed_display_static(box->style) == CSS_DISPLAY_INLINE_BLOCK || css_computed_display_static(box->style) == CSS_DISPLAY_INLINE_TABLE)) { /* Special case for absolute positioning: make absolute inlines * into inline block so that the boxes are constructed in an * inline container as if they were not absolutely positioned. * Layout expects and handles this. 
*/ box->type = box_map[CSS_DISPLAY_INLINE_BLOCK]; } else { /* Normal mapping */ box->type = box_map[css_computed_display(box->style, props.node_is_root)]; } /* Handle the :before pseudo element */ box_construct_generate(ctx->n, ctx->content, box, box->styles->styles[CSS_PSEUDO_ELEMENT_BEFORE]); /* Special elements */ element = bsearch((const char *) ctx->n->name, element_table, ELEMENT_TABLE_COUNT, sizeof(element_table[0]), (int (*)(const void *, const void *)) strcmp); if (element != NULL) { /* A special convert function exists for this element */ if (element->convert(ctx->n, ctx->content, box, convert_children) == false) return false; } if (box->type == BOX_NONE || css_computed_display(box->style, props.node_is_root) == CSS_DISPLAY_NONE) { css_select_results_destroy(styles); box->styles = NULL; box->style = NULL; /* Invalidate associated gadget, if any */ if (box->gadget != NULL) { box->gadget->box = NULL; box->gadget = NULL; } /* Can't do this, because the lifetimes of boxes and gadgets * are inextricably linked. Fortunately, talloc will save us * (for now) */ /* box_free_box(box); */ *convert_children = false; return true; } /* Attach box to DOM node */ ((binding_private *) ctx->n->_private)->box = box; if (props.inline_container == NULL && (box->type == BOX_INLINE || box->type == BOX_BR || box->type == BOX_INLINE_BLOCK || css_computed_float(box->style) == CSS_FLOAT_LEFT || css_computed_float(box->style) == CSS_FLOAT_RIGHT)) { /* Found an inline child of a block without a current container * (i.e. 
this box is the first child of its parent, or was * preceded by block-level siblings) */ assert(props.containing_block != NULL && "Root box must not be inline or floated"); props.inline_container = box_create(NULL, NULL, false, NULL, NULL, NULL, NULL, ctx->content); if (props.inline_container == NULL) return false; props.inline_container->type = BOX_INLINE_CONTAINER; box_add_child(props.containing_block, props.inline_container); } /* Kick off fetch for any background image */ if (css_computed_background_image(box->style, &bgimage_uri) == CSS_BACKGROUND_IMAGE_IMAGE && bgimage_uri != NULL) { nsurl *url; nserror error; /* TODO: we get a url out of libcss as a lwc string, but * earlier we already had it as a nsurl after we * nsurl_joined it. Can this be improved? * For now, just making another nsurl. */ error = nsurl_create(lwc_string_data(bgimage_uri), &url); if (error != NSERROR_OK) return false; if (html_fetch_object(ctx->content, url, box, image_types, ctx->content->base.available_width, 1000, true) == false) { nsurl_unref(url); return false; } nsurl_unref(url); } if (*convert_children) box->flags |= CONVERT_CHILDREN; if (box->type == BOX_INLINE || box->type == BOX_BR || box->type == BOX_INLINE_BLOCK) { /* Inline container must exist, as we'll have * created it above if it didn't */ assert(props.inline_container != NULL); box_add_child(props.inline_container, box); } else { if (css_computed_display(box->style, props.node_is_root) == CSS_DISPLAY_LIST_ITEM) { /* List item: compute marker */ if (box_construct_marker(box, props.title, ctx->content, props.containing_block) == false) return false; } if (css_computed_float(box->style) == CSS_FLOAT_LEFT || css_computed_float(box->style) == CSS_FLOAT_RIGHT) { /* Float: insert a float between the parent and box. 
*/ struct box *flt = box_create(NULL, NULL, false, props.href, props.target, props.title, NULL, ctx->content); if (flt == NULL) return false; if (css_computed_float(box->style) == CSS_FLOAT_LEFT) flt->type = BOX_FLOAT_LEFT; else flt->type = BOX_FLOAT_RIGHT; box_add_child(props.inline_container, flt); box_add_child(flt, box); } else { /* Non-floated block-level box: add to containing block * if there is one. If we're the root box, then there * won't be. */ if (props.containing_block != NULL) box_add_child(props.containing_block, box); } } return true; } /** * Complete construction of the box tree for an element. * * \param n DOM node to construct for * \param content Containing document * * This will be called after all children of an element have been processed */ void box_construct_element_after(xmlNode *n, html_content *content) { struct box_construct_props props; struct box *box = box_for_node(n); assert(box != NULL); box_extract_properties(n, &props); if (box->type == BOX_INLINE || box->type == BOX_BR) { /* Insert INLINE_END into containing block */ struct box *inline_end; if (n->children == NULL || (box->flags & CONVERT_CHILDREN) == 0) { /* No children, or didn't want children converted */ return; } if (props.inline_container == NULL) { /* Create inline container if we don't have one */ props.inline_container = box_create(NULL, NULL, false, NULL, NULL, NULL, NULL, content); if (props.inline_container == NULL) return; props.inline_container->type = BOX_INLINE_CONTAINER; box_add_child(props.containing_block, props.inline_container); } inline_end = box_create(NULL, box->style, false, box->href, box->target, box->title, box->id, content); if (inline_end != NULL) { inline_end->type = BOX_INLINE_END; assert(props.inline_container != NULL); box_add_child(props.inline_container, inline_end); box->inline_end = inline_end; inline_end->inline_end = box; } } else { /* Handle the :after pseudo element */ box_construct_generate(n, content, box, 
box->styles->styles[CSS_PSEUDO_ELEMENT_AFTER]); } } /** * Construct the box tree for an XML text node. * * \param ctx Tree construction context * \return true on success, false on memory exhaustion */ bool box_construct_text(struct box_construct_ctx *ctx) { struct box_construct_props props; struct box *box = NULL; assert(ctx->n != NULL); assert(ctx->n->type == XML_TEXT_NODE); box_extract_properties(ctx->n, &props); assert(props.containing_block != NULL); if (css_computed_white_space(props.parent_style) == CSS_WHITE_SPACE_NORMAL || css_computed_white_space(props.parent_style) == CSS_WHITE_SPACE_NOWRAP) { char *text = squash_whitespace((char *) ctx->n->content); if (text == NULL) return false; /* if the text is just a space, combine it with the preceding * text node, if any */ if (text[0] == ' ' && text[1] == 0) { if (props.inline_container != NULL) { assert(props.inline_container->last != NULL); props.inline_container->last->space = UNKNOWN_WIDTH; } free(text); return true; } if (props.inline_container == NULL) { /* Child of a block without a current container * (i.e. 
this box is the first child of its parent, or * was preceded by block-level siblings) */ props.inline_container = box_create(NULL, NULL, false, NULL, NULL, NULL, NULL, ctx->content); if (props.inline_container == NULL) { free(text); return false; } props.inline_container->type = BOX_INLINE_CONTAINER; box_add_child(props.containing_block, props.inline_container); } /** \todo Dropping const here is not clever */ box = box_create(NULL, (css_computed_style *) props.parent_style, false, props.href, props.target, props.title, NULL, ctx->content); if (box == NULL) { free(text); return false; } box->type = BOX_TEXT; box->text = talloc_strdup(ctx->content, text); free(text); if (box->text == NULL) return false; box->length = strlen(box->text); /* strip ending space char off */ if (box->length > 1 && box->text[box->length - 1] == ' ') { box->space = UNKNOWN_WIDTH; box->length--; } if (css_computed_text_transform(props.parent_style) != CSS_TEXT_TRANSFORM_NONE) box_text_transform(box->text, box->length, css_computed_text_transform( props.parent_style)); if (css_computed_white_space(props.parent_style) == CSS_WHITE_SPACE_NOWRAP) { unsigned int i; for (i = 0; i != box->length && box->text[i] != ' '; ++i) ; /* no body */ if (i != box->length) { /* there is a space in text block and we * want all spaces to be converted to NBSP */ /*box->text = cnv_space2nbsp(text); if (!box->text) { free(text); goto no_memory; } box->length = strlen(box->text);*/ } } box_add_child(props.inline_container, box); if (box->text[0] == ' ') { box->length--; memmove(box->text, &box->text[1], box->length); if (box->prev != NULL) box->prev->space = UNKNOWN_WIDTH; } } else { /* white-space: pre */ char *text = cnv_space2nbsp((char *) ctx->n->content); char *current; enum css_white_space_e white_space = css_computed_white_space(props.parent_style); /* note: pre-wrap/pre-line are unimplemented */ assert(white_space == CSS_WHITE_SPACE_PRE || white_space == CSS_WHITE_SPACE_PRE_LINE || white_space == 
CSS_WHITE_SPACE_PRE_WRAP); if (text == NULL) return false; if (css_computed_text_transform(props.parent_style) != CSS_TEXT_TRANSFORM_NONE) box_text_transform(text, strlen(text), css_computed_text_transform( props.parent_style)); current = text; /* swallow a single leading new line */ if (props.containing_block->flags & PRE_STRIP) { switch (*current) { case '\n': current++; break; case '\r': current++; if (*current == '\n') current++; break; } props.containing_block->flags &= ~PRE_STRIP; } do { size_t len = strcspn(current, "\r\n"); char old = current[len]; current[len] = 0; if (props.inline_container == NULL) { /* Child of a block without a current container * (i.e. this box is the first child of its * parent, or was preceded by block-level * siblings) */ props.inline_container = box_create(NULL, NULL, false, NULL, NULL, NULL, NULL, ctx->content); if (props.inline_container == NULL) { free(text); return false; } props.inline_container->type = BOX_INLINE_CONTAINER; box_add_child(props.containing_block, props.inline_container); } /** \todo Dropping const isn't clever */ box = box_create(NULL, (css_computed_style *) props.parent_style, false, props.href, props.target, props.title, NULL, ctx->content); if (box == NULL) { free(text); return false; } box->type = BOX_TEXT; box->text = talloc_strdup(ctx->content, current); if (box->text == NULL) { free(text); return false; } box->length = strlen(box->text); box_add_child(props.inline_container, box); current[len] = old; current += len; if (current[0] != '\0') { /* Linebreak: create new inline container */ props.inline_container = box_create(NULL, NULL, false, NULL, NULL, NULL, NULL, ctx->content); if (props.inline_container == NULL) { free(text); return false; } props.inline_container->type = BOX_INLINE_CONTAINER; box_add_child(props.containing_block, props.inline_container); if (current[0] == '\r' && current[1] == '\n') current += 2; else current++; } } while (*current); free(text); } return true; } /** * Get the style 
for an element. * * \param c content of type CONTENT_HTML that is being processed * \param parent_style style at this point in xml tree, or NULL for root * \param n node in xml tree * \return the new style, or NULL on memory exhaustion */ css_select_results *box_get_style(html_content *c, const css_computed_style *parent_style, xmlNode *n) { char *s; int pseudo_element; css_error error; css_stylesheet *inline_style = NULL; css_select_results *styles; nscss_select_ctx ctx; /* Firstly, construct inline stylesheet, if any */ if ((s = (char *) xmlGetProp(n, (const xmlChar *) "style"))) { inline_style = nscss_create_inline_style( (uint8_t *) s, strlen(s), c->encoding, nsurl_access(content__get_url(&c->base)), c->quirks != BINDING_QUIRKS_MODE_NONE, box_style_alloc, NULL); xmlFree(s); if (inline_style == NULL) return NULL; } /* Populate selection context */ ctx.ctx = c->select_ctx; ctx.quirks = (c->quirks == BINDING_QUIRKS_MODE_FULL); ctx.base_url = c->base_url; /* Select partial style for element */ styles = nscss_get_style(&ctx, n, CSS_MEDIA_SCREEN, inline_style, box_style_alloc, NULL); /* No longer need inline style */ if (inline_style != NULL) css_stylesheet_destroy(inline_style); /* Failed selecting partial style -- bail out */ if (styles == NULL) return NULL; /* If there's a parent style, compose with partial to obtain * complete computed style for element */ if (parent_style != NULL) { /* Complete the computed style, by composing with the parent * element's style */ error = css_computed_style_compose(parent_style, styles->styles[CSS_PSEUDO_ELEMENT_NONE], nscss_compute_font_size, NULL, styles->styles[CSS_PSEUDO_ELEMENT_NONE]); if (error != CSS_OK) { css_select_results_destroy(styles); return NULL; } } for (pseudo_element = CSS_PSEUDO_ELEMENT_NONE + 1; pseudo_element < CSS_PSEUDO_ELEMENT_COUNT; pseudo_element++) { if (pseudo_element == CSS_PSEUDO_ELEMENT_FIRST_LETTER || pseudo_element == CSS_PSEUDO_ELEMENT_FIRST_LINE) /* TODO: Handle first-line and first-letter 
pseudo * element computed style completion */ continue; if (styles->styles[pseudo_element] == NULL) /* There were no rules concerning this pseudo element */ continue; /* Complete the pseudo element's computed style, by composing * with the base element's style */ error = css_computed_style_compose( styles->styles[CSS_PSEUDO_ELEMENT_NONE], styles->styles[pseudo_element], nscss_compute_font_size, NULL, styles->styles[pseudo_element]); if (error != CSS_OK) { /* TODO: perhaps this shouldn't be quite so * catastrophic? */ css_select_results_destroy(styles); return NULL; } } return styles; } /** * Apply the CSS text-transform property to given text for its ASCII chars. * * \param s string to transform * \param len length of s * \param tt transform type */ void box_text_transform(char *s, unsigned int len, enum css_text_transform_e tt) { unsigned int i; if (len == 0) return; switch (tt) { case CSS_TEXT_TRANSFORM_UPPERCASE: for (i = 0; i < len; ++i) if ((unsigned char) s[i] < 0x80) s[i] = ls_toupper(s[i]); break; case CSS_TEXT_TRANSFORM_LOWERCASE: for (i = 0; i < len; ++i) if ((unsigned char) s[i] < 0x80) s[i] = ls_tolower(s[i]); break; case CSS_TEXT_TRANSFORM_CAPITALIZE: if ((unsigned char) s[0] < 0x80) s[0] = ls_toupper(s[0]); for (i = 1; i < len; ++i) if ((unsigned char) s[i] < 0x80 && ls_isspace(s[i - 1])) s[i] = ls_toupper(s[i]); break; default: break; } } /** * \name Special case element handlers * * These functions are called by box_construct_element() when an element is * being converted, according to the entries in element_table. * * The parameters are the xmlNode, the content for the document, and a partly * filled in box structure for the element. * * Return true on success, false on memory exhaustion. Set *convert_children * to false if children of this element in the XML tree should be skipped (for * example, if they have been processed in some special way already). * * Elements ordered as in the HTML 4.01 specification. 
Section numbers in
 * brackets [] refer to the spec.
 *
 * \{
 */

/**
 * Document body [7.5.1].
 *
 * Records the body's computed background colour on the content.
 */
bool box_body(BOX_SPECIAL_PARAMS)
{
	css_color bg;

	css_computed_background_color(box->style, &bg);

	content->background_colour = nscss_color_is_transparent(bg) ?
			NS_TRANSPARENT : nscss_color_to_ns(bg);

	return true;
}


/**
 * Forced line break [9.3.2].
 *
 * Overrides the box type with BOX_BR.
 */
bool box_br(BOX_SPECIAL_PARAMS)
{
	box->type = BOX_BR;

	return true;
}


/**
 * Preformatted text [9.3.4].
 *
 * Flags the box so that a single leading newline in its text is swallowed.
 */
bool box_pre(BOX_SPECIAL_PARAMS)
{
	box->flags |= PRE_STRIP;

	return true;
}


/**
 * Anchor [12.2].
 *
 * Fills in the box's link URL, id (from the name attribute) and frame
 * target from the element's attributes.
 */
bool box_a(BOX_SPECIAL_PARAMS)
{
	/* Reserved target names and the shared pointers that stand for
	 * them (targets are compared by pointer elsewhere) */
	const struct {
		const char *keyword;
		const char *target;
	} reserved[] = {
		{ "_blank", TARGET_BLANK },
		{ "_top", TARGET_TOP },
		{ "_parent", TARGET_PARENT },
		{ "_self", TARGET_SELF }
	};
	const size_t reserved_count = sizeof(reserved) / sizeof(reserved[0]);
	xmlChar *attr;
	nsurl *link;
	size_t i;
	bool extracted;

	attr = xmlGetProp(n, (const xmlChar *) "href");
	if (attr != NULL) {
		extracted = box_extract_link((const char *) attr,
				content->base_url, &link);
		xmlFree(attr);
		if (extracted == false)
			return false;

		if (link != NULL) {
			/* Replace whatever link the box already carried */
			if (box->href != NULL)
				nsurl_unref(box->href);
			box->href = link;
		}
	}

	/* name and id share the same namespace */
	if (box_get_attribute(n, "name", content, &box->id) == false)
		return false;

	/* target frame [16.3] */
	attr = xmlGetProp(n, (const xmlChar *) "target");
	if (attr == NULL)
		return true;

	for (i = 0; i != reserved_count; i++) {
		if (strcasecmp((const char *) attr, reserved[i].keyword) == 0)
			break;
	}

	if (i != reserved_count) {
		/* An explicit "_self" is recorded rather than left as 0,
		 * since the default target may have been overridden
		 * (presumably by a <base target> element -- the original
		 * comment had lost the tag name; confirm) */
		box->target = reserved[i].target;
	} else {
		/* 6.16 says that frame names must begin with [a-zA-Z]
		 * This doesn't match reality, so just take anything */
		box->target = talloc_strdup(content, (const char *) attr);
		if (box->target == NULL) {
			xmlFree(attr);
			return false;
		}
	}

	xmlFree(attr);

	return true;
}


/**
 * Embedded image [13.2].
*/ bool box_image(BOX_SPECIAL_PARAMS) { bool ok; char *s; nsurl *url; xmlChar *alt, *src; enum css_width_e wtype; enum css_height_e htype; css_fixed value = 0; css_unit wunit = CSS_UNIT_PX; css_unit hunit = CSS_UNIT_PX; if (box->style && css_computed_display(box->style, n->parent == NULL) == CSS_DISPLAY_NONE) return true; /* handle alt text */ if ((alt = xmlGetProp(n, (const xmlChar *) "alt"))) { s = squash_whitespace((const char *) alt); xmlFree(alt); if (!s) return false; box->text = talloc_strdup(content, s); free(s); if (!box->text) return false; box->length = strlen(box->text); } /* imagemap associated with this image */ if (!box_get_attribute(n, "usemap", content, &box->usemap)) return false; if (box->usemap && box->usemap[0] == '#') box->usemap++; /* get image URL */ if (!(src = xmlGetProp(n, (const xmlChar *) "src"))) return true; if (!box_extract_link((char *) src, content->base_url, &url)) return false; xmlFree(src); if (!url) return true; /* start fetch */ ok = html_fetch_object(content, url, box, image_types, content->base.available_width, 1000, false); nsurl_unref(url); wtype = css_computed_width(box->style, &value, &wunit); htype = css_computed_height(box->style, &value, &hunit); if (wtype == CSS_WIDTH_SET && wunit != CSS_UNIT_PCT && htype == CSS_HEIGHT_SET && hunit != CSS_UNIT_PCT) { /* We know the dimensions the image will be shown at before it's * fetched. */ box->flags |= REPLACE_DIM; } return ok; } /** * Destructor for object_params, for elements * * \param b The object params being destroyed. * \return 0 to allow talloc to continue destroying the tree. */ static int box_object_talloc_destructor(struct object_params *o) { if (o->codebase != NULL) nsurl_unref(o->codebase); if (o->classid != NULL) nsurl_unref(o->classid); if (o->data != NULL) nsurl_unref(o->data); return 0; } /** * Generic embedded object [13.3]. 
*/ bool box_object(BOX_SPECIAL_PARAMS) { struct object_params *params; struct object_param *param; xmlChar *codebase, *classid, *data; xmlNode *c; if (box->style && css_computed_display(box->style, n->parent == NULL) == CSS_DISPLAY_NONE) return true; if (!box_get_attribute(n, "usemap", content, &box->usemap)) return false; if (box->usemap && box->usemap[0] == '#') box->usemap++; params = talloc(content, struct object_params); if (!params) return false; talloc_set_destructor(params, box_object_talloc_destructor); params->data = 0; params->type = 0; params->codetype = 0; params->codebase = 0; params->classid = 0; params->params = 0; /* codebase, classid, and data are URLs * (codebase is the base for the other two) */ if ((codebase = xmlGetProp(n, (const xmlChar *) "codebase"))) { if (!box_extract_link((char *) codebase, content->base_url, ¶ms->codebase)) return false; xmlFree(codebase); } if (!params->codebase) params->codebase = nsurl_ref(content->base_url); if ((classid = xmlGetProp(n, (const xmlChar *) "classid"))) { if (!box_extract_link((char *) classid, params->codebase, ¶ms->classid)) return false; xmlFree(classid); } if ((data = xmlGetProp(n, (const xmlChar *) "data"))) { if (!box_extract_link((char *) data, params->codebase, ¶ms->data)) return false; xmlFree(data); } if (!params->classid && !params->data) /* nothing to embed; ignore */ return true; /* Don't include ourself */ if (params->classid && nsurl_compare(content->base_url, params->classid, NSURL_COMPLETE)) return true; if (params->data && nsurl_compare(content->base_url, params->data, NSURL_COMPLETE)) return true; /* codetype and type are MIME types */ if (!box_get_attribute(n, "codetype", params, ¶ms->codetype)) return false; if (!box_get_attribute(n, "type", params, ¶ms->type)) return false; /* classid && !data => classid is used (consult codetype) * (classid || !classid) && data => data is used (consult type) * !classid && !data => invalid; ignored */ if (params->classid != NULL && params->data == 
NULL && params->codetype != NULL) { lwc_string *icodetype; lwc_error lerror; lerror = lwc_intern_string(params->codetype, strlen(params->codetype), &icodetype); if (lerror != lwc_error_ok) return false; if (content_factory_type_from_mime_type(icodetype) == CONTENT_NONE) { /* can't handle this MIME type */ lwc_string_unref(icodetype); return true; } lwc_string_unref(icodetype); } if (params->data != NULL && params->type != NULL) { lwc_string *itype; lwc_error lerror; lerror = lwc_intern_string(params->type, strlen(params->type), &itype); if (lerror != lwc_error_ok) return false; if (content_factory_type_from_mime_type(itype) == CONTENT_NONE) { /* can't handle this MIME type */ lwc_string_unref(itype); return true; } lwc_string_unref(itype); } /* add parameters to linked list */ for (c = n->children; c; c = c->next) { if (c->type != XML_ELEMENT_NODE) continue; if (strcmp((const char *) c->name, "param") != 0) /* The first non-param child is the start of the alt * html. Therefore, we should break out of this loop. */ break; param = talloc(params, struct object_param); if (!param) return false; param->name = 0; param->value = 0; param->type = 0; param->valuetype = 0; param->next = 0; if (!box_get_attribute(c, "name", param, ¶m->name)) return false; if (!box_get_attribute(c, "value", param, ¶m->value)) return false; if (!box_get_attribute(c, "type", param, ¶m->type)) return false; if (!box_get_attribute(c, "valuetype", param, ¶m->valuetype)) return false; if (!param->valuetype) { param->valuetype = talloc_strdup(param, "data"); if (!param->valuetype) return false; } param->next = params->params; params->params = param; } box->object_params = params; /* start fetch (MIME type is ok or not specified) */ if (!html_fetch_object(content, params->data ? params->data : params->classid, box, CONTENT_ANY, content->base.available_width, 1000, false)) return false; *convert_children = false; return true; } /** * Window subdivision [16.2.1]. 
*/ bool box_frameset(BOX_SPECIAL_PARAMS) { bool ok; if (content->frameset) { LOG(("Error: multiple framesets in document.")); /* Don't convert children */ if (convert_children) *convert_children = false; /* And ignore this spurious frameset */ box->type = BOX_NONE; return true; } content->frameset = talloc_zero(content, struct content_html_frames); if (!content->frameset) return false; ok = box_create_frameset(content->frameset, n, content); if (ok) box->type = BOX_NONE; if (convert_children) *convert_children = false; return ok; } /** * Destructor for content_html_frames, for elements * * \param b The frame params being destroyed. * \return 0 to allow talloc to continue destroying the tree. */ static int box_frames_talloc_destructor(struct content_html_frames *f) { if (f->url != NULL) { nsurl_unref(f->url); f->url = NULL; } return 0; } bool box_create_frameset(struct content_html_frames *f, xmlNode *n, html_content *content) { unsigned int row, col, index, i; unsigned int rows = 1, cols = 1; char *s; nsurl *url; struct frame_dimension *row_height = 0, *col_width = 0; xmlNode *c; struct content_html_frames *frame; bool default_border = true; colour default_border_colour = 0x000000; /* parse rows and columns */ if ((s = (char *) xmlGetProp(n, (const xmlChar *) "rows"))) { row_height = box_parse_multi_lengths(s, &rows); xmlFree(s); if (!row_height) return false; } else { row_height = calloc(1, sizeof(struct frame_dimension)); if (!row_height) return false; row_height->value = 100; row_height->unit = FRAME_DIMENSION_PERCENT; } if ((s = (char *) xmlGetProp(n, (const xmlChar *) "cols"))) { col_width = box_parse_multi_lengths(s, &cols); xmlFree(s); if (!col_width) return false; } else { col_width = calloc(1, sizeof(struct frame_dimension)); if (!col_width) return false; col_width->value = 100; col_width->unit = FRAME_DIMENSION_PERCENT; } /* common extension: border="0|1" to control all children */ if ((s = (char *) xmlGetProp(n, (const xmlChar *) "border"))) { if ((s[0] 
== '0') && (s[1] == '\0')) default_border = false; xmlFree(s); } /* common extension: frameborder="yes|no" to control all children */ if ((s = (char *) xmlGetProp(n, (const xmlChar *) "frameborder"))) { if (!strcasecmp(s, "no")) default_border = false; xmlFree(s); } /* common extension: bordercolor="#RRGGBB|" to control *all children */ if ((s = (char *) xmlGetProp(n, (const xmlChar *) "bordercolor"))) { css_color color; if (nscss_parse_colour((const char *) s, &color)) default_border_colour = nscss_color_to_ns(color); xmlFree(s); } /* update frameset and create default children */ f->cols = cols; f->rows = rows; f->scrolling = SCROLLING_NO; f->children = talloc_array(content, struct content_html_frames, (rows * cols)); talloc_set_destructor(f->children, box_frames_talloc_destructor); for (row = 0; row < rows; row++) { for (col = 0; col < cols; col++) { index = (row * cols) + col; frame = &f->children[index]; frame->cols = 0; frame->rows = 0; frame->width = col_width[col]; frame->height = row_height[row]; frame->margin_width = 0; frame->margin_height = 0; frame->name = NULL; frame->url = NULL; frame->no_resize = false; frame->scrolling = SCROLLING_AUTO; frame->border = default_border; frame->border_colour = default_border_colour; frame->children = NULL; } } free(col_width); free(row_height); /* create the frameset windows */ c = n->children; for (row = 0; c && row < rows; row++) { for (col = 0; c && col < cols; col++) { while (c && !(c->type == XML_ELEMENT_NODE && ( strcmp((const char *) c->name, "frame") == 0 || strcmp((const char *) c->name, "frameset") == 0 ))) c = c->next; if (!c) break; /* get current frame */ index = (row * cols) + col; frame = &f->children[index]; /* nest framesets */ if (strcmp((const char *) c->name, "frameset") == 0) { frame->border = 0; if (!box_create_frameset(frame, c, content)) return false; c = c->next; continue; } /* get frame URL (not required) */ url = NULL; if ((s = (char *) xmlGetProp(c, (const xmlChar *) "src"))) { 
box_extract_link(s, content->base_url, &url); xmlFree(s); } /* copy url */ if (url) { /* no self-references */ if (nsurl_compare(content->base_url, url, NSURL_COMPLETE) == false) frame->url = url; url = NULL; } /* fill in specified values */ if ((s = (char *) xmlGetProp(c, (const xmlChar *) "name"))) { frame->name = talloc_strdup(content, s); xmlFree(s); } frame->no_resize = xmlHasProp(c, (const xmlChar *) "noresize") != NULL; if ((s = (char *) xmlGetProp(c, (const xmlChar *) "frameborder"))) { i = atoi(s); frame->border = (i != 0); xmlFree(s); } if ((s = (char *) xmlGetProp(c, (const xmlChar *) "scrolling"))) { if (!strcasecmp(s, "yes")) frame->scrolling = SCROLLING_YES; else if (!strcasecmp(s, "no")) frame->scrolling = SCROLLING_NO; xmlFree(s); } if ((s = (char *) xmlGetProp(c, (const xmlChar *) "marginwidth"))) { frame->margin_width = atoi(s); xmlFree(s); } if ((s = (char *) xmlGetProp(c, (const xmlChar *) "marginheight"))) { frame->margin_height = atoi(s); xmlFree(s); } if ((s = (char *) xmlGetProp(c, (const xmlChar *) "bordercolor"))) { css_color color; if (nscss_parse_colour((const char *) s, &color)) frame->border_colour = nscss_color_to_ns(color); xmlFree(s); } /* advance */ c = c->next; } } return true; } /** * Destructor for content_html_iframe, for