/* * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license * Copyright 2004 James Bursa */ /** \file * Content for text/html (implementation). */ #include #include #include #include #include "libxml/parserInternals.h" #include "netsurf/utils/config.h" #include "netsurf/content/content.h" #include "netsurf/content/fetch.h" #include "netsurf/content/fetchcache.h" #include "netsurf/desktop/imagemap.h" #ifdef riscos #include "netsurf/desktop/gui.h" #endif #include "netsurf/render/html.h" #include "netsurf/render/layout.h" #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" #include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" #define CHUNK 4096 static void html_convert_css_callback(content_msg msg, struct content *css, void *p1, void *p2, const char *error); static void html_head(struct content *c, xmlNode *head); static void html_find_stylesheets(struct content *c, xmlNode *head); static void html_object_callback(content_msg msg, struct content *object, void *p1, void *p2, const char *error); static bool html_object_type_permitted(const content_type type, const content_type *permitted_types); /** * Create a CONTENT_HTML. * * The content_html_data structure is initialized and the HTML parser is * created. */ void html_create(struct content *c, const char *params[]) { unsigned int i; struct content_html_data *html = &c->data.html; html->encoding = XML_CHAR_ENCODING_8859_1; html->getenc = true; for (i = 0; params[i]; i += 2) { if (strcasecmp(params[i], "charset") == 0) { html->encoding = xmlParseCharEncoding(params[i + 1]); html->getenc = false; /* encoding specified - trust the server... */ if (html->encoding == XML_CHAR_ENCODING_ERROR) { html->encoding = XML_CHAR_ENCODING_8859_1; html->getenc = true; } break; } } html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0, html->encoding); html->base_url = xstrdup(c->url); html->layout = 0; html->background_colour = TRANSPARENT; html->stylesheet_count = 0; html->stylesheet_content = 0; html->style = 0; html->fonts = 0; html->object_count = 0; html->object = 0; html->string_pool = pool_create(8000); assert(html->string_pool); html->box_pool = pool_create(sizeof (struct box) * 100); assert(html->box_pool); } /** * Process data for CONTENT_HTML. * * The data is parsed in chunks of size CHUNK, multitasking in between. */ void html_process_data(struct content *c, char *data, unsigned long size) { unsigned long x; LOG(("content %s, size %lu", c->url, size)); /*cache_dump();*/ /* First time through, check if we need to get the encoding * if so, get it and reset the parser instance with it. * if it fails, assume Latin1 */ if (c->data.html.getenc) { c->data.html.encoding = xmlDetectCharEncoding(data, size); if (c->data.html.encoding == XML_CHAR_ENCODING_ERROR || c->data.html.encoding == XML_CHAR_ENCODING_NONE) c->data.html.encoding = XML_CHAR_ENCODING_8859_1; xmlSwitchEncoding(c->data.html.parser, c->data.html.encoding); c->data.html.getenc = false; LOG(("Encoding: %s", xmlGetCharEncodingName(c->data.html.encoding))); } for (x = 0; x + CHUNK <= size; x += CHUNK) { htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0); gui_multitask(); } htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0); } /** * Convert a CONTENT_HTML for display. * * The following steps are carried out in order: * * - parsing to an XML tree is completed * - stylesheets are fetched * - the XML tree is converted to a box tree and object fetches are started * - the box tree is laid out * * On exit, the content status will be either CONTENT_STATUS_DONE if the * document is completely loaded or CONTENT_STATUS_READY if objects are still * being fetched. */ int html_convert(struct content *c, unsigned int width, unsigned int height) { xmlDoc *document; xmlNode *html, *head; /* finish parsing */ htmlParseChunk(c->data.html.parser, "", 0, 1); document = c->data.html.parser->myDoc; /*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/ htmlFreeParserCtxt(c->data.html.parser); c->data.html.parser = 0; if (document == NULL) { LOG(("Parsing failed")); return 1; } /* locate html and head elements */ for (html = document->children; html != 0 && html->type != XML_ELEMENT_NODE; html = html->next) ; if (html == 0 || strcmp((const char *) html->name, "html") != 0) { LOG(("html element not found")); xmlFreeDoc(document); return 1; } for (head = html->children; head != 0 && head->type != XML_ELEMENT_NODE; head = head->next) ; if (strcmp((const char *) head->name, "head") != 0) { head = 0; LOG(("head element not found")); } if (head != 0) html_head(c, head); /* get stylesheets */ html_find_stylesheets(c, head); /* convert xml tree to box tree */ LOG(("XML to box")); sprintf(c->status_message, messages_get("Processing")); content_broadcast(c, CONTENT_MSG_STATUS, 0); xml_to_box(html, c); /*box_dump(c->data.html.layout->children, 0);*/ /* extract image maps - can't do this sensibly in xml_to_box */ imagemap_extract(html, c); /*imagemap_dump(c);*/ /* XML tree not required past this point */ xmlFreeDoc(document); /* layout the box tree */ sprintf(c->status_message, messages_get("Formatting")); content_broadcast(c, CONTENT_MSG_STATUS, 0); LOG(("Layout document")); layout_document(c->data.html.layout->children, width); /*box_dump(c->data.html.layout->children, 0);*/ c->width = c->data.html.layout->children->width; c->height = c->data.html.layout->children->height; if (c->active == 0) { c->status = CONTENT_STATUS_DONE; sprintf(c->status_message, messages_get("Done")); } else { c->status = CONTENT_STATUS_READY; sprintf(c->status_message, messages_get("FetchObjs"), c->active); } return 0; } /** * Process elements in . * * \param c content structure * \param head xml node of head element * * The title and base href are extracted if present. */ void html_head(struct content *c, xmlNode *head) { xmlNode *node; c->title = 0; for (node = head->children; node != 0; node = node->next) { if (node->type != XML_ELEMENT_NODE) continue; if (!c->title && strcmp(node->name, "title") == 0) { xmlChar *title = xmlNodeGetContent(node); c->title = squash_tolat1(title); xmlFree(title); } else if (strcmp(node->name, "base") == 0) { char *href = (char *) xmlGetProp(node, (const xmlChar *) "href"); if (href) { char *url = url_normalize(href); if (url) { free(c->data.html.base_url); c->data.html.base_url = url; } xmlFree(href); } } } } /** * Process inline stylesheets and fetch linked stylesheets. * * \param c content structure * \param head xml node of head element, or 0 if none */ void html_find_stylesheets(struct content *c, xmlNode *head) { xmlNode *node, *node2; char *rel, *type, *media, *href, *data, *url; unsigned int i = 2; unsigned int last_active = 0; /* stylesheet 0 is the base style sheet, stylesheet 1 is any