/* * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license * Copyright 2004 James Bursa */ /** \file * Content for text/html (implementation). */ #include #include #include #include #include #include "libxml/parserInternals.h" #include "netsurf/utils/config.h" #include "netsurf/content/content.h" #include "netsurf/content/fetch.h" #include "netsurf/content/fetchcache.h" #include "netsurf/desktop/imagemap.h" #ifdef riscos #include "netsurf/desktop/gui.h" #endif #include "netsurf/render/html.h" #include "netsurf/render/layout.h" #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" #include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" #define CHUNK 4096 static void html_convert_css_callback(content_msg msg, struct content *css, void *p1, void *p2, union content_msg_data data); static void html_head(struct content *c, xmlNode *head); static void html_find_stylesheets(struct content *c, xmlNode *head); static void html_object_callback(content_msg msg, struct content *object, void *p1, void *p2, union content_msg_data data); static void html_object_done(struct box *box, struct content *object, bool background); static bool html_object_type_permitted(const content_type type, const content_type *permitted_types); /** * Create a CONTENT_HTML. * * The content_html_data structure is initialized and the HTML parser is * created. */ bool html_create(struct content *c, const char *params[]) { unsigned int i; struct content_html_data *html = &c->data.html; union content_msg_data msg_data; xmlCharEncoding encXML = XML_CHAR_ENCODING_NONE; const char *encStr = NULL; html->encoding = NULL; html->getenc = true; for (i = 0; params[i]; i += 2) { if (strcasecmp(params[i], "charset") == 0) { encXML = xmlParseCharEncoding(params[i + 1]); if (encXML != XML_CHAR_ENCODING_ERROR && encXML != XML_CHAR_ENCODING_NONE) { /* encoding specified - trust the server... 
*/ html->encoding = xstrdup(xmlGetCharEncodingName(encXML)); html->getenc = false; } else { encStr = xstrdup(params[i + 1]); } break; } } html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0, encXML); if (encStr != NULL) { xmlCharEncodingHandlerPtr handler; if ((handler = xmlFindCharEncodingHandler(encStr)) != NULL) { if (xmlSwitchToEncoding(html->parser, handler) == 0) { html->encoding = encStr; html->getenc = false; } else { LOG(("xmlSwitchToEncoding failed for <%s>\n", encStr)); free(encStr); } } else { LOG(("xmlFindCharEncodingHandler() failed for <%s>\n", encStr)); free(encStr); } } html->base_url = xstrdup(c->url); html->base_url = strdup(c->url); html->layout = 0; html->background_colour = TRANSPARENT; html->stylesheet_count = 0; html->stylesheet_content = 0; html->style = 0; html->fonts = 0; html->object_count = 0; html->object = 0; html->imagemaps = 0; html->string_pool = pool_create(8000); html->box_pool = pool_create(sizeof (struct box) * 100); if (!html->parser || !html->base_url || !html->string_pool || !html->box_pool) { htmlFreeParserCtxt(html->parser); free(html->base_url); if (html->string_pool) pool_destroy(html->string_pool); if (html->box_pool) pool_destroy(html->box_pool); msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); warn_user("NoMemory", 0); return false; } return true; } /** * Process data for CONTENT_HTML. * * The data is parsed in chunks of size CHUNK, multitasking in between. */ bool html_process_data(struct content *c, char *data, unsigned int size) { unsigned long x; /* First time through, check if we need to detect the encoding * if so, detect it and reset the parser instance with it. * Do this detection only once. 
*/ if (c->data.html.getenc) { xmlCharEncoding encoding = xmlDetectCharEncoding(data, size); if (encoding != XML_CHAR_ENCODING_ERROR && encoding != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(c->data.html.parser, encoding); c->data.html.encoding = xstrdup(xmlGetCharEncodingName(encoding)); } c->data.html.getenc = false; } for (x = 0; x + CHUNK <= size; x += CHUNK) { htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0); gui_multitask(); } htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0); return true; } /** * Convert a CONTENT_HTML for display. * * The following steps are carried out in order: * * - parsing to an XML tree is completed * - stylesheets are fetched * - the XML tree is converted to a box tree and object fetches are started * - the box tree is laid out * * On exit, the content status will be either CONTENT_STATUS_DONE if the * document is completely loaded or CONTENT_STATUS_READY if objects are still * being fetched. */ bool html_convert(struct content *c, int width, int height) { xmlDoc *document; xmlNode *html, *head; union content_msg_data msg_data; int descendant_width; /* finish parsing */ htmlParseChunk(c->data.html.parser, "", 0, 1); document = c->data.html.parser->myDoc; /*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/ htmlFreeParserCtxt(c->data.html.parser); c->data.html.parser = 0; if (!document) { LOG(("Parsing failed")); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); warn_user("ParsingFail", 0); return false; } /* Last change to pick the Content-Type charset information if the * server didn't send it (or we're reading the HTML from disk) */ if (c->data.html.encoding == NULL && document->encoding != NULL) c->data.html.encoding = xstrdup(document->encoding); /* locate html and head elements */ for (html = document->children; html != 0 && html->type != XML_ELEMENT_NODE; html = html->next) ; if (html == 0 || strcmp((const char *) html->name, "html") != 0) { 
LOG(("html element not found")); xmlFreeDoc(document); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); warn_user("ParsingFail", 0); return false; } for (head = html->children; head != 0 && head->type != XML_ELEMENT_NODE; head = head->next) ; if (strcmp((const char *) head->name, "head") != 0) { head = 0; LOG(("head element not found")); } if (head != 0) html_head(c, head); /* get stylesheets */ html_find_stylesheets(c, head); /* convert xml tree to box tree */ LOG(("XML to box")); content_set_status(c, messages_get("Processing")); content_broadcast(c, CONTENT_MSG_STATUS, msg_data); xml_to_box(html, c); /*box_dump(c->data.html.layout->children, 0);*/ /* extract image maps - can't do this sensibly in xml_to_box */ imagemap_extract(html, c); /*imagemap_dump(c);*/ /* XML tree not required past this point */ xmlFreeDoc(document); /* layout the box tree */ content_set_status(c, messages_get("Formatting")); content_broadcast(c, CONTENT_MSG_STATUS, msg_data); LOG(("Layout document")); layout_document(c->data.html.layout->children, width, c->data.html.box_pool); /*box_dump(c->data.html.layout->children, 0);*/ descendant_width = c->data.html.layout->children->descendant_x1 - c->data.html.layout->children->descendant_x0; LOG(("Available width: %d, Returned Width: %d, Required width: %d", width, c->data.html.layout->children->width, descendant_width)); if (descendant_width > c->data.html.layout->children->width) c->width = descendant_width; else c->width = c->data.html.layout->children->width; c->height = c->data.html.layout->children->height; if (c->active == 0) { c->status = CONTENT_STATUS_DONE; content_set_status(c, messages_get("Done")); } else { c->status = CONTENT_STATUS_READY; content_set_status(c, messages_get("FetchObjs"), c->active); } return true; } /** * Process elements in . * * \param c content structure * \param head xml node of head element * * The title and base href are extracted if present. 
*/

void html_head(struct content *c, xmlNode *head)
{
	xmlNode *node;

	c->title = 0;

	for (node = head->children; node != 0; node = node->next) {
		if (node->type != XML_ELEMENT_NODE)
			continue;

		/* only the first title element is used */
		if (!c->title && strcmp((const char *) node->name,
				"title") == 0) {
			xmlChar *title = xmlNodeGetContent(node);
			/* xmlNodeGetContent() can return NULL; leave the
			 * title unset in that case */
			if (title) {
				c->title = squash_whitespace(
						(const char *) title);
				xmlFree(title);
			}

		} else if (strcmp((const char *) node->name, "base") == 0) {
			char *href = (char *) xmlGetProp(node,
					(const xmlChar *) "href");
			if (href) {
				char *url = url_normalize(href);
				if (url) {
					/* the normalized href replaces the
					 * base URL set at creation */
					free(c->data.html.base_url);
					c->data.html.base_url = url;
				}
				xmlFree(href);
			}
		}
	}
}


/**
 * Process inline stylesheets and fetch linked stylesheets.
 *
 * \param  c     content structure
 * \param  head  xml node of head element, or 0 if none
 */

void html_find_stylesheets(struct content *c, xmlNode *head)
{
	xmlNode *node, *node2;
	char *rel, *type, *media, *href, *data, *url;
	unsigned int i = 2;
	unsigned int last_active = 0;
	union content_msg_data msg_data;

	/* stylesheet 0 is the base style sheet, stylesheet 1 is any