/*
 * This file is part of NetSurf, http://netsurf.sourceforge.net/
 * Licensed under the GNU General Public License,
 *                http://www.opensource.org/licenses/gpl-license
 * Copyright 2004 John M Bell
 * Copyright 2004 James Bursa
 */

/** \file
 * Save HTML document with dependencies (implementation).
 */

#define _GNU_SOURCE /* for strndup */

/* NOTE(review): the six #include directives below carry no header name in
 * this copy of the file (the names appear to have been stripped). They are
 * kept as found; restore the header names from the upstream source. */
#include
#include
#include
#include
#include
#include
#include "libxml/HTMLtree.h"
#include "libxml/parserInternals.h"
#include "oslib/osfile.h"
#include "netsurf/utils/config.h"
#include "netsurf/content/content.h"
#include "netsurf/css/css.h"
#include "netsurf/render/box.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/save_complete.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"

/* Everything from here on is compiled only when WITH_SAVE_COMPLETE is
 * defined; the matching #endif lies beyond this part of the file. */
#ifdef WITH_SAVE_COMPLETE

/** Regular expression object with external linkage.
 * NOTE(review): judging by the name it is presumably used to locate
 * stylesheet imports; where it is compiled and used is not visible in this
 * part of the file - confirm. */
regex_t save_complete_import_re;

/** An entry in save_complete_list. */
struct save_complete_entry {
	struct content *content;		/**< Content recorded by this entry */
	struct save_complete_entry *next;	/**< Next entry in list */
};

/** Head of the singly linked list of contents seen and saved so far.
 * Empty (0) when no save is in progress; save_complete() frees every entry
 * before it returns. */
static struct save_complete_entry *save_complete_list = 0;

/* Prototypes for the file-local helpers. */
static bool save_complete_html(struct content *c, const char *path,
		bool index);
static bool save_imported_sheets(struct content *c, const char *path);
static char * rewrite_stylesheet_urls(const char *source, unsigned int size,
		int *osize, const char *base);
static bool rewrite_document_urls(xmlDoc *doc, const char *base);
static bool rewrite_urls(xmlNode *n, const char *base);
static bool rewrite_url(xmlNode *n, const char *attr, const char *base);
static bool save_complete_list_add(struct content *content);
static struct content * save_complete_list_find(const char *url);
static bool save_complete_list_check(struct content *content);
static void save_complete_list_dump(void);

/**
 * Save an HTML page with all dependencies.
* * \param c CONTENT_HTML to save * \param path directory to save to (must exist) * \return true on success, false on error and error reported */ bool save_complete(struct content *c, const char *path) { bool result; result = save_complete_html(c, path, true); /* free save_complete_list */ while (save_complete_list) { struct save_complete_entry *next = save_complete_list->next; free(save_complete_list); save_complete_list = next; } return result; } /** * Save an HTML page with all dependencies, recursing through imported pages. * * \param c CONTENT_HTML to save * \param path directory to save to (must exist) * \param index true to save as "index" * \return true on success, false on error and error reported */ bool save_complete_html(struct content *c, const char *path, bool index) { char spath[256]; unsigned int i; htmlParserCtxtPtr parser; os_error *error; if (c->type != CONTENT_HTML) return false; if (save_complete_list_check(c)) return true; /* save stylesheets, ignoring the base and adblocking sheets */ for (i = STYLESHEET_STYLE; i != c->data.html.stylesheet_count; i++) { struct content *css = c->data.html.stylesheet_content[i]; char *source; int source_len; if (!css) continue; if (save_complete_list_check(css)) continue; if (!save_complete_list_add(css)) { warn_user("NoMemory", 0); return false; } if (!save_imported_sheets(css, path)) return false; if (i == STYLESHEET_STYLE) continue; /* don't save