summary refs log tree commit diff
path: root/riscos/save_text.c
diff options
context:
space:
mode:
Diffstat (limited to 'riscos/save_text.c')
-rw-r--r-- riscos/save_text.c 130
1 files changed, 130 insertions, 0 deletions
diff --git a/riscos/save_text.c b/riscos/save_text.c
new file mode 100644
index 000000000..c9a3ed2b8
--- /dev/null
+++ b/riscos/save_text.c
@@ -0,0 +1,130 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ * http://www.opensource.org/licenses/gpl-license
+ * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "libxml/HTMLtree.h"
+
+#include "oslib/osfile.h"
+
+#include "netsurf/utils/config.h"
+#include "netsurf/content/content.h"
+#include "netsurf/riscos/save_text.h"
+#include "netsurf/utils/log.h"
+#include "netsurf/utils/utils.h"
+
+#ifdef WITH_TEXT_EXPORT
+
+static void extract_text(xmlDoc *doc);
+static void extract_text_from_tree(xmlNode *n);
+
+/* Output accumulated by the current save_as_text() call.
+ * The three variables are only meaningful while save_as_text() is running;
+ * it resets them on entry and clears them again before returning.
+ * Invariant: output_size <= buffer_capacity.
+ */
+static char *buffer = NULL;		/* bytes emitted so far (not NUL terminated) */
+static size_t output_size = 0;		/* number of valid bytes in buffer */
+static size_t buffer_capacity = 0;	/* number of bytes allocated for buffer */
+
+/* Elements followed by a blank line (two newlines) in the text output. */
+static const char *const double_break_elements[] = {
+	"dl", "h1", "h2", "h3", "ol", "title", "ul", NULL
+};
+
+/* Elements followed by a single newline in the text output. */
+static const char *const single_break_elements[] = {
+	"applet", "br", "div", "dt", "h4", "h5", "h6", "li", "object", "p",
+	"tr", NULL
+};
+
+/**
+ * Test whether an element name appears in a NULL-terminated name list.
+ *
+ * \param name  element name to look for (may be NULL)
+ * \param list  NULL-terminated array of names
+ * \return true if name is present in list, false otherwise
+ */
+static bool name_in_list(const char *name, const char *const *list)
+{
+	if (name == NULL)
+		return false;
+
+	for (; *list != NULL; list++) {
+		if (strcmp(name, *list) == 0)
+			return true;
+	}
+	return false;
+}
+
+/**
+ * Append bytes to the output buffer, enlarging it when it is too small.
+ *
+ * The write position is taken from output_size, so no scan for a
+ * terminator is needed and the cost is linear in the amount appended.
+ *
+ * \param data  bytes to append
+ * \param len   number of bytes at data
+ */
+static void append_output(const char *data, size_t len)
+{
+	if (len == 0)
+		return;
+
+	if (len > buffer_capacity - output_size) {
+		size_t wanted = output_size + len;
+		size_t grown = buffer_capacity != 0 ? buffer_capacity : 256;
+		char *bigger;
+
+		/* double until large enough, so repeated appends stay cheap */
+		while (grown < wanted) {
+			if (grown > (size_t)-1 / 2) {
+				grown = wanted;
+				break;
+			}
+			grown *= 2;
+		}
+
+		/* NOTE(review): as in the original code, the xcalloc result
+		 * is not checked; it is presumed to abort on failure. */
+		bigger = xcalloc(grown, sizeof(char));
+		if (output_size > 0)
+			memcpy(bigger, buffer, output_size);
+		if (buffer != NULL)
+			xfree(buffer);
+		buffer = bigger;
+		buffer_capacity = grown;
+	}
+
+	memcpy(buffer + output_size, data, len);
+	output_size += len;
+}
+
+/**
+ * Save the text of an HTML content to a file.
+ *
+ * The content's source is re-parsed as HTML, the text nodes beneath the
+ * html element are collected (with newlines after block-level elements)
+ * and, if any output was produced, it is written to path.
+ * Contents whose type is not CONTENT_HTML are ignored.
+ *
+ * \param c     content to export
+ * \param path  file to write the text to
+ */
+void save_as_text(struct content *c, char *path)
+{
+	htmlParserCtxtPtr toSave;
+
+	if (c->type != CONTENT_HTML)
+		return;
+
+	toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
+	if (toSave == NULL)
+		return;
+
+	htmlParseDocument(toSave);
+
+	if (toSave->myDoc != NULL) {
+		/* Start every export from a clean state: the counters are
+		 * file-static and would otherwise carry over from an earlier
+		 * call. The source size is only an initial estimate;
+		 * append_output() grows the buffer if the text plus the
+		 * inserted newlines turns out to be longer. */
+		buffer_capacity = (size_t) c->source_size + 1;
+		buffer = xcalloc(buffer_capacity, sizeof(char));
+		output_size = 0;
+
+		extract_text(toSave->myDoc);
+
+		if (output_size > 0) {
+			/* 0xfff is presumably the RISC OS Text filetype.
+			 * NOTE(review): the error returned by the save call
+			 * is discarded, as it was originally. */
+			xosfile_save_stamped(path, 0xfff, (byte *) buffer,
+					(byte *) buffer + output_size);
+		}
+
+		xfree(buffer);
+		buffer = NULL;
+		buffer_capacity = 0;
+		output_size = 0;
+
+		xmlFreeDoc(toSave->myDoc);
+	}
+
+	htmlFreeParserCtxt(toSave);
+}
+
+/**
+ * Extract the text of a parsed document into the output buffer.
+ *
+ * Nothing is emitted unless the first element node among the document's
+ * children is named "html".
+ *
+ * \param doc  parsed document (may be NULL)
+ */
+static void extract_text(xmlDoc *doc)
+{
+	xmlNode *html;
+
+	if (doc == NULL)
+		return;
+
+	/* find the html element */
+	html = doc->children;
+	while (html != NULL && html->type != XML_ELEMENT_NODE)
+		html = html->next;
+
+	if (html == NULL || strcmp((const char *) html->name, "html") != 0)
+		return;
+
+	extract_text_from_tree(html);
+}
+
+/**
+ * Recursively append the text beneath a node to the output buffer.
+ *
+ * Text nodes are passed through squash_tolat1() and appended. Element
+ * nodes are descended into and, depending on the element name, followed
+ * by one or two newlines. All other node types are skipped.
+ *
+ * \param n  node to process
+ */
+static void extract_text_from_tree(xmlNode *n)
+{
+	xmlNode *child;
+	size_t need_nl = 0;
+
+	if (n->type == XML_TEXT_NODE) {
+		char *text;
+
+		if (n->content == NULL)
+			return;
+
+		text = squash_tolat1(n->content);
+		if (text == NULL)
+			return;
+
+		append_output(text, strlen(text));
+		xfree(text);
+		return;
+	}
+
+	if (n->type != XML_ELEMENT_NODE)
+		return;
+
+	/* elements not in either table produce no newline;
+	 * we just recurse through them */
+	if (name_in_list((const char *) n->name, double_break_elements))
+		need_nl = 2;
+	else if (name_in_list((const char *) n->name, single_break_elements))
+		need_nl = 1;
+
+	/* now recurse */
+	for (child = n->children; child != NULL; child = child->next)
+		extract_text_from_tree(child);
+
+	/* the newlines go after the element's own content */
+	append_output("\n\n", need_nl);
+}
+
+#endif