summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- desktop/save_text.c 124
1 files changed, 61 insertions, 63 deletions
diff --git a/desktop/save_text.c b/desktop/save_text.c
index 18e565489..3214713bb 100644
--- a/desktop/save_text.c
+++ b/desktop/save_text.c
@@ -1,7 +1,7 @@
/*
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
+ * http://www.opensource.org/licenses/gpl-license
* Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
*/
@@ -25,21 +25,21 @@ static FILE *out;
void save_as_text(struct content *c, char *path) {
- htmlParserCtxtPtr toSave;
+ htmlParserCtxtPtr toSave;
if (c->type != CONTENT_HTML) {
return;
}
- out = fopen(path, "w");
- if (!out) return;
+ out = fopen(path, "w");
+ if (!out) return;
toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
htmlParseDocument(toSave);
extract_text(toSave->myDoc);
- fclose(out);
+ fclose(out);
xmlFreeDoc(toSave->myDoc);
htmlFreeParserCtxt(toSave);
@@ -47,71 +47,69 @@ void save_as_text(struct content *c, char *path) {
void extract_text(xmlDoc *doc)
{
- xmlNode *html;
-
- /* find the html element */
- for (html = doc->children;
- html!=0 && html->type != XML_ELEMENT_NODE;
- html = html->next)
- ;
- if (html == 0 || strcmp((const char*)html->name, "html") != 0) {
- return;
- }
-
- extract_text_from_tree(html);
+ xmlNode *html;
+
+ /* find the html element */
+ for (html = doc->children;
+ html!=0 && html->type != XML_ELEMENT_NODE;
+ html = html->next)
+ ;
+ if (html == 0 || strcmp((const char*)html->name, "html") != 0) {
+ return;
+ }
+
+ extract_text_from_tree(html);
}
void extract_text_from_tree(xmlNode *n)
{
- xmlNode *this;
- char *text;
- int len, need_nl = 0;
-
- if (n->type == XML_ELEMENT_NODE) {
- if (strcmp(n->name, "dl") == 0 ||
- strcmp(n->name, "h1") == 0 ||
- strcmp(n->name, "h2") == 0 ||
- strcmp(n->name, "h3") == 0 ||
- strcmp(n->name, "ol") == 0 ||
- strcmp(n->name, "title") == 0 ||
- strcmp(n->name, "ul") == 0) {
- need_nl = 2;
- }
- else if (strcmp(n->name, "applet") == 0 ||
- strcmp(n->name, "br") == 0 ||
- strcmp(n->name, "div") == 0 ||
- strcmp(n->name, "dt") == 0 ||
- strcmp(n->name, "h4") == 0 ||
- strcmp(n->name, "h5") == 0 ||
- strcmp(n->name, "h6") == 0 ||
- strcmp(n->name, "li") == 0 ||
- strcmp(n->name, "object") == 0 ||
- strcmp(n->name, "p") == 0 ||
- strcmp(n->name, "tr") == 0) {
- need_nl = 1;
- }
- /* do nothing, we just recurse through these nodes */
- }
- else if (n->type == XML_TEXT_NODE) {
- text = squash_tolat1(n->content);
- fprintf(out, "%s", text);
- xfree(text);
- return;
- }
- else {
- return;
- }
-
- /* now recurse */
- for (this = n->children; this != 0; this = this->next) {
- extract_text_from_tree(this);
+ xmlNode *this_node;
+ const char *text;
+ int need_nl = 0;
+
+ if (n->type == XML_ELEMENT_NODE) {
+ if (strcmp(n->name, "dl") == 0 ||
+ strcmp(n->name, "h1") == 0 ||
+ strcmp(n->name, "h2") == 0 ||
+ strcmp(n->name, "h3") == 0 ||
+ strcmp(n->name, "ol") == 0 ||
+ strcmp(n->name, "title") == 0 ||
+ strcmp(n->name, "ul") == 0) {
+ need_nl = 2;
+ }
+ else if (strcmp(n->name, "applet") == 0 ||
+ strcmp(n->name, "br") == 0 ||
+ strcmp(n->name, "div") == 0 ||
+ strcmp(n->name, "dt") == 0 ||
+ strcmp(n->name, "h4") == 0 ||
+ strcmp(n->name, "h5") == 0 ||
+ strcmp(n->name, "h6") == 0 ||
+ strcmp(n->name, "li") == 0 ||
+ strcmp(n->name, "object") == 0 ||
+ strcmp(n->name, "p") == 0 ||
+ strcmp(n->name, "tr") == 0) {
+ need_nl = 1;
+ }
+ /* do nothing, we just recurse through these nodes */
+ }
+ else if (n->type == XML_TEXT_NODE) {
+ if ((text = squash_tolat1(n->content)) != NULL) {
+ fputs(text, out);
+ free(text);
+ }
+ return;
+ }
+ else {
+ return;
}
- if (need_nl) {
- for (len = 0; len != need_nl; len++) {
- fprintf(out, "\n");
- }
+ /* now recurse */
+ for (this_node = n->children; this_node != 0; this_node = this_node->next) {
+ extract_text_from_tree(this_node);
}
+
+ while (need_nl--)
+ fputc('\n', out);
}
#endif