From 3286c99dd514b7aa20b23477e17f9b63a4d65bf9 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sun, 26 Jun 2005 01:55:20 +0000 Subject: [project @ 2005-06-26 01:55:20 by jmb] Move acceptable character set determination to form submission time, rather than at box tree creation time. Use UTF-8 encoding, if specified, else use first specified encoding. Improve use of utf8_to_enc - falling back to document encoding then 8859-1 where appropriate. svn path=/import/netsurf/; revision=1765 --- render/box_construct.c | 34 ++++---------- render/form.c | 118 +++++++++++++++++++++++++++++++++++++++++++------ render/form.h | 6 ++- 3 files changed, 117 insertions(+), 41 deletions(-) diff --git a/render/box_construct.c b/render/box_construct.c index a07869d8f..8bec077f4 100644 --- a/render/box_construct.c +++ b/render/box_construct.c @@ -472,7 +472,6 @@ bool box_construct_element(xmlNode *n, struct content *content, } } - /* fetch any background image for this box */ if (style->background_image.type == CSS_BACKGROUND_IMAGE_URI) { if (!html_fetch_object(content, style->background_image.uri, @@ -832,6 +831,7 @@ struct css_style * box_get_style(struct content *c, value; } } + xmlFree(s); } } @@ -856,6 +856,7 @@ struct css_style * box_get_style(struct content *c, CSS_UNIT_PX; } } + xmlFree(s); } if ((s = (char *) xmlGetProp(n, (const xmlChar *) "vspace"))) { @@ -876,6 +877,7 @@ struct css_style * box_get_style(struct content *c, CSS_UNIT_PX; } } + xmlFree(s); } } @@ -1663,6 +1665,7 @@ bool box_iframe(BOX_SPECIAL_PARAMS) /* start fetch */ ok = html_fetch_object(content, url, box, 0, content->available_width, 1000, false); + free(url); return ok; } @@ -1698,34 +1701,13 @@ bool box_form(BOX_SPECIAL_PARAMS) } /* acceptable encoding(s) for form data */ - if ((charset = (char *) xmlGetProp(n, (const xmlChar *) "accept-charset"))) { - char *comma = strchr(charset, ','); - if (!comma) - /* only one => use it */ - comma = strdup(charset); - else - /* multiple => use first */ - comma = 
strndup(charset, comma - charset); + charset = (char *) xmlGetProp(n, (const xmlChar *) "accept-charset"); - xmlFree(charset); - charset = comma; - } - else if (content->data.html.encoding) - /* none specified => try document encoding */ - charset = strdup(content->data.html.encoding); - else - /* none specified and no document encoding => 8859-1 */ - charset = strdup("ISO-8859-1"); - - if (!charset) { - xmlFree(action); - return false; - } - - form = form_new(action, fmethod, charset); + form = form_new(action, fmethod, charset, + content->data.html.encoding); if (!form) { xmlFree(action); - free(charset); + xmlFree(charset); return false; } form->prev = content->data.html.forms; diff --git a/render/form.c b/render/form.c index 6e213cb7c..e20fb658f 100644 --- a/render/form.c +++ b/render/form.c @@ -4,6 +4,7 @@ * http://www.opensource.org/licenses/gpl-license * Copyright 2004 James Bursa * Copyright 2003 Phil Mellor + * Copyright 2005 John M Bell */ /** \file @@ -11,6 +12,7 @@ */ #include +#include #include #include #include @@ -23,17 +25,20 @@ static char *form_textarea_value(struct form_control *textarea); +static char *form_acceptable_charset(struct form *form); /** * Create a struct form. 
* * \param action URL to submit form to, used directly (not copied) * \param method method and enctype - * \param charset characterset of form (not copied) + * \param charset acceptable charactersets for form submission (not copied) + * \param doc_charset characterset of containing document (not copied) * \return a new structure, or 0 on memory exhaustion */ -struct form *form_new(char *action, form_method method, char *charset) +struct form *form_new(char *action, form_method method, char *charset, + char *doc_charset) { struct form *form; @@ -42,7 +47,8 @@ struct form *form_new(char *action, form_method method, char *charset) return 0; form->action = action; form->method = method; - form->charset = charset; + form->accept_charsets = charset; + form->document_charset = doc_charset; form->controls = 0; form->last_control = 0; form->prev = 0; @@ -83,6 +89,9 @@ struct form_control *form_new_control(form_control_type type) /** * Add a control to the list of controls in a form. + * + * \param form The form to add the control to + * \param control The control to add */ void form_add_control(struct form *form, struct form_control *control) @@ -485,6 +494,7 @@ char *form_url_encode(struct form *form, { char *name, *value, *n_temp, *v_temp; char *s = malloc(1), *s2; + char *charset; unsigned int len = 0, len1; utf8_convert_ret err; @@ -492,23 +502,37 @@ char *form_url_encode(struct form *form, return 0; s[0] = 0; + charset = form_acceptable_charset(form); + if (!charset) + return 0; + for (; control; control = control->next) { - /** \todo fallback to document encoding or 8859-1 as - * last resort. - * What would also be an improvement would be to choose - * an encoding acceptable by the server which covers as much - * of the input values as possible. Additionally, we need to - * handle the case where none of the acceptable encodings - * cover all the textual input values. 
- */ - err = utf8_to_enc(control->name, form->charset, 0, &n_temp); + err = utf8_to_enc(control->name, charset, 0, &n_temp); + if (err == UTF8_CONVERT_BADENC) { + /* charset not understood, try document charset */ + err = utf8_to_enc(control->name, + form->document_charset, 0, &n_temp); + if (err == UTF8_CONVERT_BADENC) + /* that also failed, use 8859-1 */ + err = utf8_to_enc(control->name, + "ISO-8859-1", 0, &n_temp); + } if (err != UTF8_CONVERT_OK) { + free(charset); free(s); return 0; } - err = utf8_to_enc(control->value, form->charset, 0, &v_temp); + err = utf8_to_enc(control->value, charset, 0, &v_temp); + if (err == UTF8_CONVERT_BADENC) { + err = utf8_to_enc(control->value, + form->document_charset, 0, &v_temp); + if (err == UTF8_CONVERT_BADENC) + err = utf8_to_enc(control->value, + "ISO-8859-1", 0, &v_temp); + } if (err != UTF8_CONVERT_OK) { free(n_temp); + free(charset); free(s); return 0; } @@ -521,6 +545,7 @@ char *form_url_encode(struct form *form, curl_free(name); free(v_temp); free(n_temp); + free(charset); free(s); return 0; } @@ -532,6 +557,9 @@ char *form_url_encode(struct form *form, free(v_temp); free(n_temp); } + + free(charset); + if (len) s[len - 1] = 0; return s; @@ -540,6 +568,8 @@ char *form_url_encode(struct form *form, /** * Free a linked list of form_successful_control. 
+ * + * \param control Pointer to head of list to free */ void form_free_successful(struct form_successful_control *control) @@ -552,3 +582,65 @@ void form_free_successful(struct form_successful_control *control) free(control); } } + +/** + * Find an acceptable character set encoding with which to submit the form + * + * \param form The form + * \return Pointer to charset name (on heap, caller should free) or NULL + */ +char *form_acceptable_charset(struct form *form) +{ + char *temp, *c; + + if (!form) + return NULL; + + if (!form->accept_charsets) { + /* no accept-charsets attribute for this form */ + if (form->document_charset) + /* document charset present, so use it */ + return strdup(form->document_charset); + else + /* no document charset, so default to 8859-1 */ + return strdup("ISO-8859-1"); + } + + /* make temporary copy of accept-charsets attribute */ + temp = strdup(form->accept_charsets); + if (!temp) + return NULL; + + /* make it upper case so the UTF-8 check below is case-insensitive */ + for (c = temp; *c; c++) + *c = toupper((unsigned char) *c); + + /* is UTF-8 specified? */ + c = strstr(temp, "UTF-8"); + if (c) { + free(temp); + return strdup("UTF-8"); + } + + /* dispense with temporary copy */ + free(temp); + + /* according to RFC2070, the accept-charsets attribute of the + * form element contains a space and/or comma separated list */ + c = form->accept_charsets; + + /* What would be an improvement would be to choose an encoding + * acceptable to the server which covers as much of the input + * values as possible. Additionally, we need to handle the case + * where none of the acceptable encodings cover all the textual + * input values. 
+ * For now, we just extract the first element of the charset list + */ + while (*c && !isspace((unsigned char) *c)) { + if (*c == ',') + break; + c++; + } + + return strndup(form->accept_charsets, c - form->accept_charsets); +} diff --git a/render/form.h b/render/form.h index c1e01ae0a..48630dea2 100644 --- a/render/form.h +++ b/render/form.h @@ -31,7 +31,8 @@ typedef enum { struct form { char *action; /**< URL to submit to. */ form_method method; /**< Method and enctype. */ - char *charset; /**< Charset to submit form in */ + char *accept_charsets; /**< Acceptable charsets for submission */ + char *document_charset; /**< Charset of document containing form */ struct form_control *controls; /**< Linked list of controls. */ struct form_control *last_control; /**< Last control in list. */ struct form *prev; /**< Previous form in doc. */ @@ -102,7 +103,8 @@ struct form_successful_control { struct form_successful_control *next; /**< Next in linked list. */ }; -struct form *form_new(char *action, form_method method, char *charset); +struct form *form_new(char *action, form_method method, char *charset, + char *doc_charset); struct form_control *form_new_control(form_control_type type); void form_add_control(struct form *form, struct form_control *control); void form_free_control(struct form_control *control); -- cgit v1.2.3