summaryrefslogtreecommitdiff
path: root/render/form.c
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2005-06-26 01:55:20 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2005-06-26 01:55:20 +0000
commit3286c99dd514b7aa20b23477e17f9b63a4d65bf9 (patch)
treedabc3b9ea208c84b07a08cb2620ee0aa61c0666f /render/form.c
parent26872ea54f703ab316c31655d2eeeca83b5ca36e (diff)
downloadnetsurf-3286c99dd514b7aa20b23477e17f9b63a4d65bf9.tar.gz
netsurf-3286c99dd514b7aa20b23477e17f9b63a4d65bf9.tar.bz2
[project @ 2005-06-26 01:55:20 by jmb]
Move acceptable character set determination to form submission time, rather than at box tree creation time. Use UTF-8 encoding, if specified, else use first specified encoding. Improve use of utf8_to_enc - falling back to document encoding then 8859-1 where appropriate. svn path=/import/netsurf/; revision=1765
Diffstat (limited to 'render/form.c')
-rw-r--r--render/form.c118
1 files changed, 105 insertions, 13 deletions
diff --git a/render/form.c b/render/form.c
index 6e213cb7c..e20fb658f 100644
--- a/render/form.c
+++ b/render/form.c
@@ -4,6 +4,7 @@
* http://www.opensource.org/licenses/gpl-license
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
+ * Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
*/
/** \file
@@ -11,6 +12,7 @@
*/
#include <assert.h>
+#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
@@ -23,17 +25,20 @@
static char *form_textarea_value(struct form_control *textarea);
+static char *form_acceptable_charset(struct form *form);
/**
* Create a struct form.
*
* \param action URL to submit form to, used directly (not copied)
* \param method method and enctype
- * \param charset characterset of form (not copied)
+ * \param charset acceptable charactersets for form submission (not copied)
+ * \param doc_charset characterset of containing document (not copied)
* \return a new structure, or 0 on memory exhaustion
*/
-struct form *form_new(char *action, form_method method, char *charset)
+struct form *form_new(char *action, form_method method, char *charset,
+ char *doc_charset)
{
struct form *form;
@@ -42,7 +47,8 @@ struct form *form_new(char *action, form_method method, char *charset)
return 0;
form->action = action;
form->method = method;
- form->charset = charset;
+ form->accept_charsets = charset;
+ form->document_charset = doc_charset;
form->controls = 0;
form->last_control = 0;
form->prev = 0;
@@ -83,6 +89,9 @@ struct form_control *form_new_control(form_control_type type)
/**
* Add a control to the list of controls in a form.
+ *
+ * \param form The form to add the control to
+ * \param control The control to add
*/
void form_add_control(struct form *form, struct form_control *control)
@@ -485,6 +494,7 @@ char *form_url_encode(struct form *form,
{
char *name, *value, *n_temp, *v_temp;
char *s = malloc(1), *s2;
+ char *charset;
unsigned int len = 0, len1;
utf8_convert_ret err;
@@ -492,23 +502,37 @@ char *form_url_encode(struct form *form,
return 0;
s[0] = 0;
+ charset = form_acceptable_charset(form);
+ if (!charset)
+ return 0;
+
for (; control; control = control->next) {
- /** \todo fallback to document encoding or 8859-1 as
- * last resort.
- * What would also be an improvement would be to choose
- * an encoding acceptable by the server which covers as much
- * of the input values as possible. Additionally, we need to
- * handle the case where none of the acceptable encodings
- * cover all the textual input values.
- */
- err = utf8_to_enc(control->name, form->charset, 0, &n_temp);
+ err = utf8_to_enc(control->name, charset, 0, &n_temp);
+ if (err == UTF8_CONVERT_BADENC) {
+ /* charset not understood, try document charset */
+ err = utf8_to_enc(control->name,
+ form->document_charset, 0, &n_temp);
+ if (err == UTF8_CONVERT_BADENC)
+ /* that also failed, use 8859-1 */
+ err = utf8_to_enc(control->name,
+ "ISO-8859-1", 0, &n_temp);
+ }
if (err != UTF8_CONVERT_OK) {
+ free(charset);
free(s);
return 0;
}
- err = utf8_to_enc(control->value, form->charset, 0, &v_temp);
+ err = utf8_to_enc(control->value, charset, 0, &v_temp);
+ if (err == UTF8_CONVERT_BADENC) {
+ err = utf8_to_enc(control->value,
+ form->document_charset, 0, &v_temp);
+ if (err == UTF8_CONVERT_BADENC)
+ err = utf8_to_enc(control->value,
+ "ISO-8859-1", 0, &v_temp);
+ }
if (err != UTF8_CONVERT_OK) {
free(n_temp);
+ free(charset);
free(s);
return 0;
}
@@ -521,6 +545,7 @@ char *form_url_encode(struct form *form,
curl_free(name);
free(v_temp);
free(n_temp);
+ free(charset);
free(s);
return 0;
}
@@ -532,6 +557,9 @@ char *form_url_encode(struct form *form,
free(v_temp);
free(n_temp);
}
+
+ free(charset);
+
if (len)
s[len - 1] = 0;
return s;
@@ -540,6 +568,8 @@ char *form_url_encode(struct form *form,
/**
* Free a linked list of form_successful_control.
+ *
+ * \param control Pointer to head of list to free
*/
void form_free_successful(struct form_successful_control *control)
@@ -552,3 +582,65 @@ void form_free_successful(struct form_successful_control *control)
free(control);
}
}
+
+/**
+ * Find an acceptable character set encoding with which to submit the form
+ *
+ * UTF-8 is chosen if the form's accept-charsets list mentions it (in any
+ * letter case), otherwise the first entry of that list. With no list, or no
+ * entries in it, the document charset is used, and failing that ISO-8859-1.
+ *
+ * \param form The form
+ * \return Charset name (on heap, caller should free), or NULL on failure
+ */
+char *form_acceptable_charset(struct form *form)
+{
+	const char *start, *end;
+	char *temp, *c;
+	bool utf8;
+
+	if (!form)
+		return NULL;
+
+	if (form->accept_charsets) {
+		/* upper case a temporary copy so UTF-8 matches any case */
+		temp = strdup(form->accept_charsets);
+		if (!temp)
+			return NULL;
+
+		/* toupper() wants the character (as unsigned char), not c */
+		for (c = temp; *c; c++)
+			*c = toupper((unsigned char) *c);
+
+		/* is UTF-8 specified? (note the result before freeing copy) */
+		utf8 = strstr(temp, "UTF-8") != NULL;
+		free(temp);
+		if (utf8)
+			return strdup("UTF-8");
+
+		/* according to RFC2070, the accept-charsets attribute of the
+		 * form element contains a space and/or comma separated list;
+		 * skip leading separators so an empty name is never returned */
+		start = form->accept_charsets;
+		while (*start == ',' || isspace((unsigned char) *start))
+			start++;
+
+		/* What would be an improvement would be to choose an encoding
+		 * acceptable to the server which covers as much of the input
+		 * values as possible. Additionally, we need to handle the case
+		 * where none of the acceptable encodings cover all the textual
+		 * input values.
+		 * For now, we just extract the first element of the list */
+		end = start;
+		while (*end && *end != ',' && !isspace((unsigned char) *end))
+			end++;
+
+		if (end != start)
+			return strndup(start, end - start);
+	}
+
+	/* no usable list entry: use document charset, else 8859-1 */
+	if (form->document_charset)
+		return strdup(form->document_charset);
+	return strdup("ISO-8859-1");
+}