summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--COPYING19
-rw-r--r--Makefile34
-rw-r--r--Makefile-riscos38
-rw-r--r--README46
-rw-r--r--build/Makefile.common39
-rw-r--r--docs/Architecture83
-rw-r--r--docs/Todo12
-rw-r--r--include/hubbub/errors.h29
-rw-r--r--include/hubbub/functypes.h37
-rw-r--r--include/hubbub/hubbub.h23
-rw-r--r--include/hubbub/parser.h84
-rw-r--r--include/hubbub/types.h97
-rw-r--r--json/README26
-rw-r--r--json/hex-chars.jmb1.p12
-rw-r--r--json/void-prototypes.jmb1.p45
-rw-r--r--src/Makefile79
-rw-r--r--src/charset/Makefile53
-rw-r--r--src/charset/aliases.c361
-rw-r--r--src/charset/aliases.h42
-rw-r--r--src/charset/codec.c186
-rw-r--r--src/charset/codec.h153
-rw-r--r--src/charset/codec_iconv.c837
-rw-r--r--src/charset/codec_impl.h51
-rw-r--r--src/charset/codec_utf8.c620
-rw-r--r--src/charset/detect.c673
-rw-r--r--src/charset/detect.h22
-rw-r--r--src/hubbub.c63
-rw-r--r--src/input/Makefile53
-rw-r--r--src/input/filter.c380
-rw-r--r--src/input/filter.h57
-rw-r--r--src/input/inputstream.c479
-rw-r--r--src/input/inputstream.h98
-rw-r--r--src/input/streamimpl.h77
-rw-r--r--src/input/utf8_stream.c567
-rw-r--r--src/parser.c237
-rw-r--r--src/tokeniser/Makefile53
-rw-r--r--src/tokeniser/entities.c363
-rw-r--r--src/tokeniser/entities.h25
-rw-r--r--src/tokeniser/tokeniser.c2282
-rw-r--r--src/tokeniser/tokeniser.h71
-rw-r--r--src/utils/Makefile53
-rw-r--r--src/utils/dict.c219
-rw-r--r--src/utils/dict.h31
-rw-r--r--src/utils/errors.c70
-rw-r--r--src/utils/utf8.c368
-rw-r--r--src/utils/utf8.h38
-rw-r--r--src/utils/utils.h28
-rw-r--r--test/INDEX15
-rw-r--r--test/Makefile63
-rw-r--r--test/README84
-rw-r--r--test/aliases.c61
-rw-r--r--test/cscodec.c247
-rw-r--r--test/csdetect.c132
-rw-r--r--test/data/Aliases302
-rw-r--r--test/data/cscodec/INDEX5
-rw-r--r--test/data/cscodec/simple.datbin0 -> 1193 bytes
-rw-r--r--test/data/csdetect/INDEX9
-rw-r--r--test/data/csdetect/bom.datbin0 -> 639 bytes
-rw-r--r--test/data/csdetect/non-ascii-meta.dat129
-rw-r--r--test/data/csdetect/test-yahoo-jp.dat10
-rw-r--r--test/data/csdetect/tests1.dat392
-rw-r--r--test/data/csdetect/tests2.dat82
-rw-r--r--test/data/html/INDEX6
-rw-r--r--test/data/html/section-tree-construction.html2783
-rw-r--r--test/data/html/web-apps.html41271
-rw-r--r--test/data/tokeniser2/INDEX7
-rw-r--r--test/data/tokeniser2/contentModelFlags.test36
-rw-r--r--test/data/tokeniser2/test1.test136
-rw-r--r--test/data/tokeniser2/test2.test108
-rw-r--r--test/dict.c53
-rw-r--r--test/entities.c42
-rw-r--r--test/filter.c355
-rw-r--r--test/hubbub.c29
-rw-r--r--test/inputstream.c126
-rw-r--r--test/parser.c175
-rw-r--r--test/regression/cscodec-segv.c37
-rw-r--r--test/regression/filter-segv.c38
-rw-r--r--test/testrunner.pl147
-rw-r--r--test/testutils.h123
-rw-r--r--test/tokeniser.c174
-rw-r--r--test/tokeniser2.c418
81 files changed, 56908 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..7646f4c
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,19 @@
+Copyright (C) 2007 J-M Bell
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..db5a35b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,34 @@
+# Toolchain definitions for building on the destination platform
+export CC = gcc
+export AR = ar
+export LD = gcc
+
+export CP = cp
+export RM = rm
+export MKDIR = mkdir
+export MV = mv
+export ECHO = echo
+export MAKE = make
+export PERL = perl
+export PKGCONFIG = pkg-config
+
+# Toolchain flags
+WARNFLAGS = -Wall -Wextra -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs -Werror -pedantic
+export CFLAGS = -std=c99 -D_BSD_SOURCE -I$(TOP)/include/ $(WARNFLAGS)
+export ARFLAGS = -cru
+export LDFLAGS = -L$(TOP)/
+
+export CPFLAGS =
+export RMFLAGS =
+export MKDIRFLAGS = -p
+export MVFLAGS =
+export ECHOFLAGS =
+export MAKEFLAGS =
+export PKGCONFIGFLAGS =
+
+export EXEEXT =
+
+
+include build/Makefile.common
diff --git a/Makefile-riscos b/Makefile-riscos
new file mode 100644
index 0000000..f1d8cf0
--- /dev/null
+++ b/Makefile-riscos
@@ -0,0 +1,38 @@
+# Toolchain definitions for building for RISC OS using the GCCSDK cross-compiler
+GCCSDK_INSTALL_CROSSBIN ?= /home/riscos/cross/bin
+GCCSDK_INSTALL_ENV ?= /home/riscos/env
+
+export CC = $(GCCSDK_INSTALL_CROSSBIN)/gcc
+export AR = $(GCCSDK_INSTALL_CROSSBIN)/ar
+export LD = $(GCCSDK_INSTALL_CROSSBIN)/gcc
+
+export CP = cp
+export RM = rm
+export MKDIR = mkdir
+export MV = mv
+export ECHO = echo
+export MAKE = make
+export PERL = perl
+export PKGCONFIG = pkg-config
+
+# Toolchain flags
+WARNFLAGS = -Wall -Wextra -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs -Werror -pedantic
+export CFLAGS = -std=c99 -D_BSD_SOURCE -I$(TOP)/include/ $(WARNFLAGS) \
+ -mpoke-function-name
+export ARFLAGS = -cru
+export LDFLAGS = -L$(TOP)/
+
+export CPFLAGS =
+export RMFLAGS =
+export MKDIRFLAGS = -p
+export MVFLAGS =
+export ECHOFLAGS =
+export MAKEFLAGS =
+export PKGCONFIGFLAGS =
+
+export EXEEXT = ,ff8
+
+
+include build/Makefile.common
diff --git a/README b/README
new file mode 100644
index 0000000..e8b92cb
--- /dev/null
+++ b/README
@@ -0,0 +1,46 @@
+Hubbub -- an HTML parser
+========================
+
+Overview
+--------
+
+ Hubbub is a flexible HTML parser. It aims to comply with the HTML5
+ specification.
+
+Requirements
+------------
+
+ Hubbub requires the following tools:
+
+ + A C99 capable C compiler
+ + GNU make or compatible
+ + Perl (for the testcases)
+ + Pkg-config (for the testcases)
+
+ Hubbub also requires the following libraries to be installed:
+
+ + An iconv implementation (e.g. libiconv)
+ + JSON-C (for the testcases) -- see json/README for further information
+
+Compilation
+-----------
+
+ If necessary, modify the toolchain settings in the Makefile.
+ Invoke make:
+ $ make
+
+Verification
+------------
+
+ To verify that the parser is working, it is necessary to specify a
+ different makefile target than that used for normal compilation, thus:
+
+ $ make test
+
+API documentation
+-----------------
+
+ Currently, there is none. However, the code is well commented and the
+ public API may be found in the "include" directory. The testcase sources
+ may also be of use in working out how to use it.
+
diff --git a/build/Makefile.common b/build/Makefile.common
new file mode 100644
index 0000000..21c319a
--- /dev/null
+++ b/build/Makefile.common
@@ -0,0 +1,39 @@
+# Top-level Makefile fragment for Hubbub
+
+# Name of component
+export COMPONENT = libhubbub
+
+# Environment
+export EXPORT = $(CURDIR)/dist
+export TOP = $(CURDIR)
+
+.PHONY: release debug test clean setup export distclean
+
+# Rules
+release: setup
+ @$(MAKE) $(MAKEFLAGS) -C src release
+
+debug: setup
+ @$(MAKE) $(MAKEFLAGS) -C src debug
+
+test: debug
+ @$(MAKE) $(MAKEFLAGS) -C test test
+
+clean:
+ @$(MAKE) $(MAKEFLAGS) -C src clean
+ @$(MAKE) $(MAKEFLAGS) -C test clean
+
+setup:
+ @$(MAKE) $(MAKEFLAGS) -C src setup
+ @$(MAKE) $(MAKEFLAGS) -C test setup
+
+export: release
+ @$(MKDIR) $(MKDIRFLAGS) $(TOP)/dist/lib
+ @$(CP) $(CPFLAGS) -r include $(EXPORT)/
+ @$(MAKE) $(MAKEFLAGS) -C src export
+ @$(MAKE) $(MAKEFLAGS) -C test export
+
+distclean: clean
+ -@$(RM) $(RMFLAGS) -r $(TOP)/dist
+ @$(MAKE) $(MAKEFLAGS) -C src distclean
+ @$(MAKE) $(MAKEFLAGS) -C test distclean
diff --git a/docs/Architecture b/docs/Architecture
new file mode 100644
index 0000000..73966eb
--- /dev/null
+++ b/docs/Architecture
@@ -0,0 +1,83 @@
+Hubbub parser architecture
+==========================
+
+Introduction
+------------
+
+ Hubbub is a flexible HTML parser. It offers two interfaces:
+
+ * a SAX-style event interface
+ * a DOM-style tree-based interface
+
+Overview
+--------
+
+ Hubbub is comprised of four parts:
+
+ * a charset handler
+ * an input stream veneer
+ * a tokeniser
+ * a tree builder
+
+ Charset handler
+ ---------------
+
+ The charset handler converts the raw data input into a requested encoding.
+
+ Input stream veneer
+ -------------------
+
+ The input stream veneer provides an abstract stream-like interface over
+ the document buffer. This is used by the tokeniser. The document buffer
+ will be encoded in either UTF-8 or UTF-16 (this is client-selectable).
+
+ Tokeniser
+ ---------
+
+ The tokeniser divides the data held in the document buffer into chunks.
+ It sends SAX-style events for each chunk. The tokeniser is agnostic to
+ the charset the document buffer is stored in.
+
+ Tree builder
+ ------------
+
+ The tree builder constructs a DOM tree from the SAX events emitted by the
+ tokeniser. The tree builder is tied to the document buffer charset.
+
+Memory usage and ownership
+--------------------------
+
+ Memory usage within the library is well defined, as is ownership of allocated
+ memory.
+
+ Raw input data provided by the library client is owned by the client.
+
+ The document buffer is allocated on the fly by the library.
+
+ The document buffer is created and resized by the charset handler. Its
+ location is passed to the tree builder through a dedicated event. While
+ parsing is occurring, the ownership of the document buffer lies with the
+ charset handler. Upon parse completion, the tree builder may request
+ ownership of the buffer. If it does not, the buffer will be freed on parser
+ destruction.
+
+ SAX events which refer to document segments contain direct references into
+ the document buffer (i.e. no copying of data held in the document buffer
+ occurs).
+
+ The tree builder will allocate memory for use as DOM nodes. References to
+ strings in the document buffer will be direct and will operate a
+ copy-on-write strategy. All strings (excepting those which comprise part of
+ the document buffer) and nodes within the DOM are reference counted. Upon a
+ reference count reaching 0, the item is freed.
+
+ The above strategy permits data copying to be kept to a minimum, hence
+ minimising memory usage.
+
+Parse errors
+------------
+
+ Notification of parse errors is made through a dedicated event similar to
+ that used for notification of movement of the document buffer. This event
+ contains the line/column offset of the error location, along with a message
+ detailing the error.
diff --git a/docs/Todo b/docs/Todo
new file mode 100644
index 0000000..2abce2b
--- /dev/null
+++ b/docs/Todo
@@ -0,0 +1,12 @@
+TODO list
+=========
+
+ + Update tokeniser to comply with latest spec draft (currently complies
+ with 2007-06-12 draft)
+ + Implement one or more tree builders
+ + More charset convertors (or make the iconv codec significantly faster)
+ + Parse error reporting from the tokeniser
+ + Implement extraneous chunk insertion/tokenisation
+ + Statistical charset autodetection
+ + Shared library, for those platforms that support such things
+ + Optimise it
diff --git a/include/hubbub/errors.h b/include/hubbub/errors.h
new file mode 100644
index 0000000..c3b1f5d
--- /dev/null
+++ b/include/hubbub/errors.h
@@ -0,0 +1,29 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_errors_h_
+#define hubbub_errors_h_
+
+#include <stddef.h>
+
+typedef enum hubbub_error {
+ HUBBUB_OK = 0,
+
+ HUBBUB_NOMEM = 1,
+ HUBBUB_BADPARM = 2,
+ HUBBUB_INVALID = 3,
+ HUBBUB_FILENOTFOUND = 4,
+ HUBBUB_NEEDDATA = 5,
+} hubbub_error;
+
+/* Convert a hubbub error value to a string */
+const char *hubbub_error_to_string(hubbub_error error);
+/* Convert a string to a hubbub error value */
+hubbub_error hubbub_error_from_string(const char *str, size_t len);
+
+#endif
+
diff --git a/include/hubbub/functypes.h b/include/hubbub/functypes.h
new file mode 100644
index 0000000..aa3e649
--- /dev/null
+++ b/include/hubbub/functypes.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_functypes_h_
+#define hubbub_functypes_h_
+
+#include <stdlib.h>
+
+#include <hubbub/types.h>
+
+/**
+ * Type of allocation function for hubbub
+ *
+ * A single callback performs both allocation and release. Within the
+ * library it is called with ptr == NULL and a non-zero size to obtain new
+ * memory, and with size == 0 to release ptr.
+ * NOTE(review): presumably a non-NULL ptr with a non-zero size resizes the
+ * allocation, as realloc() does -- confirm against the callers.
+ *
+ * \param ptr   Existing allocation, or NULL
+ * \param size  Requested size in bytes, or 0 to release ptr
+ * \param pw    Pointer to client-specific private data
+ * \return Pointer to the allocation; callers treat NULL as failure
+ */
+typedef void *(*hubbub_alloc)(void *ptr, size_t size, void *pw);
+
+/**
+ * Type of token handling function
+ */
+typedef void (*hubbub_token_handler)(const hubbub_token *token, void *pw);
+
+/**
+ * Type of document buffer handling function
+ */
+typedef void (*hubbub_buffer_handler)(const uint8_t *data,
+ size_t len, void *pw);
+
+/**
+ * Type of parse error handling function
+ */
+typedef void (*hubbub_error_handler)(uint32_t line, uint32_t col,
+ const char *message, void *pw);
+
+
+#endif
+
diff --git a/include/hubbub/hubbub.h b/include/hubbub/hubbub.h
new file mode 100644
index 0000000..8a15eca
--- /dev/null
+++ b/include/hubbub/hubbub.h
@@ -0,0 +1,23 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_h_
+#define hubbub_h_
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+/* Initialise the Hubbub library for use */
+hubbub_error hubbub_initialise(const char *aliases_file,
+ hubbub_alloc alloc, void *pw);
+
+/* Clean up after Hubbub */
+hubbub_error hubbub_finalise(hubbub_alloc alloc, void *pw);
+
+#endif
+
diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
new file mode 100644
index 0000000..cdf8664
--- /dev/null
+++ b/include/hubbub/parser.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_parser_h_
+#define hubbub_parser_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+typedef struct hubbub_parser hubbub_parser;
+
+/**
+ * Hubbub parser option types
+ */
+typedef enum hubbub_parser_opttype {
+ HUBBUB_PARSER_TOKEN_HANDLER,
+ HUBBUB_PARSER_BUFFER_HANDLER,
+ HUBBUB_PARSER_ERROR_HANDLER,
+ HUBBUB_PARSER_CONTENT_MODEL,
+} hubbub_parser_opttype;
+
+/**
+ * Hubbub parser option parameters
+ */
+typedef union hubbub_parser_optparams {
+ struct {
+ hubbub_token_handler handler;
+ void *pw;
+ } token_handler;
+
+ struct {
+ hubbub_buffer_handler handler;
+ void *pw;
+ } buffer_handler;
+
+ struct {
+ hubbub_error_handler handler;
+ void *pw;
+ } error_handler;
+
+ struct {
+ hubbub_content_model model;
+ } content_model;
+} hubbub_parser_optparams;
+
+/* Create a hubbub parser */
+hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,
+ hubbub_alloc alloc, void *pw);
+/* Destroy a hubbub parser */
+void hubbub_parser_destroy(hubbub_parser *parser);
+
+/* Configure a hubbub parser */
+hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
+ hubbub_parser_opttype type,
+ hubbub_parser_optparams *params);
+
+/* Pass a chunk of data to a hubbub parser for parsing */
+/* This data is encoded in the input charset */
+hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser,
+ uint8_t *data, size_t len);
+/* Pass a chunk of extraneous data to a hubbub parser for parsing */
+/* This data is UTF-8 encoded */
+hubbub_error hubbub_parser_parse_extraneous_chunk(hubbub_parser *parser,
+ uint8_t *data, size_t len);
+/* Inform the parser that the last chunk of data has been parsed */
+hubbub_error hubbub_parser_completed(hubbub_parser *parser);
+
+/* Read the document charset */
+const char *hubbub_parser_read_charset(hubbub_parser *parser,
+ hubbub_charset_source *source);
+
+/* Claim ownership of the document buffer */
+hubbub_error hubbub_parser_claim_buffer(hubbub_parser *parser,
+ uint8_t **buffer, size_t *len);
+
+#endif
+
diff --git a/include/hubbub/types.h b/include/hubbub/types.h
new file mode 100644
index 0000000..57518ae
--- /dev/null
+++ b/include/hubbub/types.h
@@ -0,0 +1,97 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_types_h_
+#define hubbub_types_h_
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+/** Source of charset information, in order of importance
+ * A client-dictated charset will override all others.
+ * A document-specified charset will override autodetection or the default */
+typedef enum hubbub_charset_source {
+ HUBBUB_CHARSET_UNKNOWN = 0, /**< Unknown */
+ HUBBUB_CHARSET_DEFAULT = 1, /**< Default setting */
+ HUBBUB_CHARSET_DETECTED = 2, /**< Autodetected */
+ HUBBUB_CHARSET_DOCUMENT = 3, /**< Defined in document */
+ HUBBUB_CHARSET_DICTATED = 4, /**< Dictated by client */
+} hubbub_charset_source;
+
+/**
+ * Content model flag
+ */
+typedef enum hubbub_content_model {
+ HUBBUB_CONTENT_MODEL_PCDATA,
+ HUBBUB_CONTENT_MODEL_RCDATA,
+ HUBBUB_CONTENT_MODEL_CDATA,
+ HUBBUB_CONTENT_MODEL_PLAINTEXT
+} hubbub_content_model;
+
+/**
+ * Type of an emitted token
+ */
+typedef enum hubbub_token_type {
+ HUBBUB_TOKEN_DOCTYPE,
+ HUBBUB_TOKEN_START_TAG,
+ HUBBUB_TOKEN_END_TAG,
+ HUBBUB_TOKEN_COMMENT,
+ HUBBUB_TOKEN_CHARACTER,
+ HUBBUB_TOKEN_EOF
+} hubbub_token_type;
+
+/**
+ * Tokeniser string type
+ */
+typedef struct hubbub_string {
+ uint32_t data_off; /**< Byte offset of string start */
+ size_t len; /**< Byte length of string */
+} hubbub_string;
+
+/**
+ * Tag attribute data
+ */
+typedef struct hubbub_attribute {
+ hubbub_string name; /**< Attribute name */
+ hubbub_string value; /**< Attribute value */
+} hubbub_attribute;
+
+/**
+ * Data for doctype token
+ */
+typedef struct hubbub_doctype {
+ hubbub_string name; /**< Doctype name */
+ bool correct; /**< Doctype validity flag */
+} hubbub_doctype;
+
+/**
+ * Data for a tag
+ */
+typedef struct hubbub_tag {
+ hubbub_string name; /**< Tag name */
+ uint32_t n_attributes; /**< Count of attributes */
+ hubbub_attribute *attributes; /**< Array of attribute data */
+} hubbub_tag;
+
+/**
+ * Token data
+ */
+typedef struct hubbub_token {
+ hubbub_token_type type;
+
+ union {
+ hubbub_doctype doctype;
+
+ hubbub_tag tag;
+
+ hubbub_string comment;
+
+ hubbub_string character;
+ } data;
+} hubbub_token;
+
+#endif
diff --git a/json/README b/json/README
new file mode 100644
index 0000000..50dcf79
--- /dev/null
+++ b/json/README
@@ -0,0 +1,26 @@
+JSON-C patches
+==============
+
+This directory contains a couple of patches to JSON-C 0.7.
+Upstream sources may be found at http://oss.metaparadigm.com/json-c/
+
+hex-chars.jmb1.p:
+
+ Fix handling of upper case hex digits.
+ The previous behaviour resulted in the likes of \uFFFD causing a parse
+ error.
+
+void-prototypes.jmb1.p:
+
+ Fix compiler warnings about function prototypes in header files when
+ compiling client code in standards mode with pedantic warnings switched
+ on.
+
+Apply them as follows:
+
+ $ cd json-c-0.7
+ $ patch -p 1 -i ../hex-chars.jmb1.p
+ $ patch -p 1 -i ../void-prototypes.jmb1.p
+
+They have been submitted upstream, so will probably disappear in due
+course.
diff --git a/json/hex-chars.jmb1.p b/json/hex-chars.jmb1.p
new file mode 100644
index 0000000..10ea30a
--- /dev/null
+++ b/json/hex-chars.jmb1.p
@@ -0,0 +1,12 @@
+diff -urw json-c-0.7/json_object.c json-c-0.7-jmb/json_object.c
+--- json-c-0.7/json_object.c 2007-03-13 08:25:39.000000000 +0000
++++ json-c-0.7-jmb/json_object.c 2007-06-23 13:33:20.000000000 +0100
+@@ -30,7 +30,7 @@
+ /* #define REFCOUNT_DEBUG 1 */
+
+ char *json_number_chars = "0123456789.+-e";
+-char *json_hex_chars = "0123456789abcdef";
++char *json_hex_chars = "0123456789abcdefABCDEF";
+
+ #ifdef REFCOUNT_DEBUG
+ static char* json_type_name[] = {
diff --git a/json/void-prototypes.jmb1.p b/json/void-prototypes.jmb1.p
new file mode 100644
index 0000000..db71ffe
--- /dev/null
+++ b/json/void-prototypes.jmb1.p
@@ -0,0 +1,45 @@
+diff -urw json-c-0.7/debug.h json-c-0.7-jmb/debug.h
+--- json-c-0.7/debug.h 2007-03-13 08:25:39.000000000 +0000
++++ json-c-0.7-jmb/debug.h 2007-06-22 23:52:37.000000000 +0100
+@@ -13,7 +13,7 @@
+ #define _DEBUG_H_
+
+ extern void mc_set_debug(int debug);
+-extern int mc_get_debug();
++extern int mc_get_debug(void);
+
+ extern void mc_set_syslog(int syslog);
+ extern void mc_abort(const char *msg, ...);
+diff -urw json-c-0.7/json_object.h json-c-0.7-jmb/json_object.h
+--- json-c-0.7/json_object.h 2007-03-13 08:25:39.000000000 +0000
++++ json-c-0.7-jmb/json_object.h 2007-06-22 23:53:10.000000000 +0100
+@@ -98,7 +98,7 @@
+ /** Create a new empty object
+ * @returns a json_object of type json_type_object
+ */
+-extern struct json_object* json_object_new_object();
++extern struct json_object* json_object_new_object(void);
+
+ /** Get the hashtable of a json_object of type json_type_object
+ * @param obj the json_object instance
+@@ -167,7 +167,7 @@
+ /** Create a new empty json_object of type json_type_array
+ * @returns a json_object of type json_type_array
+ */
+-extern struct json_object* json_object_new_array();
++extern struct json_object* json_object_new_array(void);
+
+ /** Get the arraylist of a json_object of type json_type_array
+ * @param obj the json_object instance
+diff -urw json-c-0.7/json_tokener.h json-c-0.7-jmb/json_tokener.h
+--- json-c-0.7/json_tokener.h 2007-03-13 08:25:39.000000000 +0000
++++ json-c-0.7-jmb/json_tokener.h 2007-06-22 23:53:26.000000000 +0100
+@@ -79,7 +79,7 @@
+
+ extern const char* json_tokener_errors[];
+
+-extern struct json_tokener* json_tokener_new();
++extern struct json_tokener* json_tokener_new(void);
+ extern void json_tokener_free(struct json_tokener *tok);
+ extern void json_tokener_reset(struct json_tokener *tok);
+ extern struct json_object* json_tokener_parse(char *str);
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..b72a9e0
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,79 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Release output
+RELEASE = ${TOP}/${COMPONENT}.a
+
+# Debug output
+DEBUG = ${TOP}/${COMPONENT}-debug.a
+
+# Objects
+OBJS = hubbub parser
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+release: $(addprefix Release/, $(addsuffix .o, $(OBJS)))
+ @${MAKE} -C charset release
+ @${MAKE} -C input release
+ @${MAKE} -C tokeniser release
+ @${MAKE} -C utils release
+ @${AR} ${ARFLAGS} $(RELEASE) Release/*
+
+debug: $(addprefix Debug/, $(addsuffix .o, $(OBJS)))
+ @${MAKE} -C charset debug
+ @${MAKE} -C input debug
+ @${MAKE} -C tokeniser debug
+ @${MAKE} -C utils debug
+ @${AR} ${ARFLAGS} $(DEBUG) Debug/*
+
+clean:
+ @${MAKE} -C charset clean
+ @${MAKE} -C input clean
+ @${MAKE} -C tokeniser clean
+ @${MAKE} -C utils clean
+ -@${RM} ${RMFLAGS} $(addprefix Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix Debug/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(RELEASE) $(DEBUG)
+
+distclean:
+ -@${RM} ${RMFLAGS} -r Release
+ -@${RM} ${RMFLAGS} -r Debug
+
+setup:
+ @${MKDIR} ${MKDIRFLAGS} Release
+ @${MKDIR} ${MKDIRFLAGS} Debug
+
+export:
+ @${CP} ${CPFLAGS} $(RELEASE) ${EXPORT}/lib/
+
+test:
+
+# Pattern rules
+Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/charset/Makefile b/src/charset/Makefile
new file mode 100644
index 0000000..62817b3
--- /dev/null
+++ b/src/charset/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Objects
+OBJS = aliases codec codec_iconv codec_utf8 detect
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+clean:
+ -@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules
+../Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+../Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/charset/aliases.c b/src/charset/aliases.c
new file mode 100644
index 0000000..dcf6de2
--- /dev/null
+++ b/src/charset/aliases.c
@@ -0,0 +1,361 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "charset/aliases.h"
+
+/**
+ * Entry in the alias hash table, mapping one alias name to its canonical form
+ */
+struct alias {
+ struct alias *next; /**< Next entry in the same hash chain */
+ hubbub_aliases_canon *canon; /**< Canonical form this alias refers to */
+ uint16_t name_len; /**< Length of name, excluding the terminator */
+ char name[1]; /**< Alias name; the entry is over-allocated so
+ * that the whole NUL-terminated string fits */
+};
+
+#define HASH_SIZE (43)
+static hubbub_aliases_canon *canon_tab[HASH_SIZE];
+static struct alias *alias_tab[HASH_SIZE];
+
+static hubbub_error hubbub_create_alias(const char *alias,
+ hubbub_aliases_canon *c, hubbub_alloc alloc, void *pw);
+static hubbub_aliases_canon *hubbub_create_canon(const char *canon,
+ uint16_t mibenum, hubbub_alloc alloc, void *pw);
+static uint32_t hubbub_hash_val(const char *alias, size_t len);
+
+/**
+ * Test whether a character can form part of a token in the Aliases file
+ *
+ * \param c  Character to test
+ * \return true if c is not NUL, whitespace, or a control character
+ */
+static bool alias_token_char(char c)
+{
+	/* <ctype.h> functions are only defined for unsigned char values */
+	const unsigned char uc = (unsigned char) c;
+
+	return uc != '\0' && !isspace(uc) && !iscntrl(uc);
+}
+
+/**
+ * Test whether a character is whitespace separating Aliases file tokens
+ *
+ * \param c  Character to test
+ * \return true if c is whitespace (NUL never is)
+ */
+static bool alias_space_char(char c)
+{
+	return isspace((unsigned char) c) != 0;
+}
+
+/**
+ * Create alias data from Aliases file
+ *
+ * Each line of the file is expected to have the form
+ *
+ *   canonical-name  mibenum  [alias ...]
+ *
+ * with the fields separated by whitespace. Empty lines, lines starting
+ * with '#' and lines which lack a mibenum are ignored.
+ *
+ * Failure to create a canonical form or an alias (e.g. through memory
+ * exhaustion) is not reported: the remainder of the affected line is
+ * abandoned and parsing continues with the next line.
+ *
+ * \param filename  The path to the Aliases file
+ * \param alloc     Memory (de)allocation function
+ * \param pw        Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if filename or alloc is NULL,
+ *         HUBBUB_FILENOTFOUND if the file could not be opened.
+ */
+hubbub_error hubbub_aliases_create(const char *filename,
+		hubbub_alloc alloc, void *pw)
+{
+	char buf[300];
+	FILE *fp;
+
+	if (filename == NULL || alloc == NULL)
+		return HUBBUB_BADPARM;
+
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return HUBBUB_FILENOTFOUND;
+
+	while (fgets(buf, sizeof buf, fp)) {
+		char *p, *aliases, *mib, *end;
+		hubbub_aliases_canon *cf;
+		size_t len;
+
+		if (buf[0] == 0 || buf[0] == '#')
+			/* skip blank lines or comments */
+			continue;
+
+		/* Lose the terminating newline, but only if there is one:
+		 * the last line of the file need not be newline-terminated
+		 * and neither is a line too long to fit in buf. Stripping
+		 * unconditionally would eat the final character of the
+		 * last token in those cases. */
+		len = strlen(buf);
+		if (buf[len - 1] == '\n')
+			buf[--len] = '\0';
+		end = buf + len;
+
+		/* find end of canonical form */
+		for (p = buf; alias_token_char(*p); p++)
+			; /* do nothing */
+		if (p >= end)
+			/* nothing follows the canonical form */
+			continue;
+		*p++ = '\0'; /* terminate canonical form */
+
+		/* skip whitespace */
+		for (; alias_space_char(*p); p++)
+			; /* do nothing */
+		if (p >= end)
+			continue;
+		mib = p;
+
+		/* find end of mibenum */
+		for (; alias_token_char(*p); p++)
+			; /* do nothing */
+		if (p < end)
+			*p++ = '\0'; /* terminate mibenum */
+
+		/* strtol, unlike atoi, is well defined for out-of-range
+		 * input; non-numeric input still yields 0 */
+		cf = hubbub_create_canon(buf,
+				(uint16_t) strtol(mib, NULL, 10), alloc, pw);
+		if (cf == NULL)
+			continue;
+
+		/* skip whitespace */
+		for (; p < end && alias_space_char(*p); p++)
+			; /* do nothing */
+		if (p >= end)
+			/* canonical form has no aliases */
+			continue;
+		aliases = p;
+
+		while (p < end) {
+			/* find end of alias; the scan cannot pass the
+			 * terminator which lives at end */
+			for (; alias_token_char(*p); p++)
+				; /* do nothing */
+			/* terminate current alias */
+			*p++ = '\0';
+
+			if (hubbub_create_alias(aliases, cf,
+					alloc, pw) != HUBBUB_OK)
+				break;
+
+			/* in terminating, we may have advanced
+			 * past the end - check this here */
+			if (p >= end)
+				break;
+
+			/* skip whitespace */
+			for (; alias_space_char(*p); p++)
+				; /* do nothing */
+
+			if (p >= end)
+				/* only trailing whitespace was left */
+				break;
+
+			/* update pointer to current alias */
+			aliases = p;
+		}
+	}
+
+	fclose(fp);
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Release every canonical form and alias held in the hash tables
+ *
+ * Both tables are left empty afterwards.
+ *
+ * \param alloc  Memory (de)allocation function
+ * \param pw     Pointer to client-specific private data
+ */
+void hubbub_aliases_destroy(hubbub_alloc alloc, void *pw)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		hubbub_aliases_canon *canon;
+		struct alias *entry;
+
+		/* Unlink before freeing: the successor must be read while
+		 * the node is still valid */
+		canon = canon_tab[bucket];
+		while (canon != NULL) {
+			hubbub_aliases_canon *following = canon->next;
+
+			alloc(canon, 0, pw);
+			canon = following;
+		}
+		canon_tab[bucket] = NULL;
+
+		entry = alias_tab[bucket];
+		while (entry != NULL) {
+			struct alias *following = entry->next;
+
+			alloc(entry, 0, pw);
+			entry = following;
+		}
+		alias_tab[bucket] = NULL;
+	}
+}
+
+/**
+ * Look up the MIB enum value belonging to an encoding name
+ *
+ * \param alias  The encoding name (canonical or alias) to look up
+ * \param len    The length of the name, in bytes
+ * \return The MIB enum value, or 0 if alias is NULL or unknown
+ */
+uint16_t hubbub_mibenum_from_name(const char *alias, size_t len)
+{
+	const hubbub_aliases_canon *canon = NULL;
+
+	if (alias != NULL)
+		canon = hubbub_alias_canonicalise(alias, len);
+
+	return (canon != NULL) ? canon->mib_enum : 0;
+}
+
+/**
+ * Look up the canonical encoding name belonging to a MIB enum value
+ *
+ * Every chain of the canonical-form table is searched in turn.
+ *
+ * \param mibenum  The MIB enum value
+ * \return Pointer to canonical name, or NULL if not found
+ */
+const char *hubbub_mibenum_to_name(uint16_t mibenum)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		const hubbub_aliases_canon *canon = canon_tab[bucket];
+
+		while (canon != NULL) {
+			if (canon->mib_enum == mibenum)
+				return canon->name;
+
+			canon = canon->next;
+		}
+	}
+
+	return NULL;
+}
+
+
+/**
+ * Find the canonical form which an encoding name refers to
+ *
+ * The name is compared case-insensitively, first against the canonical
+ * names and then against the aliases which share its hash bucket.
+ *
+ * \param alias  The encoding name
+ * \param len    The length of the encoding name, in bytes
+ * \return Pointer to canonical form or NULL if not found
+ */
+hubbub_aliases_canon *hubbub_alias_canonicalise(const char *alias,
+		size_t len)
+{
+	hubbub_aliases_canon *canon;
+	struct alias *entry;
+	uint32_t bucket;
+
+	if (alias == NULL)
+		return NULL;
+
+	bucket = hubbub_hash_val(alias, len);
+
+	/* The name may itself be canonical */
+	for (canon = canon_tab[bucket]; canon != NULL; canon = canon->next) {
+		if (canon->name_len != len)
+			continue;
+
+		if (strncasecmp(canon->name, alias, len) == 0)
+			return canon;
+	}
+
+	/* Otherwise, it may be a known alias */
+	for (entry = alias_tab[bucket]; entry != NULL; entry = entry->next) {
+		if (entry->name_len != len)
+			continue;
+
+		if (strncasecmp(entry->name, alias, len) == 0)
+			return entry->canon;
+	}
+
+	return NULL;
+}
+
+
+/**
+ * Create an alias
+ *
+ * The new entry is inserted at the head of its hash chain.
+ *
+ * \param alias  The alias name (NUL-terminated)
+ * \param c      The canonical form
+ * \param alloc  Memory (de)allocation function
+ * \param pw     Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if a required parameter is NULL or the name is
+ *                        too long to be recorded,
+ *         HUBBUB_NOMEM on memory exhaustion.
+ */
+static hubbub_error hubbub_create_alias(const char *alias,
+		hubbub_aliases_canon *c, hubbub_alloc alloc, void *pw)
+{
+	struct alias *a;
+	size_t len;
+	uint32_t hash;
+
+	if (alias == NULL || c == NULL || alloc == NULL)
+		return HUBBUB_BADPARM;
+
+	/* name_len is only 16 bits wide. Storing a longer name would leave
+	 * the recorded length, the stored string and the hash bucket out of
+	 * step with each other, so refuse it instead. */
+	len = strlen(alias);
+	if (len > UINT16_MAX)
+		return HUBBUB_BADPARM;
+
+	a = alloc(NULL, sizeof(struct alias) + len + 1, pw);
+	if (a == NULL)
+		return HUBBUB_NOMEM;
+
+	a->canon = c;
+	a->name_len = (uint16_t) len;
+	memcpy(a->name, alias, len + 1); /* includes the terminator */
+
+	hash = hubbub_hash_val(alias, len);
+
+	a->next = alias_tab[hash];
+	alias_tab[hash] = a;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Create a canonical form
+ *
+ * The new entry is inserted at the head of its hash chain.
+ *
+ * \param canon    The canonical name (NUL-terminated)
+ * \param mibenum  The MIB enum value
+ * \param alloc    Memory (de)allocation function
+ * \param pw       Pointer to client-specific private data (may be NULL)
+ * \return Pointer to canonical form, or NULL if a required parameter is
+ *         NULL, the name is too long to be recorded, or memory is exhausted
+ */
+static hubbub_aliases_canon *hubbub_create_canon(const char *canon,
+		uint16_t mibenum, hubbub_alloc alloc, void *pw)
+{
+	hubbub_aliases_canon *c;
+	size_t len;
+	uint32_t hash;
+
+	if (canon == NULL || alloc == NULL)
+		return NULL;
+
+	/* name_len is only 16 bits wide. Storing a longer name would leave
+	 * the recorded length, the stored string and the hash bucket out of
+	 * step with each other, so refuse it instead. */
+	len = strlen(canon);
+	if (len > UINT16_MAX)
+		return NULL;
+
+	c = alloc(NULL, sizeof(hubbub_aliases_canon) + len + 1, pw);
+	if (c == NULL)
+		return NULL;
+
+	c->mib_enum = mibenum;
+	c->name_len = (uint16_t) len;
+	memcpy(c->name, canon, len + 1); /* includes the terminator */
+
+	hash = hubbub_hash_val(canon, len);
+
+	c->next = canon_tab[hash];
+	canon_tab[hash] = c;
+
+	return c;
+}
+
+/**
+ * Hash function
+ *
+ * Bit 5 of every byte is ignored, so names which differ only in the case
+ * of ASCII letters land in the same bucket.
+ *
+ * \param alias  String to hash (need not be NUL-terminated)
+ * \param len    Number of bytes of alias to hash
+ * \return The hashed value, in the range [0, HASH_SIZE), or 0 if alias
+ *         is NULL
+ */
+static uint32_t hubbub_hash_val(const char *alias, size_t len)
+{
+	uint32_t h = 5381;
+	size_t i;
+
+	if (alias == NULL)
+		return 0;
+
+	for (i = 0; i < len; i++) {
+		/* Going via unsigned char keeps the result independent of
+		 * whether plain char is signed on this platform */
+		const uint32_t ch = (unsigned char) alias[i] & ~0x20u;
+
+		h = (h * 33) ^ ch; /* case insensitive */
+	}
+
+	return h % HASH_SIZE;
+}
+
+
+#ifndef NDEBUG
+/**
+ * Dump all alias data to stdout
+ *
+ * One line is written per canonical form and per alias, giving its hash
+ * bucket and name, followed by a final line holding a size total.
+ */
+void hubbub_aliases_dump(void)
+{
+	size_t total = 0;
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		const hubbub_aliases_canon *canon = canon_tab[bucket];
+		const struct alias *entry = alias_tab[bucket];
+
+		while (canon != NULL) {
+			printf("%d %s\n", bucket, canon->name);
+			total += offsetof(hubbub_aliases_canon, name);
+			total += canon->name_len;
+			canon = canon->next;
+		}
+
+		while (entry != NULL) {
+			printf("%d %s\n", bucket, entry->name);
+			total += offsetof(struct alias, name);
+			total += entry->name_len;
+			entry = entry->next;
+		}
+	}
+
+	/* NOTE(review): these add the number of buckets in each table, not
+	 * the tables' size in bytes -- confirm which was intended */
+	total += sizeof(canon_tab) / sizeof(canon_tab[0]);
+	total += sizeof(alias_tab) / sizeof(alias_tab[0]);
+
+	printf("%u\n", (unsigned int) total);
+}
+#endif
diff --git a/src/charset/aliases.h b/src/charset/aliases.h
new file mode 100644
index 0000000..e0505d0
--- /dev/null
+++ b/src/charset/aliases.h
@@ -0,0 +1,42 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_charset_aliases_h_
+#define hubbub_charset_aliases_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+
+/**
+ * Canonical charset name entry.
+ *
+ * Entries are allocated with extra space after the structure so that
+ * name can hold the whole NUL-terminated canonical name.
+ */
+typedef struct hubbub_aliases_canon {
+ /* Next entry in the same chain */
+ struct hubbub_aliases_canon *next;
+ /* MIB enum value for this charset */
+ uint16_t mib_enum;
+ /* Length of name, in bytes, excluding the NUL terminator */
+ uint16_t name_len;
+ /* Start of the name; storage continues past the end of the struct */
+ char name[1];
+} hubbub_aliases_canon;
+
+/* Load encoding aliases from file */
+hubbub_error hubbub_aliases_create(const char *filename,
+ hubbub_alloc alloc, void *pw);
+/* Destroy encoding aliases */
+void hubbub_aliases_destroy(hubbub_alloc alloc, void *pw);
+
+/* Convert an encoding alias to a MIB enum value */
+uint16_t hubbub_mibenum_from_name(const char *alias, size_t len);
+/* Convert a MIB enum value into an encoding alias */
+const char *hubbub_mibenum_to_name(uint16_t mibenum);
+
+/* Canonicalise an alias name */
+hubbub_aliases_canon *hubbub_alias_canonicalise(const char *alias,
+ size_t len);
+
+#ifndef NDEBUG
+void hubbub_aliases_dump(void);
+#endif
+
+#endif
diff --git a/src/charset/codec.c b/src/charset/codec.c
new file mode 100644
index 0000000..12a1bdc
--- /dev/null
+++ b/src/charset/codec.c
@@ -0,0 +1,186 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <string.h>
+
+#include "charset/aliases.h"
+
+#include "codec_impl.h"
+
+extern hubbub_charsethandler hubbub_iconv_codec_handler;
+extern hubbub_charsethandler hubbub_utf8_codec_handler;
+
+/* NULL-terminated list of available codec handlers.
+ * hubbub_charsetcodec_create() walks this in order and uses the first
+ * handler that claims the charset, so earlier entries take priority. */
+static hubbub_charsethandler *handler_table[] = {
+ &hubbub_utf8_codec_handler,
+ &hubbub_iconv_codec_handler,
+ NULL,
+};
+
+/**
+ * Create a charset codec
+ *
+ * \param charset  Target charset (any known alias)
+ * \param alloc    Memory (de)allocation function
+ * \param pw       Pointer to client-specific private data (may be NULL)
+ * \return Pointer to codec instance, or NULL on failure
+ *
+ * Failure covers bad parameters, an unknown charset name, no handler
+ * claiming the charset, and the handler failing to create an instance.
+ */
+hubbub_charsetcodec *hubbub_charsetcodec_create(const char *charset,
+		hubbub_alloc alloc, void *pw)
+{
+	const hubbub_aliases_canon *canon;
+	hubbub_charsethandler **h = handler_table;
+	hubbub_charsetcodec *codec;
+
+	if (charset == NULL || alloc == NULL)
+		return NULL;
+
+	/* Resolve the supplied name to its canonical form */
+	canon = hubbub_alias_canonicalise(charset, strlen(charset));
+	if (canon == NULL)
+		return NULL;
+
+	/* Find the first handler willing to take this charset */
+	while (*h != NULL && !(*h)->handles_charset(canon->name))
+		h++;
+
+	if (*h == NULL)
+		return NULL;
+
+	/* Let the handler build its instance ... */
+	codec = (*h)->create(canon->name, alloc, pw);
+	if (codec == NULL)
+		return NULL;
+
+	/* ... then fill in the parts common to all codecs */
+	codec->mibenum = canon->mib_enum;
+
+	codec->filter = NULL;
+	codec->filter_pw = NULL;
+
+	codec->errormode = HUBBUB_CHARSETCODEC_ERROR_LOOSE;
+
+	codec->alloc = alloc;
+	codec->alloc_pw = pw;
+
+	return codec;
+}
+
+/**
+ * Destroy a charset codec
+ *
+ * \param codec  The codec to destroy (NULL is ignored)
+ */
+void hubbub_charsetcodec_destroy(hubbub_charsetcodec *codec)
+{
+	if (codec != NULL) {
+		/* Handler-specific teardown first, then release the
+		 * instance through the allocator it was created with */
+		codec->handler.destroy(codec);
+
+		codec->alloc(codec, 0, codec->alloc_pw);
+	}
+}
+
+/**
+ * Configure a charset codec
+ *
+ * \param codec   The codec to configure
+ * \param type    The codec option type to configure
+ * \param params  Option-specific parameters
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if ::codec or ::params is NULL, or if ::type
+ *         is not a known option type.
+ */
+hubbub_error hubbub_charsetcodec_setopt(hubbub_charsetcodec *codec,
+		hubbub_charsetcodec_opttype type,
+		hubbub_charsetcodec_optparams *params)
+{
+	if (codec == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	switch (type) {
+	case HUBBUB_CHARSETCODEC_FILTER_FUNC:
+		codec->filter = params->filter_func.filter;
+		codec->filter_pw = params->filter_func.pw;
+		break;
+
+	case HUBBUB_CHARSETCODEC_ERROR_MODE:
+		codec->errormode = params->error_mode.mode;
+		break;
+
+	default:
+		/* Unknown option: report it rather than claiming
+		 * success without having changed anything */
+		return HUBBUB_BADPARM;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Encode a chunk of UCS4 data into a codec's charset
+ *
+ * \param codec      The codec to use
+ * \param source     Pointer to pointer to source data
+ * \param sourcelen  Pointer to length (in bytes) of source data
+ * \param dest       Pointer to pointer to output buffer
+ * \param destlen    Pointer to length (in bytes) of output buffer
+ * \return HUBBUB_OK on success, appropriate error otherwise.
+ *         HUBBUB_BADPARM is returned if any argument, ::*source or
+ *         ::*dest is NULL.
+ *
+ * source, sourcelen, dest and destlen will be updated appropriately on exit
+ */
+hubbub_error hubbub_charsetcodec_encode(hubbub_charsetcodec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	if (codec == NULL)
+		return HUBBUB_BADPARM;
+
+	if (source == NULL || *source == NULL || sourcelen == NULL)
+		return HUBBUB_BADPARM;
+
+	if (dest == NULL || *dest == NULL || destlen == NULL)
+		return HUBBUB_BADPARM;
+
+	/* All the real work is done by the codec's handler */
+	return codec->handler.encode(codec, source, sourcelen, dest, destlen);
+}
+
+/**
+ * Decode a chunk of data in a codec's charset into UCS4
+ *
+ * \param codec      The codec to use
+ * \param source     Pointer to pointer to source data
+ * \param sourcelen  Pointer to length (in bytes) of source data
+ * \param dest       Pointer to pointer to output buffer
+ * \param destlen    Pointer to length (in bytes) of output buffer
+ * \return HUBBUB_OK on success, appropriate error otherwise.
+ *         HUBBUB_BADPARM is returned if any argument, ::*source or
+ *         ::*dest is NULL.
+ *
+ * source, sourcelen, dest and destlen will be updated appropriately on exit
+ *
+ * Call this with a source length of 0 to flush any buffers.
+ */
+hubbub_error hubbub_charsetcodec_decode(hubbub_charsetcodec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	if (codec == NULL)
+		return HUBBUB_BADPARM;
+
+	if (source == NULL || *source == NULL || sourcelen == NULL)
+		return HUBBUB_BADPARM;
+
+	if (dest == NULL || *dest == NULL || destlen == NULL)
+		return HUBBUB_BADPARM;
+
+	/* All the real work is done by the codec's handler */
+	return codec->handler.decode(codec, source, sourcelen, dest, destlen);
+}
+
+/**
+ * Clear a charset codec's encoding state
+ *
+ * \param codec  The codec to reset
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * If a filter is registered it is asked to reset itself first, by being
+ * called with HUBBUB_CHARSETCODEC_NULL and the private data pointer it was
+ * registered with. The codec handler is always reset, even if the filter
+ * reports an error; in that case the filter's error is the one returned.
+ */
+hubbub_error hubbub_charsetcodec_reset(hubbub_charsetcodec *codec)
+{
+	hubbub_error filter_error = HUBBUB_OK;
+	hubbub_error handler_error;
+
+	if (codec == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Reset filter, handing it its own private data so it can
+	 * actually find the state it is meant to clear */
+	if (codec->filter) {
+		filter_error = codec->filter(HUBBUB_CHARSETCODEC_NULL,
+				NULL, NULL, codec->filter_pw);
+	}
+
+	handler_error = codec->handler.reset(codec);
+
+	return (filter_error != HUBBUB_OK) ? filter_error : handler_error;
+}
+
diff --git a/src/charset/codec.h b/src/charset/codec.h
new file mode 100644
index 0000000..4cd94d8
--- /dev/null
+++ b/src/charset/codec.h
@@ -0,0 +1,153 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_charset_codec_h_
+#define hubbub_charset_codec_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+
+typedef struct hubbub_charsetcodec hubbub_charsetcodec;
+
+#define HUBBUB_CHARSETCODEC_NULL (0xffffffffU)
+
+/**
+ * Type of charset codec filter function
+ *
+ * \param c UCS4 character (in host byte order) or
+ * HUBBUB_CHARSETCODEC_NULL to reset
+ * \param output Pointer to location to store output buffer location
+ * \param outputlen Pointer to location to store output buffer length
+ * \param pw Pointer to client-specific private data
+ * \return HUBBUB_OK on success, or appropriate error otherwise.
+ *
+ * The output buffer is owned by the filter code and will not be freed by
+ * any charset codec. It should contain the replacement UCS4 character(s)
+ * for the input. The replacement characters should be in host byte order.
+ * The contents of *output and *outputlen on entry are ignored and these
+ * will be filled in by the filter code.
+ *
+ * Filters may elect to replace the input character with no output. In this
+ * case, *output should be set to NULL and *outputlen should be set to 0 and
+ * HUBBUB_OK should be returned.
+ *
+ * The output length is in terms of the number of UCS4 characters in the
+ * output buffer. i.e.:
+ *
+ * for (size_t i = 0; i < outputlen; i++) {
+ * dest[curchar++] = output[i];
+ * }
+ *
+ * would copy the contents of the filter output buffer to the codec's output
+ * buffer.
+ */
+typedef hubbub_error (*hubbub_charsetcodec_filter)(uint32_t c,
+ uint32_t **output, size_t *outputlen, void *pw);
+
+/**
+ * Charset codec error mode
+ *
+ * A codec's error mode determines its behaviour in the face of:
+ *
+ * + characters which are unrepresentable in the destination charset (if
+ * encoding data) or which cannot be converted to UCS4 (if decoding data).
+ * + invalid byte sequences (both encoding and decoding)
+ *
+ * The options provide a choice between the following approaches:
+ *
+ * + draconian, "stop processing" ("strict")
+ * + "replace the unrepresentable character with something else" ("loose")
+ * + "attempt to transliterate, or replace if unable" ("translit")
+ *
+ * The default error mode is "loose".
+ *
+ *
+ * In the "loose" case, the replacement character will depend upon:
+ *
+ * + Whether the operation was encoding or decoding
+ * + If encoding, what the destination charset is.
+ *
+ * If decoding, the replacement character will be:
+ *
+ * U+FFFD (REPLACEMENT CHARACTER)
+ *
+ * If encoding, the replacement character will be:
+ *
+ * U+003F (QUESTION MARK) if the destination charset is not UTF-(8|16|32)
+ * U+FFFD (REPLACEMENT CHARACTER) otherwise.
+ *
+ *
+ * In the "translit" case, the codec will attempt to transliterate into
+ * the destination charset, if encoding. If decoding, or if transliteration
+ * fails, this option is identical to "loose".
+ */
+typedef enum hubbub_charsetcodec_errormode {
+ /** Abort processing if unrepresentable character encountered */
+ HUBBUB_CHARSETCODEC_ERROR_STRICT = 0,
+ /** Replace unrepresentable characters with single alternate */
+ HUBBUB_CHARSETCODEC_ERROR_LOOSE = 1,
+ /** Transliterate unrepresentable characters, if possible */
+ HUBBUB_CHARSETCODEC_ERROR_TRANSLIT = 2,
+} hubbub_charsetcodec_errormode;
+
+/**
+ * Charset codec option types
+ */
+typedef enum hubbub_charsetcodec_opttype {
+ /** Register codec filter function */
+ HUBBUB_CHARSETCODEC_FILTER_FUNC = 0,
+ /** Set codec error mode */
+ HUBBUB_CHARSETCODEC_ERROR_MODE = 1,
+} hubbub_charsetcodec_opttype;
+
+/**
+ * Charset codec option parameters
+ */
+typedef union hubbub_charsetcodec_optparams {
+ /** Parameters for filter function setting */
+ struct {
+ /** Filter function */
+ hubbub_charsetcodec_filter filter;
+ /** Client-specific private data */
+ void *pw;
+ } filter_func;
+
+ /** Parameters for error mode setting */
+ struct {
+ /** The desired error handling mode */
+ hubbub_charsetcodec_errormode mode;
+ } error_mode;
+} hubbub_charsetcodec_optparams;
+
+
+/* Create a charset codec */
+hubbub_charsetcodec *hubbub_charsetcodec_create(const char *charset,
+ hubbub_alloc alloc, void *pw);
+/* Destroy a charset codec */
+void hubbub_charsetcodec_destroy(hubbub_charsetcodec *codec);
+
+/* Configure a charset codec */
+hubbub_error hubbub_charsetcodec_setopt(hubbub_charsetcodec *codec,
+ hubbub_charsetcodec_opttype type,
+ hubbub_charsetcodec_optparams *params);
+
+/* Encode a chunk of UCS4 data into a codec's charset */
+hubbub_error hubbub_charsetcodec_encode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+
+/* Decode a chunk of data in a codec's charset into UCS4 */
+hubbub_error hubbub_charsetcodec_decode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+
+/* Reset a charset codec */
+hubbub_error hubbub_charsetcodec_reset(hubbub_charsetcodec *codec);
+
+#endif
diff --git a/src/charset/codec_iconv.c b/src/charset/codec_iconv.c
new file mode 100644
index 0000000..097e82a
--- /dev/null
+++ b/src/charset/codec_iconv.c
@@ -0,0 +1,837 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+/* This codec is hideously slow. Only use it as a last resort */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <iconv.h>
+
+/* These two are for htonl / ntohl */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include "charset/aliases.h"
+#include "utils/utils.h"
+
+#include "codec_impl.h"
+
+/**
+ * A note on endianness:
+ *
+ * UCS4 is big-endian by default. Therefore, this codec reads and writes
+ * big-endian values. This is fine, and causes no problems. However, to
+ * make life easier for client-supplied filter code, character values passed
+ * to a filter and those read back from a filter are in host-endian.
+ * Therefore, we need to convert from big-endian to host-endian when passing
+ * characters to a filter and perform the reverse translation when reading
+ * characters back.
+ */
+
+/**
+ * Iconv-based charset codec
+ *
+ * The base class is the first member: the code in this file casts
+ * between hubbub_charsetcodec * and hubbub_iconv_codec *.
+ *
+ * inval_buf has a NUL byte written after the pending bytes it holds, so
+ * at most INVAL_BUFSIZE - 1 pending bytes fit in it.
+ */
+typedef struct hubbub_iconv_codec {
+ hubbub_charsetcodec base; /**< Base class */
+
+ iconv_t read_cd; /**< Iconv handle for reading */
+#define INVAL_BUFSIZE (32)
+ uint8_t inval_buf[INVAL_BUFSIZE]; /**< Buffer for fixing up
+ * incomplete input
+ * sequences */
+ size_t inval_len; /**< Number of bytes in inval_buf */
+
+#define READ_BUFSIZE (8)
+ uint32_t read_buf[READ_BUFSIZE]; /**< Buffer for partial
+ * output sequences (decode)
+ */
+ size_t read_len; /**< Number of characters in
+ * read_buf */
+
+ iconv_t write_cd; /**< Iconv handle for writing */
+#define WRITE_BUFSIZE (8)
+ uint32_t write_buf[WRITE_BUFSIZE]; /**< Buffer for partial
+ * output sequences (encode)
+ */
+ size_t write_len; /**< Number of characters in
+ * write_buf */
+} hubbub_iconv_codec;
+
+
+static bool hubbub_iconv_codec_handles_charset(const char *charset);
+static hubbub_charsetcodec *hubbub_iconv_codec_create(const char *charset,
+ hubbub_alloc alloc, void *pw);
+static void hubbub_iconv_codec_destroy (hubbub_charsetcodec *codec);
+static hubbub_error hubbub_iconv_codec_encode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+static hubbub_error hubbub_iconv_codec_decode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+static hubbub_error hubbub_iconv_codec_reset(hubbub_charsetcodec *codec);
+static hubbub_error hubbub_iconv_codec_filter_decoded_char(
+ hubbub_iconv_codec *c, uint32_t ucs4, uint8_t **dest,
+ size_t *destlen);
+static bool hubbub_iconv_codec_is_unicode(hubbub_iconv_codec *c);
+static hubbub_error hubbub_iconv_codec_read_char(hubbub_iconv_codec *c,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+static hubbub_error hubbub_iconv_codec_write_char(hubbub_iconv_codec *c,
+ uint32_t ucs4, uint8_t **dest, size_t *destlen);
+
+/**
+ * Determine whether this codec handles a specific charset
+ *
+ * \param charset  Charset to test
+ * \return true if handleable, false otherwise
+ *
+ * A charset is considered handleable if iconv can open a conversion
+ * from it to UCS-4.
+ */
+bool hubbub_iconv_codec_handles_charset(const char *charset)
+{
+	iconv_t cd = iconv_open("UCS-4", charset);
+
+	if (cd == (iconv_t) -1)
+		return false;
+
+	/* Only probing; the descriptor itself is not needed */
+	iconv_close(cd);
+
+	return true;
+}
+
+/**
+ * Create an iconv-based codec
+ *
+ * \param charset  The charset to read from / write to
+ * \param alloc    Memory (de)allocation function
+ * \param pw       Pointer to client-specific private data (may be NULL)
+ * \return Pointer to codec, or NULL on failure
+ *
+ * Only the iconv-specific state and the handler vtable are set up here.
+ */
+hubbub_charsetcodec *hubbub_iconv_codec_create(const char *charset,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_iconv_codec *c = alloc(NULL, sizeof(hubbub_iconv_codec), pw);
+
+	if (c == NULL)
+		return NULL;
+
+	/* One descriptor per direction: charset -> UCS-4 for decoding,
+	 * UCS-4 -> charset for encoding */
+	c->read_cd = iconv_open("UCS-4", charset);
+	if (c->read_cd == (iconv_t) -1)
+		goto fail;
+
+	c->write_cd = iconv_open(charset, "UCS-4");
+	if (c->write_cd == (iconv_t) -1) {
+		iconv_close(c->read_cd);
+		goto fail;
+	}
+
+	/* All intermediate buffers start out empty */
+	c->inval_buf[0] = '\0';
+	c->inval_len = 0;
+
+	c->read_buf[0] = 0;
+	c->read_len = 0;
+
+	c->write_buf[0] = 0;
+	c->write_len = 0;
+
+	/* Finally, populate vtable */
+	c->base.handler.destroy = hubbub_iconv_codec_destroy;
+	c->base.handler.encode = hubbub_iconv_codec_encode;
+	c->base.handler.decode = hubbub_iconv_codec_decode;
+	c->base.handler.reset = hubbub_iconv_codec_reset;
+
+	return (hubbub_charsetcodec *) c;
+
+fail:
+	alloc(c, 0, pw);
+	return NULL;
+}
+
+/**
+ * Destroy an iconv-based codec
+ *
+ * \param codec  The codec to destroy
+ *
+ * Closes both iconv descriptors; the codec structure itself is not
+ * freed here.
+ */
+void hubbub_iconv_codec_destroy (hubbub_charsetcodec *codec)
+{
+	hubbub_iconv_codec *iconv_codec = (hubbub_iconv_codec *) codec;
+
+	iconv_close(iconv_codec->read_cd);
+	iconv_close(iconv_codec->write_cd);
+}
+
+/**
+ * Encode a chunk of UCS4 data into an iconv-based codec's charset
+ *
+ * \param codec The codec to use
+ * \param source Pointer to pointer to source data
+ * \param sourcelen Pointer to length (in bytes) of source data
+ * \param dest Pointer to pointer to output buffer
+ * \param destlen Pointer to length (in bytes) of output buffer
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if a character cannot be represented and the
+ * codec's error handling mode is set to STRICT,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read. Any remaining output for the character will be buffered by the
+ * codec for writing on the next call. This buffered data is post-filtering,
+ * so will not be refiltered on the next call.
+ *
+ * In the case of the filter function failing, ::source will point _at_ the
+ * last input character read; nothing will be written or buffered for the
+ * failed character. It is up to the client to fix the cause of the failure
+ * and retry the encoding process.
+ *
+ * Note that, if failure occurs whilst attempting to write any output
+ * buffered by the last call, then ::source and ::sourcelen will remain
+ * unchanged (as nothing more has been read).
+ *
+ * There is no way to determine the output character which caused a
+ * failure (as it may be one in a filter-injected replacement sequence).
+ * It is, however, possible to determine which source character caused it
+ * (this being the character immediately before the location pointed to by
+ * ::source on exit).
+ *
+ * [I.e. the process of filtering results in a potential one-to-many mapping
+ * between source characters and output characters, and identification of
+ * individual output characters is impossible.]
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ */
+hubbub_error hubbub_iconv_codec_encode(hubbub_charsetcodec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	hubbub_iconv_codec *c = (hubbub_iconv_codec *) codec;
+	uint32_t ucs4;
+	const uint32_t *towrite;
+	size_t towritelen;
+	size_t skip, i;
+	hubbub_error error;
+
+	/* Process any outstanding characters from the previous call */
+	if (c->write_len > 0) {
+		uint32_t *pwrite = c->write_buf;
+
+		while (c->write_len > 0) {
+			error = hubbub_iconv_codec_write_char(c, pwrite[0],
+					dest, destlen);
+			if (error != HUBBUB_OK) {
+				/* Drop the invalid character, if that is
+				 * what stopped us, so as to avoid
+				 * reprocessing it on the next call */
+				if (error == HUBBUB_INVALID) {
+					pwrite++;
+					c->write_len--;
+				}
+
+				/* Move what is still pending down to the
+				 * start of the save area. Everything
+				 * before pwrite has already been written
+				 * (or skipped) and must not be emitted
+				 * again. */
+				for (i = 0; i < c->write_len; i++) {
+					c->write_buf[i] = pwrite[i];
+				}
+
+				return error;
+			}
+
+			pwrite++;
+			c->write_len--;
+		}
+	}
+
+	/* Now process the characters for this call.
+	 * NOTE(review): each iteration consumes 4 bytes, so this assumes
+	 * *sourcelen is a multiple of 4 -- confirm callers guarantee it. */
+	while (*sourcelen > 0) {
+		towrite = (const uint32_t *) (const void *) *source;
+		towritelen = 1;
+		ucs4 = *towrite;
+
+		/* Run character we're about to output through the
+		 * registered filter, so it can replace it, if it sees
+		 * fit to do so */
+		if (c->base.filter != NULL) {
+			uint32_t *replacement;
+
+			error = c->base.filter(ntohl(ucs4),
+					&replacement, &towritelen,
+					c->base.filter_pw);
+			if (error != HUBBUB_OK) {
+				/* Don't eat character -- filter failed,
+				 * so nothing gets written or buffered.
+				 * It's up to the client to ensure that
+				 * the filter works in the case where it
+				 * reprocesses this character after the
+				 * fault is fixed up. */
+
+				return error;
+			}
+
+			/* Convert filter output to big endian UCS4
+			 * (in place, in the filter's own buffer) */
+			for (i = 0; i < towritelen; i++) {
+				replacement[i] = htonl(replacement[i]);
+			}
+
+			towrite = (const uint32_t *) replacement;
+		}
+
+		/* Output current character(s) */
+		while (towritelen > 0) {
+			error = hubbub_iconv_codec_write_char(c, towrite[0],
+					dest, destlen);
+
+			if (error != HUBBUB_OK) {
+				/* An invalid character is dropped rather
+				 * than saved, so it's not reprocessed */
+				skip = (error == HUBBUB_INVALID) ? 1 : 0;
+
+				/* The save area is fixed-size; more
+				 * pending output than it can hold is
+				 * unrecoverable */
+				if (towritelen - skip > WRITE_BUFSIZE)
+					abort();
+
+				c->write_len = towritelen - skip;
+
+				/* Copy pending chars to the start of the
+				 * save area, for processing next call */
+				for (i = 0; i < c->write_len; i++) {
+					c->write_buf[i] = towrite[skip + i];
+				}
+
+				/* Claim character we've just buffered,
+				 * so it's not reprocessed */
+				*source += 4;
+				*sourcelen -= 4;
+
+				return error;
+			}
+
+			towrite++;
+			towritelen--;
+		}
+
+		*source += 4;
+		*sourcelen -= 4;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Decode a chunk of data in an iconv-based codec's charset into UCS4
+ *
+ * \param codec The codec to use
+ * \param source Pointer to pointer to source data
+ * \param sourcelen Pointer to length (in bytes) of source data
+ * \param dest Pointer to pointer to output buffer
+ * \param destlen Pointer to length (in bytes) of output buffer
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if a character cannot be represented and the
+ * codec's error handling mode is set to STRICT,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read, if the result is _OK or _NOMEM. Any remaining output for the
+ * character will be buffered by the codec for writing on the next call.
+ * This buffered data is post-filtering, so will not be refiltered on the
+ * next call.
+ *
+ * In the case of the result being _INVALID or the filter function failing,
+ * ::source will point _at_ the last input character read; nothing will be
+ * written or buffered for the failed character. It is up to the client to
+ * fix the cause of the failure and retry the decoding process.
+ *
+ * Note that, if failure occurs whilst attempting to write any output
+ * buffered by the last call, then ::source and ::sourcelen will remain
+ * unchanged (as nothing more has been read).
+ *
+ * There is no way to determine the output character which caused a
+ * failure (as it may be one in a filter-injected replacement sequence).
+ * It is, however, possible to determine which source character caused it
+ * (this being the character immediately at or before the location pointed
+ * to by ::source on exit).
+ *
+ * [I.e. the process of filtering results in a potential one-to-many mapping
+ * between source characters and output characters, and identification of
+ * individual output characters is impossible.]
+ *
+ * If STRICT error handling is configured and an illegal sequence is split
+ * over two calls, then _INVALID will be returned from the second call,
+ * but ::source will point mid-way through the invalid sequence (i.e. it
+ * will be unmodified over the second call). In addition, the internal
+ * incomplete-sequence buffer will be emptied, such that subsequent calls
+ * will progress, rather than re-evaluating the same invalid sequence.
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ *
+ * Call this with a source length of 0 to flush the output buffer.
+ */
+hubbub_error hubbub_iconv_codec_decode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen)
+{
+ hubbub_iconv_codec *c = (hubbub_iconv_codec *) codec;
+ hubbub_error error;
+
+ /* Stage 1: emit decoded characters saved in read_buf by an earlier
+ * call that ran out of output space. */
+ if (c->read_len > 0) {
+ /* Output left over from last decode
+ * Attempt to finish this here */
+ uint32_t *pread = c->read_buf;
+
+ /* Only starts writing if everything still pending fits */
+ while (c->read_len > 0 && *destlen >= c->read_len * 4) {
+ *((uint32_t *) (void *) *dest) = pread[0];
+
+ *dest += 4;
+ *destlen -= 4;
+
+ pread++;
+ c->read_len--;
+ }
+
+ if (*destlen < c->read_len * 4) {
+ /* Run out of output buffer */
+ size_t i;
+
+ /* Shuffle remaining output down */
+ for (i = 0; i < c->read_len; i++) {
+ c->read_buf[i] = pread[i];
+ }
+
+ return HUBBUB_NOMEM;
+ }
+ }
+
+ /* Stage 2: complete an input sequence that was cut short at the end
+ * of the previous chunk. */
+ if (c->inval_len > 0) {
+ /* The last decode ended in an incomplete sequence.
+ * Fill up inval_buf with data from the start of the
+ * new chunk and process it. */
+ uint8_t *in = c->inval_buf;
+ /* ol: bytes already pending in inval_buf */
+ size_t ol = c->inval_len;
+ /* l: number of new bytes to append, leaving one byte of
+ * inval_buf spare */
+ size_t l = min(INVAL_BUFSIZE - ol - 1, *sourcelen);
+ size_t orig_l = l;
+
+ memcpy(c->inval_buf + ol, *source, l);
+
+ /* l now counts every byte in inval_buf (old plus new) */
+ l += c->inval_len;
+
+ /* read_char advances in and reduces l as it consumes input */
+ error = hubbub_iconv_codec_read_char(c,
+ (const uint8_t **) &in, &l, dest, destlen);
+ if (error != HUBBUB_OK && error != HUBBUB_NOMEM) {
+ return error;
+ }
+
+
+ /* And now, fix everything up so the normal processing
+ * does the right thing. */
+ /* NOTE(review): orig_l - l is a size_t subtraction that is then
+ * cast to signed and clamped at 0 -- confirm the intended
+ * accounting of bytes consumed from ::source. */
+ *source += max((signed) (orig_l - l), 0);
+ *sourcelen -= max((signed) (orig_l - l), 0);
+
+ /* Failed to resolve an incomplete character and
+ * ran out of buffer space. No recovery strategy
+ * possible, so explode everywhere. */
+ if ((orig_l + ol) - l == 0)
+ abort();
+
+ /* Handle memory exhaustion case from above */
+ if (error != HUBBUB_OK)
+ return error;
+ }
+
+ /* Stage 3: decode the rest of the input until it is exhausted or
+ * an error is reported. */
+ while (*sourcelen > 0) {
+ error = hubbub_iconv_codec_read_char(c,
+ source, sourcelen, dest, destlen);
+ if (error != HUBBUB_OK) {
+ return error;
+ }
+ }
+
+ return HUBBUB_OK;
+}
+
+/**
+ * Clear an iconv-based codec's encoding state
+ *
+ * \param codec  The codec to reset
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * Both iconv descriptors are returned to their initial state and all
+ * intermediate buffers are emptied.
+ */
+hubbub_error hubbub_iconv_codec_reset(hubbub_charsetcodec *codec)
+{
+	hubbub_iconv_codec *iconv_codec = (hubbub_iconv_codec *) codec;
+
+	/* Calling iconv with no buffers resets the conversion state */
+	iconv(iconv_codec->read_cd, NULL, NULL, NULL, NULL);
+	iconv(iconv_codec->write_cd, NULL, NULL, NULL, NULL);
+
+	/* Discard any partial input and any unwritten output */
+	iconv_codec->inval_len = 0;
+	iconv_codec->inval_buf[0] = '\0';
+
+	iconv_codec->read_len = 0;
+	iconv_codec->read_buf[0] = 0;
+
+	iconv_codec->write_len = 0;
+	iconv_codec->write_buf[0] = 0;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Feed a UCS4 character through the registered filter and output the result
+ *
+ * \param c        Codec to use
+ * \param ucs4     UCS4 character (big endian)
+ * \param dest     Pointer to pointer to output buffer
+ * \param destlen  Pointer to output buffer length
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_NOMEM if output buffer is too small,
+ *         <any_other_error> as a result of the failure of the
+ *         client-provided filter function.
+ *
+ * Output is all-or-nothing: if the whole result does not fit in the
+ * output buffer, nothing is written, the result is saved in the codec's
+ * read_buf for a later call, and HUBBUB_NOMEM is returned.
+ *
+ * If the filter produces more than READ_BUFSIZE characters and they do
+ * not fit in the output buffer, they cannot be saved either; the process
+ * is aborted in that case.
+ */
+hubbub_error hubbub_iconv_codec_filter_decoded_char(hubbub_iconv_codec *c,
+		uint32_t ucs4, uint8_t **dest, size_t *destlen)
+{
+	if (c->base.filter != NULL) {
+		uint32_t *rep;
+		size_t replen;
+		hubbub_error error;
+
+		/* Filters work in host byte order */
+		error = c->base.filter(ntohl(ucs4), &rep, &replen,
+				c->base.filter_pw);
+		if (error != HUBBUB_OK) {
+			return error;
+		}
+
+		/* Only starts writing if the whole replacement fits */
+		while (replen > 0 && *destlen >= replen * 4) {
+			*((uint32_t *) (void *) *dest) = htonl(*rep);
+
+			*dest += 4;
+			*destlen -= 4;
+
+			rep++;
+			replen--;
+		}
+
+		if (*destlen < replen * 4) {
+			/* Run out of output buffer */
+			size_t i;
+
+			/* The save area is fixed-size; writing a longer
+			 * replacement into it would overflow read_buf */
+			if (replen > READ_BUFSIZE)
+				abort();
+
+			/* Buffer remaining output */
+			c->read_len = replen;
+
+			for (i = 0; i < replen; i++) {
+				c->read_buf[i] = htonl(rep[i]);
+			}
+
+			return HUBBUB_NOMEM;
+		}
+
+	} else {
+		if (*destlen < 4) {
+			/* Run out of output buffer */
+
+			c->read_len = 1;
+			c->read_buf[0] = ucs4;
+
+			return HUBBUB_NOMEM;
+		}
+
+		*((uint32_t *) (void *) *dest) = ucs4;
+		*dest += 4;
+		*destlen -= 4;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Detect if a codec's charset is Unicode capable
+ *
+ * \param c  Codec to consider
+ * \return true if a Unicode variant, false otherwise
+ *
+ * The MIB enum values of the Unicode charsets are looked up on first use
+ * and cached; the UCS-4 entry being zero is what marks the cache as
+ * not yet filled.
+ */
+bool hubbub_iconv_codec_is_unicode(hubbub_iconv_codec *c)
+{
+	enum { N_UNICODE_CHARSETS = 9 };
+	/* Slot 0 is UCS-4 and doubles as the "cache filled" marker */
+	static uint16_t mibs[N_UNICODE_CHARSETS];
+	size_t i;
+
+	if (mibs[0] == 0) {
+		mibs[0] = hubbub_mibenum_from_name("UCS-4", SLEN("UCS-4"));
+		mibs[1] = hubbub_mibenum_from_name("UCS-2", SLEN("UCS-2"));
+		mibs[2] = hubbub_mibenum_from_name("UTF-8", SLEN("UTF-8"));
+		mibs[3] = hubbub_mibenum_from_name("UTF-16", SLEN("UTF-16"));
+		mibs[4] = hubbub_mibenum_from_name("UTF-16BE",
+				SLEN("UTF-16BE"));
+		mibs[5] = hubbub_mibenum_from_name("UTF-16LE",
+				SLEN("UTF-16LE"));
+		mibs[6] = hubbub_mibenum_from_name("UTF-32", SLEN("UTF-32"));
+		mibs[7] = hubbub_mibenum_from_name("UTF-32BE",
+				SLEN("UTF-32BE"));
+		mibs[8] = hubbub_mibenum_from_name("UTF-32LE",
+				SLEN("UTF-32LE"));
+	}
+
+	for (i = 0; i < N_UNICODE_CHARSETS; i++) {
+		if (c->base.mibenum == mibs[i])
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * Read a character from the codec's native charset to UCS4 (big endian)
+ *
+ * \param c The codec
+ * \param source Pointer to pointer to source buffer (updated on exit)
+ * \param sourcelen Pointer to length of source buffer (updated on exit)
+ * \param dest Pointer to pointer to output buffer (updated on exit)
+ * \param destlen Pointer to length of output buffer (updated on exit)
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if a character cannot be represented and the
+ * codec's error handling mode is set to STRICT,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read, if the result is _OK or _NOMEM. Any remaining output for the
+ * character will be buffered by the codec for writing on the next call.
+ * This buffered data is post-filtering, so will not be refiltered on the
+ * next call.
+ *
+ * In the case of the result being _INVALID or the filter function failing,
+ * ::source will point _at_ the last input character read; nothing will be
+ * written or buffered for the failed character. It is up to the client to
+ * fix the cause of the failure and retry the decoding process.
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ */
+hubbub_error hubbub_iconv_codec_read_char(hubbub_iconv_codec *c,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	size_t iconv_ret;
+	const uint8_t *origsrc = *source;
+	size_t origsrclen = *sourcelen;
+	uint32_t ucs4;
+	uint8_t *pucs4 = (uint8_t *) &ucs4;
+	size_t sucs4 = 4;
+	hubbub_error error;
+
+	/* Use iconv to convert a single character
+	 * Side effect: Updates *source to point at next input
+	 * character and *sourcelen to reflect reduced input length
+	 *
+	 * The output area is exactly one UCS4 character wide, which is
+	 * what limits the conversion to a single character.
+	 */
+	iconv_ret = iconv(c->read_cd, (char **) source, sourcelen,
+			(char **) (void *) &pucs4, &sucs4);
+
+	if (iconv_ret != (size_t) -1 ||
+			(*source != origsrc && sucs4 == 0)) {
+		/* Read a character */
+		error = hubbub_iconv_codec_filter_decoded_char(c,
+				ucs4, dest, destlen);
+		if (error != HUBBUB_OK && error != HUBBUB_NOMEM) {
+			/* filter function failed; restore source pointers */
+			*source = origsrc;
+			*sourcelen = origsrclen;
+		}
+
+		/* Clear inval buffer */
+		c->inval_buf[0] = '\0';
+		c->inval_len = 0;
+
+		return error;
+	} else if (errno == E2BIG) {
+		/* Should never happen */
+		abort();
+	} else if (errno == EINVAL) {
+		/* Incomplete input sequence: stash what is left so it
+		 * can be completed with data from the next chunk.
+		 *
+		 * inval_buf must hold the bytes plus a NUL terminator,
+		 * so at most INVAL_BUFSIZE - 1 bytes can be stored. */
+		if (*sourcelen >= INVAL_BUFSIZE)
+			abort();
+
+		/* memmove, as *source may point into inval_buf itself */
+		memmove(c->inval_buf, (const char *) *source, *sourcelen);
+		c->inval_buf[*sourcelen] = '\0';
+		c->inval_len = *sourcelen;
+
+		*source += *sourcelen;
+		*sourcelen = 0;
+
+		return HUBBUB_OK;
+	} else if (errno == EILSEQ) {
+		/* Illegal input sequence */
+		bool found = false;
+		const uint8_t *oldsrc;
+		size_t oldsrclen;
+
+		/* Clear inval buffer */
+		c->inval_buf[0] = '\0';
+		c->inval_len = 0;
+
+		/* Strict errormode; simply flag invalid character */
+		if (c->base.errormode == HUBBUB_CHARSETCODEC_ERROR_STRICT) {
+			/* restore source pointers */
+			*source = origsrc;
+			*sourcelen = origsrclen;
+
+			return HUBBUB_INVALID;
+		}
+
+		/* Ok, this becomes problematic. The iconv API here
+		 * is particularly unhelpful; *source will point at
+		 * the _start_ of the illegal sequence. This means
+		 * that we must find the end of the sequence */
+
+		/* Search for the start of the next valid input
+		 * sequence (or the end of the input stream) */
+		while (*sourcelen > 1) {
+			pucs4 = (uint8_t *) &ucs4;
+			sucs4 = 4;
+
+			(*source)++;
+			(*sourcelen)--;
+
+			/* Remember the probe position: the trial
+			 * conversion below moves *source on success */
+			oldsrc = *source;
+			oldsrclen = *sourcelen;
+
+			iconv_ret = iconv(c->read_cd,
+					(char **) source, sourcelen,
+					(char **) (void *) &pucs4, &sucs4);
+			if (iconv_ret != (size_t) -1 || errno != EILSEQ) {
+				found = true;
+				break;
+			}
+		}
+
+		if (found) {
+			/* Found start of next valid sequence */
+			*source = oldsrc;
+			*sourcelen = oldsrclen;
+		} else {
+			/* Not found - skip last byte in buffer */
+			(*source)++;
+			(*sourcelen)--;
+
+			if (*sourcelen != 0)
+				abort();
+		}
+
+		/* output U+FFFD and continue processing. */
+		error = hubbub_iconv_codec_filter_decoded_char(c,
+				htonl(0xFFFD), dest, destlen);
+		if (error != HUBBUB_OK && error != HUBBUB_NOMEM) {
+			/* filter function failed; restore source pointers */
+			*source = origsrc;
+			*sourcelen = origsrclen;
+		}
+
+		return error;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Write a UCS4 character in a codec's native charset
+ *
+ * \param c The codec
+ * \param ucs4 The UCS4 character to write (big endian)
+ * \param dest Pointer to pointer to output buffer (updated on exit)
+ * \param destlen Pointer to length of output buffer (updated on exit)
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if character cannot be represented and the
+ * codec's error handling mode is set to STRICT.
+ */
+hubbub_error hubbub_iconv_codec_write_char(hubbub_iconv_codec *c,
+		uint32_t ucs4, uint8_t **dest, size_t *destlen)
+{
+	uint8_t * const start = *dest;
+	uint8_t *in = (uint8_t *) &ucs4;
+	size_t inlen = 4;
+	size_t ret;
+
+	/* First attempt: convert the character exactly as supplied */
+	ret = iconv(c->write_cd, (char **) (void *) &in, &inlen,
+			(char **) dest, destlen);
+	if (ret == (size_t) -1) {
+		if (errno == E2BIG) {
+			/* Output buffer is too small */
+			return HUBBUB_NOMEM;
+		}
+
+		if (errno == EILSEQ || errno == EINVAL) {
+			/* Illegal or incomplete input; neither should
+			 * ever happen for a single UCS4 character */
+			abort();
+		}
+	}
+
+	/* Something was emitted, so there is nothing left to do */
+	if (*dest != start)
+		return HUBBUB_OK;
+
+	/* Nothing was output: the character has no representation in the
+	 * target charset, so let the error mode decide what happens */
+	switch (c->base.errormode) {
+	case HUBBUB_CHARSETCODEC_ERROR_STRICT:
+		return HUBBUB_INVALID;
+
+	case HUBBUB_CHARSETCODEC_ERROR_TRANSLIT:
+		/** \todo transliteration */
+	case HUBBUB_CHARSETCODEC_ERROR_LOOSE:
+		/* Substitute U+FFFD for Unicode targets, '?' otherwise,
+		 * and convert that instead */
+		if (hubbub_iconv_codec_is_unicode(c))
+			ucs4 = htonl(0xFFFD);
+		else
+			ucs4 = htonl(0x3F);
+
+		in = (uint8_t *) &ucs4;
+		inlen = 4;
+
+		ret = iconv(c->write_cd, (char **) (void *) &in, &inlen,
+				(char **) dest, destlen);
+		if (ret == (size_t) -1) {
+			if (errno == E2BIG)
+				return HUBBUB_NOMEM;
+
+			if (errno == EILSEQ || errno == EINVAL) {
+				/* This should never happen */
+				abort();
+			}
+		}
+		break;
+	}
+
+	return HUBBUB_OK;
+}
+
+/* Factory entry points through which the iconv-backed codec is reached */
+const hubbub_charsethandler hubbub_iconv_codec_handler = {
+	.handles_charset = hubbub_iconv_codec_handles_charset,
+	.create = hubbub_iconv_codec_create
+};
diff --git a/src/charset/codec_impl.h b/src/charset/codec_impl.h
new file mode 100644
index 0000000..eb5116b
--- /dev/null
+++ b/src/charset/codec_impl.h
@@ -0,0 +1,51 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_charset_codecimpl_h_
+#define hubbub_charset_codecimpl_h_
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "codec.h"
+
+/**
+ * Core charset codec definition; implementations extend this
+ */
+struct hubbub_charsetcodec {
+ uint16_t mibenum; /**< MIB enum for charset */
+
+ hubbub_charsetcodec_filter filter; /**< filter function */
+ void *filter_pw; /**< filter private word */
+
+ hubbub_charsetcodec_errormode errormode; /**< error mode */
+
+ hubbub_alloc alloc; /**< allocation function */
+ void *alloc_pw; /**< private word */
+
+ /* Per-implementation operations. Each takes the codec itself as
+ * its first argument; encode and decode share one signature, with
+ * in/out cursors over a source buffer and a destination buffer. */
+ struct {
+ /** Tear down a codec instance */
+ void (*destroy)(hubbub_charsetcodec *codec);
+ /** Encode a chunk of source data into dest */
+ hubbub_error (*encode)(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+ /** Decode a chunk of source data into dest */
+ hubbub_error (*decode)(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+ /** Reset a codec's internal state */
+ hubbub_error (*reset)(hubbub_charsetcodec *codec);
+ } handler; /**< Vtable for handler code */
+};
+
+/**
+ * Codec factory component definition
+ */
+typedef struct hubbub_charsethandler {
+ /** Report whether this implementation can handle the named charset */
+ bool (*handles_charset)(const char *charset);
+ /** Construct a codec for the named charset; alloc and pw are the
+ * client's allocation function and its private word */
+ hubbub_charsetcodec *(*create)(const char *charset,
+ hubbub_alloc alloc, void *pw);
+} hubbub_charsethandler;
+
+#endif
diff --git a/src/charset/codec_utf8.c b/src/charset/codec_utf8.c
new file mode 100644
index 0000000..86d667f
--- /dev/null
+++ b/src/charset/codec_utf8.c
@@ -0,0 +1,620 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+/* These two are for htonl / ntohl */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include "charset/aliases.h"
+#include "utils/utf8.h"
+#include "utils/utils.h"
+
+#include "codec_impl.h"
+
+/**
+ * UTF-8 charset codec
+ */
+typedef struct hubbub_utf8_codec {
+ hubbub_charsetcodec base; /**< Base class */
+
+#define INVAL_BUFSIZE (32)
+ uint8_t inval_buf[INVAL_BUFSIZE]; /**< Buffer for fixing up
+ * incomplete input
+ * sequences */
+ size_t inval_len; /**< Byte length of inval_buf */
+
+#define READ_BUFSIZE (8)
+ uint32_t read_buf[READ_BUFSIZE]; /**< Buffer for partial
+ * output sequences (decode)
+ * (host-endian) */
+ size_t read_len; /**< Character length of read_buf */
+
+#define WRITE_BUFSIZE (8)
+ uint32_t write_buf[WRITE_BUFSIZE]; /**< Buffer for partial
+ * output sequences (encode)
+ * (host-endian) */
+ size_t write_len; /**< Character length of write_buf */
+
+} hubbub_utf8_codec;
+
+/* Forward declarations. Everything here is file-local: the factory
+ * functions are published through hubbub_utf8_codec_handler, and the
+ * per-instance operations through the vtable that
+ * hubbub_utf8_codec_create fills in. */
+static bool hubbub_utf8_codec_handles_charset(const char *charset);
+static hubbub_charsetcodec *hubbub_utf8_codec_create(const char *charset,
+ hubbub_alloc alloc, void *pw);
+static void hubbub_utf8_codec_destroy (hubbub_charsetcodec *codec);
+static hubbub_error hubbub_utf8_codec_encode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+static hubbub_error hubbub_utf8_codec_decode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+static hubbub_error hubbub_utf8_codec_reset(hubbub_charsetcodec *codec);
+/* Internal helpers used by the decode path */
+static hubbub_error hubbub_utf8_codec_read_char(hubbub_utf8_codec *c,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen);
+static hubbub_error hubbub_utf8_codec_filter_decoded_char(
+ hubbub_utf8_codec *c,
+ uint32_t ucs4, uint8_t **dest, size_t *destlen);
+
+/**
+ * Determine whether this codec handles a specific charset
+ *
+ * \param charset Charset to test
+ * \return true if handleable, false otherwise
+ */
+bool hubbub_utf8_codec_handles_charset(const char *charset)
+{
+	/* Compare by MIB enum so that every alias of UTF-8 is accepted */
+	if (hubbub_mibenum_from_name(charset, strlen(charset)) !=
+			hubbub_mibenum_from_name("UTF-8", SLEN("UTF-8")))
+		return false;
+
+	return true;
+}
+
+/**
+ * Create a utf8 codec
+ *
+ * \param charset The charset to read from / write to
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return Pointer to codec, or NULL on failure
+ */
+hubbub_charsetcodec *hubbub_utf8_codec_create(const char *charset,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_utf8_codec *codec;
+
+	/* Only one charset is handled here, so its name is irrelevant */
+	UNUSED(charset);
+
+	codec = alloc(NULL, sizeof(*codec), pw);
+	if (codec == NULL)
+		return NULL;
+
+	/* Wire up the implementation's vtable */
+	codec->base.handler.destroy = hubbub_utf8_codec_destroy;
+	codec->base.handler.encode = hubbub_utf8_codec_encode;
+	codec->base.handler.decode = hubbub_utf8_codec_decode;
+	codec->base.handler.reset = hubbub_utf8_codec_reset;
+
+	/* Start with all three staging buffers empty */
+	codec->inval_len = 0;
+	codec->inval_buf[0] = '\0';
+
+	codec->read_len = 0;
+	codec->read_buf[0] = 0;
+
+	codec->write_len = 0;
+	codec->write_buf[0] = 0;
+
+	/* The base object is the first member, so this is the codec itself */
+	return &codec->base;
+}
+
+/**
+ * Destroy a utf8 codec
+ *
+ * \param codec The codec to destroy
+ */
+void hubbub_utf8_codec_destroy (hubbub_charsetcodec *codec)
+{
+ /* Nothing is released here: the function only marks its argument
+ * as unused. */
+ UNUSED(codec);
+}
+
+/**
+ * Encode a chunk of UCS4 data into utf8
+ *
+ * \param codec The codec to use
+ * \param source Pointer to pointer to source data
+ * \param sourcelen Pointer to length (in bytes) of source data
+ * \param dest Pointer to pointer to output buffer
+ * \param destlen Pointer to length (in bytes) of output buffer
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if a character cannot be represented and the
+ * codec's error handling mode is set to STRICT,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read. Any remaining output for the character will be buffered by the
+ * codec for writing on the next call. This buffered data is post-filtering,
+ * so will not be refiltered on the next call.
+ *
+ * In the case of the filter function failing, ::source will point _at_ the
+ * last input character read; nothing will be written or buffered for the
+ * failed character. It is up to the client to fix the cause of the failure
+ * and retry the encoding process.
+ *
+ * Note that, if failure occurs whilst attempting to write any output
+ * buffered by the last call, then ::source and ::sourcelen will remain
+ * unchanged (as nothing more has been read).
+ *
+ * There is no way to determine the output character which caused a
+ * failure (as it may be one in a filter-injected replacement sequence).
+ * It is, however, possible to determine which source character caused it
+ * (this being the character immediately before the location pointed to by
+ * ::source on exit).
+ *
+ * [I.e. the process of filtering results in a potential one-to-many mapping
+ * between source characters and output characters, and identification of
+ * individual output characters is impossible.]
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ */
+hubbub_error hubbub_utf8_codec_encode(hubbub_charsetcodec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	hubbub_utf8_codec *c = (hubbub_utf8_codec *) codec;
+	uint32_t ucs4;
+	uint32_t *towrite;
+	size_t towritelen;
+	hubbub_error error;
+
+	/* Process any outstanding characters from the previous call */
+	if (c->write_len > 0) {
+		uint32_t *pwrite = c->write_buf;
+		uint8_t buf[6];
+		size_t len;
+
+		while (c->write_len > 0) {
+			error = hubbub_utf8_from_ucs4(pwrite[0], buf, &len);
+			if (error != HUBBUB_OK)
+				abort();
+
+			if (*destlen < len) {
+				/* Insufficient output buffer space; shuffle
+				 * what is still pending down to the start of
+				 * the save area for the next call */
+				size_t i;
+
+				for (i = 0; i < c->write_len; i++)
+					c->write_buf[i] = pwrite[i];
+
+				return HUBBUB_NOMEM;
+			}
+
+			memcpy(*dest, buf, len);
+
+			*dest += len;
+			*destlen -= len;
+
+			pwrite++;
+			c->write_len--;
+		}
+	}
+
+	/* Now process the characters for this call.
+	 *
+	 * Input is consumed in whole 4-byte UCS4 units only. A trailing
+	 * fragment of 1-3 bytes is left unconsumed (the caller can detect
+	 * it from a non-zero *sourcelen on return); reading it would run
+	 * past the end of the input and wrap *sourcelen. */
+	while (*sourcelen >= 4) {
+		uint32_t be;
+
+		/* Load via memcpy: *source is a byte pointer with no
+		 * alignment guarantee */
+		memcpy(&be, *source, sizeof(be));
+		ucs4 = ntohl(be);
+		towrite = &ucs4;
+		towritelen = 1;
+
+		/* Run character we're about to output through the
+		 * registered filter, so it can replace it. */
+		if (c->base.filter != NULL) {
+			error = c->base.filter(ucs4,
+					&towrite, &towritelen,
+					c->base.filter_pw);
+			if (error != HUBBUB_OK)
+				return error;
+		}
+
+		/* Output current characters */
+		while (towritelen > 0) {
+			uint8_t buf[6];
+			size_t len;
+
+			error = hubbub_utf8_from_ucs4(towrite[0], buf, &len);
+			if (error != HUBBUB_OK)
+				abort();
+
+			if (*destlen < len) {
+				size_t i;
+
+				/* Insufficient output space */
+				if (towritelen >= WRITE_BUFSIZE)
+					abort();
+
+				c->write_len = towritelen;
+
+				/* Copy pending chars to save area, for
+				 * processing next call. */
+				for (i = 0; i < towritelen; i++)
+					c->write_buf[i] = towrite[i];
+
+				/* Claim character we've just buffered,
+				 * so it's not reprocessed */
+				*source += 4;
+				*sourcelen -= 4;
+
+				return HUBBUB_NOMEM;
+			}
+
+			memcpy(*dest, buf, len);
+
+			*dest += len;
+			*destlen -= len;
+
+			towrite++;
+			towritelen--;
+		}
+
+		*source += 4;
+		*sourcelen -= 4;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Decode a chunk of utf8 data into UCS4
+ *
+ * \param codec The codec to use
+ * \param source Pointer to pointer to source data
+ * \param sourcelen Pointer to length (in bytes) of source data
+ * \param dest Pointer to pointer to output buffer
+ * \param destlen Pointer to length (in bytes) of output buffer
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if a character cannot be represented and the
+ * codec's error handling mode is set to STRICT,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read, if the result is _OK or _NOMEM. Any remaining output for the
+ * character will be buffered by the codec for writing on the next call.
+ * This buffered data is post-filtering, so will not be refiltered on the
+ * next call.
+ *
+ * In the case of the result being _INVALID or the filter function failing,
+ * ::source will point _at_ the last input character read; nothing will be
+ * written or buffered for the failed character. It is up to the client to
+ * fix the cause of the failure and retry the decoding process.
+ *
+ * Note that, if failure occurs whilst attempting to write any output
+ * buffered by the last call, then ::source and ::sourcelen will remain
+ * unchanged (as nothing more has been read).
+ *
+ * There is no way to determine the output character which caused a
+ * failure (as it may be one in a filter-injected replacement sequence).
+ * It is, however, possible to determine which source character caused it
+ * (this being the character immediately at or before the location pointed
+ * to by ::source on exit).
+ *
+ * [I.e. the process of filtering results in a potential one-to-many mapping
+ * between source characters and output characters, and identification of
+ * individual output characters is impossible.]
+ *
+ * If STRICT error handling is configured and an illegal sequence is split
+ * over two calls, then _INVALID will be returned from the second call,
+ * but ::source will point mid-way through the invalid sequence (i.e. it
+ * will be unmodified over the second call). In addition, the internal
+ * incomplete-sequence buffer will be emptied, such that subsequent calls
+ * will progress, rather than re-evaluating the same invalid sequence.
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ *
+ * Call this with a source length of 0 to flush the output buffer.
+ */
+hubbub_error hubbub_utf8_codec_decode(hubbub_charsetcodec *codec,
+ const uint8_t **source, size_t *sourcelen,
+ uint8_t **dest, size_t *destlen)
+{
+ hubbub_utf8_codec *c = (hubbub_utf8_codec *) codec;
+ hubbub_error error;
+
+ if (c->read_len > 0) {
+ /* Output left over from last decode */
+ uint32_t *pread = c->read_buf;
+
+ /* The loop condition demands room for everything still
+ * buffered, so the flush is all-or-nothing */
+ while (c->read_len > 0 && *destlen >= c->read_len * 4) {
+ *((uint32_t *) (void *) *dest) = htonl(pread[0]);
+
+ *dest += 4;
+ *destlen -= 4;
+
+ pread++;
+ c->read_len--;
+ }
+
+ if (*destlen < c->read_len * 4) {
+ /* Ran out of output buffer */
+ size_t i;
+
+ /* Shuffle remaining output down */
+ for (i = 0; i < c->read_len; i++)
+ c->read_buf[i] = pread[i];
+
+ return HUBBUB_NOMEM;
+ }
+ }
+
+ if (c->inval_len > 0) {
+ /* The last decode ended in an incomplete sequence.
+ * Fill up inval_buf with data from the start of the
+ * new chunk and process it. */
+ uint8_t *in = c->inval_buf;
+ size_t ol = c->inval_len;
+ /* One byte is held back; read_char writes a terminator after
+ * any data it stashes in inval_buf */
+ size_t l = min(INVAL_BUFSIZE - ol - 1, *sourcelen);
+ size_t orig_l = l;
+
+ memcpy(c->inval_buf + ol, *source, l);
+
+ /* l now counts old plus newly appended bytes */
+ l += c->inval_len;
+
+ /* Decode one character out of inval_buf; on return, in and l
+ * describe whatever was left unread there */
+ error = hubbub_utf8_codec_read_char(c,
+ (const uint8_t **) &in, &l, dest, destlen);
+ if (error != HUBBUB_OK && error != HUBBUB_NOMEM) {
+ /* *source and *sourcelen are deliberately untouched */
+ return error;
+ }
+
+ /* And now, fix up source pointers */
+ /* orig_l - l is how many of the appended bytes were consumed;
+ * the difference wraps when fewer than ol bytes were read,
+ * hence the signed cast and the clamp to zero */
+ *source += max((signed) (orig_l - l), 0);
+ *sourcelen -= max((signed) (orig_l - l), 0);
+
+ /* Failed to resolve an incomplete character and
+ * ran out of buffer space. No recovery strategy
+ * possible, so explode everywhere. */
+ if ((orig_l + ol) - l == 0)
+ abort();
+
+ /* Report memory exhaustion case from above */
+ if (error != HUBBUB_OK)
+ return error;
+ }
+
+ /* Finally, the "normal" case; process all outstanding characters */
+ while (*sourcelen > 0) {
+ error = hubbub_utf8_codec_read_char(c,
+ source, sourcelen, dest, destlen);
+ if (error != HUBBUB_OK) {
+ return error;
+ }
+ }
+
+ return HUBBUB_OK;
+}
+
+/**
+ * Clear a utf8 codec's encoding state
+ *
+ * \param codec The codec to reset
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_utf8_codec_reset(hubbub_charsetcodec *codec)
+{
+	hubbub_utf8_codec *utf8 = (hubbub_utf8_codec *) codec;
+
+	/* Forget any incomplete input sequence */
+	utf8->inval_len = 0;
+	utf8->inval_buf[0] = '\0';
+
+	/* Discard decoded output that was awaiting delivery */
+	utf8->read_len = 0;
+	utf8->read_buf[0] = 0;
+
+	/* Discard encoder output that was awaiting delivery */
+	utf8->write_len = 0;
+	utf8->write_buf[0] = 0;
+
+	return HUBBUB_OK;
+}
+
+
+/**
+ * Read a single UTF-8 character and convert it to UCS4 (big endian)
+ *
+ * \param c The codec
+ * \param source Pointer to pointer to source buffer (updated on exit)
+ * \param sourcelen Pointer to length of source buffer (updated on exit)
+ * \param dest Pointer to pointer to output buffer (updated on exit)
+ * \param destlen Pointer to length of output buffer (updated on exit)
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * HUBBUB_INVALID if a character cannot be represented and the
+ * codec's error handling mode is set to STRICT,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read, if the result is _OK or _NOMEM. Any remaining output for the
+ * character will be buffered by the codec for writing on the next call.
+ * This buffered data is post-filtering, so will not be refiltered on the
+ * next call.
+ *
+ * In the case of the result being _INVALID or the filter function failing,
+ * ::source will point _at_ the last input character read; nothing will be
+ * written or buffered for the failed character. It is up to the client to
+ * fix the cause of the failure and retry the decoding process.
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ */
+hubbub_error hubbub_utf8_codec_read_char(hubbub_utf8_codec *c,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	uint32_t ucs4;
+	size_t sucs4;
+	hubbub_error error;
+
+	/* Convert a single character */
+	error = hubbub_utf8_to_ucs4(*source, *sourcelen, &ucs4, &sucs4);
+	if (error == HUBBUB_OK) {
+		/* Read a character */
+		error = hubbub_utf8_codec_filter_decoded_char(c,
+				ucs4, dest, destlen);
+		if (error == HUBBUB_OK || error == HUBBUB_NOMEM) {
+			/* filter function succeeded; update source pointers */
+			*source += sucs4;
+			*sourcelen -= sucs4;
+		}
+
+		/* Clear inval buffer */
+		c->inval_buf[0] = '\0';
+		c->inval_len = 0;
+
+		return error;
+	} else if (error == HUBBUB_NEEDDATA) {
+		/* Incomplete input sequence */
+
+		/* ">=" rather than ">": the terminator written below
+		 * needs a slot too, so at most INVAL_BUFSIZE - 1 bytes
+		 * of data fit */
+		if (*sourcelen >= INVAL_BUFSIZE)
+			abort();
+
+		/* memmove, as *source may already lie within inval_buf */
+		memmove(c->inval_buf, (char *) *source, *sourcelen);
+		c->inval_buf[*sourcelen] = '\0';
+		c->inval_len = *sourcelen;
+
+		*source += *sourcelen;
+		*sourcelen = 0;
+
+		return HUBBUB_OK;
+	} else if (error == HUBBUB_INVALID) {
+		/* Illegal input sequence */
+		uint32_t nextchar;
+
+		/* Clear inval buffer */
+		c->inval_buf[0] = '\0';
+		c->inval_len = 0;
+
+		/* Strict errormode; simply flag invalid character */
+		if (c->base.errormode == HUBBUB_CHARSETCODEC_ERROR_STRICT) {
+			return HUBBUB_INVALID;
+		}
+
+		/* Find next valid UTF-8 sequence.
+		 * We're processing client-provided data, so let's
+		 * be paranoid about its validity. */
+		error = hubbub_utf8_next_paranoid(*source, *sourcelen,
+				0, &nextchar);
+		if (error != HUBBUB_OK) {
+			if (error == HUBBUB_NEEDDATA) {
+				/* Need more data to be sure */
+
+				/* Same bound as above: leave room for
+				 * the terminator */
+				if (*sourcelen >= INVAL_BUFSIZE)
+					abort();
+
+				memmove(c->inval_buf, (char *) *source,
+						*sourcelen);
+				c->inval_buf[*sourcelen] = '\0';
+				c->inval_len = *sourcelen;
+
+				*source += *sourcelen;
+				*sourcelen = 0;
+
+				/* All input already claimed; nothing more
+				 * to skip after emitting the replacement */
+				nextchar = 0;
+			} else {
+				return error;
+			}
+		}
+
+		/* output U+FFFD and continue processing. */
+		error = hubbub_utf8_codec_filter_decoded_char(c,
+				0xFFFD, dest, destlen);
+		if (error == HUBBUB_OK || error == HUBBUB_NOMEM) {
+			/* filter function succeeded; update source pointers */
+			*source += nextchar;
+			*sourcelen -= nextchar;
+		}
+
+		return error;
+	}
+
+	/* Any other conversion failure: no input was consumed, so
+	 * reporting success would leave the caller's decode loop spinning
+	 * on the same bytes forever. Propagate the error instead. */
+	return error;
+}
+
+/**
+ * Feed a UCS4 character through the registered filter and output the result
+ *
+ * \param c Codec to use
+ * \param ucs4 UCS4 character (host endian)
+ * \param dest Pointer to pointer to output buffer
+ * \param destlen Pointer to output buffer length
+ * \return HUBBUB_OK on success,
+ * HUBBUB_NOMEM if output buffer is too small,
+ * <any_other_error> as a result of the failure of the
+ * client-provided filter function.
+ */
+hubbub_error hubbub_utf8_codec_filter_decoded_char(hubbub_utf8_codec *c,
+		uint32_t ucs4, uint8_t **dest, size_t *destlen)
+{
+	/* Default to passing the character through unchanged. This is the
+	 * unfiltered case, and also gives the filter defined values to
+	 * leave alone (matching how the encode path primes its filter). */
+	uint32_t *rep = &ucs4;
+	size_t replen = 1;
+	size_t i;
+
+	if (c->base.filter != NULL) {
+		hubbub_error error;
+
+		error = c->base.filter(ucs4, &rep, &replen,
+				c->base.filter_pw);
+		if (error != HUBBUB_OK) {
+			return error;
+		}
+	}
+
+	/* Output is all-or-nothing: either the whole sequence fits, or the
+	 * whole sequence is buffered for the next call. The test divides
+	 * rather than multiplying so that a large replen cannot overflow. */
+	if (*destlen / 4 < replen) {
+		/* Run out of output buffer */
+
+		/* A replacement longer than the save area cannot be
+		 * buffered; fail loudly rather than overrun read_buf */
+		if (replen > READ_BUFSIZE)
+			abort();
+
+		/* Buffer remaining output */
+		for (i = 0; i < replen; i++) {
+			c->read_buf[i] = rep[i];
+		}
+		c->read_len = replen;
+
+		return HUBBUB_NOMEM;
+	}
+
+	for (i = 0; i < replen; i++) {
+		/* Store via memcpy: *dest is a byte pointer with no
+		 * alignment guarantee */
+		const uint32_t be = htonl(rep[i]);
+
+		memcpy(*dest, &be, sizeof(be));
+
+		*dest += 4;
+		*destlen -= 4;
+	}
+
+	return HUBBUB_OK;
+}
+
+
+/* Factory entry points through which the UTF-8 codec is reached */
+const hubbub_charsethandler hubbub_utf8_codec_handler = {
+	.handles_charset = hubbub_utf8_codec_handles_charset,
+	.create = hubbub_utf8_codec_create
+};
diff --git a/src/charset/detect.c b/src/charset/detect.c
new file mode 100644
index 0000000..8ff3b87
--- /dev/null
+++ b/src/charset/detect.c
@@ -0,0 +1,673 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "charset/aliases.h"
+#include "utils/utils.h"
+
+#include "detect.h"
+
+/* Forward declarations of the file-local helpers used by
+ * hubbub_charset_extract. Those returning uint16_t yield a MIB enum,
+ * with 0 meaning "nothing found". */
+static uint16_t hubbub_charset_read_bom(const uint8_t **data, size_t *len);
+static uint16_t hubbub_charset_scan_meta(const uint8_t *data, size_t len);
+static uint16_t hubbub_charset_parse_attributes(const uint8_t **pos,
+ const uint8_t *end);
+static uint16_t hubbub_charset_parse_content(const uint8_t *value,
+ uint32_t valuelen);
+static bool hubbub_charset_get_attribute(const uint8_t **data,
+ const uint8_t *end,
+ const uint8_t **name, uint32_t *namelen,
+ const uint8_t **value, uint32_t *valuelen);
+
+/**
+ * Extract a charset from a chunk of data
+ *
+ * \param data Pointer to pointer to buffer containing data
+ * \param len Pointer to buffer length
+ * \param mibenum Pointer to location to store MIB enum representing charset
+ * \param source Pointer to location to receive charset source
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * The data pointer and length will be modified by this function if
+ * a byte order mark is encountered at the start of the buffer. The updated
+ * data pointer will point to the first byte in the buffer after the BOM.
+ * The length will be modified appropriately.
+ *
+ * The larger a chunk of data fed to this routine, the better, as it allows
+ * charset autodetection access to a larger dataset for analysis.
+ */
+hubbub_error hubbub_charset_extract(const uint8_t **data, size_t *len,
+		uint16_t *mibenum, hubbub_charset_source *source)
+{
+	uint16_t charset = 0;
+
+	if (data == NULL || *data == NULL || len == NULL ||
+			mibenum == NULL || source == NULL)
+		return HUBBUB_BADPARM;
+
+	/* With fewer than 4 bytes there is nothing to inspect; skip
+	 * straight to the fallback below */
+	if (*len >= 4) {
+		/* A BOM takes precedence over anything in the markup */
+		charset = hubbub_charset_read_bom(data, len);
+
+		if (charset == 0) {
+			/* No BOM, so look for a meta charset within the
+			 * document itself */
+			charset = hubbub_charset_scan_meta(*data, *len);
+		} else {
+			*mibenum = charset;
+			*source = HUBBUB_CHARSET_DOCUMENT;
+
+			return HUBBUB_OK;
+		}
+
+		if (charset != 0) {
+			/* ISO-8859-1 is promoted to Windows-1252, keeping
+			 * ISO-8859-1 only if Windows-1252 is unknown */
+			if (charset == hubbub_mibenum_from_name("ISO-8859-1",
+					SLEN("ISO-8859-1"))) {
+				charset = hubbub_mibenum_from_name(
+						"Windows-1252",
+						SLEN("Windows-1252"));
+				if (charset == 0)
+					charset = hubbub_mibenum_from_name(
+							"ISO-8859-1",
+							SLEN("ISO-8859-1"));
+			}
+
+			/* A meta charset naming a non-ASCII-compatible
+			 * encoding cannot be believed: such a document
+			 * should have carried a BOM (caught above), and the
+			 * declaration was just read with an ASCII-only
+			 * scanner, so the document plainly is not in that
+			 * encoding. Discard the claim and let
+			 * autodetection / the fallback decide. */
+			if (charset == hubbub_mibenum_from_name("UTF-16",
+					SLEN("UTF-16")) ||
+					charset == hubbub_mibenum_from_name(
+						"UTF-16LE", SLEN("UTF-16LE")) ||
+					charset == hubbub_mibenum_from_name(
+						"UTF-16BE", SLEN("UTF-16BE")) ||
+					charset == hubbub_mibenum_from_name(
+						"UTF-32", SLEN("UTF-32")) ||
+					charset == hubbub_mibenum_from_name(
+						"UTF-32LE", SLEN("UTF-32LE")) ||
+					charset == hubbub_mibenum_from_name(
+						"UTF-32BE", SLEN("UTF-32BE"))) {
+				charset = 0;
+			}
+
+			if (charset != 0) {
+				*mibenum = charset;
+				*source = HUBBUB_CHARSET_DOCUMENT;
+
+				return HUBBUB_OK;
+			}
+		}
+
+		/* Nothing usable was declared by the document; this is
+		 * where autodetection from the data would go. */
+
+		/** \todo Charset autodetection */
+	}
+
+	/* Fallback: Windows-1252, or ISO-8859-1 if that is unknown */
+	charset = hubbub_mibenum_from_name("Windows-1252",
+			SLEN("Windows-1252"));
+	if (charset == 0)
+		charset = hubbub_mibenum_from_name("ISO-8859-1",
+				SLEN("ISO-8859-1"));
+
+	*mibenum = charset;
+	*source = HUBBUB_CHARSET_DEFAULT;
+
+	return HUBBUB_OK;
+}
+
+
+/**
+ * Inspect the beginning of a buffer of data for the presence of a
+ * UTF Byte Order Mark.
+ *
+ * \param data Pointer to pointer to buffer containing data
+ * \param len Pointer to buffer length
+ * \return MIB enum representing encoding described by BOM, or 0 if not found
+ *
+ * If a BOM is found, the data pointer will be modified to point to the first
+ * byte in the buffer after the BOM. The length will also be modified
+ * appropriately.
+ */
+uint16_t hubbub_charset_read_bom(const uint8_t **data, size_t *len)
+{
+	/* Byte lengths of the recognised marks */
+	enum { UTF8_BOM = 3, UTF16_BOM = 2, UTF32_BOM = 4 };
+
+	const uint8_t *p;
+	const char *name;
+	size_t namelen;
+	size_t skip;
+
+	if (data == NULL || *data == NULL || len == NULL)
+		return 0;
+
+	/* We require at least 4 bytes of data */
+	if (*len < 4)
+		return 0;
+
+	p = *data;
+
+	/* The UTF-32 marks are tested first: the UTF-32LE mark begins
+	 * with the same two bytes as the UTF-16LE one */
+	if (p[0] == 0x00 && p[1] == 0x00 && p[2] == 0xFE && p[3] == 0xFF) {
+		name = "UTF-32BE";
+		namelen = SLEN("UTF-32BE");
+		skip = UTF32_BOM;
+	} else if (p[0] == 0xFF && p[1] == 0xFE &&
+			p[2] == 0x00 && p[3] == 0x00) {
+		name = "UTF-32LE";
+		namelen = SLEN("UTF-32LE");
+		skip = UTF32_BOM;
+	} else if (p[0] == 0xFE && p[1] == 0xFF) {
+		name = "UTF-16BE";
+		namelen = SLEN("UTF-16BE");
+		skip = UTF16_BOM;
+	} else if (p[0] == 0xFF && p[1] == 0xFE) {
+		name = "UTF-16LE";
+		namelen = SLEN("UTF-16LE");
+		skip = UTF16_BOM;
+	} else if (p[0] == 0xEF && p[1] == 0xBB && p[2] == 0xBF) {
+		name = "UTF-8";
+		namelen = SLEN("UTF-8");
+		skip = UTF8_BOM;
+	} else {
+		/* No mark present; leave the caller's cursor alone */
+		return 0;
+	}
+
+	/* Step the caller's cursor past the mark */
+	*data += skip;
+	*len -= skip;
+
+	return hubbub_mibenum_from_name(name, namelen);
+}
+
+/**
+ * Case-insensitively test whether the literal \a a starts at pos.
+ *
+ * Relies on `pos` and `end` being in scope at the point of use. Only
+ * matches when pos is strictly before end - SLEN(a).
+ */
+#define PEEK(a) \
+	(pos < end - SLEN(a) && \
+	strncasecmp((const char *) pos, (a), SLEN(a)) == 0)
+
+/**
+ * Move pos forward to the next occurrence of the literal \a a.
+ *
+ * Relies on `pos` and `end` being in scope, and makes the enclosing
+ * function return 0 when no occurrence is found. Wrapped in
+ * do/while (0) so that it behaves as a single statement; the end test
+ * uses >= so that a pos already beyond the search limit also counts
+ * as "not found".
+ */
+#define ADVANCE(a) \
+	do { \
+		while (pos < end - SLEN(a)) { \
+			if (PEEK(a)) \
+				break; \
+			pos++; \
+		} \
+		\
+		if (pos >= end - SLEN(a)) \
+			return 0; \
+	} while (0)
+
+/**
+ * Test for tab, LF, VT, FF, CR or space.
+ *
+ * The argument is parenthesised so that expression arguments bind as
+ * intended; it is evaluated several times, so it must be free of side
+ * effects.
+ */
+#define ISSPACE(a) \
+	((a) == 0x09 || (a) == 0x0a || (a) == 0x0b || \
+	(a) == 0x0c || (a) == 0x0d || (a) == 0x20)
+
+/**
+ * Search for a meta charset within a buffer of data
+ *
+ * \param data Pointer to buffer containing data
+ * \param len Length of data buffer, in bytes
+ * \return MIB enum representing encoding, or 0 if none found
+ */
+uint16_t hubbub_charset_scan_meta(const uint8_t *data, size_t len)
+{
+ /* NOTE(review): the numbered/lettered step comments below look like
+ * references to an external algorithm description (presumably the
+ * HTML5 encoding prescan) -- confirm against the spec. */
+ const uint8_t *pos = data;
+ const uint8_t *end;
+ uint16_t mibenum;
+
+ if (data == NULL)
+ return 0;
+
+ /* Only the first 512 bytes (at most) are examined */
+ end = pos + min(512, len);
+
+ /* 1. */
+ while (pos < end) {
+ /* a */
+ /* Comment: skip to its terminator, or give up if there is none */
+ if (PEEK("<!--")) {
+ pos += SLEN("<!--");
+ ADVANCE("-->");
+ /* b */
+ } else if (PEEK("<meta")) {
+ if (pos + SLEN("<meta") >= end - 1)
+ return 0;
+
+ /* Only a "<meta" followed by whitespace is treated as
+ * a meta element */
+ if (ISSPACE(*(pos + SLEN("<meta")))) {
+ /* 1 */
+ pos += SLEN("<meta");
+
+ mibenum = hubbub_charset_parse_attributes(
+ &pos, end);
+ if (mibenum != 0)
+ return mibenum;
+
+ if (pos >= end)
+ return 0;
+ }
+ /* c */
+ /* "</" or "<" followed by an ASCII letter; clearing bit 0x20
+ * folds lower case onto upper case before the range test */
+ } else if ((PEEK("</") && (pos < end - 3 &&
+ (0x41 <= (*(pos + 2) & ~ 0x20) &&
+ (*(pos + 2) & ~ 0x20) <= 0x5A))) ||
+ (pos < end - 2 && *pos == '<' &&
+ (0x41 <= (*(pos + 1) & ~ 0x20) &&
+ (*(pos + 1) & ~ 0x20) <= 0x5A))) {
+
+ /* skip '<' */
+ pos++;
+
+ /* 1. */
+ /* Skip to the first whitespace, '>' or '<' */
+ while (pos < end) {
+ if (ISSPACE(*pos) ||
+ *pos == '>' || *pos == '<')
+ break;
+ pos++;
+ }
+
+ if (pos >= end)
+ return 0;
+
+ /* 3 */
+ if (*pos != '<') {
+ const uint8_t *n;
+ const uint8_t *v;
+ uint32_t nl, vl;
+
+ /* Consume attributes until none remain; their
+ * names and values are discarded */
+ while (hubbub_charset_get_attribute(&pos, end,
+ &n, &nl, &v, &vl))
+ ; /* do nothing */
+ /* 2 */
+ } else
+ /* Stopped on '<': re-examine it from the top of the
+ * loop without advancing */
+ continue;
+ /* d */
+ } else if (PEEK("<!") || PEEK("</") || PEEK("<?")) {
+ pos++;
+ ADVANCE(">");
+ }
+
+ /* e - do nothing */
+
+ /* 2 */
+ pos++;
+ }
+
+ return 0;
+}
+
+/**
+ * Parse attributes on a meta tag
+ *
+ * Attributes are extracted one at a time with
+ * hubbub_charset_get_attribute(). A "charset" attribute has its value
+ * trimmed of surrounding space characters and looked up directly; a
+ * "content" attribute is handed to hubbub_charset_parse_content().
+ * The first non-zero MIB enum found is returned.
+ *
+ * \param pos Pointer to pointer to current location (updated on exit)
+ * \param end Pointer to end of data stream
+ * \return MIB enum of detected encoding, or 0 if none found
+ */
+uint16_t hubbub_charset_parse_attributes(const uint8_t **pos,
+		const uint8_t *end)
+{
+	const uint8_t *name;
+	const uint8_t *value;
+	uint32_t namelen, valuelen;
+	uint16_t mibenum;
+
+	if (pos == NULL || *pos == NULL || end == NULL)
+		return 0;
+
+	/* 2 */
+	while (hubbub_charset_get_attribute(pos, end,
+			&name, &namelen, &value, &valuelen)) {
+		/* 3 */
+		/* a */
+		if (namelen == SLEN("charset") && valuelen > 0 &&
+				strncasecmp((const char *) name, "charset",
+					SLEN("charset")) == 0) {
+			/* Strip leading space characters. The loop is bounded
+			 * by valuelen so that it can neither run past the
+			 * value nor wrap the unsigned length. */
+			while (valuelen > 0 && ISSPACE(*value)) {
+				value++;
+				valuelen--;
+			}
+
+			/* Strip trailing space characters */
+			while (valuelen > 0 && ISSPACE(value[valuelen - 1]))
+				valuelen--;
+
+			mibenum = hubbub_mibenum_from_name(
+					(const char *) value, valuelen);
+			if (mibenum != 0)
+				return mibenum;
+		/* b */
+		} else if (namelen == SLEN("content") && valuelen > 0 &&
+				strncasecmp((const char *) name, "content",
+					SLEN("content")) == 0) {
+			mibenum = hubbub_charset_parse_content(value,
+					valuelen);
+			if (mibenum != 0)
+				return mibenum;
+		}
+
+		/* c - do nothing */
+
+		/* 1 - move to the next space character before looking
+		 * for another attribute */
+		while (*pos < end) {
+			if (ISSPACE(**pos))
+				break;
+			(*pos)++;
+		}
+
+		if (*pos >= end) {
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Parse a content= attribute's value
+ *
+ * Looks for the first ';' in the value, then expects (with optional
+ * space characters between the parts) the word "charset", an '=' and
+ * an encoding name. The name may be enclosed in matching single or
+ * double quotes; otherwise it extends to the next space character or
+ * the end of the value.
+ *
+ * \param value Attribute's value
+ * \param valuelen Length of value
+ * \return MIB enum of detected encoding, or 0 if none found
+ */
+uint16_t hubbub_charset_parse_content(const uint8_t *value,
+		uint32_t valuelen)
+{
+	const uint8_t *end;
+	const uint8_t *tentative = NULL;
+	uint32_t tentative_len = 0;
+
+	if (value == NULL)
+		return 0;
+
+	end = value + valuelen;
+
+	/* 1 - find the first ';' and step over it */
+	while (value < end) {
+		if (*value == ';') {
+			value++;
+			break;
+		}
+
+		value++;
+	}
+
+	if (value >= end)
+		return 0;
+
+	/* 2 */
+	while (value < end && ISSPACE(*value)) {
+		value++;
+	}
+
+	if (value >= end)
+		return 0;
+
+	/* 3 - "charset" must be followed by at least one more byte.
+	 * Checking the remaining length first keeps value from being
+	 * advanced beyond end when too little data is left. */
+	if ((size_t) (end - value) <= SLEN("charset") ||
+			strncasecmp((const char *) value,
+				"charset", SLEN("charset")) != 0)
+		return 0;
+
+	value += SLEN("charset");
+
+	/* 4 */
+	while (value < end && ISSPACE(*value)) {
+		value++;
+	}
+
+	if (value >= end)
+		return 0;
+
+	/* 5 */
+	if (*value != '=')
+		return 0;
+	/* skip '=' */
+	value++;
+
+	/* 6 */
+	while (value < end && ISSPACE(*value)) {
+		value++;
+	}
+
+	if (value >= end)
+		return 0;
+
+	/* 7 */
+	tentative = value;
+
+	/* a, b - quoted name: count bytes up to the matching quote */
+	if (*value == '"' || *value == '\'') {
+		const uint8_t quote = *value;
+
+		while (++value < end && *value != quote) {
+			tentative_len++;
+		}
+
+		/* Only accept the name if the closing quote was found */
+		if (value < end)
+			tentative++;
+		else
+			tentative = NULL;
+	/* c - unquoted name: runs to the next space character */
+	} else {
+		while (value < end && !ISSPACE(*value)) {
+			value++;
+			tentative_len++;
+		}
+	}
+
+	/* 8 */
+	if (tentative != NULL) {
+		return hubbub_mibenum_from_name((const char *) tentative,
+				tentative_len);
+	}
+
+	/* 9 */
+	return 0;
+}
+
+/**
+ * Extract an attribute from the data stream
+ *
+ * \param data Pointer to pointer to current location (updated on exit)
+ * \param end Pointer to end of data stream
+ * \param name Pointer to location to receive attribute name
+ * \param namelen Pointer to location to receive attribute name length
+ * \param value Pointer to location to receive attribute value
+ * \param valuelen Pointer to location to receive attribute value length
+ * \return true if attribute extracted, false otherwise.
+ *
+ * Note: The caller should heed the returned lengths; these are the only
+ * indicator that useful content resides in name or value.
+ *
+ * Every path that returns true stores an updated position in ::data, so
+ * a caller that loops on this function always makes progress.
+ */
+bool hubbub_charset_get_attribute(const uint8_t **data, const uint8_t *end,
+		const uint8_t **name, uint32_t *namelen,
+		const uint8_t **value, uint32_t *valuelen)
+{
+	const uint8_t *pos;
+
+	if (data == NULL || *data == NULL || end == NULL || name == NULL ||
+			namelen == NULL || value == NULL || valuelen == NULL)
+		return false;
+
+	pos = *data;
+
+	/* 1. Skip leading spaces or '/' characters */
+	while (pos < end && (ISSPACE(*pos) || *pos == '/')) {
+		pos++;
+	}
+
+	if (pos >= end) {
+		*data = pos;
+		return false;
+	}
+
+	/* 2. Invalid element open character. Step back one byte so that a
+	 * caller which advances by one after a false return lands on '<' */
+	if (*pos == '<') {
+		pos--;
+		*data = pos;
+		return false;
+	}
+
+	/* 3. End of element */
+	if (*pos == '>') {
+		*data = pos;
+		return false;
+	}
+
+	/* 4. Initialise name & value to empty string */
+	*name = pos;
+	*namelen = 0;
+	*value = (const uint8_t *) "";
+	*valuelen = 0;
+
+	/* 5. Extract name */
+	while (pos < end) {
+		/* a, b */
+		if (*pos == '=' || ISSPACE(*pos)) {
+			break;
+		}
+
+		/* c - attribute with no value. The position must be
+		 * reported back here too; otherwise the caller would be
+		 * handed the same attribute again on its next call. */
+		if (*pos == '/' || *pos == '<' || *pos == '>') {
+			*data = pos;
+			return true;
+		}
+
+		/* d is handled by strncasecmp in _parse_attributes */
+
+		/* e */
+		(*namelen)++;
+
+		/* 6 */
+		pos++;
+	}
+
+	if (pos >= end) {
+		*data = pos;
+		return false;
+	}
+
+	if (ISSPACE(*pos)) {
+		/* 7. Skip trailing spaces */
+		while (pos < end && ISSPACE(*pos)) {
+			pos++;
+		}
+
+		if (pos >= end) {
+			*data = pos;
+			return false;
+		}
+
+		/* 8. Must be '='; if not, the attribute has no value.
+		 * Step back onto the last space that was skipped. */
+		if (*pos != '=') {
+			pos--;
+			*data = pos;
+			return true;
+		}
+	}
+
+	/* 9. Skip '=' */
+	pos++;
+
+	/* 10. Skip any spaces after '=' */
+	while (pos < end && ISSPACE(*pos)) {
+		pos++;
+	}
+
+	if (pos >= end) {
+		*data = pos;
+		return false;
+	}
+
+	/* 11. Extract value, if quoted */
+	/* a */
+	if (*pos == '\'' || *pos == '"') {
+		/* 1 */
+		const uint8_t *quote = pos;
+
+		/* 2 */
+		while (++pos < end) {
+			/* 3 */
+			if (*pos == *quote) {
+				*value = (quote + 1);
+				*data = ++pos;
+				return true;
+			}
+
+			/* 4 is handled by strncasecmp */
+
+			/* 5 */
+			(*valuelen)++;
+
+			/* 6 */
+		}
+
+		/* Ran out of data before the closing quote */
+		*data = pos;
+		return false;
+	}
+
+	/* b */
+	if (*pos == '<' || *pos == '>') {
+		*data = pos;
+		return true;
+	}
+
+	/* c is handled by strncasecmp */
+
+	/* d */
+	*value = pos;
+
+	while (pos < end) {
+		/* 12. Extract unquoted value */
+		/* a */
+		if (ISSPACE(*pos) || *pos == '<' || *pos == '>') {
+			*data = pos;
+			return true;
+		}
+
+		/* b is handled by strncasecmp */
+
+		/* c */
+		(*valuelen)++;
+
+		/* 13. Advance */
+		pos++;
+	}
+
+	/* Ran out of data before the unquoted value was terminated */
+	*data = pos;
+	return false;
+}
diff --git a/src/charset/detect.h b/src/charset/detect.h
new file mode 100644
index 0000000..854a8d6
--- /dev/null
+++ b/src/charset/detect.h
@@ -0,0 +1,22 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_charset_detect_h_
+#define hubbub_charset_detect_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+/* Extract a charset from a chunk of data */
+hubbub_error hubbub_charset_extract(const uint8_t **data, size_t *len,
+ uint16_t *mibenum, hubbub_charset_source *source);
+
+#endif
+
diff --git a/src/hubbub.c b/src/hubbub.c
new file mode 100644
index 0000000..32e0a1f
--- /dev/null
+++ b/src/hubbub.c
@@ -0,0 +1,63 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <hubbub/hubbub.h>
+
+#include "charset/aliases.h"
+#include "tokeniser/entities.h"
+
+/**
+ * Prepare the Hubbub library for use.
+ *
+ * No other hubbub function may be used until this has been called.
+ * The encoding alias data is created first, followed by the entity
+ * data; should the latter fail, the alias data is destroyed again
+ * before the error is reported.
+ *
+ * \param aliases_file Pointer to name of file containing encoding alias data
+ * \param alloc Pointer to (de)allocation function
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success, applicable error otherwise.
+ */
+hubbub_error hubbub_initialise(const char *aliases_file,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_error status;
+
+	if (alloc == NULL || aliases_file == NULL)
+		return HUBBUB_BADPARM;
+
+	status = hubbub_aliases_create(aliases_file, alloc, pw);
+	if (status == HUBBUB_OK) {
+		status = hubbub_entities_create(alloc, pw);
+
+		/* Undo the first step if the second one failed */
+		if (status != HUBBUB_OK)
+			hubbub_aliases_destroy(alloc, pw);
+	}
+
+	return status;
+}
+
+/**
+ * Release the resources set up by hubbub_initialise()
+ *
+ * The entity data is destroyed first, then the alias data.
+ *
+ * \param alloc Pointer to (de)allocation function
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success, applicable error otherwise.
+ */
+hubbub_error hubbub_finalise(hubbub_alloc alloc, void *pw)
+{
+	if (alloc != NULL) {
+		hubbub_entities_destroy(alloc, pw);
+		hubbub_aliases_destroy(alloc, pw);
+
+		return HUBBUB_OK;
+	}
+
+	return HUBBUB_BADPARM;
+}
+
+
diff --git a/src/input/Makefile b/src/input/Makefile
new file mode 100644
index 0000000..8b06c63
--- /dev/null
+++ b/src/input/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Objects (base names; ".o" and the output directory are added below)
+OBJS = filter inputstream utf8_stream
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+# release and debug build the same objects into ../Release and ../Debug
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+# Remove the objects of both build flavours. The leading "-" makes make
+# carry on if the removal fails; "@" stops the command being echoed.
+clean:
+ -@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+# The remaining required targets have nothing to do in this directory
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules
+# Release objects are compiled with NDEBUG defined
+../Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+# Debug objects are compiled with -g and without NDEBUG
+../Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/input/filter.c b/src/input/filter.c
new file mode 100644
index 0000000..5ac5391
--- /dev/null
+++ b/src/input/filter.c
@@ -0,0 +1,380 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "charset/aliases.h"
+#include "charset/codec.h"
+#include "utils/utils.h"
+
+#include "input/filter.h"
+
+
+/**
+ * Input filter
+ *
+ * Data is decoded with the read codec into the pivot buffer and then
+ * encoded from the pivot buffer with the write codec.
+ */
+struct hubbub_filter {
+ hubbub_charsetcodec *read_codec; /**< Read codec */
+ hubbub_charsetcodec *write_codec; /**< Write codec */
+
+ /* Two entries: the character filter emits at most two characters
+ * per input character */
+ uint32_t filter_output[2]; /**< Filter output buffer */
+ uint32_t last_filter_char; /**< Last filtered character */
+
+ uint32_t pivot_buf[64]; /**< Conversion pivot buffer */
+
+ /* When leftover is set, pivot_left points at the part of pivot_buf
+ * that the write codec has not yet consumed */
+ bool leftover; /**< Data remains from last call */
+ uint8_t *pivot_left; /**< Remaining pivot to write */
+ size_t pivot_len; /**< Length of pivot remaining */
+
+ struct {
+ uint16_t encoding; /**< Input encoding */
+ } settings; /**< Filter settings */
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Client private data */
+};
+
+static hubbub_error hubbub_filter_set_defaults(hubbub_filter *input);
+static hubbub_error hubbub_filter_set_encoding(hubbub_filter *input,
+ const char *enc);
+static hubbub_error read_character_filter(uint32_t c,
+ uint32_t **output, size_t *outputlen, void *pw);
+
+/**
+ * Create an input filter
+ *
+ * The filter starts out with its default settings applied (see
+ * hubbub_filter_set_defaults()) and a write codec for ::int_enc.
+ * On any failure everything created so far is released again.
+ *
+ * \param int_enc Desired encoding of document
+ * \param alloc Function used to (de)allocate data
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return Pointer to filter instance, or NULL on failure
+ */
+hubbub_filter *hubbub_filter_create(const char *int_enc,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_filter *filter;
+
+	if (alloc == NULL)
+		return NULL;
+
+	filter = alloc(NULL, sizeof(*filter), pw);
+	if (!filter)
+		return NULL;
+
+	filter->last_filter_char = 0;
+
+	filter->leftover = false;
+	filter->pivot_left = NULL;
+	filter->pivot_len = 0;
+
+	filter->alloc = alloc;
+	filter->pw = pw;
+
+	if (hubbub_filter_set_defaults(filter) != HUBBUB_OK) {
+		/* set_defaults clears read_codec before doing anything
+		 * else, so a non-NULL value here is a codec that was
+		 * created before the failure and must not be leaked */
+		if (filter->read_codec != NULL)
+			hubbub_charsetcodec_destroy(filter->read_codec);
+		filter->alloc(filter, 0, pw);
+		return NULL;
+	}
+
+	filter->write_codec = hubbub_charsetcodec_create(int_enc, alloc, pw);
+	if (filter->write_codec == NULL) {
+		if (filter->read_codec != NULL)
+			hubbub_charsetcodec_destroy(filter->read_codec);
+		filter->alloc(filter, 0, pw);
+		return NULL;
+	}
+
+	return filter;
+}
+
+/**
+ * Destroy an input filter
+ *
+ * Both codecs are destroyed, where present, before the filter itself
+ * is handed back to its allocation function. Passing NULL is harmless.
+ *
+ * \param input Pointer to filter instance
+ */
+void hubbub_filter_destroy(hubbub_filter *input)
+{
+	if (input != NULL) {
+		if (input->read_codec != NULL)
+			hubbub_charsetcodec_destroy(input->read_codec);
+
+		if (input->write_codec != NULL)
+			hubbub_charsetcodec_destroy(input->write_codec);
+
+		input->alloc(input, 0, input->pw);
+	}
+}
+
+/**
+ * Configure an input filter
+ *
+ * The only option acted upon is HUBBUB_FILTER_SET_ENCODING; any other
+ * option type leaves the filter untouched and reports success.
+ *
+ * \param input Pointer to filter instance
+ * \param type Input option type to configure
+ * \param params Option-specific parameters
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_filter_setopt(hubbub_filter *input,
+		hubbub_filter_opttype type,
+		hubbub_filter_optparams *params)
+{
+	if (input == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	if (type == HUBBUB_FILTER_SET_ENCODING) {
+		return hubbub_filter_set_encoding(input,
+				params->encoding.name);
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Process a chunk of data
+ *
+ * Input is decoded with the read codec into the filter's pivot buffer,
+ * one buffer-full at a time, and each buffer-full is then encoded with
+ * the write codec into the output buffer. If the write codec reports an
+ * error, the unwritten remainder of the pivot buffer is remembered and
+ * flushed first on the next call.
+ *
+ * \param input Pointer to filter instance
+ * \param data Pointer to pointer to input buffer
+ * \param len Pointer to length of input buffer
+ * \param output Pointer to pointer to output buffer
+ * \param outlen Pointer to length of output buffer
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * Call this with an input buffer length of 0 to flush any buffers.
+ */
+hubbub_error hubbub_filter_process_chunk(hubbub_filter *input,
+ const uint8_t **data, size_t *len,
+ uint8_t **output, size_t *outlen)
+{
+ hubbub_error read_error, write_error;
+
+ if (input == NULL || data == NULL || *data == NULL || len == NULL ||
+ output == NULL || *output == NULL || outlen == NULL)
+ return HUBBUB_BADPARM;
+
+ if (input->leftover) {
+ /* Some data left to be written from last call */
+
+ /* Attempt to flush the remaining data. */
+ write_error = hubbub_charsetcodec_encode(input->write_codec,
+ (const uint8_t **) &input->pivot_left,
+ &input->pivot_len,
+ output, outlen);
+
+ /* On failure the leftover state is kept for the next call */
+ if (write_error != HUBBUB_OK) {
+ return write_error;
+ }
+
+ /* And clear leftover */
+ input->pivot_left = NULL;
+ input->pivot_len = 0;
+ input->leftover = false;
+ }
+
+ while (*len > 0) {
+ /* Start each pass with the whole pivot buffer available */
+ size_t pivot_len = sizeof(input->pivot_buf);
+ uint8_t *pivot = (uint8_t *) input->pivot_buf;
+
+ read_error = hubbub_charsetcodec_decode(input->read_codec,
+ data, len,
+ (uint8_t **) &pivot, &pivot_len);
+
+ /* Rewind to the start of the pivot buffer and turn the space
+ * remaining into the number of bytes that were produced */
+ pivot = (uint8_t *) input->pivot_buf;
+ pivot_len = sizeof(input->pivot_buf) - pivot_len;
+
+ if (pivot_len > 0) {
+ write_error = hubbub_charsetcodec_encode(
+ input->write_codec,
+ (const uint8_t **) &pivot,
+ &pivot_len,
+ output, outlen);
+
+ /* Remember what is still unwritten; any read error from
+ * this pass is not reported in this case */
+ if (write_error != HUBBUB_OK) {
+ input->leftover = true;
+ input->pivot_left = pivot;
+ input->pivot_len = pivot_len;
+
+ return write_error;
+ }
+ }
+
+ /* HUBBUB_NOMEM from the decoder is tolerated and the loop goes
+ * round again -- presumably it signals a full pivot buffer;
+ * TODO confirm against the codec implementation */
+ if (read_error != HUBBUB_OK && read_error != HUBBUB_NOMEM)
+ return read_error;
+ }
+
+ return HUBBUB_OK;
+}
+
+/**
+ * Reset an input filter's state
+ *
+ * Any pending pivot data is discarded, then the read codec and the
+ * write codec are reset in that order. The write codec is not touched
+ * if resetting the read codec fails.
+ *
+ * \param input The input filter to reset
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_filter_reset(hubbub_filter *input)
+{
+	hubbub_error status;
+
+	if (input == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Forget whatever was left over from the last chunk */
+	input->leftover = false;
+	input->pivot_len = 0;
+	input->pivot_left = NULL;
+
+	status = hubbub_charsetcodec_reset(input->read_codec);
+	if (status == HUBBUB_OK)
+		status = hubbub_charsetcodec_reset(input->write_codec);
+
+	return status;
+}
+
+/**
+ * Set an input filter's default settings
+ *
+ * Clears both codec pointers and the recorded encoding, then selects
+ * "ISO-8859-1" as the input encoding.
+ *
+ * \param input Input filter to configure
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_filter_set_defaults(hubbub_filter *input)
+{
+	if (input == NULL)
+		return HUBBUB_BADPARM;
+
+	input->settings.encoding = 0;
+	input->write_codec = NULL;
+	input->read_codec = NULL;
+
+	return hubbub_filter_set_encoding(input, "ISO-8859-1");
+}
+
+/**
+ * Set an input filter's encoding
+ *
+ * A codec for the new encoding is created and fully configured before
+ * the filter's existing read codec is replaced. If any step fails, the
+ * filter keeps its previous read codec and encoding setting.
+ *
+ * \param input Input filter to configure
+ * \param enc Encoding name
+ * \return HUBBUB_OK on success, appropriate error otherwise:
+ *         HUBBUB_BADPARM if a parameter is NULL,
+ *         HUBBUB_INVALID if the encoding name is not recognised,
+ *         HUBBUB_NOMEM if the codec could not be created
+ */
+hubbub_error hubbub_filter_set_encoding(hubbub_filter *input,
+		const char *enc)
+{
+	hubbub_charsetcodec *codec;
+	hubbub_charsetcodec_optparams params;
+	hubbub_error error;
+	uint16_t mibenum;
+
+	if (input == NULL || enc == NULL)
+		return HUBBUB_BADPARM;
+
+	mibenum = hubbub_mibenum_from_name(enc, strlen(enc));
+	if (mibenum == 0)
+		return HUBBUB_INVALID;
+
+	/* Exit early if we're already using this encoding */
+	if (input->settings.encoding == mibenum)
+		return HUBBUB_OK;
+
+	codec = hubbub_charsetcodec_create(enc, input->alloc, input->pw);
+	if (codec == NULL)
+		return HUBBUB_NOMEM;
+
+	/* Register filter function */
+	params.filter_func.filter = read_character_filter;
+	params.filter_func.pw = (void *) input;
+	error = hubbub_charsetcodec_setopt(codec,
+			HUBBUB_CHARSETCODEC_FILTER_FUNC,
+			(hubbub_charsetcodec_optparams *) &params);
+	if (error != HUBBUB_OK) {
+		/* Don't leak the half-configured codec */
+		hubbub_charsetcodec_destroy(codec);
+		return error;
+	}
+
+	/* The new codec is ready: only now retire the old one */
+	if (input->read_codec != NULL)
+		hubbub_charsetcodec_destroy(input->read_codec);
+
+	input->read_codec = codec;
+	input->settings.encoding = mibenum;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Character filter function for read characters
+ *
+ * Replaces NUL with U+FFFD and normalises line endings to LF.
+ * A CR is never emitted directly; it is held back until the next
+ * character is seen. If that character is LF, the pair collapses to a
+ * single LF. For any other character, including another CR or a NUL,
+ * an LF is emitted for the held-back CR before the character itself
+ * is dealt with.
+ *
+ * At most two characters are produced per call.
+ *
+ * \param c The read character (UCS4 - host byte order)
+ * \param output Pointer to pointer to output buffer (filled on exit)
+ * \param outputlen Pointer to output buffer length (filled on exit)
+ * \param pw Pointer to client-specific private data.
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error read_character_filter(uint32_t c, uint32_t **output,
+		size_t *outputlen, void *pw)
+{
+	hubbub_filter *input = (hubbub_filter *) pw;
+	size_t len = 0;
+
+	if (output == NULL || outputlen == NULL || pw == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Line ending normalisation:
+	 * CRLF -> LF (trap CR and let LF through unmodified)
+	 * CR -> LF (trap CR and convert to LF if not CRLF)
+	 * LF -> LF (leave LF alone)
+	 */
+
+#define NUL (0x00000000)
+#define CR (0x0000000D)
+#define LF (0x0000000A)
+#define REP (0x0000FFFD)
+
+	/* Resolve a CR held back by the previous call first, so that it
+	 * is not lost when the current character is itself CR or NUL */
+	if (input->last_filter_char == CR && c != LF)
+		input->filter_output[len++] = LF;
+
+	if (c == NUL) {
+		/* Replace NUL (U+0000) characters in input with U+FFFD */
+		input->filter_output[len++] = REP;
+	} else if (c != CR) {
+		/* Let character through unchanged */
+		input->filter_output[len++] = c;
+	}
+	/* else: trap CR until the next character is known */
+
+#undef NUL
+#undef CR
+#undef LF
+#undef REP
+
+	input->last_filter_char = c;
+
+	*output = input->filter_output;
+	*outputlen = len;
+
+	return HUBBUB_OK;
+}
diff --git a/src/input/filter.h b/src/input/filter.h
new file mode 100644
index 0000000..6650e09
--- /dev/null
+++ b/src/input/filter.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_input_filter_h_
+#define hubbub_input_filter_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+
+typedef struct hubbub_filter hubbub_filter;
+
+/**
+ * Input filter option types
+ *
+ * Passed as the type argument of hubbub_filter_setopt().
+ */
+typedef enum hubbub_filter_opttype {
+ HUBBUB_FILTER_SET_ENCODING = 0,
+} hubbub_filter_opttype;
+
+/**
+ * Input filter option parameters
+ *
+ * Passed as the params argument of hubbub_filter_setopt().
+ */
+typedef union hubbub_filter_optparams {
+ /** Parameters for encoding setting */
+ struct {
+ /** Encoding name */
+ const char *name;
+ } encoding;
+} hubbub_filter_optparams;
+
+
+/* Create an input filter */
+hubbub_filter *hubbub_filter_create(const char *int_enc,
+ hubbub_alloc alloc, void *pw);
+/* Destroy an input filter */
+void hubbub_filter_destroy(hubbub_filter *input);
+
+/* Configure an input filter */
+hubbub_error hubbub_filter_setopt(hubbub_filter *input,
+ hubbub_filter_opttype type,
+ hubbub_filter_optparams *params);
+
+/* Process a chunk of data */
+hubbub_error hubbub_filter_process_chunk(hubbub_filter *input,
+ const uint8_t **data, size_t *len,
+ uint8_t **output, size_t *outlen);
+
+/* Reset an input filter's state */
+hubbub_error hubbub_filter_reset(hubbub_filter *input);
+
+#endif
+
diff --git a/src/input/inputstream.c b/src/input/inputstream.c
new file mode 100644
index 0000000..f82d279
--- /dev/null
+++ b/src/input/inputstream.c
@@ -0,0 +1,479 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdlib.h>
+
+#include "charset/aliases.h"
+#include "input/streamimpl.h"
+
+/**
+ * Buffer moving claimant context
+ *
+ * One entry in a stream's doubly-linked list of registered
+ * buffer-moved handlers.
+ */
+struct hubbub_inputstream_bm_handler {
+ hubbub_inputstream_buffermoved handler; /**< Handler function */
+ void *pw; /**< Client private data */
+
+ /* List links; prev is NULL for the entry at the head of the list */
+ struct hubbub_inputstream_bm_handler *next;
+ struct hubbub_inputstream_bm_handler *prev;
+};
+
+/* Stream handler defined outside this file */
+extern hubbub_streamhandler utf8stream;
+
+/* NULL-terminated table of the stream handlers that are searched, in
+ * order, when an input stream is created */
+static hubbub_streamhandler *handler_table[] = {
+ &utf8stream,
+ NULL
+};
+
+/**
+ * Create an input stream
+ *
+ * The handler table is searched for the first handler that declares
+ * support for ::int_enc; that handler then creates the stream.
+ *
+ * \param enc Document charset, or NULL to autodetect
+ * \param int_enc Desired encoding of document
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return Pointer to stream instance, or NULL on failure
+ */
+hubbub_inputstream *hubbub_inputstream_create(const char *enc,
+		const char *int_enc, hubbub_alloc alloc, void *pw)
+{
+	hubbub_streamhandler **candidate = handler_table;
+	hubbub_inputstream *stream;
+
+	if (alloc == NULL || int_enc == NULL)
+		return NULL;
+
+	/* Find the first handler class that can deal with int_enc */
+	while (*candidate != NULL &&
+			!(*candidate)->uses_encoding(int_enc))
+		candidate++;
+
+	/* Reached the table's terminator: nothing suitable */
+	if (*candidate == NULL)
+		return NULL;
+
+	stream = (*candidate)->create(enc, int_enc, alloc, pw);
+	if (stream != NULL) {
+		stream->handlers = NULL;
+
+		stream->alloc = alloc;
+		stream->pw = pw;
+	}
+
+	return stream;
+}
+
+/**
+ * Destroy an input stream
+ *
+ * All registered buffer-moved handler entries are released before the
+ * stream's own destroy function is invoked.
+ *
+ * \param stream Input stream to destroy
+ */
+void hubbub_inputstream_destroy(hubbub_inputstream *stream)
+{
+	hubbub_inputstream_bm_handler *entry, *following;
+
+	if (stream == NULL)
+		return;
+
+	entry = stream->handlers;
+	while (entry != NULL) {
+		/* Read the link before the entry is released */
+		following = entry->next;
+
+		stream->alloc(entry, 0, stream->pw);
+
+		entry = following;
+	}
+
+	stream->destroy(stream);
+}
+
+/**
+ * Append data to an input stream
+ *
+ * \param stream Input stream to append data to
+ * \param data Data to append (in document charset), or NULL to flag EOF
+ * \param len Length, in bytes, of data
+ * \return HUBBUB_OK on success, appropriate error otherwise:
+ *         HUBBUB_BADPARM if ::stream is NULL,
+ *         HUBBUB_INVALID if the stream's buffer has been claimed
+ */
+hubbub_error hubbub_inputstream_append(hubbub_inputstream *stream,
+		const uint8_t *data, size_t len)
+{
+	if (stream == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Appending makes no sense once the buffer has been disowned */
+	if (stream->buffer != NULL)
+		return stream->append(stream, data, len);
+
+	return HUBBUB_INVALID;
+}
+
+/**
+ * Insert data into stream at current location
+ *
+ * \param stream Input stream to insert into
+ * \param data Data to insert (UTF-8 encoded)
+ * \param len Length, in bytes, of data
+ * \return HUBBUB_OK on success, appropriate error otherwise:
+ *         HUBBUB_BADPARM if ::stream or ::data is NULL,
+ *         HUBBUB_INVALID if the stream's buffer has been claimed
+ */
+hubbub_error hubbub_inputstream_insert(hubbub_inputstream *stream,
+		const uint8_t *data, size_t len)
+{
+	if (data == NULL || stream == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Inserting makes no sense once the buffer has been disowned */
+	if (stream->buffer != NULL)
+		return stream->insert(stream, data, len);
+
+	return HUBBUB_INVALID;
+}
+
+/**
+ * Look at the next character in the stream
+ *
+ * HUBBUB_INPUTSTREAM_OOD is returned when ::stream is NULL or its
+ * buffer has been claimed.
+ *
+ * \param stream Stream to look in
+ * \return UCS4 (host-endian) character code, or EOF or OOD.
+ */
+uint32_t hubbub_inputstream_peek(hubbub_inputstream *stream)
+{
+	/* Peeking is only legal while the stream still owns its buffer */
+	if (stream != NULL && stream->buffer != NULL)
+		return stream->peek(stream);
+
+	return HUBBUB_INPUTSTREAM_OOD;
+}
+
+/**
+ * Retrieve the byte index and length of the current character in the stream
+ *
+ * \param stream Stream to look in
+ * \param len Pointer to location to receive byte length of character
+ * \return Byte index of current character from start of stream,
+ *         or (uint32_t) -1 on error (NULL parameter, or the stream's
+ *         buffer has been claimed)
+ */
+uint32_t hubbub_inputstream_cur_pos(hubbub_inputstream *stream,
+		size_t *len)
+{
+	/* Only legal while the stream still owns its buffer */
+	if (stream != NULL && len != NULL && stream->buffer != NULL)
+		return stream->cur_pos(stream, len);
+
+	return (uint32_t) -1;
+}
+
+/**
+ * Convert the current character to lower case
+ *
+ * Does nothing if ::stream is NULL or its buffer has been claimed.
+ *
+ * \param stream Stream to look in
+ */
+void hubbub_inputstream_lowercase(hubbub_inputstream *stream)
+{
+	if (stream != NULL && stream->buffer != NULL)
+		stream->lowercase(stream);
+}
+
+/**
+ * Convert the current character to upper case
+ *
+ * Does nothing if ::stream is NULL or its buffer has been claimed.
+ *
+ * \param stream Stream to look in
+ */
+void hubbub_inputstream_uppercase(hubbub_inputstream *stream)
+{
+	if (stream != NULL && stream->buffer != NULL)
+		stream->uppercase(stream);
+}
+
+/**
+ * Advance the stream's current position
+ *
+ * Does nothing if ::stream is NULL, if its buffer has been claimed, or
+ * if the cursor is already at the end of the buffered data.
+ *
+ * \param stream The stream whose position to advance
+ */
+void hubbub_inputstream_advance(hubbub_inputstream *stream)
+{
+	/* Only legal while the stream still owns its buffer */
+	if (stream == NULL || stream->buffer == NULL)
+		return;
+
+	if (stream->cursor != stream->buffer_len)
+		stream->advance(stream);
+}
+
+/**
+ * Push a character back onto the stream
+ *
+ * \param stream Stream to push back to
+ * \param character UCS4 (host-endian) codepoint to push back
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * The stream's data is never modified by this call. The character
+ * immediately before the current stream location is compared with
+ * ::character: if they are the same, the stream pointer moves back
+ * over it; if they differ, an error is returned and the stream
+ * pointer stays where it is.
+ */
+hubbub_error hubbub_inputstream_push_back(hubbub_inputstream *stream,
+		uint32_t character)
+{
+	/* Only legal while the stream still owns its buffer */
+	if (stream == NULL || stream->buffer == NULL)
+		return HUBBUB_BADPARM;
+
+	/* There is nothing before the start of the stream */
+	if (stream->cursor != 0)
+		return stream->push_back(stream, character);
+
+	return HUBBUB_INVALID;
+}
+
+/**
+ * Rewind the input stream by a number of bytes
+ *
+ * \param stream Stream to rewind
+ * \param n Number of bytes to go back
+ * \return HUBBUB_OK on success, appropriate error otherwise:
+ *         HUBBUB_BADPARM if ::stream is NULL or its buffer has been
+ *         claimed, HUBBUB_INVALID if ::n exceeds the cursor position
+ */
+hubbub_error hubbub_inputstream_rewind(hubbub_inputstream *stream, size_t n)
+{
+	if (stream == NULL || stream->buffer == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Cannot go back beyond the start of the stream */
+	if (n > stream->cursor)
+		return HUBBUB_INVALID;
+
+	stream->cursor -= n;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Claim ownership of an input stream's buffer
+ *
+ * \param stream Input stream whose buffer to claim
+ * \param buffer Pointer to location to receive buffer pointer
+ * \param len Pointer to location to receive byte length of buffer
+ * \return HUBBUB_OK on success, appropriate error otherwise.
+ *
+ * When a client claims the buffer, the input stream gives up all
+ * ownership rights to it and drops its own reference. As a result,
+ * destroying the input stream is the only stream call that may follow
+ * this one. For that reason the call fails unless the stream has seen
+ * EOF and its pointer is located at the end of the buffer.
+ */
+hubbub_error hubbub_inputstream_claim_buffer(hubbub_inputstream *stream,
+		uint8_t **buffer, size_t *len)
+{
+	if (stream == NULL || buffer == NULL || len == NULL)
+		return HUBBUB_BADPARM;
+
+	/* The whole document must have been received and consumed */
+	if (!stream->had_eof || stream->cursor != stream->buffer_len)
+		return HUBBUB_INVALID;
+
+	*len = stream->buffer_len;
+	*buffer = stream->buffer;
+
+	/* The stream no longer owns the buffer */
+	stream->buffer = NULL;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Register interest in buffer moved events
+ *
+ * The new handler is placed at the head of the stream's handler list
+ * and is called once, straight away, with the current buffer location.
+ *
+ * \param stream Input stream to register interest with
+ * \param handler Pointer to handler function
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_inputstream_register_movehandler(
+		hubbub_inputstream *stream,
+		hubbub_inputstream_buffermoved handler, void *pw)
+{
+	hubbub_inputstream_bm_handler *entry;
+
+	if (stream == NULL || handler == NULL)
+		return HUBBUB_BADPARM;
+
+	entry = stream->alloc(NULL, sizeof(hubbub_inputstream_bm_handler),
+			stream->pw);
+	if (entry == NULL)
+		return HUBBUB_NOMEM;
+
+	entry->pw = pw;
+	entry->handler = handler;
+
+	/* Link in at the head of the list */
+	entry->next = stream->handlers;
+	entry->prev = NULL;
+
+	if (stream->handlers != NULL)
+		stream->handlers->prev = entry;
+	stream->handlers = entry;
+
+	/* Tell the new claimant where the buffer is right now */
+	handler(stream->buffer, stream->buffer_len, pw);
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Deregister interest in buffer moved events
+ *
+ * The first list entry whose handler and private data both match is
+ * unlinked and released.
+ *
+ * \param stream Input stream to deregister from
+ * \param handler Pointer to handler function
+ * \param pw Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success, appropriate error otherwise:
+ *         HUBBUB_BADPARM if ::stream or ::handler is NULL,
+ *         HUBBUB_INVALID if no matching registration exists
+ */
+hubbub_error hubbub_inputstream_deregister_movehandler(
+		hubbub_inputstream *stream,
+		hubbub_inputstream_buffermoved handler, void *pw)
+{
+	hubbub_inputstream_bm_handler *entry;
+
+	if (stream == NULL || handler == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Look for the matching registration */
+	entry = stream->handlers;
+	while (entry != NULL &&
+			!(entry->handler == handler && entry->pw == pw))
+		entry = entry->next;
+
+	if (entry == NULL)
+		return HUBBUB_INVALID;
+
+	/* Unlink it; the list head moves on if this was the first entry */
+	if (entry->next != NULL)
+		entry->next->prev = entry->prev;
+
+	if (entry->prev != NULL)
+		entry->prev->next = entry->next;
+	else
+		stream->handlers = entry->next;
+
+	stream->alloc(entry, 0, stream->pw);
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Case insensitively compare a pair of ranges in the input stream
+ *
+ * A non-zero value is also returned when ::stream is NULL or its
+ * buffer has been claimed.
+ *
+ * \param stream Input stream to look in
+ * \param r1 Offset of start of first range
+ * \param r2 Offset of start of second range
+ * \param len Byte length of ranges
+ * \return 0 if ranges match, non-zero otherwise
+ */
+int hubbub_inputstream_compare_range_ci(hubbub_inputstream *stream,
+		uint32_t r1, uint32_t r2, size_t len)
+{
+	if (stream != NULL && stream->buffer != NULL)
+		return stream->cmp_range_ci(stream, r1, r2, len);
+
+	return 1; /* arbitrary */
+}
+
+/**
+ * Case sensitively compare a pair of ranges in the input stream
+ *
+ * A non-zero value is also returned when ::stream is NULL or its
+ * buffer has been claimed.
+ *
+ * \param stream Input stream to look in
+ * \param r1 Offset of start of first range
+ * \param r2 Offset of start of second range
+ * \param len Byte length of ranges
+ * \return 0 if ranges match, non-zero otherwise
+ */
+int hubbub_inputstream_compare_range_cs(hubbub_inputstream *stream,
+		uint32_t r1, uint32_t r2, size_t len)
+{
+	if (stream != NULL && stream->buffer != NULL)
+		return stream->cmp_range_cs(stream, r1, r2, len);
+
+	return 1; /* arbitrary */
+}
+
+/**
+ * Case sensitively compare a range of input stream against an ASCII string
+ *
+ * A non-zero value is also returned when ::stream is NULL or its
+ * buffer has been claimed.
+ *
+ * \param stream Input stream to look in
+ * \param off Offset of range start
+ * \param len Byte length of range
+ * \param data Comparison string
+ * \param dlen Byte length of comparison string
+ * \return 0 if match, non-zero otherwise
+ */
+int hubbub_inputstream_compare_range_ascii(hubbub_inputstream *stream,
+		uint32_t off, size_t len, const char *data, size_t dlen)
+{
+	if (stream != NULL && stream->buffer != NULL)
+		return stream->cmp_range_ascii(stream, off, len, data, dlen);
+
+	return 1; /* arbitrary */
+}
+
+/**
+ * Replace a range of bytes in the input stream with a single character
+ *
+ * The range must start inside the buffered data and must not start
+ * before the current cursor position.
+ *
+ * \param stream Input stream containing data
+ * \param start Offset of start of range to replace
+ * \param len Length (in bytes) of range to replace
+ * \param ucs4 UCS4 (host endian) encoded replacement character
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_inputstream_replace_range(hubbub_inputstream *stream,
+		uint32_t start, size_t len, uint32_t ucs4)
+{
+	if (stream == NULL || stream->buffer == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Reject a start offset outside [cursor, buffer_len) */
+	if (start >= stream->buffer_len || start < stream->cursor)
+		return HUBBUB_INVALID;
+
+	return stream->replace_range(stream, start, len, ucs4);
+}
+
+/**
+ * Read the document charset
+ *
+ * The charset source is stored through ::source even when the charset
+ * itself is unknown.
+ *
+ * \param stream Input stream to query
+ * \param source Pointer to location to receive charset source
+ * \return Pointer to charset name (constant; do not free), or NULL if unknown
+ */
+const char *hubbub_inputstream_read_charset(hubbub_inputstream *stream,
+		hubbub_charset_source *source)
+{
+	if (stream == NULL || source == NULL)
+		return NULL;
+
+	*source = stream->encsrc;
+
+	if (stream->encsrc != HUBBUB_CHARSET_UNKNOWN)
+		return hubbub_mibenum_to_name(stream->mibenum);
+
+	return NULL;
+}
+
+/**
+ * Inform interested parties that the buffer has moved
+ *
+ * Every registered handler is called, in list order, with the stream's
+ * current buffer pointer and length.
+ *
+ * \param stream Input stream
+ */
+void hubbub_inputstream_buffer_moved(hubbub_inputstream *stream)
+{
+	hubbub_inputstream_bm_handler *entry;
+
+	if (stream == NULL)
+		return;
+
+	entry = stream->handlers;
+	while (entry != NULL) {
+		entry->handler(stream->buffer, stream->buffer_len, entry->pw);
+		entry = entry->next;
+	}
+}
+
diff --git a/src/input/inputstream.h b/src/input/inputstream.h
new file mode 100644
index 0000000..5325d14
--- /dev/null
+++ b/src/input/inputstream.h
@@ -0,0 +1,98 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_input_inputstream_h_
+#define hubbub_input_inputstream_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+typedef struct hubbub_inputstream hubbub_inputstream;
+
+/* EOF pseudo-character: reported in place of a character when no further
+ * data will arrive (the UTF-8 stream's peek returns it once the buffered
+ * data is exhausted and EOF has been flagged) */
+#define HUBBUB_INPUTSTREAM_EOF (0xFFFFFFFFU)
+/* Out-of-data indicator: reported in place of a character when the
+ * buffered data is exhausted but EOF has not been flagged yet */
+#define HUBBUB_INPUTSTREAM_OOD (0xFFFFFFFEU)
+
+/* Type of input stream buffer moved handler function.
+ * A handler is called with the stream's buffer pointer, the amount of
+ * data in that buffer, and the handler's private word. */
+typedef void (*hubbub_inputstream_buffermoved)(const uint8_t *buffer,
+ size_t len, void *pw);
+
+/* Create an input stream */
+hubbub_inputstream *hubbub_inputstream_create(const char *enc,
+ const char *int_enc, hubbub_alloc alloc, void *pw);
+/* Destroy an input stream */
+void hubbub_inputstream_destroy(hubbub_inputstream *stream);
+
+/* Append data to an input stream */
+hubbub_error hubbub_inputstream_append(hubbub_inputstream *stream,
+ const uint8_t *data, size_t len);
+/* Insert data into stream at current location */
+hubbub_error hubbub_inputstream_insert(hubbub_inputstream *stream,
+ const uint8_t *data, size_t len);
+
+/* Look at the next character in the stream */
+uint32_t hubbub_inputstream_peek(hubbub_inputstream *stream);
+
+/* Retrieve the byte index and length of the current character in the stream */
+uint32_t hubbub_inputstream_cur_pos(hubbub_inputstream *stream, size_t *len);
+
+/* Convert the current character to lowercase */
+void hubbub_inputstream_lowercase(hubbub_inputstream *stream);
+
+/* Convert the current character to uppercase */
+void hubbub_inputstream_uppercase(hubbub_inputstream *stream);
+
+/* Advance the stream's current position */
+void hubbub_inputstream_advance(hubbub_inputstream *stream);
+
+/* Push a character back onto the stream */
+hubbub_error hubbub_inputstream_push_back(hubbub_inputstream *stream,
+ uint32_t character);
+
+/* Rewind the input stream by a number of bytes */
+hubbub_error hubbub_inputstream_rewind(hubbub_inputstream *stream, size_t n);
+
+/* Claim ownership of an input stream's buffer */
+hubbub_error hubbub_inputstream_claim_buffer(hubbub_inputstream *stream,
+ uint8_t **buffer, size_t *len);
+
+/* Register interest in buffer moved events */
+hubbub_error hubbub_inputstream_register_movehandler(
+ hubbub_inputstream *stream,
+ hubbub_inputstream_buffermoved handler, void *pw);
+
+/* Deregister interest in buffer moved events */
+hubbub_error hubbub_inputstream_deregister_movehandler(
+ hubbub_inputstream *stream,
+ hubbub_inputstream_buffermoved handler, void *pw);
+
+/* Case insensitively compare a pair of ranges in the input stream */
+int hubbub_inputstream_compare_range_ci(hubbub_inputstream *stream,
+ uint32_t r1, uint32_t r2, size_t len);
+
+/* Case sensitively compare a pair of ranges in the input stream */
+int hubbub_inputstream_compare_range_cs(hubbub_inputstream *stream,
+ uint32_t r1, uint32_t r2, size_t len);
+
+/* Case sensitively compare a range of input stream against an ASCII string */
+int hubbub_inputstream_compare_range_ascii(hubbub_inputstream *stream,
+ uint32_t off, size_t len, const char *data, size_t dlen);
+
+/* Replace a range of bytes in the input stream with a single character */
+hubbub_error hubbub_inputstream_replace_range(hubbub_inputstream *stream,
+ uint32_t start, size_t len, uint32_t ucs4);
+
+/* Read the document charset */
+const char *hubbub_inputstream_read_charset(hubbub_inputstream *stream,
+ hubbub_charset_source *source);
+
+#endif
+
diff --git a/src/input/streamimpl.h b/src/input/streamimpl.h
new file mode 100644
index 0000000..f44f6da
--- /dev/null
+++ b/src/input/streamimpl.h
@@ -0,0 +1,77 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_input_streamimpl_h_
+#define hubbub_input_streamimpl_h_
+
+#include <stdbool.h>
+
+#include <hubbub/types.h>
+
+#include "input/filter.h"
+#include "input/inputstream.h"
+
+typedef struct hubbub_inputstream_bm_handler hubbub_inputstream_bm_handler;
+
+/**
+ * Input stream definition: implementations extend this
+ *
+ * The data members describe the document buffer and its charset state.
+ * The function pointer members form the implementation's dispatch table;
+ * a stream implementation (e.g. the UTF-8 stream) fills them in when it
+ * creates an instance. Each one corresponds to the public
+ * hubbub_inputstream_* function named in its comment.
+ */
+struct hubbub_inputstream {
+ uint8_t *buffer; /**< Document buffer */
+ size_t buffer_len; /**< Amount of data in buffer */
+ size_t buffer_alloc; /**< Allocated size of buffer */
+
+ uint32_t cursor; /**< Byte offset of current position */
+
+ bool had_eof; /**< Whether EOF has been reached */
+
+ uint16_t mibenum; /**< MIB enum for charset, or 0 */
+ hubbub_charset_source encsrc; /**< Charset source */
+
+ hubbub_filter *input; /**< Charset conversion filter */
+
+ hubbub_inputstream_bm_handler *handlers; /**< List of buffer
+ * moved handlers */
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Client private data */
+
+ void (*destroy)(hubbub_inputstream *stream); /**< cf. hubbub_inputstream_destroy */
+ hubbub_error (*append)(hubbub_inputstream *stream,
+ const uint8_t *data, size_t len); /**< cf. hubbub_inputstream_append */
+ hubbub_error (*insert)(hubbub_inputstream *stream,
+ const uint8_t *data, size_t len); /**< cf. hubbub_inputstream_insert */
+ uint32_t (*peek)(hubbub_inputstream *stream); /**< cf. hubbub_inputstream_peek */
+ uint32_t (*cur_pos)(hubbub_inputstream *stream, size_t *len); /**< cf. hubbub_inputstream_cur_pos */
+ void (*lowercase)(hubbub_inputstream *stream); /**< cf. hubbub_inputstream_lowercase */
+ void (*uppercase)(hubbub_inputstream *stream); /**< cf. hubbub_inputstream_uppercase */
+ void (*advance)(hubbub_inputstream *stream); /**< cf. hubbub_inputstream_advance */
+ hubbub_error (*push_back)(hubbub_inputstream *stream,
+ uint32_t character); /**< cf. hubbub_inputstream_push_back */
+ int (*cmp_range_ci)(hubbub_inputstream *stream, uint32_t r1,
+ uint32_t r2, size_t len); /**< cf. hubbub_inputstream_compare_range_ci */
+ int (*cmp_range_cs)(hubbub_inputstream *stream, uint32_t r1,
+ uint32_t r2, size_t len); /**< cf. hubbub_inputstream_compare_range_cs */
+ int (*cmp_range_ascii)(hubbub_inputstream *stream,
+ uint32_t off, size_t len,
+ const char *data, size_t dlen); /**< cf. hubbub_inputstream_compare_range_ascii */
+ hubbub_error (*replace_range)(hubbub_inputstream *stream,
+ uint32_t start, size_t len, uint32_t ucs4); /**< cf. hubbub_inputstream_replace_range */
+};
+
+/**
+ * Input stream factory component definition
+ *
+ * One instance is provided per stream implementation (the UTF-8 stream
+ * exports one named utf8stream).
+ */
+typedef struct hubbub_streamhandler {
+ bool (*uses_encoding)(const char *int_enc); /**< Whether the implementation handles internal encoding int_enc */
+ hubbub_inputstream *(*create)(const char *enc, const char *int_enc,
+ hubbub_alloc alloc, void *pw); /**< Create a stream instance, or return NULL on failure */
+} hubbub_streamhandler;
+
+/* Notification of stream buffer moving */
+void hubbub_inputstream_buffer_moved(hubbub_inputstream *stream);
+
+#endif
diff --git a/src/input/utf8_stream.c b/src/input/utf8_stream.c
new file mode 100644
index 0000000..5d08993
--- /dev/null
+++ b/src/input/utf8_stream.c
@@ -0,0 +1,567 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "charset/aliases.h"
+#include "charset/detect.h"
+#include "input/streamimpl.h"
+#include "utils/utf8.h"
+#include "utils/utils.h"
+
+#define BUFFER_CHUNK (4096)
+
+static bool hubbub_utf8stream_uses_encoding(const char *int_enc);
+static hubbub_inputstream *hubbub_utf8stream_create(const char *enc,
+ const char *int_enc, hubbub_alloc alloc, void *pw);
+static void hubbub_utf8stream_destroy(hubbub_inputstream *stream);
+static hubbub_error hubbub_utf8stream_append(hubbub_inputstream *stream,
+ const uint8_t *data, size_t len);
+static hubbub_error hubbub_utf8stream_insert(hubbub_inputstream *stream,
+ const uint8_t *data, size_t len);
+static uint32_t hubbub_utf8stream_peek(hubbub_inputstream *stream);
+static uint32_t hubbub_utf8stream_cur_pos(hubbub_inputstream *stream,
+ size_t *len);
+static void hubbub_utf8stream_lowercase(hubbub_inputstream *stream);
+static void hubbub_utf8stream_uppercase(hubbub_inputstream *stream);
+static void hubbub_utf8stream_advance(hubbub_inputstream *stream);
+static hubbub_error hubbub_utf8stream_push_back(hubbub_inputstream *stream,
+ uint32_t character);
+static int hubbub_utf8stream_compare_range_ci(hubbub_inputstream *stream,
+ uint32_t r1, uint32_t r2, size_t len);
+static int hubbub_utf8stream_compare_range_cs(hubbub_inputstream *stream,
+ uint32_t r1, uint32_t r2, size_t len);
+static int hubbub_utf8stream_compare_range_ascii(hubbub_inputstream *stream,
+ uint32_t off, size_t len, const char *data, size_t dlen);
+static hubbub_error hubbub_utf8stream_replace_range(
+ hubbub_inputstream *stream,
+ uint32_t start, size_t len, uint32_t ucs4);
+
+/**
+ * Report whether this implementation handles a given internal encoding
+ *
+ * \param int_enc  Name of the desired internal encoding
+ * \return true if int_enc resolves to the same MIB enum as "UTF-8",
+ *         false otherwise
+ */
+bool hubbub_utf8stream_uses_encoding(const char *int_enc)
+{
+	/* Compare MIB enums rather than names so that aliases match too */
+	if (hubbub_mibenum_from_name(int_enc, strlen(int_enc)) !=
+			hubbub_mibenum_from_name("UTF-8", SLEN("UTF-8")))
+		return false;
+
+	return true;
+}
+
+/**
+ * Create an input stream
+ *
+ * \param enc      Document charset, or NULL if unknown
+ * \param int_enc  Desired encoding of document (must resolve to UTF-8)
+ * \param alloc    Memory (de)allocation function
+ * \param pw       Pointer to client-specific private data (may be NULL)
+ * \return Pointer to stream instance, or NULL on failure
+ *
+ * If enc is NULL, or is not a recognised charset name, the stream starts
+ * with an unknown charset (mibenum 0, HUBBUB_CHARSET_UNKNOWN) and the
+ * charset is detected when data is first appended.
+ */
+hubbub_inputstream *hubbub_utf8stream_create(const char *enc,
+		const char *int_enc, hubbub_alloc alloc, void *pw)
+{
+	hubbub_inputstream *stream;
+
+	if (hubbub_mibenum_from_name(int_enc, strlen(int_enc)) !=
+			hubbub_mibenum_from_name("UTF-8", SLEN("UTF-8")))
+		return NULL;
+
+	stream = alloc(NULL, sizeof(hubbub_inputstream), pw);
+	if (stream == NULL)
+		return NULL;
+
+	stream->buffer = alloc(NULL, BUFFER_CHUNK, pw);
+	if (stream->buffer == NULL) {
+		alloc(stream, 0, pw);
+		return NULL;
+	}
+
+	stream->buffer_len = 0;
+	stream->buffer_alloc = BUFFER_CHUNK;
+
+	stream->cursor = 0;
+
+	stream->had_eof = false;
+
+	/* Record the allocator and start with no move handlers, so that the
+	 * other functions in this file (which use stream->alloc and
+	 * stream->pw) never see indeterminate values */
+	stream->handlers = NULL;
+	stream->alloc = alloc;
+	stream->pw = pw;
+
+	stream->input = hubbub_filter_create(int_enc, alloc, pw);
+	if (stream->input == NULL) {
+		alloc(stream->buffer, 0, pw);
+		alloc(stream, 0, pw);
+		return NULL;
+	}
+
+	/* Assume the charset is unknown until told otherwise. This also
+	 * covers the case where enc is given but not recognised, which
+	 * previously left encsrc uninitialised. */
+	stream->mibenum = 0;
+	stream->encsrc = HUBBUB_CHARSET_UNKNOWN;
+
+	if (enc != NULL) {
+		stream->mibenum = hubbub_mibenum_from_name(enc, strlen(enc));
+
+		if (stream->mibenum != 0) {
+			hubbub_error error;
+			hubbub_filter_optparams params;
+
+			params.encoding.name = enc;
+
+			/* HUBBUB_INVALID from the filter is tolerated here */
+			error = hubbub_filter_setopt(stream->input,
+					HUBBUB_FILTER_SET_ENCODING, &params);
+			if (error != HUBBUB_OK && error != HUBBUB_INVALID) {
+				hubbub_filter_destroy(stream->input);
+				alloc(stream->buffer, 0, pw);
+				alloc(stream, 0, pw);
+				return NULL;
+			}
+
+			stream->encsrc = HUBBUB_CHARSET_DICTATED;
+		}
+	}
+
+	/* Populate the dispatch table with the UTF-8 implementation */
+	stream->destroy = hubbub_utf8stream_destroy;
+	stream->append = hubbub_utf8stream_append;
+	stream->insert = hubbub_utf8stream_insert;
+	stream->peek = hubbub_utf8stream_peek;
+	stream->cur_pos = hubbub_utf8stream_cur_pos;
+	stream->lowercase = hubbub_utf8stream_lowercase;
+	stream->uppercase = hubbub_utf8stream_uppercase;
+	stream->advance = hubbub_utf8stream_advance;
+	stream->push_back = hubbub_utf8stream_push_back;
+	stream->cmp_range_ci = hubbub_utf8stream_compare_range_ci;
+	stream->cmp_range_cs = hubbub_utf8stream_compare_range_cs;
+	stream->cmp_range_ascii = hubbub_utf8stream_compare_range_ascii;
+	stream->replace_range = hubbub_utf8stream_replace_range;
+
+	return stream;
+}
+
+/**
+ * Release an input stream together with its filter and buffer
+ *
+ * \param stream  Input stream to destroy
+ */
+void hubbub_utf8stream_destroy(hubbub_inputstream *stream)
+{
+	/* Fetch the allocator up front: the stream itself is freed last */
+	hubbub_alloc release = stream->alloc;
+	void *client = stream->pw;
+
+	if (stream->input != NULL)
+		hubbub_filter_destroy(stream->input);
+
+	if (stream->buffer != NULL)
+		release(stream->buffer, 0, client);
+
+	release(stream, 0, client);
+}
+
+/**
+ * Feed data through the charset filter, appending the output to the
+ * document buffer
+ *
+ * \param stream  Input stream whose buffer receives the converted data
+ * \param data    Pointer to source data pointer (handed to the filter)
+ * \param len     Pointer to source data length (handed to the filter)
+ * \return HUBBUB_OK on success, HUBBUB_NOMEM if the buffer cannot grow
+ *
+ * Whenever the filter reports HUBBUB_NOMEM, the buffer is enlarged by
+ * BUFFER_CHUNK bytes and the conversion is retried. Any other filter
+ * result ends the loop and is not reported to the caller.
+ */
+static hubbub_error hubbub_utf8stream_convert(hubbub_inputstream *stream,
+		const uint8_t **data, size_t *len)
+{
+	hubbub_error error;
+	uint8_t *base = stream->buffer + stream->buffer_len;
+	size_t space = stream->buffer_alloc - stream->buffer_len;
+
+	while ((error = hubbub_filter_process_chunk(stream->input,
+			data, len, &base, &space)) == HUBBUB_NOMEM) {
+		bool moved;
+		uint8_t *temp = stream->alloc(stream->buffer,
+				stream->buffer_alloc + BUFFER_CHUNK,
+				stream->pw);
+
+		if (temp == NULL)
+			return HUBBUB_NOMEM;
+
+		moved = (temp != stream->buffer);
+
+		stream->buffer = temp;
+		/* Everything except the unused tail has been written */
+		stream->buffer_len = stream->buffer_alloc - space;
+		stream->buffer_alloc += BUFFER_CHUNK;
+
+		/* Recompute the output window from the updated length.
+		 * (The old space must not be subtracted again: that
+		 * under-reported the free space and made the final
+		 * length fix-up count bytes that were never written.) */
+		base = stream->buffer + stream->buffer_len;
+		space = stream->buffer_alloc - stream->buffer_len;
+
+		if (moved)
+			hubbub_inputstream_buffer_moved(stream);
+	}
+
+	/* And fix up buffer length */
+	stream->buffer_len = stream->buffer_alloc - space;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Append data to an input stream
+ *
+ * \param stream  Input stream to append data to
+ * \param data    Data to append (in document charset), or NULL to flag EOF
+ * \param len     Length, in bytes, of data
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_utf8stream_append(hubbub_inputstream *stream,
+		const uint8_t *data, size_t len)
+{
+	hubbub_error error;
+
+	if (data == NULL) {
+		/* EOF indicated: run the filter with a zero-length input
+		 * to forcibly flush through any remaining buffered data */
+		size_t flush_len = 0;
+		const uint8_t *flush_data = (const uint8_t *) &flush_len;
+
+		error = hubbub_utf8stream_convert(stream,
+				&flush_data, &flush_len);
+		if (error != HUBBUB_OK)
+			return error;
+
+		stream->had_eof = true;
+	} else {
+		/* Normal data chunk */
+
+		if (stream->mibenum == 0) {
+			/* Haven't found charset yet; detect it */
+			error = hubbub_charset_extract(&data, &len,
+					&stream->mibenum, &stream->encsrc);
+			if (error) {
+				return error;
+			}
+
+			/* We should always have a charset by now */
+			if (stream->mibenum == 0)
+				abort();
+		}
+
+		/* Convert chunk to UTF-8 */
+		error = hubbub_utf8stream_convert(stream, &data, &len);
+		if (error != HUBBUB_OK)
+			return error;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Insert data into stream at current location
+ *
+ * \param stream  Input stream to insert into
+ * \param data    Data to insert (UTF-8 encoded)
+ * \param len     Length, in bytes, of data
+ * \return HUBBUB_OK on success, HUBBUB_NOMEM if the buffer cannot grow
+ *
+ * The data is placed just before the next character to be read; the
+ * cursor itself is left unchanged.
+ */
+hubbub_error hubbub_utf8stream_insert(hubbub_inputstream *stream,
+		const uint8_t *data, size_t len)
+{
+	size_t space;
+	uint8_t *curpos;
+
+	space = stream->buffer_alloc - stream->buffer_len;
+
+	/* Need to grow buffer, if there's insufficient space */
+	if (space <= len) {
+		/* Round len up to a whole number of chunks and add one
+		 * spare chunk. BUFFER_CHUNK is a power of two, so the
+		 * mask must be ~(BUFFER_CHUNK - 1). The same amount is
+		 * used for the allocation and for the bookkeeping, so
+		 * buffer_alloc always matches the real allocation. */
+		size_t extra = ((len + BUFFER_CHUNK - 1) &
+				~((size_t) BUFFER_CHUNK - 1)) + BUFFER_CHUNK;
+		bool moved;
+		uint8_t *temp = stream->alloc(stream->buffer,
+				stream->buffer_alloc + extra,
+				stream->pw);
+
+		if (temp == NULL)
+			return HUBBUB_NOMEM;
+
+		moved = (temp != stream->buffer);
+
+		stream->buffer = temp;
+		stream->buffer_alloc += extra;
+
+		if (moved)
+			hubbub_inputstream_buffer_moved(stream);
+	}
+
+	/* Find the insertion point
+	 * (just before the next character to be read) */
+	curpos = stream->buffer + stream->cursor;
+
+	/* Move data above this point up */
+	memmove(curpos + len, curpos, stream->buffer_len - stream->cursor);
+
+	/* Copy new data into gap created by memmove */
+	memcpy(curpos, data, len);
+
+	/* Fix up buffer length */
+	stream->buffer_len += len;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Inspect the character at the cursor without consuming it
+ *
+ * \param stream  Stream to look in
+ * \return UCS4 (host-endian) character code, HUBBUB_INPUTSTREAM_EOF, or
+ *         HUBBUB_INPUTSTREAM_OOD
+ */
+uint32_t hubbub_utf8stream_peek(hubbub_inputstream *stream)
+{
+	uint32_t ucs4;
+	size_t clen;
+
+	if (stream->cursor != stream->buffer_len) {
+		hubbub_error error = hubbub_utf8_to_ucs4(
+				stream->buffer + stream->cursor,
+				stream->buffer_len - stream->cursor,
+				&ucs4, &clen);
+
+		if (error == HUBBUB_OK)
+			return ucs4;
+
+		/* Any failure other than "need more data" reads as OOD */
+		if (error != HUBBUB_NEEDDATA)
+			return HUBBUB_INPUTSTREAM_OOD;
+	}
+
+	/* No complete character is available: distinguish a finished
+	 * stream from one that is merely waiting for more input */
+	if (stream->had_eof)
+		return HUBBUB_INPUTSTREAM_EOF;
+
+	return HUBBUB_INPUTSTREAM_OOD;
+}
+
+/**
+ * Retrieve the byte index and length of the current character in the stream
+ *
+ * \param stream Stream to look in
+ * \param len Pointer to location to receive byte length of character
+ * \return Byte index of current character from start of stream
+ *
+ * NOTE(review): the result of hubbub_utf8_char_byte_length() is ignored,
+ * so this implementation always returns the cursor and never the
+ * (uint32_t) -1 error value that this comment used to document.
+ */
+uint32_t hubbub_utf8stream_cur_pos(hubbub_inputstream *stream,
+ size_t *len)
+{
+ hubbub_utf8_char_byte_length(stream->buffer + stream->cursor, len);
+
+ return stream->cursor;
+}
+
+/**
+ * Lower-case the byte at the cursor if it is an ASCII capital letter
+ *
+ * \param stream  Stream to modify
+ */
+void hubbub_utf8stream_lowercase(hubbub_inputstream *stream)
+{
+	uint8_t *cur = stream->buffer + stream->cursor;
+
+	/* 'A'..'Z' and 'a'..'z' are 0x20 apart in ASCII */
+	if (*cur >= 'A' && *cur <= 'Z')
+		*cur += 0x0020;
+}
+
+/**
+ * Upper-case the byte at the cursor if it is an ASCII small letter
+ *
+ * \param stream  Stream to modify
+ */
+void hubbub_utf8stream_uppercase(hubbub_inputstream *stream)
+{
+	uint8_t *cur = stream->buffer + stream->cursor;
+
+	/* 'a'..'z' and 'A'..'Z' are 0x20 apart in ASCII */
+	if (*cur >= 'a' && *cur <= 'z')
+		*cur -= 0x0020;
+}
+
+/**
+ * Move the cursor on to the next character in the stream
+ *
+ * \param stream  The stream whose position to advance
+ *
+ * The cursor is left where it is if the next position cannot be found.
+ */
+void hubbub_utf8stream_advance(hubbub_inputstream *stream)
+{
+	uint32_t following;
+
+	if (hubbub_utf8_next(stream->buffer, stream->buffer_len,
+			stream->cursor, &following) != HUBBUB_OK)
+		return;
+
+	stream->cursor = following;
+}
+
+/**
+ * Push a character back onto the stream
+ *
+ * \param stream     Stream to push back to
+ * \param character  UCS4 (host-endian) codepoint to push back
+ * \return HUBBUB_OK on success, HUBBUB_INVALID if the character does
+ *         not match the stream contents, appropriate error otherwise
+ *
+ * The stream data is never modified. The character immediately before
+ * the cursor is compared with ::character; only if the two are identical
+ * is the cursor moved back over it. Otherwise the cursor stays put.
+ */
+hubbub_error hubbub_utf8stream_push_back(hubbub_inputstream *stream,
+		uint32_t character)
+{
+	uint8_t encoded[6];
+	size_t enc_len;
+	uint32_t prev_off;
+	hubbub_error error;
+
+	/* Locate the start of the preceding character ... */
+	error = hubbub_utf8_prev(stream->buffer, stream->cursor, &prev_off);
+
+	/* ... and get the UTF-8 form of the character being pushed back */
+	if (error == HUBBUB_OK)
+		error = hubbub_utf8_from_ucs4(character, encoded, &enc_len);
+
+	if (error != HUBBUB_OK)
+		return error;
+
+	if ((stream->cursor - prev_off) == enc_len &&
+			memcmp(stream->buffer + prev_off,
+				encoded, enc_len) == 0) {
+		stream->cursor = prev_off;
+		return HUBBUB_OK;
+	}
+
+	return HUBBUB_INVALID;
+}
+
+/**
+ * Compare two ranges of the input stream, ignoring case
+ *
+ * \param stream  Input stream to look in
+ * \param r1      Offset of start of first range
+ * \param r2      Offset of start of second range
+ * \param len     Byte length of ranges
+ * \return 0 if ranges match, non-zero otherwise
+ */
+int hubbub_utf8stream_compare_range_ci(hubbub_inputstream *stream,
+		uint32_t r1, uint32_t r2, size_t len)
+{
+	const char *first = (const char *) stream->buffer + r1;
+	const char *second = (const char *) stream->buffer + r2;
+
+	return strncasecmp(first, second, len);
+}
+
+/**
+ * Case sensitively compare a pair of ranges in the input stream
+ *
+ * \param stream  Input stream to look in
+ * \param r1      Offset of start of first range
+ * \param r2      Offset of start of second range
+ * \param len     Byte length of ranges
+ * \return 0 if ranges match, non-zero otherwise
+ */
+int hubbub_utf8stream_compare_range_cs(hubbub_inputstream *stream,
+		uint32_t r1, uint32_t r2, size_t len)
+{
+	/* The ranges are raw byte spans with an explicit length, not C
+	 * strings. strncmp() stops at the first NUL byte, so ranges that
+	 * differ after an embedded NUL would be reported as equal;
+	 * memcmp() compares all len bytes. */
+	return memcmp(stream->buffer + r1, stream->buffer + r2, len);
+}
+
+/**
+ * Compare a range of the input stream with an ASCII string (case matters)
+ *
+ * \param stream  Input stream to look in
+ * \param off     Offset of range start
+ * \param len     Byte length of range
+ * \param data    Comparison string
+ * \param dlen    Byte length of comparison string
+ * \return 0 if match, non-zero otherwise
+ */
+int hubbub_utf8stream_compare_range_ascii(hubbub_inputstream *stream,
+		uint32_t off, size_t len, const char *data, size_t dlen)
+{
+	/* Only equal-length inputs can possibly match */
+	if (len == dlen)
+		return strncmp((const char *) stream->buffer + off,
+				data, len);
+
+	return 1; /* arbitrary */
+}
+
+/**
+ * Replace a range of bytes in the input stream with a single character
+ *
+ * \param stream  Input stream containing data
+ * \param start   Offset of start of range to replace
+ * \param len     Length (in bytes) of range to replace
+ * \param ucs4    UCS4 (host endian) encoded replacement character
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_INVALID if the range is not wholly within the buffer,
+ *         HUBBUB_NOMEM if the buffer cannot grow,
+ *         or the error from converting ucs4 to UTF-8
+ */
+hubbub_error hubbub_utf8stream_replace_range(hubbub_inputstream *stream,
+		uint32_t start, size_t len, uint32_t ucs4)
+{
+	uint8_t buf[6];
+	size_t replen;
+	hubbub_error error;
+
+	/* The tail length below is buffer_len - (start + len); reject
+	 * ranges for which that would wrap */
+	if (start > stream->buffer_len || len > stream->buffer_len - start)
+		return HUBBUB_INVALID;
+
+	/* Get UTF8 version of replacement character */
+	error = hubbub_utf8_from_ucs4(ucs4, buf, &replen);
+	if (error)
+		return error;
+
+	/* More space is only ever needed when the replacement is longer
+	 * than the range it replaces. Working in size_t avoids squeezing
+	 * the difference into a 32-bit signed value. */
+	if (replen > len) {
+		size_t growth = replen - len;
+
+		if (stream->buffer_len + growth >= stream->buffer_alloc) {
+			/* Round up to whole chunks plus one spare chunk.
+			 * BUFFER_CHUNK is a power of two, so the mask is
+			 * ~(BUFFER_CHUNK - 1). The same amount is used
+			 * for the allocation and for buffer_alloc. */
+			size_t extra = ((growth + BUFFER_CHUNK - 1) &
+					~((size_t) BUFFER_CHUNK - 1)) +
+					BUFFER_CHUNK;
+			bool moved;
+			uint8_t *temp = stream->alloc(stream->buffer,
+					stream->buffer_alloc + extra,
+					stream->pw);
+
+			if (temp == NULL)
+				return HUBBUB_NOMEM;
+
+			moved = (temp != stream->buffer);
+
+			stream->buffer = temp;
+			stream->buffer_alloc += extra;
+
+			if (moved)
+				hubbub_inputstream_buffer_moved(stream);
+		}
+	}
+
+	/* Move subsequent input to correct location */
+	memmove(stream->buffer + start + replen,
+			stream->buffer + start + len,
+			stream->buffer_len - (start + len));
+
+	/* And fill the gap with the replacement character */
+	memcpy(stream->buffer + start, buf, replen);
+
+	/* Finally, update length */
+	stream->buffer_len = stream->buffer_len - len + replen;
+
+	return HUBBUB_OK;
+}
+
+/* Factory entry for the UTF-8 input stream implementation */
+hubbub_streamhandler utf8stream = {
+	.uses_encoding = hubbub_utf8stream_uses_encoding,
+	.create = hubbub_utf8stream_create
+};
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..e7a4fe8
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,237 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <hubbub/parser.h>
+
+#include "input/inputstream.h"
+#include "tokeniser/tokeniser.h"
+
+/**
+ * Hubbub parser object
+ *
+ * Pairs an input stream with the tokeniser that is created on top of it,
+ * and remembers the allocator used so the parser can free itself.
+ */
+struct hubbub_parser {
+ hubbub_inputstream *stream; /**< Input stream instance */
+ hubbub_tokeniser *tok; /**< Tokeniser instance */
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Client data */
+};
+
+/**
+ * Construct a hubbub parser: an input stream plus a tokeniser reading it
+ *
+ * \param enc      Source document encoding, or NULL to autodetect
+ * \param int_enc  Desired encoding of document
+ * \param alloc    Memory (de)allocation function
+ * \param pw       Pointer to client-specific private data (may be NULL)
+ * \return Pointer to parser instance, or NULL on error
+ */
+hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_parser *parser;
+
+	if (alloc == NULL)
+		return NULL;
+
+	parser = alloc(NULL, sizeof(hubbub_parser), pw);
+	if (parser == NULL)
+		return NULL;
+
+	parser->alloc = alloc;
+	parser->pw = pw;
+
+	parser->stream = hubbub_inputstream_create(enc, int_enc, alloc, pw);
+	if (parser->stream != NULL) {
+		parser->tok = hubbub_tokeniser_create(parser->stream,
+				alloc, pw);
+		if (parser->tok != NULL)
+			return parser;
+
+		/* No tokeniser: unwind the stream before the parser */
+		hubbub_inputstream_destroy(parser->stream);
+	}
+
+	alloc(parser, 0, pw);
+
+	return NULL;
+}
+
+/**
+ * Tear down a hubbub parser and the objects it owns
+ *
+ * \param parser  Parser instance to destroy (NULL is ignored)
+ */
+void hubbub_parser_destroy(hubbub_parser *parser)
+{
+	if (parser != NULL) {
+		/* Tokeniser first, then the stream it was created on */
+		hubbub_tokeniser_destroy(parser->tok);
+		hubbub_inputstream_destroy(parser->stream);
+
+		parser->alloc(parser, 0, parser->pw);
+	}
+}
+
+/**
+ * Configure a hubbub parser
+ *
+ * \param parser  Parser instance to configure
+ * \param type    Option to set
+ * \param params  Option-specific parameters
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if parser or params is NULL or type is not a
+ *         known option, appropriate error otherwise
+ *
+ * Each parser option is translated to the corresponding tokeniser option
+ * and forwarded to the tokeniser.
+ */
+hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
+		hubbub_parser_opttype type,
+		hubbub_parser_optparams *params)
+{
+	hubbub_tokeniser_opttype toktype;
+
+	if (parser == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	switch (type) {
+	case HUBBUB_PARSER_TOKEN_HANDLER:
+		toktype = HUBBUB_TOKENISER_TOKEN_HANDLER;
+		break;
+	case HUBBUB_PARSER_BUFFER_HANDLER:
+		toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+		break;
+	case HUBBUB_PARSER_ERROR_HANDLER:
+		/* Was mapped to the BUFFER handler option by mistake, so
+		 * installing an error handler replaced the buffer handler */
+		toktype = HUBBUB_TOKENISER_ERROR_HANDLER;
+		break;
+	case HUBBUB_PARSER_CONTENT_MODEL:
+		toktype = HUBBUB_TOKENISER_CONTENT_MODEL;
+		break;
+	default:
+		/* Unknown option: never forward an indeterminate toktype */
+		return HUBBUB_BADPARM;
+	}
+
+	return hubbub_tokeniser_setopt(parser->tok, toktype,
+			(hubbub_tokeniser_optparams *) params);
+}
+
+/**
+ * Hand a chunk of document data to the parser and tokenise it
+ *
+ * \param parser  Parser instance to use
+ * \param data    Data to parse (encoded in the input charset)
+ * \param len     Length, in bytes, of data
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser,
+		uint8_t *data, size_t len)
+{
+	hubbub_error error;
+
+	if (parser == NULL || data == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Buffer the data, then let the tokeniser consume it */
+	error = hubbub_inputstream_append(parser->stream, data, len);
+	if (error == HUBBUB_OK)
+		error = hubbub_tokeniser_run(parser->tok);
+
+	return error;
+}
+
+/**
+ * Hand a chunk of extraneous data to the parser and tokenise it
+ *
+ * \param parser  Parser instance to use
+ * \param data    Data to parse (encoded in internal charset)
+ * \param len     Length, in bytes, of data
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * The data is inserted into the input stream at its current location
+ * rather than appended to the end.
+ */
+hubbub_error hubbub_parser_parse_extraneous_chunk(hubbub_parser *parser,
+		uint8_t *data, size_t len)
+{
+	hubbub_error error;
+
+	/** \todo In some cases, we don't actually want script-inserted
+	 * data to be parsed until later. We'll need some way of flagging
+	 * this through the public API, and the inputstream API will need
+	 * some way of marking the insertion point so that, when the
+	 * tokeniser is run, only the inserted chunk is parsed. */
+
+	if (parser == NULL || data == NULL)
+		return HUBBUB_BADPARM;
+
+	error = hubbub_inputstream_insert(parser->stream, data, len);
+	if (error == HUBBUB_OK)
+		error = hubbub_tokeniser_run(parser->tok);
+
+	return error;
+}
+
+/**
+ * Tell the parser that no more document data will be supplied
+ *
+ * \param parser  Parser to inform
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ */
+hubbub_error hubbub_parser_completed(hubbub_parser *parser)
+{
+	hubbub_error error;
+
+	if (parser == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Appending NULL data is how EOF is flagged to the input stream */
+	error = hubbub_inputstream_append(parser->stream, NULL, 0);
+	if (error != HUBBUB_OK)
+		return error;
+
+	/* Let the tokeniser process whatever remains */
+	return hubbub_tokeniser_run(parser->tok);
+}
+
+/**
+ * Report the document charset as known to the parser's input stream
+ *
+ * \param parser  Parser instance to query
+ * \param source  Location that receives the charset source
+ * \return Charset name (constant; do not free), or NULL if unknown
+ */
+const char *hubbub_parser_read_charset(hubbub_parser *parser,
+		hubbub_charset_source *source)
+{
+	if (parser != NULL && source != NULL)
+		return hubbub_inputstream_read_charset(parser->stream,
+				source);
+
+	return NULL;
+}
+
+/**
+ * Take ownership of the document buffer away from the parser
+ *
+ * \param parser  Parser whose buffer to claim
+ * \param buffer  Location that receives the buffer pointer
+ * \param len     Location that receives the byte length of the buffer
+ * \return HUBBUB_OK on success, appropriate error otherwise.
+ *
+ * After a client has claimed the buffer, the parser gives up all
+ * ownership of it and invalidates any internal references to it.
+ * The only parser call that may follow is hubbub_parser_destroy().
+ */
+hubbub_error hubbub_parser_claim_buffer(hubbub_parser *parser,
+		uint8_t **buffer, size_t *len)
+{
+	if (parser != NULL && buffer != NULL && len != NULL)
+		return hubbub_inputstream_claim_buffer(parser->stream,
+				buffer, len);
+
+	return HUBBUB_BADPARM;
+}
diff --git a/src/tokeniser/Makefile b/src/tokeniser/Makefile
new file mode 100644
index 0000000..539625f
--- /dev/null
+++ b/src/tokeniser/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths: add this directory to the header search path
+CFLAGS += -I$(CURDIR)
+
+# Objects (base names only; the rules below add the directory and .o suffix)
+OBJS = entities tokeniser
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets: release and debug build the objects into ../Release and ../Debug; distclean, setup, export and test have nothing to do here
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+clean:
+ -@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules: release objects are compiled with -DNDEBUG, debug objects with -g
+../Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+../Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/tokeniser/entities.c b/src/tokeniser/entities.c
new file mode 100644
index 0000000..8a9acf5
--- /dev/null
+++ b/src/tokeniser/entities.c
@@ -0,0 +1,363 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include "utils/dict.h"
+#include "utils/utils.h"
+#include "tokeniser/entities.h"
+
+typedef struct hubbub_entity hubbub_entity;
+
+static const struct hubbub_entity {
+ const char *name; /**< Entity name (used as the dictionary key) */
+ uint32_t ucs4; /**< UCS4 codepoint reported for the entity */
+} entities[] = {
+ { "AElig", 0x00C6 },
+ { "Aacute", 0x00C1 },
+ { "Acirc", 0x00C2 },
+ { "Agrave", 0x00C0 },
+ { "Alpha", 0x0391 },
+ { "Aring", 0x00C5 },
+ { "Atilde", 0x00C3 },
+ { "Auml", 0x00C4 },
+ { "Beta", 0x0392 },
+ { "Ccedil", 0x00C7 },
+ { "Chi", 0x03A7 },
+ { "Dagger", 0x2021 },
+ { "Delta", 0x0394 },
+ { "ETH", 0x00D0 },
+ { "Eacute", 0x00C9 },
+ { "Ecirc", 0x00CA },
+ { "Egrave", 0x00C8 },
+ { "Epsilon", 0x0395 },
+ { "Eta", 0x0397 },
+ { "Euml", 0x00CB },
+ { "Gamma", 0x0393 },
+ { "Iacute", 0x00CD },
+ { "Icirc", 0x00CE },
+ { "Igrave", 0x00CC },
+ { "Iota", 0x0399 },
+ { "Iuml", 0x00CF },
+ { "Kappa", 0x039A },
+ { "Lambda", 0x039B },
+ { "Mu", 0x039C },
+ { "Ntilde", 0x00D1 },
+ { "Nu", 0x039D },
+ { "OElig", 0x0152 },
+ { "Oacute", 0x00D3 },
+ { "Ocirc", 0x00D4 },
+ { "Ograve", 0x00D2 },
+ { "Omega", 0x03A9 },
+ { "Omicron", 0x039F },
+ { "Oslash", 0x00D8 },
+ { "Otilde", 0x00D5 },
+ { "Ouml", 0x00D6 },
+ { "Phi", 0x03A6 },
+ { "Pi", 0x03A0 },
+ { "Prime", 0x2033 },
+ { "Psi", 0x03A8 },
+ { "Rho", 0x03A1 },
+ { "Scaron", 0x0160 },
+ { "Sigma", 0x03A3 },
+ { "THORN", 0x00DE },
+ { "Tau", 0x03A4 },
+ { "Theta", 0x0398 },
+ { "Uacute", 0x00DA },
+ { "Ucirc", 0x00DB },
+ { "Ugrave", 0x00D9 },
+ { "Upsilon", 0x03A5 },
+ { "Uuml", 0x00DC },
+ { "Xi", 0x039E },
+ { "Yacute", 0x00DD },
+ { "Yuml", 0x0178 },
+ { "Zeta", 0x0396 },
+ { "aacute", 0x00E1 },
+ { "acirc", 0x00E2 },
+ { "acute", 0x00B4 },
+ { "aelig", 0x00E6 },
+ { "agrave", 0x00E0 },
+ { "alefsym", 0x2135 },
+ { "alpha", 0x03B1 },
+ { "amp", 0x0026 },
+ { "AMP", 0x0026 },
+ { "and", 0x2227 },
+ { "ang", 0x2220 },
+ { "apos", 0x0027 },
+ { "aring", 0x00E5 },
+ { "asymp", 0x2248 },
+ { "atilde", 0x00E3 },
+ { "auml", 0x00E4 },
+ { "bdquo", 0x201E },
+ { "beta", 0x03B2 },
+ { "brvbar", 0x00A6 },
+ { "bull", 0x2022 },
+ { "cap", 0x2229 },
+ { "ccedil", 0x00E7 },
+ { "cedil", 0x00B8 },
+ { "cent", 0x00A2 },
+ { "chi", 0x03C7 },
+ { "circ", 0x02C6 },
+ { "clubs", 0x2663 },
+ { "cong", 0x2245 },
+ { "copy", 0x00A9 },
+ { "COPY", 0x00A9 },
+ { "crarr", 0x21B5 },
+ { "cup", 0x222A },
+ { "curren", 0x00A4 },
+ { "dArr", 0x21D3 },
+ { "dagger", 0x2020 },
+ { "darr", 0x2193 },
+ { "deg", 0x00B0 },
+ { "delta", 0x03B4 },
+ { "diams", 0x2666 },
+ { "divide", 0x00F7 },
+ { "eacute", 0x00E9 },
+ { "ecirc", 0x00EA },
+ { "egrave", 0x00E8 },
+ { "empty", 0x2205 },
+ { "emsp", 0x2003 },
+ { "ensp", 0x2002 },
+ { "epsilon", 0x03B5 },
+ { "equiv", 0x2261 },
+ { "eta", 0x03B7 },
+ { "eth", 0x00F0 },
+ { "euml", 0x00EB },
+ { "euro", 0x20AC },
+ { "exist", 0x2203 },
+ { "fnof", 0x0192 },
+ { "forall", 0x2200 },
+ { "frac12", 0x00BD },
+ { "frac14", 0x00BC },
+ { "frac34", 0x00BE },
+ { "frasl", 0x2044 },
+ { "gamma", 0x03B3 },
+ { "ge", 0x2265 },
+ { "gt", 0x003E },
+ { "GT", 0x003E },
+ { "hArr", 0x21D4 },
+ { "harr", 0x2194 },
+ { "hearts", 0x2665 },
+ { "hellip", 0x2026 },
+ { "iacute", 0x00ED },
+ { "icirc", 0x00EE },
+ { "iexcl", 0x00A1 },
+ { "igrave", 0x00EC },
+ { "image", 0x2111 },
+ { "infin", 0x221E },
+ { "int", 0x222B },
+ { "iota", 0x03B9 },
+ { "iquest", 0x00BF },
+ { "isin", 0x2208 },
+ { "iuml", 0x00EF },
+ { "kappa", 0x03BA },
+ { "lArr", 0x21D0 },
+ { "lambda", 0x03BB },
+ { "lang", 0x2329 },
+ { "laquo", 0x00AB },
+ { "larr", 0x2190 },
+ { "lceil", 0x2308 },
+ { "ldquo", 0x201C },
+ { "le", 0x2264 },
+ { "lfloor", 0x230A },
+ { "lowast", 0x2217 },
+ { "loz", 0x25CA },
+ { "lrm", 0x200E },
+ { "lsaquo", 0x2039 },
+ { "lsquo", 0x2018 },
+ { "lt", 0x003C },
+ { "LT", 0x003C },
+ { "macr", 0x00AF },
+ { "mdash", 0x2014 },
+ { "micro", 0x00B5 },
+ { "middot", 0x00B7 },
+ { "minus", 0x2212 },
+ { "mu", 0x03BC },
+ { "nabla", 0x2207 },
+ { "nbsp", 0x00A0 },
+ { "ndash", 0x2013 },
+ { "ne", 0x2260 },
+ { "ni", 0x220B },
+ { "not", 0x00AC },
+ { "notin", 0x2209 },
+ { "nsub", 0x2284 },
+ { "ntilde", 0x00F1 },
+ { "nu", 0x03BD },
+ { "oacute", 0x00F3 },
+ { "ocirc", 0x00F4 },
+ { "oelig", 0x0153 },
+ { "ograve", 0x00F2 },
+ { "oline", 0x203E },
+ { "omega", 0x03C9 },
+ { "omicron", 0x03BF },
+ { "oplus", 0x2295 },
+ { "or", 0x2228 },
+ { "ordf", 0x00AA },
+ { "ordm", 0x00BA },
+ { "oslash", 0x00F8 },
+ { "otilde", 0x00F5 },
+ { "otimes", 0x2297 },
+ { "ouml", 0x00F6 },
+ { "para", 0x00B6 },
+ { "part", 0x2202 },
+ { "permil", 0x2030 },
+ { "perp", 0x22A5 },
+ { "phi", 0x03C6 },
+ { "pi", 0x03C0 },
+ { "piv", 0x03D6 },
+ { "plusmn", 0x00B1 },
+ { "pound", 0x00A3 },
+ { "prime", 0x2032 },
+ { "prod", 0x220F },
+ { "prop", 0x221D },
+ { "psi", 0x03C8 },
+ { "quot", 0x0022 },
+ { "QUOT", 0x0022 },
+ { "rArr", 0x21D2 },
+ { "radic", 0x221A },
+ { "rang", 0x232A },
+ { "raquo", 0x00BB },
+ { "rarr", 0x2192 },
+ { "rceil", 0x2309 },
+ { "rdquo", 0x201D },
+ { "real", 0x211C },
+ { "reg", 0x00AE },
+ { "REG", 0x00AE },
+ { "rfloor", 0x230B },
+ { "rho", 0x03C1 },
+ { "rlm", 0x200F },
+ { "rsaquo", 0x203A },
+ { "rsquo", 0x2019 },
+ { "sbquo", 0x201A },
+ { "scaron", 0x0161 },
+ { "sdot", 0x22C5 },
+ { "sect", 0x00A7 },
+ { "shy", 0x00AD },
+ { "sigma", 0x03C3 },
+ { "sigmaf", 0x03C2 },
+ { "sim", 0x223C },
+ { "spades", 0x2660 },
+ { "sub", 0x2282 },
+ { "sube", 0x2286 },
+ { "sum", 0x2211 },
+ { "sup", 0x2283 },
+ { "sup1", 0x00B9 },
+ { "sup2", 0x00B2 },
+ { "sup3", 0x00B3 },
+ { "supe", 0x2287 },
+ { "szlig", 0x00DF },
+ { "tau", 0x03C4 },
+ { "there4", 0x2234 },
+ { "theta", 0x03B8 },
+ { "thetasym", 0x03D1 },
+ { "thinsp", 0x2009 },
+ { "thorn", 0x00FE },
+ { "tilde", 0x02DC },
+ { "times", 0x00D7 },
+ { "trade", 0x2122 },
+ { "uArr", 0x21D1 },
+ { "uacute", 0x00FA },
+ { "uarr", 0x2191 },
+ { "ucirc", 0x00FB },
+ { "ugrave", 0x00F9 },
+ { "uml", 0x00A8 },
+ { "upsih", 0x03D2 },
+ { "upsilon", 0x03C5 },
+ { "uuml", 0x00FC },
+ { "weierp", 0x2118 },
+ { "xi", 0x03BE },
+ { "yacute", 0x00FD },
+ { "yen", 0x00A5 },
+ { "yuml", 0x00FF },
+ { "zeta", 0x03B6 },
+ { "zwj", 0x200D },
+ { "zwnj", 0x200C },
+};
+
+static hubbub_dict *dict; /**< Dictionary populated from entities[] by hubbub_entities_create() */
+
+/**
+ * Create the entities dictionary
+ *
+ * \param alloc  Memory (de)allocation function
+ * \param pw     Pointer to client-specific private data (may be NULL)
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * Every entry of the entities table is inserted into a newly created
+ * dictionary, keyed by entity name. If an insertion fails, the partially
+ * built dictionary is destroyed and the module-level pointer is reset to
+ * NULL, so it never refers to freed memory.
+ */
+hubbub_error hubbub_entities_create(hubbub_alloc alloc, void *pw)
+{
+	hubbub_error error;
+	size_t i;
+
+	if (alloc == NULL)
+		return HUBBUB_BADPARM;
+
+	dict = hubbub_dict_create(alloc, pw);
+	if (dict == NULL)
+		return HUBBUB_NOMEM;
+
+	for (i = 0; i < sizeof(entities) / sizeof(entities[0]); i++) {
+		error = hubbub_dict_insert(dict, entities[i].name,
+				&entities[i]);
+		if (error != HUBBUB_OK) {
+			hubbub_dict_destroy(dict);
+			/* Don't leave a dangling pointer behind */
+			dict = NULL;
+			return error;
+		}
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Destroy the entities dictionary
+ *
+ * \param alloc  Memory (de)allocation function
+ * \param pw     Pointer to client-specific private data (may be NULL)
+ *
+ * Neither parameter is used. Calling this when no dictionary exists is a
+ * no-op, and the module-level pointer is cleared after destruction so that
+ * a repeated call cannot free the dictionary twice.
+ */
+void hubbub_entities_destroy(hubbub_alloc alloc, void *pw)
+{
+	UNUSED(alloc);
+	UNUSED(pw);
+
+	if (dict == NULL)
+		return;
+
+	hubbub_dict_destroy(dict);
+	dict = NULL;
+}
+
+/**
+ * Step-wise search for an entity in the dictionary
+ *
+ * \param c        Character to look for
+ * \param result   Pointer to location for result
+ * \param context  Pointer to location for search context
+ * \return HUBBUB_OK if key found,
+ *         HUBBUB_NEEDDATA if more steps are required
+ *         HUBBUB_INVALID if nothing matches
+ *         HUBBUB_BADPARM if ::result or ::context is NULL
+ *
+ * The value pointed to by ::context should be NULL for the first call.
+ * Thereafter, pass in the same value as returned by the previous call.
+ * The context is opaque to the caller and should not be inspected.
+ *
+ * The location pointed to by ::result will be set to U+FFFD unless a match
+ * is found.
+ */
+hubbub_error hubbub_entities_search_step(uint8_t c, uint32_t *result,
+		void **context)
+{
+	/* The dictionary hands values back as untyped pointers; receive the
+	 * value in a genuine "const void *" rather than writing through a
+	 * cast pointer to an entity pointer */
+	const void *match = NULL;
+	hubbub_error error;
+
+	if (result == NULL || context == NULL)
+		return HUBBUB_BADPARM;
+
+	error = hubbub_dict_search_step(dict, c, &match, context);
+	if (error != HUBBUB_OK) {
+		*result = 0xFFFD;
+		return error;
+	}
+
+	*result = ((const hubbub_entity *) match)->ucs4;
+
+	return HUBBUB_OK;
+}
diff --git a/src/tokeniser/entities.h b/src/tokeniser/entities.h
new file mode 100644
index 0000000..efd1987
--- /dev/null
+++ b/src/tokeniser/entities.h
@@ -0,0 +1,25 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_tokeniser_entities_h_
+#define hubbub_tokeniser_entities_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+
+/* Create the entities dictionary (returns HUBBUB_OK on success) */
+hubbub_error hubbub_entities_create(hubbub_alloc alloc, void *pw);
+/* Destroy the entities dictionary (neither argument is used) */
+void hubbub_entities_destroy(hubbub_alloc alloc, void *pw);
+
+/* Step-wise search for an entity in the dictionary, one character per call;
+ * *context must be NULL on the first call and *result is set to U+FFFD
+ * unless a match is found */
+hubbub_error hubbub_entities_search_step(uint8_t c, uint32_t *result,
+ void **context);
+
+#endif
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
new file mode 100644
index 0000000..f8b6bb3
--- /dev/null
+++ b/src/tokeniser/tokeniser.c
@@ -0,0 +1,2282 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "utils/utils.h"
+
+#include "tokeniser/entities.h"
+#include "tokeniser/tokeniser.h"
+
+/**
+ * Table of mappings between Windows-1252 codepoints 128-159 and UCS4
+ *
+ * Entry i holds the UCS4 value for Windows-1252 codepoint 128 + i.
+ * Codepoints that Windows-1252 leaves unassigned map to U+FFFD.
+ */
+static const uint32_t cp1252Table[32] = {
+ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178
+};
+
+/**
+ * Tokeniser states
+ *
+ * hubbub_tokeniser_run() dispatches on the current state to the
+ * hubbub_tokeniser_handle_* function of the corresponding name.
+ * A newly created tokeniser starts in HUBBUB_TOKENISER_STATE_DATA.
+ */
+typedef enum hubbub_tokeniser_state {
+ HUBBUB_TOKENISER_STATE_DATA,
+ HUBBUB_TOKENISER_STATE_ENTITY_DATA,
+ HUBBUB_TOKENISER_STATE_TAG_OPEN,
+ HUBBUB_TOKENISER_STATE_CLOSE_TAG_OPEN,
+ HUBBUB_TOKENISER_STATE_CLOSE_TAG_MATCH,
+ HUBBUB_TOKENISER_STATE_TAG_NAME,
+ HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME,
+ HUBBUB_TOKENISER_STATE_ATTRIBUTE_NAME,
+ HUBBUB_TOKENISER_STATE_AFTER_ATTRIBUTE_NAME,
+ HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_VALUE,
+ HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_DQ,
+ HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_SQ,
+ HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_UQ,
+ HUBBUB_TOKENISER_STATE_ENTITY_IN_ATTRIBUTE_VALUE,
+ HUBBUB_TOKENISER_STATE_BOGUS_COMMENT,
+ HUBBUB_TOKENISER_STATE_MARKUP_DECLARATION_OPEN,
+ HUBBUB_TOKENISER_STATE_COMMENT_START,
+ HUBBUB_TOKENISER_STATE_COMMENT,
+ HUBBUB_TOKENISER_STATE_COMMENT_DASH,
+ HUBBUB_TOKENISER_STATE_COMMENT_END,
+ HUBBUB_TOKENISER_STATE_MATCH_DOCTYPE,
+ HUBBUB_TOKENISER_STATE_DOCTYPE,
+ HUBBUB_TOKENISER_STATE_BEFORE_DOCTYPE_NAME,
+ HUBBUB_TOKENISER_STATE_DOCTYPE_NAME,
+ HUBBUB_TOKENISER_STATE_AFTER_DOCTYPE_NAME,
+ HUBBUB_TOKENISER_STATE_BOGUS_DOCTYPE,
+ HUBBUB_TOKENISER_STATE_NUMBERED_ENTITY,
+ HUBBUB_TOKENISER_STATE_NAMED_ENTITY
+} hubbub_tokeniser_state;
+
+/**
+ * Context for tokeniser
+ *
+ * Scratch state shared between the state handlers. The whole structure is
+ * zero-filled when the tokeniser is created.
+ */
+typedef struct hubbub_tokeniser_context {
+ hubbub_token_type current_tag_type; /**< Type of current_tag */
+ hubbub_tag current_tag; /**< Current tag */
+
+ hubbub_string current_comment; /**< Current comment */
+
+ hubbub_doctype current_doctype; /**< Current doctype */
+
+ hubbub_string current_chars; /**< Pending characters */
+
+ hubbub_tokeniser_state prev_state; /**< Previous state */
+
+ struct {
+ hubbub_string tag; /**< Pending close tag */
+ } close_tag_match;
+
+ struct {
+ uint32_t count; /**< Index into "DOCTYPE" */
+ } match_doctype;
+
+ struct {
+ hubbub_string str; /**< Pending string */
+ uint8_t base; /**< Base for numeric
+ * entities */
+ uint32_t codepoint; /**< UCS4 codepoint */
+ bool had_data; /**< Whether we read
+ * anything after &#(x)? */
+ hubbub_tokeniser_state return_state; /**< State we were
+ * called from */
+ bool complete; /**< Flag that entity
+ * matching completed */
+ bool done_setup; /**< Flag that match setup
+ * has completed */
+ void *context; /**< Context for named
+ * entity search */
+ size_t prev_len; /**< Previous byte length
+ * of str */
+ } match_entity;
+
+ struct {
+ uint32_t line; /**< Current line of input */
+ uint32_t col; /**< Current character in
+ * line */
+ } position;
+} hubbub_tokeniser_context;
+
+/**
+ * Tokeniser data structure
+ *
+ * The three handler/private-data pairs are NULL after creation and are
+ * installed through hubbub_tokeniser_setopt().
+ */
+struct hubbub_tokeniser {
+ hubbub_tokeniser_state state; /**< Current tokeniser state */
+ hubbub_content_model content_model; /**< Current content
+ * model flag */
+
+ hubbub_inputstream *input; /**< Input stream */
+
+ const uint8_t *input_buffer; /**< Start of input stream's buffer */
+ size_t input_buffer_len; /**< Length of input buffer */
+
+ hubbub_tokeniser_context context; /**< Tokeniser context */
+
+ hubbub_token_handler token_handler; /**< Client token handler */
+ void *token_pw; /**< Private data for token_handler */
+
+ hubbub_buffer_handler buffer_handler; /**< Client buffer handler */
+ void *buffer_pw; /**< Private data for buffer_handler */
+
+ hubbub_error_handler error_handler; /**< Client error handler */
+ void *error_pw; /**< Private data for error_handler */
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *alloc_pw; /**< Client private data */
+};
+
+/* Forward declarations. The hubbub_tokeniser_handle_* functions are the
+ * per-state handlers dispatched from hubbub_tokeniser_run(); the boolean
+ * each one returns tells that loop whether to carry on. */
+static bool hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_entity_data(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_close_tag_open(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_close_tag_match(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_before_attribute_name(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_attribute_name(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_after_attribute_name(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_before_attribute_value(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_attribute_value_dq(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_attribute_value_sq(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_attribute_value_uq(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_entity_in_attribute_value(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_bogus_comment(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_markup_declaration_open(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_comment_start(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_comment_dash(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_comment_end(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_match_doctype(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_doctype(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_before_doctype_name(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_doctype_name(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_after_doctype_name(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_bogus_doctype(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_consume_entity(hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_numbered_entity(
+ hubbub_tokeniser *tokeniser);
+static bool hubbub_tokeniser_handle_named_entity(
+ hubbub_tokeniser *tokeniser);
+static void hubbub_tokeniser_buffer_moved_handler(const uint8_t *buffer,
+ size_t len, void *pw);
+static void hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
+ hubbub_token *token);
+
+/**
+ * Create a hubbub tokeniser
+ *
+ * \param input  Input stream instance
+ * \param alloc  Memory (de)allocation function
+ * \param pw     Pointer to client-specific private data (may be NULL)
+ * \return Pointer to tokeniser instance, or NULL on failure
+ *
+ * The new tokeniser starts in the data state with the PCDATA content
+ * model, has no client handlers installed, and is registered with the
+ * input stream so that it is told when the stream's buffer moves.
+ */
+hubbub_tokeniser *hubbub_tokeniser_create(hubbub_inputstream *input,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_tokeniser *tokeniser;
+
+	if (input == NULL || alloc == NULL)
+		return NULL;
+
+	tokeniser = alloc(NULL, sizeof(*tokeniser), pw);
+	if (tokeniser == NULL)
+		return NULL;
+
+	/* Allocator used for everything owned by this instance */
+	tokeniser->alloc = alloc;
+	tokeniser->alloc_pw = pw;
+
+	/* Initial tokenisation state */
+	tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+	tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA;
+
+	/* Input stream; the buffer details are not known yet */
+	tokeniser->input = input;
+	tokeniser->input_buffer = NULL;
+	tokeniser->input_buffer_len = 0;
+
+	/* No client handlers until hubbub_tokeniser_setopt() is called */
+	tokeniser->token_handler = NULL;
+	tokeniser->token_pw = NULL;
+	tokeniser->buffer_handler = NULL;
+	tokeniser->buffer_pw = NULL;
+	tokeniser->error_handler = NULL;
+	tokeniser->error_pw = NULL;
+
+	if (HUBBUB_OK != hubbub_inputstream_register_movehandler(input,
+			hubbub_tokeniser_buffer_moved_handler, tokeniser)) {
+		/* Registration failed: release the instance again */
+		alloc(tokeniser, 0, pw);
+		return NULL;
+	}
+
+	memset(&tokeniser->context, 0, sizeof(tokeniser->context));
+
+	return tokeniser;
+}
+
+/**
+ * Destroy a hubbub tokeniser
+ *
+ * \param tokeniser  The tokeniser instance to destroy (NULL is ignored)
+ *
+ * Deregisters the tokeniser from its input stream, then releases the
+ * current tag's attribute array (if any) and the instance itself.
+ */
+void hubbub_tokeniser_destroy(hubbub_tokeniser *tokeniser)
+{
+	hubbub_tag *tag;
+
+	if (tokeniser == NULL)
+		return;
+
+	tag = &tokeniser->context.current_tag;
+
+	hubbub_inputstream_deregister_movehandler(tokeniser->input,
+			hubbub_tokeniser_buffer_moved_handler, tokeniser);
+
+	if (tag->attributes != NULL)
+		tokeniser->alloc(tag->attributes, 0, tokeniser->alloc_pw);
+
+	tokeniser->alloc(tokeniser, 0, tokeniser->alloc_pw);
+}
+
+/**
+ * Configure a hubbub tokeniser
+ *
+ * \param tokeniser  The tokeniser instance to configure
+ * \param type       The option type to set
+ * \param params     Option-specific parameters
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * Installing a buffer handler immediately informs it of the tokeniser's
+ * current input buffer. A NULL buffer handler is stored without being
+ * called.
+ */
+hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser,
+		hubbub_tokeniser_opttype type,
+		hubbub_tokeniser_optparams *params)
+{
+	if (tokeniser == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	switch (type) {
+	case HUBBUB_TOKENISER_TOKEN_HANDLER:
+		tokeniser->token_handler = params->token_handler.handler;
+		tokeniser->token_pw = params->token_handler.pw;
+		break;
+	case HUBBUB_TOKENISER_BUFFER_HANDLER:
+		tokeniser->buffer_handler = params->buffer_handler.handler;
+		tokeniser->buffer_pw = params->buffer_handler.pw;
+		/* Only notify a handler that actually exists; calling
+		 * through a NULL function pointer would crash */
+		if (tokeniser->buffer_handler != NULL) {
+			tokeniser->buffer_handler(tokeniser->input_buffer,
+					tokeniser->input_buffer_len,
+					tokeniser->buffer_pw);
+		}
+		break;
+	case HUBBUB_TOKENISER_ERROR_HANDLER:
+		tokeniser->error_handler = params->error_handler.handler;
+		tokeniser->error_pw = params->error_handler.pw;
+		break;
+	case HUBBUB_TOKENISER_CONTENT_MODEL:
+		tokeniser->content_model = params->content_model.model;
+		break;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Process remaining data in the input stream
+ *
+ * \param tokeniser  The tokeniser instance to invoke
+ * \return HUBBUB_OK on success, appropriate error otherwise
+ *
+ * Repeatedly calls the handler for the current state until one of the
+ * handlers reports that processing cannot continue.
+ */
+hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
+{
+	bool more = true;
+
+	if (tokeniser == NULL)
+		return HUBBUB_BADPARM;
+
+	do {
+		switch (tokeniser->state) {
+		case HUBBUB_TOKENISER_STATE_DATA:
+			more = hubbub_tokeniser_handle_data(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_ENTITY_DATA:
+			more = hubbub_tokeniser_handle_entity_data(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_TAG_OPEN:
+			more = hubbub_tokeniser_handle_tag_open(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_CLOSE_TAG_OPEN:
+			more = hubbub_tokeniser_handle_close_tag_open(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_CLOSE_TAG_MATCH:
+			more = hubbub_tokeniser_handle_close_tag_match(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_TAG_NAME:
+			more = hubbub_tokeniser_handle_tag_name(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME:
+			more = hubbub_tokeniser_handle_before_attribute_name(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_ATTRIBUTE_NAME:
+			more = hubbub_tokeniser_handle_attribute_name(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_AFTER_ATTRIBUTE_NAME:
+			more = hubbub_tokeniser_handle_after_attribute_name(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_VALUE:
+			more = hubbub_tokeniser_handle_before_attribute_value(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_DQ:
+			more = hubbub_tokeniser_handle_attribute_value_dq(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_SQ:
+			more = hubbub_tokeniser_handle_attribute_value_sq(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_UQ:
+			more = hubbub_tokeniser_handle_attribute_value_uq(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_ENTITY_IN_ATTRIBUTE_VALUE:
+			more = hubbub_tokeniser_handle_entity_in_attribute_value(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_BOGUS_COMMENT:
+			more = hubbub_tokeniser_handle_bogus_comment(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_MARKUP_DECLARATION_OPEN:
+			more = hubbub_tokeniser_handle_markup_declaration_open(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_COMMENT_START:
+			more = hubbub_tokeniser_handle_comment_start(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_COMMENT:
+			more = hubbub_tokeniser_handle_comment(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_COMMENT_DASH:
+			more = hubbub_tokeniser_handle_comment_dash(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_COMMENT_END:
+			more = hubbub_tokeniser_handle_comment_end(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_MATCH_DOCTYPE:
+			more = hubbub_tokeniser_handle_match_doctype(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_DOCTYPE:
+			more = hubbub_tokeniser_handle_doctype(tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_BEFORE_DOCTYPE_NAME:
+			more = hubbub_tokeniser_handle_before_doctype_name(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_DOCTYPE_NAME:
+			more = hubbub_tokeniser_handle_doctype_name(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_AFTER_DOCTYPE_NAME:
+			more = hubbub_tokeniser_handle_after_doctype_name(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_BOGUS_DOCTYPE:
+			more = hubbub_tokeniser_handle_bogus_doctype(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_NUMBERED_ENTITY:
+			more = hubbub_tokeniser_handle_numbered_entity(
+					tokeniser);
+			break;
+		case HUBBUB_TOKENISER_STATE_NAMED_ENTITY:
+			more = hubbub_tokeniser_handle_named_entity(
+					tokeniser);
+			break;
+		}
+	} while (more);
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Handle the data state
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return true if tokenisation should continue, false if the input stream
+ *         reported end of file or ran out of data
+ *
+ * Characters are accumulated into current_chars and emitted as a single
+ * character token when a '&' or '<' that starts markup is met, or when the
+ * available input is exhausted. An EOF token is emitted at end of file.
+ */
+bool hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
+{
+	hubbub_token token;
+	uint32_t c;
+
+	/* Clear current characters */
+	tokeniser->context.current_chars.data_off = 0;
+	tokeniser->context.current_chars.len = 0;
+
+	while ((c = hubbub_inputstream_peek(tokeniser->input)) !=
+			HUBBUB_INPUTSTREAM_EOF &&
+			c != HUBBUB_INPUTSTREAM_OOD) {
+		if (c == '&' && (tokeniser->content_model ==
+				HUBBUB_CONTENT_MODEL_PCDATA ||
+				tokeniser->content_model ==
+				HUBBUB_CONTENT_MODEL_RCDATA)) {
+			tokeniser->state =
+					HUBBUB_TOKENISER_STATE_ENTITY_DATA;
+			/* Don't eat the '&'; it'll be handled by
+			 * entity consumption */
+			break;
+		} else if (c == '<' && tokeniser->content_model !=
+				HUBBUB_CONTENT_MODEL_PLAINTEXT) {
+			if (tokeniser->context.current_chars.len > 0) {
+				/* Emit any pending characters */
+				token.type = HUBBUB_TOKEN_CHARACTER;
+				token.data.character =
+					tokeniser->context.current_chars;
+
+				hubbub_tokeniser_emit_token(tokeniser,
+						&token);
+			}
+
+			/* Buffer '<' */
+			tokeniser->context.current_chars.data_off =
+				hubbub_inputstream_cur_pos(tokeniser->input,
+					&tokeniser->context.current_chars.len);
+
+			tokeniser->state = HUBBUB_TOKENISER_STATE_TAG_OPEN;
+			hubbub_inputstream_advance(tokeniser->input);
+			break;
+		} else {
+			uint32_t pos;
+			size_t len;
+
+			/* Accumulate characters into buffer */
+			pos = hubbub_inputstream_cur_pos(tokeniser->input,
+					&len);
+
+			if (tokeniser->context.current_chars.len == 0) {
+				tokeniser->context.current_chars.data_off =
+						pos;
+			}
+			/* Grow by the length reported for this character,
+			 * as every other accumulation site does; adding 1
+			 * would under-count multi-byte characters */
+			tokeniser->context.current_chars.len += len;
+
+			hubbub_inputstream_advance(tokeniser->input);
+		}
+	}
+
+	if (tokeniser->state != HUBBUB_TOKENISER_STATE_TAG_OPEN &&
+			tokeniser->context.current_chars.len > 0) {
+		/* Emit any pending characters */
+		token.type = HUBBUB_TOKEN_CHARACTER;
+		token.data.character = tokeniser->context.current_chars;
+
+		hubbub_tokeniser_emit_token(tokeniser, &token);
+
+		tokeniser->context.current_chars.data_off = 0;
+		tokeniser->context.current_chars.len = 0;
+	}
+
+	if (c == HUBBUB_INPUTSTREAM_EOF) {
+		token.type = HUBBUB_TOKEN_EOF;
+
+		hubbub_tokeniser_emit_token(tokeniser, &token);
+	}
+
+	return (c != HUBBUB_INPUTSTREAM_EOF && c != HUBBUB_INPUTSTREAM_OOD);
+}
+
+/**
+ * Handle the entity data state
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return Result of entity consumption while matching is in progress,
+ *         true once the matched character has been emitted
+ */
+bool hubbub_tokeniser_handle_entity_data(hubbub_tokeniser *tokeniser)
+{
+	hubbub_token token;
+	uint32_t c;
+
+	/* Entity matching still in progress: keep consuming */
+	if (!tokeniser->context.match_entity.complete)
+		return hubbub_tokeniser_consume_entity(tokeniser);
+
+	c = hubbub_inputstream_peek(tokeniser->input);
+	if (c == HUBBUB_INPUTSTREAM_OOD || c == HUBBUB_INPUTSTREAM_EOF) {
+		/* Should never happen */
+		abort();
+	}
+
+	/* Emit the character at the current stream position */
+	token.type = HUBBUB_TOKEN_CHARACTER;
+	token.data.character.data_off = hubbub_inputstream_cur_pos(
+			tokeniser->input, &token.data.character.len);
+
+	hubbub_tokeniser_emit_token(tokeniser, &token);
+
+	/* Reset for next time and go back to the data state */
+	tokeniser->context.match_entity.complete = false;
+	tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+	hubbub_inputstream_advance(tokeniser->input);
+
+	return true;
+}
+
+/**
+ * Handle the tag open state (entered after a '<' has been buffered)
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream is out of data, true otherwise
+ */
+bool hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
+{
+	hubbub_tokeniser_context *ctx = &tokeniser->context;
+	hubbub_tag *ctag = &ctx->current_tag;
+	const uint32_t c = hubbub_inputstream_peek(tokeniser->input);
+	const hubbub_content_model model = tokeniser->content_model;
+	const bool pcdata = (model == HUBBUB_CONTENT_MODEL_PCDATA);
+	hubbub_token token;
+	uint32_t pos;
+	size_t len;
+
+	if (c == HUBBUB_INPUTSTREAM_OOD)
+		return false;
+
+	/* Only the PCDATA, RCDATA and CDATA content models are acted upon */
+	if (!pcdata && model != HUBBUB_CONTENT_MODEL_RCDATA &&
+			model != HUBBUB_CONTENT_MODEL_CDATA)
+		return true;
+
+	if (c == '/') {
+		/* "</" is treated the same way in all three models */
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->current_chars.len += len;
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_CLOSE_TAG_OPEN;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else if (pcdata && c == '!') {
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->current_chars.len += len;
+
+		tokeniser->state =
+				HUBBUB_TOKENISER_STATE_MARKUP_DECLARATION_OPEN;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else if (pcdata && (('A' <= c && c <= 'Z') ||
+			('a' <= c && c <= 'z'))) {
+		/* Start of a tag name; uppercase letters are lowercased
+		 * in the stream first */
+		if ('A' <= c && c <= 'Z')
+			hubbub_inputstream_lowercase(tokeniser->input);
+
+		ctx->current_tag_type = HUBBUB_TOKEN_START_TAG;
+
+		ctag->name.data_off = hubbub_inputstream_cur_pos(
+				tokeniser->input, &ctag->name.len);
+		ctag->n_attributes = 0;
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_TAG_NAME;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else if (pcdata && c == '>') {
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->current_chars.len += len;
+
+		/* Emit "<>" */
+		token.type = HUBBUB_TOKEN_CHARACTER;
+		token.data.character = ctx->current_chars;
+
+		hubbub_tokeniser_emit_token(tokeniser, &token);
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else if (pcdata && c == '?') {
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->current_chars.len += len;
+
+		/* The bogus comment starts at the '?' */
+		ctx->current_comment.data_off = pos;
+		ctx->current_comment.len = len;
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else {
+		/* Emit '<' */
+		token.type = HUBBUB_TOKEN_CHARACTER;
+		token.data.character = ctx->current_chars;
+
+		hubbub_tokeniser_emit_token(tokeniser, &token);
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+	}
+
+	return true;
+}
+
+/**
+ * Handle the close tag open state (entered after "</" has been buffered)
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream is out of data, true otherwise
+ */
+bool hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
+{
+	hubbub_tokeniser_context *ctx = &tokeniser->context;
+	hubbub_tag *ctag = &ctx->current_tag;
+	uint32_t c;
+	uint32_t pos;
+	size_t len;
+
+	if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+			tokeniser->content_model ==
+			HUBBUB_CONTENT_MODEL_CDATA) {
+		/* Hand over to close tag matching, starting from scratch */
+		ctx->close_tag_match.tag.len = 0;
+		tokeniser->state = HUBBUB_TOKENISER_STATE_CLOSE_TAG_MATCH;
+		return true;
+	}
+
+	if (tokeniser->content_model != HUBBUB_CONTENT_MODEL_PCDATA)
+		return true;
+
+	c = hubbub_inputstream_peek(tokeniser->input);
+
+	if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
+		/* Start of an end tag name; uppercase letters are
+		 * lowercased in the stream first */
+		if ('A' <= c && c <= 'Z')
+			hubbub_inputstream_lowercase(tokeniser->input);
+
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+		ctx->current_tag_type = HUBBUB_TOKEN_END_TAG;
+		ctag->name.data_off = pos;
+		ctag->name.len = len;
+		ctag->n_attributes = 0;
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_TAG_NAME;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else if (c == '>') {
+		tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+		hubbub_inputstream_advance(tokeniser->input);
+	} else if (c == HUBBUB_INPUTSTREAM_EOF) {
+		hubbub_token token;
+
+		/* Emit "</" */
+		token.type = HUBBUB_TOKEN_CHARACTER;
+		token.data.character = ctx->current_chars;
+
+		hubbub_tokeniser_emit_token(tokeniser, &token);
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+	} else if (c == HUBBUB_INPUTSTREAM_OOD) {
+		/* Out of data */
+		return false;
+	} else {
+		/* Anything else starts a bogus comment at this character */
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+		ctx->current_comment.data_off = pos;
+		ctx->current_comment.len = len;
+
+		tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
+		hubbub_inputstream_advance(tokeniser->input);
+	}
+
+	return true;
+}
+
+bool hubbub_tokeniser_handle_close_tag_match(hubbub_tokeniser *tokeniser) /*
+ * Handle the close-tag-match state.
+ *
+ * Collects input characters into ctx->close_tag_match.tag until the collected
+ * length reaches ctag->name.len, then compares that range against the current
+ * tag name with hubbub_inputstream_compare_range_ci().
+ *
+ * - Mismatch, overshoot or EOF: the input is rewound by the collected length,
+ *   context.current_chars is emitted as a character token and the state
+ *   becomes DATA.
+ * - Match: the following character is peeked, the input is rewound by the
+ *   collected length, and unless that character is tab, LF, VT, FF, space,
+ *   '>', '/', '<' or EOF the same emit/DATA fallback is taken. Otherwise the
+ *   content model is set to PCDATA and the state becomes CLOSE_TAG_OPEN.
+ *
+ * The collected range is stored in tokeniser->context, so it is still there
+ * when the function is entered again after returning false.
+ *
+ * Returns false when the input stream reports HUBBUB_INPUTSTREAM_OOD,
+ * true in every other case. A failing rewind calls abort().
+ */
+{
+ hubbub_tokeniser_context *ctx = &tokeniser->context;
+ hubbub_tag *ctag = &tokeniser->context.current_tag;
+ uint32_t c = 0; /* keeps this value when the loop below is skipped */
+
+ while (ctx->close_tag_match.tag.len < ctag->name.len &&
+ (c = hubbub_inputstream_peek(tokeniser->input)) !=
+ HUBBUB_INPUTSTREAM_EOF &&
+ c != HUBBUB_INPUTSTREAM_OOD) {
+ /* Match last open tag */
+ uint32_t off;
+ size_t len;
+
+ off = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+ if (ctx->close_tag_match.tag.len == 0) { /* first character collected */
+ ctx->close_tag_match.tag.data_off = off;
+ ctx->close_tag_match.tag.len = len;
+ } else {
+ ctx->close_tag_match.tag.len += len;
+ }
+
+ hubbub_inputstream_advance(tokeniser->input);
+
+ if (ctx->close_tag_match.tag.len > ctag->name.len || /* overshot, or same length but different text */
+ (ctx->close_tag_match.tag.len == ctag->name.len &&
+ hubbub_inputstream_compare_range_ci(
+ tokeniser->input,
+ ctag->name.data_off,
+ ctx->close_tag_match.tag.data_off,
+ ctag->name.len) != 0)) {
+ hubbub_token token;
+
+ /* Rewind input stream to start of tag name */
+ if (hubbub_inputstream_rewind(tokeniser->input,
+ ctx->close_tag_match.tag.len) !=
+ HUBBUB_OK)
+ abort();
+
+ /* Emit "</" */
+ token.type = HUBBUB_TOKEN_CHARACTER;
+ token.data.character =
+ tokeniser->context.current_chars;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+ return true;
+ } else if (ctx->close_tag_match.tag.len == ctag->name.len &&
+ hubbub_inputstream_compare_range_ci(
+ tokeniser->input,
+ ctag->name.data_off,
+ ctx->close_tag_match.tag.data_off,
+ ctag->name.len) == 0) {
+ /* Matched => stop searching */
+ break;
+ }
+ }
+
+ if (c == HUBBUB_INPUTSTREAM_OOD) {
+ /* Need more data */
+ return false;
+ }
+
+ if (c == HUBBUB_INPUTSTREAM_EOF) {
+ /* Ran out of data - parse error */
+ hubbub_token token;
+
+ /* Rewind input stream to start of tag name */
+ if (hubbub_inputstream_rewind(tokeniser->input,
+ ctx->close_tag_match.tag.len) != HUBBUB_OK)
+ abort();
+
+ /* Emit "</" */
+ token.type = HUBBUB_TOKEN_CHARACTER;
+ token.data.character = tokeniser->context.current_chars;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+ return true;
+ }
+
+ /* Match following char */
+ c = hubbub_inputstream_peek(tokeniser->input);
+
+ if (c == HUBBUB_INPUTSTREAM_OOD) {
+ /* Need more data */
+ return false;
+ }
+
+ /* Rewind input stream to start of tag name */
+ if (hubbub_inputstream_rewind(tokeniser->input, /* done for both outcomes below */
+ ctx->close_tag_match.tag.len) != HUBBUB_OK)
+ abort();
+
+ /* Check that following char was valid */
+ if (c != '\t' && c != '\n' && c != '\v' && c != '\f' &&
+ c != ' ' && c != '>' && c != '/' && c != '<' &&
+ c != HUBBUB_INPUTSTREAM_EOF) {
+ hubbub_token token;
+
+ /* Emit "</" */
+ token.type = HUBBUB_TOKEN_CHARACTER;
+ token.data.character = tokeniser->context.current_chars;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+ return true;
+ }
+
+ /* Switch the content model back to PCDATA */
+ tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA;
+
+ /* Finally, transition back to close tag open state */
+ tokeniser->state = HUBBUB_TOKENISER_STATE_CLOSE_TAG_OPEN;
+
+ return true;
+}
+
+/**
+ * Handle the tag-name state.
+ *
+ * Inspects the next input character and either moves on to attribute
+ * parsing, emits the current tag, or extends the current tag's name by the
+ * length of that character.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '>' || ch == '<' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = *tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* Only '>' is consumed; '<' and EOF are not advanced past here */
+        if (ch == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' ||
+            ch == ' ' || ch == '/') {
+        /** \todo permitted slash */
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME;
+    } else {
+        size_t char_len;
+
+        /* 'A'..'Z' gets hubbub_inputstream_lowercase() before being
+         * counted; everything else is counted as it stands */
+        if ('A' <= ch && ch <= 'Z')
+            hubbub_inputstream_lowercase(tokeniser->input);
+
+        /* Only the character length is needed, not its position */
+        (void) hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        tag->name.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the before-attribute-name state.
+ *
+ * - tab, LF, VT, FF, space and '/' are consumed without a state change.
+ * - '>' emits the current tag, switches to the data state and is consumed.
+ * - '<' and EOF emit the current tag and switch to the data state without
+ *   consuming anything.
+ * - Any other character starts a new attribute: the attribute array is grown
+ *   by one entry through tokeniser->alloc, the new entry's name is set to the
+ *   current character (after hubbub_inputstream_lowercase() for 'A'..'Z'),
+ *   its value is cleared, and the state becomes ATTRIBUTE_NAME.
+ *
+ * If tokeniser->alloc returns NULL the process is aborted: this handler can
+ * only return a bool, so there is no way to report the failure, and carrying
+ * on would write through a NULL pointer.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_before_attribute_name(
+        hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *ctag = &tokeniser->context.current_tag;
+    uint32_t c = hubbub_inputstream_peek(tokeniser->input);
+    hubbub_attribute *attr;
+    uint32_t pos;
+    size_t len;
+
+    if (c == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == ' ' ||
+            c == '/') {
+        /** \todo permitted slash */
+        hubbub_inputstream_advance(tokeniser->input);
+        return true;
+    }
+
+    if (c == '>' || c == '<' || c == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = tokeniser->context.current_tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* Only '>' is consumed */
+        if (c == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    /* Anything else begins a new attribute name */
+    if ('A' <= c && c <= 'Z')
+        hubbub_inputstream_lowercase(tokeniser->input);
+
+    pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+    attr = tokeniser->alloc(ctag->attributes,
+            (ctag->n_attributes + 1) * sizeof(hubbub_attribute),
+            tokeniser->alloc_pw);
+    if (attr == NULL) {
+        /** \todo report memory exhaustion to the caller once the
+         * handlers are able to return an error code */
+        abort();
+    }
+
+    ctag->attributes = attr;
+
+    attr[ctag->n_attributes].name.data_off = pos;
+    attr[ctag->n_attributes].name.len = len;
+    attr[ctag->n_attributes].value.data_off = 0;
+    attr[ctag->n_attributes].value.len = 0;
+
+    ctag->n_attributes++;
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_ATTRIBUTE_NAME;
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the attribute-name state.
+ *
+ * Either finishes the name of the most recently added attribute (on
+ * whitespace, '=', '/', '>', '<' or EOF) or extends that name by the length
+ * of the current character.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '>' || ch == '<' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = *tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* '<' and EOF are not consumed */
+        if (ch == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' ||
+            ch == ' ') {
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_AFTER_ATTRIBUTE_NAME;
+    } else if (ch == '=') {
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_VALUE;
+    } else if (ch == '/') {
+        /** \todo permitted slash */
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME;
+    } else {
+        size_t char_len;
+
+        if ('A' <= ch && ch <= 'Z')
+            hubbub_inputstream_lowercase(tokeniser->input);
+
+        /* Only the character length is needed here */
+        (void) hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        /* Grow the name of the attribute added last */
+        tag->attributes[tag->n_attributes - 1].name.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the after-attribute-name state.
+ *
+ * - tab, LF, VT, FF and space are consumed without a state change.
+ * - '=' switches to BEFORE_ATTRIBUTE_VALUE, '/' to BEFORE_ATTRIBUTE_NAME;
+ *   both are consumed.
+ * - '>' emits the current tag, switches to the data state and is consumed.
+ * - '<' and EOF emit the current tag and switch to the data state without
+ *   consuming anything.
+ * - Any other character starts a new attribute: hubbub_inputstream_lowercase()
+ *   is called, the attribute array is grown by one entry through
+ *   tokeniser->alloc, the new entry's name is set to the current character,
+ *   its value is cleared, and the state becomes ATTRIBUTE_NAME.
+ *
+ * If tokeniser->alloc returns NULL the process is aborted: this handler can
+ * only return a bool, so there is no way to report the failure, and carrying
+ * on would write through a NULL pointer.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_after_attribute_name(
+        hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *ctag = &tokeniser->context.current_tag;
+    uint32_t c = hubbub_inputstream_peek(tokeniser->input);
+    hubbub_attribute *attr;
+    uint32_t pos;
+    size_t len;
+
+    if (c == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == ' ') {
+        hubbub_inputstream_advance(tokeniser->input);
+        return true;
+    }
+
+    if (c == '=') {
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_VALUE;
+        hubbub_inputstream_advance(tokeniser->input);
+        return true;
+    }
+
+    if (c == '/') {
+        /** \todo permitted slash */
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME;
+        hubbub_inputstream_advance(tokeniser->input);
+        return true;
+    }
+
+    if (c == '>' || c == '<' || c == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = tokeniser->context.current_tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* Only '>' is consumed */
+        if (c == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    /* Anything else begins a new attribute name. The lowercase call is
+     * made for every such character, as it was before the 'A'..'Z' and
+     * catch-all branches were merged. */
+    hubbub_inputstream_lowercase(tokeniser->input);
+
+    pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+    attr = tokeniser->alloc(ctag->attributes,
+            (ctag->n_attributes + 1) * sizeof(hubbub_attribute),
+            tokeniser->alloc_pw);
+    if (attr == NULL) {
+        /** \todo report memory exhaustion to the caller once the
+         * handlers are able to return an error code */
+        abort();
+    }
+
+    ctag->attributes = attr;
+
+    attr[ctag->n_attributes].name.data_off = pos;
+    attr[ctag->n_attributes].name.len = len;
+    attr[ctag->n_attributes].value.data_off = 0;
+    attr[ctag->n_attributes].value.len = 0;
+
+    ctag->n_attributes++;
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_ATTRIBUTE_NAME;
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the before-attribute-value state.
+ *
+ * Skips whitespace, selects the quoted or unquoted value state, or emits the
+ * current tag when the tag ends before a value is seen.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_before_attribute_value(
+        hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+    bool emit_tag = false;  /* emit the current tag and go to DATA */
+    bool consume = true;    /* advance past the current character */
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    switch (ch) {
+    case '\t':
+    case '\n':
+    case '\v':
+    case '\f':
+    case ' ':
+        /* Skip whitespace */
+        break;
+    case '"':
+        tokeniser->state = HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_DQ;
+        break;
+    case '&':
+        /* Not consumed: the unquoted-value state sees the '&' itself */
+        tokeniser->state = HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_UQ;
+        consume = false;
+        break;
+    case '\'':
+        tokeniser->state = HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_SQ;
+        break;
+    case '>':
+        emit_tag = true;
+        break;
+    case '<':
+        emit_tag = true;
+        consume = false;
+        break;
+    default:
+        if (ch == HUBBUB_INPUTSTREAM_EOF) {
+            emit_tag = true;
+            consume = false;
+        } else {
+            /* First character of an unquoted value */
+            hubbub_attribute *attr;
+            uint32_t start;
+            size_t char_len;
+
+            start = hubbub_inputstream_cur_pos(tokeniser->input,
+                    &char_len);
+
+            attr = &tag->attributes[tag->n_attributes - 1];
+            attr->value.data_off = start;
+            attr->value.len = char_len;
+
+            tokeniser->state =
+                    HUBBUB_TOKENISER_STATE_ATTRIBUTE_VALUE_UQ;
+        }
+        break;
+    }
+
+    if (emit_tag) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = *tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+    }
+
+    if (consume)
+        hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the double-quoted attribute value state.
+ *
+ * A closing '"' returns to BEFORE_ATTRIBUTE_NAME, '&' hands over to the
+ * entity-in-attribute-value state (remembering this state in
+ * context.prev_state), EOF emits the current tag, and any other character is
+ * added to the value of the attribute added last.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '&') {
+        /* Don't eat the '&'; entity consumption handles this */
+        tokeniser->context.prev_state = tokeniser->state;
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_ENTITY_IN_ATTRIBUTE_VALUE;
+        return true;
+    }
+
+    if (ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = *tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+        return true;
+    }
+
+    if (ch == '"') {
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME;
+    } else {
+        hubbub_attribute *attr;
+        uint32_t start;
+        size_t char_len;
+
+        start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        attr = &tag->attributes[tag->n_attributes - 1];
+
+        /* An empty value starts at the current character */
+        if (attr->value.len == 0)
+            attr->value.data_off = start;
+
+        attr->value.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the single-quoted attribute value state.
+ *
+ * A closing '\'' returns to BEFORE_ATTRIBUTE_NAME, '&' hands over to the
+ * entity-in-attribute-value state (remembering this state in
+ * context.prev_state), EOF emits the current tag, and any other character is
+ * added to the value of the attribute added last.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '&') {
+        /* Don't eat the '&'; entity consumption handles this */
+        tokeniser->context.prev_state = tokeniser->state;
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_ENTITY_IN_ATTRIBUTE_VALUE;
+        return true;
+    }
+
+    if (ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = *tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+        return true;
+    }
+
+    if (ch == '\'') {
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME;
+    } else {
+        hubbub_attribute *attr;
+        uint32_t start;
+        size_t char_len;
+
+        start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        attr = &tag->attributes[tag->n_attributes - 1];
+
+        /* An empty value starts at the current character */
+        if (attr->value.len == 0)
+            attr->value.data_off = start;
+
+        attr->value.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the unquoted attribute value state.
+ *
+ * Whitespace ends the value and returns to BEFORE_ATTRIBUTE_NAME, '&' hands
+ * over to the entity-in-attribute-value state (remembering this state in
+ * context.prev_state), '>' / '<' / EOF emit the current tag, and any other
+ * character is added to the value of the attribute added last.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '&') {
+        /* Don't eat the '&'; entity consumption handles this */
+        tokeniser->context.prev_state = tokeniser->state;
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_ENTITY_IN_ATTRIBUTE_VALUE;
+        return true;
+    }
+
+    if (ch == '>' || ch == '<' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit current tag */
+        token.type = tokeniser->context.current_tag_type;
+        token.data.tag = *tag;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* '<' and EOF are not consumed */
+        if (ch == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' ||
+            ch == ' ') {
+        tokeniser->state =
+                HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME;
+    } else {
+        hubbub_attribute *attr;
+        uint32_t start;
+        size_t char_len;
+
+        start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        attr = &tag->attributes[tag->n_attributes - 1];
+
+        /* An empty value starts at the current character */
+        if (attr->value.len == 0)
+            attr->value.data_off = start;
+
+        attr->value.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle an entity inside an attribute value.
+ *
+ * While context.match_entity.complete is false the work is delegated to
+ * hubbub_tokeniser_consume_entity(). Once it is true, the current input
+ * character is added to the value of the attribute added last, the complete
+ * flag is cleared and the state saved in context.prev_state is restored.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return the result of hubbub_tokeniser_consume_entity() while the entity
+ *         is incomplete, true otherwise
+ */
+bool hubbub_tokeniser_handle_entity_in_attribute_value(
+        hubbub_tokeniser *tokeniser)
+{
+    hubbub_tag *tag = &tokeniser->context.current_tag;
+    hubbub_attribute *attr;
+    uint32_t ch;
+    uint32_t start;
+    size_t char_len;
+
+    if (tokeniser->context.match_entity.complete == false)
+        return hubbub_tokeniser_consume_entity(tokeniser);
+
+    ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD || ch == HUBBUB_INPUTSTREAM_EOF) {
+        /* Should never happen */
+        abort();
+    }
+
+    start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+    attr = &tag->attributes[tag->n_attributes - 1];
+
+    /* An empty value starts at the current character */
+    if (attr->value.len == 0)
+        attr->value.data_off = start;
+
+    attr->value.len += char_len;
+
+    /* Reset for next time */
+    tokeniser->context.match_entity.complete = false;
+
+    /* And back to the previous state */
+    tokeniser->state = tokeniser->context.prev_state;
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the bogus-comment state.
+ *
+ * Adds input characters to context.current_comment until '>' (which is
+ * consumed) or EOF is seen, then emits the comment token and switches to the
+ * data state.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD before
+ *         the comment ended (what was collected so far stays in the
+ *         context), true once the comment has been emitted
+ */
+bool hubbub_tokeniser_handle_bogus_comment(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tokeniser_context *ctx = &tokeniser->context;
+    hubbub_token token;
+
+    for (;;) {
+        const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+        uint32_t start;
+        size_t char_len;
+
+        if (ch == HUBBUB_INPUTSTREAM_OOD)
+            return false;
+
+        if (ch == HUBBUB_INPUTSTREAM_EOF)
+            break;
+
+        if (ch == '>') {
+            /* The terminator is consumed but not stored */
+            hubbub_inputstream_advance(tokeniser->input);
+            break;
+        }
+
+        start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        if (ctx->current_comment.len == 0)
+            ctx->current_comment.data_off = start;
+        ctx->current_comment.len += char_len;
+
+        hubbub_inputstream_advance(tokeniser->input);
+    }
+
+    /* Emit comment */
+    token.type = HUBBUB_TOKEN_COMMENT;
+    token.data.comment = ctx->current_comment;
+
+    hubbub_tokeniser_emit_token(tokeniser, &token);
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+    return true;
+}
+
+/**
+ * Handle the markup-declaration-open state.
+ *
+ * '-' leads to COMMENT_START, 'D' or 'd' starts DOCTYPE keyword matching
+ * (match_doctype.count is set to 1), and anything else resets the current
+ * comment and falls into BOGUS_COMMENT without consuming the character.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_markup_declaration_open(
+        hubbub_tokeniser *tokeniser)
+{
+    hubbub_tokeniser_context *ctx = &tokeniser->context;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == 'D' || ch == 'd') {
+        hubbub_inputstream_uppercase(tokeniser->input);
+        ctx->match_doctype.count = 1;
+        tokeniser->state = HUBBUB_TOKENISER_STATE_MATCH_DOCTYPE;
+        hubbub_inputstream_advance(tokeniser->input);
+        return true;
+    }
+
+    if (ch == '-') {
+        tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_START;
+        hubbub_inputstream_advance(tokeniser->input);
+        return true;
+    }
+
+    /* Neither a comment nor a DOCTYPE */
+    ctx->current_comment.data_off = 0;
+    ctx->current_comment.len = 0;
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
+
+    return true;
+}
+
+/**
+ * Handle the comment-start state.
+ *
+ * Resets the current comment. A '-' is consumed and leads to COMMENT;
+ * anything else pushes a '-' back onto the input and falls into
+ * BOGUS_COMMENT.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_comment_start(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tokeniser_context *ctx = &tokeniser->context;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    ctx->current_comment.data_off = 0;
+    ctx->current_comment.len = 0;
+
+    if (ch != '-') {
+        hubbub_inputstream_push_back(tokeniser->input, '-');
+        tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
+        return true;
+    }
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the comment state.
+ *
+ * '-' is consumed and leads to COMMENT_DASH, EOF emits the comment collected
+ * so far and returns to the data state, and any other character is added to
+ * context.current_comment.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tokeniser_context *ctx = &tokeniser->context;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit comment */
+        token.type = HUBBUB_TOKEN_COMMENT;
+        token.data.comment = ctx->current_comment;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+        return true;
+    }
+
+    if (ch == '-') {
+        tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_DASH;
+    } else {
+        uint32_t start;
+        size_t char_len;
+
+        start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        /* An empty comment starts at the current character */
+        if (ctx->current_comment.len == 0)
+            ctx->current_comment.data_off = start;
+        ctx->current_comment.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+bool hubbub_tokeniser_handle_comment_dash(hubbub_tokeniser *tokeniser) /*
+ * Handle the comment-dash state.
+ *
+ * - '-': switch to COMMENT_END and consume the character.
+ * - EOF: emit context.current_comment as a comment token and switch to DATA.
+ * - anything else: extend the comment so that it runs through the current
+ *   character, switch back to COMMENT and consume the character.
+ *
+ * Returns false when the input stream reports HUBBUB_INPUTSTREAM_OOD, true
+ * otherwise.
+ *
+ * NOTE(review): when current_comment.len is 0 the range is restarted at the
+ * current character, so the '-' that was consumed on the way into this state
+ * does not appear to end up in the comment - confirm whether that is
+ * intended.
+ */
+{
+ uint32_t c = hubbub_inputstream_peek(tokeniser->input);
+
+ if (c == HUBBUB_INPUTSTREAM_OOD)
+ return false;
+
+ if (c == '-') {
+ tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_END;
+ hubbub_inputstream_advance(tokeniser->input);
+ } else if (c == HUBBUB_INPUTSTREAM_EOF) {
+ hubbub_token token;
+
+ /* Emit comment */
+ token.type = HUBBUB_TOKEN_COMMENT;
+ token.data.comment = tokeniser->context.current_comment;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+ } else {
+ uint32_t pos;
+ size_t len;
+
+ pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+ if (tokeniser->context.current_comment.len == 0) {
+ tokeniser->context.current_comment.data_off = pos;
+ } else {
+ /* Need to do this to get length of '-' */
+ len += pos - /* distance from comment start to here, plus this character */
+ tokeniser->context.current_comment.data_off;
+ }
+
+ tokeniser->context.current_comment.len = len; /* range now ends after the current character */
+
+ tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
+
+ hubbub_inputstream_advance(tokeniser->input);
+ }
+
+ return true;
+}
+
+/**
+ * Handle the comment-end state (two '-' characters have been consumed and
+ * are not yet part of the comment).
+ *
+ * - '>' emits the comment, switches to the data state and is consumed.
+ * - EOF emits the comment and switches to the data state.
+ * - '-' is a further dash: exactly one of the pending dashes joins the
+ *   comment, the state stays COMMENT_END and the character is consumed.
+ * - Anything else extends the comment through the current character (taking
+ *   in the pending dashes when the comment is non-empty), switches back to
+ *   COMMENT and consumes the character.
+ *
+ * The '-' case previously set the length to pos - data_off, which took in
+ * both pending dashes instead of one, and for an empty comment anchored the
+ * range at the current dash rather than at the first pending one.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_comment_end(hubbub_tokeniser *tokeniser)
+{
+    hubbub_tokeniser_context *ctx = &tokeniser->context;
+    uint32_t c = hubbub_inputstream_peek(tokeniser->input);
+    uint32_t pos;
+    size_t len;
+
+    if (c == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (c == '>' || c == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit comment */
+        token.type = HUBBUB_TOKEN_COMMENT;
+        token.data.comment = ctx->current_comment;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* EOF is not consumed */
+        if (c == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+    if (c == '-') {
+        /* The two characters before pos are dashes of the same length as
+         * this one, so len can be used to step back over them. */
+        if (ctx->current_comment.len == 0) {
+            /* Comment becomes the first pending dash */
+            ctx->current_comment.data_off =
+                    (uint32_t) (pos - 2 * len);
+            ctx->current_comment.len = len;
+        } else {
+            /* End the range after the first pending dash; the
+             * dash just before pos and the current one stay pending */
+            ctx->current_comment.len =
+                    pos - ctx->current_comment.data_off - len;
+        }
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT_END;
+    } else {
+        if (ctx->current_comment.len == 0) {
+            /* NOTE(review): this restarts the range at the current
+             * character, so the two pending dashes do not appear
+             * to be included - left unchanged, confirm intent. */
+            ctx->current_comment.data_off = pos;
+        } else {
+            /* Need to do this to get length of '--' */
+            len += pos - ctx->current_comment.data_off;
+        }
+
+        ctx->current_comment.len = len;
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_COMMENT;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the match-doctype state.
+ *
+ * context.match_doctype.count holds how many letters of "DOCTYPE" have been
+ * matched so far. If the current character is the next expected letter in
+ * either case, hubbub_inputstream_uppercase() is called and the character is
+ * consumed; after the final 'E' the state becomes DOCTYPE. On any other
+ * character the letters matched so far are pushed back onto the input (last
+ * matched letter first), the current comment is reset and the state becomes
+ * BOGUS_COMMENT.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_match_doctype(hubbub_tokeniser *tokeniser)
+{
+    static const char keyword[] = "DOCTYPE";
+    hubbub_tokeniser_context *ctx = &tokeniser->context;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+    bool count_valid;
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    /* Counts outside 1..6 neither match nor push anything back */
+    count_valid = (1 <= ctx->match_doctype.count &&
+            ctx->match_doctype.count <= 6);
+
+    if (count_valid && (ch & ~0x20) ==
+            (uint32_t) keyword[ctx->match_doctype.count]) {
+        hubbub_inputstream_uppercase(tokeniser->input);
+
+        if (ctx->match_doctype.count == 6)
+            tokeniser->state = HUBBUB_TOKENISER_STATE_DOCTYPE;
+        else
+            ctx->match_doctype.count++;
+
+        hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (count_valid) {
+        size_t remaining = ctx->match_doctype.count;
+
+        /* Push back in reverse, ending with the 'D' */
+        while (remaining > 0) {
+            remaining--;
+            hubbub_inputstream_push_back(tokeniser->input,
+                    keyword[remaining]);
+        }
+    }
+
+    ctx->current_comment.data_off = 0;
+    ctx->current_comment.len = 0;
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_COMMENT;
+
+    return true;
+}
+
+/**
+ * Handle the doctype state.
+ *
+ * Consumes the current character if it is tab, LF, VT, FF or space, then
+ * always switches to BEFORE_DOCTYPE_NAME.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_doctype(hubbub_tokeniser *tokeniser)
+{
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    switch (ch) {
+    case '\t':
+    case '\n':
+    case '\v':
+    case '\f':
+    case ' ':
+        hubbub_inputstream_advance(tokeniser->input);
+        break;
+    default:
+        /* Anything else is left for the next state */
+        break;
+    }
+
+    tokeniser->state = HUBBUB_TOKENISER_STATE_BEFORE_DOCTYPE_NAME;
+
+    return true;
+}
+
+/**
+ * Handle the before-doctype-name state.
+ *
+ * Whitespace is skipped, '>' and EOF emit the current doctype and return to
+ * the data state, and any other character becomes the first character of the
+ * doctype name ('a'..'z' get hubbub_inputstream_uppercase() first), with
+ * the doctype's correct flag cleared.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_before_doctype_name(
+        hubbub_tokeniser *tokeniser)
+{
+    hubbub_doctype *doctype = &tokeniser->context.current_doctype;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '>' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit doctype */
+        token.type = HUBBUB_TOKEN_DOCTYPE;
+        token.data.doctype = *doctype;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* EOF is not consumed */
+        if (ch == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (!(ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' ||
+            ch == ' ')) {
+        uint32_t start;
+        size_t char_len;
+
+        if ('a' <= ch && ch <= 'z')
+            hubbub_inputstream_uppercase(tokeniser->input);
+
+        start = hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        /* The name begins with this character */
+        doctype->name.data_off = start;
+        doctype->name.len = char_len;
+        doctype->correct = false;
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DOCTYPE_NAME;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the doctype-name state.
+ *
+ * Whitespace ends the name and leads to AFTER_DOCTYPE_NAME. '>' emits the
+ * doctype with its correct flag set from an ASCII comparison of the name
+ * against "HTML"; EOF emits the doctype as stored. Any other character
+ * extends the name ('a'..'z' get hubbub_inputstream_uppercase() first).
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
+{
+    hubbub_doctype *doctype = &tokeniser->context.current_doctype;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '>' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit doctype */
+        token.type = HUBBUB_TOKEN_DOCTYPE;
+        token.data.doctype = *doctype;
+
+        /* Only a properly closed doctype has its name checked */
+        if (ch == '>') {
+            token.data.doctype.correct =
+                    (hubbub_inputstream_compare_range_ascii(
+                            tokeniser->input,
+                            token.data.doctype.name.data_off,
+                            token.data.doctype.name.len,
+                            "HTML", SLEN("HTML")) == 0);
+        }
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* EOF is not consumed */
+        if (ch == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' ||
+            ch == ' ') {
+        tokeniser->state = HUBBUB_TOKENISER_STATE_AFTER_DOCTYPE_NAME;
+    } else {
+        size_t char_len;
+
+        if ('a' <= ch && ch <= 'z')
+            hubbub_inputstream_uppercase(tokeniser->input);
+
+        /* Only the character length is needed here */
+        (void) hubbub_inputstream_cur_pos(tokeniser->input, &char_len);
+
+        doctype->name.len += char_len;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the after-doctype-name state.
+ *
+ * Whitespace is skipped. '>' emits the doctype with its correct flag set
+ * from an ASCII comparison of the name against "HTML"; EOF emits the doctype
+ * as stored. Any other character clears the stored correct flag, is consumed
+ * and leads to BOGUS_DOCTYPE.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_after_doctype_name(hubbub_tokeniser *tokeniser)
+{
+    hubbub_doctype *doctype = &tokeniser->context.current_doctype;
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '>' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit doctype */
+        token.type = HUBBUB_TOKEN_DOCTYPE;
+        token.data.doctype = *doctype;
+
+        /* Only a properly closed doctype has its name checked */
+        if (ch == '>') {
+            token.data.doctype.correct =
+                    (hubbub_inputstream_compare_range_ascii(
+                            tokeniser->input,
+                            token.data.doctype.name.data_off,
+                            token.data.doctype.name.len,
+                            "HTML", SLEN("HTML")) == 0);
+        }
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+
+        /* EOF is not consumed */
+        if (ch == '>')
+            hubbub_inputstream_advance(tokeniser->input);
+
+        return true;
+    }
+
+    if (!(ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' ||
+            ch == ' ')) {
+        /* Unexpected content after the name */
+        doctype->correct = false;
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_BOGUS_DOCTYPE;
+    }
+
+    hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Handle the bogus-doctype state.
+ *
+ * Characters are consumed and ignored until '>' (consumed) or EOF, at which
+ * point the current doctype is emitted and the state becomes DATA.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the input stream reported HUBBUB_INPUTSTREAM_OOD,
+ *         true otherwise
+ */
+bool hubbub_tokeniser_handle_bogus_doctype(hubbub_tokeniser *tokeniser)
+{
+    const uint32_t ch = hubbub_inputstream_peek(tokeniser->input);
+
+    if (ch == HUBBUB_INPUTSTREAM_OOD)
+        return false;
+
+    if (ch == '>' || ch == HUBBUB_INPUTSTREAM_EOF) {
+        hubbub_token token;
+
+        /* Emit doctype */
+        token.type = HUBBUB_TOKEN_DOCTYPE;
+        token.data.doctype = tokeniser->context.current_doctype;
+
+        hubbub_tokeniser_emit_token(tokeniser, &token);
+
+        tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
+    }
+
+    /* Everything except EOF is consumed */
+    if (ch != HUBBUB_INPUTSTREAM_EOF)
+        hubbub_inputstream_advance(tokeniser->input);
+
+    return true;
+}
+
+/**
+ * Begin consuming an entity (character reference)
+ *
+ * On first entry (match_entity.done_setup is false) the matching context
+ * is initialised from the current stream position, the state to return to
+ * is recorded, and the current character is consumed. The next character
+ * then selects the numbered ('#') or named entity state.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the stream reported HUBBUB_INPUTSTREAM_OOD after setup
+ *         (setup is not repeated on re-entry), true otherwise
+ */
+bool hubbub_tokeniser_consume_entity(hubbub_tokeniser *tokeniser)
+{
+	hubbub_tokeniser_context *ctx = &tokeniser->context;
+	uint32_t next;
+	uint32_t pos;
+	size_t len;
+
+	if (!ctx->match_entity.done_setup) {
+		pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
+
+		/* Matched range starts at (and so far covers only) the
+		 * current character */
+		ctx->match_entity.str.data_off = pos;
+		ctx->match_entity.str.len = len;
+		ctx->match_entity.prev_len = len;
+
+		/* Nothing matched yet */
+		ctx->match_entity.base = 0;
+		ctx->match_entity.codepoint = 0;
+		ctx->match_entity.had_data = false;
+		ctx->match_entity.complete = false;
+		ctx->match_entity.context = NULL;
+
+		/* Remember where to resume once matching is finished */
+		ctx->match_entity.return_state = tokeniser->state;
+
+		ctx->match_entity.done_setup = true;
+
+		hubbub_inputstream_advance(tokeniser->input);
+	}
+
+	next = hubbub_inputstream_peek(tokeniser->input);
+
+	if (next == HUBBUB_INPUTSTREAM_OOD)
+		return false;
+
+	if (next != '#') {
+		/* Leave the character for the named entity matcher */
+		tokeniser->state = HUBBUB_TOKENISER_STATE_NAMED_ENTITY;
+		return true;
+	}
+
+	/* '#' belongs to the matched range; only its length is needed */
+	(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+	ctx->match_entity.str.len += len;
+
+	tokeniser->state = HUBBUB_TOKENISER_STATE_NUMBERED_ENTITY;
+	hubbub_inputstream_advance(tokeniser->input);
+
+	return true;
+}
+
+/**
+ * Handle the body of a numeric character reference
+ *
+ * Determines the base on first entry (an 'x'/'X' selects hexadecimal and
+ * is consumed; anything else selects decimal), accumulates digits, eats an
+ * optional trailing ';', rewinds the stream to the start of the matched
+ * sequence and, if any digit was seen, replaces the matched range with the
+ * resulting code point. Values 0x80..0x9F are mapped through cp1252Table;
+ * zero and values above 0x10FFFF become U+FFFD.
+ *
+ * The accumulator saturates just above 0x10FFFF, so an arbitrarily long
+ * run of digits cannot wrap round to a valid-looking code point.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the stream reported HUBBUB_INPUTSTREAM_OOD (progress so
+ *         far is kept in the tokeniser context), true once the reference
+ *         has been processed and the saved return state restored
+ */
+bool hubbub_tokeniser_handle_numbered_entity(hubbub_tokeniser *tokeniser)
+{
+	hubbub_tokeniser_context *ctx = &tokeniser->context;
+	uint32_t c = hubbub_inputstream_peek(tokeniser->input);
+	size_t len;
+	hubbub_error error;
+
+	if (c == HUBBUB_INPUTSTREAM_OOD)
+		return false;
+
+	if (ctx->match_entity.base == 0) {
+		if ((c & ~0x20) == 'X') {
+			ctx->match_entity.base = 16;
+
+			(void) hubbub_inputstream_cur_pos(tokeniser->input,
+					&len);
+			ctx->match_entity.str.len += len;
+
+			hubbub_inputstream_advance(tokeniser->input);
+		} else {
+			ctx->match_entity.base = 10;
+		}
+	}
+
+	while ((c = hubbub_inputstream_peek(tokeniser->input)) !=
+			HUBBUB_INPUTSTREAM_EOF &&
+			c != HUBBUB_INPUTSTREAM_OOD) {
+		uint32_t digit;
+
+		if ('0' <= c && c <= '9') {
+			digit = c - '0';
+		} else if (ctx->match_entity.base == 16 &&
+				'A' <= (c & ~0x20) &&
+				(c & ~0x20) <= 'F') {
+			/* Clearing bit 5 folds 'a'..'f' onto 'A'..'F' */
+			digit = (c & ~0x20) - 'A' + 10;
+		} else {
+			break;
+		}
+
+		ctx->match_entity.had_data = true;
+
+		ctx->match_entity.codepoint =
+				ctx->match_entity.codepoint *
+				(uint32_t) ctx->match_entity.base + digit;
+
+		/* Saturate: anything beyond the Unicode range is replaced
+		 * by U+FFFD below, and clamping here keeps the 32 bit
+		 * accumulator from wrapping on further digits */
+		if (ctx->match_entity.codepoint > 0x10FFFF)
+			ctx->match_entity.codepoint = 0x110000;
+
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->match_entity.str.len += len;
+
+		hubbub_inputstream_advance(tokeniser->input);
+	}
+
+	if (c == HUBBUB_INPUTSTREAM_OOD)
+		return false;
+
+	/* Eat trailing semicolon, if any */
+	if (c == ';') {
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->match_entity.str.len += len;
+
+		hubbub_inputstream_advance(tokeniser->input);
+	}
+
+	/* Rewind the inputstream to start of matched sequence */
+	hubbub_inputstream_rewind(tokeniser->input,
+			ctx->match_entity.str.len);
+
+	if (ctx->match_entity.had_data) {
+		/* Had data, so calculate final codepoint */
+		if (0x80 <= ctx->match_entity.codepoint &&
+				ctx->match_entity.codepoint <= 0x9F) {
+			ctx->match_entity.codepoint =
+				cp1252Table[ctx->match_entity.codepoint -
+						0x80];
+		} else if (ctx->match_entity.codepoint == 0 ||
+				ctx->match_entity.codepoint > 0x10FFFF) {
+			ctx->match_entity.codepoint = 0xFFFD;
+		}
+
+		/* And replace the matched range with it */
+		error = hubbub_inputstream_replace_range(tokeniser->input,
+				ctx->match_entity.str.data_off,
+				ctx->match_entity.str.len,
+				ctx->match_entity.codepoint);
+		if (error != HUBBUB_OK) {
+			/** \todo handle memory exhaustion */
+		}
+	}
+
+	/* Reset for next time */
+	ctx->match_entity.done_setup = false;
+
+	/* Flag completion */
+	ctx->match_entity.complete = true;
+
+	/* And back to the state we were entered in */
+	tokeniser->state = ctx->match_entity.return_state;
+
+	return true;
+}
+
+/**
+ * Handle the body of a named character reference
+ *
+ * Feeds ASCII characters one at a time to hubbub_entities_search_step(),
+ * remembering the code point and matched length of the most recent
+ * successful match. A ';' immediately following that match is included in
+ * the range to be replaced. The stream is then rewound to the start of the
+ * processed sequence and, if anything matched, the matched range is
+ * replaced with the code point.
+ *
+ * \param tokeniser  Tokeniser instance
+ * \return false if the stream reported HUBBUB_INPUTSTREAM_OOD (search
+ *         progress is kept in the tokeniser context), true once matching
+ *         has finished and the saved return state has been restored
+ */
+bool hubbub_tokeniser_handle_named_entity(hubbub_tokeniser *tokeniser)
+{
+	hubbub_tokeniser_context *ctx = &tokeniser->context;
+	hubbub_error error;
+	uint32_t c;
+	size_t len;
+
+	for (;;) {
+		uint32_t cp;
+
+		c = hubbub_inputstream_peek(tokeniser->input);
+
+		if (c == HUBBUB_INPUTSTREAM_EOF ||
+				c == HUBBUB_INPUTSTREAM_OOD)
+			break;
+
+		/* Entity names are ASCII only */
+		if (c > 0x7F)
+			break;
+
+		error = hubbub_entities_search_step((uint8_t) c,
+				&cp,
+				&ctx->match_entity.context);
+
+		/* No further matches - use last found */
+		if (error == HUBBUB_INVALID)
+			break;
+
+		/* Character is part of the sequence processed so far */
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->match_entity.str.len += len;
+
+		if (error == HUBBUB_OK) {
+			/* Had a match - store it, and the length matched,
+			 * for the replacement below */
+			ctx->match_entity.codepoint = cp;
+			ctx->match_entity.prev_len =
+					ctx->match_entity.str.len;
+		}
+
+		hubbub_inputstream_advance(tokeniser->input);
+	}
+
+	if (c == HUBBUB_INPUTSTREAM_OOD)
+		return false;
+
+	/* Eat trailing semicolon, if any: only when it directly follows the
+	 * last match. The stream is not advanced, so the rewind below still
+	 * uses the length actually consumed. */
+	if (ctx->match_entity.codepoint != 0 && c == ';' &&
+			ctx->match_entity.prev_len ==
+			ctx->match_entity.str.len) {
+		(void) hubbub_inputstream_cur_pos(tokeniser->input, &len);
+		ctx->match_entity.prev_len += len;
+	}
+
+	/* Rewind the inputstream to start of processed sequence */
+	hubbub_inputstream_rewind(tokeniser->input,
+			ctx->match_entity.str.len);
+
+	/* Now, replace range, if we found a named entity */
+	if (ctx->match_entity.codepoint != 0) {
+		error = hubbub_inputstream_replace_range(tokeniser->input,
+				ctx->match_entity.str.data_off,
+				ctx->match_entity.prev_len,
+				ctx->match_entity.codepoint);
+		if (error != HUBBUB_OK) {
+			/** \todo handle memory exhaustion */
+		}
+	}
+
+	/* Reset for next time */
+	ctx->match_entity.done_setup = false;
+
+	/* Flag completion */
+	ctx->match_entity.complete = true;
+
+	/* And back to the state from whence we came */
+	tokeniser->state = ctx->match_entity.return_state;
+
+	return true;
+}
+
+/**
+ * Handle input stream buffer moving
+ *
+ * Records the new buffer location and length in the tokeniser, then
+ * forwards the notification to the client's buffer handler, if one is
+ * registered.
+ *
+ * \param buffer  Pointer to buffer
+ * \param len     Length of data in buffer (bytes)
+ * \param pw      Pointer to our context (the tokeniser instance)
+ */
+void hubbub_tokeniser_buffer_moved_handler(const uint8_t *buffer,
+		size_t len, void *pw)
+{
+	hubbub_tokeniser *tokeniser = pw;
+
+	tokeniser->input_buffer_len = len;
+	tokeniser->input_buffer = buffer;
+
+	/* No client handler: nothing further to notify */
+	if (tokeniser->buffer_handler == NULL)
+		return;
+
+	tokeniser->buffer_handler(buffer, len, tokeniser->buffer_pw);
+}
+
+/**
+ * Emit a token, performing sanity checks if necessary
+ *
+ * Does nothing if either argument is NULL or if no token handler is
+ * registered. For start and end tag tokens, attributes whose names compare
+ * equal are de-duplicated in place first: of each matching pair the entry
+ * with the higher index is dropped, and the token's attribute count is
+ * updated before the handler is called.
+ *
+ * \param tokeniser Tokeniser instance
+ * \param token Token to emit (its attribute array may be modified)
+ */
+void hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
+ hubbub_token *token)
+{
+ if (tokeniser == NULL || token == NULL)
+ return;
+
+ /* Nothing to do if there's no registered handler */
+ if (tokeniser->token_handler == NULL)
+ return;
+
+ if (token->type == HUBBUB_TOKEN_START_TAG ||
+ token->type == HUBBUB_TOKEN_END_TAG) {
+ uint32_t i, j;
+ uint32_t n_attributes = token->data.tag.n_attributes;
+ hubbub_attribute *attrs =
+ token->data.tag.attributes;
+
+ /* Discard duplicate attributes */
+ /* Every ordered pair (i, j) is compared; n_attributes shrinks as
+ * duplicates are removed, which also shortens both loops. */
+ for (i = 0; i < n_attributes; i++) {
+ for (j = 0; j < n_attributes; j++) {
+ uint32_t move;
+
+ /* Skip self-comparison; the length test avoids a range
+ * comparison when the names cannot be equal */
+ if (j == i ||
+ attrs[i].name.len !=
+ attrs[j].name.len ||
+ hubbub_inputstream_compare_range_cs(
+ tokeniser->input,
+ attrs[i].name.data_off,
+ attrs[j].name.data_off,
+ attrs[i].name.len) != 0) {
+ /* Attributes don't match */
+ continue;
+ }
+
+ /* Calculate amount to move */
+ /* The entry at the higher of the two indices is the one
+ * removed; "move" is the size in bytes of the entries
+ * that follow it. */
+ move = (n_attributes - 1 -
+ ((i < j) ? j : i)) *
+ sizeof(hubbub_attribute);
+
+ /* Close the gap by shifting the tail down one slot; if the
+ * removed entry was last there is nothing to shift */
+ if (move > 0) {
+ memmove((i < j) ? &attrs[j]
+ : &attrs[i],
+ (i < j) ? &attrs[j+1]
+ : &attrs[i+1],
+ move);
+ }
+
+ /* And reduce the number of attributes */
+ n_attributes--;
+ }
+ }
+
+ token->data.tag.n_attributes = n_attributes;
+ }
+
+ /* Finally, emit token */
+ tokeniser->token_handler(token, tokeniser->token_pw);
+}
diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h
new file mode 100644
index 0000000..20bbe20
--- /dev/null
+++ b/src/tokeniser/tokeniser.h
@@ -0,0 +1,71 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_tokeniser_tokeniser_h_
+#define hubbub_tokeniser_tokeniser_h_
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+#include "input/inputstream.h"
+
+typedef struct hubbub_tokeniser hubbub_tokeniser;
+
+/**
+ * Hubbub tokeniser option types
+ *
+ * Passed to hubbub_tokeniser_setopt() to select the option being
+ * configured. Each value has a correspondingly named member in
+ * hubbub_tokeniser_optparams.
+ */
+typedef enum hubbub_tokeniser_opttype {
+ HUBBUB_TOKENISER_TOKEN_HANDLER,
+ HUBBUB_TOKENISER_BUFFER_HANDLER,
+ HUBBUB_TOKENISER_ERROR_HANDLER,
+ HUBBUB_TOKENISER_CONTENT_MODEL,
+} hubbub_tokeniser_opttype;
+
+/**
+ * Hubbub tokeniser option parameters
+ *
+ * A union: only the member matching the option type passed alongside it
+ * is meaningful. Each handler member pairs a callback with the opaque
+ * pointer ("pw") stored with it.
+ */
+typedef union hubbub_tokeniser_optparams {
+ /* Token handler callback and its private data */
+ struct {
+ hubbub_token_handler handler;
+ void *pw;
+ } token_handler;
+
+ /* Buffer handler callback and its private data */
+ struct {
+ hubbub_buffer_handler handler;
+ void *pw;
+ } buffer_handler;
+
+ /* Error handler callback and its private data */
+ struct {
+ hubbub_error_handler handler;
+ void *pw;
+ } error_handler;
+
+ /* Content model selection */
+ struct {
+ hubbub_content_model model;
+ } content_model;
+} hubbub_tokeniser_optparams;
+
+/* Create a hubbub tokeniser reading from the given input stream, using
+ * alloc/pw for memory management */
+hubbub_tokeniser *hubbub_tokeniser_create(hubbub_inputstream *input,
+ hubbub_alloc alloc, void *pw);
+/* Destroy a hubbub tokeniser */
+void hubbub_tokeniser_destroy(hubbub_tokeniser *tokeniser);
+
+/* Configure a hubbub tokeniser: type selects the option, params carries
+ * the matching member of hubbub_tokeniser_optparams */
+hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser,
+ hubbub_tokeniser_opttype type,
+ hubbub_tokeniser_optparams *params);
+
+/* Process remaining data in the input stream */
+hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser);
+
+#endif
+
diff --git a/src/utils/Makefile b/src/utils/Makefile
new file mode 100644
index 0000000..59b5512
--- /dev/null
+++ b/src/utils/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Objects
+# (base names only; the directory prefix and .o suffix are added below)
+OBJS = dict errors utf8
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+# Release and debug objects are written to sibling directories of this one
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+# Remove this directory's objects from both output directories; the
+# leading '-' ignores failures
+clean:
+ -@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+# The remaining targets required by the top-level makefile have nothing to
+# do for this directory
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules
+# Release objects are compiled with NDEBUG defined
+../Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+# Debug objects are compiled with -g
+../Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/utils/dict.c b/src/utils/dict.c
new file mode 100644
index 0000000..f50ffab
--- /dev/null
+++ b/src/utils/dict.c
@@ -0,0 +1,219 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <stdbool.h>
+
+#include "utils/dict.h"
+
+/**
+ * Node in a dictionary tree
+ *
+ * The dictionary is a ternary search tree keyed on bytes: each node
+ * compares one key byte against ::split and branches to ::lt, ::eq or
+ * ::gt. A node whose split is '\0' terminates a key and carries the
+ * associated value in ::eq.
+ */
+typedef struct hubbub_dict_node {
+ uint8_t split; /**< Data to split on */
+ struct hubbub_dict_node *lt; /**< Subtree for data less than
+ * split */
+ struct hubbub_dict_node *eq; /**< Subtree for data equal to split
+ * If split == '\0', this stores the
+ * pointer to the actual data, not a
+ * subtree */
+ struct hubbub_dict_node *gt; /**< Subtree for data greater than
+ * split */
+} hubbub_dict_node;
+
+/** Dictionary object */
+struct hubbub_dict {
+ hubbub_dict_node *dict; /**< Root of tree */
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Pointer to client data */
+};
+
+/* Recursive helpers; both are defined later in this file */
+static void hubbub_dict_destroy_internal(hubbub_dict *dict,
+ hubbub_dict_node *root);
+static hubbub_dict_node *hubbub_dict_insert_internal(hubbub_dict *dict,
+ hubbub_dict_node *parent, const char *key,
+ const void *value);
+
+
+/**
+ * Create a dictionary
+ *
+ * The new dictionary is empty and remembers the allocator and client
+ * pointer for all later allocations.
+ *
+ * \param alloc  Memory (de)allocation function
+ * \param pw     Pointer to client-specific private data (may be NULL)
+ * \return Pointer to dictionary instance, or NULL on error (no allocator
+ *         supplied, or allocation failed)
+ */
+hubbub_dict *hubbub_dict_create(hubbub_alloc alloc, void *pw)
+{
+	hubbub_dict *result = NULL;
+
+	if (alloc != NULL)
+		result = alloc(NULL, sizeof(*result), pw);
+
+	if (result != NULL) {
+		result->dict = NULL;
+		result->alloc = alloc;
+		result->pw = pw;
+	}
+
+	return result;
+}
+
+/**
+ * Destroy a dictionary
+ *
+ * Releases every node in the tree and then the dictionary object itself.
+ * Values stored in the dictionary are not freed.
+ *
+ * \param dict  Dictionary to destroy (NULL is ignored)
+ */
+void hubbub_dict_destroy(hubbub_dict *dict)
+{
+	if (dict != NULL) {
+		hubbub_dict_destroy_internal(dict, dict->dict);
+
+		/* A size of zero asks the allocator to release the block */
+		dict->alloc(dict, 0, dict->pw);
+	}
+}
+
+/**
+ * Helper routine for dictionary destruction
+ *
+ * Frees the subtree rooted at ::root in post-order (children first, then
+ * the node itself).
+ *
+ * \param dict  Dictionary being destroyed
+ * \param root  Root node of dictionary (sub)tree to destroy (may be NULL)
+ */
+void hubbub_dict_destroy_internal(hubbub_dict *dict, hubbub_dict_node *root)
+{
+	hubbub_dict_node *middle;
+
+	if (root == NULL)
+		return;
+
+	/* In a terminator node, eq holds the client's value rather than a
+	 * subtree, so it must not be followed */
+	middle = (root->split == '\0') ? NULL : root->eq;
+
+	hubbub_dict_destroy_internal(dict, root->lt);
+	hubbub_dict_destroy_internal(dict, middle);
+	hubbub_dict_destroy_internal(dict, root->gt);
+
+	dict->alloc(root, 0, dict->pw);
+}
+
+/**
+ * Insert a key-value pair into a dictionary
+ *
+ * \param dict   Dictionary to insert into
+ * \param key    Key string
+ * \param value  Value to associate with key (may be NULL)
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if ::dict or ::key is NULL,
+ *         HUBBUB_NOMEM if a node could not be allocated
+ *
+ * On HUBBUB_NOMEM any nodes allocated before the failure remain in the
+ * tree; they are released when the dictionary is destroyed.
+ */
+hubbub_error hubbub_dict_insert(hubbub_dict *dict, const char *key,
+		const void *value)
+{
+	const hubbub_dict_node *node;
+	const uint8_t *k;
+
+	if (dict == NULL || key == NULL)
+		return HUBBUB_BADPARM;
+
+	dict->dict = hubbub_dict_insert_internal(dict, dict->dict,
+			key, value);
+
+	/* The helper has no way to report an allocation failure part-way
+	 * down the tree, so confirm that the complete path for the key,
+	 * including its terminating node, now exists. */
+	node = dict->dict;
+	k = (const uint8_t *) key;
+
+	while (node != NULL) {
+		if (*k < node->split) {
+			node = node->lt;
+		} else if (*k > node->split) {
+			node = node->gt;
+		} else if (*k == '\0') {
+			/* Reached the terminator node holding the value */
+			return HUBBUB_OK;
+		} else {
+			node = node->eq;
+			k++;
+		}
+	}
+
+	return HUBBUB_NOMEM;
+}
+
+/**
+ * Helper routine for insertion into dictionary
+ *
+ * Recursively descends (creating nodes as required) along the path
+ * selected by successive bytes of ::key. The key's terminating '\0' gets a
+ * node of its own, whose eq pointer stores ::value.
+ *
+ * \param dict    Dictionary being inserted into
+ * \param parent  Parent node of subtree to insert into (NULL to create one)
+ * \param key     Key string
+ * \param value   Value to associate with key
+ * \return Pointer to root of tree created, or NULL if ::parent was NULL
+ *         and a node could not be allocated
+ */
+hubbub_dict_node *hubbub_dict_insert_internal(hubbub_dict *dict,
+		hubbub_dict_node *parent, const char *key, const void *value)
+{
+	const uint8_t c = (uint8_t) key[0];
+	hubbub_dict_node *node = parent;
+
+	if (node == NULL) {
+		/* Empty subtree: start one that splits on this byte */
+		node = dict->alloc(NULL, sizeof(hubbub_dict_node), dict->pw);
+		if (node == NULL)
+			return NULL;
+
+		node->split = c;
+		node->lt = NULL;
+		node->eq = NULL;
+		node->gt = NULL;
+	}
+
+	if (c < node->split) {
+		node->lt = hubbub_dict_insert_internal(dict,
+				node->lt, key, value);
+	} else if (c > node->split) {
+		node->gt = hubbub_dict_insert_internal(dict,
+				node->gt, key, value);
+	} else if (c == '\0') {
+		/* End of key: the terminator's eq pointer holds the value */
+		node->eq = (hubbub_dict_node *) value;
+	} else {
+		/* Byte matched: continue with the remainder of the key */
+		node->eq = hubbub_dict_insert_internal(dict,
+				node->eq, key + 1, value);
+	}
+
+	return node;
+}
+
+/**
+ * Step-wise search for a key in a dictionary
+ *
+ * \param dict     Dictionary to search
+ * \param c        Character to look for
+ * \param result   Pointer to location for result
+ * \param context  Pointer to location for search context
+ * \return HUBBUB_OK if key found,
+ *         HUBBUB_NEEDDATA if more steps are required
+ *         HUBBUB_INVALID if nothing matches
+ *         HUBBUB_BADPARM if any pointer parameter is NULL
+ *
+ * The value pointed to by ::context must be NULL for the first call.
+ * Thereafter, pass in the same value as returned by the previous call.
+ * The context is opaque to the caller and should not be inspected.
+ *
+ * The location pointed to by ::result will be set to NULL unless a match
+ * is found.
+ *
+ * A key is recognised regardless of the order in which it and any longer
+ * keys sharing its prefix were inserted.
+ */
+hubbub_error hubbub_dict_search_step(hubbub_dict *dict, uint8_t c,
+		const void **result, void **context)
+{
+	bool match = false;
+	hubbub_dict_node *p;
+
+	if (dict == NULL || result == NULL || context == NULL)
+		return HUBBUB_BADPARM;
+
+	*result = NULL;
+
+	if (*context == NULL) {
+		p = dict->dict;
+	} else {
+		p = (hubbub_dict_node *) *context;
+	}
+
+	while (p != NULL) {
+		if (c < p->split) {
+			p = p->lt;
+		} else if (c > p->split) {
+			p = p->gt;
+		} else {
+			if (p->split == '\0') {
+				match = true;
+				p = NULL;
+			} else {
+				/* The characters consumed so far form a key
+				 * if the next level holds a terminator node.
+				 * '\0' sorts below every other byte, so the
+				 * terminator, if any, lies on the chain of
+				 * lt links from the root of that level; it
+				 * is the root itself only when the shorter
+				 * key was inserted first. */
+				const hubbub_dict_node *term = p->eq;
+
+				while (term != NULL && term->split != '\0')
+					term = term->lt;
+
+				if (term != NULL) {
+					match = true;
+					*result = (const void *) term->eq;
+				}
+
+				/* Next step resumes from the next level */
+				p = p->eq;
+			}
+
+			break;
+		}
+	}
+
+	*context = (void *) p;
+
+	return (match) ? HUBBUB_OK :
+			(p == NULL) ? HUBBUB_INVALID : HUBBUB_NEEDDATA;
+}
diff --git a/src/utils/dict.h b/src/utils/dict.h
new file mode 100644
index 0000000..2cde01d
--- /dev/null
+++ b/src/utils/dict.h
@@ -0,0 +1,31 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_utils_dict_h_
+#define hubbub_utils_dict_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/hubbub.h>
+
+typedef struct hubbub_dict hubbub_dict;
+
+/* Create a dictionary; returns NULL on error */
+hubbub_dict *hubbub_dict_create(hubbub_alloc alloc, void *pw);
+/* Destroy a dictionary */
+void hubbub_dict_destroy(hubbub_dict *dict);
+
+/* Insert a key-value pair into a dictionary (value may be NULL) */
+hubbub_error hubbub_dict_insert(hubbub_dict *dict, const char *key,
+ const void *value);
+
+/* Step-wise search for a key in a dictionary: feed one character per
+ * call, starting with *context == NULL and passing the updated context
+ * back in on each subsequent call */
+hubbub_error hubbub_dict_search_step(hubbub_dict *dict, uint8_t c,
+ const void **result, void **context);
+
+#endif
diff --git a/src/utils/errors.c b/src/utils/errors.c
new file mode 100644
index 0000000..e57ba6a
--- /dev/null
+++ b/src/utils/errors.c
@@ -0,0 +1,70 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <string.h>
+
+#include <hubbub/errors.h>
+
+/**
+ * Convert a hubbub error code to a string
+ *
+ * \param error  The error code to convert
+ * \return Pointer to a static, human-readable description of the error,
+ *         or NULL if the code is not recognised
+ */
+const char *hubbub_error_to_string(hubbub_error error)
+{
+	switch (error) {
+	case HUBBUB_OK:
+		return "No error";
+	case HUBBUB_NOMEM:
+		return "Insufficient memory";
+	case HUBBUB_BADPARM:
+		return "Bad parameter";
+	case HUBBUB_INVALID:
+		return "Invalid input";
+	case HUBBUB_FILENOTFOUND:
+		return "File not found";
+	case HUBBUB_NEEDDATA:
+		return "Insufficient data";
+	}
+
+	/* Not one of the known codes */
+	return NULL;
+}
+
+/**
+ * Convert a string representation of an error name to a hubbub error code
+ *
+ * The name must match in full: ::len has to equal the length of the error
+ * name, so a prefix such as "HUBBUB_" (or an empty string) is treated as
+ * unknown rather than matching the first name that starts with it. The
+ * string need not be NUL terminated.
+ *
+ * \param str  String containing error name
+ * \param len  Length of string (bytes)
+ * \return Hubbub error code, or HUBBUB_OK if unknown (including when
+ *         ::str is NULL)
+ */
+hubbub_error hubbub_error_from_string(const char *str, size_t len)
+{
+	static const struct {
+		const char *name;
+		size_t len;
+		hubbub_error code;
+	} names[] = {
+		{ "HUBBUB_OK", sizeof("HUBBUB_OK") - 1,
+				HUBBUB_OK },
+		{ "HUBBUB_NOMEM", sizeof("HUBBUB_NOMEM") - 1,
+				HUBBUB_NOMEM },
+		{ "HUBBUB_BADPARM", sizeof("HUBBUB_BADPARM") - 1,
+				HUBBUB_BADPARM },
+		{ "HUBBUB_INVALID", sizeof("HUBBUB_INVALID") - 1,
+				HUBBUB_INVALID },
+		{ "HUBBUB_FILENOTFOUND", sizeof("HUBBUB_FILENOTFOUND") - 1,
+				HUBBUB_FILENOTFOUND },
+		{ "HUBBUB_NEEDDATA", sizeof("HUBBUB_NEEDDATA") - 1,
+				HUBBUB_NEEDDATA },
+	};
+	size_t i;
+
+	if (str == NULL)
+		return HUBBUB_OK;
+
+	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+		/* Comparing lengths first rules out prefix matches and
+		 * bounds the comparison to bytes the caller supplied */
+		if (len == names[i].len &&
+				strncmp(str, names[i].name, len) == 0)
+			return names[i].code;
+	}
+
+	return HUBBUB_OK;
+}
diff --git a/src/utils/utf8.c b/src/utils/utf8.c
new file mode 100644
index 0000000..062d629
--- /dev/null
+++ b/src/utils/utf8.c
@@ -0,0 +1,368 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+/** \file
+ * UTF-8 manipulation functions (implementation).
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "utils/utf8.h"
+
+/**
+ * Number of continuation bytes for a given start byte
+ *
+ * Indexed by byte value, 16 entries per row:
+ *   0x00 - 0xBF  0 (ASCII, and also the continuation bytes 0x80 - 0xBF)
+ *   0xC0 - 0xDF  1
+ *   0xE0 - 0xEF  2
+ *   0xF0 - 0xF7  3
+ *   0xF8 - 0xFB  4
+ *   0xFC - 0xFF  5 (this includes 0xFE and 0xFF)
+ */
+static const uint8_t numContinuations[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
+};
+
+/**
+ * Convert a UTF-8 multibyte sequence into a single UCS4 character
+ *
+ * Encoding of UCS values outside the UTF-16 plane has been removed from
+ * RFC3629. This function conforms to RFC2279, however, and so accepts
+ * sequences of up to six bytes.
+ *
+ * Only the structure of the sequence is checked (a valid start byte
+ * followed by the right number of continuation bytes); overlong forms are
+ * not rejected.
+ *
+ * \param s     The sequence to process
+ * \param len   Length of sequence
+ * \param ucs4  Pointer to location to receive UCS4 character (host endian)
+ * \param clen  Pointer to location to receive byte length of UTF-8 sequence
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if any pointer parameter is NULL,
+ *         HUBBUB_NEEDDATA if ::len is too short for the sequence,
+ *         HUBBUB_INVALID if the start or a continuation byte is malformed
+ */
+inline hubbub_error hubbub_utf8_to_ucs4(const uint8_t *s, size_t len,
+		uint32_t *ucs4, size_t *clen)
+{
+	uint32_t c;
+	size_t n;	/* Number of continuation bytes expected */
+	size_t i;
+
+	if (s == NULL || ucs4 == NULL || clen == NULL)
+		return HUBBUB_BADPARM;
+
+	if (len == 0)
+		return HUBBUB_NEEDDATA;
+
+	/* Classify the start byte and keep only its payload bits. The mask
+	 * narrows by one bit for each extra byte in the sequence. */
+	if (*s < 0x80) {
+		c = *s;
+		n = 0;
+	} else if ((*s & 0xE0) == 0xC0) {
+		c = *s & 0x1F;
+		n = 1;
+	} else if ((*s & 0xF0) == 0xE0) {
+		c = *s & 0x0F;
+		n = 2;
+	} else if ((*s & 0xF8) == 0xF0) {
+		c = *s & 0x07;
+		n = 3;
+	} else if ((*s & 0xFC) == 0xF8) {
+		c = *s & 0x03;
+		n = 4;
+	} else if ((*s & 0xFE) == 0xFC) {
+		c = *s & 0x01;
+		n = 5;
+	} else {
+		return HUBBUB_INVALID;
+	}
+
+	if (len < n + 1)
+		return HUBBUB_NEEDDATA;
+
+	/* Each continuation byte contributes its low six bits */
+	for (i = 1; i <= n; i++) {
+		if ((s[i] & 0xC0) != 0x80)
+			return HUBBUB_INVALID;
+
+		c = (c << 6) | (uint32_t) (s[i] & 0x3F);
+	}
+
+	*ucs4 = c;
+	*clen = n + 1;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Convert a single UCS4 character into a UTF-8 multibyte sequence
+ *
+ * Encoding of UCS values outside the UTF-16 plane has been removed from
+ * RFC3629. This function conforms to RFC2279, however, and so may emit
+ * sequences of up to six bytes.
+ *
+ * \param ucs4  The character to process (0 <= c <= 0x7FFFFFFF) (host endian)
+ * \param s     Pointer to 6 byte long output buffer
+ * \param len   Pointer to location to receive length of multibyte sequence
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if ::s or ::len is NULL,
+ *         HUBBUB_INVALID if ::ucs4 is out of range (nothing is written)
+ */
+inline hubbub_error hubbub_utf8_from_ucs4(uint32_t ucs4, uint8_t *s,
+		size_t *len)
+{
+	uint32_t nbytes;
+	uint32_t i;
+	uint8_t lead;	/* Marker bits of the start byte */
+
+	if (s == NULL || len == NULL)
+		return HUBBUB_BADPARM;
+
+	if (ucs4 < 0x80) {
+		nbytes = 1;
+		lead = 0x00;
+	} else if (ucs4 < 0x800) {
+		nbytes = 2;
+		lead = 0xC0;
+	} else if (ucs4 < 0x10000) {
+		nbytes = 3;
+		lead = 0xE0;
+	} else if (ucs4 < 0x200000) {
+		nbytes = 4;
+		lead = 0xF0;
+	} else if (ucs4 < 0x4000000) {
+		nbytes = 5;
+		lead = 0xF8;
+	} else if (ucs4 <= 0x7FFFFFFF) {
+		nbytes = 6;
+		lead = 0xFC;
+	} else {
+		return HUBBUB_INVALID;
+	}
+
+	/* Fill continuation bytes from the end, six bits at a time */
+	for (i = nbytes - 1; i > 0; i--) {
+		s[i] = (uint8_t) (0x80 | (ucs4 & 0x3F));
+		ucs4 >>= 6;
+	}
+
+	/* The range checks above guarantee that what remains fits in the
+	 * payload bits of the start byte */
+	s[0] = (uint8_t) (lead | ucs4);
+
+	*len = nbytes;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Calculate the length (in characters) of a bounded UTF-8 string
+ *
+ * Only start bytes are inspected; continuation bytes are skipped without
+ * being validated. A sequence cut short by ::max is counted as one
+ * character.
+ *
+ * \param s    The string
+ * \param max  Maximum length (bytes)
+ * \param len  Pointer to location to receive length of string
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if ::s or ::len is NULL,
+ *         HUBBUB_INVALID if a byte that cannot start a character is found
+ *         where a start byte is expected (::len is left untouched)
+ */
+inline hubbub_error hubbub_utf8_length(const uint8_t *s, size_t max,
+		size_t *len)
+{
+	size_t remaining = max;
+	size_t count = 0;
+
+	if (s == NULL || len == NULL)
+		return HUBBUB_BADPARM;
+
+	while (remaining > 0) {
+		size_t step;
+
+		if ((*s & 0x80) == 0x00)
+			step = 1;
+		else if ((*s & 0xE0) == 0xC0)
+			step = 2;
+		else if ((*s & 0xF0) == 0xE0)
+			step = 3;
+		else if ((*s & 0xF8) == 0xF0)
+			step = 4;
+		else if ((*s & 0xFC) == 0xF8)
+			step = 5;
+		else if ((*s & 0xFE) == 0xFC)
+			step = 6;
+		else
+			return HUBBUB_INVALID;
+
+		/* Never step beyond the end of the buffer: a truncated
+		 * final sequence still counts as one character */
+		if (step > remaining)
+			step = remaining;
+
+		s += step;
+		remaining -= step;
+		count++;
+	}
+
+	*len = count;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Calculate the length (in bytes) of a UTF-8 character
+ *
+ * The length is derived from the first byte alone, using the
+ * numContinuations table; the following bytes are not examined.
+ *
+ * \param s    Pointer to start of character
+ * \param len  Pointer to location to receive length
+ * \return HUBBUB_OK on success, HUBBUB_BADPARM if either pointer is NULL
+ */
+inline hubbub_error hubbub_utf8_char_byte_length(const uint8_t *s,
+		size_t *len)
+{
+	if (s != NULL && len != NULL) {
+		/* Start byte plus however many continuations it announces */
+		*len = 1 + numContinuations[*s];
+
+		return HUBBUB_OK;
+	}
+
+	return HUBBUB_BADPARM;
+}
+
+/**
+ * Find previous legal UTF-8 char in string
+ *
+ * Steps backwards from ::off over continuation bytes until a byte that is
+ * not a continuation byte, or the start of the string, is reached.
+ *
+ * \param s        The string
+ * \param off      Offset in the string to start at
+ * \param prevoff  Pointer to location to receive offset of first byte of
+ *                 previous legal character
+ * \return HUBBUB_OK on success, HUBBUB_BADPARM if either pointer is NULL
+ */
+inline hubbub_error hubbub_utf8_prev(const uint8_t *s, uint32_t off,
+		uint32_t *prevoff)
+{
+	uint32_t cur = off;
+
+	if (s == NULL || prevoff == NULL)
+		return HUBBUB_BADPARM;
+
+	while (cur > 0) {
+		cur--;
+
+		/* Stop on the first byte that is not a continuation byte */
+		if ((s[cur] & 0xC0) != 0x80)
+			break;
+	}
+
+	*prevoff = cur;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Find next legal UTF-8 char in string
+ *
+ * Steps over the start byte at ::off, if there is one, and then over any
+ * continuation bytes that follow. The result may equal ::len if the end of
+ * the string is reached.
+ *
+ * \param s        The string (assumed valid)
+ * \param len      Maximum offset in string
+ * \param off      Offset in the string to start at
+ * \param nextoff  Pointer to location to receive offset of first byte of
+ *                 next legal character
+ * \return HUBBUB_OK on success, HUBBUB_BADPARM if a pointer is NULL or
+ *         ::off is not less than ::len
+ */
+inline hubbub_error hubbub_utf8_next(const uint8_t *s, uint32_t len,
+		uint32_t off, uint32_t *nextoff)
+{
+	uint32_t cur;
+
+	if (s == NULL || nextoff == NULL || off >= len)
+		return HUBBUB_BADPARM;
+
+	cur = off;
+
+	/* Skip current start byte (if present - may be mid-sequence) */
+	if ((s[cur] & 0x80) == 0x00 || (s[cur] & 0xC0) == 0xC0)
+		cur++;
+
+	/* Then skip the continuation bytes belonging to it */
+	for (; cur < len; cur++) {
+		if ((s[cur] & 0xC0) != 0x80)
+			break;
+	}
+
+	*nextoff = cur;
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Find next legal UTF-8 char in string
+ *
+ * Unlike hubbub_utf8_next(), the candidate character is checked: a start
+ * byte is accepted only if it is ASCII or is followed by the number of
+ * continuation bytes it announces. Start bytes failing the check are
+ * skipped and the search continues.
+ *
+ * \param s        The string (assumed to be of dubious validity)
+ * \param len      Maximum offset in string
+ * \param off      Offset in the string to start at
+ * \param nextoff  Pointer to location to receive offset of first byte of
+ *                 next legal character
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if a pointer is NULL or ::off is not less than
+ *         ::len,
+ *         HUBBUB_NEEDDATA if the data ends before a complete character is
+ *         found (::nextoff is left untouched)
+ */
+inline hubbub_error hubbub_utf8_next_paranoid(const uint8_t *s, uint32_t len,
+		uint32_t off, uint32_t *nextoff)
+{
+	bool valid;
+
+	if (s == NULL || off >= len || nextoff == NULL)
+		return HUBBUB_BADPARM;
+
+	/* Skip current start byte (if present - may be mid-sequence) */
+	if (s[off] < 0x80 || (s[off] & 0xC0) == 0xC0)
+		off++;
+
+	while (1) {
+		uint32_t n;	/* Continuations announced by start byte */
+		uint32_t i;
+
+		/* Find next possible start byte */
+		while (off < len && (s[off] & 0xC0) == 0x80)
+			off++;
+
+		/* Ran off end of data */
+		if (off == len)
+			return HUBBUB_NEEDDATA;
+
+		n = numContinuations[s[off]];
+
+		if (off + n >= len)
+			return HUBBUB_NEEDDATA;
+
+		/* Found if start byte is ascii,
+		 * or next n bytes are valid continuations.
+		 * The two tests are alternatives: a multibyte start byte
+		 * is never below 0x80, so it must not be subjected to the
+		 * ascii test as well. */
+		if (n == 0) {
+			valid = (s[off] < 0x80);
+		} else {
+			valid = true;
+
+			for (i = 1; i <= n; i++)
+				valid &= ((s[off + i] & 0xC0) == 0x80);
+		}
+
+		if (valid)
+			break;
+
+		/* Otherwise, skip this (invalid) start byte and try again */
+		off++;
+	}
+
+	*nextoff = off;
+
+	return HUBBUB_OK;
+}
+
diff --git a/src/utils/utf8.h b/src/utils/utf8.h
new file mode 100644
index 0000000..8836338
--- /dev/null
+++ b/src/utils/utf8.h
@@ -0,0 +1,38 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+/** \file
+ * UTF-8 manipulation functions (interface).
+ */
+
+/* Include guard: the macro defined must be the one tested, otherwise the
+ * header is not protected against multiple inclusion */
+#ifndef hubbub_utils_utf8_h_
+#define hubbub_utils_utf8_h_
+
+#include <inttypes.h>
+#include <stddef.h>	/* size_t */
+
+#include <hubbub/errors.h>
+
+/* Conversion between UTF-8 sequences and UCS4 characters */
+inline hubbub_error hubbub_utf8_to_ucs4(const uint8_t *s, size_t len,
+		uint32_t *ucs4, size_t *clen);
+inline hubbub_error hubbub_utf8_from_ucs4(uint32_t ucs4, uint8_t *s,
+		size_t *len);
+
+/* Length of a bounded string (in characters) and of one character
+ * (in bytes) */
+inline hubbub_error hubbub_utf8_length(const uint8_t *s, size_t max,
+		size_t *len);
+inline hubbub_error hubbub_utf8_char_byte_length(const uint8_t *s,
+		size_t *len);
+
+/* Stepping backwards and forwards through a string */
+inline hubbub_error hubbub_utf8_prev(const uint8_t *s, uint32_t off,
+		uint32_t *prevoff);
+inline hubbub_error hubbub_utf8_next(const uint8_t *s, uint32_t len,
+		uint32_t off, uint32_t *nextoff);
+
+/* As hubbub_utf8_next, for input of dubious validity */
+inline hubbub_error hubbub_utf8_next_paranoid(const uint8_t *s, uint32_t len,
+		uint32_t off, uint32_t *nextoff);
+
+#endif
+
diff --git a/src/utils/utils.h b/src/utils/utils.h
new file mode 100644
index 0000000..a1e0230
--- /dev/null
+++ b/src/utils/utils.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_utils_h_
+#define hubbub_utils_h_
+
+#ifndef max
+#define max(a,b) ((a)>(b)?(a):(b))
+#endif
+
+#ifndef min
+#define min(a,b) ((a)<(b)?(a):(b))
+#endif
+
+#ifndef SLEN
+/* Calculate length of a string constant */
+#define SLEN(s) (sizeof((s)) - 1) /* -1 for '\0' */
+#endif
+
+#ifndef UNUSED
+#define UNUSED(x) ((void) (x)) /* evaluate x and discard it; no assignment */
+#endif
+
+#endif
diff --git a/test/INDEX b/test/INDEX
new file mode 100644
index 0000000..100dd21
--- /dev/null
+++ b/test/INDEX
@@ -0,0 +1,15 @@
+# Index for libhubbub testcases
+#
+# Test Description DataDir
+
+aliases Encoding alias handling
+cscodec Charset codec implementation cscodec
+csdetect Charset detection csdetect
+dict Generic string dictionary
+entities Named entity dictionary
+filter Input stream filtering
+hubbub Library initialisation/finalisation
+inputstream Buffered input stream html
+parser Public parser API html
+tokeniser HTML tokeniser html
+tokeniser2 HTML tokeniser (again) tokeniser2 \ No newline at end of file
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..ef50365
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,63 @@
+# Makefile for Hubbub testcases
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Extend toolchain settings
+# We require the presence of libjson -- http://oss.metaparadigm.com/json-c/
+CFLAGS += -I${TOP}/src/ -I$(CURDIR) \
+ `${PKGCONFIG} ${PKGCONFIGFLAGS} --cflags json`
+LDFLAGS += `${PKGCONFIG} ${PKGCONFIGFLAGS} --libs json`
+
+# Release output
+RELEASE =
+
+# Debug output
+DEBUG =
+
+# Objects
+OBJS = aliases cscodec csdetect dict entities filter hubbub \
+ inputstream parser tokeniser tokeniser2
+OBJS += regression/cscodec-segv regression/filter-segv
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+release:
+
+debug:
+
+clean:
+ -@${RM} ${RMFLAGS} $(addsuffix ${EXEEXT}, $(OBJS))
+
+distclean:
+ -@${RM} ${RMFLAGS} log
+
+setup:
+
+export:
+
+test: $(OBJS)
+ @${PERL} testrunner.pl ${EXEEXT}
+
+# Pattern rules
+%: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@.o $<
+ @${LD} -g -o $@ $@.o ${LDFLAGS} -lhubbub-debug
+ @${RM} ${RMFLAGS} $@.o
diff --git a/test/README b/test/README
new file mode 100644
index 0000000..e4a895b
--- /dev/null
+++ b/test/README
@@ -0,0 +1,84 @@
+Hubbub testcases
+================
+
+Testcases for hubbub are self-contained binaries which test various parts
+of the hubbub library. These may make use of external data files to drive
+the testing.
+
+Testcase command lines
+----------------------
+
+Testcase command lines are in a unified format, thus:
+
+ <aliases_file> [ <data_file> ]
+
+The aliases file parameter will always be specified (as it is required for
+the library to work at all).
+
+The data file parameter is optional and may be provided on a test-by-test
+basis.
+
+Testcase output
+---------------
+
+Testcases may output anything at all to stdout. The final line of the
+output must begin with either PASS or FAIL (case sensitive), indicating
+the success status of the test.
+
+Test Index
+----------
+
+The test sources directory contains a file, named INDEX, which provides an
+index of all available test binaries. Any new test applications should be
+added to this index as they are created.
+
+The test index file format is as follows:
+
+ file = *line
+
+ line = ( entry / comment / blank ) LF
+
+ entry = testname 1*HTAB description [ 1*HTAB datadir ]
+ comment = "#" *non-newline
+ blank = 0<OCTET>
+
+ testname = 1*non-reserved
+ description = 1*non-reserved
+ datadir = 1*non-reserved
+
+ non-newline = VCHAR / WSP
+ non-reserved = VCHAR / SP
+
+Each entry contains a mandatory binary name and description followed by
+an optional data directory specifier. The data directory specifier is
+used to state the name of the directory containing data files for the
+test name. This directory will be searched for within the "data"
+directory in the source tree.
+
+If a data directory is specified, the test binary will be invoked for
+each data file listed within the data directory INDEX, passing the
+filename as the second parameter (<data_file>, above).
+
+Data Index
+----------
+
+Each test data directory contains a file, named INDEX, which provides an
+index of all available test data files.
+
+The data index file format is as follows:
+
+ file = *line
+
+ line = ( entry / comment / blank ) LF
+
+ entry = dataname 1*HTAB description
+ comment = "#" *non-newline
+ blank = 0<OCTET>
+
+ dataname = 1*non-reserved
+ description = 1*non-reserved
+
+ non-newline = VCHAR / WSP
+ non-reserved = VCHAR / SP
+
+Each entry contains a mandatory data file name and description.
diff --git a/test/aliases.c b/test/aliases.c
new file mode 100644
index 0000000..1cbf2a4
--- /dev/null
+++ b/test/aliases.c
@@ -0,0 +1,61 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "charset/aliases.h"
+
+#include "testutils.h"
+
+extern void hubbub_aliases_dump(void);
+
+/* Allocation callback handed to the library: defers to realloc(); pw is ignored. */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *resized = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return resized;
+}
+
+/*
+ * Alias handling test: loads the aliases file named by argv[1], then
+ * checks that an unknown name is rejected and that two known aliases
+ * resolve. Prints PASS and returns 0 on success; prints a FAIL line
+ * (or usage) and returns 1 otherwise.
+ */
+int main (int argc, char **argv)
+{
+	hubbub_aliases_canon *canon;
+
+	if (argc != 2) {
+		printf("Usage: %s <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Load the alias data, then dump it */
+	hubbub_aliases_create(argv[1], myrealloc, NULL);
+	hubbub_aliases_dump();
+
+	/* A name that is not an encoding must not resolve */
+	canon = hubbub_alias_canonicalise("moose", 5);
+	if (canon != NULL) {
+		printf("FAIL - found invalid encoding 'moose'\n");
+		return 1;
+	}
+
+	/* Known aliases must resolve */
+	canon = hubbub_alias_canonicalise("csinvariant", 11);
+	if (canon == NULL) {
+		printf("FAIL - failed finding encoding 'csinvariant'\n");
+		return 1;
+	}
+	printf("%s %d\n", canon->name, canon->mib_enum);
+
+	canon = hubbub_alias_canonicalise("nats-sefi-add", 13);
+	if (canon == NULL) {
+		printf("FAIL - failed finding encoding 'nats-sefi-add'\n");
+		return 1;
+	}
+	printf("%s %d\n", canon->name, canon->mib_enum);
+
+	/* Round-trip the last result through the name <-> MIB enum lookups */
+	printf("%d\n", hubbub_mibenum_from_name(canon->name,
+			strlen(canon->name)));
+	printf("%s\n", hubbub_mibenum_to_name(canon->mib_enum));
+
+	hubbub_aliases_destroy(myrealloc, NULL);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/cscodec.c b/test/cscodec.c
new file mode 100644
index 0000000..525b275
--- /dev/null
+++ b/test/cscodec.c
@@ -0,0 +1,247 @@
+#include <stdio.h>
+#include <string.h>
+
+#include <hubbub/hubbub.h>
+
+#include "charset/codec.h"
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+typedef struct line_ctx {
+ hubbub_charsetcodec *codec;
+
+ size_t buflen;
+ size_t bufused;
+ uint8_t *buf;
+ size_t explen;
+ size_t expused;
+ uint8_t *exp;
+
+ bool indata;
+ bool inexp;
+
+ hubbub_error exp_ret;
+
+ enum { ENCODE, DECODE } dir;
+} line_ctx;
+
+static bool handle_line(const char *data, size_t datalen, void *pw);
+static void run_test(line_ctx *ctx);
+static hubbub_error filter(uint32_t c, uint32_t **output,
+ size_t *outputlen, void *pw);
+
+
+/* Allocator passed to hubbub: a plain realloc() wrapper that ignores pw. */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/*
+ * Charset codec test driver.
+ *
+ * argv[1] is the aliases file used to initialise the library, argv[2]
+ * the test data file. Each testcase in the data file is fed through a
+ * UTF-8 codec by handle_line()/run_test(). Prints PASS and returns 0 on
+ * success; returns 1 on usage or allocation errors.
+ */
+int main(int argc, char **argv)
+{
+	line_ctx ctx;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	/* Creating a codec for NATS-SEFI-ADD is expected to fail */
+	assert(hubbub_charsetcodec_create("NATS-SEFI-ADD",
+			myrealloc, NULL) == NULL);
+
+	ctx.codec = hubbub_charsetcodec_create("UTF-8", myrealloc, NULL);
+	assert(ctx.codec != NULL);
+
+	ctx.buflen = parse_filesize(argv[2]);
+	if (ctx.buflen == 0)
+		return 1;
+
+	/* One allocation, split in two: input data, then expected output */
+	ctx.buf = malloc(2 * ctx.buflen);
+	if (ctx.buf == NULL) {
+		printf("Failed allocating %u bytes\n",
+				(unsigned int) ctx.buflen);
+		return 1;
+	}
+
+	ctx.exp = ctx.buf + ctx.buflen;
+	ctx.explen = ctx.buflen;
+
+	ctx.buf[0] = '\0';
+	ctx.exp[0] = '\0';
+	ctx.bufused = 0;
+	ctx.expused = 0;
+	ctx.indata = false;
+	ctx.inexp = false;
+	ctx.exp_ret = HUBBUB_OK;
+	/* run_test() always reads dir, so give it a defined value even if
+	 * the data file never supplies a "#data" directive. ENCODE is the
+	 * fallback handle_line() uses for anything other than "decode". */
+	ctx.dir = ENCODE;
+
+	assert(parse_testfile(argv[2], handle_line, &ctx) == true);
+
+	/* and run final test */
+	if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n')
+		ctx.bufused -= 1;
+
+	if (ctx.expused > 0 && ctx.exp[ctx.expused - 1] == '\n')
+		ctx.expused -= 1;
+
+	run_test(&ctx);
+
+	free(ctx.buf);
+
+	hubbub_charsetcodec_destroy(ctx.codec);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/*
+ * Per-line callback for parse_testfile().
+ *
+ * Lines starting with '#' are directives:
+ *   "#data <decode|other> <LOOSE|STRICT|other> [filter]"
+ *       starts a data section and configures the codec's error mode
+ *       (anything other than LOOSE/STRICT selects TRANSLIT) and,
+ *       optionally, the filter callback;
+ *   "#expected <error>"
+ *       starts the expected-output section; the remainder of the line
+ *       is converted with hubbub_error_from_string();
+ *   "#reset"
+ *       resets the codec.
+ * A directive seen while in the expected section ends the current
+ * testcase, which is run before the directive is processed.
+ * Any other line is appended to whichever section is active.
+ *
+ * data    - the line (including its trailing newline, if any)
+ * datalen - length of the line in bytes
+ * pw      - the line_ctx for this run
+ *
+ * Always returns true.
+ */
+bool handle_line(const char *data, size_t datalen, void *pw)
+{
+	line_ctx *ctx = (line_ctx *) pw;
+
+	if (data[0] == '#') {
+		if (ctx->inexp) {
+			/* This marks end of testcase, so run it */
+
+			/* Strip one trailing newline from each section. The
+			 * "> 0" checks stop an empty section from indexing
+			 * before the start of its buffer. */
+			if (ctx->bufused > 0 &&
+					ctx->buf[ctx->bufused - 1] == '\n')
+				ctx->bufused -= 1;
+
+			if (ctx->expused > 0 &&
+					ctx->exp[ctx->expused - 1] == '\n')
+				ctx->expused -= 1;
+
+			run_test(ctx);
+
+			ctx->buf[0] = '\0';
+			ctx->exp[0] = '\0';
+			ctx->bufused = 0;
+			ctx->expused = 0;
+			ctx->exp_ret = HUBBUB_OK;
+		}
+
+		if (strncasecmp(data+1, "data", 4) == 0) {
+			hubbub_charsetcodec_optparams params;
+			/* Skip "#data " */
+			const char *ptr = data + 6;
+
+			ctx->indata = true;
+			ctx->inexp = false;
+
+			if (strncasecmp(ptr, "decode", 6) == 0)
+				ctx->dir = DECODE;
+			else
+				ctx->dir = ENCODE;
+
+			/* Skip the direction word and its separator */
+			ptr += 7;
+
+			/* Each branch also skips the mode word + separator */
+			if (strncasecmp(ptr, "LOOSE", 5) == 0) {
+				params.error_mode.mode =
+					HUBBUB_CHARSETCODEC_ERROR_LOOSE;
+				ptr += 6;
+			} else if (strncasecmp(ptr, "STRICT", 6) == 0) {
+				params.error_mode.mode =
+					HUBBUB_CHARSETCODEC_ERROR_STRICT;
+				ptr += 7;
+			} else {
+				params.error_mode.mode =
+					HUBBUB_CHARSETCODEC_ERROR_TRANSLIT;
+				ptr += 9;
+			}
+
+			assert(hubbub_charsetcodec_setopt(ctx->codec,
+				HUBBUB_CHARSETCODEC_ERROR_MODE,
+				(hubbub_charsetcodec_optparams *) &params)
+				== HUBBUB_OK);
+
+			if (strncasecmp(ptr, "filter", 6) == 0) {
+				params.filter_func.filter = filter;
+				params.filter_func.pw = ctx;
+
+				assert(hubbub_charsetcodec_setopt(ctx->codec,
+					HUBBUB_CHARSETCODEC_FILTER_FUNC,
+					(hubbub_charsetcodec_optparams *)
+					&params) == HUBBUB_OK);
+			}
+		} else if (strncasecmp(data+1, "expected", 8) == 0) {
+			ctx->indata = false;
+			ctx->inexp = true;
+
+			/* Text after "#expected " names the expected result */
+			ctx->exp_ret = hubbub_error_from_string(data + 10,
+					datalen - 10 - 1 /* \n */);
+		} else if (strncasecmp(data+1, "reset", 5) == 0) {
+			ctx->indata = false;
+			ctx->inexp = false;
+
+			hubbub_charsetcodec_reset(ctx->codec);
+		}
+	} else {
+		if (ctx->indata) {
+			memcpy(ctx->buf + ctx->bufused, data, datalen);
+			ctx->bufused += datalen;
+		}
+		if (ctx->inexp) {
+			memcpy(ctx->exp + ctx->expused, data, datalen);
+			ctx->expused += datalen;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Run the testcase currently held in ctx.
+ *
+ * Feeds ctx->buf (ctx->bufused bytes) through the codec in the direction
+ * given by ctx->dir, asserts that the codec returns ctx->exp_ret, prints
+ * the produced and expected bytes as hex, and asserts that the first
+ * ctx->expused bytes of output equal ctx->exp.
+ */
+void run_test(line_ctx *ctx)
+{
+	static int testnum;
+	/* Output space offered to the codec */
+	size_t destlen = ctx->bufused * 4;
+	/* The array itself must also hold the expused bytes that are dumped
+	 * and compared below, and must never have zero length (a
+	 * zero-length variable length array is undefined behaviour). */
+	size_t destsize = (destlen > ctx->expused) ? destlen : ctx->expused;
+	uint8_t dest[destsize > 0 ? destsize : 1];
+	uint8_t *pdest = dest;
+	const uint8_t *psrc = ctx->buf;
+	size_t srclen = ctx->bufused;
+	size_t i;
+
+	/* Bytes the codec does not write are read below; keep them defined */
+	memset(dest, 0, sizeof dest);
+
+	if (ctx->dir == DECODE) {
+		assert(hubbub_charsetcodec_decode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	} else {
+		assert(hubbub_charsetcodec_encode(ctx->codec,
+				&psrc, &srclen,
+				&pdest, &destlen) == ctx->exp_ret);
+	}
+
+	printf("%d: Read '", ++testnum);
+	for (i = 0; i < ctx->expused; i++) {
+		printf("%c%c ", "0123456789abcdef"[(dest[i] >> 4) & 0xf],
+				"0123456789abcdef"[dest[i] & 0xf]);
+	}
+	printf("' Expected '");
+	for (i = 0; i < ctx->expused; i++) {
+		printf("%c%c ", "0123456789abcdef"[(ctx->exp[i] >> 4) & 0xf],
+				"0123456789abcdef"[ctx->exp[i] & 0xf]);
+	}
+	printf("'\n");
+
+	assert(memcmp(dest, ctx->exp, ctx->expused) == 0);
+}
+
+/*
+ * Codec filter callback used by "#data ... filter" testcases.
+ *
+ * Passes each character through unchanged as a single-element output.
+ * When c is HUBBUB_CHARSETCODEC_NULL the held character is cleared and
+ * no output is produced. pw is ignored. Always returns HUBBUB_OK.
+ */
+hubbub_error filter(uint32_t c, uint32_t **output,
+		size_t *outputlen, void *pw)
+{
+	/* Static so the pointer handed back stays valid after returning */
+	static uint32_t held;
+
+	UNUSED(pw);
+
+	if (c != HUBBUB_CHARSETCODEC_NULL) {
+		held = c;
+
+		*output = &held;
+		*outputlen = 1;
+	} else {
+		held = 0;
+	}
+
+	return HUBBUB_OK;
+}
diff --git a/test/csdetect.c b/test/csdetect.c
new file mode 100644
index 0000000..3b39972
--- /dev/null
+++ b/test/csdetect.c
@@ -0,0 +1,132 @@
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <hubbub/hubbub.h>
+
+#include "charset/aliases.h"
+#include "charset/detect.h"
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+typedef struct line_ctx {
+ size_t buflen;
+ size_t bufused;
+ uint8_t *buf;
+ char enc[64];
+ bool indata;
+ bool inenc;
+} line_ctx;
+
+static bool handle_line(const char *data, size_t datalen, void *pw);
+static void run_test(const uint8_t *data, size_t len, char *expected);
+
+/* Memory callback for hubbub: forwards to realloc() and ignores pw. */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *mem = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return mem;
+}
+
+/*
+ * Charset detection test driver.
+ *
+ * argv[1] is the aliases file used to initialise the library, argv[2]
+ * the test data file, whose testcases are collected by handle_line()
+ * and checked by run_test(). Prints PASS and returns 0 on success;
+ * returns 1 on usage or allocation errors.
+ */
+int main(int argc, char **argv)
+{
+	line_ctx ctx;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	/* The data buffer is sized from the test file's size */
+	ctx.buflen = parse_filesize(argv[2]);
+	if (!ctx.buflen)
+		return 1;
+
+	ctx.buf = malloc(ctx.buflen);
+	if (!ctx.buf) {
+		printf("Failed allocating %u bytes\n",
+				(unsigned int) ctx.buflen);
+		return 1;
+	}
+
+	/* Start with no section active and nothing collected */
+	ctx.indata = false;
+	ctx.inenc = false;
+	ctx.bufused = 0;
+	ctx.buf[0] = '\0';
+	ctx.enc[0] = '\0';
+
+	assert(parse_testfile(argv[2], handle_line, &ctx) == true);
+
+	/* The last testcase has no following directive to trigger it */
+	if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n')
+		ctx.bufused--;
+
+	run_test(ctx.buf, ctx.bufused, ctx.enc);
+
+	free(ctx.buf);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/*
+ * Per-line callback for parse_testfile().
+ *
+ * Lines starting with '#' are directives: "#data" starts the input
+ * section and "#encoding" starts the expected-encoding section. A
+ * directive seen while in the encoding section ends the current
+ * testcase, which is run before the directive is processed. Other
+ * lines are appended to the data buffer, or stored (without their
+ * trailing newline) as the expected encoding name.
+ *
+ * data    - the line; must be NUL-terminated
+ * datalen - length of the line in bytes
+ * pw      - the line_ctx for this run
+ *
+ * Always returns true.
+ */
+bool handle_line(const char *data, size_t datalen, void *pw)
+{
+	line_ctx *ctx = (line_ctx *) pw;
+
+	if (data[0] == '#') {
+		if (ctx->inenc) {
+			/* This marks end of testcase, so run it */
+
+			/* "> 0" stops an empty data section from indexing
+			 * before the start of the buffer */
+			if (ctx->bufused > 0 &&
+					ctx->buf[ctx->bufused - 1] == '\n')
+				ctx->bufused -= 1;
+
+			run_test(ctx->buf, ctx->bufused, ctx->enc);
+
+			ctx->buf[0] = '\0';
+			ctx->enc[0] = '\0';
+			ctx->bufused = 0;
+		}
+
+		ctx->indata = (strncasecmp(data+1, "data", 4) == 0);
+		ctx->inenc = (strncasecmp(data+1, "encoding", 8) == 0);
+	} else {
+		if (ctx->indata) {
+			memcpy(ctx->buf + ctx->bufused, data, datalen);
+			ctx->bufused += datalen;
+		}
+		if (ctx->inenc) {
+			size_t enclen;
+
+			/* enc is a fixed-size array: copy with a bound so an
+			 * over-long line is truncated, not written past it */
+			snprintf(ctx->enc, sizeof ctx->enc, "%s", data);
+
+			enclen = strlen(ctx->enc);
+			if (enclen > 0 && ctx->enc[enclen - 1] == '\n')
+				ctx->enc[enclen - 1] = '\0';
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Run charset detection over one testcase and check the result.
+ *
+ * data/len  - the document bytes to examine
+ * expected  - name of the encoding that should be detected
+ *
+ * Asserts that extraction succeeds, that a non-zero MIB enum is
+ * produced, and that it equals the MIB enum of the expected name.
+ */
+void run_test(const uint8_t *data, size_t len, char *expected)
+{
+	static int testnum;
+	hubbub_charset_source source;
+	uint16_t mibenum;
+	size_t explen = strlen(expected);
+
+	assert(hubbub_charset_extract(&data, &len,
+			&mibenum, &source) == HUBBUB_OK);
+
+	assert(mibenum != 0);
+
+	printf("%d: Detected charset %s (%d) Source %d Expected %s (%d)\n",
+			++testnum, hubbub_mibenum_to_name(mibenum),
+			mibenum, source, expected,
+			hubbub_mibenum_from_name(expected, explen));
+
+	assert(mibenum == hubbub_mibenum_from_name(expected, explen));
+}
diff --git a/test/data/Aliases b/test/data/Aliases
new file mode 100644
index 0000000..db61ff1
--- /dev/null
+++ b/test/data/Aliases
@@ -0,0 +1,302 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments, blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form MIBenum Aliases...
+#
+US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
+ISO-10646-UTF-1 27 csISO10646UTF1
+ISO_646.basic:1983 28 ref csISO646basic1983
+INVARIANT 29 csINVARIANT
+ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion
+BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
+NATS-SEFI 31 iso-ir-8-1 csNATSSEFI
+NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD
+NATS-DANO 33 iso-ir-9-1 csNATSDANO
+NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD
+SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
+SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
+KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
+ISO-2022-KR 37 csISO2022KR
+EUC-KR 38 csEUCKR EUCKR
+ISO-2022-JP 39 csISO2022JP
+ISO-2022-JP-2 40 csISO2022JP2
+ISO-2022-CN 104
+ISO-2022-CN-EXT 105
+JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
+JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro
+IT 22 iso-ir-15 ISO646-IT csISO15Italian
+PT 43 iso-ir-16 ISO646-PT csISO16Portuguese
+ES 23 iso-ir-17 ISO646-ES csISO17Spanish
+greek7-old 44 iso-ir-18 csISO18Greek7Old
+latin-greek 45 iso-ir-19 csISO19LatinGreek
+DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German
+NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French
+Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1
+ISO_5427 48 iso-ir-37 csISO5427Cyrillic
+JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978
+BS_viewdata 50 iso-ir-47 csISO47BSViewdata
+INIS 51 iso-ir-49 csISO49INIS
+INIS-8 52 iso-ir-50 csISO50INIS8
+INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic
+ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981
+ISO_5428:1980 55 iso-ir-55 csISO5428Greek
+GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988
+GB_2312-80 57 iso-ir-58 chinese csISO58GB231280
+NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
+NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
+NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French
+videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1
+PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2
+ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2
+MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian
+JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
+greek7 64 iso-ir-88 csISO88Greek7
+ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
+iso-ir-90 66 csISO90
+JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a
+JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
+JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
+JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
+JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
+JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana
+ISO_2033-1983 73 iso-ir-98 e13b csISO2033
+ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
+ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
+ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
+T.61-7bit 75 iso-ir-102 csISO102T617bit
+T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit
+ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
+ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
+ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic
+CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
+CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
+CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr
+ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
+ISO-8859-6-E 81 csISO88596E ISO_8859-6-E
+ISO-8859-6-I 82 csISO88596I ISO_8859-6-I
+ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
+T.101-G2 83 iso-ir-128 csISO128T101G2
+ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
+ISO-8859-8-E 84 csISO88598E ISO_8859-8-E
+ISO-8859-8-I 85 csISO88598I ISO_8859-8-I
+CSN_369103 86 iso-ir-139 csISO139CSN369103
+JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
+ISO_6937-2-add 14 iso-ir-142 csISOTextComm
+IEC_P27-1 88 iso-ir-143 csISO143IECP271
+ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
+JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian
+JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian
+ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
+greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT
+NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba
+ISO_6937-2-25 93 iso-ir-152 csISO6937Add
+GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
+ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp
+ISO_10367-box 96 iso-ir-155 csISO10367Box
+ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
+latin-lap 97 lap iso-ir-158 csISO158Lap
+JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990
+DS_2089 99 DS2089 ISO646-DK dk csISO646Danish
+us-dk 100 csUSDK
+dk-us 101 csDKUS
+JIS_X0201 15 X0201 csHalfWidthKatakana
+KSC5636 102 ISO646-KR csKSC5636
+ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2
+ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4
+DEC-MCS 2008 dec csDECMCS
+hp-roman8 2004 roman8 r8 csHPRoman8
+macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
+IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
+IBM038 2029 EBCDIC-INT cp038 csIBM038
+IBM273 2030 CP273 csIBM273
+IBM274 2031 EBCDIC-BE CP274 csIBM274
+IBM275 2032 EBCDIC-BR cp275 csIBM275
+IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
+IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
+IBM280 2035 CP280 ebcdic-cp-it csIBM280
+IBM281 2036 EBCDIC-JP-E cp281 csIBM281
+IBM284 2037 CP284 ebcdic-cp-es csIBM284
+IBM285 2038 CP285 ebcdic-cp-gb csIBM285
+IBM290 2039 cp290 EBCDIC-JP-kana csIBM290
+IBM297 2040 cp297 ebcdic-cp-fr csIBM297
+IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420
+IBM423 2042 cp423 ebcdic-cp-gr csIBM423
+IBM424 2043 cp424 ebcdic-cp-he csIBM424
+IBM437 2011 cp437 437 csPC8CodePage437
+IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
+IBM775 2087 cp775 csPC775Baltic
+IBM850 2009 cp850 850 csPC850Multilingual
+IBM851 2045 cp851 851 csIBM851
+IBM852 2010 cp852 852 csPCp852
+IBM855 2046 cp855 855 csIBM855
+IBM857 2047 cp857 857 csIBM857
+IBM860 2048 cp860 860 csIBM860
+IBM861 2049 cp861 861 cp-is csIBM861
+IBM862 2013 cp862 862 csPC862LatinHebrew
+IBM863 2050 cp863 863 csIBM863
+IBM864 2051 cp864 csIBM864
+IBM865 2052 cp865 865 csIBM865
+IBM866 2086 cp866 866 csIBM866
+IBM868 2053 CP868 cp-ar csIBM868
+IBM869 2054 cp869 869 cp-gr csIBM869
+IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
+IBM871 2056 CP871 ebcdic-cp-is csIBM871
+IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880
+IBM891 2058 cp891 csIBM891
+IBM903 2059 cp903 csIBM903
+IBM904 2060 cp904 904 csIBBM904
+IBM905 2061 CP905 ebcdic-cp-tr csIBM905
+IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918
+IBM1026 2063 CP1026 csIBM1026
+EBCDIC-AT-DE 2064 csIBMEBCDICATDE
+EBCDIC-AT-DE-A 2065 csEBCDICATDEA
+EBCDIC-CA-FR 2066 csEBCDICCAFR
+EBCDIC-DK-NO 2067 csEBCDICDKNO
+EBCDIC-DK-NO-A 2068 csEBCDICDKNOA
+EBCDIC-FI-SE 2069 csEBCDICFISE
+EBCDIC-FI-SE-A 2070 csEBCDICFISEA
+EBCDIC-FR 2071 csEBCDICFR
+EBCDIC-IT 2072 csEBCDICIT
+EBCDIC-PT 2073 csEBCDICPT
+EBCDIC-ES 2074 csEBCDICES
+EBCDIC-ES-A 2075 csEBCDICESA
+EBCDIC-ES-S 2076 csEBCDICESS
+EBCDIC-UK 2077 csEBCDICUK
+EBCDIC-US 2078 csEBCDICUS
+UNKNOWN-8BIT 2079 csUnknown8BiT
+MNEMONIC 2080 csMnemonic
+MNEM 2081 csMnem
+VISCII 2082 csVISCII
+VIQR 2083 csVIQR
+KOI8-R 2084 csKOI8R
+KOI8-U 2088
+IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro
+IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro
+IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro
+IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro
+IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
+IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
+IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro
+IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro
+IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro
+IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro
+IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro
+IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro
+Big5-HKSCS 2101
+IBM1047 2102 IBM-1047
+PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian
+Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251
+KOI7-switched 2105
+UNICODE-1-1 1010 csUnicode11
+SCSU 1011
+UTF-7 1012
+UTF-16BE 1013
+UTF-16LE 1014
+UTF-16 1015
+CESU-8 1016 csCESU-8
+UTF-32 1017
+UTF-32BE 1018
+UTF-32LE 1019
+BOCU-1 1020 csBOCU-1
+UNICODE-1-1-UTF-7 103 csUnicode11UTF7
+UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
+ISO-8859-13 109 8859_13 ISO8859-13
+ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
+ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15
+ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
+GBK 113 CP936 MS936 windows-936
+GB18030 114
+OSD_EBCDIC_DF04_15 115
+OSD_EBCDIC_DF03_IRV 116
+OSD_EBCDIC_DF04_1 117
+JIS_Encoding 16 csJISEncoding
+Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS
+EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
+Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese
+ISO-10646-UCS-Basic 1002 csUnicodeASCII
+ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646
+ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261
+ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268
+ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276
+ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264
+ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265
+ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1
+ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1
+ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2
+ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5
+Adobe-Standard-Encoding 2005 csAdobeStandardEncoding
+Ventura-US 2006 csVenturaUS
+Ventura-International 2007 csVenturaInternational
+PC8-Danish-Norwegian 2012 csPC8DanishNorwegian
+PC8-Turkish 2014 csPC8Turkish
+IBM-Symbols 2015 csIBMSymbols
+IBM-Thai 2016 csIBMThai
+HP-Legal 2017 csHPLegal
+HP-Pi-font 2018 csHPPiFont
+HP-Math8 2019 csHPMath8
+Adobe-Symbol-Encoding 2020 csHPPSMath
+HP-DeskTop 2021 csHPDesktop
+Ventura-Math 2022 csVenturaMath
+Microsoft-Publishing 2023 csMicrosoftPublishing
+Windows-31J 2024 csWindows31J
+GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB
+Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
+windows-1250 2250 CP1250 MS-EE
+windows-1251 2251 CP1251 MS-CYRL
+windows-1252 2252 CP1252 MS-ANSI
+windows-1253 2253 CP1253 MS-GREEK
+windows-1254 2254 CP1254 MS-TURK
+windows-1255 2255
+windows-1256 2256 CP1256 MS-ARAB
+windows-1257 2257 CP1257 WINBALTRIM
+windows-1258 2258
+TIS-620 2259
+HZ-GB-2312 2085
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737 3001
+#CP853 3002
+#CP856 3003
+CP874 3004 WINDOWS-874
+#CP922 3005
+#CP1046 3006
+#CP1124 3007
+#CP1125 3008 WINDOWS-1125
+#CP1129 3009
+#CP1133 3010 IBM-CP1133
+#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
+#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
+#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY 3014
+#GEORGIAN-PS 3015
+#KOI8-RU 3016
+#KOI8-T 3017
+#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
+#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
+#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI 3024 X-MAC-THAI MAC-THAI
+#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1 3026
+
+# From Unicode Lib
+ISO-IR-182 4000
+ISO-IR-197 4002
+ISO-2022-JP-1 4008
+MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB 4012
+ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT 4999 X-SYSTEM
+X-ACORN-LATIN1 5001
+X-ACORN-FUZZY 5002
diff --git a/test/data/cscodec/INDEX b/test/data/cscodec/INDEX
new file mode 100644
index 0000000..326cff5
--- /dev/null
+++ b/test/data/cscodec/INDEX
@@ -0,0 +1,5 @@
+# Index file for charset codec tests
+#
+# Test Description
+
+simple.dat Simple tests, designed to validate testdriver \ No newline at end of file
diff --git a/test/data/cscodec/simple.dat b/test/data/cscodec/simple.dat
new file mode 100644
index 0000000..6a3cad1
--- /dev/null
+++ b/test/data/cscodec/simple.dat
Binary files differ
diff --git a/test/data/csdetect/INDEX b/test/data/csdetect/INDEX
new file mode 100644
index 0000000..e292063
--- /dev/null
+++ b/test/data/csdetect/INDEX
@@ -0,0 +1,9 @@
+# Index file for charset detection tests
+#
+# Test Description
+
+bom.dat UTF Byte Order Mark detection tests
+non-ascii-meta.dat Tests for meta charsets claiming to be non-ASCII
+test-yahoo-jp.dat Yahoo! Japan, from html5lib testcases
+tests1.dat Assorted tests, including edge cases, from html5lib
+tests2.dat Further tests from html5lib
diff --git a/test/data/csdetect/bom.dat b/test/data/csdetect/bom.dat
new file mode 100644
index 0000000..9a2f719
--- /dev/null
+++ b/test/data/csdetect/bom.dat
Binary files differ
diff --git a/test/data/csdetect/non-ascii-meta.dat b/test/data/csdetect/non-ascii-meta.dat
new file mode 100644
index 0000000..ea2a707
--- /dev/null
+++ b/test/data/csdetect/non-ascii-meta.dat
@@ -0,0 +1,129 @@
+#data
+<html>
+<head>
+<meta charset="utf-16">
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset="utf-16le">
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset="utf-16be">
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset='utf-16'>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset='utf-16le'>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset='utf-16be'>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset=utf-16>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset=utf-16le>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset=utf-16be>
+#encoding
+windows-1252
+
+
+
+#data
+<html>
+<head>
+<meta charset="utf-32">
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset="utf-32le">
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset="utf-32be">
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset='utf-32'>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset='utf-32le'>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset='utf-32be'>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset=utf-32>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset=utf-32le>
+#encoding
+windows-1252
+
+#data
+<html>
+<head>
+<meta charset=utf-32be>
+#encoding
+windows-1252
+
+
diff --git a/test/data/csdetect/test-yahoo-jp.dat b/test/data/csdetect/test-yahoo-jp.dat
new file mode 100644
index 0000000..daf6125
--- /dev/null
+++ b/test/data/csdetect/test-yahoo-jp.dat
@@ -0,0 +1,10 @@
+#data
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
+<!--京-->
+<title>Yahoo! JAPAN</title>
+<meta name="description" content="日本最大級ã®ãƒãƒ¼ã‚¿ãƒ«ã‚µã‚¤ãƒˆã€‚検索ã€ã‚ªãƒ¼ã‚¯ã‚·ãƒ§ãƒ³ã€ãƒ‹ãƒ¥ãƒ¼ã‚¹ã€ãƒ¡ãƒ¼ãƒ«ã€ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã€ã‚·ãƒ§ãƒƒãƒ”ングã€ãªã©80以上ã®ã‚µãƒ¼ãƒ“スを展開。ã‚ãªãŸã®ç”Ÿæ´»ã‚’より豊ã‹ã«ã™ã‚‹ã€Œãƒ©ã‚¤ãƒ•ãƒ»ã‚¨ãƒ³ã‚¸ãƒ³ã€ã‚’目指ã—ã¦ã„ãã¾ã™ã€‚">
+<style type="text/css" media="all">
+#encoding
+euc-jp \ No newline at end of file
diff --git a/test/data/csdetect/tests1.dat b/test/data/csdetect/tests1.dat
new file mode 100644
index 0000000..8a62676
--- /dev/null
+++ b/test/data/csdetect/tests1.dat
@@ -0,0 +1,392 @@
+#data
+<!DOCTYPE HTML>
+<!-- (control test - for the other tests to work, this should pass - you may have to set your defaults appropriately) -->
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset='ISO-8859-9'>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset=ISO-8859-9>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta
+charset=ISO-8859-9>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<metacharset=ISO-8859-9>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-9">
+<!-- XXX this is a tough one, not sure how to do this one, unless we explictly do content= processing -->
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=ISO-8859-9" http-equiv="Content-Type">
+<!-- XXX this is a tough one, not sure how to do this one, unless we explictly do content= processing -->
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type" content=text/html; charset=ISO-8859-9>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type content="text/html; charset=ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type " content="text/html; charset=ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=ISO-8859-9" http-equiv="Content-Type ">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type>" content="text/html; charset=ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=ISO-8859-9" http-equiv="Content-Type>">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Style-Type" content="text/html; charset=ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=ISO-8859-9" http-equiv="Content-Style-Type">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta name="Content-Style-Type" content="text/html; charset=ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=ISO-8859-9" name="Content-Style-Type">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content=" text/html; charset = ISO-8859-9 ">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta content="
+text/html; charset=ISO-8859-9
+">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset="
+ISO-8859-9
+">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset=
+ISO-8859-9
+>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-9>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset=ISO-8859-9">
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta " charset=ISO-8859-9>
+<p>"</p>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta test" charset=ISO-8859-9>
+<p>"</p>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta test=" charset=ISO-8859-9>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test="' charset=ISO-8859-9>
+<p>"'</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test='" charset=ISO-8859-9>
+<p>'"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test="" charset=ISO-8859-9>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta test=x" charset=ISO-8859-9>
+<p>"</p>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta test=x" charset=ISO-8859-9>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta test=x charset=ISO-8859-9>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta charset=ISO-8859-9>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>">
+<meta charset=ISO-8859-9>
+<p>"</p>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-9">
+<meta charset="ISO-8859-1">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<!--<meta charset="ISO-8859-1">-->
+<meta charset="ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<!--<meta charset="ISO-8859-9">-->
+<meta charset="ISO-8859-1">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<!-- Starts with UTF-8 BOM -->
+#encoding
+UTF-8
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+<!-- Starts with UTF-8 BOM -->
+#encoding
+UTF-8
+
+#data
+<!-- 511 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!-- 512 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="ISO-8859-9">
+#encoding
+ISO-8859-9
+
+#data
+<!-- 1024 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!-- 1025 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!-- 2048 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxx-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!-- 2049 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxz-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data <!-- 4096 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data <!-- 4097 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!-- 8192 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!-- 8193 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="ISO-8859-9">
+#encoding
+Windows-1252
+
+#data
+<!-- multi-script test -->
+<script>alert('step 1 of 3 ("þ")')</script>
+<!-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<script>alert('step 2 of 3 ("þ")')</script>
+<meta charset="ISO-8859-9">
+<script>alert('step 3 of 3 ("þ")')</script>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="ISO-8859-' + '9">')</script>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="ISO-8859-9">')</script>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<script type="text/plain"><meta charset="ISO-8859-9"></script>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<style type="text/plain"><meta charset="ISO-8859-9"></style>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<p><meta charset="ISO-8859-9"></p>
+#encoding
+ISO-8859-9
+
+#data
+<!DOCTYPE HTML>
+<meta charset="bogus">
+<meta charset="ISO-8859-9">
+#encoding
+ISO-8859-9
diff --git a/test/data/csdetect/tests2.dat b/test/data/csdetect/tests2.dat
new file mode 100644
index 0000000..dd43f85
--- /dev/null
+++ b/test/data/csdetect/tests2.dat
@@ -0,0 +1,82 @@
+#data
+<meta
+#encoding
+windows-1252
+
+#data
+<
+#encoding
+windows-1252
+
+#data
+<!
+#encoding
+windows-1252
+
+#data
+<meta charset = "
+#encoding
+windows-1252
+
+#data
+<meta charset=EUC-jp
+#encoding
+windows-1252
+
+#data
+<meta <meta charset='EUC-jp'>
+#encoding
+EUC-jp
+
+#data
+<meta charset = 'EUC-jp'>
+#encoding
+EUC-jp
+
+
+#data
+<!-- -->
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+#encoding
+utf-8
+
+#data
+<!-- -->
+<meta http-equiv="Content-Type" content="text/html; charset=utf
+#encoding
+windows-1252
+
+#data
+<meta http-equiv="Content-Type<meta charset="utf-8">
+#encoding
+windows-1252
+
+#data
+<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
+#encoding
+utf-8
+
+#data
+<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
+#encoding
+windows-1252
+
+#data
+<meta
+#encoding
+windows-1252
+
+#data
+<meta charset =
+#encoding
+windows-1252
+
+#data
+<meta charset= utf-8
+#encoding
+windows-1252
+
+#data
+<meta content = "text/html;
+#encoding
+windows-1252
diff --git a/test/data/html/INDEX b/test/data/html/INDEX
new file mode 100644
index 0000000..03d6e04
--- /dev/null
+++ b/test/data/html/INDEX
@@ -0,0 +1,6 @@
+# Index file for generic HTML content
+#
+# Test Description
+
+section-tree-construction.html HTML5 tree construction algorithm
+web-apps.html HTML5 specification
diff --git a/test/data/html/section-tree-construction.html b/test/data/html/section-tree-construction.html
new file mode 100644
index 0000000..45ce9ab
--- /dev/null
+++ b/test/data/html/section-tree-construction.html
@@ -0,0 +1,2783 @@
+<!DOCTYPE HTML>
+
+
+<html lang="en-GB-hixie">
+ <head>
+ <title>HTML 5</title>
+ <link href="/style/specification" type="text/css" rel="stylesheet">
+ <link href="/images/icon" rel="icon">
+
+ <style type="text/css">
+ h4 + .element { margin-top: -2.5em; padding-top: 2em; }
+ h4 + p + .element { margin-top: -5em; padding-top: 4em; }
+ .element { background: #EEFFEE; color: black; margin: 0 0 1em -1em; padding: 0 1em 0.25em 0.75em; border-left: solid #99FF99 0.25em; -padding: 0; /* that last decl is for IE6. Try removing it, it's hilarious! */ }
+ .proposal { border: blue solid; padding: 1em; }
+ table.matrix, table.matrix td { border: none; text-align: right; }
+ table.matrix { margin-left: 2em; }
+ </style>
+
+ <link href="section-tokenisation.html#nav-bar" rel="prev" title="8.2.3. Tokenisation"><link href="index.html#contents" rel="index" title="Table of contents"><link href="section-namespaces.html#nav-bar" rel="next" title="8.3. Namespaces"></head><body class="draft"><div class="head">
+ <p><a href="http://www.whatwg.org/" class="logo" rel="home"><img src="/images/logo" alt="WHATWG"></a></p>
+
+ <h1 id="html-5">HTML 5</h1>
+
+ <h2 id="working" class="no-num no-toc">Working Draft — 12 June 2007</h2></div><nav id="nav-bar"><a href="section-tokenisation.html#nav-bar">&lt; 8.2.3. Tokenisation</a> – <a href="index.html#contents">Table of contents</a> – <a href="section-namespaces.html#nav-bar">8.3. Namespaces &gt;</a></nav><h4 id="tree-construction"><span class="secno">8.2.4. </span><dfn id="tree-construction0">Tree construction</dfn></h4>
+
+ <p>The input to the tree construction stage is a sequence of tokens from
+ the <a href="section-tokenisation.html#tokenisation0">tokenisation</a> stage. The tree construction
+ stage is associated with a DOM <code>Document</code> object when a parser
+ is created. The &quot;output&quot; of this stage consists of dynamically modifying
+ or extending that document's DOM tree.
+
+ </p><p>Tree construction passes through several phases. Initially, UAs must act
+ according to the steps described as being those of <a href="#the-initial0">the initial phase</a>.
+
+ </p><p>This specification does not define when an interactive user agent has to
+ render the <code>Document</code> available to the user, or when it has to
+ begin accepting user input.
+
+ </p><p>When the steps below require the UA to <dfn id="append">append a
+ character</dfn> to a node, the UA must collect it and all subsequent
+ consecutive characters that would be appended to that node, and insert one
+ <code>Text</code> node whose data is the concatenation of all those
+ characters.
+
+ </p><p id="mutation-during-parsing">DOM mutation events must not fire for changes
+ caused by the UA parsing the document. (Conceptually, the parser is not
+ mutating the DOM, it is constructing it.) This includes the parsing of any
+ content inserted using <code title="dom-document-write-HTML"><a href="section-dynamic.html#document.write0">document.write()</a></code> and <code title="dom-document-writeln"><a href="section-dynamic.html#document.writeln">document.writeln()</a></code> calls.<!--
+ XXX xref -->
+ <a href="#refsDOM3EVENTS">[DOM3EVENTS]</a></p>
+ <!-- XXX
+ what about innerHTML? -->
+
+ <p class="note">Not all of the tag names mentioned below are conformant tag
+ names in this specification; many are included to handle legacy content.
+ They still form part of the algorithm that implementations are required to
+ implement to claim conformance.
+
+ </p><p class="note">The algorithm described below places no limit on the depth of
+ the DOM tree generated, or on the length of tag names, attribute names,
+ attribute values, text nodes, etc. While implementors are encouraged to
+ avoid arbitrary limits, it is recognised that <a href="section-conformance.html#hardwareLimitations">practical concerns</a> will likely force user
+ agents to impose nesting depth limits.
+
+ </p><h5 id="the-initial"><span class="secno">8.2.4.1. </span><dfn id="the-initial0">The initial phase</dfn></h5>
+
+ <p>Initially, the tree construction stage must handle each token emitted
+ from the <a href="section-tokenisation.html#tokenisation0">tokenisation</a> stage as follows:
+
+ </p><dl class="switch">
+ <dt>A DOCTYPE token that is marked as being in error
+
+ </dt><dt>A comment token
+
+ </dt><dt>A start tag token
+
+ </dt><dt>An end tag token
+
+ </dt><dt>A character token that is not one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dt>An end-of-file token
+
+ </dt><dd>
+ <p>This specification does not define how to handle this case. In
+ particular, user agents may ignore the entirety of this specification
+ altogether for such documents, and instead invoke special parse modes
+ with a greater emphasis on backwards compatibility.</p>
+
+ <div class="note">
+ <p>Browsers in particular have generally used DOCTYPE-based sniffing to
+ invoke an &quot;alternative conformance mode&quot; known as <em>quirks mode</em>
+ on certain documents. In this mode, emphasis is put on legacy
+ compatibility rather than on standards compliance. This specification
+ takes no position on this behaviour; documents without DOCTYPEs or with
+ DOCTYPEs that do not conform to the syntax allowed by this
+ specification are considered to be out of scope of this specification.</p>
+ </div>
+
+ <div class="big-issue">
+ <p>As far as parsing goes, the quirks I know of are:</p>
+
+ <ul>
+ <li>Comment parsing is different.
+
+ </li><li>The following is considered one script block (!):
+ <pre>&lt;script&gt;&lt;!-- document.write('&lt;/script&gt;'); --&gt;&lt;/script&gt;</pre>
+
+ </li><li><code title="">&lt;/br&gt;</code> and <code title="">&lt;/p&gt;</code> do
+ magical things.
+
+ </li><li><code><a href="section-prose.html#p">p</a></code> can contain <code><a href="section-tabular.html#table">table</a></code>
+
+ </li><li>Safari and IE have special parsing rules for &lt;% ... %&gt; (even
+ in standards mode, though clearly this should be quirks-only).
+ </li></ul>
+
+ <p>Maybe we should just adopt all those and be done with it. One parsing
+ mode to rule them all. Or legitimise/codify the quirks mode parsing in
+ some way.</p>
+
+ <p>Would be interesting to do a search to see how many pages hit each of
+ the above.</p>
+ <!-- biased by page rank? --></div>
+
+ </dd><dt>A DOCTYPE token marked as being correct
+
+ </dt><dd>
+ <p>Append a <code>DocumentType</code> node to the <code>Document</code>
+ node, with the <code title="">name</code> attribute set to the name
+ given in the DOCTYPE token (which will be &quot;HTML&quot;), and the other
+ attributes specific to <code>DocumentType</code> objects set to null,
+ empty lists, or the empty string as appropriate.</p>
+
+ <p>Then, switch to <a href="#the-root1">the root element phase</a> of the
+ tree construction stage.</p>
+ <!-- XXX should set doctype on the Document object, too, unless
+ spec is defined to already point to it if you append -->
+
+
+ </dd><dt>A character token that <em>is</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append that character</a>
+ to the <code>Document</code> node.</p>
+ </dd></dl>
+
+ <h5 id="the-root0"><span class="secno">8.2.4.2. </span><dfn id="the-root1">The
+ root element phase</dfn></h5>
+
+ <p>After <a href="#the-initial0">the initial phase</a>, as each token is
+ emitted from the <a href="section-tokenisation.html#tokenisation0">tokenisation</a> stage, it must
+ be processed as described in this section.
+
+ </p><dl class="switch">
+ <dt>A DOCTYPE token
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <code>Document</code> object
+ with the <code title="">data</code> attribute set to the data given in
+ the comment token.</p>
+
+ </dd><dt>A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append that character</a>
+ to the <code>Document</code> node.</p>
+
+ </dd><dt>A character token that is <em>not</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dt>A start tag token
+
+ </dt><dt>An end tag token
+
+ </dt><dt>An end-of-file token
+
+ </dt><dd>
+ <p>Create an <code><a href="section-elements.html#htmlelement">HTMLElement</a></code> node
+ with the tag name <code><a href="section-the-root.html#html">html</a></code>, in the <a href="section-namespaces.html#html-namespace0">HTML namespace</a>. Append it to the
+ <code>Document</code> object. Switch to <a href="#the-main0">the main
+ phase</a> and reprocess the current token.</p>
+
+ <p class="big-issue">Should probably make end tags be ignored, so that
+ &quot;&lt;/head&gt;&lt;!-- --&gt;&lt;html&gt;&quot; puts the comment before the root node
+ (or should we?)</p>
+ </dd></dl>
+
+ <p>The root element can end up being removed from the <code>Document</code>
+ object, e.g. by scripts; nothing in particular happens in such cases,
+ content continues being appended to the nodes as described in the next
+ section.
+
+ </p><h5 id="the-main"><span class="secno">8.2.4.3. </span><dfn id="the-main0">The
+ main phase</dfn></h5>
+
+ <p>After <a href="#the-root1">the root element phase</a>, each token
+ emitted from the <a href="section-tokenisation.html#tokenisation0">tokenisation</a> stage must be
+ processed as described in <em>this</em> section. This is by far the most
+ involved part of parsing an HTML document.
+
+ </p><p>The tree construction stage in this phase has several pieces of state: a
+ <a href="#stack">stack of open elements</a>, a <a href="#list-of4">list of
+ active formatting elements</a>, a <a href="#head-element"><code title="">head</code> element pointer</a>, a <a href="#form-element"><code title="">form</code> element pointer</a>, and an <a href="#insertion0">insertion mode</a>.
+
+ </p><p class="big-issue">We could just fold insertion modes and phases into one
+ concept (and duplicate the two rules common to all insertion modes into
+ all of them).
+
+ </p><h6 id="the-stack"><span class="secno">8.2.4.3.1. </span>The stack of open
+ elements</h6>
+
+ <p>Initially the <dfn id="stack">stack of open elements</dfn> contains just
+ the <code><a href="section-the-root.html#html">html</a></code> root element node created in the
+ <a href="#the-root1" title="the root element phase">last phase</a> before
+ switching to <em>this</em> phase (or, in the <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>, the <code><a href="section-the-root.html#html">html</a></code> element created to represent the element
+ whose <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code> attribute is being set). That's
+ the topmost node of the stack. It never gets popped off the stack. (This
+ stack grows downwards.)
+
+ </p><p>The <dfn id="current4">current node</dfn> is the bottommost node in this
+ stack.
+
+ </p><p>Elements in the stack fall into the following categories:
+
+ </p><dl>
+ <dt><dfn id="special">Special</dfn>
+
+ </dt><dd>
+ <p>The following HTML elements have varying levels of special parsing
+ rules: <code><a href="section-sections.html#address">address</a></code>, <code><a href="section-the-canvas.html#area">area</a></code>, <code><a href="section-document.html#base">base</a></code>,
+ <code>basefont</code>, <code>bgsound</code>, <code><a href="section-sections.html#blockquote">blockquote</a></code>, <code><a href="section-sections.html#body0">body</a></code>, <code><a href="section-prose.html#br">br</a></code>,
+ <code>center</code>, <code><a href="section-tabular.html#col">col</a></code>, <code><a href="section-tabular.html#colgroup">colgroup</a></code>, <code><a href="section-lists0.html#dd">dd</a></code>,
+ <code>dir</code>, <code><a href="section-miscellaneous.html#div">div</a></code>, <code><a href="section-lists0.html#dl">dl</a></code>, <code><a href="section-lists0.html#dt">dt</a></code>, <code><a href="section-embedded.html#embed">embed</a></code>, <code>fieldset</code>,
+ <code>form</code>, <code>frame</code>, <code>frameset</code>, <code><a href="section-sections.html#h1">h1</a></code>, <code><a href="section-sections.html#h2">h2</a></code>, <code><a href="section-sections.html#h3">h3</a></code>, <code><a href="section-sections.html#h4">h4</a></code>, <code><a href="section-sections.html#h5">h5</a></code>, <code><a href="section-sections.html#h6">h6</a></code>, <code><a href="section-document.html#head">head</a></code>, <code><a href="section-prose.html#hr">hr</a></code>,
+ <code><a href="section-embedded.html#iframe">iframe</a></code>,
+ <code>image</code><!-- XXX ? this isn't an element that can end up
+ on the stack-->,
+ <code><a href="section-embedded.html#img">img</a></code>, <code>input</code>,
+ <code>isindex</code>, <code><a href="section-lists0.html#li">li</a></code>, <code><a href="section-document.html#link">link</a></code>, <code>listing</code>, <code><a href="section-the-command.html#menu">menu</a></code>, <code><a href="section-document.html#meta0">meta</a></code>,
+ <code>noembed</code>, <code>noframes</code>, <code><a href="section-scripting0.html#noscript">noscript</a></code>, <code><a href="section-lists0.html#ol">ol</a></code>,
+ <code>optgroup</code>, <code>option</code>, <code><a href="section-prose.html#p">p</a></code>, <code><a href="section-embedded.html#param">param</a></code>,
+ <code>plaintext</code>, <code><a href="section-preformatted.html#pre">pre</a></code>, <code><a href="section-scripting0.html#script0">script</a></code>, <code>select</code>,
+ <code>spacer</code>, <code><a href="section-document.html#style">style</a></code>, <code><a href="section-tabular.html#tbody">tbody</a></code>, <code>textarea</code>, <code><a href="section-tabular.html#tfoot0">tfoot</a></code>, <code><a href="section-tabular.html#thead0">thead</a></code>, <code><a href="section-document.html#title1">title</a></code>, <code><a href="section-tabular.html#tr">tr</a></code>,
+ <code><a href="section-lists0.html#ul">ul</a></code>, and <code>wbr</code>.
+
+ </p></dd><dt><dfn id="scoping">Scoping</dfn>
+
+ </dt><dd>
+ <p>The following HTML elements introduce new <a href="#have-an" title="has an element in scope">scopes</a> for various parts of the
+ parsing: <code>button</code>, <code><a href="section-tabular.html#caption0">caption</a></code>, <code><a href="section-the-root.html#html">html</a></code>, <code>marquee</code>, <code><a href="section-embedded.html#object">object</a></code>, <code><a href="section-tabular.html#table">table</a></code>, <code><a href="section-tabular.html#td">td</a></code> and
+ <code><a href="section-tabular.html#th">th</a></code>.
+
+ </p></dd><dt><dfn id="formatting">Formatting</dfn>
+
+ </dt><dd>
+ <p>The following HTML elements are those that end up in the <a href="#list-of4">list of active formatting elements</a>: <code><a href="section-phrase.html#a">a</a></code>, <code><a href="section-phrase.html#b">b</a></code>,
+ <code>big</code>, <code><a href="section-phrase.html#em">em</a></code>, <code><a href="section-presentational.html#font">font</a></code>, <code><a href="section-phrase.html#i">i</a></code>,
+ <code>nobr</code>, <code>s</code>, <code><a href="section-phrase.html#small">small</a></code>, <code>strike</code>, <code><a href="section-phrase.html#strong">strong</a></code>, <code>tt</code>, and <code>u</code>.
+
+ </p></dd><dt><dfn id="phrasing">Phrasing</dfn>
+
+ </dt><dd>
+ <p>All other elements found while parsing an HTML document.
+ </p></dd></dl>
+
+ <p class="big-issue">Still need to add these new elements to the lists:
+ <code><a href="section-scripting0.html#event-source">event-source</a></code>, <code><a href="section-sections.html#section">section</a></code>, <code><a href="section-sections.html#nav">nav</a></code>,
+ <code><a href="section-sections.html#article">article</a></code>, <code><a href="section-sections.html#aside">aside</a></code>, <code><a href="section-sections.html#header">header</a></code>,
+ <code><a href="section-sections.html#footer">footer</a></code>, <code><a href="section-interactive.html#datagrid0">datagrid</a></code>, <code><a href="section-the-command.html#command0">command</a></code>
+
+ </p><p>The <a href="#stack">stack of open elements</a> is said to <dfn id="have-an" title="has an element in scope">have an element in scope</dfn>
+ or <dfn id="have-an0" title="has an element in table scope">have an element
+ in <em>table scope</em></dfn> when the following algorithm terminates in a
+ match state:
+
+ </p><ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a href="#current4">current node</a> (the bottommost node of the stack).
+
+ </p></li><li>
+ <p>If <var title="">node</var> is the target node, terminate in a match
+ state.
+
+ </p></li><li>
+ <p>Otherwise, if <var title="">node</var> is a <code><a href="section-tabular.html#table">table</a></code> element, terminate in a failure state.
+
+ </p></li><li>
+ <p>Otherwise, if the algorithm is the &quot;has an element in scope&quot; variant
+ (rather than the &quot;has an element in table scope&quot; variant), and <var title="">node</var> is one of the following, terminate in a failure
+ state:</p>
+
+ <ul class="brief">
+ <li><code><a href="section-tabular.html#caption0">caption</a></code>
+
+ </li><li><code><a href="section-tabular.html#td">td</a></code>
+
+ </li><li><code><a href="section-tabular.html#th">th</a></code>
+
+ </li><li><code>button</code>
+
+ </li><li><code>marquee</code>
+
+ </li><li><code><a href="section-embedded.html#object">object</a></code>
+ </li></ul>
+
+ </li><li>
+ <p>Otherwise, if <var title="">node</var> is an <code><a href="section-the-root.html#html">html</a></code> element, terminate in a failure state.
+ (This can only happen if the <var title="">node</var> is the topmost
+ node of the <a href="#stack">stack of open elements</a>, and prevents
+ the next step from being invoked if there are no more elements in the
+ stack.)
+
+ </p></li><li>
+ <p>Otherwise, set <var title="">node</var> to the previous entry in the
+ <a href="#stack">stack of open elements</a> and return to step 2. (This
+ will never fail, since the loop will always terminate in the previous
+ step if the top of the stack is reached.)
+ </p></li></ol>
+
+ <p>Nothing happens if at any time any of the elements in the <a href="#stack">stack of open elements</a> are moved to a new location in,
+ or removed from, the <code>Document</code> tree. In particular, the stack
+ is not changed in this situation. This can cause, amongst other strange
+ effects, content to be appended to nodes that are no longer in the DOM.
+
+ </p><p class="note">In some cases (namely, when <a href="#adoptionAgency">closing
+ misnested formatting elements</a>), the stack is manipulated in a
+ random-access fashion.
+
+ </p><h6 id="the-list"><span class="secno">8.2.4.3.2. </span>The list of active
+ formatting elements</h6>
+
+ <p>Initially the <dfn id="list-of4">list of active formatting elements</dfn>
+ is empty. It is used to handle mis-nested <a href="#formatting" title="formatting">formatting element tags</a>.
+
+ </p><p>The list contains elements in the <a href="#formatting">formatting</a>
+ category, and scope markers. The scope markers are inserted when entering
+ buttons, <code><a href="section-embedded.html#object">object</a></code> elements, marquees,
+ table cells, and table captions, and are used to prevent formatting from
+ &quot;leaking&quot; into tables, buttons, <code><a href="section-embedded.html#object">object</a></code>
+ elements, and marquees.
+
+ </p><p>When the steps below require the UA to <dfn id="reconstruct">reconstruct
+ the active formatting elements</dfn>, the UA must perform the following
+ steps:
+
+ </p><ol>
+ <li>If there are no entries in the <a href="#list-of4">list of active
+ formatting elements</a>, then there is nothing to reconstruct; stop this
+ algorithm.
+
+ </li><li>If the last (most recently added) entry in the <a href="#list-of4">list of active formatting elements</a> is a marker, or
+ if it is an element that is in the <a href="#stack">stack of open
+ elements</a>, then there is nothing to reconstruct; stop this algorithm.
+
+ </li><li>Let <var title="">entry</var> be the last (most recently added)
+ element in the <a href="#list-of4">list of active formatting
+ elements</a>.
+
+ </li><li>If there are no entries before <var title="">entry</var> in the <a href="#list-of4">list of active formatting elements</a>, then jump to
+ step 8.
+
+ </li><li>Let <var title="">entry</var> be the entry one earlier than <var title="">entry</var> in the <a href="#list-of4">list of active formatting
+ elements</a>.
+
+ </li><li>If <var title="">entry</var> is neither a marker nor an element that
+ is also in the <a href="#stack">stack of open elements</a>, go to step 4.
+
+ </li><li>Let <var title="">entry</var> be the element one later than <var title="">entry</var> in the <a href="#list-of4">list of active formatting
+ elements</a>.
+
+ </li><li>Perform a shallow clone of the element <var title="">entry</var> to
+ obtain <var title="">clone</var>. <a href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ </li><li>Append <var title="">clone</var> to the <a href="#current4">current
+ node</a> and push it onto the <a href="#stack">stack of open elements</a>
+ so that it is the new <a href="#current4">current node</a>.
+
+ </li><li>Replace the entry for <var title="">entry</var> in the list with an
+ entry for <var title="">clone</var>.
+
+ </li><li>If the entry for <var title="">clone</var> in the <a href="#list-of4">list of active formatting elements</a> is not the last
+ entry in the list, return to step 7.
+ </li></ol>
+
+ <p>This has the effect of reopening all the formatting elements that were
+ opened in the current body, cell, or caption (whichever is youngest) that
+ haven't been explicitly closed.
+
+ </p><p class="note">The way this specification is written, the <a href="#list-of4">list of active formatting elements</a> always consists of
+ elements in chronological order with the least recently added element
+ first and the most recently added element last (except while steps 8
+ to 11 of the above algorithm are being executed, of course).
+
+ </p><p>When the steps below require the UA to <dfn id="clear0">clear the list of
+ active formatting elements up to the last marker</dfn>, the UA must
+ perform the following steps:
+
+ </p><ol>
+ <li>Let <var title="">entry</var> be the last (most recently added) entry
+ in the <a href="#list-of4">list of active formatting elements</a>.
+
+ </li><li>Remove <var title="">entry</var> from the <a href="#list-of4">list of
+ active formatting elements</a>.
+
+ </li><li>If <var title="">entry</var> was a marker, then stop the algorithm at
+ this point. The list has been cleared up to the last marker.
+
+ </li><li>Go to step 1.
+ </li></ol>
+
+ <h6 id="creating"><span class="secno">8.2.4.3.3. </span>Creating and inserting
+ HTML elements</h6>
+
+ <p>When the steps below require the UA to <dfn id="create" title="create an
+ element for the token">create an element for a token</dfn>, the UA must
+ create a node implementing the interface appropriate for the element type
+ corresponding to the tag name of the token (as given in the section of
+ this specification that defines that element, e.g. for an <code><a href="section-phrase.html#a">a</a></code> element it would be the <code><a href="section-phrase.html#htmlanchorelement">HTMLAnchorElement</a></code> interface), with
+ the tag name being the name of that element, with the node being in the <a href="section-namespaces.html#html-namespace0">HTML namespace</a>, and with the attributes on the
+ node being those given in the given token.
+
+ </p><p>When the steps below require the UA to <dfn id="insert">insert an HTML
+ element</dfn> for a token, the UA must first <a href="#create">create an
+ element for the token</a>, and then append this node to the <a href="#current4">current node</a>, and push it onto the <a href="#stack">stack of open elements</a> so that it is the new <a href="#current4">current node</a>.
+
+ </p><p>The steps below may also require that the UA insert an HTML element in a
+ particular place, in which case the UA must <a href="#create">create an
+ element for the token</a> and then insert or append the new node in the
+ location specified. (This happens in particular during the parsing of
+ tables with invalid content.)
+
+ </p><p>The interface appropriate for an element that is not defined in this
+ specification is <code><a href="section-elements.html#htmlelement">HTMLElement</a></code>.
+
+ </p><h6 id="closing"><span class="secno">8.2.4.3.4. </span>Closing elements that
+ have implied end tags</h6>
+
+ <p>When the steps below require the UA to <dfn id="generate">generate implied
+ end tags</dfn>, then, if the <a href="#current4">current node</a> is a
+ <code><a href="section-lists0.html#dd">dd</a></code> element, a <code><a href="section-lists0.html#dt">dt</a></code> element, an <code><a href="section-lists0.html#li">li</a></code>
+ element, a <code><a href="section-prose.html#p">p</a></code> element, a <code><a href="section-tabular.html#td">td</a></code> element, a <code><a href="section-tabular.html#th">th</a></code>
+ element, or a <code><a href="section-tabular.html#tr">tr</a></code> element, the UA must act
+ as if an end tag with the respective tag name had been seen and then <a href="#generate">generate implied end tags</a> again.
+
+ </p><p>If a step requires the UA to generate implied end tags but lists an
+ element to exclude from the process, then the UA must perform the above
+ steps as if that element was not in the above list.
+
+ </p><h6 id="the-element"><span class="secno">8.2.4.3.5. </span>The element pointers</h6>
+
+ <p>Initially the <dfn id="head-element"><code title="">head</code> element
+ pointer</dfn> and the <dfn id="form-element"><code title="">form</code>
+ element pointer</dfn> are both null.
+
+ </p><p>Once a <code><a href="section-document.html#head">head</a></code> element has been parsed
+ (whether implicitly or explicitly) the <a href="#head-element"><code title="">head</code> element pointer</a> gets set to point to this node.
+
+ </p><p>The <a href="#form-element"><code title="">form</code> element
+ pointer</a> points to the last <code>form</code> element that was opened
+ and whose end tag has not yet been seen. It is used to make form controls
+ associate with forms in the face of dramatically bad markup, for
+ historical reasons.
+
+ </p><h6 id="the-insertion"><span class="secno">8.2.4.3.6. </span>The insertion mode</h6>
+
+ <p>Initially the <dfn id="insertion0">insertion mode</dfn> is &quot;<a href="#before2" title="insertion mode: before head">before head</a>&quot;. It
+ can change to &quot;<a href="#in-head" title="insertion mode: in head">in
+ head</a>&quot;, &quot;<a href="#after1" title="insertion mode: after head">after
+ head</a>&quot;, &quot;<a href="#in-body" title="insertion mode: in body">in
+ body</a>&quot;, &quot;<a href="#in-table" title="insertion mode: in table">in
+ table</a>&quot;, &quot;<a href="#in-caption" title="insertion mode: in caption">in
+ caption</a>&quot;, &quot;<a href="#in-column" title="insertion mode: in column
+ group">in column group</a>&quot;, &quot;<a href="#in-table0" title="insertion mode:
+ in table body">in table body</a>&quot;, &quot;<a href="#in-row" title="insertion
+ mode: in row">in row</a>&quot;, &quot;<a href="#in-cell" title="insertion mode: in
+ cell">in cell</a>&quot;, &quot;<a href="#in-select" title="insertion mode: in
+ select">in select</a>&quot;, &quot;<a href="#after2" title="insertion mode: after
+ body">after body</a>&quot;, &quot;<a href="#in-frameset" title="insertion mode: in
+ frameset">in frameset</a>&quot;, and &quot;<a href="#after3" title="insertion mode:
+ after frameset">after frameset</a>&quot; during the course of the parsing, as
+ described below. It affects how certain tokens are processed.
+
+ </p><p>If the tree construction stage is switched from <a href="#the-main0">the
+ main phase</a> to <a href="#the-trailing0">the trailing end phase</a> and
+ back again, the various pieces of state are not reset; the UA must act as
+ if the state was maintained.
+
+ </p><p>When the steps below require the UA to <dfn id="reset">reset the insertion
+ mode appropriately</dfn>, it means the UA must follow these steps:
+
+ </p><ol>
+ <li>Let <var title="">last</var> be false.
+
+ </li><li>Let <var title="">node</var> be the last node in the <a href="#stack">stack of open elements</a>.
+
+ </li><li>If <var title="">node</var> is the first node in the stack of open
+ elements, then set <var title="">last</var> to true. If the element whose
+ <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code>
+ attribute is being set is neither a <code><a href="section-tabular.html#td">td</a></code>
+ element nor a <code><a href="section-tabular.html#th">th</a></code> element, then set <var title="">node</var> to the element whose <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code>
+ attribute is being set. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ </li><li>If <var title="">node</var> is a <code>select</code> element, then
+ switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-select" title="insertion mode: in select">in select</a>&quot; and
+ abort these steps. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-tabular.html#td">td</a></code> or
+ <code><a href="section-tabular.html#th">th</a></code> element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-cell" title="insertion mode: in cell">in cell</a>&quot; and abort these steps.
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-tabular.html#tr">tr</a></code>
+ element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-row" title="insertion mode: in row">in row</a>&quot; and abort these
+ steps.
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-tabular.html#tbody">tbody</a></code>, <code><a href="section-tabular.html#thead0">thead</a></code>,
+ or <code><a href="section-tabular.html#tfoot0">tfoot</a></code> element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table0" title="insertion mode: in table body">in table body</a>&quot; and abort these
+ steps.
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-tabular.html#caption0">caption</a></code> element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-caption" title="insertion mode: in caption">in caption</a>&quot; and abort these steps.
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-tabular.html#colgroup">colgroup</a></code> element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-column" title="insertion mode: in column group">in column group</a>&quot; and abort
+ these steps. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-tabular.html#table">table</a></code> element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table" title="insertion mode: in table">in table</a>&quot; and abort these steps.
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-document.html#head">head</a></code>
+ element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot; (&quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot;! <em> not
+ &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot;</em>!)
+ and abort these steps. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ </li><li>If <var title="">node</var> is a <code><a href="section-sections.html#body0">body</a></code> element, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot; and abort these steps.
+
+ </li><li>If <var title="">node</var> is a <code>frameset</code> element, then
+ switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-frameset" title="insertion mode: in frameset">in frameset</a>&quot;
+ and abort these steps. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ </li><li>If <var title="">node</var> is an <code><a href="section-the-root.html#html">html</a></code> element, then: if the <a href="#head-element"><code title="">head</code> element pointer</a> is
+ null, switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#before2" title="insertion mode: before head">before head</a>&quot;,
+ otherwise, switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#after1" title="insertion mode: after head">after head</a>&quot;. In
+ either case, abort these steps. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</li>
+ <!-- XXX can the head element pointer ever be
+ non-null when we're going through these steps? -->
+
+ <li>If <var title="">last</var> is true, then set the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot; and abort these steps. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)
+
+ </li><li>Let <var title="">node</var> now be the node before <var title="">node</var> in the <a href="#stack">stack of open elements</a>.
+
+ </li><li>Return to step 3.
+ </li></ol>
+ <!--When you don't have to handle innerHTML, you can use this
+simplified explanation instead:
+
+ <ol>
+
+ <li><p>If the <span>stack of open elements</span> <span title="has
+ an element in table scope">has a <code>td</code> or <code>th</code>
+ element in table scope</span>, then switch the <span>insertion
+ mode</span> to "<span title="insertion mode: in cell">in
+ cell</span>".</p></li>
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>tr</code> element
+ in table scope</span>, then switch the <span>insertion mode</span>
+ to "<span title="insertion mode: in row">in row</span>".</p></li>
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>tbody</code>,
+ <code>tfoot</code>, or <code>thead</code> element in table
+ scope</span>, then switch the <span>insertion mode</span> to "<span
+ title="insertion mode: in table body">in table
+ body</span>".</p></li>
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>caption</code>
+ element in table scope</span>, then switch the <span>insertion
+ mode</span> to "<span title="insertion mode: in caption">in
+ caption</span>".</p></li>
+
+ ( you can't reach this point with a colgroup element on the
+ stack )
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>table</code>
+ element in table scope</span>, then switch the <span>insertion
+ mode</span> to "<span title="insertion mode: in table">in
+ table</span>".</p></li>
+
+ <li><p>Otherwise, switch the <span>insertion mode</span> to "<span
+ title="insertion mode: in body">in body</span>".</p></li>
+
+ </ol>
+-->
+
+ <h6 id="how-to0"><span class="secno">8.2.4.3.7. </span>How to handle tokens in
+ the main phase</h6>
+
+ <p>Tokens in the main phase must be handled as follows:
+
+ </p><dl class="switch">
+ <dt>A DOCTYPE token
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>A start tag token with the tag name &quot;html&quot;
+
+ </dt><dd>
+ <p>If this start tag token was not the first start tag token, then it is
+ a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>For each attribute on the token, check to see if the attribute is
+ already present on the top element of the <a href="#stack">stack of open
+ elements</a>. If it is not, add the attribute and its corresponding
+ value to that element.</p>
+
+ </dd><dt>An end-of-file token
+
+ </dt><dd>
+ <p><a href="#generate">Generate implied end tags.</a></p>
+
+ <p>If there are more than two nodes on the <a href="#stack">stack of open
+ elements</a>, or if there are two nodes but the second node is not a
+ <code><a href="section-sections.html#body0">body</a></code> node, this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Otherwise, if the parser was originally created in order to handle the
+ setting of an element's <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code> attribute, and there's more than
+ one element in the <a href="#stack">stack of open elements</a>, and the
+ second node on the <a href="#stack">stack of open elements</a> is not a
+ <code><a href="section-sections.html#body0">body</a></code> node, then this is a <a href="section-parsing.html#parse">parse error</a>. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p><a href="#stops">Stop parsing.</a></p>
+
+ <p class="big-issue">This fails because it doesn't imply HEAD and BODY
+ tags. We should probably expand out the insertion modes and merge them
+ with phases and then put the three things here into each insertion mode
+ instead of trying to factor them out so carefully.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Depends on the <a href="#insertion0">insertion mode</a>:</p>
+
+ <dl class="switch">
+ <dt>If the <a href="#insertion0">insertion mode</a> is &quot;<dfn id="before2" title="insertion mode: before head">before head</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag token with the tag name &quot;head&quot;
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Set the <a href="#head-element"><code title="">head</code> element
+ pointer</a> to this new element node.</p>
+
+ <p>Append the new element to the <a href="#current4">current node</a>
+ and push it onto the <a href="#stack">stack of open elements</a>.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot;.</p>
+
+ </dd><dt>A start tag token whose tag name is one of: &quot;base&quot;, &quot;link&quot;,
+ &quot;meta&quot;, &quot;script&quot;, &quot;style&quot;, &quot;title&quot;
+
+ </dt><dd>
+ <p>Act as if a start tag token with the tag name &quot;head&quot; and no
+ attributes had been seen, then reprocess the current token.</p>
+
+ <p class="note">This will result in a <code><a href="section-document.html#head">head</a></code> element being generated, and with the
+ current token being reprocessed in the &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot; <a href="#insertion0">insertion mode</a>.</p>
+
+ </dd><dt>An end tag with the tag name &quot;html&quot;
+
+ </dt><dd>
+ <p>Act as if a start tag token with the tag name &quot;head&quot; and no
+ attributes had been seen, then reprocess the current token.</p>
+
+ </dd><dt>Any other end tag
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>A character token that is <em>not</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dt>Any other start tag token
+
+ </dt><dd>
+ <p>Act as if a start tag token with the tag name &quot;head&quot; and no
+ attributes had been seen, then reprocess the current token.</p>
+
+ <p class="note">This will result in an empty <code><a href="section-document.html#head">head</a></code> element being generated, with the
+ current token being reprocessed in the &quot;<a href="#after1" title="insertion mode: after head">after head</a>&quot; <a href="#insertion0">insertion mode</a>.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-inhead">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-head" title="insertion mode: in head">in
+ head</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows.</p>
+
+ <p class="note">The rules for handling &quot;title&quot;, &quot;style&quot;, and &quot;script&quot;
+ start tags are similar, but not identical.</p>
+
+ <p class="note">It is possible for the <a href="#tree-construction0">tree
+ construction</a> stage's <a href="#the-main0" title="the main
+ phase">main phase</a> to be in the &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot; <a href="#insertion0">insertion mode</a> without the <a href="#current4">current node</a> being a <code><a href="section-document.html#head">head</a></code> element, e.g. if a <code><a href="section-document.html#head">head</a></code> end tag is immediately followed by a
+ <code><a href="section-document.html#meta0">meta</a></code> start tag.</p>
+
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag with the tag name &quot;title&quot;
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Append the new element to the node pointed to by the <a href="#head-element"><code title="">head</code> element pointer</a>,
+ or, if that is null (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>), to the <a href="#current4">current node</a>.</p>
+
+ <p>Switch the tokeniser's <a href="section-tokenisation.html#content2">content model flag</a>
+ to the RCDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node to the <code><a href="section-document.html#title1">title</a></code> element node whose contents is the
+ concatenation of all those tokens' characters.</p>
+
+ <p>The tokeniser's <a href="section-tokenisation.html#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the tag name &quot;title&quot;,
+ ignore it. Otherwise, this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ </dd><dt>A start tag with the tag name &quot;style&quot;
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>, unless the <a href="#insertion0">insertion mode</a> is &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot; and the
+ <a href="#head-element"><code title="">head</code> element
+ pointer</a> is not null, in which case append it to the node pointed
+ to by the <a href="#head-element"><code title="">head</code> element
+ pointer</a>. <!--
+ <head></head><style><body> should put the style block in the
+ head, and does so by switching back to in head, but the head
+ isn't the current node at that point (comments should go
+ between the head and the body) --></p>
+
+ <p>Switch the tokeniser's <a href="section-tokenisation.html#content2">content model flag</a>
+ to the CDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node to the <code><a href="section-document.html#style">style</a></code> element node whose contents is the
+ concatenation of all those tokens' characters.</p>
+
+ <p>The tokeniser's <a href="section-tokenisation.html#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the tag name &quot;style&quot;,
+ ignore it. Otherwise, this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ </dd><dt id="scriptTag">A start tag with the tag name &quot;script&quot;
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Mark the element as being <a href="section-scripting0.html#parser-inserted">&quot;parser-inserted&quot;</a>. This ensures that, if
+ the script is external, any <code title="dom-document-write-HTML"><a href="section-dynamic.html#document.write0">document.write()</a></code> calls in the
+ script will execute in-line, instead of blowing the document away,
+ as would happen in most other cases.</p>
+
+ <p>Switch the tokeniser's <a href="section-tokenisation.html#content2">content model flag</a>
+ to the CDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node to the <code><a href="section-scripting0.html#script0">script</a></code> element node whose contents is the
+ concatenation of all those tokens' characters.</p>
+
+ <p>The tokeniser's <a href="section-tokenisation.html#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is not an end tag token with the tag name
+ &quot;script&quot;, then this is a <a href="section-parsing.html#parse">parse error</a>; mark the
+ <code><a href="section-scripting0.html#script0">script</a></code> element as <a href="section-scripting0.html#already">&quot;already executed&quot;</a>. Otherwise, the token is the
+ <code><a href="section-scripting0.html#script0">script</a></code> element's end tag, so
+ ignore it.</p>
+
+ <p>If the parser was originally created in order to handle the
+ setting of a node's <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code> attribute, then mark the
+ <code><a href="section-scripting0.html#script0">script</a></code> element as <a href="section-scripting0.html#already">&quot;already executed&quot;</a>, and skip the rest of the
+ processing described for this token (including the part below where
+ &quot;<a href="section-scripting0.html#the-script" title="the script that will execute as soon
+ as the parser resumes">scripts that will execute as soon as the
+ parser resumes</a>&quot; are executed). (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p class="note">Marking the <code><a href="section-scripting0.html#script0">script</a></code>
+ element as &quot;already executed&quot; prevents it from executing when it is
+ inserted into the document a few paragraphs below. Scripts missing
+ their end tags and scripts that were inserted using <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code>
+ aren't executed.</p>
+
+ <p>Let the <var title="">old insertion point</var> have the same
+ value as the current <a href="section-parsing.html#insertion">insertion point</a>. Let
+ the <a href="section-parsing.html#insertion">insertion point</a> be just before the <a href="section-parsing.html#next-input">next input character</a>.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>, unless the <a href="#insertion0">insertion mode</a> is &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot; and the
+ <a href="#head-element"><code title="">head</code> element
+ pointer</a> is not null, in which case append it to the node pointed
+ to by the <a href="#head-element"><code title="">head</code> element
+ pointer</a>. <!--
+ <head></head><script><body> should put the script in the head,
+ and does so by switching back to in head, but the head isn't
+ the current node at that point (comments should go between the
+ head and the body) -->
+ <a href="section-scripting0.html#running0" title="running a script">Special processing
+ occurs when a <code>script</code> element is inserted into a
+ document</a> that might cause some script to execute, which might
+ cause <a href="section-dynamic.html#document.write0" title="dom-document-write-HTML">new
+ characters to be inserted into the tokeniser</a>.</p>
+
+ <p>Let the <a href="section-parsing.html#insertion">insertion point</a> have the value of
+ the <var title="">old insertion point</var>. (In other words,
+ restore the <a href="section-parsing.html#insertion">insertion point</a> to the value it
+ had before the previous paragraph. This value might be the
+ &quot;undefined&quot; value.)</p>
+
+ <p id="scriptTagParserResumes">At this stage, if there is <a href="section-scripting0.html#the-script" title="the script that will execute as soon as
+ the parser resumes">a script that will execute as soon as the parser
+ resumes</a>, then:</p>
+
+ <dl class="switch">
+ <dt>If the tree construction stage is <a href="section-parsing.html#nestedParsing">being
+ called reentrantly</a>, say from a call to <code title="dom-document-write-HTML"><a href="section-dynamic.html#document.write0">document.write()</a></code>:
+
+ </dt><dd>
+ <p>Abort the processing of any nested invocations of the tokeniser,
+ yielding control back to the caller. (Tokenisation will resume
+ when the caller returns to the &quot;outer&quot; tree construction stage.)
+
+ </p></dd><dt>Otherwise:
+
+ </dt><dd>
+ <p>Follow these steps:</p>
+
+ <ol>
+ <li>
+ <p>Let <var title="">the script</var> be <a href="section-scripting0.html#the-script">the script that will execute as soon as the
+ parser resumes</a>. There is no longer <a href="section-scripting0.html#the-script" title="the script that will execute as soon as the parser
+ resumes">a script that will execute as soon as the parser
+ resumes</a>.
+
+ </p></li><li>
+ <p><a href="section-terminology.html#pause">Pause</a> until the script has
+ <span>completed loading</span><!-- XXX xref -->.
+
+ </p></li><li>
+ <p>Let the <a href="section-parsing.html#insertion">insertion point</a> be just
+ before the <a href="section-parsing.html#next-input">next input character</a>.
+
+ </p></li><li>
+ <p><a href="section-scripting0.html#executing0" title="executing a script block">Execute
+ the script</a>.
+
+ </p></li><li>
+ <p>Let the <a href="section-parsing.html#insertion">insertion point</a> be undefined
+ again.
+
+ </p></li><li>
+ <p>If there is once again <a href="section-scripting0.html#the-script" title="the script
+ that will execute as soon as the parser resumes">a script that
+ will execute as soon as the parser resumes</a>, then repeat
+ these steps from step 1.
+ </p></li></ol>
+ </dd></dl>
+
+ </dd><dt>A start tag with the tag name &quot;base&quot;, &quot;link&quot;, or &quot;meta&quot;
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Append the new element to the node pointed to by the <a href="#head-element"><code title="">head</code> element pointer</a>,
+ or, if that is null (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>), to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>An end tag with the tag name &quot;head&quot;
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is a <code><a href="section-document.html#head">head</a></code> element, pop the <a href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>. Otherwise, this is a <a href="section-parsing.html#parse">parse
+ error</a>.</p>
+ <!-- might happen if you see two </head>s
+ and something in between the two sends you from "after head"
+ back to "in head" -->
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#after1" title="insertion mode: after head">after head</a>&quot;.</p>
+
+ </dd><dt>An end tag with the tag name &quot;html&quot;
+
+ </dt><dd>
+ <p>Act as described in the &quot;anything else&quot; entry below.</p>
+
+ </dd><dt>A start tag with the tag name &quot;head&quot;
+
+ </dt><dt>Any other end tag
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is a <code><a href="section-document.html#head">head</a></code> element, act as if an end tag token
+ with the tag name &quot;head&quot; had been seen.</p>
+
+ <p>Otherwise, change the <a href="#insertion0">insertion mode</a> to
+ &quot;<a href="#after1" title="insertion mode: after head">after
+ head</a>&quot;.</p>
+
+ <p>Then, reprocess the current token.</p>
+
+ <p class="big-issue">In certain UAs, <a href="https://bugzilla.mozilla.org/attachment.cgi?id=180157&amp;action=view">some
+ elements</a> don't trigger the &quot;in body&quot; mode straight away, but
+ instead get put into the head. Do we want to copy that?</p>
+ </dd></dl>
+
+ </dd><dt>If the <a href="#insertion0">insertion mode</a> is &quot;<dfn id="after1" title="insertion mode: after head">after head</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag token with the tag name &quot;body&quot;
+
+ </dt><dd>
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>body</code> element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot;.</p>
+
+ </dd><dt>A start tag token with the tag name &quot;frameset&quot;
+
+ </dt><dd>
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>frameset</code> element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-frameset" title="insertion mode: in frameset">in
+ frameset</a>&quot;.</p>
+
+ </dd><dt>A start tag token whose tag name is one of: &quot;base&quot;, &quot;link&quot;,
+ &quot;meta&quot;, &quot;script&quot;, &quot;style&quot;, &quot;title&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Switch the <a href="#insertion0">insertion mode</a> back to &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot; and reprocess the
+ token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Act as if a start tag token with the tag name &quot;body&quot; and no
+ attributes had been seen, and then reprocess the current token.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-inbody">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-body" title="insertion mode: in body">in
+ body</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#append" title="append a character">Append the token's
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag token whose tag name is one of: &quot;script&quot;, &quot;style&quot;
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> had been &quot;<a href="#in-head" title="insertion mode: in
+ head">in head</a>&quot;.</p>
+
+ </dd><dt>A start tag token whose tag name is one of: &quot;base&quot;, &quot;link&quot;,
+ &quot;meta&quot;, &quot;title&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Process the token as if the <a href="#insertion0">insertion mode</a> had been &quot;<a href="#in-head" title="insertion mode: in head">in head</a>&quot;.</p>
+
+ </dd><dt>A start tag token with the tag name &quot;body&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>.</p>
+
+ <p>If the second element on the <a href="#stack">stack of open
+ elements</a> is not a <code><a href="section-sections.html#body0">body</a></code>
+ element, or, if the <a href="#stack">stack of open elements</a> has
+ only one node on it, then ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, for each attribute on the token, check to see if the
+ attribute is already present on the <code><a href="section-sections.html#body0">body</a></code> element (the second element) on the <a href="#stack">stack of open elements</a>. If it is not, add the
+ attribute and its corresponding value to that element.</p>
+
+ </dd><dt>An end tag with the tag name &quot;body&quot;
+
+ </dt><dd>
+ <p>If the second element in the <a href="#stack">stack of open
+ elements</a> is not a <code><a href="section-sections.html#body0">body</a></code>
+ element, this is a <a href="section-parsing.html#parse">parse error</a>. Ignore the
+ token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p class="big-issue">this needs to handle closing of implied elements,
+ but without closing them</p>
+
+ <p>If the <a href="#current4">current node</a> is not the <code><a href="section-sections.html#body0">body</a></code> element, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#after2" title="insertion mode: after body">after body</a>&quot;.</p>
+
+ </dd><dt>An end tag with the tag name &quot;html&quot;
+
+ </dt><dd>
+ <p>Act as if an end tag with tag name &quot;body&quot; had been seen, then, if
+ that token wasn't ignored, reprocess the current token.</p>
+
+ <p class="note">The fake end tag token here can only be ignored in the
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;address&quot;, &quot;blockquote&quot;,
+ &quot;center&quot;, &quot;dir&quot;, &quot;div&quot;, &quot;dl&quot;, &quot;fieldset&quot;, &quot;listing&quot;, &quot;menu&quot;, &quot;ol&quot;,
+ &quot;p&quot;, &quot;ul&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;pre&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ <p>If the next token is a U+000A LINE FEED (LF) character token, then
+ ignore that token and move on to the next one. (Newlines at the
+ start of <code><a href="section-preformatted.html#pre">pre</a></code> blocks are ignored as
+ an authoring convenience.)</p>
+
+ </dd><dt>A start tag whose tag name is &quot;form&quot;
+
+ </dt><dd>
+ <p>If the <a href="#form-element"><code title="form">form</code>
+ element pointer</a> is not null, ignore the token with a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Otherwise:</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html Element">Insert an HTML
+ element</a> for the token, and set the <code title="form">form</code>
+ element pointer to point to the element created.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;li&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p>Run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a href="#current4">current node</a> (the bottommost node of the
+ stack).
+
+ </p></li><li>
+ <p>If <var title="">node</var> is an <code><a href="section-lists0.html#li">li</a></code> element, then pop all the nodes from the
+ <a href="#current4">current node</a> up to <var title="">node</var>, including <var title="">node</var>, then stop
+ this algorithm. If more than one node is popped, then this is a <a href="section-parsing.html#parse">parse error</a>.
+
+ </p></li><li>
+ <p>If <var title="">node</var> is not in the <a href="#formatting">formatting</a> category, and is not in the <a href="#phrasing">phrasing</a> category, and is not an <code><a href="section-sections.html#address">address</a></code> or <code><a href="section-miscellaneous.html#div">div</a></code> element, then stop this algorithm.
+ </p></li>
+ <!-- an element <foo> is in this
+ list if the following markup:
+
+ <!DOCTYPE html><body><ol><li><foo><li>
+
+ ...results in the second <li> not being (in any way) a
+ descendant of the first <li>, or if <foo> is a formatting
+ element that gets reopened later. -->
+
+ <li>
+ <p>Otherwise, set <var title="">node</var> to the previous entry in
+ the <a href="#stack">stack of open elements</a> and return to step
+ 2.
+ </p></li></ol>
+
+ <p>Finally, <a href="#insert" title="insert an html element">insert
+ an <code>li</code> element</a>.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;dd&quot; or &quot;dt&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p>Run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a href="#current4">current node</a> (the bottommost node of the
+ stack).
+
+ </p></li><li>
+ <p>If <var title="">node</var> is a <code><a href="section-lists0.html#dd">dd</a></code> or <code><a href="section-lists0.html#dt">dt</a></code>
+ element, then pop all the nodes from the <a href="#current4">current node</a> up to <var title="">node</var>,
+ including <var title="">node</var>, then stop this algorithm. If
+ more than one node is popped, then this is a <a href="section-parsing.html#parse">parse error</a>.
+
+ </p></li><li>
+ <p>If <var title="">node</var> is not in the <a href="#formatting">formatting</a> category, and is not in the <a href="#phrasing">phrasing</a> category, and is not an <code><a href="section-sections.html#address">address</a></code> or <code><a href="section-miscellaneous.html#div">div</a></code> element, then stop this algorithm.
+ </p></li>
+ <!-- an element <foo> is in this
+ list if the following markup:
+
+ <!DOCTYPE html><body><ol><dt><foo><dt>
+
+ ...results in the second <dt> not being (in any way) a
+ descendant of the first <dt>, or if <foo> is a formatting
+ element that gets reopened later. -->
+
+ <li>
+ <p>Otherwise, set <var title="">node</var> to the previous entry in
+ the <a href="#stack">stack of open elements</a> and return to step
+ 2.
+ </p></li></ol>
+
+ <p>Finally, <a href="#insert" title="insert an html element">insert
+ an HTML element</a> with the same tag name as the token's.</p>
+
+ </dd><dt>A start tag token whose tag name is &quot;plaintext&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ <p>Switch the <a href="section-tokenisation.html#content2">content model flag</a> to the
+ PLAINTEXT state.</p>
+
+ <p class="note">Once a start tag with the tag name &quot;plaintext&quot; has been
+ seen, that will be the last token ever seen other than character
+ tokens (and the end-of-file token), because there is no way to
+ switch the <a href="section-tokenisation.html#content2">content model flag</a> out of the
+ PLAINTEXT state.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;address&quot;, &quot;blockquote&quot;,
+ &quot;center&quot;, &quot;dir&quot;, &quot;div&quot;, &quot;dl&quot;, &quot;fieldset&quot;, &quot;listing&quot;, &quot;menu&quot;, &quot;ol&quot;,
+ &quot;pre&quot;, &quot;ul&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an">has an element in scope</a> with the same tag name
+ as that of the token, then <a href="#generate">generate implied end
+ tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as that of the token, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an">has an element in scope</a> with the same tag name
+ as that of the token, then pop elements from this stack until an
+ element with that tag name has been popped from the stack.</p>
+ <!-- XXX quirk (except for in certain cases?):
+ <p>Otherwise, act as if a start tag with the tag name given in
+ the token had been seen, then reprocess the current token.</p>
+ -->
+
+
+ </dd><dt>An end tag whose tag name is &quot;form&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an">has an element in scope</a> with the same tag name
+ as that of the token, then <a href="#generate">generate implied end
+ tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as that of the token, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Otherwise, if the <a href="#current4">current node</a> is an
+ element with the same tag name as that of the token, pop that element
+ from the stack.</p>
+
+ <p>In any case, set the <a href="#form-element"><code title="">form</code> element pointer</a> to null.</p>
+
+ </dd><dt>An end tag whose tag name is &quot;p&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then <a href="#generate">generate implied end
+ tags</a>, except for <code><a href="section-prose.html#p">p</a></code> elements.</p>
+
+ <p>If the <a href="#current4">current node</a> is not a <code><a href="section-prose.html#p">p</a></code> element, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then pop elements from this stack until the
+ stack no longer <a href="#have-an" title="has an element in
+ scope">has a <code>p</code> element in scope</a>.</p>
+ <!-- XXX quirk:
+ <p>Otherwise, act as if a start tag with the tag name
+ <code>p</code> had been seen, then reprocess the current
+ token.</p>
+ -->
+
+
+ </dd><dt>An end tag whose tag name is &quot;dd&quot;, &quot;dt&quot;, or &quot;li&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an">has an element in scope</a> whose tag name matches
+ the tag name of the token, then <a href="#generate">generate implied
+ end tags</a>, except for elements with the same tag name as the
+ token.</p>
+
+ <p>If the <a href="#current4">current node</a> is not an element with
+ the same tag name as the token, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an">has an element in scope</a> whose tag name matches
+ the tag name of the token, then pop elements from this stack until
+ an element with that tag name has been popped from the stack.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;h1&quot;, &quot;h2&quot;, &quot;h3&quot;, &quot;h4&quot;,
+ &quot;h5&quot;, &quot;h6&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is one of &quot;h1&quot;, &quot;h2&quot;, &quot;h3&quot;, &quot;h4&quot;, &quot;h5&quot;, or
+ &quot;h6&quot;, then this is a <a href="section-parsing.html#parse">parse error</a>; pop elements
+ from the stack until an element with one of those tag names has been
+ popped from the stack.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;h1&quot;, &quot;h2&quot;, &quot;h3&quot;, &quot;h4&quot;, &quot;h5&quot;,
+ &quot;h6&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is one of &quot;h1&quot;, &quot;h2&quot;, &quot;h3&quot;, &quot;h4&quot;, &quot;h5&quot;, or
+ &quot;h6&quot;, then <a href="#generate">generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as that of the token, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is one of &quot;h1&quot;, &quot;h2&quot;, &quot;h3&quot;, &quot;h4&quot;, &quot;h5&quot;, or
+ &quot;h6&quot;, then pop elements from the stack until an element with one of
+ those tag names has been popped from the stack.</p>
+ <!-- XXX quirk:
+ <p>Otherwise, act as if a start tag with the tag name given in
+ the token had been seen, then reprocess the current token.</p>
+ -->
+ </dd>
+ <!-- ADOPTION AGENCY ELEMENTS
+ Mozilla-only: bdo blink del ins sub sup q
+ Safari-only: code dfn kbd nobr samp var wbr
+ Both: a b big em font i s small strike strong tt u -->
+
+ <dt>A start tag whose tag name is &quot;a&quot;
+
+ </dt><dd>
+ <p>If the <a href="#list-of4">list of active formatting elements</a>
+ contains an element whose tag name is &quot;a&quot; between the end of the
+ list and the last marker on the list (or the start of the list if
+ there is no marker on the list), then this is a <a href="section-parsing.html#parse">parse error</a>; act as if an end tag with the tag
+ name &quot;a&quot; had been seen, then remove that element from the <a href="#list-of4">list of active formatting elements</a> and the <a href="#stack">stack of open elements</a> if the end tag didn't
+ already remove it (it might not have if the element is not <a href="#have-an0" title="has an element in table scope">in table
+ scope</a>).</p>
+
+ <p class="example">In the non-conforming stream
+ <code>&lt;a href=&quot;a&quot;&gt;a&lt;table&gt;&lt;a href=&quot;b&quot;&gt;b&lt;/table&gt;x</code>,
+ the first <code><a href="section-phrase.html#a">a</a></code> element would be closed
+ upon seeing the second one, and the &quot;x&quot; character would be inside a
+ link to &quot;b&quot;, not to &quot;a&quot;. This is despite the fact that the outer
+ <code><a href="section-phrase.html#a">a</a></code> element is not in table scope
+ (meaning that a regular <code>&lt;/a&gt;</code> end tag at the start of
+ the table wouldn't close the outer <code><a href="section-phrase.html#a">a</a></code>
+ element).</p>
+
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Add that element to the <a href="#list-of4">list of active formatting elements</a>.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;b&quot;, &quot;big&quot;, &quot;em&quot;, &quot;font&quot;,
+ &quot;i&quot;, &quot;nobr&quot;, &quot;s&quot;, &quot;small&quot;, &quot;strike&quot;, &quot;strong&quot;, &quot;tt&quot;, &quot;u&quot;
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Add that element to the <a href="#list-of4">list of active formatting elements</a>.</p>
+
+ </dd><dt id="adoptionAgency">An end tag whose tag name is one of: &quot;a&quot;, &quot;b&quot;,
+ &quot;big&quot;, &quot;em&quot;, &quot;font&quot;, &quot;i&quot;, &quot;nobr&quot;, &quot;s&quot;, &quot;small&quot;, &quot;strike&quot;, &quot;strong&quot;,
+ &quot;tt&quot;, &quot;u&quot;
+
+ </dt><dd>
+ <p>Follow these steps:</p>
+
+ <ol>
+ <li>
+ <p>Let the <var title="">formatting element</var> be the last
+ element in the <a href="#list-of4">list of active formatting
+ elements</a> that:</p>
+
+ <ul>
+ <li>is between the end of the list and the last scope marker in
+ the list, if any, or the start of the list otherwise, and
+
+ </li><li>has the same tag name as the token.
+ </li></ul>
+
+ <p>If there is no such node, or, if that node is also in the <a href="#stack">stack of open elements</a> but the element is not <a href="#have-an" title="has an element in scope">in scope</a>, then
+ this is a <a href="section-parsing.html#parse">parse error</a>. Abort these steps. The
+ token is ignored.</p>
+
+ <p>Otherwise, if there is such a node, but that node is not in the
+ <a href="#stack">stack of open elements</a>, then this is a <a href="section-parsing.html#parse">parse error</a>; remove the element from the list,
+ and abort these steps.</p>
+
+ <p>Otherwise, there is a <var title="">formatting element</var> and
+ that element is in <a href="#stack" title="stack of open
+ elements">the stack</a> and is <a href="#have-an" title="has an
+ element in scope">in scope</a>. If the element is not the <a href="#current4">current node</a>, this is a <a href="section-parsing.html#parse">parse error</a>. In any case, proceed with the
+ algorithm as written in the following steps.</p>
+
+ </li><li>
+ <p>Let the <var title="">furthest block</var> be the topmost node
+ in the <a href="#stack">stack of open elements</a> that is lower
+ in the stack than the <var title="">formatting element</var>, and
+ is not an element in the <a href="#phrasing">phrasing</a> or <a href="#formatting">formatting</a> categories. There might not be
+ one.
+
+ </p></li><li>
+ <p>If there is no <var title="">furthest block</var>, then the UA
+ must skip the subsequent steps and instead just pop all the nodes
+ from the bottom of the <a href="#stack">stack of open
+ elements</a>, from the <a href="#current4">current node</a> up to
+ the <var title="">formatting element</var>, and remove the <var title="">formatting element</var> from the <a href="#list-of4">list of active formatting elements</a>.
+
+ </p></li><li>
+ <p>Let the <var title="">common ancestor</var> be the element
+ immediately above the <var title="">formatting element</var> in
+ the <a href="#stack">stack of open elements</a>.
+
+ </p></li><li>
+ <p>If the <var title="">furthest block</var> has a parent node,
+ then remove the <var title="">furthest block</var> from its parent
+ node.
+
+ </p></li><li>
+ <p>Let a bookmark note the position of the <var title="">formatting
+ element</var> in the <a href="#list-of4">list of active formatting
+ elements</a> relative to the elements on either side of it in the
+ list.
+
+ </p></li><li>
+ <p>Let <var title="">node</var> and <var title="">last node</var>
+ be the <var title="">furthest block</var>. Follow these steps:</p>
+
+ <ol>
+ <li>Let <var title="">node</var> be the element immediately prior
+ to <var title="">node</var> in the <a href="#stack">stack of open
+ elements</a>.
+
+ </li><li>If <var title="">node</var> is not in the <a href="#list-of4">list of active formatting elements</a>, then
+ remove <var title="">node</var> from the <a href="#stack">stack
+ of open elements</a> and then go back to step 1.
+
+ </li><li>Otherwise, if <var title="">node</var> is the <var title="">formatting element</var>, then go to the next step in
+ the overall algorithm.
+
+ </li><li>Otherwise, if <var title="">last node</var> is the <var title="">furthest block</var>, then move the aforementioned
+ bookmark to be immediately after the <var title="">node</var> in
+ the <a href="#list-of4">list of active formatting elements</a>.
+
+ </li><li>If <var title="">node</var> has any children, perform a
+ shallow clone of <var title="">node</var>, replace the entry for
+ <var title="">node</var> in the <a href="#list-of4">list of
+ active formatting elements</a> with an entry for the clone,
+ replace the entry for <var title="">node</var> in the <a href="#stack">stack of open elements</a> with an entry for the
+ clone, and let <var title="">node</var> be the clone.
+
+ </li><li>Insert <var title="">last node</var> into <var title="">node</var>, first removing it from its previous parent
+ node if any.
+
+ </li><li>Let <var title="">last node</var> be <var title="">node</var>.
+
+ </li><li>Return to step 1 of this inner set of steps.
+ </li></ol>
+
+ </li><li>
+ <p>Insert whatever <var title="">last node</var> ended up being in
+ the previous step into the <var title="">common ancestor</var>
+ node, first removing it from its previous parent node if any.
+
+ </p></li><li>
+ <p>Perform a shallow clone of the <var title="">formatting
+ element</var>.
+
+ </p></li><li>
+ <p>Take all of the child nodes of the <var title="">furthest
+ block</var> and append them to the clone created in the last step.
+
+ </p></li><li>
+ <p>Append that clone to the <var title="">furthest block</var>.
+
+ </p></li><li>
+ <p>Remove the <var title="">formatting element</var> from the <a href="#list-of4">list of active formatting elements</a>, and
+ insert the clone into the <a href="#list-of4">list of active
+ formatting elements</a> at the position of the aforementioned
+ bookmark.
+
+ </p></li><li>
+ <p>Remove the <var title="">formatting element</var> from the <a href="#stack">stack of open elements</a>, and insert the clone
+ into the <a href="#stack">stack of open elements</a> immediately
+ after (i.e. in a more deeply nested position than) the position of
+ the <var title="">furthest block</var> in that stack.
+
+ </p></li><li>
+ <p>Jump back to step 1 in this series of steps.
+ </p></li></ol>
+
+ <p class="note">The way these steps are defined, only elements in the
+ <a href="#formatting">formatting</a> category ever get cloned by
+ this algorithm.</p>
+ <!--XXX
+ <div class="example">
+ <p class="big-issue">Need an example.</p>
+ </div>
+-->
+
+ <p class="note">Because of the way this algorithm causes elements to
+ change parents, it has been dubbed the &quot;adoption agency algorithm&quot;
+ (in contrast with other possible algorithms for dealing with
+ misnested content, which included the &quot;incest algorithm&quot;, the
+ &quot;secret affair algorithm&quot;, and the &quot;Heisenberg algorithm&quot;).</p>
+
+ </dd><dt>A start tag token whose tag name is &quot;button&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a
+ <code>button</code> element in scope</a>, then this is a <a href="section-parsing.html#parse">parse error</a>; act as if an end tag with the tag
+ name &quot;button&quot; had been seen, then reprocess the token.</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ </dd><dt>A start tag token whose tag name is one of: &quot;marquee&quot;, &quot;object&quot;
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ </dd><dt>An end tag token whose tag name is one of: &quot;button&quot;, &quot;marquee&quot;,
+ &quot;object&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is the same as the tag name of the token,
+ then <a href="#generate">generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as the token, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Now, if the <a href="#stack">stack of open elements</a> <a href="#have-an">has an element in scope</a> whose tag name matches
+ the tag name of the token, then pop elements from the stack until
+ that element has been popped from the stack, and <a href="#clear0">clear the list of active formatting elements up to
+ the last marker</a>.</p>
+
+ </dd><dt>A start tag token whose tag name is &quot;xmp&quot;
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Switch the <a href="section-tokenisation.html#content2">content model flag</a> to the CDATA
+ state.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;table&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+ <!-- XXX quirks: don't do this -->
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table" title="insertion mode: in table">in table</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;area&quot;, &quot;basefont&quot;,
+ &quot;bgsound&quot;, &quot;br&quot;, &quot;embed&quot;, &quot;img&quot;, &quot;param&quot;, &quot;spacer&quot;, &quot;wbr&quot;
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Immediately pop the <a href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;hr&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="section-prose.html#p">p</a></code> had been seen.</p>
+ <!-- XXX quirks: don't do this -->
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Immediately pop the <a href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;image&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Change the token's tag name to
+ &quot;img&quot; and reprocess it. (Don't ask.)</p>
+ <!-- As of
+ 2005-12, studies showed that around 0.2% of pages used the
+ <image> element. -->
+
+
+ </dd><dt>A start tag whose tag name is &quot;input&quot;
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an
+ <code>input</code> element</a> for the token.</p>
+
+ <p>If the <a href="#form-element"><code title="">form</code> element
+ pointer</a> is not null, then <span>associate</span><!--XXX
+ xref! -->
+ the <code>input</code> element with the <code>form</code> element
+ pointed to by the <a href="#form-element"><code title="">form</code>
+ element pointer</a>.</p>
+
+ <p>Pop that <code>input</code> element off the <a href="#stack">stack
+ of open elements</a>.</p>
+
+ </dd><dt id="isindex">A start tag whose tag name is &quot;isindex&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>.</p>
+
+ <p>If the <a href="#form-element"><code title="">form</code> element
+ pointer</a> is not null, then ignore the token.</p>
+
+ <p>Otherwise:</p>
+
+ <p>Act as if a start tag token with the tag name &quot;form&quot; had been
+ seen.</p>
+
+ <p>Act as if a start tag token with the tag name &quot;hr&quot; had been seen.</p>
+
+ <p>Act as if a start tag token with the tag name &quot;p&quot; had been seen.</p>
+
+ <p>Act as if a start tag token with the tag name &quot;label&quot; had been
+ seen.</p>
+
+ <p>Act as if a stream of character tokens had been seen (see below
+ for what they should say).</p>
+
+ <p>Act as if a start tag token with the tag name &quot;input&quot; had been
+ seen, with all the attributes from the &quot;isindex&quot; token, except with
+ the &quot;name&quot; attribute set to the value &quot;isindex&quot; (ignoring any
+ explicit &quot;name&quot; attribute).</p>
+
+ <p>Act as if a stream of character tokens had been seen (see below
+ for what they should say).</p>
+
+ <p>Act as if an end tag token with the tag name &quot;label&quot; had been
+ seen.</p>
+
+ <p>Act as if an end tag token with the tag name &quot;p&quot; had been seen.</p>
+
+ <p>Act as if a start tag token with the tag name &quot;hr&quot; had been seen.</p>
+
+ <p>Act as if an end tag token with the tag name &quot;form&quot; had been seen.</p>
+
+ <p>The two streams of character tokens should, together with
+ the <code>input</code> element, express the equivalent of &quot;This is a
+ searchable index. Insert your search keywords here: (input field)&quot;
+ in the user's preferred language.</p>
+
+ <p class="big-issue"> Then need to specify that if the form submission
+ causes just a single form control, whose name is &quot;isindex&quot;, to be
+ submitted, then we submit just the value part, not the &quot;isindex=&quot;
+ part.</p>
+ </dd>
+ <!-- XXX keygen support; don't forget form element pointer!
+
+ <dt>A start tag whose tag name is "keygen"</dt>
+ <dd>
+ ...
+ </dd>
+-->
+
+ <dt>A start tag whose tag name is &quot;textarea&quot;
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>If the <a href="#form-element"><code title="">form</code> element
+ pointer</a> is not null, then <span>associate</span><!--XXX
+ xref! -->
+ the <code>textarea</code> element with the <code>form</code> element
+ pointed to by the <a href="#form-element"><code title="">form</code>
+ element pointer</a>.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>.</p>
+
+ <p>Switch the tokeniser's <a href="section-tokenisation.html#content2">content model flag</a>
+ to the RCDATA state.</p>
+
+ <p>If the next token is a U+000A LINE FEED (LF) character token, then
+ ignore that token and move on to the next one. (Newlines at the
+ start of <code>textarea</code> elements are ignored as an authoring
+ convenience.)</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node, whose contents is the
+ concatenation of all those tokens' characters, to the new element
+ node.</p>
+
+ <p>The tokeniser's <a href="section-tokenisation.html#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the tag name
+ &quot;textarea&quot;, ignore it. Otherwise, this is a <a href="section-parsing.html#parse">parse
+ error</a>.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;iframe&quot;, &quot;noembed&quot;,
+ &quot;noframes&quot;
+
+ </dt><dt>A start tag whose tag name is &quot;noscript&quot;, if <a href="section-scripting.html#scripting2">scripting is enabled</a>:
+
+ </dt><dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>For &quot;iframe&quot; tags, the node must be an <code><a href="section-embedded.html#htmliframeelement">HTMLIFrameElement</a></code> object, for
+ the other tags it must be an <code><a href="section-elements.html#htmlelement">HTMLElement</a></code> object.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>.</p>
+
+ <p>Switch the tokeniser's <a href="section-tokenisation.html#content2">content model flag</a>
+ to the CDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node, whose contents is the
+ concatenation of all those tokens' characters, to the new element
+ node.</p>
+
+ <p>The tokeniser's <a href="section-tokenisation.html#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the same tag name as
+ the start tag token, ignore it. Otherwise, this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;select&quot;
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-select" title="insertion mode: in select">in select</a>&quot;.</p>
+ </dd>
+ <!-- XXX quirks:
+ <dt>An end tag whose tag name is "br"</dt>
+ <dd>
+ <p>Act as if a start tag token with the tag name "br" had been
+ seen. Ignore the end tag token.</p>
+ </dd>
+-->
+
+ <dt>A start or end tag whose tag name is one of: &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;frame&quot;, &quot;frameset&quot;, &quot;head&quot;, &quot;option&quot;, &quot;optgroup&quot;,
+ &quot;tbody&quot;, &quot;td&quot;, &quot;tfoot&quot;, &quot;th&quot;, &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dt>An end tag whose tag name is one of: &quot;area&quot;, &quot;basefont&quot;,
+ &quot;bgsound&quot;, <!--XXX quirks: remove br-->&quot;br&quot;, &quot;embed&quot;, &quot;hr&quot;, &quot;iframe&quot;,
+ &quot;image&quot;, &quot;img&quot;, &quot;input&quot;, &quot;isindex&quot;, &quot;noembed&quot;, &quot;noframes&quot;, &quot;param&quot;,
+ &quot;select&quot;, &quot;spacer&quot;, &quot;table&quot;, &quot;textarea&quot;, &quot;wbr&quot;</dt>
+ <!-- add keygen if we add the start tag -->
+
+ <dt>An end tag whose tag name is &quot;noscript&quot;, if <a href="section-scripting.html#scripting2">scripting is enabled</a>:
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>A start or end tag whose tag name is one of: &quot;event-source&quot;,
+ &quot;section&quot;, &quot;nav&quot;, &quot;article&quot;, &quot;aside&quot;, &quot;header&quot;, &quot;footer&quot;, &quot;datagrid&quot;,
+ &quot;command&quot;
+
+ </dt><dd> <!-- XXXX -->
+ <p class="big-issue">Work in progress!</p>
+
+ </dd><dt>A start tag token not covered by the previous entries
+
+ </dt><dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p class="note">This element will be a <a href="#phrasing">phrasing</a>
+ element.</p>
+ <!--
+Put the following into the MathML namespace if parsed:
+ math, mrow, mfrac, msqrt, mroot, mstyle, merror, mpadded,
+ mphantom, mfenced, menclose, msub, msup, msubsup, munder,
+ mover, munderover, mmultiscripts, mtable, mlabeledtr, mtr,
+ mtd, maction
+-->
+
+
+ </dd><dt>An end tag token not covered by the previous entries
+
+ </dt><dd>
+ <p>Run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a href="#current4">current node</a> (the bottommost node of the
+ stack).
+
+ </p></li><li>
+ <p>If <var title="">node</var> has the same tag name as the end tag
+ token, then:</p>
+
+ <ol>
+ <li>
+ <p><a href="#generate">Generate implied end tags</a>.
+
+ </p></li><li>
+ <p>If the tag name of the end tag token does not match the tag
+ name of the <a href="#current4">current node</a>, this is a <a href="section-parsing.html#parse">parse error</a>.
+
+ </p></li><li>
+ <p>Pop all the nodes from the <a href="#current4">current
+ node</a> up to <var title="">node</var>, including <var title="">node</var>, then stop this algorithm.
+ </p></li></ol>
+
+ </li><li>
+ <p>Otherwise, if <var title="">node</var> is in neither the <a href="#formatting">formatting</a> category nor the <a href="#phrasing">phrasing</a> category, then this is a <a href="section-parsing.html#parse">parse error</a>. Stop this algorithm. The end tag
+ token is ignored.
+
+ </p></li><li>
+ <p>Set <var title="">node</var> to the previous entry in the <a href="#stack">stack of open elements</a>.
+
+ </p></li><li>
+ <p>Return to step 2.
+ </p></li></ol>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-intable">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-table" title="insertion mode: in table">in
+ table</dfn>&quot;
+
+ </dt><dd>
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;caption&quot;
+
+ </dt><dd>
+ <p><a href="#clear1">Clear the stack back to a table context</a>.
+ (See below.)</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token, then
+ switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-caption" title="insertion mode: in caption">in
+ caption</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;colgroup&quot;
+
+ </dt><dd>
+ <p><a href="#clear1">Clear the stack back to a table context</a>.
+ (See below.)</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token, then
+ switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-column" title="insertion mode: in column group">in column
+ group</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;col&quot;
+
+ </dt><dd>
+ <p>Act as if a start tag token with the tag name &quot;colgroup&quot; had been
+ seen, then reprocess the current token.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;tbody&quot;, &quot;tfoot&quot;, &quot;thead&quot;
+
+ </dt><dd>
+ <p><a href="#clear1">Clear the stack back to a table context</a>.
+ (See below.)</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token, then
+ switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table0" title="insertion mode: in table body">in table
+ body</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;td&quot;, &quot;th&quot;, &quot;tr&quot;
+
+ </dt><dd>
+ <p>Act as if a start tag token with the tag name &quot;tbody&quot; had been
+ seen, then reprocess the current token.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;table&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Act as if an end tag token with
+ the tag name &quot;table&quot; had been seen, then, if that token wasn't
+ ignored, reprocess the current token.</p>
+
+ <p class="note">The fake end tag token here can only be ignored in the
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt>An end tag whose tag name is &quot;table&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="section-parsing.html#parse">parse error</a>. Ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#generate">Generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not a <code><a href="section-tabular.html#table">table</a></code> element, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Pop elements from this stack until a <code><a href="section-tabular.html#table">table</a></code> element has been popped from the
+ stack.</p>
+
+ <p><a href="#reset">Reset the insertion mode appropriately</a>.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;body&quot;, &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;html&quot;, &quot;tbody&quot;, &quot;td&quot;, &quot;tfoot&quot;, &quot;th&quot;, &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Process the token as if the <a href="#insertion0">insertion mode</a> was &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot;, with the following
+ exception:</p>
+
+ <p>If the <a href="#current4">current node</a> is a <code><a href="section-tabular.html#table">table</a></code>, <code><a href="section-tabular.html#tbody">tbody</a></code>, <code><a href="section-tabular.html#tfoot0">tfoot</a></code>, <code><a href="section-tabular.html#thead0">thead</a></code>, or <code><a href="section-tabular.html#tr">tr</a></code> element, then, whenever a node would be
+ inserted into the <a href="#current4">current node</a>, it must
+ instead be inserted into the <em><a href="#foster">foster parent
+ element</a></em>.</p>
+
+ <p>The <dfn id="foster">foster parent element</dfn> is the parent
+ element of the last <code><a href="section-tabular.html#table">table</a></code> element
+ in the <a href="#stack">stack of open elements</a>, if there is a
+ <code><a href="section-tabular.html#table">table</a></code> element and it has such a
+ parent element. If there is no <code><a href="section-tabular.html#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a> (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code>
+ case</a>), then the <em><a href="#foster">foster parent
+ element</a></em> is the first element in the <a href="#stack">stack
+ of open elements</a> (the <code><a href="section-the-root.html#html">html</a></code>
+ element). Otherwise, if there is a <code><a href="section-tabular.html#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a>, but the last <code><a href="section-tabular.html#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a> has no parent, or its parent node is not an
+ element, then the <em><a href="#foster">foster parent
+ element</a></em> is the element before the last <code><a href="section-tabular.html#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a>.</p>
+
+ <p>If the <em><a href="#foster">foster parent element</a></em> is the
+ parent element of the last <code><a href="section-tabular.html#table">table</a></code>
+ element in the <a href="#stack">stack of open elements</a>, then the
+ new node must be inserted immediately <em>before</em> the last
+ <code><a href="section-tabular.html#table">table</a></code> element in the <a href="#stack">stack of open elements</a> in the <a href="#foster">foster parent element</a>; otherwise, the new node
+ must be <em>appended</em> to the <a href="#foster">foster parent
+ element</a>.</p>
+ </dd></dl>
+
+ <p>When the steps above require the UA to <dfn id="clear1">clear the
+ stack back to a table context</dfn>, it means that the UA must, while
+ the <a href="#current4">current node</a> is not a <code><a href="section-tabular.html#table">table</a></code> element or an <code><a href="section-the-root.html#html">html</a></code> element, pop elements from the <a href="#stack">stack of open elements</a>. If this causes any elements
+ to be popped from the stack, then this is a <a href="section-parsing.html#parse">parse
+ error</a>.</p>
+
+ <p class="note">The <a href="#current4">current node</a> being an
+ <code><a href="section-the-root.html#html">html</a></code> element after this process is an
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt id="parsing-main-incaption">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-caption" title="insertion mode: in caption">in
+ caption</dfn>&quot;
+
+ </dt><dd>
+ <dl class="switch">
+ <dt>An end tag whose tag name is &quot;caption&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="section-parsing.html#parse">parse error</a>. Ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#generate">Generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not a <code><a href="section-tabular.html#caption0">caption</a></code> element, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Pop elements from this stack until a <code><a href="section-tabular.html#caption0">caption</a></code> element has been popped from the
+ stack.</p>
+
+ <p><a href="#clear0">Clear the list of active formatting elements up
+ to the last marker</a>.</p>
+
+ <p>Switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table" title="insertion mode: in table">in table</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;tbody&quot;, &quot;td&quot;, &quot;tfoot&quot;, &quot;th&quot;, &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dt>An end tag whose tag name is &quot;table&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Act as if an end tag with the
+ tag name &quot;caption&quot; had been seen, then, if that token wasn't
+ ignored, reprocess the current token.</p>
+
+ <p class="note">The fake end tag token here can only be ignored in the
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;body&quot;, &quot;col&quot;, &quot;colgroup&quot;,
+ &quot;html&quot;, &quot;tbody&quot;, &quot;td&quot;, &quot;tfoot&quot;, &quot;th&quot;, &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was &quot;<a href="#in-body" title="insertion mode: in body">in
+ body</a>&quot;.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-incolgroup">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-column" title="insertion mode: in column
+ group">in column group</dfn>&quot;
+
+ </dt><dd>
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;col&quot;
+
+ </dt><dd>
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>col</code> element</a> for the token. Immediately pop the <a href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>.</p>
+
+ </dd><dt>An end tag whose tag name is &quot;colgroup&quot;
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is the root <code><a href="section-the-root.html#html">html</a></code> element, then this is a <a href="section-parsing.html#parse">parse error</a>; ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, pop the <a href="#current4">current node</a> (which
+ will be a <code><a href="section-tabular.html#colgroup">colgroup</a></code> element)
+ from the <a href="#stack">stack of open elements</a>. Switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table" title="insertion mode: in table">in table</a>&quot;.</p>
+
+ </dd><dt>An end tag whose tag name is &quot;col&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Act as if an end tag with the tag name &quot;colgroup&quot; had been seen,
+ and then, if that token wasn't ignored, reprocess the current token.</p>
+
+ <p class="note">The fake end tag token here can only be ignored in the
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-intbody">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-table0" title="insertion mode: in table body">in
+ table body</dfn>&quot;
+
+ </dt><dd>
+ <dl class="switch">
+ <dt>A start tag whose tag name is &quot;tr&quot;
+
+ </dt><dd>
+ <p><a href="#clear2">Clear the stack back to a table body
+ context</a>. (See below.)</p>
+
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>tr</code> element</a> for the token, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-row" title="insertion mode: in row">in row</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;th&quot;, &quot;td&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Act as if a start tag with the
+ tag name &quot;tr&quot; had been seen, then reprocess the current token.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;tbody&quot;, &quot;tfoot&quot;, &quot;thead&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="section-parsing.html#parse">parse error</a>. Ignore the token.</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#clear2">Clear the stack back to a table body
+ context</a>. (See below.)</p>
+
+ <p>Pop the <a href="#current4">current node</a> from the <a href="#stack">stack of open elements</a>. Switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table" title="insertion mode: in table">in table</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;tbody&quot;, &quot;tfoot&quot;, &quot;thead&quot;
+
+ </dt><dt>An end tag whose tag name is &quot;table&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have a
+ <code>tbody</code>, <code>thead</code>, or <code>tfoot</code>
+ element in table scope</a>, this is a <a href="section-parsing.html#parse">parse
+ error</a>. Ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#clear2">Clear the stack back to a table body
+ context</a>. (See below.)</p>
+
+ <p>Act as if an end tag with the same tag name as the <a href="#current4">current node</a> (&quot;tbody&quot;, &quot;tfoot&quot;, or &quot;thead&quot;) had
+ been seen, then reprocess the current token.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;body&quot;, &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;html&quot;, &quot;td&quot;, &quot;th&quot;, &quot;tr&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was &quot;<a href="#in-table" title="insertion mode: in
+ table">in table</a>&quot;.</p>
+ </dd></dl>
+
+ <p>When the steps above require the UA to <dfn id="clear2">clear the
+ stack back to a table body context</dfn>, it means that the UA must,
+ while the <a href="#current4">current node</a> is not a <code><a href="section-tabular.html#tbody">tbody</a></code>, <code><a href="section-tabular.html#tfoot0">tfoot</a></code>, <code><a href="section-tabular.html#thead0">thead</a></code>, or <code><a href="section-the-root.html#html">html</a></code> element, pop elements from the <a href="#stack">stack of open elements</a>. If this causes any elements
+ to be popped from the stack, then this is a <a href="section-parsing.html#parse">parse
+ error</a>.</p>
+
+ <p class="note">The <a href="#current4">current node</a> being an
+ <code><a href="section-the-root.html#html">html</a></code> element after this process is an
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt id="parsing-main-intr">If the <a href="#insertion0">insertion mode</a>
+ is &quot;<dfn id="in-row" title="insertion mode: in row">in row</dfn>&quot;
+
+ </dt><dd>
+ <dl class="switch">
+ <dt>A start tag whose tag name is one of: &quot;th&quot;, &quot;td&quot;
+
+ </dt><dd>
+ <p><a href="#clear3">Clear the stack back to a table row context</a>.
+ (See below.)</p>
+
+ <p><a href="#insert" title="insert an HTML element">Insert an HTML
+ element</a> for the token, then switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-cell" title="insertion mode: in cell">in cell</a>&quot;.</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ </dd><dt>An end tag whose tag name is &quot;tr&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="section-parsing.html#parse">parse error</a>. Ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#clear3">Clear the stack back to a table row context</a>.
+ (See below.)</p>
+
+ <p>Pop the <a href="#current4">current node</a> (which will be a
+ <code><a href="section-tabular.html#tr">tr</a></code> element) from the <a href="#stack">stack of open elements</a>. Switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-table0" title="insertion mode: in table body">in table body</a>&quot;.</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;tbody&quot;, &quot;tfoot&quot;, &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dt>An end tag whose tag name is &quot;table&quot;
+
+ </dt><dd>
+ <p>Act as if an end tag with the tag name &quot;tr&quot; had been seen, then,
+ if that token wasn't ignored, reprocess the current token.</p>
+
+ <p class="note">The fake end tag token here can only be ignored in the
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;tbody&quot;, &quot;tfoot&quot;, &quot;thead&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="section-parsing.html#parse">parse error</a>. Ignore the token.</p>
+
+ <p>Otherwise, act as if an end tag with the tag name &quot;tr&quot; had been
+ seen, then reprocess the current token.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;body&quot;, &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;html&quot;, &quot;td&quot;, &quot;th&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was &quot;<a href="#in-table" title="insertion mode: in
+ table">in table</a>&quot;.</p>
+ </dd></dl>
+
+ <p>When the steps above require the UA to <dfn id="clear3">clear the
+ stack back to a table row context</dfn>, it means that the UA must,
+ while the <a href="#current4">current node</a> is not a <code><a href="section-tabular.html#tr">tr</a></code> element or an <code><a href="section-the-root.html#html">html</a></code> element, pop elements from the <a href="#stack">stack of open elements</a>. If this causes any elements
+ to be popped from the stack, then this is a <a href="section-parsing.html#parse">parse
+ error</a>.</p>
+
+ <p class="note">The <a href="#current4">current node</a> being an
+ <code><a href="section-the-root.html#html">html</a></code> element after this process is an
+ <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ </dd><dt id="parsing-main-intd">If the <a href="#insertion0">insertion mode</a>
+ is &quot;<dfn id="in-cell" title="insertion mode: in cell">in cell</dfn>&quot;
+
+ </dt><dd>
+ <dl class="switch">
+ <dt>An end tag whose tag name is one of: &quot;td&quot;, &quot;th&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as that of the
+ token, then this is a <a href="section-parsing.html#parse">parse error</a> and the token
+ must be ignored.</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#generate">Generate implied end tags</a>, except for
+ elements with the same tag name as the token.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as the token, then this is a <a href="section-parsing.html#parse">parse error</a>.</p>
+
+ <p>Pop elements from this stack until an element with the same tag
+ name as the token has been popped from the stack.</p>
+
+ <p><a href="#clear0">Clear the list of active formatting elements up
+ to the last marker</a>.</p>
+
+ <p>Switch the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-row" title="insertion mode: in row">in row</a>&quot;. (The <a href="#current4">current node</a> will be a <code><a href="section-tabular.html#tr">tr</a></code> element at this point.)</p>
+
+ </dd><dt>A start tag whose tag name is one of: &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;tbody&quot;, &quot;td&quot;, &quot;tfoot&quot;, &quot;th&quot;, &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does
+ <em>not</em> <a href="#have-an0" title="has an element in table
+ scope">have a <code>td</code> or <code>th</code> element in table
+ scope</a>, then this is a <a href="section-parsing.html#parse">parse error</a>; ignore
+ the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, <a href="#close2">close the cell</a> (see below) and
+ reprocess the current token.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;body&quot;, &quot;caption&quot;, &quot;col&quot;,
+ &quot;colgroup&quot;, &quot;html&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;table&quot;, &quot;tbody&quot;, &quot;tfoot&quot;,
+ &quot;thead&quot;, &quot;tr&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as that of the
+ token (which can only happen for &quot;tbody&quot;, &quot;tfoot&quot; and &quot;thead&quot;, or,
+ in the <a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>), then
+ this is a <a href="section-parsing.html#parse">parse error</a> and the token must be
+ ignored.</p>
+
+ <p>Otherwise, <a href="#close2">close the cell</a> (see below) and
+ reprocess the current token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was &quot;<a href="#in-body" title="insertion mode: in body">in
+ body</a>&quot;.</p>
+ </dd></dl>
+
+ <p>Where the steps above say to <dfn id="close2">close the cell</dfn>,
+ they mean to run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an0" title="has an element in table scope">has a
+ <code>td</code> element in table scope</a>, then act as if an end
+ tag token with the tag name &quot;td&quot; had been seen.
+
+ </p></li><li>
+ <p>Otherwise, the <a href="#stack">stack of open elements</a> will <a href="#have-an0" title="has an element in table scope">have a
+ <code>th</code> element in table scope</a>; act as if an end tag
+ token with the tag name &quot;th&quot; had been seen.
+ </p></li></ol>
+
+ <p class="note">The <a href="#stack">stack of open elements</a> cannot
+ have both a <code><a href="section-tabular.html#td">td</a></code> and a <code><a href="section-tabular.html#th">th</a></code> element <a href="#have-an0" title="has an
+ element in table scope">in table scope</a> at the same time, nor can
+ it have neither when the <a href="#insertion0">insertion mode</a> is
+ &quot;<a href="#in-cell" title="insertion mode: in cell">in cell</a>&quot;.</p>
+
+ </dd><dt id="parsing-main-inselect">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-select" title="insertion mode: in select">in
+ select</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the token's
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag token whose tag name is &quot;option&quot;
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is an
+ <code>option</code> element, act as if an end tag with the tag name
+ &quot;option&quot; had been seen.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ </dd><dt>A start tag token whose tag name is &quot;optgroup&quot;
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is an
+ <code>option</code> element, act as if an end tag with the tag name
+ &quot;option&quot; had been seen.</p>
+
+ <p>If the <a href="#current4">current node</a> is an
+ <code>optgroup</code> element, act as if an end tag with the tag
+ name &quot;optgroup&quot; had been seen.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ </dd><dt>An end tag token whose tag name is &quot;optgroup&quot;
+
+ </dt><dd>
+ <p>First, if the <a href="#current4">current node</a> is an
+ <code>option</code> element, and the node immediately before it in
+ the <a href="#stack">stack of open elements</a> is an
+ <code>optgroup</code> element, then act as if an end tag with the
+ tag name &quot;option&quot; had been seen.</p>
+
+ <p>If the <a href="#current4">current node</a> is an
+ <code>optgroup</code> element, then pop that node from the <a href="#stack">stack of open elements</a>. Otherwise, this is a <a href="section-parsing.html#parse">parse error</a>, ignore the token.</p>
+
+ </dd><dt>An end tag token whose tag name is &quot;option&quot;
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is an
+ <code>option</code> element, then pop that node from the <a href="#stack">stack of open elements</a>. Otherwise, this is a <a href="section-parsing.html#parse">parse error</a>, ignore the token.</p>
+
+ </dd><dt>An end tag whose tag name is &quot;select&quot;
+
+ </dt><dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="section-parsing.html#parse">parse error</a>. Ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p>Pop elements from the <a href="#stack">stack of open elements</a>
+ until a <code>select</code> element has been popped from the stack.</p>
+
+ <p><a href="#reset">Reset the insertion mode appropriately</a>.</p>
+
+ </dd><dt>A start tag whose tag name is &quot;select&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Act as if the token had been an
+ end tag with the tag name &quot;select&quot; instead.</p>
+
+ </dd><dt>An end tag whose tag name is one of: &quot;caption&quot;, &quot;table&quot;, &quot;tbody&quot;,
+ &quot;tfoot&quot;, &quot;thead&quot;, &quot;tr&quot;, &quot;td&quot;, &quot;th&quot;
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a href="#have-an0">has an element in table scope</a> with the same tag
+ name as that of the token, then act as if an end tag with the tag
+ name &quot;select&quot; had been seen, and reprocess the token. Otherwise,
+ ignore the token.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-afterbody">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="after2" title="insertion mode: after body">after
+ body</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p>Process the token as it would be processed if the <a href="#insertion0">insertion mode</a> was &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot;.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the first element in the <a href="#stack">stack of open elements</a> (the <code><a href="section-the-root.html#html">html</a></code> element), with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>An end tag with the tag name &quot;html&quot;
+
+ </dt><dd>
+ <p>If the parser was originally created in order to handle the
+ setting of <em>an element</em>'s <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code> attribute, this is a <a href="section-parsing.html#parse">parse error</a>; ignore the token. (The element will
+ be an <code><a href="section-the-root.html#html">html</a></code> element in this case.)
+ (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, switch to <a href="#the-trailing0">the trailing end
+ phase</a>.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Set the <a href="#insertion0">insertion mode</a> to &quot;<a href="#in-body" title="insertion mode: in body">in body</a>&quot; and reprocess the
+ token.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-inframeset">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="in-frameset" title="insertion mode: in frameset">in
+ frameset</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>A start tag with the tag name &quot;frameset&quot;
+
+ </dt><dd>
+ <p><a href="#insert" title="Insert an HTML element">Insert a
+ <code>frameset</code> element</a> for the token.</p>
+
+ </dd><dt>An end tag with the tag name &quot;frameset&quot;
+
+ </dt><dd>
+ <p>If the <a href="#current4">current node</a> is the root <code><a href="section-the-root.html#html">html</a></code> element, then this is a <a href="section-parsing.html#parse">parse error</a>; ignore the token. (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, pop the <a href="#current4">current node</a> from the
+ <a href="#stack">stack of open elements</a>.</p>
+
+ <p>If the parser was <em>not</em> originally created in order to
+ handle the setting of an element's <code title="dom-innerHTML-HTML"><a href="section-dynamic.html#innerhtml0">innerHTML</a></code> attribute (<a href="section-dynamic.html#innerhtml1"><code>innerHTML</code> case</a>), and the <a href="#current4">current node</a> is no longer a
+ <code>frameset</code> element, then change the <a href="#insertion0">insertion mode</a> to &quot;<a href="#after3" title="insertion mode: after frameset">after frameset</a>&quot;.</p>
+
+ </dd><dt>A start tag with the tag name &quot;frame&quot;
+
+ </dt><dd>
+ <p><a href="#insert">Insert an HTML element</a> for the token.
+ Immediately pop the <a href="#current4">current node</a> off the <a href="#stack">stack of open elements</a>.</p>
+
+ </dd><dt>A start tag with the tag name &quot;noframes&quot;
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> had been &quot;<a href="#in-body" title="insertion mode: in
+ body">in body</a>&quot;.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+ </dd></dl>
+
+ </dd><dt id="parsing-main-afterframeset">If the <a href="#insertion0">insertion
+ mode</a> is &quot;<dfn id="after3" title="insertion mode: after
+ frameset">after frameset</dfn>&quot;
+
+ </dt><dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class="switch">
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <a href="#current4">current node</a> with the <code title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ </dd><dt>An end tag with the tag name &quot;html&quot;
+
+ </dt><dd>
+ <p>Switch to <a href="#the-trailing0">the trailing end phase</a>.</p>
+
+ </dd><dt>A start tag with the tag name &quot;noframes&quot;
+
+ </dt><dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> had been &quot;<a href="#in-body" title="insertion mode: in
+ body">in body</a>&quot;.</p>
+
+ </dd><dt>Anything else
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+ </dd></dl>
+ </dd></dl>
+ </dd></dl>
+
+ <p class="big-issue">This doesn't handle UAs that don't support frames, or
+ that do support frames but want to show the NOFRAMES content. Supporting
+ the former is easy; supporting the latter is harder.
+
+ </p><h5 id="the-trailing"><span class="secno">8.2.4.4. </span><dfn id="the-trailing0">The trailing end phase</dfn></h5>
+
+ <p>After <a href="#the-main0">the main phase</a>, as each token is emitted
+ from the <a href="section-tokenisation.html#tokenisation0">tokenisation</a> stage, it must be
+ processed as described in this section.
+
+ </p><dl class="switch">
+ <dt>A DOCTYPE token
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Ignore the token.</p>
+
+ </dd><dt>A comment token
+
+ </dt><dd>
+ <p>Append a <code>Comment</code> node to the <code>Document</code> object
+ with the <code title="">data</code> attribute set to the data given in
+ the comment token.</p>
+
+ </dd><dt>A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dd>
+ <p>Process the token as it would be processed in <a href="#the-main0">the
+ main phase</a>.</p>
+
+ </dd><dt>A character token that is <em>not</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ </dt><dt>A start tag token
+
+ </dt><dt>An end tag token
+
+ </dt><dd>
+ <p><a href="section-parsing.html#parse">Parse error</a>. Switch back to <a href="#the-main0">the main phase</a> and reprocess the token.</p>
+
+ </dd><dt>An end-of-file token
+
+ </dt><dd>
+ <p><a href="#stops">Stop parsing</a>.</p>
+ </dd></dl>
+
+ <h4 id="the-end"><span class="secno">8.2.5. </span>The End</h4>
+
+ <p>Once the user agent <dfn id="stops" title="stop parsing">stops
+ parsing</dfn> the document, the user agent must follow the steps in this
+ section.
+
+ </p><p>First, <!--the user agent must <span title="fire a DOMContentLoaded
+ event">fire a <code
+ title="event-DOMContentLoaded">DOMContentLoaded</code> event</span>
+ at <span>the <code>body</code> element</span>.</p>
+
+ <p>Then, -->the
+ rules for <a href="section-scripting0.html#when-a">when a script completes loading</a> start
+ applying (script execution is no longer managed by the parser).
+
+ </p><p>If any of the scripts in the <a href="section-scripting0.html#list-of1">list of scripts that
+ will execute as soon as possible</a> have <span>completed
+ loading</span><!-- XXX xref -->, or if the <a href="section-scripting0.html#list-of0">list of
+ scripts that will execute asynchronously</a> is not empty and the first
+ script in that list has <span>completed loading</span><!-- XXX xref
+ -->,
+ then the user agent must act as if those scripts just completed loading,
+ following the rules given for that in the <code><a href="section-scripting0.html#script0">script</a></code> element definition.
+
+ </p><p>Then, if the <a href="section-scripting0.html#list-of">list of scripts that will execute when
+ the document has finished parsing</a> is not empty, and the first item in
+ this list has already <span>completed loading</span><!--XXX
+ xref -->,
+ then the user agent must act as if that script just finished loading.
+
+ </p><p>By this point, there will be no scripts that have loaded but have not
+ yet been executed.
+
+ </p><p>The user agent must then <a href="section-scripting.html#firing2">fire a simple event</a>
+ called <code title="event-DOMContentLoaded">DOMContentLoaded</code> at the
+ <code>Document</code>.
+
+ </p><p>Once everything that <dfn id="delays" title="delay the load event">delays
+ the load event</dfn> has completed, the user agent must <a href="section-scripting.html#firing4" title="fire a load event">fire a <code title="event-load">load</code>
+ event</a> at <a href="section-dom-tree.html#the-body0">the <code>body</code> element</a>.</p>
+ <!-- XXX make sure things "delay the load event" -->
+ <!--XXX need to handle
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#2354
+2354 // Don't open transient styles if it makes the stack deep, bug 58917.
+-->
+ <!--XXX
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/nsHTMLTokenizer.cpp#749
+-->
+ <!--
+see also CTextToken::ConsumeCharacterData() for CDATA parsing?
+
+1212 1 Here's a tricky case from bug 22596: <h5><li><h5>
+1213 How do we know that the 2nd <h5> should close the <LI> rather than nest inside the <LI>?
+1214 (After all, the <h5> is a legal child of the <LI>).
+1215
+1216 The way you know is that there is no root between the two, so the <h5> binds more
+1217 tightly to the 1st <h5> than to the <LI>.
+1218 2. Also, bug 6148 shows this case: <SPAN><DIV><SPAN>
+1219 From this case we learned not to execute this logic if the parent is a block.
+1220
+1221 3. Fix for 26583
+1222 Ex. <A href=foo.html><B>foo<A href-bar.html>bar</A></B></A> <- A legal HTML
+1223 In the above example clicking on "foo" or "bar" should link to
+1224 foo.html or bar.html respectively. That is, the inner <A> should be informed
+1225 about the presence of an open <A> above <B>..so that the inner <A> can close out
+1226 the outer <A>. The following code does it for us.
+1227
+1228 4. Fix for 27865 [ similar to 22596 ]. Ex: <DL><DD><LI>one<DD><LI>two
+ - http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#1211
+
+815 // Here's a problem. If theTag is legal in here, we don't move it
+816 // out. So if we're moving stuff out of here, the parent of theTag
+817 // gets closed at this point. But some things are legal
+818 // _everywhere_ and hence would effectively close out misplaced
+819 // content in tables. This is undesirable, so treat them as
+820 // illegal here so they'll be shipped out with their parents and
+821 // siblings. See bug 40855 for an explanation (that bug was for
+822 // comments, but the same issues arise with whitespace, newlines,
+823 // noscript, etc). Script is special, though. Shipping it out
+824 // breaks document.write stuff. See bug 243064.
+ - http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#825
+
+
+1326 /**************************************************************************************
+1327 *
+1328 * Now a little code to deal with bug #49687 (crash when layout stack gets too deep)
+1329 * I've also opened this up to any container (not just inlines): re bug 55095
+1330 * Improved to handle bug 55980 (infinite loop caused when DEPTH is exceeded and
+1331 * </P> is encountered by itself (<P>) is continuously produced.
+1332 *
+1333 **************************************************************************************/
+
+1912 // Oh boy!! we found a "stray" tag. Nav4.x and IE introduce line break in
+1913 // such cases. So, let's simulate that effect for compatibility.
+1914 // Ex. <html><body>Hello</P>There</body></html>
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#1912
+
+http://lxr.mozilla.org/seamonkey/search?string=nested
+/parser/htmlparser/src/CNavDTD.cpp, line 791 - * 2. <CENTER><DL><DT><A><CENTER> allow nested <CENTER>
+/parser/htmlparser/src/CNavDTD.cpp, line 792 - * 3. <TABLE><TR><TD><TABLE>... allow nested <TABLE>
+/parser/htmlparser/src/CNavDTD.cpp, line 2562 - // Discard nested forms - bug 72639
+/parser/htmlparser/src/nsElementTable.cpp, line 1453 - * 2. <CENTER><DL><DT><A><CENTER> allow nested <CENTER>
+/parser/htmlparser/src/nsElementTable.cpp, line 1454 - * 3. <TABLE><TR><TD><TABLE>... allow nested <TABLE>
+/parser/htmlparser/src/nsElementTable.cpp, line 1901 - // Ex: <H1><LI><H1><LI>. Inner LI has the potential of getting nested
+-->
+
+ <script src="http://status.whatwg.org/annotate-web-apps.js" type="text/javascript"></script></body></html> \ No newline at end of file
diff --git a/test/data/html/web-apps.html b/test/data/html/web-apps.html
new file mode 100644
index 0000000..d685320
--- /dev/null
+++ b/test/data/html/web-apps.html
@@ -0,0 +1,41271 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+
+<html lang=en-GB-hixie>
+ <head>
+ <title>HTML 5</title>
+ <link href="/style/specification" rel=stylesheet type="text/css">
+ <link href="/images/icon" rel=icon>
+
+ <style type="text/css">
+ h4 + .element { margin-top: -2.5em; padding-top: 2em; }
+ h4 + p + .element { margin-top: -5em; padding-top: 4em; }
+ .element { background: #EEFFEE; color: black; margin: 0 0 1em -1em; padding: 0 1em 0.25em 0.75em; border-left: solid #99FF99 0.25em; -padding: 0; /* that last decl is for IE6. Try removing it, it's hilarious! */ }
+ .proposal { border: blue solid; padding: 1em; }
+ table.matrix, table.matrix td { border: none; text-align: right; }
+ table.matrix { margin-left: 2em; }
+ </style>
+
+ <body class=draft>
+ <div class=head>
+ <p><a class=logo href="http://www.whatwg.org/" rel=home><img alt=WHATWG
+ src="/images/logo"></a></p>
+
+ <h1 id=html-5>HTML 5</h1>
+
+ <h2 class="no-num no-toc" id=working>Working Draft &mdash; 14 June 2007</h2>
+
+ <p>You can take part in this work. <a
+ href="http://www.whatwg.org/mailing-list">Join the working group's
+ discussion list.</a></p>
+
+ <p><strong>Web designers!</strong> We have a <a
+ href="http://blog.whatwg.org/faq/">FAQ</a>, a <a
+ href="http://forums.whatwg.org/">forum</a>, and a <a
+ href="http://www.whatwg.org/mailing-list#help">help mailing list</a> for
+ you!</p>
+
+ <dl>
+ <dt>One-page version:
+
+ <dd><a
+ href="http://www.whatwg.org/specs/web-apps/current-work/">http://www.whatwg.org/specs/web-apps/current-work/</a>
+
+ <dt>Multiple-page version:
+
+ <dd><a
+ href="http://www.whatwg.org/specs/web-apps/current-work/multipage/">http://www.whatwg.org/specs/web-apps/current-work/multipage/</a>
+
+ <dt>Version history:
+
+ <dd>Twitter messages (non-editorial changes only): <a
+ href="http://twitter.com/WHATWG">http://twitter.com/WHATWG</a>
+
+ <dd>Commit-Watchers mailing list: <a
+ href="http://lists.whatwg.org/listinfo.cgi/commit-watchers-whatwg.org">http://lists.whatwg.org/listinfo.cgi/commit-watchers-whatwg.org</a>
+
+ <dd>Interactive Web interface: <a
+ href="http://html5.org/tools/web-apps-tracker">http://html5.org/tools/web-apps-tracker</a>
+
+ <dd>Subversion interface: <a
+ href="http://svn.whatwg.org/">http://svn.whatwg.org/</a>
+
+ <dt>Editor:
+
+ <dd>Ian Hickson, Google, ian@hixie.ch
+ </dl>
+
+ <p class=copyright>&copy; Copyright 2004-2007 Apple Computer, Inc.,
+ Mozilla Foundation, and Opera Software ASA.</p>
+
+ <p class=copyright>You are granted a license to use, reproduce and create
+ derivative works of this document.</p>
+ </div>
+
+ <hr>
+
+ <h2 class="no-num no-toc" id=abstract>Abstract</h2>
+
+ <p>This specification introduces features to HTML and the DOM that ease the
+ authoring of Web-based applications. Additions include context menus,
+ a direct-mode graphics canvas, inline popup windows, and server-sent
+ events.
+
+ <h2 class="no-num no-toc" id=status>Status of this document</h2>
+
+ <p><strong>This is a work in progress!</strong> This document is changing
+ on a daily if not hourly basis in response to comments and as a general
+ part of its development process. Comments are very welcome; please send
+ them to <a href="mailto:whatwg@whatwg.org">whatwg@whatwg.org</a>. Thank
+ you.
+
+ <p>Implementors should be aware that this specification is not stable.
+ <strong>Implementors who are not taking part in the discussions are likely
+ to find the specification changing out from under them in incompatible
+ ways.</strong> Vendors interested in implementing this specification
+ before it eventually reaches the call for implementations should join the
+ <a href="/mailing-list">WHATWG mailing list</a> and take part in the
+ discussions.
+
+ <p>This specification is also being produced by the <a
+ href="http://www.w3.org/html/wg">W3C HTML WG</a>. The two specifications
+ are identical from the table of contents onwards.
+
+ <p>This specification is intended to replace (be the new version of) what
+ was previously the HTML4, XHTML 1.x, and DOM2 HTML specifications.
+
+ <h3 class="no-num no-toc" id=stability0>Stability</h3>
+
+ <p>Different parts of this specification are at different levels of
+ maturity.
+
+ <div id=stability></div>
+
+ <p class=big-issue>Known issues are usually marked like this. There are
+ some spec-wide issues that have not yet been addressed: case-sensitivity
+ is a very poorly handled topic right now, and the firing of events needs
+ to be unified (right now some bubble, some don't, they all use different
+ text to fire events, etc).
+
+ <h2 class="no-num no-toc" id=contents>Table of contents</h2>
+ <!--begin-toc-->
+
+ <ul class=toc>
+ <li><a href="#introduction"><span class=secno>1. </span>Introduction</a>
+ <ul class=toc>
+ <li><a href="#scope"><span class=secno>1.1. </span>Scope</a>
+ <ul class=toc>
+ <li><a href="#relationship"><span class=secno>1.1.1.
+ </span>Relationship to HTML 4.01, XHTML 1.1, DOM2 HTML</a>
+
+ <li><a href="#relationship0"><span class=secno>1.1.2.
+ </span>Relationship to XHTML2</a>
+
+ <li><a href="#relationship1"><span class=secno>1.1.3.
+ </span>Relationship to XUL, Flash, Silverlight, and other proprietary
+ UI languages</a>
+ </ul>
+
+ <li><a href="#structure"><span class=secno>1.2. </span>Structure of this
+ specification</a>
+ <ul class=toc>
+ <li><a href="#how-to"><span class=secno>1.2.1. </span>How to read this
+ specification</a>
+ </ul>
+
+ <li><a href="#conformance"><span class=secno>1.3. </span>Conformance
+ requirements</a>
+ <ul class=toc>
+ <li><a href="#common"><span class=secno>1.3.1. </span>Common
+ conformance requirements for APIs exposed to JavaScript</a>
+
+ <li><a href="#dependencies"><span class=secno>1.3.2.
+ </span>Dependencies</a>
+
+ <li><a href="#features"><span class=secno>1.3.3. </span>Features
+ defined in other specifications</a>
+ </ul>
+
+ <li><a href="#terminology"><span class=secno>1.4. </span>Terminology</a>
+
+ <ul class=toc>
+ <li><a href="#html-vs"><span class=secno>1.4.1. </span>HTML vs
+ XHTML</a>
+ </ul>
+ </ul>
+
+ <li><a href="#dom"><span class=secno>2. </span>The Document Object
+ Model</a>
+ <ul class=toc>
+ <li><a href="#documents"><span class=secno>2.1. </span>Documents</a>
+ <ul class=toc>
+ <li><a href="#security"><span class=secno>2.1.1. </span>Security</a>
+
+ <li><a href="#resource"><span class=secno>2.1.2. </span>Resource
+ metadata management</a>
+ </ul>
+
+ <li><a href="#elements"><span class=secno>2.2. </span>Elements</a>
+ <ul class=toc>
+ <li><a href="#reflecting"><span class=secno>2.2.1. </span>Reflecting
+ content attributes in DOM attributes</a>
+ </ul>
+
+ <li><a href="#common0"><span class=secno>2.3. </span>Common DOM
+ interfaces</a>
+ <ul class=toc>
+ <li><a href="#collections"><span class=secno>2.3.1.
+ </span>Collections</a>
+ <ul class=toc>
+ <li><a href="#htmlcollection"><span class=secno>2.3.1.1.
+ </span>HTMLCollection</a>
+
+ <li><a href="#htmlformcontrolscollection"><span class=secno>2.3.1.2.
+ </span>HTMLFormControlsCollection</a>
+
+ <li><a href="#htmloptionscollection"><span class=secno>2.3.1.3.
+ </span>HTMLOptionsCollection</a>
+ </ul>
+
+ <li><a href="#domtokenlist"><span class=secno>2.3.2.
+ </span>DOMTokenList</a>
+
+ <li><a href="#dom-feature"><span class=secno>2.3.3. </span>DOM feature
+ strings</a>
+ </ul>
+
+ <li><a href="#dom-tree"><span class=secno>2.4. </span>DOM tree
+ accessors</a>
+
+ <li><a href="#dynamic"><span class=secno>2.5. </span>Dynamic markup
+ insertion</a>
+ <ul class=toc>
+ <li><a href="#controlling"><span class=secno>2.5.1. </span>Controlling
+ the input stream</a>
+
+ <li><a href="#dynamic0"><span class=secno>2.5.2. </span>Dynamic markup
+ insertion in HTML</a>
+
+ <li><a href="#dynamic1"><span class=secno>2.5.3. </span>Dynamic markup
+ insertion in XML</a>
+ </ul>
+
+ <li><a href="#apis-in"><span class=secno>2.6. </span>APIs in HTML
+ documents</a>
+ </ul>
+
+ <li><a href="#semantics"><span class=secno>3. </span>Semantics and
+ structure of HTML elements</a>
+ <ul class=toc>
+ <li><a href="#semantics-intro"><span class=secno>3.1.
+ </span>Introduction</a>
+
+ <li><a href="#common1"><span class=secno>3.2. </span>Common
+ microsyntaxes</a>
+ <ul class=toc>
+ <li><a href="#common2"><span class=secno>3.2.1. </span>Common parser
+ idioms</a>
+
+ <li><a href="#boolean"><span class=secno>3.2.2. </span>Boolean
+ attributes</a>
+
+ <li><a href="#numbers"><span class=secno>3.2.3. </span>Numbers</a>
+ <ul class=toc>
+ <li><a href="#unsigned"><span class=secno>3.2.3.1. </span>Unsigned
+ integers</a>
+
+ <li><a href="#signed"><span class=secno>3.2.3.2. </span>Signed
+ integers</a>
+
+ <li><a href="#real-numbers"><span class=secno>3.2.3.3. </span>Real
+ numbers</a>
+
+ <li><a href="#ratios"><span class=secno>3.2.3.4. </span>Ratios</a>
+
+ <li><a href="#percentages-and-dimensions"><span class=secno>3.2.3.5.
+ </span>Percentages and dimensions</a>
+
+ <li><a href="#lists"><span class=secno>3.2.3.6. </span>Lists of
+ integers</a>
+ </ul>
+
+ <li><a href="#dates"><span class=secno>3.2.4. </span>Dates and
+ times</a>
+ <ul class=toc>
+ <li><a href="#specific"><span class=secno>3.2.4.1. </span>Specific
+ moments in time</a>
+
+ <li><a href="#vaguer"><span class=secno>3.2.4.2. </span>Vaguer
+ moments in time</a>
+ </ul>
+
+ <li><a href="#time-offsets"><span class=secno>3.2.5. </span>Time
+ offsets</a>
+
+ <li><a href="#tokens"><span class=secno>3.2.6. </span>Tokens</a>
+
+ <li><a href="#keywords"><span class=secno>3.2.7. </span>Keywords and
+ enumerated attributes</a>
+
+ <li><a href="#syntax-references"><span class=secno>3.2.8.
+ </span>References</a>
+ </ul>
+
+ <li><a href="#documents0"><span class=secno>3.3. </span>Documents and
+ document fragments</a>
+ <ul class=toc>
+ <li><a href="#semantics0"><span class=secno>3.3.1.
+ </span>Semantics</a>
+
+ <li><a href="#structure0"><span class=secno>3.3.2.
+ </span>Structure</a>
+
+ <li><a href="#kinds"><span class=secno>3.3.3. </span>Kinds of
+ elements</a>
+ <ul class=toc>
+ <li><a href="#block-level"><span class=secno>3.3.3.1.
+ </span>Block-level elements</a>
+
+ <li><a href="#inline-level"><span class=secno>3.3.3.2.
+ </span>Inline-level content</a>
+
+ <li><a href="#transparent"><span class=secno>3.3.3.3.
+ </span>Transparent content models</a>
+
+ <li><a href="#determining"><span class=secno>3.3.3.4.
+ </span>Determining if a particular element contains block-level
+ elements or inline-level content</a>
+
+ <li><a href="#interactive0"><span class=secno>3.3.3.5.
+ </span>Interactive elements</a>
+
+ <li><a href="#paragraphs"><span class=secno>3.3.3.6.
+ </span>Paragraphs</a>
+ </ul>
+ </ul>
+
+ <li><a href="#global"><span class=secno>3.4. </span>Global
+ attributes</a>
+ <ul class=toc>
+ <li><a href="#the-id"><span class=secno>3.4.1. </span>The
+ <code>id</code> attribute</a>
+
+ <li><a href="#the-title"><span class=secno>3.4.2. </span>The
+ <code>title</code> attribute</a>
+
+ <li><a href="#the-lang"><span class=secno>3.4.3. </span>The
+ <code>lang</code> (HTML only) and <code>xml:lang</code> (XML only)
+ attributes</a>
+
+ <li><a href="#the-dir"><span class=secno>3.4.4. </span>The
+ <code>dir</code> attribute</a>
+
+ <li><a href="#classes"><span class=secno>3.4.5. </span>The
+ <code>class</code> attribute</a>
+
+ <li><a href="#the-irrelevant"><span class=secno>3.4.6. </span>The
+ <code>irrelevant</code> attribute</a>
+ </ul>
+
+ <li><a href="#interaction"><span class=secno>3.5. </span>Interaction</a>
+
+ <ul class=toc>
+ <li><a href="#activation"><span class=secno>3.5.1.
+ </span>Activation</a>
+
+ <li><a href="#focus"><span class=secno>3.5.2. </span>Focus</a>
+ <ul class=toc>
+ <li><a href="#focus-management"><span class=secno>3.5.2.1.
+ </span>Focus management</a>
+
+ <li><a href="#sequential"><span class=secno>3.5.2.2.
+ </span>Sequential focus navigation</a>
+ </ul>
+
+ <li><a href="#scrolling"><span class=secno>3.5.3. </span>Scrolling
+ elements into view</a>
+ </ul>
+
+ <li><a href="#the-root"><span class=secno>3.6. </span>The root
+ element</a>
+ <ul class=toc>
+ <li><a href="#the-html"><span class=secno>3.6.1. </span>The
+ <code>html</code> element</a>
+ </ul>
+
+ <li><a href="#document"><span class=secno>3.7. </span>Document
+ metadata</a>
+ <ul class=toc>
+ <li><a href="#the-head"><span class=secno>3.7.1. </span>The
+ <code>head</code> element</a>
+
+ <li><a href="#the-title0"><span class=secno>3.7.2. </span>The
+ <code>title</code> element</a>
+
+ <li><a href="#the-base"><span class=secno>3.7.3. </span>The
+ <code>base</code> element</a>
+
+ <li><a href="#the-link"><span class=secno>3.7.4. </span>The
+ <code>link</code> element</a>
+
+ <li><a href="#meta"><span class=secno>3.7.5. </span>The
+ <code>meta</code> element</a>
+ <ul class=toc>
+ <li><a href="#standard"><span class=secno>3.7.5.1. </span>Standard
+ metadata names</a>
+
+ <li><a href="#other"><span class=secno>3.7.5.2. </span>Other
+ metadata names</a>
+
+ <li><a href="#pragma"><span class=secno>3.7.5.3. </span>Pragma
+ directives</a>
+
+ <li><a href="#charset"><span class=secno>3.7.5.4. </span>Specifying
+ and establishing the document's character encoding</a>
+ </ul>
+
+ <li><a href="#the-style"><span class=secno>3.7.6. </span>The
+ <code>style</code> element</a>
+
+ <li><a href="#styling"><span class=secno>3.7.7. </span>Styling</a>
+ </ul>
+
+ <li><a href="#sections"><span class=secno>3.8. </span>Sections</a>
+ <ul class=toc>
+ <li><a href="#the-body"><span class=secno>3.8.1. </span>The
+ <code>body</code> element</a>
+
+ <li><a href="#the-section"><span class=secno>3.8.2. </span>The
+ <code>section</code> element</a>
+
+ <li><a href="#the-nav"><span class=secno>3.8.3. </span>The
+ <code>nav</code> element</a>
+
+ <li><a href="#the-article"><span class=secno>3.8.4. </span>The
+ <code>article</code> element</a>
+
+ <li><a href="#the-blockquote"><span class=secno>3.8.5. </span>The
+ <code>blockquote</code> element</a>
+
+ <li><a href="#the-aside"><span class=secno>3.8.6. </span>The
+ <code>aside</code> element</a>
+
+ <li><a href="#the-h1"><span class=secno>3.8.7. </span>The
+ <code>h1</code>, <code>h2</code>, <code>h3</code>, <code>h4</code>,
+ <code>h5</code>, and <code>h6</code> elements</a>
+
+ <li><a href="#the-header"><span class=secno>3.8.8. </span>The
+ <code>header</code> element</a>
+
+ <li><a href="#the-footer"><span class=secno>3.8.9. </span>The
+ <code>footer</code> element</a>
+
+ <li><a href="#the-address"><span class=secno>3.8.10. </span>The
+ <code>address</code> element</a>
+
+ <li><a href="#headings"><span class=secno>3.8.11. </span>Headings and
+ sections</a>
+ <ul class=toc>
+ <li><a href="#outlines"><span class=secno>3.8.11.1. </span>Creating
+ an outline</a>
+
+ <li><a href="#associatedSection"><span class=secno>3.8.11.2.
+ </span>Determining which heading and section applies to a
+ particular node</a>
+
+ <li><a href="#distinguishing"><span class=secno>3.8.11.3.
+ </span>Distinguishing site-wide headers from page headers</a>
+ </ul>
+ </ul>
+
+ <li><a href="#prose"><span class=secno>3.9. </span>Prose</a>
+ <ul class=toc>
+ <li><a href="#the-p"><span class=secno>3.9.1. </span>The
+ <code>p</code> element</a>
+
+ <li><a href="#the-hr"><span class=secno>3.9.2. </span>The
+ <code>hr</code> element</a>
+
+ <li><a href="#the-br"><span class=secno>3.9.3. </span>The
+ <code>br</code> element</a>
+
+ <li><a href="#the-dialog"><span class=secno>3.9.4. </span>The
+ <code>dialog</code> element</a>
+ </ul>
+
+ <li><a href="#preformatted"><span class=secno>3.10. </span>Preformatted
+ text</a>
+ <ul class=toc>
+ <li><a href="#the-pre"><span class=secno>3.10.1. </span>The
+ <code>pre</code> element</a>
+ </ul>
+
+ <li><a href="#lists0"><span class=secno>3.11. </span>Lists</a>
+ <ul class=toc>
+ <li><a href="#the-ol"><span class=secno>3.11.1. </span>The
+ <code>ol</code> element</a>
+
+ <li><a href="#the-ul"><span class=secno>3.11.2. </span>The
+ <code>ul</code> element</a>
+
+ <li><a href="#the-li"><span class=secno>3.11.3. </span>The
+ <code>li</code> element</a>
+
+ <li><a href="#the-dl"><span class=secno>3.11.4. </span>The
+ <code>dl</code> element</a>
+
+ <li><a href="#the-dt"><span class=secno>3.11.5. </span>The
+ <code>dt</code> element</a>
+
+ <li><a href="#the-dd"><span class=secno>3.11.6. </span>The
+ <code>dd</code> element</a>
+ </ul>
+
+ <li><a href="#phrase"><span class=secno>3.12. </span>Phrase elements</a>
+
+ <ul class=toc>
+ <li><a href="#the-a"><span class=secno>3.12.1. </span>The
+ <code>a</code> element</a>
+
+ <li><a href="#the-q"><span class=secno>3.12.2. </span>The
+ <code>q</code> element</a>
+
+ <li><a href="#the-cite"><span class=secno>3.12.3. </span>The
+ <code>cite</code> element</a>
+
+ <li><a href="#the-em"><span class=secno>3.12.4. </span>The
+ <code>em</code> element</a>
+
+ <li><a href="#the-strong"><span class=secno>3.12.5. </span>The
+ <code>strong</code> element</a>
+
+ <li><a href="#the-small"><span class=secno>3.12.6. </span>The
+ <code>small</code> element</a>
+
+ <li><a href="#the-m"><span class=secno>3.12.7. </span>The
+ <code>m</code> element</a>
+
+ <li><a href="#the-dfn"><span class=secno>3.12.8. </span>The
+ <code>dfn</code> element</a>
+
+ <li><a href="#the-abbr"><span class=secno>3.12.9. </span>The
+ <code>abbr</code> element</a>
+
+ <li><a href="#the-time"><span class=secno>3.12.10. </span>The
+ <code>time</code> element</a>
+
+ <li><a href="#the-meter"><span class=secno>3.12.11. </span>The
+ <code>meter</code> element</a>
+
+ <li><a href="#the-progress"><span class=secno>3.12.12. </span>The
+ <code>progress</code> element</a>
+
+ <li><a href="#the-code"><span class=secno>3.12.13. </span>The
+ <code>code</code> element</a>
+
+ <li><a href="#the-var"><span class=secno>3.12.14. </span>The
+ <code>var</code> element</a>
+
+ <li><a href="#the-samp"><span class=secno>3.12.15. </span>The
+ <code>samp</code> element</a>
+
+ <li><a href="#the-kbd"><span class=secno>3.12.16. </span>The
+ <code>kbd</code> element</a>
+
+ <li><a href="#the-sup"><span class=secno>3.12.17. </span>The
+ <code>sup</code> and <code>sub</code> elements</a>
+
+ <li><a href="#the-span"><span class=secno>3.12.18. </span>The
+ <code>span</code> element</a>
+
+ <li><a href="#the-i"><span class=secno>3.12.19. </span>The
+ <code>i</code> element</a>
+
+ <li><a href="#the-b"><span class=secno>3.12.20. </span>The
+ <code>b</code> element</a>
+
+ <li><a href="#the-bdo"><span class=secno>3.12.21. </span>The
+ <code>bdo</code> element</a>
+ </ul>
+
+ <li><a href="#edits"><span class=secno>3.13. </span>Edits</a>
+ <ul class=toc>
+ <li><a href="#the-ins"><span class=secno>3.13.1. </span>The
+ <code>ins</code> element</a>
+
+ <li><a href="#the-del"><span class=secno>3.13.2. </span>The
+ <code>del</code> element</a>
+
+ <li><a href="#attributes"><span class=secno>3.13.3. </span>Attributes
+ common to <code>ins</code> and <code>del</code> elements</a>
+ </ul>
+
+ <li><a href="#embedded"><span class=secno>3.14. </span>Embedded
+ content</a>
+ <ul class=toc>
+ <li><a href="#the-figure"><span class=secno>3.14.1. </span>The
+ <code>figure</code> element</a>
+
+ <li><a href="#the-img"><span class=secno>3.14.2. </span>The
+ <code>img</code> element</a>
+
+ <li><a href="#the-iframe"><span class=secno>3.14.3. </span>The
+ <code>iframe</code> element</a>
+
+ <li><a href="#the-embed"><span class=secno>3.14.4. </span>The
+ <code>embed</code> element</a>
+
+ <li><a href="#the-object"><span class=secno>3.14.5. </span>The
+ <code>object</code> element</a>
+
+ <li><a href="#the-param"><span class=secno>3.14.6. </span>The
+ <code>param</code> element</a>
+
+ <li><a href="#video"><span class=secno>3.14.7. </span>The
+ <code>video</code> element</a>
+ <ul class=toc>
+ <li><a href="#video0"><span class=secno>3.14.7.1. </span>Video and
+ audio codecs for <code>video</code> elements</a>
+ </ul>
+
+ <li><a href="#audio"><span class=secno>3.14.8. </span>The
+ <code>audio</code> element</a>
+ <ul class=toc>
+ <li><a href="#audio0"><span class=secno>3.14.8.1. </span>Audio
+ codecs for <code>audio</code> elements</a>
+ </ul>
+
+ <li><a href="#media"><span class=secno>3.14.9. </span>Media
+ elements</a>
+ <ul class=toc>
+ <li><a href="#error"><span class=secno>3.14.9.1. </span>Error
+ codes</a>
+
+ <li><a href="#location"><span class=secno>3.14.9.2. </span>Location
+ of the media resource</a>
+
+ <li><a href="#network0"><span class=secno>3.14.9.3. </span>Network
+ states</a>
+
+ <li><a href="#loading"><span class=secno>3.14.9.4. </span>Loading
+ the media resource</a>
+
+ <li><a href="#offsets"><span class=secno>3.14.9.5. </span>Offsets
+ into the media resource</a>
+
+ <li><a href="#the-ready"><span class=secno>3.14.9.6. </span>The
+ ready states</a>
+
+ <li><a href="#playing"><span class=secno>3.14.9.7. </span>Playing
+ the media resource</a>
+
+ <li><a href="#seeking"><span class=secno>3.14.9.8.
+ </span>Seeking</a>
+
+ <li><a href="#cue-points"><span class=secno>3.14.9.9. </span>Cue
+ points</a>
+
+ <li><a href="#user-interface"><span class=secno>3.14.9.10.
+ </span>User interface</a>
+
+ <li><a href="#time-range"><span class=secno>3.14.9.11. </span>Time
+ range</a>
+
+ <li><a href="#mediaevents"><span class=secno>3.14.9.12. </span>Event
+ summary</a>
+
+ <li><a href="#security0"><span class=secno>3.14.9.13.
+ </span>Security and privacy considerations</a>
+ </ul>
+
+ <li><a href="#the-source"><span class=secno>3.14.10. </span>The
+ <code>source</code> element</a>
+
+ <li><a href="#the-canvas"><span class=secno>3.14.11. </span>The
+ <code>canvas</code> element</a>
+ <ul class=toc>
+ <li><a href="#the-2d"><span class=secno>3.14.11.1. </span>The 2D
+ context</a>
+ <ul class=toc>
+ <li><a href="#the-canvas0"><span class=secno>3.14.11.1.1.
+ </span>The canvas state</a>
+
+ <li><a href="#transformations"><span class=secno>3.14.11.1.2.
+ </span>Transformations</a>
+
+ <li><a href="#compositing"><span class=secno>3.14.11.1.3.
+ </span>Compositing</a>
+
+ <li><a href="#colors"><span class=secno>3.14.11.1.4. </span>Colors
+ and styles</a>
+
+ <li><a href="#line-styles"><span class=secno>3.14.11.1.5.
+ </span>Line styles</a>
+
+ <li><a href="#shadows"><span class=secno>3.14.11.1.6.
+ </span>Shadows</a>
+
+ <li><a href="#simple"><span class=secno>3.14.11.1.7. </span>Simple
+ shapes (rectangles)</a>
+
+ <li><a href="#complex"><span class=secno>3.14.11.1.8.
+ </span>Complex shapes (paths)</a>
+
+ <li><a href="#images"><span class=secno>3.14.11.1.9.
+ </span>Images</a>
+
+ <li><a href="#pixel"><span class=secno>3.14.11.1.10. </span>Pixel
+ manipulation</a>
+
+ <li><a href="#drawing"><span class=secno>3.14.11.1.11.
+ </span>Drawing model</a>
+ </ul>
+ </ul>
+
+ <li><a href="#the-map"><span class=secno>3.14.12. </span>The
+ <code>map</code> element</a>
+
+ <li><a href="#the-area"><span class=secno>3.14.13. </span>The
+ <code>area</code> element</a>
+
+ <li><a href="#image-maps"><span class=secno>3.14.14. </span>Image
+ maps</a>
+ </ul>
+
+ <li><a href="#tabular"><span class=secno>3.15. </span>Tabular data</a>
+ <ul class=toc>
+ <li><a href="#the-table"><span class=secno>3.15.1. </span>The
+ <code>table</code> element</a>
+
+ <li><a href="#the-caption"><span class=secno>3.15.2. </span>The
+ <code>caption</code> element</a>
+
+ <li><a href="#the-colgroup"><span class=secno>3.15.3. </span>The
+ <code>colgroup</code> element</a>
+
+ <li><a href="#the-col"><span class=secno>3.15.4. </span>The
+ <code>col</code> element</a>
+
+ <li><a href="#the-tbody"><span class=secno>3.15.5. </span>The
+ <code>tbody</code> element</a>
+
+ <li><a href="#the-thead"><span class=secno>3.15.6. </span>The
+ <code>thead</code> element</a>
+
+ <li><a href="#the-tfoot"><span class=secno>3.15.7. </span>The
+ <code>tfoot</code> element</a>
+
+ <li><a href="#the-tr"><span class=secno>3.15.8. </span>The
+ <code>tr</code> element</a>
+
+ <li><a href="#the-td"><span class=secno>3.15.9. </span>The
+ <code>td</code> element</a>
+
+ <li><a href="#the-th"><span class=secno>3.15.10. </span>The
+ <code>th</code> element</a>
+
+ <li><a href="#processing"><span class=secno>3.15.11. </span>Processing
+ model</a>
+ <ul class=toc>
+ <li><a href="#forming"><span class=secno>3.15.11.1. </span>Forming a
+ table</a>
+
+ <li><a href="#header-and-data-cell-semantics"><span
+ class=secno>3.15.11.2. </span>Forming relationships between data
+ cells and header cells</a>
+ </ul>
+ </ul>
+
+ <li><a href="#forms"><span class=secno>3.16. </span>Forms</a>
+ <ul class=toc>
+ <li><a href="#the-form"><span class=secno>3.16.1. </span>The
+ <code>form</code> element</a>
+
+ <li><a href="#the-fieldset"><span class=secno>3.16.2. </span>The
+ <code>fieldset</code> element</a>
+
+ <li><a href="#the-input"><span class=secno>3.16.3. </span>The
+ <code>input</code> element</a>
+
+ <li><a href="#the-button"><span class=secno>3.16.4. </span>The
+ <code>button</code> element</a>
+
+ <li><a href="#the-label"><span class=secno>3.16.5. </span>The
+ <code>label</code> element</a>
+
+ <li><a href="#the-select"><span class=secno>3.16.6. </span>The
+ <code>select</code> element</a>
+
+ <li><a href="#the-datalist"><span class=secno>3.16.7. </span>The
+ <code>datalist</code> element</a>
+
+ <li><a href="#the-optgroup"><span class=secno>3.16.8. </span>The
+ <code>optgroup</code> element</a>
+
+ <li><a href="#the-option"><span class=secno>3.16.9. </span>The
+ <code>option</code> element</a>
+
+ <li><a href="#the-textarea"><span class=secno>3.16.10. </span>The
+ <code>textarea</code> element</a>
+
+ <li><a href="#the-output"><span class=secno>3.16.11. </span>The
+ <code>output</code> element</a>
+
+ <li><a href="#processing0"><span class=secno>3.16.12.
+ </span>Processing model</a>
+ <ul class=toc>
+ <li><a href="#form-submission"><span class=secno>3.16.12.1.
+ </span>Form submission</a>
+ </ul>
+ </ul>
+
+ <li><a href="#scripting0"><span class=secno>3.17. </span>Scripting</a>
+ <ul class=toc>
+ <li><a href="#script"><span class=secno>3.17.1. </span>The
+ <code>script</code> element</a>
+ <ul class=toc>
+ <li><a href="#scriptingLanguages"><span class=secno>3.17.1.1.
+ </span>Scripting languages</a>
+ </ul>
+
+ <li><a href="#the-noscript"><span class=secno>3.17.2. </span>The
+ <code>noscript</code> element</a>
+
+ <li><a href="#the-event-source"><span class=secno>3.17.3. </span>The
+ <code>event-source</code> element</a>
+ </ul>
+
+ <li><a href="#interactive"><span class=secno>3.18. </span>Interactive
+ elements</a>
+ <ul class=toc>
+ <li><a href="#the-details"><span class=secno>3.18.1. </span>The
+ <code>details</code> element</a>
+
+ <li><a href="#datagrid"><span class=secno>3.18.2. </span>The
+ <code>datagrid</code> element</a>
+ <ul class=toc>
+ <li><a href="#the-datagrid"><span class=secno>3.18.2.1. </span>The
+ <code>datagrid</code> data model</a>
+
+ <li><a href="#how-rows"><span class=secno>3.18.2.2. </span>How rows
+ are identified</a>
+
+ <li><a href="#the-data"><span class=secno>3.18.2.3. </span>The data
+ provider interface</a>
+
+ <li><a href="#the-default"><span class=secno>3.18.2.4. </span>The
+ default data provider</a>
+ <ul class=toc>
+ <li><a href="#commonDefaultDataGridMethodDefinitions"><span
+ class=secno>3.18.2.4.1. </span>Common default data provider
+ method definitions for cells</a>
+ </ul>
+
+ <li><a href="#populating"><span class=secno>3.18.2.5.
+ </span>Populating the <code>datagrid</code> element</a>
+
+ <li><a href="#updating"><span class=secno>3.18.2.6. </span>Updating
+ the <code>datagrid</code></a>
+
+ <li><a href="#requirements"><span class=secno>3.18.2.7.
+ </span>Requirements for interactive user agents</a>
+
+ <li><a href="#the-selection"><span class=secno>3.18.2.8. </span>The
+ selection</a>
+
+ <li><a href="#columns"><span class=secno>3.18.2.9. </span>Columns
+ and captions</a>
+ </ul>
+
+ <li><a href="#the-command"><span class=secno>3.18.3. </span>The
+ <code>command</code> element</a>
+
+ <li><a href="#menus"><span class=secno>3.18.4. </span>The
+ <code>menu</code> element</a>
+ <ul class=toc>
+ <li><a href="#menus-intro"><span class=secno>3.18.4.1.
+ </span>Introduction</a>
+
+ <li><a href="#building"><span class=secno>3.18.4.2. </span>Building
+ menus</a>
+
+ <li><a href="#context"><span class=secno>3.18.4.3. </span>Context
+ menus</a>
+
+ <li><a href="#toolbars"><span class=secno>3.18.4.4.
+ </span>Toolbars</a>
+ </ul>
+
+ <li><a href="#commands"><span class=secno>3.18.5. </span>Commands</a>
+ <ul class=toc>
+ <li><a href="#using"><span class=secno>3.18.5.1. </span>Using the
+ <code>a</code> element to define a command</a>
+
+ <li><a href="#using0"><span class=secno>3.18.5.2. </span>Using the
+ <code>button</code> element to define a command</a>
+
+ <li><a href="#using1"><span class=secno>3.18.5.3. </span>Using the
+ <code>input</code> element to define a command</a>
+
+ <li><a href="#using2"><span class=secno>3.18.5.4. </span>Using the
+ <code>option</code> element to define a command</a>
+
+ <li><a href="#using3"><span class=secno>3.18.5.5. </span>Using the
+ <code>command</code> element to define a command</a>
+ </ul>
+ </ul>
+
+ <li><a href="#miscellaneous"><span class=secno>3.19.
+ </span>Miscellaneous elements</a>
+ <ul class=toc>
+ <li><a href="#the-legend"><span class=secno>3.19.1. </span>The
+ <code>legend</code> element</a>
+
+ <li><a href="#the-div"><span class=secno>3.19.2. </span>The
+ <code>div</code> element</a>
+ </ul>
+ </ul>
+
+ <li><a href="#web-browsers"><span class=secno>4. </span>Web browsers</a>
+ <ul class=toc>
+ <li><a href="#windows"><span class=secno>4.1. </span>Browsing
+ contexts</a>
+ <ul class=toc>
+ <li><a href="#nested"><span class=secno>4.1.1. </span>Nested browsing
+ contexts</a>
+
+ <li><a href="#auxiliary"><span class=secno>4.1.2. </span>Auxiliary
+ browsing contexts</a>
+
+ <li><a href="#secondary"><span class=secno>4.1.3. </span>Secondary
+ browsing contexts</a>
+
+ <li><a href="#threads"><span class=secno>4.1.4. </span>Threads</a>
+
+ <li><a href="#browsing"><span class=secno>4.1.5. </span>Browsing
+ context names</a>
+ </ul>
+
+ <li><a href="#the-default0"><span class=secno>4.2. </span>The default
+ view</a>
+ <ul class=toc>
+ <li><a href="#security1"><span class=secno>4.2.1. </span>Security</a>
+
+ <li><a href="#constructors"><span class=secno>4.2.2.
+ </span>Constructors</a>
+
+ <li><a href="#apis-for"><span class=secno>4.2.3. </span>APIs for
+ creating and navigating browsing contexts by name</a>
+
+ <li><a href="#accessing"><span class=secno>4.2.4. </span>Accessing
+ other browsing contexts</a>
+ </ul>
+
+ <li><a href="#history"><span class=secno>4.3. </span>Session history and
+ navigation</a>
+ <ul class=toc>
+ <li><a href="#the-session"><span class=secno>4.3.1. </span>The session
+ history of browsing contexts</a>
+
+ <li><a href="#the-history"><span class=secno>4.3.2. </span>The
+ <code>History</code> interface</a>
+
+ <li><a href="#activating"><span class=secno>4.3.3. </span>Activating
+ state objects</a>
+
+ <li><a href="#the-location"><span class=secno>4.3.4. </span>The
+ <code>Location</code> interface</a>
+ <ul class=toc>
+ <li><a href="#security2"><span class=secno>4.3.4.1.
+ </span>Security</a>
+ </ul>
+
+ <li><a href="#history-notes"><span class=secno>4.3.5.
+ </span>Implementation notes for session history</a>
+ </ul>
+
+ <li><a href="#links"><span class=secno>4.4. </span>Links</a>
+ <ul class=toc>
+ <li><a href="#hyperlink"><span class=secno>4.4.1. </span>Hyperlink
+ elements</a>
+
+ <li><a href="#following"><span class=secno>4.4.2. </span>Following
+ hyperlinks</a>
+ <ul class=toc>
+ <li><a href="#hyperlink0"><span class=secno>4.4.2.1.
+ </span>Hyperlink auditing</a>
+ </ul>
+
+ <li><a href="#linkTypes"><span class=secno>4.4.3. </span>Link
+ types</a>
+ <ul class=toc>
+ <li><a href="#link-type"><span class=secno>4.4.3.1. </span>Link type
+ "<code>alternate</code>"</a>
+
+ <li><a href="#link-type0"><span class=secno>4.4.3.2. </span>Link
+ type "<code>archives</code>"</a>
+
+ <li><a href="#link-type1"><span class=secno>4.4.3.3. </span>Link
+ type "<code>author</code>"</a>
+
+ <li><a href="#link-type2"><span class=secno>4.4.3.4. </span>Link
+ type "<code>bookmark</code>"</a>
+
+ <li><a href="#link-type3"><span class=secno>4.4.3.5. </span>Link
+ type "<code>contact</code>"</a>
+
+ <li><a href="#link-type4"><span class=secno>4.4.3.6. </span>Link
+ type "<code>external</code>"</a>
+
+ <li><a href="#link-type5"><span class=secno>4.4.3.7. </span>Link
+ type "<code>feed</code>"</a>
+
+ <li><a href="#link-type6"><span class=secno>4.4.3.8. </span>Link
+ type "<code>help</code>"</a>
+
+ <li><a href="#link-type7"><span class=secno>4.4.3.9. </span>Link
+ type "<code>icon</code>"</a>
+
+ <li><a href="#link-type8"><span class=secno>4.4.3.10. </span>Link
+ type "<code>license</code>"</a>
+
+ <li><a href="#link-type9"><span class=secno>4.4.3.11. </span>Link
+ type "<code>nofollow</code>"</a>
+
+ <li><a href="#link-type10"><span class=secno>4.4.3.12. </span>Link
+ type "<code>pingback</code>"</a>
+
+ <li><a href="#link-type11"><span class=secno>4.4.3.13. </span>Link
+ type "<code>prefetch</code>"</a>
+
+ <li><a href="#link-type12"><span class=secno>4.4.3.14. </span>Link
+ type "<code>search</code>"</a>
+
+ <li><a href="#link-type13"><span class=secno>4.4.3.15. </span>Link
+ type "<code>stylesheet</code>"</a>
+
+ <li><a href="#link-type14"><span class=secno>4.4.3.16. </span>Link
+ type "<code>sidebar</code>"</a>
+
+ <li><a href="#link-type15"><span class=secno>4.4.3.17. </span>Link
+ type "<code>tag</code>"</a>
+
+ <li><a href="#hierarchical"><span class=secno>4.4.3.18.
+ </span>Hierarchical link types</a>
+ <ul class=toc>
+ <li><a href="#link-type16"><span class=secno>4.4.3.18.1.
+ </span>Link type "<code>first</code>"</a>
+
+ <li><a href="#link-type17"><span class=secno>4.4.3.18.2.
+ </span>Link type "<code>index</code>"</a>
+
+ <li><a href="#link-type18"><span class=secno>4.4.3.18.3.
+ </span>Link type "<code>last</code>"</a>
+
+ <li><a href="#link-type19"><span class=secno>4.4.3.18.4.
+ </span>Link type "<code>next</code>"</a>
+
+ <li><a href="#link-type20"><span class=secno>4.4.3.18.5.
+ </span>Link type "<code>prev</code>"</a>
+
+ <li><a href="#link-type21"><span class=secno>4.4.3.18.6.
+ </span>Link type "<code>up</code>"</a>
+ </ul>
+
+ <li><a href="#other0"><span class=secno>4.4.3.19. </span>Other link
+ types</a>
+ </ul>
+ </ul>
+
+ <li><a href="#interfaces"><span class=secno>4.5. </span>Interfaces for
+ URI manipulation</a>
+
+ <li><a href="#navigating"><span class=secno>4.6. </span>Navigating
+ across documents</a>
+ <ul class=toc>
+ <li><a href="#read-html"><span class=secno>4.6.1. </span>Page load
+ processing model for HTML files</a>
+
+ <li><a href="#read-xml"><span class=secno>4.6.2. </span>Page load
+ processing model for XML files</a>
+
+ <li><a href="#read-text"><span class=secno>4.6.3. </span>Page load
+ processing model for text files</a>
+
+ <li><a href="#read-image"><span class=secno>4.6.4. </span>Page load
+ processing model for images</a>
+
+ <li><a href="#read-plugin"><span class=secno>4.6.5. </span>Page load
+ processing model for content that uses plugins</a>
+
+ <li><a href="#non-DOM-inline-content"><span class=secno>4.6.6.
+ </span>Page load processing model for inline content that doesn't
+ have a DOM</a>
+
+ <li><a href="#scroll-to-fragid"><span class=secno>4.6.7.
+ </span>Scrolling to a fragment identifier</a>
+ </ul>
+
+ <li><a href="#content-type-sniffing"><span class=secno>4.7.
+ </span>Determining the type of a new resource in a browsing context</a>
+
+ <ul class=toc>
+ <li><a href="#content-type0"><span class=secno>4.7.1.
+ </span>Content-Type sniffing: text or binary</a>
+
+ <li><a href="#content-type1"><span class=secno>4.7.2.
+ </span>Content-Type sniffing: unknown type</a>
+
+ <li><a href="#content-type2"><span class=secno>4.7.3.
+ </span>Content-Type sniffing: image</a>
+
+ <li><a href="#content-type3"><span class=secno>4.7.4.
+ </span>Content-Type sniffing: feed or HTML</a>
+
+ <li><a href="#content-type"><span class=secno>4.7.5.
+ </span>Content-Type metadata</a>
+ </ul>
+
+ <li><a href="#user-prompts"><span class=secno>4.8. </span>User
+ prompts</a>
+
+ <li><a href="#scripting"><span class=secno>4.9. </span>Scripting</a>
+ <ul class=toc>
+ <li><a href="#running"><span class=secno>4.9.1. </span>Running
+ executable code</a>
+
+ <li><a href="#origin"><span class=secno>4.9.2. </span>Origin</a>
+
+ <li><a href="#security3"><span class=secno>4.9.3. </span>Security
+ exceptions</a>
+
+ <li><a href="#javascript-protocol"><span class=secno>4.9.4. </span>The
+ <code title="">javascript:</code> protocol</a>
+
+ <li><a href="#events"><span class=secno>4.9.5. </span>Events</a>
+ <ul class=toc>
+ <li><a href="#event-handler-attributes"><span class=secno>4.9.5.1.
+ </span>Event handler attributes</a>
+
+ <li><a href="#event"><span class=secno>4.9.5.2. </span>Event
+ firing</a>
+
+ <li><a href="#events0"><span class=secno>4.9.5.3. </span>Events and
+ the <code>Window</code> object</a>
+
+ <li><a href="#runtime-script-errors"><span class=secno>4.9.5.4.
+ </span>Runtime script errors</a>
+ </ul>
+ </ul>
+
+ <li><a href="#browser"><span class=secno>4.10. </span>Browser state</a>
+ <ul class=toc>
+ <li><a href="#offline"><span class=secno>4.10.1. </span>Offline Web
+ applications</a>
+
+ <li><a href="#custom-handlers"><span class=secno>4.10.2. </span>Custom
+ protocol and content handlers</a>
+ <ul class=toc>
+ <li><a href="#security4"><span class=secno>4.10.2.1. </span>Security
+ and privacy</a>
+
+ <li><a href="#sample-handler-impl"><span class=secno>4.10.2.2.
+ </span>Sample user interface</a>
+ </ul>
+ </ul>
+
+ <li><a href="#storage"><span class=secno>4.11. </span>Client-side
+ session and persistent storage of name/value pairs</a>
+ <ul class=toc>
+ <li><a href="#introduction0"><span class=secno>4.11.1.
+ </span>Introduction</a>
+
+ <li><a href="#the-storage"><span class=secno>4.11.2. </span>The
+ <code>Storage</code> interface</a>
+
+ <li><a href="#the-storageitem"><span class=secno>4.11.3. </span>The
+ <code>StorageItem</code> interface</a>
+
+ <li><a href="#the-sessionstorage"><span class=secno>4.11.4. </span>The
+ <code title=dom-sessionStorage>sessionStorage</code> attribute</a>
+
+ <li><a href="#the-globalstorage"><span class=secno>4.11.5. </span>The
+ <code title=dom-globalStorage>globalStorage</code> attribute</a>
+
+ <li><a href="#the-storage0"><span class=secno>4.11.6. </span>The <code
+ title=event-storage>storage</code> event</a>
+
+ <li><a href="#miscellaneous0"><span class=secno>4.11.7.
+ </span>Miscellaneous implementation requirements for storage
+ areas</a>
+ <ul class=toc>
+ <li><a href="#disk-space"><span class=secno>4.11.7.1. </span>Disk
+ space</a>
+
+ <li><a href="#threads0"><span class=secno>4.11.7.2.
+ </span>Threads</a>
+ </ul>
+
+ <li><a href="#security5"><span class=secno>4.11.8. </span>Security and
+ privacy</a>
+ <ul class=toc>
+ <li><a href="#user-tracking"><span class=secno>4.11.8.1. </span>User
+ tracking</a>
+
+ <li><a href="#cookie"><span class=secno>4.11.8.2. </span>Cookie
+ resurrection</a>
+
+ <li><a href="#integrity"><span class=secno>4.11.8.3.
+ </span>Integrity of "public" storage areas</a>
+
+ <li><a href="#cross-protocol"><span class=secno>4.11.8.4.
+ </span>Cross-protocol and cross-port attacks</a>
+
+ <li><a href="#dns-spoofing"><span class=secno>4.11.8.5. </span>DNS
+ spoofing attacks</a>
+
+ <li><a href="#cross-directory"><span class=secno>4.11.8.6.
+ </span>Cross-directory attacks</a>
+
+ <li><a href="#public"><span class=secno>4.11.8.7. </span>Public
+ storage areas corresponding to hosts</a>
+
+ <li><a href="#storage0"><span class=secno>4.11.8.8. </span>Storage
+ areas in the face of untrusted higher-level domains that do not
+ correspond to public storage areas</a>
+
+ <li><a href="#storage1"><span class=secno>4.11.8.9. </span>Storage
+ areas in the face of untrusted subdomains</a>
+
+ <li><a href="#implementation"><span class=secno>4.11.8.10.
+ </span>Implementation risks</a>
+ </ul>
+ </ul>
+
+ <li><a href="#sql"><span class=secno>4.12. </span>Client-side database
+ storage</a>
+ <ul class=toc>
+ <li><a href="#introduction1"><span class=secno>4.12.1.
+ </span>Introduction</a>
+
+ <li><a href="#executing"><span class=secno>4.12.2. </span>Executing
+ SQL statements</a>
+
+ <li><a href="#database"><span class=secno>4.12.3. </span>Database
+ query results</a>
+
+ <li><a href="#privacy"><span class=secno>4.12.4. </span>Privacy</a>
+
+ <li><a href="#security6"><span class=secno>4.12.5. </span>Security</a>
+
+ <ul class=toc>
+ <li><a href="#user-agents"><span class=secno>4.12.5.1. </span>User
+ agents</a>
+
+ <li><a href="#sql-injection"><span class=secno>4.12.5.2. </span>SQL
+ injection</a>
+ </ul>
+ </ul>
+ </ul>
+
+ <li><a href="#editing"><span class=secno>5. </span>Editing</a>
+ <ul class=toc>
+ <li><a href="#editing-intro"><span class=secno>5.1.
+ </span>Introduction</a>
+
+ <li><a href="#contenteditable"><span class=secno>5.2. </span>The <code
+ title=attr-contenteditable>contenteditable</code> attribute</a>
+ <ul class=toc>
+ <li><a href="#user-editing"><span class=secno>5.2.1. </span>User
+ editing actions</a>
+
+ <li><a href="#making"><span class=secno>5.2.2. </span>Making entire
+ documents editable</a>
+ </ul>
+
+ <li><a href="#dnd"><span class=secno>5.3. </span>Drag and drop</a>
+ <ul class=toc>
+ <li><a href="#the-dragevent"><span class=secno>5.3.1. </span>The
+ <code>DragEvent</code> and <code>DataTransfer</code> interfaces</a>
+
+ <li><a href="#events1"><span class=secno>5.3.2. </span>Events fired
+ during a drag-and-drop action</a>
+
+ <li><a href="#drag-and-drop"><span class=secno>5.3.3.
+ </span>Drag-and-drop processing model</a>
+ <ul class=toc>
+ <li><a href="#when-the"><span class=secno>5.3.3.1. </span>When the
+ drag-and-drop operation starts or ends in another document</a>
+
+ <li><a href="#when-the0"><span class=secno>5.3.3.2. </span>When the
+ drag-and-drop operation starts or ends in another application</a>
+ </ul>
+
+ <li><a href="#the-draggable"><span class=secno>5.3.4. </span>The
+ <code>draggable</code> attribute</a>
+
+ <li><a href="#copy-and"><span class=secno>5.3.5. </span>Copy and
+ paste</a>
+ <ul class=toc>
+ <li><a href="#copy-to"><span class=secno>5.3.5.1. </span>Copy to
+ clipboard</a>
+
+ <li><a href="#cut-to"><span class=secno>5.3.5.2. </span>Cut to
+ clipboard</a>
+
+ <li><a href="#paste"><span class=secno>5.3.5.3. </span>Paste from
+ clipboard</a>
+
+ <li><a href="#paste0"><span class=secno>5.3.5.4. </span>Paste from
+ selection</a>
+ </ul>
+
+ <li><a href="#security7"><span class=secno>5.3.6. </span>Security
+ risks in the drag-and-drop model</a>
+ </ul>
+
+ <li><a href="#undo"><span class=secno>5.4. </span>Undo history</a>
+ <ul class=toc>
+ <li><a href="#the-undomanager"><span class=secno>5.4.1. </span>The
+ <code>UndoManager</code> interface</a>
+
+ <li><a href="#undo-moving"><span class=secno>5.4.2. </span>Undo:
+ moving back in the undo transaction history</a>
+
+ <li><a href="#redo-moving"><span class=secno>5.4.3. </span>Redo:
+ moving forward in the undo transaction history</a>
+
+ <li><a href="#the-undomanagerevent"><span class=secno>5.4.4.
+ </span>The <code>UndoManagerEvent</code> interface and the <code
+ title=event-undo>undo</code> and <code title=event-redo>redo</code>
+ events</a>
+
+ <li><a href="#implementation0"><span class=secno>5.4.5.
+ </span>Implementation notes</a>
+ </ul>
+
+ <li><a href="#command"><span class=secno>5.5. </span>Command APIs</a>
+
+ <li><a href="#selection"><span class=secno>5.6. </span>The text
+ selection APIs</a>
+ <ul class=toc>
+ <li><a href="#documentSelection"><span class=secno>5.6.1. </span>APIs
+ for the browsing context selection</a>
+
+ <li><a href="#textFieldSelection"><span class=secno>5.6.2. </span>APIs
+ for the text field selections</a>
+ </ul>
+ </ul>
+
+ <li><a href="#comms"><span class=secno>6. </span>Communication</a>
+ <ul class=toc>
+ <li><a href="#event0"><span class=secno>6.1. </span>Event
+ definitions</a>
+
+ <li><a href="#server-sent-events"><span class=secno>6.2.
+ </span>Server-sent DOM events</a>
+ <ul class=toc>
+ <li><a href="#the-remoteeventtarget"><span class=secno>6.2.1.
+ </span>The <code>RemoteEventTarget</code> interface</a>
+
+ <li><a href="#connecting"><span class=secno>6.2.2. </span>Connecting
+ to an event stream</a>
+
+ <li><a href="#parsing0"><span class=secno>6.2.3. </span>Parsing an
+ event stream</a>
+
+ <li><a href="#event-stream-interpretation"><span class=secno>6.2.4.
+ </span>Interpreting an event stream</a>
+
+ <li><a href="#notes"><span class=secno>6.2.5. </span>Notes</a>
+ </ul>
+
+ <li><a href="#network"><span class=secno>6.3. </span>Network
+ connections</a>
+ <ul class=toc>
+ <li><a href="#network-intro"><span class=secno>6.3.1.
+ </span>Introduction</a>
+
+ <li><a href="#the-connection"><span class=secno>6.3.2. </span>The
+ <code>Connection</code> interface</a>
+
+ <li><a href="#connection"><span class=secno>6.3.3. </span>Connection
+ Events</a>
+
+ <li><a href="#tcp-connections"><span class=secno>6.3.4. </span>TCP
+ connections</a>
+
+ <li><a href="#broadcast"><span class=secno>6.3.5. </span>Broadcast
+ connections</a>
+ <ul class=toc>
+ <li><a href="#broadcasting"><span class=secno>6.3.5.1.
+ </span>Broadcasting over TCP/IP</a>
+
+ <li><a href="#bluetooth-broadcast"><span class=secno>6.3.5.2.
+ </span>Broadcasting over Bluetooth</a>
+
+ <li><a href="#irda-broadcast"><span class=secno>6.3.5.3.
+ </span>Broadcasting over IrDA</a>
+ </ul>
+
+ <li><a href="#peer-to-peer"><span class=secno>6.3.6.
+ </span>Peer-to-peer connections</a>
+ <ul class=toc>
+ <li><a href="#peer-to-peer0"><span class=secno>6.3.6.1.
+ </span>Peer-to-peer connections over TCP/IP</a>
+
+ <li><a href="#bluetooth-peer"><span class=secno>6.3.6.2.
+ </span>Peer-to-peer connections over Bluetooth</a>
+
+ <li><a href="#irda-peer"><span class=secno>6.3.6.3.
+ </span>Peer-to-peer connections over IrDA</a>
+ </ul>
+
+ <li><a href="#the-common"><span class=secno>6.3.7. </span>The common
+ protocol for TCP-based connections</a>
+ <ul class=toc>
+ <li><a href="#clients"><span class=secno>6.3.7.1. </span>Clients
+ connecting over TCP</a>
+
+ <li><a href="#servers"><span class=secno>6.3.7.2. </span>Servers
+ accepting connections over TCP</a>
+
+ <li><a href="#sending"><span class=secno>6.3.7.3. </span>Sending and
+ receiving data over TCP</a>
+ </ul>
+
+ <li><a href="#network-security"><span class=secno>6.3.8.
+ </span>Security</a>
+
+ <li><a href="#network-other-specs"><span class=secno>6.3.9.
+ </span>Relationship to other standards</a>
+ </ul>
+
+ <li><a href="#crossDocumentMessages"><span class=secno>6.4.
+ </span>Cross-document messaging</a>
+ <ul class=toc>
+ <li><a href="#processing1"><span class=secno>6.4.1. </span>Processing
+ model</a>
+ </ul>
+ </ul>
+
+ <li><a href="#repetition"><span class=secno>7. </span>Repetition
+ templates</a>
+
+ <li><a href="#syntax"><span class=secno>8. </span>The HTML syntax</a>
+ <ul class=toc>
+ <li><a href="#writing"><span class=secno>8.1. </span>Writing HTML
+ documents</a>
+ <ul class=toc>
+ <li><a href="#the-doctype"><span class=secno>8.1.1. </span>The
+ DOCTYPE</a>
+
+ <li><a href="#elements0"><span class=secno>8.1.2. </span>Elements</a>
+ <ul class=toc>
+ <li><a href="#start"><span class=secno>8.1.2.1. </span>Start
+ tags</a>
+
+ <li><a href="#end-tags"><span class=secno>8.1.2.2. </span>End
+ tags</a>
+
+ <li><a href="#attributes0"><span class=secno>8.1.2.3.
+ </span>Attributes</a>
+
+ <li><a href="#optional"><span class=secno>8.1.2.4. </span>Optional
+ tags</a>
+
+ <li><a href="#restrictions"><span class=secno>8.1.2.5.
+ </span>Restrictions on content models</a>
+ </ul>
+
+ <li><a href="#text"><span class=secno>8.1.3. </span>Text</a>
+ <ul class=toc>
+ <li><a href="#newlines"><span class=secno>8.1.3.1.
+ </span>Newlines</a>
+ </ul>
+
+ <li><a href="#character"><span class=secno>8.1.4. </span>Character
+ entity references</a>
+
+ <li><a href="#comments"><span class=secno>8.1.5. </span>Comments</a>
+ </ul>
+
+ <li><a href="#parsing"><span class=secno>8.2. </span>Parsing HTML
+ documents</a>
+ <ul class=toc>
+ <li><a href="#overview"><span class=secno>8.2.1. </span>Overview of
+ the parsing model</a>
+
+ <li><a href="#the-input0"><span class=secno>8.2.2. </span>The input
+ stream</a>
+
+ <li><a href="#tokenisation"><span class=secno>8.2.3.
+ </span>Tokenisation</a>
+ <ul class=toc>
+ <li><a href="#tokenising"><span class=secno>8.2.3.1.
+ </span>Tokenising entities</a>
+ </ul>
+
+ <li><a href="#tree-construction"><span class=secno>8.2.4. </span>Tree
+ construction</a>
+ <ul class=toc>
+ <li><a href="#the-initial"><span class=secno>8.2.4.1. </span>The
+ initial phase</a>
+
+ <li><a href="#the-root0"><span class=secno>8.2.4.2. </span>The root
+ element phase</a>
+
+ <li><a href="#the-main"><span class=secno>8.2.4.3. </span>The main
+ phase</a>
+ <ul class=toc>
+ <li><a href="#the-stack"><span class=secno>8.2.4.3.1. </span>The
+ stack of open elements</a>
+
+ <li><a href="#the-list"><span class=secno>8.2.4.3.2. </span>The
+ list of active formatting elements</a>
+
+ <li><a href="#creating"><span class=secno>8.2.4.3.3.
+ </span>Creating and inserting HTML elements</a>
+
+ <li><a href="#closing"><span class=secno>8.2.4.3.4. </span>Closing
+ elements that have implied end tags</a>
+
+ <li><a href="#the-element"><span class=secno>8.2.4.3.5. </span>The
+ element pointers</a>
+
+ <li><a href="#the-insertion"><span class=secno>8.2.4.3.6.
+ </span>The insertion mode</a>
+
+ <li><a href="#how-to0"><span class=secno>8.2.4.3.7. </span>How to
+ handle tokens in the main phase</a>
+ </ul>
+
+ <li><a href="#the-trailing"><span class=secno>8.2.4.4. </span>The
+ trailing end phase</a>
+ </ul>
+
+ <li><a href="#the-end"><span class=secno>8.2.5. </span>The End</a>
+ </ul>
+
+ <li><a href="#namespaces"><span class=secno>8.3. </span>Namespaces</a>
+
+ <li><a href="#entities"><span class=secno>8.4. </span>Entities</a>
+ </ul>
+
+ <li><a href="#wysiwyg"><span class=secno>9. </span>WYSIWYG editors</a>
+ <ul class=toc>
+ <li><a href="#presentational"><span class=secno>9.1.
+ </span>Presentational markup</a>
+ <ul class=toc>
+ <li><a href="#wysiwyg0"><span class=secno>9.1.1. </span>WYSIWYG
+ signature</a>
+
+ <li><a href="#the-font"><span class=secno>9.1.2. </span>The
+ <code>font</code> element</a>
+ </ul>
+ </ul>
+
+ <li><a href="#rendering"><span class=secno>10. </span>Rendering</a>
+ <ul class=toc>
+ <li><a href="#rendering0"><span class=secno>10.1. </span>Rendering and
+ the DOM</a>
+ </ul>
+
+ <li><a href="#no"><span class=secno>11. </span>Things that you can't do
+ with this specification because they are better handled using other
+ technologies that are further described herein</a>
+ <ul class=toc>
+ <li><a href="#localisation"><span class=secno>11.1.
+ </span>Localisation</a>
+
+ <li><a href="#declarative"><span class=secno>11.2. </span>Declarative 2D
+ vector graphics and animation</a>
+
+ <li><a href="#declarative0"><span class=secno>11.3. </span>Declarative
+ 3D scenes</a>
+
+ <li><a href="#timers"><span class=secno>11.4. </span>Timers</a>
+
+ <li><a href="#events2"><span class=secno>11.5. </span>Events</a>
+ </ul>
+
+ <li class=no-num><a href="#references">References</a>
+
+ <li class=no-num><a href="#acknowledgements">Acknowledgements</a>
+ </ul>
+ <!--end-toc-->
+
+ <hr>
+
+ <h2 id=introduction><span class=secno>1. </span>Introduction</h2>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>The World Wide Web's markup language has always been HTML. HTML was
+ primarily designed as a language for semantically describing scientific
+ documents, although its general design and adaptations over the years have
+ enabled it to be used to describe a number of other types of documents.
+
+ <p>The main area that has not been adequately addressed by HTML is a vague
+ subject referred to as Web Applications. This specification attempts to
+ rectify this, while at the same time updating the HTML specifications to
+ address issues raised in the past few years.
+
+ <h3 id=scope><span class=secno>1.1. </span>Scope</h3>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>This specification is limited to providing a semantic-level markup
+ language and associated semantic-level scripting APIs for authoring
+ accessible pages on the Web ranging from static documents to dynamic
+ applications.
+
+ <p>The scope of this specification does not include addressing presentation
+ concerns (although default rendering rules for Web browsers are included
+ at the end of this specification).
+
+ <p>The scope of this specification does not include documenting every HTML
+ or DOM feature supported by Web browsers. Browsers support many features
+ that are considered to be very bad for accessibility or that are otherwise
+ inappropriate. For example, the <code>blink</code> element is clearly
+ presentational and authors wishing to cause text to blink should instead
+ use CSS.
+
+ <p>The scope of this specification is not to describe an entire operating
+ system. In particular, hardware configuration software, image manipulation
+ tools, and applications that users would be expected to use with high-end
+ workstations on a daily basis are out of scope. In terms of applications,
+ this specification is targeted specifically at applications that would be
+ expected to be used by users on an occasional basis, or regularly but from
+ disparate locations, with low CPU requirements. For instance, online
+ purchasing systems, searching systems, games (especially multiplayer
+ online games), public telephone books or address books, communications
+ software (e-mail clients, instant messaging clients, discussion software),
+ document editing software, etc.
+
+ <p>For sophisticated cross-platform applications, there already exist
+ several proprietary solutions (such as Mozilla's XUL and Macromedia's
+ Flash). These solutions are evolving faster than any standards process
+ could follow, and the requirements are evolving even faster. These systems
+ are also significantly more complicated to specify, and are orders of
+ magnitude more difficult to achieve interoperability with, than the
+ solutions described in this document. Platform-specific solutions for such
+ sophisticated applications (for example the MacOS X Core APIs) are even
+ further ahead.
+
+ <h4 id=relationship><span class=secno>1.1.1. </span>Relationship to HTML
+ 4.01, XHTML 1.1, DOM2 HTML</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>This specification represents a new version of HTML4 and XHTML1, along
+ with a new version of the associated DOM2 HTML API. Migration from HTML4
+ or XHTML1 to the format and APIs described in this specification should in
+ most cases be straightforward, as care has been taken to ensure that
+ backwards-compatibility is retained.</p>
+ <!-- XXX refs -->
+
+ <p>This specification will eventually supplant Web Forms 2.0 as well. <a
+ href="#refsWF2">[WF2]</a>
+
+ <h4 id=relationship0><span class=secno>1.1.2. </span>Relationship to XHTML2</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>XHTML2 <a href="#refsXHTML2">[XHTML2]</a> defines a new HTML vocabulary
+ with better features for hyperlinks, multimedia content, annotating
+ document edits, rich metadata, declarative interactive forms, and
+ describing the semantics of human literary works such as poems and
+ scientific papers.
+
+ <p>However, it lacks elements to express the semantics of many of the
+ non-document types of content often seen on the Web. For instance, forum
+ sites, auction sites, search engines, online shops, and the like, do not
+ fit the document metaphor well, and are not covered by XHTML2.
+
+ <p><em>This</em> specification aims to extend HTML so that it is also
+ suitable in these contexts.
+
+ <p>XHTML2 and this specification use different namespaces and therefore can
+ both be implemented in the same XML processor.
+
+ <h4 id=relationship1><span class=secno>1.1.3. </span>Relationship to XUL,
+ Flash, Silverlight, and other proprietary UI languages</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>This specification is independent of the various proprietary UI
+ languages that various vendors provide. As an open, vendor-neutral
+ language, HTML provides for a solution to the same problems without the
+ risk of vendor lock-in.
+
+ <h3 id=structure><span class=secno>1.2. </span>Structure of this
+ specification</h3>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>This specification is divided into the following important sections:
+
+ <dl>
+ <dt><a href="#dom">The DOM</a>
+
+ <dd>The DOM, or Document Object Model, provides a base for the rest of the
+ specification.
+
+ <dt><a href="#semantics">The Semantics</a>
+
+ <dd>Documents are built from elements. These elements form a tree using
+ the DOM. Each element also has a predefined meaning, which is explained
+ in this section. User agent requirements for how to handle each element
+ are also given, along with rules for authors on how to use the element.
+
+ <dt><a href="#windows">Browsing Contexts</a>
+
+ <dd>HTML documents do not exist in a vacuum &mdash; this section defines
+ many of the features that affect environments that deal with multiple
+ pages, links between pages, and running scripts.
+
+ <dt>APIs
+
+ <dd><a href="#editing">The Editing APIs</a>: HTML documents can provide a
+ number of mechanisms for users to modify content, which are described in
+ this section.
+
+ <dd><a href="#comms">The Communication APIs</a>: Applications written in
+ HTML often require mechanisms to communicate with remote servers, as well
+ as communicating with other applications from different domains running
+ on the same client.
+
+ <dd><a href="#repetition">Repetition Templates</a>: A mechanism to support
+ repeating sections in forms.
+
+ <dt><a href="#syntax">The Language Syntax</a>
+
+ <dd>All of these features would be for naught if they couldn't be
+ represented in a serialised form and sent to other people, and so this
+ section defines the syntax of HTML, along with rules for how to parse
+ HTML.
+ </dl>
+
+ <p>There are also a couple of appendices, defining <a href="#wysiwyg">shims
+ for WYSIWYG editors</a>, <a href="#rendering">rendering rules</a> for Web
+ browsers, and listing <a href="#no">areas that are out of scope</a> for
+ this specification.
+
+ <h4 id=how-to><span class=secno>1.2.1. </span>How to read this
+ specification</h4>
+
+ <p>This specification should be read like all other specifications. First,
+ it should be read cover-to-cover, multiple times. Then, it should be read
+ backwards at least once. Then it should be read by picking random sections
+ from the contents list and following all the cross-references.
+
+ <h3 id=conformance><span class=secno>1.3. </span>Conformance requirements</h3>
+
+ <p>All diagrams, examples, and notes in this specification are
+ non-normative, as are all sections explicitly marked non-normative.
+ Everything else in this specification is normative.
+
+ <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in the
+ normative parts of this document are to be interpreted as described in
+ RFC2119. For readability, these words do not appear in all uppercase
+ letters in this specification. <a href="#refsRFC2119">[RFC2119]</a></p>
+ <!-- XXX but they should be marked up -->
+
+ <p>This specification describes the conformance criteria for user agents
+ (relevant to implementors) and documents (relevant to authors and
+ authoring tool implementors).
+
+ <p class=note>There is no implied relationship between document conformance
+ requirements and implementation conformance requirements. User agents are
+ not free to handle non-conformant documents as they please; the processing
+ model described in this specification applies to implementations
+ regardless of the conformity of the input documents.</p>
+ <!--XXX quite possible that
+ this is stated twice. check for whether this is a dupe. -->
+
+ <p>User agents fall into several (overlapping) categories with different
+ conformance requirements.
+
+ <dl>
+ <dt id=interactive>Web browsers and other interactive user agents
+
+ <dd>
+ <p>Web browsers that support <a href="#xhtml5">XHTML</a> must process
+ elements and attributes from the <a href="#html-namespace0">HTML
+ namespace</a> found in <a href="#xml-documents">XML documents</a> as
+ described in this specification, so that users can interact with them,
+ unless the semantics of those elements have been overridden by other
+ specifications.</p>
+
+ <p class=example>A conforming XHTML processor would, upon finding an
+ XHTML <code><a href="#script0">script</a></code> element in an XML
+ document, execute the script contained in that element. However, if the
+ element is found within an XSLT transformation sheet (assuming the UA
+ also supports XSLT), then the processor would instead treat the <code><a
+ href="#script0">script</a></code> element as an opaque element that
+ forms part of the transform.</p>
+
+ <p>Web browsers that support <a href="#html5" title=HTML5>HTML</a> must
+ process documents labelled as <code>text/html</code> as described in
+ this specification, so that users can interact with them.</p>
+
+ <dt id=non-interactive>Non-interactive presentation user agents
+
+ <dd>
+ <p>User agents that process HTML and XHTML documents purely to render
+ non-interactive versions of them must comply with the same conformance
+ criteria as Web browsers, except that they are exempt from requirements
+ regarding user interaction.</p>
+
+ <p class=note>Typical examples of non-interactive presentation user
+ agents are printers (static UAs) and overhead displays (dynamic UAs). It
+ is expected that most static non-interactive presentation user agents
+ will also opt to <a href="#non-scripted">lack scripting support</a>.</p>
+
+ <p class=example>A non-interactive but dynamic presentation UA would
+ still execute scripts, allowing forms to be dynamically submitted, and
+ so forth. However, since the concept of "focus" is irrelevant when the
+ user cannot interact with the document, the UA would not need to support
+ any of the focus-related DOM APIs.</p>
+
+ <dt><dfn id=non-scripted>User agents with no scripting support</dfn>
+
+ <dd>
+ <p>Implementations that do not support scripting (or which have their
+ scripting features <a href="#scripting1" title="scripting is
+ disabled">disabled</a>) are exempt from supporting the events and DOM
+ interfaces mentioned in this specification. For the parts of this
+ specification that are defined in terms of an events model or in terms
+ of the DOM, such user agents must still act as if events and the DOM
+ were supported.</p>
+
+ <p class=note>Scripting can form an integral part of an application. Web
+ browsers that do not support scripting, or that have scripting disabled,
+ might be unable to fully convey the author's intent.</p>
+
+ <dt>Conformance checkers
+
+ <dd id=conformance-checkers>
+ <p>Conformance checkers must verify that a document conforms to the
+ applicable conformance criteria described in this specification.
+ Conformance checkers are exempt from detecting errors that require
+ interpretation of the author's intent (for example, while a document is
+ non-conforming if the content of a <code><a
+ href="#blockquote">blockquote</a></code> element is not a quote,
+ conformance checkers do not have to check that <code><a
+ href="#blockquote">blockquote</a></code> elements only contain quoted
+ material).</p>
+
+ <p>Conformance checkers must check that the input document conforms when
+ <a href="#scripting1">scripting is disabled</a>, and should also check
+ that the input document conforms when <a href="#scripting2">scripting is
+ enabled</a>. (This is only a "SHOULD" and not a "MUST" requirement
+ because it has been proven to be impossible. <a
+ href="#refsHALTINGPROBLEM">[HALTINGPROBLEM]</a>)</p>
+ <!-- XXX
+ [Computable] On computable numbers, with an application to the
+ Entscheidungsproblem. Alan M. Turing. In Proceedings of the London
+ Mathematical Society, series 2, volume 42, pages 230-265. London
+ Mathematical Society,
+ 1937. http://www.turingarchive.org/browse.php/B/12 (referenced:
+ 2007-03-03)
+ -->
+
+ <p>The term "HTML5 validator" can be used to refer to a conformance
+ checker that itself conforms to the applicable requirements of this
+ specification.</p>
+
+ <div class=note>
+ <p>XML DTDs cannot express all the conformance requirements of this
+ specification. Therefore, a validating XML processor and a DTD cannot
+ constitute a conformance checker. Also, since neither of the two
+ authoring formats defined in this specification is an application of
+ SGML, a validating SGML system cannot constitute a conformance checker
+ either.</p>
+
+ <p>To put it another way, there are three types of conformance criteria:</p>
+
+ <ol>
+ <li>Criteria that can be expressed in a DTD.
+
+ <li>Criteria that cannot be expressed by a DTD, but can still be
+ checked by a machine.
+
+ <li>Criteria that can only be checked by a human.
+ </ol>
+
+ <p>A conformance checker must check for the first two. A simple
+ DTD-based validator only checks for the first class of errors and is
+ therefore not a conforming conformance checker according to this
+ specification.</p>
+ </div>
+
+ <dt>Data mining tools
+
+ <dd id=data-mining>
+ <p>Applications and tools that process HTML and XHTML documents for
+ reasons other than to either render the documents or check them for
+ conformance should act in accordance with the semantics of the documents
+ that they process.</p>
+
+ <p class=example>A tool that generates <span title="sections and
+ headings">document outlines</span> but increases the nesting level for
+ each paragraph and does not increase the nesting level for each section
+ would not be conforming.</p>
+
+ <dt id=editors>Authoring tools and markup generators
+
+ <dd>
+ <p>Authoring tools and markup generators must generate conforming
+ documents. Conformance criteria that apply to authors also apply to
+ authoring tools, where appropriate.</p>
+
+ <p>Authoring tools are exempt from the strict requirements of using
+ elements only for their specified purpose, but only to the extent that
+ authoring tools are not yet able to determine author intent.</p>
+
+ <p class=example>For example, it is not conforming to use an <code><a
+ href="#address">address</a></code> element for arbitrary contact
+ information; that element can only be used for marking up contact
+ information for the author of the document or section. However, since an
+ authoring tool is likely unable to determine the difference, an
+ authoring tool is exempt from that requirement.</p>
+
+ <p class=note>In terms of conformance checking, an editor is therefore
+ required to output documents that conform to the same extent that a
+ conformance checker will verify.</p>
+
+ <p>When an authoring tool is used to edit a non-conforming document, it
+ may preserve the conformance errors in sections of the document that
+ were not edited during the editing session (i.e. an editing tool is
+ allowed to round-trip erroneous content). However, an authoring tool
+ must not claim that the output is conformant if errors have been so
+ preserved.</p>
+
+ <p>Authoring tools are expected to come in two broad varieties: tools
+ that work from structure or semantic data, and tools that work on a
+ What-You-See-Is-What-You-Get media-specific editing basis (WYSIWYG).</p>
+
+ <p>The former is the preferred mechanism for tools that author HTML,
+ since the structure in the source information can be used to make
+ informed choices regarding which HTML elements and attributes are most
+ appropriate.</p>
+
+ <p>However, WYSIWYG tools are legitimate, and this specification <a
+ href="#wysiwyg1" title="WYSIWYG editors">makes certain concessions to
+ WYSIWYG editors</a>.</p>
+
+ <p>All authoring tools, whether WYSIWYG or not, should make a best effort
+ attempt at enabling users to create well-structured, semantically rich,
+ media-independent content.</p>
+ </dl>
+
+ <p>Some conformance requirements are phrased as requirements on elements,
+ attributes, methods or objects. Such requirements fall into two
+ categories: those describing content model restrictions, and those
+ describing implementation behaviour. The former category of requirements
+ are requirements on documents and authoring tools. The second category are
+ requirements on user agents.
+
+ <p>Conformance requirements phrased as algorithms or specific steps may be
+ implemented in any manner, so long as the end result is equivalent. (In
+ particular, the algorithms defined in this specification are intended to
+ be easy to follow, and not intended to be performant.)
+
+ <p id=hardwareLimitations>User agents may impose implementation-specific
+ limits on otherwise unconstrained inputs, e.g. to prevent denial of
+ service attacks, to guard against running out of memory, or to work around
+ platform-specific limitations.
+
+ <p>For compatibility with existing content and prior specifications, this
+ specification describes two authoring formats: one based on XML (referred
+ to as <dfn id=xhtml5 title=XHTML>XHTML5</dfn>), and one using a <a
+ href="#parsing">custom format</a> inspired by SGML (referred to as <dfn
+ id=html5>HTML5</dfn>). Implementations may support only one of these two
+ formats, although supporting both is encouraged.
+
+ <p id=authors-using-xhtml><a href="#xhtml5">XHTML</a> documents (<a
+ href="#xml-documents">XML documents</a> using elements from the <a
+ href="#html-namespace0">HTML namespace</a>) that use the new features
+ described in this specification and that are served over the wire (e.g. by
+ HTTP) must be sent using an XML MIME type such as
+ <code>application/xml</code> or <code>application/xhtml+xml</code> and
+ must not be served as <code>text/html</code>. <a
+ href="#refsRFC3023">[RFC3023]</a>
+
+ <p>Such XML documents may contain a <code>DOCTYPE</code> if desired, but
+ this is not required to conform to this specification.
+
+ <p class=note>According to the XML specification, XML processors are not
+ guaranteed to process the external DTD subset referenced in the DOCTYPE.
+ This means, for example, that using entities for characters in XHTML
+ documents is unsafe (except for &amp;lt;, &amp;gt;, &amp;amp;, &amp;quot;
+ and &amp;apos;). For interoperability, authors are advised to avoid
+ optional features of XML.
+
+ <p id=authors-using-html><a href="#html5" title=HTML5>HTML documents</a>,
+ if they are served over the wire (e.g. by HTTP) must be labelled with the
+ <code>text/html</code> MIME type.</p>
+ <!--
+ XXX update RFC 2854 -->
+
+ <p id=entity-references>The language in this specification assumes that the
+ user agent expands all entity references, and therefore does not include
+ entity reference nodes in the DOM. If user agents do include entity
+ reference nodes in the DOM, then user agents must handle them as if they
+ were fully expanded when implementing this specification. For example, if
+ a requirement talks about an element's child text nodes, then any text
+ nodes that are children of an entity reference that is a child of that
+ element would be used as well.</p>
+ <!-- XXX unexpandable entities? -->
+
+ <h4 id=common><span class=secno>1.3.1. </span>Common conformance
+ requirements for APIs exposed to JavaScript</h4>
+
+ <p class=big-issue>A lot of arrays/lists/<span>collection</span>s in this
+ spec assume zero-based indexes but use the term "<var
+ title="">index</var>th" liberally. We should define those to be zero-based
+ and be clearer about this.
+
+ <p>Unless otherwise specified, if a DOM attribute that is a floating point
+ number type (<code title="">float</code>) is assigned an Infinity or
+ Not-a-Number value, a <code title=big-issue>NOT_SUPPORTED_ERR</code>
+ exception must be raised.
+
+ <p>Unless otherwise specified, if a DOM attribute that is a signed numeric
+ type is assigned a negative value, a <code
+ title=big-issue>NOT_SUPPORTED_ERR</code> exception must be raised.
+
+ <p>Unless otherwise specified, if a method with an argument that is a floating
+ point number type (<code title="">float</code>) is passed an Infinity or
+ Not-a-Number value, a <code title=big-issue>NOT_SUPPORTED_ERR</code>
+ exception must be raised.
+
+ <p>Unless otherwise specified, if a method is passed fewer arguments than are
+ defined for that method in its IDL definition, a <code
+ title=big-issue>NOT_SUPPORTED_ERR</code> exception must be raised.
+
+ <p>Unless otherwise specified, if a method is passed more arguments than are
+ defined for that method in its IDL definition, the excess arguments must
+ be ignored.
+
+ <p>Unless otherwise specified, if a method is expecting, as one of its
+ arguments, as defined by its IDL definition, an object implementing a
+ particular interface <var title="">X</var>, and the argument passed is an
+ object whose [[Class]] property is neither that interface <var
+ title="">X</var>, nor the name of an interface <var title="">Y</var> where
+ this specification requires that all objects implementing interface <var
+ title="">Y</var> also implement interface <var title="">X</var>, nor the
+ name of an interface that inherits from the expected interface <var
+ title="">X</var>, then a <code title="">TYPE_MISMATCH_ERR</code> exception
+ must be raised.
+
+ <p class=big-issue>Anything else? Passing the wrong type of object, maybe?
+ Implied conversions to int/float?
+
+ <h4 id=dependencies><span class=secno>1.3.2. </span>Dependencies</h4>
+
+ <p>This specification relies on several other underlying specifications.
+
+ <dl>
+ <dt>XML
+
+ <dd>
+ <p>Implementations that support XHTML5 must support some version of XML,
+ as well as its corresponding namespaces specification, because XHTML5
+ uses an XML serialisation with namespaces. <a href="#refsXML">[XML]</a>
+ <a href="#refsXMLNAMES">[XMLNAMES]</a></p>
+
+ <dt>XML Base
+
+ <dd>
+ <p id=xmlBase>User agents must follow the rules given by XML Base to
+ resolve relative URIs in HTML and XHTML fragments. That is the mechanism
+ used in this specification for resolving relative URIs in DOM trees. <a
+ href="#refsXMLBASE">[XMLBASE]</a></p>
+
+ <p class=note>It is possible for <code
+ title=attr-xml-base>xml:base</code> attributes to be present even in
+ HTML fragments, as such attributes can be added dynamically using
+ script.</p>
+
+ <dt>DOM
+
+ <dd>
+ <p>Implementations must support some version of DOM Core and DOM Events,
+ because this specification is defined in terms of the DOM, and some of
+ the features are defined as extensions to the DOM Core interfaces. <a
+ href="#refsDOM3CORE">[DOM3CORE]</a> <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a></p>
+
+ <dt>ECMAScript
+
+ <dd>
+ <p>Implementations that use ECMAScript to implement the APIs defined in
+ this specification must implement them in a manner consistent with the
+ ECMAScript Bindings for DOM Specifications specification, as this
+ specification uses that specification's terminology. <a
+ href="#refsEBFD">[EBFD]</a></p>
+ </dl>
+
+ <p>This specification does not require support of any particular network
+ transport protocols, image formats, audio formats, video formats, style
+ sheet language, scripting language, or any of the DOM and WebAPI
+ specifications beyond those described above. However, the language
+ described by this specification is biased towards CSS as the styling
+ language, ECMAScript as the scripting language, and HTTP as the network
+ protocol, and several features assume that those languages and protocols
+ are in use.
+
+ <h4 id=features><span class=secno>1.3.3. </span>Features defined in other
+ specifications</h4>
+
+ <p>Some elements are defined in terms of their DOM <dfn
+ id=textcontent><code>textContent</code></dfn> attribute. This is an
+ attribute defined on the <code>Node</code> interface in DOM3 Core. <a
+ href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <p class=big-issue>Should textContent be defined differently for dir="" and
+ &lt;bdo>? Should we come up with an alternative to textContent that
+ handles those and other things, like alt=""?</p>
+ <!-- This section is currently here exclusively so that we crossref
+ to textContent. XXX also add event-click, event-change,
+ event-DOMActivate, etc, here, and just have the section be a general
+ "defined in other specifications" section -->
+
+ <p>The term <dfn id=activation0>activation behavior</dfn> is used as
+ defined in the DOM3 Events specification. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a> <span class=big-issue>At the time
+ of writing, DOM3 Events hadn't yet been updated to define that
+ phrase.</span>
+
+ <p id=alternate-style-sheets>The rules for handling alternative style
+ sheets are defined in the CSS object model specification. <a
+ href="#refsCSSOM">[CSSOM]</a>
+
+ <p class=big-issue>See <a
+ href="http://dev.w3.org/cvsweb/~checkout~/csswg/cssom/Overview.html?rev=1.35&amp;content-type=text/html;%20charset=utf-8">http://dev.w3.org/cvsweb/~checkout~/csswg/cssom/Overview.html?rev=1.35&amp;content-type=text/html;%20charset=utf-8</a>
+
+ <p>Certain features are defined in terms of CSS &lt;color&gt; values. When
+ the CSS value <code title="">currentColor</code> is specified in this
+ context, the "computed value of the 'color' property" for the purposes of
+ determining the computed value of the <code title="">currentColor</code>
+ keyword is the computed value of the 'color' property on the element in
+ question. <a href="#refsCSS3COLOR">[CSS3COLOR]</a>
+
+ <p class=example>If a canvas gradient's <code
+ title=dom-canvasgradient-addColorStop><a
+ href="#addcolorstop">addColorStop()</a></code> method is called with the
+ <code title="">currentColor</code> keyword as the color, then the computed
+ value of the 'color' property on the <code><a
+ href="#canvas">canvas</a></code> element is the one that is used.
+
+ <h3 id=terminology><span class=secno>1.4. </span>Terminology</h3>
+
+ <p>This specification refers to both HTML and XML attributes and DOM
+ attributes, often in the same context. When it is not clear which is being
+ referred to, they are referred to as <dfn id=content>content
+ attributes</dfn> for HTML and XML attributes, and <dfn
+ id=dom-attributes>DOM attributes</dfn> for those from the DOM. Similarly,
+ the term "properties" is used for both ECMAScript object properties and
+ CSS properties. When these are ambiguous they are qualified as object
+ properties and CSS properties respectively.
+
+ <p id=html-namespace>To ease migration from HTML to XHTML, UAs conforming
+ to this specification will place elements in HTML in the
+ <code>http://www.w3.org/1999/xhtml</code> namespace, at least for the
+ purposes of the DOM and CSS. The term "<dfn id=elements1>elements in the
+ HTML namespace</dfn>", or "<dfn id=html-elements>HTML elements</dfn>" for
+ short, when used in this specification, thus refers to both HTML and XHTML
+ elements.
+
+ <p>Unless otherwise stated, all elements defined or mentioned in this
+ specification are in the <code>http://www.w3.org/1999/xhtml</code>
+ namespace, and all attributes defined or mentioned in this specification
+ have no namespace (they are in the per-element partition).
+
+ <p>The term <a href="#html-">HTML documents</a> is sometimes used in
+ contrast with <a href="#xml-documents">XML documents</a> to mean
+ specifically documents that were parsed using an <a href="#html-0">HTML
+ parser</a> (as opposed to using an XML parser or created purely through
+ the DOM).
+
+ <p>Generally, when the specification states that a feature applies to HTML
+ or XHTML, it also includes the other. When a feature specifically only
+ applies to one of the two languages, it is called out by explicitly
+ stating that it does not apply to the other format, as in "for HTML, ...
+ (this does not apply to XHTML)".
+
+ <p>This specification uses the term <em>document</em> to refer to any use
+ of HTML, ranging from short static documents to long essays or reports
+ with rich multimedia, as well as to fully-fledged interactive
+ applications.
+
+ <p>For readability, the term URI is used to refer to both ASCII URIs and
+ Unicode IRIs, as those terms are defined by <a
+ href="#refsRFC3986">[RFC3986]</a> and <a href="#refsRFC3987">[RFC3987]</a>
+ respectively. On the rare occasions where IRIs are not allowed but ASCII
+ URIs are, this is called out explicitly.
+
+ <p>The term <dfn id=root-element>root element</dfn>, when not qualified to
+ explicitly refer to the document's root element, means the furthest
+ ancestor element node of whatever node is being discussed, or the node
+ itself if there is none. When the node is a part of the document, then
+ that is indeed the document's root element. However, if the node is not
+ currently part of the document tree, the root element will be an orphaned
+ node.
+
+ <p>An element is said to have been <dfn id=inserted title="insert an
+ element into a document">inserted into a document</dfn> when its <a
+ href="#root-element">root element</a> changes and is now the document's <a
+ href="#root-element">root element</a>.
+
+ <p>The term <dfn id=tree-order>tree order</dfn> means a pre-order,
+ depth-first traversal of DOM nodes involved (through the <code
+ title="">parentNode</code>/<code title="">childNodes</code> relationship).
+
+ <p>When it is stated that some element or attribute is <dfn id=ignored
+ title=ignore>ignored</dfn>, or treated as some other value, or handled as
+ if it was something else, this refers only to the processing of the node
+ after it is in the DOM. A user agent must not mutate the DOM in such
+ situations.
+
+ <p>When an XML name, such as an attribute or element name, is referred to
+ in the form <code><var title="">prefix</var>:<var
+ title="">localName</var></code>, as in <code>xml:id</code> or
+ <code>svg:rect</code>, it refers to a name with the local name <var
+ title="">localName</var> and the namespace given by the prefix, as defined
+ by the following table:
+
+ <dl>
+ <dt><code title="">xml</code>
+
+ <dd><code>http://www.w3.org/XML/1998/namespace</code>
+
+ <dt><code title="">html</code>
+
+ <dd><code>http://www.w3.org/1999/xhtml</code>
+
+ <dt><code title="">svg</code>
+
+ <dd><code>http://www.w3.org/2000/svg</code>
+ </dl>
+
+ <p>For simplicity, terms such as <em>shown</em>, <em>displayed</em>, and
+ <em>visible</em> might sometimes be used when referring to the way a
+ document is rendered to the user. These terms are not meant to imply a
+ visual medium; they must be considered to apply to other media in
+ equivalent ways.
+
+ <p>Various DOM interfaces are defined in this specification using
+ pseudo-IDL. This looks like OMG IDL but isn't. For instance, method
+ overloading is used, and types from the W3C DOM specifications are used
+ without qualification. Language-specific bindings for these abstract
+ interface definitions must be derived in a way consistent with W3C DOM
+ specifications. Some interface-specific binding information for ECMAScript
+ is included in this specification.
+
+ <p class=big-issue>The current situation with IDL blocks is pitiful. IDL is
+ totally inadequate to properly represent what objects have to look like in
+ JS; IDL can't say if a member is enumerable, what the indexing behaviour
+ is, what the stringification behaviour is, what behaviour setting a member
+ whose type is a particular interface should be (e.g. setting of
+ document.location or element.className), what constructor an object
+ implementing an interface should claim to have, how overloads work, etc. I
+ think we should make the IDL blocks non-normative, and/or replace them
+ with something else that is better for JS while still being clear on how
+ it applies to other languages. However, we do need to have something that
+ says what types the methods take as arguments, since we have to raise
+ exceptions if they are wrong.
+
+ <p>The construction "a <code>Foo</code> object", where <code>Foo</code> is
+ actually an interface, is sometimes used instead of the more accurate "an
+ object implementing the interface <code>Foo</code>".
+
+ <p>A DOM attribute is said to be <em>getting</em> when its value is being
+ retrieved (e.g. by author script), and is said to be <em>setting</em> when
+ a new value is assigned to it.
+
+ <p>If a DOM object is said to be <dfn id=live>live</dfn>, then that means
+ that any attributes returning that object must always return the same
+ object (not a new object each time), and the attributes and methods on
+ that object must operate on the actual underlying data, not a snapshot of
+ the data.</p>
+ <!-- XXX should define "same instance of" to mean JS===. -->
+
+ <p>The terms <em>fire</em> and <em>dispatch</em> are used interchangeably
+ in the context of events, as in the DOM Events specifications. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The term <dfn id=text-node>text node</dfn> refers to any
+ <code>Text</code> node, including <code>CDATASection</code> nodes (any
+ <code>Node</code> with node type 3 or 4).
+
+ <p>Some of the algorithms in this specification, for historical reasons,
+ require the user agent to <dfn id=pause>pause</dfn> until some condition
+ has been met. While a user agent is paused, it must ensure that no scripts
+ execute (e.g. no event handlers, no timers, etc). User agents should
+ remain responsive to user input while paused, however.
+
+ <h4 id=html-vs><span class=secno>1.4.1. </span>HTML vs XHTML</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>This specification defines an abstract language for describing documents
+ and applications, and some APIs for interacting with in-memory
+ representations of resources that use this language.
+
+ <p>The in-memory representation is known as "DOM5 HTML", or "the DOM" for
+ short.
+
+ <p>There are various concrete syntaxes that can be used to transmit
+ resources that use this abstract language, two of which are defined in
+ this specification.
+
+ <p>The first such concrete syntax is "HTML5". This is the format
+ recommended for most authors. It is compatible with all legacy Web
+ browsers. If a document is transmitted with the MIME type <code
+ title="">text/html</code>, then it will be processed as an "HTML5"
+ document by Web browsers.
+
+ <p>The second concrete syntax uses XML, and is known as "XHTML5". When a
+ document is transmitted with an XML MIME type, such as <code
+ title="">application/xhtml+xml</code>, then it is processed by Web browsers
+ using an XML processor, and treated as an "XHTML5" document. Generally
+ speaking, authors are discouraged from trying to use XML on the Web,
+ because XML has much stricter syntax rules than the "HTML5" variant
+ described above, and is relatively newer and therefore less mature.
+
+ <p>The "DOM5 HTML", "HTML5", and "XHTML5" representations cannot all
+ represent the same content. For example, namespaces cannot be represented
+ using "HTML5", but they are supported in "DOM5 HTML" and "XHTML5".
+ Similarly, documents that use the <code><a
+ href="#noscript">noscript</a></code> feature can be represented using
+ "HTML5", but cannot be represented with "XHTML5" and "DOM5 HTML". Comments
+ that contain the string "<code title="">--&gt;</code>" can be represented
+ in "DOM5 HTML" but not in "HTML5" and "XHTML5". And so forth.
+
+ <h2 id=dom><span class=secno>2. </span>The Document Object Model</h2>
+
+ <p>The Document Object Model (DOM) is a representation &mdash; a model
+ &mdash; of a document and its content. <a
+ href="#refsDOM3CORE">[DOM3CORE]</a> The DOM is not just an API; the
+ conformance criteria of HTML implementations are defined, in this
+ specification, in terms of operations on the DOM.
+
+ <p>This specification defines the language represented in the DOM by
+ features together called DOM5 HTML. DOM5 HTML consists of DOM Core
+ <code>Document</code> nodes and DOM Core <code>Element</code> nodes, along
+ with text nodes and other content.
+
+ <p>Elements in the DOM represent things; that is, they have intrinsic
+ <em>meaning</em>, also known as semantics.
+
+ <p class=example>For example, a <code><a href="#p">p</a></code> element
+ represents a paragraph.
+
+ <p>In addition, documents and elements in the DOM host APIs that extend the
+ DOM Core APIs, providing new features to application developers using DOM5
+ HTML.
+
+ <h3 id=documents><span class=secno>2.1. </span>Documents</h3>
+
+ <p>Every XML and HTML document in an HTML UA is represented by a
+ <code>Document</code> object. <a href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <p><code>Document</code> objects are assumed to be <dfn
+ id=xml-documents>XML documents</dfn> unless they are flagged as being <dfn
+ id=html->HTML documents</dfn> when they are created. Whether a document is
+ an <a href="#html-" title="HTML documents">HTML document</a> or an <a
+ href="#xml-documents" title="XML documents">XML document</a> affects the
+ behaviour of certain APIs, as well as a few CSS rendering rules. <a
+ href="#refsCSS21">[CSS21]</a>
+
+ <p class=note>A <code>Document</code> object created by the <code
+ title="">createDocument()</code> API on the <code>DOMImplementation</code>
+ object is initially an <a href="#xml-documents" title="XML documents">XML
+ document</a>, but can be made into an <a href="#html-" title="HTML
+ documents">HTML document</a> by calling <code title=dom-document-open><a
+ href="#open">document.open()</a></code> on it.
+
+ <p>All <code>Document</code> objects (in user agents implementing this
+ specification) must also implement the <code><a
+ href="#htmldocument">HTMLDocument</a></code> interface, available using
+ binding-specific methods. (This is the case whether or not the document in
+ question is an <a href="#html-" title="HTML documents">HTML document</a>
+ or indeed whether it contains any <a href="#html-elements">HTML
+ elements</a> at all.) <code>Document</code> objects must also implement
+ the document-level interface of any other namespaces found in the document
+ that the UA supports. For example, if an HTML implementation also supports
+ SVG, then the <code>Document</code> object must implement <code><a
+ href="#htmldocument">HTMLDocument</a></code> and <code>SVGDocument</code>.
+
+ <p class=note>Because the <code><a
+ href="#htmldocument">HTMLDocument</a></code> interface is now obtained
+ using binding-specific casting methods instead of simply being the primary
+ interface of the document object, it is no longer defined as inheriting
+ from <code>Document</code>.
+
+ <pre class=idl>interface <dfn id=htmldocument>HTMLDocument</dfn> {
+ // <a href="#resource0">Resource metadata management</a>
+ readonly attribute <a href="#location2">Location</a> <a href="#location0" title=dom-document-location>location</a>;
+ readonly attribute DOMString <a href="#url" title=dom-document-URL>URL</a>;
+ attribute DOMString <a href="#domain" title=dom-document-domain>domain</a>;
+ readonly attribute DOMString <a href="#referrer" title=dom-document-referrer>referrer</a>;
+ attribute DOMString <a href="#cookie0" title=dom-document-cookie>cookie</a>;
+ readonly attribute DOMString <a href="#lastmodified" title=dom-document-lastModified>lastModified</a>;
+
+ // <a href="#dom-tree0">DOM tree accessors</a>
+ attribute DOMString <a href="#document.title" title=dom-document-title>title</a>;
+ attribute DOMString <a href="#dir1" title=dom-document-dir>dir</a>;
+ attribute <a href="#htmlelement">HTMLElement</a> <a href="#body" title=dom-document-body>body</a>;
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#images0" title=dom-document-images>images</a>;
+<!-- readonly attribute <span>HTMLCollection</span> <span title="dom-document-applets">applets</span>;
+--> readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#links0" title=dom-document-links>links</a>;
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#forms0" title=dom-document-forms>forms</a>;
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#anchors" title=dom-document-anchors>anchors</a>;
+ NodeList <a href="#getelementsbyname" title=dom-document-getElementsByName>getElementsByName</a>(in DOMString elementName);
+ NodeList <a href="#getelementsbyclassname" title=dom-document-getElementsByClassName>getElementsByClassName</a>(in DOMString[] classNames);
+
+ // <a href="#dynamic2">Dynamic markup insertion</a>
+ attribute DOMString <a href="#innerhtml" title=dom-innerHTML>innerHTML</a>;
+ void <a href="#open" title=dom-document-open>open</a>();
+ void <a href="#open" title=dom-document-open>open</a>(in DOMString type);
+ void <a href="#open" title=dom-document-open>open</a>(in DOMString type, in DOMString replace);
+ void <a href="#open" title=dom-document-open>open</a>(in DOMString url, in DOMString name, in DOMString features);
+ void <a href="#open" title=dom-document-open>open</a>(in DOMString url, in DOMString name, in DOMString features, in bool replace);
+ void <a href="#close" title=dom-document-close>close</a>();
+ void <a href="#document.write" title=dom-document-write>write</a>(in DOMString text);
+ void <a href="#document.writeln" title=dom-document-writeln>writeln</a>(in DOMString text);
+
+ // <a href="#interaction0">Interaction</a>
+ readonly attribute <span>Element</span> <a href="#activeelement" title=dom-document-activeElement>activeElement</a>;
+ readonly attribute boolean <a href="#hasfocus" title=dom-document-hasFocus>hasFocus</a>;
+
+ // <a href="#command1" title=concept-command>Commands</a>
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#commands0" title=dom-document-commands>commands</a>;
+
+ // <a href="#editing0">Editing</a>
+ attribute boolean <a href="#designMode" title=dom-document-designMode>designMode</a>;
+ boolean <a href="#execCommand" title=dom-document-execCommand>execCommand</a>(in DOMString commandId);
+ boolean <a href="#execCommand" title=dom-document-execCommand>execCommand</a>(in DOMString commandId, in boolean doShowUI);
+ boolean <a href="#execCommand" title=dom-document-execCommand>execCommand</a>(in DOMString commandId, in boolean doShowUI, in DOMString value);
+ <a href="#selection1">Selection</a> <a href="#getselection0" title=dom-document-getSelection>getSelection</a>();
+
+ // <a href="#cross-document">Cross-document messaging</a>
+ void <a href="#postmessage" title=dom-document-postMessage>postMessage</a>(in DOMString message);
+<!-- XXX we're not done here.
+ XXX see e.g. http://lxr.mozilla.org/seamonkey/source/dom/public/idl/html/nsIDOMNSHTMLDocument.idl
+ XXX see e.g. http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/dom/Document.cpp
+ XXX see e.g. http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/html/HTMLDocument.cpp
+ -->
+};</pre>
+
+ <p>Since the <code><a href="#htmldocument">HTMLDocument</a></code>
+ interface holds methods and attributes related to a number of disparate
+ features, the members of this interface are described in various different
+ sections.
+
+ <h4 id=security><span class=secno>2.1.1. </span>Security</h4>
+
+ <p>User agents must raise a <a href="#security8">security exception</a>
+ whenever any of the members of an <code><a
+ href="#htmldocument">HTMLDocument</a></code> object are accessed by
+ scripts whose <a href="#origin0">origin</a> is not the same as the
+ <code>Document</code>'s origin, with the following exceptions:
+
+ <ul>
+ <li>The <code title=dom-document-postMessage><a
+ href="#postmessage">postMessage()</a></code> method must be allowed to be
+ called from any script.
+ </ul>
+
+ <p class=big-issue>We may want to just put postMessage on Window instead of
+ Document, as that reduces the XSS risk.
+
+ <h4 id=resource><span class=secno>2.1.2. </span><dfn id=resource0>Resource
+ metadata management</dfn></h4>
+
+ <p>The <dfn id=url title=dom-document-URL><code>URL</code></dfn> attribute
+ must return <span>the document's address</span><!-- XXX
+ xref -->.
+
+ <p>The <dfn id=domain title=dom-document-domain><code>domain</code></dfn>
+ attribute must be initialised to <span>the document's domain</span>, if it
+ has one, and null otherwise. On getting, the attribute must return its
+ current value. On setting, if the new value is an allowed value (as
+ defined below), the attribute's value must be changed to the new value. If
+ the new value is not an allowed value, then a <a
+ href="#security8">security exception</a> must be raised instead.
+
+ <p>A new value is an allowed value for the <code
+ title=dom-document-domain><a href="#domain">document.domain</a></code>
+ attribute if it is equal to the attribute's current value, or if the new
+ value, prefixed by a U+002E FULL STOP ("."), exactly matches the end of
+ the current value. If the current value is null, new values other than
+ null will never be allowed.
+
+ <p>If the <code>Document</code> object's <span title="the document's
+ address">address</span><!-- XXX xref --> is hierarchical and uses a
+ server-based naming authority, then its <dfn id=domain0 title="document's
+ domain">domain</dfn> is the &lt;hostname&gt; part of that address.
+ Otherwise, it has no domain.
+
+ <p class=note>The <code title=dom-document-domain><a
+ href="#domain">domain</a></code> attribute is used to enable pages on
+ different hosts of a domain to access each other's DOMs<span
+ class=big-issue>, though this is not yet defined by this
+ specification</span>.</p>
+ <!-- XXX xref -->
+ <!--XXX
+ http://lxr.mozilla.org/seamonkey/source/content/html/document/src/nsHTMLDocument.cpp
+ search for ::GetDomain ::SetDomain
+ http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/dom/Document.cpp
+ search for ::domain ::setDomain
+ -->
+
+ <p>The <dfn id=referrer
+ title=dom-document-referrer><code>referrer</code></dfn> attribute must
+ return either the URI of the page which <a href="#navigate"
+ title=navigate>navigated</a> the <a href="#browsing0">browsing context</a>
+ to the current document (if any), or the empty string (if there is no such
+ originating page, or if the UA has been configured not to report
+ referrers).
+
+ <p class=note>In the case of HTTP, the <code title=dom-document-referrer><a
+ href="#referrer">referrer</a></code> DOM attribute will match the
+ <code>Referer</code> (sic) header that was sent when fetching the current
+ page.
+
+ <p>The <dfn id=cookie0 title=dom-document-cookie><code>cookie</code></dfn>
+ attribute must, on getting, return the same string as the value of the
+ <code title="">Cookie</code> HTTP header it would include if fetching the
+ resource indicated by <span>the document's address</span> over HTTP, as
+ per RFC 2109 section 4.3.4. <a href="#refsRFC2109">[RFC2109]</a>
+
+ <p>On setting, the <code title=dom-document-cookie><a
+ href="#cookie0">cookie</a></code> attribute must cause the user agent to
+ act as it would when processing cookies if it had just attempted to fetch
+ <span>the document's address</span> over HTTP, and had received a response
+ with a <code>Set-Cookie</code> header whose value was the specified value,
+ as per RFC 2109 sections 4.3.1, 4.3.2, and 4.3.3. <a
+ href="#refsRFC2109">[RFC2109]</a>
+
+ <p class=note>Since the <code title=dom-document-cookie><a
+ href="#cookie0">cookie</a></code> attribute is accessible across frames,
+ the path restrictions on cookies are only a tool to help manage which
+ cookies are sent to which parts of the site, and are not in any way a
+ security feature.
+
+ <p>The <dfn id=lastmodified
+ title=dom-document-lastModified><code>lastModified</code></dfn> attribute,
+ on getting, must return the date and time of the <code>Document</code>'s
+ source file's last modification, in the user's local timezone, in the
+ following format:
+
+ <ol>
+ <li> The month component of the date.
+
+ <li> A U+002F SOLIDUS character ('/').
+
+ <li> The day component of the date.
+
+ <li> A U+002F SOLIDUS character ('/').
+
+ <li> The last two digits of the year component of the date.
+
+ <li> A U+0020 SPACE character.
+
+ <li> The hours component of the time.
+
+ <li> A U+003A COLON character (':').
+
+ <li> The minutes component of the time.
+
+ <li> A U+003A COLON character (':').
+
+ <li> The seconds component of the time.
+ </ol>
+
+ <p>All the numeric components above must be given as two digits in the
+ range U+0030 DIGIT ZERO to U+0039 DIGIT NINE representing the number in
+ base ten, zero-padded if necessary.
+
+ <p>The <code>Document</code>'s source file's last modification date and
+ time must be derived from relevant features of the networking protocols
+ used, e.g. from the value of the HTTP <code title="">Last-Modified</code>
+ header of the document, or from metadata in the filesystem for local
+ files. If the last modification date and time are not known, the attribute
+ must return the string <code title="">01/01/1970 00:00:00</code>.
+
+ <h3 id=elements><span class=secno>2.2. </span>Elements</h3>
+
+ <p>The nodes representing <a href="#html-elements">HTML elements</a> in the
+ DOM must implement, and expose to scripts, the interfaces listed for them
+ in the relevant sections of this specification. This includes <a
+ href="#xhtml5">XHTML</a> elements in <a href="#xml-documents">XML
+ documents</a>, even when those documents are in another context (e.g.
+ inside an XSLT transform).
+
+ <p>The basic interface, from which all the <a href="#html-elements">HTML
+ elements</a>' interfaces inherit, and which must be used by elements that
+ have no additional requirements, is the <code><a
+ href="#htmlelement">HTMLElement</a></code> interface.
+
+ <pre
+ class=idl>interface <dfn id=htmlelement>HTMLElement</dfn> : <span>Element</span> {
+ // <a href="#dom-tree0">DOM tree accessors</a>
+ NodeList <a href="#getelementsbyclassname0" title=dom-getElementsByClassName>getElementsByClassName</a>(in DOMString[] classNames);
+
+ // <a href="#dynamic2">Dynamic markup insertion</a>
+ attribute DOMString <a href="#innerhtml" title=dom-innerHTML>innerHTML</a>;
+
+ // <span>Metadata attributes</span>
+ attribute DOMString <a href="#id0" title=dom-id>id</a>;
+ attribute DOMString <a href="#title0" title=dom-title>title</a>;
+ attribute DOMString <a href="#lang0" title=dom-lang>lang</a>;
+ attribute DOMString <a href="#dir0" title=dom-dir>dir</a>;
+ attribute <span>DOMString</span> <a href="#classname" title=dom-className>className</a>;
+ readonly attribute <a href="#domtokenlist0">DOMTokenList</a> <a href="#classlist" title=dom-classList>classList</a>;
+
+ // <a href="#interaction0">Interaction</a>
+ attribute boolean <a href="#irrelevant0" title=dom-irrelevant>irrelevant</a>;
+ attribute long <a href="#tabindex0" title=dom-tabindex>tabIndex</a>;
+ void <a href="#click" title=dom-click>click</a>();
+ void <a href="#focus0" title=dom-focus>focus</a>();
+ void <a href="#blur" title=dom-blur>blur</a>();
+ void <a href="#scrollintoview" title=dom-scrollIntoView>scrollIntoView</a>();
+ void <a href="#scrollintoview" title=dom-scrollIntoView>scrollIntoView</a>(in boolean top);
+
+ // <a href="#command1" title=concept-command>Commands</a>
+ attribute <a href="#htmlmenuelement">HTMLMenuElement</a> <a href="#contextmenu0" title=dom-contextMenu>contextMenu</a>;
+
+ // <a href="#editing0">Editing</a>
+ attribute boolean <a href="#draggable0" title=dom-draggable>draggable</a>;
+ attribute DOMString <a href="#contenteditable1" title=dom-contentEditable>contentEditable</a>;
+
+ // <a href="#event3">event handler DOM attributes</a>
+ attribute <span>EventListener</span> <a href="#onabort" title=handler-onabort>onabort</a>;
+ attribute <span>EventListener</span> <a href="#onbeforeunload" title=handler-onbeforeunload>onbeforeunload</a>;
+ attribute <span>EventListener</span> <a href="#onblur" title=handler-onblur>onblur</a>;
+ attribute <span>EventListener</span> <a href="#onchange" title=handler-onchange>onchange</a>;
+ attribute <span>EventListener</span> <a href="#onclick" title=handler-onclick>onclick</a>;
+ attribute <span>EventListener</span> <a href="#oncontextmenu" title=handler-oncontextmenu>oncontextmenu</a>;
+ attribute <span>EventListener</span> <a href="#ondblclick" title=handler-ondblclick>ondblclick</a>;
+ attribute <span>EventListener</span> <a href="#ondrag" title=handler-ondrag>ondrag</a>;
+ attribute <span>EventListener</span> <a href="#ondragend" title=handler-ondragend>ondragend</a>;
+ attribute <span>EventListener</span> <a href="#ondragenter" title=handler-ondragenter>ondragenter</a>;
+ attribute <span>EventListener</span> <a href="#ondragleave" title=handler-ondragleave>ondragleave</a>;
+ attribute <span>EventListener</span> <a href="#ondragover" title=handler-ondragover>ondragover</a>;
+ attribute <span>EventListener</span> <a href="#ondragstart" title=handler-ondragstart>ondragstart</a>;
+ attribute <span>EventListener</span> <a href="#ondrop" title=handler-ondrop>ondrop</a>;
+ attribute <span>EventListener</span> <a href="#onerror" title=handler-onerror>onerror</a>;
+ attribute <span>EventListener</span> <a href="#onfocus" title=handler-onfocus>onfocus</a>;
+ attribute <span>EventListener</span> <a href="#onkeydown" title=handler-onkeydown>onkeydown</a>;
+ attribute <span>EventListener</span> <a href="#onkeypress" title=handler-onkeypress>onkeypress</a>;
+ attribute <span>EventListener</span> <a href="#onkeyup" title=handler-onkeyup>onkeyup</a>;
+ attribute <span>EventListener</span> <a href="#onload" title=handler-onload>onload</a>;
+ attribute <span>EventListener</span> <a href="#onmessage" title=handler-onmessage>onmessage</a>;
+ attribute <span>EventListener</span> <a href="#onmousedown" title=handler-onmousedown>onmousedown</a>;
+ attribute <span>EventListener</span> <a href="#onmousemove" title=handler-onmousemove>onmousemove</a>;
+ attribute <span>EventListener</span> <a href="#onmouseout" title=handler-onmouseout>onmouseout</a>;
+ attribute <span>EventListener</span> <a href="#onmouseover" title=handler-onmouseover>onmouseover</a>;
+ attribute <span>EventListener</span> <a href="#onmouseup" title=handler-onmouseup>onmouseup</a>;
+ attribute <span>EventListener</span> <a href="#onmousewheel" title=handler-onmousewheel>onmousewheel</a>;
+ attribute <span>EventListener</span> <a href="#onresize" title=handler-onresize>onresize</a>;
+ attribute <span>EventListener</span> <a href="#onscroll" title=handler-onscroll>onscroll</a>;
+ attribute <span>EventListener</span> <a href="#onselect" title=handler-onselect>onselect</a>;
+ attribute <span>EventListener</span> <a href="#onsubmit" title=handler-onsubmit>onsubmit</a>;
+ attribute <span>EventListener</span> <a href="#onunload" title=handler-onunload>onunload</a>;
+
+};</pre>
+
+ <p>As with the <code><a href="#htmldocument">HTMLDocument</a></code>
+ interface, the <code><a href="#htmlelement">HTMLElement</a></code>
+ interface holds methods and attributes related to a number of disparate
+ features, and the members of this interface are therefore described in
+ various different sections of this specification.
+
+ <h4 id=reflecting><span class=secno>2.2.1. </span>Reflecting content
+ attributes in DOM attributes</h4>
+
+ <p>Some <span title="DOM attribute">DOM attributes</span> are defined to
+ <dfn id=reflect>reflect</dfn> a particular <span>content attribute</span>.
+ This means that on getting, the DOM attribute returns the current value of
+ the content attribute, and on setting, the DOM attribute changes the value
+ of the content attribute to the given value.
+
+ <p>If a reflecting DOM attribute is a <code>DOMString</code> attribute
+ whose content attribute is defined to contain a URI, then on getting, the
+ DOM attribute must return the value of the content attribute, resolved to
+ an absolute URI, and on setting, must set the content attribute to the
+ specified literal value. If the content attribute is absent, the DOM
+ attribute must return the default value, if the content attribute has one,
+ or else the empty string.
+
+ <p>If a reflecting DOM attribute is a <code>DOMString</code> whose content
+ attribute is an <a href="#enumerated">enumerated attribute</a>, and the
+ DOM attribute is <dfn id=limited>limited to only known values</dfn>, then,
+ on getting, the DOM attribute must return the value associated with the
+ state the attribute is in (in its canonical case), or the empty string if
+ the attribute is in a state that has no associated keyword value; and on
+ setting, if the new value case-insensitively matches one of the keywords
+ given for that attribute, then the content attribute must be set to that
+ value, otherwise, if the new value is the empty string, then the content
+ attribute must be removed, otherwise, the setter must raise a
+ <code>SYNTAX_ERR</code> exception.
+
+ <p>If a reflecting DOM attribute is a <code>DOMString</code> but doesn't
+ fall into any of the above categories, then the getting and setting must
+ be done in a transparent, case-preserving manner.
+
+ <p>If a reflecting DOM attribute is a boolean attribute, then the DOM
+ attribute must return true if the attribute is set, and false if it is
+ absent. On setting, the content attribute must be removed if the DOM
+ attribute is set to false, and must be set to have the same value as its
+ name if the DOM attribute is set to true. (This corresponds to the rules
+ for <a href="#boolean0" title="boolean attribute">boolean content
+ attributes</a>.)
+
+ <p>If a reflecting DOM attribute is a signed integer type
+ (<code>long</code>) then the content attribute must be parsed according to
+ <a href="#rules0" title="rules for parsing integers">the rules for parsing
+ signed integers</a> first. If that fails, or if the attribute is absent,
+ the default value must be returned instead, or 0 if there is no default
+ value. On setting, the given value must be converted to a string
+ representing the number as a <a href="#valid0">valid integer</a> in base
+ ten and then that string must be used as the new content attribute value.
+
+ <p>If a reflecting DOM attribute is an <em>unsigned</em> integer type
+ (<code>unsigned long</code>) then the content attribute must be parsed
+ according to <a href="#rules" title="rules for parsing non-negative
+ integers">the rules for parsing unsigned integers</a> first. If that
+ fails, or if the attribute is absent, the default value must be returned
+ instead, or 0 if there is no default value. On setting, the given value
+ must be converted to a string representing the number as a <a
+ href="#valid">valid non-negative integer</a> in base ten and then that
+ string must be used as the new content attribute value.
+
+ <p>If a reflecting DOM attribute is an unsigned integer type
+ (<code>unsigned long</code>) that is <dfn id=limited0>limited to only
+ positive non-zero numbers</dfn>, then the behavior is similar to the
+ previous case, but zero is not allowed. On getting, the content attribute
+ must first be parsed according to <a href="#rules" title="rules for
+ parsing non-negative integers">the rules for parsing unsigned
+ integers</a>, and if that fails, or if the attribute is absent, the
+ default value must be returned instead, or 1 if there is no default value.
+ On setting, if the value is zero, the user agent must raise an
+ <code>INDEX_SIZE_ERR</code> exception. Otherwise, the given value must be
+ converted to a string representing the number as a <a href="#valid">valid
+ non-negative integer</a> in base ten and then that string must be used as
+ the new content attribute value.
+
+ <p>If a reflecting DOM attribute is a floating point number type
+ (<code>float</code>) and the content attribute is defined to contain a
+ time offset, then the content attribute must be parsed according to <a
+ href="#rules4" title="rules for parsing time offsets">the rules for
+ parsing time offsets</a> first. If that fails, or if the attribute is
+ absent, the default value must be returned instead, or the not-a-number
+ value (NaN) if there is no default value. On setting, the given value must
+ be converted to a string using the <a href="#time-offset">time offset
+ serialisation rules</a>, and that string must be used as the new content
+ attribute value.
+
+ <p>If a reflecting DOM attribute is of the type <code><a
+ href="#domtokenlist0">DOMTokenList</a></code>, then on getting it must
+ return a <code><a href="#domtokenlist0">DOMTokenList</a></code> object
+ whose underlying string is the element's corresponding content attribute.
+ When the <code><a href="#domtokenlist0">DOMTokenList</a></code> object
+ mutates its underlying string, the attribute must itself be immediately
+ mutated. When the attribute is absent, then the string represented by the
+ <code><a href="#domtokenlist0">DOMTokenList</a></code> object is the empty
+ string; when the object mutates this empty string, the user agent must
+ first add the corresponding content attribute, and then mutate that
+ attribute instead. <code><a href="#domtokenlist0">DOMTokenList</a></code>
+ attributes are always read-only. The same <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object must be returned
+ every time for each attribute.
+
+ <p>If a reflecting DOM attribute has the type <code><a
+ href="#htmlelement">HTMLElement</a></code>, or an interface that descends
+ from <code><a href="#htmlelement">HTMLElement</a></code>, then, on
+ getting, it must run the following algorithm (stopping at the first point
+ where a value is returned):
+
+ <ol>
+ <li>If the corresponding content attribute is absent, then the DOM
+ attribute must return null.
+
+ <li>Let <var title="">candidate</var> be the element that the <code
+ title="">document.getElementById()</code> method would find if it was
+ passed as its argument the current value of the corresponding content
+ attribute.
+
+ <li>If <var title="">candidate</var> is null, or if it is not
+ type-compatible with the DOM attribute, then the DOM attribute must
+ return null.
+
+ <li>Otherwise, it must return <var title="">candidate</var>.
+ </ol>
+
+ <p>On setting, if the given element has an <code title=attr-id><a
+ href="#id">id</a></code> attribute, then the content attribute must be set
+ to the value of that <code title=attr-id><a href="#id">id</a></code>
+ attribute. Otherwise, the DOM attribute must be set to the empty string.</p>
+ <!-- XXX or raise an exception? -->
+
+ <h3 id=common0><span class=secno>2.3. </span>Common DOM interfaces</h3>
+
+ <h4 id=collections><span class=secno>2.3.1. </span>Collections</h4>
+
+ <p>The <code><a href="#htmlcollection0">HTMLCollection</a></code>, <code><a
+ href="#htmlformcontrolscollection0">HTMLFormControlsCollection</a></code>,
+ and <code><a
+ href="#htmloptionscollection0">HTMLOptionsCollection</a></code> interfaces
+ represent various lists of DOM nodes. Collectively, objects implementing
+ these interfaces are called <dfn id=collections0>collections</dfn>.
+
+ <p>When a <a href="#collections0" title=collections>collection</a> is
+ created, a filter and a root are associated with the collection.
+
+ <p class=example>For example, when the <code><a
+ href="#htmlcollection0">HTMLCollection</a></code> object for the <code
+ title=dom-document-images><a href="#images0">document.images</a></code>
+ attribute is created, it is associated with a filter that selects only
+ <code><a href="#img">img</a></code> elements, and rooted at the root of
+ the document.
+
+ <p>The <span>collection</span> then <dfn id=represents title="represented
+ by the collection">represents</dfn> a <a href="#live">live</a> view of the
+ subtree rooted at the collection's root, containing only nodes that match
+ the given filter. The view is linear. In the absence of specific
+ requirements to the contrary, the nodes within the collection must be
+ sorted in <a href="#tree-order">tree order</a>.
+
+ <p class=note>The <code title=dom-table-rows><a
+ href="#rows">rows</a></code> list is not in tree order.
+
+ <p>An attribute that returns a collection must return the same object every
+ time it is retrieved.
+
+ <h5 id=htmlcollection><span class=secno>2.3.1.1. </span>HTMLCollection</h5>
+
+ <p>The <code><a href="#htmlcollection0">HTMLCollection</a></code> interface
+ represents a generic <span>collection</span> of elements.
+
+ <pre class=idl>interface <dfn id=htmlcollection0>HTMLCollection</dfn> {
+ readonly attribute unsigned long <a href="#length" title=dom-HTMLCollection-length>length</a>;
+ Element <a href="#itemindex" title=dom-HTMLCollection-item>item</a>(in unsigned long index);
+ Element <a href="#nameditem" title=dom-HTMLCollection-namedItem>namedItem</a>(in DOMString name);
+};</pre>
+
+ <p>The <dfn id=length
+ title=dom-HTMLCollection-length><code>length</code></dfn> attribute must
+ return the number of nodes <span>represented by the collection</span>.
+
+ <p>The <dfn id=itemindex title=dom-HTMLCollection-item><code>item(<var
+ title="">index</var>)</code></dfn> method must return the <var
+ title="">index</var>th node in the collection. If there is no <var
+ title="">index</var>th node in the collection, then the method must return
+ null.
+
+ <p>The <dfn id=nameditem
+ title=dom-HTMLCollection-namedItem><code>namedItem(<var
+ title="">key</var>)</code></dfn> method must return the first node in the
+ collection that matches the following requirements:
+
+ <ul>
+ <li>It is an <code><a href="#a">a</a></code>, <code>applet</code>,
+ <code><a href="#area">area</a></code>, <code>form</code>, <code><a
+ href="#img">img</a></code>, or <code><a href="#object">object</a></code>
+ element with a <code title=attr-name>name</code> attribute equal to <var
+ title="">key</var>, or,
+
+ <li>It is an HTML element of any kind with an <code title=attr-id><a
+ href="#id">id</a></code> attribute equal to <var title="">key</var>.
+ (Non-HTML elements, even if they have IDs, are not searched for the
+ purposes of <code title=dom-HTMLCollection-namedItem><a
+ href="#nameditem">namedItem()</a></code>.)
+ </ul>
+
+ <p>If no such elements are found, then the method must return null.
+
+ <p>In ECMAScript implementations, objects that implement the <code><a
+ href="#htmlcollection0">HTMLCollection</a></code> interface must also have
+ a [[Get]] method that, when invoked with a property name that is a number,
+ acts like the <code title=dom-HTMLCollection-item><a
+ href="#itemindex">item()</a></code> method would when invoked with that
+ argument, and when invoked with a property name that is a string, acts
+ like the <code title=dom-HTMLCollection-namedItem><a
+ href="#nameditem">namedItem()</a></code> method would when invoked with
+ that argument.
+
+ <h5 id=htmlformcontrolscollection><span class=secno>2.3.1.2.
+ </span>HTMLFormControlsCollection</h5>
+
+ <p>The <code><a
+ href="#htmlformcontrolscollection0">HTMLFormControlsCollection</a></code>
+ interface represents a <span>collection</span> of form controls.
+
+ <pre
+ class=idl>interface <dfn id=htmlformcontrolscollection0>HTMLFormControlsCollection</dfn> {
+ readonly attribute unsigned long <a href="#length0" title=dom-HTMLFormControlsCollection-length>length</a>;
+ <a href="#htmlelement">HTMLElement</a> <a href="#itemindex0" title=dom-HTMLFormControlsCollection-item>item</a>(in unsigned long index);
+ Object <a href="#nameditem0" title=dom-HTMLFormControlsCollection-namedItem>namedItem</a>(in DOMString name);
+};</pre>
+
+ <p>The <dfn id=length0
+ title=dom-HTMLFormControlsCollection-length><code>length</code></dfn>
+ attribute must return the number of nodes <span>represented by the
+ collection</span>.
+
+ <p>The <dfn id=itemindex0
+ title=dom-HTMLFormControlsCollection-item><code>item(<var
+ title="">index</var>)</code></dfn> method must return the <var
+ title="">index</var>th node in the collection. If there is no <var
+ title="">index</var>th node in the collection, then the method must return
+ null.
+
+ <p>The <dfn id=nameditem0
+ title=dom-HTMLFormControlsCollection-namedItem><code>namedItem(<var
+ title="">key</var>)</code></dfn> method must act according to the
+ following algorithm:
+
+ <ol>
+ <li>If, at the time the method is called, there is exactly one node in the
+ collection that has either an <code title=attr-id><a
+ href="#id">id</a></code> attribute or a <code title=attr-name>name</code>
+ attribute equal to <var title="">key</var>, then return that node and
+ stop the algorithm.
+
+ <li>Otherwise, if there are no nodes in the collection that have either an
+ <code title=attr-id><a href="#id">id</a></code> attribute or a <code
+ title=attr-name>name</code> attribute equal to <var title="">key</var>,
+ then return null and stop the algorithm.
+
+ <li>Otherwise, create a <code>NodeList</code> object representing a live
+ view of the <code><a
+ href="#htmlformcontrolscollection0">HTMLFormControlsCollection</a></code>
+ object, further filtered so that the only nodes in the
+ <code>NodeList</code> object are those that have either an <code
+ title=attr-id><a href="#id">id</a></code> attribute or a <code
+ title=attr-name>name</code> attribute equal to <var title="">key</var>.
+ The nodes in the <code>NodeList</code> object must be sorted in <a
+ href="#tree-order">tree order</a>.
+
+ <li>Return that <code>NodeList</code> object.
+ </ol>
+
+ <p>In the ECMAScript DOM binding, objects implementing the <code><a
+ href="#htmlformcontrolscollection0">HTMLFormControlsCollection</a></code>
+ interface must support being dereferenced using the square bracket
+ notation, such that dereferencing with an integer index is equivalent to
+ invoking the <code title=dom-HTMLFormControlsCollection-item><a
+ href="#itemindex0">item()</a></code> method with that index, and such that
+ dereferencing with a string index is equivalent to invoking the <code
+ title=dom-HTMLFormControlsCollection-namedItem><a
+ href="#nameditem0">namedItem()</a></code> method with that index.</p>
+ <!--
+http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E...%0A%3Cform%20name%3D%22a%22%3E%3Cinput%20id%3D%22x%22%20name%3D%22y%22%3E%3Cinput%20name%3D%22x%22%20id%3D%22y%22%3E%3C/form%3E%0A%3Cscript%3E%0A%20%20var%20x%3B%0A%20%20w%28x%20%3D%20document.forms%5B%27a%27%5D%5B%27x%27%5D%29%3B%0A%20%20w%28x.length%29%3B%0A%20%20x%5B0%5D.parentNode.removeChild%28x%5B0%5D%29%3B%0A%20%20w%28x.length%29%3B%0A%20%20w%28x%20%3D%3D%20document.forms%5B%27a%27%5D%5B%27x%27%5D%29%3B%0A%3C/script%3E%0A
+-->
+
+ <h5 id=htmloptionscollection><span class=secno>2.3.1.3.
+ </span>HTMLOptionsCollection</h5>
+
+ <p>The <code><a
+ href="#htmloptionscollection0">HTMLOptionsCollection</a></code> interface
+ represents a list of <code>option</code> elements.
+
+ <pre
+ class=idl>interface <dfn id=htmloptionscollection0>HTMLOptionsCollection</dfn> {
+ attribute unsigned long <a href="#length1" title=dom-HTMLOptionsCollection-length>length</a>;
+ HTMLOptionElement <a href="#itemindex1" title=dom-HTMLOptionsCollection-item>item</a>(in unsigned long index);
+ Object <a href="#nameditem1" title=dom-HTMLOptionsCollection-namedItem>namedItem</a>(in DOMString name);
+};</pre>
+
+ <p>On getting, the <dfn id=length1
+ title=dom-HTMLOptionsCollection-length><code>length</code></dfn> attribute
+ must return the number of nodes <span>represented by the
+ collection</span>.
+
+ <p>On setting, the behaviour depends on whether the new value is equal to,
+ greater than, or less than the number of nodes <span>represented by the
+ collection</span> at that time. If the number is the same, then setting
+ the attribute must do nothing. If the new value is greater, then <var
+ title="">n</var> new <code>option</code> elements with no attributes and
+ no child nodes must be appended to the <code>select</code> element on
+ which the <code><a
+ href="#htmloptionscollection0">HTMLOptionsCollection</a></code> is rooted,
+ where <var title="">n</var> is the difference between the two numbers (new
+ value minus old value). If the new value is lower, then the last <var
+ title="">n</var> nodes in the collection must be removed from their parent
+ nodes, where <var title="">n</var> is the difference between the two
+ numbers (old value minus new value).
+
+ <p class=note>Setting <code title=dom-HTMLOptionsCollection-length><a
+ href="#length1">length</a></code> never removes or adds any
+ <code>optgroup</code> elements, and never adds new children to existing
+ <code>optgroup</code> elements (though it can remove children from them).
+
+ <p>The <dfn id=itemindex1
+ title=dom-HTMLOptionsCollection-item><code>item(<var
+ title="">index</var>)</code></dfn> method must return the <var
+ title="">index</var>th node in the collection. If there is no <var
+ title="">index</var>th node in the collection, then the method must return
+ null.
+
+ <p>The <dfn id=nameditem1
+ title=dom-HTMLOptionsCollection-namedItem><code>namedItem(<var
+ title="">key</var>)</code></dfn> method must act according to the
+ following algorithm:
+
+ <ol>
+ <li>If, at the time the method is called, there is exactly one node in the
+ collection that has either an <code title=attr-id><a
+ href="#id">id</a></code> attribute or a <code title=attr-name>name</code>
+ attribute equal to <var title="">key</var>, then return that node and
+ stop the algorithm.
+
+ <li>Otherwise, if there are no nodes in the collection that have either an
+ <code title=attr-id><a href="#id">id</a></code> attribute or a <code
+ title=attr-name>name</code> attribute equal to <var title="">key</var>,
+ then return null and stop the algorithm.
+
+ <li>Otherwise, create a <code>NodeList</code> object representing a live
+ view of the <code><a
+ href="#htmloptionscollection0">HTMLOptionsCollection</a></code> object,
+ further filtered so that the only nodes in the <code>NodeList</code>
+ object are those that have either an <code title=attr-id><a
+ href="#id">id</a></code> attribute or a <code
+ title=attr-option-name>name</code> attribute equal to <var
+ title="">key</var>. The nodes in the <code>NodeList</code> object must be
+ sorted in <a href="#tree-order">tree order</a>.
+
+ <li>Return that <code>NodeList</code> object.
+ </ol>
+
+ <p>In the ECMAScript DOM binding, objects implementing the <code><a
+ href="#htmloptionscollection0">HTMLOptionsCollection</a></code> interface
+ must support being dereferenced using the square bracket notation, such
+ that dereferencing with an integer index is equivalent to invoking the
+ <code title=dom-HTMLOptionsCollection-item><a
+ href="#itemindex1">item()</a></code> method with that index, and such that
+ dereferencing with a string index is equivalent to invoking the <code
+ title=dom-HTMLOptionsCollection-namedItem><a
+ href="#nameditem1">namedItem()</a></code> method with that index.</p>
+ <!-- see also http://ln.hixie.ch/?start=1161042744&count=1 -->
+
+ <p class=big-issue>We may want to add <code>add()</code> and
+ <code>remove()</code> methods here too because IE implements
+ HTMLSelectElement and HTMLOptionsCollection on the same object, and so
+ people use them almost interchangeably in the wild.
+
+ <h4 id=domtokenlist><span class=secno>2.3.2. </span>DOMTokenList</h4>
+
+ <p>The <code><a href="#domtokenlist0">DOMTokenList</a></code> interface
+ represents an interface to an underlying string that consists of an <a
+ href="#unordered">unordered set of space-separated tokens</a>.
+
+ <p>Which string underlies a particular <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object is defined when the
+ object is created. It might be a content attribute (e.g. the string that
+ underlies the <code title=dom-classList><a
+ href="#classlist">classList</a></code> object is the <code
+ title=attr-class><a href="#class">class</a></code> attribute), or it might
+ be an anonymous string (e.g. when a <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object is passed to an
+ author-implemented callback in the <code><a
+ href="#datagrid0">datagrid</a></code> APIs).
+
+ <pre class=idl>interface <dfn id=domtokenlist0>DOMTokenList</dfn> {
+ readonly attribute unsigned long <a href="#length2" title=dom-tokenlist-length>length</a>;
+ DOMString <a href="#itemindex2" title=dom-tokenlist-item>item</a>(in unsigned long index);
+ boolean <a href="#hastoken" title=dom-tokenlist-has>has</a>(in DOMString token);
+ void <a href="#addtoken" title=dom-tokenlist-add>add</a>(in DOMString token);
+ void <a href="#remove" title=dom-tokenlist-remove>remove</a>(in DOMString token);
+ boolean <a href="#toggle" title=dom-tokenlist-toggle>toggle</a>(in DOMString token);
+};</pre>
+
+ <p>The <dfn id=length2 title=dom-tokenlist-length><code>length</code></dfn>
+ attribute must return the number of <em>unique</em> tokens that result
+ from <a href="#split" title="split a string on spaces">splitting the
+ underlying string on spaces</a>.
+
+ <p>The <dfn id=itemindex2 title=dom-tokenlist-item><code>item(<var
+ title="">index</var>)</code></dfn> method must <a href="#split"
+ title="split a string on spaces">split the underlying string on
+ spaces</a>, sort the resulting list of tokens by Unicode
+ codepoint<!-- XXX that's
+ basically nonsense. What sort order do we want here? It should be
+ the cheapest one possible that is well-defined for all Unicode. -->,
+ remove exact duplicates, and then return the <var title="">index</var>th
+ item in this list. If <var title="">index</var> is equal to or greater
+ than the number of tokens, then the method must return null.
+
+ <p>In ECMAScript implementations, objects that implement the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> interface must also have a
+ [[Get]] method that, when invoked with a property name that is a number,
+ acts like the <code title=dom-tokenlist-item><a
+ href="#itemindex2">item()</a></code> method would when invoked with that
+ argument.
+
+ <p>The <dfn id=hastoken title=dom-tokenlist-has><code>has(<var
+ title="">token</var>)</code></dfn> method must run the following
+ algorithm:
+
+ <ol>
+ <li>If the <var title="">token</var> argument contains any
+ spaces<!-- XXX elaborate -->, then raise an
+ <code>INVALID_CHARACTER_ERR</code> exception and stop the algorithm.
+
+ <li>Otherwise, <a href="#split" title="split a string on spaces">split the
+ underlying string on spaces</a> to get the list of tokens in the object's
+ underlying string.
+
+ <li>If the token indicated by <var title="">token</var> is one of the
+ tokens in the object's underlying string then return true and stop this
+ algorithm.
+
+ <li>Otherwise, return false.
+ </ol>
+
+ <p>The <dfn id=addtoken title=dom-tokenlist-add><code>add(<var
+ title="">token</var>)</code></dfn> method must run the following
+ algorithm:
+
+ <ol>
+ <li>If the <var title="">token</var> argument contains any
+ spaces<!-- XXX elaborate -->, then raise an
+ <code>INVALID_CHARACTER_ERR</code> exception and stop the algorithm.
+
+ <li>Otherwise, <a href="#split" title="split a string on spaces">split the
+ underlying string on spaces</a> to get the list of tokens in the object's
+ underlying string.
+
+ <li>If the given <var title="">token</var> is already one of the tokens in
+ the <code><a href="#domtokenlist0">DOMTokenList</a></code> object's
+ underlying string then stop the algorithm.
+
+ <li>Otherwise, if the last character of the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object's underlying string
+ is not a <a href="#space">space character</a>, then append a U+0020 SPACE
+ character to the end of that string.
+
+ <li>Append the value of <var title="">token</var> to the end of the
+ <code><a href="#domtokenlist0">DOMTokenList</a></code> object's
+ underlying string.
+ </ol>
+
+ <p>The <dfn id=remove title=dom-tokenlist-remove><code>remove(<var
+ title="">token</var>)</code></dfn> method must run the following
+ algorithm:
+
+ <ol>
+ <li>If the <var title="">token</var> argument contains any <a
+ href="#space" title="space character">spaces</a>, then raise an
+ <code>INVALID_CHARACTER_ERR</code> exception and stop the algorithm.
+
+ <li>Otherwise, <a href="#remove0" title="remove a token from a
+ string">remove the given <var title="">token</var> from the underlying
+ string</a>.
+ </ol>
+
+ <p>The <dfn id=toggle title=dom-tokenlist-toggle><code>toggle(<var
+ title="">token</var>)</code></dfn> method must run the following
+ algorithm:
+
+ <ol>
+ <li>If the <var title="">token</var> argument contains any
+ spaces<!-- XXX elaborate -->, then raise an
+ <code>INVALID_CHARACTER_ERR</code> exception and stop the algorithm.
+
+ <li>Otherwise, <a href="#split" title="split a string on spaces">split the
+ underlying string on spaces</a> to get the list of tokens in the object's
+ underlying string.
+
+ <li>If the given <var title="">token</var> is already one of the tokens in
+ the <code><a href="#domtokenlist0">DOMTokenList</a></code> object's
+ underlying string then <a href="#remove0" title="remove a token from a
+ string">remove the given <var title="">token</var> from the underlying
+ string</a>, and stop the algorithm, returning false.
+
+ <li>Otherwise, if the last character of the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object's underlying string
+ is not a <a href="#space">space character</a>, then append a U+0020 SPACE
+ character to the end of that string.
+
+ <li>Append the value of <var title="">token</var> to the end of the
+ <code><a href="#domtokenlist0">DOMTokenList</a></code> object's
+ underlying string.
+
+ <li>Return true.
+ </ol>
+
+ <p>In the ECMAScript DOM binding, objects implementing the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> interface must stringify to
+ the object's underlying string representation.
+
+ <h4 id=dom-feature><span class=secno>2.3.3. </span>DOM feature strings</h4>
+
+ <p>DOM3 Core defines mechanisms for checking for interface support, and for
+ obtaining implementations of interfaces, using <a
+ href="http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMFeatures">feature
+ strings</a>. <a href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <p>A DOM application can use the <dfn id=hasfeature
+ title=hasFeature><code>hasFeature(<var title="">feature</var>, <var
+ title="">version</var>)</code></dfn> method of the
+ <code>DOMImplementation</code> interface with parameter values "<code
+ title="">HTML</code>" and "<code>5.0</code>" (respectively) to determine
+ whether or not this module is supported by the implementation. In addition
+ to the feature string "<code title="">HTML</code>", the feature string
+ "<code title="">XHTML</code>" (with version string "<code>5.0</code>") can
+ be used to check if the implementation supports XHTML. User agents should
+ respond with a true value when the <code><a
+ href="#hasfeature">hasFeature</a></code> method is queried with these
+ values. Authors are cautioned, however, that UAs returning true might not
+ be perfectly compliant, and that UAs returning false might well have
+ support for features in this specification; in general, therefore, use of
+ this method is discouraged.
+
+ <p>The values "<code title="">HTML</code>" and "<code
+ title="">XHTML</code>" (both with version "<code>5.0</code>") should also
+ be supported in the context of the <code>getFeature()</code> and
+ <code>isSupported()</code> methods, as defined by DOM3 Core.
+
+ <p class=note>The interfaces defined in this specification are not always
+ supersets of the interfaces defined in DOM2 HTML; some features that were
+ formerly deprecated, poorly supported, rarely used or considered
+ unnecessary have been removed. Therefore it is not guaranteed that an
+ implementation that supports "<code title="">HTML</code>"
+ "<code>5.0</code>" also supports "<code title="">HTML</code>"
+ "<code>2.0</code>".
+
+ <h3 id=dom-tree><span class=secno>2.4. </span><dfn id=dom-tree0>DOM tree
+ accessors</dfn></h3>
+
+ <p><dfn id=the-html0>The <code>html</code> element</dfn> of a document is
+ the document's root element, if there is one and it's an <code><a
+ href="#html">html</a></code> element, or null otherwise.
+
+ <p><dfn id=the-head0>The <code>head</code> element</dfn> of a document is
+ the first <code><a href="#head">head</a></code> element that is a child of
+ <a href="#the-html0">the <code>html</code> element</a>, if there is one,
+ or null otherwise.
+
+ <p><dfn id=the-title1>The <code>title</code> element</dfn> of a document is
+ the first <code><a href="#title1">title</a></code> element that is a child
+ of <a href="#the-head0">the <code>head</code> element</a>, if there is
+ one, or null otherwise.
+
+ <p>The <dfn id=document.title
+ title=dom-document-title><code>title</code></dfn> attribute must, on
+ getting, run the following algorithm:
+
+ <ol>
+ <li>
+ <p>If the <a href="#root-element">root element</a> is an <code>svg</code>
+ element in the "<code title="">http://www.w3.org/2000/svg</code>"
+ namespace, and the user agent supports SVG, then the getter must return
+ the value that would have been returned by the DOM attribute of the same
+ name on the <code>SVGDocument</code> interface.
+
+ <li>
+ <p>Otherwise, it must return a concatenation of the data of all the child
+ <a href="#text-node" title="text node">text nodes</a> of <a
+ href="#the-title1">the <code>title</code> element</a>, in tree order, or
+ the empty string if <a href="#the-title1">the <code>title</code>
+ element</a> is null.
+ </ol>
+
+ <p>On setting, the following algorithm must be run:
+
+ <ol>
+ <li>
+ <p>If the <a href="#root-element">root element</a> is an <code>svg</code>
+ element in the "<code title="">http://www.w3.org/2000/svg</code>"
+ namespace, and the user agent supports SVG, then the setter must defer
+ to the setter for the DOM attribute of the same name on the
+ <code>SVGDocument</code> interface. Stop the algorithm here.
+
+ <li>If <a href="#the-head0">the <code>head</code> element</a> is null,
+ then the attribute must do nothing. Stop the algorithm here.
+
+ <li>If <a href="#the-title1">the <code>title</code> element</a> is null,
+ then a new <code><a href="#title1">title</a></code> element must be
+ created and appended to <a href="#the-head0">the <code>head</code>
+ element</a>.
+
+ <li>The children of <a href="#the-title1">the <code>title</code>
+ element</a> (if any) must all be removed.
+
+ <li>A single <code>Text</code> node whose data is the new value being
+ assigned must be appended to <a href="#the-title1">the <code>title</code>
+ element</a>.
+ </ol>
+
+ <p>The <code title=dom-document-title><a
+ href="#document.title">title</a></code> attribute on the <code><a
+ href="#htmldocument">HTMLDocument</a></code> interface should shadow the
+ attribute of the same name on the <code>SVGDocument</code> interface when
+ the user agent supports both HTML and SVG.
+
+ <p><dfn id=the-body0>The body element</dfn> of a document is the first
+ child of <a href="#the-html0">the <code>html</code> element</a> that is
+ either a <code><a href="#body0">body</a></code> element or a
+ <code>frameset</code> element. If there is no such element, it is null. If
+ the body element is null, then when the specification requires that events
+ be fired at "the body element", they must instead be fired at the
+ <code>Document</code> object.
+
+ <p>The <dfn id=body title=dom-document-body><code>body</code></dfn>
+ attribute, on getting, must return <a href="#the-body0">the body
+ element</a> of the document (either a <code><a
+ href="#body0">body</a></code> element, a <code>frameset</code> element, or
+ null). On setting, the following algorithm must be followed:
+
+ <ol>
+ <li>If the new value is not a <code><a href="#body0">body</a></code> or
+ <code>frameset</code> element, then raise a
+ <code>HIERARCHY_REQUEST_ERR</code> exception and abort these steps.
+
+ <li>Otherwise, if the new value is the same as <a href="#the-body0">the
+ body element</a>, do nothing. Abort these steps.
+
+ <li>Otherwise, if <a href="#the-body0">the body element</a> is not null,
+ then replace that element with the new value in the DOM, as if the root
+ element's <code title="">replaceChild()</code> method had been called
+ with the new value and <a href="#the-body0" title="the body element">the
+ incumbent body element</a> as its two arguments respectively, then abort
+ these steps.
+
+ <li>Otherwise, <a href="#the-body0">the body element</a> is null.
+ Append the new value to the root element.
+ </ol>
+ <!--XXX
+ http://lxr.mozilla.org/seamonkey/source/content/html/document/src/nsHTMLDocument.cpp
+ search for ::GetBody ::SetBody
+ http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/html/HTMLDocument.cpp
+ search for ::setBody
+ http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/dom/Document.cpp
+ search for ::body
+ -->
+
+ <p>The <dfn id=images0 title=dom-document-images><code>images</code></dfn>
+ attribute must return an <code><a
+ href="#htmlcollection0">HTMLCollection</a></code> rooted at the
+ <code>Document</code> node, whose filter matches only <code><a
+ href="#img">img</a></code> elements.
+
+ <p>The <dfn id=links0 title=dom-document-links><code>links</code></dfn>
+ attribute must return an <code><a
+ href="#htmlcollection0">HTMLCollection</a></code> rooted at the
+ <code>Document</code> node, whose filter matches only <code><a
+ href="#a">a</a></code> elements with <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attributes and <code><a
+ href="#area">area</a></code> elements with <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code> attributes.
+
+ <p>The <dfn id=forms0 title=dom-document-forms><code>forms</code></dfn>
+ attribute must return an <code><a
+ href="#htmlcollection0">HTMLCollection</a></code> rooted at the
+ <code>Document</code> node, whose filter matches only <code>form</code>
+ elements.
+
+ <p>The <dfn id=anchors
+ title=dom-document-anchors><code>anchors</code></dfn> attribute must
+ return an <code><a href="#htmlcollection0">HTMLCollection</a></code>
+ rooted at the <code>Document</code> node, whose filter matches only
+ <code><a href="#a">a</a></code> elements with <code
+ title=attr-a-name>name</code> attributes.</p>
+ <!-- XXX note that such elements are
+ non-conforming -->
+
+ <p>The <dfn id=getelementsbyname
+ title=dom-document-getElementsByName><code>getElementsByName(<var
+ title="">name</var>)</code></dfn> method takes a string <var
+ title="">name</var>, and must return a live <code>NodeList</code>
+ containing all the <code><a href="#a">a</a></code>, <code>applet</code>,
+ <code>button</code>, <code>form</code>, <!-- frame? frameset?
+ XXX--><code><a
+ href="#iframe">iframe</a></code>, <code><a href="#img">img</a></code>,
+ <code>input</code>, <code><a href="#map">map</a></code>, <code><a
+ href="#meta0">meta</a></code>, <code><a
+ href="#object">object</a></code>,<!-- param?
+ XXX--> <code>select</code>,
+ and <code>textarea</code> elements in that document that have a <code
+ title="">name</code> attribute whose value is
+ equal<!-- XXX case sensitivity --> to the <var title="">name</var>
+ argument.</p>
+ <!-- XXX what about XHTML? -->
+
+ <p>The <dfn id=getelementsbyclassname
+ title=dom-document-getElementsByClassName><code>getElementsByClassName(<var
+ title="">classNames</var>)</code></dfn> method takes an array of strings
+ representing classes. When called, the method must return a live
+ <code>NodeList</code> object containing all the elements in the document
+ that have all the classes specified in that array. If the array is empty,
+ then the method must return an empty <code>NodeList</code>.
+
+ <p>HTML, XHTML, SVG and MathML elements define which classes they are in by
+ having an attribute in the per-element partition with the name <code
+ title="">class</code> containing a space-separated list of classes to
+ which the element belongs. Other specifications may also allow elements in
+ their namespaces to be labelled as being in specific classes. UAs must not
+ assume that all attributes of the name <code>class</code> for elements in
+ any namespace work in this way, however, and must not assume that such
+ attributes, when used as global attributes, label other elements as being
+ in specific classes.
+
+ <div class=example>
+ <p>Given the following XHTML fragment:</p>
+
+ <pre>&lt;div id="example"&gt;
+ &lt;p id="p1" class="aaa bbb"/&gt;
+ &lt;p id="p2" class="aaa ccc"/&gt;
+ &lt;p id="p3" class="bbb ccc"/&gt;
+&lt;/div&gt;</pre>
+
+ <p>A call to
+ <code>document.getElementById('example').getElementsByClassName('aaa')</code>
+ would return a <code>NodeList</code> with the two paragraphs
+ <code>p1</code> and <code>p2</code> in it.</p>
+
+ <p>A call to <code>getElementsByClassName(['ccc', 'bbb'])</code> would
+ only return one node, however, namely <code>p3</code>. A call to
+ <code>document.getElementById('example').getElementsByClassName('ccc
+ bbb')</code> would return the same thing.</p>
+
+ <p>A call to <code>getElementsByClassName(['aaa bbb'])</code> would return
+ no nodes; none of the elements above are in the "aaa bbb" class.</p>
+
+ <p>A call to <code>getElementsByClassName([''])</code> would also return
+ no nodes, since none of the nodes are in the "" class (indeed, in HTML,
+ it is impossible to specify that an element is in the "" class).</p>
+ </div>
+
+ <p>The <dfn id=getelementsbyclassname0
+ title=dom-getElementsByClassName><code>getElementsByClassName()</code></dfn>
+ method on the <code><a href="#htmlelement">HTMLElement</a></code>
+ interface must return the nodes that the <code><a
+ href="#htmldocument">HTMLDocument</a></code> <code
+ title=dom-document-getElementsByClassName><a
+ href="#getelementsbyclassname">getElementsByClassName()</a></code> method
+ would return, excluding any elements that are not descendants of the
+ <code><a href="#htmlelement">HTMLElement</a></code> object on which the
+ method was invoked.</p>
+ <!-- XXX
+> * xGetParentElementByClassName(rootElement, className, tagName) -
+> Navigates upwards until we hit a parent element with the given class name and
+> optional tag name.
+-->
+
+ <p class=note>The <code title=dom-document-dir><a
+ href="#dir1">dir</a></code> attribute on the <code><a
+ href="#htmldocument">HTMLDocument</a></code> interface is defined along
+ with the <code title=attr-dir><a href="#dir">dir</a></code> content
+ attribute.
+
+ <h3 id=dynamic><span class=secno>2.5. </span><dfn id=dynamic2>Dynamic
+ markup insertion</dfn></h3>
+
+ <p>The <code title=dom-document-write><a
+ href="#document.write">document.write()</a></code> family of methods and
+ the <code title=dom-innerHTML><a href="#innerhtml">innerHTML</a></code>
+ family of DOM attributes enable script authors to dynamically insert
+ markup into the document.
+
+ <p class=issue>bz argues that innerHTML should be called something else on
+ XML documents and XML elements. Is the sanity worth the migration pain?
+
+ <p>Because these APIs interact with the parser, their behaviour varies
+ depending on whether they are used with <a href="#html-">HTML
+ documents</a> (and the <a href="#html-0">HTML parser</a>) or XHTML in <a
+ href="#xml-documents">XML documents</a> (and the <span>XML parser</span>).
+ The following table cross-references the various versions of these APIs.
+
+ <table>
+ <thead>
+ <tr>
+ <td>
+
+ <th><dfn id=document.write
+ title=dom-document-write><code>document.write()</code></dfn>
+
+ <th><dfn id=innerhtml title=dom-innerHTML><code>innerHTML</code></dfn>
+
+ <tbody>
+ <tr>
+ <th>For documents that are <a href="#html-">HTML documents</a>
+
+ <td><a href="#document.write0"
+ title=dom-document-write-HTML><code>document.write()</code> in HTML</a>
+
+ <td><a href="#innerhtml0"
+ title=dom-innerHTML-HTML><code>innerHTML</code> in HTML</a>
+
+ <tr>
+ <th>For documents that are <a href="#xml-documents">XML documents</a>
+
+ <td><a href="#document.write1"
+ title=dom-document-write-XML><code>document.write()</code> in XML</a>
+
+ <td><a href="#innerhtml2" title=dom-innerHTML-XML><code>innerHTML</code>
+ in XML</a>
+ </table>
+
+ <p>Regardless of the parsing mode, the <dfn id=document.writeln
+ title=dom-document-writeln><code>document.writeln(<var
+ title="">s</var>)</code></dfn> method must call the <code
+ title=dom-document-write><a
+ href="#document.write">document.write()</a></code> method with the same
+ argument <var title="">s</var>, and then call the <code
+ title=dom-document-write><a
+ href="#document.write">document.write()</a></code> method with, as its
+ argument, a string consisting of a single line feed character (U+000A).
+
+ <h4 id=controlling><span class=secno>2.5.1. </span>Controlling the input
+ stream</h4>
+
+ <p>The <dfn id=open title=dom-document-open><code>open()</code></dfn>
+ method comes in several variants with different numbers of arguments.
+
+ <p>When called with two or fewer arguments, the method must act as follows:
+
+ <ol>
+ <li>
+ <p>Let <var title="">type</var> be the value of the first argument, if
+ there is one, or "<code>text/html</code>" otherwise.
+
+ <li>
+ <p>Let <var title="">replace</var> be true if there is a second argument
+ and it has the value "replace"<!-- case-insensitive. XXX
+ -->, and
+ false otherwise.
+
+ <li>
+ <p>If the document has an <span>active parser</span><!-- XXX xref
+ -->
+ that isn't a <a href="#script-created">script-created parser</a>, and
+ the <a href="#insertion">insertion point</a> associated with that
+ parser's <a href="#input0">input stream</a> is not undefined (that is,
+ it <em>does</em> point to somewhere in the input stream), then the
+ method does nothing. Abort these steps.</p>
+
+ <p class=note>This basically causes <code title=dom-document-open><a
+ href="#open">document.open()</a></code> to be ignored when it's called
+ in an inline script found during the parsing of data sent over the
+ network, while still letting it have an effect when called
+ asynchronously or on a document that is itself being spoon-fed using
+ these APIs.</p>
+
+ <li>
+ <p class=big-issue>onbeforeunload, onunload
+
+ <li>
+ <p>If the document has an <span>active parser</span><!--XXX
+ xref-->,
+ then stop that parser, and throw away any pending content in the input
+ stream. <span class=big-issue>what about if it doesn't, because it's
+ either like a text/plain, or Atom, or PDF, or XHTML, or image document,
+ or something?</span>
+ </li>
+ <!-- XXX see
+ also innerHTML in HTML -->
+
+ <li>
+ <p>Remove all child nodes of the document.
+
+ <li>
+ <p>Create a new <a href="#html-0">HTML parser</a> and associate it with
+ the document. This is a <dfn id=script-created>script-created
+ parser</dfn> (meaning that it can be closed by the <code
+ title=dom-document-open><a href="#open">document.open()</a></code> and
+ <code title=dom-document-close><a
+ href="#close">document.close()</a></code> methods, and that the
+ tokeniser will wait for an explicit call to <code
+ title=dom-document-close><a href="#close">document.close()</a></code>
+ before emitting an end-of-file token).
+
+ <li>Mark the document as being an <a href="#html-" title="HTML
+ documents">HTML document</a> (it might already be so-marked).</li>
+ <!-- text/plain handling -->
+
+ <li>
+ <p>If <var title="">type</var> does not have the value
+ "<code>text/html</code>"<!-- XXX matched how?
+ -->, then act as if the
+ tokeniser had emitted a <code><a href="#pre">pre</a></code> element
+ start tag, then set the <a href="#html-0">HTML parser</a>'s <a
+ href="#tokenisation0">tokenisation</a> stage's <a
+ href="#content2">content model flag</a> to <em>PLAINTEXT</em>.
+
+ <li>
+ <p>If <var title="">replace</var> is false, then:
+
+ <ol>
+ <li>Remove all the entries in the <a href="#browsing0">browsing
+ context</a>'s <a href="#session">session history</a> after the <a
+ href="#current0">current entry</a> in its <code>Document</code>'s
+ <code><a href="#history1">History</a></code> object
+
+ <li>Remove any earlier entries that share the same <code>Document</code>
+
+ <li>Add a new entry just before the last entry that is associated with
+ the text that was parsed by the previous parser associated with the
+ <code>Document</code> object, as well as the state of the document at
+ the start of these steps. (This allows the user to step backwards in
+ the session history to see the page before it was blown away by the
+ <code title=dom-document-open><a
+ href="#open">document.open()</a></code> call.)
+ </ol>
+
+ <li>
+ <p>Finally, set the <a href="#insertion">insertion point</a> to point at
+ just before the end of the <a href="#input0">input stream</a> (which at
+ this point will be empty).
+ </ol>
+
+ <p class=big-issue>We shouldn't hard-code <code>text/plain</code> there. We
+ should do it some other way, e.g. hand off to the section on
+ content-sniffing and handling of incoming data streams, the part that
+ defines how this all works when stuff comes over the network.</p>
+ <!-- XXX Should we support XML/XHTML as a type to that method? -->
+
+ <p>When called with three or more arguments, the <code
+ title=dom-document-open><a href="#open">open()</a></code> method on the
+ <code><a href="#htmldocument">HTMLDocument</a></code> object must call the
+ <code title=dom-open><a href="#open2">open()</a></code> method on the
+ <code><a href="#window">Window</a></code> interface of the object returned
+ by the <code title=dom-document-defaultView>defaultView</code> attribute
+ of the <code>DocumentView</code> interface of the <code><a
+ href="#htmldocument">HTMLDocument</a></code> object, with the same
+ arguments as the original call to the <code title=dom-document-open><a
+ href="#open">open()</a></code> method. If the <code
+ title=dom-document-defaultView>defaultView</code> attribute of the
+ <code>DocumentView</code> interface of the <code><a
+ href="#htmldocument">HTMLDocument</a></code> object is null, then the
+ method must raise an <code>INVALID_ACCESS_ERR</code> exception.
+
+ <p>The <dfn id=close title=dom-document-close><code>close()</code></dfn>
+ method must do nothing if there is no <a
+ href="#script-created">script-created parser</a> associated with the
+ document. If there is such a parser, then, when the method is called, the
+ user agent must insert an <a href="#explicit">explicit "EOF" character</a>
+ at the <a href="#insertion">insertion point</a> of the parser's <a
+ href="#input0">input stream</a>.
+
+ <h4 id=dynamic0><span class=secno>2.5.2. </span>Dynamic markup insertion in
+ HTML</h4>
+
+ <p>In HTML, the <dfn id=document.write0
+ title=dom-document-write-HTML><code>document.write(<var
+ title="">s</var>)</code></dfn> method must act as follows:
+
+ <ol>
+ <li>
+ <p>If the <a href="#insertion">insertion point</a> is undefined, the
+ <code title=dom-document-open><a href="#open">open()</a></code> method
+ must be called (with no arguments) on the <code
+ title=Document>document</code> object. The <a
+ href="#insertion">insertion point</a> will point at just before the end
+ of the (empty) <a href="#input0">input stream</a>.</p>
+
+ <li>
+ <p>The string <var title="">s</var> must be inserted into the <a
+ href="#input0">input stream</a><!-- XXX xref --> just before the <a
+ href="#insertion">insertion point</a>.</p>
+
+ <li>
+ <p>If there is <a href="#the-script" title="the script that will execute
+ as soon as the parser resumes">a script that will execute as soon as the
+ parser resumes</a>, then the method must now return without further
+ processing of the <a href="#input0">input stream</a>.</p>
+
+ <li>
+ <p>Otherwise, the tokeniser must process the characters that were
+ inserted, one at a time, processing resulting tokens as they are
+ emitted, and stopping when the tokeniser reaches the insertion point or
+ when the processing of the tokeniser is aborted by the tree construction
+ stage (this can happen if a <code><a href="#script0">script</a></code>
+ start tag token is emitted by the tokeniser).
+
+ <p class=note>If the <code title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code> method was called
+ from script executing inline (i.e. executing because the parser parsed a
+ set of <code><a href="#script0">script</a></code> tags), then this is a
+ <a href="#nestedParsing">reentrant invocation of the parser</a>.</p>
+
+ <li>
+ <p>Finally, the method must return.</p>
+ </ol>
+
+ <p>In HTML, the <dfn id=innerhtml0
+ title=dom-innerHTML-HTML><code>innerHTML</code></dfn> DOM attribute of all
+ <code><a href="#htmlelement">HTMLElement</a></code> and <code><a
+ href="#htmldocument">HTMLDocument</a></code> nodes returns a serialisation
+ of the node's children using the <span>HTML syntax</span><!-- XXX xref
+ -->.
+ On setting, it replaces the node's children with new nodes that result
+ from parsing the given value. The formal definitions follow.
+
+ <p>On getting, the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> DOM attribute must return the
+ result of running the following algorithm:
+
+ <ol>
+ <li>
+ <p>Let <var title="">s</var> be a string, and initialise it to the empty
+ string.
+
+ <li>
+ <p>For each child node <var title="">child</var>, in <a
+ href="#tree-order">tree order</a>, append the appropriate string from
+ the following list to <var title="">s</var>:</p>
+
+ <dl class=switch>
+ <dt>If the child node is an <code title="">Element</code>
+
+ <dd>
+ <p>Append a U+003C LESS-THAN SIGN (<code title="">&lt;</code>)
+ character, followed by the element's tag name. (For nodes created by
+ the <a href="#html-0">HTML parser</a>, <code
+ title="">Document.createElement()</code>, or <code
+ title="">Document.renameNode()</code>, the tag name will be
+ lowercase.)</p>
+
+ <p>For each attribute that the element has, append a U+0020 SPACE
+ character, the attribute's name (which, for attributes set by the <a
+ href="#html-0">HTML parser</a> or by <code
+ title="">Element.setAttributeNode()</code> or <code
+ title="">Element.setAttribute()</code>, will be lowercase), a U+003D
+ EQUALS SIGN (<code title="">=</code>) character, a U+0022 QUOTATION
+ MARK (<code title="">&quot;</code>) character, the attribute's value,
+ <a href="#escapingString" title="escaping a string">escaped as
+ described below</a>, and a second U+0022 QUOTATION MARK (<code
+ title="">&quot;</code>) character.</p>
+
+ <p>While the exact order of attributes is UA-defined, and may depend on
+ factors such as the order that the attributes were given in the
+ original markup, the sort order must be stable, such that consecutive
+ calls to <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> serialise an element's
+ attributes in the same order.</p>
+
+ <p>Append a U+003E GREATER-THAN SIGN (<code title="">&gt;</code>)
+ character.</p>
+
+ <p>If the child node is an <code><a href="#area">area</a></code>,
+ <code><a href="#base">base</a></code>, <code>basefont</code>,
+ <code>bgsound</code>, <code><a href="#br">br</a></code>, <code><a
+ href="#col">col</a></code>, <code><a href="#embed">embed</a></code>,
+ <code>frame</code>, <code><a href="#hr">hr</a></code>, <code><a
+ href="#img">img</a></code>, <code>input</code>, <code><a
+ href="#link">link</a></code>, <code><a href="#meta0">meta</a></code>,
+ <code><a href="#param">param</a></code>, <code>spacer</code>, or
+ <code>wbr</code> element, then continue on to the next child node at
+ this point.</p>
+ <!-- also, i guess:
+ image, isindex, and keygen, but we don't list those because we
+ don't consider those "elements", more "macros", and thus we
+ should never serialise them -->
+ <!-- XXX when we get around to
+ it, add event-source -->
+ <p>Otherwise, append the value of the <var title="">child</var>
+ element's <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> DOM attribute (thus recursing
+ into this algorithm for that element), followed by a U+003C LESS-THAN
+ SIGN (<code title="">&lt;</code>) character, a U+002F SOLIDUS (<code
+ title="">/</code>) character, the element's tag name again, and
+ finally a U+003E GREATER-THAN SIGN (<code title="">&gt;</code>)
+ character.</p>
+
+ <dt>If the child node is a <code title="">Text</code> or <code
+ title="">CDATASection</code> node
+
+ <dd>
+ <p>If one of the ancestors of the child node is a <code><a
+ href="#style">style</a></code>, <code><a
+ href="#script0">script</a></code>, <code>xmp</code>, <code><a
+ href="#iframe">iframe</a></code>, <code>noembed</code>,
+ <code>noframes</code>, or <code><a
+ href="#noscript">noscript</a></code> element, then append the value of
+ the <var title="">child</var> node's <code title="">data</code> DOM
+ attribute literally.</p>
+ <!-- note about noscript: because this is defining an API, it
+ can assume that scripting is enabled, and that thus the
+ <noscript> element in the DOM will have been parsed in the
+ scripting-enabled mode, and that thus the text node is raw
+ markup -->
+
+ <p>Otherwise, append the value of the <var title="">child</var> node's
+ <code title="">data</code> DOM attribute, <a href="#escapingString"
+ title="escaping a string">escaped as described below</a>.</p>
+
+ <dt>If the child node is a <code title="">Comment</code>
+
+ <dd>
+ <p>Append the literal string <code>&lt;!--</code> (U+003C LESS-THAN
+ SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D
+ HYPHEN-MINUS), followed by the value of the <var title="">child</var>
+ node's <code title="">data</code> DOM attribute, followed by the
+ literal string <code>--&gt;</code> (U+002D HYPHEN-MINUS, U+002D
+ HYPHEN-MINUS, U+003E GREATER-THAN SIGN).</p>
+
+ <dt>If the child node is a <code title="">DocumentType</code>
+
+ <dd>
+ <p>Append the literal string <code>&lt;!DOCTYPE</code> (U+003C
+ LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER
+ D, U+004F LATIN CAPITAL LETTER O, U+0043 LATIN CAPITAL LETTER C,
+ U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050
+ LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E), followed by a
+ space (U+0020 SPACE), followed by the value of the <var
+ title="">child</var> node's <code title="">name</code> DOM attribute,
+ followed by the literal string <code>&gt;</code> (U+003E GREATER-THAN
+ SIGN).</p>
+ </dl>
+
+ <p>Other node types (e.g. <code title="">Attr</code>) cannot occur as
+ children of elements. If they do, the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute must raise an
+ <code>INVALID_STATE_ERR</code> exception.</p>
+
+ <li>
+ <p>The result of the algorithm is the string <var title="">s</var>.
+ </ol>
+
+ <p><dfn id=escapingString>Escaping a string</dfn> (for the purposes of the
+ algorithm above) consists of replacing any occurrences of the "<code
+ title="">&amp;</code>" character by the string "<code
+ title="">&amp;amp;</code>", any occurrences of the "<code
+ title="">&lt;</code>" character by the string "<code
+ title="">&amp;lt;</code>", any occurrences of the "<code
+ title="">&gt;</code>" character by the string "<code
+ title="">&amp;gt;</code>", and any occurrences of the "<code
+ title="">&quot;</code>" character by the string "<code
+ title="">&amp;quot;</code>".
+
+ <p class=note>Entity reference nodes are <a
+ href="#entity-references">assumed to be expanded</a> by the user agent,
+ and are therefore not covered in the algorithm above.
+
+ <p class=note>If the element's contents are not conformant, it is possible
+ that the roundtripping through <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> will not work. For instance, if
+ the element is a <code>textarea</code> element to which a <code
+ title="">Comment</code> node has been appended, then assigning <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code> to
+ itself will result in the comment being displayed in the text field.
+ Similarly, if, as a result of DOM manipulation, the element contains a
+ comment that contains the literal string "<code title="">--&gt;</code>",
+ then when the result of serialising the element is parsed, the comment
+ will be truncated at that point and the rest of the comment will be
+ interpreted as markup. Another example would be making a <code><a
+ href="#script0">script</a></code> element contain a text node with the
+ text string "<code>&lt;/script></code>".
+
+ <p>On setting, if the node is a document, the <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code> DOM
+ attribute must run the following algorithm:
+
+ <ol>
+ <li>
+ <p>If the document has an <span>active
+ parser</span><!--XXX xref-->, then stop that parser, and throw away any
+ pending content in the input stream. <span class=big-issue>what about if
+ it doesn't, because it's either like a text/plain, or Atom, or PDF, or
+ XHTML, or image document, or something?</span></p>
+ <!-- XXX see also document.open() -->
+
+ <li>
+ <p>The user agent must remove the child nodes of the
+ <code>Document</code> whose <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute is being set.</p>
+
+ <li>
+ <p>The user agent must create a new <a href="#html-0">HTML parser</a>, in
+ its initial state, and associate it with the <code>Document</code> node.</p>
+ </li>
+ <!-- redundant, the document is forcibly already so labelled if we get here
+ <li>
+
+ <p>The user agent must mark the <code>Document</code> object as
+ being an <span title="HTML documents">HTML document</span>.</p>
+
+ </li>
+-->
+
+ <li>
+ <p>The user agent must place into the <a href="#input0">input stream</a>
+ for the <a href="#html-0">HTML parser</a> just created the string being
+ assigned into the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute.</p>
+
+ <li>
+ <p>The user agent must start the parser and let it run until it has
+ consumed all the characters just inserted into the input stream. (The
+ <code>Document</code> node will have been populated with elements and a
+ <code title=event-load><a href="#load0">load</a></code> event will have
+ fired on <a href="#the-body0" title="the body element">its body
+ element</a>.)</p>
+ </ol>
+
+ <p>Otherwise, if the node is an element, then setting the <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code> DOM
+ attribute must cause the following algorithm to run instead:
+
+ <ol>
+ <li>
+ <p>The user agent must create a new <code>Document</code> node, and mark
+ it as being an <a href="#html-" title="HTML documents">HTML
+ document</a>.</p>
+
+ <p>The user agent must create a new <a href="#html-0">HTML parser</a>,
+ and associate it with the just created <code>Document</code> node.</p>
+
+ <p class=note>Parts marked <dfn id=innerhtml1 title="innerHTML
+ case"><code>innerHTML</code> case</dfn> in algorithms in the parser
+ section are parts that only occur if the parser was created for the
+ purposes of handling the setting of an element's <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code>
+ attribute. The algorithms have been annotated with such markings for
+ informational purposes only; such markings have no normative weight. If
+ it is possible for a condition described as an <a
+ href="#innerhtml1"><code>innerHTML</code> case</a> to occur even when
+ the parser wasn't created for the purposes of handling an element's
+ <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute, then that is an error
+ in the specification.</p>
+
+ <li>
+ <p>The user agent must set the <a href="#html-0">HTML parser</a>'s <a
+ href="#tokenisation0">tokenisation</a> stage's <a
+ href="#content2">content model flag</a> according to the name of the
+ element whose <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute is being set, as
+ follows:</p>
+
+ <dl class=switch>
+ <dt>If it is a <code><a href="#title1">title</a></code> or
+ <code>textarea</code> element
+
+ <dd>Set the <a href="#content2">content model flag</a> to
+ <em>RCDATA</em>.
+
+ <dt>If it is a <code><a href="#style">style</a></code>, <code><a
+ href="#script0">script</a></code>, <code>xmp</code>, <code><a
+ href="#iframe">iframe</a></code>, <code>noembed</code>,
+ <code>noframes</code>, or <code><a href="#noscript">noscript</a></code>
+ element
+
+ <dd>Set the <a href="#content2">content model flag</a> to
+ <em>CDATA</em>.</dd>
+ <!-- note about noscript: we set it to CDATA here because if
+ someone is setting innerHTML, then we know scripting is enabled,
+ so the noscript element will be in CDATA mode -->
+
+ <dt>If it is a <code>plaintext</code> element
+
+ <dd>Set the <a href="#content2">content model flag</a> to
+ <em>PLAINTEXT</em>.
+
+ <dt>Otherwise
+
+ <dd>Set the <a href="#content2">content model flag</a> to
+ <em>PCDATA</em>.
+ </dl>
+
+ <li>
+ <p>The user agent must switch the <a href="#html-0">HTML parser</a>'s <a
+ href="#tree-construction0">tree construction</a> stage to <a
+ href="#the-main0">the main phase</a>.
+
+ <li>
+ <p>Let <var title="">root</var> be a new <code><a
+ href="#html">html</a></code> element with no attributes.</p>
+
+ <li>
+ <p>The user agent must append the element <var title="">root</var> to the
+ <code>Document</code> node created above.</p>
+
+ <li>
+ <p>The user agent must set up the parser's <a href="#stack">stack of open
+ elements</a> so that it contains just the single element <var
+ title="">root</var>.</p>
+
+ <li>
+ <p>The user agent must <a href="#reset" title="reset the insertion mode
+ appropriately">reset the parser's insertion mode appropriately</a>.</p>
+
+ <li>
+ <p>The user agent must set the parser's <a
+ href="#form-element"><code>form</code> element pointer</a> to the
+ nearest node to the element whose <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute is being set that is a
+ <code>form</code> element (going straight up the ancestor chain, and
+ including the element itself, if it is a <code>form</code> element), or,
+ if there is no such <code>form</code> element, to null.</p>
+
+ <li>
+ <p>The user agent must place into the <a href="#input0">input stream</a>
+ for the <a href="#html-0">HTML parser</a> just created the string being
+ assigned into the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute.</p>
+
+ <li>
+ <p>The user agent must start the parser and let it run until it has
+ consumed all the characters just inserted into the input stream.</p>
+
+ <li>
+ <p>The user agent must remove the children of the element whose <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code>
+ attribute is being set.</p>
+
+ <li>
+ <p>The user agent must move all the child nodes of the <var
+ title="">root</var> element to the element whose <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code>
+ attribute is being set, preserving their order.</p>
+ </ol>
+ <!-- XXX must make sure we spec that innerHTML causes mutation
+ events to fire, but document.write() doesn't. (the latter is already
+ req-stated in the parser section, btw) -->
+ <!-- http://msdn.microsoft.com/workshop/author/dhtml/reference/properties/innerhtml.asp -->
+ <!-- http://lxr.mozilla.org/seamonkey/source/content/html/content/src/nsGenericHTMLElement.cpp#879
+ note script execution disabled
+ http://lxr.mozilla.org/seamonkey/source/content/base/src/nsContentUtils.cpp#3308
+ http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/html/HTMLElement.cpp#L295
+ http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/html/HTMLElement.cpp#L242
+ http://trac.webkit.org/projects/webkit/browser/trunk/WebCore/html/HTMLTokenizer.cpp#L1742
+ -->
+
+ <h4 id=dynamic1><span class=secno>2.5.3. </span>Dynamic markup insertion in
+ XML</h4>
+
+ <p>In an XML context, the <dfn id=document.write1
+ title=dom-document-write-XML><code>document.write(<var
+ title="">s</var>)</code></dfn> method must raise an
+ <code>INVALID_ACCESS_ERR</code> exception.</p>
+ <!--
+ For XHTML: content must be well-formed. Where does
+ it insert? Immediately after the script that called document.write()?</p>
+ how do we handle async scripts vs sync scripts?
+
+Consider:
+data:text/xml,<script xmlns="http://www.w3.org/1999/xhtml"><![CDATA[ document.write('<foo>Test</foo>'); ]]></script>
+data:text/xml,<script xmlns="http://www.w3.org/1999/xhtml"><![CDATA[ alert('test'); alert(document.write); try { document.write('<foo>Test</foo>'); alert(document.childNodes.length); } catch (e) { alert(e); } ]]></script>
+
+-->
+
+ <p>The <dfn id=innerhtml2
+ title=dom-innerHTML-XML><code>innerHTML</code></dfn> attributes, on the
+ other hand, in an XML context, are usable.
+
+ <p>On getting, the <code title=dom-innerHTML-XML><a
+ href="#innerhtml2">innerHTML</a></code> DOM attribute on <code><a
+ href="#htmlelement">HTMLElement</a></code>s and <code><a
+ href="#htmldocument">HTMLDocument</a></code>s, in an XML context, must
+ return an XML namespace-well-formed internal general parsed entity
+ representation of the element or document. User agents may adjust prefixes
+ and namespace declarations in the serialisation (and indeed might be
+ forced to do so in some cases to obtain namespace-well-formed XML). <a
+ href="#refsXML">[XML]</a> <a href="#refsXMLNS">[XMLNS]</a>
+
+ <p>On setting, in an XML context, the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> DOM attribute must run the
+ following algorithm:
+
+ <ol>
+ <li>
+ <p>The user agent must create a new <span>XML parser</span>.</p>
+
+ <li>
+ <p>If the <code title=dom-innerHTML-XML><a
+ href="#innerhtml2">innerHTML</a></code> attribute is being set on an
+ element, the user agent must <span>feed the parser</span> just created
+ the string corresponding to the start tag of that element, declaring all
+ the namespace prefixes that are in scope on that element in the DOM, as
+ well as declaring the default namespace (if any) that is in scope on
+ that element in the DOM.</p>
+
+ <li>
+ <p>The user agent must <span>feed the parser</span> just created the
+ string being assigned into the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute.</p>
+
+ <li>
+ <p>If the <code title=dom-innerHTML-XML><a
+ href="#innerhtml2">innerHTML</a></code> attribute is being set on an
+ element, the user agent must <span>feed the parser</span> the string
+ corresponding to the end tag of that element.</p>
+
+ <li>
+ <p>If the parser found a well-formedness error, the attribute's setter
+ must raise a <code>SYNTAX_ERR</code> exception and abort these steps.</p>
+
+ <li>
+ <p>The user agent must remove the child nodes of the node whose <code
+ title=dom-innerHTML-XML><a href="#innerhtml2">innerHTML</a></code>
+ attribute is being set.</p>
+
+ <li>
+ <p>If the attribute is being set on a <code>Document</code> node, let
+ <var title="">new children</var> be the children of the document,
+ preserving their order. Otherwise, the attribute is being set on an
+ <code>Element</code> node; let <var title="">new children</var> be the
+ children of the document's root element, preserving their order.</p>
+
+ <li>
+ <p>If the attribute is being set on a <code>Document</code> node, let
+ <var title="">target document</var> be that <code>Document</code> node.
+ Otherwise, the attribute is being set on an <code>Element</code> node;
+ let <var title="">target document</var> be the <code
+ title="">ownerDocument</code> of that <code>Element</code>.</p>
+
+ <li>
+ <p>Set the <code title="">ownerDocument</code> of all the nodes in <var
+ title="">new children</var> to the <var title="">target document</var>.</p>
+
+ <li>
+ <p>Append all the <var title="">new children</var> nodes to the node
+ whose <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute is being set,
+ preserving their order.</p>
+ </ol>
+
+ <h3 id=apis-in><span class=secno>2.6. </span>APIs in HTML documents</h3>
+ <!-- XXX case-sensitivity training required here. -->
+
+ <p>For <a href="#html-">HTML documents</a>, and for <a
+ href="#html-elements">HTML elements</a> in <a href="#html-">HTML
+ documents</a>, certain APIs defined in DOM3 Core become case-insensitive
+ or case-changing, as sometimes defined in DOM3 Core, and as summarised or
+ required below. <a href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <p>This does not apply to <a href="#xml-documents">XML documents</a> or to
+ elements that are not in the <a href="#html-namespace0">HTML namespace</a>
+ despite being in <a href="#html-">HTML documents</a>.
+
+ <dl>
+ <dt><code title="">Element.tagName</code>, <code
+ title="">Node.nodeName</code>, and <code title="">Node.localName</code>
+
+ <dd>
+ <p>These attributes return tag names in all uppercase<!-- XXX
+ xref-->
+ and attribute names in all lowercase<!-- XXX xref -->, regardless of the
+ case with which they were created.</p>
+
+ <dt><code title="">Document.createElement()</code>
+
+ <dd>
+ <p>The canonical form of HTML markup is all-lowercase; thus, this method
+ will lowercase<!-- XXX xref --> the argument before creating the
+ requisite element. Also, the element created must be in the <a
+ href="#html-namespace0">HTML namespace</a>.</p>
+
+ <p class=note>This doesn't apply to <code
+ title="">Document.createElementNS()</code>. Thus, it is possible, by
+ passing this last method a tag name in the wrong case, to create an
+ element that claims to have the tag name of an HTML element, but doesn't
+ support its interfaces, because it really has another tag name not
+ accessible from the DOM APIs.</p>
+
+ <dt><code title="">Element.setAttributeNode()</code>
+
+ <dd>
+ <p>When an <code>Attr</code> node is set on an <a href="#html-elements"
+ title="HTML elements">HTML element</a>, it must have its name
+ lowercased<!-- XXX xref --> before the element is affected.</p>
+
+ <p class=note>This doesn't apply to <code
+ title="">Element.setAttributeNodeNS()</code>.</p>
+
+ <dt><code title="">Element.setAttribute()</code>
+
+ <dd>
+ <p>When an attribute is set on an <a href="#html-elements" title="HTML
+ elements">HTML element</a>, the name argument must be
+ lowercased<!-- XXX xref
+ --> before the element is affected.</p>
+
+ <p class=note>This doesn't apply to <code
+ title="">Element.setAttributeNS()</code>.</p>
+
+ <dt><code title="">Document.getElementsByTagName()</code> and <code
+ title="">Element.getElementsByTagName()</code>
+
+ <dd>
+ <p>These methods (but not their namespaced counterparts) must compare the
+ given argument case-insensitively<!-- XXX xref --> when looking at <a
+ href="#html-elements" title="HTML elements">HTML elements</a>, and
+ case-sensitively otherwise.</p>
+
+ <p class=note>Thus, in an <a href="#html-" title="HTML documents">HTML
+ document</a> with nodes in multiple namespaces, these methods will be
+ both case-sensitive and case-insensitive at the same time.</p>
+
+ <dt><code title="">Document.renameNode()</code>
+
+ <dd>
+ <p>If the new namespace is the <a href="#html-namespace0">HTML
+ namespace</a>, then the new qualified name must be lowercased before the
+ rename takes place.<!-- XXX xref --></p>
+ </dl>
+
+ <h2 id=semantics><span class=secno>3. </span>Semantics and structure of
+ HTML elements</h2>
+
+ <h3 id=semantics-intro><span class=secno>3.1. </span>Introduction</h3>
+
+ <p><em>This section is non-normative.</em>
+
+ <p class=big-issue>An introduction to marking up a document.
+
+ <h3 id=common1><span class=secno>3.2. </span>Common microsyntaxes</h3>
+
+ <p>There are various places in HTML that accept particular data types, such
+ as dates or numbers. This section describes what the conformance criteria
+ for content in those formats are, and how to parse them.</p>
+ <!-- XXX need to define how to handle U+000A LINE FEED and U+000D
+ CARRIAGE RETURN in attributes (for HTML) -->
+
+ <p class=big-issue>Need to go through the whole spec and make sure all the
+ attribute values are clearly defined either in terms of microsyntaxes or
+ in terms of other specs, or as "Text" or some such.
+
+ <h4 id=common2><span class=secno>3.2.1. </span>Common parser idioms</h4>
+
+ <p>The <dfn id=space title="space character">space characters</dfn>, for
+ the purposes of this specification, are U+0020 SPACE, U+0009 CHARACTER
+ TABULATION (tab), U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), and U+000D CARRIAGE RETURN (CR).
+
+ <p>Some of the micro-parsers described below follow the pattern of having
+ an <var title="">input</var> variable that holds the string being parsed,
+ and having a <var title="">position</var> variable pointing at the next
+ character to parse in <var title="">input</var>.
+
+ <p>For parsers based on this pattern, a step that requires the user agent
+ to <dfn id=collect>collect a sequence of characters</dfn> means that the
+ following algorithm must be run, with <var title="">characters</var> being
+ the set of characters that can be collected:
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> and <var title="">position</var> be the
+ same variables as those of the same name in the algorithm that invoked
+ these steps.
+
+ <li>
+ <p>Let <var title="">result</var> be the empty string.
+
+ <li>
+ <p>While <var title="">position</var> doesn't point past the end of <var
+ title="">input</var> and the character at <var title="">position</var>
+ is one of the <var title="">characters</var>, append that character to
+ the end of <var title="">result</var> and advance <var
+ title="">position</var> to the next character in <var
+ title="">input</var>.
+
+ <li>
+ <p>Return <var title="">result</var>.
+ </ol>
+
+ <p>The step <dfn id=skip-whitespace>skip whitespace</dfn> means that the
+ user agent must <a href="#collect">collect a sequence of characters</a>
+ that are <a href="#space" title="space character">space characters</a>.
+ The step <dfn id=skip->skip Zs characters</dfn> means that the user agent
+ must <a href="#collect">collect a sequence of characters</a> that are in
+ the Unicode character class Zs. In both cases, the collected characters
+ are not used. <a href="#refsUNICODE">[UNICODE]</a>
+
+ <h4 id=boolean><span class=secno>3.2.2. </span>Boolean attributes</h4>
+
+ <p>A number of attributes in HTML5 are <dfn id=boolean0 title="boolean
+ attribute">boolean attributes</dfn>. The presence of a boolean attribute
+ on an element represents the true value, and the absence of the attribute
+ represents the false value.
+
+ <p>If the attribute is present, its value must either be the empty string
+ or the attribute's canonical name, exactly, with no leading or trailing
+ whitespace, and in lowercase.
+
+ <h4 id=numbers><span class=secno>3.2.3. </span>Numbers</h4>
+
+ <h5 id=unsigned><span class=secno>3.2.3.1. </span>Unsigned integers</h5>
+
+ <p>A string is a <dfn id=valid>valid non-negative integer</dfn> if it
+ consists of one or more characters in the range U+0030 DIGIT ZERO (0) to
+ U+0039 DIGIT NINE (9).
+
+ <p>The <dfn id=rules>rules for parsing non-negative integers</dfn> are as
+ given in the following algorithm. When invoked, the steps must be followed
+ in the order given, aborting at the first step that returns a value. This
+ algorithm will either return zero, a positive integer, or an error.
+ Leading spaces are ignored. Trailing spaces and indeed any trailing
+ garbage characters are ignored.
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>Let <var title="">value</var> have the value 0.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace.</a>
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, return an error.
+
+ <li>
+ <p>If the next character is not one of U+0030 DIGIT ZERO (0) .. U+0039
+ DIGIT NINE (9), then return an error.
+ </li>
+ <!-- Ok. At this point we know we have a number. It might have
+ trailing garbage which we'll ignore, but it's a number, and we
+ won't return an error. -->
+
+ <li>
+ <p>If the next character is one of U+0030 DIGIT ZERO (0) .. U+0039 DIGIT
+ NINE (9):</p>
+
+ <ol>
+ <li>Multiply <var title="">value</var> by ten.
+
+ <li>Add the value of the current character (0..9) to <var
+ title="">value</var>.
+
+ <li>Advance <var title="">position</var> to the next character.
+
+ <li>If <var title="">position</var> is not past the end of <var
+ title="">input</var>, return to the top of step 7 in the overall
+ algorithm (that's the step within which these substeps find
+ themselves).
+ </ol>
+
+ <li>
+ <p>Return <var title="">value</var>.
+ </ol>
+
+ <h5 id=signed><span class=secno>3.2.3.2. </span>Signed integers</h5>
+
+ <p>A string is a <dfn id=valid0>valid integer</dfn> if it consists of one
+ or more characters in the range U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE
+ (9), optionally prefixed with a U+002D HYPHEN-MINUS ("-") character.
+
+ <p>The <dfn id=rules0>rules for parsing integers</dfn> are similar to the
+ rules for non-negative integers, and are as given in the following
+ algorithm. When invoked, the steps must be followed in the order given,
+ aborting at the first step that returns a value. This algorithm will
+ either return an integer or an error. Leading spaces are ignored. Trailing
+ spaces and trailing garbage characters are ignored.
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>Let <var title="">value</var> have the value 0.
+
+ <li>
+ <p>Let <var title="">sign</var> have the value "positive".
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace.</a>
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, return an error.
+
+ <li>
+ <p>If the character indicated by <var title="">position</var> (the first
+ character) is a U+002D HYPHEN-MINUS ("-") character:</p>
+
+ <ol>
+ <li>Let <var title="">sign</var> be "negative".
+
+ <li>Advance <var title="">position</var> to the next character.
+
+ <li>If <var title="">position</var> is past the end of <var
+ title="">input</var>, return an error.
+ </ol>
+
+ <li>
+ <p>If the next character is not one of U+0030 DIGIT ZERO (0) .. U+0039
+ DIGIT NINE (9), then return an error.
+ </li>
+ <!-- Ok. At this point we know we have a number. It might have
+ trailing garbage which we'll ignore, but it's a number, and we
+ won't return an error. -->
+
+ <li>
+ <p>If the next character is one of U+0030 DIGIT ZERO (0) .. U+0039 DIGIT
+ NINE (9):</p>
+
+ <ol>
+ <li>Multiply <var title="">value</var> by ten.
+
+ <li>Add the value of the current character (0..9) to <var
+ title="">value</var>.
+
+ <li>Advance <var title="">position</var> to the next character.
+
+ <li>If <var title="">position</var> is not past the end of <var
+ title="">input</var>, return to the top of step 9 in the overall
+ algorithm (that's the step within which these substeps find
+ themselves).
+ </ol>
+
+ <li>
+ <p>If <var title="">sign</var> is "positive", return <var
+ title="">value</var>, otherwise return 0-<var title="">value</var>.
+ </ol>
+
+ <h5 id=real-numbers><span class=secno>3.2.3.3. </span>Real numbers</h5>
+
+ <p>A string is a <dfn id=valid1>valid floating point number</dfn> if it
+ consists of one or more characters in the range U+0030 DIGIT ZERO (0) to
+ U+0039 DIGIT NINE (9), optionally with a single U+002E FULL STOP (".")
+ character somewhere (either before these numbers, in between two numbers,
+ or after the numbers), all optionally prefixed with a U+002D HYPHEN-MINUS
+ ("-") character.
+
+ <p>The <dfn id=rules1>rules for parsing floating point number values</dfn>
+ are as given in the following algorithm. As with the previous algorithms,
+ when this one is invoked, the steps must be followed in the order given,
+ aborting at the first step that returns a value. This algorithm will
+ either return a number or an error. Leading spaces are ignored. Trailing
+ spaces and garbage characters are ignored.
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>Let <var title="">value</var> have the value 0.
+
+ <li>
+ <p>Let <var title="">sign</var> have the value "positive".
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace.</a>
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, return an error.
+
+ <li>
+ <p>If the character indicated by <var title="">position</var> (the first
+ character) is a U+002D HYPHEN-MINUS ("-") character:</p>
+
+ <ol>
+ <li>Let <var title="">sign</var> be "negative".
+
+ <li>Advance <var title="">position</var> to the next character.
+
+ <li>If <var title="">position</var> is past the end of <var
+ title="">input</var>, return an error.
+ </ol>
+
+ <li>
+ <p>If the next character is not one of U+0030 DIGIT ZERO (0) .. U+0039
+ DIGIT NINE (9) or U+002E FULL STOP ("."), then return an error.
+
+ <li>
+ <p>If the next character is U+002E FULL STOP ("."), but either that is
+ the last character or the character after that one is not one of U+0030
+ DIGIT ZERO (0) .. U+0039 DIGIT NINE (9), then return an error.
+ </li>
+ <!-- Ok. At this point we know we have a number. It might have
+ trailing garbage which we'll ignore, but it's a number, and we
+ won't return an error. -->
+
+ <li>
+ <p>If the next character is one of U+0030 DIGIT ZERO (0) .. U+0039 DIGIT
+ NINE (9):</p>
+
+ <ol>
+ <li>Multiply <var title="">value</var> by ten.
+
+ <li>Add the value of the current character (0..9) to <var
+ title="">value</var>.
+
+ <li>Advance <var title="">position</var> to the next character.
+
+ <li>If <var title="">position</var> is past the end of <var
+ title="">input</var>, then if <var title="">sign</var> is "positive",
+ return <var title="">value</var>, otherwise return 0-<var
+ title="">value</var>.
+
+ <li>Otherwise return to the top of step 10 in the overall algorithm
+ (that's the step within which these substeps find themselves).
+ </ol>
+
+ <li>
+ <p>Otherwise, if the next character is not a U+002E FULL STOP ("."), then
+ if <var title="">sign</var> is "positive", return <var
+ title="">value</var>, otherwise return 0-<var title="">value</var>.
+
+ <li>
+ <p>The next character is a U+002E FULL STOP ("."). Advance <var
+ title="">position</var> to the character after that.
+
+ <li>
+ <p>Let <var title="">divisor</var> be 1.
+
+ <li>
+ <p>If the next character is one of U+0030 DIGIT ZERO (0) .. U+0039 DIGIT
+ NINE (9):</p>
+
+ <ol>
+ <li>Multiply <var title="">divisor</var> by ten.
+
+ <li>Add the value of the current character (0..9) divided by <var
+ title="">divisor</var>, to <var title="">value</var>.
+
+ <li>Advance <var title="">position</var> to the next character.
+
+ <li>If <var title="">position</var> is past the end of <var
+ title="">input</var>, then if <var title="">sign</var> is "positive",
+ return <var title="">value</var>, otherwise return 0-<var
+ title="">value</var>.
+
+ <li>Otherwise return to the top of step 14 in the overall algorithm
+ (that's the step within which these substeps find themselves).
+ </ol>
+
+ <li>
+ <p>Otherwise, if <var title="">sign</var> is "positive", return <var
+ title="">value</var>, otherwise return 0-<var title="">value</var>.
+ </ol>
+
+ <h5 id=ratios><span class=secno>3.2.3.4. </span>Ratios</h5>
+
+ <p class=note>The algorithms described in this section are used by the
+ <code><a href="#progress">progress</a></code> and <code><a
+ href="#meter">meter</a></code> elements.
+
+ <p>A <dfn id=valid2>valid denominator punctuation character</dfn> is one of
+ the characters from the table below. There is <dfn id=a-value
+ title="values associated with denominator punctuation characters">a value
+ associated with each denominator punctuation character</dfn>, as shown in
+ the table below.
+
+ <table>
+ <thead>
+ <tr>
+ <th colspan=2>Denominator Punctuation Character
+
+ <th>Value
+
+ <tbody>
+ <tr>
+ <td>U+0025 PERCENT SIGN
+
+ <td>&#x0025;
+
+ <td>100
+
+ <tr>
+ <td>U+066A ARABIC PERCENT SIGN
+
+ <td>&#x066A;
+
+ <td>100
+
+ <tr>
+ <td>U+FE6A SMALL PERCENT SIGN
+
+ <td>&#xFE6A;
+
+ <td>100
+
+ <tr>
+ <td>U+FF05 FULLWIDTH PERCENT SIGN
+
+ <td>&#xFF05;
+
+ <td>100
+
+ <tr>
+ <td>U+2030 PER MILLE SIGN
+
+ <td>&#x2030;
+
+ <td>1000
+
+ <tr>
+ <td>U+2031 PER TEN THOUSAND SIGN
+
+ <td>&#x2031;
+
+ <td>10000
+ </table>
+
+ <p>The <dfn id=steps>steps for finding one or two numbers of a ratio in a
+ string</dfn> are as follows:
+
+ <ol>
+ <li>If the string is empty, then return nothing and abort these steps.
+
+ <li><a href="#find-a">Find a number</a> in the string according to the
+ algorithm below, starting at the start of the string.
+
+ <li>If the sub-algorithm in step 2 returned nothing or returned an error
+ condition, return nothing and abort these steps.
+
+ <li>Set <var title="">number1</var> to the number returned by the
+ sub-algorithm in step 2.
+
+ <li>Starting with the character immediately after the last one examined by
+ the sub-algorithm in step 2, skip any characters in the string that are
+ in the Unicode character class Zs (this might match zero characters). <a
+ href="#refsUNICODE">[UNICODE]</a>
+
+ <li>If there are still further characters in the string, and the next
+ character in the string is a <a href="#valid2">valid denominator
+ punctuation character</a>, set <var title="">denominator</var> to that
+ character.
+
+ <li>If the string contains any other characters in the range U+0030 DIGIT
+ ZERO to U+0039 DIGIT NINE, but <var title="">denominator</var> was given
+ a value in step 6, return nothing and abort these steps.
+
+ <li>Otherwise, if <var title="">denominator</var> was given a value in
+ step 6, return <var title="">number1</var> and <var
+ title="">denominator</var> and abort these steps.
+
+ <li><a href="#find-a">Find a number</a> in the string again, starting
+ immediately after the last character that was examined by the
+ sub-algorithm in step 2.
+
+ <li>If the sub-algorithm in step 9 returned nothing or an error condition,
+ return nothing and abort these steps.
+
+ <li>Set <var title="">number2</var> to the number returned by the
+ sub-algorithm in step 9.
+
+ <li>If there are still further characters in the string, and the next
+ character in the string is a <a href="#valid2">valid denominator
+ punctuation character</a>, return nothing and abort these steps.
+
+ <li>If the string contains any other characters in the range U+0030 DIGIT
+ ZERO to U+0039 DIGIT NINE, return nothing and abort these steps.
+
+ <li>Otherwise, return <var title="">number1</var> and <var
+ title="">number2</var>.
+ </ol>
+ <!-- XXX again, this should say "positive number" -->
+
+ <p>The algorithm to <dfn id=find-a>find a number</dfn> is as follows. It is
+ given a string and a starting position, and returns either nothing, a
+ number, or an error condition.
+
+ <ol>
+ <li>Starting at the given starting position, ignore all characters in the
+ given string until the first character that is either a U+002E FULL STOP
+ or one of the ten characters in the range U+0030 DIGIT ZERO to U+0039
+ DIGIT NINE.
+
+ <li>If there are no such characters, return nothing and abort these steps.
+
+ <li>Starting with the character matched in step 1, collect all the
+ consecutive characters that are either a U+002E FULL STOP or one of the
+ ten characters in the range U+0030 DIGIT ZERO to U+0039 DIGIT NINE, and
+ assign this string of one or more characters to <var
+ title="">string</var>.
+
+ <li>If <var title="">string</var> contains more than one U+002E FULL STOP
+ character then return an error condition and abort these steps.
+
+ <li>Parse <var title="">string</var> according to the <a
+ href="#rules1">rules for parsing floating point number values</a>, to
+ obtain <var title="">number</var>. This step cannot fail (<var
+ title="">string</var> is guaranteed to be a <a href="#valid1">valid
+ floating point number</a>).
+
+ <li>Return <var title="">number</var>.
+ </ol>
+
+ <h5 id=percentages-and-dimensions><span class=secno>3.2.3.5.
+ </span>Percentages and dimensions</h5>
+
+ <p class=big-issue><dfn id=valid3 title="valid non-negative
+ percentage">valid non-negative percentages</dfn>, <dfn id=rules2>rules for
+ parsing dimension values</dfn> (only used by height/width on img, embed,
+ object &mdash; maybe they should do the same as canvas, then this wouldn't
+ even be needed)
+
+ <h5 id=lists><span class=secno>3.2.3.6. </span>Lists of integers</h5>
+
+ <p>A <dfn id=valid4>valid list of integers</dfn> is a number of <a
+ href="#valid0" title="valid integer">valid integers</a> separated by
+ U+002C COMMA characters, with no other characters (e.g. no <a
+ href="#space" title="space character">space characters</a>). In addition,
+ there might be restrictions on the number of integers that can be given,
+ or on the range of values allowed.
+
+ <p>The <dfn id=rules3>rules for parsing a list of integers</dfn> are as
+ follows:
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>Let <var title="">numbers</var> be an initially empty list of
+ integers. This list will be the result of this algorithm.
+
+ <li>
+ <p>If there is a character in the string <var title="">input</var> at
+ position <var title="">position</var>, and it is either a U+002C COMMA
+ character or a U+0020 SPACE character, then advance <var
+ title="">position</var> to the next character in <var
+ title="">input</var>, or to beyond the end of the string if there are no
+ more characters.
+
+ <li>
+ <p>If <var title="">position</var> points to beyond the end of <var
+ title="">input</var>, return <var title="">numbers</var> and abort.
+
+ <li>
+ <p>If the character in the string <var title="">input</var> at position
+ <var title="">position</var> is a U+002C COMMA character or a U+0020
+ SPACE character, return to step 4.
+
+ <li>
+ <p>Let <var title="">negated</var> be false.
+
+ <li>
+ <p>Let <var title="">value</var> be 0.
+
+ <li>
+ <p>Let <var title="">multiple</var> be 1.
+
+ <li>
+ <p>Let <var title="">started</var> be false.
+
+ <li>
+ <p>Let <var title="">finished</var> be false.
+
+ <li>
+ <p>Let <var title="">bogus</var> be false.
+
+ <li>
+ <p><em>Parser:</em> If the character in the string <var
+ title="">input</var> at position <var title="">position</var> is:</p>
+
+ <dl
+ class=switch><!-- XXX this doesn't quite match what IE does: http://www.hixie.ch/tests/adhoc/html/flow/image-maps/004-demo.html
+ I couldn't work out a pattern to IE's results. Let me know if you can see one. -->
+
+ <dt>A U+002D HYPHEN-MINUS character
+
+ <dd>
+ <p>Follow these substeps:</p>
+
+ <ol>
+ <li>If <var title="">finished</var> is true, skip to the next step in
+ the overall set of steps.
+
+ <li>If <var title="">started</var> is true or if <var
+ title="">bogus</var> is true, let <var title="">negated</var> be
+ false.
+
+ <li>Otherwise, if <var title="">started</var> is false and if <var
+ title="">bogus</var> is false, let <var title="">negated</var> be
+ true.
+
+ <li>Let <var title="">started</var> be true.
+ </ol>
+
+ <dt>A character in the range U+0030 DIGIT ZERO .. U+0039 DIGIT NINE
+
+ <dd>
+ <p>Follow these substeps:</p>
+
+ <ol>
+ <li>If <var title="">finished</var> is true, skip to the next step in
+ the overall set of steps.
+
+ <li>Let <var title="">n</var> be the value of the digit, interpreted
+ in base ten, multiplied by <var title="">multiple</var>.
+
+ <li>Add <var title="">n</var> to <var title="">value</var>.
+
+ <li>If <var title="">value</var> is greater than zero, multiply <var
+ title="">multiple</var> by ten.
+
+ <li>Let <var title="">started</var> be true.
+ </ol>
+
+ <dt>A U+002C COMMA character
+
+ <dt>A U+0020 SPACE character
+
+ <dd>
+ <p>Follow these substeps:</p>
+
+ <ol>
+ <li>If <var title="">started</var> is false, return the <var
+ title="">numbers</var> list and abort.
+
+ <li>If <var title="">negated</var> is true, then negate <var
+ title="">value</var>.
+
+ <li>Append <var title="">value</var> to the <var
+ title="">numbers</var> list.
+
+ <li>Jump to step 4 in the overall set of steps.
+ </ol>
+
+ <dt>A U+002E FULL STOP character
+
+ <dd>
+ <p>Follow these substeps:</p>
+
+ <ol>
+ <li>Let <var title="">finished</var> be true.
+ </ol>
+
+ <dt>Any other character
+
+ <dd>
+ <p>Follow these substeps:</p>
+
+ <ol>
+ <li>If <var title="">finished</var> is true, skip to the next step in
+ the overall set of steps.
+
+ <li>Let <var title="">negated</var> be false.
+
+ <li>Let <var title="">bogus</var> be true.
+
+ <li>If <var title="">started</var> is true, then return the <var
+ title="">numbers</var> list, and abort. (The value in <var
+ title="">value</var> is not appended to the list first; it is
+ dropped.)
+ </ol>
+ </dl>
+
+ <li>
+ <p>Advance <var title="">position</var> to the next character in <var
+ title="">input</var>, or to beyond the end of the string if there are no
+ more characters.
+
+ <li>
+ <p>If <var title="">position</var> points to a character (and not to
+ beyond the end of <var title="">input</var>), jump to the big
+ <em>Parser</em> step above.
+
+ <li>
+ <p>If <var title="">negated</var> is true, then negate <var
+ title="">value</var>.
+
+ <li>
+ <p>If <var title="">started</var> is true, then append <var
+ title="">value</var> to the <var title="">numbers</var> list, return
+ that list, and abort.
+
+ <li>
+ <p>Return the <var title="">numbers</var> list and abort.
+ </ol>
+
+ <h4 id=dates><span class=secno>3.2.4. </span>Dates and times</h4>
+
+ <p>In the algorithms below, the <dfn id=number>number of days in month <var
+ title="">month</var> of year <var title="">year</var></dfn> is:
+ <em>31</em> if <var title="">month</var> is 1, 3, 5, 7, 8, 10, or 12;
+ <em>30</em> if <var title="">month</var> is 4, 6, 9, or 11; <em>29</em> if
+ <var title="">month</var> is 2 and <var title="">year</var> is a number
+ divisible by 400, or if <var title="">year</var> is a number divisible by
+ 4 but not by 100; and <em>28</em> otherwise. This takes into account leap
+ years in the Gregorian calendar. <a href="#refsGREGORIAN">[GREGORIAN]</a>
+
+ <h5 id=specific><span class=secno>3.2.4.1. </span>Specific moments in time</h5>
+
+ <p>A string is a <dfn id=valid5>valid datetime</dfn> if it has four digits
+ (representing the year), a literal hyphen, two digits (representing the
+ month), a literal hyphen, two digits (representing the day), optionally
+ some spaces, either a literal T or a space, optionally some more spaces,
+ two digits (for the hour), a colon, two digits (the minutes), optionally
+ the seconds (which, if included, must consist of another colon, two digits
+ (the integer part of the seconds), and optionally a decimal point followed
+ by one or more digits (for the fractional part of the seconds)),
+ optionally some spaces, and finally either a literal Z (indicating the
+ time zone is UTC), or, a plus sign or a minus sign followed by two digits,
+ a colon, and two digits (for the sign, the hours and minutes of the
+ timezone offset respectively); with the month-day combination being a
+ valid date in the given year according to the Gregorian calendar, the hour
+ values (<var title="">h</var>) being in the range 0&nbsp;&le;&nbsp;<var
+ title="">h</var>&nbsp;&le;&nbsp;23, the minute values (<var
+ title="">m</var>) in the range 0&nbsp;&le;&nbsp;<var
+ title="">m</var>&nbsp;&le;&nbsp;59, and the second value (<var
+ title="">s</var>) being in the range 0&nbsp;&le;&nbsp;<var
+ title="">h</var>&nbsp;&lt;&nbsp;60. <a
+ href="#refsGREGORIAN">[GREGORIAN]</a></p>
+ <!--XXX [GREGORIAN] should point to
+ <dd id="refsGREGORIAN">[GREGORIAN]</dd>
+ <dd>(Non-normative) <cite>Inter Gravissimas</cite>, A. Lilius, C. Clavius. Gregory XIII Papal Bulls, February 1582.</dd>
+ -->
+
+ <p>The digits must be characters in the range U+0030 DIGIT ZERO (0) to
+ U+0039 DIGIT NINE (9), the hyphens must be U+002D HYPHEN-MINUS
+ characters, the T must be a U+0054 LATIN CAPITAL LETTER T, the colons must
+ be U+003A COLON characters, the decimal point must be a U+002E FULL STOP,
+ the Z must be a U+005A LATIN CAPITAL LETTER Z, the plus sign must be a
+ U+002B PLUS SIGN, and the minus U+002D (same as the hyphen).
+
+ <div class=example>
+ <p>The following are some examples of dates written as <a href="#valid5"
+ title="valid datetime">valid datetimes</a>.</p>
+
+ <dl>
+ <dt>"<code>0037-12-13 00:00 Z</code>"
+
+ <dd>Midnight UTC on the birthday of Nero (the Roman Emperor).
+
+ <dt>"<code>1979-10-14T12:00:00.001-04:00</code>"
+
+ <dd>One millisecond after noon on October 14th 1979, in the time zone in
+ use on the east coast of North America during daylight saving time.
+
+ <dt>"<code>8592-01-01 T 02:09 +02:09</code>"
+
+ <dd>Midnight UTC on the 1st of January, 8592. The time zone associated
+ with that time is two hours and nine minutes ahead of UTC.
+ </dl>
+
+ <p>Several things are notable about these dates:</p>
+
+ <ul>
+ <li>Years with fewer than four digits have to be zero-padded. The date
+ "37-12-13" would not be a valid date.
+
+ <li>To unambiguously identify a moment in time prior to the introduction
+ of the Gregorian calendar, the date has to be first converted to the
+ Gregorian calendar from the calendar in use at the time (e.g. from the
+ Julian calendar). The date of Nero's birth is the 15th of December 37,
+ in the Julian Calendar, which is the 13th of December 37 in the
+ Gregorian Calendar.</li>
+ <!--
+ XXX this might not be true. I can't find a reference that gives
+ his birthday with an explicit statement about the calendar being
+ used. However, it seems unlikely that it would be given in the
+ Gregorian calendar, so I assume sites use the Julian one. -->
+
+ <li>The time and timezone components are not optional.
+
+ <li>Dates before the year 0 or after the year 9999 can't be represented
+ as a datetime in this version of HTML.
+
+ <li>Time zones differ based on daylight saving time.
+ </ul>
+ </div>
+
+ <p class=note>Conformance checkers can use the algorithm below to determine
+ if a datetime is a valid datetime or not.
+
+ <p>To <dfn id=datetime-parser>parse a string as a datetime value</dfn>, a
+ user agent must apply the following algorithm to the string. This will
+ either return a time in UTC, with associated timezone information for
+ round tripping or display purposes, or nothing, indicating the value is
+ not a <a href="#valid5">valid datetime</a>. If at any point the algorithm
+ says that it "fails", this means that it returns nothing.
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly four characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that number
+ be the <var title="">year</var>.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var> or if the character at <var title="">position</var>
+ is not a U+002D HYPHEN-MINUS character, then fail. Otherwise, move <var
+ title="">position</var> forwards one character.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly two characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that number
+ be the <var title="">month</var>.
+
+ <li>If <var title="">month</var> is not a number in the range
+ 1&nbsp;&le;&nbsp;<var title="">month</var>&nbsp;&le;&nbsp;12, then fail.
+
+ <li>
+ <p>Let <var title="">maxday</var> be the <a href="#number">number of days
+ in month <var title="">month</var> of year <var title="">year</var></a>.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var> or if the character at <var title="">position</var>
+ is not a U+002D HYPHEN-MINUS character, then fail. Otherwise, move <var
+ title="">position</var> forwards one character.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly two characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that number
+ be the <var title="">day</var>.
+
+ <li>
+ <p>If <var title="">day</var> is not a number in the range
+ 1&nbsp;&le;&nbsp;<var title="">month</var>&nbsp;&le;&nbsp;<var
+ title="">maxday</var>, then fail.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> that are
+ either U+0054 LATIN CAPITAL LETTER T characters or <a href="#space"
+ title="space character">space characters</a>. If the collected sequence
+ is zero characters long, or if it contains more than one U+0054 LATIN
+ CAPITAL LETTER T character, then fail.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly two characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that number
+ be the <var title="">hour</var>.
+
+ <li>If <var title="">hour</var> is not a number in the range
+ 0&nbsp;&le;&nbsp;<var title="">hour</var>&nbsp;&le;&nbsp;23, then fail.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var> or if the character at <var title="">position</var>
+ is not a U+003A COLON character, then fail. Otherwise, move <var
+ title="">position</var> forwards one character.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly two characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that number
+ be the <var title="">minute</var>.
+
+ <li>If <var title="">minute</var> is not a number in the range
+ 0&nbsp;&le;&nbsp;<var title="">minute</var>&nbsp;&le;&nbsp;59, then fail.
+
+ <li>
+ <p>Let <var title="">second</var> be a string with the value "0".
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var>, then fail.
+
+ <li>
+ <p>If the character at <var title="">position</var> is a U+003A COLON,
+ then:</p>
+
+ <ol>
+ <li>
+ <p>Advance <var title="">position</var> to the next character in <var
+ title="">input</var>.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var>, or at the last character in <var
+ title="">input</var>, or if the next <em>two</em> characters in <var
+ title="">input</var> starting at <var title="">position</var> are not
+ two characters both in the range U+0030 DIGIT ZERO (0) to U+0039 DIGIT
+ NINE (9), then fail.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> that are
+ either characters in the range U+0030 DIGIT ZERO (0) to U+0039 DIGIT
+ NINE (9) or U+002E FULL STOP characters. If the collected sequence has
+ more than one U+002E FULL STOP character, or if the last character in
+ the sequence is a U+002E FULL STOP character, then fail. Otherwise,
+ let the collected string be <var title="">second</var> instead of its
+ previous value.
+ </ol>
+
+ <li>
+ <p>Interpret <var title="">second</var> as a base ten number (possibly
+ with a fractional part). Let that number be <var title="">second</var>
+ instead of the string version.
+
+ <li>If <var title="">second</var> is not a number in the range
+ 0&nbsp;&le;&nbsp;<var title="">hour</var>&nbsp;&lt;&nbsp;60, then fail.
+ (The values 60 and 61 are not allowed: leap seconds cannot be represented
+ by datetime values.)
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var>, then fail.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace.</a>
+
+ <li>
+ <p>If the character at <var title="">position</var> is a U+005A LATIN
+ CAPITAL LETTER Z, then:</p>
+
+ <ol>
+ <li>
+ <p>Let <var title="">timezone<sub title="">hours</sub></var> be 0.
+
+ <li>
+ <p>Let <var title="">timezone<sub title="">minutes</sub></var> be 0.
+
+ <li>
+ <p>Advance <var title="">position</var> to the next character in <var
+ title="">input</var>.
+ </ol>
+
+ <li>
+ <p>Otherwise, if the character at <var title="">position</var> is either
+ a U+002B PLUS SIGN ("+") or a U+002D HYPHEN-MINUS ("-"), then:</p>
+
+ <ol>
+ <li>
+ <p>If the character at <var title="">position</var> is a U+002B PLUS
+ SIGN ("+"), let <var title="">sign</var> be "positive". Otherwise,
+ it's a U+002D HYPHEN-MINUS ("-"); let <var title="">sign</var> be
+ "negative".
+
+ <li>
+ <p>Advance <var title="">position</var> to the next character in <var
+ title="">input</var>.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly two characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that
+ number be the <var title="">timezone<sub title="">hours</sub></var>.
+
+ <li>If <var title="">timezone<sub title="">hours</sub></var> is not a
+ number in the range 0&nbsp;&le;&nbsp;<var title="">timezone<sub
+ title="">hours</sub></var>&nbsp;&le;&nbsp;23, then fail.
+
+ <li>If <var title="">sign</var> is "negative", then negate <var
+ title="">timezone<sub title="">hours</sub></var>.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var> or if the character at <var
+ title="">position</var> is not a U+003A COLON character, then fail.
+ Otherwise, move <var title="">position</var> forwards one character.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is not exactly two characters long, then fail. Otherwise,
+ interpret the resulting sequence as a base ten integer. Let that
+ number be the <var title="">timezone<sub title="">minutes</sub></var>.
+
+ <li>If <var title="">timezone<sub title="">minutes</sub></var> is not a
+ number in the range 0&nbsp;&le;&nbsp;<var title="">timezone<sub
+ title="">minutes</sub></var>&nbsp;&le;&nbsp;59, then fail.
+ </ol>
+
+ <li>
+ <p>If <var title="">position</var> is <em>not</em> beyond the end of <var
+ title="">input</var>, then fail.
+
+ <li>
+ <p>Let <var title="">time</var> be the moment in time at year <var
+ title="">year</var>, month <var title="">month</var>, day <var
+ title="">day</var>, hours <var title="">hour</var>, minute <var
+ title="">minute</var>, second <var title="">second</var>, subtracting <var
+ title="">timezone<sub title="">hours</sub></var> hours and <var
+ title="">timezone<sub title="">minutes</sub></var> minutes. That moment
+ in time is a moment in the UTC timezone.
+
+ <li>
+ <p>Let <var title="">timezone</var> be <var title="">timezone<sub
+ title="">hours</sub></var> hours and <var title="">timezone<sub
+ title="">minutes</sub></var> minutes from UTC.
+
+ <li>
+ <p>Return <var title="">time</var> and <var title="">timezone</var>.
+ </ol>
+
+ <h5 id=vaguer><span class=secno>3.2.4.2. </span>Vaguer moments in time</h5>
+
+ <p>This section defines <dfn id=date-or title="date or time string">date or
+ time strings</dfn>. There are two kinds, <dfn id=date-or0 title="date or
+ time string in content">date or time strings in content</dfn>, and <dfn
+ id=date-or1 title="date or time string in attributes">date or time strings
+ in attributes</dfn>. The only difference is in the handling of whitespace
+ characters.
+
+ <p>To parse a <a href="#date-or">date or time string</a>, user agents must
+ use the following algorithm. A <a href="#date-or">date or time string</a>
+ is a <em>valid</em> date or time string if the following algorithm, when
+ run on the string, doesn't say the string is invalid.
+
+ <p>The algorithm may return nothing (in which case the string will be
+ invalid), or it may return a date, a time, a date and a time, or a date
+ and a time and a timezone. Even if the algorithm returns one or more
+ values, the string can still be invalid.
+
+ <ol><!-- INIT -->
+
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>Let <var title="">results</var> be the collection of results that are
+ to be returned (one or more of a date, a time, and a timezone),
+ initially empty. If the algorithm aborts at any point, then whatever is
+ currently in <var title="">results</var> must be returned as the result
+ of the algorithm.
+ </li>
+ <!-- LEADING WHITESPACE -->
+
+ <li>
+ <p>For the "in content" variant: <a href="#skip-">skip Zs characters</a>;
+ for the "in attributes" variant: <a href="#skip-whitespace">skip
+ whitespace</a>.
+ </li>
+ <!-- XXX skip whitespace in attribute?
+ really? -->
+ <!-- YEAR or HOUR -->
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is empty, then the string is invalid; abort these steps.
+
+ <li>
+ <p>Let the sequence of characters collected in the last step be <var
+ title="">s</var>.
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, the string is invalid; abort these steps.
+
+ <li>
+ <p>If the character at <var title="">position</var> is <em>not</em> a
+ U+003A COLON character, then:</p>
+ <!-- DATE -->
+ <ol>
+ <li>
+ <p>If the character at <var title="">position</var> is not a U+002D
+ HYPHEN-MINUS ("-") character either, then the string is invalid, abort
+ these steps.
+ </li>
+ <!-- YEAR -->
+
+ <li>
+ <p>If the sequence <var title="">s</var> is not exactly four digits
+ long, then the string is invalid. (This does not stop the algorithm,
+ however.)
+
+ <li>
+ <p>Interpret the sequence of characters collected in step 5 as a base
+ ten integer, and let that number be <var title="">year</var>.
+
+ <li>
+ <p>Advance <var title="">position</var> past the U+002D HYPHEN-MINUS
+ ("-") character.
+ </li>
+ <!-- MONTH -->
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is empty, then the string is invalid; abort these steps.
+
+ <li>
+ <p>If the sequence collected in the last step is not exactly two digits
+ long, then the string is invalid.
+
+ <li>
+ <p>Interpret the sequence of characters collected two steps ago as a
+ base ten integer, and let that number be <var title="">month</var>.
+
+ <li>If <var title="">month</var> is not a number in the range
+ 1&nbsp;&le;&nbsp;<var title="">month</var>&nbsp;&le;&nbsp;12, then the
+ string is invalid, abort these steps.
+
+ <li>
+ <p>Let <var title="">maxday</var> be the <a href="#number">number of
+ days in month <var title="">month</var> of year <var
+ title="">year</var></a>.
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, or if the character at <var
+ title="">position</var> is <em>not</em> a U+002D HYPHEN-MINUS ("-")
+ character, then the string is invalid, abort these steps. Otherwise,
+ advance <var title="">position</var> to the next character.
+ </li>
+ <!-- DAY -->
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is empty, then the string is invalid; abort these steps.
+
+ <li>
+ <p>If the sequence collected in the last step is not exactly two digits
+ long, then the string is invalid.
+
+ <li>
+ <p>Interpret the sequence of characters collected two steps ago as a
+ base ten integer, and let that number be <var title="">day</var>.
+
+ <li>
+ <p>If <var title="">day</var> is not a number in the range
+ 1&nbsp;&le;&nbsp;<var title="">day</var>&nbsp;&le;&nbsp;<var
+ title="">maxday</var>, then the string is invalid, abort these steps.
+
+ <li>
+ <p>Add the date represented by <var title="">year</var>, <var
+ title="">month</var>, and <var title="">day</var> to the <var
+ title="">results</var>.
+ </li>
+ <!-- WHITESPACE -->
+
+ <li>
+ <p>For the "in content" variant: <a href="#skip-">skip Zs
+ characters</a>; for the "in attributes" variant: <a
+ href="#skip-whitespace">skip whitespace</a>.
+
+ <li>
+ <p>If the character at <var title="">position</var> is a U+0054 LATIN
+ CAPITAL LETTER T, then move <var title="">position</var> forwards one
+ character.
+
+ <li>
+ <p>For the "in content" variant: <a href="#skip-">skip Zs
+ characters</a>; for the "in attributes" variant: <a
+ href="#skip-whitespace">skip whitespace</a>.
+ </li>
+ <!-- at this point, if <var title="">position</var> points to a
+ number, we know that we passed at least one space or a T, because
+ otherwise the number would have been slurped up in the last
+ "collect" step. -->
+ <!-- HOUR -->
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is empty, then the string is invalid; abort these steps.
+
+ <li>
+ <p>Let <var title="">s</var> be the sequence of characters collected in
+ the last step.
+ </ol>
+ </li>
+ <!-- TIME -->
+
+ <li>
+ <p>If <var title="">s</var> is not exactly two digits long, then the
+ string is invalid.
+
+ <li>
+ <p>Interpret the sequence of characters <var title="">s</var> as a base
+ ten integer, and let that number be <var title="">hour</var>.
+
+ <li>
+ <p>If <var title="">hour</var> is not a number in the range
+ 0&nbsp;&le;&nbsp;<var title="">hour</var>&nbsp;&le;&nbsp;23, then the
+ string is invalid, abort these steps.
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, or if the character at <var
+ title="">position</var> is <em>not</em> a U+003A COLON character, then
+ the string is invalid, abort these steps. Otherwise, advance <var
+ title="">position</var> to the next character.
+ </li>
+ <!-- MINUTE -->
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the range
+ U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the collected
+ sequence is empty, then the string is invalid; abort these steps.
+
+ <li>
+ <p>If the sequence collected in the last step is not exactly two digits
+ long, then the string is invalid.
+
+ <li>
+ <p>Interpret the sequence of characters collected two steps ago as a base
+ ten integer, and let that number be <var title="">minute</var>.
+
+ <li>
+ <p>If <var title="">minute</var> is not a number in the range
+ 0&nbsp;&le;&nbsp;<var title="">minute</var>&nbsp;&le;&nbsp;59, then the
+ string is invalid, abort these steps.
+ </li>
+ <!-- SECOND -->
+
+ <li>
+ <p>Let <var title="">second</var> be 0. It may be changed to another
+ value in the next step.
+
+ <li>
+ <p>If <var title="">position</var> is not past the end of <var
+ title="">input</var> and the character at <var title="">position</var>
+ is a U+003A COLON character, then:</p>
+
+ <ol>
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> that are
+ either characters in the range U+0030 DIGIT ZERO (0) to U+0039 DIGIT
+ NINE (9) or are U+002E FULL STOP. If the collected sequence is empty,
+ or contains more than one U+002E FULL STOP character, then the string
+ is invalid; abort these steps.
+
+ <li>
+ <p>If the first character in the sequence collected in the last step is
+ not in the range U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9), then
+ the string is invalid.
+
+ <li>
+ <p>Interpret the sequence of characters collected two steps ago as a
+ base ten number (possibly with a fractional part), and let that number
+ be <var title="">second</var>.
+
+ <li>
+ <p>If <var title="">second</var> is not a number in the range
+ 0&nbsp;&le;&nbsp;<var title="">second</var>&nbsp;&lt;&nbsp;60, then
+ the string is invalid, abort these steps.
+ </ol>
+
+ <li>
+ <p>Add the time represented by <var title="">hour</var>, <var
+ title="">minute</var>, and <var title="">second</var> to the <var
+ title="">results</var>.
+ </li>
+ <!-- TIME ZONE -->
+
+ <li>
+ <p>If <var title="">results</var> has both a date and a time, then:</p>
+
+ <ol>
+ <li>
+ <p>For the "in content" variant: <a href="#skip-">skip Zs
+ characters</a>; for the "in attributes" variant: <a
+ href="#skip-whitespace">skip whitespace</a>.
+
+ <li>
+ <p>If <var title="">position</var> is past the end of <var
+ title="">input</var>, then skip to the next step in the overall set of
+ steps.</p>
+ <!-- UTC -->
+
+ <li>
+ <p>Otherwise, if the character at <var title="">position</var> is a
+ U+005A LATIN CAPITAL LETTER Z, then:</p>
+
+ <ol>
+ <li>
+ <p>Add the timezone corresponding to UTC (zero offset) to the <var
+ title="">results</var>.
+
+ <li>
+ <p>Advance <var title="">position</var> to the next character in <var
+ title="">input</var>.
+
+ <li>
+ <p>Skip to the next step in the overall set of steps.
+ </ol>
+ </li>
+ <!-- EXPLICIT TIMEZONE OFFSET -->
+
+ <li>
+ <p>Otherwise, if the character at <var title="">position</var> is
+ either a U+002B PLUS SIGN ("+") or a U+002D HYPHEN-MINUS ("-"), then:</p>
+
+ <ol><!-- SIGN -->
+
+ <li>
+ <p>If the character at <var title="">position</var> is a U+002B PLUS
+ SIGN ("+"), let <var title="">sign</var> be "positive". Otherwise,
+ it's a U+002D HYPHEN-MINUS ("-"); let <var title="">sign</var> be
+ "negative".
+ </li>
+ <!-- HOURS -->
+
+ <li>
+ <p>Advance <var title="">position</var> to the next character in <var
+ title="">input</var>.
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the
+ range U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the
+ collected sequence is not exactly two characters long, then the
+ string is invalid.
+
+ <li>
+ <p>Interpret the sequence collected in the last step as a base ten
+ number, and let that number be <var title="">timezone<sub
+ title="">hours</sub></var>.
+
+ <li>If <var title="">timezone<sub title="">hours</sub></var> is not a
+ number in the range 0&nbsp;&le;&nbsp;<var title="">timezone<sub
+ title="">hours</sub></var>&nbsp;&le;&nbsp;23, then the string is
+ invalid; abort these steps.
+
+ <li>If <var title="">sign</var> is "negative", then negate <var
+ title="">timezone<sub title="">hours</sub></var>.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var> or if the character at <var
+ title="">position</var> is not a U+003A COLON character, then the
+ string is invalid; abort these steps. Otherwise, move <var
+ title="">position</var> forwards one character.
+ </li>
+ <!-- MINUTES -->
+
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> in the
+ range U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9). If the
+ collected sequence is not exactly two characters long, then the
+ string is invalid.
+
+ <li>
+ <p>Interpret the sequence collected in the last step as a base ten
+ number, and let that number be <var title="">timezone<sub
+ title="">minutes</sub></var>.
+
+ <li>If <var title="">timezone<sub title="">minutes</sub></var> is not
+ a number in the range 0&nbsp;&le;&nbsp;<var title="">timezone<sub
+ title="">minutes</sub></var>&nbsp;&le;&nbsp;59, then the string is
+ invalid; abort these steps.
+
+ <li>
+ <p>Add the timezone corresponding to an offset of <var
+ title="">timezone<sub title="">hours</sub></var> hours and <var
+ title="">timezone<sub title="">minutes</sub></var> minutes to the
+ <var title="">results</var>.
+
+ <li>
+ <p>Skip to the next step in the overall set of steps.
+ </ol>
+
+ <li>
+ <p>Otherwise, the string is invalid; abort these steps.
+ </ol>
+
+ <li>
+ <p>For the "in content" variant: <a href="#skip-">skip Zs characters</a>;
+ for the "in attributes" variant: <a href="#skip-whitespace">skip
+ whitespace</a>.
+
+ <li>
+ <p>If <var title="">position</var> is <em>not</em> past the end of <var
+ title="">input</var>, then the string is invalid.</p>
+
+ <li>
+ <p>Abort these steps (the string is parsed).
+ </ol>
+
+ <h4 id=time-offsets><span class=secno>3.2.5. </span>Time offsets</h4>
+
+ <p class=big-issue><dfn id=valid6>valid time offset</dfn>, <dfn
+ id=rules4>rules for parsing time offsets</dfn>, <dfn id=time-offset>time
+ offset serialisation rules</dfn>; in the format "5d4h3m2s1ms" or "3m 9.2s"
+ or "00:00:00.00" or similar.
+
+ <h4 id=tokens><span class=secno>3.2.6. </span>Tokens</h4>
+
+ <p>A <dfn id=set-of>set of space-separated tokens</dfn> is a set of zero or
+ more words separated by one or more <a href="#space" title="space
+ character">space characters</a>, where words consist of any string of one
+ or more characters, none of which are <a href="#space" title="space
+ character">space characters</a>.
+
+ <p>A string containing a <a href="#set-of">set of space-separated
+ tokens</a> may have leading or trailing <a href="#space" title="space
+ character">space characters</a>.
+
+ <p>An <dfn id=unordered>unordered set of space-separated tokens</dfn> is a
+ <a href="#set-of">set of space-separated tokens</a> where none of the
+ words are duplicated.
+
+ <p>An <dfn id=ordered>ordered set of unique space-separated tokens</dfn> is
+ a <a href="#set-of">set of space-separated tokens</a> where none of the
+ words are duplicated but where the order of the tokens is meaningful.
+
+ <p>When a user agent has to <dfn id=split>split a string on spaces</dfn>,
+ it must use the following algorithm:
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being parsed.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>Let <var title="">tokens</var> be a list of tokens, initially empty.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>
+
+ <li>
+ <p>While <var title="">position</var> is not past the end of <var
+ title="">input</var>:</p>
+
+ <ol>
+ <li>
+ <p><a href="#collect">Collect a sequence of characters</a> that are not
+ <a href="#space" title="space character">space characters</a>.
+
+ <li>
+ <p>Add the string collected in the previous step to <var
+ title="">tokens</var>.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>
+ </ol>
+
+ <li>
+ <p>Return <var title="">tokens</var>.
+ </ol>
+
+ <p>When a user agent has to <dfn id=remove0>remove a token from a
+ string</dfn>, it must use the following algorithm:
+
+ <ol>
+ <li>
+ <p>Let <var title="">input</var> be the string being modified.
+
+ <li>
+ <p>Let <var title="">token</var> be the token being removed. It will not
+ contain any <a href="#space" title="space character">space
+ characters</a>.
+
+ <li>
+ <p>Let <var title="">output</var> be the output string, initially empty.
+
+ <li>
+ <p>Let <var title="">position</var> be a pointer into <var
+ title="">input</var>, initially pointing at the start of the string.
+
+ <li>
+ <p>If <var title="">position</var> is beyond the end of <var
+ title="">input</var>, set the string being modified to <var
+ title="">output</var>, and abort these steps.
+
+ <li>
+ <p>If the character at <var title="">position</var> is a <a
+ href="#space">space character</a>:
+
+ <ol>
+ <li>
+ <p>Append the character at <var title="">position</var> to the end of
+ <var title="">output</var>.
+
+ <li>
+ <p>Increment <var title="">position</var> so it points at the next
+ character in <var title="">input</var>.
+
+ <li>
+ <p>Return to step 5 in the overall set of steps.
+ </ol>
+
+ <li>
+ <p>Otherwise, the character at <var title="">position</var> is the first
+ character of a token. <a href="#collect">Collect a sequence of
+ characters</a> that are not <a href="#space" title="space
+ character">space characters</a>, and let that be <var title="">s</var>.
+
+ <li>
+ <p>If <var title="">s</var> is exactly equal to <var
+ title="">token</var>, then:</p>
+
+ <ol>
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a> (in <var
+ title="">input</var>).
+
+ <li>
+ <p>Remove any <a href="#space" title="space character">space
+ characters</a> currently at the end of <var title="">output</var>.
+
+ <li>
+ <p>If <var title="">position</var> is not past the end of <var
+ title="">input</var>, and <var title="">output</var> is not the empty
+ string, append a single U+0020 SPACE character at the end of <var
+ title="">output</var>.
+ </ol>
+
+ <li>
+ <p>Otherwise, append <var title="">s</var> to the end of <var
+ title="">output</var>.
+
+ <li>
+ <p>Return to step 5 in the overall set of steps.
+ </ol>
+
+ <p class=note>This causes any occurrences of the token to be removed from
+ the string, and any spaces that were surrounding the token to be collapsed
+ to a single space, except at the start and end of the string, where such
+ spaces are removed.
+
+ <h4 id=keywords><span class=secno>3.2.7. </span>Keywords and enumerated
+ attributes</h4>
+
+ <p>Some attributes are defined as taking one of a finite set of keywords.
+ Such attributes are called <dfn id=enumerated title="enumerated
+ attribute">enumerated attributes</dfn>. The keywords are each defined to
+ map to a particular <em>state</em> (several keywords might map to the same
+ state, in which case some of the keywords are synonyms of each other;
+ additionally, some of the keywords can be said to be non-conforming, and
+ are only in the specification for historical reasons). In addition, two
+ default states can be given. The first is the <em>invalid value
+ default</em>, the second is the <em>missing value default</em>.
+
+ <p>If an enumerated attribute is specified, the attribute's value must be
+ one of the given keywords that are not said to be non-conforming, with no
+ leading or trailing whitespace. The keyword may use any mix of uppercase
+ and lowercase letters.<!-- XXX should
+ say "uppercase and lowercase ASCII letters" or some such -->
+
+ <p>When the attribute is specified, if its value
+ <span>case-insensitively</span><!-- XXX ascii case folding --> matches one
+ of the given keywords then that keyword's state is the state that the
+ attribute represents. If the attribute value matches none of the given
+ keywords, but the attribute has an <em>invalid value default</em>, then
+ the attribute represents that state. Otherwise, if the attribute value
+ matches none of the keywords but there is a <em>missing value default</em>
+ state defined, then <em>that</em> is the state represented by the
+ attribute. Otherwise, there is no default, and invalid values must simply
+ be ignored.
+
+ <p>When the attribute is <em>not</em> specified, if there is a <em>missing
+ value default</em> state defined, then that is the state represented by
+ the (missing) attribute. Otherwise, the absence of the attribute means
+ that there is no state represented.
+
+ <p class=note>The empty string can be one of the keywords in some cases.
+ For example the <code title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute has two
+ states: <em>true</em>, matching the <code title="">true</code> keyword and
+ the empty string; and <em>false</em>, matching <code title="">false</code> and
+ all other keywords (it's the <em>invalid value default</em>). It could
+ further be thought of as having a third state <em>inherit</em>, which
+ would be the default when the attribute is not specified at all (the
+ <em>missing value default</em>), but for various reasons that isn't the
+ way this specification actually defines it.
+
+ <h4 id=syntax-references><span class=secno>3.2.8. </span>References</h4>
+
+ <p>A <dfn id=valid7>valid hashed ID reference</dfn> to an element of type
+ <var title="">type</var> is a string consisting of a U+0023 NUMBER SIGN
+ (<code title="">#</code>) character followed by a string which exactly
+ matches the value of the <code title=attr-id><a href="#id">id</a></code>
+ attribute of an element in the document with type <var
+ title="">type</var>.
+
+ <p>The <dfn id=rules5>rules for parsing a hashed ID reference</dfn> to an
+ element of type <var title="">type</var> are as follows:
+
+ <ol>
+ <li>
+ <p>If the string being parsed does not contain a U+0023 NUMBER SIGN
+ character, or if the first such character in the string is the last
+ character in the string, then return null and abort these steps.
+
+ <li>
+ <p>Let <var title="">s</var> be the string from the character immediately
+ after the first U+0023 NUMBER SIGN character in the string being parsed
+ up to the end of that string.
+
+ <li>
+ <p>Return the first element of type <var title="">type</var> that has an
+ <code title=attr-id><a href="#id">id</a></code> or <code
+ title="">name</code> attribute whose value case-insensitively matches
+ <var title="">s</var>.
+ </ol>
+
+ <h3 id=documents0><span class=secno>3.3. </span>Documents and document
+ fragments</h3>
+
+ <h4 id=semantics0><span class=secno>3.3.1. </span>Semantics</h4>
+
+ <p>Elements, attributes, and attribute values in HTML are defined (by this
+ specification) to have certain meanings (semantics). For example, the
+ <code><a href="#ol">ol</a></code> element represents an ordered list, and
+ the <code title=lang>lang</code> attribute represents the language of the
+ content.
+
+ <p>Authors must only use elements, attributes, and attribute values for
+ their appropriate semantic purposes.
+
+ <div class=example>
+ <p>For example, the following document is non-conforming, despite being
+ syntactically correct:</p>
+
+ <pre>&lt;!DOCTYPE html&gt;
+&lt;html lang="en-GB"&gt;
+ &lt;head&gt; &lt;title&gt; Demonstration &lt;/title&gt; &lt;/head&gt;
+ &lt;body&gt;
+ &lt;table&gt;
+ &lt;tr&gt; &lt;td&gt; My favourite animal is the cat. &lt;/td&gt; &lt;/tr&gt;
+ &lt;tr&gt;
+ &lt;td&gt;
+ &mdash;&lt;a href="http://example.org/~ernest/"&gt;&lt;cite&gt;Ernest&lt;/cite&gt;&lt;/a&gt;,
+ in an essay from 1992
+ &lt;/td&gt;
+ &lt;/tr&gt;
+ &lt;/table&gt;
+ &lt;/body&gt;
+&lt;/html&gt;</pre>
+
+ <p>...because the data placed in the cells is clearly not tabular data. A
+ corrected version of this document might be:</p>
+
+ <pre>&lt;!DOCTYPE html&gt;
+&lt;html lang="en-GB"&gt;
+ &lt;head&gt; &lt;title&gt; Demonstration &lt;/title&gt; &lt;/head&gt;
+ &lt;body&gt;
+ &lt;blockquote&gt;
+ &lt;p&gt; My favourite animal is the cat. &lt;/p&gt;
+ &lt;/blockquote&gt;
+ &lt;p&gt;
+ &mdash;&lt;a href="http://example.org/~ernest/"&gt;&lt;cite&gt;Ernest&lt;/cite&gt;&lt;/a&gt;,
+ in an essay from 1992
+ &lt;/p&gt;
+ &lt;/body&gt;
+&lt;/html&gt;</pre>
+
+ <p>This next document fragment, intended to represent the heading of a
+ corporate site, is similarly non-conforming because the second line is
+ not intended to be a heading of a subsection, but merely a subheading or
+ subtitle (a subordinate heading for the same section).</p>
+
+ <pre>&lt;body&gt;
+ &lt;h1&gt;ABC Company&lt;/h1&gt;
+ &lt;h2&gt;Leading the way in widget design since 1432&lt;/h2&gt;
+ ...</pre>
+
+ <p>The <code><a href="#header">header</a></code> element should be used in
+ these kinds of situations:</p>
+
+ <pre>&lt;body&gt;
+ &lt;header&gt;
+ &lt;h1&gt;ABC Company&lt;/h1&gt;
+ &lt;h2&gt;Leading the way in widget design since 1432&lt;/h2&gt;
+ &lt;/header&gt;
+ ...</pre>
+ </div>
+
+ <p>Through scripting and using other mechanisms, the values of attributes,
+ text, and indeed the entire structure of the document may change
+ dynamically while a user agent is processing it. The semantics of a
+ document at an instant in time are those represented by the state of the
+ document at that instant in time, and the semantics of a document can
+ therefore change over time. User agents must update their presentation of
+ the document as this occurs.
+
+ <p class=example>HTML has a <code><a href="#progress">progress</a></code>
+ element that describes a progress bar. If its "value" attribute is
+ dynamically updated by a script, the UA would update the rendering to show
+ the progress changing.
+
+ <h4 id=structure0><span class=secno>3.3.2. </span>Structure</h4>
+
+ <p>All the elements in this specification have a defined content model,
+ which describes what nodes are allowed inside the elements, and thus what
+ the structure of an HTML document or fragment must look like. Authors must
+ only put elements inside an element if that element allows them to be
+ there according to its content model.
+
+ <p class=note>As noted in the conformance and terminology sections, for the
+ purposes of determining if an element matches its content model or not, <a
+ href="#text-node" title="text node"><code>CDATASection</code> nodes in the
+ DOM are treated as equivalent to <code>Text</code> nodes</a>, and <a
+ href="#entity-references">entity reference nodes are treated as if they
+ were expanded in place</a>.
+
+ <p>The <a href="#space" title="space character">space characters</a> are
+ always allowed between elements. User agents represent these characters
+ between elements in the source markup as text nodes in the
+ DOM.<!-- not a conf criteria since the parser now requires this
+ -->
+ Empty <a href="#text-node" title="text node">text nodes</a> and <a
+ href="#text-node" title="text node">text nodes</a> consisting of just
+ sequences of those characters are considered <dfn
+ id=inter-element>inter-element whitespace</dfn>.
+
+ <p><a href="#inter-element">Inter-element whitespace</a>, comment nodes,
+ and processing instruction nodes must be ignored when establishing whether
+ an element matches its content model or not, and must be ignored when
+ following algorithms that define document and element semantics.
+
+ <p>An element <var title="">A</var> is said to be <dfn
+ id=preceeded>preceded or followed</dfn> by a second element <var
+ title="">B</var> if <var title="">A</var> and <var title="">B</var> have
+ the same parent node and there are no other element nodes or text nodes
+ (other than <a href="#inter-element">inter-element whitespace</a>) between
+ them.
+
+ <p>Authors must only use <a href="#elements1">elements in the HTML
+ namespace</a> in the contexts where they are allowed, as defined for each
+ element. For XML compound documents, these contexts could be inside
+ elements from other namespaces, if those elements are defined as providing
+ the relevant contexts.
+
+ <div class=example>
+ <p>The SVG specification defines the SVG <code>foreignObject</code>
+ element as allowing foreign namespaces to be included, thus allowing
+ compound documents to be created by inserting subdocument content under
+ that element. <em>This</em> specification defines the XHTML <code><a
+ href="#html">html</a></code> element as being allowed where subdocument
+ fragments are allowed in a compound document. Together, these two
+ definitions mean that placing an XHTML <code><a
+ href="#html">html</a></code> element as a child of an SVG
+ <code>foreignObject</code> element is conforming.</p>
+ </div>
+
+ <h4 id=kinds><span class=secno>3.3.3. </span>Kinds of elements</h4>
+
+ <p>Each element in HTML falls into zero or more categories that group
+ elements with similar characteristics together. This specification uses
+ the following categories:
+
+ <ul class=brief>
+ <li><a href="#metadata">Metadata elements</a>
+
+ <li><a href="#sectioning">Sectioning elements</a>
+
+ <li><a href="#block-level0">Block-level elements</a>
+
+ <li><a href="#strictly">Strictly inline-level content</a>
+
+ <li><a href="#structured">Structured inline-level elements</a>
+
+ <li><a href="#interactive1">Interactive elements</a>
+
+ <li><span>Form control elements</span>
+ </ul>
+ <!-- XXX check that all the above got a section defining them,
+ however briefly -->
+ <!-- XXX check that the element definitions also link to those
+ sections -->
+
+ <p>Some elements have unique requirements and do not fit into any
+ particular category.
+
+ <p>In addition, some elements represent various common concepts; for
+ example, some elements represent <span>paragraphs</span>.
+
+ <h5 id=block-level><span class=secno>3.3.3.1. </span><dfn
+ id=block-level0>Block-level elements</dfn></h5>
+
+ <p>Block-level elements are used for structural grouping of page content.
+
+ <p>There are several kinds of block-level elements:
+
+ <ul>
+ <li>Some can only contain other block-level elements: <code><a
+ href="#blockquote">blockquote</a></code>, <code><a
+ href="#section">section</a></code>, <code><a
+ href="#article">article</a></code>, <code><a
+ href="#header">header</a></code>.
+
+ <li>Some can only contain <a href="#inline-level0">inline-level
+ content</a>: <code><a href="#p">p</a></code>, <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code>, <code><a
+ href="#address">address</a></code>.
+
+ <li>Some can contain either block-level elements or <a
+ href="#inline-level0">inline-level content</a> (but not both): <code><a
+ href="#nav">nav</a></code>, <code><a href="#aside">aside</a></code>,
+ <code><a href="#footer">footer</a></code>, <code><a
+ href="#div">div</a></code>.
+
+ <li>Finally, some have very specific content models: <code><a
+ href="#ul">ul</a></code>, <code><a href="#ol">ol</a></code>, <code><a
+ href="#dl">dl</a></code>, <code><a href="#table">table</a></code>,
+ <code><a href="#script0">script</a></code>.
+ </ul>
+
+ <p>There are also elements that seem to be block-level but aren't, such as
+ <code><a href="#body0">body</a></code>, <code><a href="#li">li</a></code>,
+ <code><a href="#dt">dt</a></code>, <code><a href="#dd">dd</a></code>, and
+ <code><a href="#td">td</a></code>. These elements are allowed only in
+ specific places, not simply anywhere that block-level elements are
+ allowed.
+
+ <p>Some block-level elements play multiple roles. For instance, the
+ <code><a href="#script0">script</a></code> element is allowed inside
+ <code><a href="#head">head</a></code> elements and can also be used as <a
+ href="#inline-level0">inline-level content</a>. Similarly, the <code><a
+ href="#ul">ul</a></code>, <code><a href="#ol">ol</a></code>, <code><a
+ href="#dl">dl</a></code>, <code><a href="#table">table</a></code>, and
+ <code><a href="#blockquote">blockquote</a></code> elements play dual roles
+ as both block-level and inline-level elements.
+
+ <h5 id=inline-level><span class=secno>3.3.3.2. </span><dfn
+ id=inline-level0>Inline-level content</dfn></h5>
+
+ <p>Inline-level content consists of text and various elements to annotate
+ the text, as well as some <a href="#embedded0">embedded content</a> (such
+ as images or sound clips).
+
+ <p>Inline-level content comes in various types:
+
+ <dl>
+ <dt><dfn id=strictly>Strictly inline-level content</dfn>
+
+ <dd>Text, <a href="#embedded0">embedded content</a>, and elements that
+ annotate the text without introducing structural grouping. For example:
+ <code><a href="#a">a</a></code>, <code><a href="#meter">meter</a></code>,
+ <code><a href="#img">img</a></code>. Elements used in contexts allowing
+ only strictly inline-level content must not have any descendants that are
+ anything other than strictly inline-level content.
+
+ <dt><dfn id=structured>Structured inline-level elements</dfn>
+
+ <dd>Block-level elements that can also be used as inline-level content.
+ For example: <code><a href="#ol">ol</a></code>, <code><a
+ href="#blockquote">blockquote</a></code>, <code><a
+ href="#table">table</a></code>.
+ </dl>
+
+ <p>Some elements are defined to have as a content model <dfn
+ id=significant>significant inline content</dfn>. This means that at least
+ one descendant of the element must be <a href="#significant0">significant
+ text</a> or <a href="#embedded0">embedded content</a>.
+
+ <p>Unless an element's content model explicitly states that it must contain
+ <a href="#significant">significant inline content</a>, simply having no <a
+ href="#text-node" title="text node">text nodes</a> and no elements
+ satisfies an element whose content model is some kind of inline content.
+
+ <p><dfn id=significant0>Significant text</dfn>, for the purposes of
+ determining the presence of <a href="#significant">significant inline
+ content</a>, consists of any character other than those falling in the <a
+ href="http://unicode.org/Public/UNIDATA/UCD.html#General_Category_Values">Unicode
+ categories</a> Zs, Zl, Zp, Cc, and Cf. <a
+ href="#refsUNICODE">[UNICODE]</a>
+
+ <div class=example>
+ <p>The following three paragraphs are non-conforming because their content
+ model is not satisfied (they all count as empty).</p>
+
+ <pre>
+&lt;p&gt;&lt;/p&gt;
+&lt;p&gt;&lt;em&gt;&amp;#x00A0;&lt;/em&gt;&lt;/p&gt;
+&lt;p&gt;
+ &lt;ol&gt;
+ &lt;li&gt;&lt;/li&gt;
+ &lt;/ol&gt;
+&lt;/p&gt;
+</pre>
+ </div>
+
+ <p><dfn id=embedded0>Embedded content</dfn> consists of elements that
+ introduce content from other resources into the document, for example
+ <code><a href="#img">img</a></code>. Embedded content elements can have
+ <dfn id=fallback>fallback content</dfn>: content that is to be used when
+ the external resource cannot be used (e.g. because it is of an unsupported
+ format). The element definitions state what the fallback is, if any.
+
+ <h5 id=transparent><span class=secno>3.3.3.3. </span>Transparent content
+ models</h5>
+
+ <p>Some elements are described as <dfn id=transparent0>transparent</dfn>;
+ they have "transparent" as their content model. Some elements are
+ described as <dfn id=semi-transparent>semi-transparent</dfn>; this means
+ that part of their content model is "transparent" but that is not the only
+ part of the content model that must be satisfied.
+
+ <p>When a content model includes a part that is "transparent", those parts
+ must only contain content that would still be conformant if all
+ transparent and semi-transparent elements in the tree were replaced, in
+ their parent element, by the children in the "transparent" part of their
+ content model, retaining order.
+
+ <p>When a transparent or semi-transparent element has no parent, then the
+ part of its content model that is "transparent" must instead be treated as
+ zero or more <a href="#block-level0">block-level elements</a>, or <a
+ href="#inline-level0">inline-level content</a> (but not both).
+
+ <h5 id=determining><span class=secno>3.3.3.4. </span><dfn
+ id=determining0>Determining if a particular element contains block-level
+ elements or inline-level content</dfn></h5>
+
+ <p>Some elements are defined to have content models that allow either <a
+ href="#block-level0">block-level elements</a> or <a
+ href="#inline-level0">inline-level content</a>, but not both. For example,
+ the <code><a href="#aside">aside</a></code> and <code><a
+ href="#li">li</a></code> elements.
+
+ <p>To establish whether such an element is being used as a block-level
+ container or as an inline-level container, for example in order to
+ determine if a document conforms to these requirements, user agents must
+ look at the element's child nodes. If any of the child nodes are not
+ allowed in block-level contexts, then the element is being used for <a
+ href="#inline-level0">inline-level content</a>. If all the child nodes are
+ allowed in a block-level context, then the element is being used for <a
+ href="#block-level0">block-level elements</a>.
+
+ <p>Whenever this search would examine a <a
+ href="#transparent0">transparent</a> element, the element's own child
+ nodes must be examined instead, potentially recursing further if any of
+ those are themselves transparent.
+
+ <div class=example>
+ <p>For instance, in the following (non-conforming) XML fragment, the
+ <code><a href="#li">li</a></code> element is being used as an
+ inline-level element container, because the <code><a
+ href="#meta0">meta</a></code> element is not allowed in a block-level
+ context. (It doesn't matter, for the purposes of determining whether it
+ is an inline-level or block-level context, that the <code><a
+ href="#meta0">meta</a></code> element is not allowed in inline-level
+ contexts either.)</p>
+
+ <pre>&lt;ol&gt;
+ &lt;li&gt;
+ &lt;p&gt; Hello World &lt;/p&gt;
+ &lt;meta title="this is an invalid example"/&gt;
+ &lt;/li&gt;
+&lt;/ol&gt;
+</pre>
+
+ <p>In the following fragment, the <code><a href="#aside">aside</a></code>
+ element is being used as a block-level container, because even though all
+ the elements it contains could be considered inline-level elements, there
+ are no nodes that can only be considered inline-level.</p>
+
+ <pre>&lt;aside&gt;
+ &lt;ol&gt;
+ &lt;li&gt; ... &lt;/li&gt;
+ &lt;/ol&gt;
+ &lt;ul&gt;
+ &lt;li&gt; ... &lt;/li&gt;
+ &lt;/ul&gt;
+&lt;/aside&gt;</pre>
+
+ <p>On the other hand, in the following similar fragment, the <code><a
+ href="#aside">aside</a></code> element is an inline-level container,
+ because the text ("Foo") can only be considered inline-level.</p>
+
+ <pre>&lt;aside&gt;
+ &lt;ol&gt;
+ &lt;li&gt; ... &lt;/li&gt;
+ &lt;/ol&gt;
+ Foo
+&lt;/aside&gt;</pre>
+ </div>
+
+ <h5 id=interactive0><span class=secno>3.3.3.5. </span><dfn
+ id=interactive1>Interactive elements</dfn></h5>
+ <!-- Don't change the above <dfn> or the text below without checking
+ all cross-references. Some of them refer specifically to the
+ activation behavior stuff. -->
+
+ <p class=big-issue>Parts of this section should eventually be moved to DOM3
+ Events.</p>
+ <!-- but see comment above -->
+ <!--
+TESTS:
+http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E%0A%3Cp%20tabindex%3D1%3Etest%20%3Ca%20href%3D%22%22%3E%20%3Cem%3Etest%3C/em%3E%20%3C/a%3E%0A%3Cscript%3E%0A%20function%20test%20%28e%29%20%7B%20w%28e.type%20+%20%27%20on%20%27%20+%20e.target.tagName%20+%20%27%20through%20%27%20+%20e.currentTarget.tagName%29%3B%20%7D%0A%20document.getElementsByTagName%28%27a%27%29%5B0%5D.addEventListener%28%27click%27%2C%20test%2C%20false%29%3B%0A%20document.getElementsByTagName%28%27a%27%29%5B0%5D.addEventListener%28%27DOMActivate%27%2C%20test%2C%20false%29%3B%0A%20document.getElementsByTagName%28%27p%27%29%5B0%5D.addEventListener%28%27click%27%2C%20test%2C%20false%29%3B%0A%20document.getElementsByTagName%28%27p%27%29%5B0%5D.addEventListener%28%27DOMActivate%27%2C%20test%2C%20false%29%3B%0A%3C/script%3E%0A
+http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20HTML%3E%0A%3Ca%20href%3Dhttp%3A//google.com/%20target%3Da%3EA%3C/a%3E%3Ca%20href%3Dhttp%3A//yahoo.com/%20target%3Db%3EB%3C/a%3E%3Cbr%3E%0A%3Ciframe%20name%3Da%3E%3C/iframe%3E%3Ciframe%20name%3Db%3E%3C/iframe%3E%0A%3Cscript%3E%0A%20var%20a%20%3D%20document.getElementsByTagName%28%27a%27%29%5B0%5D%3B%0A%20var%20b%20%3D%20document.getElementsByTagName%28%27a%27%29%5B1%5D%3B%0A%20a.appendChild%28b%29%3B%0A%3C/script%3E
+http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20HTML%3E%0A%3Cform%20action%3D%22http%3A//google.com/%22%20onsubmit%3D%22w%28%27onsubmit%27%29%22%3E%3Cem%3EA%3C/em%3E%3C/form%3E%0A%3Cscript%3E%0Adocument.getElementsByTagName%28%27form%27%29%5B0%5D.attachEvent%28%27onsubmit%27%2C%20function%20%28%29%20%7B%20w%28%27submit%20fired%27%29%20%7D%29%3B%0Adocument.getElementsByTagName%28%27form%27%29%5B0%5D.fireEvent%28%27onsubmit%27%29%3B%0A%3C/script%3E
+http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20HTML%3E%0A%3Cform%20action%3D%22http%3A//google.com/%22%3EX%3C/form%3E%0A%3Cscript%3E%0Avar%20evt%20%3D%20document.createEvent%28%22Events%22%29%3B%0Aevt.initEvent%28%22submit%22%2C%20true%2C%20true%29%3B%0Adocument.getElementsByTagName%28%27form%27%29%5B0%5D.dispatchEvent%28evt%29%3B%0A%3C/script%3E
+-->
+
+ <p>Certain elements in HTML can be activated, for instance <code><a
+ href="#a">a</a></code> elements, <code>button</code> elements, or
+ <code>input</code> elements when their <code>type</code> attribute is set
+ to <code>radio</code>. Activation of those elements can happen in various
+ (UA-defined) ways, for instance via the mouse or keyboard.
+
+ <p>When activation is performed via some method other than clicking the
+ pointing device, the default action of the event that triggers the
+ activation must, instead of being to activate the element directly, be to
+ <a href="#firing">fire a <code title="">click</code> event</a> on the same
+ element.
+
+ <p>The default action of this <code title=event-click>click</code> event,
+ or of the real <code title=event-click>click</code> event if the element
+ was activated by clicking a pointing device, must be to <span title="fire
+ a DOMActivate event">fire a further <code
+ title=event-DOMActivate>DOMActivate</code> event</span> at the same
+ element, whose own default action is to go through all the elements the
+ <code title=event-DOMActivate>DOMActivate</code> event bubbled through
+ (starting at the target node and going towards the <code>Document</code>
+ node), looking for an element with an <a href="#activation0">activation
+ behavior</a>; the first element, in reverse tree order, to have one, must
+ have its activation behavior executed.
+
+ <p class=note>The above doesn't happen for arbitrary synthetic events
+ dispatched by author script. However, the <code title=dom-click><a
+ href="#click">click()</a></code> method can be used to make it happen
+ programmatically.
+
+ <p>For certain form controls, this process is complicated further by <a
+ href="http://www.whatwg.org/specs/web-forms/current-work/#the-click">changes
+ that must happen around the click event</a>. <a href="#refsWF2">[WF2]</a></p>
+ <!-- XXX WF2: when this is merged into
+ this spec, update xrefs -->
+
+ <p class=note>Most interactive elements have content models that disallow
+ nesting interactive elements.</p>
+ <!--
+ <li><span>Form control elements</span></li> XXX
+-->
+
+ <h5 id=paragraphs><span class=secno>3.3.3.6. </span>Paragraphs</h5>
+
+ <p>A <dfn id=paragraph>paragraph</dfn> is typically a block of text with
+ one or more sentences that discuss a particular topic, as in typography,
+ but can also be used for more general thematic grouping. For instance, an
+ address is also a paragraph, as is a part of a form, a byline, or a stanza
+ in a poem.
+
+ <p>Paragraphs can be represented by several elements. The <code><a
+ href="#address">address</a></code> element always represents a paragraph
+ of contact information for its section, the <code><a
+ href="#aside">aside</a></code>, <code><a href="#nav">nav</a></code>,
+ <code><a href="#footer">footer</a></code>, <code><a
+ href="#li">li</a></code>, and <code><a href="#dd">dd</a></code> elements
+ represent paragraphs with various specific semantics when they are <a
+ href="#determining0" title="Determining if a particular element contains
+ block-level elements or inline-level content">used as inline-level content
+ containers</a>, the <code><a href="#figure">figure</a></code> element
+ represents a paragraph in the form of <a href="#embedded0">embedded
+ content</a>, and the <code><a href="#p">p</a></code> element represents
+ all the other kinds of paragraphs, for which there are no dedicated
+ elements.
+
+ <h3 id=global><span class=secno>3.4. </span>Global attributes</h3>
+
+ <p>The following attributes are common to and may be specified on all <a
+ href="#html-elements">HTML elements</a> (even those not defined in this
+ specification):
+
+ <dl class=element>
+ <dt>Global attributes:
+
+ <dd><code title=attr-class><a href="#class">class</a></code>
+
+ <dd><code title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code>
+
+ <dd><code title=attr-contextmenu><a
+ href="#contextmenu">contextmenu</a></code>
+
+ <dd><code title=attr-dir><a href="#dir">dir</a></code>
+
+ <dd><code title=attr-draggable><a href="#draggable">draggable</a></code>
+
+ <dd><code title=attr-id><a href="#id">id</a></code>
+
+ <dd><code title=attr-irrelevant><a
+ href="#irrelevant">irrelevant</a></code>
+
+ <dd><code title=attr-lang><a href="#lang">lang</a></code>
+
+ <dd><code title=attr-tabindex><a href="#tabindex">tabindex</a></code>
+
+ <dd><code title=attr-title><a href="#title">title</a></code>
+ </dl>
+
+ <p>In addition, the following <a href="#event2">event handler content
+ attributes</a> may be specified on any <span>HTML element</span>:
+
+ <ul class=brief>
+ <li><code title=handler-onabort><a href="#onabort">onabort</a></code>
+
+ <li><code title=handler-onbeforeunload><a
+ href="#onbeforeunload">onbeforeunload</a></code>
+
+ <li><code title=handler-onblur><a href="#onblur">onblur</a></code>
+
+ <li><code title=handler-onchange><a href="#onchange">onchange</a></code>
+
+ <li><code title=handler-onclick><a href="#onclick">onclick</a></code>
+
+ <li><code title=handler-oncontextmenu><a
+ href="#oncontextmenu">oncontextmenu</a></code>
+
+ <li><code title=handler-ondblclick><a
+ href="#ondblclick">ondblclick</a></code>
+
+ <li><code title=handler-ondrag><a href="#ondrag">ondrag</a></code>
+
+ <li><code title=handler-ondragend><a
+ href="#ondragend">ondragend</a></code>
+
+ <li><code title=handler-ondragenter><a
+ href="#ondragenter">ondragenter</a></code>
+
+ <li><code title=handler-ondragleave><a
+ href="#ondragleave">ondragleave</a></code>
+
+ <li><code title=handler-ondragover><a
+ href="#ondragover">ondragover</a></code>
+
+ <li><code title=handler-ondragstart><a
+ href="#ondragstart">ondragstart</a></code>
+
+ <li><code title=handler-ondrop><a href="#ondrop">ondrop</a></code>
+
+ <li><code title=handler-onerror><a href="#onerror">onerror</a></code>
+
+ <li><code title=handler-onfocus><a href="#onfocus">onfocus</a></code>
+
+ <li><code title=handler-onkeydown><a
+ href="#onkeydown">onkeydown</a></code>
+
+ <li><code title=handler-onkeypress><a
+ href="#onkeypress">onkeypress</a></code>
+
+ <li><code title=handler-onkeyup><a href="#onkeyup">onkeyup</a></code>
+
+ <li><code title=handler-onload><a href="#onload">onload</a></code>
+
+ <li><code title=handler-onmessage><a
+ href="#onmessage">onmessage</a></code>
+
+ <li><code title=handler-onmousedown><a
+ href="#onmousedown">onmousedown</a></code>
+
+ <li><code title=handler-onmousemove><a
+ href="#onmousemove">onmousemove</a></code>
+
+ <li><code title=handler-onmouseout><a
+ href="#onmouseout">onmouseout</a></code>
+
+ <li><code title=handler-onmouseover><a
+ href="#onmouseover">onmouseover</a></code>
+
+ <li><code title=handler-onmouseup><a
+ href="#onmouseup">onmouseup</a></code>
+
+ <li><code title=handler-onmousewheel><a
+ href="#onmousewheel">onmousewheel</a></code>
+
+ <li><code title=handler-onresize><a href="#onresize">onresize</a></code>
+
+ <li><code title=handler-onscroll><a href="#onscroll">onscroll</a></code>
+
+ <li><code title=handler-onselect><a href="#onselect">onselect</a></code>
+
+ <li><code title=handler-onsubmit><a href="#onsubmit">onsubmit</a></code>
+
+ <li><code title=handler-onunload><a href="#onunload">onunload</a></code>
+ </ul>
+
+ <h4 id=the-id><span class=secno>3.4.1. </span>The <dfn id=id
+ title=attr-id><code>id</code></dfn> attribute</h4>
+
+ <p>The <code title=attr-id><a href="#id">id</a></code> attribute represents
+ its element's unique identifier. The value must be unique in the subtree
+ within which the element finds itself and must contain at least one
+ character. The value must not contain any <a href="#space" title="space
+ character">space characters</a>.</p>
+ <!-- space characters are disallowed because space-separated lists
+ of IDs otherwise would not be able to reach all valid IDs -->
+
+ <p>If the value is not the empty string, user agents must associate the
+ element with the given value (exactly, including any space characters) for
+ the purposes of ID matching within the subtree the element finds itself
+ (e.g. for selectors in CSS or for the <code>getElementById()</code> method
+ in the DOM).
+
+ <p>Identifiers are opaque strings. Particular meanings should not be
+ derived from the value of the <code title=attr-id><a
+ href="#id">id</a></code> attribute.
+
+ <p>This specification doesn't preclude an element having multiple IDs, if
+ other mechanisms (e.g. DOM Core methods) can set an element's ID in a way
+ that doesn't conflict with the <code title=attr-id><a
+ href="#id">id</a></code> attribute.
+
+ <p>The <dfn id=id0 title=dom-id><code>id</code></dfn> DOM attribute must <a
+ href="#reflect">reflect</a> the <code title=attr-id><a
+ href="#id">id</a></code> content attribute.
+
+ <h4 id=the-title><span class=secno>3.4.2. </span>The <dfn id=title
+ title=attr-title><code>title</code></dfn> attribute</h4>
+
+ <p>The <code title=attr-title><a href="#title">title</a></code> attribute
+ represents advisory information for the element, such as would be
+ appropriate for a tooltip. On a link, this could be the title or a
+ description of the target resource; on an image, it could be the image
+ credit or a description of the image; on a paragraph, it could be a
+ footnote or commentary on the text; on a citation, it could be further
+ information about the source; and so forth. The value is text.
+
+ <p>If this attribute is omitted from an element, then it implies that the
+ <code title=attr-title><a href="#title">title</a></code> attribute of the
+ nearest ancestor with a <code title=attr-title><a
+ href="#title">title</a></code> attribute set is also relevant to this
+ element. Setting the attribute overrides this, explicitly stating that the
+ advisory information of any ancestors is not relevant to this element.
+ Setting the attribute to the empty string indicates that the element has
+ no advisory information.
+
+ <p>If the <code title=attr-title><a href="#title">title</a></code>
+ attribute's value contains U+000A LINE FEED (LF) characters, the content
+ is split into multiple lines. Each U+000A LINE FEED (LF) character
+ represents a line break.
+
+ <p>Some elements, such as <code><a href="#link">link</a></code> and
+ <code><a href="#dfn">dfn</a></code>, define additional semantics for the
+ <code title=attr-title><a href="#title">title</a></code> attribute beyond
+ the semantics described above.
+
+ <p>The <dfn id=title0 title=dom-title><code>title</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the <code
+ title=attr-title><a href="#title">title</a></code> content attribute.
+
+ <h4 id=the-lang><span class=secno>3.4.3. </span>The <dfn id=lang
+ title=attr-lang><code>lang</code></dfn> (HTML only) and <dfn id=xmllang
+ title=attr-xml-lang><code>xml:lang</code></dfn> (XML only) attributes</h4>
+
+ <p>The <code title=attr-lang><a href="#lang">lang</a></code> attribute
+ specifies the primary <dfn id=language>language</dfn> for the element's
+ contents and for any of the element's attributes that contain text. Its
+ value must be a valid RFC 3066 language code, or the empty string. <a
+ href="#refsRFC3066">[RFC3066]</a>
+
+ <p>The <code title=attr-xml-lang><a href="#xmllang">xml:lang</a></code>
+ attribute is defined in XML. <a href="#refsXML">[XML]</a>
+
+ <p>If these attributes are omitted from an element, then it implies that
+ the language of this element is the same as the language of the parent
+ element. Setting the attribute to the empty string indicates that the
+ primary language is unknown.
+
+ <p>The <code title=attr-lang><a href="#lang">lang</a></code> attribute may
+ only be used on elements of <a href="#html-">HTML documents</a>. Authors
+ must not use the <code title=attr-lang><a href="#lang">lang</a></code>
+ attribute in <a href="#xml-documents">XML documents</a>.
+
+ <p>The <code title=attr-xml-lang><a href="#xmllang">xml:lang</a></code>
+ attribute may only be used on elements of <a href="#xml-documents">XML
+ documents</a>. Authors must not use the <code title=attr-xml-lang><a
+ href="#xmllang">xml:lang</a></code> attribute in <a href="#html-">HTML
+ documents</a>.</p>
+ <!-- technically this
+ is redundant with the XML spec -->
+
+ <p>To determine the language of a node, user agents must look at the
+ nearest ancestor element (including the element itself if the node is an
+ element) that has a <code title=attr-lang><a href="#lang">lang</a></code>
+ or <code title=attr-xml-lang><a href="#xmllang">xml:lang</a></code>
+ attribute set. That specifies the language of the node.
+
+ <p>If both the <code title=attr-xml-lang><a
+ href="#xmllang">xml:lang</a></code> attribute and the <code
+ title=attr-lang><a href="#lang">lang</a></code> attribute are set on an
+ element, user agents must use the <code title=attr-xml-lang><a
+ href="#xmllang">xml:lang</a></code> attribute, and the <code
+ title=attr-lang><a href="#lang">lang</a></code> attribute must be <a
+ href="#ignored" title=ignore>ignored</a> for the purposes of determining
+ the element's language.
+
+ <p>If no explicit language is given for the <a href="#root-element">root
+ element</a>, then language information from a higher-level protocol (such
+ as HTTP), if any, must be used as the final fallback language. In the
+ absence of any language information, the default value is unknown (the
+ empty string).
+
+ <p>User agents may use the element's language to determine proper
+ processing or rendering (e.g. in the selection of appropriate fonts or
+ pronunciations, or for dictionary selection). <!--User
+ agents must not use the element's language to determine text
+ directionality. (commented out because text directionality is a
+ rendering-level concern.)-->
+
+ <p>The <dfn id=lang0 title=dom-lang><code>lang</code></dfn> DOM attribute
+ must <a href="#reflect">reflect</a> the <code title=attr-lang><a
+ href="#lang">lang</a></code> content attribute.
+
+ <h4 id=the-dir><span class=secno>3.4.4. </span>The <dfn id=dir
+ title=attr-dir><code>dir</code></dfn> attribute</h4>
+
+ <p>The <code title=attr-dir><a href="#dir">dir</a></code> attribute
+ specifies the element's text directionality. The attribute is an <a
+ href="#enumerated">enumerated attribute</a> with the keyword <code
+ title="">ltr</code> mapping to the state <em>ltr</em>, and the keyword
+ <code title="">rtl</code> mapping to the state <em>rtl</em>. The attribute
+ has no defaults.
+
+ <p>If the attribute has the state <em>ltr</em>, the element's
+ directionality is left-to-right. If the attribute has the state
+ <em>rtl</em>, the element's directionality is right-to-left. Otherwise,
+ the element's directionality is the same as its parent.
+
+ <p>The processing of this attribute depends on the presentation layer. For
+ example, CSS 2.1 defines a mapping from this attribute to the CSS
+ 'direction' and 'unicode-bidi' properties, and defines rendering in terms
+ of those properties.
+
+ <p>The <dfn id=dir0 title=dom-dir><code>dir</code></dfn> DOM attribute on
+ an element must <a href="#reflect">reflect</a> the <code title=attr-dir><a
+ href="#dir">dir</a></code> content attribute of that element, <a
+ href="#limited">limited to only known values</a>.
+
+ <p>The <dfn id=dir1 title=dom-document-dir><code>dir</code></dfn> DOM
+ attribute on <code><a href="#htmldocument">HTMLDocument</a></code> objects
+ must <a href="#reflect">reflect</a> the <code title=attr-dir><a
+ href="#dir">dir</a></code> content attribute of <a href="#the-html0">the
+ <code>html</code> element</a>, if any, <a href="#limited">limited to only
+ known values</a>. If there is no such element, then the attribute must
+ return the empty string and do nothing on setting.
+
+ <h4 id=classes><span class=secno>3.4.5. </span>The <dfn id=class
+ title=attr-class><code>class</code></dfn> attribute</h4>
+
+ <p>Every <span>HTML element</span> may have a <code title=attr-class><a
+ href="#class">class</a></code> attribute specified.
+
+ <p>The attribute, if specified, must have a value that is an <a
+ href="#unordered">unordered set of space-separated tokens</a> representing
+ the various classes that the element belongs to.
+
+ <p>The classes that an HTML element has assigned to it consist of all the
+ classes returned when the value of the <code title=attr-class><a
+ href="#class">class</a></code> attribute is <a href="#split" title="split
+ a string on spaces">split on spaces</a>.
+
+ <p class=note>Assigning classes to an element affects class matching in
+ selectors in CSS, the <code title=dom-document-getElementsByClassName><a
+ href="#getelementsbyclassname">getElementsByClassName()</a></code> method
+ in the DOM, and other such features.
+
+ <p>Authors may use any value in the <code title=attr-class><a
+ href="#class">class</a></code> attribute, but are encouraged to use the
+ values that describe the nature of the content, rather than values that
+ describe the desired presentation of the content.
+
+ <p>The <dfn id=classname title=dom-className><code>className</code></dfn>
+ and <dfn id=classlist title=dom-classList><code>classList</code></dfn> DOM
+ attributes must both <a href="#reflect">reflect</a> the <code
+ title=attr-class><a href="#class">class</a></code> content attribute.
+
+ <h4 id=the-irrelevant><span class=secno>3.4.6. </span>The <dfn
+ id=irrelevant title=attr-irrelevant><code>irrelevant</code></dfn>
+ attribute</h4>
+
+ <p>All elements may have the <code title=attr-irrelevant><a
+ href="#irrelevant">irrelevant</a></code> content attribute set. The <code
+ title=attr-irrelevant><a href="#irrelevant">irrelevant</a></code>
+ attribute is a <a href="#boolean0">boolean attribute</a>. When specified
+ on an element, it indicates that the element is not yet, or is no longer,
+ relevant. User agents should not render elements that have the <code
+ title=attr-irrelevant><a href="#irrelevant">irrelevant</a></code>
+ attribute specified.
+
+ <div class=example>
+ <p>In the following skeletal example, the attribute is used to hide the
+ Web game until the user logs in:</p>
+
+ <pre> &lt;h1>The Example Game&lt;/h1>
+ &lt;section id="login">
+ &lt;h2>Login&lt;/h2>
+ &lt;form>
+ ...
+ &lt;!-- calls login() once the user's credentials have been checked -->
+ &lt;/form>
+ &lt;script>
+ function login() {
+ // switch screens
+ document.getElementById('login').irrelevant = true;
+ document.getElementById('game').irrelevant = false;
+ }
+ &lt;/script>
+ &lt;/section>
+ &lt;section id="game">
+ ...
+ &lt;/section></pre>
+
+ <p>In the following example, an image acts as a surrogate for a video.
+ When the image is clicked, it tries to load the video (and disables the
+ playback button). If the load succeeds enough that a frame of data has
+ been downloaded, the <code><a href="#video1">video</a></code> element
+ hides the surrogate image and shows the video instead, along with its
+ controls, and turns on autoplay (so that the video will commence playback
+ as soon as enough of it is loaded). If the load fails for any reason, the
+ video and the surrogate frame are both hidden (by hiding the paragraph
+ element containing them both), and the following paragraph is shown
+ instead, with its unhelpful error message and potentially helpful link to
+ download the video directly.</p>
+
+ <p>In legacy user agents, the surrogate image would show (though clicking
+ it would have no effect) and the link to the video would be present
+ (allowing the video to be viewed in another application).</p>
+
+ <pre> &lt;p>
+ &lt;input type="image" src="frame.png" alt="Play Video"
+ onclick=" nextSibling.load();
+ disabled = true;
+ return false;"
+ >&lt;video src="video.ogg" controls="" irrelevant=""
+ onloadedfirstframe="
+ irrelevant = false;
+ previousSibling.irrelevant = true;
+ autoplay = true"
+ onerror=" parentNode.irrelevant = true;
+ parentNode.nextSibling.irrelevant = false">
+ &lt;/video>
+ &lt;/p>&lt;p irrelevant="">
+ Playback unavailable.
+ &lt;a href="video.ogg">Download Video&lt;/a>
+ &lt;/p></pre>
+ </div>
+
+ <p>The <code title=attr-irrelevant><a
+ href="#irrelevant">irrelevant</a></code> attribute must not be used to
+ hide content that could legitimately be shown in another presentation. For
+ example, it is incorrect to use <code title=attr-irrelevant><a
+ href="#irrelevant">irrelevant</a></code> to hide panels in a tabbed
+ dialog, because the tabbed interface is merely a kind of overflow
+ presentation &mdash; showing all the form controls in one big page with a
+ scrollbar would be equivalent, and no less correct.
+
+ <p>Elements in a section hidden by the <code title=attr-irrelevant><a
+ href="#irrelevant">irrelevant</a></code> attribute are still active, e.g.
+ scripts and form controls in such sections still execute and submit
+ respectively. Only their presentation to the user changes.
+
+ <p>The <dfn id=irrelevant0
+ title=dom-irrelevant><code>irrelevant</code></dfn> DOM attribute must <a
+ href="#reflect">reflect</a> the content attribute of the same name.
+
+ <h3 id=interaction><span class=secno>3.5. </span><dfn
+ id=interaction0>Interaction</dfn></h3>
+ <!--
+ELEMENT
+ attribute long <span title="dom-tabindex">tabIndex</span>;
+ void <span title="dom-click">click</span>();
+ void <span title="dom-focus">focus</span>();
+ void <span title="dom-blur">blur</span>();
+ void <span title="dom-scrollIntoView">scrollIntoView</span>();
+ void <span title="dom-scrollIntoView">scrollIntoView</span>(in boolean top);
+
+DOCUMENT
+ readonly attribute <span>Element</span> <span title="dom-document-activeElement">activeElement</span>;
+ readonly attribute boolean <span title="dom-document-hasFocus">hasFocus</span>;
+-->
+
+ <h4 id=activation><span class=secno>3.5.1. </span>Activation</h4>
+
+ <p>The <dfn id=click title=dom-click>click()</dfn> method must <a
+ href="#firing">fire a <code>click</code> event</a> at the element, whose
+ default action is the <span title="fire a DOMActivate event">firing of a
+ further <code title=event-DOMActivate>DOMActivate</code> event</span> at
+ the same element, whose own default action is to go through all the
+ elements the <code title=event-DOMActivate>DOMActivate</code> event
+ bubbled through (starting at the target node and going towards the
+ <code>Document</code> node), looking for an element with an <a
+ href="#activation0">activation behavior</a>; the first element, in reverse
+ tree order, to have one, must have its activation behavior executed.
+
+ <h4 id=focus><span class=secno>3.5.2. </span>Focus</h4>
+
+ <p>When an element is <em>focused</em>, key events received by the document
+ must be targeted at that element. There is always an element focused; in
+ the absence of other elements being focused, the document's root element
+ is it.
+
+ <p>Which element within a document currently has focus is independent of
+ whether or not the document itself has the <em>system focus</em>.
+
+ <p>Some focusable elements might take part in <em>sequential focus
+ navigation</em>.
+
+ <h5 id=focus-management><span class=secno>3.5.2.1. </span>Focus management</h5>
+
+ <p>The <dfn id=focus0 title=dom-focus><code>focus()</code></dfn> and <dfn
+ id=blur title=dom-blur><code>blur()</code></dfn> methods must focus and
+ unfocus the element respectively, if the element is focusable.
+
+ <p>Some elements, most notably <code><a href="#area">area</a></code>, can
+ correspond to more than one distinct focusable area. When such an element
+ is focused using the <code title=dom-focus><a
+ href="#focus0">focus()</a></code> method, the first such region in tree
+ order is the one that must be focused.
+
+ <p class=big-issue>Well that clearly needs more.</p>
+ <!-- XXX e.g. should the click, focus, blur methods be recursible? -->
+
+ <p>The <dfn id=activeelement
+ title=dom-document-activeElement><code>activeElement</code></dfn>
+ attribute must return the element in the document that has focus. If no
+ element specifically has focus, this must return <a href="#the-body0">the
+ <code>body</code> element</a>.
+
+ <p>The <dfn id=hasfocus
+ title=dom-document-hasFocus><code>hasFocus</code></dfn> attribute must
+ return true if the document, one of its nested <a href="#browsing0"
+ title="browsing context">browsing contexts</a>, or any element in the
+ document or its browsing contexts currently has the system focus.
+
+ <h5 id=sequential><span class=secno>3.5.2.2. </span>Sequential focus
+ navigation</h5>
+
+ <p class=issue>This section on the <code>tabindex</code> attribute needs to
+ be checked for backwards-compatibility.
+
+ <p>The <dfn id=tabindex title=attr-tabindex><code>tabindex</code></dfn>
+ attribute specifies the relative order of elements for the purposes of
+ sequential focus navigation. The name "tab index" comes from the common
+ use of the "tab" key to navigate through the focusable elements. The term
+ "tabbing" refers to moving forward through the focusable elements.
+
+ <p>The <code title=attr-tabindex><a href="#tabindex">tabindex</a></code>
+ attribute, if specified, must have a value that is a <a
+ href="#valid0">valid integer</a>.
+
+ <p>If the attribute is specified, it must be parsed using the <a
+ href="#rules0">rules for parsing integers</a>. If parsing the value
+ returns an error, the attribute is ignored for the purposes of focus
+ management (as if it wasn't specified).
+
+ <p>A positive integer or zero specifies the index of the element in the
+ current scope's tab order. Elements with the same index are sorted in <a
+ href="#tree-order">tree order</a> for the purposes of tabbing.
+
+ <p id=negative-tabindex>A negative integer specifies that the element
+ should be removed from the tab order. If the element does normally take
+ focus, it may still be focused using other means (e.g. it could be focused
+ by a click).
+
+ <p>If the attribute is absent (or invalid), then the user agent must treat
+ the element as if it had the value 0 or the value -1, based on platform
+ conventions.
+
+ <p class=example>For example, a user agent might default
+ <code>textarea</code> elements to 0, and <code>button</code> elements to
+ -1, making text fields part of the tabbing cycle but buttons not.
+
+ <p>When an element that does not normally take focus (i.e. whose default
+ value would be -1) has the <code title=attr-tabindex><a
+ href="#tabindex">tabindex</a></code> attribute specified with a positive
+ value, then it should be added to the tab order and should be made
+ focusable. When focused, the element matches the CSS <code>:focus</code>
+ pseudo-class and key events are dispatched on that element in response to
+ keyboard input.
+
+ <p>The <dfn id=tabindex0 title=dom-tabIndex><code>tabIndex</code></dfn> DOM
+ attribute reflects the value of the <code title=attr-tabindex><a
+ href="#tabindex">tabindex</a></code> content attribute. If the attribute
+ is not present (or has an invalid value) then the DOM attribute must
+ return the UA's default value for that element, which will be either 0
+ (for elements in the tab order) or -1 (for elements not in the tab order).</p>
+ <!--XXX
+ <h5>The <dfn><code>DocumentFocus</code></dfn> interface</h5>
+
+ <p>The <code>DocumentFocus</code> interface contains methods for
+ moving focus around the document. It can be obtained from objects
+ that implement the <code>Document</code> interface using
+ binding-specific casting methods.</p>
+
+ <pre class="idl">interface <dfn>DocumentFocus</dfn> {
+ void moveFocusForward();
+ void moveFocusBackward();
+ void moveFocusUp();
+ void moveFocusRight();
+ void moveFocusDown();
+ void moveFocusLeft();
+};</pre>
+
+ <p>The <dfn><code>currentFocus</code></dfn> attribute returns the
+ element to which key events will be sent when the document receives
+ key events.</p>
+
+ <p>The <dfn><code>moveFocusForward</code></dfn> method uses the
+ <code>'nav-index'</code> property and the <code>tabindex</code>
+ attribute to find the next focusable element and focuses it.</p>
+
+ <p>The <dfn><code>moveFocusBackward</code></dfn> method uses the
+ <code>'nav-index'</code> property and the <code>tabindex</code>
+ attribute to find the previous focusable element and focuses
+ it.</p>
+
+ <p>The <dfn><code>moveFocusUp</code></dfn> method uses the
+ <code>'nav-up'</code> property and the <code>tabindex</code>
+ attribute to find an appropriate focusable element and focuses
+ it.</p>
+
+ <p>In a similar manner, the <dfn><code>moveFocusRight</code></dfn>,
+ <dfn><code>moveFocusDown</code></dfn>, and
+ <dfn><code>moveFocusLeft</code></dfn> methods use the
+ <code>'nav-right'</code>, <code>'nav-down'</code>, and
+ <code>'nav-left'</code> properties (respectively), and the
+ <code>tabindex</code> attribute, to find an appropriate focusable
+ element and focus it.</p>
+
+ <p>The <code>'nav-index'</code>, <code>'nav-up'</code>,
+ <code>'nav-right'</code>, <code>'nav-down'</code>, and
+ <code>'nav-left'</code> properties are defined in <a
+ href="#refsCSS3UI">[CSS3UI]</a>.</p>
+
+Other things to look at are IE's focus APIs (document.activeElement,
+document.hasFocus, HTMLElement.setActive(), onBeforeActivate,
+onActivate, onBeforeDeactivate, onDeactivate, document.hasFocus):
+ https://bugzilla.mozilla.org/show_bug.cgi?id=296471
+ https://bugzilla.mozilla.org/show_bug.cgi?id=296469
+ http://msdn.microsoft.com/workshop/author/dhtml/reference/properties/activeelement.asp
+ http://msdn.microsoft.com/workshop/author/dhtml/reference/methods/setactive.asp
+ http://msdn.microsoft.com/workshop/author/dhtml/reference/events/onbeforeactivate.asp
+ http://msdn.microsoft.com/workshop/author/dhtml/reference/methods/focus.asp
+-->
+
+ <h4 id=scrolling><span class=secno>3.5.3. </span>Scrolling elements into
+ view</h4>
+
+ <p>The <dfn id=scrollintoview
+ title=dom-scrollIntoView><code>scrollIntoView([<var
+ title="">top</var>])</code></dfn> method, when called, must cause the
+ element on which the method was called to have the attention of the user
+ called to it.
+
+ <p class=note>In a speech browser, this could happen by having the current
+ playback position move to the start of the given element.
+
+ <p>In visual user agents, if the argument is present and has the value
+ false, the user agent should scroll the element into view such that both
+ the bottom and the top of the element are in the viewport, with the bottom
+ of the element aligned with the bottom of the viewport. If it isn't
+ possible to show the entire element in that way, or if the argument is
+ omitted or is true, then the user agent must instead simply align the top
+ of the element with the top of the viewport.
+
+ <p>Non-visual user agents may ignore the argument, or may treat it in some
+ media-specific manner most useful to the user.</p>
+ <!-- XXX maybe this should move to CSSOM -->
+
+ <h3 id=the-root><span class=secno>3.6. </span>The root element</h3>
+
+ <h4 id=the-html><span class=secno>3.6.1. </span>The <dfn
+ id=html><code>html</code></dfn> element</h4>
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the root element of a document.
+
+ <dd>Wherever a subdocument fragment is allowed in a compound document.
+
+ <dt>Content model:
+
+ <dd>A <code><a href="#head">head</a></code> element followed by a <code><a
+ href="#body0">body</a></code> element.</dd>
+ <!--XXX
+Steven Pemberton was as always, a remarkable speaker, but his answer
+to my question leaves me a very bad taste. Basically, I asked him why
+XHTML2 preserves the useless head and body element. The answer was in
+substance "because this is a compromise". Ah. So XHTML2 preserves two
+useless elements that add potential dangers to the interpretation and
+styling of documents because it's a compromise. Getting rid of head
+would allow to attach directly the document's metadata to the root
+element of the document, making much more sense than a head element.
+Having a head element also preserves the ridiculous
+engraved-in-the-marble "head contents are not rendered". Body is
+dangerous because it's another box between the document and the
+contents; you all have written a blog template with a <div
+class="main"> or <div class="content">. Why do we also need a body?
+ - http://www.glazman.org/weblog/dotclear/index.php?2005/05/27/1055-adam-2
+-->
+
+ <dt>Element-specific attributes:
+
+ <dd>None (but see prose).
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#html">html</a></code> element represents the root of
+ an HTML document.
+
+ <p>Though it has absolutely no effect and no meaning, the <code><a
+ href="#html">html</a></code> element, in <a href="#html-">HTML
+ documents</a>, may have an <code title="">xmlns</code> attribute
+ specified, if, and only if, it has the exact value
+ "<code>http://www.w3.org/1999/xhtml</code>". This does not apply to <a
+ href="#xml-documents">XML documents</a>.
+
+ <p class=note>In HTML, the <code title="">xmlns</code> attribute has
+ absolutely no effect. It is basically a talisman. It is allowed merely to
+ make migration to and from XHTML mildly easier. When parsed by an <a
+ href="#html-0">HTML parser</a>, the attribute ends up in the null
+ namespace, not the "<code>http://www.w3.org/2000/xmlns/</code>" namespace
+ like namespace declaration attributes in XML do.
+
+ <p class=note>In XML, an <code title="">xmlns</code> attribute is part of
+ the namespace declaration mechanism, and an element cannot actually have
+ an <code title="">xmlns</code> attribute in the null namespace specified.
+
+ <h3 id=document><span class=secno>3.7. </span>Document metadata</h3>
+
+ <p>Document metadata is represented by <dfn id=metadata>metadata
+ elements</dfn> in the document's <code><a href="#head">head</a></code>
+ element.
+
+ <h4 id=the-head><span class=secno>3.7.1. </span>The <dfn
+ id=head><code>head</code></dfn> element</h4>
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the first element in an <code><a href="#html">html</a></code>
+ element.
+
+ <dt>Content model:
+
+ <dd>In any order unless otherwise specified: optionally one <code><a
+ href="#meta0">meta</a></code> element with a <code
+ title=attr-meta-charset><a href="#charset0">charset</a></code> attribute,
+ exactly one <code><a href="#title1">title</a></code> element, optionally
+ one <code><a href="#base">base</a></code> element, and zero or more other
+ <a href="#metadata">metadata elements</a> (in particular, <code><a
+ href="#link">link</a></code>, <code><a href="#meta0">meta</a></code>,
+ <code><a href="#style">style</a></code>, and <code><a
+ href="#script0">script</a></code>).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#head">head</a></code> element collects the
+ document's metadata.
+
+ <h4 id=the-title0><span class=secno>3.7.2. </span>The <dfn
+ id=title1><code>title</code></dfn> element</h4>
+
+ <p><a href="#metadata" title="metadata elements">Metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element containing no other
+ <code><a href="#title1">title</a></code> elements.
+
+ <dt>Content model:
+
+ <dd>Text (for details, see prose).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#title1">title</a></code> element represents the
+ document's title or name. Authors should use titles that identify their
+ documents even when they are used out of context, for example in a user's
+ history or bookmarks, or in search results. The document's title is often
+ different from its first header, since the first header does not have to
+ stand alone when taken out of context.
+
+ <div class=example>
+ <p>Here are some examples of appropriate titles, contrasted with the
+ top-level headers that might be used on those same pages.</p>
+
+ <pre> &lt;title>Introduction to The Mating Rituals of Bees&lt;/title>
+ ...
+ &lt;h1>Introduction&lt;/h1>
+ &lt;p>This companion guide to the highly successful
+ &lt;cite>Introduction to Medieval Bee-Keeping&lt;/cite> book is...
+</pre>
+
+ <p>The next page might be a part of the same site. Note how the title
+ describes the subject matter unambiguously, while the first header
+ assumes the reader knows what the context is and therefore won't wonder
+ if the dances are Salsa or Waltz:</p>
+
+ <pre> &lt;title>Dances used during bee mating rituals&lt;/title>
+ ...
+ &lt;h1>The Dances&lt;/h1></pre>
+ </div>
+
+ <p>The <code><a href="#title1">title</a></code> element must not contain
+ any elements.
+
+ <p>The string to use as the document's title is given by the <code
+ title=dom-document-title><a
+ href="#document.title">document.title</a></code> DOM attribute. User
+ agents should use the document's title when referring to the document in
+ their user interface.
+
+ <h4 id=the-base><span class=secno>3.7.3. </span>The <dfn
+ id=base><code>base</code></dfn> element</h4>
+
+ <p><a href="#metadata" title="metadata elements">Metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element, after the <code><a
+ href="#meta0">meta</a></code> element with the <code
+ title=attr-meta-charset><a href="#charset0">charset</a></code> attribute,
+ if any, but before any other elements.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-base-href><a href="#href">href</a></code>
+
+ <dd><code title=attr-base-target><a href="#target">target</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlbaseelement>HTMLBaseElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#href0" title=dom-base-href>href</a>;
+ attribute DOMString <a href="#target0" title=dom-base-target>target</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#base">base</a></code> element allows authors to
+ specify the document's base URI for the purposes of resolving relative
+ URIs, and the name of the default <a href="#browsing0">browsing
+ context</a> for the purposes of <a href="#following0">following
+ hyperlinks</a>.
+
+ <p>There must be no more than one <code><a href="#base">base</a></code>
+ element per document.
+
+ <p>The <dfn id=href title=attr-base-href><code>href</code></dfn> content
+ attribute, if specified, must contain a URI (or IRI).
+
+ <p>User agents must use the value of the <code
+ title=attr-base-href><a href="#href">href</a></code> attribute of the first <code><a
+ href="#base">base</a></code> element that is both a child of <a
+ href="#the-head0">the <code>head</code> element</a> and has an <code
+ title=attr-base-href><a href="#href">href</a></code> attribute, if there is such an element, as
+ the document entity's base URI for the purposes of section 5.1.1 of RFC
+ 2396 ("Establishing a Base URI": "Base URI within Document Content"). This
+ base URI from RFC 2396 is referred to by the algorithm given in XML Base,
+ which <a href="#xmlBase">is a normative part of this specification</a>. <a
+ href="#refsRFC2396">[RFC2396]</a>
+
+ <p>If the base URI given by this attribute is a relative URI, it must be
+ resolved relative to the higher-level base URIs (i.e. the base URI from
+ the encapsulating entity or the URI used to retrieve the entity) to obtain
+ an absolute base URI. All <code title=attr-xml-base>xml:base</code>
+ attributes must be ignored when resolving relative URIs in this <code
+ title=attr-base-href><a href="#href">href</a></code> attribute.
+
+ <p class=note>If there are multiple <code><a href="#base">base</a></code>
+ elements with <code title=attr-base-href><a href="#href">href</a></code> attributes, all but
+ the first are ignored.
+
+ <p>The <dfn id=target title=attr-base-target><code>target</code></dfn>
+ attribute, if specified, must contain a <a href="#valid8">valid browsing
+ context name</a>. User agents use this name when <a
+ href="#following0">following hyperlinks</a>.
+
+ <p>The <dfn id=href0 title=dom-base-href><code>href</code></dfn> and <dfn
+ id=target0 title=dom-base-target><code>target</code></dfn> DOM attributes
+ must <a href="#reflect">reflect</a> the content attributes of the same
+ name.
+
+ <p class=note>Pages with multiple <code><a href="#base">base</a></code>
+ elements have all but their first <code><a href="#base">base</a></code>
+ element with an <code title=attr-base-href><a href="#href">href</a></code>
+ attribute ignored for the purposes of URI resolution, and all but their
+ first <code><a href="#base">base</a></code> element with a <code
+ title=attr-base-target><a href="#target">target</a></code> attribute
+ ignored for the purposes of default browsing context name resolution.</p>
+ <!-- XXX the former is for compat with IE7. The latter is not
+ actually exactly compatible with anything. We'll see if it breaks
+ anything. -->
+
+ <h4 id=the-link><span class=secno>3.7.4. </span>The <dfn
+ id=link><code>link</code></dfn> element</h4>
+
+ <p><a href="#metadata" title="metadata elements">Metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-link-href><a href="#href1">href</a></code> (required)
+
+ <dd><code title=attr-link-rel><a href="#rel">rel</a></code> (required)
+
+ <dd><code title=attr-link-media><a href="#media0">media</a></code>
+
+ <dd><code title=attr-link-hreflang><a href="#hreflang">hreflang</a></code>
+
+ <dd><code title=attr-link-type><a href="#type">type</a></code>
+
+ <dd>Also, the <code title=attr-link-title><a
+ href="#title2">title</a></code> attribute has special semantics on this
+ element.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmllinkelement>HTMLLinkElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute boolean <a href="#disabled" title=dom-link-disabled>disabled</a>;
+ attribute DOMString <a href="#href2" title=dom-link-href>href</a>;
+ attribute DOMString <a href="#rel0" title=dom-link-rel>rel</a>;
+ readonly attribute DOMTokenList <a href="#rellist" title=dom-link-relList>relList</a>;
+ attribute DOMString <a href="#media1" title=dom-link-media>media</a>;
+ attribute DOMString <a href="#hreflang0" title=dom-link-hreflang>hreflang</a>;
+ attribute DOMString <a href="#type0" title=dom-link-type>type</a>;
+};</pre>
+
+ <p>The <code>LinkStyle</code> interface must also be implemented by this
+ element; the <a href="#styling0">styling processing model</a> defines
+ how. <a href="#refsCSSOM">[CSSOM]</a></p>
+ </dl>
+
+ <p>The <code><a href="#link">link</a></code> element allows authors to
+ indicate explicit relationships between their document and other
+ resources.
+
+ <p>The destination of the link is given by the <dfn id=href1
+ title=attr-link-href><code>href</code></dfn> attribute, which must be
+ present and must contain a URI (or IRI). If the <code
+ title=attr-link-href><a href="#href1">href</a></code> attribute is absent,
+ then the element does not define a link.
+
+ <p>The type of link indicated (the relationship) is given by the value of
+ the <dfn id=rel title=attr-link-rel><code>rel</code></dfn> attribute,
+ which must be present, and must have a value that is an <a
+ href="#unordered">unordered set of space-separated tokens</a>. The <a
+ href="#linkTypes">allowed values and their meanings</a> are defined in a
+ later section. If the <code title=attr-link-rel><a
+ href="#rel">rel</a></code> attribute is absent, or if the value used is
+ not allowed according to the definitions in this specification, then the
+ element does not define a link.
+
+ <p>Two categories of links can be created using the <code><a
+ href="#link">link</a></code> element. <dfn id=links1 title="external
+ resource link">Links to external resources</dfn> are links to resources
+ that are to be used to augment the current document, and <dfn
+ id=hyperlink1 title="hyperlink link">hyperlink links</dfn> are <a
+ href="#hyperlinks" title=hyperlink>links to other documents</a>. The <a
+ href="#linkTypes">link types section</a> defines whether a particular link
+ type is an external resource or a hyperlink. One element can create
+ multiple links (of which some might be external resource links and some
+ might be hyperlinks). User agents should process the links on a per-link
+ basis, not a per-element basis.
+
+ <p>The exact behaviour for links to external resources depends on the exact
+ relationship, as defined for the relevant link type. Some of the
+ attributes control whether or not the external resource is to be applied
+ (as defined below). For external resources that are represented in the DOM
+ (for example, style sheets), the DOM representation must be made available
+ even if the resource is not applied. (However, user agents may opt to only
+ fetch such resources when they are needed, instead of pro-actively
+ downloading all the external resources that are not applied.)
+
+ <p>Interactive user agents should provide users with a means to <a
+ href="#following0" title="following hyperlinks">follow the hyperlinks</a>
+ created using the <code><a href="#link">link</a></code> element, somewhere
+ within their user interface. The exact interface is not defined by this
+ specification, but it should include the following information (obtained
+ from the element's attributes, again as defined below), in some form or
+ another (possibly simplified), for each hyperlink created with each
+ <code><a href="#link">link</a></code> element in the document:
+
+ <ul><!-- the order here is the order that makes most sense for a UI -->
+
+ <li>The relationship between this document and the resource (given by the
+ <code title=attr-link-rel><a href="#rel">rel</a></code> attribute)
+
+ <li>The title of the resource (given by the <code title=attr-link-title><a
+ href="#title2">title</a></code> attribute).
+
+ <li>The URI of the resource (given by the <code title=attr-link-href><a
+ href="#href1">href</a></code> attribute).
+
+ <li>The language of the resource (given by the <code
+ title=attr-link-hreflang><a href="#hreflang">hreflang</a></code>
+ attribute).
+
+ <li>The optimum media for the resource (given by the <code
+ title=attr-link-media><a href="#media0">media</a></code> attribute).
+ </ul>
+
+ <p>User agents may also include other information, such as the type of the
+ resource (as given by the <code title=attr-link-type><a
+ href="#type">type</a></code> attribute).
+
+ <p>The <dfn id=media0 title=attr-link-media><code>media</code></dfn>
+ attribute says which media the resource applies to. The value must be a
+ valid media query. <a href="#refsMQ">[MQ]</a>
+
+ <p>If the link is a <a href="#hyperlink1" title="hyperlink
+ link">hyperlink</a> then the <code title=attr-link-media><a
+ href="#media0">media</a></code> attribute is purely advisory, and
+ describes for which media the document in question was designed.
+
+ <p>However, if the link is an <a href="#links1">external resource link</a>,
+ then the <code title=attr-link-media><a href="#media0">media</a></code>
+ attribute is prescriptive. The user agent must only apply the external
+ resource to <span>views</span><!-- XXX xref --> while their state matches
+ the listed media.
+
+ <p id=default-media>The default, if the <code title=attr-link-media><a
+ href="#media0">media</a></code> attribute is omitted, is <code>all</code>,
+ meaning that by default links apply to all media.
+
+ <p>The <dfn id=hreflang
+ title=attr-link-hreflang><code>hreflang</code></dfn> attribute on the
+ <code><a href="#link">link</a></code> element has the same semantics as
+ the <a href="#hreflang3"
+ title=attr-hyperlink-hreflang><code>hreflang</code> attribute on hyperlink
+ elements</a>.
+
+ <p>The <dfn id=type title=attr-link-type><code>type</code></dfn> attribute
+ gives the MIME type of the linked resource. It is purely advisory. The
+ value must be a valid MIME type, optionally with parameters. <a
+ href="#refsRFC2046">[RFC2046]</a>
+
+ <p>For <a href="#links1" title="external resource link">external resource
+ links</a>, user agents may use the type given in this attribute to decide
+ whether or not to consider using the resource at all. If the UA does not
+ support the given MIME type for the given link relationship, then the UA
+ may opt not to download and apply the resource.
+
+ <p>User agents must not consider the <code title=attr-link-type><a
+ href="#type">type</a></code> attribute authoritative &mdash; upon fetching
+ the resource, user agents must not use metadata included in the link to
+ the resource to determine its type.
+
+ <p>If the attribute is omitted, then the UA must fetch the resource to
+ determine its type and thus determine if it supports (and can apply) that
+ external resource.
+
+ <div class=example>
+ <p>If a document contains three style sheet links labelled as follows:</p>
+
+ <pre>&lt;link rel="stylesheet" href="A" type="text/css"&gt;
+&lt;link rel="stylesheet" href="B" type="text/plain"&gt;
+&lt;link rel="stylesheet" href="C"&gt;</pre>
+
+ <p>...then a compliant UA that supported only CSS style sheets would fetch
+ the A and C files, and skip the B file (since <code>text/plain</code> is
+ not the MIME type for CSS style sheets). For these two files, it would
+ then check the actual types returned by the server. For those that are sent
+ as <code>text/css</code>, it would apply the styles, but for those
+ labelled as <code>text/plain</code>, or any other type, it would not.</p>
+ </div>
+ <!--(to be deleted) (charset dropped)
+ <p>The <dfn title="attr-link-charset"><code>charset</code></dfn>
+ attribute gives the character encoding of the linked resource. It is
+ purely advisory. The value must be a valid character encoding name.
+ <a href="#refsIANACHARSET">[IANACHARSET]</a></p>
+
+ <p>For <span title="external resource link">external resource
+ links</span>, user agents may use the character encoding given in
+ this attribute to decide whether or not to consider using the
+ resource at all. If the UA does not support the given encoding for
+ the given link relationship, then the UA may opt not to download and
+ apply the resource.</p>
+
+ <p>However, once the resource has been fetched, user agents must
+ follow the rules for that resource type when determining the actual
+ character encoding.</p>
+-->
+
+ <p>The <dfn id=title2 title=attr-link-title><code>title</code></dfn>
+ attribute gives the title of the link. With one exception, it is purely
+ advisory. The value is text. The exception is for style sheet links, where
+ the <code title=attr-link-title><a href="#title2">title</a></code>
+ attribute defines <a href="#alternative">alternative style sheet sets</a>.
+
+ <p class=note>The <code title=attr-link-title><a
+ href="#title2">title</a></code> attribute on <code><a
+ href="#link">link</a></code> elements differs from the global <code
+ title=attr-title><a href="#title">title</a></code> attribute of most other
+ elements in that a link without a title does not inherit the title of the
+ parent element: it merely has no title.
+
+ <p>Some versions of HTTP defined a <code title="">Link:</code> header, to
+ be processed like a series of <code><a href="#link">link</a></code>
+ elements. When processing links, those must be taken into consideration as
+ well. For the purposes of ordering, links defined by HTTP headers must be
+ assumed to come before any links in the document, in the order that they
+ were given in the HTTP entity header. Relative URIs in these headers must
+ be resolved according to the rules given in HTTP, not relative to base
+ URIs set by the document (e.g. using a <code><a
+ href="#base">base</a></code> element or <code
+ title=attr-xml-base>xml:base</code> attributes). <a
+ href="#refsRFC2616">[RFC2616]</a> <a href="#refsRFC2068">[RFC2068]</a>
+
+ <p>The DOM attributes <dfn id=href2
+ title=dom-link-href><code>href</code></dfn>, <dfn id=rel0
+ title=dom-link-rel><code>rel</code></dfn>, <dfn id=media1
+ title=dom-link-media><code>media</code></dfn>, <dfn id=hreflang0
+ title=dom-link-hreflang><code>hreflang</code></dfn>, and <dfn id=type0
+ title=dom-link-type><code>type</code></dfn> each must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM attribute <dfn id=rellist
+ title=dom-link-relList><code>relList</code></dfn> must <a
+ href="#reflect">reflect</a> the <code title=attr-link-rel><a
+ href="#rel">rel</a></code> content attribute.
+
+ <p>The DOM attribute <dfn id=disabled
+ title=dom-link-disabled><code>disabled</code></dfn> only applies to style
+ sheet links. When the <code><a href="#link">link</a></code> element
+ defines a style sheet link, then the <code title=dom-link-disabled><a
+ href="#disabled">disabled</a></code> attribute behaves as defined <a
+ href="#disabled1" title=dom-linkstyle-disabled>for the alternative style
+ sheets DOM</a>. For all other <code><a href="#link">link</a></code>
+ elements it always returns false and does nothing on setting.
+
+ <h4 id=meta><span class=secno>3.7.5. </span>The <dfn
+ id=meta0><code>meta</code></dfn> element</h4>
+
+ <p><a href="#metadata" title="metadata elements">Metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-meta-name><a href="#name">name</a></code>
+
+ <dd><code title=attr-meta-http-equiv><a
+ href="#http-equiv0">http-equiv</a></code>
+
+ <dd><code title=attr-meta-content><a href="#content0">content</a></code>
+
+ <dd><code title=attr-meta-charset><a href="#charset0">charset</a></code>
+ (<a href="#html-" title="HTML documents">HTML</a> only)
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlmetaelement>HTMLMetaElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#content1" title=dom-meta-content>content</a>;
+ attribute DOMString <a href="#name0" title=dom-meta-name>name</a>;
+ attribute DOMString <a href="#httpequiv" title=dom-meta-httpEquiv>httpEquiv</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#meta0">meta</a></code> element represents various
+ kinds of metadata that cannot be expressed using the <code><a
+ href="#title1">title</a></code>, <code><a href="#base">base</a></code>,
+ <code><a href="#link">link</a></code>, <code><a
+ href="#style">style</a></code>, and <code><a
+ href="#script0">script</a></code> elements.
+
+ <p>The <code><a href="#meta0">meta</a></code> element can represent
+ document-level metadata with the <code title=attr-meta-name><a
+ href="#name">name</a></code> attribute, pragma directives with the <code
+ title=attr-meta-http-equiv><a href="#http-equiv0">http-equiv</a></code>
+ attribute, and the file's character encoding declaration when an HTML
+ document is serialised to string form (e.g. for transmission over the
+ network or for disk storage) with the <code title=attr-meta-charset><a
+ href="#charset0">charset</a></code> attribute.
+
+ <p>Exactly one of the <code title=attr-meta-name><a
+ href="#name">name</a></code>, <code title=attr-meta-http-equiv><a
+ href="#http-equiv0">http-equiv</a></code>, and <code
+ title=attr-meta-charset><a href="#charset0">charset</a></code> attributes
+ must be specified.
+
+ <p>If either <code title=attr-meta-name><a href="#name">name</a></code> or
+ <code title=attr-meta-http-equiv><a
+ href="#http-equiv0">http-equiv</a></code> is specified, then the <code
+ title=attr-meta-content><a href="#content0">content</a></code> attribute
+ must also be specified. Otherwise, it must be omitted.
+
+ <p>The <code title=attr-meta-charset><a href="#charset0">charset</a></code>
+ attribute may only be specified in <a href="#html5" title=HTML5>HTML
+ documents</a>; it must not be used in <a href="#xhtml5" title=XHTML>XML
+ documents</a>. If the <code title=attr-meta-charset><a
+ href="#charset0">charset</a></code> attribute is specified, the element
+ must be the first element in <a href="#the-head0">the <code>head</code>
+ element</a> of the file.
+
+ <p>The <dfn id=content0 title=attr-meta-content><code>content</code></dfn>
+ attribute gives the value of the document metadata or pragma directive
+ when the element is used for those purposes. The allowed values depend on
+ the exact context, as described in subsequent sections of this
+ specification.
+
+ <p>If a <code><a href="#meta0">meta</a></code> element has a <dfn id=name
+ title=attr-meta-name><code>name</code></dfn> attribute, it sets document
+ metadata. Document metadata is expressed in terms of name/value pairs, the
+ <code title=attr-meta-name><a href="#name">name</a></code> attribute on
+ the <code><a href="#meta0">meta</a></code> element giving the name, and
+ the <code title=attr-meta-content><a href="#content0">content</a></code>
+ attribute on the same element giving the value. The name specifies what
+ aspect of metadata is being set; valid names and the meaning of their
+ values are described in the following sections. If a <code><a
+ href="#meta0">meta</a></code> element has no <code
+ title=attr-meta-content><a href="#content0">content</a></code> attribute,
+ then the value part of the metadata name/value pair is the empty string.
+
+ <p>The DOM attributes <dfn id=name0
+ title=dom-meta-name><code>name</code></dfn> and <dfn id=content1
+ title=dom-meta-content><code>content</code></dfn> must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name. The DOM attribute <dfn id=httpequiv
+ title=dom-meta-httpEquiv><code>httpEquiv</code></dfn> must reflect the
+ content attribute <code title=attr-meta-http-equiv><a
+ href="#http-equiv0">http-equiv</a></code>.
+
+ <h5 id=standard><span class=secno>3.7.5.1. </span>Standard metadata names</h5>
+
+ <p>This specification defines a few names for the <code
+ title=attr-meta-name><a href="#name">name</a></code> attribute of the
+ <code><a href="#meta0">meta</a></code> element.
+
+ <dl>
+ <dt><dfn id=generator title=meta-generator>generator</dfn>
+
+ <dd>
+ <p>The value must be a free-form string that identifies the software used
+ to generate the document. This value must not be used on hand-authored
+ pages. WYSIWYG editors have <a href="#wysiwyg2" title="WYSIWYG
+ signature">additional constraints</a> on the value used with this
+ metadata name.
+
+ <dt><dfn id=dns title=meta-dns>dns</dfn>
+
+ <dd>
+ <p>The value must be an <a href="#ordered">ordered set of unique
+ space-separated tokens</a>, each word of which is a host name. The list
+ allows authors to provide a list of host names that the user is expected
+ to subsequently need. User agents may, according to user preferences and
+ prevailing network conditions, pre-emptively resolve the given DNS names
+ (extracting the names from the value using the <a href="#split"
+ title="split a string on spaces">rules for splitting a string on
+ spaces</a>), thus precaching the DNS information for those hosts and
+ potentially reducing the time between page loads for subsequent user
+ interactions. Higher priority should be given to host names given
+ earlier in the list.
+ </dl>
+
+ <h5 id=other><span class=secno>3.7.5.2. </span>Other metadata names</h5>
+
+ <p><dfn id=extensions title=concept-meta-extensions>Extensions to the
+ predefined set of metadata names</dfn> may be registered in the <a
+ href="http://wiki.whatwg.org/wiki/MetaExtensions">WHATWG Wiki
+ MetaExtensions page</a>.
+
+ <p>Anyone is free to edit the WHATWG Wiki MetaExtensions page at any time
+ to add a type. These new names must be specified with the following
+ information:
+
+ <dl>
+ <dt>Keyword
+
+ <dd>
+ <p>The actual name being defined. The name should not be confusingly
+ similar to any other defined name (e.g. differing only in case).
+
+ <dt>Brief description
+
+ <dd>
+ <p>A short description of what the metadata name's meaning is, including
+ the format the value is required to be in.
+
+ <dt>Link to more details
+
+ <dd>A link to a more detailed description of the metadata name's semantics
+ and requirements. It could be another page on the Wiki, or a link to an
+ external page.
+
+ <dt>Synonyms
+
+ <dd>
+ <p>A list of other names that have exactly the same processing
+ requirements. Authors should not use the names defined to be synonyms;
+ they are only intended to allow user agents to support legacy content.
+
+ <dt>Status
+
+ <dd>
+ <p>One of the following:</p>
+
+ <dl>
+ <dt>Proposal
+
+ <dd>The name has not received wide peer review and approval. Someone has
+ proposed it and is using it.
+
+ <dt>Accepted
+
+ <dd>The name has received wide peer review and approval. It has a
+ specification that unambiguously defines how to handle pages that use
+ the name, including when they use it in incorrect ways.
+
+ <dt>Unendorsed
+
+ <dd>The metadata name has received wide peer review and it has been
+ found wanting. Existing pages are using this keyword, but new pages
+ should avoid it. The "brief description" and "link to more details"
+ entries will give details of what authors should use instead, if
+ anything.
+ </dl>
+
+ <p>If a metadata name is added with the "proposal" status and found to be
+ redundant with existing values, it should be removed and listed as a
+ synonym for the existing value.</p>
+ </dl>
+
+ <p>Conformance checkers must use the information given on the WHATWG Wiki
+ MetaExtensions page to establish if a value not explicitly defined in this
+ specification is allowed or not. When an author uses a new type not
+ defined by either this specification or the Wiki page, conformance
+ checkers should offer to add the value to the Wiki, with the details
+ described above, with the "proposal" status.
+
+ <p>This specification does not define how new values will get approved. It
+ is expected that the Wiki will have a community that addresses this.
+
+ <p>Metadata names whose values are to be URIs must not be proposed or
+ accepted. Links must be represented using the <code><a
+ href="#link">link</a></code> element, not the <code><a
+ href="#meta0">meta</a></code> element.
+
+ <h5 id=pragma><span class=secno>3.7.5.3. </span>Pragma directives</h5>
+
+ <p>When the <dfn id=http-equiv
+ title=attr-meta-http-equiv><code>http-equiv</code></dfn> attribute is
+ specified on a <code><a href="#meta0">meta</a></code> element, the element
+ is a pragma directive.
+
+ <p>The <dfn id=http-equiv0
+ title=attr-meta-http-equiv><code>http-equiv</code></dfn> attribute is an
+ <a href="#enumerated">enumerated attribute</a>. The following table lists
+ the keywords defined for this attribute. The states given in the first
+ cell of the rows with keywords give the states to which those keywords
+ map.<!-- Some of the keywords are non-conforming, as
+ noted in the last column.-->
+
+ <table>
+ <thead>
+ <tr>
+ <th>State
+
+ <th>Keywords <!-- <th>Notes-->
+
+ <tbody><!-- things that are neither conforming nor do anything are commented out
+ <tr>
+ <td><span title="attr-meta-http-equiv-content-language">Content-Language</span>
+ <td><code title="">Content-Language</code>
+ <td>Non-conforming
+ <tr>
+ <td><span title="attr-meta-http-equiv-content-type">Content-Type</span>
+ <td><code title="">Content-Type</code>
+ <td>Non-conforming
+ <tr>
+ <td><span title="attr-meta-http-equiv-content-script-type">Content-Script-Type</span>
+ <td><code title="">Content-Script-Type</code>
+ <td>Non-conforming
+ <tr>
+ <td><span title="attr-meta-http-equiv-content-style-type">Content-Style-Type</span>
+ <td><code title="">Content-Style-Type</code>
+ <td>Non-conforming
+-->
+
+ <tr>
+ <td><a href="#refresh" title=attr-meta-http-equiv-refresh>Refresh</a>
+
+ <td><code title="">refresh</code> <!-- <td>-->
+
+ <tr>
+ <td><a href="#default" title=attr-meta-http-equiv-default-style>Default
+ style</a>
+
+ <td><code title="">default-style</code> <!-- <td>-->
+ </table>
+
+ <p>When a <code><a href="#meta0">meta</a></code> element is inserted into
+ the document, if its <code title=attr-meta-http-equiv><a
+ href="#http-equiv0">http-equiv</a></code> attribute is present and
+ represents one of the above states, then the user agent must run the
+ algorithm appropriate for that state, as described in the following list:
+
+ <dl>
+ <dt><dfn id=refresh title=attr-meta-http-equiv-refresh>Refresh state</dfn>
+
+
+ <dd>
+ <ol><!-- TESTS: http://www.hixie.ch/tests/adhoc/html/meta/refresh/ -->
+
+ <li>
+ <p>If another <code><a href="#meta0">meta</a></code> element in the <a
+ href="#refresh" title=attr-meta-http-equiv-refresh>Refresh state</a>
+ has already been successfully processed (i.e. when it was inserted the
+ user agent processed it and reached the last step of this list of
+ steps), then abort these steps.
+
+ <li>
+ <p>If the <code><a href="#meta0">meta</a></code> element has no <code
+ title=attr-meta-content><a href="#content0">content</a></code>
+ attribute, or if that attribute's value is the empty string, then
+ abort these steps.
+
+ <li>
+ <p>Let <var title="">input</var> be the value of the element's <code
+ title=attr-meta-content><a href="#content0">content</a></code>
+ attribute.
+
+ <li>
+ <p>Let <var title="">position</var> point at the first character of
+ <var title="">input</var>.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>.
+
+ <li>
+ <p><a href="#collect" title="collect a sequence of characters">Collect
+ a sequence of characters</a> in the range U+0030 DIGIT ZERO to U+0039
+ DIGIT NINE, and parse the resulting string using the <a
+ href="#rules">rules for parsing non-negative integers</a>. If the
+ sequence of characters collected is the empty string, then no number
+ will have been parsed; abort these steps. Otherwise, let <var
+ title="">time</var> be the parsed number.
+
+ <li>
+ <p><a href="#collect" title="collect a sequence of characters">Collect
+ a sequence of characters</a> in the range U+0030 DIGIT ZERO to U+0039
+ DIGIT NINE and U+002E FULL STOP ("<code title="">.</code>"). Ignore
+ any collected characters.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>.
+
+ <li>
+ <p>Let <var title="">url</var> be the address of the current page.
+
+ <li>
+ <p>If the character in <var title="">input</var> pointed to by <var
+ title="">position</var> is a U+003B SEMICOLON ("<code
+ title="">;</code>"), then advance <var title="">position</var> to the
+ next character. Otherwise, jump to the last step.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>.
+
+ <li>
+ <p>If the character in <var title="">input</var> pointed to by <var
+ title="">position</var> is one of U+0055 LATIN CAPITAL LETTER U or
+ U+0075 LATIN SMALL LETTER U, then advance <var title="">position</var>
+ to the next character. Otherwise, jump to the last step.
+
+ <li>
+ <p>If the character in <var title="">input</var> pointed to by <var
+ title="">position</var> is one of U+0052 LATIN CAPITAL LETTER R or
+ U+0072 LATIN SMALL LETTER R, then advance <var title="">position</var>
+ to the next character. Otherwise, jump to the last step.
+
+ <li>
+ <p>If the character in <var title="">input</var> pointed to by <var
+ title="">position</var> is one of U+004C LATIN CAPITAL LETTER L or
+ U+006C LATIN SMALL LETTER L, then advance <var title="">position</var>
+ to the next character. Otherwise, jump to the last step.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>.
+
+ <li>
+ <p>If the character in <var title="">input</var> pointed to by <var
+ title="">position</var> is a U+003D EQUALS SIGN ("<code
+ title="">=</code>"), then advance <var title="">position</var> to the
+ next character. Otherwise, jump to the last step.
+
+ <li>
+ <p><a href="#skip-whitespace">Skip whitespace</a>.
+
+ <li>
+ <p>Let <var title="">url</var> be equal to the substring of <var
+ title="">input</var> from the character at <var
+ title="">position</var> to the end of the string.
+
+ <li>
+ <p>Strip any trailing <a href="#space" title="space character">space
+ characters</a> from the end of <var title="">url</var>.
+
+ <li>
+ <p>Strip any U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), and
+ U+000D CARRIAGE RETURN (CR) characters from <var title="">url</var>.
+
+ <li>
+ <p>Resolve the <var title="">url</var> value to an absolute URI using
+ the base URI of the <code><a href="#meta0">meta</a></code> element.
+
+ <li>
+ <p>Set a timer so that in <var title="">time</var> seconds, if the user
+ has not canceled the redirect, the user agent <a href="#navigate"
+ title=navigate>navigates</a> to <var title="">url</var>, with <a
+ href="#replacement">replacement enabled</a>.
+ </ol>
+
+ <p>For <code><a href="#meta0">meta</a></code> elements in the <a
+ href="#refresh" title=attr-meta-http-equiv-refresh>Refresh state</a>,
+ the <code title=attr-meta-content><a href="#content0">content</a></code>
+ attribute must have a value consisting either of:
+
+ <ul>
+ <li> just a <a href="#valid">valid non-negative integer</a>, or
+
+ <li> a <a href="#valid">valid non-negative integer</a>, followed by a
+ U+003B SEMICOLON (<code title="">;</code>), followed by one or more <a
+ href="#space" title="space character">space characters</a>, followed by
+ either a U+0055 LATIN CAPITAL LETTER U or a U+0075 LATIN SMALL LETTER
+ U, a U+0052 LATIN CAPITAL LETTER R or a U+0072 LATIN SMALL LETTER R, a
+ U+004C LATIN CAPITAL LETTER L or a U+006C LATIN SMALL LETTER L, a
+ U+003D EQUALS SIGN (<code title="">=</code>), and then a valid URI (or
+ IRI).
+ </ul>
+
+ <p>In the former case, the integer represents a number of seconds before
+ the page is to be reloaded; in the latter case the integer represents a
+ number of seconds before the page is to be replaced by the page at the
+ given URI.</p>
+
+ <dd>
+
+ <dt><dfn id=default title=attr-meta-http-equiv-default-style>Default style
+ state</dfn>
+
+ <dd>
+ <ol>
+ <li class=big-issue>...
+ </ol>
+
+ <dd>
+ </dl>
+
+ <h5 id=charset><span class=secno>3.7.5.4. </span>Specifying and
+ establishing the document's character encoding</h5>
+
+ <p>The <code><a href="#meta0">meta</a></code> element may also be used to
+ provide UAs with character encoding information for <a href="#html5"
+ title=HTML5>HTML</a> files, by setting the <dfn id=charset0
+ title=attr-meta-charset><code>charset</code></dfn> attribute to the name
+ of a character encoding. This is called a character encoding declaration.
+
+ <p>The following restrictions apply to character encoding declarations:
+
+ <ul>
+ <li>When <a href="#syntax">serialised</a>, the <code
+ title=attr-meta-charset><a href="#charset0">charset</a></code> attribute
+ and its value must be contained completely in the first 512 bytes of the
+ file.
+
+ <li>The attribute value must be serialised without the use of character
+ entity references of any kind.
+
+ <li>The value must be a valid character encoding name. <a
+ href="#refsIANACHARSET">[IANACHARSET]</a> <!-- XXX
+ http://www.iana.org/assignments/character-sets -->
+
+ <li>The character encoding name given must be the name of the character
+ encoding used to serialise the file.
+
+ <li>The character encoding used must be a superset of US-ASCII
+ (specifically, ANSI_X3.4-1968) for bytes in the range 0x09 - 0x0D, 0x20,
+ 0x21, 0x22, 0x26, 0x27, 0x2C - 0x3F, 0x41 - 0x5A, and 0x61 - 0x7A.</li>
+ <!-- XXX #refs RFC1345 ? -->
+ <!-- is that list ok? do
+ any character sets we want to support do things outside that range?
+ -->
+ </ul>
+
+ <p>If the encoding is one of UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or
+ UTF-32LE, then authors can use a BOM at the start of the file to indicate
+ the character encoding.</p>
+ <!-- UTF-EBCDIC, too! -->
+
+ <p>In XHTML, the XML declaration should be used for inline character
+ encoding information, if necessary.
+
+ <h4 id=the-style><span class=secno>3.7.6. </span>The <dfn
+ id=style><code>style</code></dfn> element</h4>
+
+ <p><a href="#metadata" title="metadata elements">Metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element.
+
+ <dd>At the start of <code><a href="#article">article</a></code>, <code><a
+ href="#aside">aside</a></code>, <code><a href="#div">div</a></code>, and
+ <code><a href="#section">section</a></code> elements.
+
+ <dt>Content model:
+
+ <dd>Depends on the value of the <code title=attr-style-type><a
+ href="#type1">type</a></code> attribute.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-style-media><a href="#media2">media</a></code>
+
+ <dd><code title=attr-style-type><a href="#type1">type</a></code>
+
+ <dd><code title=attr-style-scoped><a href="#scoped">scoped</a></code>
+
+ <dd>Also, the <code title=attr-style-title><a
+ href="#title3">title</a></code> attribute has special semantics on this
+ element.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlstyleelement>HTMLStyleElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute boolean <code title=dom-style-disabled><a href="#disabled0">disabled</a></code>;
+ attribute DOMString <code title=dom-style-media><a href="#media3">media</a></code>;
+ attribute DOMString <code title=dom-style-type><a href="#type2">type</a></code>;
+ attribute boolean <code title=dom-style-scoped><a href="#scoped0">scoped</a></code>;
+};</pre>
+
+ <p>The <code>LinkStyle</code> interface must also be implemented by this
+ element, the <a href="#styling0">styling processing model</a> defines
+ how. <a href="#refsCSSOM">[CSSOM]</a></p>
+ </dl>
+
+ <p>The <code><a href="#style">style</a></code> element allows authors to
+ embed style information in their documents. The <code><a
+ href="#style">style</a></code> element is one of several inputs to the <a
+ href="#styling0">styling processing model</a>.
+
+ <p>If the <dfn id=type1 title=attr-style-type><code>type</code></dfn>
+ attribute is given, it must contain a valid MIME type, optionally with
+ parameters, that designates a styling language. <a
+ href="#refsRFC2046">[RFC2046]</a> If the attribute is absent, the type
+ defaults to <code>text/css</code>. <a href="#refsRFC2318">[RFC2318]</a></p>
+ <!-- XXX this is the second time we have this paragraph here... -->
+
+ <p>When examining types to determine if they support the language, user
+ agents must not ignore unknown MIME parameters &mdash; types with unknown
+ parameters must be assumed to be unsupported.
+
+ <p>The <dfn id=media2 title=attr-style-media><code>media</code></dfn>
+ attribute says which media the styles apply to. The value must be a valid
+ media query. <a href="#refsMQ">[MQ]</a> User agents must only apply the
+ styles to <span>views</span> while their state matches the listed media. <a
+ href="#refsDOM3VIEWS">[DOM3VIEWS]</a>
+
+ <p id=style-default-media>The default, if the <code
+ title=attr-style-media><a href="#media2">media</a></code> attribute is
+ omitted, is <code>all</code>, meaning that by default styles apply to all
+ media.
+
+ <p>The <dfn id=scoped title=attr-style-scoped><code>scoped</code></dfn>
+ attribute is a <a href="#boolean0">boolean attribute</a>. If the attribute
+ is present, then the user agent must only apply the specified style
+ information to the <code><a href="#style">style</a></code> element's
+ parent element (if any), and that element's child nodes. Otherwise, the
+ specified styles must, if applied, be applied to the entire document.
+
+ <p id=title-on-style>The <dfn id=title3
+ title=attr-style-title><code>title</code></dfn> attribute on <code><a
+ href="#style">style</a></code> elements defines <a
+ href="#alternative">alternative style sheet sets</a>. If the <code><a
+ href="#style">style</a></code> element has no <code
+ title=attr-style-title><a href="#title3">title</a></code> attribute, then
+ it has no title; the <code title=attr-title><a
+ href="#title">title</a></code> attribute of ancestors does not apply to
+ the <code><a href="#style">style</a></code> element.</p>
+ <!--
+ XXX xref -->
+
+ <p class=note>The <code title=attr-style-title><a
+ href="#title3">title</a></code> attribute on <code><a
+ href="#style">style</a></code> elements, like the <code
+ title=attr-link-title><a href="#title2">title</a></code> attribute on
+ <code><a href="#link">link</a></code> elements, differs from the global
+ <code title=attr-title><a href="#title">title</a></code> attribute in that
+ a <code><a href="#style">style</a></code> block without a title does not
+ inherit the title of the parent element: it merely has no title.
+
+ <p>All descendant elements must be processed, according to their semantics,
+ before the <code><a href="#style">style</a></code> element itself is
+ evaluated. For styling languages that consist of pure text, user agents
+ must evaluate <code><a href="#style">style</a></code> elements by passing
+ the concatenation of the contents of all the <a href="#text-node"
+ title="text node">text nodes</a> that are direct children of the <code><a
+ href="#style">style</a></code> element (not any other nodes such as
+ comments or elements), in <a href="#tree-order">tree order</a>, to the
+ style system. For XML-based styling languages, user agents must pass all
+ the children nodes of the <code><a href="#style">style</a></code> element
+ to the style system.
+
+ <p class=note>This specification does not specify a style system, but CSS
+ is expected to be supported by most Web browsers. <a
+ href="#refsCSS21">[CSS21]</a>
+
+ <p>The <dfn id=media3 title=dom-style-media><code>media</code></dfn>, <dfn
+ id=type2 title=dom-style-type><code>type</code></dfn> and <dfn id=scoped0
+ title=dom-style-scoped><code>scoped</code></dfn> DOM attributes must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM <dfn id=disabled0
+ title=dom-style-disabled><code>disabled</code></dfn> attribute behaves as
+ defined <a href="#disabled1" title=dom-linkstyle-disabled>for the
+ alternative style sheets DOM</a>.
+
+ <h4 id=styling><span class=secno>3.7.7. </span><dfn id=styling0
+ title="styling processing model">Styling</dfn></h4>
+
+ <p>The <code><a href="#link">link</a></code> and <code><a
+ href="#style">style</a></code> elements can provide styling information
+ for the user agent to use when rendering the document. The DOM Styling
+ specification specifies what styling information is to be used by the user
+ agent and how it is to be used. <a href="#refsCSSOM">[CSSOM]</a>
+
+ <p>The <code><a href="#style">style</a></code> and <code><a
+ href="#link">link</a></code> elements implement the <code>LinkStyle</code>
+ interface. <a href="#refsCSSOM">[CSSOM]</a>
+
+ <p>For <code><a href="#style">style</a></code> elements, if the user agent
+ does not support the specified styling language, then the <code
+ title=dom-LinkStyle-sheet>sheet</code> attribute of the element's
+ <code>LinkStyle</code> interface must return null. Similarly, <code><a
+ href="#link">link</a></code> elements that do not represent <a
+ href="#stylesheet" title=rel-stylesheet>external resource links that
+ contribute to the styling processing model</a> (i.e. that do not have a
+ <code title=rel-stylesheet><a href="#stylesheet">stylesheet</a></code>
+ keyword in their <code title=attr-link-rel><a href="#rel">rel</a></code>
+ attribute), and <code><a href="#link">link</a></code> elements whose
+ specified resource has not yet been downloaded, or is not in a supported
+ styling language, must have their <code>LinkStyle</code> interface's <code
+ title=dom-LinkStyle-sheet>sheet</code> attribute return null.
+
+ <p>Otherwise, the <code>LinkStyle</code> interface's <code
+ title=dom-LinkStyle-sheet>sheet</code> attribute must return a
+ <code>StyleSheet</code> object with the attributes implemented as follows:
+ <a href="#refsCSSOM">[CSSOM]</a>
+
+ <dl>
+ <dt>The content type (<code title=dom-stylesheet-type>type</code> DOM
+ attribute)
+
+ <dd>
+ <p>The content type must be the same as the style's specified type. For
+ <code><a href="#style">style</a></code> elements, this is the same as
+ the <code title=attr-style-type><a href="#type1">type</a></code> content
+ attribute's value, or <code title="">text/css</code> if that is omitted.
+ For <code><a href="#link">link</a></code> elements, this is the <a
+ href="#content-type8" title=Content-Type>Content-Type metadata of the
+ specified resource</a>.
+
+ <dt>The location (<code title=dom-stylesheet-href>href</code> DOM
+ attribute)
+
+ <dd>
+ <p>For <code><a href="#link">link</a></code> elements, the location must
+ be the URI given by the element's <code title=attr-link-href><a
+ href="#href1">href</a></code> content attribute. For <code><a
+ href="#style">style</a></code> elements, there is no location.
+
+ <dt>The intended destination media for style information (<code
+ title=dom-stylesheet-media>media</code> DOM attribute)
+
+ <dd>
+ <p>The media must be the same as the value of the element's <code
+ title="">media</code> content attribute.
+
+ <dt>The style sheet title (<code title=dom-stylesheet-title>title</code>
+ DOM attribute)
+
+ <dd>
+ <p>The title must be the same as the value of the element's <code
+ title="">title</code> content attribute. If the attribute is absent,
+ then the style sheet does not have a title. The title is used for
+ defining <dfn id=alternative>alternative style sheet sets</dfn>.
+ </dl>
+
+ <p>The <dfn id=disabled1
+ title=dom-LinkStyle-disabled><code>disabled</code></dfn> DOM attribute on
+ <code><a href="#link">link</a></code> and <code><a
+ href="#style">style</a></code> elements must return false and do nothing
+ on setting, if the <code title=dom-linkstyle-sheet>sheet</code> attribute
+ of their <code>LinkStyle</code> interface is null. Otherwise, it must
+ return the value of the <code>StyleSheet</code> interface's <code
+ title=dom-stylesheet-disabled>disabled</code> attribute on getting, and
+ forward the new value to that same attribute on setting.</p>
+ <!-- <p class="big-issue">Need more here - defining preferred
+ stylesheets, alternative stylesheets, persistent stylesheets, ordering
+ of stylesheets, dynamic additions/removals, how it maps to
+ .styleSheets, HTTP Link: headers, and the stuff about the alternative
+ stylesheet API.</p> XXX that will all be covered by Anne's spec -->
+
+ <h3 id=sections><span class=secno>3.8. </span>Sections</h3>
+
+ <p><dfn id=sectioning>Sectioning elements</dfn> are elements that divide
+ the page into, for lack of a better word, sections. This section describes
+ HTML's sectioning elements and elements that support them.
+
+ <p id=applyToSection>Some elements are scoped to their nearest ancestor
+ sectioning element. For example, <code><a
+ href="#address">address</a></code> elements apply just to their section.
+ For such elements <var title="">x</var>, the elements that apply to a
+ sectioning element <var title="">e</var> are all the <var title="">x</var>
+ elements whose nearest sectioning element is <var title="">e</var>.
+
+ <h4 id=the-body><span class=secno>3.8.1. </span>The <dfn
+ id=body0><code>body</code></dfn> element</h4>
+
+ <p><a href="#sectioning" title="sectioning elements">Sectioning
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the second element in an <code><a href="#html">html</a></code>
+ element.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#body0">body</a></code> element represents the main
+ content of the document.
+
+ <p>The <code><a href="#body0">body</a></code> element potentially has a
+ heading. See the section on <a href="#headings0">headings and sections</a>
+ for further details.
+
+ <p>In conforming documents, there is only one <code><a
+ href="#body0">body</a></code> element. The <code
+ title=dom-document-body><a href="#body">document.body</a></code> DOM
+ attribute provides scripts with easy access to a document's <code><a
+ href="#body0">body</a></code> element.
+
+ <p class=note>Some DOM operations (for example, parts of the <a
+ href="#drag-and">drag and drop</a> model) are defined in terms of "<a
+ href="#the-body0">the body element</a>". This refers to a particular
+ element in the DOM, as per the definition of the term, and not any
+ arbitrary <code><a href="#body0">body</a></code> element.
+
+ <h4 id=the-section><span class=secno>3.8.2. </span>The <dfn
+ id=section><code>section</code></dfn> element</h4>
+
+ <p><a href="#sectioning" title="sectioning elements">Sectioning</a> <a
+ href="#block-level0" title="block-level elements">block-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#style">style</a></code> elements,
+ followed by zero or more <a href="#block-level0">block-level
+ elements</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#section">section</a></code> element represents a
+ generic document or application section. A section, in this context, is a
+ thematic grouping of content, typically with a header, possibly with a
+ footer.
+
+ <p class=example>Examples of sections would be chapters, the various tabbed
+ pages in a tabbed dialog box, or the numbered sections of a thesis. A Web
+ site's home page could be split into sections for an introduction, news
+ items, and contact information.
+
+ <p>Each <code><a href="#section">section</a></code> element potentially has
+ a heading. See the section on <a href="#headings0">headings and
+ sections</a> for further details.
+
+ <h4 id=the-nav><span class=secno>3.8.3. </span>The <dfn
+ id=nav><code>nav</code></dfn> element</h4>
+
+ <p><a href="#sectioning" title="sectioning elements">Sectioning</a> <a
+ href="#block-level0" title="block-level elements">block-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>, or <a
+ href="#inline-level0">inline-level content</a> (but not both).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#nav">nav</a></code> element represents a section of
+ a page that links to other pages or to parts within the page: a section
+ with navigation links.
+
+ <p>When <a href="#determining0" title="Determining if a particular element
+ contains block-level elements or inline-level content">used as an
+ inline-level content</a> container, the element represents a <a
+ href="#paragraph">paragraph</a>.
+
+ <p>Each <code><a href="#nav">nav</a></code> element potentially has a
+ heading. See the section on <a href="#headings0">headings and sections</a>
+ for further details.
+
+ <h4 id=the-article><span class=secno>3.8.4. </span>The <dfn
+ id=article><code>article</code></dfn> element</h4>
+
+ <p><a href="#sectioning" title="sectioning elements">Sectioning</a> <a
+ href="#block-level0" title="block-level elements">block-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#style">style</a></code> elements,
+ followed by zero or more <a href="#block-level0">block-level
+ elements</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.</dd>
+ <!--
+XXX attributes to give the date authored, date published
+-->
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#article">article</a></code> element represents a
+ section of a page that consists of a composition that forms an independent
+ part of a document, page, or site. This could be a forum post, a magazine
+ or newspaper article, a Web log entry, a user-submitted comment, or any
+ other independent item of content.
+
+ <p class=note>An <code><a href="#article">article</a></code> element is
+ "independent" in that its contents could stand alone, for example in
+ syndication. However, the element is still associated with its ancestors;
+ for instance, contact information that <a
+ href="#applyToSection">applies</a> to a parent <code><a
+ href="#body0">body</a></code> element still covers the <code><a
+ href="#article">article</a></code> as well.
+
+ <p>When <code><a href="#article">article</a></code> elements are nested,
+ the inner <code><a href="#article">article</a></code> elements represent
+ articles that are in principle related to the contents of the outer
+ article. For instance, a Web log entry on a site that accepts
+ user-submitted comments could represent the comments as <code><a
+ href="#article">article</a></code> elements nested within the <code><a
+ href="#article">article</a></code> element for the Web log entry.
+
+ <p>Author information associated with an <code><a
+ href="#article">article</a></code> element (q.v. the <code><a
+ href="#address">address</a></code> element) does not apply to nested
+ <code><a href="#article">article</a></code> elements.
+
+ <p>Each <code><a href="#article">article</a></code> element potentially has
+ a heading. See the section on <a href="#headings0">headings and
+ sections</a> for further details.
+
+ <h4 id=the-blockquote><span class=secno>3.8.5. </span>The <dfn
+ id=blockquote><code>blockquote</code></dfn> element</h4>
+
+ <p><a href="#sectioning" title="sectioning elements">Sectioning</a> <a
+ href="#block-level0" title="block-level elements">block-level element</a>,
+ and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-blockquote-cite><a href="#cite">cite</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlquoteelement>HTMLQuoteElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#cite0" title=dom-quote-cite>cite</a>;
+};</pre>
+
+ <p class=note>The <code><a
+ href="#htmlquoteelement">HTMLQuoteElement</a></code> interface is also
+ used by the <code><a href="#q">q</a></code> element.</p>
+ </dl>
+
+ <p>The <code><a href="#blockquote">blockquote</a></code> element represents
+ a section that is quoted from another source.
+
+ <p>Content inside a <code><a href="#blockquote">blockquote</a></code> must
+ be quoted from another source, whose URI, if it has one, should be cited
+ in the <dfn id=cite title=attr-blockquote-cite><code>cite</code></dfn>
+ attribute.
+
+ <p>If the <code title=attr-blockquote-cite><a href="#cite">cite</a></code>
+ attribute is present, it must be a URI (or IRI). User agents should allow
+ users to follow such citation links.
+
+ <p>If a <code><a href="#blockquote">blockquote</a></code> element is <a
+ href="#preceeded">preceded or followed</a> by a <code><a
+ href="#p">p</a></code> element that contains a single <code><a
+ href="#cite2">cite</a></code> element and is itself not <a
+ href="#preceeded">preceded or followed</a> by another <code><a
+ href="#blockquote">blockquote</a></code> element and does not itself have
+ a <code><a href="#q">q</a></code> element descendant, then, the citation
+ given by that <code><a href="#cite2">cite</a></code> element gives the
+ source of the quotation contained in the <code><a
+ href="#blockquote">blockquote</a></code> element.
+
+ <p>Each <code><a href="#blockquote">blockquote</a></code> element
+ potentially has a heading. See the section on <a
+ href="#headings0">headings and sections</a> for further details.
+
+ <p>The <dfn id=cite0 title=dom-quote-cite><code>cite</code></dfn> DOM
+ attribute <code>reflects</code> the element's <code title="">cite</code>
+ content attribute.
+
+ <p class=note>The best way to represent a conversation is not with the
+ <code><a href="#cite2">cite</a></code> and <code><a
+ href="#blockquote">blockquote</a></code> elements, but with the <code><a
+ href="#dialog">dialog</a></code> element.
+
+ <h4 id=the-aside><span class=secno>3.8.6. </span>The <dfn
+ id=aside><code>aside</code></dfn> element</h4>
+
+ <p><a href="#sectioning" title="sectioning elements">Sectioning</a> <a
+ href="#block-level0" title="block-level elements">block-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#style">style</a></code> elements,
+ followed by either zero or more <a href="#block-level0">block-level
+ elements</a>, or <a href="#inline-level0">inline-level content</a> (but
+ not both).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#aside">aside</a></code> element represents a section
+ of a page that consists of content that is tangentially related to the
+ content around the <code><a href="#aside">aside</a></code> element, and
+ which could be considered separate from that content. Such sections are
+ often represented as sidebars in printed typography.
+
+ <p>When <a href="#determining0" title="Determining if a particular element
+ contains block-level elements or inline-level content">used as an
+ inline-level content</a> container, the element represents a <a
+ href="#paragraph">paragraph</a>.
+
+ <p>Each <code><a href="#aside">aside</a></code> element potentially has a
+ heading. See the section on <a href="#headings0">headings and sections</a>
+ for further details.
+
+ <h4 id=the-h1><span class=secno>3.8.7. </span>The <dfn
+ id=h1><code>h1</code></dfn>, <dfn id=h2><code>h2</code></dfn>, <dfn
+ id=h3><code>h3</code></dfn>, <dfn id=h4><code>h4</code></dfn>, <dfn
+ id=h5><code>h5</code></dfn>, and <dfn id=h6><code>h6</code></dfn> elements</h4>
+
+ <p><a href="#block-level0">Block-level elements</a>.
+
+ <dl class=element>
+ <dt>Contexts in which these elements may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd><a href="#significant" title="significant inline
+ content">Significant</a> <a href="#strictly">strictly inline-level
+ content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>These elements define headers for their sections.
+
+ <p>The semantics and meaning of these elements are defined in the section
+ on <a href="#headings0">headings and sections</a>.
+
+ <p>These elements have a <dfn id=rank>rank</dfn> given by the number in
+ their name. The <code><a href="#h1">h1</a></code> element is said to have
+ the highest rank, the <code><a href="#h6">h6</a></code> element has the
+ lowest rank, and two elements with the same name have equal rank.
+
+ <p>These elements must not be <a href="#significant" title="significant
+ inline content">empty</a>.
+
+ <h4 id=the-header><span class=secno>3.8.8. </span>The <dfn
+ id=header><code>header</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected
+ and there are no <code><a href="#header">header</a></code> ancestors.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>,
+ including at least one descendant <code><a href="#h1">h1</a></code>,
+ <code><a href="#h2">h2</a></code>, <code><a href="#h3">h3</a></code>,
+ <code><a href="#h4">h4</a></code>, <code><a href="#h5">h5</a></code>, or
+ <code><a href="#h6">h6</a></code> element, but no <a href="#sectioning" title="sectioning elements">sectioning
+ element</a> descendants, no <code><a href="#header">header</a></code>
+ element descendants, and no <code><a href="#footer">footer</a></code>
+ element descendants.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#header">header</a></code> element represents the
+ header of a section. Headers may contain more than just the section's
+ heading &mdash; for example it would be reasonable for the header to
+ include version history information.
+
+ <p><code><a href="#header">header</a></code> elements must not contain any
+ <code><a href="#header">header</a></code> elements, <code><a
+ href="#footer">footer</a></code> elements, or any sectioning elements
+ (such as <code><a href="#section">section</a></code>) as descendants.
+
+ <p><code><a href="#header">header</a></code> elements must have at least
+ one <code><a href="#h1">h1</a></code>, <code><a href="#h2">h2</a></code>,
+ <code><a href="#h3">h3</a></code>, <code><a href="#h4">h4</a></code>,
+ <code><a href="#h5">h5</a></code>, or <code><a href="#h6">h6</a></code>
+ element as a descendant.
+
+ <p>For the purposes of document summaries, outlines, and the like, <code><a
+ href="#header">header</a></code> elements are equivalent to the highest <a
+ href="#rank" title=rank>ranked</a> <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code> element
+ descendant (the first such element if there are multiple elements with
+ that <a href="#rank">rank</a>).
+
+ <p>Other heading elements indicate subheadings or subtitles.
+
+ <div class=example>
+ <p>Here are some examples of valid headers. In each case, the emphasised
+ text represents the text that would be used as the header in an
+ application extracting header data and ignoring subheadings.</p>
+
+ <pre>&lt;header&gt;
+ &lt;h1&gt;<strong>The reality dysfunction</strong>&lt;/h1&gt;
+ &lt;h2&gt;Space is not the only void&lt;/h2&gt;
+&lt;/header&gt;</pre>
+
+ <pre>&lt;header&gt;
+ &lt;p&gt;Welcome to...&lt;/p&gt;
+ &lt;h1&gt;<strong>Voidwars!</strong>&lt;/h1&gt;
+&lt;/header&gt;</pre>
+
+ <pre>&lt;header&gt;
+ &lt;h1&gt;<strong>Scalable Vector Graphics (SVG) 1.2</strong>&lt;/h1&gt;
+ &lt;h2&gt;W3C Working Draft 27 October 2004&lt;/h2&gt;
+ &lt;dl&gt;
+ &lt;dt&gt;This version:&lt;/dt&gt;
+ &lt;dd&gt;&lt;a href="http://www.w3.org/TR/2004/WD-SVG12-20041027/"&gt;http://www.w3.org/TR/2004/WD-SVG12-20041027/&lt;/a&gt;&lt;/dd&gt;
+ &lt;dt&gt;Previous version:&lt;/dt&gt;
+ &lt;dd&gt;&lt;a href="http://www.w3.org/TR/2004/WD-SVG12-20040510/"&gt;http://www.w3.org/TR/2004/WD-SVG12-20040510/&lt;/a&gt;&lt;/dd&gt;
+ &lt;dt&gt;Latest version of SVG 1.2:&lt;/dt&gt;
+ &lt;dd&gt;&lt;a href="http://www.w3.org/TR/SVG12/"&gt;http://www.w3.org/TR/SVG12/&lt;/a&gt;&lt;/dd&gt;
+ &lt;dt&gt;Latest SVG Recommendation:&lt;/dt&gt;
+ &lt;dd&gt;&lt;a href="http://www.w3.org/TR/SVG/"&gt;http://www.w3.org/TR/SVG/&lt;/a&gt;&lt;/dd&gt;
+ &lt;dt&gt;Editor:&lt;/dt&gt;
+ &lt;dd&gt;Dean Jackson, W3C, &lt;a href="mailto:dean@w3.org"&gt;dean@w3.org&lt;/a&gt;&lt;/dd&gt;
+ &lt;dt&gt;Authors:&lt;/dt&gt;
+ &lt;dd&gt;See &lt;a href="#authors"&gt;Author List&lt;/a&gt;&lt;/dd&gt;
+ &lt;/dl&gt;
+ &lt;p class="copyright"&gt;&lt;a href="http://www.w3.org/Consortium/Legal/ipr-notic <em>...</em>
+&lt;/header&gt;</pre>
+ </div>
+
+ <p>The section on <a href="#headings0">headings and sections</a> defines
+ how <code><a href="#header">header</a></code> elements are assigned to
+ individual sections.
+
+ <p>The <a href="#rank">rank</a> of a <code><a
+ href="#header">header</a></code> element is the same as for an <code><a
+ href="#h1">h1</a></code> element (the highest rank).
+
+ <h4 id=the-footer><span class=secno>3.8.9. </span>The <dfn
+ id=footer><code>footer</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Either zero or more <a href="#block-level0">block-level elements</a>,
+ but with no <code><a href="#h1">h1</a></code>, <code><a
+ href="#h2">h2</a></code>, <code><a href="#h3">h3</a></code>, <code><a
+ href="#h4">h4</a></code>, <code><a href="#h5">h5</a></code>, <code><a
+ href="#h6">h6</a></code>, <code><a href="#header">header</a></code>, or
+ <code><a href="#footer">footer</a></code> elements as descendants, and
+ with no <a href="#sectioning" title="sectioning elements">sectioning
+ elements</a> as descendants; or, <a href="#inline-level0">inline-level
+ content</a> (but not both).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#footer">footer</a></code> element represents the
+ footer for the section it <a href="#applyToSection">applies</a> to. A
+ footer typically contains information about its section such as who wrote
+ it, links to related documents, copyright data, and the like.
+
+ <p><code><a href="#footer">footer</a></code> elements must not contain any
+ <code><a href="#footer">footer</a></code>, <code><a
+ href="#header">header</a></code>, <code><a href="#h1">h1</a></code>,
+ <code><a href="#h2">h2</a></code>, <code><a href="#h3">h3</a></code>,
+ <code><a href="#h4">h4</a></code>, <code><a href="#h5">h5</a></code>, or
+ <code><a href="#h6">h6</a></code> elements, or any of the sectioning
+ elements (such as <code><a href="#section">section</a></code>), as
+ descendants.
+
+ <p>When <a href="#determining0" title="Determining if a particular element
+ contains block-level elements or inline-level content">used as an
+ inline-level content</a> container, the element represents a <a
+ href="#paragraph">paragraph</a>.
+
+ <p>Contact information for the section given in a <code><a
+ href="#footer">footer</a></code> should be marked up using the <code><a
+ href="#address">address</a></code> element.</p>
+ <!-- XXX examples needed -->
+
+ <h4 id=the-address><span class=secno>3.8.10. </span>The <dfn
+ id=address><code>address</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd><a href="#inline-level0">Inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#address">address</a></code> element represents a <a
+ href="#paragraph">paragraph</a> of contact information for the section it
+ <a href="#applyToSection">applies</a> to.
+
+ <div class=example>
+ <p>For example, a page at the W3C Web site related to HTML might include
+ the following contact information:</p>
+
+ <pre>&lt;ADDRESS>
+ &lt;A href="../People/Raggett/">Dave Raggett&lt;/A>,
+ &lt;A href="../People/Arnaud/">Arnaud Le Hors&lt;/A>,
+ contact persons for the &lt;A href="Activity">W3C HTML Activity&lt;/A>
+&lt;/ADDRESS></pre>
+ </div>
+
+ <p>The <code><a href="#address">address</a></code> element must not be used
+ to represent arbitrary addresses (e.g. postal addresses), unless those
+ addresses are contact information for the section. (The <code><a
+ href="#p">p</a></code> element is the appropriate element for marking up
+ such addresses.)
+
+ <p>The <code><a href="#address">address</a></code> element must not contain
+ information other than contact information.
+
+ <div class=example>
+ <p>For example, the following is non-conforming use of the <code><a
+ href="#address">address</a></code> element:</p>
+
+ <pre>&lt;ADDRESS>Last Modified: 1999/12/24 23:37:50&lt;/ADDRESS></pre>
+ </div>
+
+ <p>Typically, the <code><a href="#address">address</a></code> element would
+ be included with other information in a <code><a
+ href="#footer">footer</a></code> element.
+
+ <p>To determine the contact information for a sectioning element (such as a
+ document's <code><a href="#body0">body</a></code> element, which would
+ give the contact information for the page), UAs must collect all the
+ <code><a href="#address">address</a></code> elements that <a
+ href="#applyToSection">apply</a> to that sectioning element and its
+ ancestor sectioning elements. The contact information is the collection of
+ all the information given by those elements.
+
+ <p class=note>Contact information for one sectioning element, e.g. an
+ <code><a href="#aside">aside</a></code> element, does not apply to its
+ ancestor elements, e.g. the page's <code><a href="#body0">body</a></code>.
+
+ <h4 id=headings><span class=secno>3.8.11. </span><dfn id=headings0>Headings
+ and sections</dfn></h4>
+
+ <p>The <code><a href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code>
+ elements and the <code><a href="#header">header</a></code> element are
+ headings.
+
+ <p>The first heading in a sectioning element gives the header for that
+ section. Subsequent headers of equal or higher <a href="#rank">rank</a>
+ start new (implied) sections, headers of lower <a href="#rank">rank</a>
+ start subsections that are part of the previous one.
+
+ <p>Sectioning elements other than <code><a
+ href="#blockquote">blockquote</a></code> are always considered subsections
+ of their nearest ancestor sectioning element, regardless of what implied
+ sections other headings may have created. However, <code><a
+ href="#blockquote">blockquote</a></code> elements <em>are</em> associated
+ with implied sections. Effectively, <code><a
+ href="#blockquote">blockquote</a></code> elements act like sections on the
+ inside, and act opaquely on the outside.
+
+ <div class=example>
+ <p>For the following fragment:</p>
+
+ <pre>&lt;body&gt;
+ &lt;h1&gt;Foo&lt;/h1&gt;
+ &lt;h2&gt;Bar&lt;/h2&gt;
+ &lt;blockquote&gt;
+ &lt;h3&gt;Bla&lt;/h3&gt;
+ &lt;/blockquote&gt;
+ &lt;p&gt;Baz&lt;/p&gt;
+ &lt;h2&gt;Quux&lt;/h2&gt;
+ &lt;section&gt;
+ &lt;h3&gt;Thud&lt;/h3&gt;
+ &lt;/section&gt;
+ &lt;p&gt;Grunt&lt;/p&gt;
+&lt;/body&gt;</pre>
+
+ <p>...the structure would be:</p>
+
+ <ol>
+ <li> Foo (heading of explicit <code><a href="#body0">body</a></code>
+ section)
+ <ol>
+ <li> Bar (heading starting implied section)
+ <ol>
+ <li> Bla (heading of explicit <code><a
+ href="#blockquote">blockquote</a></code> section)
+ </ol>
+ Baz (paragraph)
+
+ <li> Quux (heading starting implied section)
+
+ <li> Thud (heading of explicit <code><a
+ href="#section">section</a></code> section)
+ </ol>
+ Grunt (paragraph)
+ </ol>
+
+ <p>Notice how the <code><a href="#blockquote">blockquote</a></code> nests
+ inside an implicit section while the <code><a
+ href="#section">section</a></code> does not (and in fact, ends the
+ earlier implicit section so that a later paragraph is back at the top
+ level).</p>
+ </div>
+
+ <p>Sections may contain headers of any <a href="#rank">rank</a>, but
+ authors are strongly encouraged to either use only <code><a
+ href="#h1">h1</a></code> elements, or to use elements of the appropriate
+ <a href="#rank">rank</a> for the section's nesting level.
+
+ <p>Authors are also encouraged to explicitly wrap sections in sectioning
+ elements, instead of relying on the implicit sections generated by having
+ multiple headings in one sectioning element.
+
+ <div class=example>
+ <p>For example, the following is correct:</p>
+
+ <pre>&lt;body&gt;
+ &lt;h4&gt;Apples&lt;/h4&gt;
+ &lt;p&gt;Apples are fruit.&lt;/p&gt;
+ &lt;section&gt;
+ &lt;h2&gt;Taste&lt;/h2&gt;
+ &lt;p&gt;They taste lovely.&lt;/p&gt;
+ &lt;h6&gt;Sweet&lt;/h6&gt;
+ &lt;p&gt;Red apples are sweeter than green ones.&lt;/p&gt;
+ &lt;h1&gt;Color&lt;/h1&gt;
+ &lt;p&gt;Apples come in various colors.&lt;/p&gt;
+ &lt;/section&gt;
+&lt;/body&gt;</pre>
+
+ <p>However, the same document would be more clearly expressed as:</p>
+
+ <pre>&lt;body&gt;
+ &lt;h1&gt;Apples&lt;/h1&gt;
+ &lt;p&gt;Apples are fruit.&lt;/p&gt;
+ &lt;section&gt;
+ &lt;h2&gt;Taste&lt;/h2&gt;
+ &lt;p&gt;They taste lovely.&lt;/p&gt;
+ &lt;section&gt;
+ &lt;h3&gt;Sweet&lt;/h3&gt;
+ &lt;p&gt;Red apples are sweeter than green ones.&lt;/p&gt;
+ &lt;/section&gt;
+ &lt;/section&gt;
+ &lt;section&gt;
+ &lt;h2&gt;Color&lt;/h2&gt;
+ &lt;p&gt;Apples come in various colors.&lt;/p&gt;
+ &lt;/section&gt;
+&lt;/body&gt;</pre>
+
+ <p>Both of the documents above are semantically identical and would
+ produce the same outline in compliant user agents.</p>
+ </div>
+
+ <h5 id=outlines><span class=secno>3.8.11.1. </span>Creating an outline</h5>
+
+ <p>Documents can be viewed as a tree of sections, which defines how each
+ element in the tree is semantically related to the others, in terms of the
+ overall section structure. This tree is related to the document tree, but
+ there is not a one-to-one relationship between elements in the DOM and the
+ document's sections.
+
+ <p>The tree of sections should be used when generating document outlines,
+ for example when generating tables of contents.
+
+ <p>To derive the tree of sections from the document tree, a hypothetical
+ tree is used, consisting of a view of the document tree containing only
+ the <code><a href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code>
+ and <code><a href="#header">header</a></code> elements, and the sectioning
+ elements other than <code><a href="#blockquote">blockquote</a></code>.
+ Descendants of <code><a href="#h1">h1</a></code>-<code><a
+ href="#h6">h6</a></code>, <code><a href="#header">header</a></code>, and
+ <code><a href="#blockquote">blockquote</a></code> elements must be removed
+ from this view.
+
+ <p>The hypothetical tree must be rooted at the <a href="#root-element">root
+ element</a> or at a sectioning element. In particular, while the sections
+ inside <code><a href="#blockquote">blockquote</a></code>s do not
+ contribute to the document's tree of sections, <code><a
+ href="#blockquote">blockquote</a></code>s can have outlines of their own.
+
+ <p>UAs must take this hypothetical tree (which will become the outline) and
+ mutate it by walking it depth first in <a href="#tree-order">tree
+ order</a> and, for each <code><a href="#h1">h1</a></code>-<code><a
+ href="#h6">h6</a></code> or <code><a href="#header">header</a></code>
+ element that is not the first element of its parent sectioning element,
+ inserting a new sectioning element, as follows:
+
+ <dl class=switch>
+ <dt>If the element is a <code><a href="#header">header</a></code> element,
+ or if it is an <code><a href="#h1">h1</a></code>-<code><a
+ href="#h6">h6</a></code> node of <a href="#rank">rank</a> equal to or
+ higher than the first element in the parent sectioning element (assuming
+ that is also an <code><a href="#h1">h1</a></code>-<code><a
+ href="#h6">h6</a></code> node), or if the first element of the parent
+ sectioning element is a sectioning element:
+
+ <dd>Insert the new sectioning element as the immediately following sibling
+ of the parent sectioning element, and move all the elements from the
+ current heading element up to the end of the parent sectioning element
+ into the new sectioning element.
+
+ <dt>Otherwise:
+
+ <dd>Move the current heading element, and all subsequent siblings up to
+ but excluding the next sectioning element, <code><a
+ href="#header">header</a></code> element, or <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code> of equal or
+ higher <a href="#rank">rank</a>, whichever comes first, into the new
+ sectioning element, then insert the new sectioning element where the
+ current header was.
+ </dl>
+
+ <p>The outline is then the resulting hypothetical tree. The <a href="#rank"
+ title=rank>ranks</a> of the headers become irrelevant at this point: each
+ sectioning element in the hypothetical tree contains either no or one
+ heading element child. If there is one, then it gives the section's
+ heading; if there isn't, the section has no heading.
+
+ <p>Sections are nested as in the hypothetical tree. If a sectioning element
+ is a child of another, that means it is a subsection of that other
+ section.
+
+ <p>When creating an interactive table of contents, entries should jump the
+ user to the relevant section element, if it was a real element in the
+ original document, or to the heading, if the section element was one of
+ those created during the above process.
+
+ <p class=example>Selecting the first section of the document therefore
+ always takes the user to the top of the document, regardless of where the
+ first header in the <code><a href="#body0">body</a></code> is to be found.</p>
+ <!-- XXX assuming there is a body, anyway -->
+
+ <div class=note>
+ <p>The hypothetical tree (before mutations) could be generated by creating
+ a <code>TreeWalker</code> with the following <a
+ href="http://www.w3.org/TR/DOM-Level-2-Traversal-Range/traversal.html#Traversal-NodeFilter"><code>NodeFilter</code></a>
+ (described here as an anonymous ECMAScript function). <a
+ href="#refsDOMTR">[DOMTR]</a> <a href="#refsECMA262">[ECMA262]</a></p>
+
+ <pre>function (n) {
+ // This implementation only knows about HTML elements.
+ // An implementation that supports other languages might be
+ // different.
+
+ // Reject anything that isn't an element.
+ if (n.nodeType != Node.ELEMENT_NODE)
+ return NodeFilter.FILTER_REJECT;
+
+ // Skip any descendants of headings.
+ if ((n.parentNode &amp;&amp; n.parentNode.namespaceURI == 'http://www.w3.org/1999/xhtml') &amp;&amp;
+ (n.parentNode.localName == 'h1' || n.parentNode.localName == 'h2' ||
+ n.parentNode.localName == 'h3' || n.parentNode.localName == 'h4' ||
+ n.parentNode.localName == 'h5' || n.parentNode.localName == 'h6' ||
+ n.parentNode.localName == 'header'))
+ return NodeFilter.FILTER_REJECT;
+
+ // Skip any blockquotes.
+ if ((n.namespaceURI == 'http://www.w3.org/1999/xhtml') &amp;&amp;
+ (n.localName == 'blockquote'))
+ return NodeFilter.FILTER_REJECT;
+
+ // Accept HTML elements in the list given in the prose above.
+ if ((n.namespaceURI == 'http://www.w3.org/1999/xhtml') &amp;&amp;
+ (n.localName == 'body' || /*n.localName == 'blockquote' ||*/
+ n.localName == 'section' || n.localName == 'nav' ||
+ n.localName == 'article' || n.localName == 'aside' ||
+ n.localName == 'h1' || n.localName == 'h2' ||
+ n.localName == 'h3' || n.localName == 'h4' ||
+ n.localName == 'h5' || n.localName == 'h6' ||
+ n.localName == 'header'))
+ return NodeFilter.FILTER_ACCEPT;
+
+ // Skip the rest.
+ return NodeFilter.FILTER_SKIP;
+}</pre>
+ </div>
+
+ <h5 id=associatedSection><span class=secno>3.8.11.2. </span>Determining
+ which heading and section applies to a particular node</h5>
+
+ <p>Given a particular node, user agents must use the following algorithm,
+ <em>in the given order</em>, to determine which heading and section the
+ node is most closely associated with. The processing of this algorithm
+ must stop as soon as the associated section and heading are established
+ (even if they are established to be nothing).
+
+ <ol>
+ <li>If the node has an ancestor that is a <code><a
+ href="#header">header</a></code> element, then the associated heading is
+ the most distant such ancestor. The associated section is that <code><a
+ href="#header">header</a></code>'s associated section (i.e. repeat this
+ algorithm for that <code><a href="#header">header</a></code>).
+
+ <li>If the node has an ancestor that is an <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code> element, then
+ the associated heading is the most distant such ancestor. The associated
+ section is that heading's section (i.e. repeat this algorithm for that
+ heading element).
+
+ <li>If the node is an <code><a href="#h1">h1</a></code>-<code><a
+ href="#h6">h6</a></code> element or a <code><a
+ href="#header">header</a></code> element, then the associated heading is
+ the element itself. The UA must then generate the <a
+ href="#outlines">hypothetical section tree</a> described in the previous
+ section, rooted at the nearest section ancestor (or the <a
+ href="#root-element">root element</a> if there is no such ancestor). If
+ the parent of the heading in that hypothetical tree is an element in the
+ real document tree, then that element is the associated section.
+ Otherwise, there is no associated section element.
+
+ <li>If the node is a sectioning element, then the associated section is
+ itself. The UA must then generate the <a href="#outlines">hypothetical
+ section tree</a> described in the previous section, rooted at the section
+ itself. If the section element, in that hypothetical tree, has a child
+ element that is an <code><a href="#h1">h1</a></code>-<code><a
+ href="#h6">h6</a></code> element or a <code><a
+ href="#header">header</a></code> element, then that element is the
+ associated heading. Otherwise, there is no associated heading element.
+
+ <li>If the node is a <code><a href="#footer">footer</a></code> or <code><a
+ href="#address">address</a></code> element, then the associated section
+ is the nearest ancestor sectioning element, if there is one. The node's
+ associated heading is the same as that sectioning element's associated
+ heading (i.e. repeat this algorithm for that sectioning element). If
+ there is no ancestor sectioning element, the element has no associated
+ section nor an associated heading.
+
+ <li>Otherwise, the node is just a normal node, and the document has to be
+ examined more closely to determine its section and heading. Create a view
+ rooted at the nearest ancestor sectioning element (or the <a
+ href="#root-element">root element</a> if there is none) that has just
+ <code><a href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code>
+ elements, <code><a href="#header">header</a></code> elements, the node
+ itself, and sectioning elements other than <code><a
+ href="#blockquote">blockquote</a></code> elements. (Descendants of any of
+ the nodes in this view can be ignored, as can any node later in the tree
+ than the node in question, as the algorithm below merely walks backwards
+ up this view.)
+
+ <li>Let <var title="">n</var> be an iterator for this view, initialised at
+ the node in question.
+
+ <li>Let <var title="">c</var> be the current best candidate heading,
+ initially null, and initially not used. It is used when top-level heading
+ candidates are to be searched for (see below).
+
+ <li>Repeat these steps (which effectively go backwards through the
+ node's previous siblings) until an answer is found:
+ <ol>
+ <li>If <var title="">n</var> points to a node with no previous sibling,
+ and <var title="">c</var> is null, then return the node's parent node
+ as the answer. If the node has no parent node, return null as the
+ answer.
+
+ <li>Otherwise, if <var title="">n</var> points to a node with no
+ previous sibling, return <var title="">c</var> as the answer.
+
+ <li>Adjust <var title="">n</var> so that it points to the previous
+ sibling of the current position.
+
+ <li>If <var title="">n</var> is pointing at an <code><a
+ href="#h1">h1</a></code> or <code><a href="#header">header</a></code>
+ element, then return that element as the answer.
+
+ <li>If <var title="">n</var> is pointing at an <code><a
+ href="#h2">h2</a></code>-<code><a href="#h6">h6</a></code> element, and
+ heading candidates are not being searched for, then return that element
+ as the answer.
+
+ <li>Otherwise, if <var title="">n</var> is pointing at an <code><a
+ href="#h2">h2</a></code>-<code><a href="#h6">h6</a></code> element, and
+ either <var title="">c</var> is still null, or <var title="">c</var> is
+ a heading of lower <a href="#rank">rank</a> than this one, then set
+ <var title="">c</var> to be this element, and continue going backwards
+ through the previous siblings.
+
+ <li>If <var title="">n</var> is pointing at a sectioning element, then
+ from this point on top-level heading candidates are being searched for.
+ (Specifically, we are looking for the nearest top-level header for the
+ current section.) Continue going backwards through the previous
+ siblings.
+ </ol>
+
+ <li>If the answer from the previous step (the loop) is null, which can
+ only happen if the node has no preceding headings and is not contained
+ in a sectioning element, then there is no associated heading and no
+ associated section.
+
+ <li>Otherwise, if the answer from the earlier loop step is a sectioning
+ element, then the associated section is that element and the associated
+ heading is that sectioning element's associated heading (i.e. repeat this
+ algorithm for that section).
+
+ <li>Otherwise, if the answer from that same earlier step is an <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code> element or a
+ <code><a href="#header">header</a></code> element, then the associated
+ heading is that element and the associated section is that heading
+ element's associated section (i.e. repeat this algorithm for that
+ heading).
+ </ol>
+
+ <p class=note>Not all nodes have an associated header or section. For
+ example, if a section is implied, as when multiple headers are found in
+ one sectioning element, then a node in that section has an anonymous
+ associated section (its section is not represented by a real element), and
+ the algorithm above does not associate that node with any particular
+ sectioning element.
+
+ <div class=example>
+ <p>For the following fragment:</p>
+
+ <pre>&lt;body&gt;
+ &lt;h1&gt;X&lt;/h1&gt;
+ &lt;h2&gt;X&lt;/h2&gt;
+ &lt;blockquote&gt;
+ &lt;h3&gt;X&lt;/h3&gt;
+ &lt;/blockquote&gt;
+ &lt;p id="a"&gt;X&lt;/p&gt;
+ &lt;h4&gt;Text Node A&lt;/h4&gt;
+ &lt;section&gt;
+ &lt;h5&gt;X&lt;/h5&gt;
+ &lt;/section&gt;
+ &lt;p&gt;Text Node B&lt;/p&gt;
+&lt;/body&gt;</pre>
+
+ <p>The associations are as follows (not all associations are shown):</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Node
+
+ <th>Associated heading
+
+ <th>Associated section
+
+ <tbody>
+ <tr>
+ <td><code>&lt;body&gt;</code>
+
+ <td><code>&lt;h1&gt;</code>
+
+ <td><code>&lt;body&gt;</code>
+
+ <tr>
+ <td><code>&lt;h1&gt;</code>
+
+ <td><code>&lt;h1&gt;</code>
+
+ <td><code>&lt;body&gt;</code>
+
+ <tr>
+ <td><code>&lt;h2&gt;</code>
+
+ <td><code>&lt;h2&gt;</code>
+
+ <td>None.
+
+ <tr>
+ <td><code>&lt;blockquote&gt;</code>
+
+ <td><code>&lt;h2&gt;</code>
+
+ <td>None.
+
+ <tr>
+ <td><code>&lt;h3&gt;</code>
+
+ <td><code>&lt;h3&gt;</code>
+
+ <td><code>&lt;blockquote&gt;</code>
+
+ <tr>
+ <td><code>&lt;p id="a"&gt;</code>
+
+ <td><code>&lt;h2&gt;</code>
+
+ <td>None.
+
+ <tr>
+ <td><code>Text Node A</code>
+
+ <td><code>&lt;h4&gt;</code>
+
+ <td>None.
+
+ <tr>
+ <td><code>Text Node B</code>
+
+ <td><code>&lt;h1&gt;</code>
+
+ <td><code>&lt;body&gt;</code>
+ </table>
+ </div>
+
+ <h5 id=distinguishing><span class=secno>3.8.11.3. </span>Distinguishing
+ site-wide headers from page headers</h5>
+
+ <p>Given the <a href="#outlines">hypothetical section tree</a>, but
+ ignoring any sections created for <code><a href="#nav">nav</a></code> and
+ <code><a href="#aside">aside</a></code> elements, and any of their
+ descendants, if the root of the tree is <a href="#the-body0">the
+ <code>body</code> element</a>'s section, and it has only a single
+ subsection which is created by an <code><a
+ href="#article">article</a></code> element, then the header of <a
+ href="#the-body0">the <code>body</code> element</a> should be assumed to
+ be a site-wide header, and the header of the <code><a
+ href="#article">article</a></code> element should be assumed to be the
+ page's header.
+
+ <p>If a page starts with a heading that is common to the whole site, the
+ document must be authored such that, in the document's <a
+ href="#outlines">hypothetical section tree</a>, ignoring any sections
+ created for <code><a href="#nav">nav</a></code> and <code><a
+ href="#aside">aside</a></code> elements and any of their descendants, the
+ root of the tree is <a href="#the-body0">the <code>body</code>
+ element</a>'s section, its heading is the site-wide heading, <a
+ href="#the-body0">the <code>body</code> element</a> has just one
+ subsection, that subsection is created by an <code><a
+ href="#article">article</a></code> element, and that <code><a
+ href="#article">article</a></code>'s header is the page heading.
+
+ <p>If a page does not contain a site-wide heading, then the page must be
+ authored such that, in the document's <a href="#outlines">hypothetical
+ section tree</a>, ignoring any sections created for <code><a
+ href="#nav">nav</a></code> and <code><a href="#aside">aside</a></code>
+ elements and any of their descendants, either <a href="#the-body0">the
+ <code>body</code> element</a> has no subsections, or it has more than one
+ subsection, or it has a single subsection but that subsection is not
+ created by an <code><a href="#article">article</a></code> element.
+
+ <p class=note>Conceptually, a site is thus a document with many articles
+ &mdash; when those articles are split into many pages, the heading of the
+ original single page becomes the heading of the site, repeated on every
+ page.
+
+ <h3 id=prose><span class=secno>3.9. </span>Prose</h3>
+
+ <h4 id=the-p><span class=secno>3.9.1. </span>The <dfn
+ id=p><code>p</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd><a href="#significant" title="significant inline
+ content">Significant</a> <a href="#inline-level0">inline-level
+ content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#p">p</a></code> element represents a <a
+ href="#paragraph">paragraph</a>.
+
+ <p><code><a href="#p">p</a></code> elements can contain a mixture of <a
+ href="#strictly">strictly inline-level content</a>, such as text, images,
+ hyperlinks, etc, and <a href="#structured">structured inline-level
+ elements</a>, such as lists, tables, and block quotes. <code><a
+ href="#p">p</a></code> elements must not be <a href="#significant"
+ title="significant inline content">empty</a>.
+
+ <div class=example>
+ <p>The following examples are conforming HTML fragments:</p>
+
+ <pre>&lt;p&gt;The little kitten gently seated himself on a piece of
+carpet. Later in his life, this would be referred to as the time the
+cat sat on the mat.&lt;/p&gt;</pre>
+
+ <pre>&lt;fieldset&gt;
+ &lt;legend&gt;Personal information&lt;/legend&gt;
+ &lt;p&gt;
+ &lt;label&gt;Name: &lt;input name="n"&gt;&lt;/label&gt;
+ &lt;label&gt;&lt;input name="anon" type="checkbox"&gt; Hide from other users&lt;/label&gt;
+ &lt;/p&gt;
+ &lt;p&gt;&lt;label&gt;Address: &lt;textarea name="a"&gt;&lt;/textarea&gt;&lt;/label&gt;&lt;/p&gt;
+&lt;/fieldset&gt;</pre>
+
+ <pre>&lt;p&gt;There was once an example from Femley,&lt;br&gt;
+Whose markup was of dubious quality.&lt;br&gt;
+The validator complained,&lt;br&gt;
+So the author was pained,&lt;br&gt;
+To move the error from the markup to the rhyming.&lt;/p&gt;</pre>
+ </div>
+
+ <p>The <code><a href="#p">p</a></code> element should not be used when a
+ more specific element is more appropriate.
+
+ <div class=example>
+ <p>The following example is technically correct:</p>
+
+ <pre>&lt;section&gt;
+ &lt;!-- ... --&gt;
+ &lt;p&gt;Last modified: 2001-04-23&lt;/p&gt;
+ &lt;p&gt;Author: fred@example.com&lt;/p&gt;
+&lt;/section&gt;</pre>
+
+ <p>However, it would be better marked-up as:</p>
+
+ <pre>&lt;section&gt;
+ &lt;!-- ... --&gt;
+ &lt;footer&gt;Last modified: 2001-04-23&lt;/footer&gt;
+ &lt;address&gt;Author: fred@example.com&lt;/address&gt;
+&lt;/section&gt;</pre>
+
+ <p>Or:</p>
+
+ <pre>&lt;section&gt;
+ &lt;!-- ... --&gt;
+ &lt;footer&gt;
+ &lt;p&gt;Last modified: 2001-04-23&lt;/p&gt;
+ &lt;address&gt;Author: fred@example.com&lt;/address&gt;
+ &lt;/footer&gt;
+&lt;/section&gt;</pre>
+ </div>
+
+ <h4 id=the-hr><span class=secno>3.9.2. </span>The <dfn
+ id=hr><code>hr</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#hr">hr</a></code> element represents a <a
+ href="#paragraph">paragraph</a>-level thematic break, e.g. a scene change
+ in a story, or a transition to another topic within a section of a
+ reference book.
+
+ <h4 id=the-br><span class=secno>3.9.3. </span>The <dfn
+ id=br><code>br</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#br">br</a></code> element represents a line break.
+
+ <p><code><a href="#br">br</a></code> elements must be empty. Any content
+ inside <code><a href="#br">br</a></code> elements must not be considered
+ part of the surrounding text.
+
+ <p><code><a href="#br">br</a></code> elements must only be used for line
+ breaks that are actually part of the content, as in poems or addresses.
+
+ <div class=example>
+ <p>The following example is correct usage of the <code><a
+ href="#br">br</a></code> element:</p>
+
+ <pre>&lt;p&gt;P. Sherman&lt;br&gt;
+42 Wallaby Way&lt;br&gt;
+Sydney&lt;/p&gt;</pre>
+ </div>
+
+ <p><code><a href="#br">br</a></code> elements must not be used for
+ separating thematic groups in a paragraph.
+
+ <div class=example>
+ <p>The following examples are non-conforming, as they abuse the <code><a
+ href="#br">br</a></code> element:</p>
+
+ <pre>&lt;p&gt;&lt;a ...&gt;34 comments.&lt;/a&gt;&lt;br&gt;
+&lt;a ...&gt;Add a comment.&lt;/a&gt;&lt;/p&gt;</pre>
+
+ <pre>&lt;p&gt;Name: &lt;input name="name"&gt;&lt;br&gt;
+Address: &lt;input name="address"&gt;&lt;/p&gt;</pre>
+
+ <p>Here are alternatives to the above, which are correct:</p>
+
+ <pre>&lt;p&gt;&lt;a ...&gt;34 comments.&lt;/a&gt;&lt;/p&gt;
+&lt;p&gt;&lt;a ...&gt;Add a comment.&lt;/a&gt;&lt;/p&gt;</pre>
+
+ <pre>&lt;p&gt;Name: &lt;input name="name"&gt;&lt;/p&gt;
+&lt;p&gt;Address: &lt;input name="address"&gt;&lt;/p&gt;</pre>
+ <!-- XXX should have labels in the examples above --></div>
+
+ <h4 id=the-dialog><span class=secno>3.9.4. </span>The <dfn
+ id=dialog><code>dialog</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more pairs of <code><a href="#dt">dt</a></code> and <code><a
+ href="#dd">dd</a></code> elements.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#dialog">dialog</a></code> element represents a
+ conversation.
+
+ <p>Each part of the conversation must have an explicit talker (or speaker)
+ given by a <code><a href="#dt">dt</a></code> element, and a discourse (or
+ quote) given by a <code><a href="#dd">dd</a></code> element.
+
+ <div class=example>
+ <p>This example demonstrates this using an extract from Abbott and
+ Costello's famous sketch, <cite>Who's on first</cite>:</p>
+
+ <pre>&lt;dialog>
+ &lt;dt> Costello
+ &lt;dd> Look, you gotta first baseman?
+ &lt;dt> Abbott
+ &lt;dd> Certainly.
+ &lt;dt> Costello
+ &lt;dd> Who's playing first?
+ &lt;dt> Abbott
+ &lt;dd> That's right.
+ &lt;dt> Costello
+ &lt;dd> When you pay off the first baseman every month, who gets the money?
+ &lt;dt> Abbott
+ &lt;dd> Every dollar of it.
+&lt;/dialog></pre>
+ </div>
+
+ <p class=note>Text in a <code><a href="#dt">dt</a></code> element in a
+ <code><a href="#dialog">dialog</a></code> element is implicitly the source
+ of the text given in the following <code><a href="#dd">dd</a></code>
+ element, and the contents of the <code><a href="#dd">dd</a></code> element
+ are implicitly a quote from that speaker. There is thus no need to include
+ <code><a href="#cite2">cite</a></code>, <code><a href="#q">q</a></code>,
+ or <code><a href="#blockquote">blockquote</a></code> elements in this
+ markup. Indeed, a <code><a href="#q">q</a></code> element inside a
+ <code><a href="#dd">dd</a></code> element in a conversation would actually
+ imply the person talking were themselves quoting someone else. See the
+ <code><a href="#cite2">cite</a></code>, <code><a href="#q">q</a></code>,
+ and <code><a href="#blockquote">blockquote</a></code> elements for other
+ ways to cite or quote.
+
+ <h3 id=preformatted><span class=secno>3.10. </span>Preformatted text</h3>
+
+ <h4 id=the-pre><span class=secno>3.10.1. </span>The <dfn
+ id=pre><code>pre</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#pre">pre</a></code> element represents a block of
+ preformatted text, in which structure is represented by typographic
+ conventions rather than by elements.
+
+ <p>Some examples of cases where the <code><a href="#pre">pre</a></code>
+ element could be used:
+
+ <ul>
+ <li>Including an e-mail, with paragraphs indicated by blank lines, lists
+ indicated by lines prefixed with a bullet, and so on.
+
+ <li>Including fragments of computer code, with structure indicated
+ according to the conventions of that language.
+
+ <li>Displaying ASCII art.</li>
+ <!-- XXX need a note about non-visual UAs -->
+ </ul>
+
+ <p>If, ignoring <a href="#text-node" title="text node">text nodes</a>
+ consisting only of <a href="#inter-element" title="inter-element
+ whitespace">whitespace</a>, the only child of a <code><a
+ href="#pre">pre</a></code> is a <code><a href="#code">code</a></code>
+ element, then the <code><a href="#pre">pre</a></code> element represents a
+ block of computer code.
+
+ <p>If, ignoring <a href="#text-node" title="text node">text nodes</a>
+ consisting only of <a href="#inter-element" title="inter-element
+ whitespace">whitespace</a>, the only child of a <code><a
+ href="#pre">pre</a></code> is a <code><a href="#samp">samp</a></code>
+ element, then the <code><a href="#pre">pre</a></code> element represents a
+ block of computer output.</p>
+ <!-- XXX examples -->
+
+ <h3 id=lists0><span class=secno>3.11. </span>Lists</h3>
+
+ <h4 id=the-ol><span class=secno>3.11.1. </span>The <dfn
+ id=ol><code>ol</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#li">li</a></code> elements.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-ol-start><a href="#start0">start</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlolistelement>HTMLOListElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute long <a href="#start1" title=dom-ol-start>start</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#ol">ol</a></code> element represents an ordered list
+ of items (which are represented by <code><a href="#li">li</a></code>
+ elements).
+
+ <p>The <dfn id=start0 title=attr-ol-start><code>start</code></dfn>
+ attribute, if present, must be a <a href="#valid0">valid integer</a>
+ giving the ordinal value of the first list item.
+
+ <p>If the <code title=attr-ol-start><a href="#start0">start</a></code>
+ attribute is present, user agents must <a href="#rules0" title="rules for
+ parsing integers">parse it as an integer</a>, in order to determine the
+ attribute's value. The default value, used if the attribute is missing or
+ if the value cannot be converted to a number according to the referenced
+ algorithm, is 1.
+
+ <p>The items of the list are the <code><a href="#li">li</a></code> element
+ child nodes of the <code><a href="#ol">ol</a></code> element, in <a
+ href="#tree-order">tree order</a>.
+
+ <p>The first item in the list has the ordinal value given by the <code><a
+ href="#ol">ol</a></code> element's <code title=attr-ol-start><a
+ href="#start0">start</a></code> attribute, unless that <code><a
+ href="#li">li</a></code> element has a <code title=attr-li-value><a
+ href="#value">value</a></code> attribute with a value that can be
+ successfully parsed, in which case it has the ordinal value given by that
+ <code title=attr-li-value><a href="#value">value</a></code> attribute.
+
+ <p>Each subsequent item in the list has the ordinal value given by its
+ <code title=attr-li-value><a href="#value">value</a></code> attribute, if
+ it has one, or, if it doesn't, the ordinal value of the previous item,
+ plus one.
+
+ <p>The <dfn id=start1 title=dom-ol-start><code>start</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the value of the <code
+ title=attr-ol-start><a href="#start0">start</a></code> content attribute.</p>
+ <!-- XXX resuming numbering of lists from previous lists? -->
+ <!-- XXX counting up and down? -->
+ <!-- XXX reverse-counted lists? -->
+
+ <h4 id=the-ul><span class=secno>3.11.2. </span>The <dfn
+ id=ul><code>ul</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#li">li</a></code> elements.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#ul">ul</a></code> element represents an unordered
+ list of items (which are represented by <code><a href="#li">li</a></code>
+ elements).
+
+ <p>The items of the list are the <code><a href="#li">li</a></code> element
+ child nodes of the <code><a href="#ul">ul</a></code> element.
+
+ <h4 id=the-li><span class=secno>3.11.3. </span>The <dfn
+ id=li><code>li</code></dfn> element</h4>
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Inside <code><a href="#ol">ol</a></code> elements.
+
+ <dd>Inside <code><a href="#ul">ul</a></code> elements.
+
+ <dd>Inside <code><a href="#menu">menu</a></code> elements.
+
+ <dt>Content model:
+
+ <dd>When the element is a child of an <code><a href="#ol">ol</a></code> or
+ <code><a href="#ul">ul</a></code> element and the grandchild of an
+ element that is <a href="#determining0" title="Determining if a
+ particular element contains block-level elements or inline-level
+ content">being used as an inline-level content container</a>, or, when
+ the element is a child of a <code><a href="#menu">menu</a></code>
+ element: <a href="#inline-level0">inline-level content</a>.
+
+ <dd>Otherwise: zero or more <a href="#block-level0">block-level
+ elements</a>, or <a href="#inline-level0">inline-level content</a> (but
+ not both).
+
+ <dt>Element-specific attributes:
+
+ <dd>If the element is a child of an <code><a href="#ol">ol</a></code>
+ element: <code title=attr-li-value><a href="#value">value</a></code>
+
+ <dd>If the element is not the child of an <code><a
+ href="#ol">ol</a></code> element: None.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmllielement>HTMLLIElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute long <a href="#value0" title=dom-li-value>value</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#li">li</a></code> element represents a list item. If
+ its parent element is an <code><a href="#ol">ol</a></code>, <code><a
+ href="#ul">ul</a></code>, or <code><a href="#menu">menu</a></code>
+ element, then the element is an item of the parent element's list, as
+ defined for those elements. Otherwise, the list item has no defined
+ list-related relationship to any other <code><a href="#li">li</a></code>
+ element.
+
+ <p>When the list item is the child of an <code><a href="#ol">ol</a></code>
+ or <code><a href="#ul">ul</a></code> element, the content model of the
+ item depends on the way that parent element was used. If it was used as
+ structured inline content (i.e. if <em>that</em> element's parent was <a
+ href="#determining0" title="Determining if a particular element contains
+ block-level elements or inline-level content">used as an inline-level
+ content</a> container), then the <code><a href="#li">li</a></code> element
+ must only contain <a href="#inline-level0">inline-level content</a>.
+ Otherwise, the element may be used either for <a href="#inline-level0"
+ title="inline-level content">inline content</a> or <a
+ href="#block-level0">block-level elements</a>.
+
+ <p>When the list item is the child of a <code><a
+ href="#menu">menu</a></code> element, the <code><a
+ href="#li">li</a></code> element must contain only <a
+ href="#inline-level0">inline-level content</a>.
+
+ <p>When the list item is not the child of an <code><a
+ href="#ol">ol</a></code>, <code><a href="#ul">ul</a></code>, or <code><a
+ href="#menu">menu</a></code> element, e.g. because it is an orphaned node
+ not in the document, it may contain either <a href="#inline-level0"
+ title="inline-level content">inline content</a> or <a
+ href="#block-level0">block-level elements</a>.
+
+ <p>When <a href="#determining0" title="Determining if a particular element
+ contains block-level elements or inline-level content">used as an
+ inline-level content</a> container, the list item represents a single <a
+ href="#paragraph">paragraph</a>.
+
+ <p>The <dfn id=value title=attr-li-value><code>value</code></dfn>
+ attribute, if present, must be a <a href="#valid0">valid integer</a>
+ giving the ordinal value of the list item.
+
+ <p>If the <code title=attr-li-value><a href="#value">value</a></code>
+ attribute is present, user agents must <a href="#rules0" title="rules for
+ parsing integers">parse it as an integer</a>, in order to determine the
+ attribute's value. If the attribute's value cannot be converted to a
+ number, it must be treated as if the attribute was absent. The attribute
+ has no default value.
+
+ <p>The <code title=attr-li-value><a href="#value">value</a></code>
+ attribute is processed relative to the element's parent <code><a
+ href="#ol">ol</a></code> element (q.v.), if there is one. If there is not,
+ the attribute has no effect.
+
+ <p>The <dfn id=value0 title=dom-li-value><code>value</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the value of the <code
+ title=attr-li-value><a href="#value">value</a></code> content attribute.
+
+ <h4 id=the-dl><span class=secno>3.11.4. </span>The <dfn
+ id=dl><code>dl</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Zero or more groups each consisting of one or more <code><a
+ href="#dt">dt</a></code> elements followed by one or more <code><a
+ href="#dd">dd</a></code> elements.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#dl">dl</a></code> element introduces an unordered
+ association list consisting of zero or more name-value groups (a
+ description list). Each group must consist of one or more names (<code><a
+ href="#dt">dt</a></code> elements) followed by one or more values
+ (<code><a href="#dd">dd</a></code> elements).
+
+ <p>Name-value groups may be terms and definitions, metadata topics and
+ values, or any other groups of name-value data.
+
+ <div class=example>
+ <p>The following are all conforming HTML fragments.</p>
+
+ <p>In the following example, one entry ("Authors") is linked to two values
+ ("John" and "Luke").</p>
+
+ <pre>&lt;dl&gt;
+ &lt;dt&gt; Authors
+ &lt;dd&gt; John
+ &lt;dd&gt; Luke
+ &lt;dt&gt; Editor
+ &lt;dd&gt; Frank
+&lt;/dl&gt;</pre>
+
+ <p>In the following example, one definition is linked to two terms.</p>
+
+ <pre>&lt;dl&gt;
+ &lt;dt lang="en-US"&gt; &lt;dfn>color&lt;/dfn> &lt;/dt&gt;
+ &lt;dt lang="en-GB"&gt; &lt;dfn>colour&lt;/dfn> &lt;/dt&gt;
+ &lt;dd&gt; A sensation which (in humans) derives from the ability of
+ the fine structure of the eye to distinguish three differently
+ filtered analyses of a view. &lt;/dd&gt;
+&lt;/dl&gt;</pre>
+
+ <p>The following example illustrates the use of the <code><a
+ href="#dl">dl</a></code> element to mark up metadata of sorts. At the end
+ of the example, one group has two metadata labels ("Authors" and
+ "Editors") and two values ("Robert Rothman" and "Daniel Jackson").</p>
+
+ <pre>&lt;dl&gt;
+ &lt;dt&gt; Last modified time &lt;/dt&gt;
+ &lt;dd&gt; 2004-12-23T23:33Z &lt;/dd&gt;
+ &lt;dt&gt; Recommended update interval &lt;/dt&gt;
+ &lt;dd&gt; 60s &lt;/dd&gt;
+ &lt;dt&gt; Authors &lt;/dt&gt;
+ &lt;dt&gt; Editors &lt;/dt&gt;
+ &lt;dd&gt; Robert Rothman &lt;/dd&gt;
+ &lt;dd&gt; Daniel Jackson &lt;/dd&gt;
+&lt;/dl&gt;</pre>
+ </div>
+
+ <p>If a <code><a href="#dl">dl</a></code> element is empty, it contains no
+ groups.
+
+ <p>If a <code><a href="#dl">dl</a></code> element contains non-<a
+ href="#inter-element" title="inter-element whitespace">whitespace</a> <a
+ href="#text-node" title="text node">text nodes</a>, or elements other than
+ <code><a href="#dt">dt</a></code> and <code><a href="#dd">dd</a></code>,
+ then those elements or <a href="#text-node" title="text node">text
+ nodes</a> do not form part of any groups in that <code><a
+ href="#dl">dl</a></code>, and the document is non-conforming.
+
+ <p>If a <code><a href="#dl">dl</a></code> element contains only <code><a
+ href="#dt">dt</a></code> elements, then it consists of one group with
+ names but no values, and the document is non-conforming.
+
+ <p>If a <code><a href="#dl">dl</a></code> element contains only <code><a
+ href="#dd">dd</a></code> elements, then it consists of one group with
+ values but no names, and the document is non-conforming.
+
+ <p class=note>The <code><a href="#dl">dl</a></code> element is
+ inappropriate for marking up dialogue, since dialogue is ordered (each
+ speaker/line pair comes after the previous one). For an example of how to mark up
+ dialogue, see the <code><a href="#dialog">dialog</a></code> element.
+
+ <h4 id=the-dt><span class=secno>3.11.5. </span>The <dfn
+ id=dt><code>dt</code></dfn> element</h4>
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Before <code><a href="#dd">dd</a></code> or <code><a
+ href="#dt">dt</a></code> elements inside <code><a
+ href="#dl">dl</a></code> elements.
+
+ <dd>Before a <code><a href="#dd">dd</a></code> element inside a <code><a
+ href="#dialog">dialog</a></code> element.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#dt">dt</a></code> element represents the term, or
+ name, part of a term-description group in a description list (<code><a
+ href="#dl">dl</a></code> element), and the talker, or speaker, part of a
+ talker-discourse pair in a conversation (<code><a
+ href="#dialog">dialog</a></code> element).
+
+ <p class=note>The <code><a href="#dt">dt</a></code> element itself, when
+ used in a <code><a href="#dl">dl</a></code> element, does not indicate
+ that its contents are a term being defined, but this can be indicated
+ using the <code><a href="#dfn">dfn</a></code> element.
+
+ <h4 id=the-dd><span class=secno>3.11.6. </span>The <dfn
+ id=dd><code>dd</code></dfn> element</h4>
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>After <code><a href="#dt">dt</a></code> or <code><a
+ href="#dd">dd</a></code> elements inside <code><a
+ href="#dl">dl</a></code> elements.
+
+ <dd>After a <code><a href="#dt">dt</a></code> element inside a <code><a
+ href="#dialog">dialog</a></code> element.
+
+ <dt>Content model:
+
+ <dd>When the element is a child of a <code><a href="#dl">dl</a></code>
+ element and the grandchild of an element that is <a href="#determining0"
+ title="Determining if a particular element contains block-level elements
+ or inline-level content">being used as an inline-level content
+ container</a>: <a href="#inline-level0">inline-level content</a>.
+
+ <dd>Otherwise: zero or more <a href="#block-level0">block-level
+ elements</a>, or <a href="#inline-level0">inline-level content</a> (but
+ not both).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#dd">dd</a></code> element represents the
+ description, definition, or value, part of a term-description group in a
+ description list (<code><a href="#dl">dl</a></code> element), and the
+ discourse, or quote, part in a conversation (<code><a
+ href="#dialog">dialog</a></code> element).
+
+ <p>The content model of a <code><a href="#dd">dd</a></code> element depends
+ on the way its parent element is being used. If the parent element is a
+ <code><a href="#dl">dl</a></code> element that is being used as structured
+ inline content (i.e. if the <code><a href="#dl">dl</a></code> element's
+ parent element is being <a href="#determining0" title="Determining if a
+ particular element contains block-level elements or inline-level
+ content">used as an inline-level content</a> container), then the <code><a
+ href="#dd">dd</a></code> element must only contain <a
+ href="#inline-level0">inline-level content</a>.
+
+ <p>Otherwise, the element may be used either for <a href="#inline-level0"
+ title="inline-level content">inline content</a> or <a
+ href="#block-level0">block-level elements</a>.
+
+ <h3 id=phrase><span class=secno>3.12. </span>Phrase elements</h3>
+ <!-- XXX ruby (delayed until someone can define it with error handling rules) -->
+
+ <h4 id=the-a><span class=secno>3.12.1. </span>The <dfn
+ id=a><code>a</code></dfn> element</h4>
+
+ <p><a href="#interactive1" title="interactive elements">Interactive</a>, <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed, if there are no ancestor <a href="#interactive1">interactive
+ elements</a>.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#significant" title="significant inline content">significant</a> <a
+ href="#strictly">strictly inline-level content</a>, but there must be no
+ <a href="#interactive1" title="interactive elements">interactive</a>
+ descendants.
+
+ <dd>Otherwise: any <a href="#significant" title="significant inline
+ content">significant</a> <a href="#inline-level0">inline-level
+ content</a>, but there must be no <a href="#interactive1"
+ title="interactive elements">interactive</a> descendants.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-hyperlink-href><a href="#href6">href</a></code>
+
+ <dd><code title=attr-hyperlink-target><a href="#target3">target</a></code>
+
+ <dd><code title=attr-hyperlink-ping><a href="#ping">ping</a></code>
+
+ <dd><code title=attr-hyperlink-rel><a href="#rel3">rel</a></code>
+
+ <dd><code title=attr-hyperlink-media><a href="#media12">media</a></code>
+
+ <dd><code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>
+
+ <dd><code title=attr-hyperlink-type><a href="#type17">type</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlanchorelement>HTMLAnchorElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#href3" title=dom-a-href>href</a>;
+ attribute DOMString <a href="#target1" title=dom-a-target>target</a>;
+ attribute DOMString <a href="#ping0" title=dom-a-ping>ping</a>;
+ attribute DOMString <a href="#rel1" title=dom-a-rel>rel</a>;
+ readonly attribute DOMTokenList <a href="#rellist0" title=dom-a-relList>relList</a>;
+ attribute DOMString <a href="#media4" title=dom-a-media>media</a>;
+ attribute DOMString <a href="#hreflang1" title=dom-a-hreflang>hreflang</a>;
+ attribute DOMString <a href="#type3" title=dom-a-type>type</a>;
+};</pre>
+
+ <p>The <code title=command-ro><a href="#command2">Command</a></code>
+ interface must also be implemented by this element.</p>
+ </dl>
+
+ <p>If the <code><a href="#a">a</a></code> element has an <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code> attribute, then
+ it represents a <a href="#hyperlinks">hyperlink</a>.
+
+ <p>If the <code><a href="#a">a</a></code> element has no <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code> attribute, then
+ the element is a placeholder for where a link might otherwise have been
+ placed, if it had been relevant.
+
+ <p>The <code title=attr-hyperlink-target><a
+ href="#target3">target</a></code>, <code title=attr-hyperlink-ping><a
+ href="#ping">ping</a></code>, <code title=attr-hyperlink-rel><a
+ href="#rel3">rel</a></code>, <code title=attr-hyperlink-media><a
+ href="#media12">media</a></code>, <code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>, and <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attributes
+ must be omitted if the <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attribute is not present.</p>
+ <!-- XXX really? that makes it harder to use as a
+ placeholder. -->
+
+ <div class=example>
+ <p>If a site uses a consistent navigation toolbar on every page, then the
+ link that would normally link to the page itself could be marked up using
+ an <code><a href="#a">a</a></code> element:</p>
+
+ <pre>&lt;nav>
+ &lt;ul>
+ &lt;li> &lt;a href="/">Home&lt;/a> &lt;/li>
+ &lt;li> &lt;a href="/news">News&lt;/a> &lt;/li>
+ &lt;li> &lt;a>Examples&lt;/a> &lt;/li>
+ &lt;li> &lt;a href="/legal">Legal&lt;/a> &lt;/li>
+ &lt;/ul>
+&lt;/nav></pre>
+ </div>
+
+ <p>Interactive user agents should allow users to <a href="#following0"
+ title="following hyperlinks">follow hyperlinks</a> created using the
+ <code><a href="#a">a</a></code> element. The <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code>, <code
+ title=attr-hyperlink-target><a href="#target3">target</a></code> and <code
+ title=attr-hyperlink-ping><a href="#ping">ping</a></code> attributes
+ decide how the link is followed. The <code title=attr-hyperlink-rel><a
+ href="#rel3">rel</a></code>, <code title=attr-hyperlink-media><a
+ href="#media12">media</a></code>, <code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>, and <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attributes may
+ be used to indicate to the user the likely nature of the target resource
+ before the user follows the link.
+
+ <p>The <a href="#activation0">activation behavior</a> of <code><a
+ href="#a">a</a></code> elements that represent <span>hyperlinks</span> is
+ to run the following steps:
+
+ <ol>
+ <li>
+ <p>If the <code title=event-DOMActivate>DOMActivate</code> event in
+ question is not <span title=concept-events-trusted>trusted</span> (i.e.
+ a <code title=dom-click><a href="#click">click()</a></code> method call
+ was the reason for the event being dispatched), and the <code><a
+ href="#a">a</a></code> element's <code title=attr-hyperlink-target><a
+ href="#target3">target</a></code> attribute is <span
+ class=big-issue>...</span> then raise an <code>INVALID_ACCESS_ERR</code>
+ exception and abort these steps.
+
+ <li>
+ <p>If the target of the <code title=event-DOMActivate>DOMActivate</code>
+ event is an <code><a href="#img">img</a></code> element with an <code
+ title=attr-img-ismap><a href="#ismap">ismap</a></code> attribute
+ specified, then server-side image map processing must be performed, as
+ follows:</p>
+
+ <ol><!-- http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E%0A...%3Ca%20href%3D%22%23%22%3E%3Cimg%20ismap%20usemap%3D%22%23a%22%20src%3D/resources/images/smallcats%3E%3C/a%3E%0A%3Cmap%20name%3Da%3E%3Carea%20shape%3Drect%20coords%3D0%2C0%2C50%2C50%20href%3Db%3E%3C/map%3E -->
+
+ <li>If the <code title=event-DOMActivate>DOMActivate</code> event was
+ dispatched as the result of a real pointing-device-triggered <code
+ title=event-click>click</code> event on the <code><a
+ href="#img">img</a></code> element, then let <var title="">x</var> be
+ the distance in CSS pixels from the left edge of the image to the
+ location of the click, and let <var title="">y</var> be the distance in
+ CSS pixels from the top edge of the image to the location of the click.
+ Otherwise, let <var title="">x</var> and <var title="">y</var> be zero.
+
+ <li>Let the <dfn id=hyperlink2><var>hyperlink suffix</var></dfn> be a
+ U+003F QUESTION MARK character, the value of <var title="">x</var>
+ expressed as a base-ten integer using ASCII digits (U+0030 DIGIT ZERO
+ to U+0039 DIGIT NINE), a U+002C COMMA character, and the value of <var
+ title="">y</var> expressed as a base-ten integer using ASCII digits.
+ </ol>
+
+ <li>
+ <p>Finally, the user agent must <a href="#following0" title="following
+ hyperlinks">follow the hyperlink</a> defined by the <code><a
+ href="#a">a</a></code> element. If the steps above defined a <var><a
+ href="#hyperlink2">hyperlink suffix</a></var>, then take that into
+ account when following the hyperlink.
+ </ol>
+
+ <p class=note>One way that a user agent can enable users to follow
+ hyperlinks is by allowing <code><a href="#a">a</a></code> elements to be
+ clicked, or focussed and activated by the keyboard. This <a
+ href="#interactive1" title="interactive elements">will cause</a> the
+ aforementioned <a href="#activation0">activation behavior</a> to be
+ invoked.
+
+ <p>The <code><a href="#a">a</a></code> element must not be <a
+ href="#significant" title="significant inline content">empty</a>.
+
+ <p>The DOM attributes <dfn id=href3
+ title=dom-a-href><code>href</code></dfn>, <dfn id=ping0
+ title=dom-a-ping><code>ping</code></dfn>, <dfn id=target1
+ title=dom-a-target><code>target</code></dfn>, <dfn id=rel1
+ title=dom-a-rel><code>rel</code></dfn>, <dfn id=media4
+ title=dom-a-media><code>media</code></dfn>, <dfn id=hreflang1
+ title=dom-a-hreflang><code>hreflang</code></dfn>, and <dfn id=type3
+ title=dom-a-type><code>type</code></dfn>, must each <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM attribute <dfn id=rellist0
+ title=dom-a-rellist><code>relList</code></dfn> must <a
+ href="#reflect">reflect</a> the <code title=attr-hyperlink-rel><a
+ href="#rel3">rel</a></code> content attribute.
+
+ <h4 id=the-q><span class=secno>3.12.2. </span>The <dfn
+ id=q><code>q</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-q-cite><a href="#cite1">cite</a></code>
+
+ <dt>DOM interface:
+
+ <dd> The <code><a href="#q">q</a></code> element uses the <code><a
+ href="#htmlquoteelement">HTMLQuoteElement</a></code> interface.
+ </dl>
+
+ <p>The <code><a href="#q">q</a></code> element represents a part of a
+ paragraph quoted from another source.
+
+ <p>Content inside a <code><a href="#q">q</a></code> element must be quoted
+ from another source, whose URI, if it has one, should be cited in the <dfn
+ id=cite1 title=attr-q-cite><code>cite</code></dfn> attribute.
+
+ <p>If the <code title=attr-q-cite><a href="#cite1">cite</a></code>
+ attribute is present, it must be a URI (or IRI). User agents should allow
+ users to follow such citation links.
+
+ <p>If a <code><a href="#q">q</a></code> element is contained (directly or
+ indirectly) in a <a href="#paragraph">paragraph</a> that contains a single
+ <code><a href="#cite2">cite</a></code> element and has no other <code><a
+ href="#q">q</a></code> element descendants, then the citation given by
+ that <code><a href="#cite2">cite</a></code> element gives the source of
+ the quotation contained in the <code><a href="#q">q</a></code> element.</p>
+ <!-- XXX need examples -->
+
+ <h4 id=the-cite><span class=secno>3.12.3. </span>The <dfn
+ id=cite2><code>cite</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.</dd>
+ <!-- XXX should the cite element have a cite attribute? -->
+ </dl>
+
+ <p>The <code><a href="#cite2">cite</a></code> element represents a
+ citation: the source, or reference, for a quote or statement made in the
+ document.
+
+ <p class=note>A <em>citation</em> is not a <em>quote</em> (for which the
+ <code><a href="#q">q</a></code> element is appropriate).
+
+ <div class=example>
+ <p>This is incorrect usage:</p>
+
+ <pre>&lt;p>&lt;cite>This is wrong!&lt;/cite>, said Ian.&lt;/p></pre>
+
+ <p>This is the correct way to do it:</p>
+
+ <pre>&lt;p>&lt;q>This is correct!&lt;/q>, said &lt;cite>Ian&lt;/cite>.&lt;/p></pre>
+
+ <p>This is also wrong, because the title and the name are not references
+ or citations:</p>
+
+ <pre>&lt;p>My favourite book is &lt;cite>The Reality Dysfunction&lt;/cite>
+by &lt;cite>Peter F. Hamilton&lt;/cite>.&lt;/p></pre>
+
+ <p>This is correct, because even though the source is not quoted, it is
+ cited:</p>
+
+ <pre>&lt;p>According to &lt;cite>the Wikipedia article on
+HTML&lt;/cite>, HTML is defined in formal specifications that were
+developed and published throughout the 1990s.&lt;/p></pre>
+ </div>
+
+ <p class=note>The <code><a href="#cite2">cite</a></code> element can apply
+ to <code><a href="#blockquote">blockquote</a></code> and <code><a
+ href="#q">q</a></code> elements in certain cases described in the
+ definitions of those elements.
+
+ <h4 id=the-em><span class=secno>3.12.4. </span>The <dfn
+ id=em><code>em</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#em">em</a></code> element represents stress emphasis
+ of its contents.
+
+ <p>The level of emphasis that a particular piece of content has is given by
+ its number of ancestor <code><a href="#em">em</a></code> elements.
+
+ <p>The placement of emphasis changes the meaning of the sentence. The
+ element thus forms an integral part of the content. The precise way in
+ which emphasis is used in this way depends on the language.
+
+ <div class=example>
+ <p>These examples show how changing the emphasis changes the meaning.
+ First, a general statement of fact, with no emphasis:</p>
+
+ <pre>&lt;p>Cats are cute animals.&lt;/p></pre>
+
+ <p>By emphasising the first word, the statement implies that the kind of
+ animal under discussion is in question (maybe someone is asserting that
+ dogs are cute):</p>
+
+ <pre>&lt;p>&lt;em>Cats&lt;/em> are cute animals.&lt;/p></pre>
+
+ <p>Moving the emphasis to the verb, one highlights that the truth of the
+ entire sentence is in question (maybe someone is saying cats are not
+ cute):</p>
+
+ <pre>&lt;p>Cats &lt;em>are&lt;/em> cute animals.&lt;/p></pre>
+
+ <p>By moving it to the adjective, the exact nature of the cats is
+ reasserted (maybe someone suggested cats were <em>mean</em> animals):</p>
+
+ <pre>&lt;p>Cats are &lt;em>cute&lt;/em> animals.&lt;/p></pre>
+
+ <p>Similarly, if someone asserted that cats were vegetables, someone
+ correcting this might emphasise the last word:</p>
+
+ <pre>&lt;p>Cats are cute &lt;em>animals&lt;/em>.&lt;/p></pre>
+
+ <p>By emphasising the entire sentence, it becomes clear that the speaker
+ is fighting hard to get the point across. This kind of emphasis also
+ typically affects the punctuation, hence the exclamation mark here.</p>
+
+ <pre>&lt;p>&lt;em>Cats are cute animals!&lt;/em>&lt;/p></pre>
+
+ <p>Anger mixed with emphasising the cuteness could lead to markup such as:</p>
+
+ <pre>&lt;p>&lt;em>Cats are &lt;em>cute&lt;/em> animals!&lt;/em>&lt;/p></pre>
+ </div>
+ <!-- XXX should say it is wrong to use as in:
+
+ <p><em>Note</em>: ...</p>
+
+ -->
+
+ <h4 id=the-strong><span class=secno>3.12.5. </span>The <dfn
+ id=strong><code>strong</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#strong">strong</a></code> element represents strong
+ importance for its contents.
+
+ <p>The relative level of importance of a piece of content is given by its
+ number of ancestor <code><a href="#strong">strong</a></code> elements;
+ each <code><a href="#strong">strong</a></code> element increases the
+ importance of its contents.
+
+ <p>Changing the importance of a piece of text with the <code><a
+ href="#strong">strong</a></code> element does not change the meaning of
+ the sentence.
+
+ <div class=example>
+ <p>Here is an example of a warning notice in a game, with the various
+ parts marked up according to how important they are:</p>
+ <!-- DO NOT REFLOW THIS EXAMPLE it has been carefully balanced -->
+ <pre>&lt;p>&lt;strong>Warning.&lt;/strong> This dungeon is dangerous.
+&lt;strong>Avoid the ducks.&lt;/strong> Take any gold you find.
+&lt;strong>&lt;strong>Do not take any of the diamonds&lt;/strong>,
+they are explosive and &lt;strong>will destroy anything within
+ten meters.&lt;/strong>&lt;/strong> You have been warned.&lt;/p></pre>
+ </div>
+
+ <h4 id=the-small><span class=secno>3.12.6. </span>The <dfn
+ id=small><code>small</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#small">small</a></code> element represents small
+ print (part of a document often describing legal restrictions, such as
+ copyrights or other disadvantages), or other side comments.
+
+ <p class=note>The <code><a href="#small">small</a></code> element does not
+ "de-emphasise" or lower the importance of text emphasised by the <code><a
+ href="#em">em</a></code> element or marked as important with the <code><a
+ href="#strong">strong</a></code> element.
+
+ <div class=example>
+ <p>In this example the footer contains contact information and a
+ copyright.</p>
+
+ <pre>&lt;footer>
+ &lt;address>
+ For more details, contact
+ &lt;a href="mailto:js@example.com">John Smith&lt;/a>.
+ &lt;/address>
+ &lt;p>&lt;small>&copy; copyright 2038 Example Corp.&lt;/small>&lt;/p>
+&lt;/footer></pre>
+
+ <p>In this second example, the <code><a href="#small">small</a></code>
+ element is used for a side comment.</p>
+
+ <pre>&lt;p>Example Corp today announced record profits for the
+second quarter &lt;small>(Full Disclosure: Foo News is a subsidiary of
+Example Corp)&lt;/small>, leading to speculation about a third quarter
+merger with Demo Group.&lt;/p></pre>
+
+ <p>In this last example, the <code><a href="#small">small</a></code>
+ element is marked as being <em>important</em> small print.</p>
+
+ <pre>&lt;p>&lt;strong>&lt;small>Continued use of this service will result in a kiss.&lt;/small>&lt;/strong>&lt;/p></pre>
+ </div>
+
+ <h4 id=the-m><span class=secno>3.12.7. </span>The <dfn
+ id=m><code>m</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#m">m</a></code> element represents a run of text
+ marked or highlighted.
+
+ <div class=example>
+ <p>In the following snippet, a paragraph of text refers to a specific part
+ of a code fragment.</p>
+
+ <pre>&lt;p>The highlighted part below is where the error lies:&lt;/p>
+&lt;pre>&lt;code>var i: Integer;
+begin
+ i := &lt;m>1.1&lt;/m>;
+end.&lt;/code>&lt;/pre></pre>
+
+ <p>Another example of the <code><a href="#m">m</a></code> element is
+ highlighting parts of a document that are matching some search string. If
+ someone looked at a document, and the server knew that the user was
+ searching for the word "kitten", then the server might return the
+ document with one paragraph modified as follows:</p>
+
+ <pre>&lt;p>I also have some &lt;m>kitten&lt;/m>s who are visiting me
+these days. They're really cute. I think they like my garden!&lt;/p></pre>
+ </div>
+
+ <h4 id=the-dfn><span class=secno>3.12.8. </span>The <dfn
+ id=dfn><code>dfn</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed, if there are no ancestor <code><a href="#dfn">dfn</a></code>
+ elements.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>, but there must
+ be no descendant <code><a href="#dfn">dfn</a></code> elements.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-dfn-title><a
+ href="#title4">title</a></code> attribute has special semantics on this
+ element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#dfn">dfn</a></code> element represents the defining
+ instance of a term. The <a href="#paragraph">paragraph</a>, <a href="#dl"
+ title=dl>description list group</a>, or <a href="#sectioning"
+ title="sectioning elements">section</a> that contains the <code><a
+ href="#dfn">dfn</a></code> element contains the definition for the term
+ given by the contents of the <code><a href="#dfn">dfn</a></code> element.
+
+ <p><code><a href="#dfn">dfn</a></code> elements must not be nested.
+
+ <p><dfn id=defining>Defining term</dfn>: If the <code><a
+ href="#dfn">dfn</a></code> element has a <dfn id=title4
+ title=attr-dfn-title><code>title</code></dfn> attribute, then the exact
+ value of that attribute is the term being defined. Otherwise, if it
+ contains exactly one element child node and no child <a href="#text-node"
+ title="text node">text nodes</a>, and that child element is an <code><a
+ href="#abbr">abbr</a></code> element with a <code title=attr-abbr-title><a
+ href="#title5">title</a></code> attribute, then the exact value of
+ <em>that</em> attribute is the term being defined. Otherwise, it is the
+ exact <code><a href="#textcontent">textContent</a></code> of the <code><a
+ href="#dfn">dfn</a></code> element that gives the term being defined.</p>
+ <!-- XXX that means <dfn>x \n x</dfn> won't match <span>x x</span> -->
+
+ <p>If the <code title=attr-dfn-title><a href="#title4">title</a></code>
+ attribute of the <code><a href="#dfn">dfn</a></code> element is present,
+ then it must only contain the term being defined.
+
+ <p>There must only be one <code><a href="#dfn">dfn</a></code> element per
+ document for each term defined (i.e. there must not be any duplicate <a
+ href="#defining" title="defining term">terms</a>).
+
+ <p class=note>The <code title=attr-title><a href="#title">title</a></code>
+ attribute of ancestor elements does not affect <code><a
+ href="#dfn">dfn</a></code> elements.
+
+ <p>The <code><a href="#dfn">dfn</a></code> element enables automatic
+ cross-references. Specifically, any <code><a href="#span">span</a></code>,
+ <code><a href="#abbr">abbr</a></code>, <code><a
+ href="#code">code</a></code>, <code><a href="#var">var</a></code>,
+ <code><a href="#samp">samp</a></code>, or <code><a href="#i">i</a></code>
+ element that has a non-empty <code title=attr-title><a
+ href="#title">title</a></code> attribute whose value exactly equals the <a
+ href="#defining" title="defining term">term</a> of a <code><a
+ href="#dfn">dfn</a></code> element in the same document, or which has no
+ <code title=attr-title><a href="#title">title</a></code> attribute but
+ whose <code><a href="#textcontent">textContent</a></code> exactly equals
+ the <a href="#defining" title="defining term">term</a> of a <code><a
+ href="#dfn">dfn</a></code> element in the document, and that has no <a
+ href="#interactive1">interactive elements</a> or <code><a
+ href="#dfn">dfn</a></code> elements either as ancestors or descendants,
+ and has no other elements as ancestors that are themselves matching these
+ conditions, should be presented in such a way that the user can jump from
+ the element to the first <code><a href="#dfn">dfn</a></code> element
+ giving the defining instance of that term.</p>
+ <!-- XXX that means <dfn>x x</dfn> won't match <span>x \n x</span> -->
+ <!-- need to mention that <span> is useful for cross-refs that don't
+ actually use the term itself -->
+
+ <div class=example>
+ <p>In the following fragment, the term "GDO" is first defined in the first
+ paragraph, then used in the second. A compliant UA could provide a link
+ from the <code><a href="#abbr">abbr</a></code> element in the second
+ paragraph to the <code><a href="#dfn">dfn</a></code> element in the
+ first.</p>
+
+ <pre>&lt;p>The &lt;dfn>&lt;abbr title="Garage Door Opener">GDO&lt;/abbr>&lt;/dfn>
+is a device that allows off-world teams to open the iris.&lt;/p>
+&lt;!-- ... later in the document: -->
+&lt;p>Teal'c activated his &lt;abbr title="Garage Door Opener">GDO&lt;/abbr>
+and so Hammond ordered the iris to be opened.&lt;/p></pre>
+ <!-- XXX need some examples of nesting where the top element makes
+ a crossref but the inner ones don't despite also matching the
+ algorithm above -->
+ <!-- XXX need some examples of duplicates being bad, of title
+ attributes being bad, etc -->
+ </div>
+
+ <h4 id=the-abbr><span class=secno>3.12.9. </span>The <dfn
+ id=abbr><code>abbr</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-abbr-title><a
+ href="#title5">title</a></code> attribute has special semantics on this
+ element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#abbr">abbr</a></code> element represents an
+ abbreviation or acronym. The <dfn id=title5
+ title=attr-abbr-title><code>title</code></dfn> attribute should be used to
+ provide an expansion of the abbreviation. If present, the attribute must
+ only contain an expansion of the abbreviation.
+
+ <div class=example>
+ <p>The paragraph below contains an abbreviation marked up with the
+ <code><a href="#abbr">abbr</a></code> element.</p>
+
+ <pre>&lt;p>The &lt;abbr title="Web Hypertext Application Technology
+Working Group">WHATWG&lt;/abbr> is a loose unofficial collaboration of
+Web browser manufacturers and interested parties who wish to develop
+new technologies designed to allow authors to write and deploy
+Applications over the World Wide Web.&lt;/p></pre>
+ </div>
+
+ <p>The <code title=attr-abbr-title><a href="#title5">title</a></code>
+ attribute may be omitted if there is a <code><a href="#dfn">dfn</a></code>
+ element in the document whose <a href="#defining">defining term</a> is the
+ abbreviation (the <code><a href="#textcontent">textContent</a></code> of
+ the <code><a href="#abbr">abbr</a></code> element).
+
+ <div class=example>
+ <p>In the example below, the word "Zat" is used as an abbreviation in the
+ second paragraph. The abbreviation is defined in the first, so the
+ explanatory <code title=attr-abbr-title><a
+ href="#title5">title</a></code> attribute has been omitted. Because of
+ the way <code><a href="#dfn">dfn</a></code> elements are defined, the
+ second <code><a href="#abbr">abbr</a></code> element in this example
+ would be connected (in some UA-specific way) to the first.</p>
+
+ <pre>&lt;p>The &lt;dfn>&lt;abbr>Zat&lt;/abbr>&lt;/dfn>, short for Zat'ni'catel, is a weapon.&lt;/p>
+&lt;p>Jack used a &lt;abbr>Zat&lt;/abbr> to make the boxes of evidence disappear.&lt;/p></pre>
+ </div>
+
+ <h4 id=the-time><span class=secno>3.12.10. </span>The <dfn
+ id=time><code>time</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-time-datetime><a href="#datetime">datetime</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltimeelement>HTMLTimeElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#datetime0" title=dom-time-datetime>dateTime</a>;
+ readonly attribute DOMTimeStamp <a href="#date0" title=dom-time-date>date</a>;
+ readonly attribute DOMTimeStamp <a href="#time1" title=dom-time-time>time</a>;
+ readonly attribute DOMTimeStamp <a href="#timezone0" title=dom-time-timezone>timezone</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#time">time</a></code> element represents a date
+ and/or a time.
+
+ <p>The <dfn id=datetime
+ title=attr-time-datetime><code>datetime</code></dfn> attribute, if
+ present, must contain a <a href="#date-or">date or time string</a> that
+ identifies the date or time being specified.
+
+ <p>If the <code title=attr-time-datetime><a
+ href="#datetime">datetime</a></code> attribute is not present, then the
+ date or time must be specified in the content of the element, such that
+ parsing the element's <code><a href="#textcontent">textContent</a></code>
+ according to the rules for parsing <a href="#date-or0" title="date or time
+ string in content">date or time strings in content</a> successfully
+ extracts a date or time.
+
+ <p>The <dfn id=datetime0
+ title=dom-time-datetime><code>dateTime</code></dfn> DOM attribute must <a
+ href="#reflect">reflect</a> the <code title=attr-time-datetime><a
+ href="#datetime">datetime</a></code> content attribute.
+
+ <p>User agents, to obtain the <dfn id=date
+ title=concept-time-date>date</dfn>, <dfn id=time0
+ title=concept-time-time>time</dfn>, and <dfn id=timezone
+ title=concept-time-timezone>timezone</dfn> represented by a <code><a
+ href="#time">time</a></code> element, must follow the following steps:
+
+ <ol>
+ <li>If the <code title=attr-time-datetime><a
+ href="#datetime">datetime</a></code> attribute is present, then parse it
+ according to the rules for parsing <a href="#date-or1" title="date or
+ time string in attributes">date or time strings in attributes</a>, and let
+ the result be <var title="">result</var>.
+
+ <li>Otherwise, parse the element's <code><a
+ href="#textcontent">textContent</a></code> according to the rules for
+ parsing <a href="#date-or0" title="date or time string in
+ content">date or time strings in content</a>, and let the result be
+ <var title="">result</var>.
+
+ <li>If <var title="">result</var> is empty (because the parsing failed),
+ then the <a href="#date" title=concept-time-date>date</a> is unknown, the
+ <a href="#time0" title=concept-time-time>time</a> is unknown, and the <a
+ href="#timezone" title=concept-time-timezone>timezone</a> is unknown.
+
+ <li>Otherwise: if <var title="">result</var> contains a date, then that is
+ the <a href="#date" title=concept-time-date>date</a>; if <var
+ title="">result</var> contains a time, then that is the <a href="#time0"
+ title=concept-time-time>time</a>; and if <var title="">result</var>
+ contains a timezone, then the timezone is the element's <a
+ href="#timezone" title=concept-time-timezone>timezone</a>. (A timezone
+ can only be present if both a date and a time are also present.)
+ </ol>
+
+ <p>The <dfn id=date0 title=dom-time-date><code>date</code></dfn> DOM
+ attribute must return null if the <a href="#date"
+ title=concept-time-date>date</a> is unknown, and otherwise must return the
+ time corresponding to midnight UTC (i.e. the first second) of the given <a
+ href="#date" title=concept-time-date>date</a>.
+
+ <p>The <dfn id=time1 title=dom-time-time><code>time</code></dfn> DOM
+ attribute must return null if the <a href="#time0"
+ title=concept-time-time>time</a> is unknown, and otherwise must return the
+ time corresponding to the given <a href="#time0"
+ title=concept-time-time>time</a> of 1970-01-01, with the timezone UTC.
+
+ <p>The <dfn id=timezone0
+ title=dom-time-timezone><code>timezone</code></dfn> DOM attribute must
+ return null if the <a href="#timezone"
+ title=concept-time-timezone>timezone</a> is unknown, and otherwise must
+ return the time corresponding to 1970-01-01 00:00 UTC in the given <a
+ href="#timezone" title=concept-time-timezone>timezone</a>, with the
+ timezone set to UTC (i.e. the time corresponding to 1970-01-01 at 00:00
+ UTC plus the offset corresponding to the timezone).
+
+ <div class=example>
+ <p>In the following snippet:</p>
+
+ <pre>&lt;p>Our first date was &lt;time datetime="2006-09-23">a Saturday&lt;/time>.&lt;/p></pre>
+
+ <p>...the <code><a href="#time">time</a></code> element's <code
+ title=dom-time-date><a href="#date0">date</a></code> attribute would have
+ the value 1,158,969,600,000ms, and the <code title=dom-time-time><a
+ href="#time1">time</a></code> and <code title=dom-time-timezone><a
+ href="#timezone0">timezone</a></code> attributes would return null.</p>
+
+ <p>In the following snippet:</p>
+
+ <pre>&lt;p>We stopped talking at &lt;time datetime="2006-09-24 05:00 -7">5am the next morning&lt;/time>.&lt;/p></pre>
+
+ <p>...the <code><a href="#time">time</a></code> element's <code
+ title=dom-time-date><a href="#date0">date</a></code> attribute would have
+ the value 1,159,056,000,000ms, the <code title=dom-time-time><a
+ href="#time1">time</a></code> attribute would have the value
+ 18,000,000ms, and the <code title=dom-time-timezone><a
+ href="#timezone0">timezone</a></code> attribute would return
+ -25,200,000ms. To obtain the actual time, the three attributes can be
+ added together, obtaining 1,159,048,800,000, which is the specified date
+ and time in UTC.</p>
+
+ <p>Finally, in the following snippet:</p>
+
+ <pre>&lt;p>Many people get up at &lt;time>08:00&lt;/time>.&lt;/p></pre>
+
+ <p>...the <code><a href="#time">time</a></code> element's <code
+ title=dom-time-date><a href="#date0">date</a></code> attribute would have
+ the value null, the <code title=dom-time-time><a
+ href="#time1">time</a></code> attribute would have the value
+ 28,800,000ms, and the <code title=dom-time-timezone><a
+ href="#timezone0">timezone</a></code> attribute would return null.</p>
+ </div>
+
+ <p class=big-issue>These APIs may be suboptimal. Comments on making them
+ more useful to JS authors are welcome. The primary use cases for these
+ elements are for marking up publication dates e.g. in blog entries, and
+ for marking event dates in hCalendar markup. Thus the DOM APIs are likely
+ to be used as ways to generate interactive calendar widgets or some such.
+
+ <h4 id=the-meter><span class=secno>3.12.11. </span>The <dfn
+ id=meter><code>meter</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-meter-value><a href="#value1">value</a></code>
+
+ <dd><code title=attr-meter-min><a href="#min">min</a></code>
+
+ <dd><code title=attr-meter-low><a href="#low">low</a></code>
+
+ <dd><code title=attr-meter-high><a href="#high">high</a></code>
+
+ <dd><code title=attr-meter-max><a href="#max">max</a></code>
+
+ <dd><code title=attr-meter-optimum><a href="#optimum">optimum</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlmeterelement>HTMLMeterElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute long <a href="#value2" title=dom-meter-value>value</a>;
+ attribute long <a href="#min0" title=dom-meter-min>min</a>;
+ attribute long <a href="#max0" title=dom-meter-max>max</a>;
+ attribute long <a href="#low0" title=dom-meter-low>low</a>;
+ attribute long <a href="#high0" title=dom-meter-high>high</a>;
+ attribute long <a href="#optimum0" title=dom-meter-optimum>optimum</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#meter">meter</a></code> element represents a scalar
+ measurement within a known range, or a fractional value; for example disk
+ usage, the relevance of a query result, or the fraction of a voting
+ population to have selected a particular candidate.
+
+ <p>This is also known as a gauge.
+
+ <p class=note>The <code><a href="#meter">meter</a></code> element should
+ not be used to indicate progress (as in a progress bar). For that role,
+ HTML provides a separate <code><a href="#progress">progress</a></code>
+ element.
+
+ <p>There are six attributes that determine the semantics of the gauge
+ represented by the element.
+
+ <p>The <dfn id=min title=attr-meter-min><code>min</code></dfn> attribute
+ specifies the lower bound of the range, and the <dfn id=max
+ title=attr-meter-max><code>max</code></dfn> attribute specifies the upper
+ bound. The <dfn id=value1 title=attr-meter-value><code>value</code></dfn>
+ attribute specifies the value to have the gauge indicate as the "measured"
+ value.
+
+ <p>The other three attributes can be used to segment the gauge's range into
+ "low", "medium", and "high" parts, and to indicate which part of the gauge
+ is the "optimum" part. The <dfn id=low
+ title=attr-meter-low><code>low</code></dfn> attribute specifies the range
+ that is considered to be the "low" part, and the <dfn id=high
+ title=attr-meter-high><code>high</code></dfn> attribute specifies the
+ range that is considered to be the "high" part. The <dfn id=optimum
+ title=attr-meter-optimum><code>optimum</code></dfn> attribute gives the
+ position that is "optimum"; if that is higher than the "high" value then
+ this indicates that the higher the value, the better; if it's lower than
+ the "low" mark then it indicates that lower values are better, and
+ naturally if it is in between then it indicates that neither high nor low
+ values are good.
+
+ <p><strong>Authoring requirements</strong>: The recommended way of giving
+ the value is to include it as contents of the element, either as two
+ numbers (the higher number represents the maximum, the other number the
+ current value), or as a percentage or similar (using one of the characters
+ such as "%"), or as a fraction.
+
+ <p>The <code title=attr-meter-value><a href="#value1">value</a></code>,
+ <code title=attr-meter-min><a href="#min">min</a></code>, <code
+ title=attr-meter-low><a href="#low">low</a></code>, <code
+ title=attr-meter-high><a href="#high">high</a></code>, <code
+ title=attr-meter-max><a href="#max">max</a></code>, and <code
+ title=attr-meter-optimum><a href="#optimum">optimum</a></code> attributes
+ are all optional. When present, they must have values that are <a
+ href="#valid1" title="valid floating point number">valid floating point
+ numbers</a>.
+
+ <div class=example>
+ <p>The following examples all represent a measurement of three quarters
+ (of the maximum of whatever is being measured):</p>
+
+ <pre>&lt;meter>75%&lt;/meter>
+&lt;meter>750&#x2030;&lt;/meter>
+&lt;meter>3/4&lt;/meter>
+&lt;meter>6 blocks used (out of 8 total)&lt;/meter>
+&lt;meter>max: 100; current: 75&lt;/meter>
+&lt;meter>&lt;object data="graph75.png">0.75&lt;/object>&lt;/meter>
+&lt;meter min="0" max="100" value="75">&lt;/meter></pre>
+ </div>
+
+ <p><strong>User agent requirements</strong>: User agents must parse the
+ <code title=attr-meter-min><a href="#min">min</a></code>, <code
+ title=attr-meter-max><a href="#max">max</a></code>, <code
+ title=attr-meter-value><a href="#value1">value</a></code>, <code
+ title=attr-meter-low><a href="#low">low</a></code>, <code
+ title=attr-meter-high><a href="#high">high</a></code>, and <code
+ title=attr-meter-optimum><a href="#optimum">optimum</a></code> attributes
+ using the <a href="#rules1">rules for parsing floating point number
+ values</a>.
+
+ <p>If the <code title=attr-meter-value><a href="#value1">value</a></code>
+ attribute has been omitted, the user agent must also process the <code><a
+ href="#textcontent">textContent</a></code> of the element according to the
+ <a href="#steps">steps for finding one or two numbers of a ratio in a
+ string</a>. These steps will return nothing, one number, one number with a
+ denominator punctuation character, or two numbers.
+
+ <p>User agents must then use all these numbers to obtain values for six
+ points on the gauge, as follows. (The order in which these are evaluated
+ is important, as some of the values refer to earlier ones.)
+
+ <dl>
+ <dt>The minimum value
+
+ <dd>
+ <p>If the <code title=attr-meter-min><a href="#min">min</a></code>
+ attribute is specified and a value could be parsed out of it, then the
+ minimum value is that value. Otherwise, the minimum value is zero.</p>
+
+ <dt>The maximum value
+
+ <dd>
+ <p>If the <code title=attr-meter-max><a href="#max">max</a></code>
+ attribute is specified and a value could be parsed out of it, the
+ maximum value is that value.</p>
+
+ <p>Otherwise, if the <code title=attr-meter-max><a
+ href="#max">max</a></code> attribute is specified but no value could be
+ parsed out of it, or if it was not specified, but either or both of the
+ <code title=attr-meter-min><a href="#min">min</a></code> or <code
+ title=attr-meter-value><a href="#value1">value</a></code> attributes
+ <em>were</em> specified, then the maximum value is 1.</p>
+
+ <p>Otherwise, none of the <code title=attr-meter-max><a
+ href="#max">max</a></code>, <code title=attr-meter-min><a
+ href="#min">min</a></code>, and <code title=attr-meter-value><a
+ href="#value1">value</a></code> attributes were specified. If the result
+ of processing the <code><a href="#textcontent">textContent</a></code> of
+ the element was either nothing or just one number with no denominator
+ punctuation character, then the maximum value is 1; if the result was
+ one number but it had an associated denominator punctuation character,
+ then the maximum value is the <a href="#a-value" title="values
+ associated with denominator punctuation characters">value associated
+ with that denominator punctuation character</a>; and finally, if there
+ were two numbers parsed out of the <code><a
+ href="#textcontent">textContent</a></code>, then the maximum is the
+ higher of those two numbers.</p>
+
+ <p>If the above machinations result in a maximum value less than the
+ minimum value, then the maximum value is actually the same as the
+ minimum value.</p>
+
+ <dt>The actual value
+
+ <dd>
+ <p>If the <code title=attr-meter-value><a href="#value1">value</a></code>
+ attribute is specified and a value could be parsed out of it, then that
+ value is the actual value.</p>
+
+ <p>If the <code title=attr-meter-value><a href="#value1">value</a></code>
+ attribute is not specified but the <code title=attr-meter-max><a
+ href="#max">max</a></code> attribute <em>is</em> specified and the
+ result of processing the <code><a
+ href="#textcontent">textContent</a></code> of the element was one number
+ with no associated denominator punctuation character, then that number
+ is the actual value.</p>
+
+ <p>If neither of the <code title=attr-meter-value><a
+ href="#value1">value</a></code> and <code title=attr-meter-max><a
+ href="#max">max</a></code> attributes are specified, then, if the result
+ of processing the <code><a href="#textcontent">textContent</a></code> of
+ the element was one number (with or without an associated denominator
+ punctuation character), then that is the actual value, and if the result
+ of processing the <code><a href="#textcontent">textContent</a></code> of
+ the element was two numbers, then the actual value is the lower of the
+ two numbers found.</p>
+
+ <p>Otherwise, if none of the above apply, the actual value is zero.</p>
+
+ <p>If the above procedure results in an actual value less than the
+ minimum value, then the actual value is actually the same as the minimum
+ value.</p>
+
+ <p>If, on the other hand, the result is an actual value greater than the
+ maximum value, then the actual value is the maximum value.</p>
+
+ <dt>The low boundary
+
+ <dd>
+ <p>If the <code title=attr-meter-low><a href="#low">low</a></code>
+ attribute is specified and a value could be parsed out of it, then the
+ low boundary is that value. Otherwise, the low boundary is the same as
+ the minimum value.</p>
+
+ <p>If the above results in a low boundary that is less than the minimum
+ value, the low boundary is the minimum value.</p>
+
+ <dt>The high boundary
+
+ <dd>
+ <p>If the <code title=attr-meter-high><a href="#high">high</a></code>
+ attribute is specified and a value could be parsed out of it, then the
+ high boundary is that value. Otherwise, the high boundary is the same as
+ the maximum value.</p>
+
+ <p>If the above results in a high boundary that is higher than the
+ maximum value, the high boundary is the maximum value.</p>
+
+ <dt>The optimum point
+
+ <dd>
+ <p>If the <code title=attr-meter-optimum><a
+ href="#optimum">optimum</a></code> attribute is specified and a value
+ could be parsed out of it, then the optimum point is that value.
+ Otherwise, the optimum point is the midpoint between the minimum value
+ and the maximum value.</p>
+
+ <p>If the optimum point is then less than the minimum value, then the
+ optimum point is actually the same as the minimum value. Similarly, if
+ the optimum point is greater than the maximum value, then it is actually
+ the maximum value instead.</p>
+ </dl>
+
+ <p>All of which should result in the following inequalities all being true:
+
+ <ul class=brief>
+ <li>minimum value &le; actual value &le; maximum value
+
+ <li>minimum value &le; low boundary &le; high boundary &le; maximum value
+
+ <li>minimum value &le; optimum point &le; maximum value
+ </ul>
+
+ <p><strong>UA requirements for regions of the gauge</strong>: If the
+ optimum point is equal to the low boundary or the high boundary, or
+ anywhere in between them, then the region between the low and high
+ boundaries of the gauge must be treated as the optimum region, and the low
+ and high parts, if any, must be treated as suboptimal. Otherwise, if the
+ optimum point is less than the low boundary, then the region between the
+ minimum value and the low boundary must be treated as the optimum region,
+ the region between the low boundary and the high boundary must be treated
+ as a suboptimal region, and the region between the high boundary and the
+ maximum value must be treated as an even less good region. Finally, if the
+ optimum point is higher than the high boundary, then the situation is
+ reversed; the region between the high boundary and the maximum value must
+ be treated as the optimum region, the region between the high boundary and
+ the low boundary must be treated as a suboptimal region, and the remaining
+ region between the low boundary and the minimum value must be treated as
+ an even less good region.
+
+ <p><strong>UA requirements for showing the gauge</strong>: When
+ representing a <code><a href="#meter">meter</a></code> element to the
+ user, the UA should indicate the relative position of the actual value to
+ the minimum and maximum values, and the relationship between the actual
+ value and the three regions of the gauge.
+
+ <div class=example>
+ <p>The following markup:</p>
+
+ <pre>
+&lt;h3>Suggested groups&lt;/h3>
+&lt;menu type="toolbar">
+ &lt;a href="?cmd=hsg" onclick="hideSuggestedGroups()">Hide suggested groups&lt;/a>
+&lt;/menu>
+&lt;ul>
+ &lt;li>
+ &lt;p>&lt;a href="/group/comp.infosystems.www.authoring.stylesheets/view">comp.infosystems.www.authoring.stylesheets&lt;/a> -
+ &lt;a href="/group/comp.infosystems.www.authoring.stylesheets/subscribe">join&lt;/a>&lt;/p>
+ &lt;p>Group description: &lt;strong>Layout/presentation on the WWW.&lt;/strong>&lt;/p>
+ &lt;p><strong>&lt;meter value="0.5">Moderate activity,&lt;/meter></strong> Usenet, 618 subscribers&lt;/p>
+ &lt;/li>
+ &lt;li>
+ &lt;p>&lt;a href="/group/netscape.public.mozilla.xpinstall/view">netscape.public.mozilla.xpinstall&lt;/a> -
+ &lt;a href="/group/netscape.public.mozilla.xpinstall/subscribe">join&lt;/a>&lt;/p>
+ &lt;p>Group description: &lt;strong>Mozilla XPInstall discussion.&lt;/strong>&lt;/p>
+ &lt;p><strong>&lt;meter value="0.25">Low activity,&lt;/meter></strong> Usenet, 22 subscribers&lt;/p>
+ &lt;/li>
+ &lt;li>
+ &lt;p>&lt;a href="/group/mozilla.dev.general/view">mozilla.dev.general&lt;/a> -
+ &lt;a href="/group/mozilla.dev.general/subscribe">join&lt;/a>&lt;/p>
+ &lt;p><strong>&lt;meter value="0.25">Low activity,&lt;/meter></strong> Usenet, 66 subscribers&lt;/p>
+ &lt;/li>
+&lt;/ul>
+</pre>
+
+ <p>Might be rendered as follows:</p>
+
+ <p><img alt="With the &lt;meter> elements rendered as inline green bars of
+ varying lengths." src="images/sample-meter.png"></p>
+ </div>
+
+ <p>The <dfn id=min0 title=dom-meter-min><code>min</code></dfn>, <dfn
+ id=max0 title=dom-meter-max><code>max</code></dfn>, <dfn id=value2
+ title=dom-meter-value><code>value</code></dfn>, <dfn id=low0
+ title=dom-meter-low><code>low</code></dfn>, <dfn id=high0
+ title=dom-meter-high><code>high</code></dfn>, and <dfn id=optimum0
+ title=dom-meter-optimum><code>optimum</code></dfn> DOM attributes must
+ reflect the elements' content attributes of the same name. When the
+ relevant content attributes are absent, the DOM attributes must return
+ zero. The value parsed from the <code><a
+ href="#textcontent">textContent</a></code> never affects the DOM values.
+
+ <p class=big-issue>Would be cool to have the <code title=dom-meter-value><a
+ href="#value2">value</a></code> DOM attribute update the <code><a
+ href="#textcontent">textContent</a></code> in-line...</p>
+ <!-- XXX
+should we also look inside the title="" attribute?
+ Disk usage: <meter title="985MB of 986MB total" high="980">Full!</meter>
+should we make the contents accessible in some way, e.g. as a tooltip?
+-->
+
+ <h4 id=the-progress><span class=secno>3.12.12. </span>The <dfn
+ id=progress><code>progress</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-progress-value><a href="#value3">value</a></code>
+
+ <dd><code title=attr-progress-max><a href="#max1">max</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlprogresselement>HTMLProgressElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute float <a href="#value4" title=dom-progress-value>value</a>;
+ attribute float <a href="#max2" title=dom-progress-max>max</a>;
+ readonly attribute float <a href="#position" title=dom-progress-position>position</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#progress">progress</a></code> element represents the
+ completion progress of a task. The progress is either indeterminate,
+ indicating that progress is being made but that it is not clear how much
+ more work remains to be done before the task is complete (e.g. because the
+ task is waiting for a remote host to respond), or the progress is a number
+ in the range zero to a maximum, giving the fraction of work that has so
+ far been completed.
+
+ <p>There are two attributes that determine the current task completion
+ represented by the element.
+
+ <p>The <dfn id=value3 title=attr-progress-value><code>value</code></dfn>
+ attribute specifies how much of the task has been completed, and the <dfn
+ id=max1 title=attr-progress-max><code>max</code></dfn> attribute specifies
+ how much work the task requires in total. The units are arbitrary and not
+ specified.
+
+ <p>Instead of using the attributes, authors are recommended to simply
+ include the current value and the maximum value inline as text inside the
+ element.
+
+ <div class=example>
+ <p>Here is a snippet of a Web application that shows the progress of some
+ automated task:</p>
+
+ <pre>&lt;section>
+ &lt;h2>Task Progress&lt;/h2>
+ &lt;p>&lt;label>Progress: &lt;progress>&lt;span id="p">0&lt;/span>%&lt;/progress>&lt;/label>&lt;/p>
+ &lt;script>
+ var progressBar = document.getElementById('p');
+ function updateProgress(newValue) {
+ progressBar.textContent = newValue;
+ }
+ &lt;/script>
+&lt;/section></pre>
+
+ <p>(The <code>updateProgress()</code> method in this example would be
+ called by some other code on the page to update the actual progress bar
+ as the task progressed.)</p>
+ </div>
+
+ <p><strong>Author requirements</strong>: The <code
+ title=attr-progress-max><a href="#max1">max</a></code> and <code
+ title=attr-progress-value><a href="#value3">value</a></code> attributes,
+ when present, must have values that are <a href="#valid1" title="valid
+ floating point number">valid floating point numbers</a>. The <code
+ title=attr-progress-max><a href="#max1">max</a></code> attribute, if
+ present, must have a value greater than zero. The <code
+ title=attr-progress-value><a href="#value3">value</a></code> attribute, if
+ present, must have a value equal to or greater than zero, and less than or
+ equal to the value of the <code title=attr-progress-max><a
+ href="#max1">max</a></code> attribute, if present.
+
+ <p><strong>User agent requirements</strong>: User agents must parse the
+ <code title=attr-progress-max><a href="#max1">max</a></code> and <code
+ title=attr-progress-value><a href="#value3">value</a></code> attributes'
+ values according to the <a href="#rules1">rules for parsing floating point
+ number values</a>.
+
+ <p>If the <code title=attr-progress-value><a
+ href="#value3">value</a></code> attribute is omitted, then user agents
+ must also parse the <code><a href="#textcontent">textContent</a></code> of
+ the <code><a href="#progress">progress</a></code> element in question
+ using the <a href="#steps">steps for finding one or two numbers of a ratio
+ in a string</a>. These steps will return nothing, one number, one number
+ with a denominator punctuation character, or two numbers.
+
+ <p>Using the results of this processing, user agents must determine whether
+ the progress bar is an indeterminate progress bar, or whether it is a
+ determinate progress bar, and in the latter case, what its current and
+ maximum values are, all as follows:
+
+ <ol>
+ <li>If the <code title=attr-progress-max><a href="#max1">max</a></code>
+ attribute is omitted, and the <code title=attr-progress-value><a
+ href="#value3">value</a></code> attribute is omitted, and the result of
+ parsing the <code><a href="#textcontent">textContent</a></code> was nothing, then
+ the progress bar is an indeterminate progress bar. Abort these steps.
+
+ <li>Otherwise, it is a determinate progress bar.
+
+ <li>If the <code title=attr-progress-max><a href="#max1">max</a></code>
+ attribute is included, then, if a value could be parsed out of it, then
+ the maximum value is that value.
+
+ <li>Otherwise, if the <code title=attr-progress-max><a
+ href="#max1">max</a></code> attribute is absent but the <code
+ title=attr-progress-value><a href="#value3">value</a></code> attribute is
+ present, or, if the <code title=attr-progress-max><a
+ href="#max1">max</a></code> attribute is present but no value could be
+ parsed from it, then the maximum is 1.
+
+ <li>Otherwise, if neither attribute is included, then, if the <code><a
+ href="#textcontent">textContent</a></code> contained one number with an
+ associated denominator punctuation character, then the maximum value is
+ the <a href="#a-value" title="values associated with denominator
+ punctuation characters">value associated with that denominator
+ punctuation character</a>; otherwise, if the <code><a
+ href="#textcontent">textContent</a></code> contained two numbers, the
+ maximum value is the higher of the two values; otherwise, the maximum
+ value is 1.
+
+ <li>If the <code title=attr-progress-value><a
+ href="#value3">value</a></code> attribute is present on the element and a
+ value could be parsed out of it, that value is the current value of the
+ progress bar. Otherwise, if the attribute is present but no value could
+ be parsed from it, the current value is zero.
+
+ <li>Otherwise if the <code title=attr-progress-value><a
+ href="#value3">value</a></code> attribute is absent and the <code
+ title=attr-progress-max><a href="#max1">max</a></code> attribute is
+ present, then, if the <code><a href="#textcontent">textContent</a></code>
+ was parsed and found to contain just one number, with no associated
+ denominator punctuation character, then the current value is that number.
+ Otherwise, if the <code title=attr-progress-value><a
+ href="#value3">value</a></code> attribute is absent and the <code
+ title=attr-progress-max><a href="#max1">max</a></code> attribute is
+ present then the current value is zero.
+
+ <li>Otherwise, if neither attribute is present, then the current value is
+ the lower of the one or two numbers that were found in the <code><a
+ href="#textcontent">textContent</a></code> of the element.
+
+ <li>If the maximum value is less than or equal to zero, then it is reset
+ to 1.
+
+ <li>If the current value is less than zero, then it is reset to zero.
+
+ <li>Finally, if the current value is greater than the maximum value, then
+ the current value is reset to the maximum value.
+ </ol>
+
+ <p><strong>UA requirements for showing the progress bar</strong>: When
+ representing a <code><a href="#progress">progress</a></code> element to
+ the user, the UA should indicate whether it is a determinate or
+ indeterminate progress bar, and in the former case, should indicate the
+ position of the current value relative to the maximum value.
+
+ <p>The <dfn id=max2 title=dom-progress-max><code>max</code></dfn> and <dfn
+ id=value4 title=dom-progress-value><code>value</code></dfn> DOM attributes
+ must reflect the elements' content attributes of the same name. When the
+ relevant content attributes are absent, the DOM attributes must return
+ zero. The value parsed from the <code><a
+ href="#textcontent">textContent</a></code> never affects the DOM values.
+
+ <p class=big-issue>Would be cool to have the <code
+ title=dom-progress-value><a href="#value4">value</a></code> DOM attribute
+ update the <code><a href="#textcontent">textContent</a></code> in-line...
+
+ <p>If the progress bar is an indeterminate progress bar, then the <dfn
+ id=position title=dom-progress-position><code>position</code></dfn> DOM
+ attribute must return -1. Otherwise, it must return the result of dividing
+ the current value by the maximum value.
+
+ <h4 id=the-code><span class=secno>3.12.13. </span>The <dfn
+ id=code><code>code</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-title><a href="#title">title</a></code>
+ attribute has special semantics on this element when used with the
+ <code><a href="#dfn">dfn</a></code> element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#code">code</a></code> element represents a fragment
+ of computer code. This could be an XML element name, a filename, a
+ computer program, or any other string that a computer would recognise.
+
+ <p class=note>See the <code><a href="#pre">pre</a></code> element for more
+ details.
+
+ <div class=example>
+ <p>The following example shows how a block of code could be marked up
+ using the <code><a href="#pre">pre</a></code> and <code><a
+ href="#code">code</a></code> elements.</p>
+
+ <pre>&lt;pre>&lt;code>var i: Integer;
+begin
+ i := 1;
+end.&lt;/code>&lt;/pre></pre>
+ </div>
+
+ <h4 id=the-var><span class=secno>3.12.14. </span>The <dfn
+ id=var><code>var</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-title><a href="#title">title</a></code>
+ attribute has special semantics on this element when used with the
+ <code><a href="#dfn">dfn</a></code> element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#var">var</a></code> element represents a variable.
+ This could be an actual variable in a mathematical expression or
+ programming context, or it could just be a term used as a placeholder in
+ prose.
+
+ <div class=example>
+ <p>In the paragraph below, the letter "n" is being used as a variable in
+ prose:</p>
+
+ <pre>&lt;p>If there are &lt;var>n&lt;/var> pipes leading to the ice
+cream factory then I expect at &lt;em>least&lt;/em> &lt;var>n&lt;/var>
+flavours of ice cream to be available for purchase!&lt;/p></pre>
+ </div>
+
+ <h4 id=the-samp><span class=secno>3.12.15. </span>The <dfn
+ id=samp><code>samp</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-title><a href="#title">title</a></code>
+ attribute has special semantics on this element when used with the
+ <code><a href="#dfn">dfn</a></code> element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#samp">samp</a></code> element represents (sample)
+ output from a program or computing system.
+
+ <p class=note>See the <code><a href="#pre">pre</a></code> and <code><a
+ href="#kbd">kbd</a></code> elements for more details.
+
+ <div class=example>
+ <p>This example shows the <code><a href="#samp">samp</a></code> element
+ being used inline:</p>
+
+ <pre>&lt;p>The computer said &lt;samp>Too much cheese in tray
+two&lt;/samp> but I didn't know what that meant.&lt;/p></pre>
+
+ <p>This second example shows a block of sample output. Nested <code><a
+ href="#samp">samp</a></code> and <code><a href="#kbd">kbd</a></code>
+ elements allow for the styling of specific elements of the sample output
+ using a style sheet.</p>
+ <!-- XXX should those nested SAMPs be SPANs? -->
+ <pre>&lt;pre>&lt;samp>&lt;samp class="prompt">jdoe@mowmow:~$&lt;/samp> &lt;kbd>ssh demo.example.com&lt;/kbd>
+Last login: Tue Apr 12 09:10:17 2005 from mowmow.example.com on pts/1
+Linux demo 2.6.10-grsec+gg3+e+fhs6b+nfs+gr0501+++p3+c4a+gr2b-reslog-v6.189 #1 SMP Tue Feb 1 11:22:36 PST 2005 i686 unknown
+
+&lt;samp class="prompt">jdoe@demo:~$&lt;/samp> &lt;samp class="cursor">_&lt;/samp>&lt;/samp>&lt;/pre></pre>
+ </div>
+
+ <h4 id=the-kbd><span class=secno>3.12.16. </span>The <dfn
+ id=kbd><code>kbd</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#kbd">kbd</a></code> element represents user input
+ (typically keyboard input, although it may also be used to represent other
+ input, such as voice commands).
+
+ <p>When the <code><a href="#kbd">kbd</a></code> element is nested inside a
+ <code><a href="#samp">samp</a></code> element, it represents the input as
+ it was echoed by the system.
+
+ <p>When the <code><a href="#kbd">kbd</a></code> element <em>contains</em> a
+ <code><a href="#samp">samp</a></code> element, it represents input based
+ on system output, for example invoking a menu item.
+
+ <p>When the <code><a href="#kbd">kbd</a></code> element is nested inside
+ another <code><a href="#kbd">kbd</a></code> element, it represents an
+ actual key or other single unit of input as appropriate for the input
+ mechanism.
+
+ <div class=example>
+ <p>Here the <code><a href="#kbd">kbd</a></code> element is used to
+ indicate keys to press:</p>
+
+ <pre>&lt;p>To make George eat an apple, press &lt;kbd>&lt;kbd>Shift&lt;/kbd>+&lt;kbd>F3&lt;/kbd>&lt;/kbd>&lt;/p></pre>
+
+ <p>In this second example, the user is told to pick a particular menu
+ item. The outer <code><a href="#kbd">kbd</a></code> element marks up a
+ block of input, with the inner <code><a href="#kbd">kbd</a></code>
+ elements representing each individual step of the input, and the <code><a
+ href="#samp">samp</a></code> elements inside them indicating that the
+ steps are input based on something being displayed by the system, in this
+ case menu labels:</p>
+
+ <pre>&lt;p>To make George eat an apple, select
+ &lt;kbd>&lt;kbd>&lt;samp>File&lt;/samp>&lt;/kbd>|&lt;kbd>&lt;samp>Eat Apple...&lt;/samp>&lt;/kbd>&lt;/kbd>
+&lt;/p></pre>
+ </div>
+
+ <h4 id=the-sup><span class=secno>3.12.17. </span>The <dfn
+ id=sup><code>sup</code></dfn> and <dfn id=sub><code>sub</code></dfn>
+ elements</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which these elements may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#sup">sup</a></code> element represents a superscript
+ and the <code><a href="#sub">sub</a></code> element represents a
+ subscript.
+
+ <p>These elements must only be used to mark up typographical conventions
+ with specific meanings, not for typographical presentation for
+ presentation's sake. For example, it would be inappropriate for the
+ <code><a href="#sup">sup</a></code> and <code><a
+ href="#sub">sub</a></code> elements to be used in the name of the LaTeX
+ document preparation system. In general, authors should not use these
+ elements if the <em>absence</em> of those elements would not change the
+ meaning of the content.
+
+ <p>When the <code><a href="#sub">sub</a></code> element is used inside a
+ <code><a href="#var">var</a></code> element, it represents the subscript
+ that identifies the variable in a family of variables.
+
+ <div class=example>
+ <pre>&lt;p>The coordinate of the &lt;var>i&lt;/var>th point is
+(&lt;var>x&lt;sub>&lt;var>i&lt;/var>&lt;/sub>&lt;/var>, &lt;var>y&lt;sub>&lt;var>i&lt;/var>&lt;/sub>&lt;/var>).
+For example, the 10th point has coordinate
+(&lt;var>x&lt;sub>10&lt;/sub>&lt;/var>, &lt;var>y&lt;sub>10&lt;/sub>&lt;/var>).&lt;/p></pre>
+ </div>
+
+ <p>In certain languages, superscripts are part of the typographical
+ conventions for some abbreviations.
+
+ <div class=example>
+ <pre>&lt;p>The most beautiful women are
+&lt;span lang="fr">&lt;abbr>M&lt;sup>lle&lt;/sup>&lt;/abbr> Gwendoline&lt;/span> and
+&lt;span lang="fr">&lt;abbr>M&lt;sup>me&lt;/sup>&lt;/abbr> Denise&lt;/span>.&lt;/p></pre>
+ </div>
+
+ <p>Mathematical expressions often use subscripts and superscripts.
+ <!--Authors are encouraged to use MathML for marking up mathematics,
+ but authors may opt to use <code>sub</code> and <code>sup</code> if
+ detailed mathematical markup is not desired. <a
+ href="#refsMathML">[MathML]</a>--></p>
+ <!-- XXX -->
+
+ <div class=example>
+ <pre>&lt;var>E&lt;/var>=&lt;var>m&lt;/var>&lt;var>c&lt;/var>&lt;sup>2&lt;/sup></pre>
+
+ <pre>f(&lt;var>x&lt;/var>, &lt;var>n&lt;/var>) = log&lt;sub>4&lt;/sub>&lt;var>x&lt;/var>&lt;sup>&lt;var>n&lt;/var>&lt;/sup></pre>
+ </div>
+
+ <h4 id=the-span><span class=secno>3.12.18. </span>The <dfn
+ id=span><code>span</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used in an element whose content model is only <a
+ href="#strictly">strictly inline-level content</a>: only <a
+ href="#strictly">strictly inline-level content</a>.
+
+ <dd>Otherwise: any <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-title><a href="#title">title</a></code>
+ attribute has special semantics on this element when used with the
+ <code><a href="#dfn">dfn</a></code> element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#span">span</a></code> element doesn't mean anything
+ on its own, but can be useful when used together with other attributes,
+ e.g. <code title=attr-class><a href="#class">class</a></code>, <code
+ title=attr-lang><a href="#lang">lang</a></code>, or <code
+ title=attr-dir><a href="#dir">dir</a></code>, or when used in conjunction
+ with the <code><a href="#dfn">dfn</a></code> element.</p>
+ <!-- XXX need examples -->
+
+ <h4 id=the-i><span class=secno>3.12.19. </span>The <dfn
+ id=i><code>i</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-title><a href="#title">title</a></code>
+ attribute has special semantics on this element when used with the
+ <code><a href="#dfn">dfn</a></code> element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#i">i</a></code> element represents a span of text in
+ an alternate voice or mood, or otherwise offset from the normal prose,
+ such as a taxonomic designation, a technical term, an idiomatic phrase
+ from another language, a thought, a ship name, or some other prose whose
+ typical typographic presentation is italicized.
+
+ <p>Terms in languages different from the main text should be annotated with
+ <code title=attr-lang><a href="#lang">lang</a></code> attributes (<code
+ title=attr-xml-lang><a href="#xmllang">xml:lang</a></code> in XML).
+
+ <div class=example>
+ <p>The examples below show uses of the <code><a href="#i">i</a></code>
+ element:</p>
+
+ <pre>&lt;p>The &lt;i>felis silvestris catus&lt;/i> is cute.&lt;/p>
+&lt;p>The &lt;i>block-level elements&lt;/i> are defined above.&lt;/p>
+&lt;p>There is a certain &lt;i lang="fr">je ne sais quoi&lt;/i> in the air.&lt;/p></pre>
+
+ <p>In the following example, a dream sequence is marked up using <code><a
+ href="#i">i</a></code> elements.</p>
+
+ <pre>&lt;p>Raymond tried to sleep.&lt;/p>
+&lt;p>&lt;i>The ship sailed away on Thursday&lt;/i>, he
+dreamt. &lt;i>The ship had many people aboard, including a beautiful
+princess called Carey. He watched her, day-in, day-out, hoping she
+would notice him, but she never did.&lt;/i>&lt;/p>
+&lt;p>&lt;i>Finally one night he picked up the courage to speak with
+her&mdash;&lt;/i>&lt;/p>
+&lt;p>Raymond woke with a start as the fire alarm rang out.&lt;/p></pre>
+ </div>
+
+ <p>The <code><a href="#i">i</a></code> element should be used as a last
+ resort when no other element is more appropriate. In particular, citations
+ should use the <code><a href="#cite2">cite</a></code> element, defining
+ instances of terms should use the <code><a href="#dfn">dfn</a></code>
+ element, stress emphasis should use the <code><a href="#em">em</a></code>
+ element, importance should be denoted with the <code><a
+ href="#strong">strong</a></code> element, quotes should be marked up with
+ the <code><a href="#q">q</a></code> element, and small print should use
+ the <code><a href="#small">small</a></code> element.
+
+ <p class=note>Style sheets can be used to format <code><a
+ href="#i">i</a></code> elements, just like any other element can be
+ restyled. Thus, it is not the case that content in <code><a
+ href="#i">i</a></code> elements will necessarily be italicised.
+
+ <h4 id=the-b><span class=secno>3.12.20. </span>The <dfn
+ id=b><code>b</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#b">b</a></code> element represents a span of text to
+ be stylistically offset from the normal prose without conveying any extra
+ importance, such as key words in a document abstract, product names in a
+ review, or other spans of text whose typical typographic presentation is
+ boldened.
+
+ <div class=example>
+ <p>The following example shows a use of the <code><a
+ href="#b">b</a></code> element to highlight key words without marking
+ them up as important:</p>
+
+ <pre>&lt;p>The &lt;b>frobonitor&lt;/b> and &lt;b>barbinator&lt;/b> components are fried.&lt;/p></pre>
+
+ <p>The following would be <em>incorrect</em> usage:</p>
+
+ <pre>&lt;p>&lt;b>WARNING!&lt;/b> Do not frob the barbinator!&lt;/p></pre>
+
+ <p>In the previous example, the correct element to use would have been
+ <code><a href="#strong">strong</a></code>, not <code><a
+ href="#b">b</a></code>.</p>
+
+ <p>In the following example, objects in a text adventure are highlighted
+ as being special by use of the <code><a href="#b">b</a></code> element.</p>
+
+ <pre>&lt;p>You enter a small room. Your &lt;b>sword&lt;/b> glows
+brighter. A &lt;b>rat&lt;/b> scurries past the corner wall.&lt;/p></pre>
+ </div>
+
+ <p>The <code><a href="#b">b</a></code> element should be used as a last
+ resort when no other element is more appropriate. In particular, headers
+ should use the <code><a href="#h1">h1</a></code> to <code><a
+ href="#h6">h6</a></code> elements, stress emphasis should use the <code><a
+ href="#em">em</a></code> element, importance should be denoted with the
+ <code><a href="#strong">strong</a></code> element, and text marked or
+ highlighted should use the <code><a href="#m">m</a></code> element.
+
+ <p class=note>Style sheets can be used to format <code><a
+ href="#b">b</a></code> elements, just like any other element can be
+ restyled. Thus, it is not the case that content in <code><a
+ href="#b">b</a></code> elements will necessarily be boldened.
+
+ <h4 id=the-bdo><span class=secno>3.12.21. </span>The <dfn
+ id=bdo><code>bdo</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#strictly">Strictly inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None, but the <code title=attr-dir><a href="#dir">dir</a></code>
+ global attribute is required on this element.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#bdo">bdo</a></code> element allows authors to
+ override the Unicode bidi algorithm by explicitly specifying a direction
+ override. <a href="#refsBIDI">[BIDI]</a>
+
+ <p>Authors must specify the <code title=attr-dir><a
+ href="#dir">dir</a></code> attribute on this element, with the value
+ <code>ltr</code> to specify a left-to-right override and with the value
+ <code>rtl</code> to specify a right-to-left override.
+
+ <p>If the element has the <code title=attr-dir><a
+ href="#dir">dir</a></code> attribute set to the exact value
+ <code>ltr</code>, then for the purposes of the bidi algorithm, the user
+ agent must act as if there was a U+202D LEFT-TO-RIGHT OVERRIDE character
+ at the start of the element, and a U+202C POP DIRECTIONAL FORMATTING at
+ the end of the element.
+
+ <p>If the element has the <code title=attr-dir><a
+ href="#dir">dir</a></code> attribute set to the exact value
+ <code>rtl</code>, then for the purposes of the bidi algorithm, the user
+ agent must act as if there was a U+202E RIGHT-TO-LEFT OVERRIDE character
+ at the start of the element, and a U+202C POP DIRECTIONAL FORMATTING at
+ the end of the element.
+
+ <p>The requirements on handling the <code><a href="#bdo">bdo</a></code>
+ element for the bidi algorithm may be implemented indirectly through the
+ style layer. For example, an HTML+CSS user agent should implement these
+ requirements by implementing the CSS <code>unicode-bidi</code> property.
+ <a href="#refsCSS21">[CSS21]</a></p>
+ <!-- XXX need examples -->
+
+ <h3 id=edits><span class=secno>3.13. </span>Edits</h3>
+
+ <p>The <code><a href="#ins">ins</a></code> and <code><a
+ href="#del">del</a></code> elements represent edits to the document.
+
+ <h4 id=the-ins><span class=secno>3.13.1. </span>The <dfn
+ id=ins><code>ins</code></dfn> element</h4>
+
+ <p><a href="#transparent0">Transparent</a> <a href="#block-level0"
+ title="block-level elements">block-level element</a>, and <a
+ href="#transparent0">transparent</a> <a href="#strictly">strictly
+ inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#transparent0">Transparent</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-mod-cite><a href="#cite3">cite</a></code>
+
+ <dd><code title=attr-mod-datetime><a href="#datetime1">datetime</a></code>
+
+ <dt>DOM interface:
+
+ <dd>Uses the <code><a href="#htmlmodelement">HTMLModElement</a></code>
+ interface.
+ </dl>
+
+ <p>The <code><a href="#ins">ins</a></code> element represents an addition
+ to the document.
+
+ <p>The <code><a href="#ins">ins</a></code> element must be used only where
+ <a href="#block-level0">block-level elements</a> or <a
+ href="#strictly">strictly inline-level content</a> can be used.
+
+ <p>An <code><a href="#ins">ins</a></code> element can only contain content
+ that would still be conformant if all elements with <a
+ href="#transparent0">transparent</a> content models were replaced by their
+ contents.
+
+ <div class=example>
+ <p>The following would be syntactically legal:</p>
+
+ <pre>&lt;aside>
+ &lt;ins>
+ &lt;p>...&lt;/p>
+ &lt;/ins>
+&lt;/aside></pre>
+
+ <p>As would this:</p>
+
+ <pre>&lt;aside>
+ &lt;ins>
+ &lt;em>...&lt;/em>
+ &lt;/ins>
+&lt;/aside></pre>
+
+ <p>However, this last example would be illegal, as <code><a
+ href="#em">em</a></code> and <code><a href="#p">p</a></code> cannot both
+ be used inside an <code><a href="#aside">aside</a></code> element at the
+ same time:</p>
+
+ <pre>&lt;aside>
+ &lt;ins>
+ &lt;p>...&lt;/p>
+ &lt;/ins>
+ &lt;ins>
+ &lt;em>...&lt;/em>
+ &lt;/ins>
+&lt;/aside></pre>
+ </div>
+
+ <h4 id=the-del><span class=secno>3.13.2. </span>The <dfn
+ id=del><code>del</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#strictly">strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When the element has a parent: same content model as the parent
+ element (without taking into account the other children of the parent
+ element).
+
+ <dd>Otherwise: zero or more <a href="#block-level0">block-level
+ elements</a>, or <a href="#inline-level0">inline-level content</a> (but
+ not both).
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-mod-cite><a href="#cite3">cite</a></code>
+
+ <dd><code title=attr-mod-datetime><a href="#datetime1">datetime</a></code>
+
+ <dt>DOM interface:
+
+ <dd>Uses the <code><a href="#htmlmodelement">HTMLModElement</a></code>
+ interface.
+ </dl>
+
+ <p>The <code><a href="#del">del</a></code> element represents a removal
+ from the document.
+
+ <p>The <code><a href="#del">del</a></code> element must only contain
+ content that would be allowed inside the parent element (regardless of
+ what the parent element actually contains).
+
+ <div class=example>
+ <p>The following would be syntactically legal:</p>
+
+ <pre>&lt;aside>
+ &lt;del>
+ &lt;p>...&lt;/p>
+ &lt;/del>
+ &lt;ins>
+ &lt;em>...&lt;/em>
+ &lt;/ins>
+&lt;/aside></pre>
+
+ <p>...even though the <code><a href="#p">p</a></code> and <code><a
+ href="#em">em</a></code> elements would never be allowed side by side in
+ the <code><a href="#aside">aside</a></code> element. This is allowed
+ because the <code><a href="#del">del</a></code> element represents
+ content that was removed, and it is quite possible that an edit could
+ cause an element to go from being an inline-level container to a
+ block-level container, or vice-versa.</p>
+ </div>
+
+ <h4 id=attributes><span class=secno>3.13.3. </span>Attributes common to
+ <code><a href="#ins">ins</a></code> and <code><a
+ href="#del">del</a></code> elements</h4>
+
+ <p>The <dfn id=cite3 title=attr-mod-cite><code>cite</code></dfn> attribute
+ may be used to specify a URI that explains the change. When that document
+ is long, for instance the minutes of a meeting, authors are encouraged to
+ include a fragment identifier pointing to the specific part of that
+ document that discusses the change.
+
+ <p>If the <code title=attr-mod-cite><a href="#cite3">cite</a></code>
+ attribute is present, it must be a URI (or IRI) that explains the change.
+ User agents should allow users to follow such citation links.
+
+ <p>The <dfn id=datetime1
+ title=attr-mod-datetime><code>datetime</code></dfn> attribute may be used
+ to specify the time and date of the change.
+
+ <p>If present, the <code title=attr-mod-datetime><a
+ href="#datetime1">datetime</a></code> attribute must be a <a
+ href="#valid5">valid datetime</a> value.
+
+ <p>User agents must parse the <code title=attr-mod-datetime><a
+ href="#datetime1">datetime</a></code> attribute according to the <a
+ href="#datetime-parser">parse a string as a datetime value</a> algorithm.
+ If that doesn't return a time, then the modification has no associated
+ timestamp (the value is non-conforming; it is not a <a
+ href="#valid5">valid datetime</a>). Otherwise, the modification is marked
+ as having been made at the given datetime. User agents should use the
+ associated timezone information to determine which timezone to present the
+ given datetime in.
+
+ <p>The <code><a href="#ins">ins</a></code> and <code><a
+ href="#del">del</a></code> elements must implement the <code><a
+ href="#htmlmodelement">HTMLModElement</a></code> interface:
+
+ <pre
+ class=idl>interface <dfn id=htmlmodelement>HTMLModElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#cite4" title=dom-mod-cite>cite</a>;
+ attribute DOMString <a href="#datetime2" title=dom-mod-datetime>dateTime</a>;
+};</pre>
+
+ <p>The <dfn id=cite4 title=dom-mod-cite><code>cite</code></dfn> DOM
+ attribute must reflect the element's <code title=attr-mod-cite><a
+ href="#cite3">cite</a></code> content attribute. The <dfn id=datetime2
+ title=dom-mod-datetime><code>dateTime</code></dfn> DOM attribute must
+ reflect the element's <code title="">datetime</code> content attribute.
+
+ <h3 id=embedded><span class=secno>3.14. </span>Embedded content</h3>
+
+ <h4 id=the-figure><span class=secno>3.14.1. </span>The <dfn
+ id=figure><code>figure</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>In any order, exactly one <code><a href="#legend">legend</a></code>
+ element, and exactly one <a href="#embedded0">embedded content</a>
+ element.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#figure">figure</a></code> element represents a <a
+ href="#paragraph">paragraph</a> consisting of embedded content and a
+ caption.
+
+ <p>The first <a href="#embedded0">embedded content</a> element child of the
+ <code><a href="#figure">figure</a></code> element, if any, is the
+ paragraph's content.
+
+ <p>The first <code><a href="#legend">legend</a></code> element child of the
+ element, if any, represents the caption of the embedded content. If there
+ is no child <code><a href="#legend">legend</a></code> element, then there
+ is no caption.
+
+ <p>If the embedded content cannot be used, then, for the purposes of
+ establishing what the <code><a href="#figure">figure</a></code> element
+ represents:
+
+ <dl class=switch>
+ <dt>If the embedded content's <a href="#fallback">fallback content</a> is
+ a single <a href="#embedded0">embedded content</a> element
+
+ <dd>The <code><a href="#figure">figure</a></code> element must be treated
+ as if that <a href="#embedded0">embedded content</a> element was the
+ <code><a href="#figure">figure</a></code> element's embedded content. (If
+ that embedded content can't be used either, then this processing must be
+ done again, with the new embedded content's <a href="#fallback">fallback
+ content</a>.)
+
+ <dt>If the embedded content's fallback is nothing
+
+ <dd>The entire <code><a href="#figure">figure</a></code> element
+ (including the caption, if any) must be ignored.
+
+ <dt>If the embedded content's fallback is <a
+ href="#inline-level0">inline-level content</a>
+
+ <dd>The entire <code><a href="#figure">figure</a></code> element
+ (including the caption, if any) must be treated as being a single <a
+ href="#paragraph">paragraph</a> with that <a
+ href="#inline-level0">inline-level content</a> as its content.
+
+ <dt>Otherwise
+
+ <dd>The entire <code><a href="#figure">figure</a></code> element
+ (including the caption, if any) must be treated as being replaced by that
+ fallback content.
+ </dl>
+
+ <h4 id=the-img><span class=secno>3.14.2. </span>The <dfn
+ id=img><code>img</code></dfn> element</h4>
+
+ <p><a href="#strictly" title="Strictly inline-level content">Strictly
+ inline-level</a> <a href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-img-alt><a href="#alt">alt</a></code> (required)
+
+ <dd><code title=attr-img-src><a href="#src">src</a></code> (required)
+
+ <dd><code title=attr-hyperlink-usemap><a href="#usemap1">usemap</a></code>
+
+ <dd><code title=attr-img-ismap><a href="#ismap">ismap</a></code> (but only
+ if one of the ancestor elements is an <code><a href="#a">a</a></code>
+ element)
+
+ <dd><code title=attr-img-height><a href="#height">height</a></code>
+
+ <dd><code title=attr-img-width><a href="#width">width</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlimageelement>HTMLImageElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#alt0" title=dom-img-alt>alt</a>;
+ attribute DOMString <a href="#src0" title=dom-img-src>src</a>;
+ attribute DOMString <a href="#usemap" title=dom-img-useMap>useMap</a>;
+ attribute boolean <a href="#ismap0" title=dom-img-isMap>isMap</a>;
+ attribute long <a href="#height0" title=dom-img-height>height</a>;
+ attribute long <a href="#width0" title=dom-img-width>width</a>;
+ readonly attribute boolean <a href="#complete" title=dom-img-complete>complete</a>;
+};</pre>
+
+ <p class=note>An instance of <code><a
+ href="#htmlimageelement">HTMLImageElement</a></code> can be obtained
+ using the <code title=dom-image><a href="#image0">Image</a></code>
+ constructor.</p>
+ </dl>
+
+ <p>The <code><a href="#img">img</a></code> element represents a piece of
+ text with an alternate graphical representation. The text is given by the
+ <dfn id=alt title=attr-img-alt><code>alt</code></dfn> attribute, which
+ must be present, and the URI to the graphical representation of that text
+ is given in the <dfn id=src title=attr-img-src><code>src</code></dfn>
+ attribute, which must also be present.
+
+ <p>The image given by the <code title=attr-img-src><a
+ href="#src">src</a></code> attribute is the embedded content, and the
+ value of the <code title=attr-img-alt><a href="#alt">alt</a></code>
+ attribute is the <code><a href="#img">img</a></code> element's <a
+ href="#fallback">fallback content</a>.
+
+ <p>When the <code title=attr-img-alt><a href="#alt">alt</a></code>
+ attribute's value is the empty string, the image supplements the
+ surrounding content. In such cases, the image could be omitted without
+ affecting the meaning of the document.
+
+ <p>If the <code title=attr-img-alt><a href="#alt">alt</a></code> attribute
+ is omitted, user agents must treat the element as if it had an <code
+ title=attr-img-alt><a href="#alt">alt</a></code> attribute set to the
+ empty string.
+
+ <p>The <code title=attr-img-alt><a href="#alt">alt</a></code> attribute
+ does not represent advisory information. User agents must not present the
+ contents of the <code title=attr-img-alt><a href="#alt">alt</a></code>
+ attribute in the same way as content of the <code title=attr-title><a
+ href="#title">title</a></code> attribute.
+
+ <p class=big-issue>Guidelines on writing "alt" text here.</p>
+ <!-- http://www.cs.tut.fi/~jkorpela/html/alt.html -->
+
+ <p>The <code title=attr-img-src><a href="#src">src</a></code> attribute
+ must contain a URI (or IRI). If the <code title=attr-img-src><a
+ href="#src">src</a></code> attribute is omitted, there is no alternative
+ image representation.
+
+ <p>When the <code title=attr-img-src><a href="#src">src</a></code>
+ attribute is set, the user agent must immediately begin to download the
+ specified
+ resource<!-- XXX xref what fetching means, how to resolve URIs in
+ attributes (including those not in the DOM) -->,
+ unless the user agent cannot support images, or its support for images has
+ been disabled.
+
+ <p>The download of the image must <a href="#delays">delay the <code
+ title=event-load>load</code> event</a>.
+
+ <p>Once the download has completed, if the image is a valid image, the user
+ agent must <a href="#firing4">fire a <code title=event-load>load</code>
+ event</a> on the <code><a href="#img">img</a></code> element. If the
+ download fails or it completes but the image is not a valid or supported
+ image, the user agent must <a href="#firing5">fire an <code
+ title=event-error>error</code> event</a> on the <code><a
+ href="#img">img</a></code> element.
+
+ <p>The remote server's response metadata (e.g. an HTTP 404 status code, or
+ <a href="#content-type8" title=Content-Type>associated Content-Type
+ headers</a>) must be ignored when determining whether the resource
+ obtained is a valid image or not.
+
+ <p class=note>This allows servers to return images with error responses.
+
+ <p>User agents must not support non-image resources with the <code><a
+ href="#img">img</a></code> element.
+
+ <p>The <code title=attr-hyperlink-usemap><a
+ href="#usemap1">usemap</a></code> attribute, if present, can indicate that
+ the image has an associated <a href="#image">image map</a>.
+
+ <p>The <dfn id=ismap title=attr-img-ismap><code>ismap</code></dfn>
+ attribute, when used on an element that is a descendant of an <code><a
+ href="#a">a</a></code> element with an <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attribute, indicates by its presence that
+ the element provides access to a server-side image map. This affects how
+ events are handled on the corresponding <code><a href="#a">a</a></code>
+ element.
+
+ <p>The <code title=attr-img-ismap><a href="#ismap">ismap</a></code>
+ attribute is a <a href="#boolean0">boolean attribute</a>. The attribute
+ must not be specified on an element that does not have an ancestor
+ <code><a href="#a">a</a></code> element.
+
+ <p>The <dfn id=height title=attr-img-height><code>height</code></dfn> and
+ <dfn id=width title=attr-img-width><code>width</code></dfn> attributes
+ give the preferred rendered dimensions of the image if the image is to be
+ shown in a visual medium.
+
+ <p class=big-issue>Should we require the dimensions to be correct? Should
+ we disallow percentages?
+
+ <p>The values of the <code title=attr-img-height><a
+ href="#height">height</a></code> and <code title=attr-img-width><a
+ href="#width">width</a></code> attributes must be either <a href="#valid"
+ title="valid non-negative integer">valid non-negative integers</a> or <a
+ href="#valid3" title="valid non-negative percentage">valid non-negative
+ percentages</a>.
+
+ <p>To parse the attributes, user agents must use the <a
+ href="#rules2">rules for parsing dimension values</a>. This will return
+ either an integer length, a percentage value, or nothing. When one of
+ these attributes has no value, it must be
+ <span>ignored</span><!-- XXX xref -->.
+
+ <p>The user agent requirements for processing the values obtained from
+ parsing these attributes are described <a href="#sizing" title="sizing of
+ embedded content">in the rendering section</a><!-- XXX xref
+ -->.
+
+ <p>The <code><a href="#img">img</a></code> element must be empty.</p>
+ <!-- contents
+ should be ignored for rendering but not for semantics,
+ e.g. <script>, <input>, etc. -->
+
+ <p>The DOM attributes <dfn id=alt0
+ title=dom-img-alt><code>alt</code></dfn>, <dfn id=src0
+ title=dom-img-src><code>src</code></dfn>, <dfn id=usemap
+ title=dom-img-useMap><code>useMap</code></dfn>, and <dfn id=ismap0
+ title=dom-img-isMap><code>isMap</code></dfn> each must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM attributes <dfn id=height0
+ title=dom-img-height><code>height</code></dfn> and <dfn id=width0
+ title=dom-img-width><code>width</code></dfn> must return the rendered
+ height and width of the image, in CSS pixels, if the image is being
+ rendered, and is being rendered to a visual medium, or 0 otherwise. <a
+ href="#refsCSS21">[CSS21]</a>
+
+ <p>The DOM attribute <dfn id=complete
+ title=dom-img-complete><code>complete</code></dfn> must return true if the
+ user agent has downloaded the image specified in the <code
+ title=attr-img-src><a href="#src">src</a></code> attribute, and it is a
+ valid image, and false otherwise.
+
+ <h4 id=the-iframe><span class=secno>3.14.3. </span>The <dfn
+ id=iframe><code>iframe</code></dfn> element</h4>
+
+ <p><a href="#strictly" title="Strictly inline-level content">Strictly
+ inline-level</a> <a href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Text (for details, see prose).
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-iframe-src><a href="#src1">src</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmliframeelement>HTMLIFrameElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#src2" title=dom-iframe-src>src</a>;<!--
+ readonly attribute Document <span title="dom-iframe-contentDocument">contentDocument</span>;
+ readonly attribute <span>Window</span> <span title="dom-iframe-contentWindow">contentWindow</span>;-->
+};</pre>
+
+ <p>Objects implementing the <code><a
+ href="#htmliframeelement">HTMLIFrameElement</a></code> interface must
+ also implement the <code>EmbeddingElement</code> interface defined in
+ the Window Object specification. <a href="#refsWINDOW">[WINDOW]</a></p>
+ <!-- XXX -->
+ </dl>
+
+ <p>The <code><a href="#iframe">iframe</a></code> element introduces a new
+ nested <a href="#browsing0">browsing context</a>.
+
+ <p>The <dfn id=src1 title=attr-iframe-src><code>src</code></dfn> attribute,
+ if present, must be a URI (or IRI) to a page that the nested <a
+ href="#browsing0">browsing context</a> is to contain. When the browsing
+ context is created, if the attribute is present, the user agent must <a
+ href="#navigate">navigate</a> this browsing context to the given URI, with
+ <a href="#replacement">replacement enabled</a>. If the user <a
+ href="#navigate" title=navigate>navigates</a> away from this page, the
+ <code><a href="#iframe">iframe</a></code>'s corresponding <code><a
+ href="#window">Window</a></code> object will reference new
+ <code>Document</code> objects, but the <code title=attr-iframe-src><a
+ href="#src1">src</a></code> attribute will not change.
+
+ <p>Whenever the <code title=attr-iframe-src><a href="#src1">src</a></code>
+ attribute is set, the nested <a href="#browsing0">browsing context</a>
+ must be <a href="#navigate" title=navigate>navigated</a> to the given URI.
+
+ <p>If the <code title=attr-iframe-src><a href="#src1">src</a></code>
+ attribute is not set when the element is created, the browsing context
+ will remain at the initial <code>about:blank</code> page.
+
+ <p>When content loads in an <code><a href="#iframe">iframe</a></code>,
+ after any <code title=event-load><a href="#load0">load</a></code> events
+ are fired within the content itself, the user agent must <a
+ href="#firing4">fire a <code title=event-load>load</code> event</a> at the
+ <code><a href="#iframe">iframe</a></code> element. When content fails to
+ load (e.g. due to a network error), then the user agent must <a
+ href="#firing5">fire an <code title=event-error>error</code> event</a> at
+ the element instead.
+
+ <p>When there is an active parser in the <code><a
+ href="#iframe">iframe</a></code>, and when anything in the <code><a
+ href="#iframe">iframe</a></code> is <a href="#delays" title="delay
+ the load event">delaying the <code title=event-load>load</code> event</a>
+ in the <code><a href="#iframe">iframe</a></code>'s <a
+ href="#browsing0">browsing context</a>, the <code><a
+ href="#iframe">iframe</a></code> must <a href="#delays">delay the <code
+ title=event-load>load</code> event</a>.
+
+ <p class=note>If, during the handling of the <code title=event-load><a
+ href="#load0">load</a></code> event, the <a href="#browsing0">browsing
+ context</a> in the <code><a href="#iframe">iframe</a></code> is again <a
+ href="#navigate" title=navigate>navigated</a>, that will further <a
+ href="#delays">delay the <code title=event-load>load</code> event</a>.
+
+ <p>An <code><a href="#iframe">iframe</a></code> element never has <a
+ href="#fallback">fallback content</a>, as it will always create a nested
+ <a href="#browsing0">browsing context</a>, regardless of whether the
+ specified initial contents are successfully used.
+
+ <p><code><a href="#iframe">iframe</a></code> elements may contain any text.
+ <code><a href="#iframe">iframe</a></code> elements must not contain
+ element nodes. Descendants of <code><a href="#iframe">iframe</a></code>
+ elements represent nothing. (In legacy user agents that do not support
+ <code><a href="#iframe">iframe</a></code> elements, the contents would be
+ parsed as markup that could act as fallback content.)
+
+ <p class=big-issue>restrictions for what that text must be?
+
+ <p class=note>The <a href="#html-0">HTML parser</a> treats markup inside
+ <code><a href="#iframe">iframe</a></code> elements as text.
+
+ <p>The DOM attribute <dfn id=src2
+ title=dom-iframe-src><code>src</code></dfn> must <a
+ href="#reflect">reflect</a> the content attribute of the same name.
+
+ <h4 id=the-embed><span class=secno>3.14.4. </span>The <dfn
+ id=embed><code>embed</code></dfn> element</h4>
+
+ <p><a href="#strictly" title="Strictly inline-level content">Strictly
+ inline-level</a> <a href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-embed-src><a href="#src3">src</a></code> (required)
+
+ <dd><code title=attr-embed-type><a href="#type4">type</a></code>
+
+ <dd><code title=attr-embed-height>height</code>
+
+ <dd><code title=attr-embed-width>width</code>
+
+ <dd>Any other attribute that has no namespace (see prose).
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlembedelement>HTMLEmbedElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#src4" title=dom-embed-src>src</a>;
+ attribute DOMString <a href="#type5" title=dom-embed-type>type</a>;
+ attribute long <a href="#height1" title=dom-embed-height>height</a>;
+ attribute long <a href="#width1" title=dom-embed-width>width</a>;
+};</pre>
+
+ <p>Depending on the type of content instantiated by the <code><a
+ href="#embed">embed</a></code> element, the node may also support other
+ interfaces.</p>
+ </dl>
+
+ <p>The <code><a href="#embed">embed</a></code> element represents an
+ integration point for an external (typically non-HTML) application or
+ interactive content.
+
+ <p>The <dfn id=src3 title=attr-embed-src><code>src</code></dfn> attribute
+ gives the address of the resource being embedded. The attribute must be
+ present and contain a URI (or IRI).
+
+ <p>If the <code title=attr-embed-src><a href="#src3">src</a></code>
+ attribute is missing, then the <code><a href="#embed">embed</a></code>
+ element must be ignored.
+
+ <p>When the <code title=attr-embed-src><a href="#src3">src</a></code>
+ attribute is set, user agents are expected to find an appropriate handler
+ for the specified resource, based on the <a href="#type-of"
+ title=concept-embed-type>content's type</a>, and hand that handler the
+ content of the resource. If the handler supports a scriptable interface,
+ the <code><a href="#htmlembedelement">HTMLEmbedElement</a></code> object
+ representing the element should expose that interface.
+
+ <p>The download of the resource must <a href="#delays">delay the <code
+ title=event-load>load</code> event</a>.
+
+ <p>The user agent should pass the names and values of all the attributes of
+ the <code><a href="#embed">embed</a></code> element that have no namespace
+ to the handler used. Any (namespace-less) attribute may be specified on
+ the <code><a href="#embed">embed</a></code> element.</p>
+ <!-- duplicates what's in <object> section below -->
+
+ <p class=note>This specification does not define a mechanism for
+ interacting with third-party handlers, as it is expected to be
+ user-agent-specific. Some UAs might opt to support a plugin mechanism such
+ as the Netscape Plugin API; others may use remote content convertors or
+ have built-in support for certain types. <a href="#refsNPAPI">[NPAPI]</a>
+
+ <p>The <code><a href="#embed">embed</a></code> element has no <a
+ href="#fallback">fallback content</a>. If the user agent can't display the
+ specified resource, e.g. because the given type is not supported, then the
+ user agent must use a default handler for the content. (This default could
+ be as simple as saying "Unsupported Format", of course.)
+
+ <p>The <dfn id=type4 title=attr-embed-type><code>type</code></dfn>
+ attribute, if present, gives the MIME type of the linked resource. The
+ value must be a valid MIME type, optionally with parameters. <a
+ href="#refsRFC2046">[RFC2046]</a>
+
+ <p>The <dfn id=type-of title=concept-embed-type>type of the content</dfn>
+ being embedded is defined as follows:
+
+ <ol>
+ <li>If the element has a <code title=attr-embed-type><a
+ href="#type4">type</a></code> attribute, then the value of the <code
+ title=attr-embed-type><a href="#type4">type</a></code> attribute is the
+ <span>content's type</span>.
+
+ <li>Otherwise, if the specified resource has <a href="#content-type8"
+ title=Content-Type>explicit Content-Type metadata</a>, then that is the
+ <span>content's type</span>.
+
+ <li>Otherwise, the content has no type and there can be no appropriate
+ handler for it.
+ </ol>
+
+ <p class=big-issue>Should we instead say that the content-sniffing that
+ we're going to define for top-level browsing contexts should apply here?
+
+ <p class=big-issue>Should we require the type attribute to match the server
+ information?
+
+ <p class=big-issue>We should say that 404s, etc, don't affect whether the
+ resource is used or not. Not sure how to say it here though.
+
+ <p>Browsers should take extreme care when interacting with external content
+ intended for third-party renderers. When third-party software is run with
+ the same privileges as the user agent itself, vulnerabilities in the
+ third-party software become as dangerous as those in the user agent.
+
+ <p class=big-issue>height/width
+
+ <p>The DOM attributes <dfn id=src4
+ title=dom-embed-src><code>src</code></dfn> and <dfn id=type5
+ title=dom-embed-type><code>type</code></dfn> each must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM attributes <dfn id=height1
+ title=dom-embed-height><code>height</code></dfn> and <dfn id=width1
+ title=dom-embed-width><code>width</code></dfn> must return the rendered
+ height and width of the embedded content, in CSS pixels, if the content is
+ being rendered, and is being rendered to a visual medium, or 0 otherwise. <a
+ href="#refsCSS21">[CSS21]</a>
+
+ <h4 id=the-object><span class=secno>3.14.5. </span>The <dfn
+ id=object><code>object</code></dfn> element</h4>
+
+ <p><a href="#strictly" title="Strictly inline-level content">Strictly
+ inline-level</a> <a href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>When used as the child of a <code><a href="#figure">figure</a></code>
+ element, or, when used as a <em><code><a href="#figure">figure</a></code>
+ fallback <code><a href="#object">object</a></code></em>: Zero or more
+ <code><a href="#param">param</a></code> elements, followed by either zero
+ or more <a href="#block-level0">block-level elements</a> or a single
+ <code><a href="#object">object</a></code> element, which is then
+ considered to be a <em><code><a href="#figure">figure</a></code> fallback
+ <code><a href="#object">object</a></code></em>.
+
+ <dd>Otherwise: Zero or more <code><a href="#param">param</a></code>
+ elements, followed by <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-object-data><a href="#data">data</a></code> (required
+ if <code title=attr-object-type><a href="#type6">type</a></code> is not
+ given)
+
+ <dd><code title=attr-object-type><a href="#type6">type</a></code>
+ (required if <code title=attr-object-data><a href="#data">data</a></code>
+ is not given)
+
+ <dd><code title=attr-hyperlink-usemap><a href="#usemap1">usemap</a></code>
+
+ <dd><code title=attr-object-height>height</code>
+
+ <dd><code title=attr-object-width>width</code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlobjectelement>HTMLObjectElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#data0" title=dom-object-data>data</a>;
+ attribute DOMString <a href="#type7" title=dom-object-type>type</a>;
+ attribute DOMString <a href="#usemap0" title=dom-object-useMap>useMap</a>;
+ attribute long <a href="#height2" title=dom-object-height>height</a>;
+ attribute long <a href="#width2" title=dom-object-width>width</a>;<!--
+ readonly attribute Document <span title="dom-object-contentDocument">contentDocument</span>;
+ readonly attribute <span>Window</span> <span title="dom-object-contentWindow">contentWindow</span>;-->
+};</pre>
+
+ <p>Objects implementing the <code><a
+ href="#htmlobjectelement">HTMLObjectElement</a></code> interface must
+ also implement the <code>EmbeddingElement</code> interface defined in
+ the Window Object specification. <a href="#refsWINDOW">[WINDOW]</a></p>
+
+ <p>Depending on the type of content instantiated by the <code><a
+ href="#object">object</a></code> element, the node may also support
+ other interfaces.</p>
+ </dl>
+
+ <p class=big-issue>Shouldn't allow inline-level content to be the content
+ model when the parent's content model is strictly inline only.
+
+ <p>The <code><a href="#object">object</a></code> element can represent an
+ external resource, which, depending on the type of the resource, will
+ either be treated as an image, as a nested <a href="#browsing0">browsing
+ context</a>, or as an external resource to be processed by a third-party
+ software package.
+
+ <p>The <dfn id=data title=attr-object-data><code>data</code></dfn>
+ attribute, if present, specifies the address of the resource. If present,
+ the attribute must be a URI (or IRI).
+
+ <p>The <dfn id=type6 title=attr-object-type><code>type</code></dfn>
+ attribute, if present, specifies the type of the resource. If present, the
+ attribute must be a valid MIME type, optionally with parameters. <a
+ href="#refsRFC2046">[RFC2046]</a>
+
+ <p>One or both of the <code title=attr-object-data><a
+ href="#data">data</a></code> and <code title=attr-object-type><a
+ href="#type6">type</a></code> attributes must be present.
+
+ <p>Whenever the <code title=attr-object-data><a
+ href="#data">data</a></code> attribute changes, or, if the <code
+ title=attr-object-data><a href="#data">data</a></code> attribute is not
+ present, whenever the <code title=attr-object-type><a
+ href="#type6">type</a></code> attribute changes, the user agent must
+ follow the following steps to determine what the <code><a
+ href="#object">object</a></code> element represents:
+
+ <ol>
+ <li>
+ <p>If the <code title=attr-object-data><a href="#data">data</a></code>
+ attribute is present, then:</p>
+
+ <ol>
+ <li>
+ <p>Begin a load for the resource.</p>
+ <!-- XXX define that
+ --><!-- XXX xref -->
+ <p>The download of the resource must <a href="#delays">delay the <code
+ title=event-load>load</code> event</a>.</p>
+
+ <li>
+ <p>If the resource is not yet available (e.g. because the resource was
+ not available in the cache, so that loading the resource required
+ making a request over the network), then jump to step 3 in the overall
+ set of steps (fallback). When the resource becomes available, or if
+ the load fails, restart this algorithm from this step. Resources can
+ load incrementally; user agents may opt to consider a resource
+ "available" whenever enough data has been obtained to begin processing
+ the resource.
+
+ <li>
+ <p>If the load failed (e.g. DNS error), <a href="#firing5">fire an
+ <code title=event-error>error</code> event</a> at the element, then
+ jump to step 3 in the overall set of steps (fallback).
+
+ <li>
+ <p>Determine the <em>resource type</em>, as follows:</p>
+
+ <p class=big-issue>This says to trust the type. Should we instead use
+ the same mechanism as for browsing contexts?</p>
+
+ <dl class=switch>
+ <dt>If the resource has <a href="#content-type8"
+ title=Content-Type>associated Content-Type metadata</a>
+
+ <dd>The type is the type specified in <a href="#content-type8"
+ title=Content-Type>the resource's Content-Type metadata</a>.
+
+ <dt>Otherwise, if the <code title=attr-object-type><a
+ href="#type6">type</a></code> attribute is present
+
+ <dd>The type is the type specified in the <code
+ title=attr-object-type><a href="#type6">type</a></code> attribute.
+
+ <dt>Otherwise, there is no explicit type information
+
+ <dd>The type is the <span title="sniffed type of a resource">sniffed
+ type of the resource</span>.
+ </dl>
+
+ <li>
+ <p>Handle the content as given by the first of the following cases that
+ matches:</p>
+
+ <dl class=switch>
+ <dt>If the resource requires a special handler (e.g. a plugin)
+
+ <dd>
+ <p>The user agent should find an appropriate handler for the
+ specified resource, based on the <em>resource type</em> found in the
+ previous step, and pass the content of the resource to that handler.
+ If the handler supports a scriptable interface, the <code><a
+ href="#htmlobjectelement">HTMLObjectElement</a></code> object
+ representing the element should expose that interface. The handler
+ is not a nested <a href="#browsing0">browsing context</a>. If no
+ appropriate handler can be found, then jump to step 3 in the overall
+ set of steps (fallback).</p>
+
+ <p>The user agent should pass the names and values of all the <a
+ href="#parameter" title=concept-param-parameters>parameters</a> given by <code><a
+ href="#param">param</a></code> elements that are children of the
+ <code><a href="#object">object</a></code> element to the handler
+ used.</p>
+ <!-- duplicates what's in <embed> section above -->
+ <p class=note>This specification does not define a mechanism for
+ interacting with third-party handlers, as it is expected to be
+ user-agent-specific. Some UAs might opt to support a plugin
+ mechanism such as the Netscape Plugin API; others may use remote
+ content convertors or have built-in support for certain types. <a
+ href="#refsNPAPI">[NPAPI]</a></p>
+
+ <p class=big-issue>this doesn't completely duplicate the navigation
+ section, since it handles &lt;param>, etc, but surely some work
+ should be done to work with it</p>
+
+ <dt>If the type of the resource is an <span>XML MIME
+ type</span><!-- XXX xref -->
+
+ <dt>If the type of the resource is HTML
+
+ <dt>If the type of the resource does not start with
+ "<code>image/</code>"
+
+ <dd>
+ <p>The <code><a href="#object">object</a></code> element must be
+ associated with a nested <a href="#browsing0">browsing context</a>,
+ if it does not already have one. The element's nested <a
+ href="#browsing0">browsing context</a> must then be <a
+ href="#navigate" title=navigate>navigated</a> to the given resource,
+ with <a href="#replacement">replacement enabled</a>. (The <code
+ title=attr-object-data><a href="#data">data</a></code> attribute of
+ the <code><a href="#object">object</a></code> element doesn't get
+ updated if the browsing context gets further navigated to other
+ locations.)</p>
+
+ <p class=big-issue>navigation might end up treating it as something
+ else, because it can do sniffing. how should we handle that?</p>
+
+ <dt>If the resource is a supported image format, and support for
+ images has not been disabled
+
+ <dd>
+ <p>The <code><a href="#object">object</a></code> element represents
+ the specified image. The image is not a nested <a
+ href="#browsing0">browsing context</a>.</p>
+
+ <p class=big-issue>shouldn't we use the image-sniffing stuff here?</p>
+
+ <dt>Otherwise
+
+ <dd>
+ <p>The <code><a href="#object">object</a></code> element represents
+ the specified image, but the image cannot be shown. Jump to step 3
+ below in the overall set of steps (fallback).</p>
+ </dl>
+
+ <li>
+ <p>The element's contents are not part of what the <code><a
+ href="#object">object</a></code> element represents.</p>
+
+ <li>
+ <p>Once the resource is completely loaded, <a href="#firing4">fire a
+ <code title=event-load>load</code> event</a> at the element.
+ </li>
+ <!-- XXX ordering of events (like with iframe)
+ -->
+ </ol>
+
+ <li>
+ <p>If the <code title=attr-object-data><a href="#data">data</a></code>
+ attribute is absent but the <code title=attr-object-type><a
+ href="#type6">type</a></code> attribute is present, and if the user
+ agent can find a handler suitable according to the value of the <code
+ title=attr-object-type><a href="#type6">type</a></code> attribute, then
+ that handler should be used. If the handler supports a scriptable
+ interface, the <code><a
+ href="#htmlobjectelement">HTMLObjectElement</a></code> object
+ representing the element should expose that interface. The handler is
+ not a nested <a href="#browsing0">browsing context</a>. If no suitable
+ handler can be found, jump to the next step (fallback).
+
+ <li>
+ <p>(Fallback.) The <code><a href="#object">object</a></code> element
+ doesn't represent anything except what the element's contents represent,
+ ignoring any leading <code><a href="#param">param</a></code> element
+ children. This is the element's <a href="#fallback">fallback
+ content</a>.
+ </ol>
+
+ <p>In the absence of other factors (such as style sheets), user agents must
+ show the user what the <code><a href="#object">object</a></code> element
+ represents. Thus, the contents of <code><a
+ href="#object">object</a></code> elements act as <a
+ href="#fallback">fallback content</a>, to be used only when the referenced
+ resource can't be shown (e.g. because it returned a 404 error). This
+ allows multiple <code><a href="#object">object</a></code> elements to be
+ nested inside each other, targeting multiple user agents with different
+ capabilities, with the user agent picking the best one it supports.
+
+ <p>The <code title=attr-hyperlink-usemap><a
+ href="#usemap1">usemap</a></code> attribute, if present while the <code><a
+ href="#object">object</a></code> element represents an image, can indicate
+ that the object has an associated <a href="#image">image map</a>. The
+ attribute must be ignored if the <code><a href="#object">object</a></code>
+ element doesn't represent an image.
+
+ <p class=big-issue>height/width
+
+ <p>The DOM attributes <dfn id=data0
+ title=dom-object-data><code>data</code></dfn>, <dfn id=type7
+ title=dom-object-type><code>type</code></dfn>, <dfn id=usemap0
+ title=dom-object-useMap><code>useMap</code></dfn>, <dfn id=height2
+ title=dom-object-height><code>height</code></dfn>, and <dfn id=width2
+ title=dom-object-width><code>width</code></dfn> each must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <h4 id=the-param><span class=secno>3.14.6. </span>The <dfn
+ id=param><code>param</code></dfn> element</h4>
+ <!-- no type -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of an <code><a href="#object">object</a></code> element,
+ before any content other than <code><a href="#param">param</a></code>
+ elements.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-param-name><a href="#name1">name</a></code>
+ (required)
+
+ <dd><code title=attr-param-value><a href="#value5">value</a></code>
+ (required)
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlparamelement>HTMLParamElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#name2" title=dom-param-name>name</a>;
+ attribute DOMString <a href="#value6" title=dom-param-value>value</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#param">param</a></code> element defines parameters
+ for handlers invoked by <code><a href="#object">object</a></code>
+ elements.
+
+ <p>The <dfn id=name1 title=attr-param-name><code>name</code></dfn>
+ attribute gives the name of the parameter.
+
+ <p>The <dfn id=value5 title=attr-param-value><code>value</code></dfn>
+ attribute gives the value of the parameter.
+
+ <p>Both attributes must be present. They may have any value.
+
+ <p>If both attributes are present, and if the parent element of the
+ <code><a href="#param">param</a></code> is an <code><a
+ href="#object">object</a></code> element, then the element defines a <dfn
+ id=parameter title=concept-param-parameters>parameter</dfn> with the given
+ name/value pair.
+
+ <p>The DOM attributes <dfn id=name2
+ title=dom-param-name><code>name</code></dfn> and <dfn id=value6
+ title=dom-param-value><code>value</code></dfn> must both <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <h4 id=video><span class=secno>3.14.7. </span>The <dfn
+ id=video1><code>video</code></dfn> element</h4>
+
+ <p><a href="#semi-transparent">Semi-transparent</a> <a href="#strictly"
+ title="Strictly inline-level content">strictly inline-level</a> <a
+ href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>If the element has a <code title=attr-media-src><a
+ href="#src5">src</a></code> attribute: <a
+ href="#transparent0">transparent</a>.
+
+ <dd>If the element does not have a <code title=attr-media-src><a
+ href="#src5">src</a></code> attribute: one or more <code><a
+ href="#source">source</a></code> elements, then, <a
+ href="#transparent0">transparent</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-media-src><a href="#src5">src</a></code>
+
+ <dd><code title=attr-media-autoplay><a
+ href="#autoplay">autoplay</a></code>
+
+ <dd><code title=attr-media-start><a href="#start2">start</a></code>
+
+ <dd><code title=attr-media-loopstart><a
+ href="#loopstart">loopstart</a></code>
+
+ <dd><code title=attr-media-loopend><a href="#loopend">loopend</a></code>
+
+ <dd><code title=attr-media-end><a href="#end">end</a></code>
+
+ <dd><code title=attr-media-loopcount><a
+ href="#loopcount">loopcount</a></code>
+
+ <dd><code title=attr-media-controls><a
+ href="#controls">controls</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlvideoelement>HTMLVideoElement</dfn> : <a href="#htmlmediaelement">HTMLMediaElement</a> {
+ readonly attribute unsigned long <a href="#videowidth" title=dom-video-videoWidth>videoWidth</a>;
+ readonly attribute unsigned long <a href="#videoheight" title=dom-video-videoHeight>videoHeight</a>;
+};</pre>
+ </dl>
+ <!-- XXX request: changing the playback aspect ratio -->
+ <!-- XXX request: applying CSS filters -->
+
+ <p>A <code><a href="#video1">video</a></code> element represents a video or
+ movie.
+
+ <p>Content may be provided inside the <code><a
+ href="#video1">video</a></code> element so that older Web browsers, which
+ do not support <code><a href="#video1">video</a></code>, can display text
+ to the user informing them of how to access the video contents. User
+ agents should not show this fallback content to the user.
+
+ <p>The <code><a href="#video1">video</a></code> element is a <a
+ href="#media5">media element</a> whose <a href="#media7">media data</a> is
+ ostensibly video data, possibly with associated audio data.
+
+ <p>The <code title=attr-media-src><a href="#src5">src</a></code>, <code
+ title=attr-media-autoplay><a href="#autoplay">autoplay</a></code>, <code
+ title=attr-media-start><a href="#start2">start</a></code>, <code
+ title=attr-media-loopstart><a href="#loopstart">loopstart</a></code>,
+ <code title=attr-media-loopend><a href="#loopend">loopend</a></code>,
+ <code title=attr-media-end><a href="#end">end</a></code>, <code
+ title=attr-media-loopcount><a href="#loopcount">loopcount</a></code>, and
+ <code title=attr-media-controls><a href="#controls">controls</a></code>
+ attributes are <a href="#media6" title="media element attributes">the
+ attributes common to all media elements</a>.
+
+ <p>The <dfn id=videowidth
+ title=dom-video-videoWidth><code>videoWidth</code></dfn> DOM attribute
+ must return the native width of the video in CSS pixels. The <dfn
+ id=videoheight title=dom-video-videoHeight><code>videoHeight</code></dfn>
+ DOM attribute must return the native height of the video in CSS pixels. In
+ the absence of resolution information, user agents may assume that one
+ pixel in the video corresponds to one CSS pixel. If no video data is
+ available, then the attributes must return 0.
+
+ <p>When no video data is available (the element's <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute is either <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code>, <code
+ title=dom-media-LOADING><a href="#loading0">LOADING</a></code>, or <code
+ title=dom-media-LOADED_METADATA><a
+ href="#loadedmetadata">LOADED_METADATA</a></code>), <code><a
+ href="#video1">video</a></code> elements represent nothing.
+
+ <p>When a <code><a href="#video1">video</a></code> element is <a
+ href="#actively">actively playing</a>, it represents the frame of video at
+ the continuously increasing <a href="#current" title="current playback
+ position">"current" position</a>. When the <a href="#current">current
+ playback position</a> changes such that the last frame rendered is no
+ longer the frame corresponding to the <a href="#current">current playback
+ position</a> in the video, the new frame must be rendered. Similarly, any
+ audio associated with the video must, if played, be played synchronised
+ with the <a href="#current">current playback position</a>, at the
+ specified <a href="#volume" title=dom-media-volume>volume</a> with the
+ specified <a href="#muted" title=dom-media-muted>mute state</a>.
+
+ <p>When a <code><a href="#video1">video</a></code> element is <a
+ href="#paused" title=dom-media-paused>paused</a>, the element represents
+ the frame of video corresponding to the <a href="#current" title="current
+ playback position">current playback position</a>, or, if that is not
+ available yet (e.g. because the video is seeking or buffering), the last
+ rendered frame of video.
+
+ <p>When a <code><a href="#video1">video</a></code> element is neither <a
+ href="#actively">actively playing</a> nor <a href="#paused"
+ title=dom-media-paused>paused</a>, the element represents the last frame
+ of the video to have been rendered.
+
+ <p class=note>Which frame in a video stream corresponds to a particular
+ playback position is defined by the video stream's format.
+
+ <p>Video content should be rendered inside the element's playback area such
+ that the video content is shown centered in the playback area at the
+ largest possible size that fits completely within it, with the video
+ content's aspect ratio being preserved. Thus, if the aspect ratio of the
+ playback area does not match the aspect ratio of the video, the video will
+ be shown letterboxed. Areas of the element's playback area that do not
+ contain the video represent nothing.</p>
+ <!-- XXX
+ make it an interactive element
+ default activation behaviour is to do the play() if paused, pause()
+ otherwise
+ -->
+
+ <p>User agents should provide controls to enable or disable the display of
+ closed captions associated with the video stream, though such features
+ should, again, not interfere with the page's normal rendering.
+
+ <p>User agents may allow users to view the video content in manners more
+ suitable to the user (e.g. full-screen or in an independent resizable
+ window). As for the other user interface features, controls to enable this
+ should not interfere with the page's normal rendering unless the user
+ agent is <a href="#expose" title="expose a user interface to the
+ user">exposing a user interface</a>. In such an independent context,
+ however, user agents may make full user interfaces visible, with, e.g.,
+ play, pause, seeking, and volume controls, even if the <code
+ title=attr-media-controls><a href="#controls">controls</a></code>
+ attribute is absent.
+
+ <p>User agents may allow video playback to affect system features that
+ could interfere with the user's experience; for example, user agents could
+ disable screensavers while video playback is in progress.</p>
+ <!-- XXX rendering section should mention that resizing a video
+ should in no way interrupt playback -->
+
+ <h5 id=video0><span class=secno>3.14.7.1. </span>Video and audio codecs for
+ <code><a href="#video1">video</a></code> elements</h5>
+
+ <p>User agents may support any video and audio codecs and container
+ formats.
+
+ <p>User agents should support Ogg Theora video and Ogg Vorbis audio, as
+ well as the Ogg container format. <a href="#refsOggTheora">[THEORA]</a> <a
+ href="#refsOggVorbis">[VORBIS]</a> <a href="#refsOgg">[OGG]</a></p>
+ <!-- (it's not a MUST because some vendors may have legal reasons
+ why they can't or won't support it, and there's no point making them
+ non-conforming when they have no choice in the matter) -->
+ <!-- XXX mention that this spec doesn't require native support or
+ plugin support, either is fine -->
+
+ <h4 id=audio><span class=secno>3.14.8. </span>The <dfn
+ id=audio1><code>audio</code></dfn> element</h4>
+
+ <p><a href="#semi-transparent">Semi-transparent</a> <a href="#strictly"
+ title="Strictly inline-level content">strictly inline-level</a> <a
+ href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>If the element has a <code title=attr-media-src><a
+ href="#src5">src</a></code> attribute: <a
+ href="#transparent0">transparent</a>.
+
+ <dd>If the element does not have a <code title=attr-media-src><a
+ href="#src5">src</a></code> attribute: one or more <code><a
+ href="#source">source</a></code> elements, then, <a
+ href="#transparent0">transparent</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-media-src><a href="#src5">src</a></code>
+
+ <dd><code title=attr-media-autoplay><a
+ href="#autoplay">autoplay</a></code>
+
+ <dd><code title=attr-media-start><a href="#start2">start</a></code>
+
+ <dd><code title=attr-media-loopstart><a
+ href="#loopstart">loopstart</a></code>
+
+ <dd><code title=attr-media-loopend><a href="#loopend">loopend</a></code>
+
+ <dd><code title=attr-media-end><a href="#end">end</a></code>
+
+ <dd><code title=attr-media-loopcount><a
+ href="#loopcount">loopcount</a></code>
+
+ <dd><code title=attr-media-controls><a
+ href="#controls">controls</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlaudioelement>HTMLAudioElement</dfn> : <a href="#htmlmediaelement">HTMLMediaElement</a> {
+ // no members
+};</pre>
+ </dl>
+
+ <p>An <code><a href="#audio1">audio</a></code> element represents a sound
+ or audio stream.
+
+ <p>Content may be provided inside the <code><a
+ href="#audio1">audio</a></code> element so that older Web browsers, which
+ do not support <code><a href="#audio1">audio</a></code>, can display text
+ to the user informing them of how to access the audio contents. User
+ agents should not show this fallback content to the user.
+
+ <p>The <code><a href="#audio1">audio</a></code> element is a <a
+ href="#media5">media element</a> whose <a href="#media7">media data</a> is
+ ostensibly audio data.
+
+ <p>The <code title=attr-media-src><a href="#src5">src</a></code>, <code
+ title=attr-media-autoplay><a href="#autoplay">autoplay</a></code>, <code
+ title=attr-media-start><a href="#start2">start</a></code>, <code
+ title=attr-media-loopstart><a href="#loopstart">loopstart</a></code>,
+ <code title=attr-media-loopend><a href="#loopend">loopend</a></code>,
+ <code title=attr-media-end><a href="#end">end</a></code>, <code
+ title=attr-media-loopcount><a href="#loopcount">loopcount</a></code>, and
+ <code title=attr-media-controls><a href="#controls">controls</a></code>
+ attributes are <a href="#media6" title="media element attributes">the
+ attributes common to all media elements</a>.
+
+ <p>When an <code><a href="#audio1">audio</a></code> element is <a
+ href="#actively">actively playing</a>, it must have its audio data played
+ synchronised with the <a href="#current">current playback position</a>, at
+ the specified <a href="#volume" title=dom-media-volume>volume</a> with the
+ specified <a href="#muted" title=dom-media-muted>mute state</a>.
+
+ <p>When an <code><a href="#audio1">audio</a></code> element is not <a
+ href="#actively">actively playing</a>, audio must not play for the
+ element.
+
+ <h5 id=audio0><span class=secno>3.14.8.1. </span>Audio codecs for <code><a
+ href="#audio1">audio</a></code> elements</h5>
+
+ <p>User agents may support any audio codecs and container formats.
+
+ <p>User agents must support the WAVE container format with audio encoded
+ using the PCM format. <!-- XXX references? #refs --></p>
+ <!-- XXX mention that this spec doesn't require native support or
+ plugin support, either is fine -->
+
+ <h4 id=media><span class=secno>3.14.9. </span>Media elements</h4>
+
+ <p><dfn id=media5 title="media element">Media elements</dfn> implement the
+ following interface:
+
+ <pre
+ class=idl>interface <dfn id=htmlmediaelement>HTMLMediaElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+
+ // error state
+ readonly attribute <a href="#mediaerror">MediaError</a> <a href="#error0" title=dom-media-error>error</a>;
+
+ // network state
+ attribute DOMString <a href="#src6" title=dom-media-src>src</a>;
+ readonly attribute DOMString <a href="#currentsrc" title=dom-media-currentSrc>currentSrc</a>;
+ const unsigned short <a href="#empty" title=dom-media-EMPTY>EMPTY</a> = 0;
+ const unsigned short <a href="#loading0" title=dom-media-LOADING>LOADING</a> = 1;
+ const unsigned short <a href="#loadedmetadata" title=dom-media-LOADED_METADATA>LOADED_METADATA</a> = 2;
+ const unsigned short <a href="#loadedfirstframe" title=dom-media-LOADED_FIRST_FRAME>LOADED_FIRST_FRAME</a> = 3;
+ const unsigned short <a href="#loaded" title=dom-media-LOADED>LOADED</a> = 4;
+ readonly attribute unsigned short <a href="#networkstate" title=dom-media-networkState>networkState</a>;
+ readonly attribute float <a href="#bufferingrate" title=dom-media-bufferingRate>bufferingRate</a>;
+ readonly attribute <a href="#timeranges">TimeRanges</a> <a href="#buffered" title=dom-media-buffered>buffered</a>;
+ void <a href="#load" title=dom-media-load>load</a>();
+
+ // ready state
+ const unsigned short <a href="#dataunavailable" title=dom-media-DATA_UNAVAILABLE>DATA_UNAVAILABLE</a> = 0;
+ const unsigned short <a href="#canshowcurrentframe" title=dom-media-CAN_SHOW_CURRENT_FRAME>CAN_SHOW_CURRENT_FRAME</a> = 1;
+ const unsigned short <a href="#canplay" title=dom-media-CAN_PLAY>CAN_PLAY</a> = 2;
+ const unsigned short <a href="#canplaythrough" title=dom-media-CAN_PLAY_THROUGH>CAN_PLAY_THROUGH</a> = 3;
+ readonly attribute unsigned short <a href="#readystate" title=dom-media-readyState>readyState</a>;
+ readonly attribute boolean <a href="#seeking0" title=dom-media-seeking>seeking</a>;
+
+ // playback state
+ attribute float <a href="#currenttime" title=dom-media-currentTime>currentTime</a>;
+ readonly attribute float <a href="#duration" title=dom-media-duration>duration</a>;
+ readonly attribute boolean <a href="#paused" title=dom-media-paused>paused</a>;
+ attribute float <a href="#defaultplaybackrate" title=dom-media-defaultPlaybackRate>defaultPlaybackRate</a>;
+ attribute float <a href="#playbackrate" title=dom-media-playbackRate>playbackRate</a>;
+ readonly attribute <a href="#timeranges">TimeRanges</a> <a href="#played" title=dom-media-played>played</a>;
+ readonly attribute <a href="#timeranges">TimeRanges</a> <a href="#seekable" title=dom-media-seekable>seekable</a>;
+ readonly attribute boolean <a href="#ended0" title=dom-media-ended>ended</a>;
+ attribute boolean <a href="#autoplay0" title=dom-media-autoplay>autoplay</a>;
+ void <a href="#play" title=dom-media-play>play</a>();
+ void <a href="#pause0" title=dom-media-pause>pause</a>();
+
+ // looping
+ attribute float <a href="#start3" title=dom-media-start>start</a>;
+ attribute float <a href="#end0" title=dom-media-end>end</a>;
+ attribute float <a href="#loopstart0" title=dom-media-loopStart>loopStart</a>;
+ attribute float <a href="#loopend0" title=dom-media-loopEnd>loopEnd</a>;
+ attribute unsigned long <a href="#loopcount0" title=dom-media-loopCount>loopCount</a>;
+ attribute unsigned long <a href="#currentloop" title=dom-media-currentLoop>currentLoop</a>;
+
+ // cue points
+ void <a href="#addcuepoint" title=dom-media-addCuePoint>addCuePoint</a>(in float time, in <a href="#voidcallback">VoidCallback</a> callback, in boolean pause);
+ void <a href="#removecuepoint" title=dom-media-removeCuePoint>removeCuePoint</a>(in float time, in <a href="#voidcallback">VoidCallback</a> callback);
+
+ // controls
+ attribute boolean <a href="#controls0" title=dom-media-controls>controls</a>;
+ attribute float <a href="#volume" title=dom-media-volume>volume</a>;
+ attribute boolean <a href="#muted" title=dom-media-muted>muted</a>;
+};</pre>
+
+ <p>The <dfn id=media6>media element attributes</dfn>, <code
+ title=attr-media-src><a href="#src5">src</a></code>, <code
+ title=attr-media-autoplay><a href="#autoplay">autoplay</a></code>, <code
+ title=attr-media-start><a href="#start2">start</a></code>, <code
+ title=attr-media-loopstart><a href="#loopstart">loopstart</a></code>,
+ <code title=attr-media-loopend><a href="#loopend">loopend</a></code>,
+ <code title=attr-media-end><a href="#end">end</a></code>, <code
+ title=attr-media-loopcount><a href="#loopcount">loopcount</a></code>, and
+ <code title=attr-media-controls><a href="#controls">controls</a></code>,
+ apply to all <a href="#media5" title="media element">media elements</a>.
+ They are defined in this section.</p>
+ <!-- XXX v3 features:
+ * frame forward / backwards / step(n) while paused
+ * hasAudio, hasVideo, hasCaptions, etc
+ * per-frame control: get current frame; set current frame
+ * queue of content
+ - pause current stream and insert content at front of queue to play immediately
+ - pre-download another stream
+ - add stream(s) to play at end of current stream
+ - pause playback upon reaching a certain time
+ - playlists, with the ability to get metadata out of them (e.g. xspf)
+ * control over closed captions: enable, disable, select language
+ * get byte ranges as well as time ranges for buffered data
+ * in-band metadata and cue points to allow:
+ - Chapter markers that synchronize to playback (without having to poll
+ the playhead position)
+ - Annotations on video content (i.e., pop-up video)
+ - General custom metadata store (ratings, etc.)
+ * notification of chapter labels changing on the fly:
+ - onchapterlabelupdate, which has a time and a label
+ * general meta data, implemented as getters (don't expose the whole thing)
+ - getMetadata(key: string, language: string) => HTMLImageElement or string
+ - onmetadatachanged (no context info)
+ -->
+
+ <p><a href="#media5" title="media element">Media elements</a> are used to
+ present audio data, or video and audio data, to the user. This is referred
+ to as <dfn id=media7>media data</dfn> in this section, since this section
+ applies equally to <a href="#media5" title="media element">media
+ elements</a> for audio or for video. The term <dfn id=media8>media
+ resource</dfn> is used to refer to the complete set of media data, e.g.
+ the complete video file, or complete audio file.
+
+ <h5 id=error><span class=secno>3.14.9.1. </span>Error codes</h5>
+
+ <p>All <a href="#media5" title="media element">media elements</a> have an
+ associated error status, which records the last error the element
+ encountered since the <code title=dom-media-load><a
+ href="#load">load()</a></code> method was last invoked. The <dfn id=error0
+ title=dom-media-error><code>error</code></dfn> attribute, on getting, must
+ return the <code><a href="#mediaerror">MediaError</a></code> object
+ created for this last error, or null if there has not been an error.
+
+ <pre class=idl>interface <dfn id=mediaerror>MediaError</dfn> {
+ const unsigned short <a href="#mediaerraborted" title=dom-MediaError-MEDIA_ERR_ABORTED>MEDIA_ERR_ABORTED</a> = 1;
+ const unsigned short <a href="#mediaerrnetwork" title=dom-MediaError-MEDIA_ERR_NETWORK>MEDIA_ERR_NETWORK</a> = 2;
+ const unsigned short <a href="#mediaerrdecode" title=dom-MediaError-MEDIA_ERR_DECODE>MEDIA_ERR_DECODE</a> = 3;
+ readonly attribute unsigned short <a href="#code0" title=dom-MediaError-code>code</a>;
+};</pre>
+
+ <p>The <dfn id=code0 title=dom-MediaError-code><code>code</code></dfn>
+ attribute of a <code><a href="#mediaerror">MediaError</a></code> object
+ must return the code for the error, which must be one of the following:
+
+ <dl>
+ <dt><dfn id=mediaerraborted
+ title=dom-MediaError-MEDIA_ERR_ABORTED><code>MEDIA_ERR_ABORTED</code></dfn>
+ (numeric value 1)
+
+ <dd>The download of the <a href="#media8">media resource</a> was aborted
+ by the user agent at the user's request.
+
+ <dt><dfn id=mediaerrnetwork
+ title=dom-MediaError-MEDIA_ERR_NETWORK><code>MEDIA_ERR_NETWORK</code></dfn>
+ (numeric value 2)
+
+ <dd>A network error of some description caused the user agent to stop
+ downloading the <a href="#media8">media resource</a>.
+
+ <dt><dfn id=mediaerrdecode
+ title=dom-MediaError-MEDIA_ERR_DECODE><code>MEDIA_ERR_DECODE</code></dfn>
+ (numeric value 3)
+
+ <dd>An error of some description occurred while decoding the <a
+ href="#media8">media resource</a>.
+ </dl>
+
+ <h5 id=location><span class=secno>3.14.9.2. </span>Location of the media
+ resource</h5>
+
+ <p>The <dfn id=src5 title=attr-media-src><code>src</code></dfn> content
+ attribute on <a href="#media5" title="media element">media elements</a>
+ gives the address of the media resource to show. The attribute, if present, must
+ contain a URI (or IRI).
+
+ <p class=note>If a <code title=attr-media-src><a
+ href="#src5">src</a></code> attribute is specified, the resource it
+ specifies is the <a href="#media8">media resource</a> that will be used.
+ Otherwise, the resource specified by the first suitable <code><a
+ href="#source">source</a></code> element child of the <a
+ href="#media5">media element</a> is the one used.
+
+ <p>The <dfn id=src6 title=dom-media-src><code>src</code></dfn> DOM
+ attribute on <a href="#media5" title="media element">media elements</a>
+ must <a href="#reflect">reflect</a> the content attribute of the same
+ name.
+
+ <p>To <dfn id=pick-a>pick a media resource</dfn> for a <a
+ href="#media5">media element</a>, a user agent must follow the following
+ steps:
+
+ <ol>
+ <li>
+ <p>If the <a href="#media5">media element</a> has a <code
+ title=attr-media-src><a href="#src5">src</a></code>, then the address
+ given in that attribute is the address of the <a href="#media8">media
+ resource</a>; jump to the last step.
+
+ <li>
+ <p>Otherwise, let <var title="">candidate</var> be the first <code><a
+ href="#source">source</a></code> element child in the <a
+ href="#media5">media element</a>, or null if there is no such child.
+
+ <li>
+ <p>If either:</p>
+
+ <ul>
+ <li><var title="">candidate</var> is null, or
+
+ <li>the <var title="">candidate</var> element has no <code
+ title=attr-source-src><a href="#src7">src</a></code> attribute, or
+
+ <li>the <var title="">candidate</var> element has a <code
+ title=attr-source-type><a href="#type8">type</a></code> attribute and
+ that attribute's value, when parsed as a MIME type, does not represent
+ a type that the user agent can render (including any codecs described
+ by the <code title="">codecs</code> parameter), or <a
+ href="#refsRFC2046">[RFC2046]</a> <a href="#refsRFC4281">[RFC4281]</a>
+
+ <li>the <var title="">candidate</var> element has a <code
+ title=attr-source-media><a href="#media9">media</a></code> attribute
+ and that attribute's value, when processed according to the rules for
+ media queries, does not match the current environment, <a
+ href="#refsMQ">[MQ]</a>
+ </ul>
+
+ <p>...then the <var title="">candidate</var> is not suitable; go to the
+ next step.</p>
+
+ <p>Otherwise, the address given in that <var title="">candidate</var>
+ element's <code title=attr-source-src><a href="#src7">src</a></code>
+ attribute is the address of the <a href="#media8">media resource</a>;
+ jump to the last step.</p>
+
+ <li>
+ <p>Let <var title="">candidate</var> be the next <code><a
+ href="#source">source</a></code> element child in the <a
+ href="#media5">media element</a>, or null if there are no more such
+ children.
+
+ <li>
+ <p>If <var title="">candidate</var> is not null, return to step 3.
+
+ <li>
+ <p>There is no <a href="#media8">media resource</a>. Abort these steps.
+
+ <li>
+ <p>Let the address of the <dfn id=chosen>chosen media resource</dfn> be
+ the one that was found before jumping to this step.
+ </ol>
+
+ <p>The <dfn id=currentsrc
+ title=dom-media-currentSrc><code>currentSrc</code></dfn> DOM attribute
+ must return the empty string if the <a href="#media5">media element</a>'s
+ <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> has the value <a
+ href="#empty" title=dom-media-EMPTY>EMPTY</a>, and the absolute URL of the
+ <a href="#chosen">chosen media resource</a> otherwise.
+
+ <h5 id=network0><span class=secno>3.14.9.3. </span>Network states</h5>
+
+ <p>As <a href="#media5" title="media element">media elements</a> interact
+ with the network, they go through several states. The <dfn id=networkstate
+ title=dom-media-networkState><code>networkState</code></dfn> attribute, on
+ getting, must return the current network state of the element, which must
+ be one of the following values:
+
+ <dl>
+ <dt><dfn id=empty title=dom-media-EMPTY><code>EMPTY</code></dfn> (numeric
+ value 0)
+
+ <dd>The element has not yet been initialised. All attributes are in their
+ initial states.
+
+ <dt><dfn id=loading0 title=dom-media-LOADING><code>LOADING</code></dfn>
+ (numeric value 1)
+
+ <dd>The element has <a href="#pick-a" title="pick a media resource">picked
+ a media resource</a> (the <a href="#chosen">chosen media resource</a> is
+ available from the <code title=dom-media-currentSrc><a
+ href="#currentsrc">currentSrc</a></code> attribute), but none of the
+ metadata has yet been obtained and therefore all the other attributes are
+ still in their initial states.
+
+ <dt><dfn id=loadedmetadata
+ title=dom-media-LOADED_METADATA><code>LOADED_METADATA</code></dfn>
+ (numeric value 2)
+
+ <dd>Enough of the resource has been obtained that the metadata attributes
+ are initialised (e.g. the length is known). The API will no longer raise
+ exceptions when used.
+
+ <dt><dfn id=loadedfirstframe
+ title=dom-media-LOADED_FIRST_FRAME><code>LOADED_FIRST_FRAME</code></dfn>
+ (numeric value 3)
+
+ <dd>Actual <a href="#media7">media data</a> has been obtained. In the case
+ of video, this specifically means that a frame of video is available and
+ can be shown.
+
+ <dt><dfn id=loaded title=dom-media-LOADED><code>LOADED</code></dfn>
+ (numeric value 4)
+
+ <dd>The entire <a href="#media8">media resource</a> has been obtained and
+ is available to the user agent locally. Network connectivity could be
+ lost without affecting the media playback.
+ </dl>
+
+ <p>The algorithm for the <code title=dom-media-load><a
+ href="#load">load()</a></code> method defined below describes exactly when
+ the <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute changes value.
+
+ <h5 id=loading><span class=secno>3.14.9.4. </span>Loading the media
+ resource</h5>
+
+ <p>All <a href="#media5" title="media element">media elements</a> have a
+ <dfn id=begun>begun flag</dfn>, which must begin in the false state, a
+ <dfn id=loaded-first-frame>loaded-first-frame flag</dfn>, which must begin
+ in the false state, and an <dfn id=autoplaying>autoplaying flag</dfn>,
+ which must begin in the true state.
+
+ <p>When the <dfn id=load title=dom-media-load><code>load()</code></dfn>
+ method on a <a href="#media5">media element</a> is invoked, the user agent
+ must run the following steps. Note that this algorithm might get aborted,
+ e.g. if the <code title=dom-media-load><a href="#load">load()</a></code>
+ method itself is invoked again.
+
+ <ol>
+ <li>
+ <p>Any already-running instance of this algorithm for this element must
+ be aborted. If those method calls have not yet returned, they must
+ finish the step they are on, and then immediately return.
+
+ <li>
+ <p>If the element's <a href="#begun">begun flag</a> is true, then the <a
+ href="#begun">begun flag</a> must be set to false, the <code
+ title=dom-media-error><a href="#error0">error</a></code> attribute must
+ be set to a new <code><a href="#mediaerror">MediaError</a></code> object
+ whose <code title=dom-MediaError-code><a href="#code0">code</a></code>
+ attribute is set to <code title=dom-MediaError-MEDIA_ERR_ABORTED><a
+ href="#mediaerraborted">MEDIA_ERR_ABORTED</a></code>, and the user agent
+ must synchronously <a href="#firing6">fire a progress event</a> called
+ <code title=event-abort><a href="#abort">abort</a></code> at the <a
+ href="#media5">media element</a>.
+
+ <li>
+ <p>The <code title=dom-media-error><a href="#error0">error</a></code>
+ attribute must be set to null, and the <a
+ href="#loaded-first-frame">loaded-first-frame flag</a> and
+ <span>loaded-enough-to-play-through flag</span> must be both set to
+ false.
+
+ <li>
+ <p>The <code title=dom-media-playbackRate><a
+ href="#playbackrate">playbackRate</a></code> attribute must be set to
+ the value of the <code title=dom-media-defaultPlaybackRate><a
+ href="#defaultplaybackrate">defaultPlaybackRate</a></code> attribute.
+
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is not set to <a
+ href="#empty" title=dom-media-EMPTY>EMPTY</a>, then the following
+ substeps must be followed:
+
+ <ol><!--<li>Let <var title="">events</var> be a list of event names,
+ initially empty.</li>-->
+
+ <li>The <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be set to
+ <a href="#empty"
+ title=dom-media-EMPTY>EMPTY</a><!--, and the user agent must
+ add <code title="event-emptied">emptied</code> to the <var
+ title="">events</var> list-->.
+
+ <li>If <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> is not set to <code
+ title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code>, it must be set to
+ that state<!-- and the user agent must add <code
+ title="event-dataunavailable">dataunavailable</code> to the
+ <var title="">events</var> list-->.
+
+ <li>If the <code title=dom-media-paused><a
+ href="#paused">paused</a></code> attribute is false, it must be set to
+ true<!--, and the user agent must add
+ <code title="event-pause">pause</code> to the <var
+ title="">events</var> list-->.
+
+ <li>If <code title=dom-media-seeking><a
+ href="#seeking0">seeking</a></code> is true, it must be set to false.
+
+ <li>The <a href="#current">current playback position</a> must be set to
+ 0.
+
+ <li>The <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> DOM attribute must be set to
+ 0.</li>
+ <!--<li>The user agent must synchronously <span>fire a simple
+ event</span> at the <span>media element</span> for each event
+ name in <var title="">events</var>, in the same order that they
+ were added to that list.</li>-->
+
+ <li>The user agent must synchronously <a href="#firing2">fire a simple
+ event</a> called <code title=event-emptied><a
+ href="#emptied">emptied</a></code> at the <a href="#media5">media
+ element</a>.
+ </ol>
+
+ <li>
+ <p>The user agent must <a href="#pick-a">pick a media resource</a> for
+ the <a href="#media5">media element</a>. If that fails, the method must
+ raise an <code>INVALID_STATE_ERR</code> exception, and abort these
+ steps.
+
+ <li>
+ <p>The <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be set to <a
+ href="#loading0" title=dom-media-LOADING>LOADING</a>.
+
+ <li>
+ <p class=note>The <code title=dom-media-currentSrc><a
+ href="#currentsrc">currentSrc</a></code> attribute starts returning the
+ new value.
+
+ <li>
+ <p>The user agent must then set the <a href="#begun">begun flag</a> to
+ true and <a href="#firing6">fire a progress event</a> called
+ <code>begin</code> at the <a href="#media5">media element</a>.
+
+ <li>
+ <p>The method must return, but these steps must continue.
+
+ <li>
+ <p class=note>Playback of any previously playing <a href="#media8">media
+ resource</a> for this element stops.
+
+ <li>
+ <p>If a download is in progress for the <a href="#media5">media
+ element</a>, the user agent should stop the download.
+
+ <li>
+ <p>The user agent must then begin to download the <a
+ href="#chosen">chosen media resource</a>. The rate of the download may
+ be throttled, however, in response to user preferences (including
+ throttling it to zero until the user indicates that the download can
+ start), or to balance the download with other connections sharing the
+ same bandwidth.
+
+ <li>
+ <p>While the download is progressing, the user agent must <a
+ href="#firing6">fire a progress event</a> called <code
+ title=event-progress><a href="#progress0">progress</a></code> at the
+ element every 350ms (&#xB1;200ms) or for every byte received, whichever
+ is <em>least</em> frequent.</p>
+
+ <p>If at any point the user agent has received no data for more than
+ about three seconds, the user agent must <a href="#firing6">fire a
+ progress event</a> called <code title=event-stalled><a
+ href="#stalled">stalled</a></code> at the element.</p>
+
+ <p>User agents may allow users to selectively block or slow <a
+ href="#media7">media data</a> downloads. When a <a href="#media5">media
+ element</a>'s download has been blocked, the user agent must act as if
+ it was stalled (as opposed to acting as if the connection was closed).</p>
+
+ <p>The user agent may use whatever means necessary to download the
+ resource (within the constraints put forward by this and other
+ specifications); for example, reconnecting to the server in the face of
+ network errors, using HTTP partial range requests, or switching to a
+ streaming protocol. The user agent must only consider a resource
+ erroneous if it has given up trying to download it.</p>
+
+ <dl class=switch>
+ <dt>If the <a href="#media7">media data</a> cannot be downloaded at all,
+ due to network errors, causing the user agent to give up trying to
+ download the resource
+
+ <dd>
+ <p>DNS errors and HTTP 4xx and 5xx errors (and equivalents in other
+ protocols) must cause the user agent to follow the following steps.
+ User agents may also follow these steps in response to other network
+ errors of similar severity.</p>
+
+ <ol>
+ <li>The user agent should cancel the download.
+
+ <li>The <code title=dom-media-error><a href="#error0">error</a></code>
+ attribute must be set to a new <code><a
+ href="#mediaerror">MediaError</a></code> object whose <code
+ title=dom-MediaError-code><a href="#code0">code</a></code> attribute
+ is set to <code title=dom-MediaError-MEDIA_ERR_NETWORK><a
+ href="#mediaerrnetwork">MEDIA_ERR_NETWORK</a></code>.
+
+ <li>The <a href="#begun">begun flag</a> must be set to false and the
+ user agent must <a href="#firing6">fire a progress event</a> called
+ <code title=event-error><a href="#error1">error</a></code> at the <a
+ href="#media5">media element</a>.
+
+ <li>The element's <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be
+ switched to the <a href="#empty" title=dom-media-EMPTY>EMPTY</a>
+ value and the user agent must <a href="#firing2">fire a simple
+ event</a> called <code title=event-emptied><a
+ href="#emptied">emptied</a></code> at the element.
+
+ <li>These steps must be aborted.
+ </ol>
+
+ <dt id=fatal-decode-error>If the <a href="#media7">media data</a> can be
+ downloaded but is in an unsupported format, or can otherwise not be
+ properly rendered at all
+
+ <dd>
+ <p>The server returning a file of the wrong kind (e.g. one that
+ turns out to not be pure audio when the <a href="#media5">media
+ element</a> is an <code><a href="#audio1">audio</a></code> element), or
+ the file using unsupported codecs for all the data, must cause the
+ user agent to follow the following steps. User agents may also follow
+ these steps in response to other codec-related fatal errors, such as
+ the file requiring more resources to process than the user agent can
+ provide in real time.</p>
+
+ <ol>
+ <li>The user agent should cancel the download.
+
+ <li>The <code title=dom-media-error><a href="#error0">error</a></code>
+ attribute must be set to a new <code><a
+ href="#mediaerror">MediaError</a></code> object whose <code
+ title=dom-MediaError-code><a href="#code0">code</a></code> attribute
+ is set to <code title=dom-MediaError-MEDIA_ERR_DECODE><a
+ href="#mediaerrdecode">MEDIA_ERR_DECODE</a></code>.
+
+ <li>The <a href="#begun">begun flag</a> must be set to false and the
+ user agent must <a href="#firing6">fire a progress event</a> called
+ <code title=event-error><a href="#error1">error</a></code> at the <a
+ href="#media5">media element</a>.
+
+ <li>The element's <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be
+ switched to the <a href="#empty" title=dom-media-EMPTY>EMPTY</a>
+ value and the user agent must <a href="#firing2">fire a simple
+ event</a> called <code title=event-emptied><a
+ href="#emptied">emptied</a></code> at the element.
+
+ <li>These steps must be aborted.
+ </ol>
+
+ <dt>If the <a href="#media7">media data</a> download is aborted by the
+ user
+
+ <dd>
+ <p>If the download is aborted by the user, e.g. because the user navigated
+ the browsing context to another page, the user agent must follow the
+ following steps. These steps are not followed if the <code
+ title=dom-media-load><a href="#load">load()</a></code> method itself
+ is reinvoked, as the steps above handle that particular kind of abort.</p>
+
+ <ol>
+ <li>The user agent should cancel the download.
+
+ <li>The <code title=dom-media-error><a href="#error0">error</a></code>
+ attribute must be set to a new <code><a
+ href="#mediaerror">MediaError</a></code> object whose <code
+ title=dom-MediaError-code><a href="#code0">code</a></code> attribute
+ is set to <code title=dom-MediaError-MEDIA_ERR_ABORTED><a
+ href="#mediaerraborted">MEDIA_ERR_ABORTED</a></code>.
+
+ <li>The <a href="#begun">begun flag</a> must be set to false and the
+ user agent must <a href="#firing6">fire a progress event</a> called
+ <code title=event-abort><a href="#abort">abort</a></code> at the <a
+ href="#media5">media element</a>.
+
+ <li>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute has the value
+ <code title=dom-media-LOADING><a href="#loading0">LOADING</a></code>,
+ the element's <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be
+ switched to the <a href="#empty" title=dom-media-EMPTY>EMPTY</a>
+ value and the user agent must <a href="#firing2">fire a simple
+ event</a> called <code title=event-emptied><a
+ href="#emptied">emptied</a></code> at the element.
+
+ <li>These steps must be aborted.
+ </ol>
+
+ <dt id=non-fatal-media-error>If the <a href="#media7">media data</a> can
+ be downloaded but has non-fatal errors or uses, in part, codecs that
+ are unsupported, preventing the user agent from rendering the content
+ completely correctly but not preventing playback altogether
+
+ <dd>
+ <p>The server returning data that is partially usable but cannot be
+ optimally rendered must cause the user agent to follow the following
+ steps.</p>
+
+ <ol>
+ <li class=big-issue>Should we fire a 'warning' event? Set the 'error'
+ flag to 'MEDIA_ERR_SUBOPTIMAL' or something?
+ </ol>
+
+ <dt>Once enough of the <a href="#media7">media data</a> has been
+ downloaded to determine the duration of the <a href="#media8">media
+ resource</a>, its dimensions, and other metadata
+
+ <dd>
+ <p>The user agent must follow these substeps:</p>
+
+ <ol>
+ <li>
+ <p>The <a href="#current">current playback position</a> must be set
+ to the <var><a href="#effective">effective start</a></var>.
+
+ <li>
+ <p>The <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be set
+ to <code title=dom-media-LOADED_METADATA><a
+ href="#loadedmetadata">LOADED_METADATA</a></code>.
+
+ <li>
+ <p class=note>A number of attributes, including <code
+ title=dom-media-duration><a href="#duration">duration</a></code>,
+ <code title=dom-media-buffered><a
+ href="#buffered">buffered</a></code>, and <code
+ title=dom-media-played><a href="#played">played</a></code>, become
+ available.
+
+ <li>
+ <p class=note>The user agent will <a href="#firing2">fire a simple
+ event</a> called <code title=event-durationchange><a
+ href="#durationchange">durationchange</a></code> at the element at
+ this point.
+
+ <li>
+ <p>The user agent must <a href="#firing2">fire a simple event</a>
+ called <code title=event-loadedmetadata><a
+ href="#loadedmetadata0">loadedmetadata</a></code> at the element.
+ </ol>
+
+ <dt id=handling-first-frame-available>Once enough of the <a
+ href="#media7">media data</a> has been downloaded to enable the user
+ agent to display the first frame of the <a href="#media8">media
+ resource</a>
+
+ <dd>
+ <p>The user agent must follow these substeps:</p>
+
+ <ol>
+ <li>
+ <p>The <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute must be set
+ to <code title=dom-media-LOADED_FIRST_FRAME><a
+ href="#loadedfirstframe">LOADED_FIRST_FRAME</a></code>.
+
+ <li>
+ <p>The <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute must change to
+ <code title=dom-media-CAN_SHOW_CURRENT_FRAME><a
+ href="#canshowcurrentframe">CAN_SHOW_CURRENT_FRAME</a></code>.
+
+ <li>
+ <p>The <a href="#loaded-first-frame">loaded-first-frame flag</a> must
+ be set to true.
+
+ <li>
+ <p>The user agent must <a href="#firing2">fire a simple event</a>
+ called <code title=event-loadedfirstframe>loadedfirstframe</code> at
+ the element.
+
+ <li>
+ <p>The user agent must <a href="#firing2">fire a simple event</a>
+ called <code title=event-canshowcurrentframe><a
+ href="#canshowcurrentframe0">canshowcurrentframe</a></code> at the
+ element.
+ </ol>
+ </dl>
+
+ <p>When the user agent has completed the download of the entire <a
+ href="#media8">media resource</a>, it must move on to the next step.</p>
+
+ <li>
+ <p>If the download completes without errors, the <a href="#begun">begun
+ flag</a> must be set to false and the user agent must <a
+ href="#firing6">fire a progress event</a> called <code
+ title=event-load><a href="#load0">load</a></code> at the element.
+ </ol>
+
+ <p>If a <a href="#media5">media element</a> whose <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> has the value <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> is inserted into a
+ document, user agents must implicitly invoke the <code
+ title=dom-media-load><a href="#load">load()</a></code> method on the <a
+ href="#media5">media element</a> as soon as all other scripts have
+ finished executing<!-- XXX phrase that better? -->. Any exceptions raised
+ must be ignored.
+
+ <p>The <dfn id=bufferingrate
+ title=dom-media-bufferingRate><code>bufferingRate</code></dfn> attribute
+ must return the average number of bits received per second for the current
+ download over the past few seconds. If there is no download in progress,
+ the attribute must return 0.
+
+ <p>The <dfn id=buffered
+ title=dom-media-buffered><code>buffered</code></dfn> attribute must return
+ a static <a href="#normalised">normalised <code>TimeRanges</code>
+ object</a> that represents the ranges of the <a href="#media8">media
+ resource</a>, if any, that the user agent has downloaded, at the time the
+ attribute is evaluated.
+
+ <p class=note>Typically this will be a single range anchored at the zero
+ point, but if, e.g. the user agent uses HTTP range requests in response to
+ seeking, then there could be multiple ranges.
+
+ <h5 id=offsets><span class=secno>3.14.9.5. </span>Offsets into the media
+ resource</h5>
+
+ <p>The <dfn id=duration
+ title=dom-media-duration><code>duration</code></dfn> attribute must return
+ the length of the <a href="#media8">media resource</a>, in seconds. If no
+ <a href="#media7">media data</a> is available, then the attribute must
+ return 0. If <a href="#media7">media data</a> is available but the length
+ is not known, the attribute must return the Not-a-Number (NaN) value. If
+ the <a href="#media8">media resource</a> is known to be unbounded (e.g. a
+ streaming radio), then the attribute must return the positive Infinity
+ value.
+
+ <p>When the length of the <a href="#media8">media resource</a> changes
+ (e.g. from being unknown to known, or from indeterminate to known, or from
+ a previously established length to a new length) the user agent must, once
+ any running scripts have finished, <a href="#firing2">fire a simple
+ event</a> called <code title=event-durationchange><a
+ href="#durationchange">durationchange</a></code> at the <a
+ href="#media5">media element</a>.
+
+ <p><a href="#media5" title="media element">Media elements</a> have a <dfn
+ id=current>current playback position</dfn>, which must initially be zero.
+ The current position is a time.
+
+ <p>The <dfn id=currenttime
+ title=dom-media-currentTime><code>currentTime</code></dfn> attribute must,
+ on getting, return the <a href="#current">current playback position</a>,
+ expressed in seconds. On setting, the user agent must <a href="#seek"
+ title=dom-media-seek>seek</a> to the new value.
+
+ <p>The <dfn id=start2 title=attr-media-start><code>start</code></dfn>
+ content attribute gives the offset into the <a href="#media8">media
+ resource</a> at which playback is to begin. The default value is 0.
+
+ <p>The <dfn id=effective><var>effective start</var></dfn> is the smaller of
+ <code title=dom-media-start><a href="#start3">start</a></code> and the end
+ of the <a href="#media8">media resource</a>.
+
+ <p>
+
+ <p>The <dfn id=loopstart
+ title=attr-media-loopstart><code>loopstart</code></dfn> content attribute
+ gives the offset into the <a href="#media8">media resource</a> at which
+ playback is to begin when looping a clip. The default value is 0.
+
+ <p>The <dfn id=effective0><var>effective loop start</var></dfn> is the
+ smaller of <code title=dom-media-loopStart><a
+ href="#loopstart0">loopStart</a></code> and the end of the <a
+ href="#media8">media resource</a>.
+
+ <p>
+
+ <p>The <dfn id=loopend title=attr-media-loopend><code>loopend</code></dfn>
+ content attribute gives an offset into the <a href="#media8">media
+ resource</a> at which playback is to jump back to the <code
+ title=attr-media-loopstart><a href="#loopstart">loopstart</a></code>, when
+ looping the clip. The default value is infinity.
+
+ <p>The <dfn id=effective1><var>effective loop end</var></dfn> is the
+ greater of <code title=dom-media-start><a href="#start3">start</a></code>,
+ <code title=dom-media-loopStart><a
+ href="#loopstart0">loopStart</a></code>, and <code
+ title=dom-media-loopEnd><a href="#loopend0">loopEnd</a></code>, and the
+ end of the <a href="#media8">media resource</a>.
+
+ <p>
+
+ <p>The <dfn id=end title=attr-media-end><code>end</code></dfn> content
+ attribute gives an offset into the <a href="#media8">media resource</a> at
+ which playback is to end. The default value is infinity.
+
+ <p>The <dfn id=effective2><var>effective end</var></dfn> is the greater of
+ <code title=dom-media-start><a href="#start3">start</a></code>, <code
+ title=dom-media-loopStart><a href="#loopstart0">loopStart</a></code>,
+ <code title=dom-media-end><a href="#end0">end</a></code>, and the
+ end of the <a href="#media8">media resource</a>.
+
+ <p>
+
+ <p>The <code title=attr-media-start><a href="#start2">start</a></code>,
+ <code title=attr-media-loopstart><a
+ href="#loopstart">loopstart</a></code>, <code title=attr-media-loopend><a
+ href="#loopend">loopend</a></code>, and <code title=attr-media-end><a
+ href="#end">end</a></code> attributes must, if specified, contain <span
+ title="value time offset">value time offsets</span>. To get the time
+ values they represent, user agents must use the <a href="#rules4">rules
+ for parsing time offsets</a>.
+
+ <p>The <dfn id=start3 title=dom-media-start><code>start</code></dfn>, <dfn
+ id=loopstart0 title=dom-media-loopStart><code>loopStart</code></dfn>, <dfn
+ id=loopend0 title=dom-media-loopEnd><code>loopEnd</code></dfn>, and <dfn
+ id=end0 title=dom-media-end><code>end</code></dfn> DOM attributes must <a
+ href="#reflect">reflect</a> the <code title=attr-media-start><a
+ href="#start2">start</a></code>, <code title=attr-media-loopstart><a
+ href="#loopstart">loopstart</a></code>, <code title=attr-media-loopend><a
+ href="#loopend">loopend</a></code>, and <code title=attr-media-end><a
+ href="#end">end</a></code> content attributes on the <a
+ href="#media5">media element</a> respectively.
+
+ <p>The <dfn id=loopcount
+ title=attr-media-loopcount><code>loopcount</code></dfn> content attribute
+ gives the number of times to play the clip. The default value is 1.
+
+ <p>The <dfn id=loopcount0
+ title=dom-media-loopCount><code>loopCount</code></dfn> DOM attribute must
+ <a href="#reflect">reflect</a> the <code title=attr-media-loopcount><a
+ href="#loopcount">loopcount</a></code> content attribute on the <a
+ href="#media5">media element</a>. The value must be <a
+ href="#limited0">limited to only positive non-zero numbers</a>.
+
+ <p>The <dfn id=currentloop
+ title=dom-media-currentLoop><code>currentLoop</code></dfn> attribute must
+ initially have the value 0. It gives the index of the current loop. It is
+ changed during playback as described below.
+
+ <p>When any of the <code title=dom-media-start><a
+ href="#start3">start</a></code>, <code title=dom-media-loopStart><a
+ href="#loopstart0">loopStart</a></code>, <code title=dom-media-loopEnd><a
+ href="#loopend0">loopEnd</a></code>, <code title=dom-media-end><a
+ href="#end0">end</a></code>, and <code title=dom-media-loopCount><a
+ href="#loopcount0">loopCount</a></code> DOM attributes change value
+ (either through content attribute mutations reflecting into the DOM
+ attribute, or direct mutations of the DOM attribute), the user agent must
+ apply the following steps:
+
+ <ol>
+ <li>
+ <p>If the <code title=dom-media-loopCount><a
+ href="#loopcount0">loopCount</a></code> DOM attribute's value is less
+ than the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> DOM attribute's value, then
+ the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> DOM attribute's value must be
+ set to the value of the <code title=dom-media-loopCount><a
+ href="#loopcount0">loopCount</a></code> DOM attribute (which
+ will make the current loop the last loop).
+
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is in the <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> state or the
+ <code title=dom-media-LOADING><a href="#loading0">LOADING</a></code>
+ state, then the user agent must at this point abort these steps.
+
+ <li>
+ <p>If the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is zero, and the <a
+ href="#current">current playback position</a> is before the <var><a
+ href="#effective">effective start</a></var>, the user agent must <a
+ href="#seek" title=dom-media-seek>seek</a> to the <var><a
+ href="#effective">effective start</a></var>.
+
+ <li>
+ <p>If the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is greater than zero, and the
+ <a href="#current">current playback position</a> is before the <var><a
+ href="#effective0">effective loop start</a></var>, the user agent must
+ <a href="#seek" title=dom-media-seek>seek</a> to the <var><a
+ href="#effective0">effective loop start</a></var>.
+
+ <li>
+ <p>If the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is less than <code
+ title=dom-media-loopCount><a href="#loopcount0">loopCount</a></code>,
+ and the <a href="#current">current playback position</a> is after the
+ <var><a href="#effective1">effective loop end</a></var>, the user agent
+ must <a href="#seek" title=dom-media-seek>seek</a> to the <var><a
+ href="#effective0">effective loop start</a></var>, and increase <code
+ title=dom-media-currentLoop><a href="#currentloop">currentLoop</a></code> by
+ 1.
+
+ <li>
+ <p>If the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is equal to the <code
+ title=dom-media-loopCount><a href="#loopcount0">loopCount</a></code>,
+ and the <a href="#current">current playback position</a> is after the
+ <var><a href="#effective2">effective end</a></var>, the user agent must
+ <a href="#seek" title=dom-media-seek>seek</a> to the <var><a
+ href="#effective2">effective end</a></var> and then the looping will
+ end.
+ </ol>
+
+ <h5 id=the-ready><span class=secno>3.14.9.6. </span>The ready states</h5>
+
+ <p><a href="#media5" title="media element">Media elements</a> have a
+ <em>ready state</em>, which describes to what degree they are ready to be
+ rendered at the <a href="#current">current playback position</a>. The
+ possible values are as follows; the ready state of a media element at any
+ particular time is the greatest value describing the state of the element:
+
+ <dl>
+ <dt><dfn id=dataunavailable
+ title=dom-media-DATA_UNAVAILABLE><code>DATA_UNAVAILABLE</code></dfn>
+ (numeric value 0)
+
+ <dd>No data for the <a href="#current">current playback position</a> is
+ available. <a href="#media5" title="media element">Media elements</a>
+ whose <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute is less than <code
+ title=dom-media-LOADED_FIRST_FRAME><a
+ href="#loadedfirstframe">LOADED_FIRST_FRAME</a></code> are always in the
+ <code title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code> state.
+
+ <dt><dfn id=canshowcurrentframe
+ title=dom-media-CAN_SHOW_CURRENT_FRAME><code>CAN_SHOW_CURRENT_FRAME</code></dfn>
+ (numeric value 1)
+
+ <dd>Data for the immediate <a href="#current">current playback
+ position</a> is available, but not enough data is available that the user
+ agent could successfully advance the <a href="#current">current playback
+ position</a> at all without immediately reverting to the <code
+ title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code> state. In video, this
+ corresponds to the user agent having data from the current frame, but not
+ the next frame. In audio, this corresponds to the user agent only having
+ audio up to the <a href="#current">current playback position</a>, but no
+ further.
+
+ <dt><dfn id=canplay title=dom-media-CAN_PLAY><code>CAN_PLAY</code></dfn>
+ (numeric value 2)
+
+ <dd>Data for the immediate <a href="#current">current playback
+ position</a> is available, as well as enough data for the user agent to
+ advance the <a href="#current">current playback position</a> at least a
+ little without immediately reverting to the <code
+ title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code> state. In video, this
+ corresponds to the user agent having data for the current frame and the
+ next frame. In audio, this corresponds to the user agent having data
+ beyond the <a href="#current">current playback position</a>.
+
+ <dt><dfn id=canplaythrough
+ title=dom-media-CAN_PLAY_THROUGH><code>CAN_PLAY_THROUGH</code></dfn>
+ (numeric value 3)
+
+ <dd>Data for the immediate <a href="#current">current playback
+ position</a> is available, as well as enough data for the user agent to
+ advance the <a href="#current">current playback position</a> at least a
+ little without immediately reverting to the <code
+ title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code> state, and, in
+ addition, the user agent estimates that data is being downloaded at a
+ rate where the <a href="#current">current playback position</a>, if it
+ were to advance at the rate given by the <code
+ title=dom-media-defaultPlaybackRate><a
+ href="#defaultplaybackrate">defaultPlaybackRate</a></code> attribute,
+ would not overtake the available data before playback reaches the <a
+ href="#effective2">effective end</a> of the <a href="#media8">media
+ resource</a> on the last <a href="#loopcount0"
+ title=dom-media-loopCount>loop</a>.
+ </dl>
+
+ <p>When the ready state of a <a href="#media5">media element</a> whose
+ <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is not <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> changes, the user
+ agent must follow the steps given below:
+
+ <dl class=switch>
+ <dt>If the new ready state is <code title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code>
+
+ <dd>
+ <p>The user agent must <a href="#firing2">fire a simple event</a> called
+ <code title=event-dataunavailable><a
+ href="#dataunavailable0">dataunavailable</a></code> at the element.
+
+ <dt>If the new ready state is <code
+ title=dom-media-CAN_SHOW_CURRENT_FRAME><a
+ href="#canshowcurrentframe">CAN_SHOW_CURRENT_FRAME</a></code>
+
+ <dd>
+ <p>If the element's <a href="#loaded-first-frame">loaded-first-frame
+ flag</a> is true, the user agent must <a href="#firing2">fire a simple
+ event</a> called <code title=event-canshowcurrentframe><a
+ href="#canshowcurrentframe0">canshowcurrentframe</a></code> at the element.</p>
+
+ <p class=note>The first time the <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute switches to this
+ value, the <a href="#loaded-first-frame">loaded-first-frame flag</a> is
+ false, and the event is fired <a
+ href="#handling-first-frame-available">by the algorithm described
+ above</a> for the <code title=dom-media-load><a
+ href="#load">load()</a></code> method, in conjunction with other steps.</p>
+
+ <dt>If the new ready state is <code title=dom-media-CAN_PLAY><a
+ href="#canplay">CAN_PLAY</a></code>
+
+ <dd>
+ <p>The user agent must <a href="#firing2">fire a simple event</a> called
+ <code title=event-canplay><a href="#canplay0">canplay</a></code>.
+
+ <dt>If the new ready state is <code title=dom-media-CAN_PLAY_THROUGH><a
+ href="#canplaythrough">CAN_PLAY_THROUGH</a></code>
+
+ <dd>
+ <p>The user agent must <a href="#firing2">fire a simple event</a> called
+ <code title=event-canplaythrough><a
+ href="#canplaythrough0">canplaythrough</a></code> at the element. If the <a
+ href="#autoplaying">autoplaying flag</a> is true, and the <code
+ title=dom-media-paused><a href="#paused">paused</a></code> attribute is
+ true, and the <a href="#media5">media element</a> has an <code
+ title=attr-media-autoplay><a href="#autoplay">autoplay</a></code>
+ attribute specified, then the user agent must also set the <code
+ title=dom-media-paused><a href="#paused">paused</a></code> attribute to
+ false and <a href="#firing2">fire a simple event</a> called <code
+ title=event-play><a href="#play0">play</a></code>.
+ </dl>
+
+ <p>The <dfn id=readystate
+ title=dom-media-readyState><code>readyState</code></dfn> DOM attribute
+ must, on getting, return the value described above that describes the
+ current ready state of the <a href="#media5">media element</a>.
+
+ <p>The <dfn id=autoplay
+ title=attr-media-autoplay><code>autoplay</code></dfn> attribute is a <a
+ href="#boolean0">boolean attribute</a>. When present, the algorithm
+ described herein will cause the user agent to automatically begin playback
+ of the <a href="#media8">media resource</a> as soon as it can do so
+ without stopping.
+
+ <p>The <dfn id=autoplay0
+ title=dom-media-autoplay><code>autoplay</code></dfn> DOM attribute must <a
+ href="#reflect">reflect</a> the content attribute of the same name.
+
+ <h5 id=playing><span class=secno>3.14.9.7. </span>Playing the media
+ resource</h5>
+
+ <p>The <dfn id=paused title=dom-media-paused><code>paused</code></dfn>
+ attribute represents whether the <a href="#media5">media element</a> is
+ paused or not. The attribute must initially be true.
+
+ <p>A <a href="#media5">media element</a> is said to be <dfn
+ id=actively>actively playing</dfn> when its <code
+ title=dom-media-paused><a href="#paused">paused</a></code> attribute is
+ false, the <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute is either <code
+ title=dom-media-CAN_PLAY><a href="#canplay">CAN_PLAY</a></code> or <code
+ title=dom-media-CAN_PLAY_THROUGH><a
+ href="#canplaythrough">CAN_PLAY_THROUGH</a></code>, the element has not <a
+ href="#ended">ended playback</a>, playback has not <a
+ href="#stopped">stopped due to errors</a>, and the element has not <a
+ href="#paused0">paused for user interaction</a>.
+
+ <p>A <a href="#media5">media element</a> is said to have <dfn
+ id=ended>ended playback</dfn> when the element's <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute is <code
+ title=dom-media-LOADED_METADATA><a
+ href="#loadedmetadata">LOADED_METADATA</a></code> or greater, the <a
+ href="#current">current playback position</a> is equal to the <var><a
+ href="#effective2">effective end</a></var> of the <a href="#media8">media
+ resource</a>, and the <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> attribute is equal to the <code
+ title=dom-media-loopCount><a href="#loopcount0">loopCount</a></code> DOM
+ attribute.
+
+ <p>A <a href="#media5">media element</a> is said to have <dfn
+ id=stopped>stopped due to errors</dfn> when the element's <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute is <code
+ title=dom-media-LOADED_METADATA><a
+ href="#loadedmetadata">LOADED_METADATA</a></code> or greater, and the user
+ agent has <a href="#non-fatal-media-error">encountered a non-fatal
+ error</a> during the processing of the <a href="#media7">media data</a>,
+ and due to that error, is not able to play the content at the <a
+ href="#current">current playback position</a>.
+
+ <p>A <a href="#media5">media element</a> is said to have <dfn
+ id=paused0>paused for user interaction</dfn> when its <code
+ title=dom-media-paused><a href="#paused">paused</a></code> attribute is
+ false, the <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute is either <code
+ title=dom-media-CAN_PLAY><a href="#canplay">CAN_PLAY</a></code> or <code
+ title=dom-media-CAN_PLAY_THROUGH><a
+ href="#canplaythrough">CAN_PLAY_THROUGH</a></code> and the user agent has
+ reached a point in the <a href="#media8">media resource</a> where the user
+ has to make a selection for the resource to continue.
+
+ <p>It is possible for a <a href="#media5">media element</a> to have both <a
+ href="#ended">ended playback</a> and <a href="#paused0">paused for user
+ interaction</a> at the same time.
+
+ <p>When a <a href="#media5">media element</a> is <a
+ href="#actively">actively playing</a>, its <a href="#current">current
+ playback position</a> must increase monotonically at <code
+ title=dom-media-playbackRate><a
+ href="#playbackrate">playbackRate</a></code> units of media time per unit
+ time of wall clock time. If this value is not 1, the user agent may apply
+ pitch adjustments to any audio component of the <a href="#media8">media
+ resource</a>.
+
+ <p><a href="#media8" title="media resource">Media resources</a> might be
+ internally scripted or interactive. Thus, a <a href="#media5">media
+ element</a> could play in a non-linear fashion. If this happens, the user
+ agent must act as if the algorithm for <a href="#seek"
+ title=dom-media-seek>seeking</a> was used whenever the <a
+ href="#current">current playback position</a> changes in a discontinuous
+ fashion (so that the relevant events fire).
+
+ <p>When a <a href="#media5">media element</a> that is <a
+ href="#actively">actively playing</a> stops playing because its <code
+ title=dom-media-readyState><a href="#readystate">readyState</a></code>
+ attribute changes to a value lower than <code title=dom-media-CAN_PLAY><a
+ href="#canplay">CAN_PLAY</a></code>, without the element having <a
+ href="#ended">ended playback</a>, or playback having <a
+ href="#stopped">stopped due to errors</a>, or playback having <a
+ href="#paused0">paused for user interaction</a>, the user agent must <a
+ href="#firing2">fire a simple event</a> called <code
+ title=event-timeupdate><a href="#timeupdate">timeupdate</a></code> at the
+ element, and then must <a href="#firing2">fire a simple event</a> called
+ <code title=event-waiting><a href="#waiting">waiting</a></code> at the
+ element.
+
+ <p>When a <a href="#media5">media element</a> that is <a
+ href="#actively">actively playing</a> stops playing because it has <a
+ href="#paused0">paused for user interaction</a>, the user agent must <a
+ href="#firing2">fire a simple event</a> called <code
+ title=event-timeupdate><a href="#timeupdate">timeupdate</a></code> at the
+ element.
+
+ <p>When <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is less than <span><code
+ title=dom-media-loopCount><a
+ href="#loopcount0">loopCount</a></code>-1</span> and the <a
+ href="#current">current playback position</a> reaches the <var><a
+ href="#effective1">effective loop end</a></var>, then the user agent must
+ <a href="#seek" title=dom-media-seek>seek</a> to the <var><a
+ href="#effective0">effective loop start</a></var>, increase <code
+ title=dom-media-currentLoop><a href="#currentloop">currentLoop</a></code> by 1,
+ and <a href="#firing2">fire a simple event</a> called <code
+ title=event-timeupdate><a href="#timeupdate">timeupdate</a></code>.
+
+ <p>When <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is equal to the <span><code
+ title=dom-media-loopCount><a
+ href="#loopcount0">loopCount</a></code>-1</span> and the <a
+ href="#current">current playback position</a> reaches the <var><a
+ href="#effective2">effective end</a></var>, then the user agent must
+ follow these steps:
+
+ <ol>
+ <li>
+ <p>The user agent must stop playback.
+
+ <li>
+ <p>The <code title=dom-media-ended><a href="#ended0">ended</a></code>
+ attribute becomes true, as described below.
+
+ <li>
+ <p>The user agent must <a href="#firing2">fire a simple event</a> called
+ <code title=event-timeupdate><a href="#timeupdate">timeupdate</a></code>
+ at the element.
+
+ <li>
+ <p>The user agent must <a href="#firing2">fire a simple event</a> called
+ <code title=event-ended><a href="#ended1">ended</a></code> at the
+ element.
+ </ol>
+
+ <p>The <dfn id=defaultplaybackrate
+ title=dom-media-defaultPlaybackRate><code>defaultPlaybackRate</code></dfn>
+ attribute gives the desired speed at which the <a href="#media8">media
+ resource</a> is to play, as a multiple of its intrinsic speed. The
+ attribute is mutable, but on setting, if the new value is 0.0, a
+ <code>NOT_SUPPORTED_ERR</code> exception must be raised instead of the
+ value being changed. It must initially have the value 1.0.
+
+ <p>The <dfn id=playbackrate
+ title=dom-media-playbackRate><code>playbackRate</code></dfn> attribute
+ gives the speed at which the <a href="#media8">media resource</a> plays,
+ as a multiple of its intrinsic speed. If it is not equal to the <code
+ title=dom-media-defaultPlaybackRate><a
+ href="#defaultplaybackrate">defaultPlaybackRate</a></code>, then the
+ implication is that the user is using a feature such as fast forward or
+ slow motion playback. The attribute is mutable, but on setting, if the new
+ value is 0.0, a <code>NOT_SUPPORTED_ERR</code> exception must be raised
+ instead of the value being changed. Otherwise, the playback must change
+ speed (if the element is <a href="#actively">actively playing</a>). It
+ must initially have the value 1.0.
+
+ <p>When the <code title=dom-media-defaultPlaybackRate><a
+ href="#defaultplaybackrate">defaultPlaybackRate</a></code> or <code
+ title=dom-media-playbackRate><a
+ href="#playbackrate">playbackRate</a></code> attributes change value
+ (either by being set by script or by being changed directly by the user
+ agent, e.g. in response to user control) the user agent must, once any
+ running scripts have finished, <a href="#firing2">fire a simple event</a>
+ called <code title=event-ratechange>ratechange</code> at the <a
+ href="#media5">media element</a>.
+
+ <p>When the <dfn id=play title=dom-media-play><code>play()</code></dfn>
+ method on a <a href="#media5">media element</a> is invoked, the user agent
+ must run the following steps.
+
+ <ol>
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute has the value <a
+ href="#empty" title=dom-media-EMPTY>EMPTY</a>, then the user agent must
+ synchronously invoke the <code title=dom-media-load><a
+ href="#load">load()</a></code> method. If that raises an exception, that
+ exception must be reraised by the <code title=dom-media-play><a
+ href="#play">play()</a></code> method.
+
+ <li>
+ <p>If the <a href="#ended" title="ended playback">playback has ended</a>,
+ then the user agent must set <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> to zero and <a href="#seek"
+ title=dom-media-seek>seek</a> to the <var><a href="#effective">effective
+ start</a></var>.</p>
+
+ <li>
+ <p>The <code title=dom-media-playbackRate><a
+ href="#playbackrate">playbackRate</a></code> attribute must be set to
+ the value of the <code title=dom-media-defaultPlaybackRate><a
+ href="#defaultplaybackrate">defaultPlaybackRate</a></code> attribute.
+
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-paused><a href="#paused">paused</a></code> attribute is
+ true, it must be set to false.
+
+ <li>
+ <p>The <a href="#media5">media element</a>'s <a
+ href="#autoplaying">autoplaying flag</a> must be set to false.
+
+ <li>
+ <p>The method must then return.
+ </ol>
+
+ <p class=note>If the second step above involved a seek, the user agent will
+ <a href="#firing2">fire a simple event</a> called <code
+ title=event-timeupdate><a href="#timeupdate">timeupdate</a></code> at the
+ <a href="#media5">media element</a>.
+
+ <p class=note>If the third step above caused the <code
+ title=dom-media-playbackRate><a
+ href="#playbackrate">playbackRate</a></code> attribute to change value,
+ the user agent will <a href="#firing2">fire a simple event</a> called
+ <code title=event-ratechange>ratechange</code> at the <a
+ href="#media5">media element</a>.
+
+ <ul>
+ <li>
+ <p>If the fourth step above changed the value of <code
+ title=dom-media-paused><a href="#paused">paused</a></code>, the user
+ agent must <a href="#firing2">fire a simple event</a> called <code
+ title=event-play><a href="#play0">play</a></code> at the <a
+ href="#media5">media element</a>.
+ </ul>
+
+ <p>When the <dfn id=pause0 title=dom-media-pause><code>pause()</code></dfn>
+ method is invoked, the user agent must run the following steps:
+
+ <ol>
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> attribute has the value <a
+ href="#empty" title=dom-media-EMPTY>EMPTY</a>, then the user agent must
+ synchronously invoke the <code title=dom-media-load><a
+ href="#load">load()</a></code> method. If that raises an exception, that
+ exception must be reraised by the <code title=dom-media-pause><a
+ href="#pause0">pause()</a></code> method.
+
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-paused><a href="#paused">paused</a></code> attribute is
+ false, it must be set to true.
+
+ <li>
+ <p>The <a href="#media5">media element</a>'s <a
+ href="#autoplaying">autoplaying flag</a> must be set to false.
+
+ <li>
+ <p>The method must then return.
+
+ <li>
+ <p>If the second step above changed the value of <code
+ title=dom-media-paused><a href="#paused">paused</a></code>, the user
+ agent must first <a href="#firing2">fire a simple event</a> called <code
+ title=event-timeupdate><a href="#timeupdate">timeupdate</a></code> at
+ the element, and then <a href="#firing2">fire a simple event</a> called
+ <code title=event-pause>pause</code> at the element.
+ </ol>
+
+ <p>The <dfn id=ended0 title=dom-media-ended><code>ended</code></dfn>
+ attribute must return true if the <a href="#media5">media element</a> has
+ <a href="#ended">ended playback</a>, and false otherwise.
+
+ <p>The <dfn id=played title=dom-media-played><code>played</code></dfn>
+ attribute must return a static <a href="#normalised">normalised
+ <code>TimeRanges</code> object</a> that represents the ranges of the <a
+ href="#media8">media resource</a>, if any, that the user agent has so far
+ rendered, at the time the attribute is evaluated.
+
+ <h5 id=seeking><span class=secno>3.14.9.8. </span>Seeking</h5>
+
+ <p>The <dfn id=seeking0 title=dom-media-seeking><code>seeking</code></dfn>
+ attribute must initially have the value false.
+
+ <p>When the user agent is required to <dfn id=seek
+ title=dom-media-seek>seek</dfn> to a particular <var title="">new playback
+ position</var> in the <a href="#media8">media resource</a>, it means that
+ the user agent must run the following steps:
+
+ <ol>
+ <li>
+ <p>If the <a href="#media5">media element</a>'s <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is less than <code
+ title=dom-media-LOADED_METADATA><a
+ href="#loadedmetadata">LOADED_METADATA</a></code>, then the user agent
+ must raise an <code>INVALID_STATE_ERR</code> exception (if the seek was
+ in response to a DOM method call or setting of a DOM attribute), and
+ abort these steps.
+
+ <li>
+ <p>If <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is 0, let <var
+ title="">min</var> be the <var><a href="#effective">effective
+ start</a></var>. Otherwise, let it be the <var><a
+ href="#effective0">effective loop start</a></var>.
+
+ <li>
+ <p>If <code title=dom-media-currentLoop><a
+ href="#currentloop">currentLoop</a></code> is equal to the value of
+ <code title=dom-media-loopCount><a
+ href="#loopcount0">loopCount</a></code>, let <var title="">max</var> be
+ the <var><a href="#effective2">effective end</a></var>. Otherwise, let
+ it be the <var><a href="#effective1">effective loop end</a></var>.
+
+ <li>
+ <p>If the <var title="">new playback position</var> is more than <var
+ title="">max</var>, let it be <var title="">max</var>.
+
+ <li>
+ <p>If the <var title="">new playback position</var> is less than <var
+ title="">min</var>, let it be <var title="">min</var>.
+
+ <li>
+ <p>If the (possibly now changed) <var title="">new playback
+ position</var> is not in one of the ranges given in the <code
+ title=dom-media-seekable><a href="#seekable">seekable</a></code>
+ attribute, then the user agent must raise an <code>INDEX_SIZE_ERR</code>
+ exception (if the seek was in response to a DOM method call or setting
+ of a DOM attribute), and abort these steps.
+
+ <li>
+ <p>The <a href="#current">current playback position</a> must be set to
+ the given <var title="">new playback position</var>.
+
+ <li>
+ <p>The <code title=dom-media-seeking><a
+ href="#seeking0">seeking</a></code> DOM attribute must be set to true.
+
+ <li>
+ <p>The user agent must <a href="#firing2">fire a simple event</a> called
+ <code title=event-timeupdate><a href="#timeupdate">timeupdate</a></code>
+ at the element.
+
+ <li>
+ <p>As soon as the user agent has established whether or not the <a
+ href="#media7">media data</a> for the <var title="">new playback
+ position</var> is available, and, if it is, decoded enough data to play
+ back that position, the <code title=dom-media-seeking><a
+ href="#seeking0">seeking</a></code> DOM attribute must be set to false.
+ </ol>
+
+ <p>The <dfn id=seekable
+ title=dom-media-seekable><code>seekable</code></dfn> attribute must return
+ a static <a href="#normalised">normalised <code>TimeRanges</code>
+ object</a> that represents the ranges of the <a href="#media8">media
+ resource</a>, if any, that the user agent is able to seek to, at the time
+ the attribute is evaluated, notwithstanding the looping attributes (i.e.
+ the <var><a href="#effective">effective start</a></var> and <var><a
+ href="#effective2">effective end</a></var>, etc, don't affect the <code
+ title=dom-media-seekable><a href="#seekable">seekable</a></code>
+ attribute).
+
+ <p class=note>If the user agent can seek to anywhere in the <a
+ href="#media8">media resource</a>, e.g. because the user agent and the
+ server support HTTP Range requests, then the attribute would return an
+ object with one range, whose start is 0, and whose end is the same as the
+ <code title=dom-media-duration><a href="#duration">duration</a></code>
+ attribute's value.
+
+ <h5 id=cue-points><span class=secno>3.14.9.9. </span>Cue points</h5>
+
+ <p><a href="#media5" title="media element">Media elements</a> have an
+ ordered list of (<var title="">time</var>, <var title="">callback</var>)
+ tuples called the <dfn id=cue-point>cue point list</dfn>. Each entry in
+ the list also has a boolean <var title="">pause</var> associated with it.
+
+ <p>The <dfn id=addcuepoint
+ title=dom-media-addCuePoint><code>addCuePoint(<var title="">time</var>,
+ <var title="">callback</var>, <var title="">pause</var>)</code></dfn>
+ method must, when called, add a tuple made of the given <var
+ title="">time</var> and <var title="">callback</var> to the element's <a
+ href="#cue-point">cue point list</a>, and associate with that entry the
+ value of the <var title="">pause</var> argument.
+
+ <p>The <dfn id=removecuepoint
+ title=dom-media-removeCuePoint><code>removeCuePoint(<var
+ title="">time</var>, <var title="">callback</var>)</code></dfn> method
+ must, when called, remove the first tuple matching the given <var
+ title="">time</var> and <var title="">callback</var> from the element's <a
+ href="#cue-point">cue point list</a>.
+
+ <p>When the <a href="#current">current playback position</a> of a <a
+ href="#media5">media element</a> reaches one of the times given in the
+ element's <a href="#cue-point">cue point list</a>, the user agent must
+ follow these steps:
+
+ <ol>
+ <li>
+ <p>First, if any of the entries in the <a href="#cue-point">cue point
+ list</a> with that time have their associated <var title="">pause</var>
+ boolean set to true, then the user agent must immediately act as if the
+ element's <code title=dom-media-pause><a
+ href="#pause0">pause()</a></code> method had been invoked.
+
+ <li>
+ <p>The user agent must then <a href="#firing2">fire a simple event</a>
+ called <code title=event-timeupdate><a
+ href="#timeupdate">timeupdate</a></code> at the element.
+
+ <li>
+ <p>The user agent must then invoke all the non-null callbacks for all the
+ entries in the list that match the <a href="#current">current playback
+ position</a> time, in the order they were added to the list.
+ </ol>
+
+ <p>Invoking a callback (an object implementing the <code><a
+ href="#voidcallback">VoidCallback</a></code> interface) means calling its
+ <code title=dom-voidCallback-handleEvent><a
+ href="#handleevent">handleEvent()</a></code> method.
+
+ <pre class=idl>interface <dfn id=voidcallback>VoidCallback</dfn> {
+ void <a href="#handleevent" title=dom-voidCallback-handleEvent>handleEvent</a>();
+};</pre>
+
+ <p>The <dfn id=handleevent
+ title=dom-voidCallback-handleEvent><code>handleEvent</code></dfn> method
+ of objects implementing the <code><a
+ href="#voidcallback">VoidCallback</a></code> interface is the entrypoint
+ for the callback represented by the object.
+
+ <p>In the ECMAScript DOM binding, the ECMAScript native
+ <code>Function</code> type must implement the <code><a
+ href="#voidcallback">VoidCallback</a></code> interface such that invoking
+ the <code>handleEvent()</code> method of that interface on the object from
+ another language binding invokes the function itself. In the ECMAScript
+ binding itself, however, the <code>handleEvent()</code> method of the
+ interface is not directly accessible on <code>Function</code> objects.
+ Such functions, when invoked, must be called at the scope of the <a
+ href="#browsing0">browsing context</a>.</p>
+ <!--
+ XXX if you change this make sure to also look up the other mentions
+ of handleEvent() in this file -->
+
+ <h5 id=user-interface><span class=secno>3.14.9.10. </span>User interface</h5>
+
+ <p>The <dfn id=controls
+ title=attr-media-controls><code>controls</code></dfn> attribute is a <a
+ href="#boolean0">boolean attribute</a>. If the attribute is present, or if
+ <a href="#scripting1">scripting is disabled</a>, then the user agent
+ should <dfn id=expose>expose a user interface to the user</dfn>. This user
+ interface should include features to begin playback, pause playback, seek
+ to an arbitrary position in the content (if the content supports arbitrary
+ seeking), change the volume, and show the media content in manners more
+ suitable to the user (e.g. full-screen video or in an independent
+ resizable window). Other controls may also be made available.
+
+ <p>If the attribute is absent, then the user agent should avoid making a
+ user interface available that could conflict with an author-provided user
+ interface. User agents may make the following features available, however,
+ even when the attribute is absent:
+
+ <p>User agents may provide controls to affect playback of the media
+ resource (e.g. play, pause, seeking, and volume controls), but such
+ features should not interfere with the page's normal rendering. For
+ example, such features could be exposed in the <a href="#media5">media
+ element</a>'s context menu.
+
+ <p>Where possible (specifically, for starting, stopping, pausing, and
+ unpausing playback, for muting or changing the volume of the audio, and
+ for seeking), user interface features exposed by the user agent must be
+ implemented in terms of the DOM API described above, so that, e.g., all
+ the same events fire.
+
+ <p>The <dfn id=controls0
+ title=dom-media-controls><code>controls</code></dfn> DOM attribute must <a
+ href="#reflect">reflect</a> the content attribute of the same name.
+
+ <p>The <dfn id=volume title=dom-media-volume><code>volume</code></dfn>
+ attribute must return the playback volume of any audio portions of the <a
+ href="#media5">media element</a>, in the range 0.0 (silent) to 1.0
+ (loudest). Initially, the volume must be 0.5, but user agents may remember
+ the last set value across sessions, on a per-site basis or otherwise, so
+ the volume may start at other values. On setting, if the new value is in
+ the range 0.0 to 1.0 inclusive, the attribute must be set to the new value
+ and the playback volume must be correspondingly adjusted as soon as
+ possible after setting the attribute, with 0.0 being silent, and 1.0 being
+ the loudest setting, values in between increasing in loudness. The range
+ need not be linear. The loudest setting may be lower than the system's
+ loudest possible setting; for example the user could have set a maximum
+ volume. If the new value is outside the range 0.0 to 1.0 inclusive, then,
+ on setting, an <code>INDEX_SIZE_ERR</code> exception must be raised
+ instead.
+
+ <p>The <dfn id=muted title=dom-media-muted><code>muted</code></dfn>
+ attribute must return true if the audio channels are muted and false
+ otherwise. On setting, the attribute must be set to the new value; if the
+ new value is true, audio playback for this <a href="#media8">media
+ resource</a> must then be muted, and if false, audio playback must then be
+ enabled.
+
+ <p>Whenever either the <code title=dom-media-muted><a
+ href="#muted">muted</a></code> or <code title=dom-media-volume><a
+ href="#volume">volume</a></code> attributes are changed, after any running
+ scripts have finished executing, the user agent must <a
+ href="#firing2">fire a simple event</a> called <code
+ title=event-volumechange><a href="#volumechange">volumechange</a></code>
+ at the <a href="#media5">media element</a>.
+
+ <h5 id=time-range><span class=secno>3.14.9.11. </span>Time range</h5>
+
+ <p>Objects implementing the <code><a
+ href="#timeranges">TimeRanges</a></code> interface represent a list of
+ ranges (periods) of time.
+
+ <pre class=idl>interface <dfn id=timeranges>TimeRanges</dfn> {
+ readonly attribute unsigned long <a href="#length3" title=dom-TimeRanges-length>length</a>;
+ float <a href="#start4" title=dom-TimeRanges-start>start</a>(in unsigned long index);
+ float <a href="#endindex" title=dom-TimeRanges-end>end</a>(in unsigned long index);
+};</pre>
+
+ <p>The <dfn id=length3
+ title=dom-TimeRanges-length><code>length</code></dfn> DOM attribute must
+ return the number of ranges represented by the object.
+
+ <p>The <dfn id=start4 title=dom-TimeRanges-start><code>start(<var
+ title="">index</var>)</code></dfn> method must return the position of the
+ start of the <var title="">index</var>th range represented by the object,
+ in seconds measured from the start of the timeline that the object covers.
+
+ <p>The <dfn id=endindex title=dom-TimeRanges-end><code>end(<var
+ title="">index</var>)</code></dfn> method must return the position of the
+ end of the <var title="">index</var>th range represented by the object, in
+ seconds measured from the start of the timeline that the object covers.
+
+ <p>When a <code><a href="#timeranges">TimeRanges</a></code> object is said
+ to be a <dfn id=normalised>normalised <code>TimeRanges</code>
+ object</dfn>, the ranges it represents must obey the following criteria:
+
+ <ul>
+ <li>The start of a range must be greater than the end of all earlier
+ ranges.
+
+ <li>The start of a range must be less than the end of that same range.
+ </ul>
+
+ <p>In other words, the ranges in such an object are ordered, don't overlap,
+ and don't touch (adjacent ranges are folded into one bigger range).
+
+ <p>The timelines used by the objects returned by the <code
+ title=dom-media-buffered><a href="#buffered">buffered</a></code>, <code
+ title=dom-media-seekable><a href="#seekable">seekable</a></code> and <code
+ title=dom-media-played><a href="#played">played</a></code> DOM attributes
+ of <a href="#media5" title="media element">media elements</a> must be the
+ same as that element's <a href="#media8">media resource</a>'s timeline.
+
+ <h5 id=mediaevents><span class=secno>3.14.9.12. </span>Event summary</h5>
+
+ <p>The following events fire on <a href="#media5" title="media
+ element">media elements</a> as part of the processing model described
+ above:
+
+ <table>
+ <thead>
+ <tr>
+ <th>Event name
+
+ <th>Interface
+
+ <th>Dispatched when...
+
+ <th>Preconditions
+
+ <tbody>
+ <tr>
+ <td><dfn id=begin title=event-begin><code>begin</code></dfn>
+
+ <td><code>ProgressEvent</code> <a href="#refsPROGRESS">[PROGRESS]</a>
+
+ <td>The user agent begins fetching the <a href="#media7">media data</a>,
+ synchronously during the <code title=dom-media-load><a
+ href="#load">load()</a></code> method call.
+
+ <td><code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> equals <code
+ title=dom-media-LOADING><a href="#loading0">LOADING</a></code>
+
+ <tr>
+ <td><dfn id=progress0 title=event-progress><code>progress</code></dfn>
+
+ <td><code>ProgressEvent</code> <a href="#refsPROGRESS">[PROGRESS]</a>
+
+ <td>The user agent is fetching <a href="#media7">media data</a>.
+
+ <td><code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is more than <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> and less than
+ <code title=dom-media-LOADED><a href="#loaded">LOADED</a></code>
+
+ <tr>
+ <td><dfn id=loadedmetadata0
+ title=event-loadedmetadata><code>loadedmetadata</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The user agent is fetching <a href="#media7">media data</a>, and the
+ <a href="#media8">media resource</a>'s metadata has just been received.
+
+
+ <td><code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> equals <code
+ title=dom-media-LOADED_METADATA><a
+ href="#loadedmetadata">LOADED_METADATA</a></code>
+
+ <tr>
+ <td><dfn id=loadedfirstframe0
+ title=event-loadedfirstframe><code>loadedfirstframe</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The user agent is fetching <a href="#media7">media data</a>, and the
+ <a href="#media8">media resource</a>'s first frame has just been received.
+
+
+ <td><code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> equals <code
+ title=dom-media-LOADED_FIRST_FRAME><a
+ href="#loadedfirstframe">LOADED_FIRST_FRAME</a></code>
+
+ <tr>
+ <td><dfn id=load0 title=event-load><code>load</code></dfn>
+
+ <td><code>ProgressEvent</code> <a href="#refsPROGRESS">[PROGRESS]</a>
+
+ <td>The user agent finishes downloading the entire <a
+ href="#media8">media resource</a>.
+
+ <td><code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> equals <code
+ title=dom-media-LOADED><a href="#loaded">LOADED</a></code>
+
+ <tr>
+ <td><dfn id=abort title=event-abort><code>abort</code></dfn>
+
+ <td><code>ProgressEvent</code> <a href="#refsPROGRESS">[PROGRESS]</a>
+
+ <td>The user agent stops fetching the <a href="#media7">media data</a>
+ before it is completely downloaded. This can be fired synchronously
+ during the <code title=dom-media-load><a href="#load">load()</a></code>
+ method call.
+
+ <td><code title=dom-media-error><a href="#error0">error</a></code> is an
+ object with the code <code title=dom-MediaError-MEDIA_ERR_ABORTED><a
+ href="#mediaerraborted">MEDIA_ERR_ABORTED</a></code>. <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> equals either <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> or <code
+ title=dom-media-LOADED><a href="#loaded">LOADED</a></code>, depending
+ on when the download was aborted.
+
+ <tr>
+ <td><dfn id=error1 title=event-error><code>error</code></dfn>
+
+ <td><code>ProgressEvent</code> <a href="#refsPROGRESS">[PROGRESS]</a>
+
+ <td>An error occurs while fetching the <a href="#media7">media data</a>.
+
+
+ <td><code title=dom-media-error><a href="#error0">error</a></code> is an
+ object with the code <code
+ title=dom-MediaError-MEDIA_ERR_NETWORK_ERROR>MEDIA_ERR_NETWORK_ERROR</code>
+ or higher. <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> equals either <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> or <code
+ title=dom-media-LOADED><a href="#loaded">LOADED</a></code>, depending
+ on when the download was aborted.
+
+ <tr>
+ <td><dfn id=emptied title=event-emptied><code>emptied</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>A <a href="#media5">media element</a> whose <code
+ title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> was previously not in the
+ <code title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> state has
+ just switched to that state (either because of a fatal error during
+ load that's about to be reported, or because the <code
+ title=dom-media-load><a href="#load">load()</a></code> method was
+ reinvoked, in which case it is fired synchronously during the <code
+ title=dom-media-load><a href="#load">load()</a></code> method call).
+
+ <td><code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code>; all the DOM
+ attributes are in their initial states.
+
+ <tr>
+ <td><dfn id=stalled title=event-stalled><code>stalled</code></dfn>
+
+ <td><code>ProgressEvent</code>
+
+ <td>The user agent is trying to fetch <a href="#media7">media data</a>,
+ but data is unexpectedly not forthcoming.
+
+ <td>
+
+ <tr>
+ <td><dfn id=play0 title=event-play><code>play</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>Playback has begun. Fired after the <code title=dom-media-play><a
+ href="#play">play</a></code> method has returned.
+
+ <td><code title=dom-media-paused><a href="#paused">paused</a></code> is
+ newly false.
+
+ <tr>
+ <td><dfn id=pause1 title=event-pause><code>pause</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>Playback has been paused. Fired after the <code
+ title=dom-media-pause><a href="#pause0">pause</a></code> method has
+ returned.
+
+ <td><code title=dom-media-paused><a href="#paused">paused</a></code> is
+ newly true.
+
+ <tr>
+ <td><dfn id=waiting title=event-waiting><code>waiting</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>Playback has stopped because the next frame is not available, but
+ the user agent expects that frame to become available in due course.
+
+ <td><code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> is either <code
+ title=dom-media-DATA_UNAVAILABLE><a
+ href="#dataunavailable">DATA_UNAVAILABLE</a></code> or <code
+ title=dom-media-CAN_SHOW_CURRENT_FRAME><a
+ href="#canshowcurrentframe">CAN_SHOW_CURRENT_FRAME</a></code>, and
+ <code title=dom-media-paused><a href="#paused">paused</a></code> is
+ false. Either <code title=dom-media-seeking><a
+ href="#seeking0">seeking</a></code> is true, or the <a
+ href="#current">current playback position</a> is not contained in any
+ of the ranges in <code title=dom-media-buffered><a
+ href="#buffered">buffered</a></code>. It is possible for playback to
+ stop for two other reasons without <code title=dom-media-paused><a
+ href="#paused">paused</a></code> being false, but those two reasons do
+ not fire this event: maybe <a href="#ended" title="ended
+ playback">playback ended</a>, or playback <a href="#stopped">stopped
+ due to errors</a>.
+
+ <tr>
+ <td><dfn id=timeupdate
+ title=event-timeupdate><code>timeupdate</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The <a href="#current">current playback position</a> changed in an
+ interesting way, for example discontinuously.
+
+ <td>
+
+ <tr>
+ <td><dfn id=ended1 title=event-ended><code>ended</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>Playback has stopped because the end of the <a href="#media8">media
+ resource</a> was reached.
+
+ <td><code title=dom-media-currentTime><a
+ href="#currenttime">currentTime</a></code> equals the <var><a
+ href="#effective2">effective end</a></var>; <code
+ title=dom-media-ended><a href="#ended0">ended</a></code> is true.
+
+ <tr>
+ <td><dfn id=dataunavailable0
+ title=event-dataunavailable><code>dataunavailable</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The user agent cannot render the data at the <a
+ href="#current">current playback position</a> because data for the
+ current frame is not immediately available.
+
+ <td>The <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute is newly equal to
+ <code title=DATA_UNAVAILABLE>DATA_UNAVAILABLE</code>.
+
+ <tr>
+ <td><dfn id=canshowcurrentframe0
+ title=event-canshowcurrentframe><code>canshowcurrentframe</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The user agent cannot render the data after the <a
+ href="#current">current playback position</a> because data for the next
+ frame is not immediately available.
+
+ <td>The <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute is newly equal to
+ <code title=CAN_SHOW_CURRENT_FRAME>CAN_SHOW_CURRENT_FRAME</code>.
+
+ <tr>
+ <td><dfn id=canplay0 title=event-canplay><code>canplay</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The user agent can resume playback of the <a href="#media7">media
+ data</a>, but estimates that if playback were to be started now, the <a
+ href="#media8">media resource</a> could not be rendered at the current
+ playback rate up to its end without having to stop for further
+ buffering of content.
+
+ <td>The <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute is newly equal to
+ <code title=CAN_PLAY>CAN_PLAY</code>.
+
+ <tr>
+ <td><dfn id=canplaythrough0
+ title=event-canplaythrough><code>canplaythrough</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The user agent estimates that if playback were to be started now,
+ the <a href="#media8">media resource</a> could be rendered at the
+ current playback rate all the way to its end without having to stop for
+ further buffering.
+
+ <td>The <code title=dom-media-readyState><a
+ href="#readystate">readyState</a></code> attribute is newly equal to
+ <code title=CAN_PLAY_THROUGH>CAN_PLAY_THROUGH</code>.
+
+ <tr>
+ <td><dfn id=ratechange
+ title=event-ratechange><code>ratechange</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>Either the <code title=dom-media-defaultPlaybackRate><a
+ href="#defaultplaybackrate">defaultPlaybackRate</a></code> or the <code
+ title=dom-media-playbackRate><a
+ href="#playbackrate">playbackRate</a></code> attribute has just been
+ updated.
+
+ <td>
+
+ <tr>
+ <td><dfn id=durationchange
+ title=event-durationchange><code>durationchange</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>The <code title=dom-media-duration><a
+ href="#duration">duration</a></code> attribute has just been updated.
+
+ <td>
+
+ <tr>
+ <td><dfn id=volumechange
+ title=event-volumechange><code>volumechange</code></dfn>
+
+ <td><code>Event</code>
+
+ <td>Either the <code title=dom-media-volume><a
+ href="#volume">volume</a></code> attribute or the <code
+ title=dom-media-muted><a href="#muted">muted</a></code> attribute has
+ changed. Fired after the relevant attribute's setter has returned.
+
+ <td>
+ </table>
+
+ <h5 id=security0><span class=secno>3.14.9.13. </span>Security and privacy
+ considerations</h5>
+
+ <p class=big-issue>Talk about making sure interactive media files (e.g.
+ SVG) don't have access to the container DOM (XSS potential); talk about
+ not exposing any sensitive data like metadata from tracks in the media
+ files (intranet snooping risk)
+
+ <h4 id=the-source><span class=secno>3.14.10. </span>The <dfn
+ id=source><code>source</code></dfn> element</h4>
+ <!-- no type -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <a href="#media5">media element</a>, before any
+ content other than <code><a href="#source">source</a></code> elements.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-source-src><a href="#src7">src</a></code> (required)
+
+ <dd><code title=attr-source-type><a href="#type8">type</a></code>
+
+ <dd><code title=attr-source-media><a href="#media9">media</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlsourceelement>HTMLSourceElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#src8" title=dom-source-src>src</a>;
+ attribute DOMString <a href="#type9" title=dom-source-type>type</a>;
+ attribute DOMString <a href="#media10" title=dom-source-media>media</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#source">source</a></code> element allows authors to
+ specify multiple <a href="#media8" title="media resource">media
+ resources</a> for <a href="#media5" title="media element">media
+ elements</a>.
+
+ <p>The <dfn id=src7 title=attr-source-src><code>src</code></dfn> attribute
+ gives the address of the <a href="#media8">media resource</a>. The value
+ must be a URI (or IRI). This attribute must be present.
+
+ <p>The <dfn id=type8 title=attr-source-type><code>type</code></dfn>
+ attribute gives the type of the <a href="#media8">media resource</a>, to
+ help the user agent determine if it can play this <a href="#media8">media
+ resource</a> before downloading it. Its value must be a MIME type. The
+ <code title="">codec</code> parameter may be specified and might be
+ necessary to specify exactly how the resource is encoded. <a
+ href="#refsRFC2046">[RFC2046]</a> <a href="#refsRFC4281">[RFC4281]</a>
+
+ <p>The <dfn id=media9 title=attr-source-media><code>media</code></dfn>
+ attribute gives the intended media type of the <a href="#media8">media
+ resource</a>, to help the user agent determine if this <a
+ href="#media8">media resource</a> is useful to the user before downloading
+ it. Its value must be a valid media query. <a href="#refsMQ">[MQ]</a>
+
+ <p>Either the <code title=attr-source-type><a href="#type8">type</a></code>
+ attribute, the <code title=attr-source-media><a
+ href="#media9">media</a></code> attribute or both, must be specified,
+ unless this is the last <code><a href="#source">source</a></code> element
+ child of the parent element.
+
+ <p>If a <code><a href="#source">source</a></code> element is inserted into
+ a <a href="#media5">media element</a> that is already in a document and
+ whose <code title=dom-media-networkState><a
+ href="#networkstate">networkState</a></code> is in the <code
+ title=dom-media-EMPTY><a href="#empty">EMPTY</a></code> state, the user
+ agent must implicitly invoke the <code title=dom-media-load><a
+ href="#load">load()</a></code> method on the <a href="#media5">media
+ element</a> as soon as all other scripts have finished executing. Any
+ exceptions raised must be ignored.
+
+ <p>The DOM attributes <dfn id=src8
+ title=dom-source-src><code>src</code></dfn>, <dfn id=type9
+ title=dom-source-type><code>type</code></dfn>, and <dfn id=media10
+ title=dom-source-media><code>media</code></dfn> must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <h4 id=the-canvas><span class=secno>3.14.11. </span>The <dfn
+ id=canvas><code>canvas</code></dfn> element</h4>
+
+ <p><a href="#strictly" title="Strictly inline-level content">Strictly
+ inline-level</a> <a href="#embedded0">embedded content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the only <a href="#embedded0">embedded content</a> child of a
+ <code><a href="#figure">figure</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#inline-level0">Inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-canvas-height><a href="#height3">height</a></code>
+
+ <dd><code title=attr-canvas-width><a href="#width3">width</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlcanvaselement>HTMLCanvasElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute unsigned long <a href="#width4" title=dom-canvas-width>width</a>;
+ attribute unsigned long <a href="#height4" title=dom-canvas-height>height</a>;
+
+ DOMString <a href="#todataurl" title=dom-canvas-toDataURL>toDataURL</a>();
+ DOMString <a href="#todataurl0" title=dom-canvas-toDataURL-type>toDataURL</a>(in DOMString type);
+
+ DOMObject <a href="#getcontext" title=dom-canvas-getContext>getContext</a>(in DOMString contextId);
+};</pre>
+ </dl>
+
+ <p class=big-issue>Shouldn't allow inline-level content to be the content
+ model when the parent's content model is strictly inline only.
+
+ <p>The <code><a href="#canvas">canvas</a></code> element represents a
+ resolution-dependent bitmap canvas, which can be used for rendering
+ graphs, game graphics, or other visual images on the fly.
+
+ <p>Authors should not use the <code><a href="#canvas">canvas</a></code>
+ element in a document when a more suitable element is available. For
+ example, it is inappropriate to use a <code><a
+ href="#canvas">canvas</a></code> element to render a page heading: if the
+ desired presentation of the heading is graphically intense, it should be
+ marked up using appropriate elements (typically <code><a
+ href="#h1">h1</a></code>) and then styled using CSS and supporting
+ technologies such as XBL.
+
+ <p>When authors use the <code><a href="#canvas">canvas</a></code> element,
+ they should also provide content that, when presented to the user, conveys
+ essentially the same function or purpose as the bitmap canvas. This
+ content may be placed as content of the <code><a
+ href="#canvas">canvas</a></code> element. The contents of the <code><a
+ href="#canvas">canvas</a></code> element, if any, are the element's <a
+ href="#fallback">fallback content</a>.
+
+ <p>In interactive visual media with <span>scripting enabled</span>, the
+ canvas element is an embedded element with a dynamically created image.
+
+ <p>In non-interactive, static, visual media, if the <code><a
+ href="#canvas">canvas</a></code> element has been previously painted on
+ (e.g. if the page was viewed in an interactive visual medium and is now
+ being printed, or if some script that ran during the page layout process
+ painted on the element), then the <code><a
+ href="#canvas">canvas</a></code> element must be treated as embedded
+ content with the current image and size. Otherwise, the element's fallback
+ content must be used instead.
+
+ <p>In non-visual media, and in visual media with <span>scripting
+ disabled</span>, the <code><a href="#canvas">canvas</a></code> element's
+ fallback content must be used instead.
+
+ <p>The <code><a href="#canvas">canvas</a></code> element has two attributes
+ to control the size of the coordinate space: <dfn id=height3
+ title=attr-canvas-height><code>height</code></dfn> and <dfn id=width3
+ title=attr-canvas-width><code>width</code></dfn>. These attributes, when
+ specified, must have values that are <a href="#valid" title="valid
+ non-negative integer">valid non-negative integers</a>. The <a
+ href="#rules">rules for parsing non-negative integers</a> must be used to
+ obtain their numeric values. If an attribute is missing, or if parsing its
+ value returns an error, then the default value must be used instead. The
+ <code title=attr-canvas-width><a href="#width3">width</a></code> attribute
+ defaults to 300, and the <code title=attr-canvas-height><a
+ href="#height3">height</a></code> attribute defaults to 150.
+
+ <p>The intrinsic dimensions of the <code><a
+ href="#canvas">canvas</a></code> element equal the size of the coordinate
+ space, with the numbers interpreted in CSS pixels. However, the element
+ can be sized arbitrarily by a style sheet. During rendering, the image is
+ scaled to fit this layout size.
+
+ <p>The size of the coordinate space does not necessarily represent the size
+ of the actual bitmap that the user agent will use internally or during
+ rendering. On high-definition displays, for instance, the user agent may
+ internally use a bitmap with two device pixels per unit in the coordinate
+ space, so that the rendering remains at high quality throughout.
+
+ <p>The canvas must initially be fully transparent black.
+
+ <p>Whenever the <code title=attr-canvas-width><a
+ href="#width3">width</a></code> and <code title=attr-canvas-height><a
+ href="#height3">height</a></code> attributes are set (whether to a new
+ value or to the previous value), the bitmap and any associated contexts
+ must be cleared back to their initial state and reinitialised with the
+ newly specified coordinate space dimensions.
+
+ <p>The <dfn id=width4 title=dom-canvas-width><code>width</code></dfn> and
+ <dfn id=height4 title=dom-canvas-height><code>height</code></dfn> DOM
+ attributes must <a href="#reflect">reflect</a> the content attributes of
+ the same name.
+
+ <div class=example>
+ <p>Only one square appears to be drawn in the following example:</p>
+
+ <pre>
+ // canvas is a reference to a &lt;canvas> element
+ var context = canvas.getContext('2d');
+ context.fillRect(0,0,50,50);
+ canvas.setAttribute('width', '300'); // clears the canvas
+ context.fillRect(0,100,50,50);
+ canvas.width = canvas.width; // clears the canvas
+ context.fillRect(100,0,50,50); // only this square remains</pre>
+ </div>
+
+ <p>To draw on the canvas, authors must first obtain a reference to a <dfn
+ id=context0>context</dfn> using the <dfn id=getcontext
+ title=dom-canvas-getContext><code>getContext(<var
+ title="">contextId</var>)</code></dfn> method of the <code><a
+ href="#canvas">canvas</a></code> element.
+
+ <p>This specification only defines one context, with the name "<code
+ title=canvas-context-2d><a href="#d">2d</a></code>". If <code
+ title=dom-canvas-getContext><a href="#getcontext">getContext()</a></code>
+ is called with that exact string for its <var title="">contextId</var>
+ argument, then the UA must return a reference to an object implementing
+ <code><a
+ href="#canvasrenderingcontext2d">CanvasRenderingContext2D</a></code>.
+ Other specifications may define their own contexts, which would return
+ different objects.
+
+ <p>Vendors may also define experimental contexts using the syntax
+ <code><var title="">vendorname</var>-<var title="">context</var></code>,
+ for example, <code>moz-3d</code>.
+
+ <p>When the UA is passed an empty string or a string specifying a context
+ that it does not support, then it must return null. String comparisons
+ must be literal and case-sensitive.
+
+ <p class=note>A future version of this specification will probably define a
+ <code>3d</code> context (probably based on the OpenGL ES API).
+
+ <p>The <dfn id=todataurl
+ title=dom-canvas-toDataURL><code>toDataURL()</code></dfn> method must,
+ when called with no arguments, return a <code title="">data:</code> URI
+ containing a representation of the image as a PNG file. <a
+ href="#refsPNG">[PNG]</a>
+
+ <p>The <dfn id=todataurl0
+ title=dom-canvas-toDataURL-type><code>toDataURL(<var
+ title="">type</var>)</code></dfn> method (when called with one <em>or
+ more</em> arguments) must return a <code>data:</code> URI containing a
+ representation of the image in the format given by <var
+ title="">type</var>. The possible values are MIME types with no
+ parameters, for example <code>image/png</code>, <code>image/jpeg</code>,
+ or even maybe <code>image/svg+xml</code> if the implementation actually
+ keeps enough information to reliably render an SVG image from the canvas.
+
+ <p>Only support for <code>image/png</code> is required. User agents may
+ support other types. If the user agent does not support the requested
+ type, it must return the image using the PNG format.
+
+ <p>User agents must convert the provided type to lower case before
+ establishing if they support that type and before creating the
+ <code>data:</code> URI.</p>
+ <!-- XXX define "convert to lower case"
+ -->
+
+ <p class=note>When trying to use types other than <code>image/png</code>,
+ authors can check if the image was really returned in the requested format
+ by checking to see if the returned string starts with one of the exact
+ strings "<code title="">data:image/png,</code>" or "<code
+ title="">data:image/png;</code>". If it does, the image is PNG, and thus
+ the requested type was not supported.
+
+ <p>Arguments other than the <var title="">type</var> must be ignored, and
+ must not cause the user agent to raise an exception (as would normally
+ occur if a method was called with the wrong number of arguments). A future
+ version of this specification will probably allow extra parameters to be
+ passed to <code title=dom-canvas-toDataURL><a
+ href="#todataurl">toDataURL()</a></code> to allow authors to more
+ carefully control compression settings, image metadata, etc.
+
+ <p><strong>Security:</strong> To prevent <em>information leakage</em>, the
+ <code title=dom-canvas-toDataURL><a
+ href="#todataurl">toDataURL()</a></code> and <code
+ title=dom-context-2d-getImageData><a
+ href="#getimagedata">getImageData()</a></code> methods should raise a <a
+ href="#security8">security exception</a> if the canvas has ever had an
+ image painted on it whose <a href="#origin0">origin</a> is different from
+ that of the script calling the method.
+
+ <h5 id=the-2d><span class=secno>3.14.11.1. </span>The 2D context</h5>
+
+ <p>When the <code title=dom-canvas-getContext><a
+ href="#getcontext">getContext()</a></code> method of a <code><a
+ href="#canvas">canvas</a></code> element is invoked with <dfn id=d
+ title=canvas-context-2d><code>2d</code></dfn> as the argument, a <code><a
+ href="#canvasrenderingcontext2d">CanvasRenderingContext2D</a></code>
+ object is returned.
+
+ <p>There is only one <code><a
+ href="#canvasrenderingcontext2d">CanvasRenderingContext2D</a></code>
+ object per canvas, so calling the <code title=dom-canvas-getContext><a
+ href="#getcontext">getContext()</a></code> method with the <code
+ title=canvas-context-2d><a href="#d">2d</a></code> argument a second time
+ must return the same object.
+
+ <p>The 2D context represents a flat cartesian surface whose origin (0,0) is
+ at the top left corner, with the coordinate space having <var
+ title="">x</var> values increasing when going right, and <var
+ title="">y</var> values increasing when going down.
+
+ <pre
+ class=idl>interface <dfn id=canvasrenderingcontext2d>CanvasRenderingContext2D</dfn> {
+
+ // back-reference to the canvas
+ readonly attribute <a href="#htmlcanvaselement">HTMLCanvasElement</a> <a href="#canvas0" title=dom-context-2d-canvas>canvas</a>;
+
+ // state
+ void <a href="#save" title=dom-context-2d-save>save</a>(); // push state on state stack
+ void <a href="#restore" title=dom-context-2d-restore>restore</a>(); // pop state stack and restore state
+
+ // transformations (default transform is the identity matrix)
+ void <a href="#scale" title=dom-context-2d-scale>scale</a>(in float x, in float y);
+ void <a href="#rotate" title=dom-context-2d-rotate>rotate</a>(in float angle);
+ void <a href="#translate" title=dom-context-2d-translate>translate</a>(in float x, in float y);
+ void <a href="#transform" title=dom-context-2d-transform>transform</a>(in float m11, in float m12, in float m21, in float m22, in float dx, in float dy);
+ void <a href="#settransform" title=dom-context-2d-setTransform>setTransform</a>(in float m11, in float m12, in float m21, in float m22, in float dx, in float dy);
+<!--
+ // XXXv3 we've also received requests for:
+ void skew(...);
+ void reflect(...); // or mirror(...)
+-->
+ // compositing
+ attribute float <a href="#globalalpha" title=dom-context-2d-globalAlpha>globalAlpha</a>; // (default 1.0)
+ attribute DOMString <a href="#globalcompositeoperation" title=dom-context-2d-globalCompositeOperation>globalCompositeOperation</a>; // (default source-over)
+
+ // colors and styles
+ attribute DOMObject <a href="#strokestyle" title=dom-context-2d-strokeStyle>strokeStyle</a>; // (default black)
+ attribute DOMObject <a href="#fillstyle" title=dom-context-2d-fillStyle>fillStyle</a>; // (default black)
+ <a href="#canvasgradient0">CanvasGradient</a> <a href="#createlineargradient" title=dom-context-2d-createLinearGradient>createLinearGradient</a>(in float x0, in float y0, in float x1, in float y1);
+ <a href="#canvasgradient0">CanvasGradient</a> <a href="#createradialgradient" title=dom-context-2d-createRadialGradient>createRadialGradient</a>(in float x0, in float y0, in float r0, in float x1, in float y1, in float r1);
+ <a href="#canvaspattern0">CanvasPattern</a> <a href="#createpatternimage" title=dom-context-2d-createPattern>createPattern</a>(in <a href="#htmlimageelement">HTMLImageElement</a> image, DOMString repetition);
+ <a href="#canvaspattern0">CanvasPattern</a> <a href="#createpatternimage" title=dom-context-2d-createPattern>createPattern</a>(in <a href="#htmlcanvaselement">HTMLCanvasElement</a> image, DOMString repetition);
+
+ // line caps/joins
+ attribute float <a href="#linewidth" title=dom-context-2d-lineWidth>lineWidth</a>; // (default 1)
+ attribute DOMString <a href="#linecap" title=dom-context-2d-lineCap>lineCap</a>; // "butt", "round", "square" (default "butt")
+ attribute DOMString <a href="#linejoin" title=dom-context-2d-lineJoin>lineJoin</a>; // "round", "bevel", "miter" (default "miter")
+ attribute float <a href="#miterlimit" title=dom-context-2d-miterLimit>miterLimit</a>; // (default 10)
+
+ // shadows
+ attribute float <a href="#shadowoffsetx" title=dom-context-2d-shadowOffsetX>shadowOffsetX</a>; // (default 0)
+ attribute float <a href="#shadowoffsety" title=dom-context-2d-shadowOffsetY>shadowOffsetY</a>; // (default 0)
+ attribute float <a href="#shadowblur" title=dom-context-2d-shadowBlur>shadowBlur</a>; // (default 0)
+ attribute DOMString <a href="#shadowcolor" title=dom-context-2d-shadowColor>shadowColor</a>; // (default transparent black)
+
+ // rects
+ void <a href="#clearrect" title=dom-context-2d-clearRect>clearRect</a>(in float x, in float y, in float w, in float h);
+ void <a href="#fillrect" title=dom-context-2d-fillRect>fillRect</a>(in float x, in float y, in float w, in float h);
+ void <a href="#strokerect" title=dom-context-2d-strokeRect>strokeRect</a>(in float x, in float y, in float w, in float h);
+
+ // path API
+ void <a href="#beginpath" title=dom-context-2d-beginPath>beginPath</a>();
+ void <a href="#closepath" title=dom-context-2d-closePath>closePath</a>();
+ void <a href="#moveto" title=dom-context-2d-moveTo>moveTo</a>(in float x, in float y);
+ void <a href="#lineto" title=dom-context-2d-lineTo>lineTo</a>(in float x, in float y);
+ void <a href="#quadraticcurveto" title=dom-context-2d-quadraticCurveTo>quadraticCurveTo</a>(in float cpx, in float cpy, in float x, in float y);
+ void <a href="#beziercurveto" title=dom-context-2d-bezierCurveTo>bezierCurveTo</a>(in float cp1x, in float cp1y, in float cp2x, in float cp2y, in float x, in float y);
+ void <a href="#arcto" title=dom-context-2d-arcTo>arcTo</a>(in float x1, in float y1, in float x2, in float y2, in float radius);
+ void <a href="#rectx" title=dom-context-2d-rect>rect</a>(in float x, in float y, in float w, in float h);
+ void <a href="#arcx-" title=dom-context-2d-arc>arc</a>(in float x, in float y, in float radius, in float startAngle, in float endAngle, in boolean anticlockwise);
+ void <a href="#fill" title=dom-context-2d-fill>fill</a>();
+ void <a href="#stroke" title=dom-context-2d-stroke>stroke</a>();
+ void <a href="#clip" title=dom-context-2d-clip>clip</a>();
+ boolean <a href="#ispointinpath" title=dom-context-2d-isPointInPath>isPointInPath</a>(in float x, in float y);
+
+ // drawing images
+ void <a href="#drawimage" title=dom-context-2d-drawImage>drawImage</a>(in <a href="#htmlimageelement">HTMLImageElement</a> image, in float dx, in float dy);
+ void <a href="#drawimage" title=dom-context-2d-drawImage>drawImage</a>(in <a href="#htmlimageelement">HTMLImageElement</a> image, in float dx, in float dy, in float dw, in float dh);
+ void <a href="#drawimage" title=dom-context-2d-drawImage>drawImage</a>(in <a href="#htmlimageelement">HTMLImageElement</a> image, in float sx, in float sy, in float sw, in float sh, in float dx, in float dy, in float dw, in float dh);
+ void <a href="#drawimage" title=dom-context-2d-drawImage>drawImage</a>(in <a href="#htmlcanvaselement">HTMLCanvasElement</a> image, in float dx, in float dy);
+ void <a href="#drawimage" title=dom-context-2d-drawImage>drawImage</a>(in <a href="#htmlcanvaselement">HTMLCanvasElement</a> image, in float dx, in float dy, in float dw, in float dh);
+ void <a href="#drawimage" title=dom-context-2d-drawImage>drawImage</a>(in <a href="#htmlcanvaselement">HTMLCanvasElement</a> image, in float sx, in float sy, in float sw, in float sh, in float dx, in float dy, in float dw, in float dh);
+
+ // pixel manipulation
+ <a href="#imagedata">ImageData</a> <a href="#getimagedata" title=dom-context-2d-getImageData>getImageData</a>(in float sx, in float sy, in float sw, in float sh);
+ void <a href="#putimagedata" title=dom-context-2d-putImageData>putImageData</a>(in <a href="#imagedata">ImageData</a> image, in float dx, in float dy);
+
+ // drawing text is not supported in this version of the API
+ // (there is no way to predict what metrics the fonts will have,
+ // which makes fonts very hard to use for painting)
+
+};
+
+interface <dfn id=canvasgradient>CanvasGradient</dfn> {
+ // opaque object
+ void <a href="#addcolorstop" title=dom-canvasgradient-addColorStop>addColorStop</a>(in float offset, in DOMString color);
+};
+
+interface <dfn id=canvaspattern>CanvasPattern</dfn> {
+ // opaque object
+};
+
+interface <dfn id=imagedata>ImageData</dfn> {
+ readonly attribute long int <a href="#width5" title=dom-imagedata-width>width</a>;
+ readonly attribute long int <a href="#height5" title=dom-imagedata-height>height</a>;
+ readonly attribute int[] <a href="#data1" title=dom-imagedata-data>data</a>;
+};</pre>
+
+ <p>The <dfn id=canvas0
+ title=dom-context-2d-canvas><code>canvas</code></dfn> attribute must
+ return the <code><a href="#canvas">canvas</a></code> element that the
+ context paints on.
+
+ <h6 id=the-canvas0><span class=secno>3.14.11.1.1. </span>The canvas state</h6>
+
+ <p>Each context maintains a stack of drawing states. <dfn id=drawing0
+ title="drawing state">Drawing states</dfn> consist of:
+
+ <ul class=brief>
+ <li>The current transformation matrix.
+
+ <li>The current clip region.
+
+ <li>The current values of the following attributes: <code
+ title=dom-context-2d-strokeStyle><a
+ href="#strokestyle">strokeStyle</a></code>, <code
+ title=dom-context-2d-fillStyle><a href="#fillstyle">fillStyle</a></code>,
+ <code title=dom-context-2d-globalAlpha><a
+ href="#globalalpha">globalAlpha</a></code>, <code
+ title=dom-context-2d-lineWidth><a href="#linewidth">lineWidth</a></code>,
+ <code title=dom-context-2d-lineCap><a href="#linecap">lineCap</a></code>,
+ <code title=dom-context-2d-lineJoin><a
+ href="#linejoin">lineJoin</a></code>, <code
+ title=dom-context-2d-miterLimit><a
+ href="#miterlimit">miterLimit</a></code>, <code
+ title=dom-context-2d-shadowOffsetX><a
+ href="#shadowoffsetx">shadowOffsetX</a></code>, <code
+ title=dom-context-2d-shadowOffsetY><a
+ href="#shadowoffsety">shadowOffsetY</a></code>, <code
+ title=dom-context-2d-shadowBlur><a
+ href="#shadowblur">shadowBlur</a></code>, <code
+ title=dom-context-2d-shadowColor><a
+ href="#shadowcolor">shadowColor</a></code>, <code
+ title=dom-context-2d-globalCompositeOperation><a
+ href="#globalcompositeoperation">globalCompositeOperation</a></code>.
+ </ul>
+
+ <p class=note>The current path and the current bitmap are not part of the
+ drawing state. The current path is persistent, and can only be reset using
+ the <code title=dom-context-2d-beginPath><a
+ href="#beginpath">beginPath()</a></code> method. The current bitmap is
+ <span title=concept-canvas-image>a property of the
+ canvas</span><!-- XXX xref -->, not the context.
+
+ <p>The <dfn id=save title=dom-context-2d-save><code>save()</code></dfn>
+ method must push a copy of the current drawing state onto the drawing
+ state stack.
+
+ <p>The <dfn id=restore
+ title=dom-context-2d-restore><code>restore()</code></dfn> method must pop
+ the top entry in the drawing state stack, and reset the drawing state it
+ describes. If there is no saved state, the method must do nothing.
+
+ <h6 id=transformations><span class=secno>3.14.11.1.2. </span><dfn
+ id=transformations0>Transformations</dfn></h6>
+
+ <p>The transformation matrix is applied to all drawing operations prior to
+ their being rendered. It is also applied when creating the clip region.</p>
+ <!-- conformance criteria for actual drawing are
+ described in "drawing model" below -->
+
+ <p>When the context is created, the transformation matrix must initially be
+ the identity transform. It may then be adjusted using the transformation
+ methods.
+
+ <p>The transformation matrix can become infinite, at which point nothing is
+ drawn anymore.</p>
+ <!--
+ Philip Taylor wrote:
+ > My experience with some 3d canvas code is that infinities come up in
+ > naturally harmless places, e.g. having a function that scales by x then
+ > translates by 1/x and wanting it to work when x=0 (which ought to draw
+ > nothing, since anything it draws is zero pixels wide), and it's a bit
+ > annoying to track down and fix those issues, so I'd probably like it if
+ > they were harmless in canvas methods. Opera appears to silently not draw
+ > anything if the transformation matrix is not finite, but Firefox throws
+ > exceptions when passing in non-finite arguments.
+ -->
+
+ <p>The transformations must be performed in reverse order. For instance, if
+ a scale transformation that doubles the width is applied, followed by a
+ rotation transformation that rotates drawing operations by a quarter turn,
+ and a rectangle twice as wide as it is tall is then drawn on the canvas,
+ the actual result will be a square.
+
+ <p>The <dfn id=scale title=dom-context-2d-scale><code>scale(<var
+ title="">x</var>, <var title="">y</var>)</code></dfn> method must add the
+ scaling transformation described by the arguments to the transformation
+ matrix. The <var title="">x</var> argument represents the scale factor in
+ the horizontal direction and the <var title="">y</var> argument represents
+ the scale factor in the vertical direction. The factors are multiples. If
+ either argument is Infinity the transformation matrix must be marked as
+ infinite instead of the method throwing an exception.
+
+ <p>The <dfn id=rotate title=dom-context-2d-rotate><code>rotate(<var
+ title="">angle</var>)</code></dfn> method must add the rotation
+ transformation described by the argument to the transformation matrix. The
+ <var title="">angle</var> argument represents a clockwise rotation angle
+ expressed in radians.
+
+ <p>The <dfn id=translate
+ title=dom-context-2d-translate><code>translate(<var title="">x</var>, <var
+ title="">y</var>)</code></dfn> method must add the translation
+ transformation described by the arguments to the transformation matrix.
+ The <var title="">x</var> argument represents the translation distance in
+ the horizontal direction and the <var title="">y</var> argument represents
+ the translation distance in the vertical direction. The arguments are in
+ coordinate space units. If either argument is Infinity the transformation
+ matrix must be marked as infinite instead of the method throwing an
+ exception.
+
+ <p>The <dfn id=transform
+ title=dom-context-2d-transform><code>transform(<var title="">m11</var>,
+ <var title="">m12</var>, <var title="">m21</var>, <var title="">m22</var>,
+ <var title="">dx</var>, <var title="">dy</var>)</code></dfn> method must
+ multiply the current transformation matrix with the matrix described by:
+
+ <table class=matrix>
+ <tbody>
+ <tr>
+ <td><var title="">m11</var>
+
+ <td><var title="">m21</var>
+
+ <td><var title="">dx</var>
+
+ <tr>
+ <td><var title="">m12</var>
+
+ <td><var title="">m22</var>
+
+ <td><var title="">dy</var>
+
+ <tr>
+ <td>0
+
+ <td>0
+
+ <td>1
+ </table>
+
+ <p>If any of the arguments are Infinity the transformation matrix must be
+ marked as infinite instead of the method throwing an exception.
+
+ <p>The <dfn id=settransform
+ title=dom-context-2d-setTransform><code>setTransform(<var
+ title="">m11</var>, <var title="">m12</var>, <var title="">m21</var>, <var
+ title="">m22</var>, <var title="">dx</var>, <var
+ title="">dy</var>)</code></dfn> method must reset the current transform to
+ the identity matrix, and then invoke the <code><a href="#transform"
+ title=dom-context-2d-transform>transform</a>(<var title="">m11</var>, <var
+ title="">m12</var>, <var title="">m21</var>, <var title="">m22</var>, <var
+ title="">dx</var>, <var title="">dy</var>)</code> method with the same
+ arguments. If any of the arguments are Infinity the transformation matrix
+ must be marked as infinite instead of the method throwing an exception.
+
+ <h6 id=compositing><span class=secno>3.14.11.1.3. </span>Compositing</h6>
+
+ <p>All drawing operations are affected by the global compositing
+ attributes, <code title=dom-context-2d-globalAlpha><a
+ href="#globalalpha">globalAlpha</a></code> and <code
+ title=dom-context-2d-globalCompositeOperation><a
+ href="#globalcompositeoperation">globalCompositeOperation</a></code>.</p>
+ <!-- conformance criteria for painting are described in the "drawing
+ model" section below -->
+
+ <p>The <dfn id=globalalpha
+ title=dom-context-2d-globalAlpha><code>globalAlpha</code></dfn> attribute
+ gives an alpha value that is applied to shapes and images before they are
+ composited onto the canvas. The value must be in the range from 0.0 (fully
+ transparent) to 1.0 (no additional transparency). If an attempt is made to
+ set the attribute to a value outside this range, the attribute must retain
+ its previous value. When the context is created, the <code
+ title=dom-context-2d-globalAlpha><a
+ href="#globalalpha">globalAlpha</a></code> attribute must initially have
+ the value 1.0.
+
+ <p>The <dfn id=globalcompositeoperation
+ title=dom-context-2d-globalCompositeOperation><code>globalCompositeOperation</code></dfn>
+ attribute sets how shapes and images are drawn onto the existing bitmap,
+ once they have had <code title=dom-context-2d-globalAlpha><a
+ href="#globalalpha">globalAlpha</a></code> and the current transformation
+ matrix applied. It must be set to a value from the following list. In the
+ descriptions below, the source image, <var title="">A</var>, is the shape
+ or image being rendered, and the destination image, <var title="">B</var>,
+ is the current state of the bitmap.
+
+ <dl>
+ <dt><dfn id=source-atop
+ title=gcop-source-atop><code>source-atop</code></dfn>
+
+ <dd><var title="">A</var> atop <var title="">B</var>. Display the source
+ image wherever both images are opaque. Display the destination image
+ wherever the destination image is opaque but the source image is
+ transparent. Display transparency elsewhere.
+
+ <dt><dfn id=source-in title=gcop-source-in><code>source-in</code></dfn>
+
+ <dd><var title="">A</var> in <var title="">B</var>. Display the source
+ image wherever both the source image and destination image are opaque.
+ Display transparency elsewhere.
+
+ <dt><dfn id=source-out title=gcop-source-out><code>source-out</code></dfn>
+
+ <dd><var title="">A</var> out <var title="">B</var>. Display the source
+ image wherever the source image is opaque and the destination image is
+ transparent. Display transparency elsewhere.
+
+ <dt><dfn id=source-over
+ title=gcop-source-over><code>source-over</code></dfn> (default)
+
+ <dd><var title="">A</var> over <var title="">B</var>. Display the source
+ image wherever the source image is opaque. Display the destination image
+ elsewhere.
+
+ <dt><dfn id=destination-atop
+ title=gcop-destination-atop><code>destination-atop</code></dfn>
+
+ <dd><var title="">B</var> atop <var title="">A</var>. Same as <code
+ title=gcop-source-atop><a href="#source-atop">source-atop</a></code> but
+ using the destination image instead of the source image and vice versa.
+
+ <dt><dfn id=destination-in
+ title=gcop-destination-in><code>destination-in</code></dfn>
+
+ <dd><var title="">B</var> in <var title="">A</var>. Same as <code
+ title=gcop-source-in><a href="#source-in">source-in</a></code> but using
+ the destination image instead of the source image and vice versa.
+
+ <dt><dfn id=destination-out
+ title=gcop-destination-out><code>destination-out</code></dfn>
+
+ <dd><var title="">B</var> out <var title="">A</var>. Same as <code
+ title=gcop-source-out><a href="#source-out">source-out</a></code> but
+ using the destination image instead of the source image and vice versa.
+
+ <dt><dfn id=destination-over
+ title=gcop-destination-over><code>destination-over</code></dfn>
+
+ <dd><var title="">B</var> over <var title="">A</var>. Same as <code
+ title=gcop-source-over><a href="#source-over">source-over</a></code> but
+ using the destination image instead of the source image and vice versa.</dd>
+ <!-- no clear definition of this operator (doesn't correspond to a PorterDuff operator)
+ <dt><dfn title="gcop-darker"><code>darker</code></dfn></dt>
+
+ <dd>Display the sum of the source image and destination image,
+ with color values approaching 0 as a limit.</dd>
+-->
+
+ <dt><dfn id=lighter title=gcop-lighter><code>lighter</code></dfn>
+
+ <dd><var title="">A</var> plus <var title="">B</var>. Display the sum of
+ the source image and destination image, with color values approaching 1
+ as a limit.
+
+ <dt><dfn id=copy title=gcop-copy><code>copy</code></dfn>
+
+ <dd><var title="">A</var> (<var title="">B</var> is ignored). Display the
+ source image instead of the destination image.
+
+ <dt><dfn id=xor title=gcop-xor><code>xor</code></dfn>
+
+ <dd><var title="">A</var> xor <var title="">B</var>. Exclusive OR of the
+ source image and destination image.
+
+ <dt><code><var title="">vendorName</var>-<var
+ title="">operationName</var></code>
+
+ <dd>Vendor-specific extensions to the list of composition operators should
+ use this syntax.
+ </dl>
+
+ <p>These values are all case-sensitive &mdash; they must be used exactly as
+ shown. User agents must only recognise values that exactly match the
+ values given above.
+
+ <p>The operators in the above list must be treated as described by the
+ Porter-Duff operator given at the start of their description (e.g. <var
+ title="">A</var> over <var title="">B</var>). <a
+ href="#refsPORTERDUFF">[PORTERDUFF]</a></p>
+ <!--
+ <dd id="refsPORTERDUFF">[PORTERDUFF]</dd>
+ <dd><cite>Compositing Digital Images</cite>, SIGGRAPH '84: Proceedings of the 11th annual conference on Computer graphics and interactive techniques, Volume 18, Number 3, T. Porter, T Duff. ACM Press, July 1984. ISBN 0-89791-138-5.</dd>
+ -->
+
+ <p>On setting, if the user agent does not recognise the specified value, it
+ must be ignored, leaving the value of <code
+ title=dom-context-2d-globalCompositeOperation><a
+ href="#globalcompositeoperation">globalCompositeOperation</a></code>
+ unaffected.
+
+ <p>When the context is created, the <code
+ title=dom-context-2d-globalCompositeOperation><a
+ href="#globalcompositeoperation">globalCompositeOperation</a></code>
+ attribute must initially have the value <code>source-over</code>.
+
+ <h6 id=colors><span class=secno>3.14.11.1.4. </span>Colors and styles</h6>
+
+ <p>The <dfn id=strokestyle
+ title=dom-context-2d-strokeStyle><code>strokeStyle</code></dfn> attribute
+ represents the color or style to use for the lines around shapes, and the
+ <dfn id=fillstyle
+ title=dom-context-2d-fillStyle><code>fillStyle</code></dfn> attribute
+ represents the color or style to use inside the shapes.
+
+ <p>Both attributes can be either strings, <code><a
+ href="#canvasgradient0">CanvasGradient</a></code>s, or <code><a
+ href="#canvaspattern0">CanvasPattern</a></code>s. On setting, strings must
+ be parsed as CSS &lt;color&gt; values and the color assigned, and <code><a
+ href="#canvasgradient0">CanvasGradient</a></code> and <code><a
+ href="#canvaspattern0">CanvasPattern</a></code> objects must be assigned
+ themselves. <a href="#refsCSS3COLOR">[CSS3COLOR]</a> If the value is a
+ string but is not a valid color, or is neither a string, a <code><a
+ href="#canvasgradient0">CanvasGradient</a></code>, nor a <code><a
+ href="#canvaspattern0">CanvasPattern</a></code>, then it must be ignored,
+ and the attribute must retain its previous value.
+
+ <p>On getting, if the value is a color, then: if it has alpha equal to 1.0,
+ then the color must be returned as a lowercase six-digit hex value,
+ prefixed with a "#" character (U+0023 NUMBER SIGN), with the first two
+ digits representing the red component, the next two digits representing
+ the green component, and the last two digits representing the blue
+ component, the digits being in the range 0-9 a-f (U+0030 to U+0039 and
+ U+0061 to U+0066). If the value has alpha less than 1.0, then the value
+ must instead be returned in the CSS <code title="">rgba()</code>
+ functional-notation format: the literal string <code title="">rgba</code>
+ (U+0072 U+0067 U+0062 U+0061) followed by a U+0028 LEFT PARENTHESIS, a
+ base-ten integer in the range 0-255 representing the red component (using
+ digits 0-9, U+0030 to U+0039, in the shortest form possible), a literal
+ U+002C COMMA and U+0020 SPACE, an integer for the green component, a comma
+ and a space, an integer for the blue component, another comma and space, a
+ U+0030 DIGIT ZERO, a U+002E FULL STOP (representing the decimal point),
+ one or more digits in the range 0-9 (U+0030 to U+0039) representing the
+ fractional part of the alpha value, and finally a U+0029 RIGHT
+ PARENTHESIS.
+
+ <p>Otherwise, if it is not a color but a <code><a
+ href="#canvasgradient0">CanvasGradient</a></code> or <code><a
+ href="#canvaspattern0">CanvasPattern</a></code>, then the respective
+ object must be returned. (Such objects are opaque and therefore only
+ useful for assigning to other attributes or for comparison to other
+ gradients or patterns.)
+
+ <p>When the context is created, the <code
+ title=dom-context-2d-strokeStyle><a
+ href="#strokestyle">strokeStyle</a></code> and <code
+ title=dom-context-2d-fillStyle><a href="#fillstyle">fillStyle</a></code>
+ attributes must initially have the string value <code
+ title="">#000000</code>.
+
+ <p>There are two types of gradients, linear gradients and radial gradients,
+ both represented by objects implementing the opaque <dfn
+ id=canvasgradient0><code>CanvasGradient</code></dfn> interface.
+
+ <p>Once a gradient has been created (see below), stops are placed along it
+ to define how the colors are distributed along the gradient. The color of
+ the gradient at each stop is the color specified for that stop. Between
+ each such stop, the colors and the alpha component must be linearly
+ interpolated over the RGBA space without premultiplying the alpha value to
+ find the color to use at that offset. Before the first stop, the color
+ must be the color of the first stop. After the last stop, the color must
+ be the color of the last stop. When there are no stops, the gradient is
+ transparent black.
+
+ <p>The <dfn id=addcolorstop
+ title=dom-canvasgradient-addColorStop><code>addColorStop(<var
+ title="">offset</var>, <var title="">color</var>)</code></dfn> method on
+ the <code><a href="#canvasgradient0">CanvasGradient</a></code> interface
+ adds a new stop to a gradient. If the <var title="">offset</var> is less
+ than 0 or greater than 1 then an <code>INDEX_SIZE_ERR</code> exception
+ must be raised. If the <var title="">color</var> cannot be parsed as a CSS
+ color, then a <code>SYNTAX_ERR</code> exception must be raised. Otherwise,
+ the gradient must have a new stop placed, at offset <var
+ title="">offset</var> relative to the whole gradient, and with the color
+ obtained by parsing <var title="">color</var> as a CSS &lt;color&gt;
+ value. If multiple stops are added at the same offset on a gradient, they
+ must be placed in the order added, with the first one closest to the start
+ of the gradient, and each subsequent one infinitesimally further along
+ towards the end point (in effect causing all but the first and last stop
+ added at each point to be ignored).
+
+ <p>The <dfn id=createlineargradient
+ title=dom-context-2d-createLinearGradient><code>createLinearGradient(<var
+ title="">x0</var>, <var title="">y0</var>, <var title="">x1</var>, <var
+ title="">y1</var>)</code></dfn> method takes four arguments, representing
+ the start point (<var title="">x0</var>, <var title="">y0</var>) and end
+ point (<var title="">x1</var>, <var title="">y1</var>) of the gradient, in
+ coordinate space units, and must return a linear <code><a
+ href="#canvasgradient0">CanvasGradient</a></code> initialised with that
+ line.
+
+ <p>Linear gradients must be rendered such that at and before the starting
+ point on the canvas the color at offset 0 is used, that at and after the
+ ending point the color at offset 1 is used, and that all points on a line
+ perpendicular to the line that crosses the start and end points have the
+ color at the point where those two lines cross (with the colors coming
+ from the interpolation described above).
+
+ <p>If <span><var title="">x<sub>0</sub></var>&nbsp;=&nbsp;<var
+ title="">x<sub>1</sub></var></span> and <span><var
+ title="">y<sub>0</sub></var>&nbsp;=&nbsp;<var
+ title="">y<sub>1</sub></var></span>, then the linear gradient must paint
+ nothing.</p>
+ <!-- XXX could make this paint the start colour,
+ or the end colour, or raise an exception -->
+
+ <p>The <dfn id=createradialgradient
+ title=dom-context-2d-createRadialGradient><code>createRadialGradient(<var
+ title="">x0</var>, <var title="">y0</var>, <var title="">r0</var>, <var
+ title="">x1</var>, <var title="">y1</var>, <var
+ title="">r1</var>)</code></dfn> method takes six arguments, the first
+ three representing the start circle with origin (<var title="">x0</var>,
+ <var title="">y0</var>) and radius <var title="">r0</var>, and the last
+ three representing the end circle with origin (<var title="">x1</var>,
+ <var title="">y1</var>) and radius <var title="">r1</var>. The values are
+ in coordinate space units. The method must return a radial <code><a
+ href="#canvasgradient0">CanvasGradient</a></code> initialised with those
+ two circles. If either of <var title="">r0</var> or <var title="">r1</var>
+ are negative, an <code>INDEX_SIZE_ERR</code> exception must be raised.
+
+ <p>Radial gradients must be rendered by following these steps:
+
+ <ol>
+ <li>
+ <p>Let <span>x(<var title="">&omega;</var>)&nbsp;=&nbsp;(<var
+ title="">x<sub>1</sub></var>-<var title="">x<sub>0</sub></var>)<var
+ title="">&omega;</var>&nbsp;+&nbsp;<var
+ title="">x<sub>0</sub></var></span></p>
+
+ <p>Let <span>y(<var title="">&omega;</var>)&nbsp;=&nbsp;(<var
+ title="">y<sub>1</sub></var>-<var title="">y<sub>0</sub></var>)<var
+ title="">&omega;</var>&nbsp;+&nbsp;<var
+ title="">y<sub>0</sub></var></span></p>
+
+ <p>Let <span>r(<var title="">&omega;</var>)&nbsp;=&nbsp;(<var
+ title="">r<sub>1</sub></var>-<var title="">r<sub>0</sub></var>)<var
+ title="">&omega;</var>&nbsp;+&nbsp;<var
+ title="">r<sub>0</sub></var></span></p>
+
+ <p>Let the color at <var title="">&omega;</var> be the color of the
+ gradient at offset 0.0 for all values of <var title="">&omega;</var>
+ less than 0.0, the color at offset 1.0 for all values of <var
+ title="">&omega;</var> greater than 1.0, and the color at the given
+ offset for values of <var title="">&omega;</var> in the range
+ <span>0.0&nbsp;&le;&nbsp;<var
+ title="">&omega;</var>&nbsp;&le;&nbsp;1.0</span>
+
+ <li>
+ <p>For all values of <var title="">&omega;</var> where <span>r(<var
+ title="">&omega;</var>)&nbsp;&gt;&nbsp;0</span>, starting with the value
+ of <var title="">&omega;</var> nearest to positive infinity and ending
+ with the value of <var title="">&omega;</var> nearest to negative
+ infinity, draw the circumference of the circle with radius <span>r(<var
+ title="">&omega;</var>)</span> at position (<span>x(<var
+ title="">&omega;</var>)</span>, <span>y(<var
+ title="">&omega;</var>)</span>), with the color at <var
+ title="">&omega;</var>, but only painting on the parts of the canvas
+ that have not yet been painted on by earlier circles in this step for
+ this rendering of the gradient.
+ </ol>
+
+ <p>If <span><var title="">x<sub>0</sub></var>&nbsp;=&nbsp;<var
+ title="">x<sub>1</sub></var></span> and <span><var
+ title="">y<sub>0</sub></var>&nbsp;=&nbsp;<var
+ title="">y<sub>1</sub></var></span> and <span><var
+ title="">r<sub>0</sub></var>&nbsp;=&nbsp;<var
+ title="">r<sub>1</sub></var></span>, then the radial gradient must paint
+ nothing.</p>
+ <!-- XXX could make this paint the start colour,
+ or the end colour, or a circle of one in the other, or raise an
+ exception -->
+
+ <p class=note>This effectively creates a cone, touched by the two circles
+ defined in the creation of the gradient, with the part of the cone before
+ the start circle (0.0) using the color of the first offset, the part of
+ the cone after the end circle (1.0) using the color of the last offset,
+ and areas outside the cone untouched by the gradient (transparent black).
+
+ <p>Gradients must only be painted where the relevant stroking or filling
+ effect requires that they be drawn.
+
+ <p>Support for actually painting gradients is optional. Instead of painting
+ the gradients, user agents may instead just paint the first stop's color.
+ However, <code title=dom-context-2d-createLinearGradient><a
+ href="#createlineargradient">createLinearGradient()</a></code> and <code
+ title=dom-context-2d-createRadialGradient><a
+ href="#createradialgradient">createRadialGradient()</a></code> must always
+ return objects when passed valid arguments.
+
+ <p>Patterns are represented by objects implementing the opaque <dfn
+ id=canvaspattern0><code>CanvasPattern</code></dfn> interface.
+
+ <p>To create objects of this type, the <dfn id=createpatternimage
+ title=dom-context-2d-createPattern><code>createPattern(image,
+ repetition)</code></dfn> method is used. The first argument gives the
+ image to use as the pattern (either an <code><a
+ href="#htmlimageelement">HTMLImageElement</a></code> or an <code><a
+ href="#htmlcanvaselement">HTMLCanvasElement</a></code>). Modifying this
+ image after calling the <code title=dom-context-2d-createPattern><a
+ href="#createpatternimage">createPattern()</a></code> method must not
+ affect the pattern. The second argument must be a string with one of the
+ following values: <code title="">repeat</code>, <code
+ title="">repeat-x</code>, <code title="">repeat-y</code>, <code
+ title="">no-repeat</code>. If the empty string or null is specified, <code
+ title="">repeat</code> must be assumed. If an unrecognised value is given,
+ then the user agent must raise a <code>SYNTAX_ERR</code> exception. User
+ agents must recognise the four values described above exactly (e.g. they
+ must not do case folding). The method must return a <code><a
+ href="#canvaspattern0">CanvasPattern</a></code> object suitably
+ initialised.
+
+ <p>The <var title="">image</var> argument must be an instance of an
+ <code><a href="#htmlimageelement">HTMLImageElement</a></code> or <code><a
+ href="#htmlcanvaselement">HTMLCanvasElement</a></code>. If the <var
+ title="">image</var> is of the wrong type, the implementation must raise a
+ <code>TYPE_MISMATCH_ERR</code> exception. If the <var title="">image</var>
+ argument is an <code><a
+ href="#htmlimageelement">HTMLImageElement</a></code> object whose <code
+ title=dom-attr-complete>complete</code> attribute is false, then the
+ implementation must raise an <code>INVALID_STATE_ERR</code> exception.
+
+ <p>Patterns must be painted so that the top left of the first image is
+ anchored at the origin of the coordinate space, and images are then
+ repeated horizontally to the left and right (if the <code>repeat-x</code>
+ string was specified) or vertically up and down (if the
+ <code>repeat-y</code> string was specified) or in all four directions all
+ over the canvas (if the <code>repeat</code> string was specified). The
+ images are not scaled by this process; one CSS pixel of the image must
+ be painted on one coordinate space unit. Of course, patterns must only
+ actually be painted where the stroking or filling effect requires that they
+ be drawn, and are affected by the current transformation matrix.
+
+ <p>Support for patterns is optional. If the user agent doesn't support
+ patterns, then <code title=dom-context-2d-createPattern><a
+ href="#createpatternimage">createPattern()</a></code> must return null.</p>
+ <!--
+ XXXv3 Requests for v3 features:
+ * apply transforms to patterns, so you don't have to create
+ transformed patterns manually by rendering them to an off-screen
+ canvas then using that canvas as the pattern.
+ -->
+
+ <h6 id=line-styles><span class=secno>3.14.11.1.5. </span>Line styles</h6>
+
+ <p>The <dfn id=linewidth
+ title=dom-context-2d-lineWidth><code>lineWidth</code></dfn> attribute
+ gives the default width of lines, in coordinate space units. On setting,
+ zero and negative values must be ignored, leaving the value unchanged.
+
+ <p>When the context is created, the <code title=dom-context-2d-lineWidth><a
+ href="#linewidth">lineWidth</a></code> attribute must initially have the
+ value <code>1.0</code>.
+
+ <p>The <dfn id=linecap
+ title=dom-context-2d-lineCap><code>lineCap</code></dfn> attribute defines
+ the type of endings that UAs shall place on the end of lines. The three
+ valid values are <code>butt</code>, <code>round</code>, and
+ <code>square</code>. The <code>butt</code> value means that the end of
+ each line is a flat edge perpendicular to the direction of the line. The
+ <code>round</code> value means that a semi-circle with the diameter equal
+ to the width of the line is then added on to the end of the line. The
+ <code>square</code> value means that at the end of each line is a
+ rectangle with the length of the line width and the width of half the line
+ width, placed flat against the edge perpendicular to the direction of the
+ line. On setting, any other value than the literal strings
+ <code>butt</code>, <code>round</code>, and <code>square</code> must be
+ ignored, leaving the value unchanged.
+
+ <p>When the context is created, the <code title=dom-context-2d-lineCap><a
+ href="#linecap">lineCap</a></code> attribute must initially have the value
+ <code>butt</code>.
+
+ <p>The <dfn id=linejoin
+ title=dom-context-2d-lineJoin><code>lineJoin</code></dfn> attribute
+ defines the type of corners that UAs will place where two lines meet.
+ The three valid values are <code>round</code>, <code>bevel</code>, and
+ <code>miter</code>.
+
+ <p>On setting, any other value than the literal strings <code>round</code>,
+ <code>bevel</code> and <code>miter</code> must be ignored, leaving the
+ value unchanged.
+
+ <p>When the context is created, the <code title=dom-context-2d-lineJoin><a
+ href="#linejoin">lineJoin</a></code> attribute must initially have the
+ value <code>miter</code>.
+
+ <p>The <code>round</code> value means that a filled arc connecting the
+ corners on the outside of the join, with the diameter equal to the line
+ width, and the origin at the point where the inside edges of the lines
+ touch, must be rendered at joins. The <code>bevel</code> value means that
+ a filled triangle connecting those two corners with a straight line, the
+ third point of the triangle being the point where the lines touch on the
+ inside of the join, must be rendered at joins. The <code>miter</code>
+ value means that a filled four- or five-sided polygon must be placed at
+ the join, with two of the lines being the perpendicular edges of the
+ joining lines, and the other two being continuations of the outside edges
+ of the two joining lines, as long as required to intersect without going
+ over the miter limit.
+
+ <p>The miter length is the distance from the point where the lines touch on
+ the inside of the join to the intersection of the line edges on the
+ outside of the join. The miter limit ratio is the maximum allowed ratio of
+ the miter length to the line width. If the miter limit would be exceeded,
+ then a fifth line must be added to the polygon, connecting the two outside
+ lines, such that the distance from the inside point of the join to the
+ point in the middle of this fifth line is the maximum allowed value for
+ the miter length.
+
+ <p>The miter limit ratio can be explicitly set using the <dfn id=miterlimit
+ title=dom-context-2d-miterLimit><code>miterLimit</code></dfn> attribute.
+ On setting, zero and negative values must be ignored, leaving the value
+ unchanged.
+
+ <p>When the context is created, the <code
+ title=dom-context-2d-miterLimit><a
+ href="#miterlimit">miterLimit</a></code> attribute must initially have the
+ value <code>10.0</code>.</p>
+ <!-- XXX this section doesn't say what these attributes return or
+ what they do on setting. not a big deal; it's pretty obvious. but if
+ anyone complains, we'll have to add it -->
+ <!--
+XXXv3 dashed lines have been requested. Philip Taylor provides these
+notes on what would need to be defined for dashed lines:
+> I don't think it's entirely trivial to add, to the detail that's
+> necessary in a specification. The common graphics APIs (at least
+> Cairo, Quartz and java.awt.Graphics, and any SVG implementation) all
+> have dashes specified by passing an array of dash lengths (alternating
+> on/off), so that should be alright as long as you define what units
+> it's measured in and what happens when you specify an odd number of
+> values and how errors are handled and what happens if you update the
+> array later. But after that, what does it do when stroking multiple
+> subpaths, in terms of offsetting the dashes? When you use strokeRect,
+> where is offset 0? Does moveTo reset the offset? How does it interact
+> with lineCap/lineJoin? All the potential issues need test cases too,
+> and the implementations need to make sure they handle any edge cases
+> that the underlying graphics library does differently. (SVG Tiny 1.2
+> appears to skip some of the problems by leaving things undefined and
+> allowing whatever behaviour the graphics library has.)
+ -->
+
+ <h6 id=shadows><span class=secno>3.14.11.1.6. </span><dfn
+ id=shadows0>Shadows</dfn></h6>
+
+ <p>All drawing operations are affected by the four global shadow
+ attributes. Shadows form part of the source image during composition.
+
+ <p>The <dfn id=shadowcolor
+ title=dom-context-2d-shadowColor><code>shadowColor</code></dfn> attribute
+ sets the color of the shadow.</p>
+ <!-- XXX this section doesn't say what this attribute returns or
+ what it does on setting. if anyone complains, we'll have to add
+ it. shadowColor is a CSS Color attribute. -->
+
+ <p>When the context is created, the <code
+ title=dom-context-2d-shadowColor><a
+ href="#shadowcolor">shadowColor</a></code> attribute initially must be
+ fully-transparent black.
+
+ <p>The <dfn id=shadowoffsetx
+ title=dom-context-2d-shadowOffsetX><code>shadowOffsetX</code></dfn> and
+ <dfn id=shadowoffsety
+ title=dom-context-2d-shadowOffsetY><code>shadowOffsetY</code></dfn>
+ attributes specify the distance that the shadow will be offset in the
+ positive horizontal and positive vertical directions respectively. Their
+ values are in coordinate space units.</p>
+ <!-- XXX we don't define getting/setting -->
+
+ <p>When the context is created, the shadow offset attributes initially have
+ the value <code>0</code>.
+
+ <p>The <dfn id=shadowblur
+ title=dom-context-2d-shadowBlur><code>shadowBlur</code></dfn> attribute
+ specifies the number of coordinate space units that the blurring is to
+ cover. On setting, negative numbers must be ignored, leaving the attribute
+ unmodified.</p>
+ <!-- XXX we don't define getting/setting -->
+
+ <p>When the context is created, the <code
+ title=dom-context-2d-shadowBlur><a
+ href="#shadowblur">shadowBlur</a></code> attribute must initially have the
+ value <code>0</code>.
+
+ <p>Support for shadows is optional. When they are supported, then, when
+ shadows are drawn, they must be rendered using the specified color,
+ offset, and blur radius.</p>
+ <!-- XXX we don't really define what that means -->
+
+ <h6 id=simple><span class=secno>3.14.11.1.7. </span>Simple shapes
+ (rectangles)</h6>
+
+ <p>There are three methods that immediately draw rectangles to the bitmap.
+ They each take four arguments; the first two give the <var
+ title="">x</var> and <var title="">y</var> coordinates of the top left of
+ the rectangle, and the second two give the width and height of the
+ rectangle, respectively.
+
+ <p>Shapes are painted without affecting the current path, and are subject
+ to <span title=dom-context-2d->transformations</span>, <a href="#shadows0"
+ title=shadows>shadow effects</a>, <span title=globalAlpha>global
+ alpha</span>, <a href="#clipping" title="clipping path">clipping
+ paths</a>, and <span title=globalCompositeOperation>global composition
+ operators</span>.
+
+ <p>Negative values for width and height must cause the implementation to
+ raise an <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>The <dfn id=clearrect
+ title=dom-context-2d-clearRect><code>clearRect()</code></dfn> method must
+ clear the pixels in the specified rectangle to a fully transparent black,
+ erasing any previous image. If either height or width are zero, this
+ method has no effect.
+
+ <p>The <dfn id=fillrect
+ title=dom-context-2d-fillRect><code>fillRect()</code></dfn> method must
+ paint the specified rectangular area using the <code
+ title=dom-context-2d-fillStyle><a href="#fillstyle">fillStyle</a></code>.
+ If either height or width are zero, this method has no effect.
+
+ <p>The <dfn id=strokerect
+ title=dom-context-2d-strokeRect><code>strokeRect()</code></dfn> method
+ must stroke the path that would be created for the outline of a
+ rectangle of the specified size using the <code
+ title=dom-context-2d-strokeStyle><a
+ href="#strokestyle">strokeStyle</a></code>, <code
+ title=dom-context-2d-lineWidth><a href="#linewidth">lineWidth</a></code>,
+ <code title=dom-context-2d-lineJoin><a
+ href="#linejoin">lineJoin</a></code>, and (if appropriate) <code
+ title=dom-context-2d-miterLimit><a
+ href="#miterlimit">miterLimit</a></code> attributes. If both height and
+ width are zero, this method has no effect, since there is no path to
+ stroke (it's a point). If only one of the two is zero, then the method
+ will draw a line instead (the path for the outline is just a straight line
+ along the non-zero dimension).
+
+ <h6 id=complex><span class=secno>3.14.11.1.8. </span>Complex shapes (paths)</h6>
+
+ <p>The context always has a current path. There is only one current path,
+ it is not part of the <span title=dom-context-2d->drawing state</span>.
+
+ <p>A <dfn id=path>path</dfn> has a list of zero or more subpaths. Each
+ subpath consists of a list of one or more points, connected by straight or
+ curved lines, and a flag indicating whether the subpath is closed or not.
+ A closed subpath is one where the last point of the subpath is connected
+ to the first point of the subpath by a straight line. Subpaths with fewer
+ than two points are ignored when painting the path.
+
+ <p>Initially, the context's path must have zero subpaths.
+
+ <p>The <dfn id=beginpath
+ title=dom-context-2d-beginPath><code>beginPath()</code></dfn> method must
+ empty the list of subpaths so that the context once again has zero
+ subpaths.
+
+ <p>The <dfn id=moveto title=dom-context-2d-moveTo><code>moveTo(<var
+ title="">x</var>, <var title="">y</var>)</code></dfn> method must create a
+ new subpath with the specified point as its first (and only) point.
+
+ <p>The <dfn id=closepath
+ title=dom-context-2d-closePath><code>closePath()</code></dfn> method must
+ do nothing if the context has no subpaths. Otherwise, it must mark the
+ last subpath as closed, create a new subpath whose first point is the same
+ as the previous subpath's first point, and finally add this new subpath to
+ the path. (If the last subpath had more than one point in its list of
+ points, then this is equivalent to adding a straight line connecting the
+ last point back to the first point, thus "closing" the shape, and then
+ repeating the last <code title=dom-context-2d-moveTo><a
+ href="#moveto">moveTo()</a></code> call.)
+
+ <p>New points and the lines connecting them are added to subpaths using the
+ methods described below. In all cases, the methods only modify the last
+ subpath in the context's paths.
+
+ <p>The <dfn id=lineto title=dom-context-2d-lineTo><code>lineTo(<var
+ title="">x</var>, <var title="">y</var>)</code></dfn> method must do
+ nothing if the context has no subpaths. Otherwise, it must connect the
+ last point in the subpath to the given point (<var title="">x</var>, <var
+ title="">y</var>) using a straight line, and must then add the given point
+ (<var title="">x</var>, <var title="">y</var>) to the subpath.
+
+ <p>The <dfn id=quadraticcurveto
+ title=dom-context-2d-quadraticCurveTo><code>quadraticCurveTo(<var
+ title="">cpx</var>, <var title="">cpy</var>, <var title="">x</var>, <var
+ title="">y</var>)</code></dfn> method must do nothing if the context has
+ no subpaths. Otherwise it must connect the last point in the subpath to
+ the given point (<var title="">x</var>, <var title="">y</var>) by a
+ quadratic curve with control point (<var title="">cpx</var>, <var
+ title="">cpy</var>), and must then add the given point (<var
+ title="">x</var>, <var title="">y</var>) to the subpath.
+
+ <p>The <dfn id=beziercurveto
+ title=dom-context-2d-bezierCurveTo><code>bezierCurveTo(<var
+ title="">cp1x</var>, <var title="">cp1y</var>, <var title="">cp2x</var>,
+ <var title="">cp2y</var>, <var title="">x</var>, <var
+ title="">y</var>)</code></dfn> method must do nothing if the context has
+ no subpaths. Otherwise, it must connect the last point in the subpath to
+ the given point (<var title="">x</var>, <var title="">y</var>) using a
+ bezier curve with control points (<var title="">cp1x</var>, <var
+ title="">cp1y</var>) and (<var title="">cp2x</var>, <var
+ title="">cp2y</var>). Then, it must add the point (<var title="">x</var>,
+ <var title="">y</var>) to the subpath.
+
+ <p>The <dfn id=arcto title=dom-context-2d-arcTo><code>arcTo(<var
+ title="">x1</var>, <var title="">y1</var>, <var title="">x2</var>, <var
+ title="">y2</var>, <var title="">radius</var>)</code></dfn> method must do
+ nothing if the context has no subpaths. If the context <em>does</em> have
+ a subpath, then the behaviour depends on the arguments and the last point
+ in the subpath.
+
+ <p>Let the point (<var title="">x0</var>, <var title="">y0</var>) be the
+ last point in the subpath. Let <var title="">The Arc</var> be the shortest
+ arc given by the circumference of the circle that has one point tangent to the
+ line defined by the points (<var title="">x0</var>, <var
+ title="">y0</var>) and (<var title="">x1</var>, <var title="">y1</var>),
+ another point tangent to the line defined by the points (<var
+ title="">x1</var>, <var title="">y1</var>) and (<var title="">x2</var>,
+ <var title="">y2</var>), and that has radius <var title="">radius</var>.
+ The points at which this circle touches these two lines are called the
+ start and end tangent points respectively.
+
+ <p>If the point (<var title="">x2</var>, <var title="">y2</var>) is on the
+ line defined by the points (<var title="">x0</var>, <var
+ title="">y0</var>) and (<var title="">x1</var>, <var title="">y1</var>)
+ then the method must do nothing, as no arc would satisfy the above
+ constraints.
+
+ <p>Otherwise, the method must connect the point (<var title="">x0</var>,
+ <var title="">y0</var>) to the start tangent point by a straight line,
+ then connect the start tangent point to the end tangent point by <var
+ title="">The Arc</var>, and finally add the start and end tangent points
+ to the subpath.
+
+ <p>Negative or zero values for <var title="">radius</var> must cause the
+ implementation to raise an <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>The <dfn id=arcx- title=dom-context-2d-arc><code>arc(<var
+ title="">x</var>, <var title="">y</var>, <var title="">radius</var>, <var
+ title="">startAngle</var>, <var title="">endAngle</var>, <var
+ title="">anticlockwise</var>)</code></dfn> method draws an arc. If the
+ context has any subpaths, then the method must add a straight line from
+ the last point in the subpath to the start point of the arc. In any case,
+ it must draw the arc between the start point of the arc and the end point
+ of the arc, and add the start and end points of the arc to the subpath.
+ The arc and its start and end points are defined as follows:
+
+ <p>Consider a circle that has its origin at (<var title="">x</var>, <var
+ title="">y</var>) and that has radius <var title="">radius</var>. The
+ points at <var title="">startAngle</var> and <var title="">endAngle</var>
+ along the circle's circumference, measured in radians clockwise from the
+ positive x-axis, are the start and end points respectively. The arc is the
+ path along the circumference of this circle from the start point to the
+ end point, going anti-clockwise if the <var title="">anticlockwise</var>
+ argument is true, and clockwise otherwise.
+
+ <p>Negative or zero values for <var title="">radius</var> must cause the
+ implementation to raise an <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>The <dfn id=rectx title=dom-context-2d-rect><code>rect(<var
+ title="">x</var>, <var title="">y</var>, <var title="">w</var>, <var
+ title="">h</var>)</code></dfn> method must create a new subpath containing
+ just the four points (<var title="">x</var>, <var title="">y</var>), (<var
+ title="">x</var>+<var title="">w</var>, <var title="">y</var>), (<var
+ title="">x</var>+<var title="">w</var>, <var title="">y</var>+<var
+ title="">h</var>), (<var title="">x</var>, <var title="">y</var>+<var
+ title="">h</var>), with those four points connected by straight lines, and
+ must then mark the subpath as closed. It must then create a new subpath
+ with the point (<var title="">x</var>, <var title="">y</var>) as the only
+ point in the subpath.
+
+ <p>Negative values for <var title="">w</var> and <var title="">h</var> must
+ cause the implementation to raise an <code>INDEX_SIZE_ERR</code>
+ exception.
+
+ <p>The <dfn id=fill title=dom-context-2d-fill><code>fill()</code></dfn>
+ method must fill each subpath of the current path in turn, using <code
+ title=dom-context-2d-fillStyle><a href="#fillstyle">fillStyle</a></code>,
+ and using the non-zero winding number rule. Open subpaths must be
+ implicitly closed when being filled (without affecting the actual
+ subpaths).
+
+ <p>The <dfn id=stroke
+ title=dom-context-2d-stroke><code>stroke()</code></dfn> method must stroke
+ each subpath of the current path in turn, using the <code
+ title=dom-context-2d-strokeStyle><a
+ href="#strokestyle">strokeStyle</a></code>, <code
+ title=dom-context-2d-lineWidth><a href="#linewidth">lineWidth</a></code>,
+ <code title=dom-context-2d-lineJoin><a
+ href="#linejoin">lineJoin</a></code>, and (if appropriate) <code
+ title=dom-context-2d-miterLimit><a
+ href="#miterlimit">miterLimit</a></code> attributes.
+
+ <p>Paths, when filled or stroked, must be painted without affecting the
+ current path, and must be subject to <a
+ href="#transformations0">transformations</a>, <a href="#shadows0"
+ title=shadows>shadow effects</a>, <a href="#globalalpha"
+ title=dom-context-2d-globalAlpha>global alpha</a>, <a href="#clipping"
+ title="clipping path">clipping paths</a>, and <a
+ href="#globalcompositeoperation"
+ title=dom-context-2d-globalCompositeOperation>global composition
+ operators</a>.
+
+ <p class=note>The transformation is applied to the path when it is drawn,
+ not when the path is constructed. Thus, a single path can be constructed
+ and then drawn according to different transformations without recreating
+ the path.
+
+ <p>The <dfn id=clip title=dom-context-2d-clip><code>clip()</code></dfn>
+ method must create a new <dfn id=clipping>clipping path</dfn> by
+ calculating the intersection of the current clipping path and the area
+ described by the current path (after applying the <span>current
+ transformation</span>), using the non-zero winding number rule. Open
+ subpaths must be implicitly closed when computing the clipping path,
+ without affecting the actual subpaths.
+
+ <p>When the context is created, the initial clipping path is the rectangle
+ with the top left corner at (0,0) and the width and height of the
+ coordinate space.</p>
+ <!-- XXXv3
+ Jordan OSETE suggests:
+ * support ways of extending the clip region (union instead of intersection)
+ - also "add", "subtract", "replace", "intersect" and "xor"
+ * support ways of resetting the clip region without save/restore
+ -->
+
+ <p>The <dfn id=ispointinpath
+ title=dom-context-2d-isPointInPath><code>isPointInPath(<var
+ title="">x</var>, <var title="">y</var>)</code></dfn> method must return
+ true if the point given by the <var title="">x</var> and <var
+ title="">y</var> coordinates passed to the method, when treated as
+ coordinates in the canvas' coordinate space unaffected by the current
+ transformation, is within the area of the canvas that would be filled if
+ the current path was to be filled; and must return false otherwise.
+
+ <h6 id=images><span class=secno>3.14.11.1.9. </span>Images</h6>
+
+ <p>To draw images onto the canvas, the <dfn id=drawimage
+ title=dom-context-2d-drawImage><code>drawImage</code></dfn> method can be
+ used.
+
+ <p>This method is overloaded with three variants: <code
+ title="">drawImage(<var title="">image</var>, <var title="">dx</var>, <var
+ title="">dy</var>)</code>, <code title="">drawImage(<var
+ title="">image</var>, <var title="">dx</var>, <var title="">dy</var>, <var
+ title="">dw</var>, <var title="">dh</var>)</code>, and <code
+ title="">drawImage(<var title="">image</var>, <var title="">sx</var>, <var
+ title="">sy</var>, <var title="">sw</var>, <var title="">sh</var>, <var
+ title="">dx</var>, <var title="">dy</var>, <var title="">dw</var>, <var
+ title="">dh</var>)</code>. (Actually it is overloaded with six; each of
+ those three can take either an <code><a
+ href="#htmlimageelement">HTMLImageElement</a></code> or an <code><a
+ href="#htmlcanvaselement">HTMLCanvasElement</a></code> for the <var
+ title="">image</var> argument.) If not specified, the <var
+ title="">dw</var> and <var title="">dh</var> arguments default to the
+ values of <var title="">sw</var> and <var title="">sh</var>, interpreted
+ such that one CSS pixel in the image is treated as one unit in the canvas
+ coordinate space. If the <var title="">sx</var>, <var title="">sy</var>,
+ <var title="">sw</var>, and <var title="">sh</var> arguments are omitted,
+ they default to 0, 0, the image's intrinsic width in image pixels, and the
+ image's intrinsic height in image pixels, respectively.
+
+ <p>The <var title="">image</var> argument must be an instance of an
+ <code><a href="#htmlimageelement">HTMLImageElement</a></code> or <code><a
+ href="#htmlcanvaselement">HTMLCanvasElement</a></code>. If the <var
+ title="">image</var> is of the wrong type, the implementation must raise a
+ <code>TYPE_MISMATCH_ERR</code> exception. If one of the <var
+ title="">sx</var>, <var title="">sy</var>, <var title="">sw</var>, and
+ <var title="">sh</var> arguments is outside the size of the image, or if
+ one of the <var title="">dw</var> and <var title="">dh</var> arguments is
+ negative, the implementation must raise an <code>INDEX_SIZE_ERR</code>
+ exception. If the <var title="">image</var> argument is an <code><a
+ href="#htmlimageelement">HTMLImageElement</a></code> object whose <code
+ title=dom-attr-complete>complete</code> attribute is false, then the
+ implementation must raise an <code>INVALID_STATE_ERR</code> exception.
+
+ <p>When <code title=dom-context-2d-drawImage><a
+ href="#drawimage">drawImage()</a></code> is invoked, the specified region
+ of the image specified by the source rectangle (<var title="">sx</var>,
+ <var title="">sy</var>, <var title="">sw</var>, <var title="">sh</var>)
+ must be painted on the region of the canvas specified by the destination
+ rectangle (<var title="">dx</var>, <var title="">dy</var>, <var
+ title="">dw</var>, <var title="">dh</var>).
+
+ <p><img alt="" src="images/drawImage.png"></p>
+ <!-- no alt="" text
+ since the image is just repeating what was stated in the previous
+ paragraph. -->
+
+ <p>Images are painted without affecting the current path, and are subject
+ to <a href="#transformations0">transformations</a>, <a href="#shadows0"
+ title=shadows>shadow effects</a>, <a href="#globalalpha"
+ title=dom-context-2d-globalAlpha>global alpha</a>, <a href="#clipping"
+ title="clipping path">clipping paths</a>, and <a
+ href="#globalcompositeoperation"
+ title=dom-context-2d-globalCompositeOperation>global composition
+ operators</a>.</p>
+ <!-- XXX should somehow say that the image used is the actual image
+ of the target element, not the rendered image (e.g. height/width
+ attributes don't affect it) -->
+
+ <h6 id=pixel><span class=secno>3.14.11.1.10. </span><dfn id=pixel0>Pixel
+ manipulation</dfn></h6>
+
+ <p>The <dfn id=getimagedata
+ title=dom-context-2d-getImageData><code>getImageData(<var
+ title="">sx</var>, <var title="">sy</var>, <var title="">sw</var>, <var
+ title="">sh</var>)</code></dfn> method must return an <code><a
+ href="#imagedata">ImageData</a></code> object representing the underlying
+ pixel data for the area of the canvas denoted by the rectangle which has
+ one corner at the (<var title="">sx</var>, <var title="">sy</var>)
+ coordinate, and that has width <var title="">sw</var> and height <var
+ title="">sh</var>. Pixels outside the canvas must be returned as
+ transparent black. Pixels must be returned as non-premultiplied alpha
+ values.
+
+ <p><code><a href="#imagedata">ImageData</a></code> objects must be
+ initialised so that their <dfn id=height5
+ title=dom-imagedata-height><code>height</code></dfn> attribute is set to
+ <var title="">h</var>, the number of rows in the image data, their <dfn
+ id=width5 title=dom-imagedata-width><code>width</code></dfn> attribute is
+ set to <var title="">w</var>, the number of physical device pixels per row
+ in the image data, and the <dfn id=data1
+ title=dom-imagedata-data><code>data</code></dfn> attribute is initialised
+ to an array of <var title="">h</var>&times;<var title="">w</var>&times;4
+ integers. The pixels must be represented in this array in left-to-right
+ order, row by row, starting at the top left, with each pixel's red, green,
+ blue, and alpha components being given in that order. Each component of
+ each device pixel represented in this array must be in the range 0..255,
+ representing the 8 bit value for that component. At least one pixel must
+ be returned.
+
+ <p class=note>The width and height (<var title="">w</var> and <var
+ title="">h</var>) might be different than the <var title="">sw</var> and
+ <var title="">sh</var> arguments to the function, e.g. if the canvas is
+ backed by a high-resolution bitmap.
+
+ <p>If the <code title=dom-context-2d-getImageData><a
+ href="#getimagedata">getImageData(<var title="">sx</var>, <var
+ title="">sy</var>, <var title="">sw</var>, <var
+ title="">sh</var>)</a></code> method is called with either the <var
+ title="">sw</var> or <var title="">sh</var> arguments set to zero or
+ negative values, the method must raise an <code>INDEX_SIZE_ERR</code>
+ exception.
+
+ <p>The <dfn id=putimagedata
+ title=dom-context-2d-putImageData><code>putImageData(<var
+ title="">image</var>, <var title="">dx</var>, <var
+ title="">dy</var>)</code></dfn> method must take the given <code><a
+ href="#imagedata">ImageData</a></code> structure, and draw it at the
+ specified location <var title="">dx</var>,<var title="">dy</var> in the
+ canvas coordinate space, mapping each pixel represented by the <code><a
+ href="#imagedata">ImageData</a></code> structure into one device pixel.
+
+ <p>If the first argument to the method is not an object whose [[Class]]
+ property is <code><a href="#imagedata">ImageData</a></code>, but all of
+ the following conditions are true, then the method must treat the first
+ argument as if it was an <code><a href="#imagedata">ImageData</a></code>
+ object (and thus not raise the <code>TYPE_MISMATCH_ERR</code> exception):
+
+ <ul>
+ <li>The method's first argument is an object with <code
+ title=dom-imagedata-width><a href="#width5">width</a></code> and <code
+ title=dom-imagedata-height><a href="#height5">height</a></code>
+ attributes with integer values and a <code title=dom-imagedata-data><a
+ href="#data1">data</a></code> attribute whose value is an integer array.
+
+ <li>The <code><a href="#imagedata">ImageData</a></code> object's <code
+ title=dom-imagedata-width><a href="#width5">width</a></code> is greater
+ than zero.
+
+ <li>The <code><a href="#imagedata">ImageData</a></code> object's <code
+ title=dom-imagedata-height><a href="#height5">height</a></code> is
+ greater than zero.
+
+ <li>The <code><a href="#imagedata">ImageData</a></code> object's <code
+ title=dom-imagedata-width><a href="#width5">width</a></code> multiplied
+ by its <code title=dom-imagedata-height><a
+ href="#height5">height</a></code> multiplied by 4 is equal to the number
+ of entries in the <code><a href="#imagedata">ImageData</a></code>
+ object's <code title=dom-imagedata-data><a href="#data1">data</a></code>
+ array.
+
+ <li>The <code><a href="#imagedata">ImageData</a></code> object's <code
+ title=dom-imagedata-data><a href="#data1">data</a></code> array only
+ contains entries that are in the range 0 to 255 inclusive.
+ </ul>
+
+ <p>The handling of pixel rounding when the specified coordinates do not
+ exactly map to the device coordinate space is not defined by this
+ specification, except that the following must result in no visible changes
+ to the rendering:
+
+ <pre>context.putImageData(context.getImageData(x, y, w, h), x, y);</pre>
+
+ <p>...for any value of <var title="">x</var> and <var title="">y</var>. In
+ other words, while user agents may round the arguments of the two methods
+ so that they map to device pixel boundaries, any rounding performed must
+ be performed consistently for both the <code
+ title=dom-context-2d-getImageData><a
+ href="#getimagedata">getImageData()</a></code> and <code
+ title=dom-context-2d-putImageData><a
+ href="#putimagedata">putImageData()</a></code> operations.
+
+ <p>The current transformation matrix must not affect the <code
+ title=dom-context-2d-getImageData><a
+ href="#getimagedata">getImageData()</a></code> and <code
+ title=dom-context-2d-putImageData><a
+ href="#putimagedata">putImageData()</a></code> methods.
+
+ <div class=example>
+ <p>The data returned by <code title=dom-context-2d-getImageData><a
+ href="#getimagedata">getImageData()</a></code> is at the resolution of
+ the canvas backing store, which is likely to not be one device pixel to
+ each CSS pixel if the display used is a high resolution display. Thus,
+ while one could create an <code><a href="#imagedata">ImageData</a></code>
+ object, one would not necessarily know what resolution the canvas
+ expected (how many pixels the canvas wants to paint over one coordinate
+ space unit pixel).</p>
+
+ <p>In the following example, the script first obtains the size of the
+ canvas backing store, and then generates a few new <code><a
+ href="#imagedata">ImageData</a></code> objects which can be used.</p>
+
+ <pre>
+ // canvas is a reference to a &lt;canvas> element
+ // (note: this example uses JavaScript 1.7 features)
+ var context = canvas.getContext('2d');
+ var backingStore = context.getImageData(0, 0, canvas.width, canvas.height);
+ var actualWidth = backingStore.width;
+ var actualHeight = backingStore.height;
+
+ function CreateImageData(w, h) {
+ return {
+ height: h,
+ width: w,
+ data: [i for (i in function (n) { for (let i = 0; i < n; i += 1) yield 0 }(w*h*4)) ]
+ };
+ }
+
+ // create some plasma
+ var plasma = CreateImageData(actualWidth, actualHeight);
+ FillPlasma(plasma, 'green'); // green plasma
+
+ // create a cloud
+ var cloud = CreateImageData(actualWidth, actualHeight);
+ FillCloud(cloud, actualWidth/2, actualHeight/2); // put a cloud in the middle
+
+ // paint them on top of each other
+ context.putImageData(plasma, 0, 0);
+ context.putImageData(cloud, 0, 0);
+
+ function FillPlasma(data) { ... }
+ function FillCloud(data, x, y) { ... }
+</pre>
+ </div>
+
+ <h6 id=drawing><span class=secno>3.14.11.1.11. </span>Drawing model</h6>
+
+ <p>When a shape or image is painted, user agents must follow these steps,
+ in the order given (or act as if they do):
+
+ <ol>
+ <li>If the current transformation matrix is infinite, then do nothing.
+ Abort these steps.
+
+ <li>The coordinates are transformed by the current transformation matrix.
+
+ <li>The shape or image is rendered, creating image <var title="">A</var>,
+ as described in the previous sections. For shapes, the current fill,
+ stroke, and line styles must be honoured.
+
+ <li>The shadow is rendered from image <var title="">A</var>, using the
+ current shadow styles, creating image <var title="">B</var>.
+
+ <li>Image <var title="">A</var> is composited over image <var
+ title="">B</var> creating the source image.
+
+ <li>The source image has its alpha adjusted by <code
+ title=dom-context-2d-globalAlpha><a
+ href="#globalalpha">globalAlpha</a></code>.
+
+ <li>Within the clip region (as affected by the current transformation
+ matrix), the source image is composited over the current canvas bitmap
+ using the current composition operator.
+ </ol>
+ <!--
+ <h5 id="3d">The 3D context</h5>
+
+ <p class="big-issue">Well, one day.</p>
+-->
+
+ <h4 id=the-map><span class=secno>3.14.12. </span>The <dfn
+ id=map><code>map</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlmapelement>HTMLMapElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#areas" title=dom-map-areas>areas</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#map">map</a></code> element, in conjunction with any
+ <code><a href="#area">area</a></code> element descendants, defines an <a
+ href="#image">image map</a>.
+
+ <p>The <dfn id=areas title=dom-map-areas><code>areas</code></dfn> attribute
+ must return an <code><a href="#htmlcollection0">HTMLCollection</a></code>
+ rooted at the <code><a href="#map">map</a></code> element, whose filter
+ matches only <code><a href="#area">area</a></code> elements.
+
+ <h4 id=the-area><span class=secno>3.14.13. </span>The <dfn
+ id=area><code>area</code></dfn> element</h4>
+
+ <p><a href="#strictly">Strictly inline-level content</a>.</p>
+ <!-- XXX as defined, the area element on its own isn't enough to
+ satisfy "significant inline-level content" model. should it be? -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed, but only as a descendant of a <code><a
+ href="#map">map</a></code> element.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-area-alt><a href="#alt1">alt</a></code>
+
+ <dd><code title=attr-area-coords><a href="#coords">coords</a></code>
+
+ <dd><code title=attr-area-shape><a href="#shape">shape</a></code>
+
+ <dd><code title=attr-hyperlink-href><a href="#href6">href</a></code>
+
+ <dd><code title=attr-hyperlink-target><a href="#target3">target</a></code>
+
+ <dd><code title=attr-hyperlink-ping><a href="#ping">ping</a></code>
+
+ <dd><code title=attr-hyperlink-rel><a href="#rel3">rel</a></code>
+
+ <dd><code title=attr-hyperlink-media><a href="#media12">media</a></code>
+
+ <dd><code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>
+
+ <dd><code title=attr-hyperlink-type><a href="#type17">type</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlareaelement>HTMLAreaElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#alt2" title=dom-area-alt>alt</a>;
+ attribute DOMString <a href="#coords0" title=dom-area-coords>coords</a>;
+ attribute DOMString <a href="#shape0" title=dom-area-shape>shape</a>;
+ attribute DOMString <a href="#href4" title=dom-area-href>href</a>;
+ attribute DOMString <a href="#target2" title=dom-area-target>target</a>;
+ attribute DOMString <a href="#ping1" title=dom-area-ping>ping</a>;
+ attribute DOMString <a href="#rel2" title=dom-area-rel>rel</a>;
+ readonly attribute DOMTokenList <a href="#rellist1" title=dom-area-relList>relList</a>;
+ attribute DOMString <a href="#media11" title=dom-area-media>media</a>;
+ attribute DOMString <a href="#hreflang2" title=dom-area-hreflang>hreflang</a>;
+ attribute DOMString <a href="#type10" title=dom-area-type>type</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#area">area</a></code> element represents either a
+ hyperlink with some text and a corresponding area on an <a
+ href="#image">image map</a>, or a dead area on an image map.
+
+ <p>If the <code><a href="#area">area</a></code> element has an <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code> attribute, then
+ the <code><a href="#area">area</a></code> element represents a <a
+ href="#hyperlinks">hyperlink</a>; the <dfn id=alt1
+ title=attr-area-alt><code>alt</code></dfn> attribute, which must then be
+ present, specifies the text.
+
+ <p>However, if the <code><a href="#area">area</a></code> element has no
+ <code title=attr-hyperlink-href><a href="#href6">href</a></code>
+ attribute, then the area represented by the element cannot be selected,
+ and the <code title=attr-area-alt><a href="#alt1">alt</a></code> attribute
+ must be omitted.
+
+ <p>In both cases, the <code title=attr-area-shape><a
+ href="#shape">shape</a></code> and <code title=attr-area-coords><a
+ href="#coords">coords</a></code> attributes specify the area.
+
+ <p>The <dfn id=shape title=attr-area-shape><code>shape</code></dfn>
+ attribute is an <a href="#enumerated">enumerated attribute</a>. The
+ following table lists the keywords defined for this attribute. The states
+ given in the first cell of the rows with keywords give the states to
+ which those keywords map. Some of the keywords are non-conforming, as
+ noted in the last column.
+
+ <table>
+ <thead>
+ <tr>
+ <th>State
+
+ <th>Keywords
+
+ <th>Notes
+
+ <tbody>
+ <tr>
+ <td rowspan=2><dfn id=circle title=attr-area-shape-circle>Circle
+ state</dfn>
+
+ <td><code title="">circ</code>
+
+ <td>Non-conforming
+
+ <tr>
+ <td><code title="">circle</code>
+
+ <td>
+
+ <tr>
+ <td><dfn id=default0 title=attr-area-shape-default>Default state</dfn>
+
+ <td><code title="">default</code>
+
+ <td>
+
+ <tr>
+ <td rowspan=2><dfn id=polygon title=attr-area-shape-poly>Polygon
+ state</dfn>
+
+ <td><code title="">poly</code>
+
+ <td>
+
+ <tr>
+ <td><code title="">polygon</code>
+
+ <td>Non-conforming
+
+ <tr>
+ <td rowspan=2><dfn id=rectangle title=attr-area-shape-rect>Rectangle
+ state</dfn>
+
+ <td><code title="">rect</code>
+
+ <td>
+
+ <tr>
+ <td><code title="">rectangle</code>
+
+ <td>Non-conforming
+ </table>
+
+ <p>The attribute may be omitted. The <i>missing value default</i> is the <a
+ href="#rectangle" title=attr-area-shape-rect>rectangle</a> state.
+
+ <p>The <dfn id=coords title=attr-area-coords><code>coords</code></dfn>
+ attribute must, if specified, contain a <a href="#valid4">valid list of
+ integers</a>. This attribute gives the coordinates for the shape described
+ by the <code title=attr-area-shape><a href="#shape">shape</a></code>
+ attribute. The processing for this attribute is described as part of the
+ <a href="#image">image map</a> processing model.
+
+ <p>In the <a href="#circle" title=attr-area-shape-circle>circle state</a>,
+ <code><a href="#area">area</a></code> elements must have a <code
+ title=attr-area-coords><a href="#coords">coords</a></code> attribute
+ present, with three integers, the last of which must be non-negative. The
+ first integer must be the distance in CSS pixels from the left edge of the
+ image to the center of the circle, the second integer must be the distance
+ in CSS pixels from the top edge of the image to the center of the circle,
+ and the third integer must be the radius of the circle, again in CSS
+ pixels.
+
+ <p>In the <a href="#default0" title=attr-area-shape-default>default
+ state</a>, <code><a href="#area">area</a></code> elements must not
+ have a <code title=attr-area-coords><a href="#coords">coords</a></code>
+ attribute.
+
+ <p>In the <a href="#polygon" title=attr-area-shape-poly>polygon state</a>,
+ <code><a href="#area">area</a></code> elements must have a <code
+ title=attr-area-coords><a href="#coords">coords</a></code> attribute with
+ at least six integers, and the number of integers must be even. Each pair
+ of integers must represent a coordinate given as the distances from the
+ left and the top of the image in CSS pixels respectively, and all the
+ coordinates together must represent the points of the polygon, in order.
+
+ <p>In the <a href="#rectangle" title=attr-area-shape-rect>rectangle
+ state</a>, <code><a href="#area">area</a></code> elements must have a
+ <code title=attr-area-coords><a href="#coords">coords</a></code> attribute
+ with exactly four integers, the first of which must be less than the
+ third, and the second of which must be less than the fourth. The four
+ points must represent, respectively, the distance from the left edge of
+ the image to the left side of the rectangle, the distance from the top
+ edge to the top side, the distance from the left edge to the right side,
+ and the distance from the top edge to the bottom side, all in CSS pixels.
+
+ <p>When user agents allow users to <a href="#following0" title="following
+ hyperlinks">follow hyperlinks</a> created using the <code><a
+ href="#area">area</a></code> element, as described in the next section,
+ the <code title=attr-hyperlink-href><a href="#href6">href</a></code>,
+ <code title=attr-hyperlink-target><a href="#target3">target</a></code> and
+ <code title=attr-hyperlink-ping><a href="#ping">ping</a></code> attributes
+ decide how the link is followed. The <code title=attr-hyperlink-rel><a
+ href="#rel3">rel</a></code>, <code title=attr-hyperlink-media><a
+ href="#media12">media</a></code>, <code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>, and <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attributes may
+ be used to indicate to the user the likely nature of the target resource
+ before the user follows the link.
+
+ <p>The <code title=attr-hyperlink-target><a
+ href="#target3">target</a></code>, <code title=attr-hyperlink-ping><a
+ href="#ping">ping</a></code>, <code title=attr-hyperlink-rel><a
+ href="#rel3">rel</a></code>, <code title=attr-hyperlink-media><a
+ href="#media12">media</a></code>, <code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>, and <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attributes
+ must be omitted if the <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attribute is not present.
+
+ <p>The <a href="#activation0">activation behavior</a> of <code><a
+ href="#area">area</a></code> elements is to run the following steps:
+
+ <ol>
+ <li>If the <code title=event-DOMActivate>DOMActivate</code> event in
+ question is not <span title=concept-events-trusted>trusted</span> (i.e. a
+ <code title=dom-click><a href="#click">click()</a></code> method call was
+ the reason for the event being dispatched), and the <code><a
+ href="#area">area</a></code> element's <code
+ title=attr-area-target>target</code> attribute is <span
+ class=big-issue>...</span> then raise an <code>INVALID_ACCESS_ERR</code>
+ exception.
+
+ <li>Otherwise, the user agent must <a href="#following0" title="following
+ hyperlinks">follow the hyperlink</a> defined by the <code><a
+ href="#area">area</a></code> element, if any.
+ </ol>
+
+ <p class=note>One way that a user agent can enable users to follow
+ hyperlinks is by allowing <code><a href="#area">area</a></code> elements
+ to be clicked, or focussed and activated by the keyboard. This <a
+ href="#interactive1" title="interactive elements">will cause</a> the
+ aforementioned <a href="#activation0">activation behavior</a> to be
+ invoked.
+
+ <p>The DOM attributes <dfn id=alt2
+ title=dom-area-alt><code>alt</code></dfn>, <dfn id=coords0
+ title=dom-area-coords><code>coords</code></dfn>, <dfn id=shape0
+ title=dom-area-shape><code>shape</code></dfn>, <dfn id=href4
+ title=dom-area-href><code>href</code></dfn>, <dfn id=target2
+ title=dom-area-target><code>target</code></dfn>, <dfn id=ping1
+ title=dom-area-ping><code>ping</code></dfn>, <dfn id=rel2
+ title=dom-area-rel><code>rel</code></dfn>, <dfn id=media11
+ title=dom-area-media><code>media</code></dfn>, <dfn id=hreflang2
+ title=dom-area-hreflang><code>hreflang</code></dfn>, and <dfn id=type10
+ title=dom-area-type><code>type</code></dfn>, each must <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM attribute <dfn id=rellist1
+ title=dom-area-relList><code>relList</code></dfn> must <a
+ href="#reflect">reflect</a> the <code title=attr-hyperlink-rel><a
+ href="#rel3">rel</a></code> content attribute.
+
+ <h4 id=image-maps><span class=secno>3.14.14. </span>Image maps</h4>
+ <!-- TESTS
+ http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E%0A%3Cimg%20src%3D%22http%3A//hixie.ch/resources/images/smallcats%22%20usemap%3D%23a%20onclick%3Dw%28%27img%27%29%3E%0A%3Cmap%20name%3Da%3E%0A%20%3Carea%20onclick%3Dw%28%271%27%29%20coords%3D%270%25%200%25%20100%25%20100%25%27%20href%3Djavascript%3A%3E%0A%3C/map%3E
+ http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E%0A%3Cbody%20onfocus%3D%22w%28document.activeElement.tagName%29%22%3E%0A%3Cimg%20src%3D%22http%3A//hixie.ch/resources/images/smallcats%22%20usemap%3D%23a%20onclick%3Dw%28%27img%27%29%20onfocus%3D%22w%28document.activeElement.tagName%29%22%3E%0A%3Cimg%20src%3D%22http%3A//hixie.ch/resources/images/sample%22%20usemap%3D%23a%20onclick%3Dw%28%27img%27%29%20onfocus%3D%22w%28document.activeElement.tagName%29%22%3E%0A%3Cmap%20name%3Da%20onfocus%3D%22w%28document.activeElement.tagName%29%22%3E%0A%20%3Carea%20onclick%3Dw%28%271%27%29%20coords%3D%270%200%2050%2050%27%20href%3Djavascript%3A%20onfocus%3D%22w%28document.activeElement.tagName%29%22%3E%0A%3C/map%3E%0A%3Cscript%3E%0A%20var%20x%20%3D%20document.getElementsByTagName%28%27img%27%29%5B0%5D%3B%0A%20x.parentNode.appendChild%28x%29%3B%0A%20document.getElementsByTagName%28%27area%27%29%5B0%5D.focus%28%29%3B%0A%3C/script%3E
+ http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3Ex%3Cmap%3E%3Carea%20shape%3Dpolyg%20coords%3D%221%2C2%203%22%3E%3C/map%3E%0A%3Cscript%3Ex%20%3D%20document.getElementsByTagName%28%27area%27%29%5B0%5D%3B%20w%28x.shape%20+%20%27%20%27%20+%20x.coords%29%3C/script%3E
+ http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E%0D%0A%3Cp%3E%3Cimg%20src%3D%22http%3A//hixie.ch/resources/images/astrophy/128%22%20usemap%3D%23a%3E%0D%0A%3Cmap%20name%3Da%3E%3Carea%20shape%3Dcirc%20coords%3D%2220%2C20%2C10%25%22%20href%3D%23%3E%3Carea%20shape%3Dcirc%20coords%3D%2220%2C20%2C10%22%20href%3D%23%3E%3C/map%3E%0D%0A%3Cscript%3Edocument.write%28document.getElementsByTagName%28%27area%27%29%5B0%5D.coords%29%3C/script%3E
+ -->
+
+ <p>An <dfn id=image>image map</dfn> allows geometric areas on an image to
+ be associated with <a href="#hyperlinks" title=hyperlink>hyperlinks</a>.
+
+ <p>An image, in the form of an <code><a href="#img">img</a></code> element
+ or an <code><a href="#object">object</a></code> element representing an
+ image, may be associated with an image map (in the form of a <code><a
+ href="#map">map</a></code> element) by specifying a <dfn id=usemap1
+ title=attr-hyperlink-usemap><code>usemap</code></dfn> attribute on the
+ <code><a href="#img">img</a></code> or <code><a
+ href="#object">object</a></code> element. The <code
+ title=attr-area-usemap>usemap</code> attribute, if specified, must be a <a
+ href="#valid7">valid hashed ID reference</a> to a <code><a
+ href="#map">map</a></code> element.
+
+ <p>If an <code><a href="#img">img</a></code> element or an <code><a
+ href="#object">object</a></code> element representing an image has a <code
+ title=attr-area-usemap>usemap</code> attribute specified, user agents must
+ process it as follows:
+
+ <ol>
+ <li>
+ <p>First, <a href="#rules5">rules for parsing a hashed ID reference</a>
+ to a <code><a href="#map">map</a></code> element must be followed. This
+ will return either an element (the <var title="">map</var>) or null.
+
+ <li>
+ <p>If that returned null, then abort these steps. The image is not
+ associated with an image map after all.
+
+ <li>
+ <p>Otherwise, the user agent must collect all the <code><a
+ href="#area">area</a></code> elements that are descendants of the <var
+ title="">map</var>. Let those be the <var title="">areas</var>.
+ </ol>
+
+ <p>Having obtained the list of <code><a href="#area">area</a></code>
+ elements that form the image map (the <var title="">areas</var>),
+ interactive user agents must process the list in one of two ways.
+
+ <p>If the user agent intends to show the text that the <code><a
+ href="#img">img</a></code> element represents, then it must use the
+ following steps.
+
+ <p class=note>In user agents that do not support images, or that have
+ images disabled, <code><a href="#object">object</a></code> elements cannot
+ represent images, and thus this section never applies (the fallback
+ content is shown instead). The following steps therefore only apply to
+ <code><a href="#img">img</a></code> elements.
+
+ <ol>
+ <li>
+ <p>Remove all the <code><a href="#area">area</a></code> elements in <var
+ title="">areas</var> that have no <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attribute.
+
+ <li>
+ <p>Remove all the <code><a href="#area">area</a></code> elements in <var
+ title="">areas</var> that have no <code title=attr-area-alt><a
+ href="#alt1">alt</a></code> attribute, or whose <code
+ title=attr-area-alt><a href="#alt1">alt</a></code> attribute's value is
+ the empty string, <em>if</em> there is another <code><a
+ href="#area">area</a></code> element in <var title="">areas</var> with
+ the same value in the <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attribute and with a non-empty <code
+ title=attr-area-alt><a href="#alt1">alt</a></code> attribute.
+
+ <li>
+ <p>Each remaining <code><a href="#area">area</a></code> element in <var
+ title="">areas</var> represents a <a href="#hyperlinks">hyperlink</a>.
+ Those hyperlinks should all be made available to the user in a manner
+ associated with the text of the <code><a href="#img">img</a></code>
+ element.</p>
+
+ <p>In this context, user agents may represent <code><a
+ href="#area">area</a></code> and <code><a href="#img">img</a></code>
+ elements with no specified <code title="">alt</code> attributes, or
+ whose <code title="">alt</code> attributes are the empty string or some
+ other non-visible text, in a user-agent-defined fashion intended to
+ indicate the lack of suitable author-provided text.
+ </ol>
+
+ <p>If the user agent intends to show the image and allow interaction with
+ the image to select hyperlinks, then the image must be associated with a
+ set of layered shapes, taken from the <code><a
+ href="#area">area</a></code> elements in <var title="">areas</var>, in
+ reverse tree order (so the last specified <code><a
+ href="#area">area</a></code> element in the <var title="">map</var> is the
+ bottom-most shape, and the first element in the <var title="">map</var>,
+ in tree order, is the top-most shape).
+
+ <p>Each <code><a href="#area">area</a></code> element in <var
+ title="">areas</var> must be processed as follows to obtain a shape to
+ layer onto the image:
+
+ <ol>
+ <li>
+ <p>Find the state that the element's <code title=attr-area-shape><a
+ href="#shape">shape</a></code> attribute represents.
+
+ <li>
+ <p>Use the <a href="#rules3">rules for parsing a list of integers</a> to
+ parse the element's <code title=attr-area-coords><a
+ href="#coords">coords</a></code> attribute, if it is present, and let
+ the result be the <var title="">coords</var> list. If the attribute is
+ absent, let the <var title="">coords</var> list be the empty list.
+
+ <li>
+ <p>If the number of items in the <var title="">coords</var> list is less
+ than the minimum number given for the <code><a
+ href="#area">area</a></code> element's current state, as per the
+ following table, then the shape is empty; abort these steps.</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>State
+
+ <th>Minimum number of items
+
+ <tbody>
+ <tr>
+ <td><a href="#circle" title=attr-area-shape-circle>Circle state</a>
+
+ <td>3
+
+ <tr>
+ <td><a href="#default0" title=attr-area-shape-default>Default
+ state</a>
+
+ <td>0
+
+ <tr>
+ <td><a href="#polygon" title=attr-area-shape-poly>Polygon state</a>
+
+ <td>6
+
+ <tr>
+ <td><a href="#rectangle" title=attr-area-shape-rect>Rectangle
+ state</a>
+
+ <td>4
+ </table>
+
+ <li>
+ <p>Check for excess items in the <var title="">coords</var> list as per
+ the entry in the following list corresponding to the <code
+ title=attr-area-shape><a href="#shape">shape</a></code> attribute's
+ state:</p>
+
+ <dl class=switch>
+ <dt><a href="#circle" title=attr-area-shape-circle>Circle state</a>
+
+ <dd>Drop any items in the list beyond the third.
+
+ <dt><a href="#default0" title=attr-area-shape-default>Default state</a>
+
+ <dd>Drop all items in the list.
+
+ <dt><a href="#polygon" title=attr-area-shape-poly>Polygon state</a>
+
+ <dd>Drop the last item if there's an odd number of items.
+
+ <dt><a href="#rectangle" title=attr-area-shape-rect>Rectangle state</a>
+
+ <dd>Drop any items in the list beyond the fourth.
+ </dl>
+
+ <li>
+ <p>If the <code title=attr-area-shape><a href="#shape">shape</a></code>
+ attribute represents the <a href="#rectangle"
+ title=attr-area-shape-rect>rectangle state</a>, and the first number in
+ the list is numerically greater than the third number in the list, then
+ swap those two numbers around.
+
+ <li>
+ <p>If the <code title=attr-area-shape><a href="#shape">shape</a></code>
+ attribute represents the <a href="#rectangle"
+ title=attr-area-shape-rect>rectangle state</a>, and the second number in
+ the list is numerically greater than the fourth number in the list, then
+ swap those two numbers around.
+
+ <li>
+ <p>If the <code title=attr-area-shape><a href="#shape">shape</a></code>
+ attribute represents the <a href="#circle"
+ title=attr-area-shape-circle>circle state</a>, and the third number in
+ the list is less than or equal to zero, then the shape is empty; abort
+ these steps.
+
+ <li>
+ <p>Now, the shape represented by the element is the one described for the
+ entry in the list below corresponding to the state of the <code
+ title=attr-area-shape><a href="#shape">shape</a></code> attribute:</p>
+
+ <dl class=switch>
+ <dt><a href="#circle" title=attr-area-shape-circle>Circle state</a>
+
+ <dd>
+ <p>Let <var title="">x</var> be the first number in <var
+ title="">coords</var>, <var title="">y</var> be the second number, and
+ <var title="">r</var> be the third number.</p>
+
+ <p>The shape is a circle whose center is <var title="">x</var> CSS
+ pixels from the left edge of the image and <var title="">y</var> CSS
+ pixels from the top edge of the image, and whose radius is <var
+ title="">r</var> pixels.</p>
+
+ <dt><a href="#default0" title=attr-area-shape-default>Default state</a>
+
+ <dd>
+ <p>The shape is a rectangle that exactly covers the entire image.</p>
+
+ <dt><a href="#polygon" title=attr-area-shape-poly>Polygon state</a>
+
+ <dd>
+ <p>Let <var title="">x<sub title=""><var title="">i</var></sub></var>
+ be the <span>(2<var title="">i</var>)</span>th entry in <var
+ title="">coords</var>, and <var title="">y<sub title=""><var
+ title="">i</var></sub></var> be the <span>(2<var
+ title="">i</var>+1)</span>th entry in <var title="">coords</var> (the
+ first entry in <var title="">coords</var> being the one with index 0).</p>
+
+ <p>Let <var title="">the coordinates</var> be (<var title="">x<sub
+ title=""><var title="">i</var></sub></var>, <var title="">y<sub
+ title=""><var title="">i</var></sub></var>), interpreted in CSS pixels
+ measured from the top left of the image, for all integer values of
+ <var title="">i</var> from 0 to <span>(<var
+ title="">N</var>/2)-1</span>, where <var title="">N</var> is the
+ number of items in <var title="">coords</var>.</p>
+
+ <p>The shape is a polygon whose vertices are given by <var title="">the
+ coordinates</var>, and whose interior is established using the
+ even-odd rule. <a href="#refsGRAPHICS">[GRAPHICS]</a></p>
+ <!-- If anyone has this book ("Computer Graphics: Principles and
+ Practice in C"), please check page 34 or so and see if it
+ makes any references to literature in the bibliographic
+ section to define the "even-odd" rule for polygon filling
+ and hit testing.
+ <dd id="refsGRAPHICS">[GRAPHICS]</dd>
+ <dd>(Non-normative) <cite>Computer Graphics: Principles and Practice in C</cite>, Second Edition, J. Foley, A. van Dam, S. Feiner, J. Hughes. Addison-Wesley, July 1995. ISBN 0-201-84840-6.</dd>
+ -->
+ <!--
+ browsers implement the even-odd rule / even winding rule:
+ http://software.hixie.ch/utilities/js/live-dom-viewer/?%3C%21DOCTYPE%20html%3E%0A%3Cimg%20usemap%3D%22%23x%22%20src%3D%22/resources/images/sample%22%3E%0A%3Cmap%20name%3D%22x%22%3E%0A%20%20%3Carea%20shape%3Dpolygon%20coords%3D%220%2C0%200%2C100%20100%2C100%20100%2C2%201%2C2%202%2C1%202%2C99%2099%2C99%2099%2C0%22%20href%3Da%3E%0A%3C/map%3E%0A
+ -->
+
+
+ <dt><a href="#rectangle" title=attr-area-shape-rect>Rectangle state</a>
+
+ <dd>
+ <p>Let <var title="">x1</var> be the first number in <var
+ title="">coords</var>, <var title="">y1</var> be the second number,
+ <var title="">x2</var> be the third number, and <var title="">y2</var>
+ be the fourth number.</p>
+
+ <p>The shape is a rectangle whose top-left corner is given by the
+ coordinate (<var title="">x1</var>, <var title="">y1</var>) and whose
+ bottom right corner is given by the coordinate (<var
+ title="">x2</var>, <var title="">y2</var>), those coordinates being
+ interpreted as CSS pixels from the top left corner of the image.</p>
+ </dl>
+ </ol>
+
+ <p>Mouse clicks on an image associated with a set of layered shapes per the
+ above algorithm must be dispatched to the top-most shape covering the
+ point that the pointing device indicated (if any), and then must be
+ dispatched again (with a new <code>Event</code> object) to the image
+ element itself. User agents may also allow individual <code><a
+ href="#area">area</a></code> elements representing <a href="#hyperlinks"
+ title=hyperlink>hyperlinks</a> to be selected and activated (e.g. using a
+ keyboard); events from this are not also propagated to the image.
+
+ <p class=note>Because a <code><a href="#map">map</a></code> element (and
+ its <code><a href="#area">area</a></code> elements) can be associated with
+ multiple <code><a href="#img">img</a></code> elements, it is possible for
+ an <code><a href="#area">area</a></code> element to correspond to multiple
+ focusable areas of the document.
+
+ <p>Image maps are <em><a href="#live">live</a></em>; if the DOM is mutated,
+ then the user agent must act as if it had rerun the algorithms for image
+ maps.
+
+ <h3 id=tabular><span class=secno>3.15. </span>Tabular data</h3>
+
+ <h4 id=the-table><span class=secno>3.15.1. </span>The <dfn
+ id=table><code>table</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd>In this order: optionally a <code><a
+ href="#caption0">caption</a></code> element, followed by either zero or
+ more <code><a href="#colgroup">colgroup</a></code> elements, followed
+ optionally by a <code><a href="#thead0">thead</a></code> element,
+ followed optionally by a <code><a href="#tfoot0">tfoot</a></code>
+ element, followed by either zero or more <code><a
+ href="#tbody">tbody</a></code> elements <em>or</em> one or more <code><a
+ href="#tr">tr</a></code> elements, followed optionally by a <code><a
+ href="#tfoot0">tfoot</a></code> element (but there can only be one
+ <code><a href="#tfoot0">tfoot</a></code> element child in total).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltableelement>HTMLTableElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute <span>HTMLTableCaptionElement</span> <a href="#caption" title=dom-table-caption>caption</a>;
+ HTMLElement <a href="#createcaption" title=dom-table-createCaption>createCaption</a>();
+ void <a href="#deletecaption" title=dom-table-deleteCaption>deleteCaption</a>();
+ attribute <a href="#htmltablesectionelement">HTMLTableSectionElement</a> <a href="#thead" title=dom-table-tHead>tHead</a>;
+ HTMLElement <a href="#createthead" title=dom-table-createTHead>createTHead</a>();
+ void <a href="#deletethead" title=dom-table-deleteTHead>deleteTHead</a>();
+ attribute <a href="#htmltablesectionelement">HTMLTableSectionElement</a> <a href="#tfoot" title=dom-table-tFoot>tFoot</a>;
+ HTMLElement <a href="#createtfoot" title=dom-table-createTFoot>createTFoot</a>();
+ void <a href="#deletetfoot" title=dom-table-deleteTFoot>deleteTFoot</a>();
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#tbodies" title=dom-table-tBodies>tBodies</a>;
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#rows" title=dom-table-rows>rows</a>;
+ HTMLElement <a href="#insertrow" title=dom-table-insertRow>insertRow</a>(in long index);
+ void <a href="#deleterow" title=dom-table-deleteRow>deleteRow</a>(in long index);
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#table">table</a></code> element represents data with
+ more than one dimension (a <a href="#table1"
+ title=concept-table>table</a>).
+
+ <p>The children of a <code><a href="#table">table</a></code> element must
+ be, in order:
+
+ <ol>
+ <li>
+ <p>Zero or one <code><a href="#caption0">caption</a></code> elements.
+
+ <li>
+ <p>Zero or more <code><a href="#colgroup">colgroup</a></code> elements.
+
+ <li>
+ <p>Zero or one <code><a href="#thead0">thead</a></code> elements.
+
+ <li>
+ <p>Zero or one <code><a href="#tfoot0">tfoot</a></code> elements, if the
+ last element in the table is not a <code><a
+ href="#tfoot0">tfoot</a></code> element.
+
+ <li>
+ <p>Either:</p>
+
+ <ul>
+ <li>Zero or more <code><a href="#tbody">tbody</a></code> elements, or
+
+ <li>One or more <code><a href="#tr">tr</a></code> elements.
+ </ul>
+
+ <li>
+ <p>Zero or one <code><a href="#tfoot0">tfoot</a></code> element, if there
+ are no other <code><a href="#tfoot0">tfoot</a></code> elements in the
+ table.
+ </ol>
+
+ <p>The <code><a href="#table">table</a></code> element takes part in the <a
+ href="#table0">table model</a>.
+
+ <p>The <dfn id=caption title=dom-table-caption><code>caption</code></dfn>
+ DOM attribute must return, on getting, the first <code><a
+ href="#caption0">caption</a></code> element child of the <code><a
+ href="#table">table</a></code> element. On setting, if the new value is a
+ <code><a href="#caption0">caption</a></code> element, the first <code><a
+ href="#caption0">caption</a></code> element child of the <code><a
+ href="#table">table</a></code> element, if any, must be removed, and the
+ new value must be inserted as the first node of the <code><a
+ href="#table">table</a></code> element. If the new value is not a <code><a
+ href="#caption0">caption</a></code> element, then a
+ <code>HIERARCHY_REQUEST_ERR</code> DOM exception must be raised instead.
+
+ <p>The <dfn id=createcaption
+ title=dom-table-createCaption><code>createCaption()</code></dfn> method
+ must return the first <code><a href="#caption0">caption</a></code> element
+ child of the <code><a href="#table">table</a></code> element, if any;
+ otherwise a new <code><a href="#caption0">caption</a></code> element must
+ be created, inserted as the first node of the <code><a
+ href="#table">table</a></code> element, and then returned.
+
+ <p>The <dfn id=deletecaption
+ title=dom-table-deleteCaption><code>deleteCaption()</code></dfn> method
+ must remove the first <code><a href="#caption0">caption</a></code> element
+ child of the <code><a href="#table">table</a></code> element, if any.
+
+ <p>The <dfn id=thead title=dom-table-tHead><code>tHead</code></dfn> DOM
+ attribute must return, on getting, the first <code><a
+ href="#thead0">thead</a></code> element child of the <code><a
+ href="#table">table</a></code> element. On setting, if the new value is a
+ <code><a href="#thead0">thead</a></code> element, the first <code><a
+ href="#thead0">thead</a></code> element child of the <code><a
+ href="#table">table</a></code> element, if any, must be removed, and the
+ new value must be inserted immediately before the first element in the
+ <code><a href="#table">table</a></code> element that is neither a <code><a
+ href="#caption0">caption</a></code> element nor a <code><a
+ href="#colgroup">colgroup</a></code> element, if any, or at the end of the
+ table otherwise. If the new value is not a <code><a
+ href="#thead0">thead</a></code> element, then a
+ <code>HIERARCHY_REQUEST_ERR</code> DOM exception must be raised instead.
+
+ <p>The <dfn id=createthead
+ title=dom-table-createTHead><code>createTHead()</code></dfn> method must
+ return the first <code><a href="#thead0">thead</a></code> element child of
+ the <code><a href="#table">table</a></code> element, if any; otherwise a
+ new <code><a href="#thead0">thead</a></code> element must be created and
+ inserted immediately before the first element in the <code><a
+ href="#table">table</a></code> element that is neither a <code><a
+ href="#caption0">caption</a></code> element nor a <code><a
+ href="#colgroup">colgroup</a></code> element, if any, or at the end of the
+ table otherwise, and then that new element must be returned.
+
+ <p>The <dfn id=deletethead
+ title=dom-table-deleteTHead><code>deleteTHead()</code></dfn> method must
+ remove the first <code><a href="#thead0">thead</a></code> element child of
+ the <code><a href="#table">table</a></code> element, if any.
+
+ <p>The <dfn id=tfoot title=dom-table-tFoot><code>tFoot</code></dfn> DOM
+ attribute must return, on getting, the first <code><a
+ href="#tfoot0">tfoot</a></code> element child of the <code><a
+ href="#table">table</a></code> element. On setting, if the new value is a
+ <code><a href="#tfoot0">tfoot</a></code> element, the first <code><a
+ href="#tfoot0">tfoot</a></code> element child of the <code><a
+ href="#table">table</a></code> element, if any, must be removed, and the
+ new value must be inserted immediately before the first element in the
+ <code><a href="#table">table</a></code> element that is neither a <code><a
+ href="#caption0">caption</a></code> element, a <code><a
+ href="#colgroup">colgroup</a></code> element, nor a <code><a
+ href="#thead0">thead</a></code> element, if any, or at the end of the
+ table if there are no such elements. If the new value is not a <code><a
+ href="#tfoot0">tfoot</a></code> element, then a
+ <code>HIERARCHY_REQUEST_ERR</code> DOM exception must be raised instead.
+
+ <p>The <dfn id=createtfoot
+ title=dom-table-createTFoot><code>createTFoot()</code></dfn> method must
+ return the first <code><a href="#tfoot0">tfoot</a></code> element child of
+ the <code><a href="#table">table</a></code> element, if any; otherwise a
+ new <code><a href="#tfoot0">tfoot</a></code> element must be created and
+ inserted immediately before the first element in the <code><a
+ href="#table">table</a></code> element that is neither a <code><a
+ href="#caption0">caption</a></code> element, a <code><a
+ href="#colgroup">colgroup</a></code> element, nor a <code><a
+ href="#thead0">thead</a></code> element, if any, or at the end of the
+ table if there are no such elements, and then that new element must be
+ returned.
+
+ <p>The <dfn id=deletetfoot
+ title=dom-table-deleteTFoot><code>deleteTFoot()</code></dfn> method must
+ remove the first <code><a href="#tfoot0">tfoot</a></code> element child of
+ the <code><a href="#table">table</a></code> element, if any.
+
+ <p>The <dfn id=tbodies title=dom-table-tBodies><code>tBodies</code></dfn>
+ attribute must return an <code><a
+ href="#htmlcollection0">HTMLCollection</a></code> rooted at the <code><a
+ href="#table">table</a></code> node, whose filter matches only <code><a
+ href="#tbody">tbody</a></code> elements that are children of the <code><a
+ href="#table">table</a></code> element.
+
+ <p>The <dfn id=rows title=dom-table-rows><code>rows</code></dfn> attribute
+ must return an <code><a href="#htmlcollection0">HTMLCollection</a></code>
+ rooted at the <code><a href="#table">table</a></code> node, whose filter
+ matches only <code><a href="#tr">tr</a></code> elements that are either
+ children of the <code><a href="#table">table</a></code> element, or
+ children of <code><a href="#thead0">thead</a></code>, <code><a
+ href="#tbody">tbody</a></code>, or <code><a
+ href="#tfoot0">tfoot</a></code> elements that are themselves children of
+ the <code><a href="#table">table</a></code> element. The elements in the
+ collection must be ordered such that those elements whose parent is a
+ <code><a href="#thead0">thead</a></code> are included first, in tree
+ order, followed by those elements whose parent is either a <code><a
+ href="#table">table</a></code> or <code><a href="#tbody">tbody</a></code>
+ element, again in tree order, followed finally by those elements whose
+ parent is a <code><a href="#tfoot0">tfoot</a></code> element, still in
+ tree order.
+
+ <p>The behaviour of the <dfn id=insertrow
+ title=dom-table-insertRow><code>insertRow(<var
+ title="">index</var>)</code></dfn> method depends on the state of the
+ table. When it is called, the method must act as required by the first
+ item in the following list of conditions that describes the state of the
+ table and the <var title="">index</var> argument:
+
+ <dl class=switch>
+ <dt>If <var title="">index</var> is less than -1 or greater than the
+ number of elements in the <code title=dom-table-rows><a
+ href="#rows">rows</a></code> collection:
+
+ <dd>The method must raise an <code>INDEX_SIZE_ERR</code> exception.
+
+ <dt>If the <code title=dom-table-rows><a href="#rows">rows</a></code>
+ collection has zero elements in it, and the <code><a
+ href="#table">table</a></code> has no <code><a
+ href="#tbody">tbody</a></code> elements in it:
+
+ <dd>The method must create a <code><a href="#tbody">tbody</a></code>
+ element, then create a <code><a href="#tr">tr</a></code> element, then
+ append the <code><a href="#tr">tr</a></code> element to the <code><a
+ href="#tbody">tbody</a></code> element, then append the <code><a
+ href="#tbody">tbody</a></code> element to the <code><a
+ href="#table">table</a></code> element, and finally return the <code><a
+ href="#tr">tr</a></code> element.
+
+ <dt>If the <code title=dom-table-rows><a href="#rows">rows</a></code>
+ collection has zero elements in it:
+
+ <dd>The method must create a <code><a href="#tr">tr</a></code> element,
+ append it to the last <code><a href="#tbody">tbody</a></code> element in
+ the table, and return the <code><a href="#tr">tr</a></code> element.
+
+ <dt>If <var title="">index</var> is equal to -1 or equal to the number of
+ items in the <code title=dom-table-rows><a href="#rows">rows</a></code>
+ collection:
+
+ <dd>The method must create a <code><a href="#tr">tr</a></code> element,
+ and append it to the parent of the last <code><a href="#tr">tr</a></code>
+ element in the <code title=dom-table-rows><a href="#rows">rows</a></code>
+ collection. Then, the newly created <code><a href="#tr">tr</a></code>
+ element must be returned.
+
+ <dt>Otherwise:
+
+ <dd>The method must create a <code><a href="#tr">tr</a></code> element,
+ insert it immediately before the <var title="">index</var>th <code><a
+ href="#tr">tr</a></code> element in the <code title=dom-table-rows><a
+ href="#rows">rows</a></code> collection, in the same parent, and finally
+ must return the newly created <code><a href="#tr">tr</a></code> element.
+ </dl>
+
+ <p>The <dfn id=deleterow title=dom-table-deleteRow><code>deleteRow(<var
+ title="">index</var>)</code></dfn> method must remove the <var
+ title="">index</var>th element in the <code title=dom-table-rows><a
+ href="#rows">rows</a></code> collection from its parent. If <var
+ title="">index</var> is less than zero or greater than or equal to the
+ number of elements in the <code title=dom-table-rows><a
+ href="#rows">rows</a></code> collection, the method must instead raise an
+ <code>INDEX_SIZE_ERR</code> exception.
+
+ <h4 id=the-caption><span class=secno>3.15.2. </span>The <dfn
+ id=caption0><code>caption</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the first element child of a <code><a
+ href="#table">table</a></code> element.
+
+ <dt>Content model:
+
+ <dd><a href="#significant" title="significant inline
+ content">Significant</a> <a href="#strictly">strictly inline-level
+ content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#caption0">caption</a></code> element represents the
+ title of the <code><a href="#table">table</a></code> that is its parent,
+ if it has a parent and that is a <code><a href="#table">table</a></code>
+ element.
+
+ <p>The <code><a href="#caption0">caption</a></code> element takes part in
+ the <a href="#table0">table model</a>.
+
+ <h4 id=the-colgroup><span class=secno>3.15.3. </span>The <dfn
+ id=colgroup><code>colgroup</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#table">table</a></code> element, after
+ any <code><a href="#caption0">caption</a></code> elements and before any
+ <code><a href="#thead0">thead</a></code>, <code><a
+ href="#tbody">tbody</a></code>, <code><a href="#tfoot0">tfoot</a></code>,
+ and <code><a href="#tr">tr</a></code> elements.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#col">col</a></code> elements.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-colgroup-span><a href="#span0">span</a></code>, but
+ only if the element contains no <code><a href="#col">col</a></code>
+ elements
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltablecolelement>HTMLTableColElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute unsigned long <a href="#span1" title=dom-colgroup-span>span</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#colgroup">colgroup</a></code> element represents a
+ <a href="#column0" title=concept-column-group>group</a> of one or more <a
+ href="#column" title=concept-column>columns</a> in the <code><a
+ href="#table">table</a></code> that is its parent, if it has a parent and
+ that is a <code><a href="#table">table</a></code> element.
+
+ <p>If the <code><a href="#colgroup">colgroup</a></code> element contains no
+ <code><a href="#col">col</a></code> elements, then the element may have a
+ <dfn id=span0 title=attr-colgroup-span><code>span</code></dfn> content
+ attribute specified, whose value must be a <a href="#valid">valid
+ non-negative integer</a> greater than zero. Its default value, which must
+ be used if <a href="#rules" title="rules for parsing non-negative
+ integers">parsing the attribute as a non-negative integer</a> returns
+ either an error or zero, is 1.
+
+ <p>The <code><a href="#colgroup">colgroup</a></code> element and its <code
+ title=attr-colgroup-span><a href="#span0">span</a></code> attribute take
+ part in the <a href="#table0">table model</a>.
+
+ <p>The <dfn id=span1 title=dom-colgroup-span><code>span</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the content attribute of the
+ same name, with the exception that on setting, if the new value is 0, then
+ an <code>INDEX_SIZE_ERR</code> exception must be raised.
+
+ <h4 id=the-col><span class=secno>3.15.4. </span>The <dfn
+ id=col><code>col</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#colgroup">colgroup</a></code> element
+ that doesn't have a <code title=attr-col-span><a
+ href="#span2">span</a></code> attribute.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-col-span><a href="#span2">span</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <p><code><a href="#htmltablecolelement">HTMLTableColElement</a></code>,
+ same as for <code><a href="#colgroup">colgroup</a></code> elements. This
+ interface defines one member, <code title=dom-col-span><a
+ href="#span3">span</a></code>.</p>
+ </dl>
+
+ <p>If a <code><a href="#col">col</a></code> element has a parent and that
+ is a <code><a href="#colgroup">colgroup</a></code> element that itself has
+ a parent that is a <code><a href="#table">table</a></code> element, then
+ the <code><a href="#col">col</a></code> element represents one or more <a
+ href="#column" title=concept-column>columns</a> in the <a href="#column0"
+ title=concept-column-group>column group</a> represented by that <code><a
+ href="#colgroup">colgroup</a></code>.
+
+ <p>The element may have a <dfn id=span2
+ title=attr-col-span><code>span</code></dfn> content attribute specified,
+ whose value must be a <a href="#valid">valid non-negative integer</a>
+ greater than zero. Its default value, which must be used if <a
+ href="#rules" title="rules for parsing non-negative integers">parsing the
+ attribute as a non-negative integer</a> returns either an error or zero,
+ is 1.
+
+ <p>The <code><a href="#col">col</a></code> element and its <code
+ title=attr-col-span><a href="#span2">span</a></code> attribute take part
+ in the <a href="#table0">table model</a>.
+
+ <p>The <dfn id=span3 title=dom-col-span><code>span</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the content attribute of the
+ same name, with the exception that on setting, if the new value is 0, then
+ an <code>INDEX_SIZE_ERR</code> exception must be raised.
+
+ <h4 id=the-tbody><span class=secno>3.15.5. </span>The <dfn
+ id=tbody><code>tbody</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#table">table</a></code> element, after
+ any <code><a href="#caption0">caption</a></code>, <code><a
+ href="#colgroup">colgroup</a></code>, and <code><a
+ href="#thead0">thead</a></code> elements, but only if there are no
+ <code><a href="#tr">tr</a></code> elements that are children of the
+ <code><a href="#table">table</a></code> element.
+
+ <dt>Content model:
+
+ <dd>One or more <code><a href="#tr">tr</a></code> elements
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltablesectionelement>HTMLTableSectionElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#rows0" title=dom-tbody-rows>rows</a>;
+ <a href="#htmlelement">HTMLElement</a> <a href="#insertrow0" title=dom-tbody-insertRow>insertRow</a>(in long index);
+ void <a href="#deleterow0" title=dom-tbody-deleteRow>deleteRow</a>(in long index);
+};</pre>
+
+ <p>The <code><a
+ href="#htmltablesectionelement">HTMLTableSectionElement</a></code>
+ interface is also used for <code><a href="#thead0">thead</a></code> and
+ <code><a href="#tfoot0">tfoot</a></code> elements.</p>
+ </dl>
+
+ <p>The <code><a href="#tbody">tbody</a></code> element represents a <a
+ href="#row-group" title=concept-row-group>block</a> of <a href="#row0"
+ title=concept-row>rows</a> that consist of a body of data for the parent
+ <code><a href="#table">table</a></code> element, if the <code><a
+ href="#tbody">tbody</a></code> element has a parent and it is a <code><a
+ href="#table">table</a></code>.
+
+ <p>The <code><a href="#tbody">tbody</a></code> element takes part in the <a
+ href="#table0">table model</a>.
+
+ <p>The <dfn id=rows0 title=dom-tbody-rows><code>rows</code></dfn> attribute
+ must return an <code><a href="#htmlcollection0">HTMLCollection</a></code>
+ rooted at the element, whose filter matches only <code><a
+ href="#tr">tr</a></code> elements that are children of the element.
+
+ <p>The <dfn id=insertrow0 title=dom-tbody-insertRow><code>insertRow(<var
+ title="">index</var>)</code></dfn> method must, when invoked on an element
+ <var title="">table section</var>, act as follows:
+
+ <p>If <var title="">index</var> is less than -1 or greater than the number
+ of elements in the <code title=dom-tbody-rows><a
+ href="#rows0">rows</a></code> collection, the method must raise an
+ <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>If <var title="">index</var> is equal to -1 or equal to the number of
+ items in the <code title=dom-tbody-rows><a href="#rows0">rows</a></code>
+ collection, the method must create a <code><a href="#tr">tr</a></code>
+ element, append it to the element <var title="">table section</var>, and
+ return the newly created <code><a href="#tr">tr</a></code> element.
+
+ <p>Otherwise, the method must create a <code><a href="#tr">tr</a></code>
+ element, insert it as a child of the <var title="">table section</var>
+ element, immediately before the <var title="">index</var>th <code><a
+ href="#tr">tr</a></code> element in the <code title=dom-tbody-rows><a
+ href="#rows0">rows</a></code> collection, and finally must return the
+ newly created <code><a href="#tr">tr</a></code> element.
+
+ <p>The <dfn id=deleterow0 title=dom-tbody-deleteRow><code>deleteRow(<var
+ title="">index</var>)</code></dfn> method must remove the <var
+ title="">index</var>th element in the <code title=dom-tbody-rows><a
+ href="#rows0">rows</a></code> collection from its parent. If <var
+ title="">index</var> is less than zero or greater than or equal to the
+ number of elements in the <code title=dom-tbody-rows><a
+ href="#rows0">rows</a></code> collection, the method must instead raise an
+ <code>INDEX_SIZE_ERR</code> exception.
+
+ <h4 id=the-thead><span class=secno>3.15.6. </span>The <dfn
+ id=thead0><code>thead</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#table">table</a></code> element, after
+ any <code><a href="#caption0">caption</a></code> and <code><a
+ href="#colgroup">colgroup</a></code> elements and before any <code><a
+ href="#tbody">tbody</a></code>, <code><a href="#tfoot0">tfoot</a></code>,
+ and <code><a href="#tr">tr</a></code> elements, but only if there are no
+ other <code><a href="#thead0">thead</a></code> elements that are children
+ of the <code><a href="#table">table</a></code> element.
+
+ <dt>Content model:
+
+ <dd>One or more <code><a href="#tr">tr</a></code> elements
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd><code><a
+ href="#htmltablesectionelement">HTMLTableSectionElement</a></code>, as
+ defined for <code><a href="#tbody">tbody</a></code> elements.
+ </dl>
+
+ <p>The <code><a href="#thead0">thead</a></code> element represents the <a
+ href="#row-group" title=concept-row-group>block</a> of <a href="#row0"
+ title=concept-row>rows</a> that consist of the column labels (headers) for
+ the parent <code><a href="#table">table</a></code> element, if the
+ <code><a href="#thead0">thead</a></code> element has a parent and it is a
+ <code><a href="#table">table</a></code>.
+
+ <p>The <code><a href="#thead0">thead</a></code> element takes part in the
+ <a href="#table0">table model</a>.
+
+ <h4 id=the-tfoot><span class=secno>3.15.7. </span>The <dfn
+ id=tfoot0><code>tfoot</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#table">table</a></code> element, after
+ any <code><a href="#caption0">caption</a></code>, <code><a
+ href="#colgroup">colgroup</a></code>, and <code><a
+ href="#thead0">thead</a></code> elements and before any <code><a
+ href="#tbody">tbody</a></code> and <code><a href="#tr">tr</a></code>
+ elements, but only if there are no other <code><a
+ href="#tfoot0">tfoot</a></code> elements that are children of the
+ <code><a href="#table">table</a></code> element.
+
+ <dd>As a child of a <code><a href="#table">table</a></code> element, after
+ any <code><a href="#caption0">caption</a></code>, <code><a
+ href="#colgroup">colgroup</a></code>, <code><a
+ href="#thead0">thead</a></code>, <code><a href="#tbody">tbody</a></code>,
+ and <code><a href="#tr">tr</a></code> elements, but only if there are no
+ other <code><a href="#tfoot0">tfoot</a></code> elements that are children
+ of the <code><a href="#table">table</a></code> element.
+
+ <dt>Content model:
+
+ <dd>One or more <code><a href="#tr">tr</a></code> elements
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd><code><a
+ href="#htmltablesectionelement">HTMLTableSectionElement</a></code>, as
+ defined for <code><a href="#tbody">tbody</a></code> elements.
+ </dl>
+
+ <p>The <code><a href="#tfoot0">tfoot</a></code> element represents the <a
+ href="#row-group" title=concept-row-group>block</a> of <a href="#row0"
+ title=concept-row>rows</a> that consist of the column summaries (footers)
+ for the parent <code><a href="#table">table</a></code> element, if the
+ <code><a href="#tfoot0">tfoot</a></code> element has a parent and it is a
+ <code><a href="#table">table</a></code>.
+
+ <p>The <code><a href="#tfoot0">tfoot</a></code> element takes part in the
+ <a href="#table0">table model</a>.
+
+ <h4 id=the-tr><span class=secno>3.15.8. </span>The <dfn
+ id=tr><code>tr</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#thead0">thead</a></code> element.
+
+ <dd>As a child of a <code><a href="#tbody">tbody</a></code> element.
+
+ <dd>As a child of a <code><a href="#tfoot0">tfoot</a></code> element.
+
+ <dd>As a child of a <code><a href="#table">table</a></code> element, after
+ any <code><a href="#caption0">caption</a></code>, <code><a
+ href="#colgroup">colgroup</a></code>, and <code><a
+ href="#thead0">thead</a></code> elements, but only if there are no
+ <code><a href="#tbody">tbody</a></code> elements that are children of the
+ <code><a href="#table">table</a></code> element.
+
+ <dt>Content model:
+
+ <dd>One or more <code><a href="#td">td</a></code> or <code><a
+ href="#th">th</a></code> elements
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltablerowelement>HTMLTableRowElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ readonly attribute long <a href="#rowindex" title=dom-tr-rowIndex>rowIndex</a>;
+ readonly attribute long <a href="#rowindex0" title=dom-tr-sectionRowIndex>sectionRowIndex</a>;
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#cells" title=dom-tr-cells>cells</a>;
+ <a href="#htmlelement">HTMLElement</a> <a href="#insertcell" title=dom-tr-insertCell>insertCell</a>(in long index);
+ void <span>deleteCell</span>(in long index);
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#tr">tr</a></code> element represents a <a
+ href="#row0" title=concept-row>row</a> of <a href="#cell"
+ title=concept-cell>cells</a> in a <a href="#table1"
+ title=concept-table>table</a>.
+
+ <p>The <code><a href="#tr">tr</a></code> element takes part in the <a
+ href="#table0">table model</a>.
+
+ <p>The <dfn id=rowindex title=dom-tr-rowIndex><code>rowIndex</code></dfn>
+ DOM attribute must, if the element has a parent <code><a
+ href="#table">table</a></code> element, or a parent <code><a
+ href="#tbody">tbody</a></code>, <code><a href="#thead0">thead</a></code>,
+ or <code><a href="#tfoot0">tfoot</a></code> element and a
+ <em>grandparent</em> <code><a href="#table">table</a></code> element,
+ return the index of the <code><a href="#tr">tr</a></code> element in that
+ <code><a href="#table">table</a></code> element's <code
+ title=dom-table-rows><a href="#rows">rows</a></code> collection. If there
+ is no such <code><a href="#table">table</a></code> element, then the
+ attribute must return 0.
+
+ <p>The <dfn id=rowindex0
+ title=dom-tr-sectionRowIndex><code>sectionRowIndex</code></dfn> DOM attribute
+ must, if the element has a parent <code><a href="#table">table</a></code>,
+ <code><a href="#tbody">tbody</a></code>, <code><a
+ href="#thead0">thead</a></code>, or <code><a
+ href="#tfoot0">tfoot</a></code> element, return the index of the <code><a
+ href="#tr">tr</a></code> element in the parent element's <code
+ title="">rows</code> collection (for tables, that's the <code
+ title=dom-table-rows><a href="#rows">rows</a></code> collection; for table
+ sections, that's the <code title=dom-tbody-rows><a
+ href="#rows0">rows</a></code> collection). If there is no such parent
+ element, then the attribute must return 0.
+
+ <p>The <dfn id=cells title=dom-tr-cells><code>cells</code></dfn> attribute
+ must return an <code><a href="#htmlcollection0">HTMLCollection</a></code>
+ rooted at the <code><a href="#tr">tr</a></code> element, whose filter
+ matches only <code><a href="#td">td</a></code> and <code><a
+ href="#th">th</a></code> elements that are children of the <code><a
+ href="#tr">tr</a></code> element.
+
+ <p>The <dfn id=insertcell title=dom-tr-insertCell><code>insertCell(<var
+ title="">index</var>)</code></dfn> method must act as follows:
+
+ <p>If <var title="">index</var> is less than -1 or greater than the number
+ of elements in the <code title=dom-tr-cells><a
+ href="#cells">cells</a></code> collection, the method must raise an
+ <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>If <var title="">index</var> is equal to -1 or equal to the number of
+ items in the <code title=dom-tr-cells><a href="#cells">cells</a></code>
+ collection, the method must create a <code><a href="#td">td</a></code>
+ element, append it to the <code><a href="#tr">tr</a></code> element, and
+ return the newly created <code><a href="#td">td</a></code> element.
+
+ <p>Otherwise, the method must create a <code><a href="#td">td</a></code>
+ element, insert it as a child of the <code><a href="#tr">tr</a></code>
+ element, immediately before the <var title="">index</var>th <code><a
+ href="#td">td</a></code> or <code><a href="#th">th</a></code> element in
+ the <code title=dom-tr-cells><a href="#cells">cells</a></code> collection,
+ and finally must return the newly created <code><a
+ href="#td">td</a></code> element.
+
+ <p>The <dfn id=deletecell title=dom-tr-deleteCell><code>deleteCell(<var
+ title="">index</var>)</code></dfn> method must remove the <var
+ title="">index</var>th element in the <code title=dom-tr-cells><a
+ href="#cells">cells</a></code> collection from its parent. If <var
+ title="">index</var> is less than zero or greater than or equal to the
+ number of elements in the <code title=dom-tr-cells><a
+ href="#cells">cells</a></code> collection, the method must instead raise
+ an <code>INDEX_SIZE_ERR</code> exception.
+
+ <h4 id=the-td><span class=secno>3.15.9. </span>The <dfn
+ id=td><code>td</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#tr">tr</a></code> element.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>, or <a
+ href="#inline-level0">inline-level content</a> (but not both).
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-td-colspan><a href="#colspan">colspan</a></code>
+
+ <dd><code title=attr-td-rowspan><a href="#rowspan">rowspan</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltablecellelement>HTMLTableCellElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute long <a href="#colspan0" title=dom-td-colSpan>colSpan</a>;
+ attribute long <a href="#rowspan0" title=dom-td-rowSpan>rowSpan</a>;
+ readonly attribute long <a href="#cellindex" title=dom-td-cellIndex>cellIndex</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#td">td</a></code> element represents a data <a
+ href="#cell" title=concept-cell>cell</a> in a table.
+
+ <p>The <code><a href="#td">td</a></code> element may have a <dfn id=colspan
+ title=attr-td-colspan><code>colspan</code></dfn> content attribute
+ specified, whose value must be a <a href="#valid">valid non-negative
+ integer</a> greater than zero. Its default value, which must be used if <a
+ href="#rules" title="rules for parsing non-negative integers">parsing the
+ attribute as a non-negative integer</a> returns either an error or zero,
+ is 1.
+
+ <p>The <code><a href="#td">td</a></code> element may also have a <dfn
+ id=rowspan title=attr-td-rowspan><code>rowspan</code></dfn> content
+ attribute specified, whose value must be a <a href="#valid">valid
+ non-negative integer</a>. Its default value, which must be used if <a
+ href="#rules" title="rules for parsing non-negative integers">parsing the
+ attribute as a non-negative integer</a> returns an error, is also 1.
+
+ <p>The <code><a href="#td">td</a></code> element and its <code
+ title=attr-td-colspan><a href="#colspan">colspan</a></code> and <code
+ title=attr-td-rowspan><a href="#rowspan">rowspan</a></code> attributes
+ take part in the <a href="#table0">table model</a>.
+
+ <p>The <dfn id=colspan0 title=dom-td-colSpan><code>colSpan</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the content attribute of the
+ same name, with the exception that on setting, if the new value is 0, then
+ an <code>INDEX_SIZE_ERR</code> exception must be raised.
+
+ <p>The <dfn id=rowspan0 title=dom-td-rowSpan><code>rowSpan</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the content attribute of the
+ same name.
+
+ <p>The <dfn id=cellindex
+ title=dom-td-cellIndex><code>cellIndex</code></dfn> DOM attribute must, if
+ the element has a parent <code><a href="#tr">tr</a></code> element, return
+ the index of the cell's element in the parent element's <code
+ title=dom-tr-cells><a href="#cells">cells</a></code> collection. If there
+ is no such parent element, then the attribute must return 0.
+
+ <h4 id=the-th><span class=secno>3.15.10. </span>The <dfn
+ id=th><code>th</code></dfn> element</h4>
+ <!-- element has no special category -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As a child of a <code><a href="#tr">tr</a></code> element.
+
+ <dt>Content model:
+
+ <dd>Zero or more <a href="#block-level0">block-level elements</a>, or <a
+ href="#inline-level0">inline-level content</a> (but not both).
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-th-colspan><a href="#colspan1">colspan</a></code>
+
+ <dd><code title=attr-th-rowspan><a href="#rowspan1">rowspan</a></code>
+
+ <dd><code title=attr-th-scope><a href="#scope0">scope</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmltableheadercellelement>HTMLTableHeaderCellElement</dfn> : <a href="#htmltablecellelement">HTMLTableCellElement</a> {
+ attribute DOMString <a href="#scope1" title=dom-th-scope>scope</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#th">th</a></code> element represents a header <a
+ href="#cell" title=concept-cell>cell</a> in a table.
+
+ <p>The <code><a href="#th">th</a></code> element may have a <dfn
+ id=colspan1 title=attr-th-colspan><code>colspan</code></dfn> content
+ attribute specified, whose value must be a <a href="#valid">valid
+ non-negative integer</a> greater than zero. Its default value, which must
+ be used if <a href="#rules" title="rules for parsing non-negative
+ integers">parsing the attribute as a non-negative integer</a> returns
+ either an error or zero, is 1.
+
+ <p>The <code><a href="#th">th</a></code> element may also have a <dfn
+ id=rowspan1 title=attr-th-rowspan><code>rowspan</code></dfn> content
+ attribute specified, whose value must be a <a href="#valid">valid
+ non-negative integer</a>. Its default value, which must be used if <a
+ href="#rules" title="rules for parsing non-negative integers">parsing the
+ attribute as a non-negative integer</a> returns an error, is also 1.
+
+ <p>The <code><a href="#th">th</a></code> element may have a <dfn id=scope0
+ title=attr-th-scope><code>scope</code></dfn> content attribute specified.
+ The <code title=attr-th-scope><a href="#scope0">scope</a></code> attribute
+ is an <a href="#enumerated">enumerated attribute</a> with five states,
+ four of which have explicit keywords:
+
+ <dl>
+ <dt>The <dfn id=row title=attr-th-scope-row><code>row</code></dfn>
+ keyword, which maps to the <em>row</em> state
+
+ <dd>The <em>row</em> state means the header cell applies to all the
+ remaining cells in the row.
+
+ <dt>The <dfn id=col0 title=attr-th-scope-col><code>col</code></dfn>
+ keyword, which maps to the <em>column</em> state
+
+ <dd>The <em>column</em> state means the header cell applies to all the
+ remaining cells in the column.
+
+ <dt>The <dfn id=rowgroup
+ title=attr-th-scope-rowgroup><code>rowgroup</code></dfn> keyword, which
+ maps to the <em>row group</em> state
+
+ <dd>The <em>row group</em> state means the header cell applies to all the
+ remaining cells in the row group.
+
+ <dt>The <dfn id=colgroup0
+ title=attr-th-scope-colgroup><code>colgroup</code></dfn> keyword, which
+ maps to the <em>column group</em> state
+
+ <dd>The <em>column group</em> state means the header cell applies to all
+ the remaining cells in the column group.
+
+ <dt>The <em>auto</em> state
+
+ <dd>The <em>auto</em> state makes the header cell apply to a set of cells
+ selected based on context.
+ </dl>
+
+ <p>The <code title=attr-th-scope><a href="#scope0">scope</a></code>
+ attribute's <em>missing value default</em> is the <em>auto</em> state.
+
+ <p>The exact effect of these values is described in detail in the <a
+ href="#algorithm2">algorithm for assigning header cells to data cells</a>,
+ which user agents must apply to determine the relationships between data
+ cells and header cells.
+
+ <p>The <code><a href="#th">th</a></code> element and its <code
+ title=attr-th-colspan><a href="#colspan1">colspan</a></code>, <code
+ title=attr-th-rowspan><a href="#rowspan1">rowspan</a></code>, and <code
+ title=attr-th-scope><a href="#scope0">scope</a></code> attributes take
+ part in the <a href="#table0">table model</a>.
+
+ <p>The <dfn id=scope1 title=dom-th-scope><code>scope</code></dfn> DOM
+ attribute must <a href="#reflect">reflect</a> the content attribute of the
+ same name.
+
+ <p>The <code><a
+ href="#htmltableheadercellelement">HTMLTableHeaderCellElement</a></code>
+ interface inherits from the <code><a
+ href="#htmltablecellelement">HTMLTableCellElement</a></code> interface and
+ therefore also has the DOM attributes defined above in the <code><a
+ href="#td">td</a></code> section.
+
+ <h4 id=processing><span class=secno>3.15.11. </span>Processing model</h4>
+
+ <p>The various table elements and their content attributes together define
+ the <dfn id=table0>table model</dfn>.
+
+ <p>A <dfn id=table1 title=concept-table>table</dfn> consists of cells
+ aligned on a two-dimensional grid of <dfn id=slots
+ title=concept-slots>slots</dfn> with coordinates (<var title="">x</var>,
+ <var title="">y</var>). The grid is finite, and is either empty or has one
+ or more slots. If the grid has one or more slots, then the <var
+ title="">x</var> coordinates are always in the range
+ <span>1&nbsp;&le;&nbsp;<var title="">x</var>&nbsp;&le;&nbsp;<var
+ title="">x<sub title="">max</sub></var></span>, and the <var
+ title="">y</var> coordinates are always in the range
+ <span>1&nbsp;&le;&nbsp;<var title="">y</var>&nbsp;&le;&nbsp;<var
+ title="">y<sub title="">max</sub></var></span>. If one or both of <var
+ title="">x<sub title="">max</sub></var> and <var title="">y<sub
+ title="">max</sub></var> are zero, then the table is empty (has no slots).
+ Tables correspond to <code><a href="#table">table</a></code> elements.
+
+ <p>A <dfn id=cell title=concept-cell>cell</dfn> is a set of slots anchored
+ at a slot (<var title="">cell<sub title="">x</sub></var>, <var
+ title="">cell<sub title="">y</sub></var>), and with a particular <var
+ title="">width</var> and <var title="">height</var> such that the cell
+ covers all the slots with coordinates (<var title="">x</var>, <var
+ title="">y</var>) where <span><var title="">cell<sub
+ title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x</var>&nbsp;&lt;&nbsp;<var title="">cell<sub
+ title="">x</sub></var>+<var title="">width</var></span> and <span><var
+ title="">cell<sub title="">y</sub></var>&nbsp;&le;&nbsp;<var
+ title="">y</var>&nbsp;&lt;&nbsp;<var title="">cell<sub
+ title="">y</sub></var>+<var title="">height</var></span>. Cells can either
+ be <em>data cells</em> or <em>header cells</em>. Data cells correspond to
+ <code><a href="#td">td</a></code> elements, and have zero or more
+ associated header cells. Header cells correspond to <code><a
+ href="#th">th</a></code> elements.
+
+ <p>A <dfn id=row0 title=concept-row>row</dfn> is a complete set of slots
+ from <span><var title="">x</var>=1</span> to <span><var
+ title="">x</var>=<var title="">x<sub title="">max</sub></var></span>, for
+ a particular value of <var title="">y</var>. Rows correspond to <code><a
+ href="#tr">tr</a></code> elements.
+
+ <p>A <dfn id=column title=concept-column>column</dfn> is a complete set of
+ slots from <span><var title="">y</var>=1</span> to <span><var
+ title="">y</var>=<var title="">y<sub title="">max</sub></var></span>, for
+ a particular value of <var title="">x</var>. Columns can correspond to
+ <code><a href="#col">col</a></code> elements, but in the absence of
+ <code><a href="#col">col</a></code> elements are implied.
+
+ <p>A <dfn id=row-group title=concept-row-group>row group</dfn> is a set of
+ <a href="#row0" title=concept-row>rows</a> anchored at a slot (1, <var
+ title="">group<sub title="">y</sub></var>) with a particular <var
+ title="">height</var> such that the row group covers all the slots with
+ coordinates (<var title="">x</var>, <var title="">y</var>) where
+ <span>1&nbsp;&le;&nbsp;<var title="">x</var>&nbsp;&le;&nbsp;<var
+ title="">x<sub title="">max</sub></var></span> and <span><var
+ title="">group<sub title="">y</sub></var>&nbsp;&le;&nbsp;<var
+ title="">y</var>&nbsp;&lt;&nbsp;<var title="">group<sub
+ title="">y</sub></var>+<var title="">height</var></span>. Row groups
+ correspond to <code><a href="#tbody">tbody</a></code>, <code><a
+ href="#thead0">thead</a></code>, and <code><a
+ href="#tfoot0">tfoot</a></code> elements. Not every row is necessarily in
+ a row group.
+
+ <p>A <dfn id=column0 title=concept-column-group>column group</dfn> is a set
+ of <a href="#column" title=concept-column>columns</a> anchored at a slot
+ (<var title="">group<sub title="">x</sub></var>, 1) with a particular <var
+ title="">width</var> such that the column group covers all the slots with
+ coordinates (<var title="">x</var>, <var title="">y</var>) where
+ <span><var title="">group<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x</var>&nbsp;&lt;&nbsp;<var title="">group<sub
+ title="">x</sub></var>+<var title="">width</var></span> and
+ <span>1&nbsp;&le;&nbsp;<var title="">y</var>&nbsp;&le;&nbsp;<var
+ title="">y<sub title="">max</sub></var></span>. Column groups correspond
+ to <code><a href="#colgroup">colgroup</a></code> elements. Not every
+ column is necessarily in a column group.
+
+ <p><a href="#row-group" title=concept-row-group>Row groups</a> cannot
+ overlap each other. Similarly, <a href="#column0"
+ title=concept-column-group>column groups</a> cannot overlap each other.
+
+ <p>A <a href="#cell" title=concept-cell>cell</a> cannot cover slots that
+ are from two or more <a href="#row-group" title=concept-row-group>row
+ groups</a>. It is, however, possible for a cell to be in multiple <a
+ href="#column0" title=concept-column-group>column groups</a>. All the
+ slots that form part of one cell are part of zero or one <a
+ href="#row-group" title=concept-row-group>row groups</a> and zero or more
+ <a href="#column0" title=concept-column-group>column groups</a>.
+
+ <p>In addition to <a href="#cell" title=concept-cell>cells</a>, <a
+ href="#column" title=concept-column>columns</a>, <a href="#row0"
+ title=concept-row>rows</a>, <a href="#row-group"
+ title=concept-row-group>row groups</a>, and <a href="#column0"
+ title=concept-column-group>column groups</a>, <a href="#table1"
+ title=concept-table>tables</a> can have a <code><a
+ href="#caption0">caption</a></code> element associated with them. This
+ gives the table a heading, or legend.
+
+ <p>A <dfn id=table2>table model error</dfn> is an error with the data
+ represented by <code><a href="#table">table</a></code> elements and their
+ descendants. Documents must not have table model errors.
+
+ <h5 id=forming><span class=secno>3.15.11.1. </span>Forming a table</h5>
+
+ <p>To determine which elements correspond to which slots in a <a
+ href="#table1" title=concept-table>table</a> associated with a <code><a
+ href="#table">table</a></code> element, to determine the dimensions of the
+ table (<var title="">x<sub title="">max</sub></var> and <var
+ title="">y<sub title="">max</sub></var>), and to determine if there are
+ any <a href="#table2" title="table model error">table model errors</a>,
+ user agents must use the following algorithm:
+
+ <ol>
+ <li>
+ <p>Let <var title="">x<sub title="">max</sub></var> be zero.</p>
+
+ <li>
+ <p>Let <var title="">y<sub title="">max</sub></var> be zero.</p>
+
+ <li>
+ <p>Let <var title="">the table</var> be the <a href="#table1"
+ title=concept-table>table</a> represented by the <code><a
+ href="#table">table</a></code> element. The <var title="">x<sub
+ title="">max</sub></var> and <var title="">y<sub
+ title="">max</sub></var> variables give <var title="">the table</var>'s
+ extent. <var title="">The table</var> is initially empty.</p>
+
+ <li>
+ <p>If the <code><a href="#table">table</a></code> element has no element
+ children, then return <var title="">the table</var> (which will be
+ empty), and abort these steps.</p>
+
+ <li>
+ <p>Let the <var title="">current element</var> be the first element child
+ of the <code><a href="#table">table</a></code> element.</p>
+
+ <p>If a step in this algorithm ever requires the <var title="">current
+ element</var> to be advanced to the next child of the <code><a
+ href="#table">table</a></code> when there is no such next child, then
+ the algorithm must be aborted at that point and the algorithm must
+ return <var title="">the table</var>.</p>
+
+ <li>
+ <p>While the <var title="">current element</var> is not one of the
+ following elements, advance the <var title="">current element</var> to
+ the next child of the <code><a href="#table">table</a></code>:</p>
+
+ <ul class=brief>
+ <li><code><a href="#caption0">caption</a></code>
+
+ <li><code><a href="#colgroup">colgroup</a></code>
+
+ <li><code><a href="#thead0">thead</a></code>
+
+ <li><code><a href="#tbody">tbody</a></code>
+
+ <li><code><a href="#tfoot0">tfoot</a></code>
+
+ <li><code><a href="#tr">tr</a></code>
+ </ul>
+
+ <li>
+ <p>If the <var title="">current element</var> is a <code><a
+ href="#caption0">caption</a></code>, then that is the <code><a
+ href="#caption0">caption</a></code> element associated with <var
+ title="">the table</var>. Otherwise, it has no associated <code><a
+ href="#caption0">caption</a></code> element.</p>
+
+ <li>
+ <p>If the <var title="">current element</var> is a <code><a
+ href="#caption0">caption</a></code>, then while the <var
+ title="">current element</var> is not one of the following elements,
+ advance the <var title="">current element</var> to the next child of the
+ <code><a href="#table">table</a></code>:</p>
+
+ <ul class=brief>
+ <li><code><a href="#colgroup">colgroup</a></code>
+
+ <li><code><a href="#thead0">thead</a></code>
+
+ <li><code><a href="#tbody">tbody</a></code>
+
+ <li><code><a href="#tfoot0">tfoot</a></code>
+
+ <li><code><a href="#tr">tr</a></code>
+ </ul>
+
+ <p>(Otherwise, the <var title="">current element</var> will already be
+ one of those elements.)</p>
+
+ <li>
+ <p>If the <var title="">current element</var> is a <code><a
+ href="#colgroup">colgroup</a></code>, follow these substeps:</p>
+
+ <ol>
+ <li>
+ <p>Let <var title="">next column</var> be 1.</p>
+
+ <li>
+ <p><em>Column groups.</em> Process the <var title="">current
+ element</var> according to the appropriate one of the following two
+ cases:</p>
+
+ <dl class=switch>
+ <dt>If the <var title="">current element</var> has any <code><a
+ href="#col">col</a></code> element children
+
+ <dd>
+ <p>Follow these steps:</p>
+
+ <ol>
+ <li>
+ <p>Let <var title="">x<sub title="">start</sub></var> have the
+ value <span><var title="">x<sub title="">max</sub></var>+1</span>.</p>
+
+ <li>
+ <p>Let the <var title="">current column</var> be the first <code><a
+ href="#col">col</a></code> element child of the <code><a
+ href="#colgroup">colgroup</a></code> element.</p>
+
+ <li>
+ <p><em>Columns.</em> If the <var title="">current column</var>
+ <code><a href="#col">col</a></code> element has a <code
+ title=attr-col-span><a href="#span2">span</a></code> attribute,
+ then parse its value using the <a href="#rules">rules for parsing
+ non-negative integers</a>.</p>
+
+ <p>If the result of parsing the value is not an error or zero, then
+ let <var title="">span</var> be that value.</p>
+
+ <p>Otherwise, if the <code><a href="#col">col</a></code> element
+ has no <code title=attr-col-span><a href="#span2">span</a></code>
+ attribute, or if trying to parse the attribute's value resulted in
+ an error or zero, then let <var title="">span</var> be 1.</p>
+
+ <li>
+ <p>Increase <var title="">x<sub title="">max</sub></var> by <var
+ title="">span</var>.</p>
+
+ <li>
+ <p>Let the last <var title="">span</var> <a href="#column"
+ title=concept-column>columns</a> in <var title="">the table</var>
+ correspond to the <var title="">current column</var> <code><a
+ href="#col">col</a></code> element.</p>
+
+ <li>
+ <p>If <var title="">current column</var> is not the last <code><a
+ href="#col">col</a></code> element child of the <code><a
+ href="#colgroup">colgroup</a></code> element, then let the <var
+ title="">current column</var> be the next <code><a
+ href="#col">col</a></code> element child of the <code><a
+ href="#colgroup">colgroup</a></code> element, and return to the
+ third step of this innermost group of steps (columns).</p>
+
+ <li>
+ <p>Let all the last <a href="#column"
+ title=concept-column>columns</a> in <var title="">the table</var>
+ from <span>x=<var title="">x<sub title="">start</sub></var></span>
+ to <span>x=<var title="">x<sub title="">max</sub></var></span>
+ form a new <a href="#column0" title=concept-column-group>column
+ group</a>, anchored at the slot (<var title="">x<sub
+ title="">start</sub></var>, 1), with width <var title="">x<sub
+ title="">max</sub></var>-<var title="">x<sub
+ title="">start</sub></var>+1, corresponding to the <code><a
+ href="#colgroup">colgroup</a></code> element.</p>
+ </ol>
+
+ <dt>If the <var title="">current element</var> has no <code><a
+ href="#col">col</a></code> element children
+
+ <dd>
+ <ol>
+ <li>
+ <p>If the <code><a href="#colgroup">colgroup</a></code> element has
+ a <code title=attr-colgroup-span><a href="#span0">span</a></code>
+ attribute, then parse its value using the <a href="#rules">rules
+ for parsing non-negative integers</a>.</p>
+
+ <p>If the result of parsing the value is not an error or zero, then
+ let <var title="">span</var> be that value.</p>
+
+ <p>Otherwise, if the <code><a href="#colgroup">colgroup</a></code>
+ element has no <code title=attr-colgroup-span><a
+ href="#span0">span</a></code> attribute, or if trying to parse the
+ attribute's value resulted in an error or zero, then let <var
+ title="">span</var> be 1.</p>
+
+ <li>
+ <p>Increase <var title="">x<sub title="">max</sub></var> by <var
+ title="">span</var>.</p>
+
+ <li>
+ <p>Let the last <var title="">span</var> <a href="#column"
+ title=concept-column>columns</a> in <var title="">the table</var>
+ form a new <a href="#column0" title=concept-column-group>column
+ group</a>, anchored at the slot (<var title="">x<sub
+ title="">max</sub></var>-<var title="">span</var>+1, 1), with
+ width <var title="">span</var>, corresponding to the <code><a
+ href="#colgroup">colgroup</a></code> element.</p>
+ </ol>
+ </dl>
+
+ <li>
+ <p>Advance the <var title="">current element</var> to the next child of
+ the <code><a href="#table">table</a></code>.</p>
+
+ <li>
+ <p>While the <var title="">current element</var> is not one of the
+ following elements, advance the <var title="">current element</var> to
+ the next child of the <code><a href="#table">table</a></code>:</p>
+
+ <ul class=brief>
+ <li><code><a href="#colgroup">colgroup</a></code>
+
+ <li><code><a href="#thead0">thead</a></code>
+
+ <li><code><a href="#tbody">tbody</a></code>
+
+ <li><code><a href="#tfoot0">tfoot</a></code>
+
+ <li><code><a href="#tr">tr</a></code>
+ </ul>
+
+ <li>
+ <p>If the <var title="">current element</var> is a <code><a
+ href="#colgroup">colgroup</a></code> element, jump to step 2 in these
+ substeps (column groups).</p>
+ </ol>
+
+ <li>
+ <p>Let <var title="">y<sub title="">current</sub></var> be zero. When the
+ algorithm is aborted, if <var title="">y<sub
+ title="">current</sub></var> does not equal <var title="">y<sub
+ title="">max</sub></var>, then that is a <a href="#table2">table model
+ error</a>.</p>
+
+ <li>
+ <p>Let the <var title="">list of downward-growing cells</var> be an empty
+ list.</p>
+
+ <li>
+ <p><em>Rows.</em> While the <var title="">current element</var> is not
+ one of the following elements, advance the <var title="">current
+ element</var> to the next child of the <code><a
+ href="#table">table</a></code>:</p>
+
+ <ul class=brief>
+ <li><code><a href="#thead0">thead</a></code>
+
+ <li><code><a href="#tbody">tbody</a></code>
+
+ <li><code><a href="#tfoot0">tfoot</a></code>
+
+ <li><code><a href="#tr">tr</a></code>
+ </ul>
+
+ <li>
+ <p>If the <var title="">current element</var> is a <code><a
+ href="#tr">tr</a></code>, then run the <a href="#algorithm0">algorithm
+ for processing rows</a> (defined below), then return to the previous
+ step (rows).</p>
+
+ <li>
+ <p>Otherwise, run the <a href="#algorithm">algorithm for ending a row
+ group</a>.</p>
+
+ <li>
+ <p>Let <var title="">y<sub title="">start</sub></var> have the value
+ <span><var title="">y<sub title="">max</sub></var>+1</span>.</p>
+
+ <li>
+ <p>For each <code><a href="#tr">tr</a></code> element that is a child of
+ the <var title="">current element</var>, in tree order, run the <a
+ href="#algorithm0">algorithm for processing rows</a> (defined below).</p>
+
+ <li> <!-- if we added any rows, make them part of a row group -->
+ <p>If <span><var title="">y<sub
+ title="">max</sub></var>&nbsp;&ge;&nbsp;<var title="">y<sub
+ title="">start</sub></var></span>, then let all the last <a href="#row0"
+ title=concept-row>rows</a> in <var title="">the table</var> from
+ <span>y=<var title="">y<sub title="">start</sub></var></span> to
+ <span>y=<var title="">y<sub title="">max</sub></var></span> form a new
+ <a href="#row-group" title=concept-row-group>row group</a>, anchored at
+ the slot with coordinate (1, <var title="">y<sub
+ title="">start</sub></var>), with height <var title="">y<sub
+ title="">max</sub></var>-<var title="">y<sub
+ title="">start</sub></var>+1, corresponding to the <var title="">current
+ element</var>.</p>
+
+ <li>
+ <p>Run the <a href="#algorithm">algorithm for ending a row group</a>
+ again.</p>
+
+ <li>
+ <p>Return to step 12 (rows).</p>
+ </ol>
+
+ <p>The <dfn id=algorithm>algorithm for ending a row group</dfn>, which is
+ invoked by the set of steps above when starting and ending a block of rows,
+ is:
+
+ <ol>
+ <li>
+ <p>If <var title="">y<sub title="">current</sub></var> is less than <var
+ title="">y<sub title="">max</sub></var>, then this is a <a
+ href="#table2">table model error</a>.</p>
+
+ <li>
+ <p>While <var title="">y<sub title="">current</sub></var> is less than
+ <var title="">y<sub title="">max</sub></var>, follow these steps:</p>
+
+ <ol>
+ <li>
+ <p>Increase <var title="">y<sub title="">current</sub></var> by 1.</p>
+
+ <li>
+ <p>Run the <a href="#algorithm1">algorithm for growing downward-growing
+ cells</a>.</p>
+ </ol>
+
+ <li>
+ <p>Empty the <var title="">list of downward-growing cells</var>.</p>
+ </ol>
+
+ <p>The <dfn id=algorithm0>algorithm for processing rows</dfn>, which is
+ invoked by the set of steps above for processing <code><a
+ href="#tr">tr</a></code> elements, is:
+
+ <ol>
+ <li>
+ <p>Increase <var title="">y<sub title="">current</sub></var> by 1.</p>
+ <!-- ymax is increased below once we know cell dimensions -->
+
+ <li>
+ <p>Run the <a href="#algorithm1">algorithm for growing downward-growing
+ cells</a>.</p>
+
+ <li>
+ <p>Let <var title="">x<sub title="">current</sub></var> be 1.</p>
+ <!-- xmax is increased below once we know cell dimensions -->
+
+ <li>
+ <p>If the <code><a href="#tr">tr</a></code> element being processed
+ contains no <code><a href="#td">td</a></code> or <code><a
+ href="#th">th</a></code> elements, then abort this set of steps and
+ return to the algorithm above.</p>
+
+ <li>
+ <p>Let <var title="">current cell</var> be the first <code><a
+ href="#td">td</a></code> or <code><a href="#th">th</a></code> element in
+ the <code><a href="#tr">tr</a></code> element being processed.</p>
+
+ <li>
+ <p><em>Cells.</em> While <var title="">x<sub title="">current</sub></var>
+ is less than or equal to <var title="">x<sub title="">max</sub></var>
+ and the slot with coordinate (<var title="">x<sub
+ title="">current</sub></var>, <var title="">y<sub
+ title="">current</sub></var>) already has a cell assigned to it,
+ increase <var title="">x<sub title="">current</sub></var> by 1.</p>
+
+ <li>
+ <p>If <var title="">x<sub title="">current</sub></var> is greater than
+ <var title="">x<sub title="">max</sub></var>, increase <var
+ title="">x<sub title="">max</sub></var> by 1 (which will make them
+ equal).</p>
+
+ <li>
+ <p>If the <var title="">current cell</var> has a <code
+ title="">colspan</code> attribute, then <a href="#rules" title="rules for
+ parsing non-negative integers">parse that attribute's value</a>, and
+ let <var title="">colspan</var> be the result.</p>
+
+ <p>If parsing that value failed, or returned zero, or if the attribute is
+ absent, then let <var title="">colspan</var> be 1, instead.</p>
+
+ <li>
+ <p>If the <var title="">current cell</var> has a <code
+ title="">rowspan</code> attribute, then <a href="#rules" title="rules for
+ parsing non-negative integers">parse that attribute's value</a>, and
+ let <var title="">rowspan</var> be the result.</p>
+
+ <p>If parsing that value failed or if the attribute is absent, then let
+ <var title="">rowspan</var> be 1, instead.</p>
+
+ <li>
+ <p>If <var title="">rowspan</var> is zero, then let <var title="">cell
+ grows downward</var> be true, and set <var title="">rowspan</var> to 1.
+ Otherwise, let <var title="">cell grows downward</var> be false.</p>
+
+ <li>
+ <p>If <span><var title="">x<sub
+ title="">max</sub></var>&nbsp;&lt;&nbsp;<var title="">x<sub
+ title="">current</sub></var>+<var title="">colspan</var>-1</span>, then
+ let <var title="">x<sub title="">max</sub></var> be <var title="">x<sub
+ title="">current</sub></var>+<var title="">colspan</var>-1.</p>
+
+ <li>
+ <p>If <span><var title="">y<sub
+ title="">max</sub></var>&nbsp;&lt;&nbsp;<var title="">y<sub
+ title="">current</sub></var>+<var title="">rowspan</var>-1</span>, then
+ let <var title="">y<sub title="">max</sub></var> be <var title="">y<sub
+ title="">current</sub></var>+<var title="">rowspan</var>-1.</p>
+
+ <li>
+ <p>Let the slots with coordinates (<var title="">x</var>, <var
+ title="">y</var>) such that <span><var title="">x<sub
+ title="">current</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x</var>&nbsp;&lt;&nbsp;<var title="">x<sub
+ title="">current</sub></var>+<var title="">colspan</var></span> and
+ <span><var title="">y<sub
+ title="">current</sub></var>&nbsp;&le;&nbsp;<var
+ title="">y</var>&nbsp;&lt;&nbsp;<var title="">y<sub
+ title="">current</sub></var>+<var title="">rowspan</var></span> be
+ covered by a new <a href="#cell" title=concept-cell>cell</a> <var
+ title="">c</var>, anchored at (<var title="">x<sub
+ title="">current</sub></var>, <var title="">y<sub
+ title="">current</sub></var>), which has width <var
+ title="">colspan</var> and height <var title="">rowspan</var>,
+ corresponding to the <var title="">current cell</var> element.</p>
+
+ <p>If the <var title="">current cell</var> element is a <code><a
+ href="#th">th</a></code> element, let this new cell <var
+ title="">c</var> be a header cell; otherwise, let it be a data cell. To
+ establish what header cells apply to a data cell, use the <a
+ href="#algorithm2">algorithm for assigning header cells to data
+ cells</a> described in the next section.</p>
+
+ <p>If any of the slots involved already had a <a href="#cell"
+ title=concept-cell>cell</a> covering them, then this is a <a
+ href="#table2">table model error</a>. Those slots now have two cells
+ overlapping.</p>
+
+ <li>
+ <p>If <var title="">cell grows downward</var> is true, then add the tuple
+ {<var title="">c</var>, <var title="">x<sub
+ title="">current</sub></var>, <var title="">colspan</var>} to the <var
+ title="">list of downward-growing cells</var>.</p>
+
+ <li>
+ <p>Increase <var title="">x<sub title="">current</sub></var> by <var
+ title="">colspan</var>.</p>
+
+ <li>
+ <p>If <var title="">current cell</var> is the last <code><a
+ href="#td">td</a></code> or <code><a href="#th">th</a></code> element in
+ the <code><a href="#tr">tr</a></code> element being processed, then
+ abort this set of steps and return to the algorithm above.</p>
+
+ <li>
+ <p>Let <var title="">current cell</var> be the next <code><a
+ href="#td">td</a></code> or <code><a href="#th">th</a></code> element in
+ the <code><a href="#tr">tr</a></code> element being processed.</p>
+
+ <li>
+ <p>Return to step 5 (cells).</p>
+ </ol>
+
+ <p>The <dfn id=algorithm1>algorithm for growing downward-growing
+ cells</dfn>, used when adding a new row, is as follows:
+
+ <ol>
+ <li>
+ <p>If the <var title="">list of downward-growing cells</var> is empty, do
+ nothing. Abort these steps; return to the step that invoked this
+ algorithm.</p>
+
+ <li>
+ <p>Otherwise, if <var title="">y<sub title="">max</sub></var> is less
+ than <var title="">y<sub title="">current</sub></var>, then increase
+ <var title="">y<sub title="">max</sub></var> by 1 (this will make it
+ equal to <var title="">y<sub title="">current</sub></var>).</p>
+
+ <li>
+ <p>For each {<var title="">cell</var>, <var title="">cell<sub
+ title="">x</sub></var>, <var title="">width</var>} tuple in the <var
+ title="">list of downward-growing cells</var>, extend the <a
+ href="#cell" title=concept-cell>cell</a> <var title="">cell</var> so
+ that it also covers the slots with coordinates (<var title="">x</var>,
+ <var title="">y<sub title="">current</sub></var>), where <span><var
+ title="">cell<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x</var>&nbsp;&lt;&nbsp;<var title="">cell<sub
+ title="">x</sub></var>+<var title="">width</var>-1</span>.</p>
+ </ol>
+
+ <p>If, after establishing which elements correspond to which slots, there
+ exists a <a href="#column" title=concept-column>column</a> in the <a
+ href="#table1" title=concept-table>table</a> containing only <span
+ title=concept-slot>slots</span> that do not have a <a href="#cell"
+ title=concept-cell>cell</a> anchored to them, then this is a <a
+ href="#table2">table model error</a>.
+
+ <h5 id=header-and-data-cell-semantics><span class=secno>3.15.11.2.
+ </span>Forming relationships between data cells and header cells</h5>
+
+ <p>Each data cell can be assigned zero or more header cells. The <dfn
+ id=algorithm2>algorithm for assigning header cells to data cells</dfn> is
+ as follows.
+
+ <p>For each header cell in the table, in <a href="#tree-order">tree
+ order</a>:
+
+ <ol>
+ <li>
+ <p>Let (<var title="">header<sub title="">x</sub></var>, <var
+ title="">header<sub title="">y</sub></var>) be the coordinate of the
+ slot to which the header cell is anchored.</p>
+
+ <li>
+ <p>Examine the <code title=attr-th-scope><a
+ href="#scope0">scope</a></code> attribute of the <code><a
+ href="#th">th</a></code> element corresponding to the header cell, and,
+ based on its state, apply the appropriate substep:</p>
+
+ <dl class=switch>
+ <dt>If it is in the <em title=attr-th-scope-row><a
+ href="#row">row</a></em> state
+
+ <dd>
+ <p>Assign the header cell to any data cells anchored at slots with
+ coordinates (<var title="">data<sub title="">x</sub></var>, <var
+ title="">data<sub title="">y</sub></var>) where <span><var
+ title="">header<sub title="">x</sub></var>&nbsp;&lt;&nbsp;<var
+ title="">data<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x<sub title="">max</sub></var></span> and <span><var
+ title="">data<sub title="">y</sub></var>&nbsp;=&nbsp;<var
+ title="">header<sub title="">y</sub></var></span>.</p>
+
+ <dt>If it is in the <em title=attr-th-scope-col><a
+ href="#col0">column</a></em> state
+
+ <dd>
+ <p>Assign the header cell to any data cells anchored at slots with
+ coordinates (<var title="">data<sub title="">x</sub></var>, <var
+ title="">data<sub title="">y</sub></var>) where <span><var
+ title="">data<sub title="">x</sub></var>&nbsp;=&nbsp;<var
+ title="">header<sub title="">x</sub></var></span> and <span><var
+ title="">header<sub title="">y</sub></var>&nbsp;&lt;&nbsp;<var
+ title="">data<sub title="">y</sub></var>&nbsp;&le;&nbsp;<var
+ title="">y<sub title="">max</sub></var></span>.</p>
+
+ <dt>If it is in the <em title=attr-th-scope-rowgroup><a
+ href="#rowgroup">row group</a></em> state
+
+ <dd>
+ <p>If the header cell is not in a <a href="#row-group"
+ title=concept-row-group>row group</a>, then don't assign the header
+ cell to any data cells.</p>
+
+ <p>Otherwise, let (1, <var title="">group<sub title="">y</sub></var>)
+ be the slot at which the row group is anchored, let <var
+ title="">height</var> be the number of rows in the row group, and
+ assign the header cell to any data cells anchored at slots with
+ coordinates (<var title="">data<sub title="">x</sub></var>, <var
+ title="">data<sub title="">y</sub></var>) where <span><var
+ title="">header<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">data<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x<sub title="">max</sub></var></span> and <span><var
+ title="">header<sub title="">y</sub></var>&nbsp;&le;&nbsp;<var
+ title="">data<sub title="">y</sub></var>&nbsp;&lt;&nbsp;<var
+ title="">group<sub title="">y</sub></var>+<var
+ title="">height</var></span>.</p>
+
+ <dt>If it is in the <em title=attr-th-scope-colgroup><a
+ href="#colgroup0">column group</a></em> state
+
+ <dd>
+ <p>If the header cell is not in a <a href="#column0"
+ title=concept-column-group>column group</a>, then don't assign the
+ header cell to any data cells.</p>
+
+ <p>Otherwise, let (<var title="">group<sub title="">x</sub></var>, 1)
+ be the slot at which the column group is anchored, let <var
+ title="">width</var> be the number of columns in the column group, and
+ assign the header cell to any data cells anchored at slots with
+ coordinates (<var title="">data<sub title="">x</sub></var>, <var
+ title="">data<sub title="">y</sub></var>) where <span><var
+ title="">header<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">data<sub title="">x</sub></var>&nbsp;&lt;&nbsp;<var
+ title="">group<sub title="">x</sub></var>+<var
+ title="">width</var></span> and <span><var title="">header<sub
+ title="">y</sub></var>&nbsp;&le;&nbsp;<var title="">data<sub
+ title="">y</sub></var>&nbsp;&le;&nbsp;<var title="">y<sub
+ title="">max</sub></var></span>.</p>
+
+ <dt>Otherwise, it is in the <em title="">auto</em> state
+
+ <dd>
+ <p>If the header cell is neither in the first row of the table nor the
+ first cell of a row, then don't assign the header cell to any data
+ cells.</p>
+
+ <p>Otherwise, if the header cell is in the first row of the table,
+ assign the header cell to any data cells anchored at slots with
+ coordinates (<var title="">data<sub title="">x</sub></var>, <var
+ title="">data<sub title="">y</sub></var>) where <span><var
+ title="">data<sub title="">x</sub></var>&nbsp;=&nbsp;<var
+ title="">header<sub title="">x</sub></var></span> and <span><var
+ title="">header<sub title="">y</sub></var>&nbsp;&lt;&nbsp;<var
+ title="">data<sub title="">y</sub></var>&nbsp;&le;&nbsp;<var
+ title="">y<sub title="">max</sub></var></span>.</p>
+
+ <p>Otherwise, the header cell is in the first column of the table;
+ assign the header cell to any data cells anchored at slots with
+ coordinates (<var title="">data<sub title="">x</sub></var>, <var
+ title="">data<sub title="">y</sub></var>) where <span><var
+ title="">header<sub title="">x</sub></var>&nbsp;&lt;&nbsp;<var
+ title="">data<sub title="">x</sub></var>&nbsp;&le;&nbsp;<var
+ title="">x<sub title="">max</sub></var></span> and <span><var
+ title="">data<sub title="">y</sub></var>&nbsp;=&nbsp;<var
+ title="">header<sub title="">y</sub></var></span>.</p>
+ </dl>
+ </ol>
+
+ <h3 id=forms><span class=secno>3.16. </span>Forms</h3>
+ <!-- XXX everything in WF2 -->
+
+ <p class=big-issue>This section will contain definitions of the
+ <code>form</code> element and so forth.
+
+ <p class=big-issue>This section will be a rewrite of the HTML4 Forms and
+ Web Forms 2.0 specifications, with hopefully no normative changes.</p>
+ <!-- From HTML4: BUTTON FIELDSET FORM INPUT LABEL OPTGROUP OPTION
+ SELECT TEXTAREA -->
+
+ <h4 id=the-form><span class=secno>3.16.1. </span>The <code>form</code>
+ element</h4>
+
+ <h4 id=the-fieldset><span class=secno>3.16.2. </span>The
+ <code>fieldset</code> element</h4>
+
+ <h4 id=the-input><span class=secno>3.16.3. </span>The <code>input</code>
+ element</h4>
+
+ <h4 id=the-button><span class=secno>3.16.4. </span>The <code>button</code>
+ element</h4>
+
+ <h4 id=the-label><span class=secno>3.16.5. </span>The <code>label</code>
+ element</h4>
+
+ <h4 id=the-select><span class=secno>3.16.6. </span>The <code>select</code>
+ element</h4>
+
+ <h4 id=the-datalist><span class=secno>3.16.7. </span>The
+ <code>datalist</code> element</h4>
+
+ <h4 id=the-optgroup><span class=secno>3.16.8. </span>The
+ <code>optgroup</code> element</h4>
+
+ <h4 id=the-option><span class=secno>3.16.9. </span>The <code>option</code>
+ element</h4>
+
+ <h4 id=the-textarea><span class=secno>3.16.10. </span>The
+ <code>textarea</code> element</h4>
+
+ <h4 id=the-output><span class=secno>3.16.11. </span>The <code>output</code>
+ element</h4>
+
+ <h4 id=processing0><span class=secno>3.16.12. </span>Processing model</h4>
+
+ <p class=big-issue>See <a
+ href="http://www.whatwg.org/specs/web-forms/current-work/#extend-form-controls">WF2</a>
+ for now
+
+ <h5 id=form-submission><span class=secno>3.16.12.1. </span>Form submission</h5>
+
+ <p class=big-issue>See <a
+ href="http://www.whatwg.org/specs/web-forms/current-work/#form-submission">WF2</a>
+ for now
+
+ <h3 id=scripting0><span class=secno>3.17. </span>Scripting</h3>
+
+ <h4 id=script><span class=secno>3.17.1. </span>The <dfn
+ id=script0><code>script</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, <a href="#strictly">strictly inline-level content</a>, and <a
+ href="#metadata" title="metadata elements">metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element.
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#inline-level0">inline-level content</a> is expected.
+
+ <dt>Content model:
+
+ <dd>If there is no <code title=attr-script-src><a
+ href="#src9">src</a></code> attribute, depends on the value of the <code
+ title=attr-script-type><a href="#type11">type</a></code> attribute.
+
+ <dd>If there <em>is</em> a <code title=attr-script-src><a
+ href="#src9">src</a></code> attribute, the element must be empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-script-src><a href="#src9">src</a></code>
+
+ <dd><code title=attr-script-defer><a href="#defer">defer</a></code>
+
+ <dd><code title=attr-script-async><a href="#async">async</a></code>
+
+ <dd><code title=attr-script-type><a href="#type11">type</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlscriptelement>HTMLScriptElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <code title=dom-script-src><a href="#src10">src</a></code>;
+ attribute boolean <code title=dom-script-defer><a href="#defer0">defer</a></code>;
+ attribute boolean <code title=dom-script-async><a href="#async0">async</a></code>;
+ attribute DOMString <code title=dom-script-type><a href="#type12">type</a></code>;
+ attribute DOMString <code title=dom-script-text><a href="#text0">text</a></code>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#script0">script</a></code> element allows authors to
+ include dynamic script in their documents.
+
+ <p>When the <dfn id=src9 title=attr-script-src><code>src</code></dfn>
+ attribute is set, the <code><a href="#script0">script</a></code> element
+ refers to an external file. The value of the attribute must be a URI (or
+ IRI).
+
+ <p>If the <code title=attr-script-src><a href="#src9">src</a></code>
+ attribute is not set, then the script is given by the contents of the
+ element.
+
+ <p>The language of the script may be given by the <dfn id=type11
+ title=attr-script-type><code>type</code></dfn> attribute. If the attribute
+ is present, its value must be a valid MIME type, optionally with
+ parameters. <a href="#refsRFC2046">[RFC2046]</a>
+
+ <p>The <dfn id=defer title=attr-script-defer><code>defer</code></dfn> and
+ <dfn id=async title=attr-script-async><code>async</code></dfn> attributes
+ are <a href="#boolean0" title="boolean attribute">boolean attributes</a>
+ that indicate how the script should be executed.
+
+ <p>There are three possible modes that can be selected using these
+ attributes. If the <code title=attr-script-defer><a
+ href="#defer">defer</a></code> attribute is present, then the script is
+ executed when the page has finished parsing. If the <code
+ title=attr-script-defer><a href="#defer">defer</a></code> attribute is not
+ present but the <code title=attr-script-async><a
+ href="#async">async</a></code> attribute is present, then the script will
+ be executed asynchronously, as soon as it is available. If neither
+ attribute is present, then the script is downloaded and executed
+ immediately, before the user agent continues parsing the page. The exact
+ processing details for these attributes are described below.
+
+ <p>The <code title=attr-script-async><a href="#async">async</a></code>
+ attribute must not be specified if the <code title=attr-script-defer><a
+ href="#defer">defer</a></code> attribute is specified.
+
+ <p>Changing the <code title=attr-script-src><a href="#src9">src</a></code>,
+ <code title=attr-script-type><a href="#type11">type</a></code>, <code
+ title=attr-script-defer><a href="#defer">defer</a></code> and <code
+ title=attr-script-async><a href="#async">async</a></code> attributes
+ dynamically has no direct effect; these attributes are only used at
+ specific times described below (namely, when the element is inserted into
+ the document).
+
+ <p><code><a href="#script0">script</a></code> elements have three
+ associated pieces of metadata. The first is a flag indicating whether or
+ not the script block has been <dfn id=already>"already executed"</dfn>.
+ Initially, <code><a href="#script0">script</a></code> elements must have
+ this flag unset (script blocks, when created, are not "already executed").
+ When a <code><a href="#script0">script</a></code> element is cloned, the
+ "already executed" flag, if set, must be propagated to the clone when it
+ is created. The second is a flag indicating whether the element was <dfn
+ id=parser-inserted>"parser-inserted"</dfn>. This flag is set by the <a
+ href="#html-0">HTML parser</a> and is used to handle <code
+ title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code> calls. The third piece
+ of metadata is <dfn id=the-scripts><var>the script's type</var></dfn>. It
+ is determined when the script is run, based on the attributes on the
+ element at that time.
+
+ <p><dfn id=running0 title="running a script">Running a script</dfn>: when a
+ script block is <span>inserted into a document</span>, the user agent must
+ act as follows:
+
+ <ol>
+ <li>
+ <p>If the <code><a href="#script0">script</a></code> element has a <code
+ title=attr-script-type><a href="#type11">type</a></code> attribute but
+ its value is the empty string, or if the <code><a
+ href="#script0">script</a></code> element has no <code
+ title=attr-script-type><a href="#type11">type</a></code> attribute but
+ it has a <code title=attr-script-language>language</code> attribute, and
+ <em>that</em> attribute's value is the empty string, let <var><a
+ href="#the-scripts">the script's type</a></var> for this <code><a
+ href="#script0">script</a></code> element be "<code
+ title="">text/javascript</code>".</p>
+
+ <p>Otherwise, if the <code><a href="#script0">script</a></code> element
+ has a <code title=attr-script-type><a href="#type11">type</a></code>
+ attribute, let <var><a href="#the-scripts">the script's type</a></var>
+ for this <code><a href="#script0">script</a></code> element be the value
+ of that attribute.</p>
+
+ <p>Otherwise, if the element has a <code
+ title=attr-script-language>language</code> attribute, let <var><a
+ href="#the-scripts">the script's type</a></var> for this <code><a
+ href="#script0">script</a></code> element be the concatenation of the
+ string "<code title="">text/</code>" followed by the value of the <code
+ title=attr-script-language>language</code> attribute.</p>
+
+ <li>
+ <p>If <a href="#scripting1">scripting is disabled</a>, or if the
+ <code>Document</code> has <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> enabled, or if the user agent
+ does not <a href="#support">support the scripting language</a> given by
+ <var><a href="#the-scripts">the script's type</a></var> for this
+ <code><a href="#script0">script</a></code> element, or if the <code><a
+ href="#script0">script</a></code> element has its <a
+ href="#already">"already executed"</a> flag set, then the user agent
+ must abort these steps at this point. The script is not executed.</p>
+
+ <li>
+ <p>The user agent must set the element's <a href="#already">"already
+ executed"</a> flag.</p>
+
+ <li>
+ <p>If the element has a <code title=attr-script-src><a
+ href="#src9">src</a></code> attribute, then a load for the specified
+ content must be started.</p>
+
+ <p class=note>Later, once the load has completed, the user agent will
+ have to complete <a href="#when-a" title="when a script completes
+ loading">the steps described below</a>.</p>
+
+ <p>For performance reasons, user agents may start loading the script as
+ soon as the attribute is set, instead, in the hope that the element will
+ be inserted into the document. Either way, once the element is inserted
+ into the document, the load must have started. If the UA performs such
+ prefetching, but the element is never inserted in the document, or the
+ <code title=attr-script-src><a href="#src9">src</a></code> attribute is
+ dynamically changed, then the user agent will not execute the script,
+ and the load will have been effectively wasted.</p>
+
+ <li>
+ <p>Then, the first of the following options that describes the situation
+ must be followed:</p>
+
+ <dl class=switch>
+ <dt>If the document is still being parsed, and the element has a <code
+ title=attr-script-defer><a href="#defer">defer</a></code> attribute
+
+ <dd>The element must be added to the end of the <a href="#list-of">list
+ of scripts that will execute when the document has finished
+ parsing</a>. The user agent must begin <a href="#when-a" title="when a
+ script completes loading">the next set of steps</a> when the script is
+ ready. <span class=big-issue>This isn't compatible with IE for inline
+ deferred scripts, but then what IE does is pretty hard to pin down
+ exactly. Do we want to keep this like it is? Be more compatible?</span>
+ <!--XXX
+ http://www.websiteoptimization.com/speed/tweak/defer/test/
+ internal deferred scripts execute before any external scripts execute, or before the LOAD if there are none
+ external deferred scripts execute before the LOAD
+ -->
+
+
+ <dt>If the element has an <code title=attr-script-async><a
+ href="#async">async</a></code> attribute and a <code
+ title=attr-script-src><a href="#src9">src</a></code> attribute
+
+ <dd>The element must be added to the end of the <a href="#list-of0">list
+ of scripts that will execute asynchronously</a>. The user agent must
+ jump to <a href="#when-a" title="when a script completes loading">the
+ next set of steps</a> once the script is ready.
+
+ <dt>If the element has an <code title=attr-script-async><a
+ href="#async">async</a></code> attribute but no <code
+ title=attr-script-src><a href="#src9">src</a></code> attribute, and the
+ <a href="#list-of0">list of scripts that will execute
+ asynchronously</a> is not empty
+
+ <dd>The element must be added to the end of the <a href="#list-of0">list
+ of scripts that will execute asynchronously</a>.
+
+ <dt>If the element has a <code title=attr-script-src><a
+ href="#src9">src</a></code> attribute and has been flagged as <a
+ href="#parser-inserted">"parser-inserted"</a>
+
+ <dd>The element is <a href="#the-script">the script that will execute as
+ soon as the parser resumes</a>. (There can only be one such script at a
+ time.)
+
+ <dt>If the element has a <code title=attr-script-src><a
+ href="#src9">src</a></code> attribute
+
+ <dd>The element must be added to the end of the <a href="#list-of1">list
+ of scripts that will execute as soon as possible</a>. The user agent
+ must jump to <a href="#when-a" title="when a script completes
+ loading">the next set of steps</a> when the script is ready.
+
+ <dt>Otherwise
+
+ <dd>The user agent must immediately <a href="#executing0"
+ title="executing a script block">execute the script</a>, even if other
+ scripts are already executing.
+ </dl>
+ </ol>
+
+ <p><dfn id=when-a title="when a script completes loading">When a script
+ completes loading</dfn>: If a script whose element was added to one of the
+ lists mentioned above completes loading while the document is still being
+ parsed, then the parser handles it. Otherwise, when a script completes
+ loading, the UA must follow the following steps as soon as any other
+ scripts that may be executing have finished executing:
+
+ <dl class=switch>
+ <dt>If the script's element was added to the <dfn id=list-of>list of
+ scripts that will execute when the document has finished parsing</dfn>:
+
+ <dd>
+ <ol>
+ <li>
+ <p>If the script's element is not the first element in the list, then
+ do nothing yet. Stop going through these steps.</p>
+
+ <li>
+ <p>Otherwise, <a href="#executing0" title="executing a script
+ block">execute the script</a> (that is, the script associated with the
+ first element in the list).</p>
+
+ <li>
+ <p>Remove the script's element from the list (i.e. shift out the first
+ entry in the list).</p>
+
+ <li>
+ <p>If there are any more entries in the list, and if the script
+ associated with the element that is now the first in the list is
+ already loaded, then jump back to step two to execute it.</p>
+ </ol>
+
+ <dt>If the script's element was added to the <dfn id=list-of0>list of
+ scripts that will execute asynchronously</dfn>:
+
+ <dd>
+ <ol>
+ <li>
+ <p>If the script's element is not the first element in the list, then
+ do nothing yet. Stop going through these steps.</p>
+
+ <li>
+ <p><a href="#executing0" title="executing a script block">Execute the
+ script</a> (the script associated with the first element in the list).</p>
+
+ <li>
+ <p>Remove the script's element from the list (i.e. shift out the first
+ entry in the list).</p>
+
+ <li>
+ <p>If there are any more scripts in the list, and the element now at
+ the head of the list had no <code title=attr-script-src><a
+ href="#src9">src</a></code> attribute when it was added to the list,
+ or had one, but its associated script has finished loading, then jump
+ back to step two to execute the script associated with this element.</p>
+ </ol>
+
+ <dt>If the script's element was added to the <dfn id=list-of1>list of
+ scripts that will execute as soon as possible</dfn>:
+
+ <dd>
+ <ol>
+ <li>
+ <p><a href="#executing0" title="executing a script block">Execute the
+ script</a>.</p>
+
+ <li>
+ <p>Remove the script's element from the list.</p>
+ </ol>
+
+ <dt>If the script is <dfn id=the-script>the script that will execute as
+ soon as the parser resumes</dfn>:
+
+ <dd>
+ <p>The script will be handled <a href="#scriptTagParserResumes">when the
+ parser resumes</a> (amazingly enough).</p>
+ </dl>
+
+ <p>The download of an external script must <a href="#delays">delay the
+ <code title=event-load>load</code> event</a>.
+
+ <p><dfn id=executing0 title="executing a script block">Executing a script
+ block</dfn>: If the load resulted in an error (for example a DNS error, or
+ an HTTP 404 error), then executing the script must just consist of <a
+ href="#firing5" title="fire an error event">firing an <code
+ title=event-error>error</code> event</a> at the element.
+
+ <p>If the load was successful, then first the user agent must <a
+ href="#firing4">fire a <code title=event-load>load</code> event</a> at the
+ element, and then, if <a href="#scripting2">scripting is enabled</a>, and
+ the <code>Document</code> does not have <code
+ title=dom-document-designMode><a href="#designMode">designMode</a></code>
+ enabled, and the <code>Document</code> is the <a href="#active">active
+ document</a> in its <a href="#browsing0">browsing context</a>, the user
+ agent must execute the script:
+
+ <p>If the script is from an external file, then that file must be used as
+ the file to execute.
+
+ <p>If the script is inline, then, for scripting languages that consist of
+ pure text, user agents must use the value of the DOM <code
+ title=dom-script-text><a href="#text0">text</a></code> attribute (defined
+ below) as the script to execute, and for XML-based scripting languages,
+ user agents must use all the child nodes of the <code><a
+ href="#script0">script</a></code> element as the script to execute.
+
+ <p>In any case, the user agent must execute the script according to the
+ semantics defined by the language associated with <var><a
+ href="#the-scripts">the script's type</a></var> (see the <a
+ href="#scriptingLanguages">scripting languages</a> section below).
+
+ <p>Scripts must be executed in the scope of the <a
+ href="#browsing0">browsing context</a> of the element's
+ <code>Document</code>.
+
+ <p class=note>The element's attributes' values might have changed between
+ when the element was inserted into the document and when the script has
+ finished loading, as may its other attributes; similarly, the element
+ itself might have been taken back out of the DOM, or had other changes
+ made. These changes do not in any way affect the above steps; only the
+ values of the attributes at the time the <code><a
+ href="#script0">script</a></code> element is first inserted into the
+ document matter.
+
+ <p>The DOM attributes <dfn id=src10
+ title=dom-script-src><code>src</code></dfn>, <dfn id=type12
+ title=dom-script-type><code>type</code></dfn>, <dfn id=defer0
+ title=dom-script-defer><code>defer</code></dfn>, and <dfn id=async0
+ title=dom-script-async><code>async</code></dfn> must each <a
+ href="#reflect">reflect</a> the respective content attributes of the same
+ name.
+
+ <p>The DOM attribute <dfn id=text0
+ title=dom-script-text><code>text</code></dfn> must return a concatenation
+ of the contents of all the <a href="#text-node" title="text node">text
+ nodes</a> that are direct children of the <code><a
+ href="#script0">script</a></code> element (ignoring any other nodes such
+ as comments or elements), in tree order. On setting, it must act the same
+ way as the <code><a href="#textcontent">textContent</a></code> DOM
+ attribute.
+
+ <h5 id=scriptingLanguages><span class=secno>3.17.1.1. </span>Scripting
+ languages</h5>
+
+ <p>A user agent is said to <dfn id=support>support the scripting
+ language</dfn> if <var><a href="#the-scripts">the script's type</a></var>
+ matches the MIME type of a scripting language that the user agent
+ implements.
+
+ <p>The following lists some MIME types and the languages to which they
+ refer:
+
+ <dl>
+ <dt><code>text/javascript</code>
+
+ <dd>ECMAScript. <a href="#refsECMA262">[ECMA262]</a>
+
+ <dt><code>text/javascript;e4x=1</code>
+
+ <dd>ECMAScript with ECMAScript for XML. <a
+ href="#refsECMA357">[ECMA357]</a>
+ </dl>
+
+ <p>User agents may support other MIME types and other languages.
+
+ <p>When examining types to determine if they support the language, user
+ agents must not ignore unknown MIME parameters &mdash; types with unknown
+ parameters must be assumed to be unsupported.</p>
+ <!--
+ XXX we should reference #refsRFC4329 http://www.ietf.org/rfc/rfc4329
+ -->
+
+ <h4 id=the-noscript><span class=secno>3.17.2. </span>The <dfn
+ id=noscript><code>noscript</code></dfn> element</h4>
+
+ <p>When <a href="#scripting1">scripting is disabled</a>: <a
+ href="#transparent0">transparent</a> <a href="#block-level0"
+ title="block-level elements">block-level element</a>, and <a
+ href="#transparent0">transparent</a> <a href="#strictly">strictly
+ inline-level content</a>.
+
+ <p>When <a href="#scripting2">scripting is enabled</a>: <a
+ href="#block-level0" title="block-level elements">block-level element</a>,
+ and <a href="#strictly">strictly inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element of an <a
+ href="#html-" title=">HTML documents">HTML document</a>, if there are no
+ ancestor <code><a href="#noscript">noscript</a></code> elements.
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected in
+ <a href="#html-">HTML documents</a>, if there are no ancestor <code><a
+ href="#noscript">noscript</a></code> elements.
+
+ <dd>Where <a href="#inline-level0">inline-level content</a> is expected in
+ <a href="#html-">HTML documents</a>, if there are no ancestor <code><a
+ href="#noscript">noscript</a></code> elements.
+
+ <dt>Content model:
+
+ <dd>When <a href="#scripting1">scripting is disabled</a>: <a
+ href="#transparent0">transparent</a>, but there must be no <code><a
+ href="#noscript">noscript</a></code> element descendants.
+
+ <dd>When <a href="#scripting2">scripting is enabled</a>: Text that
+ conforms to the requirements given in the prose.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#noscript">noscript</a></code> element does not
+ represent anything. It is used to present different markup to user agents
+ that support scripting and those that don't support scripting, by
+ affecting how the document is parsed.
+
+ <p>The <code><a href="#noscript">noscript</a></code> element must not be
+ used in <a href="#xml-documents">XML documents</a>.
+
+ <p>When used in <a href="#html-">HTML documents</a>, the allowed content
+ model depends on whether scripting is enabled or not.
+
+ <p>If <a href="#scripting1">scripting is disabled</a>, then the content
+ model of a <code><a href="#noscript">noscript</a></code> element is <a
+ href="#transparent0">transparent</a>, with the additional restriction that
+ a <code><a href="#noscript">noscript</a></code> element must not have a
+ <code><a href="#noscript">noscript</a></code> element as an ancestor (that
+ is, <code><a href="#noscript">noscript</a></code> can't be nested).
+
+ <p>If <a href="#scripting2">scripting is enabled</a>, then the content
+ model of a <code><a href="#noscript">noscript</a></code> element is text,
+ except that the text must be such that running the following algorithm
+ results in a conforming document with no <code><a
+ href="#noscript">noscript</a></code> elements and no <code><a
+ href="#script0">script</a></code> elements, and such that no step in the
+ algorithm causes an <a href="#html-0">HTML parser</a> to flag a <a
+ href="#parse">parse error</a>:
+
+ <ol>
+ <li>Remove every <code><a href="#script0">script</a></code> element from
+ the document.
+
+ <li>Make a list of every <code><a href="#noscript">noscript</a></code>
+ element in the document. For every <code><a
+ href="#noscript">noscript</a></code> element in that list, perform the
+ following steps:
+ <ol>
+ <li>Let the <var title="">parent element</var> be the parent element of
+ the <code><a href="#noscript">noscript</a></code> element.
+
+ <li>Take all the children of the <var title="">parent element</var> that
+ come before the <code><a href="#noscript">noscript</a></code> element,
+ and call these elements <var title="">the before children</var>.
+
+ <li>Take all the children of the <var title="">parent element</var> that
+ come <em>after</em> the <code><a href="#noscript">noscript</a></code>
+ element, and call these elements <var title="">the after
+ children</var>.
+
+ <li>Let <var title="">s</var> be the concatenation of all the <a
+ href="#text-node">text node</a> children of the <code><a
+ href="#noscript">noscript</a></code> element.
+
+ <li>Set the <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute of the <var
+ title="">parent element</var> to the value of <var title="">s</var>.
+ (This, as a side-effect, causes the <code><a
+ href="#noscript">noscript</a></code> element to be removed from the
+ document.)
+
+ <li>Insert <var title="">the before children</var> at the start of the
+ <var title="">parent element</var>, preserving their original relative
+ order.
+
+ <li>Insert <var title="">the after children</var> at the end of the <var
+ title="">parent element</var>, preserving their original relative
+ order.
+ </ol>
+ </ol>
+
+ <p>The <code><a href="#noscript">noscript</a></code> element has no other
+ requirements. In particular, children of the <code><a
+ href="#noscript">noscript</a></code> element are not exempt from form
+ submission, scripting, and so forth, even when scripting is enabled.
+
+ <p class=note>All these contortions are required because, for historical
+ reasons, the <code><a href="#noscript">noscript</a></code> element causes
+ the <a href="#html-0">HTML parser</a> to act differently based on whether
+ scripting is enabled or not. The element is not allowed in XML, because in
+ XML the parser is not affected by such state, and thus the element would
+ not have the desired effect.
+
+ <h4 id=the-event-source><span class=secno>3.17.3. </span>The <dfn
+ id=event-source><code>event-source</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, <a href="#strictly">strictly inline-level content</a>, and <a
+ href="#metadata" title="metadata elements">metadata element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element.
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#inline-level0">inline-level content</a> is expected.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-event-source-src><a href="#src11">src</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmleventsourceelement>HTMLEventSourceElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#src12" title=dom-event-source-src>src</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#event-source">event-source</a></code> element
+ represents a target for events generated by a remote server.
+
+ <p>The <dfn id=src11 title=attr-event-source-src><code>src</code></dfn>
+ attribute, if specified, must give a URI (or IRI) pointing to a resource
+ that uses the <code>application/x-dom-event-stream</code> format.
+
+ <p>When the element is inserted into the document, if it has the <code
+ title=attr-event-source-src><a href="#src11">src</a></code> attribute
+ specified, the user agent must act as if the <code
+ title=dom-remoteEventTarget-addEventSource><a
+ href="#addeventsource">addEventSource()</a></code> method on the <code><a
+ href="#event-source">event-source</a></code> element had been invoked with
+ the URI resulting from resolving the <code title=attr-event-source-src><a
+ href="#src11">src</a></code> attribute's value to an absolute URI.
+
+ <p>While the element is in a document, if its <code
+ title=attr-event-source-src><a href="#src11">src</a></code> attribute is
+ mutated, the user agent must act as if first the <code
+ title=dom-remoteEventTarget-removeEventSource><a
+ href="#removeeventsource">removeEventSource()</a></code> method on the
+ <code><a href="#event-source">event-source</a></code> element had been
+ invoked with the URI resulting from resolving the old value of the
+ attribute to an absolute URI, and then as if the <code
+ title=dom-remoteEventTarget-addEventSource><a
+ href="#addeventsource">addEventSource()</a></code> method on the element
+ had been invoked with the URI resulting from resolving the <em>new</em>
+ value of the <code title=attr-event-source-src><a
+ href="#src11">src</a></code> attribute to an absolute URI.
+
+ <p>When the element is removed from the document, if it has the <code
+ title=attr-event-source-src><a href="#src11">src</a></code> attribute
+ specified, or, when the <code title=attr-event-source-src><a
+ href="#src11">src</a></code> attribute is about to be removed, the user
+ agent must act as if the <code
+ title=dom-remoteEventTarget-removeEventSource><a
+ href="#removeeventsource">removeEventSource()</a></code> method on the
+ <code><a href="#event-source">event-source</a></code> element had been
+ invoked with the URI resulting from resolving the <code
+ title=attr-event-source-src><a href="#src11">src</a></code> attribute's
+ value to an absolute URI.
+
+ <p>There can be more than one <code><a
+ href="#event-source">event-source</a></code> element per document, but
+ authors should take care to avoid opening multiple connections to the same
+ server as HTTP recommends a limit to the number of simultaneous
+ connections that a user agent can open per server.</p>
+ <!-- XXX should we make 'load', 'error', 'abort' events fire on this
+ element? -->
+
+ <p>The <dfn id=src12 title=dom-event-source-src><code>src</code></dfn> DOM
+ attribute must reflect the content attribute of the same name.
+
+ <h3 id=interactive><span class=secno>3.18. </span>Interactive elements</h3>
+
+ <h4 id=the-details><span class=secno>3.18.1. </span>The <dfn
+ id=details><code>details</code></dfn> element</h4>
+
+ <p><a href="#interactive1" title="interactive elements">Interactive</a>, <a
+ href="#block-level0" title="block-level elements">block-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>One <code><a href="#legend">legend</a></code> element followed by
+ either one or more <a href="#block-level0">block-level elements</a> or <a
+ href="#inline-level0">inline-level content</a> (but not both).
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-details-open><a href="#open0">open</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmldetailselement>HTMLDetailsElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute boolean <a href="#open1" title=dom-details-open>open</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#details">details</a></code> element represents
+ additional information or controls which the user can obtain on demand.
+
+ <p>The first element child of a <code><a href="#details">details</a></code>
+ element, if it is a <code><a href="#legend">legend</a></code> element,
+ represents the summary of the details.
+
+ <p>If the first element is not a <code><a href="#legend">legend</a></code>
+ element, the UA should provide its own legend (e.g. "Details").
+
+ <p>The <dfn id=open0 title=attr-details-open><code>open</code></dfn>
+ content attribute is a <a href="#boolean0">boolean attribute</a>. If
+ present, it indicates that the details should be shown to the user. If the
+ attribute is absent, the details should not be shown.
+
+ <p>If the attribute is removed, then the details should be hidden. If the
+ attribute is added, the details should be shown.
+
+ <p>The user should be able to request that the details be shown or hidden.
+
+ <p>The <dfn id=open1 title=dom-details-open><code>open</code></dfn>
+ attribute must <a href="#reflect">reflect</a> the <code
+ title=attr-details-open><a href="#open0">open</a></code> content
+ attribute.</p>
+ <!--
+http://mail.gnome.org/archives/usability/2006-June/msg00015.html
+http://developer.apple.com/documentation/UserExperience/Conceptual/OSXHIGuidelines/XHIGControls/chapter_18_section_7.html
+https://www.google.com/base/settings
+-->
+
+ <p class=big-issue>Rendering will be described in the Rendering section in
+ due course. Basically CSS :open and :closed match the element, it's a
+ block-level element by default, and when it matches :closed it renders as
+ if it had an XBL binding attached to it whose template was just
+ <code>&lt;template>&#x25B6;&lt;content
+ includes="legend:first-child"&gt;Details&lt;/content>&lt;/template></code>,
+ and when it's :open it acts as if it had an XBL binding attached to it
+ whose template was just <code>&lt;template>&#x25BC;&lt;content
+ includes="legend:first-child"&gt;Details&lt;/content>&lt;content/>&lt;/template></code>
+ or some such.
+
+ <p class=big-issue>Clicking the legend would make it open/close (and would
+ change the content attribute). Question: Do we want the content attribute
+ to reflect the actual state like this? I think we do, the DOM not
+ reflecting state has been a pain in the neck before. But is it
+ semantically ok?
+
+ <h4 id=datagrid><span class=secno>3.18.2. </span>The <dfn
+ id=datagrid0><code>datagrid</code></dfn> element</h4>
+
+ <p><a href="#interactive1" title="interactive elements">Interactive</a>, <a
+ href="#block-level0" title="block-level elements">block-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected,
+ if there are no ancestor <a href="#interactive1">interactive
+ elements</a>.
+
+ <dt>Content model:
+
+ <dd>Either: Nothing.
+
+ <dd>Or: One or more <a href="#block-level0">block-level elements</a>, the
+ first of which is not a <code><a href="#table">table</a></code> element.
+
+ <dd>Or: A single <code><a href="#table">table</a></code> element.
+
+ <dd>Or: A single <code>select</code> element.
+
+ <dd>Or: A single <code>datalist</code> element.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-datagrid-multiple><a
+ href="#multiple0">multiple</a></code>
+
+ <dd><code title=attr-datagrid-disabled><a
+ href="#disabled3">disabled</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmldatagridelement>HTMLDataGridElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute <a href="#datagriddataprovider">DataGridDataProvider</a> <a href="#data2" title=dom-datagrid-data>data</a>;
+ readonly attribute <a href="#datagridselection">DataGridSelection</a> <a href="#selection0" title=dom-datagrid-selection>selection</a>;
+ attribute boolean <a href="#multiple" title=dom-datagrid-multiple>multiple</a>;
+ attribute boolean <a href="#disabled2" title=dom-datagrid-disabled>disabled</a>;
+ void <a href="#updateeverything" title=dom-datagrid-updateEverything>updateEverything</a>();
+ void <a href="#updaterowschanged" title=dom-datagrid-updateRowsChanged>updateRowsChanged</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long count);
+ void <a href="#updaterowsinserted" title=dom-datagrid-updateRowsInserted>updateRowsInserted</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long count);
+ void <a href="#updaterowsremoved" title=dom-datagrid-updateRowsRemoved>updateRowsRemoved</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long count);
+ void <a href="#updaterowchanged" title=dom-datagrid-updateRowChanged>updateRowChanged</a>(in <a href="#rowspecification">RowSpecification</a> row);
+ void <a href="#updatecolumnchanged" title=dom-datagrid-updateColumnChanged>updateColumnChanged</a>(in unsigned long column);
+ void <a href="#updatecellchanged" title=dom-datagrid-updateCellChanged>updateCellChanged</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long column);
+};</pre>
+ </dl>
+
+ <p class=big-issue>One possible thing to be added is a way to detect when a
+ row/selection has been deleted, activated, etc, by the user (delete key,
+ enter key, etc).</p>
+ <!-- XXXPA -->
+
+ <p class=big-issue>This element is defined as interactive, which means it
+ can't contain other interactive elements, despite the fact that we expect
+ it to work with other interactive elements e.g. checkboxes and input
+ fields. It should be called something like a Leaf Interactive Element or
+ something, which counts for ancestors looking in and not descendants
+ looking out.
+
+ <p>The <code><a href="#datagrid0">datagrid</a></code> element represents an
+ interactive representation of tree, list, or tabular data.
+
+ <p>The data being presented can come either from the content, as elements
+ given as children of the <code><a href="#datagrid0">datagrid</a></code>
+ element, or from a scripted data provider given by the <code
+ title=dom-datagrid-data><a href="#data2">data</a></code> DOM attribute.
+
+ <p>The <code title=attr-datagrid-multiple><a
+ href="#multiple0">multiple</a></code> and <code
+ title=attr-datagrid-disabled><a href="#disabled3">disabled</a></code>
+ attributes are <a href="#boolean0" title="boolean attribute">boolean
+ attributes</a>. Their effects are described in the processing model
+ sections below.
+
+ <p>The <dfn id=multiple
+ title=dom-datagrid-multiple><code>multiple</code></dfn> and <dfn
+ id=disabled2 title=dom-datagrid-disabled><code>disabled</code></dfn> DOM
+ attributes must <a href="#reflect">reflect</a> the <code
+ title=attr-datagrid-multiple><a href="#multiple0">multiple</a></code> and
+ <code title=attr-datagrid-disabled><a
+ href="#disabled3">disabled</a></code> content attributes respectively.
+
+ <h5 id=the-datagrid><span class=secno>3.18.2.1. </span>The <code><a
+ href="#datagrid0">datagrid</a></code> data model</h5>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>In the <code><a href="#datagrid0">datagrid</a></code> data model, data
+ is structured as a set of rows representing a tree, each row being split
+ into a number of columns. The columns are always present in the data
+ model, although individual columns may be hidden in the presentation.
+
+ <p>Each row can have child rows. Child rows may be hidden or shown, by
+ closing or opening (respectively) the parent row.
+
+ <p>Rows are referred to by the path along the tree that one would take to
+ reach the row, using zero-based indices. Thus, the first row of a list is
+ row "0", the second row is row "1"; the first child row of the first row
+ is row "0,0", the second child row of the first row is row "0,1"; the
+ fourth child of the seventh child of the third child of the tenth row is
+ "9,2,6,3", etc.
+
+ <p>The columns can have captions. Those captions are not considered a row
+ in their own right; they are obtained separately.
+
+ <p>Selection of data in a <code><a href="#datagrid0">datagrid</a></code>
+ operates at the row level. If the <code title=attr-datagrid-multiple><a
+ href="#multiple0">multiple</a></code> attribute is present, multiple rows
+ can be selected at once, otherwise the user can only select one row at a
+ time.
+
+ <p>The <code><a href="#datagrid0">datagrid</a></code> element can be
+ disabled entirely by setting the <code title=attr-datagrid-disabled><a
+ href="#disabled3">disabled</a></code> attribute.</p>
+ <!--XXXDND
+ <p class="big-issue">selection draggable [normative definitions are
+ in the interactive part below]</p>
+-->
+
+ <p>Columns, rows, and cells can each have specific flags, known as classes,
+ applied to them by the data provider. These classes <a
+ href="#datagridClassSummary">affect the functionality</a> of the <code><a
+ href="#datagrid0">datagrid</a></code> element, and are also <a
+ href="#datagridPseudos">passed to the style system</a>. They are similar
+ in concept to the <code title=attr-class><a href="#class">class</a></code>
+ attribute, except that they are not specified on elements but are given by
+ scripted data providers.</p>
+ <!-- XXX check xrefs -->
+
+ <h5 id=how-rows><span class=secno>3.18.2.2. </span>How rows are identified</h5>
+
+ <p>The chains of numbers that give a row's path, or identifier, are
+ represented by objects that implement the <a
+ href="#rowspecification">RowSpecification</a> interface.
+
+ <pre class=idl>interface <dfn id=rowspecification>RowSpecification</dfn> {
+ // binding-specific interface
+};</pre>
+
+ <p>In ECMAScript, two classes of objects are said to implement this
+ interface: Numbers representing non-negative integers, and homogeneous
+ arrays of Numbers representing non-negative integers. Thus,
+ <code>[1,0,9]</code> is a <a
+ href="#rowspecification">RowSpecification</a>, as is <code>1</code> on its
+ own. However, <code>[1,0.2,9]</code> is not a <a
+ href="#rowspecification">RowSpecification</a> object, since its second
+ value is not an integer.
+
+ <p>User agents must always represent <code><a
+ href="#rowspecification">RowSpecification</a></code>s in ECMAScript by
+ using arrays, even if the path only has one number.
+
+ <p>The root of the tree is represented by the empty path; in ECMAScript,
+ this is the empty array (<code>[]</code>). Only the <code
+ title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> and <code
+ title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition()</a></code> methods ever
+ get called with the empty path.
+
+ <h5 id=the-data><span class=secno>3.18.2.3. </span>The data provider
+ interface</h5>
+
+ <p><em>The conformance criteria in this section apply to any implementation
+ of the <code><a
+ href="#datagriddataprovider">DataGridDataProvider</a></code>, including
+ (and most commonly) the content author's implementation(s).</em>
+
+ <pre class=idl>// To be implemented by Web authors as a JS object
+interface <dfn id=datagriddataprovider>DataGridDataProvider</dfn> {
+ void <a href="#initialize" title=dom-provider-initialize>initialize</a>(in HTMLDataGridElement datagrid);
+ unsigned long <a href="#getrowcount" title=dom-provider-getRowCount>getRowCount</a>(in <a href="#rowspecification">RowSpecification</a> row);
+ unsigned long <a href="#getchildatposition" title=dom-provider-getChildAtPosition>getChildAtPosition</a>(in <a href="#rowspecification">RowSpecification</a> parentRow, in unsigned long position);
+ unsigned long <a href="#getcolumncount" title=dom-provider-getColumnCount>getColumnCount</a>();
+ DOMString <a href="#getcaptiontext" title=dom-provider-getCaptionText>getCaptionText</a>(in unsigned long column);
+ void <a href="#getcaptionclasses" title=dom-provider-getCaptionClasses>getCaptionClasses</a>(in unsigned long column, in DOMTokenList classes);
+ DOMString <a href="#getrowimage" title=dom-provider-getRowImage>getRowImage</a>(in <a href="#rowspecification">RowSpecification</a> row);
+ <a href="#htmlmenuelement">HTMLMenuElement</a> <a href="#getrowmenu" title=dom-provider-getRowMenu>getRowMenu</a>(in <a href="#rowspecification">RowSpecification</a> row);
+ void <a href="#getrowclasses" title=dom-provider-getRowClasses>getRowClasses</a>(in <a href="#rowspecification">RowSpecification</a> row, in DOMTokenList classes);
+ DOMString <a href="#getcelldata" title=dom-provider-getCellData>getCellData</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long column);
+ void <a href="#getcellclasses" title=dom-provider-getCellClasses>getCellClasses</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long column, in DOMTokenList classes);
+<!--XXXDND
+ boolean <span title="dom-provider-canDrop">canDrop</span>(in <span>RowSpecification</span> row, in <span>RowSpecification</span> position, data);
+ boolean <span title="dom-provider-dropped">dropped</span>(in <span>RowSpecification</span> row, in <span>RowSpecification</span> position, data);
+--> void <a href="#togglecolumnsortstate" title=dom-provider-toggleColumnSortState>toggleColumnSortState</a>(in unsigned long column);
+ void <a href="#setcellcheckedstate" title=dom-provider-setCellCheckedState>setCellCheckedState</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long column, in long state);
+ void <a href="#cyclecell" title=dom-provider-cycleCell>cycleCell</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long column);
+ void <a href="#editcell" title=dom-provider-editCell>editCell</a>(in <a href="#rowspecification">RowSpecification</a> row, in unsigned long column, in DOMString data);
+<!--XXXPA
+ void <span title="dom-provider-performAction">performAction</span>(in DOMString action); // required if .performAction() is ever invoked on the datagrid
+ void <span title="dom-provider-performActionOnRow">performActionOnRow</span>(in <span>RowSpecification</span> row, in DOMString action); // required if getRowClasses ever includes 'deletable' or if <span title="dom-provider-.performActionOnRow">.performActionOnRow</span>() is ever invoked on the datagrid
+ void <span title="dom-provider-performActionOnCell">performActionOnCell</span>(in <span>RowSpecification</span> row, in unsigned long column, in DOMString action); // required if .performActionOnCell() is ever invoked on the datagrid
+-->};</pre>
+ <!-- based on http://lxr.mozilla.org/seamonkey/source/layout/xul/base/src/tree/public/nsITreeView.idl -->
+
+ <p>The <code><a
+ href="#datagriddataprovider">DataGridDataProvider</a></code> interface
+ represents the interface that objects must implement to be used as custom
+ data views for <code><a href="#datagrid0">datagrid</a></code> elements.
+
+ <p>Not all the methods are required. The minimum number of methods that
+ must be implemented in a useful view is two: the <code
+ title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> and <code
+ title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData()</a></code> methods.
+
+ <p>Once the object is written, it must be hooked up to the <code><a
+ href="#datagrid0">datagrid</a></code> using the <dfn id=data2
+ title=dom-datagrid-data><code>data</code></dfn> DOM attribute.
+
+ <p>The following methods may be usefully implemented:
+
+ <dl>
+ <dt><dfn id=initialize title=dom-provider-initialize><code>initialize(<var
+ title="">datagrid</var>)</code></dfn>
+
+ <dd>Called by the <code><a href="#datagrid0">datagrid</a></code> element
+ (the one given by the <var title="">datagrid</var> argument) after it has
+ first populated itself. This would typically be used to set the initial
+ selection of the <code><a href="#datagrid0">datagrid</a></code> element
+ when it is first loaded. The data provider could also use this method
+ call to register a <code title=event-select><a
+ href="#select">select</a></code> event handler on the <code><a
+ href="#datagrid0">datagrid</a></code> in order to monitor selection
+ changes.
+
+ <dt><dfn id=getrowcount
+ title=dom-provider-getRowCount><code>getRowCount(<var
+ title="">row</var>)</code></dfn>
+
+ <dd>Must return the number of rows that are children of the specified <var
+ title="">row</var>, including rows that are off-screen. If <var
+ title="">row</var> is empty, then the number of rows at the top level
+ must be returned. If the value that this method would return for a given
+ <var title="">row</var> changes, the relevant update methods on the
+ <code><a href="#datagrid0">datagrid</a></code> must be called first.
+ Otherwise, this method must always return the same number. For a list (as
+ opposed to a tree), this method must return 0 whenever it is called with
+ a <var title="">row</var> identifier that is not empty.
+
+ <dt><dfn id=getchildatposition
+ title=dom-provider-getChildAtPosition><code>getChildAtPosition(<var
+ title="">parentRow</var>, <var title="">position</var>)</code></dfn>
+
+ <dd>Must return the index of the row that is a child of <var
+ title="">parentRow</var> and that is to be positioned as the <var
+ title="">position</var>th row under <var title="">parentRow</var> when
+ rendering the children of <var title="">parentRow</var>. If <var
+ title="">parentRow</var> is empty, then <var title="">position</var>
+ refers to the <var title="">position</var>th row at the top level of the
+ data grid. May be omitted if the rows are always to be sorted in the
+ natural order. (The natural order is the one where the method always
+ returns <var title="">position</var>.) For a given <var
+ title="">parentRow</var>, this method must never return the same value
+ for different values of <var title="">position</var>. The returned value
+ <var title="">x</var> must be in the range 0&nbsp;&le;&nbsp;<var
+ title="">x</var>&nbsp;&lt;&nbsp;<var title="">n</var>, where <var
+ title="">n</var> is the value returned by <code
+ title=dom-provider-getRowCount><a href="#getrowcount">getRowCount(<var
+ title="">parentRow</var>)</a></code>.
+
+ <dt><dfn id=getcolumncount
+ title=dom-provider-getColumnCount><code>getColumnCount()</code></dfn>
+
+ <dd>Must return the number of columns currently in the data model
+ (including columns that might be hidden). May be omitted if there is only
+ one column. If the value that this method would return changes, the
+ <code><a href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-updateEverything><a
+ href="#updateeverything">updateEverything()</a></code> method must be
+ called.
+
+ <dt><dfn id=getcaptiontext
+ title=dom-provider-getCaptionText><code>getCaptionText(<var
+ title="">column</var>)</code></dfn>
+
+ <dd>Must return the caption, or label, for column <var
+ title="">column</var>. May be omitted if the columns have no captions. If
+ the value that this method would return changes, the <code><a
+ href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-updateColumnChanged><a
+ href="#updatecolumnchanged">updateColumnChanged()</a></code> method must
+ be called with the appropriate column index.
+
+ <dt><dfn id=getcaptionclasses
+ title=dom-provider-getCaptionClasses><code>getCaptionClasses(<var
+ title="">column</var>, <var title="">classes</var>)</code></dfn>
+
+ <dd>Must add the classes that apply to column <var title="">column</var>
+ to the <var title="">classes</var> object. May be omitted if the columns
+ have no special classes. If the classes that this method would add
+ change, the <code><a href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-updateColumnChanged><a
+ href="#updatecolumnchanged">updateColumnChanged()</a></code> method must
+ be called with the appropriate column index. Some classes have <a
+ href="#datagridClassSummary">predefined meanings</a>.
+
+ <dt><dfn id=getrowimage
+ title=dom-provider-getRowImage><code>getRowImage(<var
+ title="">row</var>)</code></dfn>
+
+ <dd>Must return a URI to an image that represents row <var
+ title="">row</var>, or the empty string if there is no applicable image.
+ May be omitted if no rows have associated images. If the value that this
+ method would return changes, the <code><a
+ href="#datagrid0">datagrid</a></code>'s update methods must be called to
+ update the row in question.
+
+ <dt><dfn id=getrowmenu title=dom-provider-getRowMenu><code>getRowMenu(<var
+ title="">row</var>)</code></dfn>
+
+ <dd>Must return an <code><a
+ href="#htmlmenuelement">HTMLMenuElement</a></code> object that is to be
+ used as a context menu for row <var title="">row</var>, or null if there
+ is no particular context menu. May be omitted if none of the rows have a
+ special context menu. As this method is called immediately before showing
+ the menu in question, no precautions need to be taken if the return value
+ of this method changes.
+
+ <dt><dfn id=getrowclasses
+ title=dom-provider-getRowClasses><code>getRowClasses(<var
+ title="">row</var>, <var title="">classes</var>)</code></dfn>
+
+ <dd>Must add the classes that apply to row <var title="">row</var> to the
+ <var title="">classes</var> object. May be omitted if the rows have no
+ special classes. If the classes that this method would add change, the
+ <code><a href="#datagrid0">datagrid</a></code>'s update methods must be
+ called to update the row in question. Some classes have <a
+ href="#datagridClassSummary">predefined meanings</a>.
+
+ <dt><dfn id=getcelldata
+ title=dom-provider-getCellData><code>getCellData(<var title="">row</var>,
+ <var title="">column</var>)</code></dfn>
+
+ <dd>Must return the value of the cell on row <var title="">row</var> in
+ column <var title="">column</var>. For text cells, this must be the text
+ to show for that cell. For <a href="#progress1"
+ title=datagrid-cell-class-progress>progress bar cells</a>, this must be
+ either a floating point number in the range 0.0 to 1.0 (converted to a
+ string representation<!-- XXX this isn't
+ technically enough to define what the author must be doing here,
+ but let's let that slide until someone notices -->),
+ indicating the fraction of the progress bar to show as full (1.0 meaning
+ complete), or the empty string, indicating an indeterminate progress bar.
+ If the value that this method would return changes, the <code><a
+ href="#datagrid0">datagrid</a></code>'s update methods must be called to
+ update the rows that changed. If only one cell changed, the <code
+ title=dom-datagrid-updateCellChanged><a
+ href="#updatecellchanged">updateCellChanged()</a></code> method may be
+ used.
+
+ <dt><dfn id=getcellclasses
+ title=dom-provider-getCellClasses><code>getCellClasses(<var
+ title="">row</var>, <var title="">column</var>, <var
+ title="">classes</var>)</code></dfn>
+
+ <dd>Must add the classes that apply to the cell on row <var
+ title="">row</var> in column <var title="">column</var> to the <var
+ title="">classes</var> object. May be omitted if the cells have no
+ special classes. If the classes that this method would add change, the
+ <code><a href="#datagrid0">datagrid</a></code>'s update methods must be
+ called to update the rows or cells in question. Some classes have <a
+ href="#datagridClassSummary">predefined meanings</a>.
+
+ <dt><dfn id=togglecolumnsortstate
+ title=dom-provider-toggleColumnSortState><code>toggleColumnSortState(<var
+ title="">column</var>)</code></dfn>
+
+ <dd>Called by the <code><a href="#datagrid0">datagrid</a></code> when the
+ user tries to sort the data using a particular column <var
+ title="">column</var>. The data provider must update its state so that
+ the <code title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition()</a></code> method returns
+ the new order, and the classes of the columns returned by <code
+ title=dom-provider-getCaptionClasses><a
+ href="#getcaptionclasses">getCaptionClasses()</a></code> represent the
+ new sort status. There is no need to tell the <code><a
+ href="#datagrid0">datagrid</a></code> that the data has changed, as
+ the <code><a href="#datagrid0">datagrid</a></code> automatically assumes
+ that the entire data model will need updating.
+
+ <dt><dfn id=setcellcheckedstate
+ title=dom-provider-setCellCheckedState><code>setCellCheckedState(<var
+ title="">row</var>, <var title="">column</var>, <var
+ title="">state</var>)</code></dfn>
+
+ <dd>Called by the <code><a href="#datagrid0">datagrid</a></code> when the
+ user changes the state of a checkbox cell on row <var title="">row</var>,
+ column <var title="">column</var>. The checkbox should be toggled to the
+ state given by <var title="">state</var>, which is a positive integer (1)
+ if the checkbox is to be checked, zero (0) if it is to be unchecked, and
+ a negative number (-1) if it is to be set to the indeterminate state.
+ There is no need to tell the <code><a
+ href="#datagrid0">datagrid</a></code> that the cell has changed, as the
+ <code><a href="#datagrid0">datagrid</a></code> automatically assumes that
+ the given cell will need updating.
+
+ <dt><dfn id=cyclecell title=dom-provider-cycleCell><code>cycleCell(<var
+ title="">row</var>, <var title="">column</var>)</code></dfn>
+
+ <dd>Called by the <code><a href="#datagrid0">datagrid</a></code> when the
+ user changes the state of a cyclable cell on row <var title="">row</var>,
+ column <var title="">column</var>. The data provider should change the
+ state of the cell to the new state, as appropriate. There is no need to
+ tell the <code><a href="#datagrid0">datagrid</a></code> that the cell has
+ changed, as the <code><a href="#datagrid0">datagrid</a></code>
+ automatically assumes that the given cell will need updating.
+
+ <dt><dfn id=editcell title=dom-provider-editCell><code>editCell(<var
+ title="">row</var>, <var title="">column</var>, <var
+ title="">data</var>)</code></dfn>
+
+ <dd>Called by the <code><a href="#datagrid0">datagrid</a></code> when the
+ user edits the cell on row <var title="">row</var>, column <var
+ title="">column</var>. The new value of the cell is given by <var
+ title="">data</var>. The data provider should update the cell
+ accordingly. There is no need to tell the <code><a
+ href="#datagrid0">datagrid</a></code> that the cell has changed, as the
+ <code><a href="#datagrid0">datagrid</a></code> automatically assumes that
+ the given cell will need updating.</dd>
+ <!--XXXPA
+ void performAction(in DOMString action); // required if .performAction() is ever invoked on the datagrid
+ void performActionOnRow(in <span>RowSpecification</span> row, in DOMString action); // required if getRowClasses ever includes 'deletable' or if .performActionOnRow() is ever invoked on the datagrid
+ void performActionOnCell(in <span>RowSpecification</span> row, in unsigned long column, in DOMString action); // required if .performActionOnCell() is ever invoked on the datagrid
+-->
+ </dl>
+
+ <p>The following classes (for rows, columns, and cells) may be usefully
+ used in conjunction with this interface:
+
+ <table id=datagridClassSummary>
+ <tbody>
+ <tr>
+ <th>Class name
+
+ <th>Applies to
+
+ <th>Description
+
+ <tr>
+ <td><!--checked--><dfn id=checked
+ title=datagrid-cell-class-checked><code>checked</code></dfn>
+
+ <td>Cells
+
+ <td>The cell has a checkbox and it is checked. (The <code
+ title=datagrid-cell-class-cyclable><a
+ href="#cyclable">cyclable</a></code> and <code
+ title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code> classes override this, though.)
+
+ <tr>
+ <td><!--cyclable--><dfn id=cyclable
+ title=datagrid-cell-class-cyclable><code>cyclable</code></dfn>
+
+ <td>Cells
+
+ <td>The cell can be cycled through multiple values. (The <code
+ title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code> class overrides this, though.)
+
+ <tr>
+ <td><!--editable--><dfn id=editable
+ title=datagrid-cell-class-editable><code>editable</code></dfn>
+
+ <td>Cells
+
+ <td>The cell can be edited. (The <code
+ title=datagrid-cell-class-cyclable><a
+ href="#cyclable">cyclable</a></code>, <code
+ title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code>, <code
+ title=datagrid-cell-class-checked><a
+ href="#checked">checked</a></code>, <code
+ title=datagrid-cell-class-unchecked><a
+ href="#unchecked">unchecked</a></code> and <code
+ title=datagrid-cell-class-indeterminate><a
+ href="#indeterminate">indeterminate</a></code> classes override this,
+ though.)
+
+ <tr>
+ <td><!--header--><dfn id=header0
+ title=datagrid-row-class-header><code>header</code></dfn>
+
+ <td>Rows
+
+ <td>The row is a heading, not a data row.
+
+ <tr>
+ <td><!--indeterminate--><dfn id=indeterminate
+ title=datagrid-cell-class-indeterminate><code>indeterminate</code></dfn>
+
+ <td>Cells
+
+ <td>The cell has a checkbox, and it can be set to an indeterminate
+ state. If neither the <code title=datagrid-cell-class-checked><a
+ href="#checked">checked</a></code> nor <code
+ title=datagrid-cell-class-unchecked><a
+ href="#unchecked">unchecked</a></code> classes are present, then the
+ checkbox is in that state, too. (The <code
+ title=datagrid-cell-class-cyclable><a
+ href="#cyclable">cyclable</a></code> and <code
+ title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code> classes override this, though.)
+
+ <tr>
+ <td><!--initially-hidden--><dfn id=initially-hidden
+ title=datagrid-column-class-initially-hidden><code>initially-hidden</code></dfn>
+
+ <td>Columns
+
+ <td>The column will not be shown when the <code><a
+ href="#datagrid0">datagrid</a></code> is initially rendered. If this
+ class is not present on the column when the <code><a
+ href="#datagrid0">datagrid</a></code> is initially rendered, the column
+ will be visible if space allows.
+
+ <tr>
+ <td><!--initially-closed--><dfn id=initially-closed
+ title=datagrid-row-class-initially-closed><code>initially-closed</code></dfn>
+
+ <td>Rows
+
+ <td>The row will be closed when the <code><a
+ href="#datagrid0">datagrid</a></code> is initially rendered. If neither
+ this class nor the <code title=datagrid-row-class-initially-open><a
+ href="#initially-open">initially-open</a></code> class is present on
+ the row when the <code><a href="#datagrid0">datagrid</a></code> is
+ initially rendered, the initial state will depend on platform
+ conventions.
+
+ <tr>
+ <td><!--initially-open--><dfn id=initially-open
+ title=datagrid-row-class-initially-open><code>initially-open</code></dfn>
+
+ <td>Rows
+
+ <td>The row will be opened when the <code><a
+ href="#datagrid0">datagrid</a></code> is initially rendered. If neither
+ this class nor the <code title=datagrid-row-class-initially-closed><a
+ href="#initially-closed">initially-closed</a></code> class is present
+ on the row when the <code><a href="#datagrid0">datagrid</a></code> is
+ initially rendered, the initial state will depend on platform
+ conventions.
+
+ <tr>
+ <td><!--progress--><dfn id=progress1
+ title=datagrid-cell-class-progress><code>progress</code></dfn>
+
+ <td>Cells
+
+ <td>The cell is a progress bar.
+
+ <tr>
+ <td><!--reversed--><dfn id=reversed
+ title=datagrid-column-class-reversed><code>reversed</code></dfn>
+
+ <td>Columns
+
+ <td>If the column is sorted, the sort direction is descending, instead of
+ ascending.
+
+ <tr>
+ <td><!--selectable-separator--><dfn id=selectable-separator
+ title=datagrid-row-class-selectable-separator><code>selectable-separator</code></dfn>
+
+ <td>Rows
+
+ <td>The row is a normal, selectable, data row, except that instead of
+ having data, it only has a separator. (The <code
+ title=datagrid-row-class-header><a href="#header0">header</a></code>
+ and <code title=datagrid-row-class-separator><a
+ href="#separator">separator</a></code> classes override this, though.)
+
+ <tr>
+ <td><!--separator--><dfn id=separator
+ title=datagrid-row-class-separator><code>separator</code></dfn>
+
+ <td>Rows
+
+ <td>The row is a separator row, not a data row. (The <code
+ title=datagrid-row-class-header><a href="#header0">header</a></code>
+ class overrides this, though.)
+
+ <tr>
+ <td><!--sortable--><dfn id=sortable
+ title=datagrid-column-class-sortable><code>sortable</code></dfn>
+
+ <td>Columns
+
+ <td>The data can be sorted by this column.
+
+ <tr>
+ <td><!--sorted--><dfn id=sorted
+ title=datagrid-column-class-sorted><code>sorted</code></dfn>
+
+ <td>Columns
+
+ <td>The data is sorted by this column. Unless the <code
+ title=datagrid-column-class-reversed><a
+ href="#reversed">reversed</a></code> class is also present, the sort
+ direction is ascending.
+
+ <tr>
+ <td><!--unchecked--><dfn id=unchecked
+ title=datagrid-cell-class-unchecked><code>unchecked</code></dfn>
+
+ <td>Cells
+
+ <td>The cell has a checkbox and, unless the <code
+ title=datagrid-cell-class-checked><a href="#checked">checked</a></code>
+ class is present as well, it is unchecked. (The <code
+ title=datagrid-cell-class-cyclable><a
+ href="#cyclable">cyclable</a></code> and <code
+ title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code> classes override this, though.)
+ </tr>
+ <!--XXXPA
+ <tr>
+ <td><!- -deletable- -><dfn title="datagrid-row-class-deletable"><code>deletable</code></dfn></td>
+ <td>Rows</td>
+ <td></td>
+ </tr>
+-->
+ </table>
+
+ <h5 id=the-default><span class=secno>3.18.2.4. </span>The default data
+ provider</h5>
+
+ <p>The user agent must supply a default data provider for the case where
+ the <code><a href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-data><a href="#data2">data</a></code> attribute is
+ null. It must act as described in this section.
+
+ <p>The behaviour of the default data provider depends on the nature of the
+ first element child of the <code><a href="#datagrid0">datagrid</a></code>.
+
+ <dl class=switch>
+ <dt>While the first element child is a <code><a
+ href="#table">table</a></code> element
+
+ <dd>
+ <p><strong><code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount(<var
+ title="">row</var>)</a></code></strong>: The number of rows returned by
+ the default data provider for the root of the tree (when <var
+ title="">row</var> is empty) must be the total number of <code><a
+ href="#tr">tr</a></code> elements that are children of <code><a
+ href="#tbody">tbody</a></code> elements that are children of the
+ <code><a href="#table">table</a></code>, if there are any such child
+ <code><a href="#tbody">tbody</a></code> elements. If there are no such
+ <code><a href="#tbody">tbody</a></code> elements then the number of rows
+ returned for the root must be the number of <code><a
+ href="#tr">tr</a></code> elements that are children of the <code><a
+ href="#table">table</a></code>.</p>
+
+ <p>When <var title="">row</var> is not empty, the number of rows returned
+ must be zero.</p>
+
+ <p class=note>The <code><a href="#table">table</a></code>-based default
+ data provider cannot represent a tree.</p>
+
+ <p class=note>Rows in <code><a href="#thead0">thead</a></code> elements
+ do not contribute to the number of rows returned, although they do
+ affect the columns and column captions. Rows in <code><a
+ href="#tfoot0">tfoot</a></code> elements are <a href="#ignored"
+ title=ignore>ignored</a> completely by this algorithm.</p>
+
+ <p id=defaultDataProviderTableMapper><strong><code
+ title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition(<var title="">row</var>,
+ <var title="">i</var>)</a></code></strong>: The default data provider
+ must return the mapping appropriate to the <a
+ href="#defaultDataProviderTableSort">current sort order</a>.</p>
+
+ <p><strong><code title=dom-provider-getColumnCount><a
+ href="#getcolumncount">getColumnCount()</a></code></strong>: The number
+ of columns returned must be the number of <code><a
+ href="#td">td</a></code> element children in the first <code><a
+ href="#tr">tr</a></code> element child of the first <code><a
+ href="#tbody">tbody</a></code> element child of the <code><a
+ href="#table">table</a></code>, if there are any such <code><a
+ href="#tbody">tbody</a></code> elements. If there are no such <code><a
+ href="#tbody">tbody</a></code> elements, then it must be the number of
+ <code><a href="#td">td</a></code> element children in the first <code><a
+ href="#tr">tr</a></code> element child of the <code><a
+ href="#table">table</a></code>, if any, or otherwise 1. If the number
+ that would be returned by these rules is 0, then 1 must be returned
+ instead.</p>
+
+ <p><strong><code title=dom-provider-getCaptionText><a
+ href="#getcaptiontext">getCaptionText(<var
+ title="">i</var>)</a></code></strong>: If the <code><a
+ href="#table">table</a></code> has no <code><a
+ href="#thead0">thead</a></code> element child, or if its first <code><a
+ href="#thead0">thead</a></code> element child has no <code><a
+ href="#tr">tr</a></code> element child, the default data provider must
+ return the empty string for all captions. Otherwise, the value of the
+ <code><a href="#textcontent">textContent</a></code> attribute of the
+ <var title="">i</var>th <code><a href="#th">th</a></code> element child
+ of the first <code><a href="#tr">tr</a></code> element child of the
+ first <code><a href="#thead0">thead</a></code> element child of the
+ <code><a href="#table">table</a></code> element must be returned. If
+ there is no such <code><a href="#th">th</a></code> element, the empty
+ string must be returned.</p>
+
+ <p><strong><code title=dom-provider-getCaptionClasses><a
+ href="#getcaptionclasses">getCaptionClasses(<var title="">i</var>, <var
+ title="">classes</var>)</a></code></strong>: If the <code><a
+ href="#table">table</a></code> has no <code><a
+ href="#thead0">thead</a></code> element child, or if its first <code><a
+ href="#thead0">thead</a></code> element child has no <code><a
+ href="#tr">tr</a></code> element child, the default data provider must
+ not add any classes for any of the captions. Otherwise, each class in
+ the <code title=attr-class><a href="#class">class</a></code> attribute
+ of the <var title="">i</var>th <code><a href="#th">th</a></code> element
+ child of the first <code><a href="#tr">tr</a></code> element child of
+ the first <code><a href="#thead0">thead</a></code> element child of the
+ <code><a href="#table">table</a></code> element must be added to the
+ <var title="">classes</var>. If there is no such <code><a
+ href="#th">th</a></code> element, no classes must be added. The user
+ agent must then:</p>
+
+ <ol>
+ <li>Remove the <code title=datagrid-column-class-sorted><a
+ href="#sorted">sorted</a></code> and <code
+ title=datagrid-column-class-reversed><a
+ href="#reversed">reversed</a></code> classes.
+
+ <li>If the <code><a href="#table">table</a></code> element has a <code
+ title=attr-class><a href="#class">class</a></code> attribute that
+ includes the <code title="">sortable</code> class, add the <code
+ title=datagrid-column-class-sortable><a
+ href="#sortable">sortable</a></code> class.
+
+ <li>If the column is the one currently being used to sort the data, add
+ the <code title=datagrid-column-class-sorted><a
+ href="#sorted">sorted</a></code> class.
+
+ <li>If the column is the one currently being used to sort the data, and
+ it is sorted in descending order, add the <code
+ title=datagrid-column-class-reversed><a
+ href="#reversed">reversed</a></code> class as well.
+ </ol>
+
+ <p>The various row- and cell- related methods operate relative to a
+ particular element, the element of the row or cell specified by their
+ arguments.</p>
+
+ <p><strong>For rows</strong>: Since the default data provider for a
+ <code><a href="#table">table</a></code> always returns 0 as the number
+ of children for any row other than the root, the path to the row passed
+ to these methods will always consist of a single number. In the prose
+ below, this number is referred to as <var title="">i</var>.</p>
+
+ <p>If the <code><a href="#table">table</a></code> has <code><a
+ href="#tbody">tbody</a></code> element children, the element for the
+ <var title="">i</var>th row is the <var title="">i</var>th <code><a
+ href="#tr">tr</a></code> element that is a child of a <code><a
+ href="#tbody">tbody</a></code> element that is a child of the <code><a
+ href="#table">table</a></code> element. If the <code><a
+ href="#table">table</a></code> does not have <code><a
+ href="#tbody">tbody</a></code> element children, then the element for
+ the <var title="">i</var>th real row is the <var title="">i</var>th
+ <code><a href="#tr">tr</a></code> element that is a child of the
+ <code><a href="#table">table</a></code> element.</p>
+
+ <p><strong>For cells</strong>: Given a row and its element, the row's
+ <var title="">i</var>th cell's element is the <var title="">i</var>th
+ <code><a href="#td">td</a></code> element child of the row element.</p>
+
+ <p class=note>The <code>colspan</code> and <code>rowspan</code>
+ attributes are <a href="#ignored" title=ignore>ignored</a> by this
+ algorithm.</p>
+
+ <p><strong><code title=dom-provider-getRowImage><a
+ href="#getrowimage">getRowImage(<var
+ title="">i</var>)</a></code></strong>: If the row's first cell's element
+ has an <code><a href="#img">img</a></code> element child, then the URI
+ of the row's image is the URI of the first <code><a
+ href="#img">img</a></code> element child of the row's first cell's
+ element. Otherwise, the URI of the row's image is the empty string.</p>
+ <!-- XXX well. that sentence could
+ have gone better, that's for sure. -->
+
+ <p><strong><code title=dom-provider-getRowMenu><a
+ href="#getrowmenu">getRowMenu(<var
+ title="">i</var>)</a></code></strong>: If the row's first cell's element
+ has a <code><a href="#menu">menu</a></code> element child, then the
+ row's menu is the first <code><a href="#menu">menu</a></code> element
+ child of the row's first cell's element. Otherwise, the row has no menu.</p>
+
+ <p><strong><code title=dom-provider-getRowClasses><a
+ href="#getrowclasses">getRowClasses(<var title="">i</var>, <var
+ title="">classes</var>)</a></code></strong>: The default data provider
+ must never add a class to the row's classes.</p>
+
+ <p id=defaultDataProviderTableSort><strong><code
+ title=dom-provider-toggleColumnSortState><a
+ href="#togglecolumnsortstate">toggleColumnSortState(<var
+ title="">i</var>)</a></code></strong>: If the data is already being
+ sorted on the given column, then the user agent must change the current
+ sort mapping to be the inverse of the current sort mapping; if the sort
+ order was ascending before, it is now descending, otherwise it is now
+ ascending. Otherwise, if the current sort column is another column, or
+ the data model is currently not sorted, the user agent must create a new
+ mapping, which maps rows in the data model to rows in the DOM so that
+ the rows in the data model are sorted by the specified column, in
+ ascending order. (Which sort comparison operator to use is left up to
+ the UA to decide.)</p>
+
+ <p>When the sort mapping is changed, the values returned by the <code
+ title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition()</a></code> method for
+ the default data provider <a href="#defaultDataProviderTableMapper">will
+ change appropriately</a>.</p>
+
+ <p><strong><code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData(<var title="">i</var>, <var
+ title="">j</var>)</a></code>, <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses(<var title="">i</var>, <var
+ title="">j</var>, <var title="">classes</var>)</a></code>, <code
+ title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState(<var title="">i</var>,
+ <var title="">j</var>, <var title="">state</var>)</a></code>, <code
+ title=dom-provider-cycleCell><a href="#cyclecell">cycleCell(<var
+ title="">i</var>, <var title="">j</var>)</a></code>, and <code
+ title=dom-provider-editCell><a href="#editcell">editCell(<var
+ title="">i</var>, <var title="">j</var>, <var
+ title="">data</var>)</a></code></strong>: See <a
+ href="#commonDefaultDataGridMethodDefinitions">the common definitions
+ below</a>.</p>
+
+ <p>The data provider must call the <code><a
+ href="#datagrid0">datagrid</a></code>'s update methods appropriately
+ whenever the descendants of the <code><a
+ href="#datagrid0">datagrid</a></code> mutate. For example, if a <code><a
+ href="#tr">tr</a></code> is removed, then the <code
+ title=dom-datagrid-updateRowsRemoved><a
+ href="#updaterowsremoved">updateRowsRemoved()</a></code> method would
+ probably need to be invoked, and any change to a cell or its descendants
+ must cause the cell to be updated. If the <code><a
+ href="#table">table</a></code> element stops being the first child of
+ the <code><a href="#datagrid0">datagrid</a></code>, then the data
+ provider must call the <code title=dom-datagrid-updateEverything><a
+ href="#updateeverything">updateEverything()</a></code> method on the
+ <code><a href="#datagrid0">datagrid</a></code>. Any change to a cell
+ that is in the column that the data provider is currently using as its
+ sort column must also cause the sort to be reperformed, with a call to
+ <code title=dom-datagrid-updateEverything><a
+ href="#updateeverything">updateEverything()</a></code> if the change did
+ affect the sort order.</p>
+
+ <dt>While the first element child is a <code>select</code> or
+ <code>datalist</code> element
+
+ <dd>
+ <p>The default data provider must return 1 for the column count, the
+ empty string for the column's caption, and must not add any classes to
+ the column's classes.</p>
+
+ <p>For the rows, assume the existence of a node filter view of the
+ descendants of the first element child of the <code><a
+ href="#datagrid0">datagrid</a></code> element (the <code>select</code>
+ or <code>datalist</code> element), that skips all nodes other than
+ <code>optgroup</code> and <code>option</code> elements, as well as any
+ descendants of any <code>option</code> elements.</p>
+
+ <p>Given a path <var title="">row</var>, the corresponding element is the
+ one obtained by drilling into the view, taking the child given by the
+ path each time.</p>
+
+ <div class=example>
+ <p>Given the following XML markup:</p>
+
+ <pre>&lt;datagrid>
+ &lt;select>
+ &lt;!-- the options and optgroups have had their labels and values removed
+ to make the underlying structure clearer -->
+ &lt;optgroup>
+ &lt;option/>
+ &lt;option/>
+ &lt;/optgroup>
+ &lt;optgroup>
+ &lt;option/>
+ &lt;optgroup id="a">
+ &lt;option/>
+ &lt;option/>
+ &lt;bogus/>
+ &lt;option id="b"/>
+ &lt;/optgroup>
+ &lt;option/>
+ &lt;/optgroup>
+ &lt;/select>
+&lt;/datagrid></pre>
+
+ <p>The path "1,1,2" would select the element with ID "b". In the
+ filtered view, the text nodes, comment nodes, and bogus elements are
+ ignored; so for instance, the element with ID "a" (path "1,1") has only
+ 3 child nodes in the view.</p>
+ </div>
+
+ <p><code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount(<var title="">row</var>)</a></code> must
+ drill through the view to find the element corresponding to the method's
+ argument, and return the number of child nodes in the filtered view that
+ the corresponding element has. (If the <var title="">row</var> is empty,
+ the corresponding element is the <code>select</code> element at the root
+ of the filtered view.)</p>
+
+ <p><code title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition(<var title="">row</var>,
+ <var title="">position</var>)</a></code> must return <var
+ title="">position</var>. (The <code>select</code>/<code>datalist</code>
+ default data provider does not support sorting the data grid.)</p>
+
+ <p><code title=dom-provider-getRowImage><a
+ href="#getrowimage">getRowImage(<var title="">i</var>)</a></code> must
+ return the empty string, and <code title=dom-provider-getRowMenu><a
+ href="#getrowmenu">getRowMenu(<var title="">i</var>)</a></code> must
+ return null.</p>
+
+ <p><code title=dom-provider-getRowClasses><a
+ href="#getrowclasses">getRowClasses(<var title="">row</var>, <var
+ title="">classes</var>)</a></code> must add the classes from the
+ following list to <var title="">classes</var> when their condition is
+ met:</p>
+
+ <ul>
+ <li>If the <var title="">row</var>'s corresponding element is an
+ <code>optgroup</code> element: <code title=datagrid-row-class-header><a
+ href="#header0">header</a></code>
+
+ <li>If the <var title="">row</var>'s corresponding element contains
+ other elements that are also in the view, and the element's <code
+ title=attr-class><a href="#class">class</a></code> attribute contains
+ the <code title="">closed</code> class: <code
+ title=datagrid-row-class-initially-closed><a
+ href="#initially-closed">initially-closed</a></code>
+
+ <li>If the <var title="">row</var>'s corresponding element contains
+ other elements that are also in the view, and the element's <code
+ title=attr-class><a href="#class">class</a></code> attribute contains
+ the <code title="">open</code> class: <code
+ title=datagrid-row-class-initially-open><a
+ href="#initially-open">initially-open</a></code>
+ </ul>
+
+ <p>The <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData(<var title="">row</var>, <var
+ title="">cell</var>)</a></code> method must return the value of the
+ <code title=attr-optgroup-label>label</code> attribute if the <var
+ title="">row</var>'s corresponding element is an <code>optgroup</code>
+ element, otherwise, if the <var title="">row</var>'s corresponding
+ element is an <code>option</code> element, its <code
+ title=attr-option-label>label</code> attribute if it has one, otherwise
+ the value of its <code><a href="#textcontent">textContent</a></code> DOM
+ attribute.</p>
+
+ <p>The <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses(<var title="">row</var>, <var
+ title="">cell</var>, <var title="">classes</var>)</a></code> method must
+ add no classes.</p>
+
+ <p class=big-issue><!-- select-provider-selection
+ XXX-->autoselect
+ some rows when initialised, reflect the selection in the select, reflect
+ the multiple attribute somehow.</p>
+
+ <p>The data provider must call the <code><a
+ href="#datagrid0">datagrid</a></code>'s update methods appropriately
+ whenever the descendants of the <code><a
+ href="#datagrid0">datagrid</a></code> mutate.</p>
+
+ <dt>While the first element child is another element
+
+ <dd>
+ <p>The default data provider must return 1 for the column count, the
+ empty string for the column's caption, and must not add any classes to
+ the column's classes.</p>
+
+ <p>For the rows, assume the existence of a node filter view of the
+ descendants of the <code><a href="#datagrid0">datagrid</a></code> that
+ skips all nodes other than <code><a href="#li">li</a></code>, <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code>, and <code><a
+ href="#hr">hr</a></code> elements, and skips any descendants of <code><a
+ href="#menu">menu</a></code> elements.</p>
+
+ <p>Given this view, each element in the view represents a row in the data
+ model. The element corresponding to a path <var title="">row</var> is
+ the one obtained by drilling into the view, taking the child given by
+ the path each time. The element of the row of a particular method call
+ is the element given by drilling into the view along the path given by
+ the method's arguments.</p>
+
+ <p><code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount(<var title="">row</var>)</a></code> must
+ return the number of child elements in this view for the given row, or
+ the number of elements at the root of the view if the <var
+ title="">row</var> is empty.</p>
+
+ <div class=example>
+ <p>In the following example, the elements are identified by the paths
+ given by their child text nodes:</p>
+
+ <pre>&lt;datagrid>
+ &lt;ol>
+ &lt;li> row 0 &lt;/li>
+ &lt;li> row 1
+ &lt;ol>
+ &lt;li> row 1,0 &lt;/li>
+ &lt;/ol>
+ &lt;/li>
+ &lt;li> row 2 &lt;/li>
+ &lt;/ol>
+&lt;/datagrid></pre>
+
+ <p>In this example, only the <code><a href="#li">li</a></code> elements
+ actually appear in the data grid; the <code><a href="#ol">ol</a></code>
+ element does not affect the data grid's processing model.</p>
+ </div>
+
+ <p><code title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition(<var title="">row</var>,
+ <var title="">position</var>)</a></code> must return <var
+ title="">position</var>. (The generic default data provider does not
+ support sorting the data grid.)</p>
+
+ <p><code title=dom-provider-getRowImage><a
+ href="#getrowimage">getRowImage(<var title="">i</var>)</a></code> must
+ return the URI of the image given by the first <code><a
+ href="#img">img</a></code> element descendant (in the real DOM) of the
+ row's element, that is not also a descendant of another element in the
+ filtered view that is a descendant of the row's element.</p>
+
+ <div class=example>
+ <p>In the following example, the row with path "1,0" returns
+ "http://example.com/a" as its image URI, and the other rows (including
+ the row with path "1") return the empty string:</p>
+
+ <pre>&lt;datagrid>
+ &lt;ol>
+ &lt;li> row 0 &lt;/li>
+ &lt;li> row 1
+ &lt;ol>
+ &lt;li> row 1,0 &lt;img src="http://example.com/a" alt=""> &lt;/li>
+ &lt;/ol>
+ &lt;/li>
+ &lt;li> row 2 &lt;/li>
+ &lt;/ol>
+&lt;/datagrid></pre>
+ </div>
+
+ <p><code title=dom-provider-getRowMenu><a
+ href="#getrowmenu">getRowMenu(<var title="">i</var>)</a></code> must
+ return the first <code><a href="#menu">menu</a></code> element
+ descendant (in the real DOM) of the row's element, that is not also a
+ descendant of another element in the filtered view that is a descendant
+ of the row's element. (This is analogous to the image case above.)</p>
+
+ <p><code title=dom-provider-getRowClasses><a
+ href="#getrowclasses">getRowClasses(<var title="">i</var>, <var
+ title="">classes</var>)</a></code> must add the classes from the
+ following list to <var title="">classes</var> when their condition is
+ met:</p>
+
+ <ul>
+ <li>If the row's element contains other elements that are also in the
+ view, and the element's <code title=attr-class><a
+ href="#class">class</a></code> attribute contains the <code
+ title="">closed</code> class: <code
+ title=datagrid-row-class-initially-closed><a
+ href="#initially-closed">initially-closed</a></code>
+
+ <li>If the row's element contains other elements that are also in the
+ view, and the element's <code title=attr-class><a
+ href="#class">class</a></code> attribute contains the <code
+ title="">open</code> class: <code
+ title=datagrid-row-class-initially-open><a
+ href="#initially-open">initially-open</a></code>
+
+ <li>If the row's element is an <code><a
+ href="#h1">h1</a></code>-<code><a href="#h6">h6</a></code> element:
+ <code title=datagrid-row-class-header><a
+ href="#header0">header</a></code>
+
+ <li>If the row's element is an <code><a href="#hr">hr</a></code>
+ element: <code title=datagrid-row-class-separator><a
+ href="#separator">separator</a></code></li>
+ <!--
+ XXX no way to get selectable-separator -->
+ </ul>
+
+ <p>The <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData(<var title="">i</var>, <var
+ title="">j</var>)</a></code>, <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses(<var title="">i</var>, <var
+ title="">j</var>, <var title="">classes</var>)</a></code>, <code
+ title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState(<var title="">i</var>,
+ <var title="">j</var>, <var title="">state</var>)</a></code>, <code
+ title=dom-provider-cycleCell><a href="#cyclecell">cycleCell(<var
+ title="">i</var>, <var title="">j</var>)</a></code>, and <code
+ title=dom-provider-editCell><a href="#editcell">editCell(<var
+ title="">i</var>, <var title="">j</var>, <var
+ title="">data</var>)</a></code> methods must act as described in <a
+ href="#commonDefaultDataGridMethodDefinitions">the common definitions
+ below</a>, treating the row's element as being the cell's element.</p>
+
+ <p class=big-issue id=generic-provider-selection>selection handling?</p>
+
+ <p>The data provider must call the <code><a
+ href="#datagrid0">datagrid</a></code>'s update methods appropriately
+ whenever the descendants of the <code><a
+ href="#datagrid0">datagrid</a></code> mutate.</p>
+
+ <dt>Otherwise, while there is no element child
+
+ <dd>
+ <p>The data provider must return 0 for the number of rows, 1 for the
+ number of columns, the empty string for the first column's caption, and
+ must add no classes when asked for that column's classes. If the
+ <code><a href="#datagrid0">datagrid</a></code>'s child list changes such
+ that there is a first element child, then the data provider must call
+ the <code title=dom-datagrid-updateEverything><a
+ href="#updateeverything">updateEverything()</a></code> method on the
+ <code><a href="#datagrid0">datagrid</a></code>.</p>
+ </dl>
+
+ <h6 id=commonDefaultDataGridMethodDefinitions><span class=secno>3.18.2.4.1.
+ </span>Common default data provider method definitions for cells</h6>
+
+ <p>These definitions are used for the cell-specific methods of the default
+ data providers (other than in the
+ <code>select</code>/<code>datalist</code> case). How they behave is based
+ on the contents of an element that represents the cell given by their
+ first two arguments. Which element that is is defined in the previous
+ section.
+
+ <dl>
+ <dt>Cyclable cells
+
+ <dd>
+ <p>If the first element child of a cell's element is a
+ <code>select</code> element that has no <code
+ title=attr-select-multiple>multiple</code> attribute and has at least
+ one <code>option</code> element descendant, then the cell acts as a
+ cyclable cell.</p>
+
+ <p>The "current" <code>option</code> element is the selected
+ <code>option</code> element, or the first <code>option</code> element if
+ none is selected.</p>
+
+ <p>The <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData()</a></code> method must return the
+ <code><a href="#textcontent">textContent</a></code> of the current
+ <code>option</code> element (the <code
+ title=attr-option-label>label</code> attribute is <a href="#ignored"
+ title=ignore>ignored</a> in this context as the <code>optgroup</code>s
+ are not displayed).</p>
+
+ <p>The <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses()</a></code> method must add the
+ <code title=datagrid-cell-class-cyclable><a
+ href="#cyclable">cyclable</a></code> class and then all the classes of
+ the current <code>option</code> element.</p>
+
+ <p>The <code title=dom-provider-cycleCell><a
+ href="#cyclecell">cycleCell()</a></code> method must change the
+ selection of the <code>select</code> element such that the next
+ <code>option</code> element after the current <code>option</code>
+ element is the only one that is selected (in <a href="#tree-order">tree
+ order</a>). If the current <code>option</code> element is the last
+ <code>option</code> element descendant of the <code>select</code>, then
+ the first <code>option</code> element descendant must be selected
+ instead.</p>
+
+ <p>The <code title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState()</a></code> and <code
+ title=dom-provider-editCell><a href="#editcell">editCell()</a></code>
+ methods must do nothing.</p>
+
+ <dt>Progress bar cells
+
+ <dd>
+ <p>If the first element child of a cell's element is a <code><a
+ href="#progress">progress</a></code> element, then the cell acts as a
+ progress bar cell.</p>
+
+ <p>The <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData()</a></code> method must return the
+ value returned by the <code><a href="#progress">progress</a></code>
+ element's <code title=dom-progress-position><a
+ href="#position">position</a></code> DOM attribute.</p>
+
+ <p>The <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses()</a></code> method must add the
+ <code title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code> class.</p>
+
+ <p>The <code title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState()</a></code>, <code
+ title=dom-provider-cycleCell><a
+ href="#cyclecell">cycleCell()</a></code>, and <code
+ title=dom-provider-editCell><a href="#editcell">editCell()</a></code>
+ methods must do nothing.</p>
+
+ <dt>Checkbox cells
+
+ <dd>
+ <p>If the first element child of a cell's element is an
+ <code>input</code> element that has a <code
+ title=attr-input-type>type</code> attribute with the value <code
+ title="">checkbox</code>, then the cell acts as a checkbox cell.</p>
+
+ <p>The <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData()</a></code> method must return the
+ <code><a href="#textcontent">textContent</a></code> of the cell element.</p>
+
+ <p>The <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses()</a></code> method must add the
+ <code title=datagrid-cell-class-checked><a
+ href="#checked">checked</a></code> class if the <code>input</code>
+ element is <span title=dom-input-checked>checked</span>, and the <code
+ title=datagrid-cell-class-unchecked><a
+ href="#unchecked">unchecked</a></code> class otherwise.</p>
+
+ <p>The <code title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState()</a></code> method must
+ set the <code>input</code> element's checkbox <span
+ title=dom-input-checked>state</span> to checked if the method's third
+ argument is 1, and to unchecked otherwise.</p>
+
+ <p>The <code title=dom-provider-cycleCell><a
+ href="#cyclecell">cycleCell()</a></code> and <code
+ title=dom-provider-editCell><a href="#editcell">editCell()</a></code>
+ methods must do nothing.</p>
+
+ <dt>Editable cells
+
+ <dd>
+ <p>If the first element child of a cell's element is an
+ <code>input</code> element that has a <code
+ title=attr-input-type>type</code> attribute with the value <code
+ title="">text</code> or that has no <code
+ title=attr-input-type>type</code> attribute at all, then the cell acts
+ as an editable cell.</p>
+
+ <p>The <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData()</a></code> method must return the
+ <code title=dom-input-value>value</code> of the <code>input</code>
+ element.</p>
+
+ <p>The <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses()</a></code> method must add the
+ <code title=datagrid-cell-class-editable><a
+ href="#editable">editable</a></code> class.</p>
+
+ <p>The <code title=dom-provider-editCell><a
+ href="#editcell">editCell()</a></code> method must set the
+ <code>input</code> element's <code title=dom-input-value>value</code>
+ DOM attribute to the value of the third argument to the method.</p>
+
+ <p>The <code title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState()</a></code> and <code
+ title=dom-provider-cycleCell><a href="#cyclecell">cycleCell()</a></code>
+ methods must do nothing.</p>
+ </dl>
+ <!-- XXX Calculated cells, like in spreadsheets? -->
+
+ <h5 id=populating><span class=secno>3.18.2.5. </span>Populating the
+ <code><a href="#datagrid0">datagrid</a></code> element</h5>
+
+ <p>A <code><a href="#datagrid0">datagrid</a></code> must be disabled until
+ its end tag has been parsed (in the case of a <code><a
+ href="#datagrid0">datagrid</a></code> element in the original document
+ markup) or until it has been inserted into the document (in the case of a
+ dynamically created element). After that point, the element must fire a
+ single <code title=event-load><a href="#load0">load</a></code> event at
+ itself, which doesn't bubble and cannot be canceled.
+
+ <p class=big-issue>The end-tag parsing thing should be moved to the parsing
+ section.
+
+ <p>The <code><a href="#datagrid0">datagrid</a></code> must then populate
+ itself using the data provided by the data provider assigned to the <code
+ title=dom-datagrid-data><a href="#data2">data</a></code> DOM attribute.
+ After the view is populated (using the methods described below), the
+ <code><a href="#datagrid0">datagrid</a></code> must invoke the <code
+ title=dom-provider-initialize><a
+ href="#initialize">initialize()</a></code> method on the data provider
+ specified by the <code title=dom-datagrid-data><a
+ href="#data2">data</a></code> attribute, passing itself (the <code><a
+ href="#htmldatagridelement">HTMLDataGridElement</a></code> object) as the
+ only argument.
+
+ <p>When the <code title=dom-datagrid-data><a href="#data2">data</a></code>
+ attribute is null, the <code><a href="#datagrid0">datagrid</a></code> must
+ use the default data provider described in the previous section.
+
+ <p>To obtain data from the data provider, the element must invoke methods
+ on the data provider object in the following ways:
+
+ <dl>
+ <dt>To determine the total number of columns
+
+ <dd>Invoke the <code title=dom-provider-getColumnCount><a
+ href="#getcolumncount">getColumnCount()</a></code> method with no
+ arguments. The return value is the number of columns. If the return value
+ is zero or negative, not an integer, or simply not a numeric type, or if
+ the method is not defined, then 1 must be used instead.
+
+ <dt>To get the captions to use for the columns
+
+ <dd>Invoke the <code title=dom-provider-getCaptionText><a
+ href="#getcaptiontext">getCaptionText()</a></code> method with the index
+ of the column in question. The index <var title="">i</var> must be in the
+ range 0 &le; <var title="">i</var> &lt; <var title="">N</var>, where <var
+ title="">N</var> is the total number of columns. The return value is the
+ string to use when referring to that column. If the method returns null
+ or the empty string, the column has no caption. If the method is not
+ defined, then none of the columns have any captions.
+
+ <dt>To establish what classes apply to a column
+
+ <dd>Invoke the <code title=dom-provider-getCaptionClasses><a
+ href="#getcaptionclasses">getCaptionClasses()</a></code> method with the
+ index of the column in question, and an object implementing the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> interface, associated with
+ an anonymous empty string. The index <var title="">i</var> must be in the
+ range 0 &le; <var title="">i</var> &lt; <var title="">N</var>, where <var
+ title="">N</var> is the total number of columns. The tokens contained in
+ the string underlying the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object when the method
+ returns represent the classes that apply to the given column. If the
+ method is not defined, no classes apply to the column.
+
+ <dt>To establish whether a column should be initially included in the
+ visible columns
+
+ <dd>Check whether the <code
+ title=datagrid-column-class-initially-hidden><a
+ href="#initially-hidden">initially-hidden</a></code> class applies to the
+ column. If it does, then the column should not be initially included; if
+ it does not, then the column should be initially included.
+
+ <dt id=columnType2>To establish whether the data can be sorted relative to
+ a particular column
+
+ <dd>Check whether the <code title=datagrid-column-class-sortable><a
+ href="#sortable">sortable</a></code> class applies to the column. If it
+ does, then the user should be able to ask the UA to display the data
+ sorted by that column; if it does not, then the user agent must not allow
+ the user to ask for the data to be sorted by that column.
+
+ <dt>To establish if a column is a sorted column
+
+ <dd>If the user agent can handle multiple columns being marked as sorted
+ simultaneously: Check whether the <code
+ title=datagrid-column-class-sorted><a href="#sorted">sorted</a></code>
+ class applies to the column. If it does, then that column is the sorted
+ column, otherwise it is not.
+
+ <dd>If the user agent can only handle one column being marked as sorted at
+ a time: Check each column in turn, starting with the first one, to see
+ whether the <code title=datagrid-column-class-sorted><a
+ href="#sorted">sorted</a></code> class applies to that column. The first
+ column that has that class, if any, is the sorted column. If none of the
+ columns have that class, there is no sorted column.
+
+ <dt>To establish the sort direction of a sorted column
+
+ <dd>Check whether the <code title=datagrid-column-class-reversed><a
+ href="#reversed">reversed</a></code> class applies to the column. If it
+ does, then the sort direction is descending (down; first rows have the
+ highest values), otherwise it is ascending (up; first rows have the
+ lowest values).
+
+ <dt>To determine the total number of rows
+
+ <dd>Determine the number of rows for the root of the data grid, and
+ determine the number of child rows for each open row. The total number of
+ rows is the sum of all these numbers.
+
+ <dt>To determine the number of rows for the root of the data grid
+
+ <dd>Invoke the <code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> method with a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the empty path as its only argument. The return value is the number of
+ rows at the top level of the data grid. If the return value of the method
+ is negative, not an integer, or simply not a numeric type, or if the
+ method is not defined, then zero must be used instead.
+
+ <dt>To determine the number of child rows for a row
+
+ <dd>Invoke the <code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> method with a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the path to the row in question. The return value is the number of child
+ rows for the given row. If the return value of the method is negative,
+ not an integer, or simply not a numeric type, or if the method is not
+ defined, then zero must be used instead.
+
+ <dt>To determine what order to render rows in
+
+ <dd>
+ <p>Invoke the <code title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition()</a></code> method with a
+ <code><a href="#rowspecification">RowSpecification</a></code> object
+ representing the path to the parent of the rows that are being rendered
+ as the first argument, and the position that is being rendered as the
+ second argument. The return value is the index of the row to render in
+ that position.</p>
+
+ <div class=example>
+ <p>If the rows are:</p>
+
+ <ol>
+ <li> Row "0"
+ <ol>
+ <li> Row "0,0"
+
+ <li> Row "0,1"
+ </ol>
+
+ <li> Row "1"
+ <ol>
+ <li> Row "1,0"
+
+ <li> Row "1,1"
+ </ol>
+ </ol>
+
+ <p>...and the <code title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition()</a></code> method is
+ implemented as follows:</p>
+
+ <pre>function getChildAtPosition(parent, child) {
+ // always return the reverse order
+ return getRowCount(parent)-child-1;
+}</pre>
+
+ <p>...then the rendering would actually be:</p>
+
+ <ol>
+ <li> Row "1"
+ <ol>
+ <li> Row "1,1"
+
+ <li> Row "1,0"
+ </ol>
+
+ <li> Row "0"
+ <ol>
+ <li> Row "0,1"
+
+ <li> Row "0,0"
+ </ol>
+ </ol>
+ </div>
+
+ <p>If the return value of the method is negative, larger than the number
+ of rows that the <code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> method reported for that
+ parent, not an integer, or simply not a numeric type, then the entire
+ data grid should be disabled. Similarly, if the method returns the same
+ value for two or more different values for the second argument (with the
+ same first argument, and assuming that the data grid hasn't had relevant
+ update methods invoked in the meantime), then the data grid should be
+ disabled. Instead of disabling the data grid, the user agent may act as
+ if the <code title=dom-provider-getChildAtPosition><a
+ href="#getchildatposition">getChildAtPosition()</a></code> method was
+ not defined on the data provider (thus disabling sorting for that data
+ grid, but still letting the user interact with the data). If the method
+ is not defined, then the return value must be assumed to be the same as
+ the second argument (an identity transform; the data is rendered in its
+ natural order).</p>
+
+ <dt>To establish what classes apply to a row
+
+ <dd>Invoke the <code title=dom-provider-getRowClasses><a
+ href="#getrowclasses">getRowClasses()</a></code> method with a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the row in question, and a <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> associated with an empty
+ string. The tokens contained in the <code><a
+ href="#domtokenlist0">DOMTokenList</a></code> object's underlying string
+ when the method returns represent the classes that apply to the row in
+ question. If the method is not defined, no classes apply to the row.
+
+ <dt>To establish whether a row is a data row or a special row
+
+ <dd>Examine the classes that apply to the row. If the <code
+ title=datagrid-row-class-header><a href="#header0">header</a></code>
+ class applies to the row, then it is not a data row, it is a subheading.
+ The data from the first cell of the row is the text of the subheading,
+ the rest of the cells must be ignored. Otherwise, if the <code
+ title=datagrid-row-class-separator><a
+ href="#separator">separator</a></code> class applies to the row, then in
+ the place of the row, a separator should be shown. Otherwise, if the
+ <code title=datagrid-row-class-selectable-separator><a
+ href="#selectable-separator">selectable-separator</a></code> class
+ applies to the row, then the row should be a data row, but represented as
+ a separator. (The difference between a <code
+ title=datagrid-row-class-separator><a
+ href="#separator">separator</a></code> and a <code
+ title=datagrid-row-class-selectable-separator><a
+ href="#selectable-separator">selectable-separator</a></code> is that the
+ former is not an item that can be actually selected, whereas the latter
+ can be selected and thus has a context menu that applies to it, and so
+ forth.) For both kinds of separator rows, the data of the rows' cells
+ must all be ignored. If none of those three classes apply then the row is
+ a simple data row.
+
+ <dt id=rowType1>To establish whether a row is openable
+
+ <dd>Determine the number of child rows for that row. If there are one or
+ more child rows, then the row is openable.
+
+ <dt>To establish whether a row should be initially open or closed
+
+ <dd>If <a href="#rowType1">the row is openable</a>, examine the classes
+ that apply to the row. If the <code
+ title=datagrid-row-class-initially-open><a
+ href="#initially-open">initially-open</a></code> class applies to the
+ row, then it should be initially open. Otherwise, if the <code
+ title=datagrid-row-class-initially-closed><a
+ href="#initially-closed">initially-closed</a></code> class applies to the
+ row, then it must be initially closed. Otherwise, if neither class
+ applies to the row, or if the row is not openable, then the initial state
+ of the row is entirely up to the UA.</dd>
+ <!-- XXXPA
+ <dt>To establish whether a row is deletable</dt>
+
+ <dd>Check whether the <code
+ title="datagrid-row-class-deletable">deletable</code> class applies
+ to the row. If it does, the row is deletable, and interactive user
+ agents should provide a way for the user to request that the row be
+ deleted. (See the <code
+ title="dom-provider-performActionOnRow">performActionOnRow()</code>
+ method for more details.) Otherwise, the user agent should not
+ provide the user with a method for requesting that the row be
+ deleted.</dd>
+-->
+
+ <dt>To obtain a URI to an image representing a row
+
+ <dd>Invoke the <code title=dom-provider-getRowImage><a
+ href="#getrowimage">getRowImage()</a></code> method with a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the row in question. The return value is a string representing a URI (or
+ IRI) to an image. Relative URIs must be interpreted relative to the
+ <code><a href="#datagrid0">datagrid</a></code>'s base URI. If the method
+ returns the empty string, null, or if the method is not defined, then the
+ row has no associated image.
+
+ <dt>To obtain a context menu appropriate for a particular row
+
+ <dd>Invoke the <code title=dom-provider-getRowMenu><a
+ href="#getrowmenu">getRowMenu()</a></code> method with a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the row in question. The return value is a reference to an object
+ implementing the <code><a
+ href="#htmlmenuelement">HTMLMenuElement</a></code> interface, i.e. a
+ <code><a href="#menu">menu</a></code> element DOM node. (This element
+ must then be interpreted as described in the section on context menus to
+ obtain the actual context menu to use.<!-- XXXX update once menu section
+ works; with xrefs -->)
+ If the method returns something that is not an <code><a
+ href="#htmlmenuelement">HTMLMenuElement</a></code>, or if the method is
+ not defined, then the row has no associated context menu. User agents may
+ provide their own default context menu, and may add items to the
+ author-provided context menu. For example, such a menu could allow the
+ user to change the presentation of the <code><a
+ href="#datagrid0">datagrid</a></code> element.
+
+ <dt>To establish the value of a particular cell
+
+ <dd>Invoke the <code title=dom-provider-getCellData><a
+ href="#getcelldata">getCellData()</a></code> method with the first
+ argument being a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the row of the cell in question and the second argument being the index
+ of the cell's column. The second argument must be a non-negative integer
+ less than the total number of columns. The return value is the value of
+ the cell. If the return value is null or the empty string, or if the
+ method is not defined, then the cell has no data. (For progress bar
+ cells, the cell's value must be further interpreted, as described below.)
+
+ <dt>To establish what classes apply to a cell
+
+ <dd>Invoke the <code title=dom-provider-getCellClasses><a
+ href="#getcellclasses">getCellClasses()</a></code> method with the first
+ argument being a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the row of the cell in question, the second argument being the index of
+ the cell's column, and the third being an object implementing the
+ <code><a href="#domtokenlist0">DOMTokenList</a></code> interface,
+ associated with an empty string. The second argument must be a
+ non-negative integer less than the total number of columns. The tokens
+ contained in the <code><a href="#domtokenlist0">DOMTokenList</a></code>
+ object's underlying string when the method returns represent the classes
+ that apply to that cell. If the method is not defined, no classes apply
+ to the cell.
+
+ <dt id=cellType1>To establish the type of a cell
+
+ <dd>Examine the classes that apply to the cell. If the <code
+ title=datagrid-cell-class-progress><a
+ href="#progress1">progress</a></code> class applies to the cell, it is a
+ progress bar. Otherwise, if the <code
+ title=datagrid-cell-class-cyclable><a
+ href="#cyclable">cyclable</a></code> class applies to the cell, it is a
+ cycling cell whose value can be cycled between multiple states.
+ Otherwise, none of these classes apply, and the cell is a simple text
+ cell.
+
+ <dt>To establish the value of a progress bar cell
+
+ <dd>If the value <var title="">x</var> of the cell is a string that can be
+ <a href="#rules1" title="rules for parsing floating point number
+ values">converted to a floating-point number</a> in the range
+ 0.0&nbsp;&le;&nbsp;<var title="">x</var>&nbsp;&le;&nbsp;1.0, then the
+ progress bar has that value (0.0 means no progress, 1.0 means complete).
+ Otherwise, the progress bar is an indeterminate progress bar.
+
+ <dt id=cellType2>To establish how a simple text cell should be presented
+
+ <dd>Check whether one of the <code title=datagrid-cell-class-checked><a
+ href="#checked">checked</a></code>, <code
+ title=datagrid-cell-class-unchecked><a
+ href="#unchecked">unchecked</a></code>, or <code
+ title=datagrid-cell-class-indeterminate><a
+ href="#indeterminate">indeterminate</a></code> classes applies to the
+ cell. If any of these are present, then the cell has a checkbox,
+ otherwise none are present and the cell does not have a checkbox. If the
+ cell has no checkbox, check whether the <code
+ title=datagrid-cell-class-editable><a
+ href="#editable">editable</a></code> class applies to the cell. If it
+ does, then the cell value is editable, otherwise the cell value is
+ static.
+
+ <dt>To establish the state of a cell's checkbox, if it has one
+
+ <dd>Check whether the <code title=datagrid-cell-class-checked><a
+ href="#checked">checked</a></code> class applies to the cell. If it does,
+ the cell is checked. Otherwise, check whether the <code
+ title=datagrid-cell-class-unchecked><a
+ href="#unchecked">unchecked</a></code> class applies to the cell. If it
+ does, the cell is unchecked. Otherwise, the <code
+ title=datagrid-cell-class-indeterminate><a
+ href="#indeterminate">indeterminate</a></code> class applies to the cell
+ and the cell's checkbox is in an indeterminate state. When the <code
+ title=datagrid-cell-class-indeterminate><a
+ href="#indeterminate">indeterminate</a></code> class applies to the
+ cell, the checkbox is a tristate checkbox, and the user can set it to the
+ indeterminate state. Otherwise, only the <code
+ title=datagrid-cell-class-checked><a href="#checked">checked</a></code>
+ and/or <code title=datagrid-cell-class-unchecked><a
+ href="#unchecked">unchecked</a></code> classes apply to the cell, and the
+ cell can only be toggled between those two states.
+ </dl>
+
+ <p>If the data provider ever raises an exception while the <code><a
+ href="#datagrid0">datagrid</a></code> is invoking one of its methods, the
+ <code><a href="#datagrid0">datagrid</a></code> must act, for the purposes
+ of that particular method call, as if the relevant method had not been
+ defined.
+
+ <p>A <code><a href="#rowspecification">RowSpecification</a></code> object
+ <var title="">p</var> with <var title="">n</var> path components passed to
+ a method of the data provider must fulfill the constraint
+ <span>0&nbsp;&le;&nbsp;<var title="">p<sub title=""><var
+ title="">i</var></sub></var>&nbsp;&lt;&nbsp;<var title="">m</var>-1</span>
+ for all integer values of <var title="">i</var> in the range
+ <span>0&nbsp;&le;&nbsp;<var title="">i</var>&nbsp;&lt;&nbsp;<var
+ title="">n</var>-1</span>, where <var title="">m</var> is the value that
+ was last returned by the <code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> method when it was passed the
+ <code><a href="#rowspecification">RowSpecification</a></code> object <var
+ title="">q</var> with <span><var title="">i</var>-1</span> items, where
+ <span><var title="">p<sub title=""><var
+ title="">i</var></sub></var>&nbsp;=&nbsp;<var title="">q<sub title=""><var
+ title="">i</var></sub></var></span> for all integer values of <var
+ title="">i</var> in the range <span>0&nbsp;&le;&nbsp;<var
+ title="">i</var>&nbsp;&lt;&nbsp;<var title="">n</var>-1</span>, with any
+ changes implied by the update methods taken into account.
+
+ <p id=inconsistentDataProvider>The data model is considered stable: user
+ agents may assume that subsequent calls to the data provider methods will
+ return the same data, until one of the update methods is called on the
+ <code><a href="#datagrid0">datagrid</a></code> element. If a user agent is
+ returned inconsistent data, for example if the number of rows returned by
+ <code title=dom-provider-getRowCount><a
+ href="#getrowcount">getRowCount()</a></code> varies in ways that do not
+ match the calls made to the update methods, the user agent may disable the
+ <code><a href="#datagrid0">datagrid</a></code>. User agents that do not
+ disable the <code><a href="#datagrid0">datagrid</a></code> in inconsistent
+ cases must honour the most recently returned values.
+
+ <p>User agents may cache returned values so that the data provider is never
+ asked for data that could contradict earlier data. User agents must not
+ cache the return value of the <code title=dom-provider-getRowMenu><a
+ href="#getrowmenu">getRowMenu</a></code> method.
+
+ <p>The exact algorithm used to populate the data grid is not defined here,
+ since it will differ based on the presentation used. However, the
+ behaviour of user agents must be consistent with the descriptions above.
+ For example, it would be non-conformant for a user agent to make cells
+ have both a checkbox and be editable, as the descriptions above state that
+ cells that have a checkbox cannot be edited.</p>
+ <!-- XXX speaking of which, do we actually want that
+ limitation? -->
+
+ <h5 id=updating><span class=secno>3.18.2.6. </span>Updating the <code><a
+ href="#datagrid0">datagrid</a></code></h5>
+
+ <p>Whenever the <code title=dom-datagrid-data><a
+ href="#data2">data</a></code> attribute is set to a new value, the
+ <code><a href="#datagrid0">datagrid</a></code> must clear the current
+ selection, remove all the displayed rows, and plan to repopulate itself
+ using the information from the new data provider at the earliest
+ opportunity.
+
+ <p>There are a number of update methods that can be invoked on the <code><a
+ href="#datagrid0">datagrid</a></code> element to cause it to refresh
+ itself in slightly less drastic ways:
+
+ <p>When the <dfn id=updateeverything
+ title=dom-datagrid-updateEverything><code>updateEverything()</code></dfn>
+ method is called, the user agent must repopulate the entire <code><a
+ href="#datagrid0">datagrid</a></code>. If the number of rows decreased,
+ the selection must be updated appropriately. If the number of rows
+ increased, the new rows should be left unselected.
+
+ <p>When the <dfn id=updaterowschanged
+ title=dom-datagrid-updateRowsChanged><code>updateRowsChanged(<var
+ title="">row</var>, <var title="">count</var>)</code></dfn> method is
+ called, the user agent must refresh the rendering of the rows starting
+ from the row specified by <var title="">row</var>, and including the <var
+ title="">count</var> next siblings of the row (or as many next siblings as
+ it has, if that is less than <var title="">count</var>), including all
+ descendant rows.
+
+ <p>When the <dfn id=updaterowsinserted
+ title=dom-datagrid-updateRowsInserted><code>updateRowsInserted(<var
+ title="">row</var>, <var title="">count</var>)</code></dfn> method is
+ called, the user agent must assume that <var title="">count</var> new rows
+ have been inserted, such that the first new row is identified by <var
+ title="">row</var>. The user agent must update its rendering and the
+ selection accordingly. The new rows should not be selected.
+
+ <p>When the <dfn id=updaterowsremoved
+ title=dom-datagrid-updateRowsRemoved><code>updateRowsRemoved(<var
+ title="">row</var>, <var title="">count</var>)</code></dfn> method is
+ called, the user agent must assume that <var title="">count</var> rows
+ have been removed starting from the row that used to be identified by <var
+ title="">row</var>. The user agent must update its rendering and the
+ selection accordingly.
+
+ <p>The <dfn id=updaterowchanged
+ title=dom-datagrid-updateRowChanged><code>updateRowChanged(<var
+ title="">row</var>)</code></dfn> method must be exactly equivalent to
+ calling <code title=dom-datagrid-updateRowsChanged><a
+ href="#updaterowschanged">updateRowsChanged(<var title="">row</var>,
+ 1)</a></code>.
+
+ <p>When the <dfn id=updatecolumnchanged
+ title=dom-datagrid-updateColumnChanged><code>updateColumnChanged(<var
+ title="">column</var>)</code></dfn> method is called, the user agent must
+ refresh the rendering of the specified column <var title="">column</var>,
+ for all rows.
+
+ <p>When the <dfn id=updatecellchanged
+ title=dom-datagrid-updateCellChanged><code>updateCellChanged(<var
+ title="">row</var>, <var title="">column</var>)</code></dfn> method is
+ called, the user agent must refresh the rendering of the cell on row <var
+ title="">row</var>, in column <var title="">column</var>.
+
+ <p>Any effects the update methods have on the <code><a
+ href="#datagrid0">datagrid</a></code>'s selection are not considered a
+ change to the selection, and must therefore not fire the <code
+ title=event-select><a href="#select">select</a></code> event.
+
+ <p>These update methods should only be called by the data provider, or code
+ acting on behalf of the data provider. In particular, calling the <code
+ title=dom-datagrid-updateRowsInserted><a
+ href="#updaterowsinserted">updateRowsInserted()</a></code> and <code
+ title=dom-datagrid-updateRowsRemoved><a
+ href="#updaterowsremoved">updateRowsRemoved()</a></code> methods without
+ actually inserting or removing rows from the data provider is <a
+ href="#inconsistentDataProvider">likely to result in inconsistent
+ renderings</a>, and the user agent is likely to disable the data grid.
+
+ <h5 id=requirements><span class=secno>3.18.2.7. </span>Requirements for
+ interactive user agents</h5>
+
+ <p><em>This section only applies to interactive user agents.</em>
+
+ <p>If the <code><a href="#datagrid0">datagrid</a></code> element has a <dfn
+ id=disabled3 title=attr-datagrid-disabled><code>disabled</code></dfn>
+ attribute, then the user agent must disable the <code><a
+ href="#datagrid0">datagrid</a></code>, preventing the user from
+ interacting with it. The <code><a href="#datagrid0">datagrid</a></code>
+ element should still continue to update itself when the data provider
+ signals changes to the data, though. Obviously, conformance requirements
+ stating that <code><a href="#datagrid0">datagrid</a></code> elements must
+ react to users in particular ways do not apply when one is disabled.
+
+ <p>If <a href="#rowType1">a row is openable</a>, then the user should be
+ able to toggle its open/closed state. When a row's open/closed state
+ changes, the user agent must update the rendering to match the new state.
+
+ <p>If a cell is a cell whose value <a href="#cellType1">can be cycled
+ between multiple states</a>, then the user must be able to activate the
+ cell to cycle its value. When the user activates this "cycling" behaviour
+ of a cell, then the <code><a href="#datagrid0">datagrid</a></code> must
+ invoke the data provider's <code title=dom-provider-cycleCell><a
+ href="#cyclecell">cycleCell()</a></code> method, with a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the cell's row as the first argument and the cell's column index as the
+ second. The <code><a href="#datagrid0">datagrid</a></code> must act as if
+ the <code><a href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-updateCellChanged><a
+ href="#updatecellchanged">updateCellChanged()</a></code> method had been
+ invoked with those same arguments immediately before the provider's method
+ was invoked.
+
+ <p>When a cell <a href="#cellType2">has a checkbox</a>, the user must be
+ able to set the checkbox's state. When the user changes the state of a
+ checkbox in such a cell, the <code><a
+ href="#datagrid0">datagrid</a></code> must invoke the data provider's
+ <code title=dom-provider-setCellCheckedState><a
+ href="#setcellcheckedstate">setCellCheckedState()</a></code> method, with
+ a <code><a href="#rowspecification">RowSpecification</a></code> object
+ representing the cell's row as the first argument, the cell's column index
+ as the second, and the checkbox's new state as the third. The state should
+ be represented by the number 1 if the new state is checked, 0 if the new
+ state is unchecked, and -1 if the new state is indeterminate (which must
+ only be possible if the cell has the <code
+ title=datagrid-cell-class-indeterminate><a
+ href="#indeterminate">indeterminate</a></code> class set). The <code><a
+ href="#datagrid0">datagrid</a></code> must act as if the <code><a
+ href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-updateCellChanged><a
+ href="#updatecellchanged">updateCellChanged()</a></code> method had been
+ invoked, specifying the same cell, immediately before the provider's
+ method was invoked.
+
+ <p>If a cell <a href="#cellType2">is editable</a>, the user must be able to
+ edit the data for that cell, and doing so must cause the user agent to
+ invoke the <code title=dom-provider-editCell><a
+ href="#editcell">editCell()</a></code> method of the data provider with
+ three arguments: a <code><a
+ href="#rowspecification">RowSpecification</a></code> object representing
+ the cell's row, the cell's column's index, and the new text entered by the
+ user. The user agent must act as if the <code
+ title=dom-datagrid-updateCellChanged><a
+ href="#updatecellchanged">updateCellChanged()</a></code> method had been
+ invoked, with the same row and column specified, immediately before the
+ provider's method was invoked.</p>
+ <!-- XXXPA <p class="big-issue">define actions (performAction(), etc)</p> -->
+
+ <h5 id=the-selection><span class=secno>3.18.2.8. </span>The selection</h5>
+
+ <p><em>This section only applies to interactive user agents. For other user
+ agents, the <code title=dom-datagrid-selection><a
+ href="#selection0">selection</a></code> attribute must return null.</em>
+
+ <pre
+ class=idl>interface <dfn id=datagridselection>DataGridSelection</dfn> {
+ readonly attribute unsigned long <a href="#length4" title=dom-DataGridSelection-length>length</a>;
+ <a href="#rowspecification">RowSpecification</a> <a href="#itemindex3" title=dom-DataGridSelection-item>item</a>(in unsigned long index);
+ boolean <a href="#isselected" title=dom-DataGridSelection-isSelected>isSelected</a>(in <a href="#rowspecification">RowSpecification</a> row);
+ void <a href="#setselected" title=dom-DataGridSelection-setSelected>setSelected</a>(in <a href="#rowspecification">RowSpecification</a> row, in boolean selected);
+<!-- void <span title="dom-DataGridSelection-addRange">addRange</span>(in <span>RowSpecification</span> first, in <span>RowSpecification</span> last);
+ void <span title="dom-DataGridSelection-removeRange">removeRange</span>(in <span>RowSpecification</span> first, in <span>RowSpecification</span> last);
+XXX selection ranges -->
+ void <a href="#selectall" title=dom-DataGridSelection-selectAll>selectAll</a>();
+ void <a href="#invert" title=dom-DataGridSelection-invert>invert</a>();
+ void <a href="#clear" title=dom-DataGridSelection-clear>clear</a>();
+};</pre>
+
+ <p>Each <code><a href="#datagrid0">datagrid</a></code> element must keep
+ track of which rows are currently selected. Initially no rows are
+ selected, but this can be changed via the methods described in this
+ section. <!--XXX
+ select-provider-selection The default data provider, for instance,
+ changes which rows are selected when it is first initialised.-->
+
+ <p>The selection of a <code><a href="#datagrid0">datagrid</a></code> is
+ represented by its <dfn id=selection0
+ title=dom-datagrid-selection><code>selection</code></dfn> DOM attribute,
+ which must be a <code><a
+ href="#datagridselection">DataGridSelection</a></code> object.
+
+ <p><code><a href="#datagridselection">DataGridSelection</a></code> objects
+ represent the rows in the selection. In the selection the rows must be
+ ordered in the natural order of the data provider (and not, e.g., the
+ rendered order). Rows that are not rendered because one of their ancestors
+ is closed must share the same selection state as their nearest rendered
+ ancestor. Such rows are not considered part of the selection for the
+ purposes of iterating over the selection.
+
+ <p class=note>This selection API doesn't allow for hidden rows to be
+ selected because it is trivial to create a data provider that has infinite
+ depth, which would then require the selection to be infinite if every row,
+ including every hidden row, was selected.
+
+ <p>The <dfn id=length4
+ title=dom-DataGridSelection-length><code>length</code></dfn> attribute
+ must return the number of rows currently present in the selection. The
+ <dfn id=itemindex3 title=dom-DataGridSelection-item><code>item(<var
+ title="">index</var>)</code></dfn> method must return the <var
+ title="">index</var>th row in the selection. If the argument is out of
+ range (less than zero or greater than the number of selected rows minus
+ one), then it must raise an <code>INDEX_SIZE_ERR</code> exception. <a
+ href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <p>The <dfn id=isselected
+ title=dom-DataGridSelection-isSelected><code>isSelected()</code></dfn>
+ method must return the selected state of the row specified by its
+ argument. If the specified row exists and is selected, it must return
+ true, otherwise it must return false.
+
+ <p>The <dfn id=setselected
+ title=dom-DataGridSelection-setSelected><code>setSelected()</code></dfn>
+ method takes two arguments, <var title="">row</var> and <var
+ title="">selected</var>. When invoked, it must set the selection state of
+ row <var title="">row</var> to selected if <var title="">selected</var> is
+ true, and unselected if it is false. If <var title="">row</var> is not a
+ row in the data grid, the method must raise an <code>INDEX_SIZE_ERR</code>
+ exception. If the specified row is not rendered because one of its
+ ancestors is closed, the method must do nothing.
+
+ <p>The <dfn id=selectall
+ title=dom-DataGridSelection-selectAll><code>selectAll()</code></dfn>
+ method must mark all the rows in the data grid as selected. After a call
+ to <code title=dom-DataGridSelection-selectAll><a
+ href="#selectall">selectAll()</a></code>, the <code
+ title=dom-DataGridSelection-length><a href="#length4">length</a></code>
+ attribute will return the number of rows in the data grid, not counting
+ children of closed rows.
+
+ <p>The <dfn id=invert
+ title=dom-DataGridSelection-invert><code>invert()</code></dfn> method must
+ cause all the rows in the selection that were marked as selected to now be
+ marked as not selected, and vice versa.
+
+ <p>The <dfn id=clear
+ title=dom-DataGridSelection-clear><code>clear()</code></dfn> method must
+ mark all the rows in the data grid as not selected. After a
+ call to <code title=dom-DataGridSelection-clear><a
+ href="#clear">clear()</a></code>, the <code
+ title=dom-DataGridSelection-length><a href="#length4">length</a></code>
+ attribute will return zero.
+
+ <p>If the <code><a href="#datagrid0">datagrid</a></code> element has a <dfn
+ id=multiple0 title=attr-datagrid-multiple><code>multiple</code></dfn>
+ attribute, then the user must be able to select any number of rows (zero
+ or more). If the attribute is not present, then the user must only be able
+ to select a single row at a time, and selecting another one must unselect
+ all the other rows.
+
+ <p class=note>This only applies to the user. Scripts can select multiple
+ rows even when the <code title=attr-datagrid-multiple><a
+ href="#multiple0">multiple</a></code> attribute is absent.
+
+ <p>Whenever the selection of a <code><a
+ href="#datagrid0">datagrid</a></code> changes, whether due to the user
+ interacting with the element, or as a result of calls to methods of the
+ <code title=dom-datagrid-selection><a
+ href="#selection0">selection</a></code> object, a <dfn id=select
+ title=event-select><code>select</code></dfn><!-- XXX check if we
+ really should be DFNing this here. It's a DOM3 Core event. What's
+ our story going to be regarding events and defining them? -->
+ event that bubbles but is not cancelable must be fired on the <code><a
+ href="#datagrid0">datagrid</a></code> element. If changes are made to the
+ selection via calls to the object's methods during the execution of a
+ script<!-- XXX should xref to a better explanation -->, then the <code
+ title=event-select><a href="#select">select</a></code> events must be
+ coalesced into one, which must then be fired<!--XXX xref again--> when the
+ script execution has completed<!-- XXX xref -->.
+
+ <p class=note>The <code><a
+ href="#datagridselection">DataGridSelection</a></code> interface has no
+ relation to the <code><a href="#selection1">Selection</a></code>
+ interface.
+
+ <h5 id=columns><span class=secno>3.18.2.9. </span>Columns and captions</h5>
+
+ <p><em>This section only applies to interactive user agents.</em>
+
+ <p>Each <code><a href="#datagrid0">datagrid</a></code> element must keep
+ track of which columns are currently being rendered. User agents should
+ initially show all the columns except those with the <code
+ title=datagrid-column-class-initially-hidden><a
+ href="#initially-hidden">initially-hidden</a></code> class, but may allow
+ users to hide or show columns. User agents should initially display the
+ columns in the order given by the data provider, but may allow this order
+ to be changed by the user.
+
+ <p>If columns are not being used, as might be the case if the data grid is
+ being presented in an icon view, or if an overview of data is being read
+ in an aural context, then the text of the first column of each row should
+ be used to represent the row.
+
+ <p>If none of the columns have any captions (i.e. if the data provider does
+ not provide a <code title=dom-provider-getCaptionText><a
+ href="#getcaptiontext">getCaptionText()</a></code> method), then user
+ agents may avoid showing the column headers at all. This may prevent the
+ user from performing actions on the columns (such as reordering them,
+ changing the sort column, and so on).
+
+ <p class=note>Whatever the order used for rendering, and irrespective of
+ what columns are being shown or hidden, the "first column" as referred to
+ in this specification is always the column with index zero, and the "last
+ column" is always the column with the index one less than the value
+ returned by the <code title=dom-provider-getcolumnCount><a
+ href="#getcolumncount">getColumnCount()</a></code> method of the data
+ provider.
+
+ <p>If <a href="#columnType2">a column is sortable</a>, then the user must
+ be able to invoke it to sort the data. When the user does so, then the
+ <code><a href="#datagrid0">datagrid</a></code> must invoke the data
+ provider's <code title=dom-provider-toggleColumnSortState><a
+ href="#togglecolumnsortstate">toggleColumnSortState()</a></code> method,
+ with the column's index as the only argument. The <code><a
+ href="#datagrid0">datagrid</a></code> must <em>then</em> act as if the
+ <code><a href="#datagrid0">datagrid</a></code>'s <code
+ title=dom-datagrid-updateEverything><a
+ href="#updateeverything">updateEverything()</a></code> method had been
+ invoked.</p>
+ <!--XXXDND
+ <h5>Drag and drop in <code>datagrid</code>s</h5>
+
+ <p><em>This section only applies to interactive user agents.</em></p>
+
+ <p class="big-issue">define drag and drop in datagrids; selectiondraggable, etc.</p>
+-->
+
+ <h4 id=the-command><span class=secno>3.18.3. </span>The <dfn
+ id=command0><code>command</code></dfn> element</h4>
+
+ <p><a href="#metadata" title="metadata elements">Metadata element</a>, and
+ <a href="#strictly">strictly inline-level content</a>.</p>
+ <!-- XXX we sure we
+ want it to be metadata? -->
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>In a <code><a href="#head">head</a></code> element.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Empty.
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-command-type><a href="#type13">type</a></code>
+
+ <dd><code title=attr-command-label><a href="#label">label</a></code>
+
+ <dd><code title=attr-command-icon><a href="#icon">icon</a></code>
+
+ <dd><code title=attr-command-hidden><a href="#hidden">hidden</a></code>
+
+ <dd><code title=attr-command-disabled><a
+ href="#disabled4">disabled</a></code>
+
+ <dd><code title=attr-command-checked><a
+ href="#checked0">checked</a></code>
+
+ <dd><code title=attr-command-radiogroup><a
+ href="#radiogroup">radiogroup</a></code>
+
+ <dd><code title=attr-command-default><a
+ href="#default1">default</a></code>
+
+ <dd>Also, the <code title=attr-command-title><a
+ href="#title6">title</a></code> attribute has special semantics on this
+ element.
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlcommandelement>HTMLCommandElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <a href="#type14" title=dom-command-type>type</a>;
+ attribute DOMString <a href="#label0" title=dom-command-label>label</a>;
+ attribute DOMString <a href="#icon0" title=dom-command-icon>icon</a>;
+ attribute boolean <a href="#hidden0" title=dom-command-hidden>hidden</a>;
+ attribute boolean <a href="#disabled5" title=dom-command-disabled>disabled</a>;
+ attribute boolean <a href="#checked1" title=dom-command-checked>checked</a>;
+ attribute DOMString <a href="#radiogroup0" title=dom-command-radiogroup>radiogroup</a>;
+ attribute boolean <a href="#default2" title=dom-command-default>default</a>;
+ void <a href="#click0" title=dom-command-click>click</a>(); // shadows <code><a href="#htmlelement">HTMLElement</a></code>.<code title=dom-click><a href="#click">click()</a></code>
+};</pre>
+
+ <p>The <code title=command-ro><a href="#command2">Command</a></code>
+ interface must also be implemented by this element.</p>
+ </dl>
+
+ <p>The <code><a href="#command0">command</a></code> element represents a
+ command that the user can invoke.
+
+ <p>The <dfn id=type13 title=attr-command-type><code>type</code></dfn>
+ attribute indicates the kind of command: either a normal command with an
+ associated action, or a state or option that can be toggled, or a
+ selection of one item from a list of items.
+
+ <p>The attribute's value must be either "<code title="">command</code>",
+ "<code title="">checkbox</code>", or "<code title="">radio</code>",
+ denoting each of these three types of commands respectively. The attribute
+ may also be omitted if the element is to represent the first of these
+ types, a simple command.
+
+ <p>The <dfn id=label title=attr-command-label><code>label</code></dfn>
+ attribute gives the name of the command, as shown to the user.
+
+ <p>The <dfn id=title6 title=attr-command-title><code>title</code></dfn>
+ attribute gives a hint describing the command, which might be shown to the
+ user to help him.
+
+ <p>The <dfn id=icon title=attr-command-icon><code>icon</code></dfn>
+ attribute gives a picture that represents the command. If the attribute is
+ specified, the attribute's value must contain a URI (or IRI).
+
+ <p>The <dfn id=hidden title=attr-command-hidden><code>hidden</code></dfn>
+ attribute is a <a href="#boolean0">boolean attribute</a> that, if present,
+ indicates that the command is not relevant and is to be hidden.
+
+ <p>The <dfn id=disabled4
+ title=attr-command-disabled><code>disabled</code></dfn> attribute is a <a
+ href="#boolean0">boolean attribute</a> that, if present, indicates that
+ the command is not available in the current state.
+
+ <p class=note>The distinction between <a href="#disabled6"
+ title=command-facet-DisabledState>Disabled State</a> and <a
+ href="#hidden1" title=command-facet-HiddenState>Hidden State</a> is
+ subtle. A command should be Disabled if, in the same context, it could be
+ enabled if only certain aspects of the situation were changed. A command
+ should be marked as Hidden if, in that situation, the command will never
+ be enabled. For example, in the context menu for a water faucet, the
+ command "open" might be Disabled if the faucet is already open, but the
+ command "eat" would be marked Hidden since the faucet could never be
+ eaten.
+
+ <p>The <dfn id=checked0
+ title=attr-command-checked><code>checked</code></dfn> attribute is a <a
+ href="#boolean0">boolean attribute</a> that, if present, indicates that
+ the command is selected.
+
+ <p>The <dfn id=radiogroup
+ title=attr-command-radiogroup><code>radiogroup</code></dfn> attribute
+ gives the name of the group of commands that will be toggled when the
+ command itself is toggled, for commands whose <code
+ title=attr-command-type><a href="#type13">type</a></code> attribute has
+ the value "<code title="">radio</code>". The scope of the name is the
+ child list of the parent element.
+
+ <p>If the <code><a href="#command0">command</a></code> element is used when
+ <span title="menu generation">generating</span> a <span>context
+ menu</span>, then the <dfn id=default1
+ title=attr-command-default><code>default</code></dfn> attribute indicates,
+ if present, that the command is the one that would have been invoked if
+ the user had directly activated the menu's subject instead of using its
+ context menu. The <code title=attr-command-default><a
+ href="#default1">default</a></code> attribute is a <a
+ href="#boolean0">boolean attribute</a>.
+
+ <div class=example>
+ <p class=big-issue>Need an example that shows an element that, if
+ double-clicked, invokes an action, but that also has a context menu,
+ showing the various <code><a href="#command0">command</a></code>
+ attributes off, and that has a default command.</p>
+ </div>
+
+ <p>The <dfn id=type14 title=dom-command-type><code>type</code></dfn>, <dfn
+ id=label0 title=dom-command-label><code>label</code></dfn>, <dfn id=icon0
+ title=dom-command-icon><code>icon</code></dfn>, <dfn id=hidden0
+ title=dom-command-hidden><code>hidden</code></dfn>, <dfn id=disabled5
+ title=dom-command-disabled><code>disabled</code></dfn>, <dfn id=checked1
+ title=dom-command-checked><code>checked</code></dfn>, <dfn id=radiogroup0
+ title=dom-command-radiogroup><code>radiogroup</code></dfn>, and <dfn
+ id=default2 title=dom-command-default><code>default</code></dfn> DOM
+ attributes must <a href="#reflect">reflect</a> their respective namesake
+ content attributes.
+
+ <p>The <dfn id=click0 title=dom-command-click><code>click()</code></dfn>
+ method's behaviour depends on the value of the <code
+ title=attr-command-type><a href="#type13">type</a></code> attribute of the
+ element, as follows:
+
+ <dl class=switch>
+ <dt>If the <code title=attr-command-type><a href="#type13">type</a></code>
+ attribute has the value <code title="">checkbox</code>
+
+ <dd>
+ <p>If the element has a <code title=attr-command-checked><a
+ href="#checked0">checked</a></code> attribute, the UA must remove that
+ attribute. Otherwise, the UA must add a <code
+ title=attr-command-checked><a href="#checked0">checked</a></code>
+ attribute, with the literal value <code title="">checked</code>. The UA
+ must then <a href="#firing">fire a <code title="">click</code> event</a>
+ at the element.
+
+ <dt>If the <code title=attr-command-type><a href="#type13">type</a></code>
+ attribute has the value <code title="">radio</code>
+
+ <dd>
+ <p>If the element has a parent, then the UA must walk the list of child
+ nodes of that parent element, and for each node that is a <code><a
+ href="#command0">command</a></code> element, if that element has a <code
+ title=attr-command-radiogroup><a
+ href="#radiogroup">radiogroup</a></code> attribute whose value exactly
+ matches the current element's (treating missing <code
+ title=attr-command-radiogroup><a
+ href="#radiogroup">radiogroup</a></code> attributes as if they were the
+ empty string), and has a <code title=attr-command-checked><a
+ href="#checked0">checked</a></code> attribute, must remove that
+ attribute and <a href="#firing">fire a <code title="">click</code>
+ event</a> at the element.</p>
+
+ <p>Then, the element's <code title=attr-command-checked><a
+ href="#checked0">checked</a></code> attribute must be set to
+ the literal value <code title="">checked</code> and a <span title="fire
+ a click event"><code title="">click</code> event must be fired</span> at
+ the element.
+
+ <dt>Otherwise
+
+ <dd>
+ <p>The UA must <a href="#firing">fire a <code title="">click</code>
+ event</a> at the element.
+ </dl>
+
+ <p class=note>Firing a synthetic <code title=event-click>click</code> event
+ at the element does not cause any of the actions described above to
+ happen.
+
+ <p class=big-issue> should change all the above so it actually is just
+ triggered by a click event, then we could remove the shadowing click()
+ method and rely on actual events.
+
+ <p class=big-issue>Need to define the command="" attribute
+
+ <p class=note><code><a href="#command0">command</a></code> elements are not
+ rendered unless they <a href="#menu" title=menu>form part of a menu</a>.
+
+ <h4 id=menus><span class=secno>3.18.4. </span>The <dfn
+ id=menu><code>menu</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>, and <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dd>Where <a href="#structured">structured inline-level elements</a> are
+ allowed.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#li">li</a></code> elements, or <a
+ href="#inline-level0">inline-level content</a> (but not both).
+
+ <dt>Element-specific attributes:
+
+ <dd><code title=attr-menu-type><a href="#type15">type</a></code>
+
+ <dd><code title=attr-menu-label><a href="#label1">label</a></code>
+
+ <dd><code title=attr-menu-autosubmit><a
+ href="#autosubmit">autosubmit</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlmenuelement>HTMLMenuElement</dfn> : <a href="#htmlelement">HTMLElement</a> {
+ attribute DOMString <span title=dom-menu-type>type</span>;
+ attribute DOMString <span title=dom-menu-label>label</span>;
+ attribute boolean <span title=dom-menu-autosubmit>autosubmit</span>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#menu">menu</a></code> element represents a list of
+ commands.
+
+ <p>The <dfn id=type15 title=attr-menu-type><code>type</code></dfn>
+ attribute is an <a href="#enumerated">enumerated attribute</a> indicating
+ the kind of menu being declared. The attribute has three states. The <code
+ title=attr-menu-type-context>context</code> keyword maps to the <dfn
+ id=context1 title="context menu state">context menu</dfn> state, in which
+ the element is declaring a context menu. The <code
+ title=attr-menu-type-toolbar>toolbar</code> keyword maps to the <dfn
+ id=tool-bar title="tool bar state">tool bar</dfn> state, in which the
+ element is declaring a tool bar. The attribute may also be omitted. The
+ <i>missing value default</i> is the <dfn id=list title="list
+ state">list</dfn> state, which indicates that the element is merely a list
+ of commands that is neither declaring a context menu nor defining a tool
+ bar.
+
+ <p>If a <code><a href="#menu">menu</a></code> element's <code
+ title=attr-menu-type><a href="#type15">type</a></code> attribute is in the
+ <a href="#context1" title="context menu state">context menu</a> state,
+ then the element represents the commands of a context menu, and the user
+ can only interact with the commands if that context menu is activated.
+
+ <p>If a <code><a href="#menu">menu</a></code> element's <code
+ title=attr-menu-type><a href="#type15">type</a></code> attribute is in the
+ <a href="#tool-bar" title="tool bar state">tool bar</a> state, then the
+ element represents a list of active commands that the user can immediately
+ interact with.
+
+ <p>If a <code><a href="#menu">menu</a></code> element's <code
+ title=attr-menu-type><a href="#type15">type</a></code> attribute is in the
+ <a href="#list" title="list state">list</a> state, then the element either
+ represents an unordered list of items (each represented by an
+ <code>li</code> element), each of which represents a command that the user
+ may perform or activate, or, if the element has no <code>li</code> element
+ children, a <span>paragraph</span> describing available commands.
+
+ <p>The <dfn id=label1 title=attr-menu-label><code>label</code></dfn>
+ attribute gives the label of the menu. It is used by user agents to
+ display nested menus in the UI. For example, a context menu containing
+ another menu would use the nested menu's <code title=attr-menu-label><a
+ href="#label1">label</a></code> attribute for the submenu's menu label.
+
+ <p>The <dfn id=autosubmit
+ title=attr-menu-autosubmit><code>autosubmit</code></dfn> attribute is a <a
+ href="#boolean0">boolean attribute</a> that, if present, indicates that
+ selections made to form controls in this menu are to result in the
+ control's form being immediately submitted.
+
+ <p>If a <code title=event-change>change</code> event bubbles through a
+ <code><a href="#menu">menu</a></code> element, then, in addition to any
+ other default action that that event might have, the UA must act as if the
+ following was an additional default action for that event: if (when it
+ comes time to execute the default action) the <code><a
+ href="#menu">menu</a></code> element has an <code
+ title=attr-menu-autosubmit><a href="#autosubmit">autosubmit</a></code>
+ attribute, and the target of the event is an <code>input</code> element,
+ and that element has a <code title=attr-input-type>type</code> attribute
+ whose value is either <code title="">radio</code> or <code
+ title="">checkbox</code>, and the <code>input</code> element in question
+ has a non-null <code title=dom-input-form>form</code> DOM attribute, then
+ the UA must invoke the <code title=dom-form-submit>submit()</code> method
+ of the <code>form</code> element indicated by that DOM attribute.
+
+ <h5 id=menus-intro><span class=secno>3.18.4.1. </span>Introduction</h5>
+
+ <p><em>This section is non-normative.</em>
+
+ <p class=big-issue>...</p>
+ <!--
+
+
+ <pre>&lt;menu type="commands"&gt;
+ &lt;li&gt;
+ &lt;menu label="File"&gt;
+ &lt;button type="button" onclick="fnew()"&gt;New...&lt;/button&gt;
+ &lt;button type="button" onclick="fopen()"&gt;Open...&lt;/button&gt;
+ &lt;button type="button" onclick="fsave()" id="save"&gt;Save&lt;/button&gt;
+ &lt;button type="button" onclick="fsaveas()"&gt;Save as...&lt;/button&gt;
+ &lt;/menu&gt;
+ &lt;/li&gt;
+ &lt;li&gt;
+ &lt;menu label="Edit"&gt;
+ &lt;button type="button" onclick="ecopy()"&gt;Copy&lt;/button&gt;
+ &lt;button type="button" onclick="ecut()"&gt;Cut&lt;/button&gt;
+ &lt;button type="button" onclick="epaste()"&gt;Paste&lt;/button&gt;
+ &lt;/menu&gt;
+ &lt;/li&gt;
+ &lt;li&gt;
+ &lt;menu label="Help"&gt;
+ &lt;li&gt;&lt;a href="help.html"&gt;Help&lt;/a&gt;&lt;/li&gt;
+ &lt;li&gt;&lt;a href="about.html"&gt;About&lt;/a&gt;&lt;/li&gt;
+ &lt;/menu&gt;
+ &lt;/li&gt;
+&lt;/menubar&gt;
+
+...
+
+&lt;input command="save"/&gt; &lt;!- - This will act exactly like the
+ Save button above, including reflecting
+ its <code>disabled</code> state dynamically - -&gt;
+
+</pre>
+
+ <p>Here's some markup that falls back on the traditional abuse of
+ the <code>select</code> element as a navigation menu, but which is
+ implemented as a semi-correct menu using the new techniques of this
+ document:</p>
+
+<pre>&lt;form action="redirect.cgi"&gt;
+ &lt;menu type="commands"&gt;
+ &lt;label for="goto"&gt;Go to...&lt;/label&gt;
+ &lt;menu label="Go"&gt;
+ &lt;select id="goto"
+ onchange="if (this.options[this.selectedIndex].value)
+ window.location = this.options[this.selectedIndex].value"&gt;
+ &lt;option value="" selected="selected"&gt; Select site: &lt;/option&gt;
+ &lt;option value="http://www.apple.com/"&gt; Apple &lt;/option&gt;
+ &lt;option value="http://www.mozilla.org/"&gt; Mozilla &lt;/option&gt;
+ &lt;option value="http://www.opera.com/"&gt; Opera &lt;/option&gt;
+ &lt;/select&gt;
+ &lt;span&gt;&lt;input type="submit" value="Go"&gt;&lt;/span&gt;
+ &lt;/menu&gt;
+ &lt;/menubar&gt;
+&lt;/form&gt;</pre>
+
+<form ...>
+ <menu type="toolbar" autosubmit>
+ <li>
+ <select name="foo" onchange="form.submit()">
+ ...
+ </select>
+ <button>Go</button>
+ </li>
+ <li>
+ <select name="bar" onchange="form.submit()">
+ ...
+ </select>
+ <button>Go</button>
+ </li>
+ </menu>
+</form>
+
+<form ...>
+ <menu type="toolbar" autosubmit>
+ <menu label="Foo">
+ <select name="foo" onchange="form.submit()">
+ ...
+ </select>
+ <button>Go</button>
+ </menu>
+ <menu label="Bar">
+ <select name="bar" onchange="form.submit()">
+ ...
+ </select>
+ <button>Go</button>
+ </menu>
+ </menu>
+</form>
+
+-->
+
+ <h5 id=building><span class=secno>3.18.4.2. </span><dfn
+ id=building0>Building menus</dfn></h5>
+
+ <p>A menu consists of a list of zero or more of the following components:
+
+ <ul class=brief>
+ <li><a href="#command1" title=concept-command>Commands</a>, which can be
+ marked as default commands
+
+ <li>Separators
+
+ <li>Other menus (which allows the list to be nested)
+ </ul>
+
+ <p>The list corresponding to a particular <code><a
+ href="#menu">menu</a></code> element is built by iterating over its child
+ nodes. For each child node in <a href="#tree-order">tree order</a>, the
+ required behaviour depends on what the node is, as follows:
+
+ <dl class=switch>
+ <dt>An element that <a href="#command1" title=concept-command>defines a
+ command</a>
+
+ <dd>Append the command to the menu. If the element is a <code><a
+ href="#command0">command</a></code> element with a <code
+ title=attr-command-default><a href="#default1">default</a></code>
+ attribute, mark the command as being a default command.
+
+ <dt>An <code><a href="#hr">hr</a></code> element
+
+ <dt>An <code>option</code> element that has a <code
+ title=attr-option-value>value</code> attribute set to the empty string,
+ and has a <code title=attr-option-disabled>disabled</code> attribute, and
+ whose <code><a href="#textcontent">textContent</a></code> consists of a
+ string of one or more hyphens (U+002D HYPHEN-MINUS)
+
+ <dd>Append a separator to the menu.
+
+ <dt>An <code><a href="#li">li</a></code> element
+
+ <dd>Iterate over the children of the <code><a href="#li">li</a></code>
+ element.
+
+ <dt>A <code><a href="#menu">menu</a></code> element with no <code
+ title=attr-menu-label><a href="#label1">label</a></code> attribute
+
+ <dt>A <code>select</code> element
+
+ <dd>Append a separator to the menu, then iterate over the children of the
+ <code><a href="#menu">menu</a></code> or <code>select</code> element,
+ then append another separator.
+
+ <dt>A <code><a href="#menu">menu</a></code> element with a <code
+ title=attr-menu-label><a href="#label1">label</a></code> attribute
+
+ <dt>An <code>optgroup</code> element
+
+ <dd>Append a submenu to the menu, using the value of the element's <code
+ title="">label</code> attribute as the label of the menu. The submenu
+ must be constructed by taking the element and creating a new menu for it
+ using the complete process described in this section.
+
+ <dt>Any other node
+
+ <dd><a href="#ignored">Ignore</a> the node.
+ </dl>
+
+ <p>Once all the nodes have been processed as described above, the user
+ agent must then post-process the menu as follows:
+
+ <ol>
+ <li>Any menu item with no label, or whose label is the empty string, must
+ be removed.
+
+ <li>Any sequence of two or more separators in a row must be collapsed to a
+ single separator.
+
+ <li>Any separator at the start or end of the menu must be removed.
+ </ol>
+
+ <h5 id=context><span class=secno>3.18.4.3. </span><dfn id=context2>Context
+ menus</dfn></h5>
+
+ <p>The <dfn id=contextmenu
+ title=attr-contextmenu><code>contextmenu</code></dfn> attribute gives the
+ element's <a href="#context2" title="context menus">context menu</a>. The
+ value must be the ID of a <code><a href="#menu">menu</a></code> element in
+ the DOM. If the node that would be obtained by invoking the
+ <code>getElementById()</code> method using the attribute's value as the
+ only argument is null or not a <code><a href="#menu">menu</a></code>
+ element, then the element has no assigned context menu. Otherwise, the
+ element's assigned context menu is the element so identified.
+
+ <p>When an element's context menu is requested (e.g. by the user
+ right-clicking the element, or pressing a context menu key), the UA must
+ <a href="#firing1">fire a <code title="">contextmenu</code> event</a> on
+ the element for which the menu was requested.
+
+ <p class=note>Typically, therefore, the firing of the <code
+ title=event-contextmenu>contextmenu</code> event will be the default
+ action of a <code title=event-mouseup>mouseup</code> or <code
+ title=event-keyup>keyup</code> event. The exact sequence of events is
+ UA-dependent, as it will vary based on platform conventions.
+
+ <p>The default action of the <code
+ title=event-contextmenu>contextmenu</code> event depends on whether the
+ element has a context menu assigned (using the <code
+ title=attr-contextmenu><a href="#contextmenu">contextmenu</a></code>
+ attribute) or not. If it does not, the default action must be for the user
+ agent to show its default context menu, if it has one.
+
+ <p>If the element <em>does</em> have a context menu assigned, then the user
+ agent must <a href="#firing3">fire a <code title="">show</code> event</a>
+ on the relevant <code><a href="#menu">menu</a></code> element.
+
+ <p>The default action of <em>this</em> event is that the user agent must
+ show a context menu <a href="#building0" title="building menus">built</a>
+ from the <code><a href="#menu">menu</a></code> element.
+
+ <p>The user agent may also provide access to its default context menu, if
+ any, with the context menu shown. For example, it could merge the menu
+ items from the two menus together, or provide the page's context menu as a
+ submenu of the default menu.
+
+ <p>If the user dismisses the menu without making a selection, nothing in
+ particular happens.
+
+ <p>If the user selects a menu item that represents a <a href="#command1"
+ title=concept-command>command</a>, then the UA must invoke that
+ command's <a href="#action" title=command-facet-Action>Action</a>.
+
+ <p>Context menus must not, while being shown, reflect changes in the DOM;
+ they are constructed as the default action of the <code
+ title=event-show>show</code> event and must remain like that until
+ dismissed.
+
+ <p>User agents may provide means for bypassing the context menu processing
+ model, ensuring that the user can always access the UA's default context
+ menus. For example, the user agent could handle right-clicks that have the
+ Shift key depressed in such a way that it does not fire the <code
+ title=event-contextmenu>contextmenu</code> event and instead always shows
+ the default context menu.
+
+ <p>The <dfn id=contextmenu0
+ title=dom-contextMenu><code>contextMenu</code></dfn> attribute must <a
+ href="#reflect">reflect</a> the <code title=attr-contextmenu><a
+ href="#contextmenu">contextmenu</a></code> content attribute.
+
+ <h5 id=toolbars><span class=secno>3.18.4.4. </span><dfn
+ id=toolbars0>Toolbars</dfn></h5>
+
+ <p>Toolbars are a kind of menu that is always visible.
+
+ <p>When a <code><a href="#menu">menu</a></code> element has a <code
+ title=attr-menu-type><a href="#type15">type</a></code> attribute with the
+ value <code title="">toolbar</code>, then the user agent must <a
+ href="#building0" title="building menus">build</a> the menu for that
+ <code><a href="#menu">menu</a></code> element and <span
+ title=render-toolbar>render</span><!-- XXX xref --> it in the document in
+ a position appropriate for that <code><a href="#menu">menu</a></code>
+ element.
+
+ <p>The user agent must reflect changes made to the <code><a
+ href="#menu">menu</a></code>'s DOM immediately in the UI.
+
+ <h4 id=commands><span class=secno>3.18.5. </span>Commands</h4>
+
+ <p>A <dfn id=command1 title=concept-command>command</dfn> is the
+ abstraction behind menu items, buttons, and links. Once a command is
+ defined, other parts of the interface can refer to the same command,
+ allowing many access points to a single feature to share aspects such as
+ the disabled state.
+
+ <p id=facets>Commands are defined to have the following <em>facets</em>:
+
+ <dl>
+ <dt><dfn id=type16 title=command-facet-Type>Type</dfn>
+
+ <dd>The kind of command: "command", meaning it is a normal command;
+ "radio", meaning that triggering the command will, amongst other things,
+ set the <a href="#checked2" title=command-facet-CheckedState>Checked
+ State</a> to true (and probably uncheck some other commands); or
+ "checkbox", meaning that triggering the command will, amongst other
+ things, toggle the value of the <a href="#checked2"
+ title=command-facet-CheckedState>Checked State</a>.
+
+ <dt><dfn id=id1 title=command-facet-ID>ID</dfn>
+
+ <dd>The name of the command, for referring to the command from the markup
+ or from script. If a command has no ID, it is an <dfn
+ id=anonymous>anonymous command</dfn>.
+
+ <dt><dfn id=label2 title=command-facet-Label>Label</dfn>
+
+ <dd>The name of the command as seen by the user.
+
+ <dt><dfn id=hint title=command-facet-Hint>Hint</dfn>
+
+ <dd>A helpful or descriptive string that can be shown to the user.
+
+ <dt><dfn id=icon1 title=command-facet-Icon>Icon</dfn>
+
+ <dd>A graphical image that represents the action.
+
+ <dt><dfn id=hidden1 title=command-facet-HiddenState>Hidden State</dfn>
+
+ <dd>Whether the command is hidden or not (basically, whether it should be
+ shown in menus).
+
+ <dt><dfn id=disabled6 title=command-facet-DisabledState>Disabled
+ State</dfn>
+
+ <dd>Whether the command can be triggered or not. If the <a href="#hidden1"
+ title=command-facet-HiddenState>Hidden State</a> is true (hidden) then
+ the <a href="#disabled6" title=command-facet-DisabledState>Disabled
+ State</a> will be true (disabled) regardless. <span class=issue>We could
+ make this into a string value that acts as a Hint for why the command is
+ disabled.</span>
+
+ <dt><dfn id=checked2 title=command-facet-CheckedState>Checked State</dfn>
+
+ <dd>Whether the command is checked or not.
+
+ <dt><dfn id=action title=command-facet-Action>Action</dfn>
+
+ <dd>The actual effect that triggering the command will have. This could be
+ a scripted event handler, a URI to which to navigate, or a form
+ submission.
+
+ <dt><dfn id=triggers title=command-facet-Triggers>Triggers</dfn>
+
+ <dd>The list of elements that can trigger the command. The element
+ defining a command is always in the list of elements that can trigger the
+ command. For anonymous commands, only the element defining the command is
+ on the list, since other elements have no way to refer to it.
+ </dl>
+
+ <p>Commands are represented by elements in the DOM. Any element that can
+ define a command also implements the <code title=command-ro><a
+ href="#command2">Command</a></code> interface:
+
+ <pre
+ class=idl>interface <dfn id=command2 title=command-ro>Command</dfn> {<!--
+ NOTE: to avoid clashing with the HTMLCommandElement interface's names,
+ the members of this interface use cross-references with the title
+ dom-command-ro-foo (note the "ro", which stands for "readonly").
+-->
+ readonly attribute DOMString <a href="#commandtype" title=dom-command-ro-commandType>commandType</a>;
+ readonly attribute DOMString <a href="#id2" title=dom-command-ro-id>id</a>;
+ readonly attribute DOMString <a href="#label3" title=dom-command-ro-label>label</a>;
+ readonly attribute DOMString <a href="#title7" title=dom-command-ro-title>title</a>;
+ readonly attribute DOMString <a href="#icon2" title=dom-command-ro-icon>icon</a>;
+ readonly attribute boolean <a href="#hidden2" title=dom-command-ro-hidden>hidden</a>;
+ readonly attribute boolean <a href="#disabled7" title=dom-command-ro-disabled>disabled</a>;
+ readonly attribute boolean <a href="#checked3" title=dom-command-ro-checked>checked</a>;
+ void <a href="#click1" title=dom-command-ro-click>click</a>();
+ readonly attribute <a href="#htmlcollection0">HTMLCollection</a> <a href="#triggers0" title=dom-command-ro-triggers>triggers</a>;
+ readonly attribute <a href="#command0">Command</a> <span title=dom-command-ro-command>command</span>;
+};</pre>
+
+ <p>The <code title=command-ro><a href="#command2">Command</a></code>
+ interface is implemented by any element capable of defining a command. (If
+ an element can define a command, its definition will list this interface
+ explicitly.) All the attributes of the <code title=command-ro><a
+ href="#command2">Command</a></code> interface are read-only. Elements
+ implementing this interface may implement other interfaces that have
+ attributes with identical names but that are mutable; in bindings that
+ simply flatten all supported interfaces on the object, the mutable
+ attributes must shadow the readonly attributes defined in the <code
+ title=command-ro><a href="#command2">Command</a></code> interface.
+
+ <p>The <dfn id=commandtype
+ title=dom-command-ro-commandType><code>commandType</code></dfn> attribute
+ must return a string whose value is either "<code
+ title="">command</code>", "<code title="">radio</code>", or "<code
+ title="">checkbox</code>", depending on whether the <a href="#type16"
+ title=command-facet-Type>Type</a> of the command defined by the element is
+ "command", "radio", or "checkbox" respectively. If the element does not
+ define a command, it must return null.
+
+ <p>The <dfn id=id2 title=dom-command-ro-id><code>id</code></dfn> attribute
+ must return the command's <a href="#id1" title=command-facet-ID>ID</a>, or
+ null if the element does not define a command or defines an <a
+ href="#anonymous">anonymous command</a>. This attribute will be shadowed
+ by the <code title=dom-id><a href="#id0">id</a></code> DOM attribute on
+ the <code><a href="#htmlelement">HTMLElement</a></code> interface.
+
+ <p>The <dfn id=label3 title=dom-command-ro-label><code>label</code></dfn>
+ attribute must return the command's <a href="#label2"
+ title=command-facet-Label>Label</a>, or null if the element does not
+ define a command or does not specify a <a href="#label2"
+ title=command-facet-Label>Label</a>. This attribute will be shadowed by
+ the <code title="">label</code> DOM attribute on <code>option</code> and
+ <code><a href="#command0">command</a></code> elements.
+
+ <p>The <dfn id=title7 title=dom-command-ro-title><code>title</code></dfn>
+ attribute must return the command's <a href="#hint"
+ title=command-facet-Hint>Hint</a>, or null if the element does not define
+ a command or does not specify a <a href="#hint"
+ title=command-facet-Hint>Hint</a>. This attribute will be shadowed by the
+ <code title=dom-title><a href="#title0">title</a></code> DOM attribute on
+ the <code><a href="#htmlelement">HTMLElement</a></code> interface.
+
+ <p>The <dfn id=icon2 title=dom-command-ro-icon><code>icon</code></dfn>
+ attribute must return an absolute URI to the command's <a href="#icon1"
+ title=command-facet-Icon>Icon</a>. If the element does not specify an
+ icon, or if the element does not define a command, then the attribute must
+ return null. This attribute will be shadowed by the <code
+ title=dom-command-icon><a href="#icon0">icon</a></code> DOM attribute on
+ <code><a href="#command0">command</a></code> elements.
+
+ <p>The <dfn id=hidden2
+ title=dom-command-ro-hidden><code>hidden</code></dfn> attribute must
+ return true if the command's <a href="#hidden1"
+ title=command-facet-HiddenState>Hidden State</a> is that the command is
+ hidden, and false if it is that the command is not hidden. If the element
+ does not define a command, the attribute must return false. This attribute
+ will be shadowed by the <code title=dom-command-hidden><a
+ href="#hidden0">hidden</a></code> DOM attribute on <code><a
+ href="#command0">command</a></code> elements.
+
+ <p>The <dfn id=disabled7
+ title=dom-command-ro-disabled><code>disabled</code></dfn> attribute must
+ return true if the command's <a href="#disabled6"
+ title=command-facet-DisabledState>Disabled State</a> is that the command
+ is disabled, and false if the command is not disabled. This attribute is
+ not affected by the command's <a href="#hidden1"
+ title=command-facet-HiddenState>Hidden State</a>. If the element does not
+ define a command, the attribute must return false. This attribute will be
+ shadowed by the <code title="">disabled</code> attribute on
+ <code>button</code>, <code>input</code>, <code>option</code>, and <code><a
+ href="#command0">command</a></code> elements.
+
+ <p>The <dfn id=checked3
+ title=dom-command-ro-checked><code>checked</code></dfn> attribute must
+ return true if the command's <a href="#checked2"
+ title=command-facet-CheckedState>Checked State</a> is that the command is
+ checked, and false if it is that the command is not checked. If the
+ element does not define a command, the attribute must return false. This
+ attribute will be shadowed by the <code title="">checked</code> attribute
+ on <code>input</code> and <code><a href="#command0">command</a></code>
+ elements.
+
+ <p>The <dfn id=click1 title=dom-command-ro-click><code>click()</code></dfn>
+ method must trigger the <a href="#action"
+ title=command-facet-Action>Action</a> for the command. If the element does
+ not define a command, this method must do nothing. This method will be
+ shadowed by the <code title=dom-click><a href="#click">click()</a></code>
+ method on HTML elements, and is included only for completeness.
+
+ <p>The <dfn id=triggers0
+ title=dom-command-ro-triggers><code>triggers</code></dfn> attribute must
+ return a list containing the elements that can trigger the command (the
+ command's <a href="#triggers" title=command-facet-Triggers>Triggers</a>).
+ The list must be <a href="#live">live</a>. While the element does not
+ define a command, the list must be empty.
+
+ <p>The <dfn id=commands0
+ title=dom-document-commands><code>commands</code></dfn> attribute of the
+ document's <code><a href="#htmldocument">HTMLDocument</a></code> interface
+ must return an <code><a href="#htmlcollection0">HTMLCollection</a></code>
+ rooted at the <code>Document</code> node, whose filter matches only
+ elements that define commands and have IDs.
+
+ <p>The following elements can define commands: <code title=a-command><a
+ href="#using4">a</a></code>, <code title=button-command><a
+ href="#using5">button</a></code>, <code title=input-command><a
+ href="#using6">input</a></code>, <code title=option-command><a
+ href="#using7">option</a></code>, <code title=command-element><a
+ href="#command3">command</a></code>.
+
+ <h5 id=using><span class=secno>3.18.5.1. </span><dfn id=using4
+ title=a-command>Using the <code>a</code> element to define a command</dfn></h5>
+
+ <p>An <code><a href="#a">a</a></code> element with an <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code> attribute <a
+ href="#command1" title=concept-command>defines a command</a>.
+
+ <p>The <a href="#type16" title=command-facet-Type>Type</a> of the command
+ is "command".
+
+ <p>The <a href="#id1" title=command-facet-ID>ID</a> of the command is the
+ value of the <code title=attr-id><a href="#id">id</a></code> attribute of
+ the element, if the attribute is present and not empty. Otherwise the
+ command is an <a href="#anonymous">anonymous command</a>.
+
+ <p>The <a href="#label2" title=command-facet-Label>Label</a> of the command
+ is the string given by the element's <code><a
+ href="#textcontent">textContent</a></code> DOM attribute.
+
+ <p>The <a href="#hint" title=command-facet-Hint>Hint</a> of the command is
+ the value of the <code title=attr-title><a href="#title">title</a></code>
+ attribute of the <code><a href="#a">a</a></code> element. If the attribute
+ is not present, the <a href="#hint" title=command-facet-Hint>Hint</a> is
+ the empty string.
+
+ <p>The <a href="#icon1" title=command-facet-Icon>Icon</a> of the command is
+ the absolute URI of the first image in the element. Specifically, in a
+ depth-first search of the children of the element, the first element that
+ is <!--either an--> <code><a href="#img">img</a></code> element with a
+ <code>src</code> attribute<!--, or an <code>object</code> element
+ with a <code>data</code> attribute, or, if the UA supports SVG, an
+ <code>svg</code> element in the SVG namespace with a valid <code
+ title="">id</code> attribute,-->
+ is the one that is used as the image.
+ <!--If it is an <code>img</code> element then--> The URI must be taken
+ from the element's <code>src</code> attribute. <!--If it is
+ an <code>object</code> element then the URI is taken from the
+ <code>data</code> attribute. -->
+ Relative URIs must be resolved relative to the base URI of the image
+ element. <!-- If it is an
+ <code>svg</code> element then the URI is formed by taking the URI of
+ the document and appending a "#" (U+0023 NUMBER SIGN) and the ID of
+ the element.-->
+ If no image is found, then the Icon facet is left blank.
+
+ <p>The <a href="#hidden1" title=command-facet-HiddenState>Hidden State</a>
+ and <a href="#disabled6" title=command-facet-DisabledState>Disabled
+ State</a> facets of the command are always false. (The command is always
+ enabled.)
+
+ <p>The <a href="#checked2" title=command-facet-CheckedState>Checked
+ State</a> of the command is always false. (The command is never checked.)
+
+ <p>The <a href="#action" title=command-facet-Action>Action</a> of the
+ command is to <a href="#firing" title="fire a click event">fire a <code
+ title="">click</code> event</a> at the element.
+
+ <h5 id=using0><span class=secno>3.18.5.2. </span><dfn id=using5
+ title=button-command>Using the <code>button</code> element to define a
+ command</dfn></h5>
+
+ <p>A <code>button</code> element always <a href="#command1"
+ title=concept-command>defines a command</a>.
+
+ <p>The <a href="#type16" title=command-facet-Type>Type</a>, <a href="#id1"
+ title=command-facet-ID>ID</a>, <a href="#label2"
+ title=command-facet-Label>Label</a>, <a href="#hint"
+ title=command-facet-Hint>Hint</a>, <a href="#icon1"
+ title=command-facet-Icon>Icon</a>, <a href="#hidden1"
+ title=command-facet-HiddenState>Hidden State</a>, <a href="#checked2"
+ title=command-facet-CheckedState>Checked State</a>, and <a href="#action"
+ title=command-facet-Action>Action</a> facets of the command are determined
+ <a href="#using4" title=a-command>as for <code>a</code> elements</a> (see
+ the previous section).
+
+ <p>The <a href="#disabled6" title=command-facet-DisabledState>Disabled
+ State</a> of the command mirrors the disabled state of the button.
+ Typically this is given by the element's <code
+ title=attr-button-disabled>disabled</code> attribute, but certain button
+ types become disabled at other times too (for example, the
+ <code>move-up</code> button type is disabled when it would have no
+ effect).
+
+ <h5 id=using1><span class=secno>3.18.5.3. </span><dfn id=using6
+ title=input-command>Using the <code>input</code> element to define a
+ command</dfn></h5>
+
+ <p>An <code>input</code> element whose <code
+ title=attr-input-type>type</code> attribute is one of <code>submit</code>,
+ <code>reset</code>, <code>button</code>, <code>radio</code>,
+ <code>checkbox</code>, <code>move-up</code>, <code>move-down</code>,
+ <code>add</code>, and <code>remove</code> <a href="#command1"
+ title=concept-command>defines a command</a>.
+
+ <p>The <a href="#type16" title=command-facet-Type>Type</a> of the command
+ is "radio" if the <code title=attr-input-type>type</code> attribute has
+ the value <code>radio</code>, "checkbox" if the <code>type</code>
+ attribute has the value <code>checkbox</code>, and "command" otherwise.
+
+ <p>The <a href="#id1" title=command-facet-ID>ID</a> of the command is the
+ value of the <code title=attr-id><a href="#id">id</a></code> attribute of
+ the element, if the attribute is present and not empty. Otherwise the
+ command is an <a href="#anonymous">anonymous command</a>.
+
+ <p>The <a href="#label2" title=command-facet-Label>Label</a> of the command
+ depends on the Type of the command:
+
+ <p>If the <a href="#type16" title=command-facet-Type>Type</a> is "command",
+ then it is the string given by the <code
+ title=attr-input-value>value</code> attribute, if any, and a
+ <span>UA-dependent value</span><!-- XXX xref--> that the UA uses to label
+ the button itself if the attribute is absent.
+
+ <p>Otherwise, the <a href="#type16" title=command-facet-Type>Type</a> is
+ "radio" or "checkbox". If the element has a <code>label</code> element
+ associated with it, the <code><a
+ href="#textcontent">textContent</a></code> of the first such element is
+ the <a href="#label2" title=command-facet-Label>Label</a> (in DOM terms,
+ this is the string given by <code><var
+ title="">element</var>.labels[0].textContent</code>). Otherwise, the value
+ of the <code>value</code> attribute, if present, is the <a href="#label2"
+ title=command-facet-Label>Label</a>. Otherwise, the <a href="#label2"
+ title=command-facet-Label>Label</a> is the empty string.
+
+ <p>The <a href="#hint" title=command-facet-Hint>Hint</a> of the command is
+ the value of the <code title=attr-title><a href="#title">title</a></code>
+ attribute of the <code>input</code> element. If the attribute is not
+ present, the <a href="#hint" title=command-facet-Hint>Hint</a> is the
+ empty string.
+
+ <p>There is no <a href="#icon1" title=command-facet-Icon>Icon</a> for the
+ command.
+
+ <p>The <a href="#hidden1" title=command-facet-HiddenState>Hidden State</a>
+ of the command is always false. (The command is never hidden.)
+
+ <p>The <a href="#disabled6" title=command-facet-DisabledState>Disabled
+ State</a> of the command mirrors the disabled state of the control.
+ Typically this is given by the element's <code
+ title=attr-input-disabled>disabled</code> attribute, but certain input
+ types become disabled at other times too (for example, the
+ <code>move-up</code> input type is disabled when it would have no effect).
+
+ <p>The <a href="#checked2" title=command-facet-CheckedState>Checked
+ State</a> of the command is true if the command is of <a href="#type16"
+ title=command-facet-Type>Type</a> "radio" or "checkbox" and the element
+ has a <code title=attr-input-checked>checked</code> attribute, and false
+ otherwise.
+
+ <p>The <a href="#action" title=command-facet-Action>Action</a> of the
+ command is to <a href="#firing" title="fire a click event">fire a <code
+ title="">click</code> event</a> at the element.</p>
+ <!-- XXX this
+ is probably wrong for radio and checkbox types, depending on how we
+ define <input>. -->
+
+ <h5 id=using2><span class=secno>3.18.5.4. </span><dfn id=using7
+ title=option-command>Using the <code>option</code> element to define a
+ command</dfn></h5>
+
+ <p>An <code>option</code> element with an ancestor <code>select</code>
+ element and either no <code title=attr-option-value>value</code> attribute
+ or a <code title=attr-option-value>value</code> attribute that is not the
+ empty string <a href="#command1" title=concept-command>defines a
+ command</a>.
+
+ <p>The <a href="#type16" title=command-facet-Type>Type</a> of the command
+ is "radio" if the <code>option</code>'s nearest ancestor
+ <code>select</code> element has no <code
+ title=attr-select-multiple>multiple</code> attribute, and "checkbox" if it
+ does.
+
+ <p>The <a href="#id1" title=command-facet-ID>ID</a> of the command is the
+ value of the <code title=attr-id><a href="#id">id</a></code> attribute of
+ the element, if the attribute is present and not empty. Otherwise the
+ command is an <a href="#anonymous">anonymous command</a>.
+
+ <p>The <a href="#label2" title=command-facet-Label>Label</a> of the command
+ is the value of the <code>option</code> element's <code
+ title=attr-option-label>label</code> attribute, if there is one, or the
+ value of the <code>option</code> element's <code><a
+ href="#textcontent">textContent</a></code> DOM attribute if there isn't.
+
+ <p>The <a href="#hint" title=command-facet-Hint>Hint</a> of the command is
+ the string given by the element's <code title=attr-title><a
+ href="#title">title</a></code> attribute, if any, and the empty string if
+ the attribute is absent.
+
+ <p>There is no <a href="#icon1" title=command-facet-Icon>Icon</a> for the
+ command.
+
+ <p>The <a href="#hidden1" title=command-facet-HiddenState>Hidden State</a>
+ of the command is always false. (The command is never hidden.)
+
+ <p>The <a href="#disabled6" title=command-facet-DisabledState>Disabled
+ State</a> of the command is true (disabled) if the element has a <code
+ title=attr-option-disabled>disabled</code> attribute, and false otherwise.
+
+ <p>The <a href="#checked2" title=command-facet-CheckedState>Checked
+ State</a> of the command is true (checked) if the element's <code
+ title=dom-option-selected>selected</code> DOM attribute is true, and false
+ otherwise.
+
+ <p>The <a href="#action" title=command-facet-Action>Action</a> of the
+ command depends on its <a href="#type16"
+ title=command-facet-Type>Type</a>. If the command is of <a href="#type16"
+ title=command-facet-Type>Type</a> "radio" then this must set the <code
+ title=dom-option-selected>selected</code> DOM attribute of the
+ <code>option</code> element to true, otherwise it must toggle the state of
+ the <code title=dom-option-selected>selected</code> DOM attribute (set it
+ to true if it is false and vice versa). Then <a href="#firing0"
+ title="fire a change event">a <code title="">change</code> event must be
+ fired</a> on the <code>option</code> element's nearest ancestor
+ <code>select</code> element (if there is one), as if the selection had
+ been changed directly.
+
+ <h5 id=using3><span class=secno>3.18.5.5. </span>Using the <dfn id=command3
+ title=command-element><code>command</code></dfn> element to define a
+ command</h5>
+
+ <p>A <code><a href="#command0">command</a></code> element <a
+ href="#command1" title=concept-command>defines a command</a>.
+
+ <p>The <a href="#type16" title=command-facet-Type>Type</a> of the command
+ is "radio" if the <code><a href="#command0">command</a></code>'s <code
+ title=attr-command-type><a href="#type13">type</a></code> attribute is
+ "<code>radio</code>", "checkbox" if the attribute's value is
+ "<code>checkbox</code>", and "command" otherwise.
+
+ <p>The <a href="#id1" title=command-facet-ID>ID</a> of the command is the
+ value of the <code title=attr-id><a href="#id">id</a></code> attribute of
+ the element, if the attribute is present and not empty. Otherwise the
+ command is an <a href="#anonymous">anonymous command</a>.
+
+ <p>The <a href="#label2" title=command-facet-Label>Label</a> of the command
+ is the value of the element's <code title=attr-command-label><a
+ href="#label">label</a></code> attribute, if there is one, or the empty
+ string if there isn't.
+
+ <p>The <a href="#hint" title=command-facet-Hint>Hint</a> of the command is
+ the string given by the element's <code title=attr-command-title><a
+ href="#title6">title</a></code> attribute, if any, and the empty string if
+ the attribute is absent.
+
+ <p>The <a href="#icon1" title=command-facet-Icon>Icon</a> for the command
+ is the absolute URI resulting from resolving the value of the element's
+ <code title=attr-command-icon><a href="#icon">icon</a></code> attribute as
+ a URI relative to the element's base URI. If the element has no <code
+ title=attr-command-icon><a href="#icon">icon</a></code> attribute then the
+ command has no <a href="#icon1" title=command-facet-Icon>Icon</a>.
+
+ <p>The <a href="#hidden1" title=command-facet-HiddenState>Hidden State</a>
+ of the command is true (hidden) if the element has a <code
+ title=attr-command-hidden><a href="#hidden">hidden</a></code> attribute,
+ and false otherwise.
+
+ <p>The <a href="#disabled6" title=command-facet-DisabledState>Disabled
+ State</a> of the command is true (disabled) if the element has either a
+ <code title=attr-command-disabled><a href="#disabled4">disabled</a></code>
+ attribute or a <code title=attr-command-hidden><a
+ href="#hidden">hidden</a></code> attribute (or both), and false otherwise.
+
+ <p>The <a href="#checked2" title=command-facet-CheckedState>Checked
+ State</a> of the command is true (checked) if the element has a <code
+ title=attr-command-checked><a href="#checked0">checked</a></code>
+ attribute, and false otherwise.
+
+ <p>The <a href="#action" title=command-facet-Action>Action</a> of the
+ command is to invoke the behaviour described in the definition of the
+ <code title=dom-command-click><a href="#click0">click()</a></code> method
+ of the <code><a href="#htmlcommandelement">HTMLCommandElement</a></code>
+ interface.</p>
+ <!-- XXX update to
+ point to dom-click when we remove dom-command-click -->
+
+ <h3 id=miscellaneous><span class=secno>3.19. </span>Miscellaneous elements</h3>
+
+ <h4 id=the-legend><span class=secno>3.19.1. </span>The <dfn
+ id=legend><code>legend</code></dfn> element</h4>
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>As the first child of a <code>fieldset</code> element.
+
+ <dd>As the first child of a <code><a href="#details">details</a></code>
+ element.
+
+ <dd>As a child of a <code><a href="#figure">figure</a></code> element, if
+ there are no other <code><a href="#legend">legend</a></code> element
+ children of that element.
+
+ <dt>Content model:
+
+ <dd>If used as a child of a <code>fieldset</code> or <code><a
+ href="#details">details</a></code> element: <a href="#significant"
+ title="significant inline content">significant</a> <a
+ href="#strictly">strictly inline-level content</a>
+
+ <dd>If used as a child of a <code><a href="#figure">figure</a></code>
+ element: <a href="#inline-level0">inline-level content</a>.
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#legend">legend</a></code> element represents a title
+ or explanatory caption for the rest of the contents of the <code><a
+ href="#legend">legend</a></code> element's parent element.
+
+ <h4 id=the-div><span class=secno>3.19.2. </span>The <dfn
+ id=div><code>div</code></dfn> element</h4>
+
+ <p><a href="#block-level0" title="block-level elements">Block-level
+ element</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <a href="#block-level0">block-level elements</a> are expected.
+
+ <dt>Content model:
+
+ <dd>Zero or more <code><a href="#style">style</a></code> elements,
+ followed by either zero or more <a href="#block-level0">block-level
+ elements</a>, or <a href="#inline-level0">inline-level content</a> (but
+ not both).
+
+ <dt>Element-specific attributes:
+
+ <dd>None.
+
+ <dt>DOM interface:
+
+ <dd>No difference from <code><a
+ href="#htmlelement">HTMLElement</a></code>.
+ </dl>
+
+ <p>The <code><a href="#div">div</a></code> element represents nothing at
+ all. It can be used with the <code title=attr-class><a
+ href="#class">class</a></code>, <code title=attr-lang><a
+ href="#lang">lang</a></code>/<code title=attr-xml-lang><a
+ href="#xmllang">xml:lang</a></code>, and <code title=attr-title><a
+ href="#title">title</a></code> attributes to mark up semantics common to a
+ group of consecutive elements.
+
+ <h2 id=web-browsers><span class=secno>4. </span>Web browsers</h2>
+
+ <p>This section describes features that apply most directly to Web
+ browsers. Having said that, unless specified elsewhere, the requirements
+ defined in this section <em>do</em> apply to all user agents, whether they
+ are Web browsers or not.
+
+ <h3 id=windows><span class=secno>4.1. </span>Browsing contexts</h3>
+
+ <p>A <dfn id=browsing0>browsing context</dfn> is a collection of one or
+ more <code>Document</code> objects, and one or more <a href="#view"
+ title=view>views</a>.
+
+ <p>At any one time, one of the <code>Document</code>s in a <a
+ href="#browsing0">browsing context</a> is the <dfn id=active>active
+ document</dfn>. The collection of <code>Document</code>s is the <a
+ href="#browsing0">browsing context</a>'s <a href="#session">session
+ history</a>.
+
+ <p>A <dfn id=view>view</dfn> is a user agent interface tied to a particular
+ media, used for the presentation of <code>Document</code> objects in that
+ media. A view may be interactive. Each view is represented by an
+ <code>AbstractView</code> object. Each view belongs to a <a
+ href="#browsing0">browsing context</a>. <a
+ href="#refsDOM2VIEWS">[DOM2VIEWS]</a>
+
+ <p class=note>The <code title="">document</code> attribute of an
+ <code>AbstractView</code> object representing a <a href="#view">view</a>
+ gives the <code>Document</code> object of the view's <a
+ href="#browsing0">browsing context</a>'s <a href="#active">active
+ document</a>. <a href="#refsDOM2VIEWS">[DOM2VIEWS]</a>
+
+ <p class=note>Events that use the <code>UIEvent</code> interface are
+ related to a specific <a href="#view">view</a> (the view in which the
+ event happened); the <code>AbstractView</code> of that view is given in
+ the event object's <code title="">view</code> attribute. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p class=note>A typical Web browser has one obvious <a
+ href="#view">view</a> per <a href="#browsing0">browsing context</a>: the
+ browser's window (screen media). If a page is printed, however, a second
+ view becomes evident, that of the print media. The two views always share
+ the same underlying <code>Document</code>, but they have a different
+ presentation of that document. A speech browser also establishes a
+ browsing context, one with a view in the speech media.
+
+ <p class=note>A <code>Document</code> does not necessarily have a <a
+ href="#browsing0">browsing context</a> associated with it. In particular,
+ data mining tools are likely to never instantiate browsing contexts.
+
+ <p>The main <a href="#view">view</a> through which a user primarily
+ interacts with a user agent is the <dfn id=default3>default view</dfn>.
+
+ <p class=note>The <a href="#default3">default view</a> of a
+ <code>Document</code> is given by the <code title="">defaultView</code>
+ attribute on the <code>Document</code> object's <code>DocumentView</code>
+ interface. <a href="#refsDOM3VIEWS">[DOM3VIEWS]</a>
+
+ <p>When a <a href="#browsing0">browsing context</a> is first created, it
+ must be created with a single <code>Document</code> in its session
+ history, whose <span title="the document's address">address</span> is
+ <code>about:blank</code>, which is marked as being an <a href="#html-"
+ title="HTML documents">HTML document</a>. The <code>Document</code> must
+ have a single child <code><a href="#html">html</a></code> node, which
+ itself has a single child <code><a href="#body0">body</a></code> node. If
+ the <a href="#browsing0">browsing context</a> is created specifically to
+ be immediately navigated, then that initial navigation will have <a
+ href="#replacement">replacement enabled</a>.
+
+ <h4 id=nested><span class=secno>4.1.1. </span>Nested browsing contexts</h4>
+
+ <p>Certain elements (for example, <code><a href="#iframe">iframe</a></code>
+ elements) can instantiate further <a href="#browsing0" title="browsing
+ context">browsing contexts</a>. These are called <dfn id=nested0
+ title="nested browsing context">nested browsing contexts</dfn>. If a
+ browsing context <var title="">P</var> has an element in one of its
+ <code>Document</code>s <var title="">D</var> that nests another browsing
+ context <var title="">C</var> inside it, then <var title="">P</var> is
+ said to be the <dfn id=parent>parent browsing context</dfn> of <var
+ title="">C</var>, <var title="">C</var> is said to be a <dfn
+ id=child>child browsing context</dfn> of <var title="">P</var>, and <var
+ title="">C</var> is said to be <dfn id=nested1 title="browsing context
+ nested through">nested through</dfn> <var title="">D</var>.
+
+ <p>The browsing context with no <a href="#parent">parent browsing
+ context</a> is the <dfn id=top-level>top-level browsing context</dfn> of
+ all the browsing contexts <a href="#nested0" title="nested browsing
+ context">nested</a> within it (either directly or indirectly through other
+ nested browsing contexts).
+
+ <p>A <code>Document</code> is said to be <dfn id=fully>fully active</dfn>
+ when it is the <a href="#active">active document</a> of its <a
+ href="#browsing0">browsing context</a>, and either its browsing context is
+ a <a href="#top-level">top-level browsing context</a>, or the
+ <code>Document</code> <a href="#nested1" title="browsing context nested
+ through">through which</a> that browsing context is <a href="#nested0"
+ title="nested browsing context">nested</a> is itself <a
+ href="#fully">fully active</a>.
+
+ <p>Because they are nested through an element, <a href="#child"
+ title="child browsing context">child browsing contexts</a> are always tied
+ to a specific <code>Document</code> in their <a href="#parent">parent
+ browsing context</a>. User agents must not allow the user to interact with
+ <a href="#child" title="child browsing context">child browsing
+ contexts</a> of elements that are in <code>Document</code>s that are not
+ themselves <a href="#fully">fully active</a>.
+
+ <h4 id=auxiliary><span class=secno>4.1.2. </span>Auxiliary browsing
+ contexts</h4>
+
+ <p>It is possible to create new browsing contexts that are related to a
+ <span>top-level browsing context</span> without being nested through an
+ element. Such browsing contexts are called <dfn id=auxiliary0
+ title="auxiliary browsing context">auxiliary browsing contexts</dfn>.
+ Auxiliary browsing contexts are always <a href="#top-level"
+ title="top-level browsing context">top-level browsing contexts</a>.
+
+ <p>An <a href="#auxiliary0">auxiliary browsing context</a> has an <dfn
+ id=opener>opener browsing context</dfn>, which is the <a
+ href="#browsing0">browsing context</a> from which the <a
+ href="#auxiliary0">auxiliary browsing context</a> was created, and it has
+ a <dfn id=furthest>furthest ancestor browsing context</dfn>, which is the
+ <a href="#top-level">top-level browsing context</a> of the <a
+ href="#opener">opener browsing context</a> when the <a
+ href="#auxiliary0">auxiliary browsing context</a> was created.
+
+ <p>The <dfn id=opener0 title=dom-opener><code>opener</code></dfn> DOM
+ attribute on the <code><a href="#window">Window</a></code> object must
+ return the <code><a href="#window">Window</a></code> object of the <a
+ href="#browsing0">browsing context</a> from which the current browsing
+ context was created (its <a href="#opener">opener browsing context</a>),
+ if there is one and it is still available.
+
+ <h4 id=secondary><span class=secno>4.1.3. </span>Secondary browsing
+ contexts</h4>
+
+ <p>User agents may support <dfn id=secondary0 title="secondary browsing
+ context">secondary browsing contexts</dfn>, which are <a href="#browsing0"
+ title="browsing context">browsing contexts</a> that form part of the user
+ agent's interface, apart from the main content area.
+
+ <h4 id=threads><span class=secno>4.1.4. </span>Threads</h4>
+
+ <p>Each <a href="#browsing0">browsing context</a> is defined as having a
+ list of zero or more <dfn id=directly>directly reachable browsing
+ contexts</dfn>. These are:
+
+ <ul>
+ <li>All the <a href="#browsing0">browsing context</a>'s <a href="#child"
+ title="child browsing context">child browsing contexts</a>.
+
+ <li>The <a href="#browsing0">browsing context</a>'s <a
+ href="#parent">parent browsing context</a>.
+
+ <li>All the <a href="#browsing0" title="browsing context">browsing
+ contexts</a> that have the <a href="#browsing0">browsing context</a> as
+ their <a href="#opener">opener browsing context</a>.
+
+ <li>The <a href="#browsing0">browsing context</a>'s <a
+ href="#opener">opener browsing context</a>.
+ </ul>
+
+ <p>The transitive closure of all the <a href="#browsing0" title="browsing
+ context">browsing contexts</a> that are <a href="#directly">directly
+ reachable browsing contexts</a> consists of a <dfn id=unit-of>unit of
+ related browsing contexts</dfn>.
+
+ <p>All the executable code in a <a href="#unit-of">unit of related browsing
+ contexts</a> must execute on a single conceptual thread. The dispatch of
+ events fired by the user agent (e.g. in response to user actions or
+ network activity) and the execution of any scripts associated with timers
+ must be serialised so that for each <a href="#unit-of">unit of related
+ browsing contexts</a> there is only one script being executed at a time.
+
+ <h4 id=browsing><span class=secno>4.1.5. </span>Browsing context names</h4>
+
+ <p>Browsing contexts can have a <dfn id=browsing1>browsing context
+ name</dfn>. By default, a browsing context has no name (its name is not
+ set).
+
+ <p>A <dfn id=valid8>valid browsing context name</dfn> is any string that
+ does not start with a U+005F LOW LINE character, or a string that
+ case-insensitively <!-- ASCII --> matches one of: <!--<code
+ title="">_blank</code>,-->
+ <code title="">_self</code>, <code title="">_parent</code>, or <code
+ title="">_top</code>. (Names starting with an underscore are reserved for
+ special keywords.)
+
+ <p><dfn id=the-rules>The rules for choosing a browsing context given a
+ browsing context name</dfn> are as follows. The rules assume that they are
+ being applied in the context of a <a href="#browsing0">browsing
+ context</a>.
+
+ <ol>
+ <li>
+ <p>If the given browsing context name is the empty string or <code
+ title="">_self</code>, then the chosen browsing context must be the
+ current one.
+
+ <li>
+ <p>If the given browsing context name is <code title="">_parent</code>,
+ then the chosen browsing context must be the <a
+ href="#parent"><em>parent</em> browsing context</a> of the current one,
+ unless there isn't one, in which case the chosen browsing context must
+ be the current browsing context.
+
+ <li>
+ <p>If the given browsing context name is <code title="">_top</code>, then
+ the chosen browsing context must be the most <a
+ href="#top-level">top-level browsing context</a> of the current one.
+
+ <li>
+ <p>If the given browsing context name is not <code title="">_blank</code>
+ and there exists a browsing context whose <a href="#browsing1"
+ title="browsing context name">name</a> is the same as the given browsing
+ context name, and one of the following is true:
+
+ <ul>
+ <li>Either the <a href="#origin0">origin</a> of that browsing context's
+ <a href="#active">active document</a> is the same as the <a
+ href="#origin0">origin</a> of the current browsing context's <a
+ href="#active">active document</a>,
+
+ <li>Or that browsing context is an <a href="#auxiliary0">auxiliary
+ browsing context</a> and its <a href="#opener">opener browsing
+ context</a> is either the current browsing context or a browsing
+ context that the user agent considers is closely enough related to the
+ current browsing context,
+
+ <li>Or that browsing context is not a <a href="#top-level">top-level
+ browsing context</a>, and the <a href="#origin0">origin</a> of the <a
+ href="#active">active document</a> of the <a href="#parent">parent
+ browsing context</a> of that browsing context is the same as the <a
+ href="#origin0">origin</a> of the current browsing context's <a
+ href="#active">active document</a>,
+ </ul>
+
+ <p>...and the user agent determines that the two browsing contexts are
+ related enough that it is ok if they reach each other, then that
+ browsing context must be the chosen one. If there are multiple matching
+ browsing contexts, the user agent should select one in some arbitrary
+ consistent manner, such as the most recently opened, most recently
+ focused, or more closely related.</p>
+
+ <li>
+ <p>Otherwise, a new browsing context is being requested, and what happens
+ depends on the user agent's configuration and/or abilities:</p>
+
+ <dl>
+ <dt>If the user agent has been configured such that in this instance it
+ will create a new browsing context
+
+ <dd>A new <a href="#auxiliary0">auxiliary browsing context</a> must be
+ created, with the <a href="#opener">opener browsing context</a> being
+ the current one. If the given browsing context name is not <code
+ title="">_blank</code>, then the new auxiliary browsing context's name
+ must be the given browsing context name (otherwise, it has no name).
+ The chosen browsing context must be this new browsing context. If it is
+ immediately <a href="#navigate" title=navigate>navigated</a>, then the
+ navigation will be done with <a href="#replacement">replacement
+ enabled</a>.
+
+ <dt>If the user agent has been configured such that in this instance it
+ will reuse the current browsing context
+
+ <dd>The chosen browsing context is the current browsing context.
+
+ <dt>If the user agent has been configured such that in this instance it
+ will not find a browsing context
+
+ <dd>There must not be a chosen browsing context.
+ </dl>
+ </ol>
+
+ <h3 id=the-default0><span class=secno>4.2. </span>The default view</h3>
+
+ <p>The <code>AbstractView</code> object of <a href="#default3"
+ title="default view">default views</a> must also implement the <code><a
+ href="#window">Window</a></code> object.
+
+ <pre class=idl>interface <dfn id=window>Window</dfn> {
+ // the current browsing context
+ readonly attribute <a href="#window">Window</a> <a href="#window0" title=dom-window>window</a>;
+ readonly attribute <a href="#window">Window</a> <a href="#self" title=dom-self>self</a>;
+ attribute DOMString <a href="#name3" title=dom-name>name</a>;
+ readonly attribute <a href="#location2">Location</a> <a href="#location0" title=dom-document-location>location</a>;
+ readonly attribute <a href="#history1">History</a> <a href="#history0" title=dom-history>history</a>;
+ readonly attribute <a href="#undomanager">UndoManager</a> <a href="#undomanager0" title=dom-undoManager>undoManager</a>;
+ <a href="#selection1">Selection</a> <a href="#getselection" title=dom-getSelection>getSelection</a>();
+
+ // the user agent
+ readonly attribute <a href="#clientinformation">ClientInformation</a> <a href="#navigator" title=dom-navigator>navigator</a>; <!-- XXX IE6 also has window.clientInformation pointing to this same object -->
+ readonly attribute <a href="#storage2">Storage</a> <a href="#sessionstorage" title=dom-sessionStorage>sessionStorage</a>;
+ readonly attribute <a href="#storagelist">StorageList</a> <a href="#globalstorage" title=dom-globalStorage>globalStorage</a>;
+ <a href="#resultset">ResultSet</a> <a href="#executesql" title=dom-executeSql>executeSql</a>(in DOMString sqlStatement, <var title="">arguments...</var>);
+
+ // modal user prompts
+ void <a href="#alert" title=dom-alert>alert</a>(in DOMString message);
+ boolean <a href="#confirm" title=dom-confirm>confirm</a>(in DOMString message);
+ DOMString <a href="#prompt" title=dom-prompt>prompt</a>(in DOMString message);
+ DOMString <a href="#prompt" title=dom-prompt>prompt</a>(in DOMString message, in DOMString default);
+ void <a href="#print" title=dom-print>print</a>();
+
+ // other browsing contexts
+ readonly attribute <a href="#window">Window</a> <a href="#frames" title=dom-frames>frames</a>;
+ readonly attribute unsigned long <a href="#length5" title=dom-length>length</a>;
+ readonly attribute <a href="#window">Window</a> <a href="#opener0" title=dom-opener>opener</a>;
+ <a href="#window">Window</a> <a href="#open2" title=dom-open>open</a>();
+ <a href="#window">Window</a> <a href="#open2" title=dom-open>open</a>(in DOMString url);
+ <a href="#window">Window</a> <a href="#open2" title=dom-open>open</a>(in DOMString url, in DOMString target);
+ <a href="#window">Window</a> <a href="#open2" title=dom-open>open</a>(in DOMString url, in DOMString target, in DOMString features);
+ <a href="#window">Window</a> <a href="#open2" title=dom-open>open</a>(in DOMString url, in DOMString target, in DOMString features, in DOMString replace);
+
+ // <a href="#event3">event handler DOM attributes</a>
+ attribute <span>EventListener</span> <a href="#onabort" title=handler-onabort>onabort</a>;
+ attribute <span>EventListener</span> <a href="#onbeforeunload" title=handler-onbeforeunload>onbeforeunload</a>;
+ attribute <span>EventListener</span> <a href="#onblur" title=handler-onblur>onblur</a>;
+ attribute <span>EventListener</span> <a href="#onchange" title=handler-onchange>onchange</a>;
+ attribute <span>EventListener</span> <a href="#onclick" title=handler-onclick>onclick</a>;
+ attribute <span>EventListener</span> <a href="#oncontextmenu" title=handler-oncontextmenu>oncontextmenu</a>;
+ attribute <span>EventListener</span> <a href="#ondblclick" title=handler-ondblclick>ondblclick</a>;
+ attribute <span>EventListener</span> <a href="#ondrag" title=handler-ondrag>ondrag</a>;
+ attribute <span>EventListener</span> <a href="#ondragend" title=handler-ondragend>ondragend</a>;
+ attribute <span>EventListener</span> <a href="#ondragenter" title=handler-ondragenter>ondragenter</a>;
+ attribute <span>EventListener</span> <a href="#ondragleave" title=handler-ondragleave>ondragleave</a>;
+ attribute <span>EventListener</span> <a href="#ondragover" title=handler-ondragover>ondragover</a>;
+ attribute <span>EventListener</span> <a href="#ondragstart" title=handler-ondragstart>ondragstart</a>;
+ attribute <span>EventListener</span> <a href="#ondrop" title=handler-ondrop>ondrop</a>;
+ attribute <span>EventListener</span> <a href="#onerror" title=handler-onerror>onerror</a>;
+ attribute <span>EventListener</span> <a href="#onfocus" title=handler-onfocus>onfocus</a>;
+ attribute <span>EventListener</span> <a href="#onkeydown" title=handler-onkeydown>onkeydown</a>;
+ attribute <span>EventListener</span> <a href="#onkeypress" title=handler-onkeypress>onkeypress</a>;
+ attribute <span>EventListener</span> <a href="#onkeyup" title=handler-onkeyup>onkeyup</a>;
+ attribute <span>EventListener</span> <a href="#onload" title=handler-onload>onload</a>;
+ attribute <span>EventListener</span> <a href="#onmessage" title=handler-onmessage>onmessage</a>;
+ attribute <span>EventListener</span> <a href="#onmousedown" title=handler-onmousedown>onmousedown</a>;
+ attribute <span>EventListener</span> <a href="#onmousemove" title=handler-onmousemove>onmousemove</a>;
+ attribute <span>EventListener</span> <a href="#onmouseout" title=handler-onmouseout>onmouseout</a>;
+ attribute <span>EventListener</span> <a href="#onmouseover" title=handler-onmouseover>onmouseover</a>;
+ attribute <span>EventListener</span> <a href="#onmouseup" title=handler-onmouseup>onmouseup</a>;
+ attribute <span>EventListener</span> <a href="#onmousewheel" title=handler-onmousewheel>onmousewheel</a>;
+ attribute <span>EventListener</span> <a href="#onresize" title=handler-onresize>onresize</a>;
+ attribute <span>EventListener</span> <a href="#onscroll" title=handler-onscroll>onscroll</a>;
+ attribute <span>EventListener</span> <a href="#onselect" title=handler-onselect>onselect</a>;
+ attribute <span>EventListener</span> <a href="#onsubmit" title=handler-onsubmit>onsubmit</a>;
+ attribute <span>EventListener</span> <a href="#onunload" title=handler-onunload>onunload</a>;
+};</pre>
+ <!-- XXX XMLHttpRequest
+ http://msdn.microsoft.com/workshop/author/dhtml/reference/objects/obj_window.asp
+ http://www.mozilla.org/docs/dom/domref/dom_window_ref.html
+ http://lxr.mozilla.org/mozilla/source/dom/public/idl/base/nsIDOMWindow.idl
+ -->
+
+ <p>The <dfn id=window0 title=dom-window><code>window</code></dfn>, <dfn
+ id=frames title=dom-frames><code>frames</code></dfn>, and <dfn id=self
+ title=dom-self><code>self</code></dfn> DOM attributes must all return the
+ <code><a href="#window">Window</a></code> object itself.
+
+ <p>The <code><a href="#window">Window</a></code> object also provides the
+ scope for script execution. Each <code>Document</code> in a <a
+ href="#browsing0">browsing context</a> has an associated <dfn
+ id=list-of2>list of added properties</dfn> which, when a document is <a
+ href="#active" title="active document">active</a>, are available on the
+ <code>Document</code>'s <a href="#default3">default view</a> <code><a
+ href="#window">Window</a></code> object. A <code>Document</code> object's
+ <a href="#list-of2">list of added properties</a> must be empty when the
+ <code>Document</code> object is created.
+
+ <p>Objects implementing the <code><a href="#window">Window</a></code>
+ interface must also implement the <code>EventTarget</code> interface.
+
+ <p class=note><code><a href="#window">Window</a></code> objects also <a
+ href="#get" title=dom-item>have an implicit [[Get]] method</a> which
+ returns <span>nested browsing contexts</span>.
+
+ <h4 id=security1><span class=secno>4.2.1. </span>Security</h4>
+
+ <p>User agents must raise a <a href="#security8">security exception</a>
+ whenever any of the members of a <code><a href="#window">Window</a></code>
+ object are accessed by scripts whose <a href="#origin0">origin</a> is not
+ the same as the <code><a href="#window">Window</a></code> object's <a
+ href="#browsing0">browsing context</a>'s <a href="#active">active
+ document</a>'s origin, with the following exceptions:
+
+ <ul>
+ <li>The <code title=dom-location><a href="#location1">location</a></code>
+ object
+ </ul>
+
+ <p>User agents must not allow scripts to override the <code
+ title=dom-location><a href="#location1">location</a></code> object's
+ setter.
+
+ <h4 id=constructors><span class=secno>4.2.2. </span>Constructors</h4>
+
+ <p>All <code><a href="#window">Window</a></code> objects must provide the
+ following constructors:
+
+ <dl>
+ <dt><dfn id=audio2 title=dom-audio><code>Audio()</code></dfn>
+
+ <dt><dfn id=audio3 title=dom-audio-s><code>Audio(<var
+ title="">src</var>)</code></dfn>
+
+ <dd>
+ <p>When invoked as constructors, these must return a new <code><a
+ href="#htmlaudioelement">HTMLAudioElement</a></code> object (a new
+ <code><a href="#audio1">audio</a></code> element). If the <var
+ title=src>src</var> argument is present, the object created must have
+ its <code title=dom-media-src><a href="#src6">src</a></code> content
+ attribute set to the provided value.
+
+ <dt><dfn id=image0 title=dom-image><code>Image()</code></dfn>
+
+ <dt><dfn id=imagein title=dom-image-w><code>Image(in unsigned long <var
+ title="">w</var>)</code></dfn>
+
+ <dt><dfn id=imagein0 title=dom-image-wh><code>Image(in unsigned long <var
+ title="">w</var>, in unsigned long <var title="">h</var>)</code></dfn>
+
+ <dd>
+ <p>When invoked as constructors, these must return a new <code><a
+ href="#htmlimageelement">HTMLImageElement</a></code> object (a new
+ <code><a href="#img">img</a></code> element). If the <var
+ title="">h</var> argument is present, the new object's <code
+ title=attr-img-height><a href="#height">height</a></code> content
+ attribute must be set to <var title="">h</var>. If the <var
+ title="">w</var> argument is present, the new object's <code
+ title=attr-img-width><a href="#width">width</a></code> content attribute
+ must be set to <var title="">w</var>.
+
+ <dt><dfn id=option title=dom-option><code>Option()</code></dfn>
+
+ <dt><dfn id=optionin title=dom-option-n><code>Option(in DOMString <var
+ title="">name</var>)</code></dfn>
+
+ <dt><dfn id=optionin0 title=dom-option-nv><code>Option(in DOMString <var
+ title="">name</var>, in DOMString <var title="">value</var>)</code></dfn>
+
+ <dd>
+ <p>When invoked as constructors, these must return a new
+ <code>HTMLOptionElement</code> object (a new <code>option</code>
+ element). <span class=big-issue>need to define argument
+ processing</span>
+ </dl>
+
+ <p class=big-issue>And when constructors are invoked but without using the
+ constructor syntax...?
+
+ <h4 id=apis-for><span class=secno>4.2.3. </span>APIs for creating and
+ navigating browsing contexts by name</h4>
+
+ <p>The <dfn id=open2 title=dom-open><code>open()</code></dfn> method on
+ <code><a href="#window">Window</a></code> objects provides a mechanism for
+ <a href="#navigate" title=navigate>navigating</a> an existing <a
+ href="#browsing0">browsing context</a> or opening and navigating an <a
+ href="#auxiliary0">auxiliary browsing context</a>.
+
+ <p>The method has four arguments, though they are all optional.
+
+ <p>The first argument, <var title="">url</var>, gives a URI (or IRI) for a
+ page to load in the browsing context. If no arguments are provided, then
+ the <var title="">url</var> argument defaults to
+ "<code>about:blank</code><!-- XXX xref -->". The argument must be resolved
+ to an absolute URI by <span class=big-issue>...</span>
+
+ <p>The second argument, <var title="">target</var>, specifies the <a
+ href="#browsing1" title="browsing context name">name</a> of the browsing
+ context that is to be navigated. It must be a <a href="#valid8">valid
+ browsing context name</a>. If fewer than two arguments are provided, then
+ the <var title="">target</var> argument defaults to the value
+ "<code>_blank</code>".
+
+ <p>The third argument, <var title="">features</var>, has no effect and is
+ supported for historical reasons only.
+
+ <p>The fourth argument, <var title="">replace</var>, specifies whether or
+ not the new page will <a href="#replacement" title="replacement
+ enabled">replace</a> the page currently loaded in the browsing context,
+ when <var title="">target</var> identifies an existing browsing context
+ (as opposed to leaving the current page in the browsing context's <a
+ href="#session">session history</a>). When three or fewer arguments are
+ provided, <var title="">replace</var> defaults to false.
+
+ <p>When the method is invoked, the user agent must first select a <a
+ href="#browsing0">browsing context</a> to navigate by applying <a
+ href="#the-rules">the rules for choosing a browsing context given a
+ browsing context name</a> using the <var title="">target</var> argument as
+ the name and the <a href="#browsing0">browsing context</a> of the script
+ as the context in which the algorithm is executed, unless the user has
+ indicated a preference, in which case the browsing context to navigate may
+ instead be the one indicated by the user.
+
+ <p class=example>For example, suppose there is a user agent that supports
+ control-clicking a link to open it in a new tab. If a user clicks in that
+ user agent on an element whose <code title=handler-onclick><a
+ href="#onclick">onclick</a></code> handler uses the <code
+ title=dom-open><a href="#open2">window.open()</a></code> API to open a
+ page in an iframe, but, while doing so, holds the control key down, the
+ user agent could override the selection of the target browsing context to
+ instead target a new tab.
+
+ <p>Then, the user agent must <a href="#navigate">navigate</a> the selected
+ <a href="#browsing0">browsing context</a> to the URI given in <var
+ title="">url</var>. If the <var title="">replace</var> is true, then <a
+ href="#replacement" title="replacement enabled">replacement must be
+ enabled</a>; otherwise, it must not be enabled unless the <a
+ href="#browsing0">browsing context</a> was just created as part of <a
+ href="#the-rules">the rules for choosing a browsing context given a
+ browsing context name</a>.
+
+ <p>The <dfn id=name3 title=dom-name><code>name</code></dfn> attribute of
+ the <code><a href="#window">Window</a></code> object must, on getting,
+ return the current name of the <a href="#browsing0">browsing context</a>,
+ and, on setting, set the name of the <a href="#browsing0">browsing
+ context</a> to the new value.
+
+ <p class=note>The name <a href="#resetBCName">gets reset</a> when the
+ browsing context is navigated to another domain.
+
+ <h4 id=accessing><span class=secno>4.2.4. </span>Accessing other browsing
+ contexts</h4>
+
+ <p>In ECMAScript implementations, objects that implement the <code><a
+ href="#window">Window</a></code> interface must have a <dfn id=get
+ title=dom-item>[[Get]]</dfn> method that, when invoked with a property
+ name that is a number <var title="">i</var>, returns the <var
+ title="">i</var>th <a href="#child">child browsing context</a> of the <a
+ href="#active" title="active document">active</a> <code>Document</code>,
+ sorted in document order of the elements nesting those browsing contexts.
+
+ <p>The <dfn id=length5 title=dom-length><code>length</code></dfn> DOM
+ attribute on the <code><a href="#window">Window</a></code> interface must
+ return the number of <a href="#child" title="child browsing context">child
+ browsing contexts</a> of the <a href="#active" title="active
+ document">active</a> <code>Document</code>.
+
+ <h3 id=history><span class=secno>4.3. </span>Session history and navigation</h3>
+
+ <h4 id=the-session><span class=secno>4.3.1. </span>The session history of
+ browsing contexts</h4>
+
+ <p>The sequence of <code>Document</code>s in a <a
+ href="#browsing0">browsing context</a> is its <dfn id=session>session
+ history</dfn>.
+
+ <p><code><a href="#history1">History</a></code> objects provide a
+ representation of the pages in the session history of <a href="#browsing0"
+ title="browsing context">browsing contexts</a>. Each browsing context has
+ a distinct session history.
+
+ <p>Each <code>Document</code> object in a browsing context's session
+ history is associated with a unique instance of the <code><a
+ href="#history1">History</a></code> object, although they all must model
+ the same underlying session history.
+
+ <p>The <dfn id=history0 title=dom-history><code>history</code></dfn>
+ attribute of the <code><a href="#window">Window</a></code> interface must
+ return the object implementing the <code><a
+ href="#history1">History</a></code> interface for that <code><a
+ href="#window">Window</a></code> object's <a href="#active">active
+ document</a>.
+
+ <p><code><a href="#history1">History</a></code> objects represent their <a
+ href="#browsing0">browsing context</a>'s session history as a flat list of
+ URIs and <a href="#state" title="state object">state objects</a>. (This
+ does not imply that the UI need be linear. See the <a
+ href="#history-notes">notes below</a>.)
+
+ <p>Typically, the history list will consist of only URIs. However, a page
+ can <a href="#pushstate" title=dom-history-pushState>add</a> <dfn id=state
+ title="state object">state objects</dfn> between its entry in the session
+ history and the next ("forward") entry. These are then <a href="#popstate"
+ title=event-popstate>returned to the script</a> when the user (or script)
+ goes back in the history, thus enabling authors to use the "navigation"
+ metaphor even in one-page applications.
+
+ <p>Entries that consist of <a href="#state" title="state object">state
+ objects</a> share the same <code>Document</code> as the entry for the URI
+ itself. Contiguous entries that differ just by fragment identifier must
+ also share the same <code>Document</code>.
+
+ <p class=note>All entries that share the same <code>Document</code> (and
+ that are therefore merely different states of one particular document) are
+ contiguous by definition.
+
+ <p>At any point, one of the entries in the session history is the <dfn
+ id=current0>current entry</dfn>. This is the entry representing the <a
+ href="#active">active document</a> of the <a href="#browsing0">browsing
+ context</a>. The <a href="#current0">current entry</a> is usually an entry
+ for the <a href="#href5" title=dom-location-href>location</a> of the
+ <code>Document</code>. However, it can also be one of the entries for <a
+ href="#state" title="state object">state objects</a> added to the history
+ by that document.
+
+ <p>User agents may <dfn id=discard>discard</dfn> the DOMs of entries other
+ than the <a href="#current0">current entry</a> that are not referenced
+ from any script, reloading the pages afresh when the user or script
+ navigates back to such pages. This specification does not specify when
+ user agents should discard pages' DOMs and when they should cache them.
+ See the section on the <code title=event-load><a
+ href="#load0">load</a></code> and <code title=event-unload>unload</code>
+ events for more details.</p>
+ <!-- XXX crossref! -->
+
+ <p>Entries that have had their DOM discarded must, for the purposes of the
+ algorithms given below, act as if they had not. When the user or script
+ navigates back or forwards to a page which has no in-memory DOM objects,
+ any other entries that shared the same <code>Document</code> object with
+ it must share the new object as well.
+
+ <p>When a user agent discards the DOM from an entry in the session history,
+ it must also discard all the entries from the first state object entry for
+ that <code>Document</code> object up to and including the last entry for
+ that <code>Document</code> object (including any non-state-object entries
+ in that range, such as entries where the user navigated using fragment
+ identifiers). These entries are not recreated if the user or script
+ navigates back to the page. If there are no state object entries for that
+ <code>Document</code> object then no entries are removed.
+
+ <h4 id=the-history><span class=secno>4.3.2. </span>The <code><a
+ href="#history1">History</a></code> interface</h4>
+
+ <pre class=idl>interface <dfn id=history1>History</dfn> {
+ readonly attribute long <a href="#length6" title=dom-history-length>length</a>;
+ void <a href="#godelta" title=dom-history-go>go</a>(in long delta);
+ void <a href="#go" title=dom-history-go-0>go</a>();
+ void <a href="#back" title=dom-history-back>back</a>();
+ void <a href="#forward" title=dom-history-forward>forward</a>();
+ void <a href="#pushstate" title=dom-history-pushState>pushState</a>(in DOMObject data);
+ void <a href="#clearstate" title=dom-history-clearState>clearState</a>();
+};</pre>
+
+ <p>The <dfn id=length6 title=dom-history-length><code>length</code></dfn>
+ attribute of the <code><a href="#history1">History</a></code> interface
+ must return the number of entries in this <a href="#session">session
+ history</a>.
+
+ <p>The actual entries are not accessible from script.
+
+ <p>The <dfn id=godelta title=dom-history-go><code>go(<var
+ title="">delta</var>)</code></dfn> method causes the UA to move the number
+ of steps specified by <var title="">delta</var> in the session history.
+
+ <p>If the index of the <a href="#current0">current entry</a> plus <var
+ title="">delta</var> is less than zero or greater than or equal to the <a
+ href="#length6" title=dom-history-length>number of items in the session
+ history</a>, then the user agent must do nothing.
+
+ <p>If the <var title="">delta</var> is zero, then the user agent must act
+ as if the <code title=dom-location-reload>location.reload()</code> method
+ was called instead.
+
+ <p>Otherwise, the user agent must cause the current <a
+ href="#browsing0">browsing context</a> to <a href="#traverse">traverse the
+ history</a> to the specified entry, as described below. The <dfn
+ id=specified>specified entry</dfn> is the one whose index equals the index
+ of the <a href="#current0">current entry</a> plus <var
+ title="">delta</var>.
+
+ <p>When a user agent is required to <dfn id=traverse>traverse the
+ history</dfn> to a specified entry, the user agent must act as follows:
+
+ <ol>
+ <li>If there is no longer a <code>Document</code> object for the entry in
+ question, the user agent must <a href="#navigate">navigate</a> the
+ browsing context to the location for that entry to perform an <a
+ href="#entry">entry update</a> of that entry, and abort these steps. The
+ "<a href="#navigate">navigate</a>" algorithm reinvokes this "traverse"
+ algorithm to complete the traversal, at which point there <em>is</em> a
+ <code>Document</code> object and so this step gets skipped.
+
+ <li>
+ <p>If appropriate, update the <a href="#current0">current entry</a> in
+ the <a href="#browsing0">browsing context</a>'s <code>Document</code>
+ object's <code><a href="#history1">History</a></code> object to reflect
+ any state that the user agent wishes to persist.</p>
+
+ <p class=example>For example, some user agents might want to persist the
+ scroll position, or the values of form controls.</p>
+
+ <li>
+ <p>If there are any entries with state objects between the <a
+ href="#current0">current entry</a> and the <a
+ href="#specified">specified entry</a> (not inclusive), then the user
+ agent must iterate through every entry between the current entry and the
+ specified entry, starting with the entry closest to the current entry,
+ and ending with the one closest to the specified entry. For each entry,
+ if the entry is a state object, the user agent must <a
+ href="#activating0">activate the state object</a>.
+
+ <li>
+ <p>If the <a href="#specified">specified entry</a> has a different
+ <code>Document</code> object than the <a href="#current0">current
+ entry</a> then the user agent must follow the following substeps:</p>
+
+ <ol>
+ <li>The user agent must move any properties that have been added to the
+ browsing context's default view's <code><a
+ href="#window">Window</a></code> object to the <a href="#active">active
+ document</a>'s <code>Document</code>'s <a href="#list-of2">list of
+ added properties</a>.
+
+ <li>If the browsing context is a <a href="#top-level">top-level browsing
+ context</a> (and not an <a href="#auxiliary0">auxiliary browsing
+ context</a>), and the <a href="#origin0">origin</a> of the
+ <code>Document</code> of the <a href="#specified">specified entry</a>
+ is not the same as the <a href="#origin0">origin</a> of the
+ <code>Document</code> of the <a href="#current0">current entry</a>,
+ then the following sub-sub-steps must be run:
+ <ol>
+ <li>The current <a href="#browsing1">browsing context name</a> must be
+ stored with all the entries in the history that are associated with
+ <code>Document</code> objects with the same <a
+ href="#origin0">origin</a> as the <a href="#active">active
+ document</a> <em>and</em> that are contiguous with the <a
+ href="#current0">current entry</a>.
+
+ <li id=resetBCName>The browsing context's <a
+ href="#browsing1">browsing context name</a> must be unset.
+ </ol>
+
+ <li>The user agent must make the <a href="#specified">specified
+ entry</a>'s <code>Document</code> object the <a href="#active">active
+ document</a> of the <a href="#browsing0">browsing context</a>.
+
+ <li>If the <a href="#specified">specified entry</a> has a <a
+ href="#browsing1">browsing context name</a> stored with it, then the
+ following sub-sub-steps must be run:
+ <ol>
+ <li>The browsing context's <a href="#browsing1">browsing context
+ name</a> must be set to the name stored with the specified entry.
+
+ <li>Any <a href="#browsing1">browsing context name</a> stored with the
+ entries in the history that are associated with <code>Document</code>
+ objects with the same <a href="#origin0">origin</a> as the new <a
+ href="#active">active document</a>, and that are contiguous with the
+ specified entry, must be cleared.
+ </ol>
+
+ <li>The user agent must move any properties that have been added to the
+ <a href="#active">active document</a>'s <code>Document</code>'s <a
+ href="#list-of2">list of added properties</a> to the browsing context's
+ default view's <code><a href="#window">Window</a></code> object.
+ </ol>
+
+ <li>
+ <p>If the specified entry is a state object, the user agent must <a
+ href="#activating0" title="activate the state object">activate that
+ state object</a>.
+
+ <li>
+ <p>User agents may also update other aspects of the document view when
+ the location changes in this way, for instance the scroll position,
+ values of form fields, etc.
+
+ <li>
+ <p>The <a href="#current0">current entry</a> is now the <a
+ href="#specified">specified entry</a>.
+ </ol>
+
+ <p class=big-issue>how does the changing of the global attributes affect
+ .watch() when seen from other Windows?
+
+ <p>When the user navigates through a <a href="#browsing0">browsing
+ context</a>, e.g. using a browser's back and forward buttons, the user
+ agent must translate this action into the equivalent invocations of the
+ <code title=dom-history-go><a href="#godelta">history.go(<var
+ title="">delta</var>)</a></code> method on the various affected <code
+ title=dom-window><a href="#window0">window</a></code> objects.
+
+ <p>Some of the other members of the <code><a
+ href="#history1">History</a></code> interface are defined in terms of the
+ <code title=dom-history-go><a href="#godelta">go()</a></code> method, as
+ follows:
+
+ <table>
+ <tbody>
+ <tr>
+ <th>Member
+
+ <th>Definition
+
+ <tr>
+ <td><dfn id=go title=dom-history-go-0><code>go()</code></dfn>
+
+ <td>Must do the same as <code title=dom-history-go><a
+ href="#godelta">go(0)</a></code>
+
+ <tr>
+ <td><dfn id=back title=dom-history-back><code>back()</code></dfn>
+
+ <td>Must do the same as <code title=dom-history-go><a
+ href="#godelta">go(-1)</a></code>
+
+ <tr>
+ <td><dfn id=forward
+ title=dom-history-forward><code>forward()</code></dfn>
+
+ <td>Must do the same as <code title=dom-history-go><a
+ href="#godelta">go(1)</a></code>
+ </table>
+
+ <p>The <dfn id=pushstate title=dom-history-pushState><code>pushState(<var
+ title="">data</var>)</code></dfn> method adds a state object to the
+ history.
+
+ <p>When this method is invoked, the user agent must first remove from the
+ <a href="#session">session history</a> any entries for that
+ <code>Document</code> from the entry after the <a href="#current0">current
+ entry</a> up to the last entry in the session history that references the
+ same <code>Document</code> object, if any. If the <a
+ href="#current0">current entry</a> is the last entry in the session
+ history, or if there are no entries after the <a href="#current0">current
+ entry</a> that reference the same <code>Document</code> object, then no
+ entries are removed.
+
+ <p>Then, the user agent must add a state object entry to the session
+ history, after the <a href="#current0">current entry</a>, with the
+ specified <var title="">data</var> as the state object.
+
+ <p>Finally, the user agent must update the <a href="#current0">current
+ entry</a> to be this newly added entry.
+
+ <p class=big-issue>There has been a suggestion that pushState() should take
+ a URI and a string; the URI to allow for the page to be bookmarked, and
+ the string to allow the UA to give the page a meaningful title in the
+ history state, if it shows history state.</p>
+ <!-- XXX could have four variants of pushState to allow
+ with/without URI and with/without title. Or maybe URI only makes
+ sense if there is a title. -->
+
+ <p>User agents may limit the number of state objects added to the session
+ history per page. If a page hits the UA-defined limit, user agents must
+ remove the entry immediately after the first entry for that
+ <code>Document</code> object in the session history after having added the
+ new entry. (Thus the state history acts as a FIFO buffer for eviction, but
+ as a LIFO buffer for navigation.)
+
+ <p>The <dfn id=clearstate
+ title=dom-history-clearState><code>clearState()</code></dfn> method
+ removes all the state objects for the <code>Document</code> object from
+ the session history.
+
+ <p>When this method is invoked, the user agent must remove from the session
+ history all the entries from the first state object entry for that
+ <code>Document</code> object up to the last entry that references that
+ same <code>Document</code> object, if any.
+
+ <p>Then, if the <a href="#current0">current entry</a> was removed in the
+ previous step, the <a href="#current0">current entry</a> must be set to
+ the last entry for that <code>Document</code> object in the session
+ history.
+
+ <h4 id=activating><span class=secno>4.3.3. </span><dfn id=activating0
+ title="activate the state object">Activating state objects</dfn></h4>
+
+ <p>When a state object in the session history is activated (which happens
+ in the cases described above), the user agent must fire a <dfn id=popstate
+ title=event-popstate><code>popstate</code></dfn> event in no namespace on
+ <a href="#the-body0">the body element</a> using the <code><a
+ href="#popstateevent">PopStateEvent</a></code> interface, with the state
+ object in the <code title=dom-PopStateEvent-state><a
+ href="#state0">state</a></code> attribute. This event bubbles but is not
+ cancelable and has no default action.</p>
+ <!-- XXX onpopstate should be defined somewhere -->
+
+ <pre
+ class=idl>interface <dfn id=popstateevent>PopStateEvent</dfn> : Event {
+ readonly attribute DOMObject <a href="#state0" title=dom-PopStateEvent-state>state</a>;
+ void <a href="#initpopstateevent" title=dom-PopStateEvent-initPopStateEvent>initPopStateEvent</a>(in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMObject stateArg);
+ void <a href="#initpopstateeventns" title=dom-PopStateEvent-initPopStateEventNS>initPopStateEventNS</a>(in DOMString namespaceURIArg, in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMObject stateArg);
+};</pre>
+
+ <p>The <dfn id=initpopstateevent
+ title=dom-PopStateEvent-initPopStateEvent><code>initPopStateEvent()</code></dfn>
+ and <dfn id=initpopstateeventns
+ title=dom-PopStateEvent-initPopStateEventNS><code>initPopStateEventNS()</code></dfn>
+ methods must initialise the event in a manner analogous to the
+ similarly-named methods in the DOM3 Events interfaces. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The <dfn id=state0
+ title=dom-PopStateEvent-state><code>state</code></dfn> attribute
+ represents the context information for the event.
+
+ <p class=big-issue>Should we coalesce these events if they occur while the
+ page is away? (e.g. during traversal -- see above)
+
+ <h4 id=the-location><span class=secno>4.3.4. </span>The <code><a
+ href="#location2">Location</a></code> interface</h4>
+
+ <p>Each <code>Document</code> object in a browsing context's session
+ history is associated with a unique instance of a <code><a
+ href="#location2">Location</a></code> object.
+
+ <p>The <dfn id=location0
+ title=dom-document-location><code>location</code></dfn> attribute of the
+ <code><a href="#htmldocument">HTMLDocument</a></code> interface must
+ return the <code><a href="#location2">Location</a></code> object for that
+ <code>Document</code> object.
+
+ <p>The <dfn id=location1 title=dom-location><code>location</code></dfn>
+ attribute of the <code><a href="#window">Window</a></code> interface must
+ return the <code><a href="#location2">Location</a></code> object for that
+ <code><a href="#window">Window</a></code> object's <a
+ href="#active">active document</a>.
+
+ <p><code><a href="#location2">Location</a></code> objects provide a
+ representation of the URI of their document, and allow the <a
+ href="#current0">current entry</a> of the <a href="#browsing0">browsing
+ context</a>'s session history to be changed, by adding or replacing
+ entries in the <code title=dom-history><a
+ href="#history0">history</a></code> object.
+
+ <pre class=idl>interface <dfn id=location2>Location</dfn> {
+ readonly attribute DOMString <a href="#href5" title=dom-location-href>href</a>;
+ void <a href="#assign" title=dom-location-assign>assign</a>(in DOMString url);
+ void <a href="#replace" title=dom-location-replace>replace</a>(in DOMString url);
+ void <span title=dom-location-reload>reload</span>();
+
+ // <a href="#uri-decomposition">URI decomposition attributes</a> <!-- blame brendan for these "innovative" names -->
+ readonly attribute DOMString <a href="#protocol" title=dom-location-protocol>protocol</a>;
+ readonly attribute DOMString <a href="#host" title=dom-location-host>host</a>;
+ readonly attribute DOMString <a href="#hostname" title=dom-location-hostname>hostname</a>;
+ readonly attribute DOMString <a href="#port" title=dom-location-port>port</a>;
+ readonly attribute DOMString <a href="#pathname" title=dom-location-pathname>pathname</a>;
+ readonly attribute DOMString <a href="#search" title=dom-location-search>search</a>;
+ readonly attribute DOMString <a href="#hash" title=dom-location-hash>hash</a>;
+};</pre>
+
+ <p>In the ECMAScript DOM binding, objects implementing this interface must
+ stringify to the same value as the <code title=dom-location-href><a
+ href="#href5">href</a></code> attribute.
+
+ <p id=settingLocation>In the ECMAScript DOM binding, the <code
+ title="">location</code> members of the <code><a
+ href="#htmldocument">HTMLDocument</a></code> and <code><a
+ href="#window">Window</a></code> interfaces behave as if they had a
+ setter: user agents must treat attempts to set these <code
+ title="">location</code> attributes as attempts at setting the <code
+ title=dom-location-href><a href="#href5">href</a></code> attribute of the
+ relevant <code><a href="#location2">Location</a></code> object instead.
+
+ <p>The <dfn id=href5 title=dom-location-href><code>href</code></dfn>
+ attribute returns the address of the page represented by the associated
+ <code>Document</code> object, as an absolute IRI reference.
+
+ <p>On setting, <!--XXX Mozilla does this, but IE doesn't. What
+ should we do?: the behaviour depends on the context in which the
+ script that set the attribute is running. If the script ran as the
+ direct result of the execution of a <code>script</code> element in
+ the document represented by the <code>Location</code> object's
+ associated <code>Document</code> object, then the user agent must
+ act as if the <code title="dom-location-replace">replace()</code>
+ method had been called with the new value as its
+ argument. Otherwise,-->
+ the user agent must act as if the <code title=dom-location-assign><a
+ href="#assign">assign()</a></code> method had been called with the new
+ value as its argument.</p>
+ <!-- XXX may wish to allow
+ replace instead as a UI improvement -->
+
+ <p>When the <dfn id=assign title=dom-location-assign><code>assign(<var
+ title="">url</var>)</code></dfn> method is invoked, the UA must <a
+ href="#navigate">navigate</a> the <a href="#browsing0">browsing
+ context</a> to the specified <var title="">url</var>.
+
+ <p>When the <dfn id=replace title=dom-location-replace><code>replace(<var
+ title="">url</var>)</code></dfn> method is invoked, the UA must <a
+ href="#navigate">navigate</a> to the specified <var title="">url</var>
+ with <a href="#replacement">replacement enabled</a>.
+
+ <p>Relative <var title="">url</var> arguments for <code
+ title=dom-location-assign><a href="#assign">assign()</a></code> and <code
+ title=dom-location-replace><a href="#replace">replace()</a></code> must be
+ resolved relative to the base URI of the script that made the method call.</p>
+ <!-- XXX what about if the base URI is data: or
+ javascript: or about: or something else without a way to resolve
+ base URIs? -->
+
+ <p>The <code><a href="#location2">Location</a></code> interface also has
+ the complement of <a href="#uri-decomposition">URI decomposition
+ attributes</a>, <dfn id=protocol
+ title=dom-location-protocol><code>protocol</code></dfn>, <dfn id=host
+ title=dom-location-host><code>host</code></dfn>, <dfn id=port
+ title=dom-location-port><code>port</code></dfn>, <dfn id=hostname
+ title=dom-location-hostname><code>hostname</code></dfn>, <dfn id=pathname
+ title=dom-location-pathname><code>pathname</code></dfn>, <dfn id=search
+ title=dom-location-search><code>search</code></dfn>, and <dfn id=hash
+ title=dom-location-hash><code>hash</code></dfn>. These must follow the
+ rules given for URI decomposition attributes, with the <a href="#input"
+ title=concept-uda-input>input</a> being the address of the page
+ represented by the associated <code>Document</code> object, as an absolute
+ IRI reference (same as the <code title=dom-location-href><a
+ href="#href5">href</a></code> attribute), and the <a href="#common3"
+ title=concept-uda-setter>common setter action</a> being the same as
+ setting the <code title=dom-location-href><a href="#href5">href</a></code>
+ attribute to the new output value.</p>
+ <!--
+ <dfn title="dom-location-reload"><code>reload()</code></dfn>
+ reload during resize event:
+ redisplay the current page (without reloading it). This
+ theoretically would have no effect but in practice can be useful to
+ work around rendering bugs.
+
+reload on shared Document updates all of them
+
+user reload must be equivalent to .reload()
+-->
+
+ <h5 id=security2><span class=secno>4.3.4.1. </span>Security</h5>
+
+ <p>User agents must raise a <a href="#security8">security exception</a>
+ whenever any of the members of a <code><a
+ href="#location2">Location</a></code> object are accessed by scripts whose
+ <a href="#origin0">origin</a> is not the same as the <code><a
+ href="#location2">Location</a></code> object's associated
+ <code>Document</code>'s origin, with the following exceptions:
+
+ <ul>
+ <li>The <code title=dom-location-href><a href="#href5">href</a></code>
+ setter
+ </ul>
+
+ <p>User agents must not allow scripts to override the <code
+ title=dom-location-href><a href="#href5">href</a></code> attribute's
+ setter.
+
+ <h4 id=history-notes><span class=secno>4.3.5. </span>Implementation notes
+ for session history</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>The <code><a href="#history1">History</a></code> interface is not meant
+ to place restrictions on how implementations represent the session history
+ to the user.
+
+ <p>For example, session history could be implemented in a tree-like manner,
+ with each page having multiple "forward" pages. This specification doesn't
+ define how the linear list of pages in the <code title=dom-history><a
+ href="#history0">history</a></code> object is derived from the actual
+ session history as seen from the user's perspective.
+
+ <p>Similarly, a page containing two <code><a
+ href="#iframe">iframe</a></code>s has a <code title=dom-history><a
+ href="#history0">history</a></code> object distinct from the <code><a
+ href="#iframe">iframe</a></code>s' <code title=dom-history><a
+ href="#history0">history</a></code> objects, despite the fact that typical
+ Web browsers present the user with just one "Back" button, with a session
+ history that interleaves the navigation of the two inner frames and the
+ outer page.
+
+ <p><strong>Security:</strong> It is suggested that to avoid letting a page
+ "hijack" the history navigation facilities of a UA by abusing <code
+ title=dom-history-pushState><a href="#pushstate">pushState()</a></code>,
+ the UA provide the user with a way to jump back to the previous page
+ (rather than just going back to the previous state). For example, the back
+ button could have a drop down showing just the pages in the session
+ history, and not showing any of the states. Similarly, an aural browser
+ could have two "back" commands, one that goes back to the previous state,
+ and one that jumps straight back to the previous page.
+
+ <p>In addition, a user agent could ignore calls to <code
+ title=dom-history-pushState><a href="#pushstate">pushState()</a></code>
+ that are invoked on a timer, or from event handlers that do not represent
+ a clear user action, or that are invoked in rapid succession.
+
+ <h3 id=links><span class=secno>4.4. </span>Links</h3>
+
+ <h4 id=hyperlink><span class=secno>4.4.1. </span>Hyperlink elements</h4>
+
+ <p>The <code><a href="#a">a</a></code>, <code><a
+ href="#area">area</a></code>, and <code><a href="#link">link</a></code>
+ elements can, in certain situations described in the definitions of those
+ elements, represent <dfn id=hyperlinks title=hyperlink>hyperlinks</dfn>.
+
+ <p>The <dfn id=href6 title=attr-hyperlink-href><code>href</code></dfn>
+ attribute on a hyperlink element must have a value that is a URI (or IRI).
+ This URI is the <em>destination resource</em> of the hyperlink.
+
+ <div class=note>
+ <p>The <code title=attr-hyperlink-href><a href="#href6">href</a></code>
+ attribute on <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements is not required; when those
+ elements do not have <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attributes they do not represent
+ hyperlinks.</p>
+
+ <p>The <code title=attr-link-href><a href="#href1">href</a></code>
+ attribute on the <code><a href="#link">link</a></code> element
+ <em>is</em> required, but whether a <code><a href="#link">link</a></code>
+ element represents a hyperlink or not depends on the value of the <code
+ title=attr-link-rel><a href="#rel">rel</a></code> attribute of that
+ element.</p>
+ </div>
+
+ <p>The <dfn id=target3
+ title=attr-hyperlink-target><code>target</code></dfn> attribute, if
+ present, must be a <a href="#valid8">valid browsing context name</a>. User
+ agents use this name when <a href="#following0">following hyperlinks</a>.
+
+ <p>The <dfn id=ping title=attr-hyperlink-ping><code>ping</code></dfn>
+ attribute, if present, gives the URIs of the resources that are interested
+ in being notified if the user follows the hyperlink. The value must be a
+ space-separated list of one or more URIs (or IRIs). The value is used by
+ the user agent when <a href="#following0">following hyperlinks</a>.
+
+ <p>For <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements that represent hyperlinks, the
+ relationship between the document containing the hyperlink and the
+ destination resource indicated by the hyperlink is given by the value of
+ the element's <dfn id=rel3 title=attr-hyperlink-rel><code>rel</code></dfn>
+ attribute, which must be an <a href="#unordered">unordered set of
+ space-separated tokens</a>. The <a href="#linkTypes">allowed values and
+ their meanings</a> are defined below. The <code
+ title=attr-hyperlink-rel><a href="#rel3">rel</a></code> attribute has no
+ default value. If the attribute is omitted or if none of the values in the
+ attribute are recognised by the UA, then the document has no particular
+ relationship with the destination resource other than there being a
+ hyperlink between the two.
+
+ <p>The <dfn id=media12 title=attr-hyperlink-media><code>media</code></dfn>
+ attribute describes for which media the target document was designed. It
+ is purely advisory. The value must be a valid media query. <a
+ href="#refsMQ">[MQ]</a> The default, if the <code
+ title=attr-hyperlink-media><a href="#media12">media</a></code> attribute
+ is omitted or has an invalid value, is <code>all</code>.
+
+ <p>The <dfn id=hreflang3
+ title=attr-hyperlink-hreflang><code>hreflang</code></dfn> attribute on
+ hyperlink elements, if present, gives the language of the linked resource.
+ It is purely advisory. The value must be a valid RFC 3066 language code.
+ <a href="#refsRFC3066">[RFC3066]</a> User agents must not consider this
+ attribute authoritative &mdash; upon fetching the resource, user agents
+ must only use language information associated with the resource to
+ determine its language, not metadata included in the link to the resource.
+
+ <p>The <dfn id=type17 title=attr-hyperlink-type><code>type</code></dfn>
+ attribute, if present, gives the MIME type of the linked resource. It is
+ purely advisory. The value must be a valid MIME type, optionally with
+ parameters. <a href="#refsRFC2046">[RFC2046]</a> User agents must not
+ consider the <code title=attr-hyperlink-type><a
+ href="#type17">type</a></code> attribute authoritative &mdash; upon
+ fetching the resource, user agents must not use metadata included in the
+ link to the resource to determine its type.
+
+ <h4 id=following><span class=secno>4.4.2. </span><dfn
+ id=following0>Following hyperlinks</dfn></h4>
+
+ <p>When a user <em>follows a hyperlink</em>, the user agent must <a
+ href="#navigate">navigate</a> a <a href="#browsing0">browsing context</a>
+ to the URI of the hyperlink.
+
+ <p>The URI of the hyperlink is the URI given by resolving the <code
+ title=attr-hyperlink-href><a href="#href6">href</a></code> attribute of
+ that hyperlink relative to the hyperlink's element. In the case of
+ server-side image maps, the URI of the hyperlink must further have its
+ <var><a href="#hyperlink2">hyperlink suffix</a></var> appended to it.
+
+ <p>If the user indicated a specific browsing context when following the
+ hyperlink, or if the user agent is configured to follow hyperlinks by
+ navigating a particular browsing context, then that must be the browsing
+ context that is navigated.
+
+ <p>Otherwise, if the hyperlink element is an <code><a
+ href="#a">a</a></code> or <code><a href="#area">area</a></code> element
+ that has a <code title=attr-hyperlink-target><a
+ href="#target3">target</a></code> attribute, then the browsing context
+ that is navigated must be chosen by applying <a href="#the-rules">the
+ rules for choosing a browsing context given a browsing context name</a>,
+ using the value of the <code title=attr-hyperlink-target><a
+ href="#target3">target</a></code> attribute as the browsing context name.
+ If these rules result in the creation of a new <a
+ href="#browsing0">browsing context</a>, it must be navigated with <a
+ href="#replacement">replacement enabled</a>.
+
+ <p>Otherwise, if the hyperlink element is a <a href="#sidebar0"
+ title=rel-sidebar-hyperlink>sidebar hyperlink</a> and the user agent
+ implements a feature that can be considered a secondary browsing context,
+ such a secondary browsing context may be selected as the browsing context
+ to be navigated.
+
+ <p>Otherwise, if the hyperlink element is an <code><a
+ href="#a">a</a></code> or <code><a href="#area">area</a></code> element
+ with no <code title=attr-hyperlink-target><a
+ href="#target3">target</a></code> attribute, but one of the child nodes of
+ <a href="#the-head0">the <code>head</code> element</a> is a <code><a
+ href="#base">base</a></code> element with a <code
+ title=attr-base-target><a href="#target">target</a></code> attribute, then
+ the browsing context that is navigated must be chosen by applying <a
+ href="#the-rules">the rules for choosing a browsing context given a
+ browsing context name</a>, using the value of the <code
+ title=attr-base-target><a href="#target">target</a></code> attribute of
+ the first such <code><a href="#base">base</a></code> element as the
+ browsing context name. If these rules result in the creation of a new <a
+ href="#browsing0">browsing context</a>, it must be navigated with <a
+ href="#replacement">replacement enabled</a>.
+
+ <p>Otherwise, the browsing context that must be navigated is the same
+ browsing context as the one which the hyperlink element itself is in.
+
+ <h5 id=hyperlink0><span class=secno>4.4.2.1. </span>Hyperlink auditing</h5>
+
+ <p>If an <code><a href="#a">a</a></code> or <code><a
+ href="#area">area</a></code> hyperlink element has a <code
+ title=attr-hyperlink-ping><a href="#ping">ping</a></code> attribute and
+ the user follows the hyperlink, the user agent must take the <code
+ title=attr-hyperlink-ping><a href="#ping">ping</a></code> attribute's
+ value, strip leading and trailing <a href="#space" title="space
+ character">spaces</a>, split the value on sequences of spaces, treat each
+ resulting part as a URI (resolving relative URIs according to the element's
+ base URI) and then should send a request to each of the resulting URIs.
+ This may be done in parallel with the primary request, and is independent
+ of the result of that request.
+
+ <p>User agents should allow the user to adjust this behaviour, for example
+ in conjunction with a setting that disables the sending of HTTP Referrer
+ headers. Based on the user's preferences, UAs may either <a
+ href="#ignored">ignore</a> the <code title=attr-hyperlink-ping><a
+ href="#ping">ping</a></code> attribute altogether, or selectively ignore
+ URIs in the list (e.g. ignoring any third-party URIs).
+
+ <p>For URIs that are HTTP URIs, the requests must be performed using the
+ POST method (with an empty entity body in the request). User agents must
+ ignore any entity bodies returned in the responses, but must, unless
+ otherwise specified by the user, honour the HTTP headers &mdash; in
+ particular, HTTP cookie headers. <a href="#refsRFC2965">[RFC2965]</a>
+
+ <p class=note>To save bandwidth, implementors might wish to consider
+ omitting optional headers such as <code>Accept</code> from these requests.
+
+ <p>When the <code title=attr-hyperlink-ping><a href="#ping">ping</a></code>
+ attribute is present, user agents should clearly indicate to the user that
+ following the hyperlink will also cause secondary requests to be sent in
+ the background, possibly including listing the actual target URIs.
+
+ <div class=note>
+ <p>The <code title=attr-hyperlink-ping><a href="#ping">ping</a></code>
+ attribute is redundant with pre-existing technologies like HTTP redirects
+ and JavaScript in allowing Web pages to track which off-site links are
+ most popular or allowing advertisers to track click-through rates.</p>
+
+ <p>However, the <code title=attr-hyperlink-ping><a
+ href="#ping">ping</a></code> attribute provides these advantages to the
+ user over those alternatives:</p>
+
+ <ul>
+ <li>It allows the user to see the final target URI unobscured.
+
+ <li>It allows the UA to inform the user about the out-of-band
+ notifications.
+
+ <li>It allows the paranoid user to disable the notifications without
+ losing the underlying link functionality.
+
+ <li>It allows the UA to optimise the use of available network bandwidth
+ so that the target page loads faster.
+ </ul>
+
+ <p>Thus, while it is possible to track users without this feature, authors
+ are encouraged to use the <code title=attr-hyperlink-ping><a
+ href="#ping">ping</a></code> attribute so that the user agent can improve
+ <!-- XXX optimise? --> the user experience.</p>
+ <!--
+ XXX need a better way to end that sentence. It's what I mean, but
+ it sounds kooky. -->
+ </div>
+
+ <h4 id=linkTypes><span class=secno>4.4.3. </span>Link types</h4>
+
+ <p>The following table summarises the link types that are defined by this
+ specification. This table is non-normative; the actual definitions for the
+ link types are given in the next few sections.
+
+ <p>In this section, the term <em>referenced document</em> refers to the
+ resource identified by the element representing the link, and the term
+ <em>current document</em> refers to the resource within which the element
+ representing the link finds itself.
+
+ <p>To determine which link types apply to a <code><a
+ href="#link">link</a></code>, <code><a href="#a">a</a></code>, or <code><a
+ href="#area">area</a></code> element, the element's <code
+ title="">rel</code> attribute must be <a href="#split" title="split a
+ string on spaces">split on spaces</a>. The resulting tokens are the link
+ types that apply to that element.
+
+ <table>
+ <thead>
+ <tr>
+ <th rowspan=2>Link type
+
+ <th colspan=2>Effect on...
+
+ <th rowspan=2>Brief description
+
+ <tr>
+ <th><code><a href="#link">link</a></code>
+
+ <th><code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code>
+
+ <tbody>
+ <tr>
+ <td><code title=rel-alternate><a href="#alternate">alternate</a></code></td>
+ <!-- second most used <link rel> value -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives alternate representations of the current document.
+
+ <tr>
+ <td><code title=rel-archives><a href="#archives">archives</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Provides a link to a collection of records, documents, or other
+ materials of historical interest.
+
+ <tr>
+ <td><code title=rel-author><a href="#author">author</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives a link to the current document's author.
+
+ <tr>
+ <td><code title=rel-bookmark><a href="#bookmark">bookmark</a></code></td>
+ <!-- fourth most used <a rel> value -->
+
+ <td><em>not allowed</em>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives the permalink for the nearest ancestor section.
+
+ <tr>
+ <td><code title=rel-contact><a href="#contact">contact</a></code></td>
+ <!-- 8th most used <a rel> value -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives a link to contact information for the current document.
+
+ <tr>
+ <td><code title=rel-external><a href="#external">external</a></code></td>
+ <!-- fifth and sixth most used <a rel> value (sixth is "external nofollow") -->
+
+ <td><em>not allowed</em>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the referenced document is not part of the same site
+ as the current document.
+
+ <tr>
+ <td><code title=rel-feed><a href="#feed">feed</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives the address of a syndication feed for the current document.
+
+ <tr>
+ <td><code title=rel-first><a href="#first">first</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the current document is a part of a series, and that
+ the first document in the series is the referenced document.
+
+ <tr>
+ <td><code title=rel-help><a href="#help">help</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Provides a link to context-sensitive help.
+
+ <tr>
+ <td><code title=rel-icon><a href="#icon3">icon</a></code></td>
+ <!-- link rel="shortcut icon" and its ilk are the fourth, sixth, and ninth most used values -->
+
+ <td><a href="#links1" title="external resource link">External
+ Resource</a>
+
+ <td><em>not allowed</em>
+
+ <td>Imports an icon to represent the current document.
+
+ <tr>
+ <td><code title=rel-index><a href="#index">index</a></code></td>
+ <!-- used more than "top" and "contents" on <link> (though on <a>, "contents" wins) -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives a link to the document that provides a table of contents or
+ index listing the current document.
+
+ <tr>
+ <td><code title=rel-last><a href="#last">last</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the current document is a part of a series, and that
+ the last document in the series is the referenced document.
+
+ <tr>
+ <td><code title=rel-license><a href="#license">license</a></code></td>
+ <!-- seventh most used <a rel> value -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the current document is covered by the copyright
+ license described by the referenced document.
+
+ <tr>
+ <td><code title=rel-next><a href="#next">next</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the current document is a part of a series, and that
+ the next document in the series is the referenced document.
+
+ <tr>
+ <td><code title=rel-nofollow><a href="#nofollow">nofollow</a></code></td>
+ <!-- most used <a rel> value (and sixth most used is "external nofollow") -->
+
+ <td><em>not allowed</em>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the current document's original author or publisher
+ does not endorse the referenced document.
+
+ <tr>
+ <td><code title=rel-pingback><a href="#pingback">pingback</a></code>
+
+ <td><a href="#links1" title="external resource link">External
+ Resource</a>
+
+ <td><em>not allowed</em>
+
+ <td>Gives the address of the pingback server that handles pingbacks to
+ the current document.
+
+ <tr>
+ <td><code title=rel-prefetch><a href="#prefetch">prefetch</a></code>
+
+ <td><a href="#links1" title="external resource link">External
+ Resource</a>
+
+ <td><em>not allowed</em>
+
+ <td>Specifies that the target resource should be pre-emptively cached.
+
+ <tr>
+ <td><code title=rel-prev><a href="#prev">prev</a></code></td>
+ <!-- prev is used more than previous -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Indicates that the current document is a part of a series, and that
+ the previous document in the series is the referenced document.
+
+ <tr>
+ <td><code title=rel-search><a href="#search0">search</a></code></td>
+ <!-- used quite a bit -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives a link to a resource that can be used to search through the
+ current document and its related pages.
+
+ <tr>
+ <td><code title=rel-stylesheet><a
+ href="#stylesheet">stylesheet</a></code></td>
+ <!-- most commonly used <link rel> value, variants came in 7th, 8th, 12th, 17th... -->
+
+ <td><a href="#links1" title="external resource link">External
+ Resource</a>
+
+ <td><em>not allowed</em>
+
+ <td>Imports a stylesheet.
+
+ <tr>
+ <td><code title=rel-sidebar><a href="#sidebar">sidebar</a></code></td>
+ <!-- used quite a bit -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Specifies that the referenced document, if retrieved, is intended to
+ be shown in the browser's sidebar (if it has one).
+
+ <tr>
+ <td><code title=rel-tag><a href="#tag">tag</a></code></td>
+ <!-- second and third most used <a rel> value (third is technically "category tag"). -->
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Gives a tag (identified by the given address) that applies to the
+ current document.
+
+ <tr>
+ <td><code title=rel-up><a href="#up">up</a></code>
+
+ <td><a href="#hyperlink1" title="hyperlink link">Hyperlink</a>
+
+ <td><a href="#hyperlinks">Hyperlink</a>
+
+ <td>Provides a link to a document giving the context for the current
+ document.
+ </table>
+
+ <p>Some of the types described below list synonyms for these values. These
+ are to be handled as specified by user agents, but must not be used in
+ documents.</p>
+ <!--XXX
+
+ issues for rel="", etc:
+ rel="alternate stylesheet"
+ rel="script"
+ rel="related" // see also
+ which relationship combinations are allowed
+ what multiple values might mean (multiple <a rel="top"> in the same document)
+ http://www.euronet.nl/~tekelenb/WWW/LINK/
+ http://shift.freezope.org/konq_rellinks/development_html
+ http://hixie.ch/specs/html/link/001
+ http://hixie.ch/specs/html/link/002
+ http://www.hixie.ch/specs/html/metadata
+ what UAs are supposed to do with this
+ do something about http://microformats.org/wiki/rel-enclosure
+
+mpt says:
+> "As with <a> elements, when <link> elements that use these relationships
+> are present, UAs should render them. As with <a> elements, when <link>
+> elements that use these relationships do not exist, UAs should not
+> render them. UAs should not make <link> rendering any easier to hide
+> than <a> rendering."
+
+for microformats (e.g. to refer to an hcard from an hcalendar):
+rel=xref
+<a> and <area> only
+The href attribute's value must start with a '#' character.
+Indicates an in-page cross-reference. For the purposes of data mining tools, the subtree rooted
+at the first element with the given ID must be treated as if it was cloned and replaced the <a> element.
+
+ -->
+
+ <h5 id=link-type><span class=secno>4.4.3.1. </span>Link type "<dfn
+ id=alternate title=rel-alternate><code>alternate</code></dfn>"</h5>
+
+ <p>The <code title=rel-alternate><a href="#alternate">alternate</a></code>
+ keyword may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, if the <code
+ title=attr-link-rel><a href="#rel">rel</a></code> attribute does not also
+ contain the keyword <code title=rel-stylesheet><a
+ href="#stylesheet">stylesheet</a></code>, it creates a <a
+ href="#hyperlink1" title="hyperlink link">hyperlink</a>; but if it
+ <em>does</em> also contain the keyword <code title=rel-stylesheet><a
+ href="#stylesheet">stylesheet</a></code>, the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword instead modifies the
+ meaning of the <code title=rel-stylesheet><a
+ href="#stylesheet">stylesheet</a></code> keyword in the way described for
+ that keyword, and the rest of this subsection doesn't apply.
+
+ <p>The <code title=rel-alternate><a href="#alternate">alternate</a></code>
+ keyword indicates that the referenced document is an alternate
+ representation of the current document.
+
+ <p>The nature of the referenced document is given by the <code
+ title=attr-hyperlink-media><a href="#media12">media</a></code>, <code
+ title=attr-hyperlink-hreflang><a href="#hreflang3">hreflang</a></code>,
+ and <code title=attr-hyperlink-type><a href="#type17">type</a></code>
+ attributes.
+
+ <p>If the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword is used with the <code
+ title=attr-hyperlink-media><a href="#media12">media</a></code> attribute,
+ it indicates that the referenced document is intended for use with the
+ media specified.
+
+ <p>If the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword is used with the <code
+ title=attr-hyperlink-hreflang><a href="#hreflang3">hreflang</a></code>
+ attribute, and that attribute's value differs from the <a
+ href="#root-element">root element</a>'s <a href="#language">language</a>,
+ it indicates that the referenced document is a translation.
+
+ <p>If the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword is used with the <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attribute, it
+ indicates that the referenced document is a reformulation of the current
+ document in the specified format.
+
+ <p>The <code title=attr-hyperlink-media><a
+ href="#media12">media</a></code>, <code title=attr-hyperlink-hreflang><a
+ href="#hreflang3">hreflang</a></code>, and <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attributes can
+ be combined when specified with the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword.
+
+ <div class=example>
+ <p>For example, the following link is a French translation that uses the
+ PDF format:</p>
+
+ <pre>&lt;link rel=alternate type=application/pdf hreflang=fr href=manual-fr></pre>
+ </div>
+
+ <p>If the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword is used with the <code
+ title=attr-hyperlink-type><a href="#type17">type</a></code> attribute set
+ to the value <code title="">application/rss+xml</code> or the value <code
+ title="">application/atom+xml</code>, then the user agent must treat the
+ link as it would if it had the <code title=rel-feed><a
+ href="#feed">feed</a></code> keyword specified as well.
+
+ <p>The <code title=rel-alternate><a href="#alternate">alternate</a></code>
+ link relationship is transitive &mdash; that is, if a document links to
+ two other documents with the link type "<code title=rel-alternate><a
+ href="#alternate">alternate</a></code>", then, in addition to implying
+ that those documents are alternative representations of the first
+ document, it is also implying that those two documents are alternative
+ representations of each other.
+
+ <h5 id=link-type0><span class=secno>4.4.3.2. </span>Link type "<dfn
+ id=archives title=rel-archives><code>archives</code></dfn>"</h5>
+
+ <p>The <code title=rel-archives><a href="#archives">archives</a></code>
+ keyword may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-archives><a href="#archives">archives</a></code>
+ keyword indicates that the referenced document describes a collection of
+ records, documents, or other materials of historical interest.
+
+ <p class=example>A blog's index page could link to an index of the blog's
+ past posts with <code title="">rel="archives"</code>.
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat the keyword "<code title="">archive</code>" like the <code
+ title=rel-archives><a href="#archives">archives</a></code> keyword.
+
+ <h5 id=link-type1><span class=secno>4.4.3.3. </span>Link type "<dfn
+ id=author title=rel-author><code>author</code></dfn>"</h5>
+
+ <p>The <code title=rel-author><a href="#author">author</a></code> keyword
+ may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>For <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements, the <code title=rel-author><a
+ href="#author">author</a></code> keyword indicates that the referenced
+ document provides further information about the author of the section that
+ the element defining the hyperlink <a href="#applyToSection">applies</a>
+ to.
+
+ <p>For <code><a href="#link">link</a></code> elements, the <code
+ title=rel-author><a href="#author">author</a></code> keyword indicates
+ that the referenced document provides further information about the author
+ for the page as a whole.
+
+ <p class=note>The "referenced document" can be, and often is, a <code
+ title="">mailto:</code> URI giving the e-mail address of the author. <a
+ href="#refsMAILTO">[MAILTO]</a>
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code> elements
+ that have a <code title="">rev</code> attribute with the value
+ "<code>made</code>" as having the <code title=rel-author><a
+ href="#author">author</a></code> keyword specified as a link relationship.
+
+ <h5 id=link-type2><span class=secno>4.4.3.4. </span>Link type "<dfn
+ id=bookmark title=rel-bookmark><code>bookmark</code></dfn>"</h5>
+
+ <p>The <code title=rel-bookmark><a href="#bookmark">bookmark</a></code>
+ keyword may be used with <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements.
+
+ <p>The <code title=rel-bookmark><a href="#bookmark">bookmark</a></code>
+ keyword gives a permalink for the nearest ancestor <code><a
+ href="#article">article</a></code> element of the linking element in
+ question, or of <a href="#associatedSection">the section the linking
+ element is most closely associated with</a>, if there are no ancestor
+ <code><a href="#article">article</a></code> elements.
+
+ <div class=example>
+ <p>The following snippet has three permalinks. A user agent could
+ determine which permalink applies to which part of the spec by looking at
+ where the permalinks are given.</p>
+
+ <pre> ...
+ &lt;body>
+ &lt;h1>Example of permalinks&lt;/h1>
+ &lt;div id="a">
+ &lt;h2>First example&lt;/h2>
+ &lt;p>&lt;a href="a.html" rel="bookmark">This&lt;/a> permalink applies to
+ only the content from the first H2 to the second H2. The DIV isn't
+ exactly that section, but it roughly corresponds to it.&lt;/p>
+ &lt;/div>
+ &lt;h2>Second example&lt;/h2>
+ &lt;article id="b">
+ &lt;p>&lt;a href="b.html" rel="bookmark">This&lt;/a> permalink applies to
+ the outer ARTICLE element (which could be, e.g., a blog post).&lt;/p>
+ &lt;article id="c">
+ &lt;p>&lt;a href="c.html" rel="bookmark">This&lt;/a> permalink applies to
+ the inner ARTICLE element (which could be, e.g., a blog comment).&lt;/p>
+ &lt;/article>
+ &lt;/article>
+ &lt;/body>
+ ...</pre>
+ </div>
+
+ <h5 id=link-type3><span class=secno>4.4.3.5. </span>Link type "<dfn
+ id=contact title=rel-contact><code>contact</code></dfn>"</h5>
+
+ <p>The <code title=rel-contact><a href="#contact">contact</a></code>
+ keyword may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>For <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements, the <code title=rel-contact><a
+ href="#contact">contact</a></code> keyword indicates that the referenced
+ document provides further contact information for the section that the
+ element defining the hyperlink <a href="#applyToSection">applies</a> to.
+
+ <p>User agents must treat any hyperlink in an <code><a
+ href="#address">address</a></code> element as having the <code
+ title=rel-contact><a href="#contact">contact</a></code> link type
+ specified.
+
+ <p>For <code><a href="#link">link</a></code> elements, the <code
+ title=rel-contact><a href="#contact">contact</a></code> keyword indicates
+ that the referenced document provides further contact information for the
+ page as a whole.
+
+ <h5 id=link-type4><span class=secno>4.4.3.6. </span>Link type "<dfn
+ id=external title=rel-external><code>external</code></dfn>"</h5>
+
+ <p>The <code title=rel-external><a href="#external">external</a></code>
+ keyword may be used with <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements.
+
+ <p>The <code title=rel-external><a href="#external">external</a></code>
+ keyword indicates that the link is leading to a document that is not part
+ of the site that the current document forms a part of.
+
+ <h5 id=link-type5><span class=secno>4.4.3.7. </span>Link type "<dfn id=feed
+ title=rel-feed><code>feed</code></dfn>"</h5>
+
+ <p>The <code title=rel-feed><a href="#feed">feed</a></code> keyword may be
+ used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-feed><a href="#feed">feed</a></code> keyword
+ indicates that the referenced document is a syndication feed. If the <code
+ title=rel-alternate><a href="#alternate">alternate</a></code> link type is
+ also specified, then the feed is specifically the feed for the current
+ document; otherwise, the feed is just a syndication feed, not necessarily
+ associated with a particular Web page.
+
+ <p>The first <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, or <code><a href="#area">area</a></code> element
+ in the document (in tree order) that creates a hyperlink with the link
+ type <code title=rel-feed><a href="#feed">feed</a></code> must be treated
+ as the default syndication feed for the purposes of feed autodiscovery.
+
+ <p class=note>The <code title=rel-feed><a href="#feed">feed</a></code>
+ keyword is implied by the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> link type in certain cases (q.v.).
+
+ <div class=example>
+ <p>The following two <code><a href="#link">link</a></code> elements are
+ equivalent: both give the syndication feed for the current page:</p>
+
+ <pre>&lt;link rel="alternate" type="application/atom+xml" href="data.xml"></pre>
+
+ <pre>&lt;link rel="feed alternate" href="data.xml"></pre>
+
+ <p>The following extract offers various different syndication feeds:</p>
+
+ <pre> &lt;p>You can access the planets database using Atom feeds:&lt;/p>
+ &lt;ul>
+ &lt;li>&lt;a href="recently-visited-planets.xml" rel="feed">Recently Visited Planets&lt;/a>&lt;/li>
+ &lt;li>&lt;a href="known-bad-planets.xml" rel="feed">Known Bad Planets&lt;/a>&lt;/li>
+ &lt;li>&lt;a href="unexplored-planets.xml" rel="feed">Unexplored Planets&lt;/a>&lt;/li>
+ &lt;/ul></pre>
+ </div>
+
+ <h5 id=link-type6><span class=secno>4.4.3.8. </span>Link type "<dfn id=help
+ title=rel-help><code>help</code></dfn>"</h5>
+
+ <p>The <code title=rel-help><a href="#help">help</a></code> keyword may be
+ used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>For <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements, the <code title=rel-help><a
+ href="#help">help</a></code> keyword indicates that the referenced
+ document provides further help information for the parent of the element
+ defining the hyperlink, and its children.
+
+ <div class=example>
+ <p>In the following example, the form control has associated
+ context-sensitive help. The user agent could use this information, for
+ example, displaying the referenced document if the user presses the
+ "Help" or "F1" key.</p>
+
+ <pre> &lt;p>&lt;label> Topic: &lt;input name=topic> &lt;a href="help/topic.html" rel="help">(Help)&lt;/a>&lt;/label>&lt;/p></pre>
+ </div>
+
+ <p>For <code><a href="#link">link</a></code> elements, the <code
+ title=rel-help><a href="#help">help</a></code> keyword indicates that the
+ referenced document provides help for the page as a whole.
+
+ <h5 id=link-type7><span class=secno>4.4.3.9. </span>Link type "<dfn
+ id=icon3 title=rel-icon><code>icon</code></dfn>"</h5>
+
+ <p>The <code title=rel-icon><a href="#icon3">icon</a></code> keyword may be
+ used with <code><a href="#link">link</a></code> elements, for which it
+ creates an <a href="#links1" title="external resource link">external
+ resource link</a>.
+
+ <p>The specified resource is an icon representing the page or site, and
+ should be used by the user agent when representing the page in the user
+ interface.
+
+ <p>Icons could be auditory icons, visual icons, or other kinds of icons. If
+ multiple icons are provided, the user agent must select the most
+ appropriate icon according to the <code title=attr-link-media><a
+ href="#media0">media</a></code> attribute.
+
+ <h5 id=link-type8><span class=secno>4.4.3.10. </span>Link type "<dfn
+ id=license title=rel-license><code>license</code></dfn>"</h5>
+
+ <p>The <code title=rel-license><a href="#license">license</a></code>
+ keyword may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-license><a href="#license">license</a></code>
+ keyword indicates that the referenced document provides the copyright
+ license terms under which the current document is provided.
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat the keyword "<code title="">copyright</code>" like the <code
+ title=rel-license><a href="#license">license</a></code> keyword.
+
+ <h5 id=link-type9><span class=secno>4.4.3.11. </span>Link type "<dfn
+ id=nofollow title=rel-nofollow><code>nofollow</code></dfn>"</h5>
+
+ <p>The <code title=rel-nofollow><a href="#nofollow">nofollow</a></code>
+ keyword may be used with <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code> elements.
+
+ <p>The <code title=rel-nofollow><a href="#nofollow">nofollow</a></code>
+ keyword indicates that the link is not endorsed by the original author or
+ publisher of the page.
+
+ <h5 id=link-type10><span class=secno>4.4.3.12. </span>Link type "<dfn
+ id=pingback title=rel-pingback><code>pingback</code></dfn>"</h5>
+
+ <p>The <code title=rel-pingback><a href="#pingback">pingback</a></code>
+ keyword may be used with <code><a href="#link">link</a></code> elements,
+ for which it creates an <a href="#links1" title="external resource
+ link">external resource link</a>.
+
+ <p>For the semantics of the <code title=rel-pingback><a
+ href="#pingback">pingback</a></code> keyword, see the Pingback 1.0
+ specification. <a href="#refsPINGBACK">[PINGBACK]</a>
+
+ <h5 id=link-type11><span class=secno>4.4.3.13. </span>Link type "<dfn
+ id=prefetch title=rel-prefetch><code>prefetch</code></dfn>"</h5>
+
+ <p>The <code title=rel-prefetch><a href="#prefetch">prefetch</a></code>
+ keyword may be used with <code><a href="#link">link</a></code> elements,
+ for which it creates an <a href="#links1" title="external resource
+ link">external resource link</a>.
+
+ <p>The <code title=rel-prefetch><a href="#prefetch">prefetch</a></code>
+ keyword indicates that preemptively fetching and caching the specified
+ resource is likely to be beneficial, as it is highly likely that the user
+ will require this resource.
+
+ <h5 id=link-type12><span class=secno>4.4.3.14. </span>Link type "<dfn
+ id=search0 title=rel-search><code>search</code></dfn>"</h5>
+
+ <p>The <code title=rel-search><a href="#search0">search</a></code> keyword
+ may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-search><a href="#search0">search</a></code> keyword
+ indicates that the referenced document provides an interface specifically
+ for searching the document and its related resources.
+
+ <p class=note>OpenSearch description documents can be used with <code><a
+ href="#link">link</a></code> elements and the <code title=rel-search><a
+ href="#search0">search</a></code> link type to enable user agents to
+ autodiscover search interfaces.
+
+ <h5 id=link-type13><span class=secno>4.4.3.15. </span>Link type "<dfn
+ id=stylesheet title=rel-stylesheet><code>stylesheet</code></dfn>"</h5>
+
+ <p>The <code title=rel-stylesheet><a
+ href="#stylesheet">stylesheet</a></code> keyword may be used with <code><a
+ href="#link">link</a></code> elements, for which it creates an <a
+ href="#links1" title="external resource link">external resource link</a>
+ that contributes to the <a href="#styling0">styling processing model</a>.
+
+ <p>The specified resource is a resource that describes how to present the
+ document. Exactly how the resource is to be processed depends on the
+ actual type of the resource.
+
+ <p>If the <code title=rel-alternate><a
+ href="#alternate">alternate</a></code> keyword is also specified on the
+ <code><a href="#link">link</a></code> element, then the link is an
+ alternative stylesheet.
+
+ <h5 id=link-type14><span class=secno>4.4.3.16. </span>Link type "<dfn
+ id=sidebar title=rel-sidebar><code>sidebar</code></dfn>"</h5>
+
+ <p>The <code title=rel-sidebar><a href="#sidebar">sidebar</a></code>
+ keyword may be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-sidebar><a href="#sidebar">sidebar</a></code>
+ keyword indicates that the referenced document, if retrieved, is intended
+ to be shown in a <a href="#secondary0">secondary browsing context</a> (if
+ possible), instead of in the current <a href="#browsing0">browsing
+ context</a>.
+
+ <p>A <a href="#hyperlinks" title=hyperlink>hyperlink element</a> with
+ the <code title=rel-sidebar><a href="#sidebar">sidebar</a></code> keyword
+ specified is a <dfn id=sidebar0 title=rel-sidebar-hyperlink>sidebar
+ hyperlink</dfn>.
+
+ <h5 id=link-type15><span class=secno>4.4.3.17. </span>Link type "<dfn
+ id=tag title=rel-tag><code>tag</code></dfn>"</h5>
+
+ <p>The <code title=rel-tag><a href="#tag">tag</a></code> keyword may be
+ used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-tag><a href="#tag">tag</a></code> keyword indicates
+ that the <em>tag</em> that the referenced document represents applies to
+ the current document.
+
+ <h5 id=hierarchical><span class=secno>4.4.3.18. </span>Hierarchical link
+ types</h5>
+
+ <p>Some documents form part of a hierarchical structure of documents.
+
+ <p>A hierarchical structure of documents is one where each document can
+ have various subdocuments. A subdocument is said to be a <em>child</em> of
+ the document it is a subdocument of. The document of which it is a
+ subdocument is said to be its <em>parent</em>. The children of a document
+ have a relative order; the subdocument that precedes another is its
+ <em>previous sibling</em>, and the one that follows it is its <em>next
+ sibling</em>. A document with no parent forms the top of the hierarchy.
+
+ <h6 id=link-type16><span class=secno>4.4.3.18.1. </span>Link type "<dfn
+ id=first title=rel-first><code>first</code></dfn>"</h6>
+
+ <p>The <code title=rel-first><a href="#first">first</a></code> keyword may
+ be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-first><a href="#first">first</a></code> keyword
+ indicates that the document is part of a hierarchical structure, and that
+ the link is leading to the document that is the first child of the current
+ document's parent document.
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat the keywords "<code title="">begin</code>" and "<code
+ title="">start</code>" like the <code title=rel-first><a
+ href="#first">first</a></code> keyword.
+
+ <h6 id=link-type17><span class=secno>4.4.3.18.2. </span>Link type "<dfn
+ id=index title=rel-index><code>index</code></dfn>"</h6>
+
+ <p>The <code title=rel-index><a href="#index">index</a></code> keyword may
+ be used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-index><a href="#index">index</a></code> keyword
+ indicates that the document is part of a hierarchical structure, and that
+ the link is leading to the document that is the top of the hierarchy.
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat the keywords "<code title="">top</code>", "<code
+ title="">contents</code>", and "<code title="">toc</code>" like the <code
+ title=rel-index><a href="#index">index</a></code> keyword.
+
+ <h6 id=link-type18><span class=secno>4.4.3.18.3. </span>Link type "<dfn
+ id=last title=rel-last><code>last</code></dfn>"</h6>
+
+ <p>The <code title=rel-last><a href="#last">last</a></code> keyword may be
+ used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-last><a href="#last">last</a></code> keyword
+ indicates that the document is part of a hierarchical structure, and that
+ the link is leading to the document that is the last child of the current
+ document's parent document.
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat the keyword "<code title="">end</code>" like the <code
+ title=rel-last><a href="#last">last</a></code> keyword.
+
+ <h6 id=link-type19><span class=secno>4.4.3.18.4. </span>Link type "<dfn
+ id=next title=rel-next><code>next</code></dfn>"</h6>
+
+ <p>The <code title=rel-next><a href="#next">next</a></code> keyword may be
+ used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-next><a href="#next">next</a></code> keyword
+ indicates that the document is part of a hierarchical structure, and that
+ the link is leading to the document that is the next sibling of the
+ current document.
+
+ <h6 id=link-type20><span class=secno>4.4.3.18.5. </span>Link type "<dfn
+ id=prev title=rel-prev><code>prev</code></dfn>"</h6>
+
+ <p>The <code title=rel-prev><a href="#prev">prev</a></code> keyword may be
+ used with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-prev><a href="#prev">prev</a></code> keyword
+ indicates that the document is part of a hierarchical structure, and that
+ the link is leading to the document that is the previous sibling of the
+ current document.
+
+ <p><strong>Synonyms</strong>: For historical reasons, user agents must also
+ treat the keyword "<code title="">previous</code>" like the <code
+ title=rel-prev><a href="#prev">prev</a></code> keyword.
+
+ <h6 id=link-type21><span class=secno>4.4.3.18.6. </span>Link type "<dfn
+ id=up title=rel-up><code>up</code></dfn>"</h6>
+
+ <p>The <code title=rel-up><a href="#up">up</a></code> keyword may be used
+ with <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. For <code><a href="#link">link</a></code> elements, it creates a
+ <a href="#hyperlink1" title="hyperlink link">hyperlink</a>.
+
+ <p>The <code title=rel-up><a href="#up">up</a></code> keyword indicates
+ that the document is part of a hierarchical structure, and that the link
+ is leading to the document that is the parent of the current document.</p>
+ <!-- idea: rel="up" vs rel="up up" vs rel="up up up top"
+ this would allow you to do breadcrumbs:
+ <nav>
+ <p>
+ <a href="/" rel="top up up up">Main</a> &gt;
+ <a href="/products/" rel="up up">Products</a> &gt;
+ <a href="/products/dishwashers" rel="up">Dishwashers</a> &gt;
+ Second hand
+ </p>
+ </nav>
+ -->
+
+ <h5 id=other0><span class=secno>4.4.3.19. </span>Other link types</h5>
+
+ <p>Other than the types defined above, only types defined as extensions in
+ the <a href="http://wiki.whatwg.org/wiki/RelExtensions">WHATWG Wiki
+ RelExtensions page</a> may be used with the <code title="">rel</code>
+ attribute on <code><a href="#link">link</a></code>, <code><a
+ href="#a">a</a></code>, and <code><a href="#area">area</a></code>
+ elements. <a href="#refsWHATWGWIKI">[WHATWGWIKI]</a>
+
+ <p>Anyone is free to edit the WHATWG Wiki RelExtensions page at any time to
+ add a type. Extension types must be specified with the following
+ information:
+
+ <dl>
+ <dt>Keyword
+
+ <dd>
+ <p>The actual value being defined. The value should not be confusingly
+ similar to any other defined value (e.g. differing only in case).
+
+ <dt>Effect on... <code><a href="#link">link</a></code>
+
+ <dd>
+ <p>One of the following:</p>
+
+ <dl>
+ <dt>not allowed
+
+ <dd>The keyword is not allowed to be specified on <code><a
+ href="#link">link</a></code> elements.
+
+ <dt>Hyperlink
+
+ <dd>The keyword may be specified on a <code><a
+ href="#link">link</a></code> element; it creates a <a
+ href="#hyperlink1" title="hyperlink link">hyperlink link</a>.
+
+ <dt>External Resource
+
+ <dd>The keyword may be specified on a <code><a
+ href="#link">link</a></code> element; it creates a <a href="#links1"
+ title="external resource link">external resource link</a>.
+ </dl>
+
+ <dt>Effect on... <code><a href="#a">a</a></code> and <code><a
+ href="#area">area</a></code>
+
+ <dd>
+ <p>One of the following:</p>
+
+ <dl>
+ <dt>not allowed
+
+ <dd>The keyword is not allowed to be specified on <code><a
+ href="#a">a</a></code> and <code><a href="#area">area</a></code>
+ elements.
+
+ <dt>Hyperlink
+
+ <dd>The keyword may be specified on <code><a href="#a">a</a></code> and
+ <code><a href="#area">area</a></code> elements; it creates a <a
+ href="#hyperlinks" title=hyperlink>hyperlink</a>.
+ </dl>
+
+ <dt>Brief description
+
+ <dd>
+ <p>A short description of what the keyword's meaning is.
+
+ <dt>Link to more details
+
+ <dd>
+ <p>A link to a more detailed description of the keyword's semantics and
+ requirements. It could be another page on the Wiki, or a link to an
+ external page.
+
+ <dt>Synonyms
+
+ <dd>
+ <p>A list of other keyword values that have exactly the same processing
+ requirements. Authors must not use the values defined to be synonyms;
+ they are only intended to allow user agents to support legacy content.
+
+ <dt>Status
+
+ <dd>
+ <p>One of the following:</p>
+
+ <dl>
+ <dt>Proposal
+
+ <dd>The keyword has not received wide peer review and approval. It is
+ included for completeness because pages use the keyword. Pages should
+ not use the keyword.
+
+ <dt>Accepted
+
+ <dd>The keyword has received wide peer review and approval. It has a
+ specification that unambiguously defines how to handle pages that use
+ the keyword, including when they use them in incorrect ways. Pages may
+ use the keyword.
+
+ <dt>Rejected
+
+ <dd>The keyword has received wide peer review and it has been found to
+ have significant problems. Pages must not use the keyword. When a
+ keyword has this status, the "Effect on... <code><a
+ href="#link">link</a></code>" and "Effect on... <code><a
+ href="#a">a</a></code> and <code><a href="#area">area</a></code>"
+ information should be set to "not allowed".
+ </dl>
+
+ <p>If a keyword is added with the "proposal" status and found to be
+ redundant with existing values, it should be removed and listed as a
+ synonym for the existing value. If a keyword is added with the
+ "proposal" status and found to be harmful, then it should be changed to
+ "rejected" status, and its "Effect on..." information should be changed
+ accordingly.</p>
+ </dl>
+
+ <p>Conformance checkers must use the information given on the WHATWG Wiki
+ RelExtensions page to establish if a value not explicitly defined in this
+ specification is allowed or not. When an author uses a new type not
+ defined by either this specification or the Wiki page, conformance
+ checkers should offer to add the value to the Wiki, with the details
+ described above, with the "proposal" status.
+
+ <p>This specification does not define how new values will get approved. It
+ is expected that the Wiki will have a community that addresses this.
+
+ <h3 id=interfaces><span class=secno>4.5. </span>Interfaces for URI
+ manipulation</h3>
+
+ <p>An interface that has a complement of <dfn id=uri-decomposition>URI
+ decomposition attributes</dfn> will have seven attributes with the
+ following definitions:
+
+ <pre class=idl>
+ readonly attribute DOMString <a href="#protocol0" title=dom-uda-protocol>protocol</a>;
+ readonly attribute DOMString <a href="#host0" title=dom-uda-host>host</a>;
+ readonly attribute DOMString <a href="#hostname0" title=dom-uda-hostname>hostname</a>;
+ readonly attribute DOMString <a href="#port0" title=dom-uda-port>port</a>;
+ readonly attribute DOMString <a href="#pathname0" title=dom-uda-pathname>pathname</a>;
+ readonly attribute DOMString <a href="#search1" title=dom-uda-search>search</a>;
+ readonly attribute DOMString <a href="#hash0" title=dom-uda-hash>hash</a>;
+</pre>
+
+ <p>The attributes defined to be URI decomposition attributes must act as
+ described for the attributes with the same corresponding names in this
+ section.
+
+ <p>In addition, an interface with a complement of URI decomposition
+ attributes will define an <dfn id=input
+ title=concept-uda-input>input</dfn>, which is a URI that the attributes
+ act on, and a <dfn id=common3 title=concept-uda-setter>common setter
+ action</dfn>, which is a set of steps invoked when any of the attributes'
+ setters are invoked.
+
+ <p>The seven URI decomposition attributes have similar requirements.
+
+ <p>On getting, if the <a href="#input" title=concept-uda-input>input</a>
+ fulfills the condition given in the "getter condition" column
+ corresponding to the attribute in the table below, the user agent must
+ return the part of the <a href="#input" title=concept-uda-input>input</a>
+ URI given in the "component" column, with any prefixes specified in the
+ "prefix" column appropriately added to the start of the string and any
+ suffixes specified in the "suffix" column appropriately added to the end
+ of the string. Otherwise, the attribute must return the empty string.
+
+ <p>On setting, the new value must first be mutated as described by the
+ "setter preprocessor" column, then mutated by %-escaping any characters in
+ the new value that are not valid in the relevant component as given by the
+ "component" column. Then, if the resulting new value fulfills the
+ condition given in the "setter condition" column, the user agent must make
+ a new string <var title="">output</var> by replacing the component of the
+ URI given by the "component" column in the <a href="#input"
+ title=concept-uda-input>input</a> URI with the new value; otherwise, the
+ user agent must let <var title="">output</var> be equal to the <a
+ href="#input" title=concept-uda-input>input</a>. Finally, the user agent
+ must invoke the <a href="#common3" title=concept-uda-setter>common setter
+ action</a> with the value of <var title="">output</var>.
+
+ <p>The rules for parsing and constructing URIs are described in RFC 2396 as
+ modified by RFC 2732. <a href="#refsRFC2396">[RFC2396]</a> <a
+ href="#refsRFC2732">[RFC2732]</a>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Attribute
+
+ <th>Component
+
+ <th>Getter Condition
+
+ <th>Prefix
+
+ <th>Suffix
+
+ <th>Setter Preprocessor
+
+ <th>Setter Condition
+
+ <tbody>
+ <tr>
+ <td><dfn id=protocol0 title=dom-uda-protocol><code>protocol</code></dfn>
+
+
+ <td>&lt;scheme&gt;
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <td>U+003A COLON ("<code title="">:</code>")
+
+ <td>Remove all trailing U+003A COLON ("<code title="">:</code>")
+ characters
+
+ <td>The new value is not the empty string
+
+ <tr>
+ <td><dfn id=host0 title=dom-uda-host><code>host</code></dfn>
+
+ <td>&lt;hostport&gt;
+
+ <td><a href="#input" title=concept-uda-input>input</a> is hierarchical
+ and uses a server-based naming authority
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <tr>
+ <td><dfn id=hostname0 title=dom-uda-hostname><code>hostname</code></dfn>
+
+
+ <td>&lt;host&gt;
+
+ <td><a href="#input" title=concept-uda-input>input</a> is hierarchical
+ and uses a server-based naming authority
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <td>Remove all leading U+002F SOLIDUS ("<code title="">/</code>")
+ characters
+
+ <td>&mdash;
+
+ <tr>
+ <td><dfn id=port0 title=dom-uda-port><code>port</code></dfn>
+
+ <td>&lt;port&gt;
+
+ <td><a href="#input" title=concept-uda-input>input</a> is hierarchical
+ and uses a server-based naming authority
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <td>Remove any characters in the new value that are not in the range
+ U+0030 DIGIT ZERO .. U+0039 DIGIT NINE
+
+ <td>The new value is not the empty string
+
+ <tr>
+ <td><dfn id=pathname0 title=dom-uda-pathname><code>pathname</code></dfn>
+
+
+ <td>&lt;abs_path&gt;
+
+ <td><a href="#input" title=concept-uda-input>input</a> is hierarchical
+
+ <td>&mdash;
+
+ <td>&mdash;
+
+ <td>If it has no leading U+002F SOLIDUS ("<code title="">/</code>")
+ character, prepend a U+002F SOLIDUS ("<code title="">/</code>")
+ character to the new value
+
+ <td>&mdash;
+
+ <tr>
+ <td><dfn id=search1 title=dom-uda-search><code>search</code></dfn>
+
+ <td>&lt;query&gt;
+
+ <td><a href="#input" title=concept-uda-input>input</a> is hierarchical
+
+ <td>U+003F QUESTION MARK ("<code title="">?</code>")
+
+ <td>&mdash;
+
+ <td>Remove one leading U+003F QUESTION MARK ("<code title="">?</code>")
+ character, if any
+
+ <td>&mdash;
+
+ <tr>
+ <td><dfn id=hash0 title=dom-uda-hash><code>hash</code></dfn>
+
+ <td>&lt;fragment&gt;
+
+ <td>Fragment identifier is longer than zero characters
+
+ <td>U+0023 NUMBER SIGN ("<code title="">#</code>")
+
+ <td>&mdash;
+
+ <td>Remove one leading U+0023 NUMBER SIGN ("<code title="">#</code>")
+ character, if any
+
+ <td>&mdash;
+ </table>
+ <!--
+ http://www.hixie.ch/tests/adhoc/dom/level0/location/components/
+ http://lxr.mozilla.org/seamonkey/source/dom/src/base/nsLocation.cpp
+ http://wp.netscape.com/eng/mozilla/3.0/handbook/javascript/ref_h-l.htm#84722
+-->
+
+ <h3 id=navigating><span class=secno>4.6. </span>Navigating across documents</h3>
+
+ <p>Certain actions cause the <a href="#browsing0">browsing context</a> to
+ <dfn id=navigate>navigate</dfn>. For example, <a href="#following0"
+ title="following hyperlinks">following a hyperlink</a>, <span
+ title="">form submission</span>, and the <code
+ title=dom-window-open>window.open()</code> and <code
+ title=dom-location-assign><a href="#assign">location.assign()</a></code>
+ methods can all cause a browsing context to navigate. A user agent may
+ also provide various ways for the user to explicitly cause a browsing
+ context to navigate.
+
+ <p>When a browsing context is navigated, the user agent must follow the
+ following steps:
+
+ <ol>
+ <li>
+ <p>Cancel any preexisting attempt to navigate the browsing context.
+
+ <li>
+ <p>If the new resource is the same as the current resource, but a
+ fragment identifier has been specified, then <a href="#scrolling0"
+ title=navigate-fragid>scroll the document to the specified element</a>
+ and abort these steps.
+
+ <li>
+ <p>If the new resource is to be handled by displaying some sort of inline
+ content, e.g. an error message because the specified scheme is not one
+ of the supported protocols, or an inline prompt to allow the user to
+ select <a href="#registerprotocolhandler"
+ title=dom-navigator-registerProtocolHandler>a registered handler</a> for
+ the given scheme, then <a href="#display" title="display a user agent
+ page inline">display the inline content</a> and abort these steps.
+
+ <li>
+ <p>If the new resource is to be handled using a mechanism that does not
+ affect the browsing context, then abort these steps and proceed with
+ that mechanism instead.
+
+ <li>
+ <p>Start fetching the specified resource, as appropriate (e.g. performing
+ an HTTP GET or POST operation, or reading the file from disk, or
+ executing script in the case of a <a href="#the-javascript"
+ title="javascript protocol"><code title="">javascript:</code> URI</a>).
+ If this results in a redirect, return to step 2 with the new resource.
+
+ <li>
+ <p>Wait for one or more bytes to be available or for the user agent to
+ establish that the resource in question is empty. During this time, the
+ user agent may allow the user to cancel this navigation attempt or start
+ other navigation attempts.
+
+ <li>
+ <p>If the document's out-of-band metadata (e.g. HTTP headers), not
+ counting any <a href="#content-type8" title=Content-Type>type
+ information</a> (such as the Content-Type HTTP header), requires some
+ sort of processing that will not affect the browsing context, then
+ perform that processing and abort these steps.</p>
+
+ <div class=note>
+ <p>Such processing might be triggered by, amongst other things, the
+ following:</p>
+
+ <ul class=brief>
+ <li>HTTP status codes (e.g. 204 No Content or 205 Reset Content)
+
+ <li>HTTP Content-Disposition headers
+
+ <li>Network errors
+ </ul>
+ </div>
+
+ <li>
+ <p>Let <var title="">type</var> be <a href="#sniffed" title="Content-Type
+ sniffing">the sniffed type of the resource</a>.
+
+ <li>
+ <p>If the user agent has been configured to process resources of the
+ given <var title="">type</var> using some mechanism other than rendering
+ the content in a <a href="#browsing0">browsing context</a>, then skip
+ this step. Otherwise, if the <var title="">type</var> is one of the
+ following types, jump to the appropriate entry in the following list,
+ and process the resource as described there:</p>
+
+ <dl class=switch>
+ <dt>"text/html"
+
+ <dd>Follow the steps given in the <a href="#page-load"
+ title=navigate-html>HTML document</a> section, and abort these steps.
+
+ <dt>Any type ending in "+xml"
+
+ <dt>"application/xml"
+
+ <dt>"text/xml"
+
+ <dd>Follow the steps given in the <a href="#page-load0"
+ title=navigate-xml>XML document</a> section. If that section determines
+ that the content is <em>not</em> to be displayed as a generic XML
+ document, then proceed to the next step in this overall set of steps.
+ Otherwise, abort these steps.
+
+ <dt>"text/plain"
+
+ <dd>Follow the steps given in the <a href="#page-load1"
+ title=navigate-text>plain text file</a> section, and abort these steps.
+
+ <dt>A supported image type
+
+ <dd>Follow the steps given in the <a href="#page-load2"
+ title=navigate-image>image</a> section, and abort these steps.
+
+ <dt>A type that will use an external application to render the content
+ in the <a href="#browsing0">browsing context</a>
+
+ <dd>Follow the steps given in the <a href="#page-load3"
+ title=navigate-plugin>plugin</a> section, and abort these steps.
+ </dl>
+
+ <li id=navigate-non-Document>
+ <p>If, given <var title="">type</var>, the new resource is to be handled
+ by displaying some sort of inline content, e.g. a native rendering of
+ the content, an error message because the specified type is not
+ supported, or an inline prompt to allow the user to select <a
+ href="#registercontenthandler"
+ title=dom-navigator-registerContentHandler>a registered handler</a> for
+ the given type, then <a href="#display" title="display a user agent page
+ inline">display the inline content</a> and abort these steps.
+
+ <li>
+ <p>Otherwise, the document's <var title="">type</var> is such that the
+ resource will not affect the browsing context, e.g. because the resource
+ is to be handed to an external application. Process the resource
+ appropriately.</p>
+ </ol>
+
+ <p>Some of the sections below, to which the above algorithm defers in
+ certain cases, require the user agent to <dfn id=update>update the session
+ history with the new page</dfn>. When a user agent is required to do this,
+ it must follow the set of steps given below that is appropriate for the
+ situation at hand. From the point of view of any script, these steps must
+ occur atomically.
+
+ <ol>
+ <li>
+ <p class=big-issue>pause for scripts
+
+ <li>
+ <p class=big-issue>onbeforeunload
+
+ <li>
+ <p class=big-issue>onunload
+
+ <li>
+ <dl>
+ <dt>If the navigation was initiated for <dfn id=entry>entry update</dfn>
+ of an entry
+
+ <dd>
+ <ol>
+ <li>
+ <p>Replace the entry being updated with a new entry representing the
+ new resource and its <code>Document</code> object and related state.
+ The user agent may propagate state from the old entry to the new
+ entry (e.g. scroll position).
+
+ <li>
+ <p><a href="#traverse">Traverse the history</a> to the new entry.
+ </ol>
+
+ <dt>Otherwise
+
+ <dd>
+ <ol>
+ <li>
+ <p>Remove all the entries after the <a href="#current0">current
+ entry</a> in the <a href="#browsing0">browsing context</a>'s
+ <code>Document</code> object's <code><a
+ href="#history1">History</a></code> object.</p>
+
+ <p class=note>This <a href="#history-notes">doesn't necessarily have
+ to affect</a><!--XXX change to auto-xref?--> the user agent's user
+ interface.</p>
+
+ <li>
+ <p>Append a new entry at the end of the <code><a
+ href="#history1">History</a></code> object representing the new
+ resource and its <code>Document</code> object and related state.
+
+ <li>
+ <p><a href="#traverse">Traverse the history</a> to the new entry.
+
+ <li>
+ <p>If the navigation was initiated with <dfn
+ id=replacement>replacement enabled</dfn>, remove the entry
+ immediately before the new <a href="#current0">current entry</a> in
+ the session history.
+ </ol>
+ </dl>
+ </ol>
+
+ <h4 id=read-html><span class=secno>4.6.1. </span><dfn id=page-load
+ title=navigate-html>Page load processing model for HTML files</dfn></h4>
+
+ <p>When an HTML document is to be loaded in a <a href="#browsing0">browsing
+ context</a>, the user agent must create a <code>Document</code> object,
+ mark it as being an <a href="#html-" title="HTML documents">HTML
+ document</a>, create an <a href="#html-0">HTML parser</a>, associate it
+ with the document, and begin to use the bytes provided for the document as
+ the <a href="#input0">input stream</a> for that parser.
+
+ <p class=note>The <a href="#input0">input stream</a> converts bytes into
+ characters for use in the <span>tokeniser</span>. This process relies, in
+ part, on character encoding information found in the real <a
+ href="#content-type8" title=Content-Type>Content-Type metadata</a> of the
+ resource; the "sniffed type" is not used for this purpose.</p>
+ <!-- next two paragraphs are nearly identical to the navigate-text
+ section, keep them in sync -->
+
+ <p>When no more bytes are available, an EOF character is implied, which
+ eventually causes a <code title=event-load><a
+ href="#load0">load</a></code> event to be fired.
+
+ <p>After creating the <code>Document</code> object, but potentially before
+ the page has finished parsing, the user agent must <a
+ href="#update">update the session history with the new page</a>.
+
+ <h4 id=read-xml><span class=secno>4.6.2. </span><dfn id=page-load0
+ title=navigate-xml>Page load processing model for XML files</dfn></h4>
+
+ <p>When faced with displaying an XML file inline, user agents must first
+ create a <code>Document</code> object, following the requirements of the
+ XML and Namespaces in XML recommendations, RFC 3023, DOM3 Core, and other
+ relevant specifications. <a href="#refsXML">[XML]</a> <a
+ href="#refsXMLNS">[XMLNS]</a> <a href="#refsRFC3023">[RFC3023]</a> <a
+ href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <p>The actual HTTP headers and other metadata, not the headers as mutated
+ or implied by the algorithms given in this specification, are the ones
+ that must be used when determining the character encoding according to the
+ rules given in the above specifications.
+
+ <p>User agents may examine the namespace of the root <code>Element</code>
+ node of this <code>Document</code> object to perform namespace-based
+ dispatch to alternative processing tools, e.g. determining that the
+ content is actually a syndication feed and passing it to a feed handler.
+ If such processing is to take place, abort the steps in this section, and
+ jump to <a href="#navigate-non-Document">step 10</a> in the <a
+ href="#navigate">navigate</a> steps above.
+
+ <p>Otherwise, then, with the newly created <code>Document</code>, the user
+ agent must <a href="#update">update the session history with the new
+ page</a>. User agents may do this before the complete document has been
+ parsed (thus achieving <i>incremental rendering</i>).
+
+ <p>Error messages from the parse process (e.g. namespace well-formedness
+ errors) may be reported inline by mutating the <code>Document</code>.
+
+ <h4 id=read-text><span class=secno>4.6.3. </span><dfn id=page-load1
+ title=navigate-text>Page load processing model for text files</dfn></h4>
+
+ <p>When a plain text document is to be loaded in a <a
+ href="#browsing0">browsing context</a>, the user agent should create a
+ <code>Document</code> object, mark it as being an <a href="#html-"
+ title="HTML documents">HTML document</a>, create an <a href="#html-0">HTML
+ parser</a>, associate it with the document, act as if the tokeniser had
+ emitted a start tag token with the tag name "pre", set the <a
+ href="#tokenisation0">tokenisation</a> stage's <a href="#content2">content
+ model flag</a> to <i>PLAINTEXT</i>, and begin to pass the stream of
+ characters in the plain text document to that tokeniser.
+
+ <p>The rules for how to convert the bytes of the plain text document into
+ actual characters are defined in RFC 2046, RFC 2646, and subsequent
+ versions thereof. <a href="#refsRFC2046">[RFC2046]</a> <a
+ href="#refsRFC2646">[RFC2646]</a></p>
+ <!-- next two paragraphs are nearly identical to the navigate-html
+ section and similar to the "non-DOM-inline-content" section, and the
+ next three are similar to the navigate-image and navigate-plugin
+ sections; keep them all in sync -->
+
+ <p>When no more characters are available, an EOF character is implied, which
+ eventually causes a <code title=event-load><a
+ href="#load0">load</a></code> event to be fired.
+
+ <p>After creating the <code>Document</code> object, but potentially before
+ the page has finished parsing, the user agent must <a
+ href="#update">update the session history with the new page</a>.
+
+ <p>User agents may add content to the <code><a href="#head">head</a></code>
+ element of the <code>Document</code>, e.g. linking to a style sheet or an XBL
+ binding, providing script, giving the document a <code><a
+ href="#title1">title</a></code>, etc.
+
+ <h4 id=read-image><span class=secno>4.6.4. </span><dfn id=page-load2
+ title=navigate-image>Page load processing model for images</dfn></h4>
+
+ <p>When an image resource is to be loaded in a <a
+ href="#browsing0">browsing context</a>, the user agent should create a
+ <code>Document</code> object, mark it as being an <a href="#html-"
+ title="HTML documents">HTML document</a>, append an <code><a
+ href="#html">html</a></code> element to the <code>Document</code>, append
+ a <code><a href="#head">head</a></code> element and a <code><a
+ href="#body0">body</a></code> element to the <code><a
+ href="#html">html</a></code> element, append an <code><a
+ href="#img">img</a></code> to the <code><a href="#body0">body</a></code>
+ element, and set the <code title=attr-img-src><a
+ href="#src">src</a></code> attribute of the <code><a
+ href="#img">img</a></code> element to the address of the image.</p>
+ <!-- next three paragraphs are similar to the navigate-text section,
+ keep them in sync -->
+
+ <p>Then, the user agent must act as if it had <a href="#stops" title="stop
+ parsing">stopped parsing</a>.
+
+ <p>After creating the <code>Document</code> object, but potentially before
+ the page has finished fully loading, the user agent must <a
+ href="#update">update the session history with the new page</a>.
+
+ <p>User agents may add content to the <code><a href="#head">head</a></code>
+ element of the <code>Document</code>, or attributes to the <code><a
+ href="#img">img</a></code> element, e.g. to link to a style sheet or an XBL
+ binding, to provide a script, to give the document a <code><a
+ href="#title1">title</a></code>, etc.
+
+ <h4 id=read-plugin><span class=secno>4.6.5. </span><dfn id=page-load3
+ title=navigate-plugin>Page load processing model for content that uses
+ plugins</dfn></h4>
+
+ <p>When a resource that requires an external resource to be rendered is to
+ be loaded in a <a href="#browsing0">browsing context</a>, the user agent
+ should create a <code>Document</code> object, mark it as being an <a
+ href="#html-" title="HTML documents">HTML document</a>, append an <code><a
+ href="#html">html</a></code> element to the <code>Document</code>, append
+ a <code><a href="#head">head</a></code> element and a <code><a
+ href="#body0">body</a></code> element to the <code><a
+ href="#html">html</a></code> element, append an <code><a
+ href="#embed">embed</a></code> to the <code><a
+ href="#body0">body</a></code> element, and set the <code
+ title=attr-img-src><a href="#src">src</a></code> attribute of the <code><a
+ href="#embed">embed</a></code> element to the address of the resource.</p>
+ <!-- next three paragraphs are similar to the navigate-text section,
+ keep them in sync -->
+
+ <p>Then, the user agent must act as if it had <a href="#stops" title="stop
+ parsing">stopped parsing</a>.
+
+ <p>After creating the <code>Document</code> object, but potentially before
+ the page has finished fully loading, the user agent must <a
+ href="#update">update the session history with the new page</a>.
+
+ <p>User agents may add content to the <code><a href="#head">head</a></code>
+ element of the <code>Document</code>, or attributes to the <code><a
+ href="#embed">embed</a></code> element, e.g. to link to a style sheet or an
+ XBL binding, or to give the document a <code><a
+ href="#title1">title</a></code>.
+
+ <h4 id=non-DOM-inline-content><span class=secno>4.6.6. </span>Page load
+ processing model for inline content that doesn't have a DOM</h4>
+
+ <p>When the user agent is to <dfn id=display>display a user agent page
+ inline</dfn> in a <a href="#browsing0">browsing context</a>, the user
+ agent should create a <code>Document</code> object, mark it as being an <a
+ href="#html-" title="HTML documents">HTML document</a>, and then either
+ associate that <code>Document</code> with a custom rendering that is not
+ rendered using the normal <code>Document</code> rendering rules, or mutate
+ that <code>Document</code> until it represents the content the user agent
+ wants to render.</p>
+ <!-- next two paragraphs are similar to the navigate-text section,
+ keep them in sync -->
+
+ <p>Once the page has been set up, the user agent must act as if it had <a
+ href="#stops" title="stop parsing">stopped parsing</a>.
+
+ <p>After creating the <code>Document</code> object, but potentially before
+ the page has been completely set up, the user agent must <a
+ href="#update">update the session history with the new page</a>.
+
+ <h4 id=scroll-to-fragid><span class=secno>4.6.7. </span><dfn id=scrolling0
+ title=navigate-fragid>Scrolling to a fragment identifier</dfn></h4>
+
+ <p>When a user agent is supposed to scroll to a particular element, it may
+ change the scrolling position of the document as desired, or perform any
+ other relevant action.
+
+ <p class=big-issue>how to get a "particular element" from a frag id --
+ id="", name="", XPointer, etc; missing IDs (e.g. the infamous "#top")
+
+ <p>Then, the user agent must <a href="#update">update the session history
+ with the new page</a>, where "the new page" has the same
+ <code>Document</code> as before, but potentially has a different scroll
+ position.
+
+ <h3 id=content-type-sniffing><span class=secno>4.7. </span>Determining the
+ type of a new resource in a browsing context</h3>
+
+ <p class=warning>It is imperative that the rules in this section be
+ followed exactly. When two user agents use different heuristics for
+ content type detection, security problems can occur. For example, if a
+ server believes a contributed file to be an image (and thus benign), but a
+ Web browser believes the content to be HTML (and thus capable of executing
+ script), the end user can be exposed to malicious content, making the user
+ vulnerable to cookie theft attacks and other cross-site scripting attacks.
+
+ <p>The <dfn id=sniffed title="Content-Type sniffing">sniffed type of a
+ resource</dfn> must be found as follows:
+
+ <ol>
+ <li>
+ <p>If the resource was fetched over an HTTP protocol, and there is no
+ HTTP Content-Encoding header, but there is an HTTP Content-Type header
+ and it has a value whose bytes exactly match one of the following three
+ lines:</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Bytes in Hexadecimal
+
+ <th>Textual representation
+
+ <tbody>
+ <tr><!-- Old Apache default -->
+
+ <td>74 65 78 74 2f 70 6c 61 69 6e
+
+ <td><code title="">text/plain</code>
+
+ <tr><!-- Modern Apache default -->
+
+ <td>74 65 78 74 2f 70 6c 61 69 6e 3b 20 63 68 61 72 73 65 74 3d 49 53
+ 4f 2d 38 38 35 39 2d 31
+
+ <td><code title="">text/plain;&nbsp;charset=ISO-8859-1</code>
+
+ <tr><!-- Debian's arbitrarily different Modern Apache default -->
+
+ <td>74 65 78 74 2f 70 6c 61 69 6e 3b 20 63 68 61 72 73 65 74 3d 69 73
+ 6f 2d 38 38 35 39 2d 31
+
+ <td><code title="">text/plain;&nbsp;charset=iso-8859-1</code>
+ </table>
+
+ <p>...then jump to the <em title="content-type sniffing: text or
+ binary"><a href="#content-type4">text or binary</a></em> section below.</p>
+
+ <li>
+ <p>Let <var title="">official type</var> be the type given by the <a
+ href="#content-type8" title=Content-Type>Content-Type metadata</a> for
+ the resource (in lowercase<!-- XXX ASCII case folding -->, ignoring any
+ parameters). If there is no such type, jump to the <em
+ title="content-type sniffing: unknown type"><a
+ href="#content-type5">unknown type</a></em> step below.
+
+ <li>
+ <p>If <var title="">official type</var> ends in "+xml", or if it is
+ either "text/xml" or "application/xml", then the sniffed type of the
+ resource is <var title="">official type</var>; return that and abort
+ these steps.
+ </li>
+ <!-- we don't want
+ image/svg+xml going through the next step -->
+
+ <li>
+ <p>If <var title="">official type</var> is an image type supported by the
+ user agent (e.g. "image/png", "image/gif", "image/jpeg", etc), then jump
+ to the <em title="content-type sniffing: image"><a
+ href="#content-type6">images</a></em> section below.
+
+ <li>
+ <p>If <var title="">official type</var> is "text/html", then jump to the
+ <em title="content-type sniffing: feed or html"><a
+ href="#content-type7">feed or HTML</a></em> section below.
+
+ <li>
+ <p>Otherwise, the sniffed type of the resource is <var title="">official
+ type</var>.
+ </ol>
+
+ <h4 id=content-type0><span class=secno>4.7.1. </span><dfn
+ id=content-type4>Content-Type sniffing: text or binary</dfn></h4>
+
+ <ol>
+ <li>
+ <p>The user agent may wait for 512 or more bytes of the resource to be
+ available.
+
+ <li>
+ <p>Let <var title="">n</var> be the smaller of either 512 or the number
+ of bytes already available.
+
+ <li>
+ <p>If <var title="">n</var> is 4 or more, and the first bytes of the file
+ match one of the following byte sets:</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Bytes in Hexadecimal
+
+ <th>Description
+
+ <tbody>
+ <tr>
+ <td>FE FF
+
+ <td>UTF-16BE BOM <!-- followed by a character -->
+
+ <tr>
+ <td>FF FE
+
+ <td>UTF-16LE BOM <!-- followed by a character --> or UTF-32LE BOM
+
+ <tr>
+ <td>00 00 FE FF
+
+ <td>UTF-32BE BOM <!-- this one is redundant with the one above
+ <tr>
+ <td>FF FE 00 00
+ <td>UTF-32LE BOM
+-->
+
+
+ <tr>
+ <td>EF BB BF
+
+ <td>UTF-8 BOM
+ <!-- followed by a character, or the first byte of a multiple character sequence -->
+ <!-- nobody uses this
+ <tr>
+ <td>DD 73 66 73
+ <td>UTF-EBCDIC BOM
+-->
+
+ </table>
+
+ <p>...then the sniffed type of the resource is "text/plain".
+
+ <li>
+ <p>Otherwise, if any of the first <var title="">n</var> bytes of the
+ resource are in one of the following byte ranges:</p>
+ <!-- This byte list is based on RFC 2046 Section 4.1.2. Characters
+ in the range 0x00-0x1F, with the exception of 0x09 - 0x0D (ASCII
+ for TAB, LF, VT, FF, and CR), and character 0x1B (reportedly used
+ by some encodings as a shift escape), are invalid. Thus, if we see
+ them, we assume it's not text. -->
+
+ <ul class=brief>
+ <li> 0x00 - 0x08
+
+ <li> 0x0E - 0x1A
+
+ <li> 0x1C - 0x1F
+ </ul>
+
+ <p>...then the sniffed type of the resource is
+ "application/octet-stream".
+
+ <li>
+ <p>Otherwise, the sniffed type of the resource is "text/plain".
+ </ol>
+
+ <h4 id=content-type1><span class=secno>4.7.2. </span><dfn
+ id=content-type5>Content-Type sniffing: unknown type</dfn></h4>
+
+ <ol>
+ <li>
+ <p>The user agent may wait for 512 or more bytes of the resource to be
+ available.
+
+ <li>
+ <p>Let <var title="">stream length</var> be the smaller of either 512 or
+ the number of bytes already available.
+
+ <li>
+ <p>For each row in the table below:</p>
+
+ <ol>
+ <li>Let <var title="">pattern length</var> be the length of the pattern
+ (number of bytes described by the cell in the second column of the
+ row).
+
+ <li>If <var title="">stream length</var> is smaller than <var
+ title="">pattern length</var> then skip this row.
+
+ <li>Apply the "and" operator to the first <var title="">pattern
+ length</var> bytes of the resource and the given mask (the bytes in the
+ cell of the first column of that row), and let the result be the <var
+ title="">data</var>.
+
+ <li>If the bytes of the <var title="">data</var> match the given
+ pattern bytes exactly, then the sniffed type of the resource is the
+ type given in the cell of the third column in that row; abort these
+ steps.
+ </ol>
+
+ <li>
+ <p>As a last-ditch effort, jump to the <a href="#content-type4"
+ title="content-type sniffing: text or binary">text or binary</a>
+ section.
+ </ol>
+
+ <table>
+ <thead>
+ <tr>
+ <th colspan=2>Bytes in Hexadecimal
+
+ <th rowspan=2>Sniffed type
+
+ <th rowspan=2>Comment
+
+ <tr>
+ <th>Mask
+
+ <th>Pattern
+
+ <tbody>
+ <tr>
+ <td>FF FF DF DF DF DF DF DF DF FF DF DF DF DF
+
+ <td>3C 21 44 4F 43 54 59 50 45 20 48 54 4D 4C <!-- "<!DOCTYPE HTML" -->
+
+ <td>text/html
+
+ <td>The string "<code title="">&lt;!DOCTYPE HTML</code>" in US-ASCII or
+ compatible encodings, case-insensitively.
+
+ <tr>
+ <td>FF DF DF DF DF
+
+ <td>3C 48 54 4D 4C <!-- "<HTML" -->
+
+ <td>text/html
+
+ <td>The string "<code title="">&lt;HTML</code>" in US-ASCII or
+ compatible encodings, case-insensitively.
+
+ <tr>
+ <td>FF FF FF FF FF
+
+ <td>25 50 44 46 2D
+ <!-- "%PDF-" (from http://lxr.mozilla.org/seamonkey/source/netwerk/streamconv/converters/nsUnknownDecoder.cpp#321) -->
+
+
+ <td>application/pdf
+
+ <td>The string "<code title="">%PDF-</code>", the PDF signature.
+
+ <tr>
+ <td>FF FF FF FF FF FF FF FF FF FF FF
+
+ <td>25 21 50 53 2D 41 64 6F 62 65 2D
+ <!-- "%!PS-Adobe-" (from http://lxr.mozilla.org/seamonkey/source/netwerk/streamconv/converters/nsUnknownDecoder.cpp#321) -->
+
+
+ <td>application/postscript
+
+ <td>The string "<code title="">%!PS-Adobe-</code>", the PostScript
+ signature. <!-- copied from the section below -->
+
+ <tbody>
+ <tr>
+ <td>FF FF FF FF FF FF
+
+ <td>47 49 46 38 37 61 <!-- GIF87a -->
+
+ <td>image/gif
+
+ <td>The string "<code title="">GIF87a</code>", a GIF signature.
+
+ <tr>
+ <td>FF FF FF FF FF FF
+
+ <td>47 49 46 38 39 61 <!-- GIF89a -->
+
+ <td>image/gif
+
+ <td>The string "<code title="">GIF89a</code>", a GIF signature.
+
+ <tr>
+ <td>FF FF FF FF FF FF FF FF
+
+ <td>89 50 4E 47 0D 0A 1A 0A
+ <!-- [0x89]PNG[CR][LF][EOF][LF]; 137 80 78 71 13 10 26 10 -->
+
+ <td>image/png
+
+ <td>The PNG signature.
+
+ <tr>
+ <td>FF FF FF
+
+ <td>FF D8 FF
+ <!-- SOI marker followed by the first byte of another marker -->
+
+ <td>image/jpeg
+
+ <td>A JPEG SOI marker followed by the first byte of another marker.
+ </table>
+
+ <p>User agents may support further types if desired, by implicitly adding
+ to the above table. However, user agents should not use any other patterns
+ for types already mentioned in the table above, as this could then be used
+ for privilege escalation (where, e.g., a server uses the above table to
+ determine that content is not HTML and thus safe from XSS attacks, but
+ then a user agent detects it as HTML anyway and allows script to execute).
+
+ <h4 id=content-type2><span class=secno>4.7.3. </span><dfn
+ id=content-type6>Content-Type sniffing: image</dfn></h4>
+
+ <p>If the first bytes of the file match one of the byte sequences in the
+ first column of the following table, then the sniffed type of the
+ resource is the type given in the corresponding cell in the second column
+ on the same row:
+
+ <table>
+ <thead>
+ <tr>
+ <th>Bytes in Hexadecimal
+
+ <th>Sniffed type
+
+ <th>Comment <!-- update the table above if you change this! -->
+
+ <tbody>
+ <tr>
+ <td>47 49 46 38 37 61 <!-- GIF87a -->
+
+ <td>image/gif
+
+ <td>The string "<code title="">GIF87a</code>", a GIF signature.
+
+ <tr>
+ <td>47 49 46 38 39 61 <!-- GIF89a -->
+
+ <td>image/gif
+
+ <td>The string "<code title="">GIF89a</code>", a GIF signature.
+
+ <tr>
+ <td>89 50 4E 47 0D 0A 1A 0A
+ <!-- [0x89]PNG[CR][LF][EOF][LF]; 137 80 78 71 13 10 26 10 -->
+
+ <td>image/png
+
+ <td>The PNG signature.
+
+ <tr>
+ <td>FF D8 FF
+ <!-- SOI marker followed by the first byte of another marker -->
+
+ <td>image/jpeg
+
+ <td>A JPEG SOI marker followed by the first byte of another marker.
+ </table>
+
+ <p>User agents must ignore any rows for image types that they do not
+ support.
+
+ <p>Otherwise, the <i>sniffed type</i> of the resource is the same as its
+ <var title="">official type</var>.
+
+ <h4 id=content-type3><span class=secno>4.7.4. </span><dfn
+ id=content-type7>Content-Type sniffing: feed or HTML</dfn></h4>
+ <!-- mostly based on:
+ http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx
+ http://lxr.mozilla.org/seamonkey/source/browser/components/feeds/src/nsFeedSniffer.cpp#192
+ http://lxr.mozilla.org/seamonkey/source/browser/components/feeds/src/nsFeedSniffer.cpp#127
+ -->
+
+ <ol>
+ <li>
+ <p>The user agent may wait for 512 or more bytes of the resource to be
+ available.
+
+ <li>
+ <p>Let <var title="">s</var> be the stream of bytes, and let <span><var
+ title="">s</var>[<var title="">i</var>]</span> represent the byte in
+ <var title="">s</var> with position <var title="">i</var>, treating <var
+ title="">s</var> as zero-indexed (so the first byte is at <span><var
+ title="">i</var>=0</span>).
+
+ <li>
+ <p>If at any point this algorithm requires the user agent to determine
+ the value of a byte in <var title="">s</var> which is not yet available,
+ or which is past the first 512 bytes of the resource, or which is beyond
+ the end of the resource, the user agent must stop this algorithm, and
+ assume that the sniffed type of the resource is "text/html".</p>
+
+ <p class=note>User agents are allowed, by the first step of this
+ algorithm, to wait until the first 512 bytes of the resource are
+ available.
+
+ <li>
+ <p>Initialise <var title="">pos</var> to 0.
+
+ <li>
+ <p>Examine <span><var title="">s</var>[<var title="">pos</var>]</span>.</p>
+
+ <dl
+ class=switch><!-- skip whitespace (S token as defined in XML 1.0 section 2.3; production [3]) -->
+
+ <dt>If it is 0x09 (ASCII tab), 0x20 (ASCII space), 0x0A (ASCII LF), or
+ 0x0D (ASCII CR)
+
+ <dd>Increase <var title="">pos</var> by 1 and repeat this step.
+
+ <dt>If it is 0x3C (ASCII "<code title="">&lt;</code>")
+
+ <dd>Increase <var title="">pos</var> by 1 and go to the next step.
+
+ <dt>If it is anything else
+
+ <dd>The sniffed type of the resource is "text/html". Abort these steps.
+ </dl>
+
+ <li>
+ <p>If the bytes with positions <var title="">pos</var> to <span><var
+ title="">pos</var>+2</span> in <var title="">s</var> are exactly equal
+ to 0x21, 0x2D, 0x2D respectively (ASCII for "<code
+ title="">!--</code>"), then:</p>
+
+ <ol>
+ <li>Increase <var title="">pos</var> by 3.</li>
+ <!-- skips past the " ! - - " -->
+
+ <li>If the bytes with positions <span><var title="">pos</var></span> to
+ <span><var title="">pos</var>+2</span> in <var title="">s</var> are
+ exactly equal to 0x2D, 0x2D, 0x3E respectively (ASCII for "<code
+ title="">--&gt;</code>"), then increase <var title="">pos</var> by 3
+ and jump back to the previous step (step 5) in the overall algorithm in
+ this section.
+
+ <li>Otherwise, increase <var title="">pos</var> by 1.
+
+ <li>Return to step 2 in these substeps.
+ </ol>
+
+ <li>
+ <p>If <span><var title="">s</var>[<var title="">pos</var>]</span> is 0x21
+ (ASCII "<code title="">!</code>"):</p>
+ <!-- this skips past a DOCTYPE if there is one. It is brain-dead
+ because we don't have to be clever to parse the Atom and RSS x.y
+ DOCTYPEs, as they don't do anything clever like have internal
+ subsets of quoted ">" characters. If this fails, then that's ok,
+ we'll treat it as HTML which is fine since we know it's not a feed
+ in that case. -->
+
+ <ol>
+ <li>Increase <var title="">pos</var> by 1.
+
+ <li>If <span><var title="">s</var>[<var title="">pos</var>]</span> equals
+ 0x3E, then increase <var title="">pos</var> by 1 and jump back to step
+ 5 in the overall algorithm in this section.
+
+ <li>Otherwise, return to step 1 in these substeps.
+ </ol>
+
+ <li>
+ <p>If <span><var title="">s</var>[<var title="">pos</var>]</span> is 0x3F
+ (ASCII "<code title="">?</code>"):</p>
+
+ <ol>
+ <li>Increase <var title="">pos</var> by 1.
+
+ <li>If <span><var title="">s</var>[<var title="">pos</var>]</span> and
+ <span><var title="">s</var>[<var title="">pos</var>+1]</span> equal
+ 0x3F and 0x3E respectively, then increase <var title="">pos</var> by 2
+ and jump back to step 5 in the overall algorithm in this section.
+
+ <li>Otherwise, return to step 1 in these substeps.
+ </ol>
+
+ <li>
+ <p>Otherwise, if the bytes in <var title="">s</var> starting at <var
+ title="">pos</var> match any of the sequences of bytes in the first
+ column of the following table, then the user agent must follow the steps
+ given in the corresponding cell in the second column of the same row.</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Bytes in Hexadecimal
+
+ <th>Requirement
+
+ <th>Comment
+
+ <tbody>
+ <tr>
+ <td>72 73 73
+
+ <td>The sniffed type of the resource is "application/rss+xml"; abort
+ these steps
+
+ <td>The three ASCII characters "<code title="">rss</code>"
+
+ <tr>
+ <td>66 65 65 64
+
+ <td>The sniffed type of the resource is "application/atom+xml"; abort
+ these steps
+
+ <td>The four ASCII characters "<code title="">feed</code>"
+
+ <tr>
+ <td>72 64 66 3A 52 44 46
+
+ <td>Continue to the next step in this algorithm
+
+ <td>The ASCII characters "<code title="">rdf:RDF</code>"
+ </table>
+
+ <p>If none of the byte sequences above match the bytes in <var
+ title="">s</var> starting at <var title="">pos</var>, then the sniffed
+ type of the resource is "text/html". Abort these steps.</p>
+
+ <li>
+ <p class=big-issue>If, before the next ">", you find two xmlns*
+ attributes with http://www.w3.org/1999/02/22-rdf-syntax-ns# and
+ http://purl.org/rss/1.0/ as the namespaces, then the sniffed type of the
+ resource is "application/rss+xml", abort these steps. (maybe we only
+ need to check for http://purl.org/rss/1.0/ actually)
+
+ <li>
+ <p>Otherwise, the sniffed type of the resource is "text/html".
+ </ol>
+
+ <p class=note>For efficiency reasons, implementations may wish to implement
+ this algorithm and the algorithm for detecting the character encoding of
+ HTML documents in parallel.
+
+ <h4 id=content-type><span class=secno>4.7.5. </span>Content-Type metadata</h4>
+
+ <p>What explicit <dfn id=content-type8 title=Content-Type>Content-Type
+ metadata</dfn> is associated with the resource (the resource's type
+ information) depends on the protocol that was used to fetch the resource.
+
+ <p>For HTTP resources, only the Content-Type HTTP header contributes any
+ data; the explicit type of the resource is then the value of that header,
+ interpreted as described by the HTTP specifications. <a
+ href="#refsHTTP">[HTTP]</a>
+
+ <p>For resources fetched from the filesystem, user agents should use
+ platform-specific conventions, e.g. operating system extension/type
+ mappings.
+
+ <p>Extensions must not be used for determining resource types for resources
+ fetched over HTTP.
+
+ <p>For resources fetched over most other protocols, e.g. FTP, there is no
+ type information.
+
+ <h3 id=user-prompts><span class=secno>4.8. </span>User prompts</h3>
+
+ <p>The <dfn id=alert title=dom-alert><code>alert(<var
+ title="">message</var>)</code></dfn> method, when invoked, must show the
+ given <var title="">message</var> to the user. The user agent may make the
+ method wait for the user to acknowledge the message before returning; if
+ so, the user agent must <a href="#pause">pause</a> while the method is
+ waiting.
+
+ <p>The <dfn id=confirm title=dom-confirm><code>confirm(<var
+ title="">message</var>)</code></dfn> method, when invoked, must show the
+ given <var title="">message</var> to the user, and ask the user to respond
+ with a positive or negative response. The user agent must then <a
+ href="#pause">pause</a> as the method waits for the user's response.
+ If the user responds positively, the method must return true, and if the
+ user responds negatively, the method must return false.
+
+ <p>The <dfn id=prompt title=dom-prompt><code>prompt(<var
+ title="">message</var>, <var title="">default</var>)</code></dfn> method,
+ when invoked, must show the given <var title="">message</var> to the user,
+ and ask the user to either respond with a string value or abort. The user
+ agent must then <a href="#pause">pause</a> as the method waits for the
+ user's response. The second argument is optional. If the second argument
+ (<var title="">default</var>) is present, then the response must be
+ defaulted to the value given by <var title="">default</var>. If the user
+ aborts, then the method must return null; otherwise, the method must
+ return the string that the user responded with.
+
+ <p>The <dfn id=print title=dom-print><code>print()</code></dfn> method,
+ when invoked, should offer the user the opportunity to <a
+ href="#obtain">obtain a physical form</a> of the document. The user agent
+ may make the method wait for the user to either accept or decline before
+ returning; if so, the user agent must <a href="#pause">pause</a> while the
+ method is waiting. (This does not, of course, preclude the user agent from
+ <em>always</em> offering the user the opportunity to convert the
+ document to whatever media the user might want.)
+
+ <h3 id=scripting><span class=secno>4.9. </span>Scripting</h3>
+
+ <h4 id=running><span class=secno>4.9.1. </span>Running executable code</h4>
+
+ <p>Various mechanisms can cause author-provided executable code to run in
+ the context of a document. These mechanisms include, but are probably not
+ limited to:
+
+ <ul>
+ <li>Processing of <code><a href="#script0">script</a></code> elements.
+
+ <li>Processing of inline <code title="javascript protocol"><a
+ href="#the-javascript">javascript:</a></code> URIs (e.g. the <code
+ title=attr-img-src><a href="#src">src</a></code> attribute of <code><a
+ href="#img">img</a></code> elements, or an <code title="">@import</code>
+ rule in a CSS <code><a href="#style">style</a></code> element block).
+
+ <li>Event handlers, whether registered through the DOM using <code
+ title="">addEventListener()</code>, by explicit <a href="#event2">event
+ handler content attributes</a>, by <a href="#event3">event handler DOM
+ attributes</a>, or otherwise.
+
+ <li>Processing of technologies like XBL or SVG that have their own
+ scripting features.
+ </ul>
+
+ <p>User agents may provide a mechanism to enable or disable the execution
+ of author-provided code. When the user agent is configured such that
+ author-provided code does not execute, or if the user agent is implemented
+ so as to never execute author-provided code, it is said that <dfn
+ id=scripting1>scripting is disabled</dfn>. When author-provided code
+ <em>does</em> execute, <dfn id=scripting2>scripting is enabled</dfn>. A
+ user agent with scripting disabled is a <a href="#non-scripted"
+ title="User agents with no scripting support">user agent with no scripting
+ support</a> for the purposes of conformance.
+
+ <h4 id=origin><span class=secno>4.9.2. </span>Origin</h4>
+ <!-- Hallowed are the Ori -->
+ <!--
+ https://bugzilla.mozilla.org/show_bug.cgi?id=346659
+ https://bugzilla.mozilla.org/show_bug.cgi?id=344495
+ -->
+
+ <p>Access to certain APIs is granted or denied to scripts based on the <dfn
+ id=origin0>origin</dfn> of the script and the API being accessed.
+
+ <dl>
+ <dt>If a script is in a <code><a href="#script0">script</a></code> element
+
+ <dd>The origin of the script is the origin of the <code>Document</code> to
+ which the <code><a href="#script0">script</a></code> element belongs.
+
+ <dt>If a script is a function or other code reference created by another
+ script
+
+ <dd>The origin of the script is the origin of the script that created it.
+
+ <dt>If a script is a <a href="#the-javascript" title="javascript
+ protocol"><code title="">javascript:</code> URI</a> in an attribute
+
+ <dd>The origin is the origin of the <code>Document</code> of the element
+ on which the attribute is found.
+
+ <dt>If a script is a <a href="#the-javascript" title="javascript
+ protocol"><code title="">javascript:</code> URI</a> in a style sheet
+
+ <dd>The origin is the origin of the <code>Document</code> to which the
+ style sheet applies.
+
+ <dt>If a script is a <a href="#the-javascript" title="javascript
+ protocol"><code title="">javascript:</code> URI</a> to which a <a
+ href="#browsing0">browsing context</a> is being <a href="#navigate"
+ title=navigate>navigated</a>, the URI having been provided by the user
+ (e.g. by using a <i>bookmarklet</i>)
+
+ <dd>The origin is the origin of the <code>Document</code> of the <a
+ href="#browsing0">browsing context</a>'s <a href="#active">active
+ document</a>.
+
+ <dt>If a script is a <a href="#the-javascript" title="javascript
+ protocol"><code title="">javascript:</code> URI</a> to which a <a
+ href="#browsing0">browsing context</a> is being <a href="#navigate"
+ title=navigate>navigated</a>, the URI having been declared in markup
+
+ <dd>The origin is the origin of the <code>Document</code> of the element
+ (e.g. an <code><a href="#a">a</a></code> or <code><a
+ href="#area">area</a></code> element) that declared the URI.
+
+ <dt>If a script is a <a href="#the-javascript" title="javascript
+ protocol"><code title="">javascript:</code> URI</a> to which a <a
+ href="#browsing0">browsing context</a> is being <a href="#navigate"
+ title=navigate>navigated</a>, the URI having been provided by script
+
+ <dd>The origin is the origin of the script that provided the URI.</dd>
+ <!-- ... -->
+ </dl>
+
+ <p>The origin of scripts thus comes down to finding the origin of
+ <code>Document</code> objects.
+
+ <p>The origin of a <code>Document</code> or image that was served over the
+ network and whose address uses a URI scheme with a server-based naming
+ authority is the tuple consisting of the &lt;scheme&gt;, &lt;host&gt;, and
+ &lt;port&gt; parts of the <code>Document</code>'s full URI. <a
+ href="#refsRFC2396">[RFC2396]</a> <a href="#refsRFC2732">[RFC2732]</a>
+
+ <p>The origin of a <code>Document</code> or image that was generated from a
+ <code>data:</code> URI found in another <code>Document</code> or in a
+ script is the origin of that <code>Document</code> or script.
+
+ <p>The origin of a <code>Document</code> or image that was generated from a
+ <code>data:</code> URI from another source is a globally unique identifier
+ assigned when the document is created.
+
+ <p>The origin of a <code>Document</code> or image that was generated from a
+ <a href="#the-javascript" title="javascript
+ protocol"><code>javascript:</code> URI</a> is the same as the origin of
+ that <code>javascript:</code> URI.
+
+ <p><dfn id=the-string>The string representing the script's domain in IDNA
+ format</dfn> is obtained as follows: take the domain part of the script's
+ <a href="#origin0">origin</a> tuple and apply the IDNA ToASCII algorithm
+ and then the IDNA ToUnicode algorithm to each component of the domain name
+ (with both the AllowUnassigned and UseSTD3ASCIIRules flags set both
+ times). <a href="#refsRFC3490">[RFC3490]</a>
+
+ <p>If ToASCII fails to convert one of the components of the string, e.g.
+ because it is too long or because it contains invalid characters, or if
+ the origin of the script has no domain part, then the string representing
+ the script's domain in IDNA format cannot be obtained. (ToUnicode is
+ defined to never fail.)
+
+ <h4 id=security3><span class=secno>4.9.3. </span>Security exceptions</h4>
+
+ <p class=big-issue>Define <dfn id=security8>security exception</dfn>.
+
+ <h4 id=javascript-protocol><span class=secno>4.9.4. </span><dfn
+ id=the-javascript title="javascript protocol">The <code
+ title="">javascript:</code> protocol</dfn></h4>
+
+ <p>A URI using the <code title="">javascript:</code> protocol must, if
+ evaluated, be evaluated using the in-context evaluation operation defined
+ for <code title="">javascript:</code> URIs. <a
+ href="#refsJSURI">[JSURI]</a></p>
+ <!--
+JSURI: http://ietfreport.isoc.org/all-ids/draft-hoehrmann-javascript-scheme-00.txt and
+ http://www.websitedev.de/ietf/draft-hoehrmann-javascript-scheme-00.txt should be as stable as it gets,
+ http://ietfreport.isoc.org/idref/draft-hoehrmann-javascript-scheme/ for the latest version
+-->
+
+ <p>When a browsing context is <a href="#navigate"
+ title=navigate>navigated</a> to a <code>javascript:</code> URI, and the <a
+ href="#active">active document</a> of that browsing context has the same
+ <a href="#origin0">origin</a> as the URI, the dereference context must be
+ the <a href="#browsing0">browsing context</a> being navigated.
+
+ <p>When a browsing context is <a href="#navigate"
+ title=navigate>navigated</a> to a <code>javascript:</code> URI, and the <a
+ href="#active">active document</a> of that browsing context has a
+ <em>different</em> <a href="#origin0">origin</a> than the URI, the
+ dereference context must be an empty object.
+
+ <p>Otherwise, the dereference context must be the <a
+ href="#browsing0">browsing context</a> of the <code>Document</code> to
+ which belongs the element for which the URI is being dereferenced, or to
+ which the style sheet for which the URI is being dereferenced applies,
+ whichever is appropriate.
+
+ <p>URIs using the <code title="">javascript:</code> protocol should be
+ evaluated when the resource for that URI is needed, unless <a
+ href="#scripting1">scripting is disabled</a> or the <code>Document</code>
+ corresponding to the dereference context (as defined above), if any, has
+ <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> enabled.
+
+ <p>If the dereference by-product is void (there is no return value), then
+ the URI must be treated in a manner equivalent to an HTTP resource with an
+ HTTP 204 No Content response.
+
+ <p>Otherwise, the URI must be treated in a manner equivalent to an HTTP
+ resource with a 200 OK response whose <a href="#content-type8"
+ title=Content-Type>Content-Type metadata</a> is <code
+ title="">text/html</code> and whose response body is the dereference
+ by-product, converted to a string value.
+
+ <p class=note>Certain contexts, in particular <code><a
+ href="#img">img</a></code> elements, ignore the <a href="#content-type8"
+ title=Content-Type>Content-Type metadata</a>.
+
+ <div class=example>
+ <p>So for example a <code title="">javascript:</code> URI for a <code
+ title=attr-img-src><a href="#src">src</a></code> attribute of an <code><a
+ href="#img">img</a></code> element would be evaluated in the context of
+ the page as soon as the attribute is set; it would then be sniffed to
+ determine the image type and decoded as an image.</p>
+
+ <p>A <code title="">javascript:</code> URI in an <code
+ title=attr-a-href>href</code> attribute of an <code><a
+ href="#a">a</a></code> element would only be evaluated when the link was
+ <a href="#following0" title="following hyperlinks">followed</a>.</p>
+
+ <p>The <code title=attr-iframe-src><a href="#src1">src</a></code>
+ attribute of an <code><a href="#iframe">iframe</a></code> element would
+ be evaluated in the context of the <code><a
+ href="#iframe">iframe</a></code>'s own <a href="#browsing0">browsing
+ context</a>; once evaluated, its return value (if it was not void) would
+ replace that <a href="#browsing0">browsing context</a>'s document, thus
+ changing the variables visible in that <a href="#browsing0">browsing
+ context</a>.</p>
+ </div>
+
+ <h4 id=events><span class=secno>4.9.5. </span>Events</h4>
+
+ <p class=big-issue>We need to define how to handle events that are to be
+ fired on a Document that is no longer the active document of its browsing
+ context, and for Documents that have no browsing context. Do the events
+ fire? Do the handlers in that document not fire? Do we just define
+ scripting to be disabled when the document isn't active, with events still
+ running as is? See also the <code><a href="#script0">script</a></code>
+ element section, which says scripts don't run when the document isn't
+ active.
+
+ <h5 id=event-handler-attributes><span class=secno>4.9.5.1. </span>Event
+ handler attributes</h5>
+
+ <p><a href="#html-elements">HTML elements</a> can have <dfn id=event1>event
+ handler attributes</dfn> specified. These act as bubbling event listeners
+ for the element on which they are specified.
+
+ <p>Each event handler attribute has two parts, an <a href="#event2"
+ title="event handler content attributes">event handler content
+ attribute</a> and an <a href="#event3" title="event handler DOM
+ attributes">event handler DOM attribute</a>. Event handler attributes must
+ initially be set to null. When their value changes (through the changing
+ of their event handler content attribute or their event handler DOM
+ attribute), they will either be null, or have an
+ <code>EventListener</code> object assigned to them.
+
+ <p>Objects other than <code>Element</code> objects, in particular <code><a
+ href="#window">Window</a></code>, only have <a href="#event3" title="event
+ handler DOM attributes">event handler DOM attributes</a> (since they have
+ no content attributes).
+
+ <p><dfn id=event2>Event handler content attributes</dfn>, when specified,
+ must contain valid ECMAScript code matching the ECMAScript <code
+ title="">FunctionBody</code> production. <a
+ href="#refsECMA262">[ECMA262]</a>
+
+ <p>When an event handler content attribute is set, its new value must be
+ interpreted as the body of an anonymous function with a single argument
+ called <code>event</code>, with the new function's scope chain being
+ linked from the activation object of the handler, to the element, to the
+ element's <code>form</code> element if it is a form control, to the
+ <code>Document</code> object, to the <a href="#browsing0">browsing
+ context</a> of that <code>Document</code>. The function's
+ <code>this</code> parameter must be the <code>Element</code> object
+ representing the element. The resulting function must then be set as the
+ value of the corresponding event handler attribute, and the new value must
+ be set as the value of the content attribute. If the given function body
+ fails to compile, then the corresponding event handler attribute must be
+ set to null instead (the content attribute must still be updated to the
+ new value, though).
+
+ <p class=note>See ECMA262 Edition 3, sections 10.1.6 and 10.2.3, for more
+ details on activation objects. <a href="#refsECMA262">[ECMA262]</a>
+
+ <p class=issue>How do we allow non-JS event handlers?
+
+ <p><dfn id=event3>Event handler DOM attributes</dfn>, on setting, must set
+ the corresponding event handler attribute to their new value, and on
+ getting, must return whatever the current value of the corresponding event
+ handler attribute is (possibly null).
+
+ <p>The following are the event handler attributes that must be supported by
+ all <a href="#html-elements">HTML elements</a>, as both content attributes
+ and DOM attributes, and on <code><a href="#window">Window</a></code>
+ objects, as DOM attributes:
+
+ <dl>
+ <dt><dfn id=onabort title=handler-onabort><code>onabort</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever an <code title=event-abort><a
+ href="#abort">abort</a></code> event is targeted at or bubbles through
+ the element.
+ </dd>
+ <!--
+ <dt><dfn title="handler-onbeforecopy"><code>onbeforecopy</code></dfn></dt> -->
+ <!-- widely used -->
+ <!--
+
+ <dd><p>Must be invoked whenever a <code
+ title="event-beforecopy">beforecopy</code> event is targeted at or bubbles
+ through the element.</p></dd>
+-->
+
+ <dt><dfn id=onbeforeunload
+ title=handler-onbeforeunload><code>onbeforeunload</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code
+ title=event-beforeunload>beforeunload</code> event is targeted at or
+ bubbles through the element.
+
+ <dt><dfn id=onblur title=handler-onblur><code>onblur</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-blur>blur</code> event is
+ targeted at or bubbles through the element.
+
+ <dt><dfn id=onchange title=handler-onchange><code>onchange</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-change>change</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=onclick title=handler-onclick><code>onclick</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-click>click</code> event
+ is targeted at or bubbles through the element.
+
+ <dt><dfn id=oncontextmenu
+ title=handler-oncontextmenu><code>oncontextmenu</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code
+ title=event-contextmenu>contextmenu</code> event is targeted at or
+ bubbles through the element.
+ </dd>
+ <!--
+ <dt><dfn title="handler-oncopy"><code>oncopy</code></dfn></dt> -->
+ <!-- widely used -->
+ <!--
+
+ <dd><p>Must be invoked whenever a <code
+ title="event-copy">copy</code> event is targeted at or bubbles
+ through the element.</p></dd>
+-->
+
+ <dt><dfn id=ondblclick
+ title=handler-ondblclick><code>ondblclick</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-dblclick>dblclick</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=ondrag title=handler-ondrag><code>ondrag</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-drag><a
+ href="#drag">drag</a></code> event is targeted at or bubbles through the
+ element.
+
+ <dt><dfn id=ondragend title=handler-ondragend><code>ondragend</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-dragend><a
+ href="#dragend">dragend</a></code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=ondragenter
+ title=handler-ondragenter><code>ondragenter</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-dragenter><a
+ href="#dragenter">dragenter</a></code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=ondragleave
+ title=handler-ondragleave><code>ondragleave</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-dragleave><a
+ href="#dragleave">dragleave</a></code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=ondragover
+ title=handler-ondragover><code>ondragover</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-dragover><a
+ href="#dragover">dragover</a></code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=ondragstart
+ title=handler-ondragstart><code>ondragstart</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-dragstart><a
+ href="#dragstart">dragstart</a></code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=ondrop title=handler-ondrop><code>ondrop</code></dfn>
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-drop><a
+ href="#drop">drop</a></code> event is targeted at or bubbles through the
+ element.
+
+ <dt><dfn id=onerror title=handler-onerror><code>onerror</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever an <code title=event-error><a
+ href="#error1">error</a></code> event is targeted at or bubbles through
+ the element.</p>
+
+ <p class=note>The <code title=handler-onerror><a
+ href="#onerror">onerror</a></code> handler is also used for <a
+ href="#runtime-script-errors">reporting script errors</a>.
+
+ <dt><dfn id=onfocus title=handler-onfocus><code>onfocus</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-focus>focus</code> event
+ is targeted at or bubbles through the element.
+
+ <dt><dfn id=onkeydown title=handler-onkeydown><code>onkeydown</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-keydown>keydown</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=onkeypress
+ title=handler-onkeypress><code>onkeypress</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-keypress>keypress</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=onkeyup title=handler-onkeyup><code>onkeyup</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-keyup>keyup</code> event
+ is targeted at or bubbles through the element.
+
+ <dt><dfn id=onload title=handler-onload><code>onload</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-load><a
+ href="#load0">load</a></code> event is targeted at or bubbles through
+ the element.
+
+ <dt><dfn id=onmessage title=handler-onmessage><code>onmessage</code></dfn></dt>
+ <!-- introduced for <event-source> -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-message><a
+ href="#message">message</a></code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=onmousedown
+ title=handler-onmousedown><code>onmousedown</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code
+ title=event-mousedown>mousedown</code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=onmousemove
+ title=handler-onmousemove><code>onmousemove</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code
+ title=event-mousemove>mousemove</code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=onmouseout
+ title=handler-onmouseout><code>onmouseout</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-mouseout>mouseout</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=onmouseover
+ title=handler-onmouseover><code>onmouseover</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code
+ title=event-mouseover>mouseover</code> event is targeted at or bubbles
+ through the element.
+
+ <dt><dfn id=onmouseup title=handler-onmouseup><code>onmouseup</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-mouseup>mouseup</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=onmousewheel
+ title=handler-onmousewheel><code>onmousewheel</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code
+ title=event-mousewheel>mousewheel</code> event is targeted at or bubbles
+ through the element.
+ </dd>
+ <!--
+ <dt><dfn title="handler-onpaste"><code>onpaste</code></dfn></dt> -->
+ <!-- widely used -->
+ <!--
+
+ <dd><p>Must be invoked whenever a <code
+ title="event-paste">paste</code> event is targeted at or bubbles
+ through the element.</p></dd>
+-->
+
+ <dt><dfn id=onresize title=handler-onresize><code>onresize</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-resize>resize</code>
+ event is targeted at or bubbles through the element.
+ </dd>
+ <!-- XXX should define when it fires -->
+
+ <dt><dfn id=onscroll title=handler-onscroll><code>onscroll</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-scroll>scroll</code>
+ event is targeted at or bubbles through the element.
+ </dd>
+ <!-- XXX should define when it fires -->
+
+ <dt><dfn id=onselect title=handler-onselect><code>onselect</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-select><a
+ href="#select">select</a></code> event is targeted at or bubbles through
+ the element.
+ </dd>
+ <!-- XXX should define when it fires -->
+ <!--XXX
+ <dt><dfn title="handler-onselectstart"><code>onselectstart</code></dfn></dt> -->
+ <!-- widely used -->
+ <!--
+
+ <dd><p>Must be invoked whenever a <code
+ title="event-selectstart">selectstart</code> event is targeted at or bubbles
+ through the element.</p></dd>
+-->
+ <!-- XXX should define when it fires -->
+
+ <dt><dfn id=onsubmit title=handler-onsubmit><code>onsubmit</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever a <code title=event-submit>submit</code>
+ event is targeted at or bubbles through the element.
+
+ <dt><dfn id=onunload title=handler-onunload><code>onunload</code></dfn></dt>
+ <!-- widely used -->
+
+ <dd>
+ <p>Must be invoked whenever an <code title=event-unload>unload</code>
+ event is targeted at or bubbles through the element.
+ </dd>
+ <!-- XXX need to fire this -->
+ </dl>
+
+ <p>When an event handler attribute is invoked, its argument must be set to
+ the <code>Event</code> object of the event in question. If the function
+ returns the exact boolean value false, the event's
+ <code>preventDefault()</code> method must then be invoked. Exception: for
+ historical reasons, for the HTML <code>mouseover</code> event, the
+ <code>preventDefault()</code> method must be called when the function
+ returns true instead.</p>
+ <!-- IE actually uncancels the event if the function returns true -->
+
+ <p>When <a href="#scripting1">scripting is disabled</a>, event handler
+ attributes must do nothing.
+
+ <p>When <a href="#scripting2">scripting is enabled</a>, all event handler
+ attributes on an element, whether set to null or to a function, must be
+ registered as event listeners on the element, as if the <code
+ title=dom-EventTarget-addEventListenerNS>addEventListenerNS()</code>
+ method on the <code>Element</code> object's <code>EventTarget</code>
+ interface had been invoked when the element was created, with the event
+ type (<code title=dom-event-type>type</code> argument) equal to the type
+ described for the event handler attribute in the list above, the namespace
+ (<code title=dom-event-namespaceURI>namespaceURI</code> argument) set to
+ null, the listener set to be a target and bubbling phase listener (<code
+ title=dom-event-useCapture>useCapture</code> argument set to false), the
+ event group set to the default group (<code
+ title=dom-event-evtGroup>evtGroup</code> argument set to null), and the
+ event listener itself (<code title=dom-event-listener>listener</code>
+ argument) set to do nothing while the event handler attribute is null, and
+ set to invoke the function associated with the event handler attribute
+ otherwise.
+
+ <h5 id=event><span class=secno>4.9.5.2. </span>Event firing</h5>
+
+ <p class=big-issue>maybe this should be moved higher up (terminology?
+ conformance? DOM?)
+
+ <p>Certain operations and methods are defined as firing events on elements.
+ For example, the <code title=dom-click><a href="#click">click()</a></code>
+ method on the <code><a href="#htmlelement">HTMLElement</a></code>
+ interface is defined as firing a <code title=event-click>click</code>
+ event on the element. <a href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p><dfn id=firing title="fire a click event">Firing a <code
+ title=event-click>click</code> event</dfn> means that a <a
+ href="http://www.w3.org/TR/DOM-Level-3-Events/events.html#event-click"><code>click</code></a>
+ event with no namespace, which bubbles and is cancelable, and which uses
+ the <code>MouseEvent</code> interface, must be dispatched at the given
+ element. The event object must have its <code title="">screenX</code>,
+ <code title="">screenY</code>, <code title="">clientX</code>, <code
+ title="">clientY</code>, and <code title="">button</code> attributes set
+ to 0, its <code title="">ctrlKey</code>, <code title="">shiftKey</code>,
+ <code title="">altKey</code>, and <code title="">metaKey</code> attributes
+ set according to the current state of the key input device, if any (false
+ for any keys that are not available), its <code title="">detail</code>
+ attribute set to 1, and its <code title="">relatedTarget</code> attribute
+ set to null. The <code title="">getModifierState()</code> method on the
+ object must return values appropriately describing the state of the key
+ input device at the time the event is created.
+
+ <p><dfn id=firing0 title="fire a change event">Firing a <code
+ title=event-change>change</code> event</dfn> means that a <a
+ href="http://www.w3.org/TR/DOM-Level-3-Events/events.html#event-change"><code>change</code></a>
+ event with no namespace, which bubbles but is not cancelable, and which
+ uses the <code>Event</code> interface, must be dispatched at the given
+ element. The event object must have its <code title="">detail</code>
+ attribute set to 0.
+
+ <p><dfn id=firing1 title="fire a contextmenu event">Firing a <code
+ title=event-contextmenu>contextmenu</code> event</dfn> means that a <code
+ title=event-contextmenu>contextmenu</code> event with no namespace, which
+ bubbles and is cancelable, and which uses the <code>Event</code>
+ interface, must be dispatched at the given element. The event object must
+ have its <code title="">detail</code> attribute set to 0.
+
+ <p><dfn id=firing2 title="fire a simple event">Firing a simple event called
+ <var title="">e</var></dfn> means that an event with the name <var
+ title="">e</var>, with no namespace, which does not bubble but is
+ cancelable, and which uses the <code>Event</code> interface, must be
+ dispatched at the given element.
+
+ <p><dfn id=firing3 title="fire a show event">Firing a <code
+ title=event-show>show</code> event</dfn> means <a href="#firing2"
+ title="fire a simple event">firing a simple event called <code
+ title=event-show>show</code></a>. <dfn id=firing4 title="fire a load
+ event">Firing a <code title=event-load>load</code> event</dfn> means <a
+ href="#firing2" title="fire a simple event">firing a simple event called
+ <code title=event-load>load</code></a>. <!--<dfn title="fire a
+ DOMContentLoaded event">Firing a <code
+ title="event-DOMContentLoaded">DOMContentLoaded</code> event</dfn>
+ means <span title="fire a simple event">firing a simple event called
+ <code
+ title="event-DOMContentLoaded">DOMContentLoaded</code></span>.-->
+ <dfn id=firing5 title="fire an error event">Firing an <code
+ title=event-error>error</code> event</dfn> means <a href="#firing2"
+ title="fire a simple event">firing a simple event called <code
+ title=event-error>error</code></a>.</p>
+ <!-- XXX need to define the dispatching of DOMActivate -->
+
+ <p class=big-issue><dfn id=firing6 title="fire a progress event">Firing a
+ progress event called <var title="">e</var></dfn> means something that
+ hasn't yet been defined, in the <a href="#refsPROGRESS">[PROGRESS]</a>
+ spec.
+
+ <p>The default action of these events is to do nothing unless otherwise
+ stated.
+
+ <p class=big-issue>If you dispatch a custom "click" event at an element
+ that would normally have default actions, should they get triggered? If
+ so, we need to go through the entire spec and make sure that any default
+ actions are defined in terms of <em>any</em> event of the right type on
+ that element, not just those that are dispatched in expected ways.
+
+ <h5 id=events0><span class=secno>4.9.5.3. </span>Events and the <code><a
+ href="#window">Window</a></code> object</h5>
+
+ <p>When an event is dispatched at a DOM node in a <code>Document</code> in
+ a <a href="#browsing0">browsing context</a>, if the event is not a <code
+ title=event-load><a href="#load0">load</a></code> event, the user agent
+ must also dispatch the event to the <code><a
+ href="#window">Window</a></code>, as follows:
+
+ <ol>
+ <li>In the capture phase, the event must be dispatched to the <code><a
+ href="#window">Window</a></code> object before being dispatched to any of
+ the nodes.
+
+ <li>In the bubble phase, the event must be dispatched to the <code><a
+ href="#window">Window</a></code> object at the end of the phase, unless
+ bubbling has been prevented.
+ </ol>
+
+ <h5 id=runtime-script-errors><span class=secno>4.9.5.4. </span>Runtime
+ script errors</h5>
+
+ <p><em>This section only applies to user agents that support scripting in
+ general and ECMAScript in particular.</em>
+
+ <p>Whenever a runtime script error occurs in one of the scripts associated
+ with the document, the value of the <code title=handler-onerror><a
+ href="#onerror">onerror</a></code> <span>event handler DOM
+ attribute</span> of the <code><a href="#window">Window</a></code> object
+ must be processed, as follows:
+
+ <dl class=switch>
+ <dt>If the value is a function
+
+ <dd>
+ <p>The function referenced by the <code title=handler-onerror><a
+ href="#onerror">onerror</a></code> attribute must be invoked with three
+ arguments, before notifying the user of the error.</p>
+
+ <p>The three arguments passed to the function are all
+ <code>DOMString</code>s; the first must give the message that the UA is
+ considering reporting, the second must give the URI to the resource in
+ which the error occurred, and the third must give the line number in that
+ resource on which the error occurred.</p>
+
+ <p>If the function returns false, then the error should not be reported
+ to the user. Otherwise, if the function returns another value (or does
+ not return at all), the error should be reported to the user.</p>
+
+ <p>Any exceptions thrown or errors caused by this function must be
+ reported to the user immediately after the error that the function was
+ called for, without calling the function again.</p>
+
+ <dt>If the value is <code>null</code>
+
+ <dd>
+ <p>The error should not be reported to the user.</p>
+
+ <dt>If the value is anything else
+
+ <dd>
+ <p>The error should be reported to the user.</p>
+ </dl>
+
+ <p>The initial value of <code title=handler-onerror><a
+ href="#onerror">onerror</a></code> must be <code>undefined</code>.
+
+ <h3 id=browser><span class=secno>4.10. </span>Browser state</h3>
+
+ <p>The <dfn id=navigator title=dom-navigator><code>navigator</code></dfn>
+ attribute of the <code><a href="#window">Window</a></code> interface must
+ return an instance of the <code><a
+ href="#clientinformation">ClientInformation</a></code> interface, which
+ represents the identity and state of the user agent (the client), and
+ allows Web pages to register themselves as potential protocol and content
+ handlers:
+
+ <pre
+ class=idl>interface <dfn id=clientinformation>ClientInformation</dfn> {
+ readonly attribute boolean <a href="#navigator.online" title=dom-navigator-onLine>onLine</a>;
+ void <a href="#registerprotocolhandler" title=dom-navigator-registerProtocolHandler>registerProtocolHandler</a>(in DOMString protocol, in DOMString uri, in DOMString title);
+ void <a href="#registercontenthandler" title=dom-navigator-registerContentHandler>registerContentHandler</a>(in DOMString mimeType, in DOMString uri, in DOMString title);
+<!-- XXX there are other attributes! -->};</pre>
+ <!-- also, see window.external.AddSearchProvider() and similar DOM APIs from IE -->
+
+ <h4 id=offline><span class=secno>4.10.1. </span>Offline Web applications</h4>
+
+ <p>The <dfn id=navigator.online
+ title=dom-navigator-onLine><code>navigator.onLine</code></dfn> attribute
+ must return false if the user agent will not contact the network when the
+ user follows links or when a script requests a remote page (or knows that
+ such an attempt would fail), and must return true otherwise.
+
+ <p>The <dfn id=offline0 title=event-offline><code>offline</code></dfn>
+ event must be fired when the value of the <code
+ title=dom-navigator-onLine><a
+ href="#navigator.online">navigator.onLine</a></code> attribute of the
+ <code><a href="#window">Window</a></code> changes from true to false.
+
+ <p>The <dfn id=online title=event-online><code>online</code></dfn> event
+ must be fired when the value of the <code title=dom-navigator-onLine><a
+ href="#navigator.online">navigator.onLine</a></code> attribute of the
+ <code><a href="#window">Window</a></code> changes from false to true.
+
+ <p>These events are in no namespace, do bubble, are not cancelable, have no
+ default action, and use the normal <code>Event</code> interface. They must
+ be fired on <a href="#the-body0">the body element</a>. (As the events
+ bubble, they will reach the <code><a href="#window">Window</a></code>
+ object.)</p>
+ <!-- XXX ononline onoffline need to be defined -->
+
+ <h4 id=custom-handlers><span class=secno>4.10.2. </span>Custom protocol and
+ content handlers</h4>
+
+ <p>The <dfn id=registerprotocolhandler
+ title=dom-navigator-registerProtocolHandler><code>registerProtocolHandler()</code></dfn>
+ method allows Web sites to register themselves as possible handlers for
+ particular protocols. For example, an online fax service could register
+ itself as a handler of the <code>fax:</code> protocol (<a
+ href="#refsRFC2806">[RFC2806]</a>), so that if the user clicks on such a
+ link, he is given the opportunity to use that Web site. Analogously, the
+ <dfn id=registercontenthandler
+ title=dom-navigator-registerContentHandler><code>registerContentHandler()</code></dfn>
+ method allows Web sites to register themselves as possible handlers for
+ content in a particular MIME type. For example, the same online fax
+ service could register itself as a handler for <code>image/g3fax</code>
+ files (<a href="#refsRFC1494">[RFC1494]</a>), so that if the user has no
+ native application capable of handling G3 Facsimile byte streams, his Web
+ browser can instead suggest he use that site to view the image.
+
+ <p>User agents may, within the constraints described in this section, do
+ whatever they like when the methods are called. A UA could, for instance,
+ prompt the user and offer the user the opportunity to add the site to a
+ shortlist of handlers, or make the handlers his default, or cancel the
+ request. UAs could provide such a UI through modal UI or through a
+ non-modal transient notification interface. UAs could also simply silently
+ collect the information, providing it only when relevant to the user.
+
+ <p>There is <a href="#sample-handler-impl">an example of how these methods
+ could be presented to the user</a> below.
+
+ <p>The arguments to the methods have the following meanings:
+
+ <dl>
+ <dt><var title="">protocol</var> (<code
+ title=dom-navigator-registerProtocolHandler><a
+ href="#registerprotocolhandler">registerProtocolHandler()</a></code>
+ only)
+
+ <dd>
+ <p>A scheme, such as <code>ftp</code> or <code>fax</code>. The scheme
+ must be treated case-insensitively by user agents for the purposes of
+ comparing with the scheme part of URIs that they consider against the
+ list of registered handlers.</p>
+
+ <p>The <var title="">protocol</var> value, if it contains a colon (as in
+ "<code>ftp:</code>"), will never match anything, since schemes don't
+ contain colons.</p>
+
+ <dt><var title="">mimeType</var> (<code
+ title=dom-navigator-registerContentHandler><a
+ href="#registercontenthandler">registerContentHandler()</a></code> only)
+
+ <dd>
+ <p>A MIME type, such as <code>model/vrml</code> or
+ <code>text/richtext</code>. The MIME type must be treated
+ case-insensitively by user agents for the purposes of comparing with
+ MIME types of documents that they consider against the list of
+ registered handlers.</p>
+
+ <p>User agents must compare the given values only to the MIME
+ type/subtype parts of content types, not to the complete type including
+ parameters. Thus, if <var title="">mimeType</var> values passed to this
+ method include characters such as commas or whitespace, or include MIME
+ parameters, then the handler being registered will never be used.</p>
+
+ <dt><var title="">uri</var>
+
+ <dd>
+ <p>The URI of the page that will handle the requests. When the user agent
+ uses this URI, it must replace the first occurrence of the exact literal
+ string "<code>%s</code>" with an escaped version of the URI of the
+ content in question (as defined below), and then fetch the resulting URI
+ using the GET method (or equivalent for non-HTTP URIs).</p>
+
+ <p>To get the escaped version of the URI, first, the domain part of the
+ URI (if any) must be converted to its punycode representation, and then,
+ every character in the URI that is not in the ranges given in the next
+ paragraph must be replaced by its UTF-8 byte representation, each byte
+ being represented by a U+0025 (%) character and two digits in the range
+ U+0030 (0) to U+0039 (9) and U+0041 (A) to U+0046 (F) giving the
+ hexadecimal representation of the byte.</p>
+
+ <p>The ranges of characters that must not be escaped are: U+002D (-),
+ U+002E (.), U+0030 (0) to U+0039 (9), U+0041 (A) to U+005A (Z), U+005F
+ (_), U+0061 (a) to U+007A (z), and U+007E (~).</p>
+ <!-- XXX move that to a common algorithms section if any other
+ part of the spec needs it -->
+
+ <div class=example>
+ <p>If the user had visited a site that made the following call:</p>
+
+ <pre>navigator.registerContentHandler('application/x-soup', 'http://example.com/soup?url=%s', 'SoupWeb&trade;')</pre>
+
+ <p>...and then clicked on a link such as:</p>
+
+ <pre>&lt;a href="http://www.example.net/chickenk&#xEF;wi.soup">Download our Chicken Kiwi soup!&lt;/a></pre>
+
+ <p>...then, assuming this <code>chickenk&#xEF;wi.soup</code> file was served
+ with the MIME type <code>application/x-soup</code>, the UA might
+ instead navigate to the following URI:</p>
+
+ <pre>http://example.com/soup?url=http%3A%2F%2Fwww.example.net%2Fchickenk%C3%AFwi.soup</pre>
+
+ <p>This site could then fetch the <code>chickenk&#xEF;wi.soup</code> file and
+ do whatever it is that it does with soup (synthesise it and ship it to
+ the user, or whatever).</p>
+ </div>
+
+ <dt><var title="">title</var>
+
+ <dd>
+ <p>A descriptive title of the handler, which the UA might use to remind
+ the user what the site in question is.</p>
+ </dl>
+
+ <p>User agents should raise <a href="#security8" title="security
+ exception">security exceptions</a> if the methods are called with <var
+ title="">protocol</var> or <var title="">mimeType</var> values that the UA
+ deems to be "privileged". For example, a site attempting to register a
+ handler for <code>http</code> URIs or <code>text/html</code> content in a
+ Web browser would likely cause an exception to be raised.
+
+ <p>User agents must raise a <code>SYNTAX_ERR</code> exception if the <var
+ title="">uri</var> argument passed to one of these methods does not
+ contain the exact literal string "<code>%s</code>".
+
+ <p>User agents must not raise any other exceptions (other than
+ binding-specific exceptions, such as for an incorrect number of arguments
+ in an ECMAScript implementation).
+
+ <p>This section does not define how the pages registered by these methods
+ are used, beyond the requirements on how to process the <var
+ title="">uri</var> value (see above). To some extent, the <span
+ title="navigating across documents">processing model for navigating across
+ documents</span> defines some cases where these methods are relevant, but
+ in general UAs may use this information wherever they would otherwise
+ consider handing content to native plugins or helper applications.
+
+ <p>UAs must not use registered content handlers to handle content that was
+ returned as part of a non-GET transaction (or rather, as part of any
+ non-idempotent transaction), as the remote site would not be able to fetch
+ the same data.
+
+ <h5 id=security4><span class=secno>4.10.2.1. </span>Security and privacy</h5>
+
+ <p>These mechanisms can introduce a number of concerns, in particular
+ privacy concerns.
+
+ <p><strong>Hijacking all Web usage.</strong> User agents should not allow
+ protocols that are key to their normal operation, such as <code>http</code>
+ or <code>https</code>, to be rerouted through third-party sites. This
+ would allow a user's activities to be trivially tracked, and would allow
+ user information, even in secure connections, to be collected.
+
+ <p><strong>Hijacking defaults.</strong> It is strongly recommended that
+ user agents do not automatically change any defaults, as this could lead
+ the user to send data to remote hosts that the user is not expecting. New
+ handlers registering themselves should never automatically cause those
+ sites to be used.
+
+ <p><strong>Registration spamming.</strong> User agents should consider the
+ possibility that a site will attempt to register a large number of
+ handlers, possibly from multiple domains (e.g. by redirecting through a
+ series of pages each on a different domain, and each registering a handler
+ for <code>video/mpeg</code> &mdash; analogous practices abusing other Web
+ browser features have been used by pornography Web sites for many years).
+ User agents should gracefully handle such hostile attempts, protecting the
+ user.
+
+ <p><strong>Misleading titles.</strong> User agents should not rely wholly on
+ the <var title="">title</var> argument to the methods when presenting the
+ registered handlers to the user, since sites could easily lie. For
+ example, a site <code>hostile.example.net</code> could claim that it was
+ registering the "Cuddly Bear Happy Content Handler". User agents should
+ therefore use the handler's domain in any UI along with any title.
+
+ <p><strong>Hostile handler metadata.</strong> User agents should protect
+ against typical attacks against strings embedded in their interface, for
+ example ensuring that markup or escape characters in such strings are not
+ executed, that null bytes are properly handled, that over-long strings do
+ not cause crashes or buffer overruns, and so forth.
+
+ <p><strong>Leaking Intranet URIs.</strong> The mechanism described in this
+ section can result in secret Intranet URIs being leaked, in the following
+ manner:
+
+ <ol>
+ <li>The user registers a third-party content handler as the default
+ handler for a content type.
+
+ <li>The user then browses his corporate Intranet site and accesses a
+ document that uses that content type.
+
+ <li>The user agent contacts the third party and hands the third party the
+ URI to the Intranet content.
+ </ol>
+
+ <p>No actual confidential file data is leaked in this manner, but the URIs
+ themselves could contain confidential information. For example, the URI
+ could be
+ <code>https://www.corp.example.com/upcoming-acquisitions/samples.egf</code>,
+ which might tell the third party that Example Corporation is intending to
+ merge with Samples LLC. Implementors might wish to consider allowing
+ administrators to disable this feature for certain subdomains, content
+ types, or protocols.
+
+ <p><strong>Leaking secure URIs.</strong> User agents should not send HTTPS
+ URIs to third-party sites registered as content handlers, in the same way
+ that user agents do not send <code>Referer</code> headers from secure
+ sites to third-party sites.
+
+ <p><strong>Leaking credentials.</strong> User agents must never send
+ username or password information in the URIs that are escaped and included
+ in the URIs sent to the handler sites. User agents may even avoid attempting to pass
+ to Web-based handlers the URIs of resources that are known to require
+ authentication to access, as such sites would be unable to access the
+ resources in question without prompting the user for credentials
+ themselves (a practice that would require the user to know whether to
+ trust the third-party handler, a decision many users are unable to make or
+ even understand).
+
+ <h5 id=sample-handler-impl><span class=secno>4.10.2.2. </span>Sample user
+ interface</h5>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>A simple implementation of this feature for a desktop Web browser might
+ work as follows.
+
+ <p>The <code title=dom-navigator-registerProtocolHandler><a
+ href="#registerprotocolhandler">registerProtocolHandler()</a></code>
+ method could display a modal dialog box:
+
+ <pre>||[ Protocol Handler Registration ]|||||||||||||||||||||||||||
+| |
+| This Web page: |
+| |
+| Kittens at work |
+| http://kittens.example.org/ |
+| |
+| ...would like permission to handle the protocol "x-meow:" |
+| using the following Web-based application: |
+| |
+| Kittens-at-work displayer |
+| http://kittens.example.org/?show=%s |
+| |
+| Do you trust the administrators of the "kittens.example. |
+| org" domain? |
+| |
+| ( Trust kittens.example.org ) (( Cancel )) |
+|____________________________________________________________|</pre>
+
+ <p>...where "Kittens at work" is the title of the page that invoked the
+ method, "http://kittens.example.org/" is the URI of that page, "x-meow" is
+ the string that was passed to the <code
+ title=dom-navigator-registerProtocolHandler><a
+ href="#registerprotocolhandler">registerProtocolHandler()</a></code>
+ method as its first argument (<var title="">protocol</var>),
+ "http://kittens.example.org/?show=%s" was the second argument (<var
+ title="">uri</var>), and "Kittens-at-work displayer" was the third
+ argument (<var title="">title</var>).
+
+ <p>If the user clicks the Cancel button, then nothing further happens. If
+ the user clicks the "Trust" button, then the handler is remembered.
+
+ <p>When the user then attempts to fetch a URI that uses the "x-meow:"
+ scheme, the user agent might display a dialog as follows:
+
+ <pre>||[ Unknown Protocol ]||||||||||||||||||||||||||||||||||||||||
+| |
+| You have attempted to access: |
+| |
+| x-meow:S2l0dGVucyBhcmUgdGhlIGN1dGVzdCE%3D |
+| |
+| How would you like FerretBrowser to handle this resource? |
+| |
+| (o) Contact the FerretBrowser plugin registry to see if |
+| there is an official way to handle this resource. |
+| |
+| ( ) Pass this URI to a local application: |
+| [ /no application selected/ ] ( Choose ) |
+| |
+| ( ) Pass this URI to the "Kittens-at-work displayer" |
+| application at "kittens.example.org". |
+| |
+| [ ] Always do this for resources using the "x-meow" |
+| protocol in future. |
+| |
+| ( Ok ) (( Cancel )) |
+|____________________________________________________________|</pre>
+
+ <p>...where the third option is the one that was primed by the site
+ registering itself earlier.
+
+ <p>If the user does select that option, then the browser, in accordance
+ with the requirements described in the previous two sections, will
+ redirect the user to
+ "http://kittens.example.org/?show=x-meow%3AS2l0dGVucyBhcmUgdGhlIGN1dGVzdCE%253D".
+
+ <p>The <code title=dom-navigator-registerContentHandler><a
+ href="#registercontenthandler">registerContentHandler()</a></code> method
+ would work equivalently, but for unknown MIME types instead of unknown
+ protocols.
+
+ <h3 id=storage><span class=secno>4.11. </span>Client-side session and
+ persistent storage of name/value pairs</h3>
+
+ <h4 id=introduction0><span class=secno>4.11.1. </span>Introduction</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>This specification introduces two related mechanisms, similar to HTTP
+ session cookies <a href="#refsRFC2965">[RFC2965]</a>, for storing
+ structured data on the client side.
+
+ <p>The first is designed for scenarios where the user is carrying out a
+ single transaction, but could be carrying out multiple transactions in
+ different windows at the same time.
+
+ <p>Cookies don't really handle this case well. For example, a user could be
+ buying plane tickets in two different windows, using the same site. If the
+ site used cookies to keep track of which ticket the user was buying, then
+ as the user clicked from page to page in both windows, the ticket
+ currently being purchased would "leak" from one window to the other,
+ potentially causing the user to buy two tickets for the same flight
+ without really noticing.
+
+ <p>To address this, this specification introduces the <code
+ title=dom-sessionStorage><a
+ href="#sessionstorage">sessionStorage</a></code> DOM attribute. Sites can
+ add data to the session storage, and it will be accessible to any page
+ from that domain opened in that window.
+
+ <div class=example>
+ <p>For example, a page could have a checkbox that the user ticks to
+ indicate that he wants insurance:</p>
+
+ <pre>&lt;label>
+ &lt;input type="checkbox" onchange="sessionStorage.insurance = checked">
+ I want insurance on this trip.
+&lt;/label></pre>
+
+ <p>A later page could then check, from script, whether the user had
+ checked the checkbox or not:</p>
+
+ <pre>if (sessionStorage.insurance) { ... }</pre>
+
+ <p>If the user had multiple windows opened on the site, each one would
+ have its own individual copy of the session storage object.</p>
+ </div>
+ <!--
+
+ sessionStorage.flightDeparture = 'OSL';
+ sessionStorage.flightArrival = 'NYC';
+
+ for (var i in forms[0].elements)
+ sessionStorage["data_" + i.name] = i.value;
+
+ if (!sessionStorage[documents])
+ sessionStorage[documents] = {};
+ sessionStorage[documents][filename] = <document/>;
+
+ -->
+
+ <p>The second storage mechanism is designed for storage that spans multiple
+ windows, and lasts beyond the current session. In particular, Web
+ applications may wish to store megabytes of user data, such as entire
+ user-authored documents or a user's mailbox, on the client side for
+ performance reasons.
+
+ <p>Again, cookies do not handle this case well, because they are
+ transmitted with every request.
+
+ <p>The <code title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> DOM attribute is used to
+ access the global storage areas.
+
+ <div class=example>
+ <p>The site at example.com can display a count of how many times the user
+ has loaded its page by putting the following at the bottom of its page:</p>
+
+ <pre>&lt;p>
+ You have viewed this page
+ &lt;span id="count">an untold number of&lt;/span>
+ time(s).
+&lt;/p>
+&lt;script>
+ var storage = globalStorage['example.com'];
+ if (!storage.pageLoadCount)
+ storage.pageLoadCount = 0;
+ storage.pageLoadCount = parseInt(storage.pageLoadCount, 10) + 1;
+ document.getElementById('count').textContent = storage.pageLoadCount;
+&lt;/script></pre>
+ </div>
+
+ <p>Each domain and each subdomain has its own separate storage area.
+ Subdomains can access the storage areas of parent domains, and domains can
+ access the storage areas of subdomains.
+
+ <ul class=brief>
+ <li><code>globalStorage['']</code> is accessible to all domains.
+
+ <li><code>globalStorage['com']</code> is accessible to all .com domains
+
+ <li><code>globalStorage['example.com']</code> is accessible to example.com
+ and any of its subdomains
+
+ <li><code>globalStorage['www.example.com']</code> is accessible to
+ www.example.com and example.com, but not www2.example.com.
+ </ul>
+
+ <p>Storage areas (both session storage and global storage) store strings.
+ To store structured data in a storage area, you must first convert it to a
+ string.
+
+ <h4 id=the-storage><span class=secno>4.11.2. </span>The <code><a
+ href="#storage2">Storage</a></code> interface</h4>
+
+ <pre class=idl>
+interface <dfn id=storage2>Storage</dfn> {
+ readonly attribute unsigned long <a href="#length7" title=dom-Storage-length>length</a>;
+ DOMString <a href="#keyn" title=dom-Storage-key>key</a>(in unsigned long index);
+ <a href="#storageitem">StorageItem</a> <a href="#getitem" title=dom-Storage-getItem>getItem</a>(in DOMString key);
+ void <a href="#setitem" title=dom-Storage-setItem>setItem</a>(in DOMString key, in DOMString data);
+ void <a href="#removeitem" title=dom-Storage-removeItem>removeItem</a>(in DOMString key);
+};</pre>
+
+ <p>Each <code><a href="#storage2">Storage</a></code> object provides access
+ to a list of key/value pairs, which are sometimes called items. Keys are
+ strings, and any string (including the empty string) is a valid key.
+ Values are strings with associated metadata, represented by <code><a
+ href="#storageitem">StorageItem</a></code> objects.
+
+ <p>Each <code><a href="#storage2">Storage</a></code> object is associated
+ with a list of key/value pairs when it is created, as defined in the
+ sections on the <code title=dom-sessionStorage><a
+ href="#sessionstorage">sessionStorage</a></code> and <code
+ title=dom-globalStorage><a href="#globalstorage">globalStorage</a></code>
+ attributes. Multiple separate objects implementing the <code><a
+ href="#storage2">Storage</a></code> interface can all be associated with
+ the same list of key/value pairs simultaneously.
+
+ <p>Key/value pairs have associated metadata. In particular, a key/value
+ pair can be marked as either "safe only for secure content", or as "safe
+ for both secure and insecure content".
+
+ <p>A key/value pair is <dfn id=accessible title="accessible
+ keys">accessible</dfn> if either it is marked as "safe for both secure and
+ insecure content", or it is marked as "safe only for secure content" and
+ the script in question is running in a <span>secure browsing
+ context</span><!-- XXX xref -->.
+
+ <p>The <dfn id=length7 title=dom-Storage-length><code>length</code></dfn>
+ attribute must return the number of key/value pairs currently present and
+ <a href="#accessible" title="accessible keys">accessible</a> in the list
+ associated with the object.
+
+ <p>The <dfn id=keyn title=dom-Storage-key><code>key(<var
+ title="">n</var>)</code></dfn> method must return the name of the <var
+ title="">n</var>th <span>accessible</span> key in the list. The order of
+ keys is user-agent defined, but must be consistent within an object
+ between changes to the number of keys. (Thus, <a href="#setitem"
+ title=dom-Storage-setItem>adding</a> or <a href="#removeitem"
+ title=dom-Storage-removeItem>removing</a> a key may change the order of
+ the keys, but merely changing the value of an existing key must not.)
+ <!--The order of keys may differ between instances of the
+ <code>Storage</code> interface accessing the same list. [removed for
+ now for clarity, but if people ask, put it back. this is part of the
+ spec.]-->
+ If <var title="">n</var> is less than zero or greater than or equal to the
+ number of key/value pairs in the object, then this method must raise an
+ <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>The <dfn id=getitem title=dom-Storage-getItem><code>getItem(<var
+ title="">key</var>)</code></dfn> method must return the <code><a
+ href="#storageitem">StorageItem</a></code> object representing the
+ key/value pair with the given <var title="">key</var>. If the given <var
+ title="">key</var> does not exist in the list associated with the object,
+ or is not <span>accessible</span>, then this method must return null.
+ Subsequent calls to this method with the same key from scripts running in
+ the same <span>security context</span> must return the same instance of
+ the <code><a href="#storageitem">StorageItem</a></code> interface. (Such
+ instances must not be shared across security contexts, though.)</p>
+ <!-- XXX define security context -->
+
+ <p>The <dfn id=setitem title=dom-Storage-setItem><code>setItem(<var
+ title="">key</var>, <var title="">value</var>)</code></dfn> method must
+ first check if a key/value pair with the given <var title="">key</var>
+ already exists in the list associated with the object.
+
+ <p>If it does not, then a new key/value pair must be added to the list,
+ with the given <var title="">key</var> and <var title="">value</var>, such
+ that any current or future <code><a
+ href="#storageitem">StorageItem</a></code> objects referring to this
+ key/value pair will return the value given in the <var
+ title="">value</var> argument. If the script setting the value is running
+ in a <span>secure browsing context</span>, then the key/value pair must be
+ marked as "safe only for secure content", otherwise it must be marked as
+ "safe for both secure and insecure content".
+
+ <p>If the given <var title="">key</var> <em>does</em> exist in the list,
+ then, if the key/value pair with the given <var title="">key</var> is
+ <span>accessible</span>, it must have its value updated so that any
+ current or future <code><a href="#storageitem">StorageItem</a></code>
+ objects referring to this key/value pair will return the value given in
+ the <var title="">value</var> argument. If it is <em>not</em>
+ <span>accessible</span>, the method must raise a <a
+ href="#security8">security exception</a>.
+
+ <p>When the <code title=dom-Storage-setItem><a
+ href="#setitem">setItem()</a></code> method is successfully invoked (i.e.
+ when it doesn't raise an exception), events are fired on other <code><a
+ href="#htmldocument">HTMLDocument</a></code> objects that can access the
+ newly stored data, as defined in the sections on the <code
+ title=dom-sessionStorage><a
+ href="#sessionstorage">sessionStorage</a></code> and <code
+ title=dom-globalStorage><a href="#globalstorage">globalStorage</a></code>
+ attributes.</p>
+ <!--
+ not normative, see the sections below for the normative statement
+ -->
+
+ <p>The <dfn id=removeitem
+ title=dom-Storage-removeItem><code>removeItem(<var
+ title="">key</var>)</code></dfn> method must cause the key/value pair with
+ the given <var title="">key</var> to be removed from the list associated
+ with the object, if it exists and is <span>accessible</span>. If no item
+ with that key exists, the method must do nothing. If an item with that key
+ exists but is not <span>accessible</span>, the method must raise a <a
+ href="#security8">security exception</a>.
+
+ <p>The <code title=dom-Storage-setItem><a
+ href="#setitem">setItem()</a></code> and <code
+ title=dom-Storage-removeItem><a href="#removeitem">removeItem()</a></code>
+ methods must be atomic with respect to failure. That is, changes to the
+ data storage area must either be successful, or the data storage area must
+ not be changed at all.
+
+ <p>In the ECMAScript DOM binding, enumerating a <code><a
+ href="#storage2">Storage</a></code> object must enumerate through the
+ currently stored and <span>accessible</span> keys in the list the object
+ is associated with. (It must not enumerate the values or the actual
+ members of the interface). In the ECMAScript DOM binding, <code><a
+ href="#storage2">Storage</a></code> objects must support dereferencing
+ such that getting a property that is not a member of the object (i.e. is
+ neither a member of the <code><a href="#storage2">Storage</a></code>
+ interface nor of <code title=dom-Object>Object</code>) must invoke the
+ <code title=dom-Storage-getItem><a href="#getitem">getItem()</a></code>
+ method with the property's name as the argument, and setting such a
+ property must invoke the <code title=dom-Storage-setItem><a
+ href="#setitem">setItem()</a></code> method with the property's name as
+ the first argument and the given value as the second argument.
+
+ <h4 id=the-storageitem><span class=secno>4.11.3. </span>The <code><a
+ href="#storageitem">StorageItem</a></code> interface</h4>
+
+ <p>Items in <code><a href="#storage2">Storage</a></code> objects are
+ represented by objects implementing the <code><a
+ href="#storageitem">StorageItem</a></code> interface.
+
+ <pre class=idl>
+interface <dfn id=storageitem>StorageItem</dfn> {
+ attribute boolean <a href="#secure" title=dom-StorageItem-secure>secure</a>;
+ attribute DOMString <a href="#value7" title=dom-StorageItem-value>value</a>;
+};</pre>
+
+ <p>In the ECMAScript DOM binding, <code><a
+ href="#storageitem">StorageItem</a></code> objects must stringify to their
+ <code title=dom-StorageItem-value><a href="#value7">value</a></code>
+ attribute's value.
+
+ <p>The <dfn id=value7 title=dom-StorageItem-value><code>value</code></dfn>
+ attribute must return the current value of the key/value pair represented
+ by the object. When the attribute is set, the user agent must invoke the
+ <code title=dom-Storage-setItem><a href="#setitem">setItem()</a></code>
+ method of the <code><a href="#storage2">Storage</a></code> object that the
+ <code><a href="#storageitem">StorageItem</a></code> object is associated
+ with, with the key that the <code><a
+ href="#storageitem">StorageItem</a></code> object is associated with as
+ the first argument, and the new given value of the attribute as the second
+ argument.
+
+ <p><code><a href="#storageitem">StorageItem</a></code> objects must be
+ <em><a href="#live">live</a></em>, meaning that as the underlying <code><a
+ href="#storage2">Storage</a></code> object has its key/value pairs
+ updated, the <code><a href="#storageitem">StorageItem</a></code> objects
+ must always return the actual value of the key/value pair they represent.
+
+ <p>If the key/value pair has been deleted, the <code><a
+ href="#storageitem">StorageItem</a></code> object must act as if its value
+ was the empty string. On setting, the key/value pair will be recreated.
+
+ <p>The <dfn id=secure
+ title=dom-StorageItem-secure><code>secure</code></dfn> attribute must
+ raise an <code>INVALID_ACCESS_ERR</code> exception when accessed or set
+ from a script whose browsing context is not <span title="secure browsing
+ context">considered secure</span><!--
+ XXX xref -->. (Basically, if the
+ page is not an SSL page.)
+
+ <p>If the browsing context <em>is</em> secure, then the <code
+ title=dom-StorageItem-secure><a href="#secure">secure</a></code> attribute
+ must return true if the key/value pair is considered "safe only for secure
+ content", and false if it is considered "safe for both secure and insecure
+ content". If it is set to true, then the key/value pair must be flagged as
+ "safe only for secure content". If it is set to false, then the key/value
+ pair must be flagged as "safe for both secure and insecure content".
+
+ <p>If a <code><a href="#storageitem">StorageItem</a></code> object is
+ obtained by a script that is not running in a <span>secure browsing
+ context</span>, and the item is then marked with the "safe only for secure
+ content" flag by a script that <em>is</em> running in a secure context,
+ the <code><a href="#storageitem">StorageItem</a></code> object must
+ continue to be available to the first script, which will be able to read the
+ value of the object. However, any attempt to <em>set</em> the value would
+ then start raising exceptions as described in the previous section, and
+ the key/value pair would no longer appear in the appropriate <code><a
+ href="#storage2">Storage</a></code> object.
+
+ <h4 id=the-sessionstorage><span class=secno>4.11.4. </span>The <code
+ title=dom-sessionStorage><a
+ href="#sessionstorage">sessionStorage</a></code> attribute</h4>
+
+ <p>The <dfn id=sessionstorage
+ title=dom-sessionStorage><code>sessionStorage</code></dfn> attribute
+ represents the storage area specific to the current <a
+ href="#top-level">top-level browsing context</a>.
+
+ <p>Each <a href="#top-level">top-level browsing context</a> has a unique
+ set of session storage areas, one for each domain.
+
+ <p>User agents should not expire data from a browsing context's session
+ storage areas, but may do so when the user requests that such data be
+ deleted, or when the UA detects that it has limited storage space, or for
+ security reasons. User agents should always avoid deleting data while a
+ script that could access that data is running. When a top-level browsing
+ context is destroyed (and therefore permanently inaccessible to the user)
+ the data stored in its session storage areas can be discarded with it, as
+ the API described in this specification provides no way for that data to
+ ever be subsequently retrieved.
+
+ <p class=note>The lifetime of a browsing context can be unrelated to the
+ lifetime of the actual user agent process itself, as the user agent may
+ support resuming sessions after a restart.
+
+ <p>When a new <code><a href="#htmldocument">HTMLDocument</a></code> is
+ created, the user agent must check to see if the document's <a
+ href="#top-level">top-level browsing context</a> has allocated a session
+ storage area for that <a href="#domain0">document's domain</a>. If it has
+ not, a new storage area for that document's domain must be created.
+
+ <p>The <code><a href="#storage2">Storage</a></code> object for the
+ document's associated <code><a href="#window">Window</a></code> object's
+ <code title=dom-sessionStorage><a
+ href="#sessionstorage">sessionStorage</a></code> attribute must then be
+ associated with the domain's session storage area.
+
+ <p>When a new <a href="#top-level">top-level browsing context</a> is
+ created by cloning an existing <a href="#browsing0">browsing context</a>,
+ the new browsing context must start with the same session storage areas as
+ the original, but the two sets must from that point on be considered
+ separate, not affecting each other in any way.
+
+ <p>When a new <a href="#top-level">top-level browsing context</a> is
+ created by a script in an existing <a href="#browsing0">browsing
+ context</a>, or by the user following a link in an existing browsing
+ context, or in some other way related to a specific <code><a
+ href="#htmldocument">HTMLDocument</a></code>, then, if the new context's
+ first <code><a href="#htmldocument">HTMLDocument</a></code> has the same
+ <a href="#domain0" title="document's domain">domain</a> as the <code><a
+ href="#htmldocument">HTMLDocument</a></code> from which the new context
+ was created, the new browsing context must start with a single session
+ storage area. That storage area must be a copy of that domain's session
+ storage area in the original browsing context, which from that point on
+ must be considered separate, with the two storage areas not affecting each
+ other in any way.</p>
+ <!-- XXX define the case for window.open() -->
+
+ <p id=sessionStorageEvent>When the <code title=dom-Storage-setItem><a
+ href="#setitem">setItem()</a></code> method is called on a <code><a
+ href="#storage2">Storage</a></code> object <var title="">x</var> that is
+ associated with a session storage area, then, if the method does not raise
+ a <a href="#security8">security exception</a>, in every <code><a
+ href="#htmldocument">HTMLDocument</a></code> object whose <code><a
+ href="#window">Window</a></code> object's <code
+ title=dom-sessionStorage><a
+ href="#sessionstorage">sessionStorage</a></code> attribute's <code><a
+ href="#storage2">Storage</a></code> object is associated with the same
+ storage area, other than <var title="">x</var>, a <code
+ title=event-storage><a href="#storage3">storage</a></code> event must be
+ fired, as <a href="#storage3" title=event-storage>described below</a>.
+
+ <h4 id=the-globalstorage><span class=secno>4.11.5. </span>The <code
+ title=dom-globalStorage><a href="#globalstorage">globalStorage</a></code>
+ attribute</h4>
+
+ <pre class=idl>interface <dfn id=storagelist>StorageList</dfn> {
+ <a href="#storage2">Storage</a> <a href="#nameditem2" title=dom-Storagelist-namedItem>namedItem</a>(in DOMString domain);
+};</pre>
+
+ <p>The <dfn id=globalstorage
+ title=dom-globalStorage><code>globalStorage</code></dfn> object provides a
+ <code><a href="#storage2">Storage</a></code> object for each domain.
+
+ <p>In the ECMAScript DOM binding, <code><a
+ href="#storagelist">StorageList</a></code> objects must support
+ dereferencing such that getting a property that is not a member of the
+ object (i.e. is neither a member of the <code><a
+ href="#storagelist">StorageList</a></code> interface nor of <code
+ title=dom-Object>Object</code>) must invoke the <code
+ title=dom-Storagelist-namedItem><a
+ href="#nameditem2">namedItem()</a></code> method with the property's name
+ as the argument.
+
+ <p>User agents must have a set of global storage areas, one for each
+ domain.
+
+ <p>User agents should only expire data from the global storage areas for
+ security reasons or when requested to do so by the user. User agents
+ should always avoid deleting data while a script that could access that
+ data is running. Data stored in global storage areas should be considered
+ potentially user-critical. It is expected that Web applications will use
+ the global storage areas for storing user-written documents.
+
+ <p>The <dfn id=nameditem2
+ title=dom-Storagelist-namedItem><code>namedItem(<var
+ title="">domain</var>)</code></dfn> method tries to return a <code><a
+ href="#storage2">Storage</a></code> object associated with the given
+ domain, according to the rules that follow.
+
+ <div id=splitDomain>
+ <p>The <var title="">domain</var> must first be split into an array of
+ strings, by splitting the string at "." characters (U+002E FULL STOP). If
+ the <var title="">domain</var> argument is the empty string, then the
+ array is empty as well. If the <var title="">domain</var> argument is not
+ empty but has no dots, then the array has one item, which is equal to the
+ <var title="">domain</var> argument. If the <var title="">domain</var>
+ argument contains consecutive dots, there will be empty strings in the
+ array (e.g. the string "hello..world" becomes split into the three
+ strings "hello", "", and "world", with the middle one being the empty
+ string).</p>
+
+ <p>Each component of the array must then have the IDNA ToASCII algorithm
+ applied to it, with both the AllowUnassigned and UseSTD3ASCIIRules flags
+ set. <a href="#refsRFC3490">[RFC3490]</a> If ToASCII fails to convert one
+ of the components of the string, e.g. because it is too long or because
+ it contains invalid characters, then the user agent must raise a
+ <code>SYNTAX_ERR</code> exception. <a href="#refsDOM3CORE">[DOM3CORE]</a>
+ The components after this step consist of only US-ASCII characters.</p>
+
+ <p>The components of the array must then be converted to lowercase. Since
+ only US-ASCII is involved at this step, this only requires converting
+ characters in the range A-Z to the corresponding characters in the range
+ a-z.</p>
+ </div>
+
+ <p>The resulting array is used in a comparison with another array, as
+ described below. In addition, its components are concatenated together,
+ each part separated by a dot (U+002E), to form the <dfn
+ id=normalised0>normalised requested domain</dfn>.
+
+ <p class=example>If the original <var title="">domain</var> was
+ "&Aring;sg&aring;rd.Example.Com", then the resulting array would have the
+ three items "xn--sgrd-poac", "example", and "com", and the normalised
+ requested domain would be "xn--sgrd-poac.example.com".
+
+ <p>Next, the domain part of the tuple forming the calling script's <a
+ href="#origin0">origin</a> is processed to find if it is allowed to access
+ the requested domain.
+
+ <p>If the script's origin has no domain part, e.g. if only the server's IP
+ address is known, and the <a href="#normalised0">normalised requested
+ domain</a> is not the empty string, then the user agent must raise a <a
+ href="#security8">security exception</a>.
+
+ <p class=note>If the <a href="#normalised0">normalised requested domain</a>
+ is the empty string, then the rest of this algorithm can be skipped. This
+ is because in that situation, the comparison of the two arrays below will
+ always find them to be the same &mdash; the first array in such a
+ situation is also empty and so permission to access that storage area will
+ always be given.
+
+ <p>If the domain part of the script's origin contains no dots (U+002E) then
+ the string "<code>.localdomain</code>" must be appended to the script's
+ domain.
+
+ <p>Then, the domain part of the script's origin must be turned into an
+ array, being split, converted to ASCII, and lowercased as described for
+ the <var title="">domain</var> argument <a href="#splitDomain">above</a>.
+
+ <p>Of the two arrays, the longer one must then be shortened to the length
+ of the shorter one, by dropping items from the start of the array.
+
+ <div class=example>
+ <p>If the <var title="">domain</var> argument is "www.example.com" and the
+ script origin's domain part is "example.com" then the first array will be
+ a three item array ("www", "example", "com"), and the second will be a
+ two item array ("example", "com"). The first array is therefore
+ shortened, dropping the leading parts, making both into the same array
+ ("example", "com").</p>
+ </div>
+
+ <p>If the two arrays are not component-for-component identical in literal
+ string comparisons, then the user agent must raise a <a
+ href="#security8">security exception</a>.
+
+ <p>Otherwise, the user agent must check to see if it has allocated a global
+ storage area for the <a href="#normalised0">normalised requested
+ domain</a>. If it has not, a new storage area for that domain must be
+ created.
+
+ <p>The user agent must then create a <code><a
+ href="#storage2">Storage</a></code> object associated with that domain's
+ global storage area, and return it.
+
+ <p>When the requested <var title="">domain</var> is a top level domain, or
+ the empty string, or a country-specific sub-domain like "co.uk" or
+ "ca.us", the associated global storage area is known as a <dfn
+ id=public0>public storage area</dfn>.
+
+ <div id=globalStorageEvent>
+ <p>The <code title=dom-Storage-setItem><a
+ href="#setitem">setItem()</a></code> method might be called on a <code><a
+ href="#storage2">Storage</a></code> object that is associated with a
+ global storage area for a domain <var title="">d</var>, created by a
+ <code><a href="#storagelist">StorageList</a></code> object associated
+ with a <code><a href="#window">Window</a></code> object <var
+ title="">x</var>. Whenever this occurs, if the method didn't raise an
+ exception, a <code title=event-storage><a
+ href="#storage3">storage</a></code> event must be fired, as described
+ below, in every <code><a href="#htmldocument">HTMLDocument</a></code>
+ object that matches the following conditions:</p>
+
+ <ul>
+ <li>Its <code><a href="#window">Window</a></code> object is not <var
+ title="">x</var>, and
+
+ <li>Its <code><a href="#window">Window</a></code> object's <code
+ title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> attribute's <code><a
+ href="#storagelist">StorageList</a></code> object's <code
+ title=dom-Storagelist-namedItem><a
+ href="#nameditem2">namedItem()</a></code> method would not raise a <a
+ href="#security8">security exception</a> according to the rules above if
+ it was invoked with the domain <var title="">d</var>.
+ </ul>
+
+ <p>In other words, every other document that has access to that domain's
+ global storage area is notified of the change.</p>
+ </div>
+
+ <h4 id=the-storage0><span class=secno>4.11.6. </span>The <code
+ title=event-storage><a href="#storage3">storage</a></code> event</h4>
+
+ <p>The <dfn id=storage3 title=event-storage><code>storage</code></dfn>
+ event is fired in an <code><a href="#htmldocument">HTMLDocument</a></code>
+ when a storage area changes, as described in the previous two sections (<a
+ href="#sessionStorageEvent">for session storage</a>, <a
+ href="#globalStorageEvent">for global storage</a>).
+
+ <p>When this happens, a <code title=event-storage><a href="#storage3">storage</a></code> event
+ in no namespace, which bubbles, is not cancelable, has no default action,
+ and which uses the <code><a href="#storageevent">StorageEvent</a></code>
+ interface described below, must be fired on <a href="#the-body0">the body
+ element</a>.
+
+ <p>However, it is possible (indeed, for session storage areas, likely) that
+ the target <code><a href="#htmldocument">HTMLDocument</a></code> object is
+ not active at that time. For example, it might not be the <a
+ href="#current0">current entry</a> in the session history; user agents
+ typically stop scripts from running in pages that are in the history. In
+ such cases, the user agent must instead delay the firing of the event
+ until such time as the <code><a
+ href="#htmldocument">HTMLDocument</a></code> object in question becomes
+ active again.
+
+ <p>When there are multiple delayed <code title=event-storage><a
+ href="#storage3">storage</a></code> events for the same <code><a
+ href="#htmldocument">HTMLDocument</a></code> object, user agents should
+ coalesce events with the same <code title=dom-Storageevent-domain><a
+ href="#domain1">domain</a></code> value (dropping duplicates).
+
+ <p>If the DOM of a page that has delayed <code title=event-storage><a
+ href="#storage3">storage</a></code> events queued up is <a href="#discard"
+ title=discard>discarded</a>, then the delayed events are dropped as well.
+
+ <pre class=idl>interface <dfn id=storageevent>StorageEvent</dfn> : Event {
+ readonly attribute DOMString <a href="#domain1" title=dom-StorageEvent-domain>domain</a>;
+ void <a href="#initstorageevent" title=dom-StorageEvent-initStorageEvent>initStorageEvent</a>(in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMString domainArg);
+ void <a href="#initstorageeventns" title=dom-StorageEvent-initStorageEventNS>initStorageEventNS</a>(in DOMString namespaceURIArg, in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMString domainArg);
+};</pre>
+
+ <p>The <dfn id=initstorageevent
+ title=dom-StorageEvent-initStorageEvent><code>initStorageEvent()</code></dfn>
+ and <dfn id=initstorageeventns
+ title=dom-StorageEvent-initStorageEventNS><code>initStorageEventNS()</code></dfn>
+ methods must initialise the event in a manner analogous to the
+ similarly-named methods in the DOM3 Events interfaces. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The <dfn id=domain1
+ title=dom-StorageEvent-domain><code>domain</code></dfn> attribute of the
+ <code><a href="#storageevent">StorageEvent</a></code> event object must be
+ set to the name of the domain associated with the storage area that
+ changed if that storage area is a global storage area, or the string
+ "<code>#session</code>" if it was a session storage area.</p>
+ <!-- XXX onstorage should be defined -->
+
+ <h4 id=miscellaneous0><span class=secno>4.11.7. </span>Miscellaneous
+ implementation requirements for storage areas</h4>
+
+ <h5 id=disk-space><span class=secno>4.11.7.1. </span>Disk space</h5>
+
+ <p>User agents should limit the total amount of space allowed for a domain
+ based on the domain of the page setting the value.
+
+ <p>User agents should not limit the total amount of space allowed on a
+ per-storage-area basis, otherwise a site could just store data in any
+ number of subdomains, e.g. storing up to the limit in a1.example.com,
+ a2.example.com, a3.example.com, etc, circumventing per-domain limits.
+
+ <p>User agents should consider additional quota mechanisms (for example
+ limiting the amount of space provided to a domain's subdomains as a group)
+ so that hostile authors can't run scripts from multiple subdomains all
+ adding data to the global storage area in an attempted denial-of-service
+ attack.
+
+ <p>User agents may prompt the user when per-domain space quotas are
+ reached, allowing the user to grant a site more space. This enables sites
+ to store many user-created documents on the user's computer, for instance.
+
+ <p>User agents should allow users to see how much space each domain is
+ using.
+
+ <p>If the storage area space limit is reached during a <code
+ title=dom-Storage-setItem><a href="#setitem">setItem()</a></code> call,
+ the user agent should raise an exception.</p>
+ <!-- XXX which one? -->
+
+ <p>A mostly arbitrary limit of five megabytes per domain is recommended.
+ Implementation feedback is welcome and will be used to update this
+ suggestion in future.
+
+ <h5 id=threads0><span class=secno>4.11.7.2. </span>Threads</h5>
+
+ <p>Multiple browsing contexts must be able to access the global storage
+ areas simultaneously in a predictable manner. Scripts must not be able to
+ detect any concurrent script execution.
+
+ <p>This is required to guarantee that the <code title=dom-Storage-length><a
+ href="#length7">length</a></code> attribute of a <code><a
+ href="#storage2">Storage</a></code> object never changes while a script is
+ executing, other than in a way that is predictable by the script itself.
+
+ <p>There are various ways of implementing this requirement. One is that if
+ a script running in one browsing context accesses a global storage area,
+ the UA blocks scripts in other browsing contexts when they try to access
+ <em>any</em> global storage area until the first script has executed to
+ completion. (Similarly, when a script in one browsing context accesses its
+ session storage area, any scripts that have the same top level browsing
+ context and the same domain would block when accessing their session
+ storage area until the first script has executed to completion.) Another
+ (potentially more efficient but probably more complex) implementation
+ strategy is to use optimistic transactional script execution. This
+ specification does not require any particular implementation strategy, so
+ long as the requirement above is met.
+
+ <h4 id=security5><span class=secno>4.11.8. </span>Security and privacy</h4>
+
+ <h5 id=user-tracking><span class=secno>4.11.8.1. </span>User tracking</h5>
+
+ <p>A third-party advertiser (or any entity capable of getting content
+ distributed to multiple sites) could use a unique identifier stored in its
+ domain's global storage area to track a user across multiple sessions,
+ building a profile of the user's interests to allow for highly targeted
+ advertising. In conjunction with a site that is aware of the user's real
+ identity (for example an e-commerce site that requires authenticated
+ credentials), this could allow oppressive groups to target individuals
+ with greater accuracy than in a world with purely anonymous Web usage.
+
+ <p>The <code title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> object also introduces a
+ way for sites to cooperate to track users over multiple domains, by
+ storing identifying data in the "<a href="#public0" title="public storage
+ area">public</a>" top-level domain storage area, accessible by any domain.
+
+ <p>There are a number of techniques that can be used to mitigate the risk
+ of user tracking:
+
+ <ul>
+ <li>
+ <p>Blocking third-party storage: user agents may restrict access to the
+ <code title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> object to scripts
+ originating at the domain of the top-level document of the <a
+ href="#browsing0">browsing context</a>.</p>
+
+ <p>This blocks a third-party site from using its private storage area for
+ tracking a user, but top-level sites could still cooperate with third
+ parties to perform user tracking by using the "<a href="#public0"
+ title="public storage area">public</a>" storage area.</p>
+
+ <li>
+ <p>Expiring stored data: user agents may automatically delete stored data
+ after a period of time.</p>
+
+ <p>For example, a user agent could treat the global storage area as
+ session-only storage, deleting the data once the user had closed all the
+ <span>browsing contexts</span> that could access it.</p>
+
+ <p>This can restrict the ability of a site to track a user, as the site
+ would then only be able to track the user across multiple sessions when
+ he authenticates with the site itself (e.g. by making a purchase or
+ logging in to a service).</p>
+ <!-- XXX should there be an explicit way for sites to state when
+ data should expire? as in
+ globalStorage['example.com'].expireData(365); ? -->
+
+
+ <li>
+ <p>Blocking access to the top-level domain ("<a href="#public0"
+ title="public storage area">public</a>") storage areas: user agents may
+ prevent domains from storing data in and reading data from the top-level
+ domain entries in the <code title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> object.</p>
+
+ <p>In practice this requires a detailed list of all the "public"
+ second-level (and third-level) domains. For example, content at the
+ domain <code>www.example.com</code> would be allowed to access
+ <code>example.com</code> data but not <code>com</code> data; content at
+ the domain <code>example.co.uk</code> would be allowed access to
+ <code>example.co.uk</code> but not <code>co.uk</code> or
+ <code>uk</code>; and content at <code>example.chiyoda.tokyo.jp</code>
+ would be allowed access to <code>example.chiyoda.tokyo.jp</code> but not
+ <code>chiyoda.tokyo.jp</code>, <code>tokyo.jp</code>, or
+ <code>jp</code>, while content at <code>example.metro.tokyo.jp</code>
+ would be allowed access to both <code>example.metro.tokyo.jp</code> and
+ <code>metro.tokyo.jp</code> but not <code>tokyo.jp</code> or
+ <code>jp</code>. The problem is even more convoluted when one considers
+ private domains with third-party subdomains such as
+ <code>dyndns.org</code> or <code>uk.com</code>.</p>
+
+ <p>Blocking access to the "<a href="#public0" title="public storage
+ area">public</a>" storage areas can also prevent innocent sites from
+ cooperating to provide services beneficial to the user.</p>
+
+ <li>
+ <p>Treating persistent storage as cookies: user agents may present the
+ persistent storage feature to the user in a way that does not
+ distinguish it from HTTP session cookies. <a
+ href="#refsRFC2965">[RFC2965]</a></p>
+
+ <p>This might encourage users to view persistent storage with healthy
+ suspicion.</p>
+
+ <li>
+ <p>Site-specific white-listing of access to "<a href="#public0"
+ title="public storage area">public</a>" storage area: user agents may
+ allow sites to access persistent storage for their own domain and
+ subdomains in an unrestricted manner, but require the user to authorise
+ access to the storage area of higher-level domains.</p>
+
+ <p>For example, code at <code>example.com</code> would be always allowed
+ to read and write data for <code>www.example.com</code> and
+ <code>example.com</code>, but if it tried to access <code>com</code>,
+ the user agent could display a non-modal message informing the user that
+ the page requested access to <code>com</code> and offering to allow it.</p>
+
+ <li>
+ <p>Origin-tracking of persistent storage data: user agents may record the
+ domain of the script that caused data to be stored.</p>
+
+ <p>If this information is then used to present the view of data currently
+ in persistent storage, it would allow the user to make informed
+ decisions about which parts of the persistent storage to prune. Combined
+ with a blacklist ("delete this data and prevent this domain from ever
+ storing data again"), the user can restrict the use of persistent
+ storage to sites that he trusts.</p>
+
+ <li>
+ <p>Shared blacklists: user agents may allow users to share their
+ persistent storage domain blacklists.</p>
+
+ <p>This would allow communities to act together to protect their privacy.</p>
+ </ul>
+
+ <p>While these suggestions prevent trivial use of this API for user
+ tracking, they do not block it altogether. Within a single domain, a site
+ can continue to track the user across multiple sessions, and can then pass
+ all this information to the third party along with any identifying
+ information (names, credit card numbers, addresses) obtained by the site.
+ If a third party cooperates with multiple sites to obtain such
+ information, a profile can still be created.
+
+ <p>However, user tracking is to some extent possible even with no
+ cooperation from the user agent whatsoever, for instance by using session
+ identifiers in URIs, a technique already commonly used for innocuous
+ purposes but easily repurposed for user tracking (even retroactively).
+ This information can then be shared with other sites, using
+ visitors' IP addresses and other user-specific data (e.g. user-agent
+ headers and configuration settings) to combine separate sessions into
+ coherent user profiles.
+
+ <h5 id=cookie><span class=secno>4.11.8.2. </span>Cookie resurrection</h5>
+
+ <p>If the user interface for persistent storage presents data in the
+ persistent storage feature separately from data in HTTP session cookies,
+ then users are likely to delete data in one and not the other. This would
+ allow sites to use the two features as redundant backup for each other,
+ defeating a user's attempts to protect his privacy.
+
+ <h5 id=integrity><span class=secno>4.11.8.3. </span>Integrity of "public"
+ storage areas</h5>
+
+ <p>Since the "<a href="#public0" title="public storage area">public</a>"
+ global storage areas are accessible by content from many different
+ parties, it is possible for third-party sites to delete or change
+ information stored in those areas in ways that the originating sites may
+ not expect.
+
+ <p>Authors must not use the "<a href="#public0" title="public storage
+ area">public</a>" global storage areas for storing sensitive data. Authors
+ must not trust information stored in "<a href="#public0" title="public
+ storage area">public</a>" global storage areas.
+
+ <h5 id=cross-protocol><span class=secno>4.11.8.4. </span>Cross-protocol and
+ cross-port attacks</h5>
+
+ <p>This API makes no distinction between content served over HTTP, FTP, or
+ other host-based protocols, and does not distinguish between content
+ served from different ports at the same host.
+
+ <p>Thus, for example, data stored in the global persistent storage for
+ domain "www.example.com" by a page served from HTTP port 80 will be
+ available to a page served in <code>http://example.com:18080/</code>, even
+ if the latter is an experimental server under the control of a different
+ user.
+
+ <p>Since the data is not sent over the wire by the user agent, this is not
+ a security risk in its own right. However, authors must take proper steps
+ to ensure that all hosts that have fully qualified host names that are
+ subsets of hosts dealing with sensitive information are as secure as the
+ originating hosts themselves.
+
+ <p>Similarly, authors must ensure that all Web servers on a host,
+ regardless of the port, are equally trusted if any of them are to use
+ persistent storage. For instance, if a Web server runs a production
+ service that makes use of the persistent storage feature, then other users
+ that have access to that machine and that can run a Web server on another
+ port will be able to access the persistent storage added by the production
+ service (assuming they can trick a user into visiting their page).
+
+ <p>However, if one is able to trick users into visiting a Web server with
+ the same host name but on a different port as a production service used by
+ these users, then one could just as easily fake the look of the site and
+ thus trick users into authenticating with the fake site directly,
+ forwarding the request to the real site and stealing the credentials in
+ the process. Thus, the persistent storage feature is considered to only
+ minimally increase the risk involved.
+
+ <p class=big-issue>What about if someone is able to get a server up on a
+ port, and can then send people to that URI? They could steal all the data
+ with no further interaction. How about putting the port number at the end
+ of the string being compared? (Implicitly.)
+
+ <h5 id=dns-spoofing><span class=secno>4.11.8.5. </span>DNS spoofing attacks</h5>
+
+ <p>Because of the potential for DNS spoofing attacks, one cannot guarantee
+ that a host claiming to be in a certain domain really is from that domain.
+ The <code title=dom-StorageItem-secure><a href="#secure">secure</a></code>
+ attribute is provided to mark certain key/value pairs as only being
+ accessible to pages that have been authenticated using secure certificates
+ (or similar mechanisms).
+
+ <p>Authors must ensure that they do not mark sensitive items as "safe for
+ both secure and insecure content". (To prevent the risk of a race
+ condition, data stored by scripts in secure contexts default to being
+ marked as "safe only for secure content".)
+
+ <h5 id=cross-directory><span class=secno>4.11.8.6. </span>Cross-directory
+ attacks</h5>
+
+ <p>Different authors sharing one host name, for example users hosting
+ content on <code>geocities.com</code>, all share one persistent storage
+ object. There is no feature to restrict the access by pathname. Authors on
+ shared hosts are therefore recommended to avoid using the persistent
+ storage feature, as it would be trivial for other authors to read from and
+ write to the same storage area.
+
+ <p class=note>Even if a path-restriction feature was made available, the
+ usual DOM scripting security model would make it trivial to bypass this
+ protection and access the data from any path.
+
+ <h5 id=public><span class=secno>4.11.8.7. </span>Public storage areas
+ corresponding to hosts</h5>
+
+ <p>If a "<a href="#public0" title="public storage area">public</a>" global
+ storage area corresponds to a host, as it typically does for private
+ domains with third-party subdomains such as dyndns.org or uk.com, the host
+ corresponding to the "public" domain has access to all the storage areas
+ of its third-party subdomains. In general, authors are discouraged from
+ using the <code title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> API for sensitive data
+ unless the operators of all the domains involved are trusted.
+
+ <p>User agents may mitigate this problem by preventing hosts corresponding
+ to "<a href="#public0" title="public storage area">public</a>" global
+ storage areas from accessing any storage areas other than their own.
+
+ <h5 id=storage0><span class=secno>4.11.8.8. </span>Storage areas in the
+ face of untrusted higher-level domains that do not correspond to public
+ storage areas</h5>
+
+ <p>Authors should not store sensitive data using the global storage APIs if
+ there are hosts with fully-qualified domain names that are subsets of
+ their own which they do not trust. For example, an author at
+ <code>finance.members.example.net</code> should not store sensitive
+ financial user data in the <code>finance.members.example.net</code>
+ storage area if he does not trust the host that runs
+ <code>example.net</code>.
+
+ <h5 id=storage1><span class=secno>4.11.8.9. </span>Storage areas in the
+ face of untrusted subdomains</h5>
+
+ <p>If an author publishing content on one host, e.g.
+ <code>example.com</code>, wishes to use the <code
+ title=dom-globalStorage><a href="#globalstorage">globalStorage</a></code>
+ API but does not wish any content on the host's subdomains to access the
+ data, the author should use an otherwise non-existent subdomain name,
+ e.g., <code>private.example.com</code>, to store the data. This will be
+ accessible only to that host (and its parent domains), and not to any of
+ the real subdomains (e.g. <code>upload.example.com</code>).
+
+ <h5 id=implementation><span class=secno>4.11.8.10. </span>Implementation
+ risks</h5>
+
+ <p>The two primary risks when implementing this persistent storage feature
+ are letting hostile sites read information from other domains, and letting
+ hostile sites write information that is then read from other domains.
+
+ <p>Letting third-party sites read data that is not supposed to be read from
+ their domain causes <em>information leakage</em>. For example, a user's
+ shopping wishlist on one domain could be used by another domain for
+ targeted advertising; or a user's work-in-progress confidential documents
+ stored by a word-processing site could be examined by the site of a
+ competing company.
+
+ <p>Letting third-party sites write data to the storage areas of other
+ domains can result in <em>information spoofing</em>, which is equally
+ dangerous. For example, a hostile site could add items to a user's
+ wishlist; or a hostile site could set a user's session identifier to a
+ known ID that the hostile site can then use to track the user's actions on
+ the victim site.
+
+ <p>A risk is also presented by servers on local domains having host names
+ matching top-level domain names, for instance having a host called "com"
+ or "net". Such hosts might, if implementations fail to correctly implement
+ the <code>.localdomain</code> suffixing, <!-- XXX cross ref --> have full
+ access to all the data stored in a UA's persistent storage for that top
+ level domain.
+
+ <p>Thus, strictly following the model described in this specification is
+ important for user security.
+
+ <p>In addition, a number of optional restrictions related to the "<a
+ href="#public0" title="public storage area">public</a>" global storage
+ areas are suggested in the previous sections. The design of this API is
+ intended to be such that not supporting these restrictions, or supporting
+ them less than perfectly, does not result in critical security problems.
+ However, implementations are still encouraged to create and maintain a
+ list of "<a href="#public0" title="public storage area">public</a>"
+ domains, and apply the restrictions described above.
+
+ <h3 id=sql><span class=secno>4.12. </span>Client-side database storage</h3>
+
+ <h4 id=introduction1><span class=secno>4.12.1. </span>Introduction</h4>
+
+ <p class=big-issue>...
+
+ <h4 id=executing><span class=secno>4.12.2. </span>Executing SQL statements</h4>
+
+ <p>Each <a href="#origin0">origin</a> must have an associated database
+ unique to that origin. An author can interact with the database using the
+ <code title=dom-executeSql><a href="#executesql">executeSql()</a></code>
+ method.
+
+ <p>When the <dfn id=executesql title=dom-executeSql><code>executeSql(<var
+ title="">sqlStatement</var>, <var
+ title="">arguments...</var>)</code></dfn> method is invoked, the user
+ agent must first interpret the first argument to the method (<var
+ title="">sqlStatement</var>) as an SQL statement, replacing any <code
+ title="">?</code> placeholders in the statement with the values given in
+ the subsequent arguments (<var title="">arguments...</var>), and must then
+ evaluate the statement as an SQL statement in the context of the database
+ associated with the <a href="#origin0">origin</a> of the <a
+ href="#active">active document</a> of the <a href="#browsing0">browsing
+ context</a> of the <code><a href="#window">Window</a></code> object on
+ which the method was called. <a href="#refsSQL">[SQL]</a>
+
+ <p>If the <code title=dom-executeSql><a
+ href="#executesql">executeSql()</a></code> method is called with a
+ different number of arguments after the statement than there are
+ placeholder <code title="">?</code> characters in the statement, then the
+ method must raise a <code>SYNTAX_ERR</code> exception.
+
+ <p>Otherwise, the method must return a <code><a
+ href="#resultset">ResultSet</a></code> object representing the result of
+ the operation.
+
+ <p>The user agent must act as if the database was hosted in an otherwise
+ completely empty environment with no resources. For example, attempts to
+ read from or write to the filesystem will fail.
+
+ <p>User agents should limit the total amount of space allowed for each
+ origin, but may prompt the user and extend the limit if a database is
+ reaching its quota. User agents should allow users to see how much space
+ each database is using.
+
+ <p>A mostly arbitrary limit of five megabytes per origin is recommended.
+ Implementation feedback is welcome and will be used to update this
+ suggestion in future.
+
+ <p>SQL inherently supports multiple concurrent connections. Authors should
+ make use of SQL's transaction features if multiple scripts are expected to
+ interact with the same database simultaneously (as could happen if the
+ same page was opened in two different <a href="#browsing0" title="browsing
+ context">browsing contexts</a>).
+
+ <p class=note>A future version of this specification may define the exact
+ SQL subset required in more detail.
+
+ <h4 id=database><span class=secno>4.12.3. </span>Database query results</h4>
+
+ <p>Calls to the <code title=dom-executeSql><a
+ href="#executesql">executeSql()</a></code> method return <code><a
+ href="#resultset">ResultSet</a></code> objects.
+
+ <pre class=idl>interface <dfn id=resultset>ResultSet</dfn> {
+ // cursor
+ readonly attribute boolean <a href="#validrow" title=dom-ResultSet-validRow>validRow</a>;
+ void <a href="#next0" title=dom-ResultSet-next>next</a>();
+
+ // current row accessors
+ readonly attribute unsigned int <a href="#length8" title=dom-ResultSet-length>length</a>;
+ DOMString <a href="#getname" title=dom-ResultSet-getName>getName</a>(in unsigned int field);
+ Object <a href="#itemfield" title=dom-ResultSet-item>item</a>(in unsigned int field);
+ Object <a href="#nameditem3" title=dom-ResultSet-namedItem>namedItem</a>(in DOMString field);
+
+ // general result accessors
+ readonly attribute int <a href="#insertid" title=dom-ResultSet-insertId>insertId</a>;
+};</pre>
+
+ <p>A <code><a href="#resultset">ResultSet</a></code> object has a cursor
+ which visits the results of a SQL statement, in the order returned.
+ Initially, the cursor must point at the first row returned by the
+ statement, if any. Once a row has been visited, it cannot be visited again
+ (the cursor cannot go backwards).
+
+ <p>The <dfn id=validrow
+ title=dom-ResultSet-validRow><code>validRow</code></dfn> attribute must
+ return true if the <code><a href="#resultset">ResultSet</a></code>
+ object's cursor is at a row with data. If the cursor has been moved beyond
+ the last row of the results, or if there were no results for the SQL
+ statement in question, then the attribute must return false.
+
+ <p>The <dfn id=next0 title=dom-ResultSet-next><code>next()</code></dfn>
+ method must advance the cursor to the next row. If there are no more rows
+ it must advance the cursor past the end of the results, so that <code
+ title=dom-ResultSet-validRow><a href="#validrow">validRow</a></code> will
+ return false.
+
+ <p>Each row of the results consists of a set of fields. Each field has a
+ name and a value. The fields are ordered. The names of the fields, and
+ their order, must be the same for every row in the results.
+
+ <p>The <dfn id=length8 title=dom-ResultSet-length><code>length</code></dfn>
+ attribute must return the number of fields in each row. If the <code><a
+ href="#resultset">ResultSet</a></code> object has no results rows (i.e. if
+ the SQL statement executed did not return any results) then the attribute
+ must return zero.
+
+ <p>The <dfn id=getname title=dom-ResultSet-getName><code>getName(<var
+ title="">field</var>)</code></dfn> method must return the name of the
+ field with index <var title="">field</var>.
+
+ <p>The <dfn id=itemfield title=dom-ResultSet-item><code>item(<var
+ title="">field</var>)</code></dfn> method must return the value of the
+ field with index <var title="">field</var>. In the ECMAScript binding, the
+ object's [[Get]] method, when invoked with a numeric argument, must have
+ the same effect as calling the <code title=dom-ResultSet-item><a
+ href="#itemfield">item()</a></code> method.
+
+ <p>If the <var title="">field</var> argument of either the <code
+ title=dom-ResultSet-getName><a href="#getname">getName()</a></code> or
+ <code title=dom-ResultSet-item><a href="#itemfield">item()</a></code>
+ methods is ever less than zero or greater than or equal to the number of
+ fields in each row, or if those methods are called when the <code><a
+ href="#resultset">ResultSet</a></code> object has no results rows, the
+ methods must instead raise an <code>INDEX_SIZE_ERR</code> exception.
+
+ <p>The <dfn id=nameditem3
+ title=dom-ResultSet-namedItem><code>namedItem(<var
+ title="">field</var>)</code></dfn> method must return the value of the
+ field with the name <var title="">field</var>. If there is no field with
+ that name, the method must instead raise a <code>SYNTAX_ERR</code>
+ exception. In the ECMAScript binding, the object's [[Get]] method, when
+ invoked with a non-numeric argument, must have the same effect as calling
+ the <code title=dom-ResultSet-namedItem><a
+ href="#nameditem3">namedItem()</a></code> method.
+
+ <p>The <dfn id=insertid
+ title=dom-ResultSet-insertId><code>insertId</code></dfn> attribute must
+ return the row ID of the row that the <code><a
+ href="#resultset">ResultSet</a></code> object's SQL statement inserted
+ into the database, if the statement inserted a row. If the statement
+ inserted multiple rows, the ID of the last row must be the one returned.
+ If the statement did not insert a row, then the attribute must instead
+ raise an <code>INVALID_ACCESS_ERR</code> exception.
+
+ <h4 id=privacy><span class=secno>4.12.4. </span>Privacy</h4>
+
+ <p>In contrast with the <code title=dom-globalStorage><a
+ href="#globalstorage">globalStorage</a></code> feature, which
+ intentionally allows data to be accessed across multiple domains,
+ protocols, and ports (albeit in a controlled fashion), this database
+ feature is limited to scripts running with the same <a
+ href="#origin0">origin</a> as the database. Thus, it is expected that the
+ privacy implications be equivalent to those already present in allowing
+ scripts to communicate with their originating host.
+
+ <p>User agents are encouraged to treat data stored in databases in the same
+ way as cookies for the purposes of user interfaces, to reduce the risk of
+ using this feature for cookie resurrection.
+
+ <h4 id=security6><span class=secno>4.12.5. </span>Security</h4>
+
+ <h5 id=user-agents><span class=secno>4.12.5.1. </span>User agents</h5>
+
+ <p>User agent implementors are strongly encouraged to audit all their
+ supported SQL statements for security implications. For example, <code
+ title="">LOAD DATA INFILE</code> is likely to pose security risks and
+ there is little reason to support it.
+
+ <p>In general, it is recommended that user agents not support features that
+ control how databases are stored on disk. For example, there is little
+ reason to allow Web authors to control the character encoding used in the
+ disk representation of the data, as all data in ECMAScript is implicitly
+ UTF-16.
+
+ <h5 id=sql-injection><span class=secno>4.12.5.2. </span>SQL injection</h5>
+
+ <p>Authors are strongly recommended to make use of the <code
+ title="">?</code> placeholder feature of the <code title=dom-executeSql><a
+ href="#executesql">executeSql()</a></code> method, and to never construct
+ SQL statements on the fly.
+
+ <h2 id=editing><span class=secno>5. </span><dfn id=editing0>Editing</dfn></h2>
+
+ <p>This section describes various features that allow authors to enable
+ users to edit documents and parts of documents interactively.
+
+ <h3 id=editing-intro><span class=secno>5.1. </span>Introduction</h3>
+
+ <p><em>This section is non-normative.</em>
+
+ <p class=big-issue>Would be nice to explain how these features work
+ together.
+
+ <h3 id=contenteditable><span class=secno>5.2. </span>The <code
+ title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute</h3>
+
+ <p>The <dfn id=contenteditable0
+ title=attr-contenteditable><code>contenteditable</code></dfn> attribute is
+ a common attribute. User agents must support this attribute on all <a
+ href="#html-elements">HTML elements</a>.
+
+ <p>The <code title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute is an <a
+ href="#enumerated">enumerated attribute</a> whose keywords are the empty
+ string, <code title="">true</code>, and <code title="">false</code>. The
+ empty string and the <code title="">true</code> keyword map to the
+ <em>true</em> state. The <code title="">false</code> keyword maps to the
+ <em>false</em> state, which is also the <em>invalid value default</em>.
+ There is no <em>missing value default</em>.
+
+ <p>If an HTML element has a <code title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute set to the
+ true state, or if its nearest ancestor with the <code
+ title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute set has its
+ attribute set to the true state, or if it has no ancestors with the <code
+ title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute set but the
+ <code>Document</code> has <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> enabled, then the UA must treat
+ the element as <dfn id=editable0>editable</dfn> (as described below).
+
+ <p>Otherwise, either the HTML element has a <code
+ title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute set to the
+ false state, or its nearest ancestor with the <code
+ title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute set is not
+ <em><a href="#editable0">editable</a></em>, or it has no ancestor with the
+ <code title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code> attribute set and the
+ <code>Document</code> itself has <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> disabled, and the element is thus
+ not editable.
+
+ <p>The <dfn id=contenteditable1
+ title=dom-contentEditable><code>contentEditable</code></dfn> DOM
+ attribute, on getting, must return the string "<code
+ title="">inherit</code>" if the content attribute isn't set, "<code
+ title="">true</code>" if the attribute is set and has the true state, and
+ "<code title="">false</code>" otherwise. On setting, if the new value is
+ case-insensitively<!-- XXX ascii --> equal to the string "<code
+ title="">inherit</code>" then the content attribute must be removed, if
+ the new value is case-insensitively<!-- XXX
+ ascii --> equal to the
+ string "<code title="">true</code>" then the content attribute must be set
+ to the string "<code title="">true</code>", if the new value is
+ case-insensitively<!-- XXX
+ ascii --> equal to the string "<code
+ title="">false</code>" then the content attribute must be set to the string
+ "<code title="">false</code>", and otherwise the attribute setter must
+ raise a <code>SYNTAX_ERR</code> exception.
+
+ <p>If an element is <a href="#editable0">editable</a> and its parent
+ element is not, or if an element is <a href="#editable0">editable</a> and
+ it has no parent element, then the element is an <dfn id=editing1>editing
+ host</dfn>. Editable elements can be nested. User agents must make editing
+ hosts focusable (which typically means they enter the <span
+ title=tabindex>tab order</span>). An editing host can contain non-editable
+ sections, these are handled as described below. An editing host can
+ contain non-editable sections that contain further editing hosts.
+
+ <p>When an editing host has focus, it must have a <dfn id=caret>caret
+ position</dfn> that specifies where the current editing position is. It
+ may also have a <a href="#a-selection" title="the
+ selection">selection</a>.</p>
+ <!--- XXX xref to later section -->
+
+ <p class=note>How the caret and selection are represented depends entirely
+ on the UA.</p>
+ <!-- XXX rendering requirement: The current caret should affect the
+ line-height (i.e. it acts at least like an empty inline element) -->
+ <!-- XXX document.designMode attribute -->
+
+ <h4 id=user-editing><span class=secno>5.2.1. </span>User editing actions</h4>
+
+ <p>There are several actions that the user agent should allow the user to
+ perform while the user is interacting with an editing host. How exactly
+ each action is triggered is not defined for every action, but when it is
+ not defined, suggested key bindings are provided to guide implementors.
+
+ <dl>
+ <dt>Move the caret
+
+ <dd>
+ <p>User agents must allow users to move the caret to any position within
+ an editing host, even into nested editable elements. This could be
+ triggered as the default action of <code
+ title=event-keydown>keydown</code> events with various key identifiers
+ and as the default action of <code
+ title=event-mousedown>mousedown</code> events.
+
+ <dt>Change the selection
+
+ <dd>
+ <p>User agents must allow users to change <a href="#a-selection">the
+ selection</a> within an editing host, even into nested editable
+ elements. This could be triggered as the default action of <code
+ title=event-keydown>keydown</code> events with various key identifiers
+ and as the default action of <code
+ title=event-mousedown>mousedown</code> events.
+
+ <dt id=contenteditable-insertText>Insert text
+
+ <dd>
+ <p>This action must be triggered as the default action of a <code
+ title=event-textInput>textInput</code> event, and may be triggered by
+ other commands as well. It must cause the user agent to insert the
+ specified text (given by the event object's <code title="">data</code>
+ attribute in the case of the <code
+ title=event-textInput>textInput</code> event) at the caret.</p>
+
+ <p>If the caret is positioned somewhere where <a
+ href="#inline-level0">inline-level content</a> is not allowed (e.g.
+ because the element accepts "both block-level and inline-level content
+ but not both", and the element already contains block-level content),
+ then the user agent must not insert the text directly at the caret
+ position. In such cases the behaviour is UA-dependent, but user agents
+ must not, in response to a request to insert text, generate a DOM that
+ is less conformant than the DOM prior to the request.</p>
+
+ <p>User agents should allow users to insert new paragraphs into elements
+ that only contain block-level content.</p>
+
+ <div class=example>
+ <p>For example, given the markup:</p>
+
+ <pre>&lt;section&gt;
+ &lt;dl&gt;
+ &lt;dt&gt; Ben &lt;/dt&gt;
+ &lt;dd&gt; Goat &lt;/dd&gt;
+ &lt;/dl&gt;
+&lt;/section&gt;</pre>
+
+ <p>...the user agent should allow the user to insert <code><a
+ href="#p">p</a></code> elements before and after the <code><a
+ href="#dl">dl</a></code> element, as children of the <code><a
+ href="#section">section</a></code> element.</p>
+ </div>
+
+ <dt id=contenteditable-breakBlock>Break block
+
+ <dd>
+ <p>UAs should offer a way for the user to request that the current block
+ be broken at the caret, e.g. as the default action of a <code
+ title=event-keydown>keydown</code> event whose identifier is the "Enter"
+ key and that has no modifiers set.</p>
+
+ <p>The exact behaviour is UA-dependent, but user agents must not, in
+ response to a request to break a block, generate a DOM that is less
+ conformant than the DOM prior to the request.
+
+ <dt id=contenteditable-br>Insert a line separator
+
+ <dd>
+ <p>UAs should offer a way for the user to request an explicit line break
+ at the caret position without breaking the block, e.g. as the default
+ action of a <code title=event-keydown>keydown</code> event whose
+ identifier is the "Enter" key and that has a shift modifier set. Line
+ separators are typically found within a poem verse or an address. To
+ insert a line break, the user agent must insert a <code><a
+ href="#br">br</a></code> element.</p>
+
+ <p>If the caret is positioned somewhere where <a
+ href="#inline-level0">inline-level content</a> is not allowed (e.g.
+ because the element accepts "both block-level and inline-level content
+ but not both", and the element already contains block-level content),
+ then the user agent must not insert the <code><a
+ href="#br">br</a></code> element directly at the caret position. In such
+ cases the behaviour is UA-dependent, but user agents must not, in
+ response to a request to insert a line separator, generate a DOM that is
+ less conformant than the DOM prior to the request.
+
+ <dt id=contenteditable-delete>Delete
+
+ <dd>
+ <p>UAs should offer a way for the user to delete text and elements, e.g.
+ as the default action of <code title=event-keydown>keydown</code> events
+ whose identifiers are "U+0008" or "U+007F".</p>
+
+ <p>Five edge cases in particular need to be considered carefully when
+ implementing this feature: backspacing at the start of an element,
+ backspacing when the caret is immediately after an element,
+ forward-deleting at the end of an element, forward-deleting when the
+ caret is immediately before an element, and deleting a <a
+ href="#a-selection" title="the selection">selection</a> whose start and
+ end points do not share a common parent node.</p>
+
+ <p>In any case, the exact behaviour is UA-dependent, but user agents must
+ not, in response to a request to delete text or an element, generate a
+ DOM that is less conformant than the DOM prior to the request.
+
+ <dt id=contenteditable-wrapSemantic>Insert, and wrap text in, semantic
+ elements
+
+ <dd>
+ <p>UAs should offer a way for the user to mark text as having <a
+ href="#em" title=em>stress emphasis</a> and as being <a href="#strong"
+ title=strong>important</a>, and may offer the user the ability to mark
+ text and blocks with other semantics.</p>
+
+ <p>UAs should similarly offer a way for the user to insert empty semantic
+ elements (such as, again, <code><a href="#em">em</a></code>, <code><a
+ href="#strong">strong</a></code>, and others) to subsequently fill by
+ entering text manually.</p>
+
+ <p>UAs should also offer a way to remove those semantics from marked up
+ text, and to remove empty semantic elements that have been inserted.</p>
+
+ <p>The exact behaviour is UA-dependent, but user agents must not, in
+ response to a request to wrap semantics around some text or to insert or
+ remove a semantic element, generate a DOM that is less conformant than
+ the DOM prior to the request.
+
+ <dt>Select and move non-editable elements nested inside editing hosts
+
+ <dd>
+ <p>UAs should offer a way for the user to move images and other
+ non-editable parts around the content within an editing host. This may
+ be done using the <a href="#drag-and">drag and drop</a> mechanism. User
+ agents must not, in response to a request to move non-editable elements
+ nested inside editing hosts, generate a DOM that is less conformant than
+ the DOM prior to the request.
+
+ <dt>Edit form controls nested inside editing hosts
+
+ <dd>
+ <p>When an <a href="#editable0">editable</a> form control is edited, the
+ changes must be reflected in both its current value <em>and</em> its
+ default value. For <code>input</code> elements this means updating the
+ <code title=dom-input-defaultValue>defaultValue</code> DOM attribute as
+ well as the <code title=dom-input-value>value</code> DOM attribute; for
+ <code>select</code> elements it means updating the <code>option</code>
+ elements' <code title=dom-option-defaultSelected>defaultSelected</code>
+ DOM attribute as well as the <code
+ title=dom-option-selected>selected</code> DOM attribute; for
+ <code>textarea</code> elements this means updating the <code
+ title=dom-textarea-defaultValue>defaultValue</code> DOM attribute as
+ well as the <code title=dom-textarea-value>value</code> DOM attribute.
+ (Updating the <code title="">default*</code> DOM attributes causes
+ content attributes to be updated as well.)
+ </dd>
+ <!-- XXX something about not supporting resizing? -->
+ </dl>
+ <!-- XXX each action performed should be added to the undo history -->
+
+ <p>User agents may perform several commands per user request; for example
+ if the user selects a block of text and hits <kbd><kbd>Enter</kbd></kbd>,
+ the UA might interpret that as a request to delete the content of <a
+ href="#a-selection">the selection</a> followed by a request to break the
+ block at that position.
+
+ <h4 id=making><span class=secno>5.2.2. </span>Making entire documents
+ editable</h4>
+
+ <p>Documents have a <dfn id=designMode
+ title=dom-document-designMode><code>designMode</code></dfn>, which can be
+ either enabled or disabled.
+
+ <p>The <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> DOM attribute on the
+ <code>Document</code> object takes two values, "<code
+ title="">on</code>" and "<code title="">off</code>". When it is set, the
+ new value must be case-insensitively <!-- XXX ASCII case-folding -->
+ compared to these two values. If it matches the "<code title="">on</code>"
+ value, then <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> must be enabled, and if it
+ matches the "<code title="">off</code>" value, then <code
+ title=dom-document-designMode><a href="#designMode">designMode</a></code>
+ must be disabled. Other values must be ignored.
+
+ <p>When <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> is enabled, the DOM attribute
+ must return the value "<code title="">on</code>", and when it is disabled,
+ it must return the value "<code title="">off</code>".
+
+ <p>The last state set must persist until the document is destroyed or the
+ state is changed. Initially, documents must have their <code
+ title=dom-document-designMode><a href="#designMode">designMode</a></code>
+ disabled.
+
+ <p>Enabling <code title=dom-document-designMode><a
+ href="#designMode">designMode</a></code> causes scripts in general to be
+ disabled and the document to become editable.
+
+ <p>When the <code>Document</code> has <code
+ title=dom-document-designMode><a href="#designMode">designMode</a></code>
+ enabled, event listeners registered on the document or any elements owned
+ by the document must do nothing.
+
+ <h3 id=dnd><span class=secno>5.3. </span><dfn id=drag-and>Drag and
+ drop</dfn></h3>
+ <!--XXX
+
+http://msdn.microsoft.com/workshop/author/datatransfer/overview.asp
+http://msdn.microsoft.com/workshop/author/dhtml/reference/objects/clipboarddata.asp
+
+> To implement this with simple interface I've proposed, events should be
+> handled either by existing elements (like list items that compare their size
+> and position of dragged element to decide whether element should be dropped
+> before or after) or handled by container that would probably need to calculate
+> positions of it's children and create new element to show drop target. Smooth
+> Mac-like drag'n'drop can be implemented by animating drop target's
+> padding/margin. So that's quite a bit of code that's going to be reinvented
+> each time someone implements reordering.
+
+<hyatt> :droptarget
+<hyatt> or something
+<hyatt> we don't support a pseudo-class for the drop target but that's a great idea
+<Hixie_> yeah, thinking about that too
+<Hixie_> :drop-target, :drop-target(above), :drop-target(below) and having ondragover be able to say "not on me, but next to me maybe"
+
+http://msdn.microsoft.com/workshop/author/dhtml/reference/events/ondragstart.asp
+http://msdn.microsoft.com/workshop/author/dhtml/reference/events/ondrag.asp
+http://msdn.microsoft.com/workshop/author/dhtml/reference/events/ondragend.asp
+http://msdn.microsoft.com/workshop/author/dhtml/reference/objects/obj_datatransfer.asp
+http://developer.apple.com/documentation/AppleApplications/Conceptual/SafariJSProgTopics/Tasks/DragAndDrop.html
+-->
+
+ <p>This section defines an event-based drag-and-drop mechanism.
+
+ <p>This specification does not define exactly what a <em>drag-and-drop
+ operation</em> actually is.
+
+ <p>On a visual medium with a pointing device, a drag operation could be the
+ default action of a <code title=event-mousedown>mousedown</code> event
+ that is followed by a series of <code
+ title=event-mousemove>mousemove</code> events, and the drop could be
+ triggered by the mouse being released.
+
+ <p>On media without a pointing device, the user would probably have to
+ explicitly indicate his intention to perform a drag-and-drop operation,
+ stating what he wishes to drag and what he wishes to drop, respectively.
+
+ <p>However it is implemented, drag-and-drop operations must have a starting
+ point (e.g. where the mouse was clicked, or the start of <a
+ href="#a-selection">the selection</a> or element that was selected for the
+ drag), may have any number of intermediate steps (elements that the mouse
+ moves over during a drag, or elements that the user picks as possible drop
+ points as he cycles through possibilities), and must either have an end
+ point (the element above which the mouse button was released, or the
+ element that was finally selected), or be canceled. The end point must be
+ the last element selected as a possible drop point before the drop occurs
+ (so if the operation is not canceled, there must be at least one element
+ in the middle step).
+
+ <h4 id=the-dragevent><span class=secno>5.3.1. </span>The <code><a
+ href="#dragevent">DragEvent</a></code> and <code><a
+ href="#datatransfer0">DataTransfer</a></code> interfaces</h4>
+
+ <p>The drag-and-drop processing model involves several events. They all use
+ the <code><a href="#dragevent">DragEvent</a></code> interface.
+
+ <pre class=idl>interface <dfn id=dragevent>DragEvent</dfn> : Event {
+ readonly attribute <a href="#datatransfer0">DataTransfer</a> <a href="#datatransfer" title=dom-DragEvent-dataTransfer>dataTransfer</a>;
+ void <a href="#initdragevent" title=dom-DragEvent-initDragEvent>initDragEvent</a>(in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg);
+ void <a href="#initdrageventns" title=dom-DragEvent-initDragEventNS>initDragEventNS</a>(in DOMString namespaceURIArg, in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg);
+};</pre>
+
+ <p>The <dfn id=initdragevent
+ title=dom-DragEvent-initDragEvent><code>initDragEvent()</code></dfn> and
+ <dfn id=initdrageventns
+ title=dom-DragEvent-initDragEventNS><code>initDragEventNS()</code></dfn>
+ methods must initialise the event in a manner analogous to the
+ similarly-named methods in the DOM3 Events interfaces. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The <dfn id=datatransfer
+ title=dom-DragEvent-dataTransfer><code>dataTransfer</code></dfn> attribute
+ of the <code><a href="#dragevent">DragEvent</a></code> interface
+ represents the context information for the event.
+
+ <p>When a <code><a href="#dragevent">DragEvent</a></code> object is
+ created, a new <code><a href="#datatransfer0">DataTransfer</a></code>
+ object must be created and assigned to the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> context information field of
+ the event object.
+
+ <pre class=idl>interface <dfn id=datatransfer0>DataTransfer</dfn> {
+ attribute DOMString <a href="#dropeffect" title=dom-DataTransfer-dropEffect>dropEffect</a>;
+ attribute DOMString <a href="#effectallowed" title=dom-DataTransfer-effectAllowed>effectAllowed</a>;
+ void <a href="#cleardata" title=dom-DataTransfer-clearData>clearData</a>(in DOMString format);
+ void <a href="#setdata" title=dom-DataTransfer-setData>setData</a>(in DOMString format, in DOMString data);
+ DOMString <a href="#getdata" title=dom-DataTransfer-getData>getData</a>(in DOMString format);
+ void <a href="#setdragimage" title=dom-DataTransfer-setDragImage>setDragImage</a>(in Element image, in long x, in long y);
+ void <a href="#addelement" title=dom-DataTransfer-addElement>addElement</a>(in Element element);
+};
+</pre>
+
+ <p><code><a href="#datatransfer0">DataTransfer</a></code> objects can
+ conceptually contain various kinds of data.
+
+ <p>When a <code><a href="#dragevent">DragEvent</a></code> event object is
+ initialised, the <code><a href="#datatransfer0">DataTransfer</a></code>
+ object created for the event's <code title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> member must be initialised as
+ follows:
+
+ <ul>
+ <li>The <code><a href="#datatransfer0">DataTransfer</a></code> object must
+ initially contain no data, no elements, and have no associated image.
+
+ <li>The <code><a href="#datatransfer0">DataTransfer</a></code> object's
+ <code title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code> attribute must be set to
+ "<code title="">uninitialized</code>".
+
+ <li>The <code title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attribute must be set to "<code
+ title="">none</code>".
+ </ul>
+
+ <p>The <dfn id=dropeffect
+ title=dom-DataTransfer-dropEffect><code>dropEffect</code></dfn> attribute
+ controls the drag-and-drop feedback that the user is given during a
+ drag-and-drop operation.
+
+ <p>The attribute must ignore any attempts to set it to a value other than
+ <code title="">none</code>, <code title="">copy</code>, <code
+ title="">link</code>, and <code title="">move</code>. On getting, the
+ attribute must return the last of those four values that it was set to.
+
+ <p>The <dfn id=effectallowed
+ title=dom-DataTransfer-effectAllowed><code>effectAllowed</code></dfn>
+ attribute is used in the drag-and-drop processing model to initialise the
+ <code title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attribute during the <code
+ title=event-dragenter><a href="#dragenter">dragenter</a></code> and <code
+ title=event-dragover><a href="#dragover">dragover</a></code> events.
+
+ <p>The attribute must ignore any attempts to set it to a value other than
+ <code title="">none</code>, <code title="">copy</code>, <code
+ title="">copyLink</code>, <code title="">copyMove</code>, <code
+ title="">link</code>, <code title="">linkMove</code>, <code
+ title="">move</code>, <code title="">all</code>, and <code
+ title="">uninitialized</code>. On getting, the attribute must return the
+ last of those values that it was set to.
+
+ <p><code><a href="#datatransfer0">DataTransfer</a></code> objects can hold
+ pieces of data, each associated with a unique format. Formats are
+ generally given by MIME types, with some values special-cased for legacy
+ reasons.
+
+ <p>The <dfn id=cleardata
+ title=dom-DataTransfer-clearData><code>clearData(<var
+ title="">format</var>)</code></dfn> method must clear the <code><a
+ href="#datatransfer0">DataTransfer</a></code> object of any data
+ associated with the given <var title="">format</var>. If <var
+ title="">format</var> is the value "<code title="">Text</code>", then it
+ must be treated as "<code title="">text/plain</code>". If the <var
+ title="">format</var> is "<code title="">URL</code>", then it must be
+ treated as "<code title="">text/uri-list</code>".
+
+ <p>The <dfn id=setdata title=dom-DataTransfer-setData><code>setData(<var
+ title="">format</var>, <var title="">data</var>)</code></dfn> method must
+ add <var title="">data</var> to the data stored in the <code><a
+ href="#datatransfer0">DataTransfer</a></code> object, labelled as being of
+ the type <var title="">format</var>. This must replace any previous data
+ that had been set for that format. If <var title="">format</var> is the
+ value "<code title="">Text</code>", then it must be treated as "<code
+ title="">text/plain</code>". If the <var title="">format</var> is "<code
+ title="">URL</code>", then it must be treated as "<code
+ title="">text/uri-list</code>".
+
+ <p>The <dfn id=getdata title=dom-DataTransfer-getData><code>getData(<var
+ title="">format</var>)</code></dfn> method must return the data that is
+ associated with the type <var title="">format</var>, if any, and must
+ return the empty string otherwise. If <var title="">format</var> is the
+ value "<code title="">Text</code>", then it must be treated as "<code
+ title="">text/plain</code>". If the <var title="">format</var> is "<code
+ title="">URL</code>", then the data associated with the "<code
+ title="">text/uri-list</code>" format must be parsed as appropriate for
+ <code title="">text/uri-list</code> data, and the first URI from the list
+ must be returned. If there is no data with that format, or if there is but
+ it has no URIs, then the method must return the empty string. <a
+ href="#refsRFC2483">[RFC2483]</a>
+
+ <p>The <dfn id=setdragimage
+ title=dom-DataTransfer-setDragImage><code>setDragImage(<var
+ title="">element</var>, <var title="">x</var>, <var
+ title="">y</var>)</code></dfn> method sets which element to use <a
+ href="#base-dnd-feedback">to generate the drag feedback</a>. The <var
+ title="">element</var> argument can be any <code>Element</code>; if it is
+ an <code><a href="#img">img</a></code> element, then the user agent should
+ use the element's image (at its intrinsic size) to generate the feedback,
+ otherwise the user agent should base the feedback on the given element
+ (but the exact mechanism for doing so is not specified).
+
+ <p>The <dfn id=addelement
+ title=dom-DataTransfer-addElement><code>addElement(<var
+ title="">element</var>)</code></dfn> method is an alternative way of
+ specifying how the user agent is to <a href="#base-dnd-feedback">render
+ the drag feedback</a>. It adds an element to the <code><a
+ href="#datatransfer0">DataTransfer</a></code> object.
+
+ <h4 id=events1><span class=secno>5.3.2. </span>Events fired during a
+ drag-and-drop action</h4>
+
+ <p>The following events are involved in the drag-and-drop model. Whenever
+ the processing model described below causes one of these events to be
+ fired, the event fired must use the <code><a
+ href="#dragevent">DragEvent</a></code> interface defined above, must have
+ the bubbling and cancelable behaviours given in the table below, and must
+ have the context information set up as described after the table.
+
+ <table>
+ <thead>
+ <tr>
+ <th> Event Name
+
+ <th> Target
+
+ <th> Bubbles?
+
+ <th> Cancelable?
+
+ <th> <code title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code>
+
+ <th> <code title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code>
+
+ <th> <code title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code>
+
+ <th> Default Action
+
+ <tbody>
+ <tr>
+ <td><dfn id=dragstart title=event-dragstart><code>dragstart</code></dfn>
+
+ <td><a href="#source0">Source node</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&#x2713; Cancelable
+
+ <td>Contains <a href="#source0">source node</a> unless a selection is
+ being dragged, in which case it is empty
+
+ <td><code title="">uninitialized</code>
+
+ <td><code title="">none</code>
+
+ <td>Initiate the drag-and-drop operation
+
+ <tr>
+ <td><dfn id=drag title=event-drag><code>drag</code></dfn>
+
+ <td><a href="#source0">Source node</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&#x2713; Cancelable
+
+ <td>Empty
+
+ <td><a href="#effectAllowed-initialisation">Same as last event</a>
+
+ <td><code title="">none</code>
+
+ <td>Continue the drag-and-drop operation
+
+ <tr>
+ <td><dfn id=dragenter title=event-dragenter><code>dragenter</code></dfn>
+
+ <td><a href="#immediate">Immediate user selection</a> or <a
+ href="#the-body0">the body element</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&#x2713; Cancelable
+
+ <td>Empty
+
+ <td><a href="#effectAllowed-initialisation">Same as last event</a>
+
+ <td><a href="#dropEffect-initialisation">Based on
+ <code>effectAllowed</code> value</a>
+
+ <td>Reject <a href="#immediate">immediate user selection</a> as
+ potential <a href="#current1" title="current target element">target
+ element</a>
+
+ <tr>
+ <td><dfn id=dragleave title=event-dragleave><code>dragleave</code></dfn>
+
+ <td><a href="#current1" title="current target element">Previous target
+ element</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&mdash;
+
+ <td>Empty
+
+ <td><a href="#effectAllowed-initialisation">Same as last event</a>
+
+ <td><code title="">none</code>
+
+ <td>None
+
+ <tr>
+ <td><dfn id=dragover title=event-dragover><code>dragover</code></dfn>
+
+ <td><a href="#current1">Current target element</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&#x2713; Cancelable
+
+ <td>Empty
+
+ <td><a href="#effectAllowed-initialisation">Same as last event</a>
+
+ <td><a href="#dropEffect-initialisation">Based on
+ <code>effectAllowed</code> value</a>
+
+ <td>Reset the <a href="#current2">current drag operation</a> to "none"
+
+ <tr>
+ <td><dfn id=drop title=event-drop><code>drop</code></dfn>
+
+ <td><a href="#current1">Current target element</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&#x2713; Cancelable
+
+ <td><code>getData()</code> returns data set in <code
+ title=event-dragstart><a href="#dragstart">dragstart</a></code> event
+
+ <td><a href="#effectAllowed-initialisation">Same as last event</a>
+
+ <td><a href="#current2">Current drag operation</a>
+
+ <td>Varies
+
+ <tr>
+ <td><dfn id=dragend title=event-dragend><code>dragend</code></dfn>
+
+ <td><a href="#source0">Source node</a>
+
+ <td>&#x2713; Bubbles
+
+ <td>&mdash;
+
+ <td>Empty
+
+ <td><a href="#effectAllowed-initialisation">Same as last event</a>
+
+ <td><a href="#current2">Current drag operation</a>
+
+ <td>Varies
+ </table>
+
+ <p>The <code title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object's contents are empty
+ except for <code title=event-dragstart><a
+ href="#dragstart">dragstart</a></code> events and <code
+ title=event-drop><a href="#drop">drop</a></code> events, for which the
+ contents are set as described in the processing model, below.
+
+ <p id=effectAllowed-initialisation>The <code
+ title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code> attribute must be set to
+ "<code title="">uninitialized</code>" for <code title=event-dragstart><a
+ href="#dragstart">dragstart</a></code> events, and to whatever value the
+ field had after the last drag-and-drop event was fired for all other
+ events (only counting events fired by the user agent for the purposes of
+ the drag-and-drop model described below).
+
+ <p id=dropEffect-initialisation>The <code
+ title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attribute must be set to "<code
+ title="">none</code>" for <code title=event-dragstart><a
+ href="#dragstart">dragstart</a></code>, <code title=event-drag><a
+ href="#drag">drag</a></code>, <code title=event-dragleave><a
+ href="#dragleave">dragleave</a></code>, and <code title=event-dragend><a
+ href="#dragend">dragend</a></code> events (except when stated otherwise in
+ the algorithms given in the sections below), to the value corresponding to
+ the <a href="#current2">current drag operation</a> for <code
+ title=event-drop><a href="#drop">drop</a></code> events, and to a value
+ based on the <code title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code> attribute's value and on
+ the drag-and-drop source, as given by the following table, for the
+ remaining events (<code title=event-dragenter><a
+ href="#dragenter">dragenter</a></code> and <code title=event-dragover><a
+ href="#dragover">dragover</a></code>):
+
+ <table>
+ <thead>
+ <tr>
+ <th><code title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code>
+
+ <th><code title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code>
+
+ <tbody>
+ <tr>
+ <td><code title="">none</code>
+
+ <td><code title="">none</code>
+
+ <tr>
+ <td><code title="">copy</code>, <code title="">copyLink</code>, <code
+ title="">copyMove</code>, <code title="">all</code>
+
+ <td><code title="">copy</code>
+
+ <tr>
+ <td><code title="">link</code>, <code title="">linkMove</code>
+
+ <td><code title="">link</code>
+
+ <tr>
+ <td><code title="">move</code>
+
+ <td><code title="">move</code>
+
+ <tr>
+ <td><code title="">uninitialized</code> when what is being dragged is a
+ selection from a text field
+
+ <td><code title="">move</code>
+
+ <tr>
+ <td><code title="">uninitialized</code> when what is being dragged is a
+ selection
+
+ <td><code title="">copy</code>
+
+ <tr>
+ <td><code title="">uninitialized</code> when what is being dragged is an
+ <code><a href="#a">a</a></code> element with an <code>href</code>
+ attribute
+
+ <td><code title="">link</code>
+
+ <tr>
+ <td>Any other case
+
+ <td><code title="">copy</code>
+ </table>
+
+ <h4 id=drag-and-drop><span class=secno>5.3.3. </span>Drag-and-drop
+ processing model</h4>
+
+ <p>When the user attempts to begin a drag operation, the user agent must
+ first determine what is being dragged. If the drag operation was invoked
+ on a selection, then it is the selection that is being dragged. Otherwise,
+ it is the first element, going up the ancestor chain, starting at the node
+ that the user tried to drag, that has the DOM attribute <code
+ title=dom-draggable><a href="#draggable0">draggable</a></code> set to
+ true. If there is no such element, then nothing is being dragged, the
+ drag-and-drop operation is never started, and the user agent must not
+ continue with this algorithm.
+
+ <p class=note><code><a href="#img">img</a></code> elements and <code><a
+ href="#a">a</a></code> elements with an <code title=attr-hyperlink-href><a
+ href="#href6">href</a></code> attribute have their <code
+ title=dom-draggable><a href="#draggable0">draggable</a></code> attribute
+ set to true by default.
+
+ <p>If the user agent determines that something can be dragged, a <code
+ title=event-dragstart><a href="#dragstart">dragstart</a></code> event must
+ then be fired.
+
+ <p>If it is a selection that is being dragged, then this event must be
+ fired on the node that the user started the drag on (typically the text
+ node that the user originally clicked). If the user did not specify a
+ particular node, for example if the user just told the user agent to begin
+ a drag of "the selection", then the event must be fired on the deepest
+ node that is a common ancestor of all parts of the selection.
+
+ <p>If it is not a selection that is being dragged, then the event must be
+ fired on the element that is being dragged.
+
+ <p>The node on which the event is fired is the <dfn id=source0>source
+ node</dfn>. Multiple events are fired on this node during the course of
+ the drag-and-drop operation.
+
+ <p>If it is a selection that is being dragged, the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> member of the event must be
+ created with no nodes. Otherwise, it must be created containing just the
+ <a href="#source0">source node</a>. Script can use the <code
+ title=dom-DataTransfer-addElement><a
+ href="#addelement">addElement()</a></code> method to add further elements
+ to the list of what is being dragged.
+
+ <p>If it is a selection that is being dragged, the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> member of the event must have
+ the text of the selection added to it as the data associated with the
+ <code title="">text/plain</code> format. Otherwise, if it is an <code><a
+ href="#img">img</a></code> element being dragged, then the value of the
+ element's <code title=dom-img-src><a href="#src0">src</a></code> DOM
+ attribute must be added, associated with the <code
+ title="">text/uri-list</code> format. Otherwise, if it is an <code><a
+ href="#a">a</a></code> element being dragged, then the value of the
+ element's <code title=dom-a-href><a href="#href3">href</a></code> DOM
+ attribute must be added, associated with the <code
+ title="">text/uri-list</code> format. Otherwise, no data is added to the
+ object by the user agent.
+
+ <p>If the event is canceled, then the drag-and-drop operation must not
+ occur; the user agent must not continue with this algorithm.
+
+ <p>If it is not canceled, then the drag-and-drop operation must be
+ initiated.
+
+ <p class=note>Since events with no event handlers registered are, almost by
+ definition, never canceled, drag-and-drop is always available to the user
+ if the author does not specifically prevent it.
+
+ <p id=base-dnd-feedback>The drag-and-drop feedback must be generated from
+ the first of the following sources that is available:
+
+ <ol>
+ <li>The element specified in the last call to the <code
+ title=dom-DataTransfer-setDragImage><a
+ href="#setdragimage">setDragImage()</a></code> method of the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object of the <code
+ title=event-dragstart><a href="#dragstart">dragstart</a></code> event, if
+ the method was called. In visual media, if this is used, the <var
+ title="">x</var> and <var title="">y</var> arguments that were passed to
+ that method should be used as hints for where to put the cursor relative
+ to the resulting image. The values are expressed as distances in CSS
+ pixels from the left side and from the top side of the image
+ respectively. <a href="#refsCSS21">[CSS21]</a></li>
+ <!--
+ CSS3 UNITS would be better -->
+
+ <li>The elements that were added to the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object, both before the
+ event was fired, and during the handling of the event using the <code
+ title=dom-DataTransfer-addElement><a
+ href="#addelement">addElement()</a></code> method, if any such elements
+ were indeed added.
+
+ <li>The selection that the user is dragging.
+ </ol>
+ <!-- XXX xref also link to the section that explains how to
+ render drag-and-drop, :drag, :drop, etc. Safari has a pseudo-class
+ that it uses to render an element off-screen to use as the drag
+ feedback. -->
+
+ <p>The user agent must take a note of <a href="#setdata"
+ title=dom-DataTransfer-setData>the data that was placed</a> in the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object. This data will be
+ made available again when the <code title=event-drop><a
+ href="#drop">drop</a></code> event is fired.
+
+ <p>From this point until the end of the drag-and-drop operation, device
+ input events (e.g. mouse and keyboard events) must be suppressed. In
+ addition, the user agent must track all DOM changes made during the
+ drag-and-drop operation, and add them to its <a href="#undo">undo
+ history</a> as one atomic operation once the drag-and-drop operation has
+ ended.
+
+ <p>During the drag operation, the element directly indicated by the user as
+ the drop target is called the <dfn id=immediate>immediate user
+ selection</dfn>. (Only elements can be selected by the user; other nodes
+ must not be made available as drop targets.) However, the <a
+ href="#immediate">immediate user selection</a> is not necessarily the <dfn
+ id=current1>current target element</dfn>, which is the element currently
+ selected for the drop part of the drag-and-drop operation. The <a
+ href="#immediate">immediate user selection</a> changes as the user selects
+ different elements (either by pointing at them with a pointing device, or
+ by selecting them in some other way). The <a href="#current1">current
+ target element</a> changes when the <a href="#immediate">immediate user
+ selection</a> changes, based on the results of event handlers in the
+ document, as described below.
+
+ <p>Both the <a href="#current1">current target element</a> and the <a
+ href="#immediate">immediate user selection</a> can be null, which means no
+ target element is selected. They can also both be elements in other
+ (DOM-based) documents, or other (non-Web) programs altogether. (For
+ example, a user could drag text to a word-processor.) The <a
+ href="#current1">current target element</a> is initially null.
+
+ <p>In addition, there is also a <dfn id=current2>current drag
+ operation</dfn>, which can take on the values "none", "copy", "link", and
+ "move". Initially it has the value "none". It is updated by the user agent
+ as described in the steps below.
+
+ <p>User agents must, every 350ms (&#xB1;200ms), perform the following steps
+ in sequence. (If the user agent is still performing the previous iteration
+ of the sequence when the next iteration becomes due, the user agent must
+ not execute the overdue iteration, effectively "skipping missed frames" of
+ the drag-and-drop operation.)
+
+ <ol>
+ <li>
+ <p>First, the user agent must fire a <code title=event-drag><a
+ href="#drag">drag</a></code> event at the <a href="#source0">source
+ node</a>. If this event is canceled, the user agent must set the <a
+ href="#current2">current drag operation</a> to none (no drag operation).</p>
+
+ <li>
+ <p>Next, if the <code title=event-drag><a href="#drag">drag</a></code>
+ event was not canceled and the user has not ended the drag-and-drop
+ operation, the user agent must check the state of the drag-and-drop
+ operation, as follows:</p>
+
+ <ol>
+ <li>
+ <p>First, if the user is indicating a different <a
+ href="#immediate">immediate user selection</a> than during the last
+ iteration (or if this is the first iteration), and if this <a
+ href="#immediate">immediate user selection</a> is not the same as the
+ <a href="#current1">current target element</a>, then the <a
+ href="#current1">current target element</a> must be updated, as
+ follows:</p>
+
+ <ol>
+ <li>
+ <p>If the new <a href="#immediate">immediate user selection</a> is
+ null, or is in a non-DOM document or application, then set the <a
+ href="#current1">current target element</a> to the same value.</p>
+
+ <li>
+ <p>Otherwise, the user agent must fire a <code
+ title=event-dragenter><a href="#dragenter">dragenter</a></code>
+ event at the <a href="#immediate">immediate user selection</a>.</p>
+
+ <li>
+ <p>If the event is canceled, then the <a href="#current1">current
+ target element</a> must be set to the <a href="#immediate">immediate
+ user selection</a>.</p>
+
+ <li>
+ <p>Otherwise, if the <a href="#current1">current target element</a>
+ is not <a href="#the-body0">the body element</a>, the user agent
+ must fire a <code title=event-dragenter><a
+ href="#dragenter">dragenter</a></code> event at <a
+ href="#the-body0">the body element</a>, and the <a
+ href="#current1">current target element</a> must be set to <a
+ href="#the-body0">the body element</a>, regardless of whether that
+ event was canceled or not. (If <a href="#the-body0">the body
+ element</a> is null, then the <a href="#current1">current target
+ element</a> would be set to null too in this case; it wouldn't be
+ set to the <code>Document</code> object.)</p>
+ </ol>
+
+ <li>
+ <p>If the previous step caused the <a href="#current1">current target
+ element</a> to change, and if the previous target element was not null
+ or a part of a non-DOM document, the user agent must fire a <code
+ title=event-dragleave><a href="#dragleave">dragleave</a></code> event
+ at the previous target element.</p>
+
+ <li>
+ <p>If the <a href="#current1">current target element</a> is a DOM
+ element, the user agent must fire a <code title=event-dragover><a
+ href="#dragover">dragover</a></code> event at this <a
+ href="#current1">current target element</a>.</p>
+
+ <p>If the <code title=event-dragover><a
+ href="#dragover">dragover</a></code> event is canceled, the <a
+ href="#current2">current drag operation</a> must be reset to "none".</p>
+
+ <p>Otherwise, the <a href="#current2">current drag operation</a> must
+ be set based on the values the <code
+ title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code> and <code
+ title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attributes of the <code
+ title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object had after the
+ event was handled, as per the following table:</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th><code title=dom-DataTransfer-effectAllowed><a
+ href="#effectallowed">effectAllowed</a></code>
+
+ <th><code title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code>
+
+ <th>Drag operation
+
+ <tbody>
+ <tr>
+ <td><code title="">uninitialized</code>, <code title="">copy</code>,
+ <code title="">copyLink</code>, <code title="">copyMove</code>, or
+ <code title="">all</code>
+
+ <td><code title="">copy</code>
+
+ <td>"copy"
+
+ <tr>
+ <td><code title="">uninitialized</code>, <code title="">link</code>,
+ <code title="">copyLink</code>, <code title="">linkMove</code>, or
+ <code title="">all</code>
+
+ <td><code title="">link</code>
+
+ <td>"link"
+
+ <tr>
+ <td><code title="">uninitialized</code>, <code title="">move</code>,
+ <code title="">copyMove</code>, <code title="">linkMove</code>, or
+ <code title="">all</code>
+
+ <td><code title="">move</code>
+
+ <td>"move"
+
+ <tr>
+ <td colspan=2>Any other case
+
+ <td>"none"
+ </table>
+
+ <p>Then, regardless of whether the <code title=event-dragover><a
+ href="#dragover">dragover</a></code> event was canceled or not, the
+ drag feedback (e.g. the mouse cursor) must be updated to match the <a
+ href="#current2">current drag operation</a>, as follows:</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Drag operation
+
+ <th>Feedback
+
+ <tbody>
+ <tr>
+ <td>"copy"
+
+ <td>Data will be copied if dropped here.
+
+ <tr>
+ <td>"link"
+
+ <td>Data will be linked if dropped here.
+
+ <tr>
+ <td>"move"
+
+ <td>Data will be moved if dropped here.
+
+ <tr>
+ <td>"none"
+
+ <td>No operation allowed, dropping here will cancel the
+ drag-and-drop operation.
+ </table>
+
+ <li>
+ <p>Otherwise, if the <a href="#current1">current target element</a> is
+ not a DOM element, the user agent must use platform-specific
+ mechanisms to determine what drag operation is being performed (none,
+ copy, link, or move). This sets the <em><a href="#current2">current
+ drag operation</a></em>.</p>
+ </ol>
+
+ <li>
+ <p>Otherwise, if the user ended the drag-and-drop operation (e.g. by
+ releasing the mouse button in a mouse-driven drag-and-drop interface),
+ or if the <code title=event-drag><a href="#drag">drag</a></code> event
+ was canceled, then this will be the last iteration. The user agent must
+ follow the following steps, then stop looping.</p>
+
+ <ol>
+ <li>
+ <p>If the <a href="#current2">current drag operation</a> is none (no
+ drag operation), or, if the user ended the drag-and-drop operation by
+ canceling it (e.g. by hitting the <kbd>Escape</kbd> key), or if the <a
+ href="#current1">current target element</a> is null, then the drag
+ operation failed. If the <a href="#current1">current target
+ element</a> is a DOM element, the user agent must fire a <code
+ title=event-dragleave><a href="#dragleave">dragleave</a></code> event
+ at it; otherwise, if it is not null, it must use platform-specific
+ conventions for drag cancellation.</p>
+
+ <li>
+ <p>Otherwise, the drag operation was a success. If the <a
+ href="#current1">current target element</a> is a DOM element, the user
+ agent must fire a <code title=event-drop><a
+ href="#drop">drop</a></code> event at it; otherwise, it must use
+ platform-specific conventions for indicating a drop.</p>
+
+ <p>When the target is a DOM element, the <code
+ title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attribute of the event's
+ <code title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object must be given the
+ value representing the <a href="#current2">current drag operation</a>
+ (<code title="">copy</code>, <code title="">link</code>, or <code
+ title="">move</code>), and the object must be set up so that the <code
+ title=dom-DataTransfer-getData><a href="#getdata">getData()</a></code>
+ method will return the data that was added during the <code
+ title=event-dragstart><a href="#dragstart">dragstart</a></code> event.</p>
+
+ <p>If the event is canceled, the <a href="#current2">current drag
+ operation</a> must be set to the value of the <code
+ title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attribute of the event's
+ <code title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object as it stood after
+ the event was handled.</p>
+
+ <p>Otherwise, the event is not canceled, and the user agent must
+ perform the event's default action, which depends on the exact target
+ as follows:</p>
+
+ <dl class=switch>
+ <dt>If the <a href="#current1">current target element</a> is a text
+ field (e.g. <code>textarea</code>, or an <code>input</code> element
+ with <code title="">type="text"</code><!--XXX xref-->)
+
+ <dd>The user agent must insert the data associated with the
+ <code>text/plain</code> format, if any, into the text field in a
+ manner consistent with platform-specific conventions (e.g. inserting
+ it at the current mouse cursor position, or inserting it at the end
+ of the field).
+
+ <dt>Otherwise
+
+ <dd>Reset the <a href="#current2">current drag operation</a> to
+ "none".
+ </dl>
+
+ <li>
+ <p>Finally, the user agent must fire a <code title=event-dragend><a
+ href="#dragend">dragend</a></code> event at the <a
+ href="#source0">source node</a>, with the <code
+ title=dom-DataTransfer-dropEffect><a
+ href="#dropeffect">dropEffect</a></code> attribute of the event's
+ <code title=dom-DragEvent-dataTransfer><a
+ href="#datatransfer">dataTransfer</a></code> object being set to the
+ value corresponding to the <a href="#current2">current drag
+ operation</a>.</p>
+
+ <p class=note>The <a href="#current2">current drag operation</a> can
+ change during the processing of the <code title=event-drop><a
+ href="#drop">drop</a></code> event, if one was fired.</p>
+
+ <p>The event is not cancelable. After the event has been handled, the
+ user agent must act as follows:</p>
+
+ <dl class=switch>
+ <dt>If the <a href="#current1">current target element</a> is a text
+ field (e.g. <code>textarea</code>, or an <code>input</code> element
+ with <code title="">type="text"</code><!--XXX xref-->), and a <code
+ title=event-drop><a href="#drop">drop</a></code> event was fired in
+ the previous step, and the <a href="#current2">current drag
+ operation</a> is "move", and the source of the drag-and-drop
+ operation is a selection in the DOM
+
+ <dd>The user agent should delete the range representing the dragged
+ selection from the DOM.
+
+ <dt>If the <a href="#current1">current target element</a> is a text
+ field (e.g. <code>textarea</code>, or an <code>input</code> element
+ with <code title="">type="text"</code><!--XXX xref-->), and a <code
+ title=event-drop><a href="#drop">drop</a></code> event was fired in
+ the previous step, and the <a href="#current2">current drag
+ operation</a> is "move", and the source of the drag-and-drop
+ operation is a selection in a text field
+
+ <dd>The user agent should delete the dragged selection from the
+ relevant text field.
+
+ <dt>Otherwise
+
+ <dd>The event has no default action.
+ </dl>
+ </ol>
+ </ol>
+
+ <h5 id=when-the><span class=secno>5.3.3.1. </span>When the drag-and-drop
+ operation starts or ends in another document</h5>
+
+ <p>The model described above is independent of which <code>Document</code>
+ object the nodes involved are from; the events must be fired as described
+ above and the rest of the processing model must be followed as described
+ above, irrespective of how many documents are involved in the operation.
+
+ <h5 id=when-the0><span class=secno>5.3.3.2. </span>When the drag-and-drop
+ operation starts or ends in another application</h5>
+
+ <p>If the drag is initiated in another application, the <a
+ href="#source0">source node</a> is not a DOM node, and the user agent must
+ use platform-specific conventions instead when the requirements above
+ involve the source node. User agents in this situation must act as if the
+ dragged data had been added to the <code><a
+ href="#datatransfer0">DataTransfer</a></code> object when the drag
+ started, even though no <code title=event-dragstart><a
+ href="#dragstart">dragstart</a></code> event was actually fired; user
+ agents must similarly use platform-specific conventions when deciding on
+ what drag feedback to use.
+
+ <p>If a drag is started in a document but ends in another application, then
+ the user agent must instead replace the parts of the processing model
+ relating to handling the <em>target</em> according to platform-specific
+ conventions.
+
+ <p>In any case, scripts running in the context of the document must not be
+ able to distinguish the case of a drag-and-drop operation being started or
+ ended in another application from the case of a drag-and-drop operation
+ being started or ended in another document from another domain.
+
+ <h4 id=the-draggable><span class=secno>5.3.4. </span>The <dfn id=draggable
+ title=attr-draggable><code>draggable</code></dfn> attribute</h4>
+
+ <p>All elements may have the <code title=attr-draggable><a
+ href="#draggable">draggable</a></code> content attribute set. The <code
+ title=attr-draggable><a href="#draggable">draggable</a></code> attribute
+ is an <a href="#enumerated">enumerated attribute</a>. It has three states.
+ The first state is <em>true</em> and it has the keyword <code
+ title="">true</code>. The second state is <em>false</em> and it has the
+ keyword <code title="">false</code>. The third state is <em>auto</em>; it
+ has no keywords but it is the <em>missing value default</em>.
+
+ <p>The <dfn id=draggable0 title=dom-draggable><code>draggable</code></dfn>
+ DOM attribute, whose value depends on the content attribute's state in the way
+ described below, controls whether or not the element is draggable.
+ Generally, only text selections are draggable, but elements whose <code
+ title=dom-draggable><a href="#draggable0">draggable</a></code> DOM
+ attribute is true become draggable as well.
+
+ <p>If an element's <code title=attr-draggable><a
+ href="#draggable">draggable</a></code> content attribute has the state
+ <em>true</em>, the <code title=dom-draggable><a
+ href="#draggable0">draggable</a></code> DOM attribute must return true.
+
+ <p>Otherwise, if the element's <code title=attr-draggable><a
+ href="#draggable">draggable</a></code> content attribute has the state
+ <em>false</em>, the <code title=dom-draggable><a
+ href="#draggable0">draggable</a></code> DOM attribute must return false.
+
+ <p>Otherwise, the element's <code title=attr-draggable><a
+ href="#draggable">draggable</a></code> content attribute has the state
+ <em>auto</em>. If the element is an <code><a href="#img">img</a></code>
+ element, or, if the element is an <code><a href="#a">a</a></code> element
+ with an <code title=attr-hyperlink-href><a href="#href6">href</a></code>
+ content attribute, the <code title=dom-draggable><a
+ href="#draggable0">draggable</a></code> DOM attribute must return true.
+
+ <p>Otherwise, the <code title=dom-draggable><a
+ href="#draggable0">draggable</a></code> DOM attribute must return false.
+
+ <p>If the <code title=dom-draggable><a
+ href="#draggable0">draggable</a></code> DOM attribute is set to the value
+ false, the <code title=attr-draggable><a
+ href="#draggable">draggable</a></code> content attribute must be set to
+ the literal value <code title="">false</code>. If the <code
+ title=dom-draggable><a href="#draggable0">draggable</a></code> DOM
+ attribute is set to the value true, the <code title=attr-draggable><a
+ href="#draggable">draggable</a></code> content attribute must be set to
+ the literal value <code title="">true</code>.
+
+ <h4 id=copy-and><span class=secno>5.3.5. </span>Copy and paste</h4>
+
+ <p>Copy-and-paste is a form of drag-and-drop: the "copy" part is equivalent
+ to dragging content to another application (the "clipboard"), and the
+ "paste" part is equivalent to dragging content <em>from</em> another
+ application.
+
+ <p>Select-and-paste (a model used by mouse operations in the X Window
+ System) is equivalent to a drag-and-drop operation where the source is the
+ selection.
+
+ <h5 id=copy-to><span class=secno>5.3.5.1. </span>Copy to clipboard</h5>
+
+ <p>When the user invokes a copy operation, the user agent must act as if
+ the user had invoked a drag on the current selection. If the drag-and-drop
+ operation initiates, then the user agent must act as if the user had
+ indicated (as the <a href="#immediate">immediate user selection</a>) a
+ hypothetical application representing the clipboard. Then, the user agent
+ must act as if the user had ended the drag-and-drop operation without
+ canceling it. If the drag-and-drop operation didn't get canceled, the user
+ agent should then follow the relevant platform-specific conventions for
+ copy operations (e.g. updating the clipboard).
+
+ <h5 id=cut-to><span class=secno>5.3.5.2. </span>Cut to clipboard</h5>
+
+ <p>When the user invokes a cut operation, the user agent must act as if the
+ user had invoked a copy operation (see the previous section), followed, if
+ the copy was completed successfully, by <a
+ href="#contenteditable-delete">a selection delete operation</a>.
+
+ <h5 id=paste><span class=secno>5.3.5.3. </span>Paste from clipboard</h5>
+
+ <p>When the user invokes a clipboard paste operation, the user agent must
+ act as if the user had invoked a drag on a hypothetical application
+ representing the clipboard, setting the data associated with the drag as
+ the text from the clipboard (either as <code title="">text/plain</code> or
+ <code>text/uri-list</code>). If the contents of the clipboard cannot be
+ represented as text or URIs, then the paste operation must not have any
+ effect.
+
+ <p>Then, the user agent must act as if the user had indicated (as the <a
+ href="#immediate">immediate user selection</a>) the element with the
+ keyboard focus, and then ended the drag-and-drop operation without
+ canceling it.
+
+ <h5 id=paste0><span class=secno>5.3.5.4. </span>Paste from selection</h5>
+
+ <p>When the user invokes a selection paste operation, the user agent must
+ act as if the user had invoked a drag on the current selection, then
+ indicated (as the <a href="#immediate">immediate user selection</a>) the
+ element with the keyboard focus, and then ended the drag-and-drop
+ operation without canceling it.
+
+ <p>If the contents of the selection cannot be represented as text or URIs,
+ then the paste operation must not have any effect.
+
+ <h4 id=security7><span class=secno>5.3.6. </span>Security risks in the
+ drag-and-drop model</h4>
+
+ <p>User agents must not make the data added to the <code><a
+ href="#datatransfer0">DataTransfer</a></code> object during the <code
+ title=event-dragstart><a href="#dragstart">dragstart</a></code> event
+ available to scripts until the <code title=event-drop><a
+ href="#drop">drop</a></code> event, because otherwise, if a user were to
+ drag sensitive information from one document to a second document,
+ crossing a hostile third document in the process, the hostile document
+ could intercept the data.
+
+ <p>For the same reason, user agents must only consider a drop to be
+ successful if the user specifically ended the drag operation &mdash; if
+ any scripts end the drag operation, it must be considered unsuccessful
+ (canceled) and the <code title=event-drop><a href="#drop">drop</a></code>
+ event must not be fired.
+
+ <p>User agents should take care to not start drag-and-drop operations in
+ response to script actions. For example, in a mouse-and-window
+ environment, if a script moves a window while the user has his mouse
+ button depressed, the UA would not consider that to start a drag. This is
+ important because otherwise UAs could cause data to be dragged from
+ sensitive sources and dropped into hostile documents without the user's
+ consent.
+
+ <h3 id=undo><span class=secno>5.4. </span><dfn id=undo-history>Undo
+ history</dfn></h3>
+
+ <p class=big-issue>There has got to be a better way of doing this, surely.
+
+ <p>The user agent must associate an <dfn id=undo-transaction>undo
+ transaction history</dfn> with each <code><a
+ href="#htmldocument">HTMLDocument</a></code> object.
+
+ <p>The <a href="#undo-transaction">undo transaction history</a> is a list
+ of entries. The entries are of two types: <a href="#dom-changes">DOM
+ changes</a> and <a href="#undo-object" title="undo object">undo
+ objects</a>.
+
+ <p>Each <dfn id=dom-changes>DOM changes</dfn> entry in the <a
+ href="#undo-transaction">undo transaction history</a> consists of batches
+ of one or more of the following:
+
+ <ul>
+ <li>Changes to the <a href="#content">content attributes</a> of an
+ <code>Element</code> node.
+
+ <li>Changes to the <a href="#dom-attributes">DOM attributes</a> of a
+ <code>Node</code>.</li>
+ <!-- XXX uh, these change on their own, so
+ clearly this isn't going to fly. Which DOM attributes, exactly? -->
+
+ <li>Changes to the DOM hierarchy of nodes that are descendants of the
+ <code><a href="#htmldocument">HTMLDocument</a></code> object
+ (<code>parentNode</code>, <code>childNodes</code>).
+ </ul>
+
+ <p><dfn id=undo-object>Undo object</dfn> entries consist of objects
+ representing state that scripts running in the document are managing. For
+ example, a Web mail application could use an <a href="#undo-object">undo
+ object</a> to keep track of the fact that a user has moved an e-mail to a
+ particular folder, so that the user can undo the action and have the
+ e-mail return to its former location.
+
+ <p>Broadly speaking, <a href="#dom-changes">DOM changes</a> entries are
+ handled by the UA in response to user edits of form controls and
+ <span>editing hosts</span> on the page, and <a href="#undo-object">undo
+ object</a> entries are handled by script in response to higher-level user
+ actions (such as interactions with server-side state, or in the
+ implementation of a drawing tool).
+
+ <h4 id=the-undomanager><span class=secno>5.4.1. </span>The <code><a
+ href="#undomanager">UndoManager</a></code> interface</h4>
+
+ <div class=big-issue>
+ <p>This API sucks. Seriously. It's a terrible API. Really bad. I hate it.
+ Here are the requirements:</p>
+
+ <ul>
+ <li>Has to cope with cases where the server has undo state already when
+ the page is loaded, that can be stuffed into the undo buffer onload.
+
+ <li>Has to support undo/redo.
+
+ <li>Has to cope with the "undo" action being "contact the server and tell
+ it to undo", rather than it being the opposite of the "redo" action.
+
+ <li>Has to cope with some undo states expiring from the undo history
+ (e.g. server can only remember one undelete action) but other states not
+ expiring (e.g. client can undo arbitrary amounts of local edits).
+ </ul>
+ </div>
+
+ <p>To manage <a href="#undo-object">undo object</a> entries in the <a
+ href="#undo-transaction">undo transaction history</a>, the <code><a
+ href="#undomanager">UndoManager</a></code> interface can be used:
+
+ <pre class=idl>interface <dfn id=undomanager>UndoManager</dfn> {
+ unsigned long <a href="#adddata" title=dom-UndoManager-add>add</a>(in DOMObject data, in DOMString title);
+ void <a href="#remove1" title=dom-UndoManager-remove>remove</a>(in unsigned long index);
+ void <a href="#clearundo" title=dom-UndoManager-clearUndo>clearUndo</a>();
+ void <a href="#clearredo" title=dom-UndoManager-clearRedo>clearRedo</a>();
+ DOMObject <a href="#itemn" title=dom-UndoManager-item>item</a>(in unsigned long index);
+ readonly attribute unsigned long <a href="#length9" title=dom-UndoManager-length>length</a>;
+ readonly attribute unsigned long <a href="#position0" title=dom-UndoManager-position>position</a>;
+};</pre>
+
+ <p>The <dfn id=undomanager0
+ title=dom-undoManager><code>undoManager</code></dfn> attribute of the
+ <code><a href="#window">Window</a></code> interface must return the object
+ implementing the <code><a href="#undomanager">UndoManager</a></code>
+ interface for that <code><a href="#window">Window</a></code> object's
+ associated <code><a href="#htmldocument">HTMLDocument</a></code> object.
+
+ <p>In the ECMAScript DOM binding, objects implementing this interface must
+ also support being dereferenced using the square bracket notation, such
+ that dereferencing with an integer index is equivalent to invoking the
+ <code title=dom-UndoManager-item><a href="#itemn">item()</a></code> method
+ with that index (e.g. <code title="">undoManager[1]</code> returns the
+ same as <code title="">undoManager.item(1)</code>).
+
+ <p><code><a href="#undomanager">UndoManager</a></code> objects represent
+ their document's <a href="#undo-transaction">undo transaction history</a>.
+ Only <a href="#undo-object">undo object</a> entries are visible with this
+ API, but this does not mean that <a href="#dom-changes">DOM changes</a>
+ entries are absent from the <a href="#undo-transaction">undo transaction
+ history</a>.
+
+ <p>The <dfn id=length9
+ title=dom-UndoManager-length><code>length</code></dfn> attribute must
+ return the number of <a href="#undo-object">undo object</a> entries in the
+ <a href="#undo-transaction">undo transaction history</a>.
+
+ <p>The <dfn id=itemn title=dom-UndoManager-item><code>item(<var
+ title="">n</var>)</code></dfn> method must return the <var
+ title="">n</var>th <a href="#undo-object">undo object</a> entry in the <a
+ href="#undo-transaction">undo transaction history</a>.
+
+ <p>The <a href="#undo-transaction">undo transaction history</a> has a <dfn
+ id=current3 title="undo position">current position</dfn>. This is the
+ position between two entries in the <a href="#undo-transaction">undo
+ transaction history</a>'s list where the previous entry represents what
+ needs to happen if the user invokes the "undo" command (the "undo" side,
+ lower numbers), and the next entry represents what needs to happen if the
+ user invokes the "redo" command (the "redo" side, higher numbers).
+
+ <p>The <dfn id=position0
+ title=dom-UndoManager-position><code>position</code></dfn> attribute must
+ return the index of the <a href="#undo-object">undo object</a> entry
+ nearest to the <a href="#current3">undo position</a>, on the "redo" side.
+ If there are no <a href="#undo-object">undo object</a> entries on the
+ "redo" side, then the attribute must return the same as the <code
+ title=dom-UndoManager-length><a href="#length9">length</a></code>
+ attribute. If there are no <a href="#undo-object">undo object</a> entries
+ on the "undo" side of the <a href="#current3">undo position</a>, the <code
+ title=dom-UndoManager-position><a href="#position0">position</a></code>
+ attribute returns zero.
+
+ <p class=note>Since the <a href="#undo-transaction">undo transaction
+ history</a> contains both <a href="#undo-object">undo object</a> entries
+ and <a href="#dom-changes">DOM changes</a> entries, but the <code
+ title=dom-UndoManager-position><a href="#position0">position</a></code>
+ attribute only returns indices relative to <a href="#undo-object">undo
+ object</a> entries, it is possible for several "undo" or "redo" actions to
+ be performed without the value of the <code
+ title=dom-UndoManager-position><a href="#position0">position</a></code>
+ attribute changing.
+
+ <p>The <dfn id=adddata title=dom-UndoManager-add><code>add(<var
+ title="">data</var>, <var title="">title</var>)</code></dfn> method's
+ behaviour depends on the current state. Normally, it must insert the <var
+ title="">data</var> object passed as an argument into the <a
+ href="#undo-transaction">undo transaction history</a> immediately before
+ the <a href="#current3">undo position</a>, optionally remembering the
+ given <var title="">title</var> to use in the UI. If the method is called
+ <a href="#undo-moving0" title=do-undo>during an undo operation</a>,
+ however, the object must instead be added immediately <em>after</em> the
+ <a href="#current3">undo position</a>.
+
+ <p>If the method is called and there is neither <a href="#undo-moving0"
+ title=do-undo>an undo operation in progress</a> nor <a
+ href="#redo-moving0" title=do-redo>a redo operation in progress</a> then
+ any entries in the <a href="#undo-transaction">undo transaction
+ history</a> after the <a href="#current3">undo position</a> must be
+ removed (as if <code title=dom-UndoManager-clearRedo><a
+ href="#clearredo">clearRedo()</a></code> had been called).
+
+ <p class=big-issue>We could fire events when someone adds something to the
+ undo history -- one event per undo object entry before the position (or
+ after, during redo addition), allowing the script to decide if that entry
+ should remain or not. Or something. Would make it potentially easier to
+ expire server-held state when the server limitations come into play.</p>
+ <!-- XXX note on expiring undo in case server can only do one level undo -->
+
+ <p>The <dfn id=remove1 title=dom-UndoManager-remove><code>remove(<var
+ title="">index</var>)</code></dfn> method must remove the <a
+ href="#undo-object">undo object</a> entry with the specified <var
+ title="">index</var>. If the index is less than zero or greater than or
+ equal to <code title=dom-UndoManager-length><a
+ href="#length9">length</a></code> then the method must raise an
+ <code>INDEX_SIZE_ERR</code> exception. <a href="#dom-changes">DOM
+ changes</a> entries are unaffected by this method.
+
+ <p>The <dfn id=clearundo
+ title=dom-UndoManager-clearUndo><code>clearUndo()</code></dfn> method must
+ remove all entries in the <a href="#undo-transaction">undo transaction
+ history</a> before the <a href="#current3">undo position</a>, be they <a
+ href="#dom-changes">DOM changes</a> entries or <a href="#undo-object">undo
+ object</a> entries.
+
+ <p>The <dfn id=clearredo
+ title=dom-UndoManager-clearRedo><code>clearRedo()</code></dfn> method must
+ remove all entries in the <a href="#undo-transaction">undo transaction
+ history</a> after the <a href="#current3">undo position</a>, be they <a
+ href="#dom-changes">DOM changes</a> entries or <a href="#undo-object">undo
+ object</a> entries.
+
+ <p class=big-issue>Another idea is to have a way for scripts to say
+ "startBatchingDOMChangesForUndo()" and after that the changes to the DOM
+ go in as if the user had done them.
+
+ <h4 id=undo-moving><span class=secno>5.4.2. </span><dfn id=undo-moving0
+ title=do-undo>Undo: moving back in the undo transaction history</dfn></h4>
+
+ <p>When the user invokes an undo operation, or when the <code
+ title=dom-document-execCommand><a
+ href="#execCommand">execCommand()</a></code> method is called with the
+ <code title=command-undo><a href="#undo1">undo</a></code> command, the
+ user agent must perform an undo operation.
+
+ <p>If the <a href="#current3">undo position</a> is at the start of the <a
+ href="#undo-transaction">undo transaction history</a>, then the user agent
+ must do nothing.
+
+ <p>If the entry immediately before the <a href="#current3">undo
+ position</a> is a <a href="#dom-changes">DOM changes</a> entry, then the
+ user agent must remove that <a href="#dom-changes">DOM changes</a> entry,
+ reverse the DOM changes that were listed in that entry, and, if the
+ changes were reversed with no problems, add a new <a
+ href="#dom-changes">DOM changes</a> entry (consisting of the opposite of
+ those DOM changes) to the <a href="#undo-transaction">undo transaction
+ history</a> on the other side of the <a href="#current3">undo
+ position</a>.
+
+ <p>If the DOM changes cannot be undone (e.g. because the DOM state is no
+ longer consistent with the changes represented in the entry), then the
+ user agent must simply remove the <a href="#dom-changes">DOM changes</a>
+ entry, without doing anything else.
+
+ <p>If the entry immediately before the <a href="#current3">undo
+ position</a> is an <a href="#undo-object">undo object</a> entry, then the
+ user agent must first remove that <a href="#undo-object">undo object</a>
+ entry from the <a href="#undo-transaction">undo transaction history</a>,
+ and then must fire an <code title=event-undo><a
+ href="#undo0">undo</a></code> event on the <code>Document</code> object,
+ using the <a href="#undo-object">undo object</a> entry's associated undo
+ object as the event's data.
+
+ <p>Any calls to <code title=dom-UndoManager-add><a
+ href="#adddata">add()</a></code> while the event is being handled will be
+ used to populate the redo history, and will then be used if the user
+ invokes the "redo" command to undo his undo.
+
+ <h4 id=redo-moving><span class=secno>5.4.3. </span><dfn id=redo-moving0
+ title=do-redo>Redo: moving forward in the undo transaction history</dfn></h4>
+
+ <p>When the user invokes a redo operation, or when the <code
+ title=dom-document-execCommand><a
+ href="#execCommand">execCommand()</a></code> method is called with the
+ <code title=command-redo><a href="#redo0">redo</a></code> command, the
+ user agent must perform a redo operation.
+
+ <p>This is mostly the opposite of an <a href="#undo-moving0"
+ title=do-undo>undo operation</a>, but the full definition is included here
+ for completeness.
+
+ <p>If the <a href="#current3">undo position</a> is at the end of the <a
+ href="#undo-transaction">undo transaction history</a>, then the user agent
+ must do nothing.
+
+ <p>If the entry immediately after the <a href="#current3">undo position</a>
+ is a <a href="#dom-changes">DOM changes</a> entry, then the user agent
+ must remove that <a href="#dom-changes">DOM changes</a> entry, reverse the
+ DOM changes that were listed in that entry, and, if the changes were
+ reversed with no problems, add a new <a href="#dom-changes">DOM
+ changes</a> entry (consisting of the opposite of those DOM changes) to the
+ <a href="#undo-transaction">undo transaction history</a> on the other side
+ of the <a href="#current3">undo position</a>.
+
+ <p>If the DOM changes cannot be redone (e.g. because the DOM state is no
+ longer consistent with the changes represented in the entry), then the
+ user agent must simply remove the <a href="#dom-changes">DOM changes</a>
+ entry, without doing anything else.
+
+ <p>If the entry immediately after the <a href="#current3">undo position</a>
+ is an <a href="#undo-object">undo object</a> entry, then the user agent
+ must first remove that <a href="#undo-object">undo object</a> entry from
+ the <a href="#undo-transaction">undo transaction history</a>, and then
+ must fire a <code title=event-redo><a href="#redo">redo</a></code> event
+ on the <code>Document</code> object, using the <a href="#undo-object">undo
+ object</a> entry's associated undo object as the event's data.
+
+ <h4 id=the-undomanagerevent><span class=secno>5.4.4. </span>The <code><a
+ href="#undomanagerevent">UndoManagerEvent</a></code> interface and the
+ <code title=event-undo><a href="#undo0">undo</a></code> and <code
+ title=event-redo><a href="#redo">redo</a></code> events</h4>
+
+ <pre
+ class=idl>interface <dfn id=undomanagerevent>UndoManagerEvent</dfn> : Event {
+ readonly attribute DOMObject <a href="#data3" title=dom-UndoManagerEvent-data>data</a>;
+ void <a href="#initundomanagerevent" title=dom-UndoManagerEvent-initUndoManagerEvent>initUndoManagerEvent</a>(in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMObject dataArg);
+ void <span title=dom-UndoManagerEvent-initUndoManagerEventNS>initUndoManagerEventNS</span>(in DOMString namespaceURIArg, in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMObject dataArg);
+};</pre>
+
+ <p>The <dfn id=initundomanagerevent
+ title=dom-UndoManagerEvent-initUndoManagerEvent><code>initUndoManagerEvent()</code></dfn>
+ and <dfn id=initundomanagereventns><code
+ title=dom-UndoManagerEvent-initUndoManagerEventNS>initUndoManagerEventNS()</code></dfn>
+ methods must initialise the event in a manner analogous to the
+ similarly-named methods in the DOM3 Events interfaces. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The <dfn id=data3
+ title=dom-UndoManagerEvent-data><code>data</code></dfn> attribute
+ represents the <a href="#undo-object">undo object</a> for the event.
+
+ <p>The <dfn id=undo0 title=event-undo><code>undo</code></dfn> and <dfn
+ id=redo title=event-redo><code>redo</code></dfn> events do not bubble,
+ cannot be canceled, and have no default action. When the user agent fires
+ one of these events it must use the <code><a
+ href="#undomanagerevent">UndoManagerEvent</a></code> interface, with the
+ <code title=dom-UndoManagerEvent-data><a href="#data3">data</a></code>
+ field containing the relevant <a href="#undo-object">undo object</a>.
+
+ <h4 id=implementation0><span class=secno>5.4.5. </span>Implementation notes</h4>
+
+ <p>How user agents present the above conceptual model to the user is not
+ defined. The undo interface could be a filtered view of the <a
+ href="#undo-transaction">undo transaction history</a>, it could manipulate
+ the <a href="#undo-transaction">undo transaction history</a> in ways not
+ described above, and so forth. For example, it is possible to design a UA
+ that appears to have separate <a href="#undo-transaction" title="undo
+ transaction history">undo transaction histories</a> for each form control;
+ similarly, it is possible to design systems where the user has access to
+ more undo information than is present in the official (as described above)
+ <a href="#undo-transaction">undo transaction history</a> (such as
+ providing a tree-based approach to document state). Such UI models should
+ be based upon the single <a href="#undo-transaction">undo transaction
+ history</a> described in this section, however, such that to a script
+ there is no detectable difference.
+
+ <h3 id=command><span class=secno>5.5. </span>Command APIs</h3>
+
+ <p>The <dfn id=execCommand
+ title=dom-document-execCommand><code>execCommand(<var
+ title="">commandId</var>, <var title="">doShowUI</var>, <var
+ title="">value</var>)</code></dfn> method on the <code><a
+ href="#htmldocument">HTMLDocument</a></code> interface allows scripts to
+ perform actions on the <a href="#a-selection" title="the
+ selection">current selection</a> or at the current caret position.
+ Generally, these commands would be used to implement editor UI, for
+ example having a "delete" button on a toolbar.
+
+ <p>There are three variants to this method, with one, two, and three
+ arguments respectively. The <var title="">doShowUI</var> and <var
+ title="">value</var> parameters, even if specified, are ignored unless
+ otherwise stated.
+
+ <p class=note>In this specification, in fact, the <var
+ title="">doShowUI</var> parameter is always ignored, regardless of its
+ value. It is included for historical reasons only.
+
+ <p>When any of these methods are invoked, user agents must act as described
+ in the list below.
+
+ <p>For actions marked "<dfn id=editing2>editing hosts only</dfn>", if the
+ selection is not entirely within an <a href="#editing1">editing host</a>,
+ or if there is no selection and the caret is not inside an <a
+ href="#editing1">editing host</a>, then the user agent must do nothing.
+
+ <dl>
+ <dt>If the <var title="">commandId</var> is <dfn id=undo1
+ title=command-undo><code>undo</code></dfn>
+
+ <dd>The user agent must <a href="#undo-moving0" title=do-undo>move back
+ one step</a> in its <a href="#undo-transaction">undo transaction
+ history</a>, restoring the associated state. If there is no further undo
+ information the user agent must do nothing. See the <a
+ href="#undo-history">undo history</a>.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=redo0
+ title=command-redo><code>redo</code></dfn>
+
+ <dd>The user agent must <a href="#redo-moving0" title=do-redo>move forward
+ one step</a> in its <a href="#undo-transaction">undo transaction
+ history</a>, restoring the associated state. If there is no further undo
+ (well, "redo") information the user agent must do nothing. See the <a
+ href="#undo-history">undo history</a>.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=selectall0
+ title=command-selectAll><code>selectAll</code></dfn>
+
+ <dd>The user agent must change the selection so that all the content in
+ the currently focused <a href="#editing1">editing host</a> is selected.
+ If no <a href="#editing1">editing host</a> is focused, then the content
+ of the entire document must be selected.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=unselect
+ title=command-unselect><code>unselect</code></dfn>
+
+ <dd>
+ <p>The user agent must change the selection so that nothing is selected.</p>
+
+ <p class=big-issue>We need some sort of way in which the user can make a
+ selection without risk of script clobbering it.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=superscript
+ title=command-superscript><code>superscript</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had requested that the selection <a
+ href="#contenteditable-wrapSemantic">be wrapped in the semantics</a> of
+ the <code><a href="#sup">sup</a></code> element (or unwrapped, or, if
+ there is no selection, have that semantic inserted or removed &mdash; the
+ exact behaviour is UA-defined).
+
+ <dt>If the <var title="">commandId</var> is <dfn id=subscript
+ title=command-subscript><code>subscript</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had requested that the selection <a
+ href="#contenteditable-wrapSemantic">be wrapped in the semantics</a> of
+ the <em title=""><code><a href="#sub">sub</a></code></em> element (or,
+ again, unwrapped, or have that semantic inserted or removed, as defined
+ by the UA).
+
+ <dt>If the <var title="">commandId</var> is <dfn id=formatblock
+ title=command-formatBlock><code>formatBlock</code></dfn>
+
+ <dd>
+ <p><em><a href="#editing2">Editing hosts only.</a></em> This command
+ changes the semantics of the blocks containing the selection.</p>
+
+ <p>If there is no selection, then, where the description below refers
+ to the selection, the user agent must act as if the selection was an
+ empty range at the caret position.</p>
+
+ <p>If the <var title="">value</var> parameter is not specified or has a
+ value other than one of the following literal strings:</p>
+
+ <ul class=brief>
+ <li><code title="">&lt;address&gt;</code>
+
+ <li><code title="">&lt;aside&gt;</code>
+
+ <li><code title="">&lt;h1&gt;</code>
+
+ <li><code title="">&lt;h2&gt;</code>
+
+ <li><code title="">&lt;h3&gt;</code>
+
+ <li><code title="">&lt;h4&gt;</code>
+
+ <li><code title="">&lt;h5&gt;</code>
+
+ <li><code title="">&lt;h6&gt;</code>
+
+ <li><code title="">&lt;nav&gt;</code>
+
+ <li><code title="">&lt;p&gt;</code>
+
+ <li><code title="">&lt;pre&gt;</code>
+ </ul>
+
+ <p>...then the user agent must do nothing.</p>
+
+ <p>Otherwise, the user agent must, for every position in the selection,
+ take the furthest <a href="#block-level0" title="block-level
+ elements">block-level element</a> ancestor of that position that
+ contains only <a href="#inline-level0">inline-level content</a> and is
+ not being used as a <a href="#structured" title="structured inline-level
+ elements">structured inline-level element</a>, and, if that element is a
+ descendant of the editing host, rename it (as if the <code
+ title="">Element.renameNode()</code> method had been used) according to
+ the <var title="">value</var>, by stripping the leading
+ <code>&lt;</code> character and the trailing <code>&gt;</code> character
+ and using the rest as the new tag name, using the HTML namespace.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=delete
+ title=command-delete><code>delete</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had performed <a href="#contenteditable-delete">a
+ backspace operation</a>.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=forwarddelete
+ title=command-forwardDelete><code>forwardDelete</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had performed <a href="#contenteditable-delete">a
+ forward delete operation</a>.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=insertlinebreak
+ title=command-insertLineBreak><code>insertLineBreak</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had <a href="#contenteditable-br">requested a
+ line separator</a>.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=insertparagraph
+ title=command-insertParagraph><code>insertParagraph</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had performed a <a
+ href="#contenteditable-breakBlock">break block</a> editing action.
+
+ <dt>If the <var title="">commandId</var> is <dfn id=inserttext
+ title=command-insertText><code>insertText</code></dfn>
+
+ <dd><em><a href="#editing2">Editing hosts only.</a></em> The user agent
+ must act as if the user had <a
+ href="#contenteditable-insertText">inserted text</a> corresponding to the
+ <var title="">value</var> parameter.
+
+ <dt>If the <var title="">commandId</var> is <code><var
+ title="">vendorID</var>-<var title="">customCommandID</var></code>
+
+ <dd>User agents may implement vendor-specific extensions to this API.
+ Vendor-specific extensions to the list of commands should use the syntax
+ <code><var title="">vendorID</var>-<var
+ title="">customCommandID</var></code> so as to prevent clashes between
+ extensions from different vendors and future additions to this
+ specification.
+
+ <dt>If the <var title="">commandId</var> is something else
+
+ <dd>User agents must do nothing.
+ </dl>
+
+ <h3 id=selection><span class=secno>5.6. </span>The text selection APIs</h3>
+
+ <p>Every <a href="#browsing0">browsing context</a> has <dfn id=a-selection
+ title="the selection">a selection</dfn>. The selection may be empty, and
+ the selection may have more than one range (a disjointed selection). The
+ user should be able to change the selection. User agents are not required
+ to let the user select more than one range, and may collapse multiple
+ ranges in the selection to a single range when the user interacts with the
+ selection. (But, of course, the user agent may let the user create
+ selections with multiple ranges.)
+
+ <p>This one selection must be shared by all the content of the browsing
+ context (though not by nested <a href="#browsing0" title="browsing
+ context">browsing contexts</a>), including any editing hosts in the
+ document. (Editing hosts that are not inside a document cannot have a
+ selection.)
+
+ <p>If the selection is empty (collapsed, so that it has only one segment
+ and that segment's start and end points are the same) then the selection's
+ position should equal the caret position. When the selection is not empty,
+ this specification does not define the caret position; user agents should
+ follow platform conventions in deciding whether the caret is at the start
+ of the selection, the end of the selection, or somewhere else.
+
+ <p>On some platforms (such as those using Wordstar editing conventions),
+ the caret position is totally independent of the start and end of the
+ selection, even when the selection is empty. On such platforms, user
+ agents may ignore the requirement that the cursor position be linked to
+ the position of the selection altogether.
+
+ <p>Mostly for historical reasons, in addition to the <a
+ href="#browsing0">browsing context</a>'s <a href="#a-selection" title="the
+ selection">selection</a>, each <code>textarea</code> and
+ <code>input</code> element has an independent selection. These are the
+ <dfn id=text-field title="text field selection">text field
+ selections</dfn>.
+
+ <p>The <code><a href="#datagrid0">datagrid</a></code> and
+ <code>select</code> elements also have selections, indicating which items
+ have been picked by the user. These are not discussed in this section.
+
+ <p class=note>This specification does not specify how selections are
+ presented to the user. The Selectors specification, in conjunction with
+ CSS, can be used to style text selections using the <code><a
+ href="#selection1">::selection</a></code> pseudo-element. <a
+ href="#refsSELECTORS">[SELECTORS]</a> <a href="#refsCSS21">[CSS21]</a>
+
+ <h4 id=documentSelection><span class=secno>5.6.1. </span>APIs for the
+ browsing context selection</h4>
+
+ <p>The <dfn id=getselection
+ title=dom-getSelection><code>getSelection()</code></dfn> method on the
+ <code><a href="#window">Window</a></code> interface must return the
+ <code><a href="#selection1">Selection</a></code> object representing <a
+ href="#a-selection">the selection</a> of that <code><a
+ href="#window">Window</a></code> object's <a href="#browsing0">browsing
+ context</a>.
+
+ <p>For historical reasons, the <dfn id=getselection0
+ title=dom-document-getSelection><code>getSelection()</code></dfn> method
+ on the <code><a href="#htmldocument">HTMLDocument</a></code> interface
+ must return the same <code><a href="#selection1">Selection</a></code>
+ object.
+
+ <pre class=idl>interface <dfn id=selection1>Selection</dfn> {
+ readonly attribute Node <a href="#anchornode" title=dom-selection-anchorNode>anchorNode</a>;
+ readonly attribute long <a href="#anchoroffset" title=dom-selection-anchorOffset>anchorOffset</a>;
+ readonly attribute Node <a href="#focusnode" title=dom-selection-focusNode>focusNode</a>;
+ readonly attribute long <a href="#focusoffset" title=dom-selection-focusOffset>focusOffset</a>;
+ readonly attribute boolean <a href="#iscollapsed" title=dom-selection-isCollapsed>isCollapsed</a>;
+ void <a href="#collapse" title=dom-selection-collapse>collapse</a>(in Node parentNode, in long offset);
+ void <a href="#collapsetostart" title=dom-selection-collapseToStart>collapseToStart</a>();
+ void <a href="#collapsetoend" title=dom-selection-collapseToEnd>collapseToEnd</a>();
+ void <a href="#selectallchildren" title=dom-selection-selectAllChildren>selectAllChildren</a>(in Node parentNode);
+ void <a href="#deletefromdocument" title=dom-selection-deleteFromDocument>deleteFromDocument</a>();
+ readonly attribute long <a href="#rangecount" title=dom-selection-rangeCount>rangeCount</a>;
+ Range <a href="#getrangeat" title=dom-selection-getRangeAt>getRangeAt</a>(in long index);
+ void <a href="#addrange" title=dom-selection-addRange>addRange</a>(in Range range);
+ void <a href="#removerange" title=dom-selection-removeRange>removeRange</a>(in Range range);
+ void <a href="#removeallranges" title=dom-selection-removeAllRanges>removeAllRanges</a>();
+ DOMString <a href="#tostring" title=dom-selection-toString>toString</a>();
+};</pre>
+ <!--
+ See also:
+ http://lxr.mozilla.org/mozilla/source/content/base/public/nsISelection.idl
+ This spec doesn't have everything from there yet, in particular
+ selectionLanguageChange() and containsNode() are missing. They are missing
+ because I couldn't work out how to define them in terms of Ranges.
+
+ I also haven't included extend():
+
+ void <span title="dom-selection-extend">extend</span>(in Node parentNode, in long offset);
+ // raise if no range
+ // raise WRONG_DOCUMENT_ERR if parentNode not in document
+ // do something
+
+ ...mostly because I can't work out how to describe what it does quickly.
+-->
+
+ <p>The <code><a href="#selection1">Selection</a></code> interface
+ represents a list of <code>Range</code> objects. The first item in the
+ list has index 0, and the last item has index <var title="">count</var>-1,
+ where <var title="">count</var> is the number of ranges in the list. <a
+ href="#refsDOM2RANGE">[DOM2RANGE]</a>
+
+ <p>All of the members of the <code><a
+ href="#selection1">Selection</a></code> interface are defined in terms of
+ operations on the <code>Range</code> objects represented by this object.
+ These operations can raise exceptions, as defined for the
+ <code>Range</code> interface; this can therefore result in the members of
+ the <code><a href="#selection1">Selection</a></code> interface raising
+ exceptions as well, in addition to any explicitly called out below.</p>
+ <!--- XXX example -->
+
+ <p>The <dfn id=anchornode
+ title=dom-selection-anchorNode><code>anchorNode</code></dfn> attribute
+ must return the value returned by the <code title="">startContainer</code>
+ attribute of the last <code>Range</code> object in the list, or null if
+ the list is empty.
+
+ <p>The <dfn id=anchoroffset
+ title=dom-selection-anchorOffset><code>anchorOffset</code></dfn> attribute
+ must return the value returned by the <code title="">startOffset</code>
+ attribute of the last <code>Range</code> object in the list, or 0 if the
+ list is empty.
+
+ <p>The <dfn id=focusnode
+ title=dom-selection-focusNode><code>focusNode</code></dfn> attribute must
+ return the value returned by the <code title="">endContainer</code>
+ attribute of the last <code>Range</code> object in the list, or null if
+ the list is empty.
+
+ <p>The <dfn id=focusoffset
+ title=dom-selection-focusOffset><code>focusOffset</code></dfn> attribute
+ must return the value returned by the <code title="">endOffset</code>
+ attribute of the last <code>Range</code> object in the list, or 0 if the
+ list is empty.
+
+ <p>The <dfn id=iscollapsed
+ title=dom-selection-isCollapsed><code>isCollapsed</code></dfn> attribute
+ must return true if there are zero ranges, or if there is exactly one
+ range and its <code title="">collapsed</code> attribute is itself true.
+ Otherwise it must return false.
+
+ <p>The <dfn id=collapse title=dom-selection-collapse><code>collapse(<var
+ title="">parentNode</var>, <var title="">offset</var>)</code></dfn> method
+ must raise a <code>WRONG_DOCUMENT_ERR</code> DOM exception if <var
+ title="">parentNode</var>'s <code title="">ownerDocument</code> is not the
+ <code><a href="#htmldocument">HTMLDocument</a></code> object with which
+ the <code><a href="#selection1">Selection</a></code> object is associated.
+ Otherwise it is, and the method must remove all the ranges in the <code><a
+ href="#selection1">Selection</a></code> list, then create a new
+ <code>Range</code> object, add it to the list, and invoke its <code
+ title="">setStart()</code> and <code title="">setEnd()</code> methods with
+ the <var title="">parentNode</var> and <var title="">offset</var> values
+ as their arguments.
+
+ <p>The <dfn id=collapsetostart
+ title=dom-selection-collapseToStart><code>collapseToStart()</code></dfn>
+ method must raise an <code>INVALID_STATE_ERR</code> DOM exception if there
+ are no ranges in the list. Otherwise, it must invoke the <code
+ title=dom-selection-collapse><a href="#collapse">collapse()</a></code>
+ method with the <code title="">startContainer</code> and <code
+ title="">startOffset</code> values of the first <code>Range</code> object
+ in the list as the arguments.
+
+ <p>The <dfn id=collapsetoend
+ title=dom-selection-collapseToEnd><code>collapseToEnd()</code></dfn>
+ method must raise an <code>INVALID_STATE_ERR</code> DOM exception if there
+ are no ranges in the list. Otherwise, it must invoke the <code
+ title=dom-selection-collapse><a href="#collapse">collapse()</a></code>
+ method with the <code title="">endContainer</code> and <code
+ title="">endOffset</code> values of the last <code>Range</code> object in
+ the list as the arguments.
+
+ <p>The <dfn id=selectallchildren
+ title=dom-selection-selectAllChildren><code>selectAllChildren(<var
+ title="">parentNode</var>)</code></dfn> method must invoke the <code
+ title=dom-selection-collapse><a href="#collapse">collapse()</a></code>
+ method with the <var title="">parentNode</var> value as the first argument
+ and 0 as the second argument, and must then invoke the <code
+ title="">selectNodeContents()</code> method on the first (and only) range
+ in the list with the <var title="">parentNode</var> value as the argument.
+
+ <p>The <dfn id=deletefromdocument
+ title=dom-selection-deleteFromDocument><code>deleteFromDocument()</code></dfn>
+ method must invoke the <code title="">deleteContents()</code> method on
+ each range in the list, if any, from first to last.
+
+ <p>The <dfn id=rangecount
+ title=dom-selection-rangeCount><code>rangeCount</code></dfn> attribute
+ must return the number of ranges in the list.
+
+ <p>The <dfn id=getrangeat
+ title=dom-selection-getRangeAt><code>getRangeAt(<var
+ title="">index</var>)</code></dfn> method must return the <var
+ title="">index</var>th range in the list. If <var title="">index</var> is
+ less than zero or greater than or equal to the value returned by the <code
+ title=dom-selection-rangeCount><a href="#rangecount">rangeCount</a></code>
+ attribute, then the method must raise an <code>INDEX_SIZE_ERR</code> DOM
+ exception.
+
+ <p>The <dfn id=addrange title=dom-selection-addRange><code>addRange(<var
+ title="">range</var>)</code></dfn> method must add the given <var
+ title="">range</var> Range object to the list of selections, at the end
+ (so the newly added range is the new last range). Duplicates are not
+ prevented; a range may be added more than once in which case it appears in
+ the list more than once, which (for example) will cause <code
+ title=dom-selection-toString><a href="#tostring">toString()</a></code> to
+ return the range's text twice.</p>
+ <!-- XXX how does this interact with
+ deleteFromDocument() which acts on all ranges? -->
+
+ <p>The <dfn id=removerange
+ title=dom-selection-removeRange><code>removeRange(<var
+ title="">range</var>)</code></dfn> method must remove the first occurrence
+ of <var title="">range</var> in the list of ranges, if it appears at all.
+
+ <p>The <dfn id=removeallranges
+ title=dom-selection-removeAllRanges><code>removeAllRanges()</code></dfn>
+ method must remove all the ranges from the list of ranges, such that the
+ <code title=dom-selection-rangeCount><a
+ href="#rangecount">rangeCount</a></code> attribute returns 0 after the
+ <code title=dom-selection-removeAllRanges><a
+ href="#removeallranges">removeAllRanges()</a></code> method is invoked
+ (and until a new range is added to the list, either through this interface
+ or via user interaction).
+
+ <p>The <dfn id=tostring
+ title=dom-selection-toString><code>toString()</code></dfn> method must
+ return a concatenation of the results of invoking the <code
+ title="">toString()</code> method of the <code>Range</code> object on each
+ of the ranges of the selection, in the order they appear in the list
+ (first to last).
+
+ <p>In language bindings where this is supported, objects implementing the
+ <code><a href="#selection1">Selection</a></code> interface must stringify
+ to the value returned by the object's <code
+ title=dom-selection-toString><a href="#tostring">toString()</a></code>
+ method.
+
+ <div class=example>
+ <p>In the following document fragment, the emphasised parts indicate the
+ selection.</p>
+
+ <pre>&lt;p>The cute girl likes <em>the </em>&lt;cite><em>Oxford English</em> Dictionary&lt;/cite>.&lt;/p></pre>
+
+ <p>If a script invoked <code
+ title="">window.getSelection().toString()</code>, the return value would
+ be "<code>the Oxford English</code>".</p>
+ </div>
+
+ <p class=note>The <code><a href="#selection1">Selection</a></code>
+ interface has no relation to the <code><a
+ href="#datagridselection">DataGridSelection</a></code> interface.
+
+ <h4 id=textFieldSelection><span class=secno>5.6.2. </span>APIs for the text
+ field selections</h4>
+
+ <p class=big-issue>When we define HTMLTextAreaElement and HTMLInputElement
+ we will have to add the IDL given below to both of their IDLs.
+
+ <p>The <code>input</code> and <code>textarea</code> elements define four
+ members in their DOM interfaces for handling their text selection:
+
+ <pre
+ class=idl> void <a href="#select0" title="dom-textarea/input-select">select</a>();
+ attribute unsigned long <a href="#selectionstart" title="dom-textarea/input-selectionStart">selectionStart</a>;
+ attribute unsigned long <a href="#selectionend" title="dom-textarea/input-selectionEnd">selectionEnd</a>;
+ void <a href="#setselectionrange" title="dom-textarea/input-setSelectionRange">setSelectionRange</a>(in unsigned long start, in unsigned long end);</pre>
+ <!-- XXX also add textLength? it seems to be widely used -->
+
+ <p>These methods and attributes expose and control the selection of
+ <code>input</code> and <code>textarea</code> text fields.
+
+ <p>The <dfn id=select0
+ title="dom-textarea/input-select"><code>select()</code></dfn> method must
+ cause the contents of the text field to be fully selected.
+
+ <p>The <dfn id=selectionstart
+ title="dom-textarea/input-selectionStart"><code>selectionStart</code></dfn>
+ attribute must, on getting, return the offset (in logical order) to the
+ character that immediately follows the start of the selection. If there is
+ no selection, then it must return the offset (in logical order) to the
+ character that immediately follows the text entry cursor.
+
+ <p>On setting, it must act as if the <code
+ title="dom-textarea/input-setSelectionRange"><a
+ href="#setselectionrange">setSelectionRange()</a></code> method had been
+ called, with the new value as the first argument, and the current value of
+ the <code title="dom-textarea/input-selectionEnd"><a
+ href="#selectionend">selectionEnd</a></code> attribute as the second
+ argument, unless the current value of the <code
+ title="dom-textarea/input-selectionEnd"><a
+ href="#selectionend">selectionEnd</a></code> is less than the new value,
+ in which case the second argument must also be the new value.
+
+ <p>The <dfn id=selectionend
+ title="dom-textarea/input-selectionEnd"><code>selectionEnd</code></dfn>
+ attribute must, on getting, return the offset (in logical order) to the
+ character that immediately follows the end of the selection. If there is
+ no selection, then it must return the offset (in logical order) to the
+ character that immediately follows the text entry cursor.
+
+ <p>On setting, it must act as if the <code
+ title="dom-textarea/input-setSelectionRange"><a
+ href="#setselectionrange">setSelectionRange()</a></code> method had been
+ called, with the current value of the <code
+ title="dom-textarea/input-selectionStart"><a
+ href="#selectionstart">selectionStart</a></code> attribute as the first
+ argument, and the new value as the second argument.
+
+ <p>The <dfn id=setselectionrange
+ title="dom-textarea/input-setSelectionRange"><code>setSelectionRange(<var
+ title="">start</var>, <var title="">end</var>)</code></dfn> method must
+ set the selection of the text field to the sequence of characters starting
+ with the character at the <var title="">start</var>th position (in logical
+ order) and ending with the character at the <span>(<var
+ title="">end</var>-1)</span>th position. Arguments greater than the length
+ of the value in the text field must be treated as pointing at the end of
+ the text field. If <var title="">end</var> is less than or equal to <var
+ title="">start</var> then the start of the selection and the end of the
+ selection must both be placed immediately before the character with offset
+ <var title="">end</var>. In UAs where there is no concept of an empty
+ selection, this must set the cursor to be just before the character with
+ offset <var title="">end</var>.
+
+ <div class=example>
+ <p>To obtain the currently selected text, the following JavaScript
+ suffices:</p>
+
+ <pre>var selectionText = control.value.substring(control.selectionStart, control.selectionEnd);</pre>
+
+ <p>...where <var title="">control</var> is the <code>input</code> or
+ <code>textarea</code> element.</p>
+ </div>
+
+ <p>Characters with no visible rendering, such as U+200D ZERO WIDTH JOINER,
+ still count as characters. Thus, for instance, the selection can include
+ just an invisible character, and the text insertion cursor can be placed
+ to one side or another of such a character.
+
+ <p>When these methods and attributes are used with <code>input</code>
+ elements that are not displaying simple text fields, they must raise an
+ <code>INVALID_STATE_ERR</code> exception.
+
+ <h2 id=comms><span class=secno>6. </span>Communication</h2>
+
+ <h3 id=event0><span class=secno>6.1. </span>Event definitions</h3>
+
+ <p>Messages in <a href="#cross-document">cross-document messaging</a> and,
+ by default, in <a href="#server-sent">server-sent DOM events</a>, use the
+ <dfn id=message title=event-message><code>message</code></dfn> event.
+
+ <p>The following interface is defined for this event:
+
+ <pre class=idl>interface <dfn id=messageevent>MessageEvent</dfn> : Event {
+ readonly attribute DOMString <a href="#data4" title=dom-MessageEvent-data>data</a>;
+ readonly attribute DOMString <a href="#domain2" title=dom-MessageEvent-domain>domain</a>;
+ readonly attribute DOMString <a href="#uri" title=dom-MessageEvent-uri>uri</a>;
+ readonly attribute Document <a href="#source1" title=dom-MessageEvent-source>source</a>;
+ void <a href="#initmessageevent" title=dom-MessageEvent-initMessageEvent>initMessageEvent</a>(in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMString dataArg, in DOMString domainArg, in DOMString uriArg, in Document documentArg);
+ void <a href="#initmessageeventns" title=dom-MessageEvent-initMessageEventNS>initMessageEventNS</a>(in DOMString namespaceURI, in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMString dataArg, in DOMString domainArg, in DOMString uriArg, in Document documentArg);
+};</pre>
+
+ <p>The <dfn id=initmessageevent
+ title=dom-MessageEvent-initMessageEvent><code>initMessageEvent()</code></dfn>
+ and <dfn id=initmessageeventns
+ title=dom-MessageEvent-initMessageEventNS><code>initMessageEventNS()</code></dfn>
+ methods must initialise the event in a manner analogous to the
+ similarly-named methods in the DOM3 Events interfaces. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The <dfn id=data4 title=dom-MessageEvent-data><code>data</code></dfn>
+ attribute represents the message being sent.
+
+ <p>The <dfn id=domain2
+ title=dom-MessageEvent-domain><code>domain</code></dfn> attribute
+ represents, in <a href="#cross-document">cross-document messaging</a>, the
+ domain of the document from which the message came.
+
+ <p>The <dfn id=uri title=dom-MessageEvent-uri><code>uri</code></dfn>
+ attribute represents, in <a href="#cross-document">cross-document
+ messaging</a>, the address of the document from which the message came.
+
+ <p>The <dfn id=source1
+ title=dom-MessageEvent-source><code>source</code></dfn> attribute
+ represents, in <a href="#cross-document">cross-document messaging</a>, the
+ <code>Document</code> from which the message came.
+
+ <h3 id=server-sent-events><span class=secno>6.2. </span><dfn
+ id=server-sent>Server-sent DOM events</dfn></h3>
+ <!-- event-source -->
+
+ <p>This section describes a mechanism for allowing servers to dispatch DOM
+ events into documents that expect it. The <code><a
+ href="#event-source">event-source</a></code> element provides a simple
+ interface to this mechanism.
+
+ <h4 id=the-remoteeventtarget><span class=secno>6.2.1. </span>The <dfn
+ id=remoteeventtarget><code>RemoteEventTarget</code></dfn> interface</h4>
+
+ <p>Any object that implements the <code>EventTarget</code> interface must
+ also implement the <code><a
+ href="#remoteeventtarget0">RemoteEventTarget</a></code> interface.
+
+ <pre
+ class=idl>interface <dfn id=remoteeventtarget0>RemoteEventTarget</dfn> {
+ void <a href="#addeventsource" title=dom-RemoteEventTarget-addEventSource>addEventSource</a>(in DOMString src);
+ void <a href="#removeeventsource" title=dom-RemoteEventTarget-removeEventSource>removeEventSource</a>(in DOMString src);
+};</pre>
+
+ <p>When the <dfn id=addeventsource
+ title=dom-RemoteEventTarget-addEventSource><code>addEventSource(<var
+ title="">src</var>)</code></dfn> method is invoked, the user agent must
+ add the URI specified in <var title="">src</var> to the <a
+ href="#list-of3" title=concept-event-source-list>list of event sources</a>
+ for that object. The same URI can be registered multiple times.
+
+ <p>When the <dfn id=removeeventsource
+ title=dom-RemoteEventTarget-removeEventSource><code>removeEventSource(<var
+ title="">src</var>)</code></dfn> method is invoked, the user agent must
+ remove the URI specified in <var title="">src</var> from the <a
+ href="#list-of3" title=concept-event-source-list>list of event sources</a>
+ for that object. If the same URI has been registered multiple times,
+ removing it must only remove one instance of that URI for each invocation
+ of the <code title=removeEventSource>removeEventSource()</code> method.
+
+ <p>Relative URIs must be resolved relative to <span
+ class=big-issue>...</span>.
+
+ <h4 id=connecting><span class=secno>6.2.2. </span>Connecting to an event
+ stream</h4>
+
+ <p>Each object implementing the <code>EventTarget</code> and <code><a
+ href="#remoteeventtarget0">RemoteEventTarget</a></code> interfaces has a
+ <dfn id=list-of3 title=concept-event-source-list>list of event
+ sources</dfn> that are registered for that object.
+
+ <p>When a new URI is added to this list, the user agent should, as soon as
+ all currently executing scripts (if any) have finished executing, and if
+ the specified URI isn't removed from the list before they do so, fetch the
+ resource identified by that URI.
+
+ <p>When an event source is removed from the list of event sources for an
+ object, if that resource is still being fetched, then the relevant
+ connection must be closed.
+
+ <p>Since connections established to remote servers for such resources are
+ expected to be long-lived, UAs should ensure that appropriate buffering is
+ used. In particular, while line buffering may be safe if lines are defined
+ to end with a single U+000A LINE FEED character, block buffering or line
+ buffering with different expected line endings can cause delays in event
+ dispatch.
+
+ <p>In general, the semantics of the transport protocol specified by the
+ URIs for the event sources must be followed, including HTTP caching rules.
+
+ <p>For HTTP connections, the <code title="">Accept</code> header may be
+ included; if included, it must only contain formats of event framing that
+ are supported by the user agent (one of which must be
+ <code>application/x-dom-event-stream</code>, as described below).
+
+ <p>Other formats of event framing may also be supported in addition to
+ <code>application/x-dom-event-stream</code>, but this specification does
+ not define how they are to be parsed or processed.
+
+ <p class=note>Such formats could include systems like SMS-push; for example
+ servers could use <code title="">Accept</code> headers and HTTP redirects
+ to an SMS-push mechanism as a kind of protocol negotiation to reduce
+ network load in GSM environments.
+
+ <p>User agents should use the <code>Cache-Control: no-cache</code> header
+ in requests to bypass any caches for requests of event sources.
+
+ <p>For connections to domains other than the <a href="#domain0">document's
+ domain</a>, the semantics of the Access-Control HTTP header must be
+ followed. <a href="#refsACCESSCONTROL">[ACCESSCONTROL]</a>
+
+ <p>HTTP 200 OK responses with a <a href="#content-type8">Content-Type</a>
+ header specifying the type <code>application/x-dom-event-stream</code>
+ that are either from the <a href="#domain0">document's domain</a> or
+ explicitly allowed by the Access-Control HTTP headers must be processed
+ line by line <a href="#event-stream-interpretation">as described
+ below</a>.
+
+ <p>For the purposes of such successfully opened event streams only, user
+ agents should ignore HTTP cache headers, and instead assume that the
+ resource indicates that it does not wish to be cached.
+
+ <p>If such a resource completes loading (i.e. the entire HTTP response body
+ is received or the connection itself closes), the user agent should
+ request the event source resource again after a delay of approximately
+ five seconds.
+
+ <p>HTTP 200 OK responses that have a <a
+ href="#content-type8">Content-Type</a> other than
+ <code>application/x-dom-event-stream</code> (or some other supported
+ type), and HTTP responses whose Access-Control headers indicate that the
+ resource is not to be used, must be ignored and must prevent the user
+ agent from refetching the resource for that event source.
+
+ <p>HTTP 201 Created, 202 Accepted, 203 Non-Authoritative Information, and
+ 206 Partial Content responses must be treated like HTTP 200 OK responses
+ for the purposes of reopening event source resources. They are, however,
+ likely to indicate an error has occurred somewhere and may cause the user
+ agent to emit a warning.
+
+ <p>HTTP 204 No Content, and 205 Reset Content responses must be treated as
+ if they were 200 OK responses with the right MIME type but no content, and
+ should therefore cause the user agent to refetch the resource after a
+ short delay.
+
+ <p>HTTP 300 Multiple Choices responses should be handled automatically if
+ possible (treating the responses as if they were 302 Found responses
+ pointing to the appropriate resource), and otherwise must be treated as
+ HTTP 404 responses.
+
+ <p>HTTP 301 Moved Permanently responses must cause the user agent to
+ reconnect using the new server-specified URI instead of the previously
+ specified URI for all subsequent requests for this event source. (It
+ doesn't affect other event sources with the same URI unless they also
+ receive 301 responses, and it doesn't affect future sessions, e.g. if the
+ page is reloaded.)
+
+ <p>HTTP 302 Found, 303 See Other, and 307 Temporary Redirect responses must
+ cause the user agent to connect to the new server-specified URI, but if
+ the user agent needs to again request the resource at a later point, it
+ must return to the previously specified URI for this event source.
+
+ <p>HTTP 304 Not Modified responses should be handled like HTTP 200 OK
+ responses, with the content coming from the user agent cache. A new
+ request should then be made after a short delay of approximately five
+ seconds.
+
+ <p>HTTP 305 Use Proxy, HTTP 401 Unauthorized, and 407 Proxy Authentication
+ Required should be treated transparently as for any other subresource.
+
+ <p>Any other HTTP response code not listed here should cause the user agent
+ to stop trying to process this event source.</p>
+ <!--
+ including: HTTP 400 Bad Request, 403 Forbidden, 404 Not Found, 405
+ Method Not Allowed, 406 Not Acceptable, 408 Request Timeout, 409
+ Conflict, 410 Gone, 411 Length Required, 412 Precondition Failed,
+ 413 Request Entity Too Large, 414 Request-URI Too Long, 415
+ Unsupported Media Type, 416 Requested Range Not Satisfiable, 417
+ Expectation Failed, 500 Internal Server Error, 501 Not Implemented,
+ 502 Bad Gateway, 503 Service Unavailable, 504 Gateway Timeout, and
+ 505 HTTP Version Not Supported responses -->
+
+ <p>DNS errors must be considered fatal, and cause the user agent to not
+ open any connection for that event source.
+
+ <p>For non-HTTP protocols, UAs should act in equivalent ways.
+
+ <h4 id=parsing0><span class=secno>6.2.3. </span>Parsing an event stream</h4>
+
+ <p>This event stream format's MIME type is
+ <code>application/x-dom-event-stream</code>.
+
+ <p>The event stream format is (in pseudo-BNF):
+
+ <pre>&lt;stream&gt; ::= &lt;bom&gt;? &lt;event&gt;*
+&lt;event&gt; ::= [ &lt;comment&gt; | &lt;command&gt; | &lt;field&gt; ]* &lt;newline&gt;
+&lt;comment&gt; ::= ';' &lt;any-char&gt;* &lt;newline&gt;
+&lt;command&gt; ::= ':' &lt;any-char&gt;* &lt;newline&gt;
+&lt;field&gt; ::= &lt;name&gt; [ ':' &lt;space&gt;? &lt;any-char&gt;* ]? &lt;newline&gt;
+&lt;name&gt; ::= &lt;name-start-char&gt; &lt;name-char&gt;*
+
+# characters:
+&lt;bom&gt; ::= a single U+FEFF BYTE ORDER MARK character
+&lt;space&gt; ::= a single U+0020 SPACE character (' ')
+&lt;newline&gt; ::= a U+000D CARRIAGE RETURN character
+ followed by a U+000A LINE FEED character
+ | a single U+000D CARRIAGE RETURN character
+ | a single U+000A LINE FEED character
+ | the end of the file
+&lt;name-start-char&gt; ::= a single Unicode character other than
+ ':', ';', U+000D CARRIAGE RETURN and U+000A LINE FEED
+&lt;name-char&gt; ::= a single Unicode character other than
+ ':', U+000D CARRIAGE RETURN and U+000A LINE FEED
+&lt;any-char&gt; ::= a single Unicode character other than
+ U+000D CARRIAGE RETURN and U+000A LINE FEED
+</pre>
+
+ <p>Event streams in this format must always be encoded as UTF-8. Lines must
+ be separated by either a U+000D CARRIAGE RETURN U+000A LINE FEED (CRLF)
+ character pair, a single U+000A LINE FEED (LF) character, or a single
+ U+000D CARRIAGE RETURN (CR) character. User agents must treat those three
+ variants as equivalent line terminators.
+
+ <p>Bytes that are not valid UTF-8 sequences must be interpreted as the
+ U+FFFD REPLACEMENT CHARACTER.
+
+ <p>A leading U+FEFF BYTE ORDER MARK character must be ignored if present.
+
+ <p>The stream must then be parsed by reading everything line by line, in
+ blocks separated by blank lines. Comment lines (those starting with the
+ character ';') and command lines (those starting with the character ':')
+ must be ignored.
+
+ <p>Command lines are reserved for future extensions.</p>
+ <!--XXX Lachlan says:
+ For the next version of server sent events, it might be useful if
+ authors could specify the retry interval for connections.
+
+ e.g. In cases where it is known that content is only updated at
+ specified intervals (e.g. once per minute), having the browser retry
+ the connection every 5 seconds and fire the event with the same
+ content may be excessive.
+
+ This could possibly be done using the :command syntax.
+
+ e.g.
+ Event: click
+ :retry 60000
+
+ The browser would then retry the connection in 60,000 milliseconds (1
+ minute).
+ -->
+
+ <p>For each non-blank, non-comment, non-command line, the field name must
+ first be taken. This is everything on the line up to but not including the
+ first colon (':') or the line terminator, whichever comes first. Then, if
+ there was a colon, the data for that line must be taken. This is
+ everything after the colon, ignoring a single space after the colon if
+ there is one, up to the end of the line. If there was no colon the data is
+ the empty string.
+
+ <div class=example>
+ <p>Examples:</p>
+
+ <pre>Field name:&nbsp;Field data</pre>
+
+ <pre>This is a blank field</pre>
+
+ <pre>1. These two lines:&nbsp;have the same data
+2. These two lines:have the same data</pre>
+
+ <pre>1. But these two lines:&nbsp; do not
+2. But these two lines:&nbsp;do not</pre>
+ </div>
+
+ <p>If a field name occurs multiple times in a block, the value for that
+ field in that block must consist of the data parts for each of those
+ lines, concatenated with U+000A LINE FEED characters between them
+ (regardless of what the line terminators used in the stream actually are).
+
+ <div class=example>
+ <p>For example, the following block:</p>
+
+ <pre>Test:&nbsp;Line 1
+Foo:&nbsp;&nbsp;Bar
+Test:&nbsp;Line 2</pre>
+
+ <p>...is treated as having two fields, one called <code>Test</code> with
+ the value "<code>Line 1\nLine 2</code>" (where <code>\n</code> represents
+ a newline), and one called <code>Foo</code> with the value
+ "<code>&nbsp;Bar</code>" (note the leading space character).</p>
+ </div>
+
+ <p>A block thus consists of all the name-value pairs for its fields.
+ Command lines have no effect on blocks and are not considered part of a
+ block.
+
+ <p class=note>Since any random stream of characters matches the above
+ format, there is no need to define any error handling.
+
+ <h4 id=event-stream-interpretation><span class=secno>6.2.4.
+ </span>Interpreting an event stream</h4>
+
+ <p>Once the fields have been parsed, they are interpreted as follows (these
+ are case-sensitive exact comparisons):
+
+ <dl>
+ <dt><code title="">Event</code> field
+
+ <dd>
+ <p>This field gives the name of the event. For example, <code
+ title="">load</code>, <code title="">DOMActivate</code>, <code
+ title="">updateTicker</code>. If there is no field with this name, the
+ name <code title=event-message><a href="#message">message</a></code>
+ must be used.
+
+ <dt><code title="">Namespace</code> field
+
+ <dd>
+ <p>This field gives the DOM3 namespace for the event. (For normal DOM
+ events this would be null.) If it isn't specified the event namespace is
+ null.
+
+ <dt><code title="">Class</code> field
+
+ <dd>
+ <p>This field gives the interface used for the event, for instance
+ <code>Event</code>, <code>UIEvent</code>, <code>MutationEvent</code>,
+ <code>KeyboardEvent</code>, etc. For compatibility with DOM3 Events, the
+ values <code title="">UIEvents</code>, <code
+ title="">MouseEvents</code>, <code title="">MutationEvents</code>, and
+ <code title="">HTMLEvents</code> are valid values and must be treated
+ respectively as meaning the interfaces <code>UIEvent</code>,
+ <code>MouseEvent</code>, <code>MutationEvent</code>, and
+ <code>Event</code>. (This value can therefore be used as the argument to
+ <code title="">createEvent()</code>.)</p>
+
+ <p>If the value is not specified but the <code title="">Namespace</code>
+ is null and the <code title="">Event</code> field exactly matches one of
+ the events specified by DOM3 Events in <a
+ href="http://www.w3.org/TR/DOM-Level-3-Events/events.html#Events-EventTypes-complete">section
+ 1.4.2 "Complete list of event types"</a>, then the interface used must
+ default to the interface relevant for that event type. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a></p>
+
+ <div class=example>
+ <p>For example:</p>
+
+ <pre>Event: click</pre>
+
+ <p>...would cause <code title="">Class</code> to be treated as
+ <code>MouseEvent</code>.</p>
+ </div>
+
+ <p>If the <code title="">Namespace</code> is null and the <code
+ title="">Event</code> field is <code title=event-message><a
+ href="#message">message</a></code> (including if it was not specified
+ explicitly), then the <code><a
+ href="#messageevent">MessageEvent</a></code> interface must be used.</p>
+
+ <p>Otherwise, the <code>Event</code> interface must be used.</p>
+
+ <p>It is quite possible to give the wrong class for an event. This is
+ equivalent to creating an event in the DOM using the DOM Event APIs, but
+ using the wrong interface for it.</p>
+
+ <dt><code title="">Bubbles</code> field
+
+ <dd>
+ <p>This field specifies whether the event is to bubble. If it is
+ specified and has the value <code title="">No</code>, the event must not
+ bubble. If it is specified and has any other value (including <code
+ title="">no</code> or <code title="">NO</code>) then the event must
+ bubble.</p>
+
+ <p>If it is not specified but the <code title="">Namespace</code> field
+ is null and the <code title="">Event</code> field exactly matches one of
+ the events specified by DOM3 Events in <a
+ href="http://www.w3.org/TR/DOM-Level-3-Events/events.html#Events-EventTypes-complete">section
+ 1.4.2 "Complete list of event types"</a>, then the event must bubble if
+ the DOM3 Events spec specifies that that event bubbles, and mustn't
+ bubble if it specifies it does not. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a></p>
+
+ <div class=example>
+ <p>For example:</p>
+
+ <pre>Event: load</pre>
+
+ <p>...would cause <code title="">Bubbles</code> to be treated as <code
+ title="">No</code>.</p>
+ </div>
+
+ <p>Otherwise, the event must bubble.</p>
+
+ <dt><code title="">Cancelable</code> field
+
+ <dd>
+ <p>This field specifies whether the event can have its default action
+ prevented. If it is specified and has the value <code
+ title="">No</code>, the event must not be cancelable. If it is specified
+ and has any other value (including <code title="">no</code> or <code
+ title="">NO</code>) then the event must be cancelable.</p>
+
+ <p>If it is not specified, but the <code title="">Namespace</code> field
+ is null and the <code title="">Event</code> field exactly matches one of
+ the events specified by DOM3 Events in <a
+ href="http://www.w3.org/TR/DOM-Level-3-Events/events.html#Events-EventTypes-complete">section
+ 1.4.2 "Complete list of event types"</a>, then the event must be
+ cancelable if the DOM3 Events specification specifies that it is, and
+ must not be cancelable otherwise. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a></p>
+
+ <div class=example>
+ <p>For example:</p>
+
+ <pre>Event: load</pre>
+
+ <p>...would cause <code title="">Cancelable</code> to be treated as
+ <code title="">No</code>.</p>
+ </div>
+
+ <p>Otherwise, the event must be cancelable.</p>
+
+ <dt><code title="">Target</code> field
+
+ <dd>
+ <p>This field gives the node that the event is to be dispatched on.</p>
+
+ <p>If the object for which the event source is being processed is not a
+ Node, but the <code title="">Target</code> field is nonetheless
+ specified, then the event must be dropped.</p>
+
+ <p>Otherwise, if the field is specified and its value starts with a <code
+ title="">#</code> character, then the remainder of the value represents
+ an ID, and the event must be dispatched on the same node as would be
+ obtained by the <code title="">getElementById()</code> method on the
+ <code title="">ownerDocument</code> of the node whose event source is
+ being processed.</p>
+
+ <div class=example>
+ <p>For example,</p>
+
+ <pre>Target: #test</pre>
+
+ <p>...would target the element with ID <code title="">test</code>.</p>
+ </div>
+
+ <p>Otherwise, if the field is specified and its value is the literal
+ string "<code title="">Document</code>", then the event must be
+ dispatched at the <code title="">ownerDocument</code> of the node whose
+ event source is being processed.</p>
+
+ <p>Otherwise, the field (whether specified or not) is ignored and the
+ event must be dispatched at the object itself.</p>
+ </dl>
+
+ <p>Other fields depend on the interface specified (or possibly implied) by
+ the <code title="">Class</code> field. If the specified interface has an
+ attribute that exactly matches the name of the field, and the value of the
+ field can be converted (using the type conversions defined in ECMAScript)
+ to the type of the attribute, then it must be used. Any attributes (other
+ than the <code>Event</code> interface attributes) that do not have
+ matching fields are initialised to zero, null, false, or the empty string.
+
+ <div class=example>
+ <p>For example:</p>
+
+ <pre>Event: click
+Class: MouseEvent
+button: 2</pre>
+
+ <p>...would result in a 'click' event using the <code>MouseEvent</code>
+ interface that has <code>button</code> set to <code title="">2</code> but
+ <code>screenX</code>, <code>screenY</code>, etc, set to 0, false, or null
+ as appropriate.</p>
+ </div>
+
+ <p>If a field does not match any of the attributes on the event, it must be
+ ignored.
+
+ <div class=example>
+ <p>For example:</p>
+
+ <pre>Event: keypress
+Class: MouseEvent
+keyIdentifier: 0</pre>
+
+ <p>...would result in a <code>MouseEvent</code> event with its fields all
+ at their default values, with the event name being <code
+ title="">keypress</code>. The <code title="">keyIdentifier</code> field
+ would be ignored. (If the author had not included the <code
+ title="">Class</code> field explicitly, it would have just worked, since
+ the class would have defaulted as described above.)</p>
+ </div>
+
+ <p>Once a blank line or the end of the file is reached, an event of the
+ type and namespace given by the <code title="">Event</code> and
+ <code>Namespace</code> fields respectively must be synthesized and
+ dispatched to the appropriate node as described by the fields above. No
+ event must be dispatched until a blank line has been received or the end
+ of the file reached.
+
+ <p>The event must be dispatched as if using the DOM <code
+ title="">dispatchEvent()</code> method. Thus, if the <code
+ title="">Event</code> field was omitted, leaving the name as the empty
+ string, or if the name had invalid characters, then the dispatching of the
+ event fails.
+
+ <p>Events fired from event sources do not have user-agent default actions.
+
+ <div class=example>
+ <p>The following event stream, once followed by a blank line:</p>
+
+ <pre>data: YHOO
+data: -2
+data: 10</pre>
+
+ <p>...would cause an event <code title=event-message><a
+ href="#message">message</a></code> with the interface <code><a
+ href="#messageevent">MessageEvent</a></code> to be dispatched on the
+ <code><a href="#event-source">event-source</a></code> element, which
+ would then bubble up the DOM, and whose <code
+ title=dom-MessageEvent-data><a href="#data4">data</a></code> attribute
+ would contain the string <code>YHOO\n-2\n10</code> (where <code>\n</code>
+ again represents a newline).</p>
+
+ <p>This could be used as follows:
+
+ <pre>&lt;event-source src="http://stocks.example.com/ticker.php"
+ onmessage="var data = event.data.split('\n'); updateStocks(data[0], data[1], data[2]);"&gt;</pre>
+
+ <p>...where <code title="">updateStocks()</code> is a function defined as:</p>
+
+ <pre>function updateStocks(symbol, delta, value) { ... }</pre>
+
+ <p>...or some such.</p>
+ </div>
+
+ <div class=example>
+ <p>The following stream contains four blocks and therefore fires four
+ events. The first block has just a comment, and will fire a <code
+ title=event-message><a href="#message">message</a></code> event with all
+ the fields set to the empty string or null. The second block has two
+ fields with names "load" and "Target" respectively; since there is no
+ "<code title="">load</code>" member on the <code><a
+ href="#messageevent">MessageEvent</a></code> object that field is
+ ignored, leaving the event as a second <code title=event-message><a
+ href="#message">message</a></code> event with all the fields set to the
+ empty string or null, but this time the event is targeted at an element
+ with ID "image1". The third block is empty (no lines between two blank
+ lines), and the fourth block has only two comments, so they both yet
+ again fire <code title=event-message><a
+ href="#message">message</a></code> events with all the fields set to the
+ empty string or null.</p>
+
+ <pre>; test
+
+load
+Target: #image1
+
+
+; if any more events follow this block, they will not be affected by
+; the "Target" and "load" fields above.
+</pre>
+ </div>
+
+ <h4 id=notes><span class=secno>6.2.5. </span>Notes</h4>
+
+ <p>Legacy proxy servers are known to, in certain cases, drop HTTP
+ connections after a short timeout. To protect against such proxy servers,
+ authors can include a comment line (one starting with a ';' character)
+ every 15 seconds or so.
+
+ <p>Authors wishing to relate event source connections to each other or to
+ specific documents previously served might find that relying on IP
+ addresses doesn't work, as individual clients can have multiple IP
+ addresses (due to having multiple proxy servers) and individual IP
+ addresses can have multiple clients (due to sharing a proxy server). It is
+ better to include a unique identifier in the document when it is served
+ and then pass that identifier as part of the URI in the <code
+ title=attr-event-source-src><a href="#src11">src</a></code> attribute of
+ the <code><a href="#event-source">event-source</a></code> element.
+
+ <p>Implementations that support HTTP's per-server connection limitation
+ might run into trouble when opening multiple pages from a site if each
+ page has an <code><a href="#event-source">event-source</a></code> to the
+ same domain. Authors can avoid this using the relatively complex mechanism
+ of using unique domain names per connection, or by allowing the user to
+ enable or disable the <code><a
+ href="#event-source">event-source</a></code> functionality on a per-page
+ basis.
+
+ <h3 id=network><span class=secno>6.3. </span>Network connections</h3>
+
+ <p>To enable Web applications to communicate with each other in local area
+ networks, and to maintain bidirectional communications with their
+ originating server, this specification introduces the <code><a
+ href="#connection0">Connection</a></code> interface.
+
+ <p>The <code><a href="#window">Window</a></code> interface provides three
+ constructors for creating <code><a
+ href="#connection0">Connection</a></code> objects: <code
+ title=dom-TCPConnection><a
+ href="#tcpconnection">TCPConnection()</a></code>, for creating a direct
+ (possibly encrypted) link to another node on the Internet using TCP/IP;
+ <code title=dom-LocalBroadcastConnection><a
+ href="#localbroadcastconnection">LocalBroadcastConnection()</a></code>,
+ for creating a connection to any listening peer on a local network (which
+ could be a local TCP/IP subnet using UDP, a Bluetooth PAN, or another kind
+ of network infrastructure); and <code title=dom-PeerToPeerConnection><a
+ href="#peertopeerconnection">PeerToPeerConnection()</a></code>, for a
+ direct peer-to-peer connection (which could again be over TCP/IP,
+ Bluetooth, IrDA, or some other type of network).
+
+ <p class=note>This interface does not allow for raw access to the
+ underlying network. For example, this interface could not be used to
+ implement an IRC client without proxying messages through a custom server.
+
+ <h4 id=network-intro><span class=secno>6.3.1. </span>Introduction</h4>
+
+ <p><em>This section is non-normative.</em>
+
+ <p class=big-issue>An introduction to the client-side and server-side of
+ using the direct connection APIs.
+
+ <p class=big-issue>An example of a party-line implementation of a broadcast
+ service, and direct peer-to-peer chat for direct local connections.</p>
+ <!--
+ <div class="example">
+ <p>The following script creates a connection to a local party
+ line:</p>
+ <pre>var a = new LocalBroadcastConnection();
+ a.onread = function(e) { alert(e.source + ' wrote ' + e.data); }
+ a.send('hello');</pre>
+ </div>
+-->
+ <!--XXX
+ Explain why we don't use HTTP instead of our own protocol: wouldn't
+ work for peer-to-peer, too much work to implement server if you
+ have to implement a compliant HTTP server as well, etc
+ -->
+
+ <h4 id=the-connection><span class=secno>6.3.2. </span>The <code><a
+ href="#connection0">Connection</a></code> interface</h4>
+
+ <pre class=idl>interface <dfn id=connection0>Connection</dfn> {
+ readonly attribute DOMString <a href="#network1" title=dom-Connection-network>network</a>;
+ readonly attribute DOMString <a href="#peer" title=dom-Connection-peer>peer</a>;
+ readonly attribute int <a href="#readystate0" title=dom-Connection-readyState>readyState</a>;
+ attribute EventListener <a href="#onopen" title=dom-Connection-onopen>onopen</a>;
+ attribute EventListener <a href="#onread" title=dom-Connection-onread>onread</a>;
+ attribute EventListener <a href="#onclose" title=dom-Connection-onclose>onclose</a>;
+ void <a href="#send" title=dom-Connection-send>send</a>(in DOMString data);
+ void <a href="#disconnect" title=dom-Connection-disconnect>disconnect</a>();
+};</pre>
+
+ <p><code><a href="#connection0">Connection</a></code> objects must also
+ implement the <code>EventTarget</code> interface. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>When a <code><a href="#connection0">Connection</a></code> object is
+ created, the UA must try to establish a connection, as described in the
+ sections below describing each connection type.
+
+ <p>The <dfn id=network1
+ title=dom-Connection-network><code>network</code></dfn> attribute
+ represents the name of the network connection (the value depends on the
+ kind of connection being established). The <dfn id=peer
+ title=dom-Connection-peer><code>peer</code></dfn> attribute identifies the
+ remote host for direct (non-broadcast) connections.
+
+ <p>The <code title=dom-Connection-network><a
+ href="#network1">network</a></code> attribute must be set as soon as the
+ <code><a href="#connection0">Connection</a></code> object is created, and
+ keeps the same value for the lifetime of the object. The <code
+ title=dom-Connection-peer><a href="#peer">peer</a></code> attribute must
+ initially be set to the empty string and must be updated once, when the
+ connection is established, after which point it must keep the same value
+ for the lifetime of the object.
+
+ <p>The <dfn id=readystate0
+ title=dom-Connection-readyState><code>readyState</code></dfn> attribute
+ represents the state of the connection. When the object is created it must
+ be set to 0. It can have the following values:
+
+ <dl>
+ <dt>0 Connecting
+
+ <dd>The connection has not yet been established.
+
+ <dt>1 Connected
+
+ <dd>The connection is established and communication is possible.
+
+ <dt>2 Closed
+
+ <dd>The connection has been closed.
+ </dl>
+
+ <p id=openConnection>Once a connection is established, the <code
+ title=dom-Connection-readyState><a
+ href="#readystate0">readyState</a></code> attribute's value must be
+ changed to 1, and the <code title=event-connection-open><a
+ href="#open3">open</a></code> event must be fired on the <code><a
+ href="#connection0">Connection</a></code> object.
+
+ <p>When data is received, the <code title=event-connection-read><a
+ href="#read">read</a></code> event will be fired on the <code><a
+ href="#connection0">Connection</a></code> object.</p>
+ <!-- conf crit for this
+ statement is in the various protocol-specific sections below. -->
+
+ <p id=closeConnection>When the connection is closed, the <code
+ title=dom-Connection-readyState><a
+ href="#readystate0">readyState</a></code> attribute's value must be
+ changed to 2, and the <code title=event-connection-close><a
+ href="#close0">close</a></code> event must be fired on the <code><a
+ href="#connection0">Connection</a></code> object.
+
+ <p>The <dfn id=onopen
+ title=dom-Connection-onopen><code>onopen</code></dfn>, <dfn id=onread
+ title=dom-Connection-onread><code>onread</code></dfn>, and <dfn id=onclose
+ title=dom-Connection-onclose><code>onclose</code></dfn> attributes must,
+ when set, register their new value as an event listener for their
+ respective events (namely <code title=event-connection-open><a
+ href="#open3">open</a></code>, <code title=event-connection-read><a
+ href="#read">read</a></code>, and <code title=event-connection-close><a
+ href="#close0">close</a></code>), and unregister their previous value if
+ any.
+
+ <p>The <dfn id=send title=dom-Connection-send><code>send()</code></dfn>
+ method transmits data using the connection. If the connection is not yet
+ established, it must raise an <code>INVALID_STATE_ERR</code> exception. If
+ the connection <em>is</em> established, then the behaviour depends on the
+ connection type, as described below.
+
+ <p>The <dfn id=disconnect
+ title=dom-Connection-disconnect><code>disconnect()</code></dfn> method
+ must close the connection, if it is open. If the connection is already
+ closed, it must do nothing. Closing the connection causes a <code
+ title=event-connection-close><a href="#close0">close</a></code> event to
+ be fired and the <code title=dom-Connection-readyState><a
+ href="#readystate0">readyState</a></code> attribute's value to change, as
+ <a href="#closeConnection">described above</a>.
+
+ <h4 id=connection><span class=secno>6.3.3. </span>Connection Events</h4>
+
+ <p>All the events described in this section are events in no namespace,
+ which do not bubble, are not cancelable, and have no default action.
+
+ <p>The <dfn id=open3 title=event-connection-open><code>open</code></dfn>
+ event is fired when the connection is established. UAs must use the normal
+ <code>Event</code> interface when firing this event.
+
+ <p>The <dfn id=close0 title=event-connection-close><code>close</code></dfn>
+ event is fired when the connection is closed (whether by the author,
+ calling the <code title=dom-Connection-disconnect><a
+ href="#disconnect">disconnect()</a></code> method, or by the server, or by
+ a network error). UAs must use the normal <code>Event</code> interface
+ when firing this event as well.
+
+ <p class=note>No information regarding why the connection was closed is
+ passed to the application in this version of this specification.
+
+ <p>The <dfn id=read title=event-connection-read><code>read</code></dfn>
+ event is fired when data is received for a connection. UAs must use
+ the <code><a href="#connectionreadevent">ConnectionReadEvent</a></code>
+ interface for this event.
+
+ <pre
+ class=idl>interface <dfn id=connectionreadevent>ConnectionReadEvent</dfn> : Event {
+ readonly attribute DOMString <a href="#data5" title=dom-ConnectionReadEvent-data>data</a>;
+ readonly attribute DOMString <a href="#source2" title=dom-ConnectionReadEvent-source>source</a>;
+ void <a href="#initconnectionreadevent" title=dom-ConnectionReadEvent-initConnectionReadEvent>initConnectionReadEvent</a>(in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMString dataArg);
+ void <a href="#initconnectionreadeventns" title=dom-ConnectionReadEvent-initConnectionReadEventNS>initConnectionReadEventNS</a>(in DOMString namespaceURI, in DOMString typeArg, in boolean canBubbleArg, in boolean cancelableArg, in DOMString dataArg);
+};
+</pre>
+
+ <p>The <dfn id=initconnectionreadevent
+ title=dom-ConnectionReadEvent-initConnectionReadEvent><code>initConnectionReadEvent()</code></dfn>
+ and <dfn id=initconnectionreadeventns
+ title=dom-ConnectionReadEvent-initConnectionReadEventNS><code>initConnectionReadEventNS()</code></dfn>
+ methods must initialise the event in a manner analogous to the
+ similarly-named methods in the DOM3 Events interfaces. <a
+ href="#refsDOM3EVENTS">[DOM3EVENTS]</a>
+
+ <p>The <dfn id=data5
+ title=dom-ConnectionReadEvent-data><code>data</code></dfn> attribute
+ represents the data that was transmitted from the peer.
+
+ <p>The <dfn id=source2
+ title=dom-ConnectionReadEvent-source><code>source</code></dfn> attribute
+ represents the name of the peer. This is primarily useful on broadcast
+ connections; on direct connections it is equal to the <code
+ title=dom-Connection-peer><a href="#peer">peer</a></code> attribute on the
+ <code><a href="#connection0">Connection</a></code> object.</p>
+ <!-- XXX check that the following three sections define "the data
+ that was transmitted" and "the name of the peer" in terms that mean
+ they fit into the above definitions ("for the purposes of the
+ ConnectionReadEvent"), and check they say that they MUST be set
+ correctly. -->
+ <!-- XXX should we have a Connection attribute on the event? -->
+
+ <p>Events that would be fired during script execution (e.g. between the
+ connection object being created &mdash; and thus the connection being
+ established &mdash; and the current script completing; or, during the
+ execution of a <code title=event-connection-read><a
+ href="#read">read</a></code> event handler) must be buffered, and those
+ events queued up and each one individually fired after the script has
+ completed.</p>
+ <!-- XXX make this more generic -->
+
+ <h4 id=tcp-connections><span class=secno>6.3.4. </span>TCP connections</h4>
+
+ <p>The <dfn id=tcpconnection
+ title=dom-TCPConnection><code>TCPConnection(<var title="">subdomain</var>,
+ <var title="">port</var>, <var title="">secure</var>)</code></dfn>
+ constructor on the <code><a href="#window">Window</a></code> interface
+ returns a new object implementing the <code><a
+ href="#connection0">Connection</a></code> interface, set up for a direct
+ connection to a specified host on the page's domain.
+
+ <p>When this constructor is invoked, the following steps must be followed.
+
+ <p>First, if the domain part of the script's <a href="#origin0">origin</a>
+ is not a host name (e.g. it is an IP address) then the UA must raise a <a
+ href="#security8">security exception</a>. <span class=issue>We currently
+ don't allow connections to be set up back to an originating IP address,
+ but we could, if the subdomain is the empty string.</span>
+
+ <p>Then, if the <var title="">subdomain</var> argument is null or the empty
+ string, the target host is the domain part of the script's <a
+ href="#origin0">origin</a>. Otherwise, the <var title="">subdomain</var>
+ argument is prepended to the domain part of the script's origin with a dot
+ separating the two strings, and that is the target host.
+
+ <p>If either:
+
+ <ul>
+ <li>the target host is not a valid host name, or
+
+ <li>the <var title="">port</var> argument is neither equal to 80, nor
+ equal to 443, nor greater than or equal to 1024 and less than or equal to
+ 65535,
+ </ul>
+
+ <p>...then the UA must raise a <a href="#security8">security exception</a>.</p>
+ <!-- XXX we should have our own port for this too, e.g. 980 -->
+
+ <p>Otherwise, the user agent must verify that <a href="#the-string">the
+ string representing the script's domain in IDNA format</a> can be obtained
+ without errors. If it cannot, then the user agent must raise a <a
+ href="#security8">security exception</a>.
+
+ <p>The user agent may also raise a <a href="#security8">security
+ exception</a> at this time if, for some reason, permission to create a
+ direct TCP connection to the relevant host is denied. Reasons could
+ include the UA being instructed by the user to not allow direct
+ connections, or the UA establishing (for instance using UPnP) that the
+ network topology will cause connections on the specified port to be
+ directed at the wrong host.
+
+ <p>If no exceptions are raised by the previous steps, then a new <code><a
+ href="#connection0">Connection</a></code> object must be created, its
+ <code title=dom-Connection-peer><a href="#peer">peer</a></code> attribute
+ must be set to a string consisting of the name of the target host, a colon
+ (U+003A COLON), and the port number as decimal digits, and its <code
+ title=dom-Connection-network><a href="#network1">network</a></code>
+ attribute must be set to the same value as the <code
+ title=dom-Connection-peer><a href="#peer">peer</a></code> attribute.
+
+ <p>This object must then be returned.
+
+ <p>The user agent must then begin trying to establish a connection with the
+ target host and specified port. (This typically would begin in the
+ background, while the script continues to execute.)
+
+ <p>If the <var title="">secure</var> boolean argument is set to true, then
+ the user agent must establish a secure connection with the target host and
+ specified port using TLS or another protocol, negotiated with the server.
+ <a href="#refsRFC2246">[RFC2246]</a> If this fails the user agent must act
+ as if it had <a href="#closeConnection">closed the connection</a>.
+
+ <p>Once a secure connection is established, or if the <var
+ title="">secure</var> boolean argument is not set to true, then the user
+ agent must continue to connect to the server using the protocol described
+ in the section entitled <a href="#clients0">clients connecting over
+ TCP</a>. All data on connections made using TLS must be sent as
+ "application data".
+
+ <p>Once the connection is established, the UA must act as described in the
+ section entitled <a href="#sending0">sending and receiving data over
+ TCP</a>.
+
+ <p>User agents should allow multiple TCP connections to be established per
+ host. In particular, user agents should not apply per-host HTTP connection
+ limits to connections established with the <code
+ title=dom-TCPConnection><a href="#tcpconnection">TCPConnection</a></code>
+ constructor.
+
+ <h4 id=broadcast><span class=secno>6.3.5. </span>Broadcast connections</h4>
+
+ <p>The <dfn id=localbroadcastconnection
+ title=dom-LocalBroadcastConnection><code>LocalBroadcastConnection()</code></dfn>
+ constructor on the <code><a href="#window">Window</a></code> interface
+ returns a new object implementing the <code><a
+ href="#connection0">Connection</a></code> interface, set up to broadcast
+ on the local network.
+
+ <p>When this constructor is invoked, a new <code><a
+ href="#connection0">Connection</a></code> object must be created.
+
+ <p>The <code title=dom-Connection-network><a
+ href="#network1">network</a></code> attribute of the object must be set to
+ <a href="#the-string">the string representing the script's domain in IDNA
+ format</a>. If this string cannot be obtained, then the user agent must
+ raise a <a href="#security8">security exception</a> when the
+ constructor is called.
+
+ <p>The <code title=dom-Connection-peer><a href="#peer">peer</a></code>
+ attribute must be set to the empty string.
+
+ <p>The object must then be returned, unless, for some reason, permission to
+ broadcast on the local network is to be denied. In the latter case, a <a
+ href="#security8">security exception</a> must be raised instead. User
+ agents may deny such permission for any reason, for example a user
+ preference.
+
+ <p>If the object is returned (i.e. if no exception is raised), the user
+ agent must then begin broadcasting and listening on the local network, in
+ the background, as described below. The user agent may define "the local
+ network" in any way it considers appropriate and safe; for instance the
+ user agent may ask the user which network (e.g. Bluetooth, IrDA, Ethernet,
+ etc) the user would like to broadcast on before beginning broadcasting.
+
+ <p>UAs may broadcast and listen on multiple networks at once. For example,
+ the UA could broadcast on both Bluetooth and Wifi at the same time.</p>
+ <!-- XXX bridging? how do we handle one UA not seeing
+ the same hosts as another UA? -->
+
+ <p>As soon as the object is returned, the connection <a
+ href="#openConnection">has been established</a>, which implies that the
+ <code title=event-connection-open><a href="#open3">open</a></code> event
+ must be fired. Broadcast connections are never closed.
+
+ <h5 id=broadcasting><span class=secno>6.3.5.1. </span>Broadcasting over
+ TCP/IP</h5>
+
+ <p class=big-issue>Should we drop this altogether? Letting people fill the
+ local network with garbage seems unwise.
+
+ <p class=big-issue>We need to register a UDP port for this. For now this
+ spec refers to port 18080/udp.
+
+ <p class=note>Since this feature requires that the user agent listen to a
+ particular port, some platforms might prevent more than one user agent per
+ IP address from using this feature at any one time.
+
+ <p>On TCP/IP networks, broadcast connections transmit data using UDP over
+ port 18080.
+
+ <p>When the <code title=dom-Connection-send><a href="#send">send(<var
+ title="">data</var>)</a></code> method is invoked on a <code><a
+ href="#connection0">Connection</a></code> object that was created by the
+ <code title=dom-LocalBroadcastConnection><a
+ href="#localbroadcastconnection">LocalBroadcastConnection()</a></code>
+ constructor, the user agent must follow these steps:
+
+ <ol>
+ <li>Create a string consisting of the value of the <code
+ title=dom-Connection-network><a href="#network1">network</a></code>
+ attribute of the <code><a href="#connection0">Connection</a></code>
+ object, a U+0020 SPACE character, a U+0002 START OF TEXT character, and
+ the <var title="">data</var> argument.
+
+ <li>Encode the string as UTF-8.
+
+ <li>If the resulting byte stream is longer than 65487 bytes, raise an
+ <code>INDEX_SIZE_ERR</code> DOM exception and stop.
+
+ <li>Create a UDP packet whose data is the byte stream, with the source and
+ destination ports being 18080, and with appropriate length and checksum
+ fields. Transmit this packet to IPv4 address 255.255.255.255 or IPv6
+ address ff02::1, as appropriate. <span class=note>IPv6 applications will
+ also have to enable reception from this address.</span>
+ </ol>
+
+ <p>When a broadcast connection is opened on a TCP/IP network, the user
+ agent should listen for UDP packets on port 18080.
+
+ <p>When the user agent receives a packet on port 18080, the user agent must
+ attempt to decode that packet's data as UTF-8. If the data is not fully
+ correct UTF-8 (i.e. if there are decoding errors) then the packet must be
+ ignored. Otherwise, the user agent must check to see if the decoded string
+ contains a U+0020 SPACE character. If it does not, then the packet must
+ again be ignored (it might be a peer discovery packet from a <code
+ title=dom-PeerToPeerConnection><a
+ href="#peertopeerconnection">PeerToPeerConnection()</a></code>
+ constructor). If it does then the user agent must split the string at the
+ first space character. All the characters before the space are then known
+ as <var title="">d</var>, and all the characters after the space are known
+ as <var title="">s</var>. If <var title="">s</var> is not at least one
+ character long, or if the first character of <var title="">s</var> is not
+ a U+0002 START OF TEXT character, then the packet must be ignored. (This
+ allows for future extension of this protocol.)
+
+ <p>Otherwise, for each <code><a href="#connection0">Connection</a></code>
+ object that was created by the <code title=dom-LocalBroadcastConnection><a
+ href="#localbroadcastconnection">LocalBroadcastConnection()</a></code>
+ constructor and whose <code title=dom-Connection-network><a
+ href="#network1">network</a></code> attribute exactly matches <var
+ title="">d</var>, a <code title=event-connection-read><a
+ href="#read">read</a></code> event must be fired on the <code><a
+ href="#connection0">Connection</a></code> object. The string <var
+ title="">s</var>, with the first character removed, must be used as the
+ <code title=dom-ConnectionReadEvent-data><a href="#data5">data</a></code>,
+ and the source IP address of the packet as the <code
+ title=dom-ConnectionReadEvent-source><a href="#source2">source</a></code>.
+
+ <p class=big-issue>Making the source IP available means that if two or more
+ machines in a private network can be made to go to a hostile page
+ simultaneously, the hostile page can determine the IP addresses used
+ locally (i.e. on the other side of any NAT router). Is there some way we
+ can keep link-local IP addresses secret while still allowing for
+ applications to distinguish between multiple participants?
+
+ <h5 id=bluetooth-broadcast><span class=secno>6.3.5.2. </span>Broadcasting
+ over Bluetooth</h5>
+
+ <p class=big-issue>Does anyone know enough about Bluetooth to write this
+ section?
+
+ <h5 id=irda-broadcast><span class=secno>6.3.5.3. </span>Broadcasting over
+ IrDA</h5>
+
+ <p class=big-issue>Does anyone know enough about IrDA to write this
+ section?
+
+ <h4 id=peer-to-peer><span class=secno>6.3.6. </span>Peer-to-peer
+ connections</h4>
+
+ <p>The <dfn id=peertopeerconnection
+ title=dom-PeerToPeerConnection><code>PeerToPeerConnection()</code></dfn>
+ constructor on the <code><a href="#window">Window</a></code> interface
+ returns a new object implementing the <code><a
+ href="#connection0">Connection</a></code> interface, set up for a direct
+ connection to a user-specified host.
+
+ <p>When this constructor is invoked, a new <code><a
+ href="#connection0">Connection</a></code> object must be created.
+
+ <p>The <code title=dom-Connection-network><a
+ href="#network1">network</a></code> attribute of the object must be set to
+ <a href="#the-string">the string representing the script's domain in IDNA
+ format</a>. If this string cannot be obtained, then the user agent must
+ raise a <a href="#security8">security exception</a> when the
+ constructor is called.
+
+ <p>The <code title=dom-Connection-peer><a href="#peer">peer</a></code>
+ attribute must be set to the empty string.
+
+ <p>The object must then be returned, unless, for some reason, permission to
+ establish peer-to-peer connections is generally disallowed, for example
+ due to administrator settings. In the latter case, a <a
+ href="#security8">security exception</a> must be raised instead.
+
+ <p>The user agent must then, typically while the script resumes execution,
+ find a remote host to establish a connection to. To do this it must start
+ broadcasting and listening for peer discovery messages and listening for
+ incoming connection requests on all the supported networks. How this is
+ performed depends on the type of network and is described below.
+
+ <p>The UA should inform the user of the clients that are detected, and
+ allow the user to select one to connect to. UAs may also allow users to
+ explicitly specify hosts that were not detected, e.g. by having the user
+ enter an IP address.
+
+ <p>If an incoming connection is detected before the user specifies a target
+ host, the user agent should ask the user to confirm that this is the host
+ they wish to connect to. If it is, the connection should be accepted and
+ the UA will act as the <em>server</em> in this connection. (Which UA acts
+ as the server and which acts as the client is not discernible at the DOM
+ API level.)
+
+ <p>If no incoming connection is detected and if the user specifies a
+ particular target host, a connection should be established to that host,
+ with the UA acting as the <em>client</em> in the connection.
+
+ <p>No more than one connection must be established per <code><a
+ href="#connection0">Connection</a></code> object, so once a connection has
+ been established, the user agent must stop listening for further
+ connections (unless, or until such time as, another <code><a
+ href="#connection0">Connection</a></code> object is being created).
+
+ <p>If at any point the user cancels the connection process or the remote
+ host refuses the connection, then the user agent must act as if it had <a
+ href="#closeConnection">closed the connection</a>, and stop trying to
+ connect.
+
+ <h5 id=peer-to-peer0><span class=secno>6.3.6.1. </span>Peer-to-peer
+ connections over TCP/IP</h5>
+
+ <p class=big-issue>Should we replace this section with something that uses
+ Rendez-vous/zeroconf or equivalent?
+
+ <p class=big-issue>We need to register ports for this. For now this spec
+ refers to port 18080/udp and 18080/tcp.
+
+ <p class=note>Since this feature requires that the user agent listen to a
+ particular port, some platforms might prevent more than one user agent per
+ IP address from using this feature at any one time.
+
+ <p>When using TCP/IP, broadcasting peer discovery messages must be done by
+ creating UDP packets every few seconds containing as their data the value
+ of the connection's <code title=dom-Connection-network><a
+ href="#network1">network</a></code> attribute, encoded as UTF-8, with the
+ source and destination ports being set to 18080 and appropriate length and
+ checksum fields, and sending these packets to address (in IPv4)
+ 255.255.255.255 or (in IPv6) ff02::1, as appropriate.
+
+ <p>Listening for peer discovery messages must be done by examining incoming
+ UDP packets on port 18080. <span class=note>IPv6 applications will also
+ have to enable reception from the ff02::1 address.</span> If their payload
+ is exactly byte-for-byte equal to a UTF-8 encoded version of the value of
+ the connection's <code title=dom-Connection-network><a
+ href="#network1">network</a></code> attribute, then the source address of
+ that packet represents the address of a host that is ready to accept a
+ peer-to-peer connection, and it should therefore be offered to the user.
+
+ <p>Incoming connection requests must be listened for on TCP port 18080. If
+ an incoming connection is received, the UA must act as a <em>server</em>,
+ as described in the section entitled <a href="#servers0">servers accepting
+ connections over TCP</a>.
+
+ <p>If no incoming connection requests are accepted and the user instead
+ specifies a target host to connect to, the UA acts as a <em>client</em>:
+ the user agent must attempt to connect to the user-specified host on port
+ 18080, as described in the section entitled <a href="#clients0">clients
+ connecting over TCP</a>.
+
+ <p>Once the connection is established, the UA must act as described in the
+ section entitled <a href="#sending0">sending and receiving data over
+ TCP</a>.
+
+ <p class=note>This specification does not include a way to establish
+ <em>secure</em> (encrypted) peer-to-peer connections at this time. <span
+ class=big-issue>If you can see a good way to do this, let me know.</span>
+
+ <h5 id=bluetooth-peer><span class=secno>6.3.6.2. </span>Peer-to-peer
+ connections over Bluetooth</h5>
+
+ <p class=big-issue>Does anyone know enough about Bluetooth to write this
+ section?
+
+ <h5 id=irda-peer><span class=secno>6.3.6.3. </span>Peer-to-peer connections
+ over IrDA</h5>
+
+ <p class=big-issue>Does anyone know enough about IrDA to write this
+ section?</p>
+ <!--XXX
+ <p>Prompts the user to select a connection to make, which could
+ look like this:</p>
+
+<pre>|:: New Connection :::::::::::::::::::::::::::::::::::::::::|
+| |
+| Select the peer to connect to: |
+| |
+| JohnSmith_Series60 via Bluetooth (( Connect )) |
+| Patrick's Phone via Bluetooth ( Connect ) |
+| John Smith via UDP ( Connect ) |
+| |
+| ( Cancel ) |
+|___________________________________________________________|
+</pre>
+
+ <p>While the prompt is displayed, the UA should broadcast on all
+ supported networks, as described <span title="announcing peer
+ connections">below</span>.</p>
+
+ <p>Returns null if the prompt was canceled. Otherwise, returns a
+ <code>Connection</code> object with its <code>network</code>
+ attribute set to <var title="">topic</var> and its <code>peer</code>
+ attribute set to a string uniquely identifying the selected peer,
+ and opens a connection to that peer. (See: <span>peer connection
+ formats</span>.)</p>
+
+
+ |:: New Connection :::::::::::::::::::::::::::::::::::::::::|
+ | |
+ | Would you like to open a connection called "Chess" for |
+ | this Web site?: |
+ | |
+ | example.org |
+ | |
+ | Select connection to use: [ Bluetooth | v ] |
+ | |
+ | (( Open connection )) ( Cancel ) |
+ |___________________________________________________________|
+
+ c = new LocalBroadcastConnection("Chess");
+ c.onread = function(s, f) { alert("got message " + s + " from " + f); }
+ c.send("hello, anybody there?");
+
+
+ |:: New Connection :::::::::::::::::::::::::::::::::::::::::|
+ | |
+ | Select the peer to connect to: |
+ | |
+ | JohnSmith_Series60 via Bluetooth (( Connect )) |
+ | Patrick's Phone via Bluetooth ( Connect ) |
+ | John Smith via UDP ( Connect ) |
+ | |
+ | ( Cancel ) |
+ |___________________________________________________________|
+
+ c = new LocalPeerConnection("Chess");
+ // c.peer contains peer's name
+ c.onread = function(s) { alert("got message " + s); } // second argument is c.peer
+ c.send("hello");
+
+ c = new TCPConnection("chess.example.com", 8089, false);
+ // c.peer contains 'chess.example.com:8089'
+ c.onread = function(s) { alert("got message " + s); } // second argument is c.peer
+ c.send("hello");
+
+> > Again, what else should we support? Should this have an HTML Element
+> > backing it for more declarative authoring? What error handling do we need?
+> > Should it automatically use bluetooth, TCP/IP broadcast, infrared, or
+> > should it be under the control of the author or user?
+-->
+
+ <h4 id=the-common><span class=secno>6.3.7. </span>The common protocol for
+ TCP-based connections</h4>
+
+ <p>The same protocol is used for <code title=dom-TCPConnection><a
+ href="#tcpconnection">TCPConnection</a></code> and <code
+ title=dom-PeerToPeerConnection><a
+ href="#peertopeerconnection">PeerToPeerConnection</a></code> connection
+ types. This section describes how such connections are established from
+ the client and server sides, and then describes how data is sent and
+ received over such connections (which is the same for both clients and
+ servers).
+
+ <h5 id=clients><span class=secno>6.3.7.1. </span><dfn id=clients0>Clients
+ connecting over TCP</dfn></h5>
+
+ <p>This section defines the client-side requirements of the protocol used
+ by the <code title=dom-TCPConnection><a
+ href="#tcpconnection">TCPConnection</a></code> and <code
+ title=dom-PeerToPeerConnection><a
+ href="#peertopeerconnection">PeerToPeerConnection</a></code> connection
+ types.
+
+ <p>If a TCP connection to the specified target host and port cannot be
+ established, for example because the target host is a domain name that
+ cannot be resolved to an IP address, or because packets cannot be routed
+ to the host, the user agent should retry creating the connection. If the
+ user agent gives up trying to connect, the user agent must act as if it
+ had <a href="#closeConnection">closed the connection</a>.
+
+ <p class=note>No information regarding the state of the connection is
+ passed to the application while the connection is being established in
+ this version of this specification.
+
+ <p>Once a TCP/IP connection to the remote host is established, the user
+ agent must transmit the following sequence of bytes, represented here in
+ hexadecimal form:
+
+ <pre>0x48 0x65 0x6C 0x6C 0x6F 0x0A</pre>
+
+ <p class=note>This represents the string "Hello" followed by a newline,
+ encoded in UTF-8.
+
+ <p>The user agent must then read all the bytes sent from the remote host,
+ up to the first 0x0A byte (inclusive). That string of bytes is then
+ compared byte-for-byte to the following string of bytes:
+
+ <pre>0x57 0x65 0x6C 0x63 0x6F 0x6D 0x65 0x0A</pre>
+
+ <p class=note>This says "Welcome".
+
+ <p>If the server sent back a string in any way different to this, then the
+ user agent must <a href="#closeConnection">close the connection</a> and
+ give up trying to connect.
+
+ <p>Otherwise, the user agent must then take <a href="#the-string">the
+ string representing the script's domain in IDNA format</a>, encode it as
+ UTF-8, and send that to the remote host, followed by a 0x0A byte (a U+000A
+ LINE FEED in UTF-8).
+
+ <p>The user agent must then read all the bytes sent from the remote host,
+ up to the first 0x0A byte (inclusive). That string of bytes must then be
+ compared byte-for-byte to the string that was just sent to the server (the
+ one with the IDNA domain name and ending with a newline character). If the
+ server sent back a string in any way different to this, then the user
+ agent must <a href="#closeConnection">close the connection</a> and give up
+ trying to connect.
+
+ <p>Otherwise, the connection <a href="#openConnection">has been
+ established</a> (and events and so forth get fired, as described above).
+
+ <p>If at any point during this process the connection is closed
+ prematurely, then the user agent must <a href="#closeConnection">close the
+ connection</a> and give up trying to connect.</p>
+ <!-- XXX we should support automatic reconnect -->
+
+ <h5 id=servers><span class=secno>6.3.7.2. </span><dfn id=servers0>Servers
+ accepting connections over TCP</dfn></h5>
+
+ <p>This section defines the server side of the protocol described in the
+ previous section. For authors, it should be used as a guide for how to
+ implement servers that can communicate with Web pages over TCP. For UAs
+ these are the requirements for the server part of <code
+ title=dom-PeerToPeerConnection><a
+ href="#peertopeerconnection">PeerToPeerConnection</a></code>s.
+
+ <p>Once a TCP/IP connection from a remote host is established, the user
+ agent must transmit the following sequence of bytes, represented here in
+ hexadecimal form:
+
+ <pre>0x57 0x65 0x6C 0x63 0x6F 0x6D 0x65 0x0A</pre>
+
+ <p class=note>This says "Welcome" and a newline in UTF-8.
+
+ <p>The user agent must then read all the bytes sent from the remote host,
+ up to the first 0x0A byte (inclusive). That string of bytes is then
+ compared byte-for-byte to the following string of bytes:
+
+ <pre>0x48 0x65 0x6C 0x6C 0x6F 0x0A</pre>
+
+ <p class=note>"Hello" and a newline.
+
+ <p>If the remote host sent back a string in any way different to this, then
+ the user agent must <a href="#closeConnection">close the connection</a>
+ and give up trying to connect.
+
+ <p>Otherwise, the user agent must then take <a href="#the-string">the
+ string representing the script's domain in IDNA format</a>, encode it as
+ UTF-8, and send that to the remote host, followed by a 0x0A byte (a U+000A
+ LINE FEED in UTF-8).
+
+ <p>The user agent must then read all the bytes sent from the remote host,
+ up to the first 0x0A byte (inclusive). That string of bytes must then be
+ compared byte-for-byte to the string that was just sent to that host (the
+ one with the IDNA domain name and ending with a newline character). If the
+ remote host sent back a string in any way different to this, then the user
+ agent must <a href="#closeConnection">close the connection</a> and give up
+ trying to connect.
+
+ <p>Otherwise, the connection <a href="#openConnection">has been
+ established</a> (and events and so forth get fired, as described above).
+
+ <p class=note>For author-written servers (as opposed to the server side of
+ a peer-to-peer connection), the script's domain would be replaced by the
+ hostname of the server. Alternatively, such servers might instead wait for
+ the client to send its domain string, and then simply echo it back. This
+ would allow connections from pages on any domain, instead of just pages
+ originating from the same host. The client compares the two strings to
+ ensure they are the same before allowing the connection to be used by
+ author script.
+
+ <p>If at any point during this process the connection is closed
+ prematurely, then the user agent must <a href="#closeConnection">close the
+ connection</a> and give up trying to connect.</p>
+ <!-- XXX we should support automatic reconnect -->
+
+ <h5 id=sending><span class=secno>6.3.7.3. </span><dfn id=sending0>Sending
+ and receiving data over TCP</dfn></h5>
+
+ <p>When the <code title=dom-Connection-send><a href="#send">send(<var
+ title="">data</var>)</a></code> method is invoked on the connection's
+ corresponding <code><a href="#connection0">Connection</a></code> object,
+ the user agent must take the <var title="">data</var> argument, replace
+ any U+0000 NULL and U+0017 END OF TRANSMISSION BLOCK characters in it with
+ U+FFFD REPLACEMENT CHARACTER characters, then transmit a U+0002 START OF
+ TEXT character, this new <var title="">data</var> string and a single
+ U+0017 END OF TRANSMISSION BLOCK character (in that order) to the remote
+ host, all encoded as UTF-8.
+
+ <p>When the user agent receives bytes on the connection, the user agent
+ must buffer received bytes until it receives a 0x17 byte (a U+0017 END OF
+ TRANSMISSION BLOCK character). If the first buffered byte is not a 0x02
+ byte (a U+0002 START OF TEXT character encoded as UTF-8) then all the data
+ up to the 0x17 byte, inclusive, must be dropped. (This allows for future
+ extension of this protocol.) Otherwise, all the data from (but not
+ including) the 0x02 byte and up to (but not including) the 0x17 byte must
+ be taken, interpreted as a UTF-8 string, and a <code
+ title=event-connection-read><a href="#read">read</a></code> event must be
+ fired on the <code><a href="#connection0">Connection</a></code> object
+ with that string as the <code title=dom-ConnectionReadEvent-data><a
+ href="#data5">data</a></code>. If that string cannot be decoded as UTF-8
+ without errors, the packet should be ignored.
+
+ <p class=note>This protocol does not yet allow binary data (e.g. an image
+ or <a href="#media7">media data</a>) to be efficiently transmitted. A
+ future version of this protocol might allow this by using the prefix
+ character U+001F INFORMATION SEPARATOR ONE, followed by binary data which
+ uses a particular byte (e.g. 0xFF) to encode byte 0x17 somehow (since
+ otherwise 0x17 would be treated as transmission end by down-level UAs).</p>
+ <!--
+ Specifically, replace all occurrences of 0xFF with 0xFF 0xFF and
+ all occurrences of 0x17 with 0xFF 0x00, or similar.
+ -->
+
+ <h4 id=network-security><span class=secno>6.3.8. </span>Security</h4>
+
+ <p class=big-issue>Need to write this section.
+
+ <p class=big-issue>If you have an unencrypted page that is (through a
+ man-in-the-middle attack) changed, it can access a secure service that is
+ using IP authentication and then send that data back to the attacker. Ergo
+ we should probably stop unencrypted pages from accessing encrypted
+ services, on the principle that the actual level of security is zero. Then
+ again, if we do that, we prevent insecure sites from using SSL as a
+ tunneling mechanism.
+
+ <p class=big-issue>Should consider dropping the subdomain-only restriction.
+ It doesn't seem to add anything, and prevents cross-domain chatter.
+
+ <h4 id=network-other-specs><span class=secno>6.3.9. </span>Relationship to
+ other standards</h4>
+
+ <p class=big-issue>Should have a section talking about the fact that we
+ are blithely ignoring IANA's port assignments here.
+
+ <p class=big-issue>Should explain why we are not reusing HTTP for this.
+ (HTTP is too heavy-weight for such a simple need; requiring authors to
+ implement an HTTP server just to have a party line is too much of a
+ barrier to entry; cannot rely on prebuilt components; having a simple
+ protocol makes it much easier to do RAD; HTTP doesn't fit the needs and
+ doesn't have the security model needed; etc)
+
+ <h3 id=crossDocumentMessages><span class=secno>6.4. </span><dfn
+ id=cross-document>Cross-document messaging</dfn></h3>
+
+ <p>Web browsers, for security and privacy reasons, prevent documents in
+ different domains from affecting each other; that is, cross-site scripting
+ is disallowed.
+
+ <p>While this is an important security feature, it prevents pages from
+ different domains from communicating even when those pages are not
+ hostile. This section introduces a messaging system that allows documents
+ to communicate with each other regardless of their source domain, in a way
+ designed to not enable cross-site scripting attacks.
+
+ <p class=big-issue>We may want to just put postMessage on Window instead of
+ Document, as that reduces the XSS risk.
+
+ <h4 id=processing1><span class=secno>6.4.1. </span>Processing model</h4>
+
+ <p>When a script invokes the <dfn id=postmessage
+ title=dom-document-postMessage><code>postMessage(<var
+ title="">message</var>)</code></dfn> method on a <code>Document</code>
+ object, the user agent must create an event that uses the <code><a
+ href="#messageevent">MessageEvent</a></code> interface, with the event
+ name <code title=event-message><a href="#message">message</a></code>,
+ which bubbles, is cancelable, and has no default action. The <code
+ title=dom-MessageEvent-data><a href="#data4">data</a></code> attribute
+ must be set to the value passed as the <var title="">message</var>
+ argument to the <code title=dom-document-postMessage><a
+ href="#postmessage">postMessage()</a></code> method, the <code
+ title=dom-MessageEvent-domain><a href="#domain2">domain</a></code>
+ attribute must be set to the domain of the document that the script that
+ invoked the method is associated with, the <code
+ title=dom-MessageEvent-uri><a href="#uri">uri</a></code> attribute must be
+ set to the URI of that document, and the <code
+ title=dom-MessageEvent-source><a href="#source1">source</a></code>
+ attribute must be set to the <code>Document</code> object representing
+ that document.
+
+ <p>The event must then be dispatched at the <code>Document</code> object
+ itself.</p>
+ <!-- XXX must ensure that postMessage() is accessible on
+ cross-domain Document objects but that the dispatchEvent() method is
+ not. -->
+
+ <p class=warning>Authors should check the <code
+ title=dom-MessageEvent-domain><a href="#domain2">domain</a></code>
+ attribute to ensure that messages are only accepted from domains that they
+ expect to receive messages from. Otherwise, bugs in the author's message
+ handling code could be exploited by hostile sites.
+
+ <div class=example>
+ <p>For example, if document A contains an <code><a
+ href="#object">object</a></code> element that contains document B, and
+ script in document A calls <code title=dom-document-postMessage><a
+ href="#postmessage">postMessage()</a></code> on document B, then a
+ message event will be fired on that element, marked as originating from
+ document A. The script in document A might look like:</p>
+
+ <pre>var o = document.getElementsByTagName('object')[0];
+o.<span title="">contentDocument</span>.<a href="#postmessage" title=dom-document-postMessage>postMessage</a>('Hello world');
+</pre>
+
+ <p>To register an event handler for incoming events, the script would use
+ <code title="">addEventListener()</code> (or similar mechanisms). For
+ example, the script in document B might look like:</p>
+
+ <pre>document.addEventListener('message', receiver, false);
+function receiver(e) {
+ if (e.domain == 'example.com') {
+ if (e.data == 'Hello world') {
+ e.source.postMessage('Hello');
+ } else {
+ alert(e.data);
+ }
+ }
+}</pre>
+
+ <p>This script first checks the domain is the expected domain, and then
+ looks at the message, which it either displays to the user, or responds
+ to by sending a message back to the document which sent the message in
+ the first place.</p>
+ </div>
+
+ <p class=note>Implementors are urged to take extra care in the
+ implementation of this feature. It allows authors to transmit information
+ from one domain to another domain, which is normally disallowed for
+ security reasons. It also requires that UAs be careful to allow access to
+ certain properties but not others.
+
+ <h2 id=repetition><span class=secno>7. </span>Repetition templates</h2>
+
+ <p class=big-issue>See <a
+ href="http://www.whatwg.org/specs/web-forms/current-work/#repeatingFormControls">WF2</a>
+ for now
+
+ <h2 id=syntax><span class=secno>8. </span>The HTML syntax</h2>
+
+ <h3 id=writing><span class=secno>8.1. </span>Writing HTML documents</h3>
+
+ <p><em>This section only applies to documents, authoring tools, and markup
+ generators. In particular, it does not apply to conformance checkers;
+ conformance checkers must use the requirements given in the next section
+ ("parsing HTML documents").</em>
+
+ <p>Documents must consist of the following parts, in the given order:
+
+ <ol>
+ <li>Optionally, a single U+FEFF BYTE ORDER MARK (BOM) character.
+
+ <li>Any number of <a href="#comments0" title=syntax-comments>comments</a>
+ and <a href="#space" title="space character">space characters</a>.
+
+ <li>A <a href="#doctype" title=syntax-doctype>DOCTYPE</a>.
+
+ <li>Any number of <a href="#comments0" title=syntax-comments>comments</a>
+ and <a href="#space" title="space character">space characters</a>.
+
+ <li>The root element, in the form of an <code><a
+ href="#html">html</a></code> <a href="#elements2"
+ title=syntax-elements>element</a>.
+
+ <li>Any number of <a href="#comments0" title=syntax-comments>comments</a>
+ and <a href="#space" title="space character">space characters</a>.
+ </ol>
+
+ <p>The various types of content mentioned above are described in the next
+ few sections.
+
+ <p>In addition, there are some restrictions on how <a href="#charset0"
+ title=attr-meta-charset>character encoding declarations</a> are to be
+ serialised, as discussed in the section on that topic.
+
+ <h4 id=the-doctype><span class=secno>8.1.1. </span>The DOCTYPE</h4>
+
+ <p>A <dfn id=doctype title=syntax-doctype>DOCTYPE</dfn> is a mostly
+ useless, but required, header.
+
+ <p class=note>DOCTYPEs are required for legacy reasons. When omitted,
+ browsers tend to use a different rendering mode that is incompatible with
+ some specifications. Including the DOCTYPE in a document ensures that the
+ browser makes a best-effort attempt at following the relevant
+ specifications.
+
+ <p>A DOCTYPE must consist of the following characters, in this order:
+
+ <ol class=brief>
+ <li>A U+003C LESS-THAN SIGN (<code>&lt;</code>) character.
+
+ <li>A U+0021 EXCLAMATION MARK (<code>!</code>) character.
+
+ <li>A U+0044 LATIN CAPITAL LETTER D or U+0064 LATIN SMALL LETTER D
+ character.
+
+ <li>A U+004F LATIN CAPITAL LETTER O or U+006F LATIN SMALL LETTER O
+ character.
+
+ <li>A U+0043 LATIN CAPITAL LETTER C or U+0063 LATIN SMALL LETTER C
+ character.
+
+ <li>A U+0054 LATIN CAPITAL LETTER T or U+0074 LATIN SMALL LETTER T
+ character.
+
+ <li>A U+0059 LATIN CAPITAL LETTER Y or U+0079 LATIN SMALL LETTER Y
+ character.
+
+ <li>A U+0050 LATIN CAPITAL LETTER P or U+0070 LATIN SMALL LETTER P
+ character.
+
+ <li>A U+0045 LATIN CAPITAL LETTER E or U+0065 LATIN SMALL LETTER E
+ character.
+
+ <li>One or more <a href="#space" title="space character">space
+ characters</a>.
+
+ <li>A U+0048 LATIN CAPITAL LETTER H or U+0068 LATIN SMALL LETTER H
+ character.
+
+ <li>A U+0054 LATIN CAPITAL LETTER T or U+0074 LATIN SMALL LETTER T
+ character.
+
+ <li>A U+004D LATIN CAPITAL LETTER M or U+006D LATIN SMALL LETTER M
+ character.
+
+ <li>A U+004C LATIN CAPITAL LETTER L or U+006C LATIN SMALL LETTER L
+ character.
+
+ <li>Zero or more <a href="#space" title="space character">space
+ characters</a>.
+
+ <li>A U+003E GREATER-THAN SIGN (<code>&gt;</code>) character.
+ </ol>
+
+ <p class=note>In other words, <code>&lt;!DOCTYPE HTML></code>,
+ case-insensitively.
+
+ <h4 id=elements0><span class=secno>8.1.2. </span>Elements</h4>
+
+ <p>There are four different kinds of <dfn id=elements2
+ title=syntax-elements>elements</dfn>: void elements, CDATA elements,
+ RCDATA elements, and normal elements.
+
+ <dl>
+ <dt>Void elements
+
+ <dd><code><a href="#base">base</a></code>, <code><a
+ href="#link">link</a></code>, <code><a href="#meta0">meta</a></code>,
+ <code><a href="#hr">hr</a></code>, <code><a href="#br">br</a></code>,
+ <code><a href="#img">img</a></code>, <code><a
+ href="#embed">embed</a></code>, <code><a href="#param">param</a></code>,
+ <code><a href="#area">area</a></code>, <code><a
+ href="#col">col</a></code>, <code>input</code><!-- XXX add: ,
+ <code>command</code>, <code>event-source</code> --></dd>
+ <!-- XXX
+ keep this synchronised with the list of "permitted slash" elements
+ -->
+
+ <dt>CDATA elements
+
+ <dd><code><a href="#style">style</a></code>, <code><a
+ href="#script0">script</a></code></dd>
+ <!-- iframe and
+ noscript don't count as CDATA for syntax purposes -->
+
+ <dt>RCDATA elements
+
+ <dd><code><a href="#title1">title</a></code>, <code>textarea</code>
+
+ <dt>Normal elements
+
+ <dd>All other allowed HTML elements are normal elements.
+ </dl>
+
+ <p><dfn id=tags title=syntax-tags>Tags</dfn> are used to delimit the start
+ and end of elements in the markup. CDATA, RCDATA, and normal elements have
+ a <a href="#start5" title=syntax-start-tags>start tag</a> to indicate
+ where they begin, and an <a href="#end-tags0" title=syntax-end-tags>end
+ tag</a> to indicate where they end. The start and end tags of certain
+ normal elements can be <a href="#omitted"
+ title=syntax-tag-omission>omitted</a>, as described later. Those that
+ cannot be omitted must not be omitted. Void elements only have a start
+ tag; end tags must not be specified for void elements.
+
+ <p>The contents of the element must be placed between just after the start
+ tag (which <a href="#omitted" title=syntax-tag-omission>might be implied,
+ in certain cases</a>) and just before the end tag (which again, <a
+ href="#omitted" title=syntax-tag-omission>might be implied in certain
+ cases</a>). The exact allowed contents of each individual element depends
+ on the content model of that element, as described earlier in this
+ specification. Elements must not contain content that their content model
+ disallows. In addition to the restrictions placed on the contents by those
+ content models, however, the four types of elements have additional
+ <em>syntactic</em> requirements.
+
+ <p>Void elements can't have any contents (since there's no end tag, no
+ content can be put between the start tag and the end tag.)
+
+ <p>CDATA elements can have <a href="#text1" title=syntax-text>text</a>,
+ but:
+
+ <ul>
+ <li>The text must not contain the two character sequence "<code
+ title="">&lt;/</code>" (U+003C LESS-THAN SIGN, U+002F SOLIDUS).
+
+ <li>For every occurrence of the four character sequence "<code
+ title="">&lt;!--</code>" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK,
+ U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS), there must be a corresponding
+ three-character sequence "<code title="">--&gt;</code>" (U+002D
+ HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN) whose U+003E
+ GREATER-THAN SIGN (&gt;) character occurs later in the text than the
+ U+003C LESS-THAN SIGN (&lt;) character of the first sequence. (This means
+ the hyphens from the "<code title="">&lt;!--</code>" part can overlap
+ those in the "<code title="">--&gt;</code>" part, as in "<code
+ title="">&lt;!--&gt;</code>".)
+ </ul>
+
+ <p>RCDATA elements can have <a href="#text1" title=syntax-text>text</a> and
+ <a href="#character0" title=syntax-entities>character entity
+ references</a>, but the text must not contain the character U+003C
+ LESS-THAN SIGN (<code>&lt;</code>) or the character U+0026 AMPERSAND
+ (<code>&amp;</code>).
+
+ <p>Normal elements can have <a href="#text1" title=syntax-text>text</a>, <a
+ href="#character0" title=syntax-entities>character entity references</a>,
+ other <a href="#elements2" title=syntax-elements>elements</a>, and <a
+ href="#comments0" title=syntax-comments>comments</a>, but the text must
+ not contain the character U+003C LESS-THAN SIGN (<code>&lt;</code>) or the
+ character U+0026 AMPERSAND (<code>&amp;</code>). Some normal elements also
+ have <a href="#have-extra" title=syntax-element-restrictions>yet more
+ restrictions</a> on what content they are allowed to hold, beyond the
+ restrictions imposed by the content model and those described in this
+ paragraph. Those restrictions are described below.
+
+ <p>Tags contain a <dfn id=tag-name title=syntax-tag-name>tag name</dfn>,
+ giving the element's name. HTML elements all have names that only use
+ characters in the range U+0061 LATIN SMALL LETTER A .. U+007A LATIN SMALL
+ LETTER Z, or, in uppercase, U+0041 LATIN CAPITAL LETTER A .. U+005A LATIN
+ CAPITAL LETTER Z, and U+002D HYPHEN-MINUS (<code>-</code>). In the HTML
+ syntax, tag names may be written with any mix of lower- and uppercase
+ letters that, when converted to all-lowercase, matches the element's tag
+ name; tag names are case-insensitive.
+
+ <h5 id=start><span class=secno>8.1.2.1. </span>Start tags</h5>
+
+ <p><dfn id=start5 title=syntax-start-tags>Start tags</dfn> must have the
+ following format:
+
+ <ol>
+ <li>The first character of a start tag must be a U+003C LESS-THAN SIGN
+ (<code>&lt;</code>).
+
+ <li>The next few characters of a start tag must be the element's <a
+ href="#tag-name" title=syntax-tag-name>tag name</a>.
+
+ <li>If there are to be any attributes in the next step, there must first
+ be one or more <a href="#space" title="space character">space
+ characters</a>.
+
+ <li>Then, the start tag may have a number of attributes, the <a
+ href="#attributes1" title=syntax-attributes>syntax for which</a> is
+ described below. Attributes may be separated from each other by one or
+ more <a href="#space" title="space character">space characters</a>.
+
+ <li>After the attributes, there may be one or more <a href="#space"
+ title="space character">space characters</a>. (Some attributes are
+ required to be followed by a space. See the <a href="#attributes1"
+ title=syntax-attributes>attributes section</a> below.)
+
+ <li>Then, if the element is one of the void elements, then there may be a
+ single U+002F SOLIDUS (<code>/</code>) character. This character has no
+ effect except to appease the markup gods. As this character is therefore
+ just a symbol of faith, atheists should omit it.
+
+ <li>Finally, start tags must be closed by a U+003E GREATER-THAN SIGN
+ (<code>&gt;</code>) character.
+ </ol>
+
+ <h5 id=end-tags><span class=secno>8.1.2.2. </span>End tags</h5>
+
+ <p><dfn id=end-tags0 title=syntax-end-tags>End tags</dfn> must have the
+ following format:
+
+ <ol>
+ <li>The first character of an end tag must be a U+003C LESS-THAN SIGN
+ (<code>&lt;</code>).
+
+ <li>The second character of an end tag must be a U+002F SOLIDUS
+ (<code>/</code>).
+
+ <li>The next few characters of an end tag must be the element's <a
+ href="#tag-name" title=syntax-tag-name>tag name</a>.
+
+ <li>After the tag name, there may be one or more <a href="#space"
+ title="space character">space characters</a>.
+
+ <li>Finally, end tags must be closed by a U+003E GREATER-THAN SIGN
+ (<code>&gt;</code>) character.
+ </ol>
+
+ <h5 id=attributes0><span class=secno>8.1.2.3. </span>Attributes</h5>
+
+ <p><dfn id=attributes1 title=syntax-attributes>Attributes</dfn> for an
+ element are expressed inside the element's start tag.
+
+ <p>Attributes have a name and a value. <dfn id=attribute
+ title=syntax-attribute-name>Attribute names</dfn> use characters in the
+ range U+0061 LATIN SMALL LETTER A .. U+007A LATIN SMALL LETTER Z, or, in
+ uppercase, U+0041 LATIN CAPITAL LETTER A .. U+005A LATIN CAPITAL LETTER Z,
+ and U+002D HYPHEN-MINUS (<code>-</code>). In the HTML syntax, attribute
+ names may be written with any mix of lower- and uppercase letters that,
+ when converted to all-lowercase, matches the attribute's name; attribute
+ names are case-insensitive.
+
+ <p><dfn id=attribute0 title=syntax-attribute-value>Attribute values</dfn>
+ are a mixture of <a href="#text1" title=syntax-text>text</a> and <a
+ href="#character0" title=syntax-entities>character entity references</a>,
+ except with the additional restriction that the text cannot contain a
+ U+0026 AMPERSAND (<code>&amp;</code>) character.
+
+ <p>Attributes can be specified in four different ways:
+
+ <dl>
+ <dt>Empty attribute syntax
+
+ <dd>
+ <p>Just the <a href="#attribute" title=syntax-attribute-name>attribute
+ name</a>.</p>
+
+ <div class=example>
+ <p>In the following example, the <code
+ title=attr-input-disabled>disabled</code> attribute is given with the
+ empty attribute syntax:</p>
+
+ <pre>&lt;input <em>disabled</em>&gt;</pre>
+ </div>
+
+ <p>If an attribute using the empty attribute syntax is to be followed by
+ another attribute, then there must be a <a href="#space">space
+ character</a> separating the two.</p>
+
+ <dt>Unquoted attribute value syntax
+
+ <dd>
+ <p>The <a href="#attribute" title=syntax-attribute-name>attribute
+ name</a>, followed by zero or more <a href="#space" title="space
+ character">space characters</a>, followed by a single U+003D EQUALS SIGN
+ character, followed by zero or more <a href="#space" title="space
+ character">space characters</a>, followed by the <a href="#attribute0"
+ title=syntax-attribute-value>attribute value</a>, which, in addition to
+ the requirements given above for attribute values, must not contain any
+ literal <a href="#space" title="space character">space characters</a>,
+ U+003E GREATER-THAN SIGN (<code>&gt;</code>) characters, or U+003C
+ LESS-THAN SIGN (<code>&lt;</code>) characters, and must not,
+ furthermore, start with either a literal U+0022 QUOTATION MARK
+ (<code>&#x22;</code>) character or a literal U+0027 APOSTROPHE
+ (<code>&#x27;</code>) character.</p>
+
+ <div class=example>
+ <p>In the following example, the <code
+ title=attr-input-value>value</code> attribute is given with the
+ unquoted attribute value syntax:</p>
+
+ <pre>&lt;input <em>value=yes</em>&gt;</pre>
+ </div>
+
+ <p>If an attribute using the unquoted attribute syntax is to be followed
+ by another attribute or by one of the optional U+002F SOLIDUS
+ (<code>/</code>) characters allowed in step 6 of the <span
+ title=syntax-start-tag>start tag</span> syntax above, then there must be
+ a <a href="#space">space character</a> separating the two.</p>
+
+ <dt>Single-quoted attribute value syntax
+
+ <dd>
+ <p>The <a href="#attribute" title=syntax-attribute-name>attribute
+ name</a>, followed by zero or more <a href="#space" title="space
+ character">space characters</a>, followed by a single U+003D EQUALS SIGN
+ character, followed by zero or more <a href="#space" title="space
+ character">space characters</a>, followed by a single U+0027 APOSTROPHE
+ (<code>'</code>) character, followed by the <a href="#attribute0"
+ title=syntax-attribute-value>attribute value</a>, which, in addition to
+ the requirements given above for attribute values, must not contain any
+ literal U+0027 APOSTROPHE (<code>'</code>) characters, and finally
+ followed by a second single U+0027 APOSTROPHE (<code>'</code>)
+ character.</p>
+
+ <div class=example>
+ <p>In the following example, the <code title=attr-input-type>type</code>
+ attribute is given with the single-quoted attribute value syntax:</p>
+
+ <pre>&lt;input <em>type='checkbox'</em>&gt;</pre>
+ </div>
+
+ <dt>Double-quoted attribute value syntax
+
+ <dd>
+ <p>The <a href="#attribute" title=syntax-attribute-name>attribute
+ name</a>, followed by zero or more <a href="#space" title="space
+ character">space characters</a>, followed by a single U+003D EQUALS SIGN
+ character, followed by zero or more <a href="#space" title="space
+ character">space characters</a>, followed by a single U+0022 QUOTATION
+ MARK (<code>"</code>) character, followed by the <a href="#attribute0"
+ title=syntax-attribute-value>attribute value</a>, which, in addition to
+ the requirements given above for attribute values, must not contain any
+ literal U+0022 QUOTATION MARK (<code>"</code>) characters, and finally
+ followed by a second single U+0022 QUOTATION MARK (<code>"</code>)
+ character.</p>
+
+ <div class=example>
+ <p>In the following example, the <code title=attr-input-name>name</code>
+ attribute is given with the double-quoted attribute value syntax:</p>
+
+ <pre>&lt;input <em>name="be evil"</em>&gt;</pre>
+ </div>
+ </dl>
+
+ <h5 id=optional><span class=secno>8.1.2.4. </span>Optional tags</h5>
+
+ <p>Certain tags can be <dfn id=omitted
+ title=syntax-tag-omission>omitted</dfn>.</p>
+ <!-- <html> -->
+
+ <p>An <code><a href="#html">html</a></code> element's <span
+ title=syntax-start-tag>start tag</span> may be omitted if the first thing
+ inside the <code><a href="#html">html</a></code> element is not a <a
+ href="#space">space character</a> or a <a href="#comments0"
+ title=syntax-comments>comment</a>.</p>
+ <!-- </html> -->
+
+ <p>An <code><a href="#html">html</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#html">html</a></code> element is not immediately followed by a <a
+ href="#space">space character</a> or a <a href="#comments0"
+ title=syntax-comments>comment</a>.</p>
+ <!-- <head> -->
+
+ <p>A <code><a href="#head">head</a></code> element's <span
+ title=syntax-start-tag>start tag</span> may be omitted if the first thing
+ inside the <code><a href="#head">head</a></code> element is an element.</p>
+ <!-- </head> -->
+
+ <p>A <code><a href="#head">head</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#head">head</a></code> element is not immediately followed by a <a
+ href="#space">space character</a> or a <a href="#comments0"
+ title=syntax-comments>comment</a>.</p>
+ <!-- <body> -->
+
+ <p>A <code><a href="#body0">body</a></code> element's <span
+ title=syntax-start-tag>start tag</span> may be omitted if the first thing
+ inside the <code><a href="#body0">body</a></code> element is not a <a
+ href="#space">space character</a> or a <a href="#comments0"
+ title=syntax-comments>comment</a>, except if the first thing inside the
+ <code><a href="#body0">body</a></code> element is a <code><a
+ href="#script0">script</a></code> or <code><a
+ href="#style">style</a></code> element and the node immediately preceding
+ the <code><a href="#body0">body</a></code> element is a <code><a
+ href="#head">head</a></code> element whose end tag has been omitted.</p>
+ <!-- </body> -->
+
+ <p>A <code><a href="#body0">body</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#body0">body</a></code> element is not immediately followed by a <a
+ href="#space">space character</a> or a <a href="#comments0"
+ title=syntax-comments>comment</a>. <!-- </li> -->
+
+ <p>A <code><a href="#li">li</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#li">li</a></code> element is immediately followed by another
+ <code><a href="#li">li</a></code> element or if there is no more content
+ in the parent element.</p>
+ <!-- </dt> -->
+
+ <p>A <code><a href="#dt">dt</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#dt">dt</a></code> element is immediately followed by another
+ <code><a href="#dt">dt</a></code> element or a <code><a
+ href="#dd">dd</a></code> element.</p>
+ <!-- </dd> -->
+
+ <p>A <code><a href="#dd">dd</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#dd">dd</a></code> element is immediately followed by another
+ <code><a href="#dd">dd</a></code> element or a <code><a
+ href="#dt">dt</a></code> element, or if there is no more content in the
+ parent element.</p>
+ <!-- </p> -->
+
+ <p>A <code><a href="#p">p</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#p">p</a></code> element is immediately followed by an <code><a
+ href="#address">address</a></code>, <code><a
+ href="#blockquote">blockquote</a></code>, <code><a
+ href="#dl">dl</a></code>, <code>fieldset</code>, <code>form</code>,
+ <code><a href="#h1">h1</a></code>, <code><a href="#h2">h2</a></code>,
+ <code><a href="#h3">h3</a></code>, <code><a href="#h4">h4</a></code>,
+ <code><a href="#h5">h5</a></code>, <code><a href="#h6">h6</a></code>,
+ <code><a href="#hr">hr</a></code>, <code><a href="#menu">menu</a></code>,
+ <code><a href="#ol">ol</a></code>, <code><a href="#p">p</a></code>,
+ <code><a href="#pre">pre</a></code>, <code><a
+ href="#table">table</a></code>, or <code><a href="#ul">ul</a></code>
+ element, or if there is no more content in the parent element.</p>
+ <!-- </optgroup> -->
+
+ <p>An <code>optgroup</code> element's <span title=syntax-end-tag>end
+ tag</span> may be omitted if the <code>optgroup</code> element is
+ immediately followed by another <code>optgroup</code> element, or if there
+ is no more content in the parent element.</p>
+ <!-- </option> -->
+
+ <p>An <code>option</code> element's <span title=syntax-end-tag>end
+ tag</span> may be omitted if the <code>option</code> element is
+ immediately followed by another <code>option</code> element, or if there
+ is no more content in the parent element.</p>
+ <!-- <colgroup> -->
+
+ <p>A <code><a href="#colgroup">colgroup</a></code> element's <span
+ title=syntax-start-tag>start tag</span> may be omitted if the first thing
+ inside the <code><a href="#colgroup">colgroup</a></code> element is a
+ <code><a href="#col">col</a></code> element, and if the element is not
+ immediately preceded by another <code><a
+ href="#colgroup">colgroup</a></code> element whose <span
+ title=syntax-end-tag>end tag</span> has been omitted.</p>
+ <!-- </colgroup> -->
+
+ <p>A <code><a href="#colgroup">colgroup</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#colgroup">colgroup</a></code> element is not immediately followed
+ by a <a href="#space">space character</a> or a <a href="#comments0"
+ title=syntax-comments>comment</a>.</p>
+ <!-- </thead> -->
+
+ <p>A <code><a href="#thead0">thead</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#thead0">thead</a></code> element is immediately followed by a
+ <code><a href="#tbody">tbody</a></code> or <code><a
+ href="#tfoot0">tfoot</a></code> element.</p>
+ <!-- <tbody> -->
+
+ <p>A <code><a href="#tbody">tbody</a></code> element's <span
+ title=syntax-start-tag>start tag</span> may be omitted if the first thing
+ inside the <code><a href="#tbody">tbody</a></code> element is a <code><a
+ href="#tr">tr</a></code> element, and if the element is not immediately
+ preceded by a <code><a href="#tbody">tbody</a></code>, <code><a
+ href="#thead0">thead</a></code>, or <code><a
+ href="#tfoot0">tfoot</a></code> element whose <span
+ title=syntax-end-tag>end tag</span> has been omitted.</p>
+ <!-- </tbody> -->
+
+ <p>A <code><a href="#tbody">tbody</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#tbody">tbody</a></code> element is immediately followed by a
+ <code><a href="#tbody">tbody</a></code> or <code><a
+ href="#tfoot0">tfoot</a></code> element, or if there is no more content in
+ the parent element.</p>
+ <!-- </tfoot> -->
+
+ <p>A <code><a href="#tfoot0">tfoot</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#tfoot0">tfoot</a></code> element is immediately followed by a
+ <code><a href="#tbody">tbody</a></code> element, or if there is no more
+ content in the parent element.</p>
+ <!-- </tr> -->
+
+ <p>A <code><a href="#tr">tr</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#tr">tr</a></code> element is immediately followed by another
+ <code><a href="#tr">tr</a></code> element, or if there is no more content
+ in the parent element.</p>
+ <!-- </td> -->
+
+ <p>A <code><a href="#td">td</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#td">td</a></code> element is immediately followed by a <code><a
+ href="#td">td</a></code> or <code><a href="#th">th</a></code> element, or
+ if there is no more content in the parent element.</p>
+ <!-- </th> -->
+
+ <p>A <code><a href="#th">th</a></code> element's <span
+ title=syntax-end-tag>end tag</span> may be omitted if the <code><a
+ href="#th">th</a></code> element is immediately followed by a <code><a
+ href="#td">td</a></code> or <code><a href="#th">th</a></code> element, or
+ if there is no more content in the parent element.
+
+ <p><strong>However</strong>, a <span title=syntax-start-tag>start
+ tag</span> must never be omitted if it has any attributes.
+
+ <h5 id=restrictions><span class=secno>8.1.2.5. </span>Restrictions on
+ content models</h5>
+
+ <p>For historical reasons, certain elements <dfn id=have-extra
+ title=syntax-element-restrictions>have extra restrictions</dfn> beyond
+ even the restrictions given by their content model.
+
+ <p>A <code><a href="#p">p</a></code> element must not contain <code><a
+ href="#blockquote">blockquote</a></code>, <code><a
+ href="#dl">dl</a></code>, <code><a href="#menu">menu</a></code>, <code><a
+ href="#ol">ol</a></code>, <code><a href="#pre">pre</a></code>, <code><a
+ href="#table">table</a></code>, or <code><a href="#ul">ul</a></code>
+ elements, even though these elements are technically allowed inside
+ <code><a href="#p">p</a></code> elements according to the content models
+ described in this specification. (In fact, if one of those elements is put
+ inside a <code><a href="#p">p</a></code> element in the markup, it will
+ instead imply a <code><a href="#p">p</a></code> element end tag before
+ it.)
+
+ <p>An <code>optgroup</code> element must not contain <code>optgroup</code>
+ elements, even though these elements are technically allowed to be nested
+ according to the content models described in this specification. (If an
+ <code>optgroup</code> element is put inside another in the markup, it will
+ in fact imply an <code>optgroup</code> end tag before it.)
+
+ <p>A <code><a href="#table">table</a></code> element must not contain
+ <code><a href="#tr">tr</a></code> elements, even though these elements are
+ technically allowed inside <code><a href="#table">table</a></code>
+ elements according to the content models described in this specification.
+ (If a <code><a href="#tr">tr</a></code> element is put inside a <code><a
+ href="#table">table</a></code> in the markup, it will in fact imply a
+ <code><a href="#tbody">tbody</a></code> start tag before it.)
+
+ <p>A single U+000A LINE FEED (LF) character may be placed immediately after
+ the <span title=syntax-start-tag>start tag</span> of <code><a
+ href="#pre">pre</a></code> and <code>textarea</code> elements. This does
+ not affect the processing of the element. The otherwise optional U+000A
+ LINE FEED (LF) character <em>must</em> be included if the element's
+ contents start with that character (because otherwise the leading newline
+ in the contents would be treated like the optional newline, and ignored).
+
+ <div class=example>
+ <p>The following two <code><a href="#pre">pre</a></code> blocks are
+ equivalent:</p>
+
+ <pre>&lt;pre>Hello&lt;/pre></pre>
+
+ <pre>&lt;pre><br>Hello&lt;/pre></pre>
+ </div>
+
+ <h4 id=text><span class=secno>8.1.3. </span>Text</h4>
+
+ <p><dfn id=text1 title=syntax-text>Text</dfn> is allowed inside elements,
+ attributes, and comments. Text must consist of valid Unicode characters
+ other than U+0000. Text should not contain control characters other than
+ <a href="#space" title="space character">space characters</a>. Extra
+ constraints are placed on what is and what is not allowed in text based on
+ where the text is to be put, as described in the other sections.
+
+ <h5 id=newlines><span class=secno>8.1.3.1. </span>Newlines</h5>
+
+ <p><dfn id=newlines0 title=syntax-newlines>Newlines</dfn> in HTML may be
+ represented either as U+000D CARRIAGE RETURN (CR) characters, U+000A LINE
+ FEED (LF) characters, or pairs of U+000D CARRIAGE RETURN (CR), U+000A LINE
+ FEED (LF) characters in that order.
+
+ <h4 id=character><span class=secno>8.1.4. </span>Character entity
+ references</h4>
+
+ <p>In certain cases described in other sections, <a href="#text1"
+ title=syntax-text>text</a> may be mixed with <dfn id=character0
+ title=syntax-entities>character entity references</dfn>. These can be used
+ to escape characters that couldn't otherwise legally be included in <a
+ href="#text1" title=syntax-text>text</a>.
+
+ <p>Character entity references must start with a U+0026 AMPERSAND
+ (<code>&amp;</code>). Following this, there are three possible kinds of
+ character entity references:
+
+ <dl>
+ <dt>Named entities
+
+ <dd>The ampersand must be followed by one of the names given in the <a
+ href="#entities0">entities</a> section, using the same case. Finally,
+ after the name, the entity must be terminated by a U+003B SEMICOLON
+ character (<code title="">;</code>).
+
+ <dt>Decimal numeric entities
+
+ <dd>The ampersand must be followed by a U+0023 NUMBER SIGN
+ (<code>#</code>) character, followed by one or more digits in the range
+ U+0030 DIGIT ZERO .. U+0039 DIGIT NINE, representing a base-ten integer
+ that itself is a valid Unicode code point that is neither U+0000 nor a
+ character in the range U+0080 .. U+009F. The digits must then be followed
+ by a U+003B SEMICOLON character (<code title="">;</code>).
+
+ <dt>Hexadecimal numeric entities
+
+ <dd>The ampersand must be followed by a U+0023 NUMBER SIGN
+ (<code>#</code>) character, which must be followed by either a U+0078
+ LATIN SMALL LETTER X or a U+0058 LATIN CAPITAL LETTER X character, which
+ must then be followed by one or more digits in the range U+0030 DIGIT
+ ZERO .. U+0039 DIGIT NINE, U+0061 LATIN SMALL LETTER A .. U+0066 LATIN
+ SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER A .. U+0046 LATIN CAPITAL
+ LETTER F, representing a base-sixteen integer that itself is a valid
+ Unicode code point that is neither U+0000 nor a character in the range
+ U+0080 .. U+009F. The digits must then be followed by a U+003B SEMICOLON
+ character (<code title="">;</code>).
+ </dl>
+
+ <h4 id=comments><span class=secno>8.1.5. </span>Comments</h4>
+
+ <p><dfn id=comments0 title=syntax-comments>Comments</dfn> must start with
+ the four character sequence U+003C LESS-THAN SIGN, U+0021 EXCLAMATION
+ MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS (<code
+ title="">&lt;!--</code>). Following this sequence, the comment may have <a
+ href="#text1" title=syntax-text>text</a>, with the additional restriction
+ that the text must not contain two consecutive U+002D HYPHEN-MINUS (<code
+ title="">-</code>) characters, nor end with a U+002D HYPHEN-MINUS (<code
+ title="">-</code>) character. Finally, the comment must be ended by the
+ three character sequence U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E
+ GREATER-THAN SIGN (<code title="">--&gt;</code>).
+
+ <h3 id=parsing><span class=secno>8.2. </span>Parsing HTML documents</h3>
+
+ <p><em>This section only applies to user agents, data mining tools, and
+ conformance checkers.</em>
+
+ <p>The rules for parsing <a href="#xml-documents">XML documents</a> (and
+ thus <a href="#xhtml5">XHTML</a> documents) into DOM trees are covered by
+ the XML and Namespaces in XML specifications, and are out of scope of this
+ specification. <a href="#refsXML">[XML]</a> <a
+ href="#refsXMLNS">[XMLNS]</a> <!-- XXX refs -->
+
+ <p>For <a href="#html-">HTML documents</a>, user agents must use the
+ parsing rules described in this section to generate the DOM trees.
+ Together, these rules define what is referred to as the <dfn
+ id=html-0>HTML parser</dfn>.</p>
+ <!-- XXX should probably remove that "must" since
+ it'll be redundant with something in the navigating processing model
+ eventually -->
+
+ <div class=note>
+ <p>While the HTML form of HTML5 bears a close resemblance to SGML and XML,
+ it is a separate language with its own parsing rules.</p>
+
+ <p>Some earlier versions of HTML (in particular from HTML2 to HTML4) were
+ based on SGML and used SGML parsing rules. However, few (if any) web
+ browsers ever implemented true SGML parsing for HTML documents; the only
+ user agents to strictly handle HTML as an SGML application have
+ historically been validators. The resulting confusion &mdash; with
+ validators claiming documents to have one representation while widely
+ deployed Web browsers interoperably implemented a different
+ representation &mdash; has wasted decades of productivity. This version
+ of HTML thus returns to a non-SGML basis.</p>
+
+ <p>Authors interested in using SGML tools in their authoring pipeline are
+ encouraged to use XML tools and the XML serialisation of HTML5.</p>
+ </div>
+
+ <p>This specification defines the parsing rules for HTML documents, whether
+ they are syntactically valid or not. Certain points in the parsing
+ algorithm are said to be <dfn id=parse title="parse error">parse
+ errors</dfn>. The error handling for parse errors is well-defined: user
+ agents must either act as described below when encountering such problems,
+ or must abort processing at the first error that they encounter for which
+ they do not wish to apply the rules described below.
+
+ <p>Conformance checkers must report at least one parse error condition to
+ the user if one or more parse error conditions exist in the document and
+ must not report parse error conditions if none exist in the document.
+ Conformance checkers may report more than one parse error condition if
+ more than one parse error condition exists in the document. Conformance
+ checkers are not required to recover from parse errors.
+
+ <p class=note>Parse errors are only errors with the <em>syntax</em> of
+ HTML. In addition to checking for parse errors, conformance checkers will
+ also verify that the document obeys all the other conformance requirements
+ described in this specification.
+
+ <h4 id=overview><span class=secno>8.2.1. </span>Overview of the parsing
+ model</h4>
+
+ <p>The input to the HTML parsing process consists of a stream of Unicode
+ characters, which is passed through a <a
+ href="#tokenisation0">tokenisation</a> stage (lexical analysis) followed
+ by a <a href="#tree-construction0">tree construction</a> stage (semantic
+ analysis). The output is a <code>Document</code> object.
+
+ <p class=note>Implementations that <a href="#non-scripted">do not support
+ scripting</a> do not have to actually create a DOM <code>Document</code>
+ object, but the DOM tree in such cases is still used as the model for the
+ rest of the specification.
+
+ <p>In the common case, the data handled by the tokenisation stage comes
+ from the network, but <a href="#dynamic2" title="dynamic markup
+ insertion">it can also come from script</a>, e.g. using the <code
+ title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code> API.
+
+ <p><img alt="" src="images/parsing-model-overview.png">
+
+ <p id=nestedParsing>There is only one set of state for the tokeniser stage
+ and the tree construction stage, but the tree construction stage is
+ reentrant, meaning that while the tree construction stage is handling one
+ token, the tokeniser might be resumed, causing further tokens to be
+ emitted and processed before the first token's processing is complete.
+
+ <div class=example>
+ <p>In the following example, the tree construction stage will be called
+ upon to handle a "p" start tag token while handling the "script" start
+ tag token:</p>
+
+ <pre>...
+&lt;script>
+ document.write('&lt;p>');
+&lt;/script>
+...</pre>
+ </div>
+
+ <h4 id=the-input0><span class=secno>8.2.2. </span>The <dfn id=input0>input
+ stream</dfn></h4>
+
+ <p>The stream of Unicode characters that constitutes the input to the
+ tokenisation stage will be initially seen by the user agent as a stream of
+ bytes (typically coming over the network or from the local file system).
+ The bytes encode the actual characters according to a particular
+ <em>character encoding</em>, which the user agent must use to decode the
+ bytes into characters.
+
+ <p id=documentEncoding>For HTML, user agents must use the following
+ algorithm in determining the character encoding of a document:
+
+ <ol>
+ <li>
+ <p>If the transport layer specifies an encoding, use that, and abort
+ these steps.
+
+ <li>
+ <p>The user agent may wait for 512 or more bytes of the resource to be
+ available.
+
+ <li>
+ <p>Let <var title="">n</var> be the smaller of either 512 or the number
+ of bytes already available.
+
+ <li>
+ <p>For each of the rows in the following table, starting with the first
+ one and going down, if <var title="">n</var> is equal to or greater than
+ the number of bytes in the first column, and the first bytes of the file
+ match the bytes given in the first column, then use the encoding given
+ in the cell in the second column of that row, and abort these steps:</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Bytes in Hexadecimal
+
+ <th>Description
+
+ <tbody>
+ <tr>
+ <td>00 00 FE FF
+
+ <td>UTF-32BE BOM
+
+ <tr>
+ <td>FF FE 00 00
+
+ <td>UTF-32LE BOM
+
+ <tr>
+ <td>FE FF
+
+ <td>UTF-16BE BOM
+
+ <tr>
+ <td>FF FE
+
+ <td>UTF-16LE BOM
+
+ <tr>
+ <td>EF BB BF
+
+ <td>UTF-8 BOM <!-- nobody uses this
+ <tr>
+ <td>DD 73 66 73
+ <td>UTF-EBCDIC
+-->
+
+ </table>
+
+ <li>
+ <p>Otherwise, the user agent will have to search for explicit character
+ encoding information in the file itself. This must proceed as follows:
+
+ <p>Let <var title="">position</var> be a pointer to a byte in the input
+ stream, initially pointing at the first byte. If at any point during
+ these steps the <var title="">position</var> pointer points beyond the
+ <var title="">n</var>th byte of the input stream, then skip to the next
+ step of the overall character encoding detection algorithm (the step
+ which mentions frequency analysis below).</p>
+
+ <p>Now, repeat the following "two" steps until the algorithm aborts
+ (either because <var title="">position</var> reaches beyond the <var
+ title="">n</var>th byte, or because a character encoding is found):</p>
+
+ <ol>
+ <li>
+ <p>If <var title="">position</var> points to:</p>
+
+ <dl class=switch>
+ <dt>A sequence of bytes starting with: 0x3C 0x21 0x2D 0x2D (ASCII
+ '&lt;!--')
+
+ <dd>
+ <p>Advance the <var title="">position</var> pointer so that it points
+ at the first 0x3E byte which is preceded by two 0x2D bytes (i.e. at
+ the end of an ASCII '-->' sequence) and comes after the second 0x2D
+ byte that was found. (The two 0x2D bytes cannot be the same as
+ those in the '&lt;!--' sequence.) If no such byte is found before
+ the <var title="">n</var>th byte, abort this "two step" algorithm.</p>
+
+ <dt>A sequence of bytes starting with: 0x3C, 0x4D or 0x6D, 0x45 or
+ 0x65, 0x54 or 0x74, 0x41 or 0x61, and finally one of 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x20 (case-insensitive ASCII '&lt;meta' followed by
+ a space)
+
+ <dd>
+ <ol>
+ <li>
+ <p>Advance the <var title="">position</var> pointer so that it
+ points at the next 0x09, 0x0A, 0x0B, 0x0C, 0x0D, or 0x20 byte (the
+ one in the sequence of characters matched above), if there is one
+ before the <var title="">n</var>th byte. If there isn't, abort the
+ "two step" algorithm.
+
+ <li>
+ <p><a href="#get-an" title=concept-get-attributes-when-sniffing>Get
+ an attribute</a> and its value. If no attribute was sniffed, then
+ skip this inner set of steps, and jump to the second step in the
+ overall "two step" algorithm.</p>
+
+ <p class=note>As required above, if the <var
+ title="">position</var> pointer points beyond the <var
+ title="">n</var>th byte after the "get an attribute" step, the
+ "two step" algorithm will abort.
+
+ <li>
+ <p>Examine the attribute's name:</p>
+
+ <dl class=switch>
+ <dt>If it is 'charset'
+
+ <dd>
+ <p>If the attribute's value is a supported character encoding,
+ then use the given encoding, and abort all these steps.
+ Otherwise, do nothing with this attribute.
+
+ <dt>If it is 'content'
+
+ <dd>
+ <p>The attribute's value is now parsed.</p>
+
+ <ol>
+ <li>
+ <p>Skip characters in the attribute's value up to and including
+ the first U+003B SEMICOLON (<code title="">;</code>)
+ character.
+
+ <li>
+ <p>Skip any U+0009, U+000A, U+000B, U+000C, U+000D, or U+0020
+ characters (i.e. spaces) that immediately follow the
+ semicolon.
+
+ <li>
+ <p>If the next seven characters are not 'charset', abort this
+ very inner set of steps (parsing the attribute's value), and
+ continue looking for other attributes.
+
+ <li>
+ <p>Skip any U+0009, U+000A, U+000B, U+000C, U+000D, or U+0020
+ characters that immediately follow the word 'charset' (there
+ might not be any).
+
+ <li>
+ <p>If the next character is not a U+003D EQUALS SIGN ('='),
+ abort this very inner set of steps (parsing the attribute's
+ value), and continue looking for other attributes.
+
+ <li>
+ <p>Skip any U+0009, U+000A, U+000B, U+000C, U+000D, or U+0020
+ characters that immediately follow the equals sign (there
+ might not be any).
+
+ <li>
+ <p>Process the next character as follows:</p>
+
+ <dl class=switch>
+ <dt>If it is a U+0022 QUOTATION MARK ('"') and there is a
+ later U+0022 QUOTATION MARK ('"') in the attribute's value
+
+ <dd>
+ <p>Let <var title="">tentative encoding</var> be the string
+ between the two quotation marks.
+
+ <dt>If it is a U+0027 APOSTROPHE ("'") and there is a later
+ U+0027 APOSTROPHE ("'") in the attribute's value
+
+ <dd>
+ <p>Let <var title="">tentative encoding</var> be the string
+ between the two apostrophes.
+
+ <dt>If it is an unmatched U+0022 QUOTATION MARK ('"')
+
+ <dt>If it is an unmatched U+0027 APOSTROPHE ("'")
+
+ <dd>
+ <p>There is no <var title="">tentative encoding</var>.
+
+ <dt>Otherwise
+
+ <dd>
+ <p>Let <var title="">tentative encoding</var> be the string
+ from this character to the first U+0009, U+000A, U+000B,
+ U+000C, U+000D, or U+0020 character or the end of the
+ attribute's value, whichever comes first.
+ </dl>
+
+ <li>If there is a <var title="">tentative encoding</var> and it
+ is the name of a supported character encoding, then use that
+ encoding; abort all these steps.
+
+ <li>Otherwise, skip this 'content' attribute and continue on
+ with any other attributes.
+ </ol>
+
+ <dd>
+
+ <dt>Any other name
+
+ <dd>
+ <p>Do nothing with that attribute.
+ </dl>
+
+ <li>
+ <p>Return to step 1 in these inner steps.
+ </ol>
+
+ <dt>A sequence of bytes starting with a 0x3C byte (ASCII '&lt;'),
+ optionally a 0x2F byte (ASCII '/'), and finally a byte in the range
+ 0x41-0x5A or 0x61-0x7A (an ASCII letter)
+
+ <dd>
+ <ol>
+ <li>
+ <p>Advance the <var title="">position</var> pointer so that it
+ points at the next 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0B (ASCII
+ VT), 0x0C (ASCII FF), 0x0D (ASCII CR), 0x20 (ASCII space), 0x3E
+ (ASCII '&gt;'), or 0x3C (ASCII '&lt;') byte, if there is one before the
+ <var title="">n</var>th byte. If there isn't, abort the "two step"
+ algorithm.
+
+ <li>
+ <p>If the pointer points to a 0x3C (ASCII '&lt;') byte, then return to
+ the first step in the overall "two step" algorithm.
+
+ <li>
+ <p>Repeatedly <a href="#get-an"
+ title=concept-get-attributes-when-sniffing>get an attribute</a>
+ until no further attributes can be found, then jump to the second
+ step in the overall "two step" algorithm.
+ </ol>
+
+ <dt>A sequence of bytes starting with: 0x3C 0x21 (ASCII '&lt;!')
+
+ <dt>A sequence of bytes starting with: 0x3C 0x2F (ASCII '&lt;/')
+
+ <dt>A sequence of bytes starting with: 0x3C 0x3F (ASCII '&lt;?')
+
+ <dd>
+ <p>Advance the <var title="">position</var> pointer so that it points
+ at the first 0x3E byte (ASCII '>') that comes after the 0x3C byte
+ that was found. If no such byte is found before the <var
+ title="">n</var>th byte, abort this "two step" algorithm.</p>
+
+ <dt>Any other byte
+
+ <dd>
+ <p>Do nothing with that byte.</p>
+ </dl>
+
+ <li>Move <var title="">position</var> so it points at the next byte in
+ the input stream, and return to the first step of this "two step"
+ algorithm.
+ </ol>
+
+ <p>When the above "two step" algorithm says to <dfn id=get-an
+ title=concept-get-attributes-when-sniffing>get an attribute</dfn>, it
+ means doing this:</p>
+
+ <ol>
+ <li>
+ <p>If the byte at <var title="">position</var> is one of 0x09 (ASCII
+ TAB), 0x0A (ASCII LF), 0x0B (ASCII VT), 0x0C (ASCII FF), 0x0D (ASCII
+ CR), 0x20 (ASCII space), or 0x2F (ASCII '/') then advance <var
+ title="">position</var> to the next byte and start over.
+
+ <li>
+ <p>If the byte at <var title="">position</var> is 0x3C (ASCII '&lt;'),
+ then move <var title="">position</var> back to the previous byte, and
+ stop looking for an attribute. There isn't one.
+
+ <li>
+ <p>If the byte at <var title="">position</var> is 0x3E (ASCII '>'),
+ then stop looking for an attribute. There isn't one.
+
+ <li>
+ <p>Otherwise, the byte at <var title="">position</var> is the start of
+ the attribute name. Let <var title="">attribute name</var> and <var
+ title="">attribute value</var> be the empty string.
+
+ <li>
+ <p><em>Attribute name</em>: Process the byte at <var
+ title="">position</var> as follows:</p>
+
+ <dl class=switch>
+ <dt>If it is 0x3D (ASCII '='), and the <var title="">attribute
+ name</var> is longer than the empty string
+
+ <dd>Advance <var title="">position</var> to the next byte and jump to
+ the step below labelled <em>value</em>.
+
+ <dt>If it is 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0B (ASCII VT), 0x0C
+ (ASCII FF), 0x0D (ASCII CR), or 0x20 (ASCII space)
+
+ <dd>Jump to the step below labelled <em>spaces</em>.
+
+ <dt>If it is 0x2F (ASCII '/'), 0x3C (ASCII '&lt;'), or 0x3E (ASCII
+ '&gt;')
+
+ <dd>Stop looking for an attribute. The attribute's name is the value
+ of <var title="">attribute name</var>, its value is the empty string.
+
+ <dt>If it is in the range 0x41 (ASCII 'A') to 0x5A (ASCII 'Z')
+
+ <dd>Append the Unicode character with codepoint <span><var
+ title="">b</var>+0x20</span> to <var title="">attribute name</var>
+ (where <var title="">b</var> is the value of the byte at <var
+ title="">position</var>).
+
+ <dt>Anything else
+
+ <dd>Append the Unicode character with the same codepoint as the value
+ of the byte at <var title="">position</var> to <var
+ title="">attribute name</var>. (It doesn't actually matter how bytes
+ outside the ASCII range are handled here, since only ASCII characters
+ can contribute to the detection of a character encoding.)
+ </dl>
+
+ <li>
+ <p>Advance <var title="">position</var> to the next byte and return to
+ the previous step.
+
+ <li>
+ <p><em>Spaces.</em> If the byte at <var title="">position</var> is one
+ of 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0B (ASCII VT), 0x0C (ASCII
+ FF), 0x0D (ASCII CR), or 0x20 (ASCII space) then advance <var
+ title="">position</var> to the next byte, then, repeat this step.
+
+ <li>
+ <p>If the byte at <var title="">position</var> is <em>not</em> 0x3D
+ (ASCII '='), stop looking for an attribute. Move <var
+ title="">position</var> back to the previous byte. The attribute's
+ name is the value of <var title="">attribute name</var>, its value is
+ the empty string.
+
+ <li>
+ <p>Advance <var title="">position</var> past the 0x3D (ASCII '=') byte.
+
+ <li>
+ <p><em>Value.</em> If the byte at <var title="">position</var> is one
+ of 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0B (ASCII VT), 0x0C (ASCII
+ FF), 0x0D (ASCII CR), or 0x20 (ASCII space) then advance <var
+ title="">position</var> to the next byte, then, repeat this step.
+
+ <li>
+ <p>Process the byte at <var title="">position</var> as follows:</p>
+
+ <dl class=switch>
+ <dt>If it is 0x22 (ASCII '"') or 0x27 (ASCII "'")
+
+ <dd>
+ <ol>
+ <li>Let <var title="">b</var> be the value of the byte at <var
+ title="">position</var>.
+
+ <li>Advance <var title="">position</var> to the next byte.
+
+ <li>If the value of the byte at <var title="">position</var> is the
+ value of <var title="">b</var>, then stop looking for an attribute.
+ The attribute's name is the value of <var title="">attribute
+ name</var>, and its value is the value of <var title="">attribute
+ value</var>.
+
+ <li>Otherwise, if the value of the byte at <var
+ title="">position</var> is in the range 0x41 (ASCII 'A') to 0x5A
+ (ASCII 'Z'), then append a Unicode character to <var
+ title="">attribute value</var> whose codepoint is 0x20 more than
+ the value of the byte at <var title="">position</var>.
+
+ <li>Otherwise, append a Unicode character to <var title="">attribute
+ value</var> whose codepoint is the same as the value of the byte at
+ <var title="">position</var>.
+
+ <li>Return to the second step in these substeps.
+ </ol>
+
+ <dt>If it is 0x3C (ASCII '&lt;'), or 0x3E (ASCII '&gt;')
+
+ <dd>Stop looking for an attribute. The attribute's name is the value
+ of <var title="">attribute name</var>, its value is the empty string.
+
+ <dt>If it is in the range 0x41 (ASCII 'A') to 0x5A (ASCII 'Z')
+
+ <dd>Append the Unicode character with codepoint <span><var
+ title="">b</var>+0x20</span> to <var title="">attribute value</var>
+ (where <var title="">b</var> is the value of the byte at <var
+ title="">position</var>).
+
+ <dt>Anything else
+
+ <dd>Append the Unicode character with the same codepoint as the value
+ of the byte at <var title="">position</var> to <var
+ title="">attribute value</var>.
+ </dl>
+
+ <li>
+ <p>Process the byte at <var title="">position</var> as follows:</p>
+
+ <dl class=switch>
+ <dt>If it is 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0B (ASCII VT), 0x0C
+ (ASCII FF), 0x0D (ASCII CR), 0x20 (ASCII space), 0x3C (ASCII '&lt;'),
+ or 0x3E (ASCII '&gt;')
+
+ <dd>Stop looking for an attribute. The attribute's name is the value
+ of <var title="">attribute name</var> and its value is the value of
+ <var title="">attribute value</var>.
+
+ <dt>If it is in the range 0x41 (ASCII 'A') to 0x5A (ASCII 'Z')
+
+ <dd>Append the Unicode character with codepoint <span><var
+ title="">b</var>+0x20</span> to <var title="">attribute value</var>
+ (where <var title="">b</var> is the value of the byte at <var
+ title="">position</var>).
+
+ <dt>Anything else
+
+ <dd>Append the Unicode character with the same codepoint as the value
+ of the byte at <var title="">position</var> to <var
+ title="">attribute value</var>.
+ </dl>
+
+ <li>
+ <p>Advance <var title="">position</var> to the next byte and return to
+ the previous step.
+ </ol>
+
+ <li>
+ <p>The user agent may attempt to autodetect the character encoding from
+ applying frequency analysis or other algorithms to the data stream. If
+ autodetection succeeds in determining a character encoding, then use
+ that; abort these steps. <a href="#refsUNIVCHARDET">[UNIVCHARDET]</a>
+ </li>
+ <!--
+ http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
+ -->
+
+ <li>
+ <p>Otherwise, use an implementation-defined or user-specified default
+ character encoding. Due to its use in legacy content, <code
+ title="">windows-1252</code> is recommended as a default in
+ predominantly Western demographics. In non-legacy environments, the more
+ comprehensive <code title="">UTF-8</code> encoding is recommended
+ instead. Since these encodings can in many cases be distinguished by
+ inspection, a user agent may heuristically decide which to use as a
+ default.
+ </ol>
+
+ <p class=note>For XML documents, the algorithm user agents must use to
+ determine the character encoding is given by the XML specification. This
+ section does not apply to XML documents. <a href="#refsXML">[XML]</a>
+
+ <p>When a user agent would otherwise use the ISO-8859-1 encoding, it must
+ instead use the Windows-1252 encoding. User agents must not support the
+ CESU-8, UTF-7, BOCU-1 and SCSU encodings. <a href="#refsCESU8">[CESU8]</a>
+ <a href="#refsUTF7">[UTF7]</a> <a href="#refsBOCU1">[BOCU1]</a> <a
+ href="#refsSCSU">[SCSU]</a>
+
+ <p>Bytes or sequences of bytes in the original byte stream that could not
+ be converted to Unicode characters must be converted to U+FFFD REPLACEMENT
+ CHARACTER code points.
+
+ <p>A leading U+FEFF BYTE ORDER MARK (BOM) must be dropped if present.
+
+ <p>All U+0000 NULL characters in the input must be replaced by U+FFFD
+ REPLACEMENT CHARACTERs. Any occurrence of such characters is a <a
+ href="#parse">parse error</a>.
+
+ <p>U+000D CARRIAGE RETURN (CR) characters, and U+000A LINE FEED (LF)
+ characters, are treated specially. Any CR characters that are followed by
+ LF characters must be removed, and any CR characters not followed by LF
+ characters must be converted to LF characters. Thus, newlines in HTML DOMs
+ are represented by LF characters, and there are never any CR characters in
+ the input to the <a href="#tokenisation0">tokenisation</a> stage.
+
+ <p>The <dfn id=next-input>next input character</dfn> is the first character
+ in the input stream that has not yet been <dfn id=consumed>consumed</dfn>.
+ Initially, the <em><a href="#next-input">next input character</a></em> is
+ the first character in the input.
+
+ <p>The <dfn id=insertion>insertion point</dfn> is the position (just before
+ a character or just before the end of the input stream) where content
+ inserted using <code title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code> is actually inserted.
+ The insertion point is relative to the position of the character
+ immediately after it; it is not an absolute offset into the input stream.
+ Initially, the insertion point is uninitialised.
+
+ <p>The "EOF" character in the tables below is a conceptual character
+ representing the end of the <a href="#input0">input stream</a>. If the
+ parser is a <a href="#script-created">script-created parser</a>, then the
+ end of the <a href="#input0">input stream</a> is reached when an <dfn
+ id=explicit>explicit "EOF" character</dfn> (inserted by the <code
+ title=dom-document-close><a href="#close">document.close()</a></code>
+ method) is consumed. Otherwise, the "EOF" character is not a real
+ character in the stream, but rather the lack of any further characters.
+
+ <h4 id=tokenisation><span class=secno>8.2.3. </span><dfn
+ id=tokenisation0>Tokenisation</dfn></h4>
+
+ <p>Implementations must act as if they used the following state machine to
+ tokenise HTML. The state machine must start in the <a
+ href="#data-state">data state</a>. Most states consume a single character,
+ which may have various side-effects, and either switches the state machine
+ to a new state to <em>reconsume</em> the same character, or switches it to
+ a new state (to consume the next character), or repeats the same state (to
+ consume the next character). Some states have more complicated behaviour
+ and can consume several characters before switching to another state.
+
+ <p>The exact behaviour of certain states depends on a <dfn
+ id=content2>content model flag</dfn> that is set after certain tokens are
+ emitted. The flag has several states: <em title="">PCDATA</em>, <em
+ title="">RCDATA</em>, <em title="">CDATA</em>, and <em
+ title="">PLAINTEXT</em>. Initially it must be in the PCDATA state. In the
+ RCDATA and CDATA states, a further <dfn id=escape>escape flag</dfn> is
+ used to control the behaviour of the tokeniser. It is either true or
+ false, and initially must be set to the false state.
+
+ <p>The output of the tokenisation step is a series of zero or more of the
+ following tokens: DOCTYPE, start tag, end tag, comment, character,
+ end-of-file. DOCTYPE tokens have names and can be either correct or in
+ error. Start and end tag tokens have a tag name and a list of attributes,
+ each of which has a name and a value. Comment and character tokens have
+ data.
+
+ <p>When a token is emitted, it must immediately be handled by the <a
+ href="#tree-construction0">tree construction</a> stage. The tree
+ construction stage can affect the state of the <a href="#content2">content
+ model flag</a>, and can insert additional characters into the stream. (For
+ example, the <code><a href="#script0">script</a></code> element can result
+ in scripts executing and using the <a href="#dynamic2">dynamic markup
+ insertion</a> APIs to insert characters into the stream being tokenised.)
+
+ <p>When an end tag token is emitted, the <a href="#content2">content model
+ flag</a> must be switched to the PCDATA state.
+
+ <p>When an end tag token is emitted with attributes, that is a <a
+ href="#parse">parse error</a>.
+
+ <p>Before each step of the tokeniser, the user agent may check to see if
+ either one of the scripts in the <a href="#list-of1">list of scripts that
+ will execute as soon as possible</a> or the first script in the <a
+ href="#list-of0">list of scripts that will execute asynchronously</a>, has
+ <span>completed loading</span><!-- XXX xref -->. If one has, then it must
+ be <a href="#executing0" title="executing a script block">executed</a> and
+ removed from its list.
+
+ <p>The tokeniser state machine is as follows:
+
+ <dl>
+ <dt><dfn id=data-state>Data state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0026 AMPERSAND (&amp;)
+
+ <dd>When the <a href="#content2">content model flag</a> is set to one of
+ the PCDATA or RCDATA states: switch to the <a href="#entity">entity
+ data state</a>.
+
+ <dd>Otherwise: treat it as per the "anything else" entry below.
+
+ <dt>U+002D HYPHEN-MINUS (-)
+
+ <dd>
+ <p>If the <a href="#content2">content model flag</a> is set to either
+ the RCDATA state or the CDATA state, and the <a href="#escape">escape
+ flag</a> is false, and there are at least three characters before this
+ one in the input stream, and the last four characters in the input
+ stream, including this one, are U+003C LESS-THAN SIGN, U+0021
+ EXCLAMATION MARK, U+002D HYPHEN-MINUS, and U+002D HYPHEN-MINUS
+ ("&lt;!--"), then set the <a href="#escape">escape flag</a> to true.</p>
+
+ <p>In any case, emit the input character as a character token. Stay in
+ the <a href="#data-state">data state</a>.</p>
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dd>When the <a href="#content2">content model flag</a> is set to the
+ PCDATA state: switch to the <a href="#tag-open">tag open state</a>.
+
+ <dd>When the <a href="#content2">content model flag</a> is set to either
+ the RCDATA state or the CDATA state and the <a href="#escape">escape
+ flag</a> is false: switch to the <a href="#tag-open">tag open
+ state</a>.
+
+ <dd>Otherwise: treat it as per the "anything else" entry below.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>
+ <p>If the <a href="#content2">content model flag</a> is set to either
+ the RCDATA state or the CDATA state, and the <a href="#escape">escape
+ flag</a> is true, and the last three characters in the input stream
+ including this one are U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS,
+ U+003E GREATER-THAN SIGN ("--&gt;"), set the <a href="#escape">escape
+ flag</a> to false.</p>
+ <!-- no need to check
+ that there are enough characters, since you can only run into
+ this if the flag is true in the first place, which requires four
+ characters. -->
+
+ <p>In any case, emit the input character as a character token. Stay in
+ the <a href="#data-state">data state</a>.</p>
+
+ <dt>EOF
+
+ <dd>Emit an end-of-file token.
+
+ <dt>Anything else
+
+ <dd>Emit the input character as a character token. Stay in the <a
+ href="#data-state">data state</a>.
+ </dl>
+
+ <dt><dfn id=entity>Entity data state</dfn>
+
+ <dd>
+ <p><em>(This cannot happen if the <a href="#content2">content model
+ flag</a> is set to the CDATA state.)</em></p>
+
+ <p>Attempt to <a href="#consume">consume an entity</a>.</p>
+
+ <p>If nothing is returned, emit a U+0026 AMPERSAND character token.</p>
+
+ <p>Otherwise, emit the character token that was returned.</p>
+
+ <p>Finally, switch to the <a href="#data-state">data state</a>.</p>
+
+ <dt><dfn id=tag-open>Tag open state</dfn>
+
+ <dd>
+ <p>The behaviour of this state depends on the <a href="#content2">content
+ model flag</a>.</p>
+
+ <dl>
+ <dt>If the <a href="#content2">content model flag</a> is set to the
+ RCDATA or CDATA states
+
+ <dd>
+ <p>If the <a href="#next-input">next input character</a> is a U+002F
+ SOLIDUS (/) character, consume it and switch to the <a
+ href="#close1">close tag open state</a>. If the <a
+ href="#next-input">next input character</a> is not a U+002F SOLIDUS
+ (/) character, emit a U+003C LESS-THAN SIGN character token and switch
+ to the <a href="#data-state">data state</a> to process the <a
+ href="#next-input">next input character</a>.</p>
+
+ <dt>If the <a href="#content2">content model flag</a> is set to the
+ PCDATA state
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0021 EXCLAMATION MARK (!)
+
+ <dd>Switch to the <a href="#markup">markup declaration open state</a>.
+
+ <dt>U+002F SOLIDUS (/)
+
+ <dd>Switch to the <a href="#close1">close tag open state</a>.
+
+ <dt>U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL
+ LETTER Z
+
+ <dd>Create a new start tag token, set its tag name to the lowercase
+ version of the input character (add 0x0020 to the character's code
+ point), then switch to the <a href="#tag-name0">tag name state</a>.
+ (Don't emit the token yet; further details will be filled in before
+ it is emitted.)
+
+ <dt>U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+
+ <dd>Create a new start tag token, set its tag name to the input
+ character, then switch to the <a href="#tag-name0">tag name
+ state</a>. (Don't emit the token yet; further details will be filled
+ in before it is emitted.)
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd><a href="#parse">Parse error</a>. Emit a U+003C LESS-THAN SIGN
+ character token and a U+003E GREATER-THAN SIGN character token.
+ Switch to the <a href="#data-state">data state</a>.
+
+ <dt>U+003F QUESTION MARK (?)
+
+ <dd><a href="#parse">Parse error</a>. Switch to the <a
+ href="#bogus">bogus comment state</a>.
+
+ <dt>Anything else
+
+ <dd><a href="#parse">Parse error</a>. Emit a U+003C LESS-THAN SIGN
+ character token and reconsume the current input character in the <a
+ href="#data-state">data state</a>.
+ </dl>
+ </dl>
+
+ <dt><dfn id=close1>Close tag open state</dfn>
+
+ <dd>
+ <p>If the <a href="#content2">content model flag</a> is set to the RCDATA
+ or CDATA states then examine the next few characters. If they do not
+ match the tag name of the last start tag token emitted (case
+ insensitively), or if they do but they are not immediately followed by
+ one of the following characters:</p>
+
+ <ul class=brief>
+ <li>U+0009 CHARACTER TABULATION
+
+ <li>U+000A LINE FEED (LF)
+
+ <li>U+000B LINE TABULATION
+
+ <li>U+000C FORM FEED (FF)</li>
+ <!--<li>U+000D CARRIAGE RETURN (CR)</li>-->
+
+ <li>U+0020 SPACE
+
+ <li>U+003E GREATER-THAN SIGN (&gt;)
+
+ <li>U+002F SOLIDUS (/)
+
+ <li>U+003C LESS-THAN SIGN (&lt;)
+
+ <li>EOF
+ </ul>
+
+ <p>...then there is a <a href="#parse">parse error</a>. Emit a U+003C
+ LESS-THAN SIGN character token, a U+002F SOLIDUS character token, and
+ switch to the <a href="#data-state">data state</a> to process the <a
+ href="#next-input">next input character</a>.</p>
+
+ <p>Otherwise, if the <a href="#content2">content model flag</a> is set to
+ the PCDATA state, or if the next few characters <em>do</em> match that
+ tag name, consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER
+ Z
+
+ <dd>Create a new end tag token, set its tag name to the lowercase
+ version of the input character (add 0x0020 to the character's code
+ point), then switch to the <a href="#tag-name0">tag name state</a>.
+ (Don't emit the token yet; further details will be filled in before it
+ is emitted.)
+
+ <dt>U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+
+ <dd>Create a new end tag token, set its tag name to the input character,
+ then switch to the <a href="#tag-name0">tag name state</a>. (Don't emit
+ the token yet; further details will be filled in before it is emitted.)
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd><a href="#parse">Parse error</a>. Switch to the <a
+ href="#data-state">data state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit a U+003C LESS-THAN SIGN
+ character token and a U+002F SOLIDUS character token. Reconsume the EOF
+ character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd><a href="#parse">Parse error</a>. Switch to the <a
+ href="#bogus">bogus comment state</a>.
+ </dl>
+
+ <dt><dfn id=tag-name0>Tag name state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Switch to the <a href="#before">before attribute name state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current tag token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER
+ Z
+
+ <dd>Append the lowercase version of the current input character (add
+ 0x0020 to the character's code point) to the current tag token's tag
+ name. Stay in the <a href="#tag-name0">tag name state</a>.
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>U+002F SOLIDUS (/)
+
+ <dd><a href="#parse">Parse error</a> unless this is a <a
+ href="#permitted">permitted slash</a>. Switch to the <a
+ href="#before">before attribute name state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current tag token's tag
+ name. Stay in the <a href="#tag-name0">tag name state</a>.
+ </dl>
+
+ <dt><dfn id=before>Before attribute name state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Stay in the <a href="#before">before attribute name state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current tag token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER
+ Z
+
+ <dd>Start a new attribute in the current tag token. Set that attribute's
+ name to the lowercase version of the current input character (add
+ 0x0020 to the character's code point), and its value to the empty
+ string. Switch to the <a href="#attribute1">attribute name state</a>.
+
+ <dt>U+002F SOLIDUS (/)
+
+ <dd><a href="#parse">Parse error</a> unless this is a <a
+ href="#permitted">permitted slash</a>. Stay in the <a
+ href="#before">before attribute name state</a>.
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Start a new attribute in the current tag token. Set that attribute's
+ name to the current input character, and its value to the empty string.
+ Switch to the <a href="#attribute1">attribute name state</a>.
+ </dl>
+
+ <dt><dfn id=attribute1>Attribute name state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Switch to the <a href="#after">after attribute name state</a>.
+
+ <dt>U+003D EQUALS SIGN (=)
+
+ <dd>Switch to the <a href="#before0">before attribute value state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current tag token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER
+ Z
+
+ <dd>Append the lowercase version of the current input character (add
+ 0x0020 to the character's code point) to the current attribute's name.
+ Stay in the <a href="#attribute1">attribute name state</a>.
+
+ <dt>U+002F SOLIDUS (/)
+
+ <dd><a href="#parse">Parse error</a> unless this is a <a
+ href="#permitted">permitted slash</a>. Switch to the <a
+ href="#before">before attribute name state</a>.
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current attribute's name.
+ Stay in the <a href="#attribute1">attribute name state</a>.
+ </dl>
+
+ <p>When the user agent leaves the attribute name state (and before
+ emitting the tag token, if appropriate), the complete attribute's name
+ must be compared to the other attributes on the same token; if there is
+ already an attribute on the token with the exact same name, then this is
+ a <a href="#parse">parse error</a> and the new attribute must be
+ dropped, along with the value that gets associated with it (if any).</p>
+
+ <dt><dfn id=after>After attribute name state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Stay in the <a href="#after">after attribute name state</a>.
+
+ <dt>U+003D EQUALS SIGN (=)
+
+ <dd>Switch to the <a href="#before0">before attribute value state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current tag token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER
+ Z
+
+ <dd>Start a new attribute in the current tag token. Set that attribute's
+ name to the lowercase version of the current input character (add
+ 0x0020 to the character's code point), and its value to the empty
+ string. Switch to the <a href="#attribute1">attribute name state</a>.
+
+ <dt>U+002F SOLIDUS (/)
+
+ <dd><a href="#parse">Parse error</a> unless this is a <a
+ href="#permitted">permitted slash</a>. Switch to the <a
+ href="#before">before attribute name state</a>.
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Start a new attribute in the current tag token. Set that attribute's
+ name to the current input character, and its value to the empty string.
+ Switch to the <a href="#attribute1">attribute name state</a>.
+ </dl>
+
+ <dt><dfn id=before0>Before attribute value state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Stay in the <a href="#before0">before attribute value state</a>.
+
+ <dt>U+0022 QUOTATION MARK (&quot;)
+
+ <dd>Switch to the <a href="#attribute2">attribute value (double-quoted)
+ state</a>.
+
+ <dt>U+0026 AMPERSAND (&amp;)
+
+ <dd>Switch to the <a href="#attribute4">attribute value (unquoted)
+ state</a> and reconsume this input character.
+
+ <dt>U+0027 APOSTROPHE (')
+
+ <dd>Switch to the <a href="#attribute3">attribute value (single-quoted)
+ state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current tag token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current attribute's value.
+ Switch to the <a href="#attribute4">attribute value (unquoted)
+ state</a>.
+ </dl>
+
+ <dt><dfn id=attribute2>Attribute value (double-quoted) state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0022 QUOTATION MARK (&quot;)
+
+ <dd>Switch to the <a href="#before">before attribute name state</a>.
+
+ <dt>U+0026 AMPERSAND (&amp;)
+
+ <dd>Switch to the <a href="#entity0">entity in attribute value
+ state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current attribute's value.
+ Stay in the <a href="#attribute2">attribute value (double-quoted)
+ state</a>.
+ </dl>
+
+ <dt><dfn id=attribute3>Attribute value (single-quoted) state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0027 APOSTROPHE (')
+
+ <dd>Switch to the <a href="#before">before attribute name state</a>.
+
+ <dt>U+0026 AMPERSAND (&amp;)
+
+ <dd>Switch to the <a href="#entity0">entity in attribute value
+ state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current attribute's value.
+ Stay in the <a href="#attribute3">attribute value (single-quoted)
+ state</a>.
+ </dl>
+
+ <dt><dfn id=attribute4>Attribute value (unquoted) state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Switch to the <a href="#before">before attribute name state</a>.
+
+ <dt>U+0026 AMPERSAND (&amp;)
+
+ <dd>Switch to the <a href="#entity0">entity in attribute value
+ state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current tag token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+003C LESS-THAN SIGN (&lt;)
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current tag token.
+ Reconsume the character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current attribute's value.
+ Stay in the <a href="#attribute4">attribute value (unquoted) state</a>.
+ </dl>
+
+ <dt><dfn id=entity0>Entity in attribute value state</dfn>
+
+ <dd>
+ <p>Attempt to <a href="#consume">consume an entity</a>.</p>
+
+ <p>If nothing is returned, append a U+0026 AMPERSAND character to the
+ current attribute's value.</p>
+
+ <p>Otherwise, append the returned character token to the current
+ attribute's value.</p>
+
+ <p>Finally, switch back to the attribute value state that you were in
+ when you were switched into this state.</p>
+
+ <dt><dfn id=bogus>Bogus comment state</dfn>
+
+ <dd>
+ <p><em>(This can only happen if the <a href="#content2">content model
+ flag</a> is set to the PCDATA state.)</em></p>
+
+ <p>Consume every character up to the first U+003E GREATER-THAN SIGN
+ character (&gt;) or the end of the file (EOF), whichever comes first.
+ Emit a comment token whose data is the concatenation of all the
+ characters starting from and including the character that caused the
+ state machine to switch into the bogus comment state, up to and
+ including the last consumed character before the U+003E character, if
+ any, or up to the end of the file otherwise. (If the comment was started
+ by the end of the file (EOF), the token is empty.)</p>
+
+ <p>Switch to the <a href="#data-state">data state</a>.</p>
+
+ <p>If the end of the file was reached, reconsume the EOF character.</p>
+
+ <dt><dfn id=markup>Markup declaration open state</dfn>
+
+ <dd>
+ <p><em>(This can only happen if the <a href="#content2">content model
+ flag</a> is set to the PCDATA state.)</em></p>
+
+ <p>If the next two characters are both U+002D HYPHEN-MINUS (-)
+ characters, consume those two characters, create a comment token whose
+ data is the empty string, and switch to the <a href="#comment">comment
+ state</a>.</p>
+
+ <p>Otherwise if the next seven characters are a
+ <span>case-insensitive</span><!-- XXX xref, ascii only --> match for the
+ word "DOCTYPE", then consume those characters and switch to the <a
+ href="#doctype0">DOCTYPE state</a>.</p>
+
+ <p>Otherwise, it is a <a href="#parse">parse error</a>. Switch to the <a
+ href="#bogus">bogus comment state</a>. The next character that is
+ consumed, if any, is the first character that will be in the comment.</p>
+
+ <dt><dfn id=comment>Comment state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+002D HYPHEN-MINUS (-)
+
+ <dd>Switch to the <a href="#comment0">comment dash state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the comment token. Reconsume
+ the EOF character in the <a href="#data-state">data state</a>.</dd>
+ <!-- For
+ security reasons: otherwise, hostile user could put a <script> in
+ a comment e.g. in a blog comment and then DOS the server so that
+ the end tag isn't read, and then the commented <script> tag would
+ be treated as live code -->
+
+ <dt>Anything else
+
+ <dd>Append the input character to the comment token's data. Stay in the
+ <a href="#comment">comment state</a>.
+ </dl>
+
+ <dt><dfn id=comment0>Comment dash state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+002D HYPHEN-MINUS (-)
+
+ <dd>Switch to the <a href="#comment1">comment end state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the comment token. Reconsume
+ the EOF character in the <a href="#data-state">data state</a>.</dd>
+ <!-- For
+ security reasons: otherwise, hostile user could put a <script> in
+ a comment e.g. in a blog comment and then DOS the server so that
+ the end tag isn't read, and then the commented <script> tag would
+ be treated as live code -->
+
+ <dt>Anything else
+
+ <dd>Append a U+002D HYPHEN-MINUS (-) character and the input character
+ to the comment token's data. Switch to the <a href="#comment">comment
+ state</a>.
+ </dl>
+
+ <dt><dfn id=comment1>Comment end state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the comment token. Switch to the <a href="#data-state">data
+ state</a>.
+
+ <dt>U+002D HYPHEN-MINUS (-)
+
+ <dd><a href="#parse">Parse error</a>. Append a U+002D HYPHEN-MINUS (-)
+ character to the comment token's data. Stay in the <a
+ href="#comment1">comment end state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the comment token. Reconsume
+ the EOF character in the <a href="#data-state">data state</a>.</dd>
+ <!-- For
+ security reasons: otherwise, hostile user could put a <script> in
+ a comment e.g. in a blog comment and then DOS the server so that
+ the end tag isn't read, and then the commented <script> tag would
+ be treated as live code -->
+
+ <dt>Anything else
+
+ <dd><a href="#parse">Parse error</a>. Append two U+002D HYPHEN-MINUS (-)
+ characters and the input character to the comment token's data. Switch
+ to the <a href="#comment">comment state</a>.
+ </dl>
+
+ <dt><dfn id=doctype0>DOCTYPE state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Switch to the <a href="#before1">before DOCTYPE name state</a>.
+
+ <dt>Anything else
+
+ <dd><a href="#parse">Parse error</a>. Reconsume the current character in
+ the <a href="#before1">before DOCTYPE name state</a>.
+ </dl>
+
+ <dt><dfn id=before1>Before DOCTYPE name state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Stay in the <a href="#before1">before DOCTYPE name state</a>.
+
+ <dt>U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+
+ <dd>Create a new DOCTYPE token. Set the token's name to the
+ uppercase version of the current input character (subtract 0x0020 from
+ the character's code point), and mark it as being in error. Switch to
+ the <a href="#doctype1">DOCTYPE name state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd><a href="#parse">Parse error</a>. Emit a DOCTYPE token whose name is
+ the empty string and that is marked as being in error. Switch to the <a
+ href="#data-state">data state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit a DOCTYPE token whose name is
+ the empty string and that is marked as being in error. Reconsume the
+ EOF character in the <a href="#data-state">data state</a>.
+
+ <dt>Anything else
+
+ <dd>Create a new DOCTYPE token. Set the token's name to the current
+ input character, and mark it as being in error. Switch to the <a
+ href="#doctype1">DOCTYPE name state</a>.
+ </dl>
+
+ <dt><dfn id=doctype1>DOCTYPE name state</dfn>
+
+ <dd>
+ <p>First, consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Switch to the <a href="#after0">after DOCTYPE name state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current DOCTYPE token. Switch to the <a
+ href="#data-state">data state</a>.
+
+ <dt>U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+
+ <dd>Append the uppercase version of the current input character
+ (subtract 0x0020 from the character's code point) to the current
+ DOCTYPE token's name. Stay in the <a href="#doctype1">DOCTYPE name
+ state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current DOCTYPE token.
+ Reconsume the EOF character in the <a href="#data-state">data
+ state</a>.
+
+ <dt>Anything else
+
+ <dd>Append the current input character to the current DOCTYPE token's
+ name. Stay in the <a href="#doctype1">DOCTYPE name state</a>.
+ </dl>
+
+ <p>Then, if the name of the DOCTYPE token is exactly the four letters
+ "HTML", then mark the token as being correct. Otherwise, mark it as
+ being in error.</p>
+
+ <p class=note>Because lowercase letters in the name are uppercased by the
+ algorithm above, the "HTML" letters are actually case-insensitive
+ relative to the markup.</p>
+
+ <dt><dfn id=after0>After DOCTYPE name state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+0009 CHARACTER TABULATION
+
+ <dt>U+000A LINE FEED (LF)
+
+ <dt>U+000B LINE TABULATION
+
+ <dt>U+000C FORM FEED (FF)</dt>
+ <!--<dt>U+000D CARRIAGE RETURN (CR)</dt>-->
+
+ <dt>U+0020 SPACE
+
+ <dd>Stay in the <a href="#after0">after DOCTYPE name state</a>.
+
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current DOCTYPE token. Switch to the <a
+ href="#data-state">data state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current DOCTYPE token.
+ Reconsume the EOF character in the <a href="#data-state">data
+ state</a>.
+
+ <dt>Anything else
+
+ <dd><a href="#parse">Parse error</a>. Mark the DOCTYPE token as being in
+ error, if it is not already. Switch to the <a href="#bogus0">bogus
+ DOCTYPE state</a>.
+ </dl>
+
+ <dt><dfn id=bogus0>Bogus DOCTYPE state</dfn>
+
+ <dd>
+ <p>Consume the <a href="#next-input">next input character</a>:</p>
+
+ <dl class=switch>
+ <dt>U+003E GREATER-THAN SIGN (&gt;)
+
+ <dd>Emit the current DOCTYPE token. Switch to the <a
+ href="#data-state">data state</a>.
+
+ <dt>EOF
+
+ <dd><a href="#parse">Parse error</a>. Emit the current DOCTYPE token.
+ Reconsume the EOF character in the <a href="#data-state">data
+ state</a>.
+
+ <dt>Anything else
+
+ <dd>Stay in the <a href="#bogus0">bogus DOCTYPE state</a>.
+ </dl>
+ </dl>
+
+ <p>A <dfn id=permitted>permitted slash</dfn> is a U+002F SOLIDUS character
+ that is immediately followed by a U+003E GREATER-THAN SIGN, if, and only
+ if, the current token being processed is a start tag token whose tag name
+ is one of the following: <code><a href="#base">base</a></code>, <code><a
+ href="#link">link</a></code>, <code><a href="#meta0">meta</a></code>,
+ <code><a href="#hr">hr</a></code>, <code><a href="#br">br</a></code>,
+ <code><a href="#img">img</a></code>, <code><a
+ href="#embed">embed</a></code>, <code><a href="#param">param</a></code>,
+ <code><a href="#area">area</a></code>, <code><a
+ href="#col">col</a></code>, <code>input</code><!-- XXX add:
+ , <code>command</code>, <code>event-source</code> --></p>
+ <!-- XXX
+ keep this synchronised with the list of "void elements" -->
+
+ <h5 id=tokenising><span class=secno>8.2.3.1. </span>Tokenising entities</h5>
+
+ <p>This section defines how to <dfn id=consume>consume an entity</dfn>.
+ This definition is used when parsing entities <a href="#entity"
+ title="entity data state">in text</a> and <a href="#entity0" title="entity
+ in attribute value state">in attributes</a>.
+
+ <p>The behaviour depends on the identity of the next character (the one
+ immediately after the U+0026 AMPERSAND character):
+
+ <dl class=switch>
+ <dt>U+0023 NUMBER SIGN (#)
+
+ <dd>
+ <p>Consume the U+0023 NUMBER SIGN.</p>
+
+ <p>The behaviour further depends on the character after the U+0023 NUMBER
+ SIGN:</p>
+
+ <dl class=switch>
+ <dt>U+0078 LATIN SMALL LETTER X
+
+ <dt>U+0058 LATIN CAPITAL LETTER X
+
+ <dd>
+ <p>Consume the X.</p>
+
+ <p>Follow the steps below, but using the range of characters U+0030
+ DIGIT ZERO through to U+0039 DIGIT NINE, U+0061 LATIN SMALL LETTER A
+ through to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL
+ LETTER A through to U+0046 LATIN CAPITAL LETTER F (in other words,
+ 0-9, A-F, a-f).</p>
+
+ <p>When it comes to interpreting the number, interpret it as a
+ hexadecimal number.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Follow the steps below, but using the range of characters U+0030
+ DIGIT ZERO through to U+0039 DIGIT NINE (i.e. just 0-9).</p>
+
+ <p>When it comes to interpreting the number, interpret it as a decimal
+ number.</p>
+ </dl>
+
+ <p>Consume as many characters as match the range of characters given
+ above.</p>
+
+ <p>If no characters match the range, then don't consume any characters
+ (and unconsume the U+0023 NUMBER SIGN character and, if appropriate, the
+ X character). This is a <a href="#parse">parse error</a>; nothing is
+ returned.</p>
+
+ <p>Otherwise, if the next character is a U+003B SEMICOLON, consume that
+ too. If it isn't, there is a <a href="#parse">parse error</a>.</p>
+
+ <p>If one or more characters match the range, then take them all and
+ interpret the string of characters as a number (either hexadecimal or
+ decimal as appropriate).
+
+ <p>If that number is in the range 128 to 159 (0x80 to 0x9F), then this is
+ a <a href="#parse">parse error</a>. In the following table, find the row
+ with that number in the first column, and return a character token for
+ the Unicode character given in the second column of that row.</p>
+
+ <table>
+ <thead>
+ <tr>
+ <th>Number
+
+ <th colspan=2>Unicode character
+
+ <tbody>
+ <tr>
+ <td>0x80
+
+ <td>U+20AC
+
+ <td>EURO SIGN ('&#x20AC')
+
+ <tr>
+ <td>0x81
+
+ <td>U+FFFD
+
+ <td>REPLACEMENT CHARACTER
+
+ <tr>
+ <td>0x82
+
+ <td>U+201A
+
+ <td>SINGLE LOW-9 QUOTATION MARK ('&#x201A')
+
+ <tr>
+ <td>0x83
+
+ <td>U+0192
+
+ <td>LATIN SMALL LETTER F WITH HOOK ('&#x0192')
+
+ <tr>
+ <td>0x84
+
+ <td>U+201E
+
+ <td>DOUBLE LOW-9 QUOTATION MARK ('&#x201E')
+
+ <tr>
+ <td>0x85
+
+ <td>U+2026
+
+ <td>HORIZONTAL ELLIPSIS ('&#x2026')
+
+ <tr>
+ <td>0x86
+
+ <td>U+2020
+
+ <td>DAGGER ('&#x2020')
+
+ <tr>
+ <td>0x87
+
+ <td>U+2021
+
+ <td>DOUBLE DAGGER ('&#x2021')
+
+ <tr>
+ <td>0x88
+
+ <td>U+02C6
+
+ <td>MODIFIER LETTER CIRCUMFLEX ACCENT ('&#x02C6')
+
+ <tr>
+ <td>0x89
+
+ <td>U+2030
+
+ <td>PER MILLE SIGN ('&#x2030')
+
+ <tr>
+ <td>0x8A
+
+ <td>U+0160
+
+ <td>LATIN CAPITAL LETTER S WITH CARON ('&#x0160')
+
+ <tr>
+ <td>0x8B
+
+ <td>U+2039
+
+ <td>SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('&#x2039')
+
+ <tr>
+ <td>0x8C
+
+ <td>U+0152
+
+ <td>LATIN CAPITAL LIGATURE OE ('&#x0152')
+
+ <tr>
+ <td>0x8D
+
+ <td>U+FFFD
+
+ <td>REPLACEMENT CHARACTER
+
+ <tr>
+ <td>0x8E
+
+ <td>U+017D
+
+ <td>LATIN CAPITAL LETTER Z WITH CARON ('&#x017D')
+
+ <tr>
+ <td>0x8F
+
+ <td>U+FFFD
+
+ <td>REPLACEMENT CHARACTER
+
+ <tr>
+ <td>0x90
+
+ <td>U+FFFD
+
+ <td>REPLACEMENT CHARACTER
+
+ <tr>
+ <td>0x91
+
+ <td>U+2018
+
+ <td>LEFT SINGLE QUOTATION MARK ('&#x2018')
+
+ <tr>
+ <td>0x92
+
+ <td>U+2019
+
+ <td>RIGHT SINGLE QUOTATION MARK ('&#x2019')
+
+ <tr>
+ <td>0x93
+
+ <td>U+201C
+
+ <td>LEFT DOUBLE QUOTATION MARK ('&#x201C')
+
+ <tr>
+ <td>0x94
+
+ <td>U+201D
+
+ <td>RIGHT DOUBLE QUOTATION MARK ('&#x201D')
+
+ <tr>
+ <td>0x95
+
+ <td>U+2022
+
+ <td>BULLET ('&#x2022')
+
+ <tr>
+ <td>0x96
+
+ <td>U+2013
+
+ <td>EN DASH ('&#x2013')
+
+ <tr>
+ <td>0x97
+
+ <td>U+2014
+
+ <td>EM DASH ('&#x2014')
+
+ <tr>
+ <td>0x98
+
+ <td>U+02DC
+
+ <td>SMALL TILDE ('&#x02DC')
+
+ <tr>
+ <td>0x99
+
+ <td>U+2122
+
+ <td>TRADE MARK SIGN ('&#x2122')
+
+ <tr>
+ <td>0x9A
+
+ <td>U+0161
+
+ <td>LATIN SMALL LETTER S WITH CARON ('&#x0161')
+
+ <tr>
+ <td>0x9B
+
+ <td>U+203A
+
+ <td>SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('&#x203A')
+
+ <tr>
+ <td>0x9C
+
+ <td>U+0153
+
+ <td>LATIN SMALL LIGATURE OE ('&#x0153')
+
+ <tr>
+ <td>0x9D
+
+ <td>U+FFFD
+
+ <td>REPLACEMENT CHARACTER
+
+ <tr>
+ <td>0x9E
+
+ <td>U+017E
+
+ <td>LATIN SMALL LETTER Z WITH CARON ('&#x017E')
+
+ <tr>
+ <td>0x9F
+
+ <td>U+0178
+
+ <td>LATIN CAPITAL LETTER Y WITH DIAERESIS ('&#x0178')
+ </table>
+
+ <p>Otherwise, if the number is not a valid Unicode character (e.g. if the
+ number is higher than 1114111), or if the number is zero, then return a
+ character token for the U+FFFD REPLACEMENT CHARACTER character instead.</p>
+
+ <p>Otherwise, return a character token for the Unicode character whose
+ code point is that number.
+
+ <dt>Anything else
+
+ <dd>
+ <p>Consume the maximum number of characters possible, with the consumed
+ characters case-sensitively matching one of the identifiers in the first
+ column of the <a href="#entities0">entities</a> table.</p>
+
+ <p>If no match can be made, then this is a <a href="#parse">parse
+ error</a>. No characters are consumed, and nothing is returned.</p>
+
+ <p>Otherwise, if the next character is a U+003B SEMICOLON, consume that
+ too. If it isn't, there is a <a href="#parse">parse error</a>.</p>
+
+ <p>Return a character token for the character corresponding to the entity
+ name (as given by the second column of the <a
+ href="#entities0">entities</a> table).</p>
+
+ <div class=example>
+ <p>If the markup contains <code title="">I'm &amp;notit without
+ you</code>, the entity is parsed as "not", as in, <code title="">I'm
+ &not;it without you</code>. But if the markup was <code title="">I'm
+ &amp;notin without you</code>, the entity would be parsed as "notin",
+ resulting in <code title="">I'm &notin; without you</code>.</p>
+ </div>
+ </dl>
+
+ <p class=big-issue>This isn't quite right. For some entities, UAs require a
+ semicolon, for others they don't. We probably need to do the same for
+ backwards compatibility. If we do that we might be able to add more
+ entities, e.g. for mathematics. Probably the way to mark whether or not an
+ entity requires a semicolon is with an additional column in the <a
+ href="#entities0" title=entities>entity table lower down</a>.
+
+ <p class=big-issue>It seems browsers convert CRs to LFs even as entities.
+ Should we also do that? If so, we should remove the CRs in the tree
+ construction phase.</p>
+ <!-- IE is not such a browser; you
+ can prove that by comparing id="&#xA;X" to id="&#xD;X" in terms of
+ getElementById('\nX') vs '\rX'. -->
+
+ <h4 id=tree-construction><span class=secno>8.2.4. </span><dfn
+ id=tree-construction0>Tree construction</dfn></h4>
+
+ <p>The input to the tree construction stage is a sequence of tokens from
+ the <a href="#tokenisation0">tokenisation</a> stage. The tree construction
+ stage is associated with a DOM <code>Document</code> object when a parser
+ is created. The "output" of this stage consists of dynamically modifying
+ or extending that document's DOM tree.
+
+ <p>Tree construction passes through several phases. Initially, UAs must act
+ according to the steps described as being those of <a
+ href="#the-initial0">the initial phase</a>.
+
+ <p>This specification does not define when an interactive user agent has to
+ render the <code>Document</code> available to the user, or when it has to
+ begin accepting user input.
+
+ <p>When the steps below require the UA to <dfn id=append>append a
+ character</dfn> to a node, the UA must collect it and all subsequent
+ consecutive characters that would be appended to that node, and insert one
+ <code>Text</code> node whose data is the concatenation of all those
+ characters.
+
+ <p id=mutation-during-parsing>DOM mutation events must not fire for changes
+ caused by the UA parsing the document. (Conceptually, the parser is not
+ mutating the DOM, it is constructing it.) This includes the parsing of any
+ content inserted using <code title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code> and <code
+ title=dom-document-writeln><a
+ href="#document.writeln">document.writeln()</a></code> calls.<!--
+ XXX xref -->
+ <a href="#refsDOM3EVENTS">[DOM3EVENTS]</a></p>
+ <!-- XXX
+ what about innerHTML? -->
+
+ <p class=note>Not all of the tag names mentioned below are conformant tag
+ names in this specification; many are included to handle legacy content.
+ They still form part of the algorithm that implementations are required to
+ implement to claim conformance.
+
+ <p class=note>The algorithm described below places no limit on the depth of
+ the DOM tree generated, or on the length of tag names, attribute names,
+ attribute values, text nodes, etc. While implementors are encouraged to
+ avoid arbitrary limits, it is recognised that <a
+ href="#hardwareLimitations">practical concerns</a> will likely force user
+ agents to impose nesting depth limits.
+
+ <h5 id=the-initial><span class=secno>8.2.4.1. </span><dfn
+ id=the-initial0>The initial phase</dfn></h5>
+
+ <p>Initially, the tree construction stage must handle each token emitted
+ from the <a href="#tokenisation0">tokenisation</a> stage as follows:
+
+ <dl class=switch>
+ <dt>A DOCTYPE token that is marked as being in error
+
+ <dt>A comment token
+
+ <dt>A start tag token
+
+ <dt>An end tag token
+
+ <dt>A character token that is not one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dt>An end-of-file token
+
+ <dd>
+ <p>This specification does not define how to handle this case. In
+ particular, user agents may ignore the entirety of this specification
+ altogether for such documents, and instead invoke special parse modes
+ with a greater emphasis on backwards compatibility.</p>
+
+ <div class=note>
+ <p>Browsers in particular have generally used DOCTYPE-based sniffing to
+ invoke an "alternative conformance mode" known as <em>quirks mode</em>
+ on certain documents. In this mode, emphasis is put on legacy
+ compatibility rather than on standards compliance. This specification
+ takes no position on this behaviour; documents without DOCTYPEs or with
+ DOCTYPEs that do not conform to the syntax allowed by this
+ specification are considered to be out of scope of this specification.</p>
+ </div>
+
+ <div class=big-issue>
+ <p>As far as parsing goes, the quirks I know of are:</p>
+
+ <ul>
+ <li>Comment parsing is different.
+
+ <li><code title="">&lt;/br></code> and <code title="">&lt;/p></code> do
+ magical things.
+
+ <li><code><a href="#p">p</a></code> can contain <code><a
+ href="#table">table</a></code>
+
+ <li>Safari and IE have special parsing rules for &lt;% ... %&gt; (even
+ in standards mode, though clearly this should be quirks-only).
+ </ul>
+
+ <p>Maybe we should just adopt all those and be done with it. One parsing
+ mode to rule them all. Or legitimise/codify the quirks mode parsing in
+ some way.</p>
+
+ <p>Would be interesting to do a search to see how many pages hit each of
+ the above.</p>
+ <!-- biased by page rank? --></div>
+
+ <dt>A DOCTYPE token marked as being correct
+
+ <dd>
+ <p>Append a <code>DocumentType</code> node to the <code>Document</code>
+ node, with the <code title="">name</code> attribute set to the name
+ given in the DOCTYPE token (which will be "HTML"), and the other
+ attributes specific to <code>DocumentType</code> objects set to null,
+ empty lists, or the empty string as appropriate.</p>
+
+ <p>Then, switch to <a href="#the-root1">the root element phase</a> of the
+ tree construction stage.</p>
+ <!-- XXX should set doctype on the Document object, too, unless
+ spec is defined to already point to it if you append -->
+
+
+ <dt>A character token that <em>is</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append that character</a>
+ to the <code>Document</code> node.</p>
+ </dl>
+
+ <h5 id=the-root0><span class=secno>8.2.4.2. </span><dfn id=the-root1>The
+ root element phase</dfn></h5>
+
+ <p>After <a href="#the-initial0">the initial phase</a>, as each token is
+ emitted from the <a href="#tokenisation0">tokenisation</a> stage, it must
+ be processed as described in this section.
+
+ <dl class=switch>
+ <dt>A DOCTYPE token
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <code>Document</code> object
+ with the <code title="">data</code> attribute set to the data given in
+ the comment token.</p>
+
+ <dt>A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append that character</a>
+ to the <code>Document</code> node.</p>
+
+ <dt>A character token that is <em>not</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dt>A start tag token
+
+ <dt>An end tag token
+
+ <dt>An end-of-file token
+
+ <dd>
+ <p>Create an <code><a href="#htmlelement">HTMLElement</a></code> node
+ with the tag name <code><a href="#html">html</a></code>, in the <a
+ href="#html-namespace0">HTML namespace</a>. Append it to the
+ <code>Document</code> object. Switch to <a href="#the-main0">the main
+ phase</a> and reprocess the current token.</p>
+
+ <p class=big-issue>Should probably make end tags be ignored, so that
+ "&lt;/head>&lt;!-- -->&lt;html>" puts the comment before the root node
+ (or should we?)</p>
+ </dl>
+
+ <p>The root element can end up being removed from the <code>Document</code>
+ object, e.g. by scripts; nothing in particular happens in such cases,
+ content continues being appended to the nodes as described in the next
+ section.
+
+ <h5 id=the-main><span class=secno>8.2.4.3. </span><dfn id=the-main0>The
+ main phase</dfn></h5>
+
+ <p>After <a href="#the-root1">the root element phase</a>, each token
+ emitted from the <a href="#tokenisation0">tokenisation</a> stage must be
+ processed as described in <em>this</em> section. This is by far the most
+ involved part of parsing an HTML document.
+
+ <p>The tree construction stage in this phase has several pieces of state: a
+ <a href="#stack">stack of open elements</a>, a <a href="#list-of4">list of
+ active formatting elements</a>, a <a href="#head-element"><code
+ title="">head</code> element pointer</a>, a <a href="#form-element"><code
+ title="">form</code> element pointer</a>, and an <a
+ href="#insertion0">insertion mode</a>.
+
+ <p class=big-issue>We could just fold insertion modes and phases into one
+ concept (and duplicate the two rules common to all insertion modes into
+ all of them).
+
+ <h6 id=the-stack><span class=secno>8.2.4.3.1. </span>The stack of open
+ elements</h6>
+
+ <p>Initially the <dfn id=stack>stack of open elements</dfn> contains just
+ the <code><a href="#html">html</a></code> root element node created in the
+ <a href="#the-root1" title="the root element phase">last phase</a> before
+ switching to <em>this</em> phase (or, in the <a
+ href="#innerhtml1"><code>innerHTML</code> case</a>, the <code><a
+ href="#html">html</a></code> element created to represent the element
+ whose <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute is being set). That's
+ the topmost node of the stack. It never gets popped off the stack. (This
+ stack grows downwards.)
+
+ <p>The <dfn id=current4>current node</dfn> is the bottommost node in this
+ stack.
+
+ <p>Elements in the stack fall into the following categories:
+
+ <dl>
+ <dt><dfn id=special>Special</dfn>
+
+ <dd>
+ <p>The following HTML elements have varying levels of special parsing
+ rules: <code><a href="#address">address</a></code>, <code><a
+ href="#area">area</a></code>, <code><a href="#base">base</a></code>,
+ <code>basefont</code>, <code>bgsound</code>, <code><a
+ href="#blockquote">blockquote</a></code>, <code><a
+ href="#body0">body</a></code>, <code><a href="#br">br</a></code>,
+ <code>center</code>, <code><a href="#col">col</a></code>, <code><a
+ href="#colgroup">colgroup</a></code>, <code><a href="#dd">dd</a></code>,
+ <code>dir</code>, <code><a href="#div">div</a></code>, <code><a
+ href="#dl">dl</a></code>, <code><a href="#dt">dt</a></code>, <code><a
+ href="#embed">embed</a></code>, <code>fieldset</code>,
+ <code>form</code>, <code>frame</code>, <code>frameset</code>, <code><a
+ href="#h1">h1</a></code>, <code><a href="#h2">h2</a></code>, <code><a
+ href="#h3">h3</a></code>, <code><a href="#h4">h4</a></code>, <code><a
+ href="#h5">h5</a></code>, <code><a href="#h6">h6</a></code>, <code><a
+ href="#head">head</a></code>, <code><a href="#hr">hr</a></code>,
+ <code><a href="#iframe">iframe</a></code>,
+ <code>image</code><!-- XXX ? this isn't an element that can end up
+ on the stack-->,
+ <code><a href="#img">img</a></code>, <code>input</code>,
+ <code>isindex</code>, <code><a href="#li">li</a></code>, <code><a
+ href="#link">link</a></code>, <code>listing</code>, <code><a
+ href="#menu">menu</a></code>, <code><a href="#meta0">meta</a></code>,
+ <code>noembed</code>, <code>noframes</code>, <code><a
+ href="#noscript">noscript</a></code>, <code><a href="#ol">ol</a></code>,
+ <code>optgroup</code>, <code>option</code>, <code><a
+ href="#p">p</a></code>, <code><a href="#param">param</a></code>,
+ <code>plaintext</code>, <code><a href="#pre">pre</a></code>, <code><a
+ href="#script0">script</a></code>, <code>select</code>,
+ <code>spacer</code>, <code><a href="#style">style</a></code>, <code><a
+ href="#tbody">tbody</a></code>, <code>textarea</code>, <code><a
+ href="#tfoot0">tfoot</a></code>, <code><a
+ href="#thead0">thead</a></code>, <code><a
+ href="#title1">title</a></code>, <code><a href="#tr">tr</a></code>,
+ <code><a href="#ul">ul</a></code>, and <code>wbr</code>.
+
+ <dt><dfn id=scoping>Scoping</dfn>
+
+ <dd>
+ <p>The following HTML elements introduce new <a href="#have-an"
+ title="has an element in scope">scopes</a> for various parts of the
+ parsing: <code>button</code>, <code><a
+ href="#caption0">caption</a></code>, <code><a
+ href="#html">html</a></code>, <code>marquee</code>, <code><a
+ href="#object">object</a></code>, <code><a
+ href="#table">table</a></code>, <code><a href="#td">td</a></code> and
+ <code><a href="#th">th</a></code>.
+
+ <dt><dfn id=formatting>Formatting</dfn>
+
+ <dd>
+ <p>The following HTML elements are those that end up in the <a
+ href="#list-of4">list of active formatting elements</a>: <code><a
+ href="#a">a</a></code>, <code><a href="#b">b</a></code>,
+ <code>big</code>, <code><a href="#em">em</a></code>, <code><a
+ href="#font">font</a></code>, <code><a href="#i">i</a></code>,
+ <code>nobr</code>, <code>s</code>, <code><a
+ href="#small">small</a></code>, <code>strike</code>, <code><a
+ href="#strong">strong</a></code>, <code>tt</code>, and <code>u</code>.
+
+ <dt><dfn id=phrasing>Phrasing</dfn>
+
+ <dd>
+ <p>All other elements found while parsing an HTML document.
+ </dl>
+
+ <p class=big-issue>Still need to add these new elements to the lists:
+ <code><a href="#event-source">event-source</a></code>, <code><a
+ href="#section">section</a></code>, <code><a href="#nav">nav</a></code>,
+ <code><a href="#article">article</a></code>, <code><a
+ href="#aside">aside</a></code>, <code><a href="#header">header</a></code>,
+ <code><a href="#footer">footer</a></code>, <code><a
+ href="#datagrid0">datagrid</a></code>, <code><a
+ href="#command0">command</a></code>.
+
+ <p>The <a href="#stack">stack of open elements</a> is said to <dfn
+ id=have-an title="has an element in scope">have an element in scope</dfn>
+ or <dfn id=have-an0 title="has an element in table scope">have an element
+ in <em>table scope</em></dfn> when the following algorithm terminates in a
+ match state:
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a
+ href="#current4">current node</a> (the bottommost node of the stack).
+
+ <li>
+ <p>If <var title="">node</var> is the target node, terminate in a match
+ state.
+
+ <li>
+ <p>Otherwise, if <var title="">node</var> is a <code><a
+ href="#table">table</a></code> element, terminate in a failure state.
+
+ <li>
+ <p>Otherwise, if the algorithm is the "has an element in scope" variant
+ (rather than the "has an element in table scope" variant), and <var
+ title="">node</var> is one of the following, terminate in a failure
+ state:</p>
+
+ <ul class=brief>
+ <li><code><a href="#caption0">caption</a></code>
+
+ <li><code><a href="#td">td</a></code>
+
+ <li><code><a href="#th">th</a></code>
+
+ <li><code>button</code>
+
+ <li><code>marquee</code>
+
+ <li><code><a href="#object">object</a></code>
+ </ul>
+
+ <li>
+ <p>Otherwise, if <var title="">node</var> is an <code><a
+ href="#html">html</a></code> element, terminate in a failure state.
+ (This can only happen if the <var title="">node</var> is the topmost
+ node of the <a href="#stack">stack of open elements</a>, and prevents
+ the next step from being invoked if there are no more elements in the
+ stack.)
+
+ <li>
+ <p>Otherwise, set <var title="">node</var> to the previous entry in the
+ <a href="#stack">stack of open elements</a> and return to step 2. (This
+ will never fail, since the loop will always terminate in the previous
+ step if the top of the stack is reached.)
+ </ol>
+
+ <p>Nothing happens if at any time any of the elements in the <a
+ href="#stack">stack of open elements</a> are moved to a new location in,
+ or removed from, the <code>Document</code> tree. In particular, the stack
+ is not changed in this situation. This can cause, amongst other strange
+ effects, content to be appended to nodes that are no longer in the DOM.
+
+ <p class=note>In some cases (namely, when <a href="#adoptionAgency">closing
+ misnested formatting elements</a>), the stack is manipulated in a
+ random-access fashion.
+
+ <h6 id=the-list><span class=secno>8.2.4.3.2. </span>The list of active
+ formatting elements</h6>
+
+ <p>Initially the <dfn id=list-of4>list of active formatting elements</dfn>
+ is empty. It is used to handle mis-nested <a href="#formatting"
+ title=formatting>formatting element tags</a>.
+
+ <p>The list contains elements in the <a href="#formatting">formatting</a>
+ category, and scope markers. The scope markers are inserted when entering
+ buttons, <code><a href="#object">object</a></code> elements, marquees,
+ table cells, and table captions, and are used to prevent formatting from
+ "leaking" into tables, buttons, <code><a href="#object">object</a></code>
+ elements, and marquees.
+
+ <p>When the steps below require the UA to <dfn id=reconstruct>reconstruct
+ the active formatting elements</dfn>, the UA must perform the following
+ steps:
+
+ <ol>
+ <li>If there are no entries in the <a href="#list-of4">list of active
+ formatting elements</a>, then there is nothing to reconstruct; stop this
+ algorithm.
+
+ <li>If the last (most recently added) entry in the <a
+ href="#list-of4">list of active formatting elements</a> is a marker, or
+ if it is an element that is in the <a href="#stack">stack of open
+ elements</a>, then there is nothing to reconstruct; stop this algorithm.
+
+ <li>Let <var title="">entry</var> be the last (most recently added)
+ element in the <a href="#list-of4">list of active formatting
+ elements</a>.
+
+ <li>If there are no entries before <var title="">entry</var> in the <a
+ href="#list-of4">list of active formatting elements</a>, then jump to
+ step 8.
+
+ <li>Let <var title="">entry</var> be the entry one earlier than <var
+ title="">entry</var> in the <a href="#list-of4">list of active formatting
+ elements</a>.
+
+ <li>If <var title="">entry</var> is neither a marker nor an element that
+ is also in the <a href="#stack">stack of open elements</a>, go to step 4.
+
+ <li>Let <var title="">entry</var> be the element one later than <var
+ title="">entry</var> in the <a href="#list-of4">list of active formatting
+ elements</a>.
+
+ <li>Perform a shallow clone of the element <var title="">entry</var> to
+ obtain <var title="">clone</var>. <a href="#refsDOM3CORE">[DOM3CORE]</a>
+
+ <li>Append <var title="">clone</var> to the <a href="#current4">current
+ node</a> and push it onto the <a href="#stack">stack of open elements</a>
+ so that it is the new <a href="#current4">current node</a>.
+
+ <li>Replace the entry for <var title="">entry</var> in the list with an
+ entry for <var title="">clone</var>.
+
+ <li>If the entry for <var title="">clone</var> in the <a
+ href="#list-of4">list of active formatting elements</a> is not the last
+ entry in the list, return to step 7.
+ </ol>
+
+ <p>This has the effect of reopening all the formatting elements that were
+ opened in the current body, cell, or caption (whichever is youngest) that
+ haven't been explicitly closed.
+
+ <p class=note>The way this specification is written, the <a
+ href="#list-of4">list of active formatting elements</a> always consists of
+ elements in chronological order with the least recently added element
+ first and the most recently added element last (except while steps 8
+ to 11 of the above algorithm are being executed, of course).
+
+ <p>When the steps below require the UA to <dfn id=clear0>clear the list of
+ active formatting elements up to the last marker</dfn>, the UA must
+ perform the following steps:
+
+ <ol>
+ <li>Let <var title="">entry</var> be the last (most recently added) entry
+ in the <a href="#list-of4">list of active formatting elements</a>.
+
+ <li>Remove <var title="">entry</var> from the <a href="#list-of4">list of
+ active formatting elements</a>.
+
+ <li>If <var title="">entry</var> was a marker, then stop the algorithm at
+ this point. The list has been cleared up to the last marker.
+
+ <li>Go to step 1.
+ </ol>
+
+ <h6 id=creating><span class=secno>8.2.4.3.3. </span>Creating and inserting
+ HTML elements</h6>
+
+ <p>When the steps below require the UA to <dfn id=create title="create an
+ element for the token">create an element for a token</dfn>, the UA must
+ create a node implementing the interface appropriate for the element type
+ corresponding to the tag name of the token (as given in the section of
+ this specification that defines that element, e.g. for an <code><a
+ href="#a">a</a></code> element it would be the <code><a
+ href="#htmlanchorelement">HTMLAnchorElement</a></code> interface), with
+ the tag name being the name of that element, with the node being in the <a
+ href="#html-namespace0">HTML namespace</a>, and with the attributes on the
+ node being those given in the given token.
+
+ <p>When the steps below require the UA to <dfn id=insert>insert an HTML
+ element</dfn> for a token, the UA must first <a href="#create">create an
+ element for the token</a>, and then append this node to the <a
+ href="#current4">current node</a>, and push it onto the <a
+ href="#stack">stack of open elements</a> so that it is the new <a
+ href="#current4">current node</a>.
+
+ <p>The steps below may also require that the UA insert an HTML element in a
+ particular place, in which case the UA must <a href="#create">create an
+ element for the token</a> and then insert or append the new node in the
+ location specified. (This happens in particular during the parsing of
+ tables with invalid content.)
+
+ <p>The interface appropriate for an element that is not defined in this
+ specification is <code><a href="#htmlelement">HTMLElement</a></code>.
+
+ <h6 id=closing><span class=secno>8.2.4.3.4. </span>Closing elements that
+ have implied end tags</h6>
+
+ <p>When the steps below require the UA to <dfn id=generate>generate implied
+ end tags</dfn>, then, if the <a href="#current4">current node</a> is a
+ <code><a href="#dd">dd</a></code> element, a <code><a
+ href="#dt">dt</a></code> element, an <code><a href="#li">li</a></code>
+ element, a <code><a href="#p">p</a></code> element, a <code><a
+ href="#td">td</a></code> element, a <code><a href="#th">th</a></code>
+ element, or a <code><a href="#tr">tr</a></code> element, the UA must act
+ as if an end tag with the respective tag name had been seen and then <a
+ href="#generate">generate implied end tags</a> again.
+
+ <p>When a step requires the UA to generate implied end tags but lists an
+ element to exclude from the process, the UA must perform the above
+ steps as if that element was not in the above list.
+
+ <h6 id=the-element><span class=secno>8.2.4.3.5. </span>The element pointers</h6>
+
+ <p>Initially the <dfn id=head-element><code title="">head</code> element
+ pointer</dfn> and the <dfn id=form-element><code title="">form</code>
+ element pointer</dfn> are both null.
+
+ <p>Once a <code><a href="#head">head</a></code> element has been parsed
+ (whether implicitly or explicitly) the <a href="#head-element"><code
+ title="">head</code> element pointer</a> gets set to point to this node.
+
+ <p>The <a href="#form-element"><code title="">form</code> element
+ pointer</a> points to the last <code>form</code> element that was opened
+ and whose end tag has not yet been seen. It is used to make form controls
+ associate with forms in the face of dramatically bad markup, for
+ historical reasons.
+
+ <h6 id=the-insertion><span class=secno>8.2.4.3.6. </span>The insertion mode</h6>
+
+ <p>Initially the <dfn id=insertion0>insertion mode</dfn> is "<a
+ href="#before2" title="insertion mode: before head">before head</a>". It
+ can change to "<a href="#in-head" title="insertion mode: in head">in
+ head</a>", "<a href="#after1" title="insertion mode: after head">after
+ head</a>", "<a href="#in-body" title="insertion mode: in body">in
+ body</a>", "<a href="#in-table" title="insertion mode: in table">in
+ table</a>", "<a href="#in-caption" title="insertion mode: in caption">in
+ caption</a>", "<a href="#in-column" title="insertion mode: in column
+ group">in column group</a>", "<a href="#in-table0" title="insertion mode:
+ in table body">in table body</a>", "<a href="#in-row" title="insertion
+ mode: in row">in row</a>", "<a href="#in-cell" title="insertion mode: in
+ cell">in cell</a>", "<a href="#in-select" title="insertion mode: in
+ select">in select</a>", "<a href="#after2" title="insertion mode: after
+ body">after body</a>", "<a href="#in-frameset" title="insertion mode: in
+ frameset">in frameset</a>", and "<a href="#after3" title="insertion mode:
+ after frameset">after frameset</a>" during the course of the parsing, as
+ described below. It affects how certain tokens are processed.
+
+ <p>If the tree construction stage is switched from <a href="#the-main0">the
+ main phase</a> to <a href="#the-trailing0">the trailing end phase</a> and
+ back again, the various pieces of state are not reset; the UA must act as
+ if the state was maintained.
+
+ <p>When the steps below require the UA to <dfn id=reset>reset the insertion
+ mode appropriately</dfn>, it means the UA must follow these steps:
+
+ <ol>
+ <li>Let <var title="">last</var> be false.
+
+ <li>Let <var title="">node</var> be the last node in the <a
+ href="#stack">stack of open elements</a>.
+
+ <li>If <var title="">node</var> is the first node in the stack of open
+ elements, then set <var title="">last</var> to true and, if the element whose
+ <code title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code>
+ attribute is being set is neither a <code><a href="#td">td</a></code>
+ element nor a <code><a href="#th">th</a></code> element, also set <var
+ title="">node</var> to the element whose <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code>
+ attribute is being set. (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ <li>If <var title="">node</var> is a <code>select</code> element, then
+ switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-select" title="insertion mode: in select">in select</a>" and
+ abort these steps. (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ <li>If <var title="">node</var> is a <code><a href="#td">td</a></code> or
+ <code><a href="#th">th</a></code> element, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-cell"
+ title="insertion mode: in cell">in cell</a>" and abort these steps.
+
+ <li>If <var title="">node</var> is a <code><a href="#tr">tr</a></code>
+ element, then switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-row" title="insertion mode: in row">in row</a>" and abort these
+ steps.
+
+ <li>If <var title="">node</var> is a <code><a
+ href="#tbody">tbody</a></code>, <code><a href="#thead0">thead</a></code>,
+ or <code><a href="#tfoot0">tfoot</a></code> element, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-table0"
+ title="insertion mode: in table body">in table body</a>" and abort these
+ steps.
+
+ <li>If <var title="">node</var> is a <code><a
+ href="#caption0">caption</a></code> element, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-caption"
+ title="insertion mode: in caption">in caption</a>" and abort these steps.
+
+ <li>If <var title="">node</var> is a <code><a
+ href="#colgroup">colgroup</a></code> element, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-column"
+ title="insertion mode: in column group">in column group</a>" and abort
+ these steps. (<a href="#innerhtml1"><code>innerHTML</code> case</a>)
+
+ <li>If <var title="">node</var> is a <code><a
+ href="#table">table</a></code> element, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-table"
+ title="insertion mode: in table">in table</a>" and abort these steps.
+
+ <li>If <var title="">node</var> is a <code><a href="#head">head</a></code>
+ element, then switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-body" title="insertion mode: in body">in body</a>" ("<a
+ href="#in-body" title="insertion mode: in body">in body</a>"! <em> not
+ "<a href="#in-head" title="insertion mode: in head">in head</a>"</em>!)
+ and abort these steps. (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ <li>If <var title="">node</var> is a <code><a
+ href="#body0">body</a></code> element, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-body"
+ title="insertion mode: in body">in body</a>" and abort these steps.
+
+ <li>If <var title="">node</var> is a <code>frameset</code> element, then
+ switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-frameset" title="insertion mode: in frameset">in frameset</a>"
+ and abort these steps. (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>)
+
+ <li>If <var title="">node</var> is an <code><a
+ href="#html">html</a></code> element, then: if the <a
+ href="#head-element"><code title="">head</code> element pointer</a> is
+ null, switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#before2" title="insertion mode: before head">before head</a>",
+ otherwise, switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#after1" title="insertion mode: after head">after head</a>". In
+ either case, abort these steps. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</li>
+ <!-- XXX can the head element pointer ever be
+ non-null when we're going through these steps? -->
+
+ <li>If <var title="">last</var> is true, then set the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-body"
+ title="insertion mode: in body">in body</a>" and abort these steps. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)
+
+ <li>Let <var title="">node</var> now be the node before <var
+ title="">node</var> in the <a href="#stack">stack of open elements</a>.
+
+ <li>Return to step 3.
+ </ol>
+ <!--When you don't have to handle innerHTML, you can use this
+simplified explanation instead:
+
+ <ol>
+
+ <li><p>If the <span>stack of open elements</span> <span title="has
+ an element in table scope">has a <code>td</code> or <code>th</code>
+ element in table scope</span>, then switch the <span>insertion
+ mode</span> to "<span title="insertion mode: in cell">in
+ cell</span>".</p></li>
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>tr</code> element
+ in table scope</span>, then switch the <span>insertion mode</span>
+ to "<span title="insertion mode: in row">in row</span>".</p></li>
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>tbody</code>,
+ <code>tfoot</code>, or <code>thead</code> element in table
+ scope</span>, then switch the <span>insertion mode</span> to "<span
+ title="insertion mode: in table body">in table
+ body</span>".</p></li>
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>caption</code>
+ element in table scope</span>, then switch the <span>insertion
+ mode</span> to "<span title="insertion mode: in caption">in
+ caption</span>".</p></li>
+
+ ( you can't reach this point with a colgroup element on the
+ stack )
+
+ <li><p>Otherwise, if the <span>stack of open elements</span> <span
+ title="has an element in table scope">has a <code>table</code>
+ element in table scope</span>, then switch the <span>insertion
+ mode</span> to "<span title="insertion mode: in table">in
+ table</span>".</p></li>
+
+ <li><p>Otherwise, switch the <span>insertion mode</span> to "<span
+ title="insertion mode: in body">in body</span>".</p></li>
+
+ </ol>
+-->
+
+ <h6 id=how-to0><span class=secno>8.2.4.3.7. </span>How to handle tokens in
+ the main phase</h6>
+
+ <p>Tokens in the main phase must be handled as follows:
+
+ <dl class=switch>
+ <dt>A DOCTYPE token
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>A start tag token with the tag name "html"
+
+ <dd>
+ <p>If this start tag token was not the first start tag token, then it is
+ a <a href="#parse">parse error</a>.</p>
+
+ <p>For each attribute on the token, check to see if the attribute is
+ already present on the top element of the <a href="#stack">stack of open
+ elements</a>. If it is not, add the attribute and its corresponding
+ value to that element.</p>
+
+ <dt>An end-of-file token
+
+ <dd>
+ <p><a href="#generate">Generate implied end tags.</a></p>
+
+ <p>If there are more than two nodes on the <a href="#stack">stack of open
+ elements</a>, or if there are two nodes but the second node is not a
+ <code><a href="#body0">body</a></code> node, this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Otherwise, if the parser was originally created in order to handle the
+ setting of an element's <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute, and there's more than
+ one element in the <a href="#stack">stack of open elements</a>, and the
+ second node on the <a href="#stack">stack of open elements</a> is not a
+ <code><a href="#body0">body</a></code> node, then this is a <a
+ href="#parse">parse error</a>. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p><a href="#stops">Stop parsing.</a></p>
+
+ <p class=big-issue>This fails because it doesn't imply HEAD and BODY
+ tags. We should probably expand out the insertion modes and merge them
+ with phases and then put the three things here into each insertion mode
+ instead of trying to factor them out so carefully.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Depends on the <a href="#insertion0">insertion mode</a>:</p>
+
+ <dl class=switch>
+ <dt>If the <a href="#insertion0">insertion mode</a> is "<dfn id=before2
+ title="insertion mode: before head">before head</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag token with the tag name "head"
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Set the <a href="#head-element"><code title="">head</code> element
+ pointer</a> to this new element node.</p>
+
+ <p>Append the new element to the <a href="#current4">current node</a>
+ and push it onto the <a href="#stack">stack of open elements</a>.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-head" title="insertion mode: in head">in head</a>".</p>
+
+ <dt>A start tag token whose tag name is one of: "base", "link",
+ "meta", "script", "style", "title"
+
+ <dd>
+ <p>Act as if a start tag token with the tag name "head" and no
+ attributes had been seen, then reprocess the current token.</p>
+
+ <p class=note>This will result in a <code><a
+ href="#head">head</a></code> element being generated, and with the
+ current token being reprocessed in the "<a href="#in-head"
+ title="insertion mode: in head">in head</a>" <a
+ href="#insertion0">insertion mode</a>.</p>
+
+ <dt>An end tag with the tag name "html"
+
+ <dd>
+ <p>Act as if a start tag token with the tag name "head" and no
+ attributes had been seen, then reprocess the current token.</p>
+
+ <dt>Any other end tag
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>A character token that is <em>not</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dt>Any other start tag token
+
+ <dd>
+ <p>Act as if a start tag token with the tag name "head" and no
+ attributes had been seen, then reprocess the current token.</p>
+
+ <p class=note>This will result in an empty <code><a
+ href="#head">head</a></code> element being generated, with the
+ current token being reprocessed in the "<a href="#after1"
+ title="insertion mode: after head">after head</a>" <a
+ href="#insertion0">insertion mode</a>.</p>
+ </dl>
+
+ <dt id=parsing-main-inhead>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-head title="insertion mode: in head">in
+ head</dfn>"
+
+ <dd>
+ <p>Handle the token as follows.</p>
+
+ <p class=note>The rules for handling "title", "style", and "script"
+ start tags are similar, but not identical.</p>
+
+ <p class=note>It is possible for the <a href="#tree-construction0">tree
+ construction</a> stage's <a href="#the-main0" title="the main
+ phase">main phase</a> to be in the "<a href="#in-head"
+ title="insertion mode: in head">in head</a>" <a
+ href="#insertion0">insertion mode</a> without the <a
+ href="#current4">current node</a> being a <code><a
+ href="#head">head</a></code> element, e.g. if a <code><a
+ href="#head">head</a></code> end tag is immediately followed by a
+ <code><a href="#meta0">meta</a></code> start tag.</p>
+
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag with the tag name "title"
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Append the new element to the node pointed to by the <a
+ href="#head-element"><code title="">head</code> element pointer</a>,
+ or, if that is null (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>), to the <a href="#current4">current node</a>.</p>
+
+ <p>Switch the tokeniser's <a href="#content2">content model flag</a>
+ to the RCDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node to the <code><a
+ href="#title1">title</a></code> element node whose contents is the
+ concatenation of all those tokens' characters.</p>
+
+ <p>The tokeniser's <a href="#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the tag name "title",
+ ignore it. Otherwise, this is a <a href="#parse">parse error</a>.</p>
+
+ <dt>A start tag with the tag name "style"
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>, unless the <a href="#insertion0">insertion mode</a> is "<a
+ href="#in-head" title="insertion mode: in head">in head</a>" and the
+ <a href="#head-element"><code title="">head</code> element
+ pointer</a> is not null, in which case append it to the node pointed
+ to by the <a href="#head-element"><code title="">head</code> element
+ pointer</a>. <!--
+ <head></head><style><body> should put the style block in the
+ head, and does so by switching back to in head, but the head
+ isn't the current node at that point (comments should go
+ between the head and the body) --></p>
+
+ <p>Switch the tokeniser's <a href="#content2">content model flag</a>
+ to the CDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node to the <code><a
+ href="#style">style</a></code> element node whose contents is the
+ concatenation of all those tokens' characters.</p>
+
+ <p>The tokeniser's <a href="#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the tag name "style",
+ ignore it. Otherwise, this is a <a href="#parse">parse error</a>.</p>
+
+ <dt id=scriptTag>A start tag with the tag name "script"
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Mark the element as being <a
+ href="#parser-inserted">"parser-inserted"</a>. This ensures that, if
+ the script is external, any <code title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code> calls in the
+ script will execute in-line, instead of blowing the document away,
+ as would happen in most other cases.</p>
+
+ <p>Switch the tokeniser's <a href="#content2">content model flag</a>
+ to the CDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node to the <code><a
+ href="#script0">script</a></code> element node whose contents is the
+ concatenation of all those tokens' characters.</p>
+
+ <p>The tokeniser's <a href="#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is not an end tag token with the tag name
+ "script", then this is a <a href="#parse">parse error</a>; mark the
+ <code><a href="#script0">script</a></code> element as <a
+ href="#already">"already executed"</a>. Otherwise, the token is the
+ <code><a href="#script0">script</a></code> element's end tag, so
+ ignore it.</p>
+
+ <p>If the parser was originally created in order to handle the
+ setting of a node's <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute, then mark the
+ <code><a href="#script0">script</a></code> element as <a
+ href="#already">"already executed"</a>, and skip the rest of the
+ processing described for this token (including the part below where
+ "<a href="#the-script" title="the script that will execute as soon
+ as the parser resumes">scripts that will execute as soon as the
+ parser resumes</a>" are executed). (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p class=note>Marking the <code><a href="#script0">script</a></code>
+ element as "already executed" prevents it from executing when it is
+ inserted into the document a few paragraphs below. Scripts missing
+ their end tags and scripts that were inserted using <code
+ title=dom-innerHTML-HTML><a href="#innerhtml0">innerHTML</a></code>
+ aren't executed.</p>
+
+ <p>Let the <var title="">old insertion point</var> have the same
+ value as the current <a href="#insertion">insertion point</a>. Let
+ the <a href="#insertion">insertion point</a> be just before the <a
+ href="#next-input">next input character</a>.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>, unless the <a href="#insertion0">insertion mode</a> is "<a
+ href="#in-head" title="insertion mode: in head">in head</a>" and the
+ <a href="#head-element"><code title="">head</code> element
+ pointer</a> is not null, in which case append it to the node pointed
+ to by the <a href="#head-element"><code title="">head</code> element
+ pointer</a>. <!--
+ <head></head><script><body> should put the script in the head,
+ and does so by switching back to in head, but the head isn't
+ the current node at that point (comments should go between the
+ head and the body) -->
+ <a href="#running0" title="running a script">Special processing
+ occurs when a <code>script</code> element is inserted into a
+ document</a> that might cause some script to execute, which might
+ cause <a href="#document.write0" title=dom-document-write-HTML>new
+ characters to be inserted into the tokeniser</a>.</p>
+
+ <p>Let the <a href="#insertion">insertion point</a> have the value of
+ the <var title="">old insertion point</var>. (In other words,
+ restore the <a href="#insertion">insertion point</a> to the value it
+ had before the previous paragraph. This value might be the
+ "undefined" value.)</p>
+
+ <p id=scriptTagParserResumes>At this stage, if there is <a
+ href="#the-script" title="the script that will execute as soon as
+ the parser resumes">a script that will execute as soon as the parser
+ resumes</a>, then:</p>
+
+ <dl class=switch>
+ <dt>If the tree construction stage is <a href="#nestedParsing">being
+ called reentrantly</a>, say from a call to <code
+ title=dom-document-write-HTML><a
+ href="#document.write0">document.write()</a></code>:
+
+ <dd>
+ <p>Abort the processing of any nested invocations of the tokeniser,
+ yielding control back to the caller. (Tokenisation will resume
+ when the caller returns to the "outer" tree construction stage.)
+
+ <dt>Otherwise:
+
+ <dd>
+ <p>Follow these steps:</p>
+
+ <ol>
+ <li>
+ <p>Let <var title="">the script</var> be <a
+ href="#the-script">the script that will execute as soon as the
+ parser resumes</a>. There is no longer <a href="#the-script"
+ title="the script that will execute as soon as the parser
+ resumes">a script that will execute as soon as the parser
+ resumes</a>.
+
+ <li>
+ <p><a href="#pause">Pause</a> until the script has
+ <span>completed loading</span><!-- XXX xref -->.
+
+ <li>
+ <p>Let the <a href="#insertion">insertion point</a> be just
+ before the <a href="#next-input">next input character</a>.
+
+ <li>
+ <p><a href="#executing0" title="executing a script block">Execute
+ the script</a>.
+
+ <li>
+ <p>Let the <a href="#insertion">insertion point</a> be undefined
+ again.
+
+ <li>
+ <p>If there is once again <a href="#the-script" title="the script
+ that will execute as soon as the parser resumes">a script that
+ will execute as soon as the parser resumes</a>, then repeat
+ these steps from step 1.
+ </ol>
+ </dl>
+
+ <dt>A start tag with the tag name "base", "link", or "meta"
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>Append the new element to the node pointed to by the <a
+ href="#head-element"><code title="">head</code> element pointer</a>,
+ or, if that is null (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>), to the <a href="#current4">current node</a>.</p>
+
+ <dt>An end tag with the tag name "head"
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is a <code><a
+ href="#head">head</a></code> element, pop the <a
+ href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>. Otherwise, this is a <a href="#parse">parse
+ error</a>.</p>
+ <!-- might happen if you see two </head>s
+ and something in between the two sends you from "after head"
+ back to "in head" -->
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#after1" title="insertion mode: after head">after head</a>".</p>
+
+ <dt>An end tag with the tag name "html"
+
+ <dd>
+ <p>Act as described in the "anything else" entry below.</p>
+
+ <dt>A start tag with the tag name "head"
+
+ <dt>Any other end tag
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is a <code><a
+ href="#head">head</a></code> element, act as if an end tag token
+ with the tag name "head" had been seen.</p>
+
+ <p>Otherwise, change the <a href="#insertion0">insertion mode</a> to
+ "<a href="#after1" title="insertion mode: after head">after
+ head</a>".</p>
+
+ <p>Then, reprocess the current token.</p>
+
+ <p class=big-issue>In certain UAs, <a
+ href="https://bugzilla.mozilla.org/attachment.cgi?id=180157&amp;action=view">some
+ elements</a> don't trigger the "in body" mode straight away, but
+ instead get put into the head. Do we want to copy that?</p>
+ </dl>
+
+ <dt>If the <a href="#insertion0">insertion mode</a> is "<dfn id=after1
+ title="insertion mode: after head">after head</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag token with the tag name "body"
+
+ <dd>
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>body</code> element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-body" title="insertion mode: in body">in body</a>".</p>
+
+ <dt>A start tag token with the tag name "frameset"
+
+ <dd>
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>frameset</code> element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-frameset" title="insertion mode: in frameset">in
+ frameset</a>".</p>
+
+ <dt>A start tag token whose tag name is one of: "base", "link",
+ "meta", "script", "style", "title"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Switch the <a
+ href="#insertion0">insertion mode</a> back to "<a href="#in-head"
+ title="insertion mode: in head">in head</a>" and reprocess the
+ token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Act as if a start tag token with the tag name "body" and no
+ attributes had been seen, and then reprocess the current token.</p>
+ </dl>
+
+ <dt id=parsing-main-inbody>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-body title="insertion mode: in body">in
+ body</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#append" title="append a character">Append the token's
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag token whose tag name is one of: "script", "style"
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> had been "<a href="#in-head" title="insertion mode: in
+ head">in head</a>".</p>
+
+ <dt>A start tag token whose tag name is one of: "base", "link",
+ "meta", "title"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Process the token as if the <a
+ href="#insertion0">insertion mode</a> had been "<a href="#in-head"
+ title="insertion mode: in head">in head</a>".</p>
+
+ <dt>A start tag token with the tag name "body"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>.</p>
+
+ <p>If the second element on the <a href="#stack">stack of open
+ elements</a> is not a <code><a href="#body0">body</a></code>
+ element, or, if the <a href="#stack">stack of open elements</a> has
+ only one node on it, then ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, for each attribute on the token, check to see if the
+ attribute is already present on the <code><a
+ href="#body0">body</a></code> element (the second element) on the <a
+ href="#stack">stack of open elements</a>. If it is not, add the
+ attribute and its corresponding value to that element.</p>
+
+ <dt>An end tag with the tag name "body"
+
+ <dd>
+ <p>If the second element in the <a href="#stack">stack of open
+ elements</a> is not a <code><a href="#body0">body</a></code>
+ element, this is a <a href="#parse">parse error</a>. Ignore the
+ token. (<a href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p class=big-issue>this needs to handle closing of implied elements,
+ but without closing them</p>
+
+ <p>If the <a href="#current4">current node</a> is not the <code><a
+ href="#body0">body</a></code> element, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#after2" title="insertion mode: after body">after body</a>".</p>
+
+ <dt>An end tag with the tag name "html"
+
+ <dd>
+ <p>Act as if an end tag with the tag name "body" had been seen, then, if
+ that token wasn't ignored, reprocess the current token.</p>
+
+ <p class=note>The fake end tag token here can only be ignored in the
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt>A start tag whose tag name is one of: "address", "blockquote",
+ "center", "dir", "div", "dl", "fieldset", "listing", "menu", "ol",
+ "p", "ul"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ <dt>A start tag whose tag name is "pre"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ <p>If the next token is a U+000A LINE FEED (LF) character token, then
+ ignore that token and move on to the next one. (Newlines at the
+ start of <code><a href="#pre">pre</a></code> blocks are ignored as
+ an authoring convenience.)</p>
+
+ <dt>A start tag whose tag name is "form"
+
+ <dd>
+ <p>If the <a href="#form-element"><code title=form>form</code>
+ element pointer</a> is not null, ignore the token with a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Otherwise:</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html Element">Insert an HTML
+ element</a> for the token, and set the <code title=form>form</code>
+ element pointer to point to the element created.</p>
+
+ <dt>A start tag whose tag name is "li"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p>Run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a
+ href="#current4">current node</a> (the bottommost node of the
+ stack).
+
+ <li>
+ <p>If <var title="">node</var> is an <code><a
+ href="#li">li</a></code> element, then pop all the nodes from the
+ <a href="#current4">current node</a> up to <var
+ title="">node</var>, including <var title="">node</var>, then stop
+ this algorithm. If more than one node is popped, then this is a <a
+ href="#parse">parse error</a>.
+
+ <li>
+ <p>If <var title="">node</var> is not in the <a
+ href="#formatting">formatting</a> category, and is not in the <a
+ href="#phrasing">phrasing</a> category, and is not an <code><a
+ href="#address">address</a></code> or <code><a
+ href="#div">div</a></code> element, then stop this algorithm.
+ </li>
+ <!-- an element <foo> is in this
+ list if the following markup:
+
+ <!DOCTYPE html><body><ol><li><foo><li>
+
+ ...results in the second <li> not being (in any way) a
+ descendant of the first <li>, or if <foo> is a formatting
+ element that gets reopened later. -->
+
+ <li>
+ <p>Otherwise, set <var title="">node</var> to the previous entry in
+ the <a href="#stack">stack of open elements</a> and return to step
+ 2.
+ </ol>
+
+ <p>Finally, <a href="#insert" title="insert an html element">insert
+ an <code>li</code> element</a>.</p>
+
+ <dt>A start tag whose tag name is "dd" or "dt"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p>Run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a
+ href="#current4">current node</a> (the bottommost node of the
+ stack).
+
+ <li>
+ <p>If <var title="">node</var> is a <code><a
+ href="#dd">dd</a></code> or <code><a href="#dt">dt</a></code>
+ element, then pop all the nodes from the <a
+ href="#current4">current node</a> up to <var title="">node</var>,
+ including <var title="">node</var>, then stop this algorithm. If
+ more than one node is popped, then this is a <a
+ href="#parse">parse error</a>.
+
+ <li>
+ <p>If <var title="">node</var> is not in the <a
+ href="#formatting">formatting</a> category, and is not in the <a
+ href="#phrasing">phrasing</a> category, and is not an <code><a
+ href="#address">address</a></code> or <code><a
+ href="#div">div</a></code> element, then stop this algorithm.
+ </li>
+ <!-- an element <foo> is in this
+ list if the following markup:
+
+ <!DOCTYPE html><body><ol><dt><foo><dt>
+
+ ...results in the second <dt> not being (in any way) a
+ descendant of the first <dt>, or if <foo> is a formatting
+ element that gets reopened later. -->
+
+ <li>
+ <p>Otherwise, set <var title="">node</var> to the previous entry in
+ the <a href="#stack">stack of open elements</a> and return to step
+ 2.
+ </ol>
+
+ <p>Finally, <a href="#insert" title="insert an html element">insert
+ an HTML element</a> with the same tag name as the token's.</p>
+
+ <dt>A start tag token whose tag name is "plaintext"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ <p>Switch the <a href="#content2">content model flag</a> to the
+ PLAINTEXT state.</p>
+
+ <p class=note>Once a start tag with the tag name "plaintext" has been
+ seen, that will be the last token ever seen other than character
+ tokens (and the end-of-file token), because there is no way to
+ switch the <a href="#content2">content model flag</a> out of the
+ PLAINTEXT state.</p>
+
+ <dt>An end tag whose tag name is one of: "address", "blockquote",
+ "center", "dir", "div", "dl", "fieldset", "listing", "menu", "ol",
+ "pre", "ul"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an">has an element in scope</a> with the same tag name
+ as that of the token, then <a href="#generate">generate implied end
+ tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as that of the token, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an">has an element in scope</a> with the same tag name
+ as that of the token, then pop elements from this stack until an
+ element with that tag name has been popped from the stack.</p>
+ <!-- XXX quirk (except for in certain cases?):
+ <p>Otherwise, act as if a start tag with the tag name given in
+ the token had been seen, then reprocess the current token.</p>
+ -->
+
+
+ <dt>An end tag whose tag name is "form"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an">has an element in scope</a> with the same tag name
+ as that of the token, then <a href="#generate">generate implied end
+ tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as that of the token, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Otherwise, if the <a href="#current4">current node</a> is an
+ element with the same tag name as that of the token, pop that element
+ from the stack.</p>
+
+ <p>In any case, set the <a href="#form-element"><code
+ title="">form</code> element pointer</a> to null.</p>
+
+ <dt>An end tag whose tag name is "p"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then <a href="#generate">generate implied end
+ tags</a>, except for <code><a href="#p">p</a></code> elements.</p>
+
+ <p>If the <a href="#current4">current node</a> is not a <code><a
+ href="#p">p</a></code> element, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then pop elements from this stack until the
+ stack no longer <a href="#have-an" title="has an element in
+ scope">has a <code>p</code> element in scope</a>.</p>
+ <!-- XXX quirk:
+ <p>Otherwise, act as if a start tag with the tag name
+ <code>p</code> had been seen, then reprocess the current
+ token.</p>
+ -->
+
+
+ <dt>An end tag whose tag name is "dd", "dt", or "li"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an">has an element in scope</a> whose tag name matches
+ the tag name of the token, then <a href="#generate">generate implied
+ end tags</a>, except for elements with the same tag name as the
+ token.</p>
+
+ <p>If the <a href="#current4">current node</a> is not an element with
+ the same tag name as the token, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an">has an element in scope</a> whose tag name matches
+ the tag name of the token, then pop elements from this stack until
+ an element with that tag name has been popped from the stack.</p>
+
+ <dt>A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+ "h6", then this is a <a href="#parse">parse error</a>; pop elements
+ from the stack until an element with one of those tag names has been
+ popped from the stack.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token.</p>
+
+ <dt>An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5",
+ "h6"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+ "h6", then <a href="#generate">generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as that of the token, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+ "h6", then pop elements from the stack until an element with one of
+ those tag names has been popped from the stack.</p>
+ <!-- XXX quirk:
+ <p>Otherwise, act as if a start tag with the tag name given in
+ the token had been seen, then reprocess the current token.</p>
+ -->
+ </dd>
+ <!-- ADOPTION AGENCY ELEMENTS
+ Mozilla-only: bdo blink del ins sub sup q
+ Safari-only: code dfn kbd nobr samp var wbr
+ Both: a b big em font i s small strike strong tt u -->
+
+ <dt>A start tag whose tag name is "a"
+
+ <dd>
+ <p>If the <a href="#list-of4">list of active formatting elements</a>
+ contains an element whose tag name is "a" between the end of the
+ list and the last marker on the list (or the start of the list if
+ there is no marker on the list), then this is a <a
+ href="#parse">parse error</a>; act as if an end tag with the tag
+ name "a" had been seen, then remove that element from the <a
+ href="#list-of4">list of active formatting elements</a> and the <a
+ href="#stack">stack of open elements</a> if the end tag didn't
+ already remove it (it might not have if the element is not <a
+ href="#have-an0" title="has an element in table scope">in table
+ scope</a>).</p>
+
+ <p class=example>In the non-conforming stream
+ <code>&lt;a&nbsp;href="a">a&lt;table>&lt;a&nbsp;href="b">b&lt;/table>x</code>,
+ the first <code><a href="#a">a</a></code> element would be closed
+ upon seeing the second one, and the "x" character would be inside a
+ link to "b", not to "a". This is despite the fact that the outer
+ <code><a href="#a">a</a></code> element is not in table scope
+ (meaning that a regular <code>&lt;/a></code> end tag at the start of
+ the table wouldn't close the outer <code><a href="#a">a</a></code>
+ element).</p>
+
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Add that element to the <a
+ href="#list-of4">list of active formatting elements</a>.</p>
+
+ <dt>A start tag whose tag name is one of: "b", "big", "em", "font",
+ "i", "nobr", "s", "small", "strike", "strong", "tt", "u"
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Add that element to the <a
+ href="#list-of4">list of active formatting elements</a>.</p>
+
+ <dt id=adoptionAgency>An end tag whose tag name is one of: "a", "b",
+ "big", "em", "font", "i", "nobr", "s", "small", "strike", "strong",
+ "tt", "u"
+
+ <dd>
+ <p>Follow these steps:</p>
+
+ <ol>
+ <li>
+ <p>Let the <var title="">formatting element</var> be the last
+ element in the <a href="#list-of4">list of active formatting
+ elements</a> that:</p>
+
+ <ul>
+ <li>is between the end of the list and the last scope marker in
+ the list, if any, or the start of the list otherwise, and
+
+ <li>has the same tag name as the token.
+ </ul>
+
+ <p>If there is no such node, or, if that node is also in the <a
+ href="#stack">stack of open elements</a> but the element is not <a
+ href="#have-an" title="has an element in scope">in scope</a>, then
+ this is a <a href="#parse">parse error</a>. Abort these steps. The
+ token is ignored.</p>
+
+ <p>Otherwise, if there is such a node, but that node is not in the
+ <a href="#stack">stack of open elements</a>, then this is a <a
+ href="#parse">parse error</a>; remove the element from the list,
+ and abort these steps.</p>
+
+ <p>Otherwise, there is a <var title="">formatting element</var> and
+ that element is in <a href="#stack" title="stack of open
+ elements">the stack</a> and is <a href="#have-an" title="has an
+ element in scope">in scope</a>. If the element is not the <a
+ href="#current4">current node</a>, this is a <a
+ href="#parse">parse error</a>. In any case, proceed with the
+ algorithm as written in the following steps.</p>
+
+ <li>
+ <p>Let the <var title="">furthest block</var> be the topmost node
+ in the <a href="#stack">stack of open elements</a> that is lower
+ in the stack than the <var title="">formatting element</var>, and
+ is not an element in the <a href="#phrasing">phrasing</a> or <a
+ href="#formatting">formatting</a> categories. There might not be
+ one.
+
+ <li>
+ <p>If there is no <var title="">furthest block</var>, then the UA
+ must skip the subsequent steps and instead just pop all the nodes
+ from the bottom of the <a href="#stack">stack of open
+ elements</a>, from the <a href="#current4">current node</a> up to
+ the <var title="">formatting element</var>, and remove the <var
+ title="">formatting element</var> from the <a
+ href="#list-of4">list of active formatting elements</a>.
+
+ <li>
+ <p>Let the <var title="">common ancestor</var> be the element
+ immediately above the <var title="">formatting element</var> in
+ the <a href="#stack">stack of open elements</a>.
+
+ <li>
+ <p>If the <var title="">furthest block</var> has a parent node,
+ then remove the <var title="">furthest block</var> from its parent
+ node.
+
+ <li>
+ <p>Let a bookmark note the position of the <var title="">formatting
+ element</var> in the <a href="#list-of4">list of active formatting
+ elements</a> relative to the elements on either side of it in the
+ list.
+
+ <li>
+ <p>Let <var title="">node</var> and <var title="">last node</var>
+ be the <var title="">furthest block</var>. Follow these steps:</p>
+
+ <ol>
+ <li>Let <var title="">node</var> be the element immediately prior
+ to <var title="">node</var> in the <a href="#stack">stack of open
+ elements</a>.
+
+ <li>If <var title="">node</var> is not in the <a
+ href="#list-of4">list of active formatting elements</a>, then
+ remove <var title="">node</var> from the <a href="#stack">stack
+ of open elements</a> and then go back to step 1.
+
+ <li>Otherwise, if <var title="">node</var> is the <var
+ title="">formatting element</var>, then go to the next step in
+ the overall algorithm.
+
+ <li>Otherwise, if <var title="">last node</var> is the <var
+ title="">furthest block</var>, then move the aforementioned
+ bookmark to be immediately after the <var title="">node</var> in
+ the <a href="#list-of4">list of active formatting elements</a>.
+
+ <li>If <var title="">node</var> has any children, perform a
+ shallow clone of <var title="">node</var>, replace the entry for
+ <var title="">node</var> in the <a href="#list-of4">list of
+ active formatting elements</a> with an entry for the clone,
+ replace the entry for <var title="">node</var> in the <a
+ href="#stack">stack of open elements</a> with an entry for the
+ clone, and let <var title="">node</var> be the clone.
+
+ <li>Insert <var title="">last node</var> into <var
+ title="">node</var>, first removing it from its previous parent
+ node if any.
+
+ <li>Let <var title="">last node</var> be <var title="">node</var>.
+
+ <li>Return to step 1 of this inner set of steps.
+ </ol>
+
+ <li>
+ <p>Insert whatever <var title="">last node</var> ended up being in
+ the previous step into the <var title="">common ancestor</var>
+ node, first removing it from its previous parent node if any.
+
+ <li>
+ <p>Perform a shallow clone of the <var title="">formatting
+ element</var>.
+
+ <li>
+ <p>Take all of the child nodes of the <var title="">furthest
+ block</var> and append them to the clone created in the last step.
+
+ <li>
+ <p>Append that clone to the <var title="">furthest block</var>.
+
+ <li>
+ <p>Remove the <var title="">formatting element</var> from the <a
+ href="#list-of4">list of active formatting elements</a>, and
+ insert the clone into the <a href="#list-of4">list of active
+ formatting elements</a> at the position of the aforementioned
+ bookmark.
+
+ <li>
+ <p>Remove the <var title="">formatting element</var> from the <a
+ href="#stack">stack of open elements</a>, and insert the clone
+ into the <a href="#stack">stack of open elements</a> immediately
+ after (i.e. in a more deeply nested position than) the position of
+ the <var title="">furthest block</var> in that stack.
+
+ <li>
+ <p>Jump back to step 1 in this series of steps.
+ </ol>
+
+ <p class=note>The way these steps are defined, only elements in the
+ <a href="#formatting">formatting</a> category ever get cloned by
+ this algorithm.</p>
+ <!--XXX
+ <div class="example">
+ <p class="big-issue">Need an example.</p>
+ </div>
+-->
+
+ <p class=note>Because of the way this algorithm causes elements to
+ change parents, it has been dubbed the "adoption agency algorithm"
+ (in contrast with other possible algorithms for dealing with
+ misnested content, which included the "incest algorithm", the
+ "secret affair algorithm", and the "Heisenberg algorithm").</p>
+
+ <dt>A start tag token whose tag name is "button"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a
+ <code>button</code> element in scope</a>, then this is a <a
+ href="#parse">parse error</a>; act as if an end tag with the tag
+ name "button" had been seen, then reprocess the token.</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ <dt>A start tag token whose tag name is one of: "marquee", "object"
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ <dt>An end tag token whose tag name is one of: "button", "marquee",
+ "object"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has in scope</a> an
+ element whose tag name is the same as the tag name of the token,
+ then <a href="#generate">generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as the token, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Now, if the <a href="#stack">stack of open elements</a> <a
+ href="#have-an">has an element in scope</a> whose tag name matches
+ the tag name of the token, then pop elements from the stack until
+ that element has been popped from the stack, and <a
+ href="#clear0">clear the list of active formatting elements up to
+ the last marker</a>.</p>
+
+ <dt>A start tag token whose tag name is "xmp"
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Switch the <a href="#content2">content model flag</a> to the CDATA
+ state.</p>
+
+ <dt>A start tag whose tag name is "table"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+ <!-- XXX quirks: don't do this -->
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-table" title="insertion mode: in table">in table</a>".</p>
+
+ <dt>A start tag whose tag name is one of: "area", "basefont",
+ "bgsound", "br", "embed", "img", "param", "spacer", "wbr"
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Immediately pop the <a
+ href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>.</p>
+
+ <dt>A start tag whose tag name is "hr"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an" title="has an element in scope">has a <code>p</code>
+ element in scope</a>, then act as if an end tag with the tag name
+ <code><a href="#p">p</a></code> had been seen.</p>
+ <!-- XXX quirks: don't do this -->
+ <p><a href="#insert" title="insert an html element">Insert an HTML
+ element</a> for the token. Immediately pop the <a
+ href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>.</p>
+
+ <dt>A start tag whose tag name is "image"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Change the token's tag name to
+ "img" and reprocess it. (Don't ask.)</p>
+ <!-- As of
+ 2005-12, studies showed that around 0.2% of pages used the
+ <image> element. -->
+
+
+ <dt>A start tag whose tag name is "input"
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert" title="insert an html element">Insert an
+ <code>input</code> element</a> for the token.</p>
+
+ <p>If the <a href="#form-element"><code title="">form</code> element
+ pointer</a> is not null, then <span>associate</span><!--XXX
+ xref! -->
+ the <code>input</code> element with the <code>form</code> element
+ pointed to by the <a href="#form-element"><code title="">form</code>
+ element pointer</a>.</p>
+
+ <p>Pop that <code>input</code> element off the <a href="#stack">stack
+ of open elements</a>.</p>
+
+ <dt id=isindex>A start tag whose tag name is "isindex"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>.</p>
+
+ <p>If the <a href="#form-element"><code title="">form</code> element
+ pointer</a> is not null, then ignore the token.</p>
+
+ <p>Otherwise:</p>
+
+ <p>Act as if a start tag token with the tag name "form" had been
+ seen.</p>
+
+ <p>Act as if a start tag token with the tag name "hr" had been seen.</p>
+
+ <p>Act as if a start tag token with the tag name "p" had been seen.</p>
+
+ <p>Act as if a start tag token with the tag name "label" had been
+ seen.</p>
+
+ <p>Act as if a stream of character tokens had been seen (see below
+ for what they should say).</p>
+
+ <p>Act as if a start tag token with the tag name "input" had been
+ seen, with all the attributes from the "isindex" token, except with
+ the "name" attribute set to the value "isindex" (ignoring any
+ explicit "name" attribute).</p>
+
+ <p>Act as if a stream of character tokens had been seen (see below
+ for what they should say).</p>
+
+ <p>Act as if an end tag token with the tag name "label" had been
+ seen.</p>
+
+ <p>Act as if an end tag token with the tag name "p" had been seen.</p>
+
+ <p>Act as if a start tag token with the tag name "hr" had been seen.</p>
+
+ <p>Act as if an end tag token with the tag name "form" had been seen.</p>
+
+ <p>The two streams of character tokens should, together with
+ the <code>input</code> element, express the equivalent of "This is a
+ searchable index. Insert your search keywords here: (input field)"
+ in the user's preferred language.</p>
+
+ <p class=big-issue> We then need to specify that if the form submission
+ causes just a single form control, whose name is "isindex", to be
+ submitted, then we submit just the value part, not the "isindex="
+ part.</p>
+ </dd>
+ <!-- XXX keygen support; don't forget form element pointer!
+
+ <dt>A start tag whose tag name is "keygen"</dt>
+ <dd>
+ ...
+ </dd>
+-->
+
+ <dt>A start tag whose tag name is "textarea"
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>If the <a href="#form-element"><code title="">form</code> element
+ pointer</a> is not null, then <span>associate</span><!--XXX
+ xref! -->
+ the <code>textarea</code> element with the <code>form</code> element
+ pointed to by the <a href="#form-element"><code title="">form</code>
+ element pointer</a>.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>.</p>
+
+ <p>Switch the tokeniser's <a href="#content2">content model flag</a>
+ to the RCDATA state.</p>
+
+ <p>If the next token is a U+000A LINE FEED (LF) character token, then
+ ignore that token and move on to the next one. (Newlines at the
+ start of <code>textarea</code> elements are ignored as an authoring
+ convenience.)</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node, whose contents are the
+ concatenation of all those tokens' characters, to the new element
+ node.</p>
+
+ <p>The tokeniser's <a href="#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the tag name
+ "textarea", ignore it. Otherwise, this is a <a href="#parse">parse
+ error</a>.</p>
+
+ <dt>A start tag whose tag name is one of: "iframe", "noembed",
+ "noframes"
+
+ <dt>A start tag whose tag name is "noscript", if <a
+ href="#scripting2">scripting is enabled</a>:
+
+ <dd>
+ <p><a href="#create">Create an element for the token</a>.</p>
+
+ <p>For "iframe" tags, the node must be an <code><a
+ href="#htmliframeelement">HTMLIFrameElement</a></code> object, for
+ the other tags it must be an <code><a
+ href="#htmlelement">HTMLElement</a></code> object.</p>
+
+ <p>Append the new element to the <a href="#current4">current
+ node</a>.</p>
+
+ <p>Switch the tokeniser's <a href="#content2">content model flag</a>
+ to the CDATA state.</p>
+
+ <p>Then, collect all the character tokens that the tokeniser returns
+ until it returns a token that is not a character token, or until it
+ stops tokenising.</p>
+
+ <p>If this process resulted in a collection of character tokens,
+ append a single <code>Text</code> node, whose contents are the
+ concatenation of all those tokens' characters, to the new element
+ node.</p>
+
+ <p>The tokeniser's <a href="#content2">content model flag</a> will
+ have switched back to the PCDATA state.</p>
+
+ <p>If the next token is an end tag token with the same tag name as
+ the start tag token, ignore it. Otherwise, this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <dt>A start tag whose tag name is "select"
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p>Change the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-select" title="insertion mode: in select">in select</a>".</p>
+ </dd>
+ <!-- XXX quirks:
+ <dt>An end tag whose tag name is "br"</dt>
+ <dd>
+ <p>Act as if a start tag token with the tag name "br" had been
+ seen. Ignore the end tag token.</p>
+ </dd>
+-->
+
+ <dt>A start or end tag whose tag name is one of: "caption", "col",
+ "colgroup", "frame", "frameset", "head", "option", "optgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr"
+
+ <dt>An end tag whose tag name is one of: "area", "basefont",
+ "bgsound", <!--XXX quirks: remove br-->"br", "embed", "hr", "iframe",
+ "image", "img", "input", "isindex", "noembed", "noframes", "param",
+ "select", "spacer", "table", "textarea", "wbr"</dt>
+ <!-- add keygen if we add the start tag -->
+
+ <dt>An end tag whose tag name is "noscript", if <a
+ href="#scripting2">scripting is enabled</a>:
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>A start or end tag whose tag name is one of: "event-source",
+ "section", "nav", "article", "aside", "header", "footer", "datagrid",
+ "command"
+
+ <dd> <!-- XXXX -->
+ <p class=big-issue>Work in progress!</p>
+
+ <dt>A start tag token not covered by the previous entries
+
+ <dd>
+ <p><a href="#reconstruct">Reconstruct the active formatting
+ elements</a>, if any.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <p class=note>This element will be a <a href="#phrasing">phrasing</a>
+ element.</p>
+ <!--
+Put the following into the MathML namespace if parsed:
+ math, mrow, mfrac, msqrt, mroot, mstyle, merror, mpadded,
+ mphantom, mfenced, menclose, msub, msup, msubsup, munder,
+ mover, munderover, mmultiscripts, mtable, mlabeledtr, mtr,
+ mtd, maction
+-->
+
+
+ <dt>An end tag token not covered by the previous entries
+
+ <dd>
+ <p>Run the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>Initialise <var title="">node</var> to be the <a
+ href="#current4">current node</a> (the bottommost node of the
+ stack).
+
+ <li>
+ <p>If <var title="">node</var> has the same tag name as the end tag
+ token, then:</p>
+
+ <ol>
+ <li>
+ <p><a href="#generate">Generate implied end tags</a>.
+
+ <li>
+ <p>If the tag name of the end tag token does not match the tag
+ name of the <a href="#current4">current node</a>, this is a <a
+ href="#parse">parse error</a>.
+
+ <li>
+ <p>Pop all the nodes from the <a href="#current4">current
+ node</a> up to <var title="">node</var>, including <var
+ title="">node</var>, then stop this algorithm.
+ </ol>
+
+ <li>
+ <p>Otherwise, if <var title="">node</var> is in neither the <a
+ href="#formatting">formatting</a> category nor the <a
+ href="#phrasing">phrasing</a> category, then this is a <a
+ href="#parse">parse error</a>. Stop this algorithm. The end tag
+ token is ignored.
+
+ <li>
+ <p>Set <var title="">node</var> to the previous entry in the <a
+ href="#stack">stack of open elements</a>.
+
+ <li>
+ <p>Return to step 2.
+ </ol>
+ </dl>
+
+ <dt id=parsing-main-intable>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-table title="insertion mode: in table">in
+ table</dfn>"
+
+ <dd>
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag whose tag name is "caption"
+
+ <dd>
+ <p><a href="#clear1">Clear the stack back to a table context</a>.
+ (See below.)</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token, then
+ switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-caption" title="insertion mode: in caption">in
+ caption</a>".</p>
+
+ <dt>A start tag whose tag name is "colgroup"
+
+ <dd>
+ <p><a href="#clear1">Clear the stack back to a table context</a>.
+ (See below.)</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token, then
+ switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-column" title="insertion mode: in column group">in column
+ group</a>".</p>
+
+ <dt>A start tag whose tag name is "col"
+
+ <dd>
+ <p>Act as if a start tag token with the tag name "colgroup" had been
+ seen, then reprocess the current token.</p>
+
+ <dt>A start tag whose tag name is one of: "tbody", "tfoot", "thead"
+
+ <dd>
+ <p><a href="#clear1">Clear the stack back to a table context</a>.
+ (See below.)</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token, then
+ switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-table0" title="insertion mode: in table body">in table
+ body</a>".</p>
+
+ <dt>A start tag whose tag name is one of: "td", "th", "tr"
+
+ <dd>
+ <p>Act as if a start tag token with the tag name "tbody" had been
+ seen, then reprocess the current token.</p>
+
+ <dt>A start tag whose tag name is "table"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Act as if an end tag token with
+ the tag name "table" had been seen, then, if that token wasn't
+ ignored, reprocess the current token.</p>
+
+ <p class=note>The fake end tag token here can only be ignored in the
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt>An end tag whose tag name is "table"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="#parse">parse error</a>. Ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#generate">Generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not a <code><a
+ href="#table">table</a></code> element, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Pop elements from this stack until a <code><a
+ href="#table">table</a></code> element has been popped from the
+ stack.</p>
+
+ <p><a href="#reset">Reset the insertion mode appropriately</a>.</p>
+
+ <dt>An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Process the token as if the <a
+ href="#insertion0">insertion mode</a> was "<a href="#in-body"
+ title="insertion mode: in body">in body</a>", with the following
+ exception:</p>
+
+ <p>If the <a href="#current4">current node</a> is a <code><a
+ href="#table">table</a></code>, <code><a
+ href="#tbody">tbody</a></code>, <code><a
+ href="#tfoot0">tfoot</a></code>, <code><a
+ href="#thead0">thead</a></code>, or <code><a
+ href="#tr">tr</a></code> element, then, whenever a node would be
+ inserted into the <a href="#current4">current node</a>, it must
+ instead be inserted into the <em><a href="#foster">foster parent
+ element</a></em>.</p>
+
+ <p>The <dfn id=foster>foster parent element</dfn> is the parent
+ element of the last <code><a href="#table">table</a></code> element
+ in the <a href="#stack">stack of open elements</a>, if there is a
+ <code><a href="#table">table</a></code> element and it has such a
+ parent element. If there is no <code><a
+ href="#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a> (<a href="#innerhtml1"><code>innerHTML</code>
+ case</a>), then the <em><a href="#foster">foster parent
+ element</a></em> is the first element in the <a href="#stack">stack
+ of open elements</a> (the <code><a href="#html">html</a></code>
+ element). Otherwise, if there is a <code><a
+ href="#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a>, but the last <code><a
+ href="#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a> has no parent, or its parent node is not an
+ element, then the <em><a href="#foster">foster parent
+ element</a></em> is the element before the last <code><a
+ href="#table">table</a></code> element in the <a href="#stack">stack
+ of open elements</a>.</p>
+
+ <p>If the <em><a href="#foster">foster parent element</a></em> is the
+ parent element of the last <code><a href="#table">table</a></code>
+ element in the <a href="#stack">stack of open elements</a>, then the
+ new node must be inserted immediately <em>before</em> the last
+ <code><a href="#table">table</a></code> element in the <a
+ href="#stack">stack of open elements</a> in the <a
+ href="#foster">foster parent element</a>; otherwise, the new node
+ must be <em>appended</em> to the <a href="#foster">foster parent
+ element</a>.</p>
+ </dl>
+
+ <p>When the steps above require the UA to <dfn id=clear1>clear the
+ stack back to a table context</dfn>, it means that the UA must, while
+ the <a href="#current4">current node</a> is not a <code><a
+ href="#table">table</a></code> element or an <code><a
+ href="#html">html</a></code> element, pop elements from the <a
+ href="#stack">stack of open elements</a>. If this causes any elements
+ to be popped from the stack, then this is a <a href="#parse">parse
+ error</a>.</p>
+
+ <p class=note>The <a href="#current4">current node</a> being an
+ <code><a href="#html">html</a></code> element after this process is an
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt id=parsing-main-incaption>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-caption title="insertion mode: in caption">in
+ caption</dfn>"
+
+ <dd>
+ <dl class=switch>
+ <dt>An end tag whose tag name is "caption"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="#parse">parse error</a>. Ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#generate">Generate implied end tags</a>.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not a <code><a
+ href="#caption0">caption</a></code> element, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Pop elements from this stack until a <code><a
+ href="#caption0">caption</a></code> element has been popped from the
+ stack.</p>
+
+ <p><a href="#clear0">Clear the list of active formatting elements up
+ to the last marker</a>.</p>
+
+ <p>Switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-table" title="insertion mode: in table">in table</a>".</p>
+
+ <dt>A start tag whose tag name is one of: "caption", "col",
+ "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"
+
+ <dt>An end tag whose tag name is "table"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Act as if an end tag with the
+ tag name "caption" had been seen, then, if that token wasn't
+ ignored, reprocess the current token.</p>
+
+ <p class=note>The fake end tag token here can only be ignored in the
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt>An end tag whose tag name is one of: "body", "col", "colgroup",
+ "html", "tbody", "td", "tfoot", "th", "thead", "tr"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was "<a href="#in-body" title="insertion mode: in body">in
+ body</a>".</p>
+ </dl>
+
+ <dt id=parsing-main-incolgroup>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-column title="insertion mode: in column
+ group">in column group</dfn>"
+
+ <dd>
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag whose tag name is "col"
+
+ <dd>
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>col</code> element</a> for the token. Immediately pop the <a
+ href="#current4">current node</a> off the <a href="#stack">stack of
+ open elements</a>.</p>
+
+ <dt>An end tag whose tag name is "colgroup"
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is the root <code><a
+ href="#html">html</a></code> element, then this is a <a
+ href="#parse">parse error</a>; ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, pop the <a href="#current4">current node</a> (which
+ will be a <code><a href="#colgroup">colgroup</a></code> element)
+ from the <a href="#stack">stack of open elements</a>. Switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-table"
+ title="insertion mode: in table">in table</a>".</p>
+
+ <dt>An end tag whose tag name is "col"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Act as if an end tag with the tag name "colgroup" had been seen,
+ and then, if that token wasn't ignored, reprocess the current token.</p>
+
+ <p class=note>The fake end tag token here can only be ignored in the
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+ </dl>
+
+ <dt id=parsing-main-intbody>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-table0 title="insertion mode: in table body">in
+ table body</dfn>"
+
+ <dd>
+ <dl class=switch>
+ <dt>A start tag whose tag name is "tr"
+
+ <dd>
+ <p><a href="#clear2">Clear the stack back to a table body
+ context</a>. (See below.)</p>
+
+ <p><a href="#insert" title="insert an HTML element">Insert a
+ <code>tr</code> element</a> for the token, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-row"
+ title="insertion mode: in row">in row</a>".</p>
+
+ <dt>A start tag whose tag name is one of: "th", "td"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Act as if a start tag with the
+ tag name "tr" had been seen, then reprocess the current token.</p>
+
+ <dt>An end tag whose tag name is one of: "tbody", "tfoot", "thead"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="#parse">parse error</a>. Ignore the token.</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#clear2">Clear the stack back to a table body
+ context</a>. (See below.)</p>
+
+ <p>Pop the <a href="#current4">current node</a> from the <a
+ href="#stack">stack of open elements</a>. Switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-table"
+ title="insertion mode: in table">in table</a>".</p>
+
+ <dt>A start tag whose tag name is one of: "caption", "col",
+ "colgroup", "tbody", "tfoot", "thead"
+
+ <dt>An end tag whose tag name is "table"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have a
+ <code>tbody</code>, <code>thead</code>, or <code>tfoot</code>
+ element in table scope</a>, this is a <a href="#parse">parse
+ error</a>. Ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#clear2">Clear the stack back to a table body
+ context</a>. (See below.)</p>
+
+ <p>Act as if an end tag with the same tag name as the <a
+ href="#current4">current node</a> ("tbody", "tfoot", or "thead") had
+ been seen, then reprocess the current token.</p>
+
+ <dt>An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th", "tr"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was "<a href="#in-table" title="insertion mode: in
+ table">in table</a>".</p>
+ </dl>
+
+ <p>When the steps above require the UA to <dfn id=clear2>clear the
+ stack back to a table body context</dfn>, it means that the UA must,
+ while the <a href="#current4">current node</a> is not a <code><a
+ href="#tbody">tbody</a></code>, <code><a
+ href="#tfoot0">tfoot</a></code>, <code><a
+ href="#thead0">thead</a></code>, or <code><a
+ href="#html">html</a></code> element, pop elements from the <a
+ href="#stack">stack of open elements</a>. If this causes any elements
+ to be popped from the stack, then this is a <a href="#parse">parse
+ error</a>.</p>
+
+ <p class=note>The <a href="#current4">current node</a> being an
+ <code><a href="#html">html</a></code> element after this process is an
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt id=parsing-main-intr>If the <a href="#insertion0">insertion mode</a>
+ is "<dfn id=in-row title="insertion mode: in row">in row</dfn>"
+
+ <dd>
+ <dl class=switch>
+ <dt>A start tag whose tag name is one of: "th", "td"
+
+ <dd>
+ <p><a href="#clear3">Clear the stack back to a table row context</a>.
+ (See below.)</p>
+
+ <p><a href="#insert" title="insert an HTML element">Insert an HTML
+ element</a> for the token, then switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-cell"
+ title="insertion mode: in cell">in cell</a>".</p>
+
+ <p>Insert a marker at the end of the <a href="#list-of4">list of
+ active formatting elements</a>.</p>
+
+ <dt>An end tag whose tag name is "tr"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="#parse">parse error</a>. Ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#clear3">Clear the stack back to a table row context</a>.
+ (See below.)</p>
+
+ <p>Pop the <a href="#current4">current node</a> (which will be a
+ <code><a href="#tr">tr</a></code> element) from the <a
+ href="#stack">stack of open elements</a>. Switch the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-table0"
+ title="insertion mode: in table body">in table body</a>".</p>
+
+ <dt>A start tag whose tag name is one of: "caption", "col",
+ "colgroup", "tbody", "tfoot", "thead", "tr"
+
+ <dt>An end tag whose tag name is "table"
+
+ <dd>
+ <p>Act as if an end tag with the tag name "tr" had been seen, then,
+ if that token wasn't ignored, reprocess the current token.</p>
+
+ <p class=note>The fake end tag token here can only be ignored in the
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt>An end tag whose tag name is one of: "tbody", "tfoot", "thead"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="#parse">parse error</a>. Ignore the token.</p>
+
+ <p>Otherwise, act as if an end tag with the tag name "tr" had been
+ seen, then reprocess the current token.</p>
+
+ <dt>An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was "<a href="#in-table" title="insertion mode: in
+ table">in table</a>".</p>
+ </dl>
+
+ <p>When the steps above require the UA to <dfn id=clear3>clear the
+ stack back to a table row context</dfn>, it means that the UA must,
+ while the <a href="#current4">current node</a> is not a <code><a
+ href="#tr">tr</a></code> element or an <code><a
+ href="#html">html</a></code> element, pop elements from the <a
+ href="#stack">stack of open elements</a>. If this causes any elements
+ to be popped from the stack, then this is a <a href="#parse">parse
+ error</a>.</p>
+
+ <p class=note>The <a href="#current4">current node</a> being an
+ <code><a href="#html">html</a></code> element after this process is an
+ <a href="#innerhtml1"><code>innerHTML</code> case</a>.</p>
+
+ <dt id=parsing-main-intd>If the <a href="#insertion0">insertion mode</a>
+ is "<dfn id=in-cell title="insertion mode: in cell">in cell</dfn>"
+
+ <dd>
+ <dl class=switch>
+ <dt>An end tag whose tag name is one of: "td", "th"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as that of the
+ token, then this is a <a href="#parse">parse error</a> and the token
+ must be ignored.</p>
+
+ <p>Otherwise:</p>
+
+ <p><a href="#generate">Generate implied end tags</a>, except for
+ elements with the same tag name as the token.</p>
+
+ <p>Now, if the <a href="#current4">current node</a> is not an element
+ with the same tag name as the token, then this is a <a
+ href="#parse">parse error</a>.</p>
+
+ <p>Pop elements from this stack until an element with the same tag
+ name as the token has been popped from the stack.</p>
+
+ <p><a href="#clear0">Clear the list of active formatting elements up
+ to the last marker</a>.</p>
+
+ <p>Switch the <a href="#insertion0">insertion mode</a> to "<a
+ href="#in-row" title="insertion mode: in row">in row</a>". (The <a
+ href="#current4">current node</a> will be a <code><a
+ href="#tr">tr</a></code> element at this point.)</p>
+
+ <dt>A start tag whose tag name is one of: "caption", "col",
+ "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does
+ <em>not</em> <a href="#have-an0" title="has an element in table
+ scope">have a <code>td</code> or <code>th</code> element in table
+ scope</a>, then this is a <a href="#parse">parse error</a>; ignore
+ the token. (<a href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, <a href="#close2">close the cell</a> (see below) and
+ reprocess the current token.</p>
+
+ <dt>An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>An end tag whose tag name is one of: "table", "tbody", "tfoot",
+ "thead", "tr"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as that of the
+ token (which can only happen for "tbody", "tfoot" and "thead", or
+ in the <a href="#innerhtml1"><code>innerHTML</code> case</a>), then
+ this is a <a href="#parse">parse error</a> and the token must be
+ ignored.</p>
+
+ <p>Otherwise, <a href="#close2">close the cell</a> (see below) and
+ reprocess the current token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> was "<a href="#in-body" title="insertion mode: in body">in
+ body</a>".</p>
+ </dl>
+
+ <p>Where the steps above say to <dfn id=close2>close the cell</dfn>,
+ they mean to follow the following algorithm:</p>
+
+ <ol>
+ <li>
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an0" title="has an element in table scope">has a
+ <code>td</code> element in table scope</a>, then act as if an end
+ tag token with the tag name "td" had been seen.
+
+ <li>
+ <p>Otherwise, the <a href="#stack">stack of open elements</a> will <a
+ href="#have-an0" title="has an element in table scope">have a
+ <code>th</code> element in table scope</a>; act as if an end tag
+ token with the tag name "th" had been seen.
+ </ol>
+
+ <p class=note>The <a href="#stack">stack of open elements</a> cannot
+ have both a <code><a href="#td">td</a></code> and a <code><a
+ href="#th">th</a></code> element <a href="#have-an0" title="has an
+ element in table scope">in table scope</a> at the same time, nor can
+ it have neither when the <a href="#insertion0">insertion mode</a> is
+ "<a href="#in-cell" title="insertion mode: in cell">in cell</a>".</p>
+
+ <dt id=parsing-main-inselect>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-select title="insertion mode: in select">in
+ select</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the token's
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag token whose tag name is "option"
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is an
+ <code>option</code> element, act as if an end tag with the tag name
+ "option" had been seen.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <dt>A start tag token whose tag name is "optgroup"
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is an
+ <code>option</code> element, act as if an end tag with the tag name
+ "option" had been seen.</p>
+
+ <p>If the <a href="#current4">current node</a> is an
+ <code>optgroup</code> element, act as if an end tag with the tag
+ name "optgroup" had been seen.</p>
+
+ <p><a href="#insert">Insert an HTML element</a> for the token.</p>
+
+ <dt>An end tag token whose tag name is "optgroup"
+
+ <dd>
+ <p>First, if the <a href="#current4">current node</a> is an
+ <code>option</code> element, and the node immediately before it in
+ the <a href="#stack">stack of open elements</a> is an
+ <code>optgroup</code> element, then act as if an end tag with the
+ tag name "option" had been seen.</p>
+
+ <p>If the <a href="#current4">current node</a> is an
+ <code>optgroup</code> element, then pop that node from the <a
+ href="#stack">stack of open elements</a>. Otherwise, this is a <a
+ href="#parse">parse error</a>; ignore the token.</p>
+
+ <dt>An end tag token whose tag name is "option"
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is an
+ <code>option</code> element, then pop that node from the <a
+ href="#stack">stack of open elements</a>. Otherwise, this is a <a
+ href="#parse">parse error</a>; ignore the token.</p>
+
+ <dt>An end tag whose tag name is "select"
+
+ <dd>
+ <p>If the <a href="#stack">stack of open elements</a> does not <a
+ href="#have-an0" title="has an element in table scope">have an
+ element in table scope</a> with the same tag name as the token, this
+ is a <a href="#parse">parse error</a>. Ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise:</p>
+
+ <p>Pop elements from the <a href="#stack">stack of open elements</a>
+ until a <code>select</code> element has been popped from the stack.</p>
+
+ <p><a href="#reset">Reset the insertion mode appropriately</a>.</p>
+
+ <dt>A start tag whose tag name is "select"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Act as if the token had been an
+ end tag with the tag name "select" instead.</p>
+
+ <dt>An end tag whose tag name is one of: "caption", "table", "tbody",
+ "tfoot", "thead", "tr", "td", "th"
+
+ <dd>
+ <p><a href="#parse">Parse error</a>.</p>
+
+ <p>If the <a href="#stack">stack of open elements</a> <a
+ href="#have-an0">has an element in table scope</a> with the same tag
+ name as that of the token, then act as if an end tag with the tag
+ name "select" had been seen, and reprocess the token. Otherwise,
+ ignore the token.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+ </dl>
+
+ <dt id=parsing-main-afterbody>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=after2 title="insertion mode: after body">after
+ body</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p>Process the token as it would be processed if the <a
+ href="#insertion0">insertion mode</a> was "<a href="#in-body"
+ title="insertion mode: in body">in body</a>".</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the first element in the <a
+ href="#stack">stack of open elements</a> (the <code><a
+ href="#html">html</a></code> element), with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>An end tag with the tag name "html"
+
+ <dd>
+ <p>If the parser was originally created in order to handle the
+ setting of <em>an element</em>'s <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute, this is a <a
+ href="#parse">parse error</a>; ignore the token. (The element will
+ be an <code><a href="#html">html</a></code> element in this case.)
+ (<a href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, switch to <a href="#the-trailing0">the trailing end
+ phase</a>.</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Set the <a
+ href="#insertion0">insertion mode</a> to "<a href="#in-body"
+ title="insertion mode: in body">in body</a>" and reprocess the
+ token.</p>
+ </dl>
+
+ <dt id=parsing-main-inframeset>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=in-frameset title="insertion mode: in frameset">in
+ frameset</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>A start tag with the tag name "frameset"
+
+ <dd>
+ <p><a href="#insert" title="Insert an HTML element">Insert a
+ <code>frameset</code> element</a> for the token.</p>
+
+ <dt>An end tag with the tag name "frameset"
+
+ <dd>
+ <p>If the <a href="#current4">current node</a> is the root <code><a
+ href="#html">html</a></code> element, then this is a <a
+ href="#parse">parse error</a>; ignore the token. (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>)</p>
+
+ <p>Otherwise, pop the <a href="#current4">current node</a> from the
+ <a href="#stack">stack of open elements</a>.</p>
+
+ <p>If the parser was <em>not</em> originally created in order to
+ handle the setting of an element's <code title=dom-innerHTML-HTML><a
+ href="#innerhtml0">innerHTML</a></code> attribute (<a
+ href="#innerhtml1"><code>innerHTML</code> case</a>), and the <a
+ href="#current4">current node</a> is no longer a
+ <code>frameset</code> element, then change the <a
+ href="#insertion0">insertion mode</a> to "<a href="#after3"
+ title="insertion mode: after frameset">after frameset</a>".</p>
+
+ <dt>A start tag with the tag name "frame"
+
+ <dd>
+ <p><a href="#insert">Insert an HTML element</a> for the token.
+ Immediately pop the <a href="#current4">current node</a> off the <a
+ href="#stack">stack of open elements</a>.</p>
+
+ <dt>A start tag with the tag name "noframes"
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> had been "<a href="#in-body" title="insertion mode: in
+ body">in body</a>".</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+ </dl>
+
+ <dt id=parsing-main-afterframeset>If the <a href="#insertion0">insertion
+ mode</a> is "<dfn id=after3 title="insertion mode: after
+ frameset">after frameset</dfn>"
+
+ <dd>
+ <p>Handle the token as follows:</p>
+
+ <dl class=switch>
+ <dt>A character token that is one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C
+ FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p><a href="#append" title="append a character">Append the
+ character</a> to the <a href="#current4">current node</a>.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <a
+ href="#current4">current node</a> with the <code
+ title="">data</code> attribute set to the data given in the comment
+ token.</p>
+
+ <dt>An end tag with the tag name "html"
+
+ <dd>
+ <p>Switch to <a href="#the-trailing0">the trailing end phase</a>.</p>
+
+ <dt>A start tag with the tag name "noframes"
+
+ <dd>
+ <p>Process the token as if the <a href="#insertion0">insertion
+ mode</a> had been "<a href="#in-body" title="insertion mode: in
+ body">in body</a>".</p>
+
+ <dt>Anything else
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+ </dl>
+ </dl>
+ </dl>
+
+ <p class=big-issue>This doesn't handle UAs that don't support frames, or
+ that do support frames but want to show the NOFRAMES content. Supporting
+ the former is easy; supporting the latter is harder.
+
+ <h5 id=the-trailing><span class=secno>8.2.4.4. </span><dfn
+ id=the-trailing0>The trailing end phase</dfn></h5>
+
+ <p>After <a href="#the-main0">the main phase</a>, as each token is emitted
+ from the <a href="#tokenisation0">tokenisation</a> stage, it must be
+ processed as described in this section.
+
+ <dl class=switch>
+ <dt>A DOCTYPE token
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Ignore the token.</p>
+
+ <dt>A comment token
+
+ <dd>
+ <p>Append a <code>Comment</code> node to the <code>Document</code> object
+ with the <code title="">data</code> attribute set to the data given in
+ the comment token.</p>
+
+ <dt>A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dd>
+ <p>Process the token as it would be processed in <a href="#the-main0">the
+ main phase</a>.</p>
+
+ <dt>A character token that is <em>not</em> one of U+0009 CHARACTER
+ TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM
+ FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+
+ <dt>A start tag token
+
+ <dt>An end tag token
+
+ <dd>
+ <p><a href="#parse">Parse error</a>. Switch back to <a
+ href="#the-main0">the main phase</a> and reprocess the token.</p>
+
+ <dt>An end-of-file token
+
+ <dd>
+ <p><a href="#stops">Stop parsing</a>.</p>
+ </dl>
+
+ <h4 id=the-end><span class=secno>8.2.5. </span>The End</h4>
+
+ <p>Once the user agent <dfn id=stops title="stop parsing">stops
+ parsing</dfn> the document, the user agent must follow the steps in this
+ section.
+
+ <p>First, <!--the user agent must <span title="fire a DOMContentLoaded
+ event">fire a <code
+ title="event-DOMContentLoaded">DOMContentLoaded</code> event</span>
+ at <span>the <code>body</code> element</span>.</p>
+
+ <p>Then, -->the
+ rules for <a href="#when-a">when a script completes loading</a> start
+ applying (script execution is no longer managed by the parser).
+
+ <p>If any of the scripts in the <a href="#list-of1">list of scripts that
+ will execute as soon as possible</a> have <span>completed
+ loading</span><!-- XXX xref -->, or if the <a href="#list-of0">list of
+ scripts that will execute asynchronously</a> is not empty and the first
+ script in that list has <span>completed loading</span><!-- XXX xref
+ -->,
+ then the user agent must act as if those scripts just completed loading,
+ following the rules given for that in the <code><a
+ href="#script0">script</a></code> element definition.
+
+ <p>Then, if the <a href="#list-of">list of scripts that will execute when
+ the document has finished parsing</a> is not empty, and the first item in
+ this list has already <span>completed loading</span><!--XXX
+ xref -->,
+ then the user agent must act as if that script just finished loading.
+
+ <p>By this point, there will be no scripts that have loaded but have not
+ yet been executed.
+
+ <p>The user agent must then <a href="#firing2">fire a simple event</a>
+ called <code title=event-DOMContentLoaded>DOMContentLoaded</code> at the
+ <code>Document</code>.
+
+ <p>Once everything that <dfn id=delays title="delay the load event">delays
+ the load event</dfn> has completed, the user agent must <a href="#firing4"
+ title="fire a load event">fire a <code title=event-load>load</code>
+ event</a> at <a href="#the-body0">the <code>body</code> element</a>.</p>
+ <!-- XXX make sure things "delay the load event" -->
+ <!--XXX need to handle
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#2354
+2354 // Don't open transient styles if it makes the stack deep, bug 58917.
+-->
+ <!--XXX
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/nsHTMLTokenizer.cpp#749
+-->
+ <!--
+see also CTextToken::ConsumeCharacterData() for CDATA parsing?
+
+1212 1 Here's a tricky case from bug 22596: <h5><li><h5>
+1213 How do we know that the 2nd <h5> should close the <LI> rather than nest inside the <LI>?
+1214 (Afterall, the <h5> is a legal child of the <LI>).
+1215
+1216 The way you know is that there is no root between the two, so the <h5> binds more
+1217 tightly to the 1st <h5> than to the <LI>.
+1218 2. Also, bug 6148 shows this case: <SPAN><DIV><SPAN>
+1219 From this case we learned not to execute this logic if the parent is a block.
+1220
+1221 3. Fix for 26583
+1222 Ex. <A href=foo.html><B>foo<A href-bar.html>bar</A></B></A> <- A legal HTML
+1223 In the above example clicking on "foo" or "bar" should link to
+1224 foo.html or bar.html respectively. That is, the inner <A> should be informed
+1225 about the presence of an open <A> above <B>..so that the inner <A> can close out
+1226 the outer <A>. The following code does it for us.
+1227
+1228 4. Fix for 27865 [ similer to 22596 ]. Ex: <DL><DD><LI>one<DD><LI>two
+ - http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#1211
+
+815 // Here's a problem. If theTag is legal in here, we don't move it
+816 // out. So if we're moving stuff out of here, the parent of theTag
+817 // gets closed at this point. But some things are legal
+818 // _everywhere_ and hence would effectively close out misplaced
+819 // content in tables. This is undesirable, so treat them as
+820 // illegal here so they'll be shipped out with their parents and
+821 // siblings. See bug 40855 for an explanation (that bug was for
+822 // comments, but the same issues arise with whitespace, newlines,
+823 // noscript, etc). Script is special, though. Shipping it out
+824 // breaks document.write stuff. See bug 243064.
+ - http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#825
+
+
+1326 /**************************************************************************************
+1327 *
+1328 * Now a little code to deal with bug #49687 (crash when layout stack gets too deep)
+1329 * I've also opened this up to any container (not just inlines): re bug 55095
+1330 * Improved to handle bug 55980 (infinite loop caused when DEPTH is exceeded and
+1331 * </P> is encountered by itself (<P>) is continuously produced.
+1332 *
+1333 **************************************************************************************/
+
+1912 // Oh boy!! we found a "stray" tag. Nav4.x and IE introduce line break in
+1913 // such cases. So, let's simulate that effect for compatibility.
+1914 // Ex. <html><body>Hello</P>There</body></html>
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/CNavDTD.cpp#1912
+
+http://lxr.mozilla.org/seamonkey/search?string=nested
+/parser/htmlparser/src/CNavDTD.cpp, line 791 - * 2. <CENTER><DL><DT><A><CENTER> allow nested <CENTER>
+/parser/htmlparser/src/CNavDTD.cpp, line 792 - * 3. <TABLE><TR><TD><TABLE>... allow nested <TABLE>
+/parser/htmlparser/src/CNavDTD.cpp, line 2562 - // Discard nested forms - bug 72639
+/parser/htmlparser/src/nsElementTable.cpp, line 1453 - * 2. <CENTER><DL><DT><A><CENTER> allow nested <CENTER>
+/parser/htmlparser/src/nsElementTable.cpp, line 1454 - * 3. <TABLE><TR><TD><TABLE>... allow nested <TABLE>
+/parser/htmlparser/src/nsElementTable.cpp, line 1901 - // Ex: <H1><LI><H1><LI>. Inner LI has the potential of getting nested
+-->
+
+ <h3 id=namespaces><span class=secno>8.3. </span>Namespaces</h3>
+
+ <p>The <dfn id=html-namespace0>HTML namespace</dfn> is:
+ <code>http://www.w3.org/1999/xhtml</code>
+
+ <h3 id=entities><span class=secno>8.4. </span><dfn
+ id=entities0>Entities</dfn></h3>
+
+ <p>This table lists the entity names that are supported by HTML, and the
+ code points to which they refer. It is referenced by the previous
+ sections.</p>
+ <!--XXX
+ entities:
+ 94 // If an entity value is greater than 255 then:
+ 95 // Nav 4.x does not treat it as an entity,
+ 96 // IE treats it as an entity if terminated with a semicolon.
+ 97 // Resembling IE!!
+http://lxr.mozilla.org/mozilla/source/parser/htmlparser/src/nsHTMLTokens.cpp#94
+-->
+
+ <table>
+ <thead>
+ <tr>
+ <th> Entity Name
+
+ <th> Character
+
+ <tbody>
+ <tr>
+ <td> <code title="">AElig</code>
+
+ <td> U+00C6
+
+ <tr>
+ <td> <code title="">Aacute</code>
+
+ <td> U+00C1
+
+ <tr>
+ <td> <code title="">Acirc</code>
+
+ <td> U+00C2
+
+ <tr>
+ <td> <code title="">Agrave</code>
+
+ <td> U+00C0
+
+ <tr>
+ <td> <code title="">Alpha</code>
+
+ <td> U+0391
+
+ <tr>
+ <td> <code title="">Aring</code>
+
+ <td> U+00C5
+
+ <tr>
+ <td> <code title="">Atilde</code>
+
+ <td> U+00C3
+
+ <tr>
+ <td> <code title="">Auml</code>
+
+ <td> U+00C4
+
+ <tr>
+ <td> <code title="">Beta</code>
+
+ <td> U+0392
+
+ <tr>
+ <td> <code title="">Ccedil</code>
+
+ <td> U+00C7
+
+ <tr>
+ <td> <code title="">Chi</code>
+
+ <td> U+03A7
+
+ <tr>
+ <td> <code title="">Dagger</code>
+
+ <td> U+2021
+
+ <tr>
+ <td> <code title="">Delta</code>
+
+ <td> U+0394
+
+ <tr>
+ <td> <code title="">ETH</code>
+
+ <td> U+00D0
+
+ <tr>
+ <td> <code title="">Eacute</code>
+
+ <td> U+00C9
+
+ <tr>
+ <td> <code title="">Ecirc</code>
+
+ <td> U+00CA
+
+ <tr>
+ <td> <code title="">Egrave</code>
+
+ <td> U+00C8
+
+ <tr>
+ <td> <code title="">Epsilon</code>
+
+ <td> U+0395
+
+ <tr>
+ <td> <code title="">Eta</code>
+
+ <td> U+0397
+
+ <tr>
+ <td> <code title="">Euml</code>
+
+ <td> U+00CB
+
+ <tr>
+ <td> <code title="">Gamma</code>
+
+ <td> U+0393
+
+ <tr>
+ <td> <code title="">Iacute</code>
+
+ <td> U+00CD
+
+ <tr>
+ <td> <code title="">Icirc</code>
+
+ <td> U+00CE
+
+ <tr>
+ <td> <code title="">Igrave</code>
+
+ <td> U+00CC
+
+ <tr>
+ <td> <code title="">Iota</code>
+
+ <td> U+0399
+
+ <tr>
+ <td> <code title="">Iuml</code>
+
+ <td> U+00CF
+
+ <tr>
+ <td> <code title="">Kappa</code>
+
+ <td> U+039A
+
+ <tr>
+ <td> <code title="">Lambda</code>
+
+ <td> U+039B
+
+ <tr>
+ <td> <code title="">Mu</code>
+
+ <td> U+039C
+
+ <tr>
+ <td> <code title="">Ntilde</code>
+
+ <td> U+00D1
+
+ <tr>
+ <td> <code title="">Nu</code>
+
+ <td> U+039D
+
+ <tr>
+ <td> <code title="">OElig</code>
+
+ <td> U+0152
+
+ <tr>
+ <td> <code title="">Oacute</code>
+
+ <td> U+00D3
+
+ <tr>
+ <td> <code title="">Ocirc</code>
+
+ <td> U+00D4
+
+ <tr>
+ <td> <code title="">Ograve</code>
+
+ <td> U+00D2
+
+ <tr>
+ <td> <code title="">Omega</code>
+
+ <td> U+03A9
+
+ <tr>
+ <td> <code title="">Omicron</code>
+
+ <td> U+039F
+
+ <tr>
+ <td> <code title="">Oslash</code>
+
+ <td> U+00D8
+
+ <tr>
+ <td> <code title="">Otilde</code>
+
+ <td> U+00D5
+
+ <tr>
+ <td> <code title="">Ouml</code>
+
+ <td> U+00D6
+
+ <tr>
+ <td> <code title="">Phi</code>
+
+ <td> U+03A6
+
+ <tr>
+ <td> <code title="">Pi</code>
+
+ <td> U+03A0
+
+ <tr>
+ <td> <code title="">Prime</code>
+
+ <td> U+2033
+
+ <tr>
+ <td> <code title="">Psi</code>
+
+ <td> U+03A8
+
+ <tr>
+ <td> <code title="">Rho</code>
+
+ <td> U+03A1
+
+ <tr>
+ <td> <code title="">Scaron</code>
+
+ <td> U+0160
+
+ <tr>
+ <td> <code title="">Sigma</code>
+
+ <td> U+03A3
+
+ <tr>
+ <td> <code title="">THORN</code>
+
+ <td> U+00DE
+
+ <tr>
+ <td> <code title="">Tau</code>
+
+ <td> U+03A4
+
+ <tr>
+ <td> <code title="">Theta</code>
+
+ <td> U+0398
+
+ <tr>
+ <td> <code title="">Uacute</code>
+
+ <td> U+00DA
+
+ <tr>
+ <td> <code title="">Ucirc</code>
+
+ <td> U+00DB
+
+ <tr>
+ <td> <code title="">Ugrave</code>
+
+ <td> U+00D9
+
+ <tr>
+ <td> <code title="">Upsilon</code>
+
+ <td> U+03A5
+
+ <tr>
+ <td> <code title="">Uuml</code>
+
+ <td> U+00DC
+
+ <tr>
+ <td> <code title="">Xi</code>
+
+ <td> U+039E
+
+ <tr>
+ <td> <code title="">Yacute</code>
+
+ <td> U+00DD
+
+ <tr>
+ <td> <code title="">Yuml</code>
+
+ <td> U+0178
+
+ <tr>
+ <td> <code title="">Zeta</code>
+
+ <td> U+0396
+
+ <tr>
+ <td> <code title="">aacute</code>
+
+ <td> U+00E1
+
+ <tr>
+ <td> <code title="">acirc</code>
+
+ <td> U+00E2
+
+ <tr>
+ <td> <code title="">acute</code>
+
+ <td> U+00B4
+
+ <tr>
+ <td> <code title="">aelig</code>
+
+ <td> U+00E6
+
+ <tr>
+ <td> <code title="">agrave</code>
+
+ <td> U+00E0
+
+ <tr>
+ <td> <code title="">alefsym</code>
+
+ <td> U+2135
+
+ <tr>
+ <td> <code title="">alpha</code>
+
+ <td> U+03B1
+
+ <tr>
+ <td> <code title="">amp</code>
+
+ <td> U+0026
+
+ <tr>
+ <td> <code title="">AMP</code>
+
+ <td> U+0026
+
+ <tr>
+ <td> <code title="">and</code>
+
+ <td> U+2227
+
+ <tr>
+ <td> <code title="">ang</code>
+
+ <td> U+2220
+
+ <tr>
+ <td> <code title="">apos</code>
+
+ <td> U+0027
+
+ <tr>
+ <td> <code title="">aring</code>
+
+ <td> U+00E5
+
+ <tr>
+ <td> <code title="">asymp</code>
+
+ <td> U+2248
+
+ <tr>
+ <td> <code title="">atilde</code>
+
+ <td> U+00E3
+
+ <tr>
+ <td> <code title="">auml</code>
+
+ <td> U+00E4
+
+ <tr>
+ <td> <code title="">bdquo</code>
+
+ <td> U+201E
+
+ <tr>
+ <td> <code title="">beta</code>
+
+ <td> U+03B2
+
+ <tr>
+ <td> <code title="">brvbar</code>
+
+ <td> U+00A6
+
+ <tr>
+ <td> <code title="">bull</code>
+
+ <td> U+2022
+
+ <tr>
+ <td> <code title="">cap</code>
+
+ <td> U+2229
+
+ <tr>
+ <td> <code title="">ccedil</code>
+
+ <td> U+00E7
+
+ <tr>
+ <td> <code title="">cedil</code>
+
+ <td> U+00B8
+
+ <tr>
+ <td> <code title="">cent</code>
+
+ <td> U+00A2
+
+ <tr>
+ <td> <code title="">chi</code>
+
+ <td> U+03C7
+
+ <tr>
+ <td> <code title="">circ</code>
+
+ <td> U+02C6
+
+ <tr>
+ <td> <code title="">clubs</code>
+
+ <td> U+2663
+
+ <tr>
+ <td> <code title="">cong</code>
+
+ <td> U+2245
+
+ <tr>
+ <td> <code title="">copy</code>
+
+ <td> U+00A9
+
+ <tr>
+ <td> <code title="">COPY</code>
+
+ <td> U+00A9
+
+ <tr>
+ <td> <code title="">crarr</code>
+
+ <td> U+21B5
+
+ <tr>
+ <td> <code title="">cup</code>
+
+ <td> U+222A
+
+ <tr>
+ <td> <code title="">curren</code>
+
+ <td> U+00A4
+
+ <tr>
+ <td> <code title="">dArr</code>
+
+ <td> U+21D3
+
+ <tr>
+ <td> <code title="">dagger</code>
+
+ <td> U+2020
+
+ <tr>
+ <td> <code title="">darr</code>
+
+ <td> U+2193
+
+ <tr>
+ <td> <code title="">deg</code>
+
+ <td> U+00B0
+
+ <tr>
+ <td> <code title="">delta</code>
+
+ <td> U+03B4
+
+ <tr>
+ <td> <code title="">diams</code>
+
+ <td> U+2666
+
+ <tr>
+ <td> <code title="">divide</code>
+
+ <td> U+00F7
+
+ <tr>
+ <td> <code title="">eacute</code>
+
+ <td> U+00E9
+
+ <tr>
+ <td> <code title="">ecirc</code>
+
+ <td> U+00EA
+
+ <tr>
+ <td> <code title="">egrave</code>
+
+ <td> U+00E8
+
+ <tr>
+ <td> <code title="">empty</code>
+
+ <td> U+2205
+
+ <tr>
+ <td> <code title="">emsp</code>
+
+ <td> U+2003
+
+ <tr>
+ <td> <code title="">ensp</code>
+
+ <td> U+2002
+
+ <tr>
+ <td> <code title="">epsilon</code>
+
+ <td> U+03B5
+
+ <tr>
+ <td> <code title="">equiv</code>
+
+ <td> U+2261
+
+ <tr>
+ <td> <code title="">eta</code>
+
+ <td> U+03B7
+
+ <tr>
+ <td> <code title="">eth</code>
+
+ <td> U+00F0
+
+ <tr>
+ <td> <code title="">euml</code>
+
+ <td> U+00EB
+
+ <tr>
+ <td> <code title="">euro</code>
+
+ <td> U+20AC
+
+ <tr>
+ <td> <code title="">exist</code>
+
+ <td> U+2203
+
+ <tr>
+ <td> <code title="">fnof</code>
+
+ <td> U+0192
+
+ <tr>
+ <td> <code title="">forall</code>
+
+ <td> U+2200
+
+ <tr>
+ <td> <code title="">frac12</code>
+
+ <td> U+00BD
+
+ <tr>
+ <td> <code title="">frac14</code>
+
+ <td> U+00BC
+
+ <tr>
+ <td> <code title="">frac34</code>
+
+ <td> U+00BE
+
+ <tr>
+ <td> <code title="">frasl</code>
+
+ <td> U+2044
+
+ <tr>
+ <td> <code title="">gamma</code>
+
+ <td> U+03B3
+
+ <tr>
+ <td> <code title="">ge</code>
+
+ <td> U+2265
+
+ <tr>
+ <td> <code title="">gt</code>
+
+ <td> U+003E
+
+ <tr>
+ <td> <code title="">GT</code>
+
+ <td> U+003E
+
+ <tr>
+ <td> <code title="">hArr</code>
+
+ <td> U+21D4
+
+ <tr>
+ <td> <code title="">harr</code>
+
+ <td> U+2194
+
+ <tr>
+ <td> <code title="">hearts</code>
+
+ <td> U+2665
+
+ <tr>
+ <td> <code title="">hellip</code>
+
+ <td> U+2026
+
+ <tr>
+ <td> <code title="">iacute</code>
+
+ <td> U+00ED
+
+ <tr>
+ <td> <code title="">icirc</code>
+
+ <td> U+00EE
+
+ <tr>
+ <td> <code title="">iexcl</code>
+
+ <td> U+00A1
+
+ <tr>
+ <td> <code title="">igrave</code>
+
+ <td> U+00EC
+
+ <tr>
+ <td> <code title="">image</code>
+
+ <td> U+2111
+
+ <tr>
+ <td> <code title="">infin</code>
+
+ <td> U+221E
+
+ <tr>
+ <td> <code title="">int</code>
+
+ <td> U+222B
+
+ <tr>
+ <td> <code title="">iota</code>
+
+ <td> U+03B9
+
+ <tr>
+ <td> <code title="">iquest</code>
+
+ <td> U+00BF
+
+ <tr>
+ <td> <code title="">isin</code>
+
+ <td> U+2208
+
+ <tr>
+ <td> <code title="">iuml</code>
+
+ <td> U+00EF
+
+ <tr>
+ <td> <code title="">kappa</code>
+
+ <td> U+03BA
+
+ <tr>
+ <td> <code title="">lArr</code>
+
+ <td> U+21D0
+
+ <tr>
+ <td> <code title="">lambda</code>
+
+ <td> U+03BB
+
+ <tr>
+ <td> <code title="">lang</code>
+
+ <td> U+2329
+
+ <tr>
+ <td> <code title="">laquo</code>
+
+ <td> U+00AB
+
+ <tr>
+ <td> <code title="">larr</code>
+
+ <td> U+2190
+
+ <tr>
+ <td> <code title="">lceil</code>
+
+ <td> U+2308
+
+ <tr>
+ <td> <code title="">ldquo</code>
+
+ <td> U+201C
+
+ <tr>
+ <td> <code title="">le</code>
+
+ <td> U+2264
+
+ <tr>
+ <td> <code title="">lfloor</code>
+
+ <td> U+230A
+
+ <tr>
+ <td> <code title="">lowast</code>
+
+ <td> U+2217
+
+ <tr>
+ <td> <code title="">loz</code>
+
+ <td> U+25CA
+
+ <tr>
+ <td> <code title="">lrm</code>
+
+ <td> U+200E
+
+ <tr>
+ <td> <code title="">lsaquo</code>
+
+ <td> U+2039
+
+ <tr>
+ <td> <code title="">lsquo</code>
+
+ <td> U+2018
+
+ <tr>
+ <td> <code title="">lt</code>
+
+ <td> U+003C
+
+ <tr>
+ <td> <code title="">LT</code>
+
+ <td> U+003C
+
+ <tr>
+ <td> <code title="">macr</code>
+
+ <td> U+00AF
+
+ <tr>
+ <td> <code title="">mdash</code>
+
+ <td> U+2014
+
+ <tr>
+ <td> <code title="">micro</code>
+
+ <td> U+00B5
+
+ <tr>
+ <td> <code title="">middot</code>
+
+ <td> U+00B7
+
+ <tr>
+ <td> <code title="">minus</code>
+
+ <td> U+2212
+
+ <tr>
+ <td> <code title="">mu</code>
+
+ <td> U+03BC
+
+ <tr>
+ <td> <code title="">nabla</code>
+
+ <td> U+2207
+
+ <tr>
+ <td> <code title="">nbsp</code>
+
+ <td> U+00A0
+
+ <tr>
+ <td> <code title="">ndash</code>
+
+ <td> U+2013
+
+ <tr>
+ <td> <code title="">ne</code>
+
+ <td> U+2260
+
+ <tr>
+ <td> <code title="">ni</code>
+
+ <td> U+220B
+
+ <tr>
+ <td> <code title="">not</code>
+
+ <td> U+00AC
+
+ <tr>
+ <td> <code title="">notin</code>
+
+ <td> U+2209
+
+ <tr>
+ <td> <code title="">nsub</code>
+
+ <td> U+2284
+
+ <tr>
+ <td> <code title="">ntilde</code>
+
+ <td> U+00F1
+
+ <tr>
+ <td> <code title="">nu</code>
+
+ <td> U+03BD
+
+ <tr>
+ <td> <code title="">oacute</code>
+
+ <td> U+00F3
+
+ <tr>
+ <td> <code title="">ocirc</code>
+
+ <td> U+00F4
+
+ <tr>
+ <td> <code title="">oelig</code>
+
+ <td> U+0153
+
+ <tr>
+ <td> <code title="">ograve</code>
+
+ <td> U+00F2
+
+ <tr>
+ <td> <code title="">oline</code>
+
+ <td> U+203E
+
+ <tr>
+ <td> <code title="">omega</code>
+
+ <td> U+03C9
+
+ <tr>
+ <td> <code title="">omicron</code>
+
+ <td> U+03BF
+
+ <tr>
+ <td> <code title="">oplus</code>
+
+ <td> U+2295
+
+ <tr>
+ <td> <code title="">or</code>
+
+ <td> U+2228
+
+ <tr>
+ <td> <code title="">ordf</code>
+
+ <td> U+00AA
+
+ <tr>
+ <td> <code title="">ordm</code>
+
+ <td> U+00BA
+
+ <tr>
+ <td> <code title="">oslash</code>
+
+ <td> U+00F8
+
+ <tr>
+ <td> <code title="">otilde</code>
+
+ <td> U+00F5
+
+ <tr>
+ <td> <code title="">otimes</code>
+
+ <td> U+2297
+
+ <tr>
+ <td> <code title="">ouml</code>
+
+ <td> U+00F6
+
+ <tr>
+ <td> <code title="">para</code>
+
+ <td> U+00B6
+
+ <tr>
+ <td> <code title="">part</code>
+
+ <td> U+2202
+
+ <tr>
+ <td> <code title="">permil</code>
+
+ <td> U+2030
+
+ <tr>
+ <td> <code title="">perp</code>
+
+ <td> U+22A5
+
+ <tr>
+ <td> <code title="">phi</code>
+
+ <td> U+03C6
+
+ <tr>
+ <td> <code title="">pi</code>
+
+ <td> U+03C0
+
+ <tr>
+ <td> <code title="">piv</code>
+
+ <td> U+03D6
+
+ <tr>
+ <td> <code title="">plusmn</code>
+
+ <td> U+00B1
+
+ <tr>
+ <td> <code title="">pound</code>
+
+ <td> U+00A3
+
+ <tr>
+ <td> <code title="">prime</code>
+
+ <td> U+2032
+
+ <tr>
+ <td> <code title="">prod</code>
+
+ <td> U+220F
+
+ <tr>
+ <td> <code title="">prop</code>
+
+ <td> U+221D
+
+ <tr>
+ <td> <code title="">psi</code>
+
+ <td> U+03C8
+
+ <tr>
+ <td> <code title="">quot</code>
+
+ <td> U+0022
+
+ <tr>
+ <td> <code title="">QUOT</code>
+
+ <td> U+0022
+
+ <tr>
+ <td> <code title="">rArr</code>
+
+ <td> U+21D2
+
+ <tr>
+ <td> <code title="">radic</code>
+
+ <td> U+221A
+
+ <tr>
+ <td> <code title="">rang</code>
+
+ <td> U+232A
+
+ <tr>
+ <td> <code title="">raquo</code>
+
+ <td> U+00BB
+
+ <tr>
+ <td> <code title="">rarr</code>
+
+ <td> U+2192
+
+ <tr>
+ <td> <code title="">rceil</code>
+
+ <td> U+2309
+
+ <tr>
+ <td> <code title="">rdquo</code>
+
+ <td> U+201D
+
+ <tr>
+ <td> <code title="">real</code>
+
+ <td> U+211C
+
+ <tr>
+ <td> <code title="">reg</code>
+
+ <td> U+00AE
+
+ <tr>
+ <td> <code title="">REG</code>
+
+ <td> U+00AE
+
+ <tr>
+ <td> <code title="">rfloor</code>
+
+ <td> U+230B
+
+ <tr>
+ <td> <code title="">rho</code>
+
+ <td> U+03C1
+
+ <tr>
+ <td> <code title="">rlm</code>
+
+ <td> U+200F
+
+ <tr>
+ <td> <code title="">rsaquo</code>
+
+ <td> U+203A
+
+ <tr>
+ <td> <code title="">rsquo</code>
+
+ <td> U+2019
+
+ <tr>
+ <td> <code title="">sbquo</code>
+
+ <td> U+201A
+
+ <tr>
+ <td> <code title="">scaron</code>
+
+ <td> U+0161
+
+ <tr>
+ <td> <code title="">sdot</code>
+
+ <td> U+22C5
+
+ <tr>
+ <td> <code title="">sect</code>
+
+ <td> U+00A7
+
+ <tr>
+ <td> <code title="">shy</code>
+
+ <td> U+00AD
+
+ <tr>
+ <td> <code title="">sigma</code>
+
+ <td> U+03C3
+
+ <tr>
+ <td> <code title="">sigmaf</code>
+
+ <td> U+03C2
+
+ <tr>
+ <td> <code title="">sim</code>
+
+ <td> U+223C
+
+ <tr>
+ <td> <code title="">spades</code>
+
+ <td> U+2660
+
+ <tr>
+ <td> <code title="">sub</code>
+
+ <td> U+2282
+
+ <tr>
+ <td> <code title="">sube</code>
+
+ <td> U+2286
+
+ <tr>
+ <td> <code title="">sum</code>
+
+ <td> U+2211
+
+ <tr>
+ <td> <code title="">sup</code>
+
+ <td> U+2283
+
+ <tr>
+ <td> <code title="">sup1</code>
+
+ <td> U+00B9
+
+ <tr>
+ <td> <code title="">sup2</code>
+
+ <td> U+00B2
+
+ <tr>
+ <td> <code title="">sup3</code>
+
+ <td> U+00B3
+
+ <tr>
+ <td> <code title="">supe</code>
+
+ <td> U+2287
+
+ <tr>
+ <td> <code title="">szlig</code>
+
+ <td> U+00DF
+
+ <tr>
+ <td> <code title="">tau</code>
+
+ <td> U+03C4
+
+ <tr>
+ <td> <code title="">there4</code>
+
+ <td> U+2234
+
+ <tr>
+ <td> <code title="">theta</code>
+
+ <td> U+03B8
+
+ <tr>
+ <td> <code title="">thetasym</code>
+
+ <td> U+03D1
+
+ <tr>
+ <td> <code title="">thinsp</code>
+
+ <td> U+2009
+
+ <tr>
+ <td> <code title="">thorn</code>
+
+ <td> U+00FE
+
+ <tr>
+ <td> <code title="">tilde</code>
+
+ <td> U+02DC
+
+ <tr>
+ <td> <code title="">times</code>
+
+ <td> U+00D7
+
+ <tr>
+ <td> <code title="">trade</code>
+
+ <td> U+2122
+
+ <tr>
+ <td> <code title="">TRADE</code>
+
+ <td> U+2122
+
+ <tr>
+ <td> <code title="">uArr</code>
+
+ <td> U+21D1
+
+ <tr>
+ <td> <code title="">uacute</code>
+
+ <td> U+00FA
+
+ <tr>
+ <td> <code title="">uarr</code>
+
+ <td> U+2191
+
+ <tr>
+ <td> <code title="">ucirc</code>
+
+ <td> U+00FB
+
+ <tr>
+ <td> <code title="">ugrave</code>
+
+ <td> U+00F9
+
+ <tr>
+ <td> <code title="">uml</code>
+
+ <td> U+00A8
+
+ <tr>
+ <td> <code title="">upsih</code>
+
+ <td> U+03D2
+
+ <tr>
+ <td> <code title="">upsilon</code>
+
+ <td> U+03C5
+
+ <tr>
+ <td> <code title="">uuml</code>
+
+ <td> U+00FC
+
+ <tr>
+ <td> <code title="">weierp</code>
+
+ <td> U+2118
+
+ <tr>
+ <td> <code title="">xi</code>
+
+ <td> U+03BE
+
+ <tr>
+ <td> <code title="">yacute</code>
+
+ <td> U+00FD
+
+ <tr>
+ <td> <code title="">yen</code>
+
+ <td> U+00A5
+
+ <tr>
+ <td> <code title="">yuml</code>
+
+ <td> U+00FF
+
+ <tr>
+ <td> <code title="">zeta</code>
+
+ <td> U+03B6
+
+ <tr>
+ <td> <code title="">zwj</code>
+
+ <td> U+200D
+
+ <tr>
+ <td> <code title="">zwnj</code>
+
+ <td> U+200C
+ </table>
+
+ <h2 id=wysiwyg><span class=secno>9. </span>WYSIWYG editors</h2>
+
+ <p><dfn id=wysiwyg1>WYSIWYG editors</dfn> are authoring tools with a
+ predominantly presentation-driven user interface.
+
+ <h3 id=presentational><span class=secno>9.1. </span>Presentational markup</h3>
+
+ <h4 id=wysiwyg0><span class=secno>9.1.1. </span><dfn id=wysiwyg2>WYSIWYG
+ signature</dfn></h4>
+
+ <p>WYSIWYG editors must include a <code><a href="#meta0">meta</a></code>
+ element in the <code><a href="#head">head</a></code> element whose <code
+ title=attr-meta-name><a href="#name">name</a></code> attribute has the
+ value <code title=meta-generator><a href="#generator">generator</a></code>
+ and whose <code title=attr-meta-content><a
+ href="#content0">content</a></code> attribute's value ends with the string
+ "<code title="">(WYSIWYG editor)</code>". Non-WYSIWYG authoring tools must
+ not include this string in their generator string.
+
+ <h4 id=the-font><span class=secno>9.1.2. </span>The <dfn
+ id=font><code>font</code></dfn> element</h4>
+
+ <p><a href="#transparent0">Transparent</a> <a href="#block-level0"
+ title="block-level elements">block-level element</a>, and <a
+ href="#transparent0">transparent</a> <a href="#strictly">strictly
+ inline-level content</a>.
+
+ <dl class=element>
+ <dt>Contexts in which this element may be used:
+
+ <dd>Where <span>block-level content</span> is allowed.
+
+ <dd>Where <a href="#strictly">strictly inline-level content</a> is
+ allowed.
+
+ <dt>Content model:
+
+ <dd><a href="#transparent0">Transparent</a>.
+
+ <dt>Element-specific attributes:</dt>
+ <!--
+ <dd><code title="attr-font-color">color</code></dd>
+ <dd><code title="attr-font-face">face</code></dd>
+ <dd><code title="attr-font-size">size</code></dd>-->
+
+ <dd><code title=attr-font-style><a href="#style0">style</a></code>
+
+ <dt>DOM interface:
+
+ <dd>
+ <pre
+ class=idl>interface <dfn id=htmlfontelement>HTMLFontElement</dfn> : <a href="#htmlelement">HTMLElement</a> {<!--
+ attribute DOMString <span title="dom-font-color">color</span>;
+ attribute DOMString <span title="dom-font-face">face</span>;
+ attribute DOMString <span title="dom-font-size">size</span>;-->
+ readonly attribute CSSStyleDeclaration <a href="#style1" title=dom-font-style>style</a>;
+};</pre>
+ </dl>
+
+ <p>The <code><a href="#font">font</a></code> element doesn't represent
+ anything. It must not be used except by <a href="#wysiwyg1">WYSIWYG
+ editors</a>, which may use it to achieve presentational effects. Even
+ WYSIWYG editors, however, should make every effort to use appropriate
+ semantic markup and avoid the use of media-specific presentational markup.
+
+ <p>Conformance checkers must consider this element to be non-conforming if
+ it is used on a page lacking the <a href="#wysiwyg2">WYSIWYG
+ signature</a>.
+
+ <p>A <code><a href="#font">font</a></code> element can only contain content
+ that would still be conformant if all elements with <a
+ href="#transparent0">transparent</a> content models were replaced by their
+ contents.
+
+ <div class=example>
+ <p>The following would be syntactically legal (as the output from a
+ WYSIWYG editor, though not anywhere else):</p>
+
+ <pre>&lt;!DOCTYPE HTML>
+&lt;html>
+ &lt;head>
+ &lt;title>&lt;/title>
+ &lt;meta name="generator" content="Sample Editor 1.0 <em>(WYSIWYG editor)</em>">
+ &lt;/head>
+ &lt;body>
+ &lt;font style="display: block; border: solid">
+ &lt;h1>Hello.&lt;/h1>
+ &lt;/font>
+ &lt;p>
+ &lt;font style="color: orange; background: white">How&lt;/font>
+ &lt;font style="color: yellow; background: white">do&lt;/font>
+ &lt;font style="color: green; background: white"><em>&lt;em></em>you<em>&lt;/em></em>&lt;/font>
+ &lt;font style="color: blue; background: white">do?&lt;/font>
+ &lt;/p>
+ &lt;/body>
+&lt;/html></pre>
+
+ <p>The first <code><a href="#font">font</a></code> element is conformant
+ because <code><a href="#h1">h1</a></code> and <code><a
+ href="#p">p</a></code> elements are both allowed in <code><a
+ href="#body0">body</a></code> elements. The next four are allowed because
+ text and <code><a href="#em">em</a></code> elements are allowed in
+ <code><a href="#p">p</a></code> elements.</p>
+ </div>
+
+ <p>The <dfn id=style0 title=attr-font-style><code>style</code></dfn>
+ attribute, if specified, must contain only a list of zero or more
+ semicolon-separated (;) CSS declarations. <a href="#refsCSS21">[CSS21]</a></p>
+ <!-- XXX deal with each of the use cases in this:
+ http://lists.w3.org/Archives/Public/www-html/2003Jan/0277.html -->
+
+ <p>The declarations specified must be parsed and treated as the body of a
+ declaration block whose selector matches just that <code><a
+ href="#font">font</a></code> element. For the purposes of the CSS cascade,
+ the attribute must be considered to be a 'style' attribute at the author
+ level.
+
+ <p>The <dfn id=style1 title=dom-font-style><code>style</code></dfn> DOM
+ attribute must return a <code>CSSStyleDeclaration</code> whose value
+ represents the declarations specified in the attribute, if present.
+ Mutating the <code>CSSStyleDeclaration</code> object must create a <code
+ title=attr-font-style><a href="#style0">style</a></code> attribute on the
+ element (if there isn't one already) and then change its value to be a
+ value representing the serialised form of the
+ <code>CSSStyleDeclaration</code> object. <a href="#refsCSSOM">[CSSOM]</a>
+
+ <h2 id=rendering><span class=secno>10. </span>Rendering</h2>
+
+ <p class=big-issue>This section will probably include details on how to
+ render DATAGRID (including <span id=datagridPseudos>its
+ pseudo-elements</span>), drag-and-drop, etc, in a visual medium, in
+ concert with CSS. Terms that need to be defined include: <dfn
+ id=sizing>sizing of embedded content</dfn>
+
+ <p>CSS UAs in visual media must, when scrolling a page to a fragment
+ identifier, align the top of the viewport with the target element's top
+ border edge.</p>
+ <!-- XXX horiz pos given bidi, and not
+ scrolling when not required to? -->
+ <!-- Elements that have been dropped: ACRONYM APPLET B BASEFONT BLINK
+BIG CENTER DIR DIV FONT FRAME FRAMESET I ISINDEX MARQUEE NOEMBED
+NOFRAMES S SPACER STRIKE TT U -->
+ <!-- XXX bits and pieces that were removed from the semantic parts:
+
+ <p>In CSS-aware user agents, the default presentation of this
+ element should be achieved by including the following rules, or
+ their equivalent, in the UA's user agent style sheet:</p>
+
+ <pre>@namespace xh url(http://www.w3.org/1999/xhtml);
+xh|section { display: block; margin: 1em 0; }</pre>
+
+ <h4>Section headers</h4>
+
+ <p>For <code>h1</code> elements, CSS-aware visual user agents should
+ derive the size of the header from the level of <code>section</code>
+ nesting. This effect should be achieved by including the following
+ rules, or their equivalent, in the UA's user agent style sheet:</p>
+
+ <pre>@namespace xh url(http://www.w3.org/1999/xhtml);
+xh|section xh|h1 { /* same styles as h2 */ }
+xh|section xh|section xh|h1 { /* same styles as h3 */ }
+xh|section xh|section xh|section xh|h1 { /* same styles as h4 */ }
+xh|section xh|section xh|section xh|section xh|h1 { /* same styles as h5 */ }
+xh|section xh|section xh|section xh|section xh|section xh|h1 { /* same styles as h6 */ }</pre>
+
+ <p>Authors should use <code>h1</code> elements to denote headers in
+ sections. Authors may instead use <code>h2</code> ...
+ <code>h6</code> elements, for backwards compatibility with user
+ agents that do not support <code>section</code> elements.</p>
+
+-->
+
+ <p class=big-issue> must define letting the user <dfn id=obtain>obtain a
+ physical form</dfn> of a document (printing) and what this means for the
+ UA
+
+ <h3 id=rendering0><span class=secno>10.1. </span>Rendering and the DOM</h3>
+
+ <p class=big-issue>This section is wrong. mediaMode will end up on Window,
+ I think. All views implement Window.
+
+ <p>Any object implementing the <code>AbstractView</code> interface must also
+ implement the <code><a
+ href="#mediamodeabstractview">MediaModeAbstractView</a></code> interface.
+
+ <pre
+ class=idl>interface <dfn id=mediamodeabstractview>MediaModeAbstractView</dfn> {
+ readonly attribute DOMString <a href="#mediamode">mediaMode</a>;
+};</pre>
+
+ <p>The <dfn id=mediamode><code>mediaMode</code></dfn> attribute on objects
+ implementing the <code><a
+ href="#mediamodeabstractview">MediaModeAbstractView</a></code> interface
+ must return the string that represents the canvas' current rendering mode
+ (<code>screen</code>, <code>print</code>, etc). This is a lowercase
+ string, as <a
+ href="http://www.w3.org/TR/CSS21/media.html#media-types">defined by the
+ CSS specification</a>. <a href="#refsCSS21">[CSS21]</a>
+
+ <p>Some user agents may support multiple media, in which case there will
+ exist multiple objects implementing the <code>AbstractView</code>
+ interface. Only the default view implements the <code><a
+ href="#window">Window</a></code> interface. The other views can be reached
+ using the <code><a href="#view">view</a></code> attribute of the
+ <code>UIEvent</code> interface, during event propagation. There is no way
+ currently to enumerate all the views.</p>
+ <!-- XXX examples! -->
+
+ <h2 id=no><span class=secno>11. </span>Things that you can't do with this
+ specification because they are better handled using other technologies
+ that are further described herein</h2>
+
+ <p><em>This section is non-normative.</em>
+
+ <p>There are certain features that are not handled by this specification
+ because a client side markup language is not the right level for them, or
+ because the features exist in other languages that can be integrated into
+ this one. This section covers some of the more common requests.
+
+ <h3 id=localisation><span class=secno>11.1. </span>Localisation</h3>
+
+ <p>If you wish to create localised versions of an HTML application, the
+ best solution is to preprocess the files on the server, and then use HTTP
+ content negotiation to serve the appropriate language.</p>
+ <!-- <p>XXX example here</p> -->
+
+ <h3 id=declarative><span class=secno>11.2. </span>Declarative 2D vector
+ graphics and animation</h3>
+
+ <p>Embedding vector graphics into XHTML documents is the domain of SVG.</p>
+ <!-- <p>XXX example here</p> -->
+
+ <h3 id=declarative0><span class=secno>11.3. </span>Declarative 3D scenes</h3>
+
+ <p>Embedding 3D imagery into XHTML documents is the domain of X3D, or
+ technologies based on X3D that are namespace-aware.</p>
+ <!-- <p>XXX example here</p> -->
+
+ <h3 id=timers><span class=secno>11.4. </span>Timers</h3>
+
+ <p>This section is expected to be moved to the Window Object specification
+ in due course.
+
+ <pre class=idl>
+interface <dfn id=windowtimers>WindowTimers</dfn> {
+ // timers
+ long <a href="#settimeout">setTimeout</a>(in <a href="#timeouthandler">TimeoutHandler</a> handler, in long timeout);
+ long <a href="#settimeout">setTimeout</a>(in <a href="#timeouthandler">TimeoutHandler</a> handler, in long timeout, <var title="">arguments...</var>);
+ long <a href="#settimeout">setTimeout</a>(in DOMString code, in long timeout);
+ long <a href="#settimeout">setTimeout</a>(in DOMString code, in long timeout, in DOMString language);
+ void <a href="#cleartimeout">clearTimeout</a>(in long handle);
+ long <a href="#setinterval...">setInterval</a>(in <a href="#timeouthandler">TimeoutHandler</a> handler, in long timeout);
+ long <a href="#setinterval...">setInterval</a>(in <a href="#timeouthandler">TimeoutHandler</a> handler, in long timeout, <var title="">arguments...</var>);
+ long <a href="#setinterval...">setInterval</a>(in DOMString code, in long timeout);
+ long <a href="#setinterval...">setInterval</a>(in DOMString code, in long timeout, in DOMString language);
+ void <a href="#clearinterval">clearInterval</a>(in long handle);
+};
+
+interface <dfn id=timeouthandler>TimeoutHandler</dfn> {
+ void handleEvent(<var title="">arguments...</var>);
+};
+</pre>
+
+ <p>The <code><a href="#windowtimers">WindowTimers</a></code> interface must
+ be obtainable from any <code><a href="#window">Window</a></code> object
+ using binding-specific casting methods.
+
+ <p>The <code><a href="#settimeout">setTimeout</a></code> and <code><a
+ href="#setinterval...">setInterval</a></code> methods allow authors to
+ schedule timer-based events.
+
+ <p>The <dfn id=settimeout title=setTimeout><code>setTimeout(<var
+ title="">handler</var>, <var title="">timeout</var>[, <var
+ title="">arguments...</var>])</code></dfn> method takes a reference to a
+ <code><a href="#timeouthandler">TimeoutHandler</a></code> object and a
+ length of time in milliseconds. It must return a handle to the timeout
+ created, and then asynchronously wait <var title="">timeout</var>
+ milliseconds and then invoke <code>handleEvent()</code> on the <var
+ title="">handler</var> object. If any <var title="">arguments...</var>
+ were provided, they must be passed to the <var title="">handler</var> as
+ arguments to the <code>handleEvent()</code> function.
+
+ <p>In the ECMAScript DOM binding, the ECMAScript native
+ <code>Function</code> type must implement the <code><a
+ href="#timeouthandler">TimeoutHandler</a></code> interface such that
+ invoking the <code>handleEvent()</code> method of that interface on the
+ object from another language binding invokes the function itself, with the
+ arguments passed to <code>handleEvent()</code> as the arguments passed to
+ the function. In the ECMAScript DOM binding itself, however, the
+ <code>handleEvent()</code> method of the interface is not directly
+ accessible on <code>Function</code> objects. Such functions must be called
+ in the scope of the <a href="#browsing0">browsing context</a> in which
+ they were created.
+
+ <p>Alternatively, <dfn id=settimeout0 title=""><code>setTimeout(<var
+ title="">code</var>, <var title="">timeout</var>[, <var
+ title="">language</var>])</code></dfn> may be used. This variant takes a
+ string instead of a <code><a
+ href="#timeouthandler">TimeoutHandler</a></code> object. That string must
+ be parsed using the specified <var title="">language</var> (defaulting to
+ ECMAScript if the third argument is omitted) and executed in the scope of
+ the <a href="#browsing0">browsing context</a> associated with the <code><a
+ href="#window">Window</a></code> object on which the <code
+ title=setTimeout><a href="#settimeout">setTimeout()</a></code> method was
+ invoked.
+
+ <p class=big-issue>Need to define <var title="">language</var> values.
+
+ <p>The <dfn id=setinterval...><code>setInterval(...)</code></dfn> variants
+ must work in the same way as the <code><a
+ href="#settimeout">setTimeout</a></code> variants except that the <var
+ title="">handler</var> or <code><a href="#code">code</a></code> must be
+ invoked again every <var title="">timeout</var> milliseconds, not just the
+ once.
+
+ <p>The <dfn id=cleartimeout><code>clearTimeout()</code></dfn> and <dfn
+ id=clearinterval><code>clearInterval()</code></dfn> methods take one
+ integer (the value returned by <code><a
+ href="#settimeout">setTimeout</a></code> and <code><a
+ href="#setinterval...">setInterval</a></code> respectively) and must
+ cancel the specified timeout. When called with a value that does not
+ correspond to an active timeout or interval, the methods must return
+ without doing anything.
+
+ <p>Timeouts must never fire while another script is executing. (Thus the
+ HTML scripting model is strictly single-threaded and not reentrant.)
+
+ <h3 id=events2><span class=secno>11.5. </span>Events</h3>
+
+ <p id=js-function-listener>In the ECMAScript DOM binding, the ECMAScript
+ native <code>Function</code> type must implement the
+ <code>EventListener</code> interface such that invoking the
+ <code>handleEvent()</code> method of that interface on the object from
+ another language binding invokes the function itself, with the
+ <code>event</code> argument as its only argument. In the ECMAScript
+ binding itself, however, the <code>handleEvent()</code> method of the
+ interface is not directly accessible on <code>Function</code> objects.
+ Such functions, when invoked, must be called in the scope of the <a
+ href="#browsing0">browsing context</a> that they were created in.
+
+ <h2 class=no-num id=references>References</h2>
+
+ <p class=big-issue>This section will be written in a future
+ draft.<!--XXX-->
+
+ <h2 class=no-num id=acknowledgements>Acknowledgements</h2>
+
+ <p>Thanks to Aankhen, Aaron Leventhal, Adrian Sutton, Alexey Feldgendler,
+ Andrew Gove, Anne van Kesteren, Anthony Hickson, Asbj&oslash;rn Ulsberg,
+ Ben Godfrey, Ben Meadowcroft, Benjamin Hawkes-Lewis, Bert Bos, Bjoern
+ Hoehrmann, Boris Zbarsky, Brad Fults, Brad Neuberg, Brendan Eich, Brett
+ Wilson, Carlos Perell&oacute; Mar&iacute;n, Chao Cai, Channy Yun, Charl
+ van Niekerk<!--status.whatwg.org maintainer-->, Charles Iliya Krempeaux,
+ Charles McCathieNevile, Christian Biesinger, Christian Johansen, Chriswa,
+ Daniel Peng, Daniel Sp&aring;ng, Darin Alder, Darin Fisher, Dave
+ Townsend<!-- Mossop on moz irc -->, David Baron, David Flanagan, David
+ H&aring;s&auml;ther, David Hyatt, Derek Featherstone, Dimitri Glazkov,
+ dolphinling, Doron Rosenberg, Eira Monstad, Elliotte Harold, Erik
+ Arvidsson, fantasai, Franck 'Shift' Qu&eacute;lain, Geoffrey Sneddon,
+ H&aring;kon Wium Lie, Henri Sivonen, Henrik Lied, Ignacio Javier, J. King,
+ James Graham, James M Snell, James Perrett, Jan-Klaas Kollhof, Jasper
+ Bryant-Greene, Jeff Cutsinger, Jens Bannmann, Joel Spolsky, John Harding,
+ Johnny Stenback, Jon Perlow, Jonathan Worent, Jorgen Horstink, Josh
+ Levenberg, Joshua Randall, Jukka K. Korpela, Kai Hendry, <!-- Keryx
+ Web, = Lars Gunther -->
+ Kornel Lesinski, &#x9ed2;&#x6fa4;&#x525b;&#x5fd7; (KUROSAWA Takeshi),
+ Kristof Zelechovski, Lachlan Hunt, Larry Page, Lars Gunther, Laurens
+ Holst, Lenny Domnitser, L&eacute;onard Bouchet, Leons Petrazickis,
+ Logan<!-- on moz irc -->, Maciej Stachowiak, Malcolm Rowe, Mark
+ Nottingham, Mark Schenk, Martijn Wargers, Martin Atkins, Martin Honnen,
+ Mathieu Henri, Matthew Mastracci, Matthew Raymond, Matthew Thomas, Mattias
+ Waldau, Max Romantschuk, Michael 'Ratt' Iannarelli, Michael A. Nachbaur,
+ Michael A. Puls II<!--Shadow2531-->, Michael Gratton, Michael Powers,
+ Michel Fortin, Mihai &#x015E;ucan<!-- from
+ ROBO Design -->, Mike
+ Dierken<!-- S. Mike Dierken -->, Mike Dixon, Mike Schinkel, Mike Shaver,
+ Mikko Rantalainen, Neil Deakin, Olav Junker Kj&aelig;r, Philip Taylor,
+ Rajas Moonka, Rimantas Liubertas, Robert O'Callahan, Robert Sayre, Roman
+ Ivanov, S. Mike Dierken, Sam Ruby, Sean Knapp, Shaun Inman, Simon Pieters,
+ Stefan Haustein, Stephen Ma, Steve Runyon, Steven Garrity, Stewart Brodie,
+ Stuart Parmenter, Tantek &Ccedil;elik, Thomas Broyer, Thomas O'Connor, Tim
+ Altman, Vladimir Vuki&#x0107;evi&#x0107;, Wakaba, William Swanson, and
+ everyone on the WHATWG mailing list for their useful and substantial
+ comments.
+
+ <p>Special thanks to Richard Williamson for creating the first
+ implementation of <code><a href="#canvas">canvas</a></code> in Safari,
+ from which the canvas feature was designed.
+
+ <p>Special thanks also to the Microsoft employees who first implemented the
+ event-based drag-and-drop mechanism, <code title=attr-contenteditable><a
+ href="#contenteditable0">contenteditable</a></code>, and other features
+ first widely deployed by the Windows Internet Explorer browser.
+
+ <p>Special thanks and $10,000 to David Hyatt who came up with a broken
+ implementation of the <a href="#adoptionAgency">adoption agency
+ algorithm</a> that the editor had to reverse engineer and fix before using
+ it in the parsing section.
+
+ <p>Thanks also to the Microsoft blogging community for some ideas, to the
+ attendees of the W3C Workshop on Web Applications and Compound Documents
+ for inspiration, and to the #mrt crew, the #mrt.no crew, and the cabal for
+ their ideas and support.</p>
+ <!-- Hopefully Kam won't notice he's covered by these
+ acknowledgements three times! -->
+ <!--
+ ! menus
+
+<hyatt> the ability to get the current focused window in a window hierarchy
+
+wizards
+tabbed interface
+
+Application object? http://longhorn.msdn.microsoft.com/lhsdk/ref/ns/msavalon.windows/c/application/application.aspx
+
+<input type="text" menu="foo" icon="g.png"/> <menu id="foo"> <menuitem icon="g.png" onclick="engine('yahoo')">Yahoo</menuitem> ... </menu>
+
+> One more aspect I want you think about - for "user interface systems" in
+> general: The windowing system.
+> Different kinds of windows ("document", "browser (file-system/network or
+> otherwise)", "palette", "application modal dialog", "system modal dialog"),
+> the rules for layering them (appropriately flexible to allow different
+> implementations, e.g. MacOS vs. X-Windows), and simplifications for handheld
+> devices (which are sometimes single window devices anyway, but sometimes
+> they are one "normal" window plus sometimes one "dialog" window on top.
+
+window.open for dialogs
+
+
+Thus, they lack things like proper windows, tree
+widgets, menu bars, rich text areas and so forth. This is what I would
+like XUL to solve. - Paul Prescod
+
+
+
+
+Olav:
+> <product> to indicate something you can buy, like a cd on amazon
+> or a biker jacket at harleydavidson.com
+
+
+Drop downs often have a title for when there is no selection.
+
+http://www.w3.org/mid/BAY1-F150PNOkJvX41K000418e1@hotmail.com
+
+http://crew.tweakers.net/crisp/newlayout/index.html
+http://crew.tweakers.net/crisp/newlayout/list_topics.html
+http://crew.tweakers.net/crisp/newlayout/list_messages.html
+http://crew.tweakers.net/crisp/newlayout/list_messages_mod.html
+
+http://mail.mozilla.org/private/gui-toolkit/2004-April/000041.html
+
+> > > A standard for rich edit widgets would also be of interest to me.
+> >
+> > As in WYSIWIG editing? Of the bold/italic/underline/larger/smaller kind?
+> >
+> > Or do you mean as in the bare bones to be able to build an editor on top
+> > of? As in something that basically just gives you a cursor and the ability
+> > to tell where the selection is and some way to hook into the Undo
+> > functionality?
+>
+> I have use cases for both...I have a more desperate business need for
+> the latter (and have build apps using the gross APIs out there today)
+> but there are a lot of circumstances where an editor that already has
+> all standard HTML editing behaviour would be fine.
+>
+- Paul Prescod
+
+ * a way of selecting rows, so that you can declaratively have buttons
+ outside the template that move a "selected row" around. => web apps
+
+
+Calendar with list of days that have events (think: blog calendars)
+
+Find object at (x,y)
+Find mouse position
+
+Styling scrollbars:
+
+ ::scrollbars { ... }
+
+
+table of contents?
+
+
+http://www.gadgetopia.com/2004/05/04/FileIconTag.html
+
+
+on going back
+on going forward
+on came from back
+on came from forward
+better state serialisation for going back/forward
+
+
+some sort of markup to tell google _not_ to index a particular part of the site
+
+drop down menu with URIs to replace the silly <select> hacks.
+
+http://www.cs.tut.fi/~jkorpela/html/em.html
+
+<htmlarea>, <xmlarea>...
+
+> 3) Extensible syntax highlighting (coloring). I am aware that a ton of
+> code editors don't even do this well. The ability to load a syntax
+> definition file and have it color a block of code would do wonders for
+> making the web a more friendly place to script.
+ - Ryan Johnson
+
+toolbars, status bars. - Didier PH Martin
+
+ * methods/properties for scrolling managing, especially in
+ TextArea, such as .scrollTop and .scrollHeight in Mozilla and -
+
+
+Robert Bateman:
+
+ I've looked thru as many of the examples from around the web as I can find and
+ don't see an obvious way to do date calculations.
+
+ What I'm trying to do is populate an xsd:date field with now() plus 5 days as
+ it's default value. The field in question is a proposed "due date" for a
+ work order.
+
+ I've seen that I can get a "difference" between two dates, but no
+ calculations.
+
+
+Wladimir Palant pointed out problems with chunking with server-sent-events
+
+
+
+> 2. Some method of integration to allow Web apps to respond
+> to the browser's Cut, Copy, Paste, and Select All menu
+> items and keyboard equivalents. These work automatically
+> for text fields in any Web application; it would be
+> great if apps could make them work for stocks, address
+> book cards, message attachments, transactions, photos,
+> and so on too.
+>
+> I'd add Undo and Redo to that list, but unfortunately
+> IE6 doesn't have Undo and Redo menu items.
+ - mpt
+
+
+
+>> maybe except for the server sent events and the clipboard
+>> api (but even in those cases it might be possible).
+>
+> Clipboard API I don't really want to see, given the problems IE's
+> implementation of such brought up. A better standardised drag-and-drop
+> interface would be great though, as doing it with mouse events and IE's
+> proprietary events is annoying-to-impossible to get right all the time.
+>
+> I really like the idea of server-side events, though I would prefer to
+> have them set up by a scripting call rather than an HTML element.
+ - Andrew Clover
+
+
+ <html application="application">
+
+...would, instead of showing the Web page itself, the first time, show
+(inline in the browser):
+
+ :::: Security Warning :::::::::::::::::::::::::::::::::::::
+ :: ::
+ :: The Web page at this domain: ::
+ :: ':
+ :: paypcl.com
+ ::
+ :: ...wishes to launch an application in a separate
+ :: window. Do you trust this domain?
+ ::
+ :: [x] Remember this decision.
+ ::
+ :: (( Trust paypcl.com )) ( Display as Web page )
+ ::
+ :::::.
+- (spurred on by Jose Dinuncio)
+
+
+ :::: Security Warning :::::::::::::::::::::::::::::::::::::
+ :: ::
+ :: This Web page wishes to launch an application in a ::
+ :: separate window. Do you trust this domain? ::
+ :: ::
+ :: paypcl.com '
+ ::
+ :: ( Trust this site for now )
+ ::
+ :: ( Always trust this site )
+ ::
+ :: (( Display as Web page ))
+ ::
+ :::::.
+
+
+breadcrumb navigation markup
+other markup:
+ http://www.stuffandnonsense.co.uk/archives/whats_in_a_name_pt2.html
+
+
+common icons: http://www.intersmash.com/300images/
+
+
+http://www.gadgetopia.com/2004/06/18/DoYouWantToSaveYourChanges.html#Comments
+
+http://www.mojavelinux.com/cooker/demos/domTT/index.html
+
+http://www.mozilla.org/projects/ui/accessibility/dynamic-accessibility.html
+
+http://blog.colorstudy.com/ianb/weblog/2004/06/23.html
+http://daringfireball.net/2004/06/location_field
+
+listview/gridview API
+http://www.activewidgets.com/grid/
+
+
+> I would rather have it that changing the dom attribute 'value' or typing
+> in the textarea, would also change the contents of the textnode in the
+> textarea.
+>
+> In that way the dom level 2 traversal and range specification would not
+> be useless for textarea's.
+>
+> The same goes for input text controls and probably also for other form
+> controls.
+ - martijnw
+
+
+
+1. point to an xml instance and cause the page to be filled in
+2. serialise the site to a version of that xml instance
+
+<menu>
+ <li state="bar"/>
+</menu>
+<button state="bar"/>
+<state id="bar" label="" disabled="" value=""/>
+
+<input state="foo"/>
+<input state="foo"/>
+<input state="foo 2"/>
+<input state="foo 2"/>
+<state id="foo" model="x" ref="cat:orderLine[$v1]/cat:foo"/>
+
+<instance src=""/>
+
+<instance id="x">
+
+<Order xmlns="urn:oasis:names:tc:ubl:Order:1.0:0.70" xmlns:cat="urn:oasis:names:tc:ubl:CommonAggregateTypes:1.0:0.70">
+ <cat:ID/>
+ <cat:IssueDate/>
+ <cat:LineExtensionTotalAmount currencyID="USD"/>
+ <cat:BuyerParty>
+ <cat:ID/>
+ <cat:PartyName>
+ <cat:Name/>
+ </cat:PartyName>
+ <cat:Address>
+ <cat:ID/>
+ <cat:Street/>
+ <cat:CityName/>
+ <cat:PostalZone/>
+ <cat:CountrySub-Entity/>
+ </cat:Address>
+ <cat:BuyerContact>
+ <cat:ID/>
+ <cat:Name/>
+ </cat:BuyerContact>
+ </cat:BuyerParty>
+ <cat:SellerParty>
+ <cat:ID/>
+ <cat:PartyName>
+ <cat:Name/>
+ </cat:PartyName>
+ <cat:Address>
+ <cat:ID/>
+ <cat:Street/>
+ <cat:CityName/>
+ <cat:CountrySub-Entity/>
+ </cat:Address>
+ </cat:SellerParty>
+ <cat:DeliveryTerms>
+ <cat:ID/>
+ <cat:SpecialTerms/>
+ </cat:DeliveryTerms>
+ <cat:OrderLine>
+ <cat:BuyersID/>
+ <cat:SellersID/>
+ <cat:LineExtensionAmount currencyID=""/>
+ <cat:Quantity unitCode="">1</cat:Quantity>
+ <cat:Item>
+ <cat:ID/>
+ <cat:Description>Enter description here</cat:Description>
+ <cat:SellersItemIdentification>
+ <cat:ID>Enter part number here</cat:ID>
+ </cat:SellersItemIdentification>
+ <cat:BasePrice>
+ <cat:PriceAmount currencyID="">0.00</cat:PriceAmount>
+ </cat:BasePrice>
+ </cat:Item>
+ </cat:OrderLine>
+</Order>
+
+</instance>
+
+
+
+
+ <h2>Tree and List Widgets</h2>
+ click item to go uri
+ double click to submit form with value
+ sort list by any column
+ specify column headers, column sort types
+ specify data inline, or out of band
+ data can be linear or a one way tree
+ rows can have an icon associated with them
+ rows can have overlays associated with them
+
+ progress meter
+
+ http://www.gazingus.org/html/menuDropdown.html
+
+
+
+Disclosure triangles
+
+
+I think UAs should automatically highlight the accesskey (or add it in
+brackets if it isn't already in the string). I am thinking of writing some
+text - optional, of course, since this wouldn't apply to all UAs or all
+platforms - that specifies this.
+
+I also think that there should be an accesskey value which is basically
+"auto", and which picks a non-clashing access key based on the element
+content.
+
+
+
+| adding HTTP authentication capabilities to HTML allow sites to:
+| - remove a site's authentication state from the browser when
+| activated (i.e., a "log out" interface)
+| - add user data to a site's authentication state in the browser
+| (i.e., "log on" interfaces)
+| - display the user's current authentication state
+|
+| There are a few good reasons to do this. Many sites use cookies to
+| authenticate users, because HTTP authentication doesn't have any
+| mechanism to allow logging out (a key requirement of financial
+| institutions and other sensitive applications), and because the UI for
+| HTTP authentication can't be controlled, and doesn't offer an
+| "anonymous" / "not logged in" view.
+|
+| By accommodating HTTP authentication in Web forms, it will be possible
+| to have styled, custom "log on" interfaces as part of pages, as well
+| as "log out" facilities, while still retaining the benefits of HTTP
+| authentication.
+|
+| Specifically, HTTP authentication is more secure than cookies (when
+| Digest auth is used), and is more amenable to automated processes
+| (agents, spiders, etc.) as well as alternate browsing devices (screen
+| readers, etc.).
+
+
+http://jogin.com/weblog/archives/2004/07/19/hierarchy
+
+
+Yeah, <header> and <footer> or similar elements are almost certainly going
+to be defined at some point, along with <content> (for the main body of
+the page), <entry> or <post> or <article> to refer to a unit of text
+bigger than a section but smaller than a page, <aside> to mean a
+side bar, <note> to mean a note... and so forth. Suggestions welcome.
+We'll probably keep it to a minimum though. The idea is just to relieve
+the most common pseudo-semantic uses of <div>.
+
+
+http://lxr.mozilla.org/seamonkey/source/dom/public/idl/base/nsIDOMWindow.idl
+scrollBy, etc
+http://lxr.mozilla.org/seamonkey/source/dom/public/idl/base/nsIDOMWindowInternal.idl
+DOM level 0
+
+
+DH: I was arguing that you should be able to get the CSS info for
+document fragments if you had an owner document with CSS in it.
+
+
+
+So maybe:
+
+ var library = new ZipFile("data.zip");
+ library.onload = function() {
+ var sound1 = library.getAudio("sound1.wav"); // returns an Audio object
+ var image1 = library.getImage("image1.png"); // returns an HTMLImageElement
+ var doc1 = library.getXMLDocument("doc1.xml"); // returns a Document
+ var doc2 = library.getHTMLDocument("doc1.html"); // returns an HTMLDocument
+ }
+
+Also maybe supporting more than one file at a time:
+
+ var library = new ResourceLoader("data.zip");
+ library.add("moredata.zip");
+ library.onload = function() { ... }
+ library.onloading = function() {
+ reportLoadProgress(library.progress); // 0.0 .. 1.0
+ }
+
+...although I'm not sure how we would then deal with filename clashes.
+
+ var library = new AudioZip("sounds.zip");
+ library.onload = function() {
+ var sound1 = library["sound1.wav"];
+ sound.play();
+ }
+
+
+If we define onbeforeunload, then we have to say that the JS
+implementation of EventListener::handleEvent checks for that event,
+treating the return value as the string to use for the unload warning
+message if there is a return value, and calling preventDefault if
+there isn't.
+
+
+> > > Schematic editors, layout editors, interactive maps, data
+> > > visualization for network flows, etc.
+> Searching the web for the above keywords should find you a lot more.
+ - Denis Bohm
+
+
+
+Jens Meiert:
+- For non-native English speakers, it's sometimes difficult to understand
+the difference between <cite /> and <quote />, since citations often seem to
+be quoted, too (this is a presentational aspect, I know).
+- Is it right that the <dfn /> element [1] /must/ be used only in the
+context of the definition of the enclosed term (as the example suggests)?
+(If so, wouldn't it be useful to add this note, too?)
+
+
+http://secunia.com/advisories/9711/
+In particular number 7. - Chris Hofmann
+
+
+> [1] http://www.stopdesign.com/log/2004/08/25/microsoft-advances.html
+> [2] http://www.stopdesign.com/articles/throwing_tables/
+
+
+In other areas, however, the replacement is not a match in terms of functionality. Like it or not, but showModalDialog is a better way to provide feature-rich user feedback windows than window.confirm (which Firefox supports, even though there is NO PUBLIC STANDARD for it). With showModalDialog, I can pop a window offering "Yes," "No," or "Cancel" buttons that requires a response before proceeding. With window.confirm, I have to craft all my questions as something to which "OK" or "Cancel" makes sense, never mind asking for three, four, or five state responses.
+- http://news.zdnet.com/2100-9588-5438955.html ( John Carroll )
+
+
+
+> http://channel9.msdn.com/wiki/default.aspx/Channel9.InternetExplorerFeatureRequests
+> Alternate way of caching content
+> Avalon Integration
+> getData/setData improvement (clipboardData)
+> Input type=file improvements
+> HTML editing: Editing Tables
+> Input type=file improvements
+> .NET framework
+>
+> http://channel9.msdn.com/wiki/default.aspx/Channel9.InternetExplorerOutrageous
+> Some decent controls
+ - lachlan.hunt@lachy.id.au
+
+
+http://lists.w3.org/Archives/Member/w3c-html-wg/2004JulSep/att-0135/role072704a.html
+
+
+> I've encountered two situations where setting or retrieving the caret
+> position would be useful. The first is a situation where I'd like to
+> apply an input mask to a text box. For example, I'd like the ability to
+> create a text box where the date delimiters (dashes or slashes) appear
+> automatically in a text box upon entering the field, and when the user
+> types in the field, it fills into the appropriate spaces in the input
+> box and sets the text selection to the next appropriate position, all
+> while allowing the user to reposition the cursor within the text box
+> with a keyboard or mouse without being able to edit or delete the
+> delimiters (dashes or slashes). This would be very similar to input
+> mask features in certain native apps that I've used.
+ - Greg Kilwein
+
+> The second situation is an application that would like to highlight text
+> in a text box or textarea for the purposes of a spell check, thesaurus,
+> or search-and-replace operation.
+ - Greg Kilwein
+
+
+
+
+HTMLImageElement.click(x, y); (for Csaba Gabor)
+or clickPoint, if click() can't be done in IE
+can this be emulated in IE by posting a synthetic mouse click event
+with those X and Y coords?
+
+
+<menulabel>, or rather menus in general, need an icon attribute and a
+hide attribute, like the <command> element.
+
+
+What about safe clipboard access.
+As discussed before by others as well:
+The user initiates a paste action as recognized by the UISystem the user is working in.
+E.g pressing Ctrl-V or selecting paste from a context menu.
+An event is fired and a Listener can now access the pasted data as part of the event object.
+The same for cut and copy. The Listener can set data as part of the event object.
+This is safe and will not allow any script to mess with the clipboard without the user specifically asking for it
+by initiating a cut/copy/paste action.
+ - Jan-Klaas Kollhof
+
+
+Need to say that NodeList's items are enumerable, so that for (var x in myNodeList) { } works.
+ thank Dethe Elza
+
+rel="" on submit buttons?
+
+what does <label> _mean_? how about an empty one, one which contains
+more than one control, no controls?
+
+
+data: URIs and same-origin policy when navigated to from http:?
+ - Hallvord Reiar Michaelsen Steen
+
+
+need conformance section for editors, which says stuff like "can't be
+conforming if editor has an "italics" button"
+
+people want multiline tooltips with explicit line breaks
+
+
+
+attributes of type ID that have no value beyond the empty string do
+not give the element an ID of "".
+
+
+ability for a web app to save a file to the local disk:
+ var file = window.openFile(); // throws up UI
+ file.read();
+ var file = window.saveAsFile(); // throws up UI
+ file.write();
+...or something? Or use data: URIs and right-click-to-save?
+
+http://lxr.mozilla.org/mozilla/source/dom/public/idl/html/nsIDOMNSHTMLDocument.idl
+
+
+ <p><em>This section is non-normative.</em></p>
+
+
+how to handle 404s and 500s and other non-OK responses when it comes
+to <script>, <link>, <style>, etc.
+
+
+normative classes:
+ -example
+ -note
+ -warning
+ -issue
+ hCard, hCalendar
+ wiki based registration, first come first served
+ * class:
+ * applies to elements:
+ * processing model:
+ * status:
+
+
+<Hixie> vlad: you should define what the UA should do with out-of-order aDATs
+<pav> its an error
+<pav> pretty sure we say that somewhere
+<Hixie> yes i know it's an error
+<Hixie> but that doesn't say what the UA should do
+<pav> error == image is invalid
+<vlad> yep
+<vlad> either broken image icon
+<vlad> or display first frame (fall back to normal PNG)
+<vlad> up to the UA
+<Hixie> right
+<Hixie> you should say which one
+<pav> its up to the UA
+<Hixie> why?
+<vlad> "SHOULD display the first frame, but MAY display broken image icon if that's not convenient", in rfc parlance
+<vlad> because it's not useful to specify that, IMO
+<Hixie> up to the UA means one UA will implement something, it'll become a popular UA, then all the others will have to copy it.
+<vlad> how a UA wants to handle image errors is up to the UA
+<pav> we're designing an image format, not the html image tag
+<pav> the html spec should say what to do with it
+
+
+should we say that elements in HTML must be lowercase? (but with error
+handling for uppercase tags, obviously)? If so, update examples.
+
+<title> is for out of context headers
+<h1> is for in-context headers
+
+The parsing rules of HTML
+
+media="" is case-insensitive
+case-sensitivity of other attributes, and what it means
+
+empty title attribute is equivalent to missing attribute for purposes
+of alternate style sheet processing
+
+
+<p>s that contain <ul><ol><table><dl><blockquote>? (did we get all those?)
+
+
+
+> I'd like search engines to be able to show me the title of a page in the
+> same consistent position in a search result, and the name of the site
+> (if available) in the same consistent position in a search result, and
+> the name of the author (if available) in the same consistent position in
+> a search result.
+>
+> For that to happen, it would help slightly if the HTML specification
+> stopped SHOULD-ing the current <title> behavior. It would help more if
+> the HTML specification contained clear, straightforward markup for
+> author and site name (and encouraged UAs to present this information
+> when the document is taken out of context).
+
+ <title site="" publisher="" author="">Page Title</title>
+ <title>Page Title - <site></site> - <author></author> (<publisher></publisher>)</title>
+
+ <title>Page Title</title>
+ <link rel="top" title="" href="">
+ <link rel="publisher" title="" href="">
+ <link rel="author" title="" href="">
+
+
+ h1 is styled appropriately, h2 to h6 are styled according to legacy.
+
+
+[onclick] should make element focusable; enter should send onclick
+
+define implied <html>, <head>, <body>, <p>, </p>, etc.
+
+http://www.aujsproduction.com/samples/wishlist/revampedselector.asp
+
+
+interactive elements can't be nested (as in <a><button><input></button></a>)
+
+
+need a summary of all the content models and how they interact:
+ a | interactive strictly inline-level element | where inline-level content is expected | strictly inline-level content | interactive elements must not be nested
+ i | strictly inline-level element | where inline-level content is expected | strictly inline-level content |
+ em | strictly inline-level element | where inline-level content is expected | inline-level content |
+ p | block-level element, structured inline- | where block-level content is expected, | inline-level content | must not be nested
+ | level element | where inline-level content is expected | |
+...etc
+
+
+need a summary of the differences between the HTML and XML serialisations.
+e.g. how <p><ul> is allowed in one but not the other
+
+
+
+Google suggest: oninput -> submit a form whose only contents is the
+drop down list which you refresh (<datalist>).
+
+Inline editing of <select multiple=""> boxes
+
+image buttons shouldn't be used unless you want the coordinate
+
+need for the spec to say something about sending proprietary data over
+the network, e.g. in XMLHttpRequest and other data streams. Is it ok,
+if the page is doing the translation?
+
+built-in spell-checking in <input type="text">, <textarea>
+
+author-driven highlighting of individual words in text fields
+
+
+support access Array element via () instead of [] (IEism)
+- https://bugzilla.mozilla.org/show_bug.cgi?id=289876
+
+
+atom can do this:
+ <author>
+ <name>Mark Pilgrim</name>
+ <uri>http://example.org/</uri>
+ <email>f8dy@example.com</email>
+ </author>
+ <contributor>
+ <name>Sam Ruby</name>
+ <uri>http://intertwingly.net/blog/</uri>
+ </contributor>
+how do we do this in HTML5? (what's the use case?)
+
+how to interpret an HTML5 document for syndication
+http://hixie.ch/specs/hsf/hsf
+
+
+
+section "rendering HTML" has to cope with:
+ <q> element's quotes
+ <section> <h1>
+ default margins and paddings for <ul>, <form>, etc.
+
+ <h4>The <code>q</code> element</h4>
+ <p class="big-issue">Need to deal with the quotemark problem without
+ adding verbose markup, breaking existing documents, or adding
+ redundant elements.</p>
+
+
+
+<Hixie> here's how <object> works (assuming you don't support ActiveX)
+<Hixie> 1. look at the data="" attribute. If it's not there, go to the step i'll label "bail" below.
+<Hixie> 2. fetch the file indicated by the data="" attribute.
+<Hixie> 3. while waiting for the MIME type, treat <object> as a replaced element of transparent nothingness, intrinsic size zero.
+<hyatt> (so we would honor width/height)
+<hyatt> (because it's replaced)
+<Hixie> (yes)
+<Hixie> 4. if the MIME type is a long time coming (e.g. DNS is being slow) then jump to the "bail" step below until you have the MIME type, then jump back to step 5.
+<Hixie> 5. Once you have the MIME type, examine it. If it's a plugin type, jump to the plugin step below. If it's an image, jump to the image step below. If it's a document type (HTML, XML, etc) jump to the iframe step below. Otherwise, you don't recognise it, and jump to the "bail" step.
+<Hixie> plugin step: collect all the <param> element children in the <object>. instantiate the plugin and pass the params to it.
+<Hixie> image step: render the <object> as if it was an <img>
+<Hixie> document step: render the <object> as if it was an <iframe>
+<Hixie> bail step: render the <object> as if it was a <span>
+ - if there is no authoritative MIME type, then use the type="" attribute.
+ - if type="" is something you know you don't support, you MAY not download it
+ - if type="" is dynamically changed, do nothing
+ - if data="" is dynamically changed, redo loop
+
+<hyatt> apparently your url can come from <param>
+<hyatt> not just the data attribute
+<hyatt> our code looks for params with "src", "movie", "code" and "url"
+<hyatt> and also tries to find the type on a param
+<Hixie> oh that's you trying to have hacky activex support
+<Hixie> opera does that too
+<hyatt> yeah we support activex versions of plugins that are common
+<hyatt> like flash and quicktime and realaudio
+<Hixie> that would be a step 1b. if no data attribute, then look for a <param> to get you a URL instead.
+<Hixie> and if you find one, carry on as if that was your data="".
+
+
+should have some text talking about the fact that it's ok if your page
+passes through a period of non-conformance while script is running,
+but that in between scripts it should be compliant.
+
+
+how to handle 205 reset content responses and other HTTP codes in
+response to link clicks, link clicks with target="" attributes,
+window.open(), the user typing a URI in the URL bar, etc.
+
+XXX Native code for fast sorting of many data?
+
+http://www.microsoft.com/mind/1097/directanim.asp
+
+
+events: onmousewheel
+<hyatt> with a wheelDelta field on the WheelEvent (whcih comes off UIEvent)
+<hyatt> but in OS X you can wheel horizontally
+<hyatt> so we actually added wheelX, wheelY, and wheelZ
+<hyatt> with wheelDelta just mapping to wheelY for WinIE compat
+<Hixie_> oh i don't mind wheelZ, maybe we can even say ctrl+wheel should map to it on some platforms (windows)
+<hyatt> but if you hold down Shift+mouse wheel in mac apps on os x you'll wheel horizontally
+wheelDelta is multiples of 120
+http://msdn.microsoft.com/workshop/author/dhtml/reference/properties/wheeldelta.asp
+
+events:
+http://damowmow.com/temp/safari/WebCore-315/khtml/ecma/kjs_events.cpp
+
+Need to resolve whether <a rel=""> should affect an out-of-band UI (or
+whether it should just be a may), see
+https://bugs.opera.com/show_bug.cgi?id=169791
+
+
+should have appendix listing what was already implemented
+- http://www.xml.com/pub/a/2005/04/27/deviant.html
+
+| Hixie and Steven shared an item: in both XHTML2 and HTML5, it will
+| be possible to have a list child of a paragraph. That's good, from a
+| structural point of view. But that's bad, from a user's point of
+| view. Imagine you have a paragraph, with red background color. And
+| you have an unordered list in your clipboard. You place the caret at
+| the end of the paragraph and paste your list. Where does it end up?
+| In the paragraph or after it? Red background or not? I really fear
+| that, once again, document model authors are completely neglecting
+| the authoring side.
+ - http://www.glazman.org/weblog/dotclear/index.php?2005/05/27/1055-adam-2
+
+need to define how to process MIME types in <style> and <script> and so forth.
+
+http://www.paulgraham.com/popular.html
+
+
+
+ <p>In the ECMAScript DOM binding, objects implementing this interface
+ can also be dereferenced using square bracket notation (e.g.
+ <code>foo[1]</code> or <code>foo["bar"]</code>). Dereferencing with
+ an integer index is equivalent to invoking the <code>item()</code>
+ method with that index, and dereferencing with a string index is
+ equivalent to invoking the <code>namedItem()</code> method with that
+ index.</p>
+
+
+"you have mail": bubble notification; flash taskbar button,
+=> how do you stop advertisers?
+
+
+
+events should bubble from documents to Window
+
+say something about events fired on <body> -> document -> window, like
+onload? onpopstate is defined as body->html->doc->window; as is the
+local storage event. What about the old ones, how do they work? load,
+error, scroll, resize, etc?
+
+If we assume that bubbling events bubble from document to window,
+then it seems reasonable for scroll events that bubble to be fired at
+the document if the window is resized, and scroll events that don't
+bubble to be fired at elements if they are scrolled. window.onscroll
+and document.onscroll should both work.
+
+
+[HIT TESTING TRANSPARENCY]
+Definition: IE considers a point of an element "transparent" if any
+one of the following are true:
+
+ 1. All of the following are true:
+ a: The computed value of 'background-image' is 'none', and
+ b: The computed value of 'background-color' is 'transparent', and
+ c: The point is over a pixel of an AlphaImageLoader filter image
+ that has an alpha value of 0 (fully transparent), or the
+ element does not have an AlphaImageLoader filter applied;
+
+ 2. The point is outside the element's CSS clip rectangle;
+
+ 3. The computed value of 'visibility' is 'hidden';
+
+ 4. The element is a transparent IFRAME (in IE, an IFRAME with the
+ custom attribute "allowtransparency");
+
+ 5. The element is an OBJECT with the custom attribute "wmode" set to
+ "transparent" and the point in question is fully transparent.
+
+Given those definitions, when a mouse event occurs, IE finds the
+target element as follows:
+
+ A. Take the topmost node that is under the point where the pointer
+ was for the event. For CSS boxes, borders, padding areas and
+ content areas are considered part of the node, margins and
+ leading generated by the 'line-height' property are not.
+
+ B. If there is no node at that point, no event is fired. STOP.
+
+ C. If the node is a text node, then the event is fired at the text
+ node's nearest ancestor element node. STOP.
+
+ D. If the node is not an element, assign the node's nearest
+ ancestor element node to a variable X. Otherwise, assign the
+ element node itself to X.
+
+ E. If the element X is the BODY element or the HTML element and its
+ document is not the document of a transparent IFRAME, goto step
+ H. Similarly, if the element X is a TABLE element, or is an IMG
+ element, goto step H.
+
+ F. If the point where the pointer was is, per the above definition,
+ a point that on the element X is transparent, then ignore that
+ element and assign the element that is below that element in the
+ stacking order to X. If there is no element below X, or if the
+ point on X is not transparent and so the previous condition
+ doesn't apply, then leave X as is and go straight to step H.
+
+ G. Goto step E.
+
+ H. If the element X is now a BODY or TABLE element, but the element
+ assigned to X in step D was some other element, assign the
+ element originally assigned in step D back to X.
+
+ I. The event goes to X. STOP
+
+
+
+
+mousedown's default action is focus, so canceling mousedown stops focus transference.
+e.g. on http://www.mozilla.org/editor/midasdemo/
+
+xref all the _ERR exceptions to DOM3CORE
+
+
+<select><option><hr> support
+
+
+raising an exception when the wrong number of arguments is passed -
+is that a language-specific thing, or what?
+
+why |new XMLHttpRequest()| returns an object that .toStrings to
+[object XMLHttpRequest], same with new TCPConnection(); what if a
+constructor is called without using "new" in JS?
+
+
+reload: fire an event when "reload" is pressed so that the page can
+reload its data instead of the whole page. canceling the event cancels
+the HTTP reload. Abuse prevention required, though.
+
+
+load event: fire on body, document, window? or just let it bubble?
+
+
+http://msdn.microsoft.com/workshop/author/dhtml/reference/methods/elementfrompoint.asp
+http://msdn.microsoft.com/workshop/author/dhtml/reference/methods/showmodaldialog.asp
+
+
+refs for TCP/IP (rfc793) and IPv6
+
+http://www.joelonsoftware.com/items/2004/06/17.html
+http://www.joelonsoftware.com/items/2004/06/18.html
+
+
+<neutralise> block that kills scripting or anything dangerous?
+
+
+XXXX need explanation of when to use undo/redo, and when to use back/forward
+
+XXX "alternate style sheet" should be "alternative style sheet"
+
+
+
+
+ <h5>Using the <code>a</code> element with the <code>command</code> attribute</h5>
+
+ <p>If an <code>a</code> element has a <code
+ title="command-attribute">command</code> attribute, then:</p>
+
+ <p>If the element's <code>title</code> attribute is absent, then
+ when the UA attempts to display the element's hint, it must instead
+ use the specified command's Hint.</p>
+
+ <p>Even if the element's <code>href</code> attribute is absent, the
+ element must still match the CSS <code>:link</code> or
+ <code>:visited</code> pseudo-classes. It must match the
+ <code>:visited</code> pseudo-class if the command's action is to
+ follow a link that has already been visited by the user, and must
+ match the <code>:link</code> pseudo-class otherwise.</p>
+
+ <p>If a <code>DOMActivate</code> event is dispatched on the element
+ and is not canceled, and the event has no other default action, and
+ the command's Disabled State is false (enabled), then the command's
+ Action must be triggered as the default action.</p>
+
+ <p class="note">The <code>DOMActivate</code> event is fired as the
+ default action of the <code>click</code> event.</p>
+
+ <p>If the command's Disabled State is true (disabled) then the
+ element must be disabled and must therefore match the
+ <code>:disabled</code> pseudo-class. UAs should style disabled links
+ in such a way as to clearly convey their disabled state.</p>
+
+ <p>The Label, Icon, Checked State and Type facets of the command are
+ ignored by the <code>a</code> element (except for <a
+ href="#pseudosAndCommands">matching CSS pseudo-classes</a>).</p>
+
+ <h5>Using the <code>button</code> element with the <code>command</code> attribute</h5>
+
+ <p>If a <code>button</code> element has a <code
+ title="command-attribute">command</code> attribute, then:</p>
+
+ <p>If the element's <code>title</code> attribute is absent, then
+ when the UA attempts to display the element's hint, it must instead
+ use the specified command's Hint.</p>
+
+ <p>If a <code>DOMActivate</code> event is dispatched on the element
+ and is not canceled, and the event has no other default action, and
+ the command's Disabled State is false (enabled), and the button's
+ <code>disabled</code> attribute is absent, then the command's Action
+ must be triggered as the default action.</p>
+
+ <p class="note">The <code>DOMActivate</code> event is fired as the
+ default action of the <code>click</code> event.</p>
+
+ <p>If the command's Disabled State is true (disabled) then the
+ element must be disabled. The <code>button</code> element must also
+ be disabled if the element's <code>disabled</code> attribute is
+ set.</p>
+
+ <p>The Label, Icon, Checked State and Type facets of the command are
+ ignored by the <code>button</code> element (except for <a
+ href="#pseudosAndCommands">matching CSS pseudo-classes</a>).</p>
+
+ <h5>Using the <code>input</code> element with the <code>command</code> attribute</h5>
+
+ <p>If an <code>input</code> element has no <code>type</code>
+ attribute and no <code>name</code> attribute, and it has a <code
+ title="command-attribute">command</code> attribute, then:</p>
+
+ <p>If the command is of Type "command" then the element must
+ generally be styled and behave as if it was of type
+ <code>button</code>; if the Type of the command is "radio" then the
+ element must generally be styled and behave as if it was of type
+ <code>radio</code>; and if the Type of the command is "checkbox"
+ then the element must generally be styled and behave as if it was of
+ type <code>checkbox</code>.</p>
+
+ <p>If the command is of Type "command" and the element's
+ <code>value</code> attribute is absent, then when the UA attempts to
+ display the element's caption, it must instead use the specified
+ command's Label. The Label facet is ignored if the command is not of
+ Type "command".</p>
+
+ <p>The UA may use the Icon facet of the command to render an
+ icon in the control, if appropriate for the UI used.</p>
+
+ <p>If the element's <code>title</code> attribute is absent, then
+ when the UA attempts to display the element's hint, it must instead
+ use the specified command's Hint.</p>
+
+ <p>If a <code>DOMActivate</code> event is dispatched on the element
+ and is not canceled, and the event has no other default action, and
+ the command's Disabled State is false (enabled), and the element's
+ <code>disabled</code> attribute is absent, then the command's Action
+ must be triggered as the default action.</p>
+
+ <p class="note">The <code>DOMActivate</code> event is fired as the
+ default action of the <code>click</code> event.</p>
+
+ <p>If the command's Disabled State is true (disabled) then the
+ element must be disabled. The <code>input</code> element must also
+ be disabled if the element's <code>disabled</code> attribute is
+ set.</p>
+
+ <p>If the command's Checked State is true (checked) then the element
+ must be checked. The <code>input</code> element must also be checked
+ if the element's <code>checked</code> attribute is set.</p>
+
+
+
+
+ <p>This element should not be directly displayed. In CSS-aware user
+ agents, this should be achieved by including the following rules, or
+ their equivalent, in the UA's user agent style sheet:</p>
+
+ <pre>@namespace xh url(http://www.w3.org/1999/xhtml);
+xh|command { display: none; }</pre>
+
+
+
+ <h5 id="command-with-command">Using the <code>command</code> element with the <code>command</code> attribute</h5>
+
+ <p>If a <code>command</code> element has a <code
+ title="command-attribute">command</code> attribute, then:</p>
+
+ <p>If the element's <code>label</code> attribute is absent, then
+ when the UA attempts to display the element's caption, it must instead
+ use the specified command's Label.</p>
+
+ <p>If the element's <code>icon</code> attribute is absent, then
+ when the UA attempts to display the element's icon, it must instead
+ use the specified command's Icon.</p>
+
+ <p>If the element's <code>title</code> attribute is absent, then
+ when the UA attempts to display the element's hint, it must instead
+ use the specified command's Hint.</p>
+
+ <p>If a <code>click</code> event is dispatched on the element and is
+ not canceled, and the command's Disabled State is false (enabled),
+ and the element's own <code>disabled</code> attribute is absent,
+ then the command's Action must be triggered as the default
+ action.</p>
+
+ <p>If the command's Disabled State is true (disabled) then the
+ element must be disabled. The <code>command</code> element must also
+ be disabled if the element's <code>disabled</code> attribute is
+ set.</p>
+
+ <p>If the command's Checked State is true (checked) then the
+ element must be checked. The <code>command</code> element must also
+ be checked if the element's <code>checked</code> attribute is
+ set.</p>
+
+ <p>When a <code title="command-element">command</code> element has a
+ <code title="command-attribute">command</code> attribute, any <code
+ title="attr-command-type">type</code> and <code
+ title="attr-command-radiogroup">radiogroup</code> attribute is
+ ignored.</p>
+
+
+
+ <h4>The 'icon' property</h4>
+
+ <p>UAs should use the command's Icon as the default generic icon
+ provided by the user agent when the 'icon' property computes to
+ 'auto' on an element that either defines a command or refers to one
+ using the <code title="command-attribute">command</code>
+ attribute.</p>
+
+ <h4 id="pseudosAndCommands">CSS pseudo-classes and commands</h4>
+
+ <p>When an element uses the <code
+ title="command-attribute">command</code> attribute, any UI
+ pseudo-classes from the following list that apply to the element
+ defining the command also apply to the elements that refer to that
+ command.</p>
+
+ <dl>
+
+ <dt>:enabled, :disabled</dt>
+
+ <dd>Matches commands whose Disabled State facet is False and True
+ respectively.</dd>
+
+ <dt>:checked</dt>
+
+ <dd>Matches commands whose Type facet is either "radio" or
+ "checkbox", and whose Checked State facet is true.</dd>
+
+ </dl>
+
+
+
+ <p><code>menu</code> elements with explicit <code>label</code>
+ attributes, and <code>menu</code> elements following
+ <code>menulabel</code> elements, should be hidden. In CSS-aware UAs,
+ this effect should be achieved by including the following rules, or
+ their equivalent, in the UA's user agent style sheet:</p>
+
+ <pre>@namespace xh url(http://www.w3.org/1999/xhtml);
+xh|menu[label], xh|menulabel + xh|menu { display: none; }</pre>
+
+ <p>All other <code>menu</code> elements should be rendered
+ identically to <code>ul</code> elements. In CSS-aware UAs, this
+ effect may be achieved by including rules similar to the following
+ in the UA's user agent style sheet:</p>
+
+ <pre>@namespace xh url(http://www.w3.org/1999/xhtml);
+xh|menu { display: block; margin: 0 0 0 40px; list-style: disc; }</pre>
+
+
+
+
+ <h5>Displaying menus</h5>
+
+ <p>When a <code>menu</code> element is activated, the associated
+ menu should be constructed and shown. (For details on how a
+ <code>menu</code> element can be activated, see the sections on
+ <span>menu links</span> and <span>menu bars</span>.)</p>
+
+ <p>The styles applied to each element in the <code>menu</code>
+ element, as well as the element itself, may be applied when
+ constructing a menu. UAs are recommended to not apply styling to
+ context menus and menus for application menu bars, and to only use
+ styles for in-page menus.</p>
+
+ <p>If user agents support styling of menus, they should only support
+ the '<code>background</code>', '<code>color</code>',
+ '<code>border</code>', '<code>padding</code>' and
+ '<code>font</code>' properties on menus and menu items. (This list
+ might be incomplete; in general, properties that merely affect the
+ appearance of the element should work, but properties that affect
+ the layout should not.)</p>
+
+ <p>As the user interacts with a menu, the elements from which the
+ menu was created should have appropriate pseudo-classes (:hover,
+ :focus, :active) applied.</p>
+
+ <p>The menu items must only consider the computed styles of the
+ elements from which they were derived, not other elements.</p>
+
+ <div class="example">
+
+ <p>For example, take this menu:</p>
+
+ <pre>&lt;menu&gt;
+&lt;li&gt;&lt;command label="a"/&gt;&lt;/li&gt;
+&lt;/menu&gt;</pre>
+
+ <p>The menu has one menu item, labelled "a".</p>
+
+ <p>Styles applied to the <code>li</code> element in this menu would
+ have no effect on the rendered menu, except in so far as styles
+ inherit from that element to the <code>command</code> element.</p>
+
+ <p>Styles applied to the <code>command</code> element could affect
+ the menu. While the user is hovering over the menu item, the
+ <code>:hover</code> pseudo-class matches the <code>command</code>
+ element and any appropriate newly matching rules could be
+ applied.</p>
+
+ </div>
+
+ <p>When activated from a <span title="menu links">menu link</span>,
+ a menu must be placed in an Appropriate Place. Specifically, if the
+ <code>a</code> element is displayed as a vertically-stacked box (as
+ is typically seen for elements with '<code>display: block</code>',
+ '<code>list-item</code>', or '<code>table</code>'), then the menu
+ should appear vertically below the element, anchored so that one of
+ its top corners coincides with a bottom corner of the box so that
+ the menu and the box each have a horizontal side in common (or a
+ bottom corner of the menu coincides with a top corner of the box, if
+ there isn't enough room for the menu to drop down); otherwise, if
+ the element is displayed as a horizontally stacked box
+ ('<code>display: inline</code>', '<code>table-cell</code>', etc),
+ the menu should appear to the <em>side</em> of the box in an
+ analogous way. If the element is on the right of the page, the menu
+ should drop to the left, and vice versa.</p>
+
+ <p>UAs should implement the drop-down behaviour in more
+ platform-appropriate ways if the platform conventions differ from
+ the behaviour described above.</p>
+
+
+
+
+ <h4>The <dfn title="command-attribute"><code>command</code></dfn>
+ attribute</h4>
+
+ <p>Any element that can define a command can also, instead, have a
+ <code>command</code> attribute that specifies the ID of a command
+ that the element should defer to. In this case the element does not
+ define a command, but, in the absence of attributes to the contrary,
+ reflects the state of the element specified.</p>
+
+ <p>If the <code>command</code> attribute specifies an ID that is not
+ the ID of an element that defines a command, then the
+ <code>command</code> DOM attribute is set to the null value, and the
+ element acts as if it was linked to an element that defined a
+ command with no Label, no Hint, no Icon, no Action, that was not
+ Hidden, not Disabled, not Checked, and that was of Type
+ "command".</p>
+
+
+replaceable DOM properties: http://lxr.mozilla.org/mozilla/source/dom/src/base/nsDOMClassInfo.cpp#5928
+< brendan>|Hixie: so yeah, lxr for JSRESOLVE_QUALIFIED
+
+screen object:
+screen contains top left width height pixelDepth colorDepth availWidth availHeight availLeft availTop
+
+
+
+ <p>The most direct way to represent a command is by using the <code
+ title="command-element">command</code> element. A <code
+ title="command-element">command</code> element defines a command if
+ it does not have a <code title="command-attribute">command</code>
+ attribute.</p>
+
+ <div class="example">
+ <pre>...
+ &lt;command id="c_stop" label="Emergency Stop" onclick="dostop()"/&gt;
+ &lt;command id="c_go" label="Go" onclick="dogo()"/&gt;
+ &lt;command id="c_lamp" label="Headlamps" onclick="dof2()" disabled="disabled"/&gt;
+...</pre>
+</div>
+
+ <p>The <code>command</code> element, in addition to the core and
+ internationalisation attributes, may have the following
+ attributes specified:</p>
+
+ <dl>
+
+ <!-+- yes i know that some of these are core attributes. If you can
+ give me a better introductory paragraph, I'm all for it. -+->
+
+ <dt><dfn title="attr-command-type"><code>type</code></dfn></dt>
+
+ <dd>The command's Type. If present, this attribute must either have
+ the value <code>radio</code>, in which case the command is of Type
+ "radio", or the value <code>checkbox</code>, in which case the
+ command is (amazingly) of Type "checkbox". Any other value, or the
+ absence of the attribute altogether, means that the command is of
+ Type "command".</dd>
+
+ <dt><dfn title="attr-command-id"><code>id</code></dfn></dt>
+
+ <dd>The command's ID. If this attribute is not specified, then the
+ command is anonymous.</dd>
+
+ <dt><dfn title="attr-command-label"><code>label</code></dfn></dt>
+
+ <dd>The command's Label. If the attribute is not specified, the
+ command's Label is given by the element's <code>textContent</code>
+ DOM attribute.</dd>
+
+ <dt><dfn title="attr-command-title"><code>title</code></dfn></dt>
+
+ <dd>The command's Hint. If the attribute is not specified, the
+ command's Hint is the empty string.</dd>
+
+ <dt><dfn title="attr-command-icon"><code>icon</code></dfn></dt>
+
+ <dd>A URI to the command's Icon. If the attribute is not specified,
+ then the command has no Icon.</dd>
+
+ <dt><dfn title="attr-command-onclick"><code>onclick</code></dfn></dt>
+
+ <dd>An event handler attribute that listens for <code>click</code>
+ events.</dd>
+
+ <dt><dfn title="attr-command-hide"><code>hide</code></dfn></dt>
+
+ <dd>The command's Hidden State. If the attribute is present, the
+ command is hidden (and also disabled, regardless of the value of
+ the <code>disabled</code> attribute), otherwise, the command is
+ shown. If the attribute is present, it should have the value
+ "<code>hide</code>". <!-+-The name of the attribute reflects the
+ fact that Hidden commands in menus are hidden.-+-></dd>
+
+ <dt><dfn title="attr-command-disabled"><code>disabled</code></dfn></dt>
+
+ <dd>The command's Disabled State. If the attribute is present, the
+ command is disabled, otherwise, the command is enabled. If the
+ attribute is present, it should have the value
+ "<code>disabled</code>".</dd>
+
+ <dt><dfn title="attr-command-checked"><code>checked</code></dfn></dt>
+
+ <dd>The command's Checked State. If the attribute is present, the
+ command is checked, otherwise, the command is not. If the attribute
+ is present, it should have the value "<code>checked</code>".</dd>
+
+ <dt><dfn title="attr-command-radiogroup"><code>radiogroup</code></dfn></dt>
+
+ <dd>An attribute indicating the name of the group of commands that
+ will be toggled when the command itself is toggled. (Described <a
+ href="#radiocommand">below</a>.)</dd>
+
+ <dt><dfn title="attr-command-default"><code>default</code></dfn></dt>
+
+ <dd>An attribute indicating whether the command is the default
+ command. If the attribute is present, the command is the default
+ command, otherwise it is not. If it is set, it should have the
+ value <code>default</code>. Used by context menus to indicate what
+ the default option would be. The :default pseudo-class matches
+ <code>command</code> elements with this attribute.</dd>
+
+ </dl>
+
+ <p>In addition, <code title="command-element">command</code>
+ elements may also have a <code
+ title="command-attribute">command</code> attribute, as <a
+ href="#command-with-command">described below</a>.</p>
+
+ <p>The Type, ID, Label, Hint, Icon, Hidden State, Disabled State,
+ and Checked State of the command defined by a <code
+ title="command-element">command</code> element are as described
+ above. The Action of a <code title="command-element">command</code>
+ element is that a <code>{null, "click"}</code> event is fired on the
+ element.</p>
+
+ <p>If the Type of the command is "checkbox", when a
+ <code>click</code> event is dispatched on the element, user agents
+ must toggle the value of the <code>checked</code> attribute before
+ the event is dispatched in the document. (If the attribute is
+ absent, then it is set to the value <code>checked</code>, and if the
+ attribute is present, it is removed.) If the default action of the
+ event is canceled, the value of the attribute must be changed back
+ to the value it had before the event was dispatched.</p>
+
+ <p id="radiocommand">If the Type of the command is "radio", when a
+ <code>click</code> event is dispatched on the element, user agents
+ must set the value of the <code>checked</code> attribute on the
+ element to <code>checked</code>, and remove the attribute from any
+ <code>command</code> elements with <code>type</code> set to
+ <code>radio</code> and the same parent element and same
+ <code>radiogroup</code> attribute, before the event is dispatched in
+ the document. (If the element has no <code>radiogroup</code>
+ attribute, then the elements "with the same <code>radiogroup</code>
+ attribute" are those elements with <em>no</em>
+ <code>radiogroup</code> attribute.) If the default action of the
+ event is canceled, the value of the attributes that were changed
+ must be changed back to the values they had before the event was
+ dispatched.</p>
+
+ <p>In HTML the <code>command</code> element is an empty element with
+ no end tag.</p>
+
+ <p>Authors should put <code>command</code> elements inside the
+ <code>head</code> element, inside any element that may contain
+ <span>block-level elements</span> or <span>inline-level
+ content</span>, or inside <code>commandset</code> elements.</p> <!-+-
+ should, because hey, if they want to put them elsewhere, why not.
+ XXX -+->
+
+ <p>Authors should not put elements or text inside
+ <code>command</code> elements.</p>
+
+
+
+
+ <p>The <code title="dom-command-ro-command">command</code> DOM attribute
+ is defined with the <code title="attr-command">command</code>
+ content attribute.</p>
+
+
+Need to become consistent about whether or not to quote keyword
+("<code title="">foo</code>" vs <code>foo</code>)
+
+
+
+ XXX command icons in rendering section:
+
+ If the element defining the command has no explicit icon, then the
+ attribute must instead return the computed value of the CSS '<code
+ title="">icon</code>' property on that element. <a
+ href="#refsCSS3UI">[CSS3UI]</a>
+
+ If the computed value of '<code title="">icon</code>' is
+ '<code>auto</code>',
+
+
+search for event-click and make them all point to:
+<a href="http://www.w3.org/TR/DOM-Level-3-Events/events.html#event-click"><code>click</code></a>
+...or something.
+
+<code>DOMActivate -> <code title="event-DOMActivate">DOMActivate
+
+
+onclick="" only fires if it is a MouseEvent
+
+
+<form> .submit definition - see http://lxr.mozilla.org/mozilla/source/content/html/content/src/nsHTMLFormElement.cpp#600
+for how to handle multiple calls in series
+
+
+http://lxr.mozilla.org/mozilla/source/content/html/content/src/nsHTMLFormElement.cpp#699
+
+How events are handled:
+http://lxr.mozilla.org/mozilla/ident?i=HandleDOMEvent
+
+http://www.quirksmode.org/js/events_compinfo.html
+e.g. mousedown mouseup click mousedown mouseup click dblclick
+
+http://msdn.microsoft.com/library/default.asp?url=/workshop/author/dhtml/reference/events.asp
+
+
+
+05:46 < bz>|Hixie: let's put it this way
+05:46 < bz>|Hixie: 1) A script is executed when its data is available
+05:47 < bz>|Hixie: 2) The data for an inline script is available when its </script> is seen
+05:47 < bz>|Hixie: 3) The data for a script with src is available when it finishes loading
+05:47 < bz>|Hixie: all good so far?
+05:47 < Hixie>|i'm waiting for the bit where src= causes blocking in the normal, non-d.w case
+05:47 < bz>|Hixie: 4) The data for a script with src starts loading when the <script> node is inserted into the DOM
+05:48 < shaver>|it causes parser blocking in the normal case
+05:48 < bz>|Hixie: 5) When such a load starts, all further parsing is suspended until the load has completed and the script has executed.
+05:48 < bz>|Hixie: so if we forget document.write
+05:48 < bz>|Hixie: and look at the HTML: <script src="foo"></script><div>
+05:48 < bz>|Hixie: the text "<div>" will nto be parsed until after the script runs
+05:49 < bz>|Hixie: this is needed so that if the script does document.write that text can be inserted _before_ the "<div>" text into the parser
+05:49 < Hixie>|sure
+05:49 < Hixie>|all this is fine
+05:49 < bz>|Hixie: ok. So now let's look at our case
+05:49 < Hixie>|but how does document.write() know when to return?
+05:49 -!- davel [davel@moz-4F4E281A.dsl.static.sonic.net] has quit [Quit: davel]
+05:49 < bz>|It gives the data to the parser, and tells the parser to parse it
+05:49 < bz>|Once the parser returns, document.write returns
+05:50 < bz>|The parser returns when it runs out of data to parse (it's parsed it all)
+05:50 < bz>|Or if it's explicitly suspended (eg by a <script src="">)
+05:50 < Hixie>|AH
+05:50 < bz>|All this in Gecko
+05:50 < Hixie>|ok that was the key piece of information i was missing
+05:50 < Hixie>|the "explicit suspension"
+05:50 < Hixie>|ok
+but test IE on this...
+
+
+XXX publish a "Valid HTML5!" button with a kitten on it. Made by an artist. (Doodle?)
+
+
+XXX rename "Block-level" and "inline-level" to something else to
+prevent terminology clash with CSS.
+
+ Interaction with document.open/write/close is undefined
+ How to determine the character encoding
+ Integration with quirks mode problems
+ <style> parsing needs tweaking if we want to exactly match IE
+ <base> parsing needs tweaking to handle multiple <base>s
+ <isindex> needs some prose in the form submission section
+ No-frames and no-script modes aren't yet defined
+ Execution of <script> is not yet defined
+ New HTML5 elements aren't yet defined
+ There are various cases (marked) where EOF handling is undefined
+ Interaction with the "load" event is undefined
+
+
+hsivonen:
+> To make document conformance a more useful concept for the purpose of catching
+> author errors, I suggest that the following attributes be made required:
+> href and rel on link
+> href on base
+> name and content on meta (other than the encoding decl)
+> src on img
+> code, height and width on applet
+> name and value on param
+...
+> To allow user agents see whether the author provided the empty string as the
+> alternative text of whether the author just didn't care, I suggest that the
+> alt attribute on img be made optional.
+(i agree -ian)
+...
+> On the other hand, I have doubts about the requirement of significant
+> inline content. When the W3C said that paragraphs mustn't be empty,
+> various applications started emitting <p>&nbsp;</p>. If the WHAT WG says
+> that paragraphs must contend significant inline content, are the
+> developers of those applications suddenly going to decide not to allow
+> them to paragraphs to be saved or are they going to come up with an even
+> more crufty work-around to comply with the machine-checkable
+> requirements of the spec?
+(i agree, i think we should drop "significant inline content". -ian)
+
+
+bjoern:
+> If the concern here is what the specification should say, then that's
+> what a valid state is, not what a valid document is, since the class of
+> "predictably valid" documents does not cover many dynamic documents.
+
+
+
+arv asks for: a way to track download progress of, e.g., images when
+you are preloading 10 images; cf onprogress on XHR in mozilla
+
+
+window.getAttention() or some similar API to let the user know the
+page wants attention? How do you reduce the chance of irritation?
+see also https://bugzilla.mozilla.org/show_bug.cgi?id=293412
+
+
+
+
+ITEM
+
+Items have:
+ - parents, children
+ - properties
+ - commands that can apply to them
+
+
+
+ <li>Inline markup for pop-up windows, for example for dialog boxes
+ or tool palettes, so that dialogs need not be defined in separate
+ files.</li>
+
+ <li>Command updating: applications that have several access
+ points for the same feature, for instance a menu item and a
+ tool-bar button, would benefit from having to disable such
+ commands only once, instead of having to keep each access point
+ synchronized with the feature's availability at all times.
+ Similarly menu items or tool-bar buttons that represent a toggle
+ state could automatically stay synchronized whenever
+ toggled.</li>
+
+ <li>More device-independent DOM events: The DOM event set needs
+ device-independent events, such as events that fire when a button
+ or link is activated, whether via the mouse or the keyboard.
+ <code>DOMActivate</code> is a start, but it lacks equivalent HTML
+ attributes, and additional events may be needed.</li>
+
+ <li>Richer widget set: the existing HTML controls are quite
+ limited, some controls for commonly used types such as date
+ controls and range controls would be useful.</li>
+
+ <li>Sortable and multicolumn tree views and list views with rich
+ formatting.</li>
+
+ <li>Ability to define custom widgets cleanly, for example using
+ XBL and APIs to query and control focus state, widget state, the
+ position and state of input devices, etc.</li>
+
+ <li>Rich text editing: an underlying architecture upon which
+ domain-specific editors can be created, including things like
+ control over the caret position.</li>
+
+ <li>A predefined HTML editor based on the rich text editing
+ architecture.</li>
+
+ <li>Drag and drop APIs.</li>
+
+ <li>Text selection manipulation APIs.</li>
+
+ <li>Clipboard APIs (if the security and privacy concerns can be
+ addressed).</li>
+
+ <li>Flexible box model: The existing box model in CSS is designed
+ largely for documents rather than user interface. We need a new
+ box model designed for user interface which would relieve author
+ complaints about other aspects of CSS and also reduce the need
+ for tables for layout.</li>
+
+ <li>Window-based state management (so that new windows don't
+ interfere with existing sessions), for example implemented as a
+ per-domain, per-window "file system". This would allow multiple
+ instances of the same application (from the same site) to run
+ without the instances overwriting each other's cookies.</li>
+
+ <li>Markup to denote <span>mutually exclusive sections</span> (as
+ in the commonly seen wizard interfaces).</li>
+
+ <li>An improved CSS object model, for example with better APIs
+ for animation, simpler ways to navigate the rendered content, a
+ way to find the position of an element, methods to list the
+ elements under a coordinate, etc.</li>
+
+ <li>Better defined user authentication state handling. (Being able
+ to "log out" of sites reliably, for instance, or being able to
+ integrate the HTTP authentication model into the Web page.)</li>
+
+
+offline storage / caching pinning:
+http://groups.google.com/group/mozilla.dev.platform/browse_frm/thread/bf866101aa238773/a298294c27b9380a?lnk=gst&q=offline&rnum=1#a298294c27b9380a
+
+
+DOM0 quirks that Mozilla knows about:
+http://lxr.mozilla.org/seamonkey/source/dom/src/base/nsDOMClassInfo.cpp
+
+
+
+mutually exclusive sections:
+ <p class="example">For example, in an application for an online
+ multiplayer game, there could be four mutually exclusive sections:
+ one for the login page, one for the network status page displayed
+ while the user is logging in, one for a "lobby" where players get
+ together to organise a game, and one for the actual game. The
+ different sections are the various states that the application can
+ reach.</p>
+
+
+XXX make a consistent decision of which of the following formats to use:
+
+ U+1234 FOO BAR character ("foo")
+ U+1234 FOO BAR character ('foo')
+ U+1234 FOO BAR character (foo)
+ U+1234 FOO BAR ("foo") character
+ U+1234 FOO BAR ('foo') character
+ U+1234 FOO BAR (foo) character
+ U+1234 FOO BAR character ("<code title="">foo</code>")
+ U+1234 FOO BAR character ('<code title="">foo</code>')
+ U+1234 FOO BAR character (<code title="">foo</code>)
+ U+1234 FOO BAR ("<code title="">foo</code>") character
+ U+1234 FOO BAR ('<code title="">foo</code>') character
+ U+1234 FOO BAR (<code title="">foo</code>) character
+
+And make these match:
+
+ 0x12 (ASCII FOO)
+ 0x12 (ASCII "foo")
+ 0x12 (ASCII 'foo')
+ 0x12 ("foo")
+ 0x12 ('foo')
+
+-->
+ <script src="http://status.whatwg.org/annotate-web-apps.js"
+ type="text/javascript"></script>
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
new file mode 100644
index 0000000..8539aeb
--- /dev/null
+++ b/test/data/tokeniser2/INDEX
@@ -0,0 +1,7 @@
+# Index file for tokeniser tests
+#
+# Test Description
+
+test1.test html5lib tests (part 1)
+test2.test html5lib tests (part 2)
+contentModelFlags.test html5lib content model tests
\ No newline at end of file
diff --git a/test/data/tokeniser2/contentModelFlags.test b/test/data/tokeniser2/contentModelFlags.test
new file mode 100644
index 0000000..84d41fc
--- /dev/null
+++ b/test/data/tokeniser2/contentModelFlags.test
@@ -0,0 +1,36 @@
+{"tests": [
+
+{"description":"PLAINTEXT content model flag",
+"contentModelFlags":["PLAINTEXT"],
+"input":"<head>&body;",
+"output":[["Character", "<head>&body;"]]},
+
+{"description":"End tag closing RCDATA or CDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"bar",
+"input":"foo</bar>",
+"output":[["Character", "foo"], ["EndTag", "bar"]]},
+
+{"description":"End tag with incorrect name in RCDATA or CDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"baz",
+"input":"</foo>bar</baz>",
+"output":["ParseError", ["Character", "</foo>bar"], ["EndTag", "baz"]]},
+
+{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
+"contentModelFlags":["RCDATA", "CDATA"],
+"lastStartTag":"bar",
+"input":"foo</bar></baz>",
+"output":[["Character", "foo"], ["EndTag", "bar"], ["EndTag", "baz"]]},
+
+{"description":"CDATA w/ something looking like an entity",
+"contentModelFlags":["CDATA"],
+"input":"&foo;",
+"output":[["Character", "&foo;"]]},
+
+{"description":"RCDATA w/ an entity",
+"contentModelFlags":["RCDATA"],
+"input":"&lt;",
+"output":[["Character", "<"]]}
+
+]}
diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test
new file mode 100644
index 0000000..c12ff5a
--- /dev/null
+++ b/test/data/tokeniser2/test1.test
@@ -0,0 +1,136 @@
+{"tests": [
+
+{"description":"Correct Doctype lowercase",
+"input":"<!DOCTYPE html>",
+"output":[["DOCTYPE", "HTML", false]]},
+
+{"description":"Correct Doctype uppercase",
+"input":"<!DOCTYPE HTML>",
+"output":[["DOCTYPE", "HTML", false]]},
+
+{"description":"Correct Doctype mixed case",
+"input":"<!DOCTYPE HtMl>",
+"output":[["DOCTYPE", "HTML", false]]},
+
+{"description":"Truncated doctype start",
+"input":"<!DOC>",
+"output":["ParseError", ["Comment", "DOC"]]},
+
+{"description":"Doctype in error",
+"input":"<!DOCTYPE foo>",
+"output":[["DOCTYPE", "FOO", true]]},
+
+{"description":"Single Start Tag",
+"input":"<h>",
+"output":[["StartTag", "h", {}]]},
+
+{"description":"Empty end tag",
+"input":"</>",
+"output":["ParseError"]},
+
+{"description":"Empty start tag",
+"input":"<>",
+"output":["ParseError", ["Character", "<>"]]},
+
+{"description":"Start Tag w/attribute",
+"input":"<h a='b'>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start Tag w/attribute no quotes",
+"input":"<h a=b>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start/End Tag",
+"input":"<h></h>",
+"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
+
+{"description":"Two unclosed start tags",
+"input":"<p>One<p>Two",
+"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
+
+{"description":"End Tag w/attribute",
+"input":"<h></h a='b'>",
+"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
+
+{"description":"Multiple atts",
+"input":"<h a='b' c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Multiple atts no space",
+"input":"<h a='b'c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Repeated attr",
+ "input":"<h a='b' a='d'>",
+ "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Simple comment",
+ "input":"<!--comment-->",
+ "output":[["Comment", "comment"]]},
+
+{"description":"Comment, Central dash no space",
+ "input":"<!----->",
+ "output":["ParseError", ["Comment", "-"]]},
+
+{"description":"Comment, two central dashes",
+"input":"<!-- --comment -->",
+"output":["ParseError", ["Comment", " --comment "]]},
+
+{"description":"Unfinished comment",
+"input":"<!--comment",
+"output":["ParseError", ["Comment", "comment"]]},
+
+{"description":"Start of a comment",
+"input":"<!-",
+"output":["ParseError", ["Comment", "-"]]},
+
+{"description":"Ampersand only",
+"input":"&",
+"output":["ParseError", ["Character", "&"]]},
+
+{"description":"Unfinished entity",
+"input":"&f",
+"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
+
+{"description":"Ampersand, number sign",
+"input":"&#",
+"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
+
+{"description":"Unfinished numeric entity",
+"input":"&#x",
+"output":["ParseError", ["Character", "&#x"]]},
+
+{"description":"Entity with trailing semicolon (1)",
+"input":"I'm &not;it",
+"output":[["Character","I'm ¬it"]]},
+
+{"description":"Entity with trailing semicolon (2)",
+"input":"I'm &notin;",
+"output":[["Character","I'm ∉"]]},
+
+{"description":"Entity without trailing semicolon (1)",
+"input":"I'm &notit",
+"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
+["Character", "it"]]},
+
+{"description":"Entity without trailing semicolon (2)",
+"input":"I'm &notin",
+"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
+
+{"description":"Partial entity match at end of file",
+"input":"I'm &no",
+"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
+
+{"description":"ASCII decimal entity",
+"input":"&#0036;",
+"output":[["Character","$"]]},
+
+{"description":"ASCII hexadecimal entity",
+"input":"&#x3f;",
+"output":[["Character","?"]]},
+
+{"description":"Hexadecimal entity in attribute",
+"input":"<h a='&#x3f;'></h>",
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
+
+]}
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test
new file mode 100644
index 0000000..32c0f99
--- /dev/null
+++ b/test/data/tokeniser2/test2.test
@@ -0,0 +1,108 @@
+{"tests": [
+
+{"description":"Doctype without a name",
+"input":"<!DOCTYPE>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
+
+{"description":"Correct doctype without a space before name",
+"input":"<!DOCTYPEhtml>",
+"output":["ParseError", ["DOCTYPE", "HTML", false]]},
+
+{"description":"Incorrect doctype without a space before name",
+"input":"<!DOCTYPEfoo>",
+"output":["ParseError", ["DOCTYPE", "FOO", true]]},
+
+{"description":"Bogus doctype",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+
+{"description":"Incomplete doctype",
+"input":"<!DOCTYPE html ",
+"output":["ParseError", ["DOCTYPE", "HTML", true]]},
+
+{"description":"Numeric entity representing the NUL character",
+"input":"&#0000;",
+"output":[["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity representing the NUL character",
+"input":"&#x0000;",
+"output":[["Character", "\uFFFD"]]},
+
+{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
+"input":"&#2225222;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
+"input":"&#x1010FFFF;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Numeric entity representing a Windows-1252 'codepoint'",
+"input":"&#137;",
+"output":[["Character", "\u2030"]]},
+
+{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
+"input":"&#x89;",
+"output":[["Character", "\u2030"]]},
+
+{"description":"Hexadecimal entity with mixed uppercase and lowercase",
+"input":"&#xaBcD;",
+"output":[["Character", "\uABCD"]]},
+
+{"description":"Entity without a name",
+"input":"&;",
+"output":["ParseError", ["Character", "&;"]]},
+
+{"description":"Unescaped ampersand in attribute value",
+"input":"<h a='&'>",
+"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
+
+{"description":"StartTag containing <",
+"input":"<a<b>",
+"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
+
+{"description":"Non-void element containing trailing /",
+"input":"<h/>",
+"output":["ParseError", ["StartTag", "h", { }]]},
+
+{"description":"Void element with permitted slash",
+"input":"<br/>",
+"output":[["StartTag", "br", { }]]},
+
+{"description":"StartTag containing /",
+"input":"<h/a='b'>",
+"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
+
+{"description":"Double-quoted attribute value",
+"input":"<h a=\"b\">",
+"output":[["StartTag", "h", { "a":"b" }]]},
+
+{"description":"Unescaped </",
+"input":"</",
+"output":["ParseError", ["Character", "</"]]},
+
+{"description":"Illegal end tag name",
+"input":"</1>",
+"output":["ParseError", ["Comment", "1"]]},
+
+{"description":"Simili processing instruction",
+"input":"<?namespace>",
+"output":["ParseError", ["Comment", "?namespace"]]},
+
+{"description":"A bogus comment stops at >, even if preceeded by two dashes",
+"input":"<?foo-->",
+"output":["ParseError", ["Comment", "?foo--"]]},
+
+{"description":"Unescaped <",
+"input":"foo < bar",
+"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
+
+/* jmb -- libjson uses C strings internally, thus the input gets truncated before the
+ * data is fed to the input stream (and thus the tokeniser)
+{"description":"Null Byte Replacement",
+"input":"\u0000",
+"output":[["Character", "\ufffd"]]}
+*/
+
+]}
+
+
diff --git a/test/dict.c b/test/dict.c
new file mode 100644
index 0000000..0c2e93e
--- /dev/null
+++ b/test/dict.c
@@ -0,0 +1,53 @@
+#include "utils/dict.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw) /* allocation callback this test hands to hubbub_dict_create() */
+{
+ UNUSED(pw); /* the extra pw argument is deliberately ignored */
+
+ return realloc(ptr, len); /* forwards unchanged to the C library realloc() */
+}
+
+int main(int argc, char **argv) /* dictionary test: insert two keys, then look them up one character at a time */
+{
+ hubbub_dict *dict;
+ const void *result; /* written through &result by the search steps; compared against the inserted values below */
+ void *context = NULL; /* passed by address to every search step; starts out NULL */
+
+ UNUSED(argc); /* no command-line arguments are used */
+ UNUSED(argv);
+
+ dict = hubbub_dict_create(myrealloc, NULL);
+ assert(dict != NULL);
+
+ assert(hubbub_dict_insert(dict, "Hello", (const void *) 123) == /* NOTE(review): the call sits inside assert(); if assert is the standard macro it is compiled out under NDEBUG -- confirm what testutils.h provides */
+ HUBBUB_OK); /* key "Hello" is stored with the value 123 */
+ assert(hubbub_dict_insert(dict, "Hello1", (const void *) 456) ==
+ HUBBUB_OK); /* the first key is a prefix of this second key */
+
+ assert(hubbub_dict_search_step(dict, 'H', &result, &context) ==
+ HUBBUB_NEEDDATA); /* "H" through "Hell" are expected to report NEEDDATA, not a match */
+ assert(hubbub_dict_search_step(dict, 'e', &result, &context) ==
+ HUBBUB_NEEDDATA);
+ assert(hubbub_dict_search_step(dict, 'l', &result, &context) ==
+ HUBBUB_NEEDDATA);
+ assert(hubbub_dict_search_step(dict, 'l', &result, &context) ==
+ HUBBUB_NEEDDATA);
+ assert(hubbub_dict_search_step(dict, 'o', &result, &context) ==
+ HUBBUB_OK); /* "Hello" is a complete key */
+ assert(result == (const void *) 123); /* the value inserted with "Hello" */
+ assert(hubbub_dict_search_step(dict, '1', &result, &context) ==
+ HUBBUB_OK); /* continuing with the same context reaches "Hello1" */
+ assert(result == (const void *) 456); /* the value inserted with "Hello1" */
+ assert(hubbub_dict_search_step(dict, '\0', &result, &context) ==
+ HUBBUB_OK); /* NOTE(review): why a '\0' step after a full match should report OK is not evident from this file -- confirm against dict.c */
+ assert(hubbub_dict_search_step(dict, 'x', &result, &context) ==
+ HUBBUB_INVALID); /* neither inserted key contains 'x' */
+
+ hubbub_dict_destroy(dict);
+
+ printf("PASS\n"); /* printed once execution gets past all the asserts */
+
+ return 0;
+}
diff --git a/test/entities.c b/test/entities.c
new file mode 100644
index 0000000..e99e6b0
--- /dev/null
+++ b/test/entities.c
@@ -0,0 +1,42 @@
+#include "tokeniser/entities.h"
+
+#include "testutils.h"
+
+static void *myrealloc(void *ptr, size_t len, void *pw) /* allocation callback this test hands to hubbub_entities_create() and hubbub_entities_destroy() */
+{
+ UNUSED(pw); /* the extra pw argument is deliberately ignored */
+
+ return realloc(ptr, len); /* forwards unchanged to the C library realloc() */
+}
+
+int main(int argc, char **argv) /* entity test: feed the characters 'o','r','d','f','z' one at a time and check each step's status */
+{
+ uint32_t result; /* passed by address to every search step; its value is never checked in this test */
+ void *context = NULL; /* passed by address to every search step; starts out NULL */
+
+ UNUSED(argc); /* no command-line arguments are used */
+ UNUSED(argv);
+
+ assert(hubbub_entities_create(myrealloc, NULL) == HUBBUB_OK); /* NOTE(review): the call sits inside assert(); if assert is the standard macro it is compiled out under NDEBUG -- confirm what testutils.h provides */
+
+ assert(hubbub_entities_search_step('o', &result, &context) ==
+ HUBBUB_NEEDDATA); /* "o" alone is expected to be only a partial match */
+
+ assert(hubbub_entities_search_step('r', &result, &context) ==
+ HUBBUB_OK); /* "or" is expected to be a complete match */
+
+ assert(hubbub_entities_search_step('d', &result, &context) ==
+ HUBBUB_NEEDDATA); /* "ord" is expected to be only a partial match */
+
+ assert(hubbub_entities_search_step('f', &result, &context) ==
+ HUBBUB_OK); /* "ordf" is expected to be a complete match */
+
+ assert(hubbub_entities_search_step('z', &result, &context) ==
+ HUBBUB_INVALID); /* "ordfz" is expected to match nothing */
+
+ hubbub_entities_destroy(myrealloc, NULL); /* called with the same allocator arguments as the create call */
+
+ printf("PASS\n"); /* printed once execution gets past all the asserts */
+
+ return 0;
+}
diff --git a/test/filter.c b/test/filter.c
new file mode 100644
index 0000000..83cce20
--- /dev/null
+++ b/test/filter.c
@@ -0,0 +1,355 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <hubbub/hubbub.h>
+
+#include "utils/utils.h"
+
+#include "input/filter.h"
+
+#include "testutils.h"
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Print the conversion buffers after a filter call.
+ *
+ * Emits, in order: inlen bytes starting at in, inlen itself, the bytes
+ * between outbuf and out, and outlen -- all as last updated by
+ * hubbub_filter_process_chunk().
+ *
+ * \param in      Current read position in the input buffer
+ * \param inlen   Input length as reported back by the filter
+ * \param outbuf  Start of the output buffer
+ * \param out     Current write position in the output buffer
+ * \param outlen  Output length as reported back by the filter
+ */
+static void dump_state(const uint8_t *in, size_t inlen,
+		const uint8_t *outbuf, const uint8_t *out, size_t outlen)
+{
+	printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
+			(int) (out - outbuf), outbuf, (int) outlen);
+}
+
+/**
+ * Input filter test driver.
+ *
+ * Pushes a series of UTF-8 byte sequences (valid, illegal, and split in
+ * the middle of a multi-byte character) through a UTF-8 input filter with
+ * output buffers of varying size, asserting the status of every call and
+ * the bytes finally written to the output buffer.
+ *
+ * \param argc  Must be 2
+ * \param argv  argv[1] is the aliases file handed to hubbub_initialise()
+ * \return 0 after printing "PASS", 1 on bad usage; a failing assert()
+ *         exits the process from the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+	hubbub_filter_optparams params;
+	hubbub_filter *input;
+	uint8_t inbuf[64], outbuf[64];
+	size_t inlen, outlen;
+	const uint8_t *in = inbuf;
+	uint8_t *out = outbuf;
+
+	if (argc != 2) {
+		/* argv[1] is the aliases file, not an arbitrary data file */
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	/* Create input filter */
+	input = hubbub_filter_create("UTF-8", myrealloc, NULL);
+	assert(input);
+
+	/* Convert filter to UTF-8 encoding */
+	params.encoding.name = "UTF-8";
+	assert(hubbub_filter_setopt(input, HUBBUB_FILTER_SET_ENCODING,
+			&params) == HUBBUB_OK);
+
+
+	/* Simple case - valid input & output buffer large enough */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf);
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+
+	/* Too small an output buffer; no encoding edge cases */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hello!");
+	inlen = strlen((const char *) inbuf);
+	outbuf[0] = '\0';
+	outlen = 5;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_NOMEM);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	/* Hand the filter the rest of the 64 byte buffer */
+	outlen = 64 - 5 + outlen;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hello!",
+			SLEN("hello!")) == 0);
+
+
+	/* Illegal input sequence; output buffer large enough */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\x96o!");
+	inlen = strlen((const char *) inbuf);
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	/* Input does loose decoding, converting to U+FFFD if illegal
+	 * input is encountered */
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
+			SLEN("hell\xef\xbf\xbdo!")) == 0);
+
+
+	/* Input ends mid-sequence */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 3;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+
+	/* Input ends mid-sequence, but second attempt has too small a
+	 * buffer, but large enough to write out the incomplete character. */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 3;
+	outlen = 3;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_NOMEM);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	outlen = 64 - 7;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+
+	/* Input ends mid-sequence, but second attempt has too small a
+	 * buffer, not large enough to write out the incomplete character. */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 3;
+	outlen = 1;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_NOMEM);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	outlen = 60;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0o!",
+			SLEN("hell\xc2\xa0o!")) == 0);
+
+
+	/* Input ends mid-sequence, but second attempt contains
+	 * invalid character */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xc2o!");
+	inlen = strlen((const char *) inbuf) - 3;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 3;
+
+	/* Input does loose decoding, converting to U+FFFD if illegal
+	 * input is encountered */
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
+			SLEN("hell\xef\xbf\xbdo!")) == 0);
+
+
+	/* Input ends mid-sequence, but second attempt contains another
+	 * incomplete character */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xc2\xa0\xc2\xa1o!");
+	inlen = strlen((const char *) inbuf) - 5;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 2;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 3;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xc2\xa0\xc2\xa1o!",
+			SLEN("hell\xc2\xa0\xc2\xa1o!")) == 0);
+
+
+	/* Input ends mid-sequence, but second attempt contains insufficient
+	 * data to complete the incomplete character */
+	in = inbuf;
+	out = outbuf;
+	strcpy((char *) inbuf, "hell\xe2\x80\xa2o!");
+	inlen = strlen((const char *) inbuf) - 4;
+	outbuf[0] = '\0';
+	outlen = 64;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 1;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	inlen = 3;
+
+	assert(hubbub_filter_process_chunk(input, &in, &inlen,
+			&out, &outlen) == HUBBUB_OK);
+
+	dump_state(in, inlen, outbuf, out, outlen);
+
+	assert(hubbub_filter_reset(input) == HUBBUB_OK);
+
+	assert(memcmp(outbuf, "hell\xe2\x80\xa2o!",
+			SLEN("hell\xe2\x80\xa2o!")) == 0);
+
+
+	/* Clean up */
+	hubbub_filter_destroy(input);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/hubbub.c b/test/hubbub.c
new file mode 100644
index 0000000..ed61bb3
--- /dev/null
+++ b/test/hubbub.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <hubbub/hubbub.h>
+
+#include "testutils.h"
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Library initialise/finalise test driver.
+ *
+ * Initialises the library from the aliases file named on the command line
+ * and immediately finalises it again, asserting that both calls succeed.
+ *
+ * \param argc  Must be 2
+ * \param argv  argv[1] is the aliases file handed to hubbub_initialise()
+ * \return 0 after printing "PASS", 1 on bad usage; a failing assert()
+ *         exits the process from the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+	if (argc != 2) {
+		/* argv[1] is the aliases file, not an arbitrary data file */
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/inputstream.c b/test/inputstream.c
new file mode 100644
index 0000000..3a83419
--- /dev/null
+++ b/test/inputstream.c
@@ -0,0 +1,126 @@
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <hubbub/hubbub.h>
+
+#include "utils/utils.h"
+
+#include "input/inputstream.h"
+
+#include "testutils.h"
+
+static void buffer_moved_handler(const uint8_t *buffer, size_t len,
+ void *pw);
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Input stream test driver.
+ *
+ * Appends the data file to a UTF-8 input stream in CHUNK_SIZE pieces.
+ * After every full chunk the stream is drained with a peek / advance /
+ * push_back / advance cycle per character. Once the whole file has been
+ * appended, "hello!!!" is inserted, end of input is signalled with an
+ * empty append, the stream is read to EOF and the buffer is claimed.
+ *
+ * \param argc  Must be 3
+ * \param argv  argv[1] is the aliases file, argv[2] the data file
+ * \return 0 after printing "PASS", 1 on bad usage or if the data file
+ *         cannot be opened; a failing assert() exits the process from
+ *         the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+	hubbub_inputstream *stream;
+	FILE *fp;
+	long fsize;
+	size_t len, origlen;
+#define CHUNK_SIZE (4096)
+	uint8_t buf[CHUNK_SIZE];
+	uint8_t *isb;
+	size_t isblen;
+	uint32_t c;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL);
+	assert(stream != NULL);
+
+	assert(hubbub_inputstream_register_movehandler(stream,
+			buffer_moved_handler, NULL) == HUBBUB_OK);
+
+	fp = fopen(argv[2], "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", argv[2]);
+		return 1;
+	}
+
+	/* Measure the file; ftell() reports failure as -1, which must not
+	 * be converted to a huge size_t */
+	assert(fseek(fp, 0, SEEK_END) == 0);
+	fsize = ftell(fp);
+	assert(fsize >= 0);
+	assert(fseek(fp, 0, SEEK_SET) == 0);
+
+	origlen = len = (size_t) fsize;
+
+	while (len >= CHUNK_SIZE) {
+		/* A short read would feed stale buffer contents to the stream */
+		assert(fread(buf, 1, CHUNK_SIZE, fp) == CHUNK_SIZE);
+
+		assert(hubbub_inputstream_append(stream,
+				buf, CHUNK_SIZE) == HUBBUB_OK);
+
+		len -= CHUNK_SIZE;
+
+		/* Drain everything currently available, exercising
+		 * push_back on every character read */
+		while ((c = hubbub_inputstream_peek(stream)) !=
+				HUBBUB_INPUTSTREAM_OOD) {
+			size_t pos;
+
+			hubbub_inputstream_cur_pos(stream, &pos);
+			hubbub_inputstream_advance(stream);
+			assert(hubbub_inputstream_push_back(stream, c) ==
+					HUBBUB_OK);
+			hubbub_inputstream_advance(stream);
+		}
+	}
+
+	if (len > 0) {
+		assert(fread(buf, 1, len, fp) == len);
+
+		assert(hubbub_inputstream_append(stream,
+				buf, len) == HUBBUB_OK);
+	}
+
+	fclose(fp);
+
+	assert(hubbub_inputstream_insert(stream,
+			(const uint8_t *) "hello!!!",
+			SLEN("hello!!!")) == HUBBUB_OK);
+
+	/* An empty append marks the end of the input */
+	assert(hubbub_inputstream_append(stream, NULL, 0) == HUBBUB_OK);
+
+	while (hubbub_inputstream_peek(stream) !=
+			HUBBUB_INPUTSTREAM_EOF) {
+		size_t pos;
+
+		hubbub_inputstream_cur_pos(stream, &pos);
+		hubbub_inputstream_advance(stream);
+	}
+
+	assert(hubbub_inputstream_claim_buffer(stream, &isb, &isblen) ==
+			HUBBUB_OK);
+
+	printf("Input size: %zu, Output size: %zu\n", origlen, isblen);
+	/* %p requires a pointer to void */
+	printf("Buffer at %p\n", (void *) isb);
+
+	free(isb);
+
+	assert(hubbub_inputstream_deregister_movehandler(stream,
+			buffer_moved_handler, NULL) == HUBBUB_OK);
+
+	hubbub_inputstream_destroy(stream);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/**
+ * Buffer move callback registered with the input stream; logs the
+ * buffer address and length it is given.
+ *
+ * \param buffer  Buffer address passed by the input stream
+ * \param len     Length passed by the input stream
+ * \param pw      Client private data (ignored)
+ */
+void buffer_moved_handler(const uint8_t *buffer, size_t len,
+		void *pw)
+{
+	UNUSED(pw);
+
+	/* %p requires a pointer to void */
+	printf("Buffer moved to: %p (%zu)\n", (void *) buffer, len);
+}
diff --git a/test/parser.c b/test/parser.c
new file mode 100644
index 0000000..fe2659d
--- /dev/null
+++ b/test/parser.c
@@ -0,0 +1,175 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <hubbub/hubbub.h>
+
+#include <hubbub/parser.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+static const uint8_t *pbuffer;
+
+static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
+static void token_handler(const hubbub_token *token, void *pw);
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Parser test driver.
+ *
+ * Feeds the data file to a parser in CHUNK_SIZE pieces, with buffer and
+ * token handlers installed, then reports the charset the parser read,
+ * claims the document buffer and tears everything down.
+ *
+ * \param argc  Must be 3
+ * \param argv  argv[1] is the aliases file, argv[2] the data file
+ * \return 0 after printing "PASS", 1 on bad usage or if the data file
+ *         cannot be opened; a failing assert() exits the process from
+ *         the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+	hubbub_parser *parser;
+	hubbub_parser_optparams params;
+	FILE *fp;
+	long fsize;
+	size_t len;
+#define CHUNK_SIZE (4096)
+	uint8_t buf[CHUNK_SIZE];
+	const char *charset;
+	hubbub_charset_source cssource;
+	uint8_t *buffer;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL);
+	assert(parser != NULL);
+
+	params.buffer_handler.handler = buffer_handler;
+	params.buffer_handler.pw = NULL;
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
+			&params) == HUBBUB_OK);
+
+	params.token_handler.handler = token_handler;
+	params.token_handler.pw = NULL;
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER,
+			&params) == HUBBUB_OK);
+
+	fp = fopen(argv[2], "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", argv[2]);
+		return 1;
+	}
+
+	/* Measure the file; ftell() reports failure as -1, which must not
+	 * be converted to a huge size_t */
+	assert(fseek(fp, 0, SEEK_END) == 0);
+	fsize = ftell(fp);
+	assert(fsize >= 0);
+	assert(fseek(fp, 0, SEEK_SET) == 0);
+
+	len = (size_t) fsize;
+
+	while (len >= CHUNK_SIZE) {
+		/* A short read would feed stale buffer contents to the parser */
+		assert(fread(buf, 1, CHUNK_SIZE, fp) == CHUNK_SIZE);
+
+		assert(hubbub_parser_parse_chunk(parser,
+				buf, CHUNK_SIZE) == HUBBUB_OK);
+
+		len -= CHUNK_SIZE;
+	}
+
+	if (len > 0) {
+		assert(fread(buf, 1, len, fp) == len);
+
+		assert(hubbub_parser_parse_chunk(parser,
+				buf, len) == HUBBUB_OK);
+	}
+
+	fclose(fp);
+
+	/* Signal completion unconditionally. This used to happen only when
+	 * a partial final chunk existed, so a file whose size was an exact
+	 * multiple of CHUNK_SIZE never had completion signalled.
+	 * NOTE(review): assumes hubbub_parser_completed() is safe to call
+	 * when no chunk has been parsed (empty file) -- confirm. */
+	assert(hubbub_parser_completed(parser) == HUBBUB_OK);
+
+	charset = hubbub_parser_read_charset(parser, &cssource);
+
+	printf("Charset: %s (from %d)\n", charset, (int) cssource);
+
+	assert(hubbub_parser_claim_buffer(parser, &buffer, &len) ==
+			HUBBUB_OK);
+
+	free(buffer);
+
+	hubbub_parser_destroy(parser);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/**
+ * Buffer callback: remembers the buffer address in the file-scope
+ * pbuffer pointer, which token_handler() adds token offsets to.
+ *
+ * \param buffer  Buffer address passed by the parser
+ * \param len     Length passed by the parser (ignored)
+ * \param pw      Client private data (ignored)
+ */
+void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
+{
+	pbuffer = buffer;
+
+	UNUSED(pw);
+	UNUSED(len);
+}
+
+/**
+ * Token callback: prints the token type followed by its contents.
+ *
+ * Token strings are given as offset/length pairs; the offsets are added
+ * to the pbuffer address recorded by buffer_handler().
+ *
+ * \param token  Token to print
+ * \param pw     Client private data (ignored)
+ */
+void token_handler(const hubbub_token *token, void *pw)
+{
+	/* Indexed by token->type.
+	 * NOTE(review): assumes the token type enumeration starts at 0 and
+	 * is declared in this order -- confirm against hubbub/types.h. */
+	static const char *token_names[] = {
+		"DOCTYPE", "START TAG", "END TAG",
+		"COMMENT", "CHARACTERS", "EOF"
+	};
+	const size_t n_names = sizeof token_names / sizeof token_names[0];
+	size_t i;
+
+	UNUSED(pw);
+
+	/* Never index the name table with a type it does not cover */
+	if ((size_t) token->type >= n_names) {
+		printf("UNKNOWN (%d)\n", (int) token->type);
+		return;
+	}
+
+	printf("%s: ", token_names[token->type]);
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_DOCTYPE:
+		printf("'%.*s' (%svalid)\n",
+				(int) token->data.doctype.name.len,
+				pbuffer + token->data.doctype.name.data_off,
+				token->data.doctype.correct ? "" : "in");
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	case HUBBUB_TOKEN_END_TAG:
+		/* Start and end tags are printed identically: the name, then
+		 * one line per attribute */
+		printf("'%.*s' %s\n",
+				(int) token->data.tag.name.len,
+				pbuffer + token->data.tag.name.data_off,
+				(token->data.tag.n_attributes > 0) ?
+						"attributes:" : "");
+		for (i = 0; i < token->data.tag.n_attributes; i++) {
+			printf("\t'%.*s' = '%.*s'\n",
+					(int) token->data.tag.attributes[i].name.len,
+					pbuffer + token->data.tag.attributes[i].name.data_off,
+					(int) token->data.tag.attributes[i].value.len,
+					pbuffer + token->data.tag.attributes[i].value.data_off);
+		}
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		printf("'%.*s'\n", (int) token->data.comment.len,
+				pbuffer + token->data.comment.data_off);
+		break;
+	case HUBBUB_TOKEN_CHARACTER:
+		printf("'%.*s'\n", (int) token->data.character.len,
+				pbuffer + token->data.character.data_off);
+		break;
+	case HUBBUB_TOKEN_EOF:
+		printf("\n");
+		break;
+	default:
+		/* In the table but not handled above: just end the line */
+		printf("\n");
+		break;
+	}
+}
diff --git a/test/regression/cscodec-segv.c b/test/regression/cscodec-segv.c
new file mode 100644
index 0000000..ad9894a
--- /dev/null
+++ b/test/regression/cscodec-segv.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+
+#include <hubbub/hubbub.h>
+
+#include "charset/codec.h"
+
+#include "testutils.h"
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Charset codec regression test driver.
+ *
+ * Initialises the library, creates an "ISO-8859-1" codec, destroys it
+ * again straight away and finalises the library.
+ * NOTE(review): judging by the file name this presumably guards against
+ * a past crash on this path -- confirm against the project history.
+ *
+ * \param argc  Must be 2
+ * \param argv  argv[1] is the aliases file handed to hubbub_initialise()
+ * \return 0 after printing "PASS", 1 on bad usage; a failing assert()
+ *         exits the process from the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+ hubbub_charsetcodec *codec;
+
+ if (argc != 2) {
+ printf("Usage: %s <aliases_file>\n", argv[0]);
+ return 1;
+ }
+
+ assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+ codec = hubbub_charsetcodec_create("ISO-8859-1", myrealloc, NULL);
+ assert(codec != NULL);
+
+ /* No conversion is attempted: the test is create followed by destroy */
+ hubbub_charsetcodec_destroy(codec);
+
+ assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+ printf("PASS\n");
+
+ return 0;
+}
diff --git a/test/regression/filter-segv.c b/test/regression/filter-segv.c
new file mode 100644
index 0000000..950df61
--- /dev/null
+++ b/test/regression/filter-segv.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <hubbub/hubbub.h>
+
+#include "input/filter.h"
+
+#include "testutils.h"
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Input filter regression test driver.
+ *
+ * Initialises the library, creates a "UTF-8" input filter, destroys it
+ * again straight away and finalises the library.
+ *
+ * \param argc  Must be 2
+ * \param argv  argv[1] is the aliases file handed to hubbub_initialise()
+ * \return 0 after printing "PASS", 1 on bad usage; a failing assert()
+ *         exits the process from the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+	hubbub_filter *input;
+
+	if (argc != 2) {
+		/* argv[1] is the aliases file, not an arbitrary data file */
+		printf("Usage: %s <aliases_file>\n", argv[0]);
+		return 1;
+	}
+
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	input = hubbub_filter_create("UTF-8", myrealloc, NULL);
+	assert(input);
+
+	/* No data is processed: the test is create followed by destroy */
+	hubbub_filter_destroy(input);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
diff --git a/test/testrunner.pl b/test/testrunner.pl
new file mode 100644
index 0000000..00c54e7
--- /dev/null
+++ b/test/testrunner.pl
@@ -0,0 +1,147 @@
+#!/bin/perl
+#
+# Testcase runner for libhubbub
+#
+# Usage: testrunner <executable extension>
+#
+# Operates upon INDEX files described in the README.
+# Locates and executes testcases, feeding data files to programs
+# as appropriate.
+# Logs testcase output to file.
+# Aborts test sequence on detection of error: a testcase fails if its
+# last line of output starts with "FAIL" or if it exits with a
+# non-zero status (e.g. when it is killed by a signal).
+#
+
+use warnings;
+use strict;
+use File::Spec;
+use IPC::Open3;
+
+# Get EXE extension (if any)
+my $exeext = "";
+$exeext = shift @ARGV if (@ARGV > 0);
+
+# Open log file and /dev/null
+open(LOG, ">log") or die "Failed opening test log";
+open(NULL, "+<", File::Spec->devnull) or die "Failed opening /dev/null";
+
+# Open testcase index
+open(TINDEX, "<INDEX") or die "Failed opening test INDEX";
+
+# Parse testcase index, looking for testcases
+while (my $line = <TINDEX>) {
+	next if ($line =~ /^(#.*)?$/);
+
+	# Found one; decompose
+	(my $test, my $desc, my $data) = split /\t+/, $line;
+
+	# Strip whitespace; the description and data fields are optional
+	$test =~ s/^\s+|\s+$//g;
+	$desc = "" unless (defined $desc);
+	$desc =~ s/^\s+|\s+$//g;
+	$data =~ s/^\s+|\s+$//g if ($data);
+
+	# Append EXE extension to binary name
+	$test = $test . $exeext;
+
+	print "Test: $desc\n";
+
+	if ($data) {
+		# Testcase has external data files
+
+		# Open datafile index
+		open(DINDEX, "<./data/$data/INDEX") or
+			die "Failed opening ./data/$data/INDEX";
+
+		# Parse datafile index, looking for datafiles
+		while (my $dentry = <DINDEX>) {
+			next if ($dentry =~ /^(#.*)?$/);
+
+			# Found one; decompose (the description is not used)
+			(my $dtest, my $ddesc) = split /\t+/, $dentry;
+
+			# Strip whitespace
+			$dtest =~ s/^\s+|\s+$//g;
+
+			run_test("$test [$data/$dtest]", "./$test",
+				"./data/Aliases", "./data/$data/$dtest");
+		}
+
+		close(DINDEX);
+	} else {
+		# Testcase has no external data files
+		run_test($test, "./$test", "./data/Aliases");
+	}
+
+	print "\n";
+}
+
+# Clean up
+close(TINDEX);
+
+close(NULL);
+close(LOG);
+
+# Run a single testcase and report its result
+#
+# $label  Text identifying this run on the terminal
+# @cmd    Program to execute, followed by its arguments
+#
+# The testcase's standard output is copied to the log file.
+# Exits the runner with status 1 if the testcase failed.
+sub run_test
+{
+	my ($label, @cmd) = @_;
+
+	print LOG "Running " . join(" ", @cmd) . "\n";
+
+	# Make message fit on an 80 column terminal
+	my $msg = " ==> $label";
+	my $pad = 80 - length($msg) - 8;
+	$msg = $msg . "." x $pad if ($pad > 0);
+
+	print $msg;
+
+	# Run testcase. The child's stdin is a pipe which is closed at once,
+	# so the child sees end-of-file; ">&NULL" makes the child write its
+	# stderr straight to /dev/null rather than to a pipe nobody reads.
+	my $pid = open3(\*IN, \*OUT, ">&NULL", @cmd);
+	close(IN);
+
+	my $last = "";
+
+	# Marshal testcase output to log file
+	while (my $output = <OUT>) {
+		print LOG " $output";
+		$last = $output;
+	}
+
+	close(OUT);
+
+	# Wait for child to finish
+	waitpid($pid, 0);
+	my $status = $?;
+
+	my $result = substr($last, 0, 4);
+	my $failed = ($result eq "FAIL" || $status != 0);
+
+	print(($failed ? "FAIL" : $result) . "\n");
+
+	# Bail, noisily, on failure
+	if ($failed) {
+		print "\n\nFailure detected: " .
+			"consult log file\n\n\n";
+
+		exit(1);
+	}
+}
diff --git a/test/testutils.h b/test/testutils.h
new file mode 100644
index 0000000..68657f8
--- /dev/null
+++ b/test/testutils.h
@@ -0,0 +1,123 @@
+#ifndef hubbub_test_testutils_h_
+#define hubbub_test_testutils_h_
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef UNUSED
+/* Mark a variable or parameter as deliberately unused. A cast to void
+ * works on const-qualified objects too, unlike a self-assignment. */
+#define UNUSED(x) ((void) (x))
+#endif
+
+/* Redefine assert, so we can simply use the standard assert mechanism
+ * within testcases and exit with the right output for the testrunner
+ * to do the right thing. */
+void __assert2(const char *expr, const char *function,
+		const char *file, int line);
+
+/**
+ * Assertion failure handler: reports the failure and exits.
+ *
+ * Prints a line starting "FAIL" -- the prefix the testrunner checks the
+ * last line of testcase output for -- then exits with EXIT_FAILURE.
+ *
+ * \param expr      Text of the expression that evaluated to false
+ * \param function  Name of the enclosing function (ignored)
+ * \param file      Name of the source file (ignored)
+ * \param line      Line number of the failed assertion
+ */
+void __assert2(const char *expr, const char *function,
+		const char *file, int line)
+{
+	UNUSED(function);
+	UNUSED(file);
+
+	printf("FAIL - %s at line %d\n", expr, line);
+
+	exit(EXIT_FAILURE);
+}
+
+/* Drop any definition <assert.h> may already have provided, so that the
+ * replacement below is not an incompatible macro redefinition. Unlike
+ * the standard macro, this one stays active when NDEBUG is defined. */
+#undef assert
+#define assert(expr) \
+	((void) ((expr) || (__assert2 (#expr, __func__, __FILE__, __LINE__), 0)))
+
+
+typedef bool (*line_func)(const char *data, size_t datalen, void *pw);
+
+static size_t parse_strlen(const char *str, size_t limit);
+bool parse_testfile(const char *filename, line_func callback, void *pw);
+size_t parse_filesize(const char *filename);
+
+/**
+ * Testcase datafile parser driver
+ *
+ * Reads the file a line at a time (at most 299 bytes per read) and passes
+ * each piece to the callback, skipping lines consisting solely of a
+ * newline. Parsing stops at the first callback that returns false.
+ *
+ * \param filename  Name of file to parse
+ * \param callback  Pointer to function to handle each line of input data
+ * \param pw        Pointer to client-specific private data
+ * \return true on success, false otherwise.
+ */
+bool parse_testfile(const char *filename, line_func callback, void *pw)
+{
+	char line[300];
+	bool ok = true;
+	FILE *fp = fopen(filename, "rb");
+
+	if (fp == NULL) {
+		printf("Failed opening %s\n", filename);
+		return false;
+	}
+
+	/* ok is tested first, so nothing more is read after a rejection */
+	while (ok && fgets(line, sizeof line, fp) != NULL) {
+		if (line[0] != '\n')
+			ok = callback(line,
+					parse_strlen(line, sizeof line), pw);
+	}
+
+	fclose(fp);
+
+	return ok;
+}
+
+/**
+ * Utility string length measurer; expects strings to be '\n' terminated
+ *
+ * The returned length includes the terminating '\n'. Scanning also stops
+ * at a NUL byte, so a final line with no trailing newline is measured
+ * correctly rather than running on into stale buffer contents; in that
+ * case the NUL is not counted. If neither terminator is found within the
+ * first limit - 1 bytes, limit is returned.
+ *
+ * \param str    String to measure length of
+ * \param limit  Upper bound on string length
+ * \return String length, or 0 if str is NULL or limit is 0
+ */
+size_t parse_strlen(const char *str, size_t limit)
+{
+	size_t len = 0;
+
+	/* limit == 0 must not reach the limit - 1 below: it would wrap */
+	if (str == NULL || limit == 0)
+		return 0;
+
+	while (len < limit - 1 && str[len] != '\n' && str[len] != '\0')
+		len++;
+
+	/* Stopped on the string terminator: there is no newline to count */
+	if (len < limit - 1 && str[len] == '\0')
+		return len;
+
+	return len + 1;
+}
+
+/**
+ * Read the size of a file
+ *
+ * \param filename  Name of file to read size of
+ * \return File size (in bytes), or 0 on error
+ */
+size_t parse_filesize(const char *filename)
+{
+	FILE *fp;
+	long end = -1;
+
+	fp = fopen(filename, "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", filename);
+		return 0;
+	}
+
+	if (fseek(fp, 0, SEEK_END) == 0)
+		end = ftell(fp);
+
+	fclose(fp);
+
+	/* ftell() reports failure as -1; converting that to size_t would
+	 * yield a huge bogus size instead of the documented 0 */
+	return (end < 0) ? 0 : (size_t) end;
+}
+
+
+#endif
diff --git a/test/tokeniser.c b/test/tokeniser.c
new file mode 100644
index 0000000..271b986
--- /dev/null
+++ b/test/tokeniser.c
@@ -0,0 +1,174 @@
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <hubbub/hubbub.h>
+
+#include "utils/utils.h"
+
+#include "input/inputstream.h"
+#include "tokeniser/tokeniser.h"
+
+#include "testutils.h"
+
+static const uint8_t *pbuffer;
+
+static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
+static void token_handler(const hubbub_token *token, void *pw);
+
+/**
+ * Allocator callback handed to the library: a thin realloc() wrapper.
+ *
+ * \param ptr  Block to resize, passed straight to realloc()
+ * \param len  Requested size, in bytes
+ * \param pw   Client private data (ignored)
+ * \return Whatever realloc() returns
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	void *block = realloc(ptr, len);
+
+	UNUSED(pw);
+
+	return block;
+}
+
+/**
+ * Tokeniser test driver.
+ *
+ * Appends the data file to a UTF-8 input stream in CHUNK_SIZE pieces,
+ * running the tokeniser after every full chunk, then signals end of
+ * input and runs the tokeniser a final time. Tokens are printed by
+ * token_handler().
+ *
+ * \param argc  Must be 3
+ * \param argv  argv[1] is the aliases file, argv[2] the data file
+ * \return 0 after printing "PASS", 1 on bad usage or if the data file
+ *         cannot be opened; a failing assert() exits the process from
+ *         the test assert handler.
+ */
+int main(int argc, char **argv)
+{
+	hubbub_inputstream *stream;
+	hubbub_tokeniser *tok;
+	hubbub_tokeniser_optparams params;
+	FILE *fp;
+	long fsize;
+	size_t len;
+#define CHUNK_SIZE (4096)
+	uint8_t buf[CHUNK_SIZE];
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL);
+	assert(stream != NULL);
+
+	tok = hubbub_tokeniser_create(stream, myrealloc, NULL);
+	assert(tok != NULL);
+
+	params.buffer_handler.handler = buffer_handler;
+	params.buffer_handler.pw = NULL;
+	assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_BUFFER_HANDLER,
+			&params) == HUBBUB_OK);
+
+	params.token_handler.handler = token_handler;
+	params.token_handler.pw = NULL;
+	assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER,
+			&params) == HUBBUB_OK);
+
+	fp = fopen(argv[2], "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", argv[2]);
+		return 1;
+	}
+
+	/* Measure the file; ftell() reports failure as -1, which must not
+	 * be converted to a huge size_t */
+	assert(fseek(fp, 0, SEEK_END) == 0);
+	fsize = ftell(fp);
+	assert(fsize >= 0);
+	assert(fseek(fp, 0, SEEK_SET) == 0);
+
+	len = (size_t) fsize;
+
+	while (len >= CHUNK_SIZE) {
+		/* A short read would feed stale buffer contents to the stream */
+		assert(fread(buf, 1, CHUNK_SIZE, fp) == CHUNK_SIZE);
+
+		assert(hubbub_inputstream_append(stream,
+				buf, CHUNK_SIZE) == HUBBUB_OK);
+
+		len -= CHUNK_SIZE;
+
+		assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
+	}
+
+	if (len > 0) {
+		assert(fread(buf, 1, len, fp) == len);
+
+		assert(hubbub_inputstream_append(stream,
+				buf, len) == HUBBUB_OK);
+	}
+
+	fclose(fp);
+
+	/* Mark the end of the input with an empty append and run the
+	 * tokeniser over whatever remains. This used to happen only when a
+	 * partial final chunk existed, so a file whose size was an exact
+	 * multiple of CHUNK_SIZE never reached end of input. */
+	assert(hubbub_inputstream_append(stream, NULL, 0) ==
+			HUBBUB_OK);
+
+	assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
+
+	hubbub_tokeniser_destroy(tok);
+
+	hubbub_inputstream_destroy(stream);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/**
+ * Buffer callback: remembers the buffer address in the file-scope
+ * pbuffer pointer, which token_handler() adds token offsets to.
+ *
+ * \param buffer  Buffer address passed by the tokeniser
+ * \param len     Length passed by the tokeniser (ignored)
+ * \param pw      Client private data (ignored)
+ */
+void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
+{
+	pbuffer = buffer;
+
+	UNUSED(pw);
+	UNUSED(len);
+}
+
+/**
+ * Token callback: prints the token type followed by its contents.
+ *
+ * Token strings are given as offset/length pairs; the offsets are added
+ * to the pbuffer address recorded by buffer_handler().
+ *
+ * \param token  Token to print
+ * \param pw     Client private data (ignored)
+ */
+void token_handler(const hubbub_token *token, void *pw)
+{
+	/* Indexed by token->type.
+	 * NOTE(review): assumes the token type enumeration starts at 0 and
+	 * is declared in this order -- confirm against hubbub/types.h. */
+	static const char *token_names[] = {
+		"DOCTYPE", "START TAG", "END TAG",
+		"COMMENT", "CHARACTERS", "EOF"
+	};
+	const size_t n_names = sizeof token_names / sizeof token_names[0];
+	size_t i;
+
+	UNUSED(pw);
+
+	/* Never index the name table with a type it does not cover */
+	if ((size_t) token->type >= n_names) {
+		printf("UNKNOWN (%d)\n", (int) token->type);
+		return;
+	}
+
+	printf("%s: ", token_names[token->type]);
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_DOCTYPE:
+		printf("'%.*s' (%svalid)\n",
+				(int) token->data.doctype.name.len,
+				pbuffer + token->data.doctype.name.data_off,
+				token->data.doctype.correct ? "" : "in");
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	case HUBBUB_TOKEN_END_TAG:
+		/* Start and end tags are printed identically: the name, then
+		 * one line per attribute */
+		printf("'%.*s' %s\n",
+				(int) token->data.tag.name.len,
+				pbuffer + token->data.tag.name.data_off,
+				(token->data.tag.n_attributes > 0) ?
+						"attributes:" : "");
+		for (i = 0; i < token->data.tag.n_attributes; i++) {
+			printf("\t'%.*s' = '%.*s'\n",
+					(int) token->data.tag.attributes[i].name.len,
+					pbuffer + token->data.tag.attributes[i].name.data_off,
+					(int) token->data.tag.attributes[i].value.len,
+					pbuffer + token->data.tag.attributes[i].value.data_off);
+		}
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		printf("'%.*s'\n", (int) token->data.comment.len,
+				pbuffer + token->data.comment.data_off);
+		break;
+	case HUBBUB_TOKEN_CHARACTER:
+		printf("'%.*s'\n", (int) token->data.character.len,
+				pbuffer + token->data.character.data_off);
+		break;
+	case HUBBUB_TOKEN_EOF:
+		printf("\n");
+		break;
+	default:
+		/* In the table but not handled above: just end the line */
+		printf("\n");
+		break;
+	}
+}
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
new file mode 100644
index 0000000..06340fb
--- /dev/null
+++ b/test/tokeniser2.c
@@ -0,0 +1,418 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <json.h>
+
+#include <hubbub/hubbub.h>
+
+#include "utils/utils.h"
+
+#include "input/inputstream.h"
+#include "tokeniser/tokeniser.h"
+
+#include "testutils.h"
+
+/**
+ * State shared between main(), run_test() and the tokeniser callbacks
+ * while a single test is being run.
+ */
+typedef struct context {
+ /* Buffer most recently reported through buffer_handler(); the
+  * data_off values found in tokens are added to this pointer */
+ const uint8_t *pbuffer;
+
+ /* Value of the test's "input" key and its strlen() */
+ const uint8_t *input;
+ size_t input_len;
+
+ /* Array taken from the test's "output" key */
+ struct array_list *output;
+ /* Index into output of the next entry to be matched */
+ int output_index;
+ /* Offset into the current expected Character string up to which
+  * earlier tokens have already been matched */
+ size_t char_off;
+
+ /* Value of the test's "lastStartTag" key, or NULL if absent */
+ const char *last_start_tag;
+ /* Array taken from the test's "contentModelFlags" key, or NULL
+  * if absent */
+ struct array_list *content_model;
+} context;
+
+/* Forward declarations of the functions defined after main() */
+static void run_test(context *ctx);
+static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
+static void token_handler(const hubbub_token *token, void *pw);
+
+/**
+ * Allocation callback passed to the hubbub API, backed by the C heap.
+ *
+ * \param ptr  Existing allocation to resize, or NULL to allocate afresh
+ * \param len  Requested size in bytes; 0 releases ptr
+ * \param pw   Unused
+ * \return Pointer to the (re)allocated block, or NULL on failure or
+ *         when len is 0
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+
+	/* The result of realloc(ptr, 0) is implementation-defined, so a
+	 * zero-length request is turned into an explicit free */
+	if (len == 0) {
+		free(ptr);
+		return NULL;
+	}
+
+	return realloc(ptr, len);
+}
+
+/**
+ * Test driver: run every test found in a JSON test file.
+ *
+ * The file's top-level object must have "tests" as its first key, mapping
+ * to an array of test objects. For each test the recognised keys
+ * ("description", "input", "output", "lastStartTag", "contentModelFlags")
+ * are copied into a context, which is then handed to run_test().
+ *
+ * \param argc  Must be 3
+ * \param argv  argv[1] is the aliases file, argv[2] the JSON test file
+ * \return 0 once all tests have run, 1 on incorrect usage. Failed checks
+ *         are reported through assert().
+ */
+int main(int argc, char **argv)
+{
+	struct json_object *json;
+	struct array_list *tests;
+	struct lh_entry *entry;
+	char *key;
+	struct json_object *val;
+	int i;
+	context ctx;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	json = json_object_from_file(argv[2]);
+	assert(!is_error(json));
+
+	/* The top-level object must have a first member, named "tests" */
+	entry = json_object_get_object(json)->head;
+	assert(entry != NULL);
+	assert(strcmp((char *) entry->k, "tests") == 0);
+
+	/* Get array of tests */
+	tests = json_object_get_array((struct json_object *) entry->v);
+
+	for (i = 0; i < array_list_length(tests); i++) {
+		/* Get test */
+		struct json_object *test =
+			(struct json_object *) array_list_get_idx(tests, i);
+
+		/* Start each test from a clean context, so that nothing is
+		 * read uninitialised or inherited from the previous test */
+		ctx.pbuffer = NULL;
+		ctx.input = NULL;
+		ctx.input_len = 0;
+		ctx.output = NULL;
+		ctx.output_index = 0;
+		ctx.char_off = 0;
+		ctx.last_start_tag = NULL;
+		ctx.content_model = NULL;
+
+		/* Extract settings */
+		for (entry = json_object_get_object(test)->head; entry;
+				entry = entry->next) {
+			key = (char *) entry->k;
+			val = (struct json_object *) entry->v;
+
+			if (strcmp(key, "description") == 0) {
+				printf("Test: %s\n",
+					json_object_get_string(val));
+			} else if (strcmp(key, "input") == 0) {
+				ctx.input = (const uint8_t *)
+						json_object_get_string(val);
+				/* strlen() must not be handed NULL */
+				assert(ctx.input != NULL);
+				ctx.input_len =
+					strlen((const char *) ctx.input);
+			} else if (strcmp(key, "output") == 0) {
+				ctx.output = json_object_get_array(val);
+				ctx.output_index = 0;
+				ctx.char_off = 0;
+			} else if (strcmp(key, "lastStartTag") == 0) {
+				ctx.last_start_tag = (const char *)
+						json_object_get_string(val);
+			} else if (strcmp(key, "contentModelFlags") == 0) {
+				ctx.content_model =
+						json_object_get_array(val);
+			}
+		}
+
+		/* A test lacking input or expected output cannot be run */
+		assert(ctx.input != NULL);
+		assert(ctx.output != NULL);
+
+		/* And run the test */
+		run_test(&ctx);
+	}
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/**
+ * Tokenise one test's input, once per requested content model.
+ *
+ * Each pass creates a fresh input stream and tokeniser, optionally feeds
+ * "<lastStartTag>" through the tokeniser before any handlers are
+ * installed, installs the buffer and token handlers, selects the content
+ * model, and then tokenises ctx->input followed by end of input.
+ * The produced tokens are checked by token_handler().
+ *
+ * \param ctx  Test context; the expected-output cursor is rewound at the
+ *             start of every pass
+ */
+void run_test(context *ctx)
+{
+	struct array_list *expected = ctx->output;
+	hubbub_tokeniser_optparams params;
+	hubbub_inputstream *stream;
+	hubbub_tokeniser *tok;
+	int n_models;
+	int pass;
+
+	/* With no content models listed, a single PCDATA pass is made */
+	n_models = (ctx->content_model != NULL) ?
+			array_list_length(ctx->content_model) : 1;
+
+	/* We test for each of the content models specified */
+	for (pass = 0; pass < n_models; pass++) {
+		/* Rewind the expected output for this pass */
+		ctx->output = expected;
+		ctx->output_index = 0;
+		ctx->char_off = 0;
+
+		stream = hubbub_inputstream_create("UTF-8", "UTF-8",
+				myrealloc, NULL);
+		assert(stream != NULL);
+
+		tok = hubbub_tokeniser_create(stream, myrealloc, NULL);
+		assert(tok != NULL);
+
+		if (ctx->last_start_tag != NULL) {
+			/* Fake up a start tag, in PCDATA state: room is
+			 * needed for '<', the name, '>' and the terminator */
+			size_t taglen = strlen(ctx->last_start_tag);
+			uint8_t buf[taglen + 3];
+
+			snprintf((char *) buf, sizeof buf, "<%s>",
+					ctx->last_start_tag);
+
+			assert(hubbub_inputstream_append(stream,
+					buf, taglen + 2) == HUBBUB_OK);
+
+			assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
+		}
+
+		params.buffer_handler.handler = buffer_handler;
+		params.buffer_handler.pw = ctx;
+		assert(hubbub_tokeniser_setopt(tok,
+				HUBBUB_TOKENISER_BUFFER_HANDLER,
+				&params) == HUBBUB_OK);
+
+		params.token_handler.handler = token_handler;
+		params.token_handler.pw = ctx;
+		assert(hubbub_tokeniser_setopt(tok,
+				HUBBUB_TOKENISER_TOKEN_HANDLER,
+				&params) == HUBBUB_OK);
+
+		/* PCDATA unless the test names something else; any name
+		 * other than PCDATA, RCDATA or CDATA selects PLAINTEXT */
+		params.content_model.model = HUBBUB_CONTENT_MODEL_PCDATA;
+		if (ctx->content_model != NULL) {
+			const char *name = json_object_get_string(
+					(struct json_object *)
+					array_list_get_idx(ctx->content_model,
+							pass));
+
+			if (strcmp(name, "RCDATA") == 0) {
+				params.content_model.model =
+						HUBBUB_CONTENT_MODEL_RCDATA;
+			} else if (strcmp(name, "CDATA") == 0) {
+				params.content_model.model =
+						HUBBUB_CONTENT_MODEL_CDATA;
+			} else if (strcmp(name, "PCDATA") != 0) {
+				params.content_model.model =
+						HUBBUB_CONTENT_MODEL_PLAINTEXT;
+			}
+		}
+		assert(hubbub_tokeniser_setopt(tok,
+				HUBBUB_TOKENISER_CONTENT_MODEL,
+				&params) == HUBBUB_OK);
+
+		assert(hubbub_inputstream_append(stream,
+				ctx->input, ctx->input_len) == HUBBUB_OK);
+
+		/* A NULL, zero-length append follows the real input */
+		assert(hubbub_inputstream_append(stream, NULL, 0) ==
+				HUBBUB_OK);
+
+		printf("Input: '%.*s'\n", (int) ctx->input_len,
+				(const char *) ctx->input);
+
+		assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
+
+		hubbub_tokeniser_destroy(tok);
+
+		hubbub_inputstream_destroy(stream);
+	}
+}
+
+/**
+ * Buffer callback: record the buffer pointer reported by the tokeniser
+ * in the test context, for token_handler() to resolve offsets against.
+ *
+ * \param buffer  Buffer pointer to record
+ * \param len     Unused
+ * \param pw      Pointer to the test context
+ */
+void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
+{
+	UNUSED(len);
+
+	((context *) pw)->pbuffer = buffer;
+}
+
+/**
+ * Token callback: check an emitted token against the next expected output.
+ *
+ * Entries of ctx->output that are strings must be "ParseError" and are
+ * skipped. The first non-string entry is the expectation for this token:
+ * an array whose element 0 names the token type and whose remaining
+ * elements carry its data. Any mismatch fails an assertion.
+ *
+ * Character data need not be split the same way on both sides:
+ * ctx->char_off records how much of the current expected string has
+ * already been matched by earlier tokens, and a token that is longer than
+ * the remaining expected string has its tail checked against the next
+ * expected entry.
+ *
+ * \param token  Token emitted by the tokeniser
+ * \param pw     Pointer to the test context
+ */
+void token_handler(const hubbub_token *token, void *pw)
+{
+	static const char *token_names[] = {
+		"DOCTYPE", "StartTag", "EndTag",
+		"Comment", "Character", "EOF"
+	};
+	size_t i;
+	context *ctx = (context *) pw;
+	struct json_object *obj = NULL;
+	struct array_list *items;
+
+	/* Skip over expected parse errors to reach the next expected token */
+	for (; ctx->output_index < array_list_length(ctx->output);
+			ctx->output_index++) {
+		/* Get object for index */
+		obj = (struct json_object *)
+				array_list_get_idx(ctx->output,
+						ctx->output_index);
+
+		/* If it's not a string, we've found the expected output */
+		if (json_object_get_type(obj) != json_type_string)
+			break;
+
+		/* Otherwise, it must be a parse error */
+		assert(strcmp(json_object_get_string(obj),
+				 "ParseError") == 0);
+	}
+
+	/* If we've run off the end, this is an error -- the tokeniser has
+	 * produced more tokens than expected. We allow for the generation
+	 * of a terminating EOF token, however. */
+	assert(ctx->output_index < array_list_length(ctx->output) ||
+			token->type == HUBBUB_TOKEN_EOF);
+
+	/* Got a terminating EOF -- no error */
+	if (ctx->output_index >= array_list_length(ctx->output))
+		return;
+
+	/* Now increment the output index so we don't re-expect this token */
+	ctx->output_index++;
+
+	/* Expected output must be an array */
+	assert(json_object_get_type(obj) == json_type_array);
+
+	items = json_object_get_array(obj);
+
+	printf("%s: %s\n", token_names[token->type],
+			json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 0)));
+
+	/* Make sure we got the token we expected */
+	assert(strcmp(token_names[token->type],
+			json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 0))) == 0);
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_DOCTYPE:
+	{
+		char *expname = json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 1));
+		bool expvalid = json_object_get_boolean((struct json_object *)
+				array_list_get_idx(items, 2));
+		char *gotname = (char *) (ctx->pbuffer +
+				token->data.doctype.name.data_off);
+
+		printf("'%.*s' (%svalid)\n",
+				(int) token->data.doctype.name.len,
+				gotname,
+				token->data.doctype.correct ? "" : "in");
+
+		assert(token->data.doctype.name.len == strlen(expname));
+		assert(strncmp(gotname, expname, strlen(expname)) == 0);
+		/* For some reason, html5lib's doctype validity indicator
+		 * is inverted */
+		assert(expvalid == !token->data.doctype.correct);
+	}
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	{
+		char *expname = json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 1));
+		struct lh_entry *expattrs = json_object_get_object(
+				(struct json_object *)
+				array_list_get_idx(items, 2))->head;
+		char *tagname = (char *) (ctx->pbuffer +
+				token->data.tag.name.data_off);
+
+		printf("'%.*s' %s\n",
+				(int) token->data.tag.name.len,
+				tagname,
+				(token->data.tag.n_attributes > 0) ?
+						"attributes:" : "");
+
+		assert(token->data.tag.name.len == strlen(expname));
+		assert(strncmp(tagname, expname, strlen(expname)) == 0);
+
+		/* Attributes are compared pairwise, in order */
+		for (i = 0; i < token->data.tag.n_attributes; i++) {
+			char *expattrname;
+			char *expval;
+			char *gotname = (char *) (ctx->pbuffer +
+				token->data.tag.attributes[i].name.data_off);
+			size_t namelen =
+				token->data.tag.attributes[i].name.len;
+			char *gotval = (char *) (ctx->pbuffer +
+				token->data.tag.attributes[i].value.data_off);
+			size_t vallen =
+				token->data.tag.attributes[i].value.len;
+
+			printf("\t'%.*s' = '%.*s'\n",
+					(int) namelen, gotname,
+					(int) vallen, gotval);
+
+			/* The tokeniser must not produce more attributes
+			 * than the test expects */
+			assert(expattrs != NULL);
+
+			expattrname = (char *) expattrs->k;
+			expval = json_object_get_string(
+					(struct json_object *) expattrs->v);
+
+			assert(namelen == strlen(expattrname));
+			assert(strncmp(gotname, expattrname,
+					strlen(expattrname)) == 0);
+			assert(vallen == strlen(expval));
+			assert(strncmp(gotval, expval, strlen(expval)) == 0);
+
+			expattrs = expattrs->next;
+		}
+
+		/* Nor may it produce fewer */
+		assert(expattrs == NULL);
+	}
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+	{
+		char *expname = json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 1));
+		char *tagname = (char *) (ctx->pbuffer +
+				token->data.tag.name.data_off);
+
+		printf("'%.*s' %s\n",
+				(int) token->data.tag.name.len,
+				tagname,
+				(token->data.tag.n_attributes > 0) ?
+						"attributes:" : "");
+
+		assert(token->data.tag.name.len == strlen(expname));
+		assert(strncmp(tagname, expname, strlen(expname)) == 0);
+	}
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+	{
+		char *expstr = json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 1));
+		char *gotstr = (char *) (ctx->pbuffer +
+				token->data.comment.data_off);
+
+		printf("'%.*s'\n", (int) token->data.comment.len, gotstr);
+
+		assert(token->data.comment.len == strlen(expstr));
+		assert(strncmp(gotstr, expstr, strlen(expstr)) == 0);
+	}
+		break;
+	case HUBBUB_TOKEN_CHARACTER:
+	{
+		char *expstr = json_object_get_string((struct json_object *)
+				array_list_get_idx(items, 1));
+		char *gotstr = (char *) (ctx->pbuffer +
+				token->data.character.data_off);
+		/* Compare only the part both sides have available */
+		size_t len = min(token->data.character.len,
+				strlen(expstr + ctx->char_off));
+
+		printf("'%.*s'\n", (int) token->data.character.len, gotstr);
+
+		assert(strncmp(gotstr, expstr + ctx->char_off, len) == 0);
+
+		if (len < token->data.character.len) {
+			/* Expected token only contained part of the data
+			 * Calculate how much is left, then try again with
+			 * the next expected token */
+			hubbub_token t = *token;
+
+			/* Start from a copy of the real token, so that the
+			 * remainder's offset and length are derived from
+			 * defined values */
+			t.data.character.data_off += len;
+			t.data.character.len -= len;
+
+			ctx->char_off = 0;
+
+			token_handler(&t, pw);
+		} else if (strlen(expstr + ctx->char_off) >
+				token->data.character.len) {
+			/* Tokeniser output only contained part of the data
+			 * in the expected token; calculate the offset into
+			 * the token and process the remainder next time */
+			ctx->char_off += len;
+			ctx->output_index--;
+		} else {
+			/* Exact match - clear offset */
+			ctx->char_off = 0;
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_EOF:
+		printf("\n");
+		break;
+	}
+}