From d488d298e303cb2be18ff217330cd1574fcb384f Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 15 Jul 2012 17:40:08 +0100 Subject: XML binding: Not libxml only any more --- bindings/xml/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/xml/Makefile b/bindings/xml/Makefile index e75e362..ea7653c 100644 --- a/bindings/xml/Makefile +++ b/bindings/xml/Makefile @@ -26,7 +26,7 @@ ifeq ($(WITH_EXPAT_BINDING),yes) endif ifeq ($(DO_XML_INSTALL),yes) - DIR_INSTALL_ITEMS := /include/dom/bindings/libxml:xmlerror.h;xmlparser.h + DIR_INSTALL_ITEMS := /include/dom/bindings/xml:xmlerror.h;xmlparser.h endif include $(NSBUILD)/Makefile.subdir -- cgit v1.2.3 From a45056f4dd306e6ff06d2ec77b129b8e27add45e Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 15 Jul 2012 17:41:02 +0100 Subject: XML binding: Expat handler, dodgy external entity ref handler --- bindings/xml/expat_xmlparser.c | 143 +++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 48 deletions(-) diff --git a/bindings/xml/expat_xmlparser.c b/bindings/xml/expat_xmlparser.c index ce649d7..3f9c51c 100644 --- a/bindings/xml/expat_xmlparser.c +++ b/bindings/xml/expat_xmlparser.c @@ -42,9 +42,7 @@ expat_xmlparser_start_element_handler(void *_parser, dom_exception err; dom_element *elem, *ins_elem; dom_string *tag_name; - - fprintf(stderr, "<%s>\n", name); - + err = dom_string_create((const uint8_t *)name, strlen(name), &tag_name); @@ -53,7 +51,7 @@ expat_xmlparser_start_element_handler(void *_parser, "No memory for tag name"); return; } - + err = dom_document_create_element(parser->doc, tag_name, &elem); if (err != DOM_NO_ERR) { dom_string_unref(tag_name); @@ -61,15 +59,15 @@ expat_xmlparser_start_element_handler(void *_parser, "Failed to create element '%s'", name); return; } - + dom_string_unref(tag_name); - + /* Add attributes to the element */ - + while (*atts) { dom_string *key, *value; - err = dom_string_create((const uint8_t *)(*atts), + err = dom_string_create((const uint8_t *)(*atts), strlen(*atts), &key); if (err != DOM_NO_ERR) { parser->msg(DOM_MSG_CRITICAL, parser->mctx, @@ -88,7 +86,7 @@ expat_xmlparser_start_element_handler(void *_parser, return; } atts++; - + err = dom_element_set_attribute(elem, key, value); dom_string_unref(key); dom_string_unref(value); @@ -99,7 +97,7 @@ expat_xmlparser_start_element_handler(void *_parser, return; } } - + err = dom_node_append_child(parser->current, elem, &ins_elem); if (err != DOM_NO_ERR) { dom_node_unref(elem); @@ -107,9 +105,9 @@ expat_xmlparser_start_element_handler(void *_parser, "No memory for appending child node"); return; } - + dom_node_unref(ins_elem); - + dom_node_unref(parser->current); parser->current = (struct dom_node *)elem; /* Steal initial ref */ } @@ -121,19 +119,17 @@ expat_xmlparser_end_element_handler(void *_parser, dom_xml_parser *parser = _parser; dom_exception err; dom_node *parent; - + UNUSED(name); - - fprintf(stderr, "\n", name); - + err = dom_node_get_parent_node(parser->current, &parent); - + if (err != DOM_NO_ERR) { parser->msg(DOM_MSG_CRITICAL, parser->mctx, "Unable to find a parent while closing element."); return; } - + dom_node_unref(parser->current); parser->current = parent; /* Takes the ref given by get_parent_node */ } @@ -147,14 +143,14 @@ expat_xmlparser_cdata_handler(void *_parser, dom_string *data; dom_exception err; struct dom_cdata_section *cdata, *ins_cdata; - + err = dom_string_create((const uint8_t *)s, len, &data); if (err != DOM_NO_ERR) { parser->msg(DOM_MSG_CRITICAL, parser->mctx, "No memory for cdata section contents"); return; } - + err = dom_document_create_cdata_section(parser->doc, data, &cdata); if (err != DOM_NO_ERR) { dom_string_unref(data); @@ -162,7 +158,7 @@ expat_xmlparser_cdata_handler(void *_parser, "No memory for cdata section"); return; } - + /* No longer need data */ dom_string_unref(data); @@ -184,14 +180,62 @@ expat_xmlparser_cdata_handler(void *_parser, dom_node_unref((struct dom_node *) cdata); } +static int +expat_xmlparser_external_entity_ref_handler(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *system_id, + const XML_Char *public_id) +{ + FILE *fh; + XML_Parser subparser; + unsigned char data[1024]; + size_t len; + enum XML_Status status; + + UNUSED(base); + UNUSED(public_id); + + if (system_id == NULL) + return XML_STATUS_OK; + + fh = fopen(system_id, "r"); + + if (fh == NULL) + return XML_STATUS_OK; + + subparser = XML_ExternalEntityParserCreate(parser, + context, + NULL); + + if (subparser == NULL) { + fclose(fh); + return XML_STATUS_OK; + } + + /* Parse the file bit by bit */ + while ((len = fread(data, 1, 1024, fh)) > 0) { + status = XML_Parse(subparser, (const char *)data, len, 0); + if (status != XML_STATUS_OK) { + XML_ParserFree(subparser); + fclose(fh); + return XML_STATUS_OK; + } + } + XML_Parse(subparser, "", 0, 1); + XML_ParserFree(subparser); + fclose(fh); + return XML_STATUS_OK; +} + static void expat_xmlparser_unknown_data_handler(void *_parser, const XML_Char *s, int len) { UNUSED(_parser); - - fprintf(stderr, "!!! %.*s !!!\n", len, s); + UNUSED(s); + UNUSED(len); } /** * Create an XML parser instance @@ -210,29 +254,29 @@ dom_xml_parser_create(const char *enc, const char *int_enc, { dom_xml_parser *parser; dom_exception err; - + UNUSED(int_enc); - + parser = calloc(sizeof(*parser), 1); if (parser == NULL) { msg(DOM_MSG_CRITICAL, mctx, "No memory for parser"); return NULL; } - + parser->msg = msg; parser->mctx = mctx; - + parser->parser = XML_ParserCreateNS(enc, ':'); - + if (parser->parser == NULL) { free(parser); msg(DOM_MSG_CRITICAL, mctx, "No memory for parser"); return NULL; } - + parser->complete = false; parser->doc = NULL; - + err = dom_implementation_create_document( DOM_IMPLEMENTATION_XML, /* namespace */ NULL, @@ -240,32 +284,35 @@ dom_xml_parser_create(const char *enc, const char *int_enc, /* doctype */ NULL, NULL, &parser->doc); - + if (err != DOM_NO_ERR) { - parser->msg(DOM_MSG_CRITICAL, parser->mctx, + parser->msg(DOM_MSG_CRITICAL, parser->mctx, "Failed creating document"); XML_ParserFree(parser->parser); free(parser); return NULL; } - + XML_SetUserData(parser->parser, parser); - + XML_SetElementHandler(parser->parser, expat_xmlparser_start_element_handler, expat_xmlparser_end_element_handler); - + XML_SetCharacterDataHandler(parser->parser, expat_xmlparser_cdata_handler); - - XML_SetParamEntityParsing(parser->parser, + + XML_SetParamEntityParsing(parser->parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - - XML_SetDefaultHandler(parser->parser, + + XML_SetExternalEntityRefHandler(parser->parser, + expat_xmlparser_external_entity_ref_handler); + + XML_SetDefaultHandlerExpand(parser->parser, expat_xmlparser_unknown_data_handler); - + parser->current = dom_node_ref(parser->doc); - + return parser; } @@ -278,7 +325,7 @@ void dom_xml_parser_destroy(dom_xml_parser *parser) { XML_ParserFree(parser->parser); - + free(parser); } @@ -294,14 +341,14 @@ dom_xml_error dom_xml_parser_parse_chunk(dom_xml_parser *parser, uint8_t *data, size_t len) { enum XML_Status status; - + status = XML_Parse(parser->parser, (const char *)data, len, 0); if (status != XML_STATUS_OK) { parser->msg(DOM_MSG_ERROR, parser->mctx, "XML_Parse failed: %d", status); return DOM_XML_EXTERNAL_ERR | status; } - + return DOM_XML_OK; } @@ -313,20 +360,20 @@ dom_xml_parser_parse_chunk(dom_xml_parser *parser, uint8_t *data, size_t len) * * This will force any remaining data through the parser */ -dom_xml_error +dom_xml_error dom_xml_parser_completed(dom_xml_parser *parser) { enum XML_Status status; - + status = XML_Parse(parser->parser, "", 0, 1); if (status != XML_STATUS_OK) { parser->msg(DOM_MSG_ERROR, parser->mctx, "XML_Parse failed: %d", status); return DOM_XML_EXTERNAL_ERR | status; } - + parser->complete = true; - + return DOM_XML_OK; } -- cgit v1.2.3 From c86f1c6c5a5aab4a5e99bf2617ddf4f15dde20c5 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 15 Jul 2012 17:46:33 +0100 Subject: XML Binding: Add comment support to Expat handler --- bindings/xml/expat_xmlparser.c | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/bindings/xml/expat_xmlparser.c b/bindings/xml/expat_xmlparser.c index 3f9c51c..4c5cbef 100644 --- a/bindings/xml/expat_xmlparser.c +++ b/bindings/xml/expat_xmlparser.c @@ -228,6 +228,55 @@ expat_xmlparser_external_entity_ref_handler(XML_Parser parser, return XML_STATUS_OK; } +static void +expat_xmlparser_comment_handler(void *_parser, + const XML_Char *_comment) +{ + dom_xml_parser *parser = _parser; + struct dom_comment *comment, *ins_comment = NULL; + dom_string *data; + dom_exception err; + + /* Create DOM string data for comment */ + err = dom_string_create((const uint8_t *)_comment, + strlen((const char *) _comment), &data); + if (err != DOM_NO_ERR) { + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "No memory for comment data"); + return; + } + + /* Create comment */ + err = dom_document_create_comment(parser->doc, data, &comment); + if (err != DOM_NO_ERR) { + dom_string_unref(data); + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "No memory for comment node"); + return; + } + + /* No longer need data */ + dom_string_unref(data); + + /* Append comment to parent */ + err = dom_node_append_child(parser->current, (struct dom_node *) comment, + (struct dom_node **) (void *) &ins_comment); + if (err != DOM_NO_ERR) { + dom_node_unref((struct dom_node *) comment); + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "Failed attaching comment node"); + return; + } + + /* We're not interested in the inserted comment */ + if (ins_comment != NULL) + dom_node_unref((struct dom_node *) ins_comment); + + /* No longer interested in comment */ + dom_node_unref((struct dom_node *) comment); + +} + static void expat_xmlparser_unknown_data_handler(void *_parser, const XML_Char *s, @@ -308,6 +357,9 @@ dom_xml_parser_create(const char *enc, const char *int_enc, XML_SetExternalEntityRefHandler(parser->parser, expat_xmlparser_external_entity_ref_handler); + XML_SetCommentHandler(parser->parser, + expat_xmlparser_comment_handler); + XML_SetDefaultHandlerExpand(parser->parser, expat_xmlparser_unknown_data_handler); -- cgit v1.2.3 From 575c52aad811bbfce12faac963256d4872c51598 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 15 Jul 2012 17:58:25 +0100 Subject: XML Binding: Add doctype support to Expat handler --- bindings/xml/expat_xmlparser.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/bindings/xml/expat_xmlparser.c b/bindings/xml/expat_xmlparser.c index 4c5cbef..42abdfe 100644 --- a/bindings/xml/expat_xmlparser.c +++ b/bindings/xml/expat_xmlparser.c @@ -277,6 +277,48 @@ expat_xmlparser_comment_handler(void *_parser, } +static void +expat_xmlparser_start_doctype_decl_handler(void *_parser, + const XML_Char *doctype_name, + const XML_Char *system_id, + const XML_Char *public_id, + int has_internal_subset) +{ + dom_xml_parser *parser = _parser; + struct dom_document_type *doctype, *ins_doctype = NULL; + dom_exception err; + + UNUSED(has_internal_subset); + + err = dom_implementation_create_document_type( + doctype_name, system_id ? system_id : "", + public_id ? public_id : "", + &doctype); + + if (err != DOM_NO_ERR) { + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "Failed to create document type"); + return; + } + + /* Add doctype to document */ + err = dom_node_append_child(parser->doc, (struct dom_node *) doctype, + (struct dom_node **) (void *) &ins_doctype); + if (err != DOM_NO_ERR) { + dom_node_unref((struct dom_node *) doctype); + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "Failed attaching doctype"); + return; + } + + /* Not interested in inserted node */ + if (ins_doctype != NULL) + dom_node_unref((struct dom_node *) ins_doctype); + + /* No longer interested in doctype */ + dom_node_unref((struct dom_node *) doctype); +} + static void expat_xmlparser_unknown_data_handler(void *_parser, const XML_Char *s, @@ -360,6 +402,9 @@ dom_xml_parser_create(const char *enc, const char *int_enc, XML_SetCommentHandler(parser->parser, expat_xmlparser_comment_handler); + XML_SetStartDoctypeDeclHandler(parser->parser, + expat_xmlparser_start_doctype_decl_handler); + XML_SetDefaultHandlerExpand(parser->parser, expat_xmlparser_unknown_data_handler); -- cgit v1.2.3 From e31173cd5d116a49d3c11ab732e639ca3cc3ecd1 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 15 Jul 2012 18:15:26 +0100 Subject: XML Binding: Allow expat binding to create text nodes if not parsing cdata --- bindings/xml/expat_xmlparser.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/bindings/xml/expat_xmlparser.c b/bindings/xml/expat_xmlparser.c index 42abdfe..7f4fa03 100644 --- a/bindings/xml/expat_xmlparser.c +++ b/bindings/xml/expat_xmlparser.c @@ -29,6 +29,7 @@ struct dom_xml_parser { bool complete; /**< Indicate stream completion */ struct dom_document *doc; /**< DOM Document we're building */ struct dom_node *current; /**< DOM node we're currently building */ + bool is_cdata; /**< If the character data is cdata or text */ }; /* Binding functions */ @@ -134,6 +135,22 @@ expat_xmlparser_end_element_handler(void *_parser, parser->current = parent; /* Takes the ref given by get_parent_node */ } +static void +expat_xmlparser_start_cdata_handler(void *_parser) +{ + dom_xml_parser *parser = _parser; + + parser->is_cdata = true; +} + +static void +expat_xmlparser_end_cdata_handler(void *_parser) +{ + dom_xml_parser *parser = _parser; + + parser->is_cdata = false; +} + static void expat_xmlparser_cdata_handler(void *_parser, const XML_Char *s, @@ -142,7 +159,7 @@ expat_xmlparser_cdata_handler(void *_parser, dom_xml_parser *parser = _parser; dom_string *data; dom_exception err; - struct dom_cdata_section *cdata, *ins_cdata; + struct dom_node *cdata, *ins_cdata; err = dom_string_create((const uint8_t *)s, len, &data); if (err != DOM_NO_ERR) { @@ -151,7 +168,11 @@ expat_xmlparser_cdata_handler(void *_parser, return; } - err = dom_document_create_cdata_section(parser->doc, data, &cdata); + err = parser->is_cdata ? + dom_document_create_cdata_section(parser->doc, data, + (dom_cdata_section **)&cdata) : + dom_document_create_text_node(parser->doc, data, + (dom_text **)&cdata); if (err != DOM_NO_ERR) { dom_string_unref(data); parser->msg(DOM_MSG_CRITICAL, parser->mctx, @@ -163,8 +184,7 @@ expat_xmlparser_cdata_handler(void *_parser, dom_string_unref(data); /* Append cdata section to parent */ - err = dom_node_append_child(parser->current, (struct dom_node *) cdata, - (struct dom_node **) (void *) &ins_cdata); + err = dom_node_append_child(parser->current, cdata, &ins_cdata); if (err != DOM_NO_ERR) { dom_node_unref((struct dom_node *) cdata); parser->msg(DOM_MSG_ERROR, parser->mctx, @@ -174,10 +194,10 @@ expat_xmlparser_cdata_handler(void *_parser, /* We're not interested in the inserted cdata section */ if (ins_cdata != NULL) - dom_node_unref((struct dom_node *) ins_cdata); + dom_node_unref(ins_cdata); /* No longer interested in cdata section */ - dom_node_unref((struct dom_node *) cdata); + dom_node_unref(cdata); } static int @@ -390,6 +410,10 @@ dom_xml_parser_create(const char *enc, const char *int_enc, expat_xmlparser_start_element_handler, expat_xmlparser_end_element_handler); + XML_SetCdataSectionHandler(parser->parser, + expat_xmlparser_start_cdata_handler, + expat_xmlparser_end_cdata_handler); + XML_SetCharacterDataHandler(parser->parser, expat_xmlparser_cdata_handler); @@ -410,6 +434,8 @@ dom_xml_parser_create(const char *enc, const char *int_enc, parser->current = dom_node_ref(parser->doc); + parser->is_cdata = false; + return parser; } -- cgit v1.2.3 From 2f4be485272446109e4451a19dc29ba6728f0039 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 15 Jul 2012 18:26:47 +0100 Subject: XML Binding: Allow expat handler to coalesce TEXT nodes. Core suite now passes with expat. --- bindings/xml/expat_xmlparser.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/bindings/xml/expat_xmlparser.c b/bindings/xml/expat_xmlparser.c index 7f4fa03..1077bc9 100644 --- a/bindings/xml/expat_xmlparser.c +++ b/bindings/xml/expat_xmlparser.c @@ -159,7 +159,8 @@ expat_xmlparser_cdata_handler(void *_parser, dom_xml_parser *parser = _parser; dom_string *data; dom_exception err; - struct dom_node *cdata, *ins_cdata; + struct dom_node *cdata, *ins_cdata, *lastchild = NULL; + dom_node_type ntype = 0; err = dom_string_create((const uint8_t *)s, len, &data); if (err != DOM_NO_ERR) { @@ -168,6 +169,39 @@ expat_xmlparser_cdata_handler(void *_parser, return; } + err = dom_node_get_last_child(parser->current, &lastchild); + + if (err == DOM_NO_ERR && lastchild != NULL) { + err = dom_node_get_node_type(lastchild, &ntype); + } + + if (err != DOM_NO_ERR) { + dom_string_unref(data); + if (lastchild != NULL) + dom_node_unref(lastchild); + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "No memory for cdata section"); + return; + } + + if (ntype == DOM_TEXT_NODE && parser->is_cdata == false) { + /* We can append this text instead */ + err = dom_characterdata_append_data( + (dom_characterdata *)lastchild, data); + dom_string_unref(data); + if (lastchild != NULL) + dom_node_unref(lastchild); + if (err != DOM_NO_ERR) { + parser->msg(DOM_MSG_CRITICAL, parser->mctx, + "No memory for cdata section"); + } + return; + } + + if (lastchild != NULL) + dom_node_unref(lastchild); + + /* We can't append directly, so make a new node */ err = parser->is_cdata ? dom_document_create_cdata_section(parser->doc, data, (dom_cdata_section **)&cdata) : -- cgit v1.2.3