From c4b49fb2240a425f0327b9720b6eb33ea55b3756 Mon Sep 17 00:00:00 2001 From: Michael Graeb Date: Tue, 11 Jul 2023 14:43:31 -0700 Subject: [PATCH 1/3] Simplify XML API --- include/aws/common/private/xml_parser_impl.h | 1 + include/aws/common/xml_parser.h | 54 ++-- source/xml_parser.c | 176 +++++-------- tests/xml_parser_test.c | 248 +++++++++---------- 4 files changed, 200 insertions(+), 279 deletions(-) diff --git a/include/aws/common/private/xml_parser_impl.h b/include/aws/common/private/xml_parser_impl.h index 8f022a4e0..dc731c462 100644 --- a/include/aws/common/private/xml_parser_impl.h +++ b/include/aws/common/private/xml_parser_impl.h @@ -9,6 +9,7 @@ #include struct aws_xml_node { + struct aws_xml_parser *parser; struct aws_byte_cursor name; struct aws_array_list attributes; struct aws_byte_cursor doc_at_body; diff --git a/include/aws/common/xml_parser.h b/include/aws/common/xml_parser.h index 542a72bec..78c2085f5 100644 --- a/include/aws/common/xml_parser.h +++ b/include/aws/common/xml_parser.h @@ -13,7 +13,6 @@ AWS_PUSH_SANE_WARNING_LEVEL -struct aws_xml_parser; struct aws_xml_node; struct aws_xml_attribute { @@ -21,17 +20,21 @@ struct aws_xml_attribute { struct aws_byte_cursor value; }; +/* TODO: remove stop_parsing support */ + /** * Callback for when an xml node is encountered in the document. As a user you have a few options: * - * 1. reject the document parsing at this point by returning false. This will immediately stop doc parsing. - * 2. call aws_xml_node_traverse() on the node to descend into the node with a new callback and user_data. - * 3. call aws_xml_node_as_body() to retrieve the contents of the node as text. + * 1. call aws_xml_node_traverse() on the node to descend into the node with a new callback and user_data. + * 2. call aws_xml_node_as_body() to retrieve the contents of the node as text. + * 3. return AWS_OP_ERR (after an error has been raised) to fail doc parsing. + * 4. set `*stop_parsing = true` and return AWS_OP_SUCCESS to immediately stop doc parsing without failing it. + * + * You MUST NOT call both aws_xml_node_traverse() and aws_xml_node_as_body() on the same node. * * return true to continue the parsing operation. */ -typedef bool( - aws_xml_parser_on_node_encountered_fn)(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data); +typedef int(aws_xml_parser_on_node_encountered_fn)(struct aws_xml_node *node, bool *stop_parsing, void *user_data); struct aws_xml_parser_options { /* xml document to parse. */ @@ -39,47 +42,35 @@ struct aws_xml_parser_options { /* Max node depth used for parsing document. */ size_t max_depth; -}; -AWS_EXTERN_C_BEGIN + /* Callback invoked on the root node */ + aws_xml_parser_on_node_encountered_fn *on_root_encountered; -/** - * Allocates an xml parser. - */ -AWS_COMMON_API -struct aws_xml_parser *aws_xml_parser_new( - struct aws_allocator *allocator, - const struct aws_xml_parser_options *options); + /* User data for callback */ + void *user_data; +}; -/* - * De-allocates an xml parser. - */ -AWS_COMMON_API -void aws_xml_parser_destroy(struct aws_xml_parser *parser); +AWS_EXTERN_C_BEGIN /** - * Parse the doc until the end or until a callback rejects the document. - * on_node_encountered will be invoked when the root node is encountered. + * Parse an XML document. + * WARNING: This is not a public API. It is only intended for use within the aws-c libraries. */ AWS_COMMON_API -int aws_xml_parser_parse( - struct aws_xml_parser *parser, - aws_xml_parser_on_node_encountered_fn *on_node_encountered, - void *user_data); +int aws_xml_parse(struct aws_allocator *allocator, const struct aws_xml_parser_options *options); /** * Writes the contents of the body of node into out_body. out_body is an output parameter in this case. Upon success, * out_body will contain the body of the node. */ AWS_COMMON_API -int aws_xml_node_as_body(struct aws_xml_parser *parser, struct aws_xml_node *node, struct aws_byte_cursor *out_body); +int aws_xml_node_as_body(struct aws_xml_node *node, struct aws_byte_cursor *out_body); /** * Traverse node and invoke on_node_encountered when a nested node is encountered. */ AWS_COMMON_API int aws_xml_node_traverse( - struct aws_xml_parser *parser, struct aws_xml_node *node, aws_xml_parser_on_node_encountered_fn *on_node_encountered, void *user_data); @@ -88,7 +79,7 @@ int aws_xml_node_traverse( * Get the name of an xml node. */ AWS_COMMON_API -int aws_xml_node_get_name(const struct aws_xml_node *node, struct aws_byte_cursor *out_name); +struct aws_byte_cursor aws_xml_node_get_name(const struct aws_xml_node *node); /* * Get the number of attributes for an xml node. @@ -100,10 +91,7 @@ size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node); * Get an attribute for an xml node by its index. */ AWS_COMMON_API -int aws_xml_node_get_attribute( - const struct aws_xml_node *node, - size_t attribute_index, - struct aws_xml_attribute *out_attribute); +struct aws_xml_attribute aws_xml_node_get_attribute(const struct aws_xml_node *node, size_t attribute_index); AWS_EXTERN_C_END AWS_POP_SANE_WARNING_LEVEL diff --git a/source/xml_parser.c b/source/xml_parser.c index ac238cdfa..966ab196e 100644 --- a/source/xml_parser.c +++ b/source/xml_parser.c @@ -21,45 +21,6 @@ struct cb_stack_data { void *user_data; }; -struct aws_xml_parser *aws_xml_parser_new( - struct aws_allocator *allocator, - const struct aws_xml_parser_options *options) { - - AWS_PRECONDITION(allocator); - AWS_PRECONDITION(options); - - struct aws_xml_parser *parser = aws_mem_calloc(allocator, 1, sizeof(struct aws_xml_parser)); - - if (parser == NULL) { - return NULL; - } - - parser->allocator = allocator; - parser->doc = options->doc; - - parser->max_depth = s_max_document_depth; - parser->error = AWS_OP_SUCCESS; - - if (options->max_depth) { - parser->max_depth = options->max_depth; - } - - if (aws_array_list_init_dynamic(&parser->callback_stack, allocator, 4, sizeof(struct cb_stack_data))) { - aws_mem_release(allocator, parser); - return NULL; - } - - return parser; -} - -void aws_xml_parser_destroy(struct aws_xml_parser *parser) { - AWS_PRECONDITION(parser); - - aws_array_list_clean_up(&parser->callback_stack); - - aws_mem_release(parser->allocator, parser); -} - int s_node_next_sibling(struct aws_xml_parser *parser); static bool s_double_quote_fn(uint8_t value) { @@ -134,43 +95,44 @@ static int s_load_node_decl( return AWS_OP_SUCCESS; } -int aws_xml_parser_parse( - struct aws_xml_parser *parser, - aws_xml_parser_on_node_encountered_fn *on_node_encountered, - void *user_data) { +int aws_xml_parse(struct aws_allocator *allocator, const struct aws_xml_parser_options *options) { - AWS_PRECONDITION(parser); - - if (on_node_encountered == NULL) { - AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'on_node_encountered' argument for aws_xml_parser_parse is invalid."); - aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); - return AWS_OP_ERR; - } - - aws_array_list_clear(&parser->callback_stack); + AWS_PRECONDITION(allocator); + AWS_PRECONDITION(options); + AWS_PRECONDITION(options->on_root_encountered); + + struct aws_xml_parser parser = { + .allocator = allocator, + .doc = options->doc, + .max_depth = options->max_depth ? options->max_depth : s_max_document_depth, + .error = AWS_OP_SUCCESS, + .stop_parsing = false, + }; + aws_array_list_init_dynamic(&parser.callback_stack, allocator, 4, sizeof(struct cb_stack_data)); /* burn everything that precedes the actual xml nodes. */ - while (parser->doc.len) { - const uint8_t *start = memchr(parser->doc.ptr, '<', parser->doc.len); + while (parser.doc.len) { + const uint8_t *start = memchr(parser.doc.ptr, '<', parser.doc.len); if (!start) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + parser.error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + goto clean_up; } - const uint8_t *location = memchr(parser->doc.ptr, '>', parser->doc.len); - + const uint8_t *location = memchr(parser.doc.ptr, '>', parser.doc.len); if (!location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + parser.error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + goto clean_up; } - aws_byte_cursor_advance(&parser->doc, start - parser->doc.ptr); + aws_byte_cursor_advance(&parser.doc, start - parser.doc.ptr); /* if these are preamble statements, burn them. otherwise don't seek at all * and assume it's just the doc with no preamble statements. */ - if (*(parser->doc.ptr + 1) == '?' || *(parser->doc.ptr + 1) == '!') { + if (*(parser.doc.ptr + 1) == '?' || *(parser.doc.ptr + 1) == '!') { /* nobody cares about the preamble */ - size_t advance = location - parser->doc.ptr + 1; - aws_byte_cursor_advance(&parser->doc, advance); + size_t advance = location - parser.doc.ptr + 1; + aws_byte_cursor_advance(&parser.doc, advance); } else { break; } @@ -178,12 +140,16 @@ int aws_xml_parser_parse( /* now we should be at the start of the actual document. */ struct cb_stack_data stack_data = { - .cb = on_node_encountered, - .user_data = user_data, + .cb = options->on_root_encountered, + .user_data = options->user_data, }; - AWS_FATAL_ASSERT(!aws_array_list_push_back(&parser->callback_stack, &stack_data)); - return s_node_next_sibling(parser); + aws_array_list_push_back(&parser.callback_stack, &stack_data); + parser.error = s_node_next_sibling(&parser); + +clean_up: + aws_array_list_clean_up(&parser.callback_stack); + return parser.error; } int s_advance_to_closing_tag( @@ -267,29 +233,24 @@ int s_advance_to_closing_tag( return parser->error; } -int aws_xml_node_as_body(struct aws_xml_parser *parser, struct aws_xml_node *node, struct aws_byte_cursor *out_body) { - AWS_PRECONDITION(parser); +int aws_xml_node_as_body(struct aws_xml_node *node, struct aws_byte_cursor *out_body) { AWS_PRECONDITION(node); + AWS_FATAL_ASSERT(!node->processed && "XML node can be traversed, or read as body, but not both."); node->processed = true; - return s_advance_to_closing_tag(parser, node, out_body); + return s_advance_to_closing_tag(node->parser, node, out_body); } int aws_xml_node_traverse( - struct aws_xml_parser *parser, struct aws_xml_node *node, aws_xml_parser_on_node_encountered_fn *on_node_encountered, void *user_data) { - AWS_PRECONDITION(parser); AWS_PRECONDITION(node); + AWS_PRECONDITION(on_node_encountered); - if (on_node_encountered == NULL) { - AWS_LOGF_ERROR( - AWS_LS_COMMON_XML_PARSER, "Callback 'on_node_encountered' for aws_xml_node_traverse is invalid."); - aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); - return AWS_OP_ERR; - } + struct aws_xml_parser *parser = node->parser; + AWS_FATAL_ASSERT(!node->processed && "XML node can be traversed, or read as body, but not both."); node->processed = true; struct cb_stack_data stack_data = { .cb = on_node_encountered, @@ -298,16 +259,12 @@ int aws_xml_node_traverse( size_t doc_depth = aws_array_list_length(&parser->callback_stack); if (doc_depth >= parser->max_depth) { - AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); - return AWS_OP_ERR; + AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document exceeds max depth."); + aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + goto error; } - if (aws_array_list_push_back(&parser->callback_stack, &stack_data)) { - AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); - return AWS_OP_ERR; - } + aws_array_list_push_back(&parser->callback_stack, &stack_data); /* look for the next node at the current level. do this until we encounter the parent node's * closing tag. */ @@ -316,14 +273,16 @@ int aws_xml_node_traverse( if (!next_location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + goto error; } const uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len); if (!end_location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + goto error; } bool parent_closed = false; @@ -343,6 +302,7 @@ int aws_xml_node_traverse( struct aws_byte_cursor decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1); struct aws_xml_node next_node = { + .parser = parser, .doc_at_body = parser->doc, .processed = false, }; @@ -351,15 +311,18 @@ int aws_xml_node_traverse( return AWS_OP_ERR; } - if (!on_node_encountered(parser, &next_node, user_data)) { - parser->stop_parsing = true; + if (on_node_encountered(&next_node, &parser->stop_parsing, user_data)) { + goto error; + } + + if (parser->stop_parsing) { return parser->error; } /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */ if (!parser->stop_parsing && !next_node.processed) { if (s_advance_to_closing_tag(parser, &next_node, NULL)) { - return AWS_OP_ERR; + goto error; } } } @@ -370,19 +333,15 @@ int aws_xml_node_traverse( aws_array_list_pop_back(&parser->callback_stack); return parser->error; + +error: + parser->error = AWS_OP_ERR; + return parser->error; } -int aws_xml_node_get_name(const struct aws_xml_node *node, struct aws_byte_cursor *out_name) { +struct aws_byte_cursor aws_xml_node_get_name(const struct aws_xml_node *node) { AWS_PRECONDITION(node); - - if (out_name == NULL) { - AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_name' argument for aws_xml_node_get_name is invalid."); - aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); - return AWS_OP_ERR; - } - - *out_name = node->name; - return AWS_OP_SUCCESS; + return node->name; } size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node) { @@ -390,19 +349,15 @@ size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node) { return aws_array_list_length(&node->attributes); } -int aws_xml_node_get_attribute( - const struct aws_xml_node *node, - size_t attribute_index, - struct aws_xml_attribute *out_attribute) { +struct aws_xml_attribute aws_xml_node_get_attribute(const struct aws_xml_node *node, size_t attribute_index) { AWS_PRECONDITION(node); - if (out_attribute == NULL) { - AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_attribute' argument for aws_xml_node_get_attribute is invalid."); - aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); - return AWS_OP_ERR; + struct aws_xml_attribute attribute; + if (aws_array_list_get_at(&node->attributes, &attribute, attribute_index)) { + AWS_FATAL_ASSERT(0 && "Invalid XML attribute index"); } - return aws_array_list_get_at(&node->attributes, out_attribute, attribute_index); + return attribute; } /* advance the parser to the next sibling node.*/ @@ -429,6 +384,7 @@ int s_node_next_sibling(struct aws_xml_parser *parser) { struct aws_byte_cursor node_decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1); struct aws_xml_node sibling_node = { + .parser = parser, .doc_at_body = parser->doc, .processed = false, }; @@ -442,7 +398,7 @@ int s_node_next_sibling(struct aws_xml_parser *parser) { aws_array_list_back(&parser->callback_stack, &stack_data); AWS_FATAL_ASSERT(stack_data.cb); - parser->stop_parsing = !stack_data.cb(parser, &sibling_node, stack_data.user_data); + parser->stop_parsing = !stack_data.cb(&sibling_node, &parser->stop_parsing, stack_data.user_data); /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */ if (!sibling_node.processed) { diff --git a/tests/xml_parser_test.c b/tests/xml_parser_test.c index 252afba4b..6c62d1605 100644 --- a/tests/xml_parser_test.c +++ b/tests/xml_parser_test.c @@ -11,41 +11,38 @@ const char *root_with_text = "error = aws_xml_node_as_body(parser, node, &capture->capture); - capture->error |= aws_xml_node_get_name(node, &capture->node_name); + if (aws_xml_node_as_body(node, &capture->capture)) { + return AWS_OP_ERR; + } + capture->node_name = aws_xml_node_get_name(node); - return true; + return AWS_OP_SUCCESS; } static int s_xml_parser_root_with_text_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(root_with_text); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - - ASSERT_TRUE(parser != NULL); struct root_with_text_capture capture; AWS_ZERO_STRUCT(capture); - ASSERT_SUCCESS(aws_xml_parser_parse(parser, s_root_with_text_root_node, &capture)); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(root_with_text), + .on_root_encountered = s_root_with_text_root_node, + .user_data = &capture, + }; + ASSERT_SUCCESS(aws_xml_parse(allocator, &options)); const char expected_name[] = "rootNode"; const char expected_value[] = "TestBody"; - ASSERT_INT_EQUALS(AWS_OP_SUCCESS, capture.error); ASSERT_BIN_ARRAYS_EQUALS(expected_name, sizeof(expected_name) - 1, capture.node_name.ptr, capture.node_name.len); ASSERT_BIN_ARRAYS_EQUALS(expected_value, sizeof(expected_value) - 1, capture.capture.ptr, capture.capture.len); - aws_xml_parser_destroy(parser); - return AWS_OP_SUCCESS; } @@ -57,45 +54,45 @@ const char *child_with_text = struct child_text_capture { struct aws_byte_cursor capture; struct aws_byte_cursor node_name; - int error; }; -bool s_child_with_text_root_node(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { +int s_child_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; struct child_text_capture *capture = user_data; - capture->error |= aws_xml_node_as_body(parser, node, &capture->capture); - capture->error |= aws_xml_node_get_name(node, &capture->node_name); + if (aws_xml_node_as_body(node, &capture->capture)) { + return AWS_OP_ERR; + } + capture->node_name = aws_xml_node_get_name(node); - return true; + return AWS_OP_SUCCESS; } -bool s_root_with_child(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { - struct child_text_capture *capture = user_data; - capture->error |= aws_xml_node_traverse(parser, node, s_child_with_text_root_node, user_data); - return true; +int s_root_with_child(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; + if (aws_xml_node_traverse(node, s_child_with_text_root_node, user_data)) { + return AWS_OP_ERR; + } + return AWS_OP_SUCCESS; } static int s_xml_parser_child_with_text_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(child_with_text); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); - - struct root_with_text_capture capture; + struct child_text_capture capture; AWS_ZERO_STRUCT(capture); - ASSERT_SUCCESS(aws_xml_parser_parse(parser, s_root_with_child, &capture)); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(child_with_text), + .on_root_encountered = s_root_with_child, + .user_data = &capture, + }; + ASSERT_SUCCESS(aws_xml_parse(allocator, &options)); const char expected_name[] = "child1"; const char expected_value[] = "TestBody"; - ASSERT_INT_EQUALS(AWS_OP_SUCCESS, capture.error); ASSERT_BIN_ARRAYS_EQUALS(expected_name, sizeof(expected_name) - 1, capture.node_name.ptr, capture.node_name.len); ASSERT_BIN_ARRAYS_EQUALS(expected_value, sizeof(expected_value) - 1, capture.capture.ptr, capture.capture.len); - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } @@ -110,51 +107,49 @@ struct sibling_text_capture { struct aws_byte_cursor capture2; struct aws_byte_cursor node_name1; struct aws_byte_cursor node_name2; - int error; }; -bool s_sibling_with_text_root_node(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { +int s_sibling_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; struct sibling_text_capture *capture = user_data; struct aws_byte_cursor child1_name = aws_byte_cursor_from_c_str("child1"); struct aws_byte_cursor child2_name = aws_byte_cursor_from_c_str("child2"); - struct aws_byte_cursor node_name; - AWS_ZERO_STRUCT(node_name); - capture->error |= aws_xml_node_get_name(node, &node_name); + struct aws_byte_cursor node_name = aws_xml_node_get_name(node); if (aws_byte_cursor_eq_ignore_case(&node_name, &child1_name)) { capture->node_name1 = node_name; - capture->error |= aws_xml_node_as_body(parser, node, &capture->capture1); + if (aws_xml_node_as_body(node, &capture->capture1)) { + return AWS_OP_ERR; + } } else if (aws_byte_cursor_eq_ignore_case(&node_name, &child2_name)) { capture->node_name2 = node_name; - capture->error |= aws_xml_node_as_body(parser, node, &capture->capture2); + if (aws_xml_node_as_body(node, &capture->capture2)) { + return AWS_OP_ERR; + } } - return true; + return AWS_OP_SUCCESS; } -bool s_root_with_child_siblings(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { - struct sibling_text_capture *capture = user_data; - - capture->error |= aws_xml_node_traverse(parser, node, s_sibling_with_text_root_node, user_data); - return true; +int s_root_with_child_siblings(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; + return aws_xml_node_traverse(node, s_sibling_with_text_root_node, user_data); } static int s_xml_parser_siblings_with_text_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(siblings_with_text); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); - struct sibling_text_capture capture; AWS_ZERO_STRUCT(capture); - ASSERT_SUCCESS(aws_xml_parser_parse(parser, s_root_with_child_siblings, &capture)); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(siblings_with_text), + .on_root_encountered = s_root_with_child_siblings, + .user_data = &capture, + }; + ASSERT_SUCCESS(aws_xml_parse(allocator, &options)); const char expected_name1[] = "child1"; const char expected_value1[] = "TestBody"; @@ -162,7 +157,6 @@ static int s_xml_parser_siblings_with_text_test(struct aws_allocator *allocator, const char expected_name2[] = "child2"; const char expected_value2[] = "TestBody2"; - ASSERT_INT_EQUALS(AWS_OP_SUCCESS, capture.error); ASSERT_BIN_ARRAYS_EQUALS( expected_name1, sizeof(expected_name1) - 1, capture.node_name1.ptr, capture.node_name1.len); ASSERT_BIN_ARRAYS_EQUALS(expected_value1, sizeof(expected_value1) - 1, capture.capture1.ptr, capture.capture1.len); @@ -171,7 +165,6 @@ static int s_xml_parser_siblings_with_text_test(struct aws_allocator *allocator, expected_name2, sizeof(expected_name2) - 1, capture.node_name2.ptr, capture.node_name2.len); ASSERT_BIN_ARRAYS_EQUALS(expected_value2, sizeof(expected_value2) - 1, capture.capture2.ptr, capture.capture2.len); - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } @@ -193,59 +186,60 @@ struct preamble_and_attributes_capture { struct aws_byte_cursor node_name2; struct aws_xml_attribute root_attr1; struct aws_xml_attribute root_attr2; - int error; }; -bool s_preamble_and_attributes_child_node(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { +int s_preamble_and_attributes_child_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; struct preamble_and_attributes_capture *capture = user_data; struct aws_byte_cursor child1_name = aws_byte_cursor_from_c_str("child1"); struct aws_byte_cursor child2_name = aws_byte_cursor_from_c_str("child2"); - struct aws_byte_cursor node_name; - AWS_ZERO_STRUCT(node_name); - capture->error |= aws_xml_node_get_name(node, &node_name); + struct aws_byte_cursor node_name = aws_xml_node_get_name(node); if (aws_byte_cursor_eq_ignore_case(&node_name, &child1_name)) { capture->node_name1 = node_name; - capture->error |= aws_xml_node_as_body(parser, node, &capture->capture1); + if (aws_xml_node_as_body(node, &capture->capture1)) { + return AWS_OP_ERR; + } } else if (aws_byte_cursor_eq_ignore_case(&node_name, &child2_name)) { capture->node_name2 = node_name; - capture->error |= aws_xml_node_as_body(parser, node, &capture->capture2); + if (aws_xml_node_as_body(node, &capture->capture2)) { + return AWS_OP_ERR; + } ASSERT_TRUE(aws_xml_node_get_num_attributes(node) == 1); - capture->error |= aws_xml_node_get_attribute(node, 0, &capture->capture2_attr); + capture->capture2_attr = aws_xml_node_get_attribute(node, 0); } - return true; + return AWS_OP_SUCCESS; } -bool s_preamble_and_attributes(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { +int s_preamble_and_attributes(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; struct preamble_and_attributes_capture *capture = user_data; ASSERT_TRUE(aws_xml_node_get_num_attributes(node) == 2); - capture->error |= aws_xml_node_get_attribute(node, 0, &capture->root_attr1); - capture->error |= aws_xml_node_get_attribute(node, 1, &capture->root_attr2); - capture->error |= aws_xml_node_traverse(parser, node, s_preamble_and_attributes_child_node, user_data); - return true; + capture->root_attr1 = aws_xml_node_get_attribute(node, 0); + capture->root_attr2 = aws_xml_node_get_attribute(node, 1); + return aws_xml_node_traverse(node, s_preamble_and_attributes_child_node, user_data); } static int s_xml_parser_preamble_and_attributes_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(preamble_and_attributes); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); - struct preamble_and_attributes_capture capture; AWS_ZERO_STRUCT(capture); - ASSERT_SUCCESS(aws_xml_parser_parse(parser, s_preamble_and_attributes, &capture)); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(preamble_and_attributes), + .on_root_encountered = s_preamble_and_attributes, + .user_data = &capture, + }; + + ASSERT_SUCCESS(aws_xml_parse(allocator, &options)); const char expected_attr1_name[] = "attribute1"; const char expected_attr1_value1[] = "abc"; @@ -275,7 +269,6 @@ static int s_xml_parser_preamble_and_attributes_test(struct aws_allocator *alloc const char expected_name2[] = "child2"; const char expected_value2[] = "TestBody2"; - ASSERT_INT_EQUALS(AWS_OP_SUCCESS, capture.error); ASSERT_BIN_ARRAYS_EQUALS( expected_name1, sizeof(expected_name1) - 1, capture.node_name1.ptr, capture.node_name1.len); ASSERT_BIN_ARRAYS_EQUALS(expected_value1, sizeof(expected_value1) - 1, capture.capture1.ptr, capture.capture1.len); @@ -298,7 +291,6 @@ static int s_xml_parser_preamble_and_attributes_test(struct aws_allocator *alloc capture.capture2_attr.value.ptr, capture.capture2_attr.value.len); - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } @@ -323,28 +315,25 @@ struct nested_node_capture { struct aws_byte_cursor node_body; }; -bool s_nested_node(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { +int s_nested_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; struct nested_node_capture *capture = user_data; - aws_xml_node_as_body(parser, node, &capture->node_body); - - return true; + return aws_xml_node_as_body(node, &capture->node_body); } static int s_xml_parser_nested_node_same_name_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(nested_nodes_same_name_doc); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); - struct nested_node_capture capture; AWS_ZERO_STRUCT(capture); - ASSERT_SUCCESS(aws_xml_parser_parse(parser, s_nested_node, &capture)); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(nested_nodes_same_name_doc), + .on_root_encountered = s_nested_node, + .user_data = &capture, + }; + ASSERT_SUCCESS(aws_xml_parse(allocator, &options)); const char *expected_body = "\n \n" " \n" @@ -357,7 +346,6 @@ static int s_xml_parser_nested_node_same_name_test(struct aws_allocator *allocat ASSERT_BIN_ARRAYS_EQUALS(expected_body, strlen(expected_body), capture.node_body.ptr, capture.node_body.len); - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } @@ -373,27 +361,21 @@ const char *nested_nodes_deep_recursion_doc = "\n" ""; -bool s_nested_node_deep_recursion(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { - aws_xml_node_traverse(parser, node, s_nested_node_deep_recursion, user_data); - return true; +int s_nested_node_deep_recursion(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { + (void)stop_parsing; + return aws_xml_node_traverse(node, s_nested_node_deep_recursion, user_data); } static int s_xml_parser_nested_node_deep_recursion_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(nested_nodes_deep_recursion_doc); - options.max_depth = 2; + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(nested_nodes_deep_recursion_doc), + .max_depth = 2, + .on_root_encountered = s_nested_node_deep_recursion, + .user_data = NULL, + }; + ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parse(allocator, &options)); - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); - - struct nested_node_capture capture; - AWS_ZERO_STRUCT(capture); - - ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parser_parse(parser, s_nested_node_deep_recursion, NULL)); - - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } @@ -408,26 +390,23 @@ const char *too_many_attributes = "\n" "attribute8=\"def\" attribute9=\"def\" attribute10=\"def\" attribute11=\"def\">\n" ""; -bool s_too_many_attributes(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { - (void)parser; +int s_too_many_attributes(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { (void)node; + (void)stop_parsing; (void)user_data; - return true; + return AWS_OP_SUCCESS; } static int s_xml_parser_too_many_attributes_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(too_many_attributes); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(too_many_attributes), + .on_root_encountered = s_too_many_attributes, + .user_data = NULL, + }; + ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parse(allocator, &options)); - ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parser_parse(parser, s_too_many_attributes, NULL)); - - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } @@ -446,26 +425,23 @@ const char *node_name_too_long = "klmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrst" "uvwxyzabcdefghijklmnopqrstuvwxyz>"; -bool s_too_long(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data) { - (void)parser; +int s_too_long(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { (void)node; + (void)stop_parsing; (void)user_data; - return true; + return AWS_OP_SUCCESS; } static int s_xml_parser_name_too_long_test(struct aws_allocator *allocator, void *ctx) { (void)ctx; - struct aws_xml_parser_options options; - AWS_ZERO_STRUCT(options); - options.doc = aws_byte_cursor_from_c_str(node_name_too_long); - - struct aws_xml_parser *parser = aws_xml_parser_new(allocator, &options); - ASSERT_TRUE(parser != NULL); - - ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parser_parse(parser, s_too_long, NULL)); + struct aws_xml_parser_options options = { + .doc = aws_byte_cursor_from_c_str(node_name_too_long), + .on_root_encountered = s_too_long, + .user_data = NULL, + }; + ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parse(allocator, &options)); - aws_xml_parser_destroy(parser); return AWS_OP_SUCCESS; } From fa5be12f5c9c8277f15e77424fca088f286a9275 Mon Sep 17 00:00:00 2001 From: Michael Graeb Date: Tue, 11 Jul 2023 14:57:18 -0700 Subject: [PATCH 2/3] Remove ability to stop parsing without raising error. No one is using this. Less code is best code. --- include/aws/common/private/xml_parser_impl.h | 1 - include/aws/common/xml_parser.h | 11 +++---- source/xml_parser.c | 19 ++++------- tests/xml_parser_test.c | 33 +++++++------------- 4 files changed, 21 insertions(+), 43 deletions(-) diff --git a/include/aws/common/private/xml_parser_impl.h b/include/aws/common/private/xml_parser_impl.h index dc731c462..eea061b1e 100644 --- a/include/aws/common/private/xml_parser_impl.h +++ b/include/aws/common/private/xml_parser_impl.h @@ -26,7 +26,6 @@ struct aws_xml_parser { struct aws_byte_cursor split_scratch[11]; size_t max_depth; int error; - bool stop_parsing; }; #endif /* AWS_COMMON_PRIVATE_XML_PARSER_IMPL_H */ diff --git a/include/aws/common/xml_parser.h b/include/aws/common/xml_parser.h index 78c2085f5..c7ef28227 100644 --- a/include/aws/common/xml_parser.h +++ b/include/aws/common/xml_parser.h @@ -20,21 +20,18 @@ struct aws_xml_attribute { struct aws_byte_cursor value; }; -/* TODO: remove stop_parsing support */ - /** * Callback for when an xml node is encountered in the document. As a user you have a few options: * - * 1. call aws_xml_node_traverse() on the node to descend into the node with a new callback and user_data. - * 2. call aws_xml_node_as_body() to retrieve the contents of the node as text. - * 3. return AWS_OP_ERR (after an error has been raised) to fail doc parsing. - * 4. set `*stop_parsing = true` and return AWS_OP_SUCCESS to immediately stop doc parsing without failing it. + * 1. fail the parse by returning AWS_OP_ERR (after an error has been raised). This will stop any further parsing. + * 2. call aws_xml_node_traverse() on the node to descend into the node with a new callback and user_data. + * 3. call aws_xml_node_as_body() to retrieve the contents of the node as text. * * You MUST NOT call both aws_xml_node_traverse() and aws_xml_node_as_body() on the same node. * * return true to continue the parsing operation. */ -typedef int(aws_xml_parser_on_node_encountered_fn)(struct aws_xml_node *node, bool *stop_parsing, void *user_data); +typedef int(aws_xml_parser_on_node_encountered_fn)(struct aws_xml_node *node, void *user_data); struct aws_xml_parser_options { /* xml document to parse. */ diff --git a/source/xml_parser.c b/source/xml_parser.c index 966ab196e..4ed62fca5 100644 --- a/source/xml_parser.c +++ b/source/xml_parser.c @@ -106,7 +106,6 @@ int aws_xml_parse(struct aws_allocator *allocator, const struct aws_xml_parser_o .doc = options->doc, .max_depth = options->max_depth ? options->max_depth : s_max_document_depth, .error = AWS_OP_SUCCESS, - .stop_parsing = false, }; aws_array_list_init_dynamic(&parser.callback_stack, allocator, 4, sizeof(struct cb_stack_data)); @@ -268,7 +267,7 @@ int aws_xml_node_traverse( /* look for the next node at the current level. do this until we encounter the parent node's * closing tag. */ - while (!parser->stop_parsing && !parser->error) { + while (!parser->error) { const uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len); if (!next_location) { @@ -311,26 +310,18 @@ int aws_xml_node_traverse( return AWS_OP_ERR; } - if (on_node_encountered(&next_node, &parser->stop_parsing, user_data)) { + if (on_node_encountered(&next_node, user_data)) { goto error; } - if (parser->stop_parsing) { - return parser->error; - } - /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */ - if (!parser->stop_parsing && !next_node.processed) { + if (!next_node.processed) { if (s_advance_to_closing_tag(parser, &next_node, NULL)) { goto error; } } } - if (parser->stop_parsing) { - return parser->error; - } - aws_array_list_pop_back(&parser->callback_stack); return parser->error; @@ -398,7 +389,9 @@ int s_node_next_sibling(struct aws_xml_parser *parser) { aws_array_list_back(&parser->callback_stack, &stack_data); AWS_FATAL_ASSERT(stack_data.cb); - parser->stop_parsing = !stack_data.cb(&sibling_node, &parser->stop_parsing, stack_data.user_data); + if (stack_data.cb(&sibling_node, stack_data.user_data)) { + return AWS_OP_ERR; + } /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */ if (!sibling_node.processed) { diff --git a/tests/xml_parser_test.c b/tests/xml_parser_test.c index 6c62d1605..a02aa6262 100644 --- a/tests/xml_parser_test.c +++ b/tests/xml_parser_test.c @@ -13,8 +13,7 @@ struct root_with_text_capture { struct aws_byte_cursor node_name; }; -int s_root_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_root_with_text_root_node(struct aws_xml_node *node, void *user_data) { struct root_with_text_capture *capture = user_data; if (aws_xml_node_as_body(node, &capture->capture)) { return AWS_OP_ERR; @@ -56,8 +55,7 @@ struct child_text_capture { struct aws_byte_cursor node_name; }; -int s_child_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_child_with_text_root_node(struct aws_xml_node *node, void *user_data) { struct child_text_capture *capture = user_data; if (aws_xml_node_as_body(node, &capture->capture)) { return AWS_OP_ERR; @@ -67,8 +65,7 @@ int s_child_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, v return AWS_OP_SUCCESS; } -int s_root_with_child(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_root_with_child(struct aws_xml_node *node, void *user_data) { if (aws_xml_node_traverse(node, s_child_with_text_root_node, user_data)) { return AWS_OP_ERR; } @@ -109,8 +106,7 @@ struct sibling_text_capture { struct aws_byte_cursor node_name2; }; -int s_sibling_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_sibling_with_text_root_node(struct aws_xml_node *node, void *user_data) { struct sibling_text_capture *capture = user_data; struct aws_byte_cursor child1_name = aws_byte_cursor_from_c_str("child1"); @@ -133,8 +129,7 @@ int s_sibling_with_text_root_node(struct aws_xml_node *node, bool *stop_parsing, return AWS_OP_SUCCESS; } -int s_root_with_child_siblings(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_root_with_child_siblings(struct aws_xml_node *node, void *user_data) { return aws_xml_node_traverse(node, s_sibling_with_text_root_node, user_data); } @@ -188,8 +183,7 @@ struct preamble_and_attributes_capture { struct aws_xml_attribute root_attr2; }; -int s_preamble_and_attributes_child_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_preamble_and_attributes_child_node(struct aws_xml_node *node, void *user_data) { struct preamble_and_attributes_capture *capture = user_data; struct aws_byte_cursor child1_name = aws_byte_cursor_from_c_str("child1"); @@ -216,8 +210,7 @@ int s_preamble_and_attributes_child_node(struct aws_xml_node *node, bool *stop_p return AWS_OP_SUCCESS; } -int s_preamble_and_attributes(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_preamble_and_attributes(struct aws_xml_node *node, void *user_data) { struct preamble_and_attributes_capture *capture = user_data; ASSERT_TRUE(aws_xml_node_get_num_attributes(node) == 2); @@ -315,8 +308,7 @@ struct nested_node_capture { struct aws_byte_cursor node_body; }; -int s_nested_node(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_nested_node(struct aws_xml_node *node, void *user_data) { struct nested_node_capture *capture = user_data; return aws_xml_node_as_body(node, &capture->node_body); @@ -361,8 +353,7 @@ const char *nested_nodes_deep_recursion_doc = "\n" ""; -int s_nested_node_deep_recursion(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { - (void)stop_parsing; +int s_nested_node_deep_recursion(struct aws_xml_node *node, void *user_data) { return aws_xml_node_traverse(node, s_nested_node_deep_recursion, user_data); } @@ -390,9 +381,8 @@ const char *too_many_attributes = "\n" "attribute8=\"def\" attribute9=\"def\" attribute10=\"def\" attribute11=\"def\">\n" ""; -int s_too_many_attributes(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { +int s_too_many_attributes(struct aws_xml_node *node, void *user_data) { (void)node; - (void)stop_parsing; (void)user_data; return AWS_OP_SUCCESS; } @@ -425,9 +415,8 @@ const char *node_name_too_long = "klmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrst" "uvwxyzabcdefghijklmnopqrstuvwxyz>"; -int s_too_long(struct aws_xml_node *node, bool *stop_parsing, void *user_data) { +int s_too_long(struct aws_xml_node *node, void *user_data) { (void)node; - (void)stop_parsing; (void)user_data; return AWS_OP_SUCCESS; } From fdcc2fdd86f57035b4644259a78fbea77ca96df5 Mon Sep 17 00:00:00 2001 From: Michael Graeb Date: Wed, 12 Jul 2023 17:37:40 -0700 Subject: [PATCH 3/3] Add AWS_ERROR_INVALID_XML error code. More informative than AWS_ERROR_MALFORMED_INPUT_STRING if it boils up from some deep internal system. --- include/aws/common/error.h | 1 + source/common.c | 3 +++ source/xml_parser.c | 22 +++++++++++----------- tests/xml_parser_test.c | 6 +++--- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/aws/common/error.h b/include/aws/common/error.h index 8fd0b4360..2c103d763 100644 --- a/include/aws/common/error.h +++ b/include/aws/common/error.h @@ -201,6 +201,7 @@ enum aws_common_error { AWS_ERROR_PLATFORM_NOT_SUPPORTED, AWS_ERROR_INVALID_UTF8, AWS_ERROR_GET_HOME_DIRECTORY_FAILED, + AWS_ERROR_INVALID_XML, AWS_ERROR_END_COMMON_RANGE = AWS_ERROR_ENUM_END_RANGE(AWS_C_COMMON_PACKAGE_ID) }; diff --git a/source/common.c b/source/common.c index 062d23228..79e301539 100644 --- a/source/common.c +++ b/source/common.c @@ -262,6 +262,9 @@ static struct aws_error_info errors[] = { AWS_DEFINE_ERROR_INFO_COMMON( AWS_ERROR_GET_HOME_DIRECTORY_FAILED, "Failed to get home directory"), + AWS_DEFINE_ERROR_INFO_COMMON( + AWS_ERROR_INVALID_XML, + "Invalid XML document"), }; /* clang-format on */ diff --git a/source/xml_parser.c b/source/xml_parser.c index 4ed62fca5..e1b580740 100644 --- a/source/xml_parser.c +++ b/source/xml_parser.c @@ -51,14 +51,14 @@ static int s_load_node_decl( * we limit to 10 attributes, if this is exceeded we consider it invalid document. */ if (aws_byte_cursor_split_on_char(decl_body, ' ', &splits)) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + return aws_raise_error(AWS_ERROR_INVALID_XML); } size_t splits_count = aws_array_list_length(&splits); if (splits_count < 1) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + return aws_raise_error(AWS_ERROR_INVALID_XML); } aws_array_list_get_at(&splits, &node->name, 0); @@ -114,14 +114,14 @@ int aws_xml_parse(struct aws_allocator *allocator, const struct aws_xml_parser_o const uint8_t *start = memchr(parser.doc.ptr, '<', parser.doc.len); if (!start) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - parser.error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + parser.error = aws_raise_error(AWS_ERROR_INVALID_XML); goto clean_up; } const uint8_t *location = memchr(parser.doc.ptr, '>', parser.doc.len); if (!location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - parser.error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + parser.error = aws_raise_error(AWS_ERROR_INVALID_XML); goto clean_up; } @@ -170,13 +170,13 @@ int s_advance_to_closing_tag( if (closing_name_len > node->doc_at_body.len) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + parser->error = aws_raise_error(AWS_ERROR_INVALID_XML); return AWS_OP_ERR; } if (sizeof(name_close) < closing_name_len) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + parser->error = aws_raise_error(AWS_ERROR_INVALID_XML); return AWS_OP_ERR; } @@ -200,7 +200,7 @@ int s_advance_to_closing_tag( do { if (aws_byte_cursor_find_exact(&parser->doc, &to_find_close, &close_find_result)) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + return aws_raise_error(AWS_ERROR_INVALID_XML); } /* if we find an opening node with the same name, before the closing tag keep going. */ @@ -259,7 +259,7 @@ int aws_xml_node_traverse( size_t doc_depth = aws_array_list_length(&parser->callback_stack); if (doc_depth >= parser->max_depth) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document exceeds max depth."); - aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + aws_raise_error(AWS_ERROR_INVALID_XML); goto error; } @@ -272,7 +272,7 @@ int aws_xml_node_traverse( if (!next_location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + aws_raise_error(AWS_ERROR_INVALID_XML); goto error; } @@ -280,7 +280,7 @@ int aws_xml_node_traverse( if (!end_location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + aws_raise_error(AWS_ERROR_INVALID_XML); goto error; } @@ -366,7 +366,7 @@ int s_node_next_sibling(struct aws_xml_parser *parser) { if (!end_location) { AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); - return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); + return aws_raise_error(AWS_ERROR_INVALID_XML); } size_t node_name_len = end_location - next_location; diff --git a/tests/xml_parser_test.c b/tests/xml_parser_test.c index a02aa6262..9a3b33041 100644 --- a/tests/xml_parser_test.c +++ b/tests/xml_parser_test.c @@ -365,7 +365,7 @@ static int s_xml_parser_nested_node_deep_recursion_test(struct aws_allocator *al .on_root_encountered = s_nested_node_deep_recursion, .user_data = NULL, }; - ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parse(allocator, &options)); + ASSERT_ERROR(AWS_ERROR_INVALID_XML, aws_xml_parse(allocator, &options)); return AWS_OP_SUCCESS; } @@ -395,7 +395,7 @@ static int s_xml_parser_too_many_attributes_test(struct aws_allocator *allocator .on_root_encountered = s_too_many_attributes, .user_data = NULL, }; - ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parse(allocator, &options)); + ASSERT_ERROR(AWS_ERROR_INVALID_XML, aws_xml_parse(allocator, &options)); return AWS_OP_SUCCESS; } @@ -429,7 +429,7 @@ static int s_xml_parser_name_too_long_test(struct aws_allocator *allocator, void .on_root_encountered = s_too_long, .user_data = NULL, }; - ASSERT_ERROR(AWS_ERROR_MALFORMED_INPUT_STRING, aws_xml_parse(allocator, &options)); + ASSERT_ERROR(AWS_ERROR_INVALID_XML, aws_xml_parse(allocator, &options)); return AWS_OP_SUCCESS; }