From 3839d4ac6ae6d74b8662f38da73aec43efba80b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Sat, 6 Jul 2024 20:55:23 +0200 Subject: [PATCH] fix parsing of numbers within XML-encoded anyxml This was caught by the libyang-cpp test suite. On Windows/MSVC, an anyxml snippet that was parsed from this XML: 123 ...would get printed like this: {"example-schema:ax":{"x":["1","2","3"]}} ...while the correct form is: {"example-schema:ax":{"x":[1,2,3]}} This was caused by a change in `lydxml_get_hints_opaq` which probably tried to silence a compiler warning. The code wrongly assumed that the C `long` will be able to hold `UINT32_MAX`, which is not the case on Windows where `long` has the same range as `int32_t`. As a result, when the XML is parsed, these numeric values are compared against -1 on Windows, and against 4294967295 on many other platforms, and therefore any positive number was tagged with the hint of "this needs a 64bit serialization" on Windows, which in turn triggers printing these values as quoted string in JSON. The fix simply makes sure that we never perform a narrowing conversion from a constant. I was looking for a way of using explicitly sized types (i.e., int64_t in this case), but there's only `strtoll` in C. The alternative would be `sscanf(... SCNd64 ...)`, but the semantics is subtly different when it comes to whitespace. The bisecting process was non-trivial for me because the workflow which bypasses CI introduced a test regression (fixed by commit 7ebd607ee, "test UPDATE message changed") and a temporary Windows breakage (fixed by commit a1ecc1695, "compat BUGFIX regression in win timegm support"). Instead of a simple `git bisect`, this required some manual patch backporting. Fixes: 21eaa39f9 libyang BUGFIX type size problems --- src/parser_xml.c | 8 +++++--- tests/utests/data/test_parser_xml.c | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/parser_xml.c b/src/parser_xml.c index 0db10cae2..944d3d530 100644 --- a/src/parser_xml.c +++ b/src/parser_xml.c @@ -441,7 +441,9 @@ lydxml_get_hints_opaq(const char *name, size_t name_len, const char *value, size { struct lyd_node_opaq *opaq; char *ptr; - long num; + /* this needs to be at least 64bit, and it "should not" be an explicit int64_t + * because the code calls strtoll later on, which "might" return a bigger type */ + long long num; *hints = 0; *anchor = NULL; @@ -453,11 +455,11 @@ lydxml_get_hints_opaq(const char *name, size_t name_len, const char *value, size /* boolean value */ *hints |= LYD_VALHINT_BOOLEAN; } else { - num = strtol(value, &ptr, 10); + num = strtoll(value, &ptr, 10); if ((unsigned)(ptr - value) == value_len) { /* number value */ *hints |= LYD_VALHINT_DECNUM; - if ((num < INT32_MIN) || (num > (long)UINT32_MAX)) { + if ((num < INT32_MIN) || (num > UINT32_MAX)) { /* large number */ *hints |= LYD_VALHINT_NUM64; } diff --git a/tests/utests/data/test_parser_xml.c b/tests/utests/data/test_parser_xml.c index d5336c030..28c37709a 100644 --- a/tests/utests/data/test_parser_xml.c +++ b/tests/utests/data/test_parser_xml.c @@ -193,6 +193,18 @@ test_anyxml(void **state) free(str); CHECK_LYD_STRING(tree, LYD_PRINT_WITHSIBLINGS, data_expected); lyd_free_all(tree); + + data = "10-142949672954294967296-2147483648-2147483649"; + CHECK_PARSE_LYD(data, 0, LYD_VALIDATE_PRESENT, tree); + assert_non_null(tree); + tree = tree->next; + assert_int_equal(LY_SUCCESS, lyd_print_mem(&str, tree, LYD_XML, LYD_PRINT_SHRINK)); + CHECK_STRING(str, data); + free(str); + assert_int_equal(LY_SUCCESS, lyd_print_mem(&str, tree, LYD_JSON, LYD_PRINT_SHRINK)); + CHECK_STRING(str, "{\"a:anyx\":{\"x\":[1,0,-1,4294967295,\"4294967296\",-2147483648,\"-2147483649\"]}}"); + free(str); + lyd_free_all(tree); } static void