From c67021f84a39a169cf5ab7d660a7ac24cf269ca2 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 23 Aug 2022 11:21:55 -0700 Subject: [PATCH] split out the json string-to-int functions for general use PiperOrigin-RevId: 469509635 --- BUILD | 24 +++++++++ upb/internal/atoi.c | 68 ++++++++++++++++++++++++ upb/internal/atoi.h | 53 ++++++++++++++++++ upb/internal/atoi_test.cc | 109 ++++++++++++++++++++++++++++++++++++++ upb/json_decode.c | 42 +++------------ 5 files changed, 261 insertions(+), 35 deletions(-) create mode 100644 upb/internal/atoi.c create mode 100644 upb/internal/atoi.h create mode 100644 upb/internal/atoi_test.cc diff --git a/BUILD b/BUILD index bd93ff434a..a49327b287 100644 --- a/BUILD +++ b/BUILD @@ -432,6 +432,7 @@ cc_library( copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ + ":atoi_internal", ":encode_internal", ":port", ":reflection", @@ -589,6 +590,17 @@ upb_proto_reflection_library( deps = ["@com_google_protobuf//:timestamp_proto"], ) +cc_test( + name = "atoi_test", + srcs = ["upb/internal/atoi_test.cc"], + copts = UPB_DEFAULT_CPPOPTS, + deps = [ + ":atoi_internal", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "test_cpp", srcs = ["upb/test_cpp.cc"], @@ -768,6 +780,15 @@ cc_library( deps = [":port"], ) +cc_library( + name = "atoi_internal", + srcs = ["upb/internal/atoi.c"], + hdrs = ["upb/internal/atoi.h"], + copts = UPB_DEFAULT_COPTS, + visibility = ["//:__subpackages__"], + deps = [":port"], +) + cc_library( name = "decode_internal", srcs = [ @@ -839,6 +860,7 @@ upb_amalgamation( libs = [ ":arena_internal", ":array_internal", + ":atoi_internal", ":collections", ":decode_internal", ":descriptor_upb_proto", @@ -870,6 +892,7 @@ upb_amalgamation( libs = [ ":arena_internal", ":array_internal", + ":atoi_internal", ":collections", ":decode_internal", ":descriptor_upb_proto", @@ -904,6 +927,7 @@ upb_amalgamation( libs = [ ":arena_internal", ":array_internal", + 
":atoi_internal", ":collections", ":decode_internal", ":descriptor_upb_proto", diff --git a/upb/internal/atoi.c b/upb/internal/atoi.c new file mode 100644 index 0000000000..e8a2c8c111 --- /dev/null +++ b/upb/internal/atoi.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/internal/atoi.h" + +// Must be last. 
+#include "upb/port_def.inc" + +const char* upb_BufToUint64(const char* ptr, const char* end, uint64_t* val) { + uint64_t u64 = 0; + while (ptr < end) { + unsigned ch = *ptr - '0'; + if (ch >= 10) break; + if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) { + return NULL; // integer overflow + } + u64 *= 10; + u64 += ch; + ptr++; + } + + *val = u64; + return ptr; +} + +const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val, + bool* is_neg) { + bool neg = false; + uint64_t u64; + + if (ptr != end && *ptr == '-') { + ptr++; + neg = true; + } + + ptr = upb_BufToUint64(ptr, end, &u64); + if (!ptr || u64 > (uint64_t)INT64_MAX + neg) { + return NULL; // integer overflow + } + + *val = neg ? -u64 : u64; + if (is_neg) *is_neg = neg; + return ptr; +} diff --git a/upb/internal/atoi.h b/upb/internal/atoi.h new file mode 100644 index 0000000000..e29097afb5 --- /dev/null +++ b/upb/internal/atoi.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_INTERNAL_ATOI_H_ +#define UPB_INTERNAL_ATOI_H_ + +// Must be last. +#include "upb/port_def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +// We use these hand-written routines instead of strto[u]l() because the "long +// long" variants aren't in c89. Also our version allows setting a ptr limit. +// Return the new position of the pointer after parsing the int, or NULL on +// integer overflow. + +const char* upb_BufToUint64(const char* ptr, const char* end, uint64_t* val); +const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val, + bool* is_neg); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port_undef.inc" + +#endif /* UPB_INTERNAL_ATOI_H_ */ diff --git a/upb/internal/atoi_test.cc b/upb/internal/atoi_test.cc new file mode 100644 index 0000000000..f83cbe704c --- /dev/null +++ b/upb/internal/atoi_test.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/internal/atoi.h" + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" + +TEST(AtoiTest, Uint64) { + uint64_t val; + + const char* s = "1234z"; + EXPECT_EQ(s + 1, upb_BufToUint64(s, s + 1, &val)); + EXPECT_EQ(val, 1); + EXPECT_EQ(s + 4, upb_BufToUint64(s, s + 4, &val)); + EXPECT_EQ(val, 1234); + EXPECT_EQ(s + 4, upb_BufToUint64(s, s + 5, &val)); + EXPECT_EQ(val, 1234); + + const char* t = "42.6"; + EXPECT_EQ(t + 1, upb_BufToUint64(t, t + 1, &val)); + EXPECT_EQ(val, 4); + EXPECT_EQ(t + 2, upb_BufToUint64(t, t + 2, &val)); + EXPECT_EQ(val, 42); + EXPECT_EQ(t + 2, upb_BufToUint64(t, t + 3, &val)); + EXPECT_EQ(val, 42); + + // Integer overflow + const char* u = "1000000000000000000000000000000"; + EXPECT_EQ(NULL, upb_BufToUint64(u, u + strlen(u), &val)); + + // Not an integer + const char* v = "foobar"; + EXPECT_EQ(v, upb_BufToUint64(v, v + strlen(v), &val)); + + const uint64_t values[] = { + std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::min(), + }; + for (size_t i = 0; i < 
ABSL_ARRAYSIZE(values); i++) { + std::string v = absl::StrCat(values[i]); + const char* ptr = v.c_str(); + const char* end = ptr + strlen(ptr); + EXPECT_EQ(end, upb_BufToUint64(ptr, end, &val)); + EXPECT_EQ(val, values[i]); + } +} + +TEST(AtoiTest, Int64) { + int64_t val; + bool neg; + + const char* s = "1234z"; + EXPECT_EQ(s + 1, upb_BufToInt64(s, s + 1, &val, &neg)); + EXPECT_EQ(val, 1); + EXPECT_EQ(neg, false); + EXPECT_EQ(s + 4, upb_BufToInt64(s, s + 4, &val, NULL)); + EXPECT_EQ(val, 1234); + EXPECT_EQ(s + 4, upb_BufToInt64(s, s + 5, &val, NULL)); + EXPECT_EQ(val, 1234); + + const char* t = "-42.6"; + EXPECT_EQ(t + 2, upb_BufToInt64(t, t + 2, &val, &neg)); + EXPECT_EQ(val, -4); + EXPECT_EQ(neg, true); + EXPECT_EQ(t + 3, upb_BufToInt64(t, t + 3, &val, NULL)); + EXPECT_EQ(val, -42); + EXPECT_EQ(neg, true); + EXPECT_EQ(t + 3, upb_BufToInt64(t, t + 5, &val, NULL)); + EXPECT_EQ(val, -42); + + const int64_t values[] = { + std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::min(), + std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::min(), + }; + for (size_t i = 0; i < ABSL_ARRAYSIZE(values); i++) { + std::string v = absl::StrCat(values[i]); + const char* ptr = v.c_str(); + const char* end = ptr + strlen(ptr); + EXPECT_EQ(end, upb_BufToInt64(ptr, end, &val, NULL)); + EXPECT_EQ(val, values[i]); + } +} diff --git a/upb/json_decode.c b/upb/json_decode.c index 556541ccff..1eed62f625 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -36,6 +36,7 @@ #include #include "upb/encode.h" +#include "upb/internal/atoi.h" #include "upb/internal/unicode.h" #include "upb/reflection.h" @@ -609,48 +610,19 @@ static size_t jsondec_base64(jsondec* d, upb_StringView str) { /* Low-level integer parsing **************************************************/ -/* We use these hand-written routines instead of strto[u]l() because the "long - * long" variants aren't in c89. Also our version allows setting a ptr limit. 
*/ - static const char* jsondec_buftouint64(jsondec* d, const char* ptr, const char* end, uint64_t* val) { - uint64_t u64 = 0; - while (ptr < end) { - unsigned ch = *ptr - '0'; - if (ch >= 10) break; - if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) { - jsondec_err(d, "Integer overflow"); - } - u64 *= 10; - u64 += ch; - ptr++; - } - - *val = u64; - return ptr; + const char* out = upb_BufToUint64(ptr, end, val); + if (!out) jsondec_err(d, "Integer overflow"); + return out; } static const char* jsondec_buftoint64(jsondec* d, const char* ptr, const char* end, int64_t* val, bool* is_neg) { - bool neg = false; - uint64_t u64; - - if (ptr != end && *ptr == '-') { - ptr++; - neg = true; - } - - ptr = jsondec_buftouint64(d, ptr, end, &u64); - if (u64 > (uint64_t)INT64_MAX + neg) { - jsondec_err(d, "Integer overflow"); - } - - *val = neg ? -u64 : u64; - if (is_neg) { - *is_neg = neg; - } - return ptr; + const char* out = upb_BufToInt64(ptr, end, val, is_neg); + if (!out) jsondec_err(d, "Integer overflow"); + return out; } static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {