Skip to content

Commit

Permalink
split out the json string-to-int functions for general use
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 469509635
  • Loading branch information
ericsalo authored and copybara-github committed Aug 23, 2022
1 parent 1216643 commit c67021f
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 35 deletions.
24 changes: 24 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ cc_library(
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
":atoi_internal",
":encode_internal",
":port",
":reflection",
Expand Down Expand Up @@ -589,6 +590,17 @@ upb_proto_reflection_library(
deps = ["@com_google_protobuf//:timestamp_proto"],
)

# Unit tests for the string-to-integer helpers provided by ":atoi_internal".
cc_test(
name = "atoi_test",
srcs = ["upb/internal/atoi_test.cc"],
copts = UPB_DEFAULT_CPPOPTS,
deps = [
":atoi_internal",
# The test formats boundary values with absl::StrCat().
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
)

cc_test(
name = "test_cpp",
srcs = ["upb/test_cpp.cc"],
Expand Down Expand Up @@ -768,6 +780,15 @@ cc_library(
deps = [":port"],
)

# Length-bounded string-to-integer parsing (upb_BufToUint64 / upb_BufToInt64),
# split out of the JSON decoder so other code in this repository can reuse it.
cc_library(
name = "atoi_internal",
srcs = ["upb/internal/atoi.c"],
hdrs = ["upb/internal/atoi.h"],
copts = UPB_DEFAULT_COPTS,
# Internal helper: restricted to packages inside this repository.
visibility = ["//:__subpackages__"],
deps = [":port"],
)

cc_library(
name = "decode_internal",
srcs = [
Expand Down Expand Up @@ -839,6 +860,7 @@ upb_amalgamation(
libs = [
":arena_internal",
":array_internal",
":atoi_internal",
":collections",
":decode_internal",
":descriptor_upb_proto",
Expand Down Expand Up @@ -870,6 +892,7 @@ upb_amalgamation(
libs = [
":arena_internal",
":array_internal",
":atoi_internal",
":collections",
":decode_internal",
":descriptor_upb_proto",
Expand Down Expand Up @@ -904,6 +927,7 @@ upb_amalgamation(
libs = [
":arena_internal",
":array_internal",
":atoi_internal",
":collections",
":decode_internal",
":descriptor_upb_proto",
Expand Down
68 changes: 68 additions & 0 deletions upb/internal/atoi.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "upb/internal/atoi.h"

// Must be last.
#include "upb/port_def.inc"

// Parses the run of ASCII decimal digits at the start of [ptr, end) as an
// unsigned 64-bit value.
//
// On success, stores the value in *val and returns a pointer to the first
// byte that was not consumed (either `end` or the first non-digit). If the
// range does not start with a digit, nothing is consumed: *val is set to 0
// and the original `ptr` is returned.
//
// Returns NULL, leaving *val untouched, if the digits do not fit in uint64_t.
const char* upb_BufToUint64(const char* ptr, const char* end, uint64_t* val) {
  uint64_t acc = 0;

  for (; ptr < end; ptr++) {
    // Bytes below '0' wrap around to a huge unsigned value, so a single
    // comparison rejects everything outside '0'..'9'.
    const unsigned digit = *ptr - '0';
    if (digit > 9) break;

    // Reject before each step that would wrap: first the multiply by ten,
    // then the addition of the new digit.
    if (acc > UINT64_MAX / 10) return NULL;  // integer overflow
    acc *= 10;
    if (acc > UINT64_MAX - digit) return NULL;  // integer overflow
    acc += digit;
  }

  *val = acc;
  return ptr;
}

// Parses an optional leading '-' followed by ASCII decimal digits from
// [ptr, end) as a signed 64-bit value.
//
// On success, stores the value in *val, stores whether a '-' was consumed in
// *is_neg (when is_neg is non-NULL), and returns a pointer to the first byte
// that was not consumed. A '-' with no digits after it is not rejected here:
// it yields 0 with *is_neg == true.
//
// Returns NULL, leaving *val and *is_neg untouched, if the value does not fit
// in int64_t.
const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val,
                           bool* is_neg) {
  bool neg = false;
  uint64_t u64;

  if (ptr != end && *ptr == '-') {
    ptr++;
    neg = true;
  }

  ptr = upb_BufToUint64(ptr, end, &u64);
  // The largest accepted magnitude is INT64_MAX for non-negative input and
  // INT64_MAX + 1 (i.e. |INT64_MIN|) for negative input.
  if (!ptr || u64 > (uint64_t)INT64_MAX + neg) {
    return NULL;  // integer overflow
  }

  if (neg && u64 != 0) {
    // Negate in the signed domain. Converting an out-of-range uint64_t such as
    // -u64 to int64_t is implementation-defined, whereas u64 - 1 is known to
    // be <= INT64_MAX here, so every step below is fully defined (including
    // for INT64_MIN).
    *val = -(int64_t)(u64 - 1) - 1;
  } else {
    *val = (int64_t)u64;
  }
  if (is_neg) *is_neg = neg;
  return ptr;
}
53 changes: 53 additions & 0 deletions upb/internal/atoi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef UPB_INTERNAL_ATOI_H_
#define UPB_INTERNAL_ATOI_H_

// Must be last.
#include "upb/port_def.inc"

#ifdef __cplusplus
extern "C" {
#endif

// We use these hand-written routines instead of strto[u]l() because the "long
// long" variants aren't in c89. Also our version allows setting a ptr limit.
// Return the new position of the pointer after parsing the int, or NULL on
// integer overflow.

// Parses the decimal digits at the start of [ptr, end) into *val. Parsing
// stops at `end` or at the first non-digit byte; if there are no digits at
// all, *val is 0 and the returned pointer equals `ptr`. On overflow, *val is
// not written.
const char* upb_BufToUint64(const char* ptr, const char* end, uint64_t* val);
// Like upb_BufToUint64() but accepts one optional leading '-'. If is_neg is
// non-NULL it receives whether a '-' was consumed. On overflow, neither *val
// nor *is_neg is written.
const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val,
bool* is_neg);

#ifdef __cplusplus
} /* extern "C" */
#endif

#include "upb/port_undef.inc"

#endif /* UPB_INTERNAL_ATOI_H_ */
109 changes: 109 additions & 0 deletions upb/internal/atoi_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright (c) 2009-2022, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "upb/internal/atoi.h"

#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"

// Exercises upb_BufToUint64(): the end-pointer limit, stopping at the first
// non-digit, overflow reporting, non-numeric input and round-tripping of the
// uint64_t extremes.
TEST(AtoiTest, Uint64) {
  uint64_t parsed;

  // Digits followed by a letter; vary how much of the buffer is visible.
  const char* alpha_tail = "1234z";
  EXPECT_EQ(alpha_tail + 1, upb_BufToUint64(alpha_tail, alpha_tail + 1, &parsed));
  EXPECT_EQ(parsed, 1);
  EXPECT_EQ(alpha_tail + 4, upb_BufToUint64(alpha_tail, alpha_tail + 4, &parsed));
  EXPECT_EQ(parsed, 1234);
  EXPECT_EQ(alpha_tail + 4, upb_BufToUint64(alpha_tail, alpha_tail + 5, &parsed));
  EXPECT_EQ(parsed, 1234);

  // Digits followed by a fractional part; parsing must stop at the '.'.
  const char* frac_tail = "42.6";
  EXPECT_EQ(frac_tail + 1, upb_BufToUint64(frac_tail, frac_tail + 1, &parsed));
  EXPECT_EQ(parsed, 4);
  EXPECT_EQ(frac_tail + 2, upb_BufToUint64(frac_tail, frac_tail + 2, &parsed));
  EXPECT_EQ(parsed, 42);
  EXPECT_EQ(frac_tail + 2, upb_BufToUint64(frac_tail, frac_tail + 3, &parsed));
  EXPECT_EQ(parsed, 42);

  // Integer overflow
  const char* huge = "1000000000000000000000000000000";
  EXPECT_EQ(NULL, upb_BufToUint64(huge, huge + strlen(huge), &parsed));

  // Not an integer
  const char* word = "foobar";
  EXPECT_EQ(word, upb_BufToUint64(word, word + strlen(word), &parsed));

  // The extremes of the type must survive a format/parse round trip.
  const uint64_t limits[] = {
      std::numeric_limits<uint64_t>::max(),
      std::numeric_limits<uint64_t>::min(),
  };
  for (const uint64_t expected : limits) {
    const std::string text = absl::StrCat(expected);
    const char* begin = text.c_str();
    const char* limit = begin + text.size();
    EXPECT_EQ(limit, upb_BufToUint64(begin, limit, &parsed));
    EXPECT_EQ(parsed, expected);
  }
}

// Exercises upb_BufToInt64(): the end-pointer limit, the optional leading '-',
// the is_neg out-parameter (including passing NULL for it), overflow reporting
// on both sides of the range, and round-tripping of the 32/64-bit extremes.
TEST(AtoiTest, Int64) {
  int64_t val;
  bool neg;

  const char* s = "1234z";
  EXPECT_EQ(s + 1, upb_BufToInt64(s, s + 1, &val, &neg));
  EXPECT_EQ(val, 1);
  EXPECT_EQ(neg, false);
  EXPECT_EQ(s + 4, upb_BufToInt64(s, s + 4, &val, NULL));
  EXPECT_EQ(val, 1234);
  EXPECT_EQ(s + 4, upb_BufToInt64(s, s + 5, &val, NULL));
  EXPECT_EQ(val, 1234);

  const char* t = "-42.6";
  EXPECT_EQ(t + 2, upb_BufToInt64(t, t + 2, &val, &neg));
  EXPECT_EQ(val, -4);
  EXPECT_EQ(neg, true);
  // Reset the flag and pass it in again so the check below observes what this
  // call wrote, not the value left over from the previous call.
  neg = false;
  EXPECT_EQ(t + 3, upb_BufToInt64(t, t + 3, &val, &neg));
  EXPECT_EQ(val, -42);
  EXPECT_EQ(neg, true);
  EXPECT_EQ(t + 3, upb_BufToInt64(t, t + 5, &val, NULL));
  EXPECT_EQ(val, -42);

  // Integer overflow: one past INT64_MAX and one below INT64_MIN.
  const char* too_big = "9223372036854775808";
  EXPECT_EQ(nullptr,
            upb_BufToInt64(too_big, too_big + strlen(too_big), &val, NULL));
  const char* too_small = "-9223372036854775809";
  EXPECT_EQ(nullptr, upb_BufToInt64(too_small, too_small + strlen(too_small),
                                    &val, NULL));

  const int64_t values[] = {
      std::numeric_limits<int32_t>::max(),
      std::numeric_limits<int32_t>::min(),
      std::numeric_limits<int64_t>::max(),
      std::numeric_limits<int64_t>::min(),
  };
  for (size_t i = 0; i < ABSL_ARRAYSIZE(values); i++) {
    std::string v = absl::StrCat(values[i]);
    const char* ptr = v.c_str();
    const char* end = ptr + strlen(ptr);
    EXPECT_EQ(end, upb_BufToInt64(ptr, end, &val, NULL));
    EXPECT_EQ(val, values[i]);
  }
}
42 changes: 7 additions & 35 deletions upb/json_decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <string.h>

#include "upb/encode.h"
#include "upb/internal/atoi.h"
#include "upb/internal/unicode.h"
#include "upb/reflection.h"

Expand Down Expand Up @@ -609,48 +610,19 @@ static size_t jsondec_base64(jsondec* d, upb_StringView str) {

/* Low-level integer parsing **************************************************/

/* We use these hand-written routines instead of strto[u]l() because the "long
* long" variants aren't in c89. Also our version allows setting a ptr limit. */

static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
const char* end, uint64_t* val) {
uint64_t u64 = 0;
while (ptr < end) {
unsigned ch = *ptr - '0';
if (ch >= 10) break;
if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) {
jsondec_err(d, "Integer overflow");
}
u64 *= 10;
u64 += ch;
ptr++;
}

*val = u64;
return ptr;
const char* out = upb_BufToUint64(ptr, end, val);
if (!out) jsondec_err(d, "Integer overflow");
return out;
}

static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
const char* end, int64_t* val,
bool* is_neg) {
bool neg = false;
uint64_t u64;

if (ptr != end && *ptr == '-') {
ptr++;
neg = true;
}

ptr = jsondec_buftouint64(d, ptr, end, &u64);
if (u64 > (uint64_t)INT64_MAX + neg) {
jsondec_err(d, "Integer overflow");
}

*val = neg ? -u64 : u64;
if (is_neg) {
*is_neg = neg;
}
return ptr;
const char* out = upb_BufToInt64(ptr, end, val, is_neg);
if (!out) jsondec_err(d, "Integer overflow");
return out;
}

static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
Expand Down

0 comments on commit c67021f

Please sign in to comment.