Skip to content

Commit

Permalink
pythongh-124064: Fix -Wconversion warnings in Parser/string_parser.c (python#124204)
Browse files Browse the repository at this point in the history

Fix integer overflow check in decode_unicode_with_escapes(): use
PY_SSIZE_T_MAX instead of SIZE_MAX.
  • Loading branch information
vstinner authored and savannahostrowski committed Sep 22, 2024
1 parent 96c03b3 commit 8f79322
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions Parser/string_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
// to avoid showing the warning twice.
return 0;
}
unsigned char c = *first_invalid_escape;
unsigned char c = (unsigned char)*first_invalid_escape;
if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) {
// in this case the tokenizer has already emitted a warning,
// see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
Expand Down Expand Up @@ -90,12 +90,12 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
const char *end;

/* check for integer overflow */
if (len > SIZE_MAX / 6) {
if (len > (size_t)PY_SSIZE_T_MAX / 6) {
return NULL;
}
/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
u = PyBytes_FromStringAndSize((char *)NULL, (Py_ssize_t)len * 6);
if (u == NULL) {
return NULL;
}
Expand Down Expand Up @@ -142,11 +142,11 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
*p++ = *s++;
}
}
len = p - buf;
len = (size_t)(p - buf);
s = buf;

const char *first_invalid_escape;
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);

// HACK: later we can simply pass the line no, since we don't preserve the tokens
// when we are decoding the string but we preserve the line numbers.
Expand Down Expand Up @@ -185,7 +185,7 @@ PyObject *
_PyPegen_decode_string(Parser *p, int raw, const char *s, size_t len, Token *t)
{
if (raw) {
return PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
return PyUnicode_DecodeUTF8Stateful(s, (Py_ssize_t)len, NULL, NULL);
}
return decode_unicode_with_escapes(p, s, len, t);
}
Expand Down Expand Up @@ -274,9 +274,9 @@ _PyPegen_parse_string(Parser *p, Token *t)
}
}
if (rawmode) {
return PyBytes_FromStringAndSize(s, len);
return PyBytes_FromStringAndSize(s, (Py_ssize_t)len);
}
return decode_bytes_with_escapes(p, s, len, t);
return decode_bytes_with_escapes(p, s, (Py_ssize_t)len, t);
}
return _PyPegen_decode_string(p, rawmode, s, len, t);
}

0 comments on commit 8f79322

Please sign in to comment.