Skip to content

Commit

Permalink
fix: improve bytes to str decoding error handling (#4294)
Browse files Browse the repository at this point in the history
* (bugfix): Improve bytes to str decoding error handling

* regroup test

* Further broaden tests

* Add another decode error test

* Fix bug in tests

* Reviewer suggestions
  • Loading branch information
Skylion007 authored Oct 29, 2022
1 parent fcb5554 commit b07223f
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
9 changes: 9 additions & 0 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1432,6 +1432,9 @@ class str : public object {
str(const char *c, const SzType &n)
: object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
if (!m_ptr) {
if (PyErr_Occurred()) {
throw error_already_set();
}
pybind11_fail("Could not allocate string object!");
}
}
Expand All @@ -1441,6 +1444,9 @@ class str : public object {
// NOLINTNEXTLINE(google-explicit-constructor)
str(const char *c = "") : object(PyUnicode_FromString(c), stolen_t{}) {
if (!m_ptr) {
if (PyErr_Occurred()) {
throw error_already_set();
}
pybind11_fail("Could not allocate string object!");
}
}
Expand Down Expand Up @@ -1598,6 +1604,9 @@ inline str::str(const bytes &b) {
}
auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, length));
if (!obj) {
if (PyErr_Occurred()) {
throw error_already_set();
}
pybind11_fail("Could not allocate string object!");
}
m_ptr = obj.release().ptr();
Expand Down
5 changes: 5 additions & 0 deletions tests/test_pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,12 @@ TEST_SUBMODULE(pytypes, m) {
m.def("str_from_char_ssize_t", []() { return py::str{"red", (py::ssize_t) 3}; });
m.def("str_from_char_size_t", []() { return py::str{"blue", (py::size_t) 4}; });
m.def("str_from_string", []() { return py::str(std::string("baz")); });
m.def("str_from_std_string_input", [](const std::string &stri) { return py::str(stri); });
m.def("str_from_cstr_input", [](const char *c_str) { return py::str(c_str); });
m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
m.def("str_from_bytes_input",
[](const py::bytes &encoded_str) { return py::str(encoded_str); });

m.def("str_from_object", [](const py::object &obj) { return py::str(obj); });
m.def("repr_from_object", [](const py::object &obj) { return py::repr(obj); });
m.def("str_from_handle", [](py::handle h) { return py::str(h); });
Expand Down
14 changes: 14 additions & 0 deletions tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,20 @@ def __repr__(self):
m.str_from_string_from_str(ucs_surrogates_str)


@pytest.mark.parametrize(
    "func",
    [m.str_from_bytes_input, m.str_from_cstr_input, m.str_from_std_string_input],
)
def test_surrogate_pairs_unicode_error(func):
    """Each str-constructing binding must raise UnicodeDecodeError on bad input.

    The payload is a surrogate pair encoded with the ``surrogatepass`` error
    handler, so the resulting bytes are passed to the binding as-is.
    """
    surrogate_bytes = "\ud83d\ude4f".encode("utf-8", "surrogatepass")
    with pytest.raises(UnicodeDecodeError):
        func(surrogate_bytes)


def test_bytes(doc):
assert m.bytes_from_char_ssize_t().decode() == "green"
assert m.bytes_from_char_size_t().decode() == "purple"
Expand Down

0 comments on commit b07223f

Please sign in to comment.