From 0c7d4467053504ae7701f6bf87a891804003b1ca Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Wed, 1 Dec 2021 11:07:18 -0400 Subject: [PATCH] Expand string_view support to str, bytes, memoryview 1. Allows constructing a str or bytes implicitly from a string_view; this is essentially a small shortcut allowing a caller to write `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`. 2. Allows implicit conversion *to* string_view from py::bytes -- this saves a fair bit more as currently there is no simple way to get such a view of the bytes without copying it (or resorting to Python API calls). (This is not done for `str` because when the str contains unicode we have to allocate to a temporary and so there might not be some string data we can properly view without owning.) 3. Allows `memoryview::from_memory` to accept a string_view. As with the other from_memory calls, it's entirely your responsibility to keep it alive. This also required moving the string_view availability detection into detail/common.h because this PR needs it in pytypes.h, which is higher up the include chain than cast.h where it was being detected currently. --- include/pybind11/cast.h | 17 --------------- include/pybind11/detail/common.h | 18 ++++++++++++++++ include/pybind11/pytypes.h | 37 ++++++++++++++++++++++++++++++++ tests/test_builtin_casters.cpp | 13 +++++++++++ tests/test_builtin_casters.py | 8 +++++++ 5 files changed, 76 insertions(+), 17 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 097d41bdaf4..01dc5df73f3 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -27,23 +27,6 @@ #include #include -#if defined(PYBIND11_CPP17) -# if defined(__has_include) -# if __has_include() -# define PYBIND11_HAS_STRING_VIEW -# endif -# elif defined(_MSC_VER) -# define PYBIND11_HAS_STRING_VIEW -# endif -#endif -#ifdef PYBIND11_HAS_STRING_VIEW -#include -#endif - -#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L -# define PYBIND11_HAS_U8STRING -#endif - PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 862451fd132..73821c742fe 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -183,6 +183,21 @@ # define PYBIND11_HAS_VARIANT 1 #endif +#if defined(PYBIND11_CPP17) +# if defined(__has_include) +# if __has_include() +# define PYBIND11_HAS_STRING_VIEW +# endif +# elif defined(_MSC_VER) +# define PYBIND11_HAS_STRING_VIEW +# endif +#endif + +#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L +# define PYBIND11_HAS_U8STRING +#endif + + #include #include #include @@ -229,6 +244,9 @@ # include # endif #endif +#ifdef PYBIND11_HAS_STRING_VIEW +# include +#endif // #define PYBIND11_STR_LEGACY_PERMISSIVE // If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index a08fd8ceb62..c395bdf27d4 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1085,6 +1085,18 @@ class str : public object { // NOLINTNEXTLINE(google-explicit-constructor) str(const std::string &s) : str(s.data(), s.size()) { } +#ifdef PYBIND11_HAS_STRING_VIEW + // NOLINTNEXTLINE(google-explicit-constructor) + str(std::string_view s) : str(s.data(), s.size()) { } + +# ifdef PYBIND11_HAS_U8STRING + // reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char) + // NOLINTNEXTLINE(google-explicit-constructor) + str(std::u8string_view s) : str(reinterpret_cast(s.data()), s.size()) { } +# endif + +#endif + explicit str(const bytes &b); /** \rst @@ -1167,6 +1179,24 @@ class bytes : public object { pybind11_fail("Unable to extract bytes contents!"); return std::string(buffer, (size_t) length); } + +#ifdef PYBIND11_HAS_STRING_VIEW + // NOLINTNEXTLINE(google-explicit-constructor) + bytes(std::string_view s) : bytes(s.data(), s.size()) { } + + // Obtain a string view that views the current `bytes` buffer value. Note that this is only + // valid so long as the `bytes` instance remains alive and so generally should not outlive the + // lifetime of the `bytes` instance. + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::string_view() const { + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + return {buffer, static_cast(length)}; + } +#endif + }; // Note: breathe >= 4.17.0 will fail to build docs if the below two constructors // are included in the doxygen group; close here and reopen after as a workaround @@ -1714,6 +1744,13 @@ class memoryview : public object { static memoryview from_memory(const void *mem, ssize_t size) { return memoryview::from_memory(const_cast(mem), size, true); } + +#ifdef PYBIND11_HAS_STRING_VIEW + static memoryview from_memory(std::string_view mem) { + return from_memory(const_cast(mem.data()), static_cast(mem.size()), true); + } +#endif + #endif }; diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index 71c778e8e27..ee24131e243 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -140,10 +140,23 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); }); m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); }); + // The inner lambdas here are to also test implicit conversion + using namespace std::literals; + m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); }); + m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); }); + m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); }); +#if PY_MAJOR_VERSION >= 3 + m.def("string_view_memoryview", []() { + static constexpr auto val = "Have some \360\237\216\202"sv; + return py::memoryview::from_memory(val); + }); +#endif + # ifdef PYBIND11_HAS_U8STRING m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); }); m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; }); m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); }); + m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; }); # endif #endif diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py index 9c5e17a68ea..6d266b42135 100644 --- a/tests/test_builtin_casters.py +++ b/tests/test_builtin_casters.py @@ -206,6 +206,14 @@ def test_string_view(capture): """ ) + assert m.string_view_bytes() == b"abc \x80\x80 def" + assert m.string_view_str() == u"abc ‽ def" + assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def" + if hasattr(m, "has_u8string"): + assert m.string_view8_str() == u"abc ‽ def" + if not env.PY2: + assert m.string_view_memoryview() == "Have some 🎂".encode() + def test_integer_casting(): """Issue #929 - out-of-range integer values shouldn't be accepted"""