Skip to content

Commit

Permalink
Expand string_view support to str, bytes, memoryview
Browse files Browse the repository at this point in the history
1. Allows constructing a str or bytes implicitly from a string_view;
   this is essentially a small shortcut allowing a caller to write
   `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`.

2. Allows implicit conversion *to* string_view from py::bytes -- this
   saves a fair bit more as currently there is no simple way to get such
   a view of the bytes without copying it (or resorting to Python API
   calls).

   (This is not done for `str` because when the str contains unicode we
   have to allocate to a temporary and so there might not be some string
   data we can properly view without owning.)

3. Allows `memoryview::from_memory` to accept a string_view.  As with
   the other from_memory calls, it's entirely your responsibility to
   keep it alive.

This also required moving the string_view availability detection into
detail/common.h because this PR needs it in pytypes.h, which is higher
up the include chain than cast.h where it was being detected currently.
  • Loading branch information
jagerman committed Dec 2, 2021
1 parent 70a58c5 commit 0c7d446
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 17 deletions.
17 changes: 0 additions & 17 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,6 @@
#include <utility>
#include <vector>

#if defined(PYBIND11_CPP17)
# if defined(__has_include)
# if __has_include(<string_view>)
# define PYBIND11_HAS_STRING_VIEW
# endif
# elif defined(_MSC_VER)
# define PYBIND11_HAS_STRING_VIEW
# endif
#endif
#ifdef PYBIND11_HAS_STRING_VIEW
#include <string_view>
#endif

#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif

PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
PYBIND11_NAMESPACE_BEGIN(detail)

Expand Down
18 changes: 18 additions & 0 deletions include/pybind11/detail/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,21 @@
# define PYBIND11_HAS_VARIANT 1
#endif

#if defined(PYBIND11_CPP17)
# if defined(__has_include)
# if __has_include(<string_view>)
# define PYBIND11_HAS_STRING_VIEW
# endif
# elif defined(_MSC_VER)
# define PYBIND11_HAS_STRING_VIEW
# endif
#endif

#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif


#include <Python.h>
#include <frameobject.h>
#include <pythread.h>
Expand Down Expand Up @@ -229,6 +244,9 @@
# include <version>
# endif
#endif
#ifdef PYBIND11_HAS_STRING_VIEW
# include <string_view>
#endif

// #define PYBIND11_STR_LEGACY_PERMISSIVE
// If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
Expand Down
37 changes: 37 additions & 0 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,18 @@ class str : public object {
// NOLINTNEXTLINE(google-explicit-constructor)
str(const std::string &s) : str(s.data(), s.size()) { }

#ifdef PYBIND11_HAS_STRING_VIEW
// NOLINTNEXTLINE(google-explicit-constructor)
str(std::string_view s) : str(s.data(), s.size()) { }

# ifdef PYBIND11_HAS_U8STRING
// reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char)
// NOLINTNEXTLINE(google-explicit-constructor)
str(std::u8string_view s) : str(reinterpret_cast<const char*>(s.data()), s.size()) { }
# endif

#endif

explicit str(const bytes &b);

/** \rst
Expand Down Expand Up @@ -1167,6 +1179,24 @@ class bytes : public object {
pybind11_fail("Unable to extract bytes contents!");
return std::string(buffer, (size_t) length);
}

#ifdef PYBIND11_HAS_STRING_VIEW
// NOLINTNEXTLINE(google-explicit-constructor)
bytes(std::string_view s) : bytes(s.data(), s.size()) { }

// Obtain a string view that views the current `bytes` buffer value. Note that this is only
// valid so long as the `bytes` instance remains alive and so generally should not outlive the
// lifetime of the `bytes` instance.
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string_view() const {
char *buffer = nullptr;
ssize_t length = 0;
if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length))
pybind11_fail("Unable to extract bytes contents!");
return {buffer, static_cast<size_t>(length)};
}
#endif

};
// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
// are included in the doxygen group; close here and reopen after as a workaround
Expand Down Expand Up @@ -1714,6 +1744,13 @@ class memoryview : public object {
static memoryview from_memory(const void *mem, ssize_t size) {
return memoryview::from_memory(const_cast<void*>(mem), size, true);
}

#ifdef PYBIND11_HAS_STRING_VIEW
static memoryview from_memory(std::string_view mem) {
return from_memory(const_cast<char*>(mem.data()), static_cast<ssize_t>(mem.size()), true);
}
#endif

#endif
};

Expand Down
13 changes: 13 additions & 0 deletions tests/test_builtin_casters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,23 @@ TEST_SUBMODULE(builtin_casters, m) {
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });

// The inner lambdas here are to also test implicit conversion
using namespace std::literals;
m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); });
m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); });
m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); });
#if PY_MAJOR_VERSION >= 3
m.def("string_view_memoryview", []() {
static constexpr auto val = "Have some \360\237\216\202"sv;
return py::memoryview::from_memory(val);
});
#endif

# ifdef PYBIND11_HAS_U8STRING
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; });
# endif
#endif

Expand Down
8 changes: 8 additions & 0 deletions tests/test_builtin_casters.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,14 @@ def test_string_view(capture):
"""
)

assert m.string_view_bytes() == b"abc \x80\x80 def"
assert m.string_view_str() == u"abc ‽ def"
assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def"
if hasattr(m, "has_u8string"):
assert m.string_view8_str() == u"abc ‽ def"
if not env.PY2:
assert m.string_view_memoryview() == "Have some 🎂".encode()


def test_integer_casting():
"""Issue #929 - out-of-range integer values shouldn't be accepted"""
Expand Down

0 comments on commit 0c7d446

Please sign in to comment.