Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support string_view initialization and lookup in TfToken #3053

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions pxr/base/tf/testenv/token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ Test_TfToken()
TF_AXIOM(TfToken(strVec2[1]) == tokVec[1]);
TF_AXIOM(TfToken(strVec2[2]) == tokVec[2]);

// Test safe construction from nullptr
TF_AXIOM(TfToken(nullptr) == TfToken());

// Test Find via string_view
TF_AXIOM(TfToken::Find("a_string_that_is_very_unlikely_to_exist") ==
TfToken());
TF_AXIOM(TfToken("string1") == TfToken::Find("string1"));

return true;
}

Expand Down
108 changes: 51 additions & 57 deletions pxr/base/tf/token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@

#include <atomic>
#include <algorithm>
#include <cstring>
#include <new>
#include <ostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

Expand All @@ -45,24 +45,25 @@ struct Tf_TokenRegistry
struct _Eq {
inline size_t operator()(TfToken::_Rep const &rep1,
TfToken::_Rep const &rep2) const {
return (*this)(rep1._cstr, rep2._cstr);
return (*this)(rep1._view, rep2._view);
}
inline bool operator()(const char* s1, const char* s2) const {
return !strcmp(s1, s2);
inline bool operator()(std::string_view s1,
std::string_view s2) const {
return s1 == s2;
}
};

template <int Mul>
struct _Hash {
inline size_t operator()(TfToken::_Rep const &rep) const {
return (*this)(rep._cstr);
return (*this)(rep._view);
}
inline size_t operator()(char const *s) const {
inline size_t operator()(std::string_view sv) const {
// Matches STL original implementation for now. Switch to something
// better?
unsigned int h = 0;
for (; *s; ++s)
h = Mul * h + *s;
for (const auto c : sv)
h = Mul * h + c;
return h;
}
};
Expand Down Expand Up @@ -92,27 +93,17 @@ struct Tf_TokenRegistry
return TfSingleton<Tf_TokenRegistry>::GetInstance();
}

inline size_t _GetSetNum(char const *s) const {
inline size_t _GetSetNum(std::string_view sv) const {
_OuterHash h;
return h(s) & _SetMask;
return h(sv) & _SetMask;
}

inline _RepPtrAndBits _GetPtrChar(char const *s, bool makeImmortal) {
return _GetPtrImpl(s, makeImmortal);
}
inline _RepPtrAndBits _GetPtrStr(string const &s, bool makeImmortal) {
// Explicitly specify template arg -- if unspecified, it will deduce
// string by-value, which we don't want.
return _GetPtrImpl<string const &>(s, makeImmortal);
inline _RepPtrAndBits _GetPtr(std::string_view sv, bool makeImmortal) {
return _GetPtrImpl(sv, makeImmortal);
}

inline _RepPtrAndBits _FindPtrChar(char const *s) const {
return _FindPtrImpl(s);
}
inline _RepPtrAndBits _FindPtrStr(string const &s) const {
// Explicitly specify template arg -- if unspecified, it will deduce
// string by-value, which we don't want.
return _FindPtrImpl<string const &>(s);
inline _RepPtrAndBits _FindPtr(std::string_view sv) const {
return _FindPtrImpl(sv);
}

void _DumpStats() const {
Expand All @@ -139,25 +130,21 @@ struct Tf_TokenRegistry

private:

inline bool _IsEmpty(char const *s) const { return !s || !s[0]; }
inline bool _IsEmpty(string const &s) const { return s.empty(); }

inline char const *_CStr(char const *s) const { return s; }
inline char const *_CStr(string const &s) const { return s.c_str(); }

static TfToken::_Rep _LookupRep(char const *cstr) {
static TfToken::_Rep _LookupRep(std::string_view sv) {
TfToken::_Rep ret;
ret._cstr = cstr;
ret._view = sv;
return ret;
}

static inline uint64_t _ComputeCompareCode(char const *p) {
static inline uint64_t _ComputeCompareCode(std::string_view sv) {
uint64_t compCode = 0;
int nchars = sizeof(compCode);
auto it = std::cbegin(sv);
while (nchars--) {
compCode |= static_cast<uint64_t>(*p) << (8*nchars);
if (*p) {
++p;
const bool atEnd = it == std::cend(sv);
compCode |= static_cast<uint64_t>(atEnd ? '\0' : *it) << (8*nchars);
if (!atEnd) {
++it;
}
}
return compCode;
Expand All @@ -167,19 +154,18 @@ struct Tf_TokenRegistry
* Either finds a key that is stringwise-equal to s,
* or puts a new _Rep into the map for s.
*/
template <class Str>
inline _RepPtrAndBits _GetPtrImpl(Str s, bool makeImmortal) {
if (_IsEmpty(s)) {
inline _RepPtrAndBits _GetPtrImpl(std::string_view sv, bool makeImmortal) {
if (sv.empty()) {
return _RepPtrAndBits();
}

unsigned setNum = _GetSetNum(_CStr(s));
unsigned setNum = _GetSetNum(sv);
_Set &set = _sets[setNum];

tbb::spin_mutex::scoped_lock lock(set.mutex);

// Insert or lookup an existing.
_RepSet::iterator iter = set.reps.find(_LookupRep(_CStr(s)));
_RepSet::iterator iter = set.reps.find(_LookupRep(sv));
if (iter != set.reps.end()) {
_RepPtr rep = &(*iter);
bool isCounted = rep->_refCount.load(std::memory_order_relaxed) & 1;
Expand Down Expand Up @@ -243,9 +229,9 @@ struct Tf_TokenRegistry

// Create the new entry.
TfAutoMallocTag noname("TfToken");
uint64_t compareCode = _ComputeCompareCode(_CStr(s));
uint64_t compareCode = _ComputeCompareCode(sv);
_RepPtr rep = &(*set.reps.insert(
TfToken::_Rep(s, setNum, compareCode)).first);
TfToken::_Rep(sv, setNum, compareCode)).first);
// 3, because 1 for counted bit plus 2 for one refcount. We can
// store relaxed here since our lock on the set's mutex provides
// synchronization.
Expand All @@ -255,18 +241,17 @@ struct Tf_TokenRegistry
}
}

template <class Str>
_RepPtrAndBits _FindPtrImpl(Str s) const {
if (_IsEmpty(s)) {
_RepPtrAndBits _FindPtrImpl(std::string_view sv) const {
if (sv.empty()) {
return _RepPtrAndBits();
}

size_t setNum = _GetSetNum(_CStr(s));
size_t setNum = _GetSetNum(sv);
_Set const &set = _sets[setNum];

tbb::spin_mutex::scoped_lock lock(set.mutex);

_RepSet::const_iterator iter = set.reps.find(_LookupRep(_CStr(s)));
_RepSet::const_iterator iter = set.reps.find(_LookupRep(sv));
if (iter == set.reps.end()) {
return _RepPtrAndBits();
}
Expand Down Expand Up @@ -297,35 +282,44 @@ TfToken::_GetEmptyString()
return empty;
}

TfToken::TfToken(const string &s)
TfToken::TfToken(std::string_view sv)
: _rep(Tf_TokenRegistry::_GetInstance().
_GetPtrStr(s, /*makeImmortal*/false))
_GetPtr(sv, /*makeImmortal*/false))
{
}

TfToken::TfToken(const string &s)
: TfToken(std::string_view{s})
{
}

TfToken::TfToken(const char *s)
: TfToken(s ? std::string_view{s} : std::string_view{})
{
}


TfToken::TfToken(std::string_view sv, _ImmortalTag)
: _rep(Tf_TokenRegistry::_GetInstance().
_GetPtrChar(s, /*makeImmortal*/false))
_GetPtr(sv, /*makeImmortal*/true))
{
}

TfToken::TfToken(const string &s, _ImmortalTag)
: _rep(Tf_TokenRegistry::_GetInstance().
_GetPtrStr(s, /*makeImmortal*/true))
: TfToken(std::string_view{s}, _ImmortalTag{})
{
}

TfToken::TfToken(const char *s, _ImmortalTag)
: _rep(Tf_TokenRegistry::_GetInstance().
_GetPtrChar(s, /*makeImmortal*/true))
: TfToken(s ? std::string_view{s} : std::string_view{}, _ImmortalTag{})
{
}

TfToken
TfToken::Find(const string& s)
TfToken::Find(std::string_view sv)
{
TfToken t;
t._rep = Tf_TokenRegistry::_GetInstance()._FindPtrStr(s);
t._rep = Tf_TokenRegistry::_GetInstance()._FindPtr(sv);
return t;
}

Expand Down
43 changes: 30 additions & 13 deletions pxr/base/tf/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <atomic>
#include <iosfwd>
#include <string>
#include <string_view>
#include <vector>
#include <set>

Expand Down Expand Up @@ -132,11 +133,24 @@ class TfToken
// and release their memory.
TF_API TfToken(char const* s, _ImmortalTag);

/// Acquire a token for the given string_view.
//
// This constructor involves a string hash and a lookup in the global
// table, and so should not be done more often than necessary. When
// possible, create a token once and reuse it many times.
TF_API explicit TfToken(std::string_view sv);
/// \overload
// Create a token for \p sv, and make it immortal. If \p sv exists in the
// token table already and is not immortal, make it immortal. Immortal
// tokens are faster to copy than mortal tokens, but they will never expire
// and release their memory.
TF_API TfToken(std::string_view sv, _ImmortalTag);

/// Find the token for the given string, if one exists.
//
// If a token has previous been created for the given string, this
// will return it. Otherwise, the empty token will be returned.
TF_API static TfToken Find(std::string const& s);
TF_API static TfToken Find(std::string_view sv);

/// Return a size_t hash for this token.
//
Expand Down Expand Up @@ -192,6 +206,12 @@ class TfToken
return rep ? rep->_str : _GetEmptyString();
}

/// Return a string view of the underlying token
std::string_view GetStringView() const {
_Rep const *rep = _rep.Get();
return rep ? rep->_view : std::string_view{};
}

/// Swap this token with another.
inline void Swap(TfToken &other) {
std::swap(_rep, other._rep);
Expand Down Expand Up @@ -351,36 +371,31 @@ class TfToken
: _setNum(setNum)
, _compareCode(compareCode)
, _str(std::move(str))
, _cstr(_str.c_str()) {}

explicit _Rep(std::string const &str,
unsigned setNum,
uint64_t compareCode)
: _Rep(std::string(str), setNum, compareCode) {}
, _view(std::string_view{_str}) {}

explicit _Rep(char const *str,
explicit _Rep(std::string_view str,
unsigned setNum,
uint64_t compareCode)
: _Rep(std::string(str), setNum, compareCode) {}

// Make sure we reacquire _cstr from _str on copy and assignment
// Make sure we reacquire _view from _str on copy and assignment
// to avoid holding on to a dangling pointer. However, if rhs'
// _cstr member doesn't come from its _str, just copy it directly
// _view member doesn't come from its _str, just copy it directly
// over. This is to support lightweight _Rep objects used for
// internal lookups.
_Rep(_Rep const &rhs)
: _refCount(rhs._refCount.load(std::memory_order_relaxed))
, _setNum(rhs._setNum)
, _compareCode(rhs._compareCode)
, _str(rhs._str)
, _cstr(rhs._str.c_str() == rhs._cstr ? _str.c_str() : rhs._cstr) {}
, _view(std::string_view{rhs._str} == rhs._view ? std::string_view{_str} : rhs._view) {}
nvmkuruc marked this conversation as resolved.
Show resolved Hide resolved

_Rep &operator=(_Rep const &rhs) {
_refCount = rhs._refCount.load(std::memory_order_relaxed);
_setNum = rhs._setNum;
_compareCode = rhs._compareCode;
_str = rhs._str;
_cstr = (rhs._str.c_str() == rhs._cstr ? _str.c_str() : rhs._cstr);
_view = (std::string_view{rhs._str} == rhs._view ? std::string_view{_str} : rhs._view);
return *this;
}

Expand All @@ -400,7 +415,9 @@ class TfToken
unsigned _setNum;
uint64_t _compareCode;
std::string _str;
char const *_cstr;
// proxies transparent hashing until the token registry uses a set
// (like `std::unordered_set` in C++20) that supports it
std::string_view _view;
};

friend struct TfTokenFastArbitraryLessThan;
Expand Down