Skip to content

Commit

Permalink
Document safety guarantees of NSString::as_str and its internals
Browse files Browse the repository at this point in the history
  • Loading branch information
madsmtm committed Jun 12, 2024
1 parent cca6a40 commit b88db80
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 31 deletions.
12 changes: 10 additions & 2 deletions crates/objc2/src/exception.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,15 @@ impl fmt::Debug for Exception {
// SAFETY: Just checked that object is an NSException
let (name, reason) = unsafe { (self.name(), self.reason()) };

// SAFETY: `name` and `reason` are guaranteed to be NSString.
// SAFETY:
// - `name` and `reason` are guaranteed to be `NSString`s.
// - We control the scope in which they are alive, so we know
// they are not moved outside the current autorelease pool.
//
// Note that these strings are immutable (`NSException` is
// immutable, and the properties are marked as `readonly` and
// `copy` and are copied upon creation), so we also don't have
// to worry about the string being mutated under our feet.
let name = name
.as_deref()
.map(|name| unsafe { nsstring_to_str(name, pool) });
Expand Down Expand Up @@ -145,7 +153,7 @@ impl fmt::Display for Exception {
let reason = unsafe { self.reason() };

if let Some(reason) = &reason {
// SAFETY: `reason` is guaranteed to be NSString.
// SAFETY: Same as above in `Debug`.
let reason = unsafe { nsstring_to_str(reason, pool) };
return write!(f, "{reason}");
}
Expand Down
46 changes: 34 additions & 12 deletions crates/objc2/src/runtime/__nsstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,19 @@ pub unsafe fn nsstring_len(obj: &NSObject) -> NSUInteger {

/// Extract a [`str`](`prim@str`) representation out of the given NSString.
///
/// Uses [`UTF8String`] under the hood.
///
/// [`UTF8String`]: https://developer.apple.com/documentation/foundation/nsstring/1411189-utf8string?language=objc
///
///
/// # Safety
///
/// The object must be an instance of `NSString`.
/// - The object must be an instance of `NSString`.
/// - The returned string must not be moved outside the autorelease pool into
/// which it (may) have been released.
///
/// Furthermore, the object must not, as is always the case for strings, be
/// mutated in parallel.
//
// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
Expand All @@ -42,21 +52,33 @@ pub unsafe fn nsstring_to_str<'r, 's: 'r, 'p: 'r>(
// This is necessary until `auto` types stabilizes.
pool.__verify_is_inner();

// The documentation on `UTF8String` is a bit sparse, but with
// educated guesses and testing I've determined that NSString stores
// a pointer to the string data, sometimes with an UTF-8 encoding,
// (usual for ascii data), sometimes in other encodings (UTF-16?).
// The documentation on `UTF8String` is quite sparse, but with educated
// guesses, testing, reading the code for `CFString` and a bit of
// reverse-engineering, we've determined that `NSString` stores a pointer
// to the string data, sometimes with an UTF-8 encoding (usual for ascii
// data), sometimes in other encodings (often UTF-16).
//
// `UTF8String` then checks the internal encoding:
// - If the data is UTF-8 encoded, it returns the internal pointer.
// - If the data is in another encoding, it creates a new allocation,
// writes the UTF-8 representation of the string into it,
// autoreleases the allocation and returns a pointer to it.
// - If the data is UTF-8 encoded, and (since macOS 10.6) if the string is
// immutable, it returns the internal pointer using
// `CFStringGetCStringPtr`.
// - Otherwise, if the data is in another encoding or is mutable, it
// creates a new allocation, writes the UTF-8 representation of the
// string into it, autoreleases the allocation, and returns a pointer to
// it (similar to `CFStringGetCString`).
//
// If the string is a tagged pointer, or a custom subclass, then another
// code-path is taken that always creates a new allocation and copies the
// string into that using (effectively) `length` and `characterAtIndex:`.
//
// So the lifetime of the returned pointer is either the same as the
// NSString OR the lifetime of the innermost @autoreleasepool.
// As a result, the lifetime of the returned pointer is either the same as
// the passed-in `NSString` OR the lifetime of the current / innermost
// `@autoreleasepool`.
//
// https://developer.apple.com/documentation/foundation/nsstring/1411189-utf8string?language=objc
// Furthermore, we can allow creating a `&str` from `&obj`, even if the
// string is originally a `NSMutableString` which may be mutated later on,
// since in that case the lifetime will be tied to the pool and not the
// string.
let bytes: *const c_char = unsafe { msg_send![obj, UTF8String] };
let bytes: *const u8 = bytes.cast();

Expand Down
33 changes: 26 additions & 7 deletions crates/objc2/src/runtime/protocol_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,33 @@ impl<P: ?Sized + NSObjectProtocol> hash::Hash for ProtocolObject<P> {
impl<P: ?Sized + NSObjectProtocol> fmt::Debug for ProtocolObject<P> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let description = self.description();
// We use a leaking autorelease pool since often the string
// will be UTF-8, and in that case the pool will be
// irrelevant. Also, it allows us to pass the formatter into
// the pool (since it may contain a pool internally that it
// assumes is current when writing).

// `NSString`s in return types, such as the one in `description`, are
// in general _supposed_ to be immutable:
// <https://developer.apple.com/library/archive/documentation/General/Conceptual/CocoaEncyclopedia/ObjectMutability/ObjectMutability.html#//apple_ref/doc/uid/TP40010810-CH5-SW65>
//
// In reality, that isn't actually the case for `NSMutableString`,
// the `description` of that just returns itself instead of copying.
//
// Luckily though, the `UTF8String` returned by mutable objects do not
// return a reference to internal data, and instead always allocate
// a new autoreleased object (see details in `nsstring_to_str`), so
// we do not have to worry about the string being mutated in e.g. a
// malicious `Write` implementation in `fmt::Formatter` while we hold
// the `&str`.

// We use a leaking autorelease pool since often the string will be
// UTF-8, and in that case the pool will be irrelevant. Also, it
// allows us to pass the formatter into the pool (since it may contain
// a pool internally that it assumes is current when writing).
autoreleasepool_leaking(|pool| {
// SAFETY: `description` selector is guaranteed to always
// return an instance of `NSString`.
// SAFETY:
// - The `description` selector is guaranteed to always return an
// instance of `NSString`.
// - We control the scope in which the string is alive, so we know
// it is not moved outside the current autorelease pool
// (`autoreleasepool_leaking` is greatly helping with this,
// though by itself does not fully ensure it).
let s = unsafe { nsstring_to_str(&description, pool) };
fmt::Display::fmt(s, f)
})
Expand Down
109 changes: 99 additions & 10 deletions framework-crates/objc2-foundation/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,21 @@ impl NSString {
/// Returns [`None`] if the internal storage does not allow this to be
/// done efficiently. Use [`NSString::as_str`] or `NSString::to_string`
/// if performance is not an issue.
///
///
/// # Safety
///
/// The `NSString` must not be mutated for the lifetime of the returned
/// string.
///
/// Warning: This is very difficult to ensure in generic contexts.
#[doc(alias = "CFStringGetCStringPtr")]
#[allow(unused)]
#[cfg(target_vendor = "apple")]
// TODO: Finish this
fn as_str_wip(&self) -> Option<&str> {
// TODO: Can this be used on NSStrings that are not internally CFString?
// (i.e. custom subclasses of NSString)?
unsafe fn as_str_wip(&self) -> Option<&str> {
use core::ptr::NonNull;
use std::os::raw::c_char;

Expand Down Expand Up @@ -90,7 +100,7 @@ impl NSString {
#[allow(unused)]
#[cfg(target_vendor = "apple")]
// TODO: Finish this
fn as_utf16(&self) -> Option<&[u16]> {
unsafe fn as_utf16(&self) -> Option<&[u16]> {
use core::ptr::NonNull;

extern "C" {
Expand All @@ -102,12 +112,78 @@ impl NSString {
.map(|ptr| unsafe { slice::from_raw_parts(ptr.as_ptr(), self.len_utf16()) })
}

/// Get the [`str`](`prim@str`) representation of this.
/// Convert the string into a [string slice](`prim@str`).
///
/// The signature of this method can be a bit confusing, as it contains
/// several lifetimes; the lifetime `'s` of the `NSString`, the lifetime
/// `'p` of the current autorelease pool and the lifetime `'r` of the
/// returned string slice.
///
/// In general, this method converts the string to a newly allocated UTF-8
/// string, autoreleases the buffer, and returns a slice pointer to this
/// internal buffer, which will become invalid once the autorelease pool
/// is popped. So the lifetime of the return value is bound to the current
/// autorelease pool.
///
/// However, as an optimization, this method may choose to instead return
/// an internal reference to the `NSString` when it can, and when the
/// string is immutable, and that is why the lifetime of the returned
/// string slice is also bound to the string itself.
///
/// You should prefer the [`to_string`] method or the
/// [`Display` implementation][display-impl] over this method when
/// possible.
///
/// [`to_string`]: alloc::string::ToString::to_string
/// [display-impl]: NSString#impl-Display-for-NSString
///
///
/// # Examples
///
/// Get the string slice of the `NSString`, and compare it with another
/// inside an autorelease pool.
///
/// ```
/// use objc2_foundation::NSString;
/// use objc2::rc::autoreleasepool;
///
/// TODO: Further explain this.
/// let string = NSString::from_str("foo");
/// autoreleasepool(|pool| {
/// assert_eq!(string.as_str(pool), "foo");
/// });
/// ```
///
/// Fails to compile because the lifetime of the string slice is bound to
/// the autorelease pool:
///
/// ```compile_fail
/// # use objc2_foundation::NSString;
/// # use objc2::rc::autoreleasepool;
/// #
/// let string = NSString::from_str("foo");
/// let s = autoreleasepool(|pool| string.as_str(pool));
/// assert_eq!(s, "foo");
/// ```
///
/// Fails to compile because the lifetime of the string slice is bound to
/// the string itself:
///
/// ```compile_fail
/// # use objc2_foundation::NSString;
/// # use objc2::rc::autoreleasepool;
/// #
/// autoreleasepool(|pool| {
/// let string = NSString::from_str("foo");
/// let s = string.as_str(pool);
/// drop(string);
/// assert_eq!(s, "foo");
/// });
/// ```
#[doc(alias = "UTF8String")]
pub fn as_str<'r, 's: 'r, 'p: 'r>(&'s self, pool: AutoreleasePool<'p>) -> &'r str {
// SAFETY: This is an instance of `NSString`
//
// TODO: Caller upholds that the string is not moved outside the pool.
unsafe { nsstring_to_str(self, pool) }
}

Expand Down Expand Up @@ -229,20 +305,33 @@ impl AddAssign<&NSString> for NSMutableString {

impl fmt::Display for NSString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
autoreleasepool_leaking(|pool| fmt::Display::fmt(self.as_str(pool), f))
// SAFETY:
// - The object is an instance of `NSString`.
// - We control the scope in which the string is alive, so we know
// it is not moved outside the current autorelease pool.
//
// TODO: Use more performant APIs, maybe by copying bytes into a
// temporary stack buffer so that we avoid allocating?
//
// Beware though that the string may be mutable internally, and that
// mutation may happen on every call to the formatter `f` (so
// `CFStringGetCharactersPtr` is probably out of the question, unless
// we somehow check that the string is immutable?).
autoreleasepool_leaking(|pool| fmt::Display::fmt(unsafe { nsstring_to_str(self, pool) }, f))
}
}

impl fmt::Display for NSMutableString {
#[inline]
impl fmt::Debug for NSString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&**self, f)
// SAFETY: Same as for `Display` above.
autoreleasepool_leaking(|pool| fmt::Debug::fmt(unsafe { nsstring_to_str(self, pool) }, f))
}
}

impl fmt::Debug for NSString {
impl fmt::Display for NSMutableString {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
autoreleasepool_leaking(|pool| fmt::Debug::fmt(self.as_str(pool), f))
fmt::Display::fmt(&**self, f)
}
}

Expand Down

0 comments on commit b88db80

Please sign in to comment.