Skip to content

Commit

Permalink
Don’t corrupt the yyjson pool when reentering orjson.loads
Browse files Browse the repository at this point in the history
orjson.loads may allocate a Python object that triggers a garbage
collection that invokes a destructor that calls orjson.loads again.
Or the destructor may release the GIL so a different thread can call
orjson.loads.  To remain safe under such reentrancy, we need to avoid
reinitializing the yyjson pool while it might still be in use.  The
simplest fix is to initialize the yyjson pool only once, like we did
before commit e9b745e.

Fixes ijl#415.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
  • Loading branch information
andersk committed Sep 8, 2023
1 parent c5fd226 commit 784288b
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 22 deletions.
13 changes: 1 addition & 12 deletions src/deserialize/yyjson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,22 +91,11 @@ fn read_doc_default(data: &'static str, err: &mut yyjson_read_err) -> *mut yyjso

fn read_doc_with_buffer(data: &'static str, err: &mut yyjson_read_err) -> *mut yyjson_doc {
unsafe {
let mut allocator = crate::yyjson::yyjson_alc {
malloc: None,
realloc: None,
free: None,
ctx: null_mut(),
};
crate::yyjson::yyjson_alc_pool_init(
&mut allocator,
YYJSON_ALLOC.get_or_init(yyjson_init).as_ptr() as *mut std::os::raw::c_void,
YYJSON_BUFFER_SIZE,
);
yyjson_read_opts(
data.as_ptr() as *mut c_char,
data.len(),
YYJSON_READ_NOFLAG,
std::ptr::addr_of!(allocator),
&YYJSON_ALLOC.get_or_init(yyjson_init).alloc,
err,
)
}
Expand Down
33 changes: 23 additions & 10 deletions src/typeref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use once_cell::race::{OnceBool, OnceBox};
use pyo3_ffi::*;
use std::cell::UnsafeCell;
use std::mem::MaybeUninit;
use std::os::raw::c_char;
use std::os::raw::{c_char, c_void};
use std::ptr::{null_mut, NonNull};

pub struct NumpyTypes {
Expand Down Expand Up @@ -94,28 +94,41 @@ pub const YYJSON_BUFFER_SIZE: usize = 1024 * 1024 * 8;

/// Raw backing storage handed to the yyjson pool allocator: `YYJSON_BUFFER_SIZE` bytes,
/// 64-byte aligned, wrapped in `UnsafeCell<MaybeUninit<..>>` so a pointer to it can be
/// obtained through a shared reference and the bytes need not be initialized.
///
/// NOTE(review): this span is a diff rendered without `+`/`-` markers, so the old and the
/// new declaration of the same struct appear back to back.
#[cfg(feature = "yyjson")]
#[repr(align(64))]
// old: the struct was `pub`.
pub struct YYJSONBuffer(UnsafeCell<MaybeUninit<[u8; YYJSON_BUFFER_SIZE]>>);
// new: the struct is private to this module.
struct YYJSONBuffer(UnsafeCell<MaybeUninit<[u8; YYJSON_BUFFER_SIZE]>>);

// NOTE(review): this span is a diff rendered without `+`/`-` markers. The old
// `impl YYJSONBuffer` and the new `struct YYJSONAlloc` are interleaved and share the
// final closing brace.
#[cfg(feature = "yyjson")]
// old: accessor that exposed the buffer's start as a raw byte pointer.
impl YYJSONBuffer {
pub(crate) fn as_ptr(&self) -> *mut u8 {
self.0.get().cast::<u8>()
}
// new: pairs the yyjson allocator descriptor with the boxed buffer it was set up over.
pub struct YYJSONAlloc {
// Allocator descriptor whose address is passed to `yyjson_read_opts`.
pub alloc: crate::yyjson::yyjson_alc,
// Never read by name; holding the Box keeps the buffer allocated for as long as
// this struct exists.
_buffer: Box<YYJSONBuffer>,
}

/// Global slot that callers fill on first use via `get_or_init(yyjson_init)`.
///
/// NOTE(review): this span is a diff rendered without `+`/`-` markers, so the old and the
/// new declaration of the same static appear back to back.
#[cfg(feature = "yyjson")]
// old: the slot held only the raw buffer.
pub static mut YYJSON_ALLOC: OnceBox<YYJSONBuffer> = OnceBox::new();
// new: the slot holds the allocator descriptor together with its buffer.
pub static mut YYJSON_ALLOC: OnceBox<YYJSONAlloc> = OnceBox::new();

/// Initializer passed to `YYJSON_ALLOC.get_or_init(..)`.
///
/// NOTE(review): this span is a diff rendered without `+`/`-` markers, so pre-commit and
/// post-commit lines of `yyjson_init` are interleaved. The `old:` / `new:` notes below
/// say which is which; the split matches the hunk header's 23 additions / 10 deletions.
///
/// New behavior: heap-allocates a `YYJSONBuffer`, calls `yyjson_alc_pool_init` over it
/// here (rather than in the caller), and returns the allocator descriptor boxed
/// together with the buffer.
#[cfg(feature = "yyjson")]
// old: returned just the buffer.
pub fn yyjson_init() -> Box<YYJSONBuffer> {
// new: returns the allocator descriptor plus the buffer it owns.
pub fn yyjson_init() -> Box<YYJSONAlloc> {
// Using unsafe to ensure allocation happens on the heap without going through the stack
// so we don't stack overflow in debug mode. Once rust-lang/rust#63291 is stable (Box::new_uninit)
// we can use that instead.
let layout = std::alloc::Layout::new::<YYJSONBuffer>();
// new: allocate with the struct's layout and take ownership as a Box in one step.
// NOTE(review): the pointer returned by `std::alloc::alloc` is not checked for null
// before `Box::from_raw` — confirm whether allocation failure needs handling here.
let buffer = unsafe { Box::from_raw(std::alloc::alloc(layout).cast::<YYJSONBuffer>()) };
// new: start from an empty descriptor (no callbacks, null ctx) and pass it by
// `&mut` to `yyjson_alc_pool_init` below.
let mut alloc = crate::yyjson::yyjson_alc {
malloc: None,
realloc: None,
free: None,
ctx: null_mut(),
};
unsafe {
// old: allocate and box the buffer as the function's return value.
let buffer = std::alloc::alloc(layout);
Box::from_raw(buffer.cast::<YYJSONBuffer>())
// new: set up the pool over the buffer's `YYJSON_BUFFER_SIZE` bytes.
crate::yyjson::yyjson_alc_pool_init(
&mut alloc,
buffer.0.get().cast::<c_void>(),
YYJSON_BUFFER_SIZE,
);
}
// new: move the descriptor and the buffer into one boxed value.
Box::new(YYJSONAlloc {
alloc,
_buffer: buffer,
})
}

#[allow(non_upper_case_globals)]
Expand Down
14 changes: 14 additions & 0 deletions test/test_reentrant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import orjson


class C:
    """Object whose finalizer re-enters ``orjson.loads``."""

    def __del__(self):
        # Parse a JSON string literal made of 10000 "a" characters from inside the
        # destructor, so a parse happens whenever an instance is finalized.
        document = '"' + "a" * 10000 + '"'
        orjson.loads(document)


def test_reentrant():
    """Parse a document while a self-referencing ``C`` instance awaits collection.

    The instance refers to itself through its ``c`` attribute, so dropping the local
    name leaves it alive until the cycle collector reclaims it. The outer parse
    below is intended to allocate enough objects to trigger that collection, which
    runs ``C.__del__`` and therefore a nested ``orjson.loads``.
    """
    cyclic = C()
    cyclic.c = cyclic
    del cyclic

    # An array holding 1001 empty arrays.
    nested = "[" + "[]," * 1000 + "[]]"
    orjson.loads(nested)

0 comments on commit 784288b

Please sign in to comment.