Skip to content

Commit

Permalink
Avoid UB in *_offset methods
Browse files Browse the repository at this point in the history
  • Loading branch information
mejrs committed Jun 12, 2022
1 parent 56c218f commit 48c6940
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 48 deletions.
123 changes: 85 additions & 38 deletions src/pycell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,8 @@ pub struct PyCell<T: PyClassImpl> {
contents: PyCellContents<T>,
}

// This struct has mirrored definitions in `weaklist_offset` and `dict_offset`,
// who need the same struct layout to function correctly.
#[repr(C)]
pub(crate) struct PyCellContents<T: PyClassImpl> {
pub(crate) value: ManuallyDrop<UnsafeCell<T>>,
Expand Down Expand Up @@ -626,30 +628,52 @@ impl<T: PyClass> PyCell<T> {
/// Gets the offset of the dictionary from the start of the struct in bytes.
pub(crate) fn dict_offset() -> ffi::Py_ssize_t {
use std::convert::TryInto;
use std::mem::MaybeUninit;

#[cfg(addr_of)]
let offset = {
// With std::ptr::addr_of - can measure offset using uninit memory without UB.
let cell = std::mem::MaybeUninit::<Self>::uninit();
let offset = unsafe {
// The much simpler way, using addr_of!
let cell = MaybeUninit::<Self>::uninit();
let base_ptr = cell.as_ptr();
let dict_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.dict) };
unsafe { (dict_ptr as *const u8).offset_from(base_ptr as *const u8) }
let dict_ptr = std::ptr::addr_of!((*base_ptr).contents.dict);
(dict_ptr.cast::<u8>()).offset_from(base_ptr.cast::<u8>())
};
#[cfg(not(addr_of))]
let offset = {
// No std::ptr::addr_of - need to take references to PyCell to measure offsets;
// make a zero-initialised "fake" one so that referencing it is not UB.
let mut cell = std::mem::MaybeUninit::<Self>::uninit();
unsafe {
std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1);
let offset = unsafe {
// The annoying way using a dummy struct because there's no way to get
// a pointer without going through a reference, which is insta-UB
// if it is not initialized.

use std::mem::size_of;

#[repr(C)]
struct PyCellDummy<T: PyClassImpl> {
ob_base: MaybeUninit<<T::BaseType as PyClassBaseType>::LayoutAsBase>,
contents: PyCellContentsDummy<T>,
}

#[repr(C)]
struct PyCellContentsDummy<T: PyClassImpl> {
value: MaybeUninit<T>,
borrow_checker: MaybeUninit<<T::PyClassMutability as PyClassMutability>::Storage>,
thread_checker: MaybeUninit<T::ThreadChecker>,
dict: MaybeUninit<T::Dict>,
weakref: MaybeUninit<T::WeakRef>,
}
let cell = unsafe { cell.assume_init() };
let dict_ptr = &cell.contents.dict;
// offset_from wasn't stabilised until 1.47, so we also have to work around
// that...
let offset = (dict_ptr as *const _ as usize) - (&cell as *const _ as usize);
// This isn't a valid cell, so ensure no Drop code runs etc.
std::mem::forget(cell);
offset

assert_eq!(size_of::<PyCell<T>>(), size_of::<PyCellDummy<T>>());
assert_eq!(
size_of::<PyCellContents<T>>(),
size_of::<PyCellContentsDummy<T>>()
);

// All of the pycelldummy's fields are maybeuninit, which require no initialization
let cell = MaybeUninit::<PyCellDummy<T>>::uninit().assume_init();

let base_ptr: *const PyCellDummy<T> = &cell;
let dict_ptr: *const MaybeUninit<T::Dict> = &(*base_ptr).contents.dict;

(dict_ptr.cast::<u8>()).offset_from(base_ptr.cast::<u8>())
};
// Py_ssize_t may not be equal to isize on all platforms
#[allow(clippy::useless_conversion)]
Expand All @@ -659,30 +683,53 @@ impl<T: PyClass> PyCell<T> {
/// Gets the offset of the weakref list from the start of the struct in bytes.
pub(crate) fn weaklist_offset() -> ffi::Py_ssize_t {
use std::convert::TryInto;
use std::mem::MaybeUninit;

#[cfg(addr_of)]
let offset = {
// With std::ptr::addr_of - can measure offset using uninit memory without UB.
let cell = std::mem::MaybeUninit::<Self>::uninit();
let offset = unsafe {
// The much simpler way, using addr_of!
let cell = MaybeUninit::<Self>::uninit();
let base_ptr = cell.as_ptr();
let weaklist_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.weakref) };
unsafe { (weaklist_ptr as *const u8).offset_from(base_ptr as *const u8) }
let weaklist_ptr = std::ptr::addr_of!((*base_ptr).contents.weakref);
(weaklist_ptr.cast::<u8>()).offset_from(base_ptr.cast::<u8>())
};
#[cfg(not(addr_of))]
let offset = {
// No std::ptr::addr_of - need to take references to PyCell to measure offsets;
// make a zero-initialised "fake" one so that referencing it is not UB.
let mut cell = std::mem::MaybeUninit::<Self>::uninit();
unsafe {
std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1);
let offset = unsafe {
// The annoying way using a dummy struct because there's no way to get
// a pointer without going through a reference, which is insta-UB
// if it is not initialized.

use std::mem::size_of;

#[repr(C)]
struct PyCellDummy<T: PyClassImpl> {
ob_base: MaybeUninit<<T::BaseType as PyClassBaseType>::LayoutAsBase>,
contents: PyCellContentsDummy<T>,
}

#[repr(C)]
struct PyCellContentsDummy<T: PyClassImpl> {
value: MaybeUninit<T>,
borrow_checker: MaybeUninit<<T::PyClassMutability as PyClassMutability>::Storage>,
thread_checker: MaybeUninit<T::ThreadChecker>,
dict: MaybeUninit<T::Dict>,
weakref: MaybeUninit<T::WeakRef>,
}
let cell = unsafe { cell.assume_init() };
let weaklist_ptr = &cell.contents.weakref;
// offset_from wasn't stabilised until 1.47, so we also have to work around
// that...
let offset = (weaklist_ptr as *const _ as usize) - (&cell as *const _ as usize);
// This isn't a valid cell, so ensure no Drop code runs etc.
std::mem::forget(cell);
offset

assert_eq!(size_of::<PyCell<T>>(), size_of::<PyCellDummy<T>>());
assert_eq!(
size_of::<PyCellContents<T>>(),
size_of::<PyCellContentsDummy<T>>()
);

// All of the pycelldummy's fields are maybeuninit, which require no initialization
let cell = MaybeUninit::<PyCellDummy<T>>::uninit().assume_init();

let base_ptr: *const PyCellDummy<T> = &cell;
let weaklist_ptr: *const MaybeUninit<<T as PyClassImpl>::WeakRef> =
&(*base_ptr).contents.weakref;

(weaklist_ptr.cast::<u8>()).offset_from(base_ptr.cast::<u8>())
};
// Py_ssize_t may not be equal to isize on all platforms
#[allow(clippy::useless_conversion)]
Expand Down
70 changes: 60 additions & 10 deletions tests/test_class_basics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,14 +347,23 @@ fn test_tuple_struct_class() {
}

#[pyclass(dict, subclass)]
struct DunderDictSupport {}
struct DunderDictSupport {
// Make sure that dict_offset runs with non-zero sized Self
_pad: [u8; 32],
}

#[test]
#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
//#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
fn dunder_dict_support() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, DunderDictSupport {}).unwrap();
let inst = PyCell::new(
py,
DunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -371,7 +380,13 @@ fn dunder_dict_support() {
fn access_dunder_dict() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, DunderDictSupport {}).unwrap();
let inst = PyCell::new(
py,
DunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -393,7 +408,16 @@ struct InheritDict {
fn inherited_dict() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, (InheritDict { _value: 0 }, DunderDictSupport {})).unwrap();
let inst = PyCell::new(
py,
(
InheritDict { _value: 0 },
DunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
),
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -405,14 +429,23 @@ fn inherited_dict() {
}

#[pyclass(weakref, dict)]
struct WeakRefDunderDictSupport {}
struct WeakRefDunderDictSupport {
// Make sure that weaklist_offset runs with non-zero sized Self
_pad: [u8; 32],
}

#[test]
#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
fn weakref_dunder_dict_support() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, WeakRefDunderDictSupport {}).unwrap();
let inst = PyCell::new(
py,
WeakRefDunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -421,14 +454,22 @@ fn weakref_dunder_dict_support() {
}

#[pyclass(weakref, subclass)]
struct WeakRefSupport {}
struct WeakRefSupport {
_pad: [u8; 32],
}

#[test]
#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
fn weakref_support() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, WeakRefSupport {}).unwrap();
let inst = PyCell::new(
py,
WeakRefSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -447,7 +488,16 @@ struct InheritWeakRef {
fn inherited_weakref() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, (InheritWeakRef { _value: 0 }, WeakRefSupport {})).unwrap();
let inst = PyCell::new(
py,
(
InheritWeakRef { _value: 0 },
WeakRefSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
),
)
.unwrap();
py_run!(
py,
inst,
Expand Down

0 comments on commit 48c6940

Please sign in to comment.