Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix UB in *_offset functions #2450

Merged
merged 4 commits into from
Jun 18, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ edition = "2018"
cfg-if = "1.0"
libc = "0.2.62"
parking_lot = ">= 0.11, < 0.13"
memoffset = "0.6.5"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a note here that it's to support safe offset calculations on versions predating addr_of? I imagine in the future we might consider removing again.

Copy link
Member

@adamreichold adamreichold Jun 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are using it to compute offsets in all cases since even when addr_of is available, using memoffset enables us to avoid writing out its usage ourselves.

Personally, I think as it takes care of what is available where and how to use it, memoffset does carry its weight as a dependency.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with this, I was thinking more that after a future MSRV bump that may change as addr_of would be trivial for us to implement safely. I'll worry about that in the future, perhaps 😄


# ffi bindings to the python interpreter, split into a separate crate so they can be used independently
pyo3-ffi = { path = "pyo3-ffi", version = "=0.16.5" }
Expand Down
7 changes: 5 additions & 2 deletions src/ffi/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ fn ascii() {
// _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings.
assert!(!PyUnicode_DATA(ptr).is_null());

assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as _);
assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as Py_ssize_t);
assert_eq!(PyUnicode_IS_READY(ptr), 1);

// This has potential to mutate object. But it should be a no-op since
Expand Down Expand Up @@ -175,7 +175,10 @@ fn ucs4() {
// _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings.
assert!(!PyUnicode_DATA(ptr).is_null());

assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _);
assert_eq!(
PyUnicode_GET_LENGTH(ptr),
py_string.len().unwrap() as Py_ssize_t
);
assert_eq!(PyUnicode_IS_READY(ptr), 1);

// This has potential to mutate object. But it should be a no-op since
Expand Down
58 changes: 8 additions & 50 deletions src/pycell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -625,65 +625,23 @@ impl<T: PyClass> PyCell<T> {

/// Gets the offset of the dictionary from the start of the struct in bytes.
pub(crate) fn dict_offset() -> ffi::Py_ssize_t {
use memoffset::offset_of;
use std::convert::TryInto;
#[cfg(addr_of)]
let offset = {
// With std::ptr::addr_of - can measure offset using uninit memory without UB.
let cell = std::mem::MaybeUninit::<Self>::uninit();
let base_ptr = cell.as_ptr();
let dict_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.dict) };
unsafe { (dict_ptr as *const u8).offset_from(base_ptr as *const u8) }
};
#[cfg(not(addr_of))]
let offset = {
// No std::ptr::addr_of - need to take references to PyCell to measure offsets;
// make a zero-initialised "fake" one so that referencing it is not UB.
let mut cell = std::mem::MaybeUninit::<Self>::uninit();
unsafe {
std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1);
}
let cell = unsafe { cell.assume_init() };
let dict_ptr = &cell.contents.dict;
// offset_from wasn't stabilised until 1.47, so we also have to work around
// that...
let offset = (dict_ptr as *const _ as usize) - (&cell as *const _ as usize);
// This isn't a valid cell, so ensure no Drop code runs etc.
std::mem::forget(cell);
offset
};

let offset = offset_of!(PyCell<T>, contents) + offset_of!(PyCellContents<T>, dict);

// Py_ssize_t may not be equal to isize on all platforms
#[allow(clippy::useless_conversion)]
offset.try_into().expect("offset should fit in Py_ssize_t")
}

/// Gets the offset of the weakref list from the start of the struct in bytes.
pub(crate) fn weaklist_offset() -> ffi::Py_ssize_t {
use memoffset::offset_of;
use std::convert::TryInto;
#[cfg(addr_of)]
let offset = {
// With std::ptr::addr_of - can measure offset using uninit memory without UB.
let cell = std::mem::MaybeUninit::<Self>::uninit();
let base_ptr = cell.as_ptr();
let weaklist_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.weakref) };
unsafe { (weaklist_ptr as *const u8).offset_from(base_ptr as *const u8) }
};
#[cfg(not(addr_of))]
let offset = {
// No std::ptr::addr_of - need to take references to PyCell to measure offsets;
// make a zero-initialised "fake" one so that referencing it is not UB.
let mut cell = std::mem::MaybeUninit::<Self>::uninit();
unsafe {
std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1);
}
let cell = unsafe { cell.assume_init() };
let weaklist_ptr = &cell.contents.weakref;
// offset_from wasn't stabilised until 1.47, so we also have to work around
// that...
let offset = (weaklist_ptr as *const _ as usize) - (&cell as *const _ as usize);
// This isn't a valid cell, so ensure no Drop code runs etc.
std::mem::forget(cell);
offset
};

let offset = offset_of!(PyCell<T>, contents) + offset_of!(PyCellContents<T>, weakref);

// Py_ssize_t may not be equal to isize on all platforms
#[allow(clippy::useless_conversion)]
offset.try_into().expect("offset should fit in Py_ssize_t")
Expand Down
20 changes: 16 additions & 4 deletions src/types/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,14 @@ mod tests {
let obj = vec![10, 20].to_object(py);
let inst = obj.as_ref(py);
let mut it = inst.iter().unwrap();
assert_eq!(10, it.next().unwrap().unwrap().extract().unwrap());
assert_eq!(20, it.next().unwrap().unwrap().extract().unwrap());
assert_eq!(
10_i32,
it.next().unwrap().unwrap().extract::<'_, i32>().unwrap()
);
assert_eq!(
20_i32,
it.next().unwrap().unwrap().extract::<'_, i32>().unwrap()
);
assert!(it.next().is_none());
});
}
Expand All @@ -138,7 +144,10 @@ mod tests {
let inst = obj.as_ref(py);
let mut it = inst.iter().unwrap();

assert_eq!(10, it.next().unwrap().unwrap().extract().unwrap());
assert_eq!(
10_i32,
it.next().unwrap().unwrap().extract::<'_, i32>().unwrap()
);
});

Python::with_gil(|py| {
Expand Down Expand Up @@ -167,7 +176,10 @@ mod tests {
let inst = obj.as_ref(py);
let mut it = inst.iter().unwrap();

assert_eq!(10, it.next().unwrap().unwrap().extract().unwrap());
assert_eq!(
10_i32,
it.next().unwrap().unwrap().extract::<'_, i32>().unwrap()
);
assert!(it.next().unwrap().unwrap().is_none());
}
assert_eq!(count, none.get_refcnt(py));
Expand Down
4 changes: 2 additions & 2 deletions src/types/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,12 +407,12 @@ mod tests {

// iter method
for el in set.iter() {
assert_eq!(1i32, el.extract().unwrap());
assert_eq!(1i32, el.extract::<'_, i32>().unwrap());
}

// intoiterator iteration
for el in set {
assert_eq!(1i32, el.extract().unwrap());
assert_eq!(1i32, el.extract::<'_, i32>().unwrap());
}
});
}
Expand Down
14 changes: 7 additions & 7 deletions src/types/tuple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,13 +508,13 @@ mod tests {

assert_eq!(iter.size_hint(), (3, Some(3)));

assert_eq!(1, iter.next().unwrap().extract().unwrap());
assert_eq!(1_i32, iter.next().unwrap().extract::<'_, i32>().unwrap());
assert_eq!(iter.size_hint(), (2, Some(2)));

assert_eq!(2, iter.next().unwrap().extract().unwrap());
assert_eq!(2_i32, iter.next().unwrap().extract::<'_, i32>().unwrap());
assert_eq!(iter.size_hint(), (1, Some(1)));

assert_eq!(3, iter.next().unwrap().extract().unwrap());
assert_eq!(3_i32, iter.next().unwrap().extract::<'_, i32>().unwrap());
assert_eq!(iter.size_hint(), (0, Some(0)));
});
}
Expand All @@ -527,7 +527,7 @@ mod tests {
assert_eq!(3, tuple.len());

for (i, item) in tuple.iter().enumerate() {
assert_eq!(i + 1, item.extract().unwrap());
assert_eq!(i + 1, item.extract::<'_, usize>().unwrap());
}
});
}
Expand All @@ -541,9 +541,9 @@ mod tests {

let slice = tuple.as_slice();
assert_eq!(3, slice.len());
assert_eq!(1, slice[0].extract().unwrap());
assert_eq!(2, slice[1].extract().unwrap());
assert_eq!(3, slice[2].extract().unwrap());
assert_eq!(1_i32, slice[0].extract::<'_, i32>().unwrap());
assert_eq!(2_i32, slice[1].extract::<'_, i32>().unwrap());
assert_eq!(3_i32, slice[2].extract::<'_, i32>().unwrap());
});
}

Expand Down
68 changes: 59 additions & 9 deletions tests/test_class_basics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,14 +347,23 @@ fn test_tuple_struct_class() {
}

#[pyclass(dict, subclass)]
struct DunderDictSupport {}
struct DunderDictSupport {
// Make sure that dict_offset runs with non-zero sized Self
_pad: [u8; 32],
}

#[test]
#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
fn dunder_dict_support() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, DunderDictSupport {}).unwrap();
let inst = PyCell::new(
py,
DunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -371,7 +380,13 @@ fn dunder_dict_support() {
fn access_dunder_dict() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, DunderDictSupport {}).unwrap();
let inst = PyCell::new(
py,
DunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -393,7 +408,16 @@ struct InheritDict {
fn inherited_dict() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, (InheritDict { _value: 0 }, DunderDictSupport {})).unwrap();
let inst = PyCell::new(
py,
(
InheritDict { _value: 0 },
DunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
),
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -405,14 +429,23 @@ fn inherited_dict() {
}

#[pyclass(weakref, dict)]
struct WeakRefDunderDictSupport {}
struct WeakRefDunderDictSupport {
// Make sure that weaklist_offset runs with non-zero sized Self
_pad: [u8; 32],
}

#[test]
#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
fn weakref_dunder_dict_support() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, WeakRefDunderDictSupport {}).unwrap();
let inst = PyCell::new(
py,
WeakRefDunderDictSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -421,14 +454,22 @@ fn weakref_dunder_dict_support() {
}

#[pyclass(weakref, subclass)]
struct WeakRefSupport {}
struct WeakRefSupport {
_pad: [u8; 32],
}

#[test]
#[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)]
fn weakref_support() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, WeakRefSupport {}).unwrap();
let inst = PyCell::new(
py,
WeakRefSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
)
.unwrap();
py_run!(
py,
inst,
Expand All @@ -447,7 +488,16 @@ struct InheritWeakRef {
fn inherited_weakref() {
let gil = Python::acquire_gil();
let py = gil.python();
let inst = PyCell::new(py, (InheritWeakRef { _value: 0 }, WeakRefSupport {})).unwrap();
let inst = PyCell::new(
py,
(
InheritWeakRef { _value: 0 },
WeakRefSupport {
_pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF",
},
),
)
.unwrap();
py_run!(
py,
inst,
Expand Down