From 8b18c6bdd3d61dda5fe8c4ed378286abec2a9bc4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 10 Aug 2024 21:54:44 +0200 Subject: [PATCH] miri weak memory emulation: initialize store buffer only on atomic writes; pre-fill with previous value --- .../rustc_const_eval/src/interpret/memory.rs | 2 +- src/tools/miri/src/concurrency/data_race.rs | 19 +- src/tools/miri/src/concurrency/weak_memory.rs | 164 ++++++++---------- .../tests/fail/weak_memory/weak_uninit.rs | 43 +++++ .../tests/fail/weak_memory/weak_uninit.stderr | 15 ++ src/tools/miri/tests/pass/weak_memory/weak.rs | 19 +- 6 files changed, 153 insertions(+), 109 deletions(-) create mode 100644 src/tools/miri/tests/fail/weak_memory/weak_uninit.rs create mode 100644 src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 2e5d0ae773654..b0a583f136fd7 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -1011,7 +1011,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { /// /// We do this so Miri's allocation access tracking does not show the validation /// reads as spurious accesses. - pub(super) fn run_for_validation(&self, f: impl FnOnce() -> R) -> R { + pub fn run_for_validation(&self, f: impl FnOnce() -> R) -> R { // This deliberately uses `==` on `bool` to follow the pattern // `assert!(val.replace(new) == old)`. assert!( diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs index 9df0d95f1f275..6fd207c92b937 100644 --- a/src/tools/miri/src/concurrency/data_race.rs +++ b/src/tools/miri/src/concurrency/data_race.rs @@ -617,9 +617,10 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { // the *value* (including the associated provenance if this is an AtomicPtr) at this location. // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place))?; - this.buffered_atomic_read(place, atomic, scalar, || { + let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, || { this.validate_atomic_load(place, atomic) - }) + })?; + Ok(buffered_scalar.ok_or_else(|| err_ub!(InvalidUninitBytes(None)))?) } /// Perform an atomic write operation at the memory location. @@ -632,14 +633,14 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { let this = self.eval_context_mut(); this.atomic_access_check(dest, AtomicAccessType::Store)?; + // Read the previous value so we can put it in the store buffer later. + // The program didn't actually do a read, so suppress the memory access hooks. + // This is also a very special exception where we just ignore an error -- if this read + // was UB e.g. because the memory is uninitialized, we don't want to know! + let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok(); this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?; this.validate_atomic_store(dest, atomic)?; - // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause - // side effects from a read the program did not perform. So we have to initialise - // the store buffer with the value currently being written - // ONCE this is fixed please remove the hack in buffered_atomic_write() in weak_memory.rs - // /~https://github.com/rust-lang/miri/issues/2164 - this.buffered_atomic_write(val, dest, atomic, val) + this.buffered_atomic_write(val, dest, atomic, old_val) } /// Perform an atomic RMW operation on a memory location. @@ -768,7 +769,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { // in the modification order. // Since `old` is only a value and not the store element, we need to separately // find it in our store buffer and perform load_impl on it. - this.perform_read_on_buffered_latest(place, fail, old.to_scalar())?; + this.perform_read_on_buffered_latest(place, fail)?; } // Return the old value. diff --git a/src/tools/miri/src/concurrency/weak_memory.rs b/src/tools/miri/src/concurrency/weak_memory.rs index 6f4171584a8f7..0605b744e6afd 100644 --- a/src/tools/miri/src/concurrency/weak_memory.rs +++ b/src/tools/miri/src/concurrency/weak_memory.rs @@ -39,11 +39,10 @@ //! to attach store buffers to atomic objects. However, Rust follows LLVM in that it only has //! 'atomic accesses'. Therefore Miri cannot know when and where atomic 'objects' are being //! created or destroyed, to manage its store buffers. Instead, we hence lazily create an -//! atomic object on the first atomic access to a given region, and we destroy that object -//! on the next non-atomic or imperfectly overlapping atomic access to that region. +//! atomic object on the first atomic write to a given region, and we destroy that object +//! on the next non-atomic or imperfectly overlapping atomic write to that region. //! These lazy (de)allocations happen in memory_accessed() on non-atomic accesses, and -//! get_or_create_store_buffer() on atomic accesses. This mostly works well, but it does -//! lead to some issues (). +//! get_or_create_store_buffer_mut() on atomic writes. //! //! One consequence of this difference is that safe/sound Rust allows for more operations on atomic locations //! than the C++20 atomic API was intended to allow, such as non-atomically accessing @@ -144,11 +143,9 @@ struct StoreElement { /// The timestamp of the storing thread when it performed the store timestamp: VTimestamp, - /// The value of this store - // FIXME: this means the store must be fully initialized; - // we will have to change this if we want to support atomics on - // (partially) uninitialized data. - val: Scalar, + /// The value of this store. `None` means uninitialized. + // FIXME: Currently, we cannot represent partial initialization. + val: Option, /// Metadata about loads from this store element, /// behind a RefCell to keep load op take &self @@ -170,7 +167,7 @@ impl StoreBufferAlloc { /// When a non-atomic access happens on a location that has been atomically accessed /// before without data race, we can determine that the non-atomic access fully happens - /// after all the prior atomic accesses so the location no longer needs to exhibit + /// after all the prior atomic writes so the location no longer needs to exhibit /// any weak memory behaviours until further atomic accesses. pub fn memory_accessed(&self, range: AllocRange, global: &DataRaceState) { if !global.ongoing_action_data_race_free() { @@ -192,37 +189,29 @@ impl StoreBufferAlloc { } } - /// Gets a store buffer associated with an atomic object in this allocation, - /// or creates one with the specified initial value if no atomic object exists yet. - fn get_or_create_store_buffer<'tcx>( + /// Gets a store buffer associated with an atomic object in this allocation. + /// Returns `None` if there is no store buffer. + fn get_store_buffer<'tcx>( &self, range: AllocRange, - init: Scalar, - ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> { + ) -> InterpResult<'tcx, Option>> { let access_type = self.store_buffers.borrow().access_type(range); let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, - AccessType::Empty(pos) => { - let mut buffers = self.store_buffers.borrow_mut(); - buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); - pos - } - AccessType::ImperfectlyOverlapping(pos_range) => { - // Once we reach here we would've already checked that this access is not racy. - let mut buffers = self.store_buffers.borrow_mut(); - buffers.remove_pos_range(pos_range.clone()); - buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); - pos_range.start - } + // If there is nothing here yet, that means there wasn't an atomic write yet so + // we can't return anything outdated. + _ => return Ok(None), }; - Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos])) + let store_buffer = Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]); + Ok(Some(store_buffer)) } - /// Gets a mutable store buffer associated with an atomic object in this allocation + /// Gets a mutable store buffer associated with an atomic object in this allocation, + /// or creates one with the specified initial value if no atomic object exists yet. fn get_or_create_store_buffer_mut<'tcx>( &mut self, range: AllocRange, - init: Scalar, + init: Option, ) -> InterpResult<'tcx, &mut StoreBuffer> { let buffers = self.store_buffers.get_mut(); let access_type = buffers.access_type(range); @@ -244,10 +233,8 @@ impl StoreBufferAlloc { } impl<'tcx> StoreBuffer { - fn new(init: Scalar) -> Self { + fn new(init: Option) -> Self { let mut buffer = VecDeque::new(); - buffer.reserve(STORE_BUFFER_LIMIT); - let mut ret = Self { buffer }; let store_elem = StoreElement { // The thread index and timestamp of the initialisation write // are never meaningfully used, so it's fine to leave them as 0 @@ -257,11 +244,11 @@ impl<'tcx> StoreBuffer { is_seqcst: false, load_info: RefCell::new(LoadInfo::default()), }; - ret.buffer.push_back(store_elem); - ret + buffer.push_back(store_elem); + Self { buffer } } - /// Reads from the last store in modification order + /// Reads from the last store in modification order, if any. fn read_from_last_store( &self, global: &DataRaceState, @@ -282,7 +269,7 @@ impl<'tcx> StoreBuffer { is_seqcst: bool, rng: &mut (impl rand::Rng + ?Sized), validate: impl FnOnce() -> InterpResult<'tcx>, - ) -> InterpResult<'tcx, (Scalar, LoadRecency)> { + ) -> InterpResult<'tcx, (Option, LoadRecency)> { // Having a live borrow to store_buffer while calling validate_atomic_load is fine // because the race detector doesn't touch store_buffer @@ -419,15 +406,15 @@ impl<'tcx> StoreBuffer { // In the language provided in the paper, an atomic store takes the value from a // non-atomic memory location. // But we already have the immediate value here so we don't need to do the memory - // access - val, + // access. + val: Some(val), is_seqcst, load_info: RefCell::new(LoadInfo::default()), }; - self.buffer.push_back(store_elem); - if self.buffer.len() > STORE_BUFFER_LIMIT { + if self.buffer.len() >= STORE_BUFFER_LIMIT { self.buffer.pop_front(); } + self.buffer.push_back(store_elem); if is_seqcst { // Every store that happens before this needs to be marked as SC // so that in a later SC load, only the last SC store (i.e. this one) or stores that @@ -450,7 +437,12 @@ impl StoreElement { /// buffer regardless of subsequent loads by the same thread; if the earliest load of another /// thread doesn't happen before the current one, then no subsequent load by the other thread /// can happen before the current one. - fn load_impl(&self, index: VectorIdx, clocks: &ThreadClockSet, is_seqcst: bool) -> Scalar { + fn load_impl( + &self, + index: VectorIdx, + clocks: &ThreadClockSet, + is_seqcst: bool, + ) -> Option { let mut load_info = self.load_info.borrow_mut(); load_info.sc_loaded |= is_seqcst; let _ = load_info.timestamps.try_insert(index, clocks.clock[index]); @@ -479,7 +471,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { global.sc_write(threads); } let range = alloc_range(base_offset, place.layout.size); - let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?; + let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?; buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst); buffer.buffered_write(new_val, global, threads, atomic == AtomicRwOrd::SeqCst)?; } @@ -492,47 +484,55 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { atomic: AtomicReadOrd, latest_in_mo: Scalar, validate: impl FnOnce() -> InterpResult<'tcx>, - ) -> InterpResult<'tcx, Scalar> { + ) -> InterpResult<'tcx, Option> { let this = self.eval_context_ref(); - if let Some(global) = &this.machine.data_race { - let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?; - if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { - if atomic == AtomicReadOrd::SeqCst { - global.sc_read(&this.machine.threads); - } - let mut rng = this.machine.rng.borrow_mut(); - let buffer = alloc_buffers.get_or_create_store_buffer( - alloc_range(base_offset, place.layout.size), - latest_in_mo, - )?; - let (loaded, recency) = buffer.buffered_read( - global, - &this.machine.threads, - atomic == AtomicReadOrd::SeqCst, - &mut *rng, - validate, - )?; - if global.track_outdated_loads && recency == LoadRecency::Outdated { - this.emit_diagnostic(NonHaltingDiagnostic::WeakMemoryOutdatedLoad { - ptr: place.ptr(), - }); + 'fallback: { + if let Some(global) = &this.machine.data_race { + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + if atomic == AtomicReadOrd::SeqCst { + global.sc_read(&this.machine.threads); + } + let mut rng = this.machine.rng.borrow_mut(); + let Some(buffer) = alloc_buffers + .get_store_buffer(alloc_range(base_offset, place.layout.size))? + else { + // No old writes available, fall back to base case. + break 'fallback; + }; + let (loaded, recency) = buffer.buffered_read( + global, + &this.machine.threads, + atomic == AtomicReadOrd::SeqCst, + &mut *rng, + validate, + )?; + if global.track_outdated_loads && recency == LoadRecency::Outdated { + this.emit_diagnostic(NonHaltingDiagnostic::WeakMemoryOutdatedLoad { + ptr: place.ptr(), + }); + } + + return Ok(loaded); } - - return Ok(loaded); } } // Race detector or weak memory disabled, simply read the latest value validate()?; - Ok(latest_in_mo) + Ok(Some(latest_in_mo)) } + /// Add the given write to the store buffer. (Does not change machine memory.) + /// + /// `init` says with which value to initialize the store buffer in case there wasn't a store + /// buffer for this memory range before. fn buffered_atomic_write( &mut self, val: Scalar, dest: &MPlaceTy<'tcx>, atomic: AtomicWriteOrd, - init: Scalar, + init: Option, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?; @@ -545,23 +545,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { global.sc_write(threads); } - // UGLY HACK: in write_scalar_atomic() we don't know the value before our write, - // so init == val always. If the buffer is fresh then we would've duplicated an entry, - // so we need to remove it. - // See /~https://github.com/rust-lang/miri/issues/2164 - let was_empty = matches!( - alloc_buffers - .store_buffers - .borrow() - .access_type(alloc_range(base_offset, dest.layout.size)), - AccessType::Empty(_) - ); let buffer = alloc_buffers .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?; - if was_empty { - buffer.buffer.pop_front(); - } - buffer.buffered_write(val, global, threads, atomic == AtomicWriteOrd::SeqCst)?; } @@ -576,7 +561,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { &self, place: &MPlaceTy<'tcx>, atomic: AtomicReadOrd, - init: Scalar, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); @@ -587,8 +571,12 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let size = place.layout.size; let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?; if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { - let buffer = alloc_buffers - .get_or_create_store_buffer(alloc_range(base_offset, size), init)?; + let Some(buffer) = + alloc_buffers.get_store_buffer(alloc_range(base_offset, size))? + else { + // No store buffer, nothing to do. + return Ok(()); + }; buffer.read_from_last_store( global, &this.machine.threads, diff --git a/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs b/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs new file mode 100644 index 0000000000000..54bea6c6908e3 --- /dev/null +++ b/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs @@ -0,0 +1,43 @@ +//@compile-flags: -Zmiri-ignore-leaks -Zmiri-preemption-rate=0 + +// Tests showing weak memory behaviours are exhibited. All tests +// return true when the desired behaviour is seen. +// This is scheduler and pseudo-RNG dependent, so each test is +// run multiple times until one try returns true. +// Spurious failure is possible, if you are really unlucky with +// the RNG and always read the latest value from the store buffer. +#![feature(new_uninit)] + +use std::sync::atomic::*; +use std::thread::spawn; + +#[allow(dead_code)] +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +// We can't create static items because we need to run each test multiple times. +fn static_uninit_atomic() -> &'static AtomicUsize { + unsafe { Box::leak(Box::new_uninit()).assume_init_ref() } +} + +fn relaxed() { + let x = static_uninit_atomic(); + let j1 = spawn(move || { + x.store(1, Ordering::Relaxed); + }); + + let j2 = spawn(move || x.load(Ordering::Relaxed)); //~ERROR: using uninitialized data + + j1.join().unwrap(); + j2.join().unwrap(); +} + +pub fn main() { + // If we try often enough, we should hit UB. + for _ in 0..100 { + relaxed(); + } +} diff --git a/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr b/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr new file mode 100644 index 0000000000000..9aa5bc2fa76df --- /dev/null +++ b/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr @@ -0,0 +1,15 @@ +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> $DIR/weak_uninit.rs:LL:CC + | +LL | let j2 = spawn(move || x.load(Ordering::Relaxed)); + | ^^^^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE on thread `unnamed-ID`: + = note: inside closure at $DIR/weak_uninit.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error + diff --git a/src/tools/miri/tests/pass/weak_memory/weak.rs b/src/tools/miri/tests/pass/weak_memory/weak.rs index dac63eeeb0b24..1b5c98cd51826 100644 --- a/src/tools/miri/tests/pass/weak_memory/weak.rs +++ b/src/tools/miri/tests/pass/weak_memory/weak.rs @@ -18,11 +18,9 @@ struct EvilSend(pub T); unsafe impl Send for EvilSend {} unsafe impl Sync for EvilSend {} -// We can't create static items because we need to run each test -// multiple times +// We can't create static items because we need to run each test multiple times. fn static_atomic(val: usize) -> &'static AtomicUsize { - let ret = Box::leak(Box::new(AtomicUsize::new(val))); - ret + Box::leak(Box::new(AtomicUsize::new(val))) } // Spins until it reads the given value @@ -33,7 +31,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { val } -fn relaxed() -> bool { +fn relaxed(initial_read: bool) -> bool { let x = static_atomic(0); let j1 = spawn(move || { x.store(1, Relaxed); @@ -47,7 +45,9 @@ fn relaxed() -> bool { j1.join().unwrap(); let r2 = j2.join().unwrap(); - r2 == 1 + // There are three possible values here: 0 (from the initial read), 1 (from the first relaxed + // read), and 2 (the last read). The last case is boring and we cover the other two. + r2 == if initial_read { 0 } else { 1 } } // https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8 @@ -74,7 +74,6 @@ fn seq_cst() -> bool { fn initialization_write(add_fence: bool) -> bool { let x = static_atomic(11); - assert_eq!(x.load(Relaxed), 11); // work around /~https://github.com/rust-lang/miri/issues/2164 let wait = static_atomic(0); @@ -112,11 +111,8 @@ fn faa_replaced_by_load() -> bool { } let x = static_atomic(0); - assert_eq!(x.load(Relaxed), 0); // work around /~https://github.com/rust-lang/miri/issues/2164 let y = static_atomic(0); - assert_eq!(y.load(Relaxed), 0); // work around /~https://github.com/rust-lang/miri/issues/2164 let z = static_atomic(0); - assert_eq!(z.load(Relaxed), 0); // work around /~https://github.com/rust-lang/miri/issues/2164 // Since each thread is so short, we need to make sure that they truely run at the same time // Otherwise t1 will finish before t2 even starts @@ -146,7 +142,8 @@ fn assert_once(f: fn() -> bool) { } pub fn main() { - assert_once(relaxed); + assert_once(|| relaxed(false)); + assert_once(|| relaxed(true)); assert_once(seq_cst); assert_once(|| initialization_write(false)); assert_once(|| initialization_write(true));