From 8876cf7181556a0820e8ea6e40dad309a1063139 Mon Sep 17 00:00:00 2001
From: Oli Scherer
Date: Fri, 10 Jan 2025 11:28:20 +0000
Subject: [PATCH 1/4] Also generate undef scalars and scalar pairs

---
 compiler/rustc_codegen_ssa/src/mir/operand.rs | 59 ++++++++++++-------
 .../src/mir/interpret/allocation.rs           |  2 +-
 tests/codegen/overaligned-constant.rs         |  2 -
 3 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs
index 19101ec2d1ba3..9ca7d4f8f0047 100644
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -204,14 +204,30 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
         let alloc_align = alloc.inner().align;
         assert!(alloc_align >= layout.align.abi);
 
+        // Returns `None` when the value is partially undefined or any byte of it has provenance.
+        // Otherwise returns the value, or an undef constant if the entire value is undefined.
         let read_scalar = |start, size, s: abi::Scalar, ty| {
+            let range = alloc_range(start, size);
             match alloc.0.read_scalar(
                 bx,
-                alloc_range(start, size),
+                range,
                 /*read_provenance*/ matches!(s.primitive(), abi::Primitive::Pointer(_)),
             ) {
-                Ok(val) => bx.scalar_to_backend(val, s, ty),
-                Err(_) => bx.const_poison(ty),
+                Ok(val) => Some(bx.scalar_to_backend(val, s, ty)),
+                Err(_) => {
+                    // We may have failed due to partial or unexpected provenance;
+                    // if so, continue down the normal code path.
+                    if alloc.0.provenance().range_empty(range, &bx.tcx())
+                        // Since `read_scalar` failed even though no relocations were involved,
+                        // the bytes must be partially or fully uninitialized. Thus we can unwrap
+                        // the error to get the uninit range and check whether it is the full range.
+                        && alloc.0.init_mask().is_range_initialized(range).unwrap_err() == range
+                    {
+                        Some(bx.const_undef(ty))
+                    } else {
+                        None
+                    }
+                }
             }
         };
 
@@ -222,16 +238,14 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
         // check that walks over the type of `mplace` to make sure it is truly correct to treat this
         // like a `Scalar` (or `ScalarPair`).
         match layout.backend_repr {
-            BackendRepr::Scalar(s @ abi::Scalar::Initialized { .. }) => {
+            BackendRepr::Scalar(s) => {
                 let size = s.size(bx);
                 assert_eq!(size, layout.size, "abi::Scalar size does not match layout size");
-                let val = read_scalar(offset, size, s, bx.immediate_backend_type(layout));
-                OperandRef { val: OperandValue::Immediate(val), layout }
+                if let Some(val) = read_scalar(offset, size, s, bx.immediate_backend_type(layout)) {
+                    return OperandRef { val: OperandValue::Immediate(val), layout };
+                }
             }
-            BackendRepr::ScalarPair(
-                a @ abi::Scalar::Initialized { .. },
-                b @ abi::Scalar::Initialized { .. },
-            ) => {
+            BackendRepr::ScalarPair(a, b) => {
                 let (a_size, b_size) = (a.size(bx), b.size(bx));
                 let b_offset = (offset + a_size).align_to(b.align(bx).abi);
                 assert!(b_offset.bytes() > 0);
@@ -247,20 +261,21 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
                     b,
                     bx.scalar_pair_element_backend_type(layout, 1, true),
                 );
-                OperandRef { val: OperandValue::Pair(a_val, b_val), layout }
-            }
-            _ if layout.is_zst() => OperandRef::zero_sized(layout),
-            _ => {
-                // Neither a scalar nor scalar pair. Load from a place
-                // FIXME: should we cache `const_data_from_alloc` to avoid repeating this for the
-                // same `ConstAllocation`?
-                let init = bx.const_data_from_alloc(alloc);
-                let base_addr = bx.static_addr_of(init, alloc_align, None);
-
-                let llval = bx.const_ptr_byte_offset(base_addr, offset);
-                bx.load_operand(PlaceRef::new_sized(llval, layout))
+                if let (Some(a_val), Some(b_val)) = (a_val, b_val) {
+                    return OperandRef { val: OperandValue::Pair(a_val, b_val), layout };
+                }
             }
+            _ if layout.is_zst() => return OperandRef::zero_sized(layout),
+            _ => {}
         }
+        // Neither a scalar nor a scalar pair: load from a place.
+        // FIXME: should we cache `const_data_from_alloc` to avoid repeating this for the
+        // same `ConstAllocation`?
+        let init = bx.const_data_from_alloc(alloc);
+        let base_addr = bx.static_addr_of(init, alloc_align, None);
+
+        let llval = bx.const_ptr_byte_offset(base_addr, offset);
+        bx.load_operand(PlaceRef::new_sized(llval, layout))
     }
 
     /// Asserts that this operand refers to a scalar and returns
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index d6f8fed755f08..1b07846e0cf6c 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -222,7 +222,7 @@ impl AllocError {
 }
 
 /// The information that makes up a memory access: offset and size.
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, PartialEq)]
 pub struct AllocRange {
     pub start: Size,
     pub size: Size,
diff --git a/tests/codegen/overaligned-constant.rs b/tests/codegen/overaligned-constant.rs
index 7cd8d19c21135..e5540aca38780 100644
--- a/tests/codegen/overaligned-constant.rs
+++ b/tests/codegen/overaligned-constant.rs
@@ -17,8 +17,6 @@ pub fn overaligned_constant() {
     // CHECK-LABEL: @overaligned_constant
     // CHECK: [[full:%_.*]] = alloca [32 x i8], align 8
     // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 @0, i64 32, i1 false)
-    // CHECK: %b.0 = load i32, ptr @0, align 4
-    // CHECK: %b.1 = load i32, ptr getelementptr inbounds ({{.*}}), align 4
     let mut s = S(1);
     s.0 = 3;

From 964c58a7d96d4cd2dbda8fb40ff0f15ef10b78e3 Mon Sep 17 00:00:00 2001
From: Oli Scherer
Date: Tue, 21 Jan 2025 08:25:48 +0000
Subject: [PATCH 2/4] Ensure we always get a constant, even without mir opts

---
 tests/codegen/slice-init.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/codegen/slice-init.rs b/tests/codegen/slice-init.rs
index 1c2dd3e887555..52b96cf8cd032 100644
--- a/tests/codegen/slice-init.rs
+++ b/tests/codegen/slice-init.rs
@@ -86,7 +86,7 @@ pub fn option_none_init<const N: usize>() -> [Option<u8>; N] {
     // CHECK-NOT: switch
     // CHECK: icmp
     // CHECK-NOT: call void @llvm.memset.p0
-    [None; N]
+    [const { None }; N]
 }
 
 // Use an opaque function to prevent rustc from removing useless drops.
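
Note on PATCH 2: per its subject line, a plain `[None; N]` repeat only reaches
codegen as a constant once MIR optimizations have evaluated the element, while
an inline `const` block is const-evaluated unconditionally. A minimal sketch of
the difference (the function names are invented for illustration and are not
part of the series):

    // Both functions return the same array, but only the inline-const form
    // guarantees a constant repeat element at every mir-opt level.
    pub fn relies_on_mir_opts<const N: usize>() -> [Option<u8>; N] {
        [None; N] // becoming a constant operand depends on mir opts
    }

    pub fn always_constant<const N: usize>() -> [Option<u8>; N] {
        [const { None }; N] // const block is evaluated at compile time, always
    }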
From dfa4c01b2e03ad740012236db00adfddc82883e8 Mon Sep 17 00:00:00 2001
From: Oli Scherer
Date: Wed, 8 Jan 2025 15:00:25 +0000
Subject: [PATCH 3/4] Treat undef bytes as equal to any other byte

---
 compiler/rustc_codegen_gcc/src/common.rs     |  5 ++++
 compiler/rustc_codegen_llvm/src/common.rs    |  4 +++
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs  |  1 +
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs | 26 +++++++++++++++++--
 .../rustc_codegen_ssa/src/traits/consts.rs   |  1 +
 tests/codegen/slice-init.rs                  | 10 +++----
 6 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/compiler/rustc_codegen_gcc/src/common.rs b/compiler/rustc_codegen_gcc/src/common.rs
index f43743fc2a41b..bd5d6ba387cff 100644
--- a/compiler/rustc_codegen_gcc/src/common.rs
+++ b/compiler/rustc_codegen_gcc/src/common.rs
@@ -64,6 +64,11 @@ impl<'gcc, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         if type_is_pointer(typ) { self.context.new_null(typ) } else { self.const_int(typ, 0) }
     }
 
+    fn is_undef(&self, _val: RValue<'gcc>) -> bool {
+        // FIXME: actually check for undef
+        false
+    }
+
     fn const_undef(&self, typ: Type<'gcc>) -> RValue<'gcc> {
         let local = self.current_func.borrow().expect("func").new_local(None, typ, "undefined");
         if typ.is_struct().is_some() {
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index adfe8aeb5c539..b4e9b9f44f4ab 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -126,6 +126,10 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         unsafe { llvm::LLVMGetUndef(t) }
     }
 
+    fn is_undef(&self, v: &'ll Value) -> bool {
+        unsafe { llvm::LLVMIsUndef(v) == True }
+    }
+
     fn const_poison(&self, t: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMGetPoison(t) }
     }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index ec6c84f6f2567..9349ae212d21b 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -918,6 +918,7 @@ unsafe extern "C" {
     pub fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
 
     // Operations on all values
+    pub fn LLVMIsUndef(Val: &Value) -> Bool;
     pub fn LLVMTypeOf(Val: &Value) -> &Type;
     pub fn LLVMGetValueName2(Val: &Value, Length: *mut size_t) -> *const c_char;
     pub fn LLVMSetValueName2(Val: &Value, Name: *const c_char, NameLen: size_t);
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index eb4ef599b82e6..cb4862663fda7 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -8,7 +8,7 @@ use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
 use rustc_middle::{bug, mir, span_bug};
 use rustc_session::config::OptLevel;
 use rustc_span::{DUMMY_SP, Span};
-use tracing::{debug, instrument};
+use tracing::{debug, instrument, trace};
 
 use super::operand::{OperandRef, OperandValue};
 use super::place::PlaceRef;
@@ -93,6 +93,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             return;
         }
 
+        // If `v` is an integer constant whose value is just a single byte repeated N times,
+        // emit a `memset` filling the entire `dest` with that byte.
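+        // For example, `[0u64; N]` (every byte `0x00`) and
+        // `[u16::from_be_bytes([1, 1]); N]` (every byte `0x01`; see the
+        // `u16_init_one_bytes` test in tests/codegen/slice-init.rs) both
+        // qualify for a single `memset`.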
         let try_init_all_same = |bx: &mut Bx, v| {
             let start = dest.val.llval;
             let size = bx.const_usize(dest.layout.size.bytes());
@@ -117,13 +119,33 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             false
         };
 
+        trace!(?cg_elem.val);
         match cg_elem.val {
             OperandValue::Immediate(v) => {
                 if try_init_all_same(bx, v) {
                     return;
                 }
             }
-            _ => (),
+            OperandValue::Pair(a, b) => {
+                let a_is_undef = bx.cx().is_undef(a);
+                match (a_is_undef, bx.cx().is_undef(b)) {
+                    // Can happen for uninit unions
+                    (true, true) => {
+                        // FIXME: can we produce better output here?
+                    }
+                    (false, true) | (true, false) => {
+                        let val = if a_is_undef { b } else { a };
+                        if try_init_all_same(bx, val) {
+                            return;
+                        }
+                    }
+                    (false, false) => {
+                        // FIXME: if both are the same value, use try_init_all_same
+                    }
+                }
+            }
+            OperandValue::ZeroSized => unreachable!("checked above"),
+            OperandValue::Ref(..) => {}
         }
 
         let count = self
diff --git a/compiler/rustc_codegen_ssa/src/traits/consts.rs b/compiler/rustc_codegen_ssa/src/traits/consts.rs
index 9af463a691af0..d0de7ff0b5ff1 100644
--- a/compiler/rustc_codegen_ssa/src/traits/consts.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/consts.rs
@@ -9,6 +9,7 @@ pub trait ConstCodegenMethods<'tcx>: BackendTypes {
     /// Generate an uninitialized value (matching uninitialized memory in MIR).
     /// Whether memory is initialized or not is tracked byte-for-byte.
     fn const_undef(&self, t: Self::Type) -> Self::Value;
+    fn is_undef(&self, v: Self::Value) -> bool;
     /// Generate a fake value. Poison always affects the entire value, even if just a single byte is
     /// poison. This can only be used in codepaths that are already UB, i.e., UB-free Rust code
     /// (including code that e.g. copies uninit memory with `MaybeUninit`) can never encounter a
diff --git a/tests/codegen/slice-init.rs b/tests/codegen/slice-init.rs
index 52b96cf8cd032..73f808db46182 100644
--- a/tests/codegen/slice-init.rs
+++ b/tests/codegen/slice-init.rs
@@ -2,6 +2,8 @@
 
 #![crate_type = "lib"]
 
+use std::mem::MaybeUninit;
+
 // CHECK-LABEL: @zero_sized_elem
 #[no_mangle]
 pub fn zero_sized_elem() {
@@ -76,16 +78,14 @@ pub fn u16_init_one_bytes<const N: usize>() -> [u16; N] {
     [const { u16::from_be_bytes([1, 1]) }; N]
 }
 
-// FIXME: undef bytes can just be initialized with the same value as the
-// defined bytes, if the defines bytes are all the same.
 // CHECK-LABEL: @option_none_init
 #[no_mangle]
 pub fn option_none_init<const N: usize>() -> [Option<u8>; N] {
     // CHECK-NOT: select
-    // CHECK: br label %repeat_loop_header{{.*}}
+    // CHECK-NOT: br
     // CHECK-NOT: switch
-    // CHECK: icmp
-    // CHECK-NOT: call void @llvm.memset.p0
+    // CHECK-NOT: icmp
+    // CHECK: call void @llvm.memset.p0
     [const { None }; N]
 }

From 8f5f5e56a8edbe3a231441f6051fb7dc4ed8e193 Mon Sep 17 00:00:00 2001
From: Oli Scherer
Date: Tue, 21 Jan 2025 08:27:30 +0000
Subject: [PATCH 4/4] Add more tests

---
 tests/codegen/slice-init.rs | 49 +++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tests/codegen/slice-init.rs b/tests/codegen/slice-init.rs
index 73f808db46182..b36a5b5de3d41 100644
--- a/tests/codegen/slice-init.rs
+++ b/tests/codegen/slice-init.rs
@@ -89,6 +89,55 @@ pub fn option_none_init<const N: usize>() -> [Option<u8>; N] {
     [const { None }; N]
 }
 
+// If there is partial provenance or some bytes are initialized and some are not,
+// we can't really do better than initialize bytes or groups of bytes together.
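+// In `option_maybe_uninit_init` below, each element is a `MaybeUninit<u16>` whose
+// low byte is written and whose high byte is left uninit, so no single byte
+// pattern covers the whole element and codegen falls back to the element-wise loop.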
+// CHECK-LABEL: @option_maybe_uninit_init
+#[no_mangle]
+pub fn option_maybe_uninit_init<const N: usize>() -> [MaybeUninit<u16>; N] {
+    // CHECK-NOT: select
+    // CHECK: br label %repeat_loop_header{{.*}}
+    // CHECK-NOT: switch
+    // CHECK: icmp
+    // CHECK-NOT: call void @llvm.memset.p0
+    [const {
+        let mut val: MaybeUninit<u16> = MaybeUninit::uninit();
+        let ptr = val.as_mut_ptr() as *mut u8;
+        unsafe {
+            ptr.write(0);
+        }
+        val
+    }; N]
+}
+
+#[repr(packed)]
+struct Packed {
+    start: u8,
+    ptr: &'static (),
+    rest: u16,
+    rest2: u8,
+}
+
+// If there is partial provenance or some bytes are initialized and some are not,
+// we can't really do better than initialize bytes or groups of bytes together.
+// CHECK-LABEL: @option_maybe_uninit_provenance
+#[no_mangle]
+pub fn option_maybe_uninit_provenance<const N: usize>() -> [MaybeUninit<Packed>; N] {
+    // CHECK-NOT: select
+    // CHECK: br label %repeat_loop_header{{.*}}
+    // CHECK-NOT: switch
+    // CHECK: icmp
+    // CHECK-NOT: call void @llvm.memset.p0
+    [const {
+        let mut val: MaybeUninit<Packed> = MaybeUninit::uninit();
+        unsafe {
+            let ptr = &raw mut (*val.as_mut_ptr()).ptr;
+            static HAS_ADDR: () = ();
+            ptr.write_unaligned(&HAS_ADDR);
+        }
+        val
+    }; N]
+}
+
 // Use an opaque function to prevent rustc from removing useless drops.
 #[inline(never)]
 pub fn opaque(_: impl Sized) {}
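
As a closing illustration of what the series buys (a standalone sketch; the
function name is invented and the expectation mirrors the updated
`option_none_init` test above): `None::<u8>` is codegenned as a scalar pair of
an initialized tag byte and an undef payload byte, and since PATCH 3 treats
undef bytes as equal to any other byte, the whole array below can be filled
with a single `memset`.

    // With the series applied, this repeat is expected to lower to one
    // `llvm.memset` call instead of an element-by-element loop.
    #[no_mangle]
    pub fn none_array() -> [Option<u8>; 64] {
        [const { None }; 64]
    }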