From e30f55ff761ddb871d29218fd380553d3d4fc21e Mon Sep 17 00:00:00 2001
From: Oliver Scherer
Date: Mon, 18 Feb 2019 10:54:16 +0100
Subject: [PATCH] Optimize copying large ranges of undefmask blocks

---
Reviewer notes (commentary only; not part of the commit message or the diff):
  * allocation.rs: `BLOCK_SIZE` moves from a module-level const to the public
    associated const `UndefMask::BLOCK_SIZE`; `grow` and `bit_index` are
    updated to the new path.
  * `UndefMask::set` now delegates to the new private `set_bit(block, bit, ..)`.
  * `UndefMask::set_range_inbounds` no longer calls `set` once per byte. A range
    inside one block is written bit by bit via `set_bit`; a range spanning
    several blocks writes the partial first and last blocks bit by bit and
    overwrites every block strictly in between with all-ones or zero.
  * memory.rs: when `size.bytes() * repeat > UndefMask::BLOCK_SIZE` and every
    source bit equals the first one, the destination range is written with a
    single `set_range` call and the function returns early; otherwise the
    per-bit loop runs as before, now reading through the `get` closure.
  * Three FIXMEs are left for follow-up: the linear scan for the first
    undefined byte, the zero-fill in `grow`, and the uniformity check in
    memory.rs.

 src/librustc/mir/interpret/allocation.rs | 45 ++++++++++++++++++++----
 src/librustc_mir/interpret/memory.rs     | 22 ++++++++++--
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index e96392edd64bf..06d5f27ccd744 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -613,7 +613,6 @@ impl DerefMut for Relocations {
 ////////////////////////////////////////////////////////////////////////////////
 
 type Block = u64;
-const BLOCK_SIZE: u64 = 64;
 
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
 pub struct UndefMask {
@@ -624,6 +623,8 @@ pub struct UndefMask {
 impl_stable_hash_for!(struct mir::interpret::UndefMask{blocks, len});
 
 impl UndefMask {
+    pub const BLOCK_SIZE: u64 = 64;
+
     pub fn new(size: Size) -> Self {
         let mut m = UndefMask {
             blocks: vec![],
@@ -643,6 +644,7 @@ impl UndefMask {
             return Err(self.len);
         }
 
+        // FIXME(oli-obk): optimize this for allocations larger than a block.
         let idx = (start.bytes()..end.bytes())
             .map(|i| Size::from_bytes(i))
             .find(|&i| !self.get(i));
@@ -662,8 +664,31 @@ impl UndefMask {
     }
 
     pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
-        for i in start.bytes()..end.bytes() {
-            self.set(Size::from_bytes(i), new_state);
+        let (blocka, bita) = bit_index(start);
+        let (blockb, bitb) = bit_index(end);
+        if blocka == blockb {
+            // within a single block
+            for i in bita .. bitb {
+                self.set_bit(blocka, i, new_state);
+            }
+            return;
+        }
+        // across block boundaries
+        for i in bita .. Self::BLOCK_SIZE as usize {
+            self.set_bit(blocka, i, new_state);
+        }
+        for i in 0 .. bitb {
+            self.set_bit(blockb, i, new_state);
+        }
+        // fill in all the other blocks (much faster than one bit at a time)
+        if new_state {
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = 0xFFFF_FFFF_FFFF_FFFF;
+            }
+        } else {
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = 0;
+            }
         }
     }
 
@@ -676,6 +701,11 @@ impl UndefMask {
     #[inline]
     pub fn set(&mut self, i: Size, new_state: bool) {
         let (block, bit) = bit_index(i);
+        self.set_bit(block, bit, new_state);
+    }
+
+    #[inline]
+    fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) {
         if new_state {
             self.blocks[block] |= 1 << bit;
         } else {
@@ -684,11 +714,12 @@ impl UndefMask {
     }
 
     pub fn grow(&mut self, amount: Size, new_state: bool) {
-        let unused_trailing_bits = self.blocks.len() as u64 * BLOCK_SIZE - self.len.bytes();
+        let unused_trailing_bits = self.blocks.len() as u64 * Self::BLOCK_SIZE - self.len.bytes();
         if amount.bytes() > unused_trailing_bits {
-            let additional_blocks = amount.bytes() / BLOCK_SIZE + 1;
+            let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
             assert_eq!(additional_blocks as usize as u64, additional_blocks);
             self.blocks.extend(
+                // FIXME(oli-obk): optimize this by repeating `new_state as Block`
                 iter::repeat(0).take(additional_blocks as usize),
             );
         }
@@ -701,8 +732,8 @@ impl UndefMask {
 #[inline]
 fn bit_index(bits: Size) -> (usize, usize) {
     let bits = bits.bytes();
-    let a = bits / BLOCK_SIZE;
-    let b = bits % BLOCK_SIZE;
+    let a = bits / UndefMask::BLOCK_SIZE;
+    let b = bits % UndefMask::BLOCK_SIZE;
     assert_eq!(a as usize as u64, a);
     assert_eq!(b as usize as u64, b);
     (a as usize, b as usize)
diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 88b936afaa4c1..78668c5ad875e 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -20,7 +20,7 @@ use syntax::ast::Mutability;
 use super::{
     Pointer, AllocId, Allocation, GlobalId, AllocationExtra,
     EvalResult, Scalar, EvalErrorKind, AllocKind, PointerArithmetic,
-    Machine, AllocMap, MayLeak, ErrorHandled, InboundsCheck,
+    Machine, AllocMap, MayLeak, ErrorHandled, InboundsCheck, UndefMask,
 };
 
 #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
@@ -785,10 +785,28 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         assert_eq!(size.bytes() as usize as u64, size.bytes());
 
         let undef_mask = self.get(src.alloc_id)?.undef_mask.clone();
+        let get = |i| undef_mask.get(src.offset + Size::from_bytes(i));
         let dest_allocation = self.get_mut(dest.alloc_id)?;
 
+        // an optimization where we can just overwrite an entire range of definedness bits if
+        // they are going to be uniformly `1` or `0`.
+        if size.bytes() * repeat > UndefMask::BLOCK_SIZE {
+            let first = undef_mask.get(src.offset);
+            // check that all bits are the same as the first bit
+            // FIXME(oli-obk): consider making this a function on `UndefMask` and optimize it, too
+            if (1..size.bytes()).all(|i| get(i) == first) {
+                dest_allocation.undef_mask.set_range(
+                    dest.offset,
+                    dest.offset + size * repeat,
+                    first,
+                );
+                return Ok(())
+            }
+        }
+
+        // the default path
         for i in 0..size.bytes() {
-            let defined = undef_mask.get(src.offset + Size::from_bytes(i));
+            let defined = get(i);
 
             for j in 0..repeat {
                 dest_allocation.undef_mask.set(