From 6c3a5b386512ab0df4a8d1522938c63771e1b39c Mon Sep 17 00:00:00 2001 From: Sam Rijs Date: Sat, 1 Dec 2018 16:28:30 +1100 Subject: [PATCH] replace flate2-crc with crc32fast --- .travis.yml | 2 - Cargo.toml | 2 +- appveyor.yml | 2 - flate2-crc/Cargo.toml | 21 ----- flate2-crc/LICENSE-APACHE | 1 - flate2-crc/LICENSE-MIT | 1 - flate2-crc/benches/run.rs | 69 ---------------- flate2-crc/build.rs | 36 --------- flate2-crc/src/lib.rs | 103 ------------------------ flate2-crc/src/other.rs | 12 --- flate2-crc/src/x86.rs | 160 -------------------------------------- src/crc.rs | 32 ++------ src/ffi.rs | 113 +-------------------------- src/lib.rs | 2 +- 14 files changed, 10 insertions(+), 546 deletions(-) delete mode 100644 flate2-crc/Cargo.toml delete mode 120000 flate2-crc/LICENSE-APACHE delete mode 120000 flate2-crc/LICENSE-MIT delete mode 100644 flate2-crc/benches/run.rs delete mode 100644 flate2-crc/build.rs delete mode 100644 flate2-crc/src/lib.rs delete mode 100644 flate2-crc/src/other.rs delete mode 100644 flate2-crc/src/x86.rs diff --git a/.travis.yml b/.travis.yml index 2ed33e8d6..ebb630223 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,8 +43,6 @@ script: - cargo test --features tokio - cargo test --features 'tokio zlib' - cargo test --features zlib --no-default-features - - cargo test --manifest-path flate2-crc/Cargo.toml - - cargo test --release --manifest-path flate2-crc/Cargo.toml - cargo clean && cargo build - cargo doc --no-deps - cargo doc --no-deps --manifest-path=miniz-sys/Cargo.toml diff --git a/Cargo.toml b/Cargo.toml index b476a3ce7..1f5d60913 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ libz-sys = { version = "1.0", optional = true } tokio-io = { version = "0.1", optional = true } futures = { version = "0.1", optional = true } miniz_oxide_c_api = { version = "0.2", optional = true, features = ["no_c_export"]} -flate2-crc = { version = '0.1', path = 'flate2-crc' } +crc32fast = "1.1" [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] miniz_oxide_c_api = { version = "0.2", features = ["no_c_export"] } diff --git a/appveyor.yml b/appveyor.yml index f66ffed29..2baae5822 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,8 +22,6 @@ build: false test_script: - cargo test --target %TARGET% - cargo test --target %TARGET% --features tokio - - cargo test --target %TARGET% --manifest-path flate2-crc/Cargo.toml - - cargo test --target %TARGET% --manifest-path flate2-crc/Cargo.toml --release branches: only: diff --git a/flate2-crc/Cargo.toml b/flate2-crc/Cargo.toml deleted file mode 100644 index d0a33a12d..000000000 --- a/flate2-crc/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "flate2-crc" -version = "0.1.1" -authors = ["Alex Crichton "] -license = "MIT/Apache-2.0" -repository = "/~https://github.com/alexcrichton/flate2-rs/tree/flate2-crc" -homepage = "/~https://github.com/alexcrichton/flate2-rs" -documentation = "https://docs.rs/flate2-crc" -description = """ -SIMD acceleration for CRC-32 checksums used in the gzip format -""" - -[dependencies] -cfg-if = "0.1.6" - -[dev-dependencies] -miniz-sys = { path = '../miniz-sys', version = '0.1' } -rand = "0.6" -libz-sys = "1.0" -rayon = "1.0.3" -quickcheck = "0.7" diff --git a/flate2-crc/LICENSE-APACHE b/flate2-crc/LICENSE-APACHE deleted file mode 120000 index 965b606f3..000000000 --- a/flate2-crc/LICENSE-APACHE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-APACHE \ No newline at end of file diff --git a/flate2-crc/LICENSE-MIT b/flate2-crc/LICENSE-MIT deleted file mode 120000 index 76219eb72..000000000 --- a/flate2-crc/LICENSE-MIT +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-MIT \ No newline at end of file diff --git a/flate2-crc/benches/run.rs b/flate2-crc/benches/run.rs deleted file mode 100644 index ae49130e5..000000000 --- a/flate2-crc/benches/run.rs +++ /dev/null @@ -1,69 +0,0 @@ -#![feature(test)] - -extern crate flate2_crc; -extern crate rand; -extern crate test; -extern crate miniz_sys; -extern crate libz_sys; - -use rand::{thread_rng, RngCore}; - -fn flate2_crc(data: &[u8]) -> u32 { - flate2_crc::Hardware::detect().calculate(0, data, |crc, data| { - unsafe { - miniz_sys::mz_crc32(crc as u64, data.as_ptr(), data.len()) as u32 - } - }) -} - -fn miniz(data: &[u8]) -> u32 { - unsafe { - miniz_sys::mz_crc32(0, data.as_ptr(), data.len()) as u32 - } -} - -fn zlib(data: &[u8]) -> u32 { - unsafe { - libz_sys::crc32(0, data.as_ptr(), data.len() as u32) as u32 - } -} - -macro_rules! benches { - ($($f:ident => ($small:ident, $medium:ident, $large:ident),)*) => ($( - #[bench] - fn $small(b: &mut test::Bencher) { - let mut rng = thread_rng(); - let mut buf = vec![0u8; 8]; - rng.fill_bytes(&mut buf); - - b.bytes = 8; - b.iter(|| $f(&buf)); - } - - #[bench] - fn $medium(b: &mut test::Bencher) { - let mut rng = thread_rng(); - let mut buf = vec![0u8; 65_000]; - rng.fill_bytes(&mut buf); - - b.bytes = 65_000; - b.iter(|| $f(&buf)); - } - - #[bench] - fn $large(b: &mut test::Bencher) { - let mut rng = thread_rng(); - let mut buf = vec![0u8; 1_000_000]; - rng.fill_bytes(&mut buf); - - b.bytes = 1_000_000; - b.iter(|| $f(&buf)); - } - )*) -} - -benches! { - flate2_crc => (flate2_crc_8, flate2_crc_65000, flate2_crc_1000000), - miniz => (miniz_8, miniz_65000, miniz_1000000), - zlib => (zlib_8, zlib_65000, zlib_1000000), -} diff --git a/flate2-crc/build.rs b/flate2-crc/build.rs deleted file mode 100644 index d03562018..000000000 --- a/flate2-crc/build.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::env; -use std::process::Command; -use std::str; - -fn main() { - println!("cargo:rerun-if-changed=build.rs"); - - let minor = match rustc_minor_version() { - Some(n) => n, - None => return, - }; - - if minor >= 27 { - println!("cargo:rustc-cfg=simd"); - } -} - -fn rustc_minor_version() -> Option { - macro_rules! otry { - ($e:expr) => { - match $e { - Some(e) => e, - None => return None, - } - }; - } - let rustc = otry!(env::var_os("RUSTC")); - let output = otry!(Command::new(rustc).arg("--version").output().ok()); - let version = otry!(str::from_utf8(&output.stdout).ok()); - let mut pieces = version.split('.'); - if pieces.next() != Some("rustc 1") { - return None; - } - otry!(pieces.next()).parse().ok() -} - diff --git a/flate2-crc/src/lib.rs b/flate2-crc/src/lib.rs deleted file mode 100644 index f221519c3..000000000 --- a/flate2-crc/src/lib.rs +++ /dev/null @@ -1,103 +0,0 @@ -// Note that this isn't really intended to be a user-facing crate, that's -// `flate2::Crc` - -#[macro_use] -extern crate cfg_if; - -#[cfg(test)] -#[macro_use] -extern crate quickcheck; - -cfg_if! { - if #[cfg(not(simd))] { - mod other; - use self::other as imp; - } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - mod x86; - use self::x86 as imp; - } else { - mod other; - use self::other as imp; - } -} - -#[derive(Debug)] -pub struct Hardware(bool); - -impl Hardware { - #[inline] - pub fn detect() -> Hardware { - Hardware(imp::detect()) - } - - #[inline] - pub fn calculate( - &self, - crc: u32, - data: &[u8], - fallback: fn(u32, &[u8]) -> u32, - ) -> u32 { - if self.0 { - unsafe { imp::calculate(crc, data, fallback) } - } else { - fallback(crc, data) - } - } -} - -#[cfg(test)] -mod tests { - extern crate miniz_sys; - extern crate rand; - extern crate rayon; - - use self::rand::Rng; - use self::rayon::prelude::*; - use super::Hardware; - - fn fallback(a: u32, b: &[u8]) -> u32 { - unsafe { - miniz_sys::mz_crc32(a as _, b.as_ptr(), b.len()) as u32 - } - } - - fn random_chunks(iters: usize, lo: usize, hi: usize) { - let hardware = Hardware::detect(); - - (0..iters) - .into_par_iter() - .for_each_with(Vec::new(), |data, _| { - let mut rng = rand::thread_rng(); - let init = rng.gen::(); - let len = rng.gen_range(lo, hi); - data.resize(len, 0u8); - rng.fill(&mut data[..]); - - assert_eq!( - fallback(init, &data), - hardware.calculate(init, &data, fallback), - ); - }); - } - - #[test] - fn random_small() { - random_chunks(1000, 0, 256); - } - - #[test] - fn random_med() { - random_chunks(1000, 256, 16 * 1024); - } - - #[test] - fn random_large() { - random_chunks(1000, 0, 1024 * 1024); - } - - quickcheck! { - fn prop(crc: u32, xs: Vec) -> bool { - fallback(crc, &xs) == Hardware::detect().calculate(crc, &xs, fallback) - } - } -} diff --git a/flate2-crc/src/other.rs b/flate2-crc/src/other.rs deleted file mode 100644 index 5e855aa20..000000000 --- a/flate2-crc/src/other.rs +++ /dev/null @@ -1,12 +0,0 @@ -#[inline] -pub fn detect() -> bool { - false -} - -pub unsafe fn calculate( - _crc: u32, - _data: &[u8], - _fallback: fn(u32, &[u8]) -> u32, -) -> u32 { - panic!() -} diff --git a/flate2-crc/src/x86.rs b/flate2-crc/src/x86.rs deleted file mode 100644 index bf6595720..000000000 --- a/flate2-crc/src/x86.rs +++ /dev/null @@ -1,160 +0,0 @@ -//! SIMD-based implementation of crc-32 checksums for x86 hardware. -//! -//! This module is based on Intel's paper, "Fast CRC Computation for Generic -//! Polynomials Using PCLMULQDQ Instruction". The code is quite analagous to the -//! paper itself and only largely differs in one area. More information in the -//! comments below! - -#![allow(non_upper_case_globals)] - -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; - -const K1: i64 = 0x154442bd4; -const K2: i64 = 0x1c6e41596; -const K3: i64 = 0x1751997d0; -const K4: i64 = 0x0ccaa009e; -const K5: i64 = 0x163cd6124; -const K6: i64 = 0x1db710640; - -const P_x: i64 = 0x1DB710641; -const U_prime: i64 = 0x1F7011641; - -pub fn detect() -> bool { - is_x86_feature_detected!("pclmulqdq") && - is_x86_feature_detected!("sse2") && - is_x86_feature_detected!("sse4.1") -} - -unsafe fn debug(s: &str, a: __m128i) -> __m128i { - if false { - union A { a: __m128i, b: [u8; 16] } - let x = A { a }.b; - print!(" {:20} | ", s); - for x in x.iter() { - print!("{:02x} ", x); - } - println!(); - } - return a -} - -#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")] -pub unsafe fn calculate( - crc: u32, - mut data: &[u8], - fallback: fn(u32, &[u8]) -> u32, -) -> u32 { - // In theory we can accelerate smaller chunks too, but for now just rely on - // the fallback implementation as it's too much hassle and doesn't seem too - // beneficial. - if data.len() < 128 { - return fallback(crc, data) - } - - // Step 1: fold by 4 loop - let mut x3 = get(&mut data); - let mut x2 = get(&mut data); - let mut x1 = get(&mut data); - let mut x0 = get(&mut data); - - // fold in our initial value, part of the incremental crc checksum - x3 = _mm_xor_si128(x3, _mm_cvtsi32_si128(!crc as i32)); - - let k1k2 = _mm_set_epi64x(K2, K1); - while data.len() >= 64 { - x3 = reduce128(x3, get(&mut data), k1k2); - x2 = reduce128(x2, get(&mut data), k1k2); - x1 = reduce128(x1, get(&mut data), k1k2); - x0 = reduce128(x0, get(&mut data), k1k2); - } - - let k3k4 = _mm_set_epi64x(K4, K3); - let mut x = reduce128(x3, x2, k3k4); - x = reduce128(x, x1, k3k4); - x = reduce128(x, x0, k3k4); - - // Step 2: fold by 1 loop - while data.len() >= 16 { - x = reduce128(x, get(&mut data), k3k4); - } - - debug("128 > 64 init", x); - - // Perform step 3, reduction from 128 bits to 64 bits. This is - // significantly different from the paper and basically doesn't follow it - // at all. It's not really clear why, but implementations of this algorithm - // in Chrome/Linux diverge in the same way. It is beyond me why this is - // different than the paper, maybe the paper has like errata or something? - // Unclear. - // - // It's also not clear to me what's actually happening here and/or why, but - // algebraically what's happening is: - // - // x = (x[0:63] • K4) ^ x[64:127] // 96 bit result - // x = ((x[0:31] as u64) • K5) ^ x[32:95] // 64 bit result - // - // It's... not clear to me what's going on here. The paper itself is pretty - // vague on this part but definitely uses different constants at least. - // It's not clear to me, reading the paper, where the xor operations are - // happening or why things are shifting around. This implementation... - // appears to work though! - drop(K6); - let x = _mm_xor_si128( - _mm_clmulepi64_si128(x, k3k4, 0x10), - _mm_srli_si128(x, 8), - ); - let x = _mm_xor_si128( - _mm_clmulepi64_si128( - _mm_and_si128(x, _mm_set_epi32(0, 0, 0, !0)), - _mm_set_epi64x(0, K5), - 0x00, - ), - _mm_srli_si128(x, 4), - ); - debug("128 > 64 xx", x); - - // Perform a Barrett reduction from our now 64 bits to 32 bits. The - // algorithm for this is described at the end of the paper, and note that - // this also implements the "bit reflected input" variant. - let pu = _mm_set_epi64x(U_prime, P_x); - - // T1(x) = ⌊(R(x) % x^32)⌋ • μ - let t1 = _mm_clmulepi64_si128( - _mm_and_si128(x, _mm_set_epi32(0, 0, 0, !0)), - pu, - 0x10, - ); - // T2(x) = ⌊(T1(x) % x^32)⌋ • P(x) - let t2 = _mm_clmulepi64_si128( - _mm_and_si128(t1, _mm_set_epi32(0, 0, 0, !0)), - pu, - 0x00, - ); - // We're doing the bit-reflected variant, so get the upper 32-bits of the - // 64-bit result instead of the lower 32-bits. - // - // C(x) = R(x) ^ T2(x) / x^32 - let c = _mm_extract_epi32(_mm_xor_si128(x, t2), 1) as u32; - - if data.len() > 0 { - fallback(!c, data) - } else { - !c - } -} - -unsafe fn reduce128(a: __m128i, b: __m128i, keys: __m128i) -> __m128i { - let t1 = _mm_clmulepi64_si128(a, keys, 0x00); - let t2 = _mm_clmulepi64_si128(a, keys, 0x11); - _mm_xor_si128(_mm_xor_si128(b, t1), t2) -} - -unsafe fn get(a: &mut &[u8]) -> __m128i { - debug_assert!(a.len() >= 16); - let r = _mm_loadu_si128(a.as_ptr() as *const __m128i); - *a = &a[16..]; - return r -} diff --git a/src/crc.rs b/src/crc.rs index 186d050fe..b2905ec88 100644 --- a/src/crc.rs +++ b/src/crc.rs @@ -3,19 +3,15 @@ use std::io::prelude::*; use std::io; -use flate2_crc::Hardware; -use libc; - -use ffi; +use crc32fast::Hasher; /// The CRC calculated by a [`CrcReader`]. /// /// [`CrcReader`]: struct.CrcReader.html #[derive(Debug)] pub struct Crc { - crc: u32, amt: u32, - hardware: Hardware, + hasher: Hasher, } /// A wrapper around a [`Read`] that calculates the CRC. @@ -30,12 +26,12 @@ pub struct CrcReader { impl Crc { /// Create a new CRC. pub fn new() -> Crc { - Crc { crc: 0, amt: 0, hardware: Hardware::detect() } + Crc { amt: 0, hasher: Hasher::new() } } /// Returns the current crc32 checksum. pub fn sum(&self) -> u32 { - self.crc as u32 + self.hasher.clone().finalize() } /// The number of bytes that have been used to calculate the CRC. @@ -47,33 +43,19 @@ impl Crc { /// Update the CRC with the bytes in `data`. pub fn update(&mut self, data: &[u8]) { self.amt = self.amt.wrapping_add(data.len() as u32); - self.crc = self.hardware.calculate(self.crc, data, |crc, data| { - unsafe { - ffi::mz_crc32( - crc as libc::c_ulong, - data.as_ptr(), - data.len() as libc::size_t, - ) as u32 - } - }); + self.hasher.update(data); } /// Reset the CRC. pub fn reset(&mut self) { - self.crc = 0; self.amt = 0; + self.hasher.reset(); } /// Combine the CRC with the CRC for the subsequent block of bytes. pub fn combine(&mut self, additional_crc: &Crc) { - self.crc = unsafe { - ffi::mz_crc32_combine( - self.crc as libc::c_ulong, - additional_crc.crc as libc::c_ulong, - additional_crc.amt as libc::off_t, - ) as u32 - }; self.amt += additional_crc.amt; + self.hasher.combine(&additional_crc.hasher); } } diff --git a/src/ffi.rs b/src/ffi.rs index f8b31cc9f..755372cd7 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -6,7 +6,7 @@ mod imp { extern crate libz_sys as z; use std::mem; use std::ops::{Deref, DerefMut}; - use libc::{c_char, c_int, c_uint, c_ulong, size_t}; + use libc::{c_char, c_int}; pub use self::z::*; pub use self::z::deflateEnd as mz_deflateEnd; @@ -33,18 +33,6 @@ mod imp { pub const MZ_DEFAULT_WINDOW_BITS: c_int = 15; - pub unsafe extern "C" fn mz_crc32(crc: c_ulong, ptr: *const u8, len: size_t) -> c_ulong { - z::crc32(crc, ptr, len as c_uint) - } - - pub unsafe extern "C" fn mz_crc32_combine( - crc1: c_ulong, - crc2: c_ulong, - len2: z_off_t, - ) -> c_ulong { - z::crc32_combine(crc1, crc2, len2) - } - const ZLIB_VERSION: &'static str = "1.2.8\0"; pub unsafe extern "C" fn mz_deflateInit2( @@ -113,7 +101,6 @@ mod imp { extern crate miniz_oxide_c_api; use std::ops::{Deref, DerefMut}; - pub use ffi::crc_imp::*; pub use self::miniz_oxide_c_api::*; pub use self::miniz_oxide_c_api::lib_oxide::*; @@ -144,7 +131,6 @@ mod imp { use std::ops::{Deref, DerefMut}; pub use self::miniz_sys::*; - pub use ffi::crc_imp::*; pub struct StreamWrapper { inner: mz_stream, @@ -179,100 +165,3 @@ mod imp { } } -#[cfg(not(feature = "zlib"))] -mod crc_imp { - use libc::{c_ulong, off_t}; - pub unsafe extern "C" fn mz_crc32_combine( - crc1: c_ulong, - crc2: c_ulong, - len2: off_t, - ) -> c_ulong { - crc32_combine_(crc1, crc2, len2) - } - - // gf2_matrix_times, gf2_matrix_square and crc32_combine_ are ported from - // zlib. - - fn gf2_matrix_times(mat: &[c_ulong; 32], mut vec: c_ulong) -> c_ulong { - let mut sum = 0; - let mut mat_pos = 0; - while vec != 0 { - if vec & 1 == 1 { - sum ^= mat[mat_pos]; - } - vec >>= 1; - mat_pos += 1; - } - sum - } - - fn gf2_matrix_square(square: &mut [c_ulong; 32], mat: &[c_ulong; 32]) { - for n in 0..32 { - square[n] = gf2_matrix_times(mat, mat[n]); - } - } - - fn crc32_combine_(mut crc1: c_ulong, crc2: c_ulong, mut len2: off_t) -> c_ulong { - let mut row; - - let mut even = [0; 32]; /* even-power-of-two zeros operator */ - let mut odd = [0; 32]; /* odd-power-of-two zeros operator */ - - /* degenerate case (also disallow negative lengths) */ - if len2 <= 0 { - return crc1; - } - - /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320; /* CRC-32 polynomial */ - row = 1; - for n in 1..32 { - odd[n] = row; - row <<= 1; - } - - /* put operator for two zero bits in even */ - gf2_matrix_square(&mut even, &odd); - - /* put operator for four zero bits in odd */ - gf2_matrix_square(&mut odd, &even); - - /* apply len2 zeros to crc1 (first square will put the operator for one - zero byte, eight zero bits, in even) */ - loop { - /* apply zeros operator for this bit of len2 */ - gf2_matrix_square(&mut even, &odd); - if len2 & 1 == 1 { - crc1 = gf2_matrix_times(&even, crc1); - } - len2 >>= 1; - - /* if no more bits set, then done */ - if len2 == 0 { - break; - } - - /* another iteration of the loop with odd and even swapped */ - gf2_matrix_square(&mut odd, &even); - if len2 & 1 == 1 { - crc1 = gf2_matrix_times(&odd, crc1); - } - len2 >>= 1; - - /* if no more bits set, then done */ - if len2 == 0 { - break; - } - } - - /* return combined crc */ - crc1 ^= crc2; - crc1 - } -} - -#[test] -fn crc32_combine() { - let crc32 = unsafe { imp::mz_crc32_combine(1, 2, 3) }; - assert_eq!(crc32, 29518389); -} diff --git a/src/lib.rs b/src/lib.rs index 163575d0f..de22064a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,7 +79,7 @@ #![allow(trivial_numeric_casts)] #![cfg_attr(test, deny(warnings))] -extern crate flate2_crc; +extern crate crc32fast; #[cfg(feature = "tokio")] extern crate futures; #[cfg(not(all(target_arch = "wasm32", not(target_os = "emscripten"))))]