Merge pull request #10 from henninglive/i128

Add support for 128-bit integers
dtolnay · Sep 16, 2017 · f05d2d6 · f05d2d6
2 parents d954ca8 + 1d85a0c
commit f05d2d6
Show file tree

Hide file tree

Showing 7 changed files with 319 additions and 31 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,6 +1,17 @@
 sudo: false
-
 language: rust
 
-rust:
-  - nightly
+matrix:
+  include:
+    - rust: stable
+    - rust: beta
+    - rust: nightly
+      env:
+      - FEATURES="i128"
+      - BUILD_BENCH="true"
+
+script:
+  - cargo build --verbose --features "$FEATURES"
+  - cargo test --verbose --features "$FEATURES"
+  - if [ "$BUILD_BENCH" == "true" ]; then cargo bench --verbose --no-run --features "$FEATURES"; fi
+
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,3 +9,6 @@ documentation = "https://github.com/dtolnay/itoa"
 categories = ["value-formatting"]
 readme = "README.md"
 exclude = ["performance.png"]
+
+[features]
+i128 = []
diff --git a/README.md b/README.md
@@ -41,8 +41,9 @@ fn write<W: io::Write, V: itoa::Integer>(writer: W, value: V) -> io::Result<usiz
 ```
 
 where `itoa::Integer` is implemented for `i8`, `u8`, `i16`, `u16`, `i32`, `u32`,
-`i64`, `u64`, `isize` and `usize`. The return value gives the number of bytes
-written.
+`i64`, `u64`, `i128`, `u128`, `isize` and `usize`. 128-bit integer support is
+only available with the nightly compiler when the `i128` feature is enabled for
+this crate. The return value gives the number of bytes written.
 
 ## Dependency
 

diff --git a/benches/bench.rs b/benches/bench.rs
@@ -1,19 +1,26 @@
+#![cfg_attr(feature = "i128", feature(i128_type, i128))]
 #![feature(test)]
 #![allow(non_snake_case)]
 
 extern crate itoa;
 extern crate test;
 
 macro_rules! benches {
-    ($($name:ident($value:expr),)*) => {
+    (
+        $(
+            $(#[$attr:meta])*
+            $name:ident($value:expr)
+        ),*
+    ) => {
         mod bench_itoa {
             use test::{Bencher, black_box};
             $(
+                $(#[$attr])*
                 #[bench]
                 fn $name(b: &mut Bencher) {
                     use itoa;
 
-                    let mut buf = Vec::with_capacity(20);
+                    let mut buf = Vec::with_capacity(40);
 
                     b.iter(|| {
                         buf.clear();
@@ -26,11 +33,12 @@ macro_rules! benches {
         mod bench_fmt {
             use test::{Bencher, black_box};
             $(
+                $(#[$attr])*
                 #[bench]
                 fn $name(b: &mut Bencher) {
                     use std::io::Write;
 
-                    let mut buf = Vec::with_capacity(20);
+                    let mut buf = Vec::with_capacity(40);
 
                     b.iter(|| {
                         buf.clear();
@@ -42,11 +50,16 @@ macro_rules! benches {
     }
 }
 
-benches!(
-    bench_0u64(0u64),
-    bench_HALFu64(<u32>::max_value() as u64),
-    bench_MAXu64(<u64>::max_value()),
+benches!{
+    bench_u64_0(0u64),
+    bench_u64_half(<u32>::max_value() as u64),
+    bench_u64_max(<u64>::max_value()),
 
-    bench_0i16(0i16),
-    bench_MINi16(<i16>::min_value()),
-);
+    bench_i16_0(0i16),
+    bench_i16_min(<i16>::min_value()),
+
+    #[cfg(feature = "i128")]
+    bench_u128_0(0u128),
+    #[cfg(feature = "i128")]
+    bench_u128_max(<u128>::max_value())
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -8,6 +8,11 @@
 
 #![doc(html_root_url = "https://docs.rs/itoa/0.3.3")]
 
+#![cfg_attr(feature = "i128", feature(i128_type, i128))]
+
+#[cfg(feature = "i128")]
+mod udiv128;
+
 use std::{io, mem, ptr, slice};
 
 #[inline]
@@ -30,12 +35,24 @@ const DEC_DIGITS_LUT: &'static[u8] =
       6061626364656667686970717273747576777879\
       8081828384858687888990919293949596979899";
 
-const MAX_LEN: usize = 20;  // Tie between i64::MIN (including minus sign) and u64::MAX
+const MAX_LEN: usize = 40; // i128::MIN (including minus sign)
 
 // Adaptation of the original implementation at
 // https://github.com/rust-lang/rust/blob/b8214dc6c6fc20d0a660fb5700dca9ebf51ebe89/src/libcore/fmt/num.rs#L188-L266
 macro_rules! impl_Integer {
-    ($($t:ident),* as $conv_fn:ident) => ($(
+    ($($t:ident),* as $conv_fn:ident) =>
+        (impl_Integer!(
+            $($t),* as $conv_fn,
+            (|n:$conv_fn, d:$conv_fn, rem:Option<&mut $conv_fn>| {
+                match rem {
+                    Some(rem) => *rem = n % d,
+                    _ => {},
+                }
+                n / d
+            })
+        ););
+
+    ($($t:ident),* as $conv_fn:ident, $divmod:expr) => ($(
     impl Integer for $t {
         fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
             let mut buf = unsafe { mem::uninitialized() };
@@ -63,11 +80,13 @@ macro_rules! impl_Integer {
                 // eagerly decode 4 characters at a time
                 if <$t>::max_value() as u64 >= 10000 {
                     while n >= 10000 {
-                        let rem = (n % 10000) as isize;
-                        n /= 10000;
+                        let mut rem = 0;
+                        // Division with remainder on u128 is badly optimized by LLVM;
+                        // see “udiv128.rs” for more info.
+                        n = $divmod(n, 10000, Some(&mut rem));
 
-                        let d1 = (rem / 100) << 1;
-                        let d2 = (rem % 100) << 1;
+                        let d1 = (rem as isize / 100) << 1;
+                        let d2 = (rem as isize % 100) << 1;
                         curr -= 4;
                         ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2);
                         ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2);
@@ -115,3 +134,5 @@ impl_Integer!(isize, usize as u16);
 impl_Integer!(isize, usize as u32);
 #[cfg(target_pointer_width = "64")]
 impl_Integer!(isize, usize as u64);
+#[cfg(all(feature = "i128"))]
+impl_Integer!(i128, u128 as u128, udiv128::udivmodti4);
diff --git a/src/udiv128.rs b/src/udiv128.rs
@@ -0,0 +1,225 @@
+// Copyright 2009-2016 compiler-builtins Developers
+//
+// The compiler-builtins crate is dual licensed under both the University of
+// Illinois "BSD-Like" license and the MIT license.  As a user of this code you may
+// choose to use it under either license. As a contributor, you agree to allow
+// your code to be used under both.
+//
+// Full text of the relevant licenses is found here:
+// https://github.com/rust-lang-nursery/compiler-builtins/blob/master/LICENSE.TXT
+//
+//
+//
+// The following code is based on Rust’s [compiler-builtins crate]
+// (https://github.com/rust-lang-nursery/compiler-builtins), which
+// provides runtime functions for Rust programs. The Rust
+// compiler will automatically link your programs against this crate.
+//
+// We copied the implementation of '__udivmodti4()', which is an intrinsic
+// implementing division with remainder for architectures without 128-bit integer support.
+// We have done this for two reasons: to work around [bad optimization by LLVM]
+// (https://github.com/rust-lang/rust/issues/44545) and to allow function
+// inlining, which doesn’t happen with the intrinsic.
+
+const BITS: u32 = 128;
+const BITS_HALF: u32 = 64;
+
+trait LargeInt {
+    fn low(self) -> u64;
+    fn high(self) -> u64;
+    fn from_parts(low: u64, high: u64) -> Self;
+}
+
+trait Int {
+    fn aborting_div(self, other: Self) -> Self;
+    fn aborting_rem(self, other: Self) -> Self;
+}
+
+impl LargeInt for u128 {
+    fn low(self) -> u64 {
+        self as u64
+    }
+
+    fn high(self) -> u64 {
+        (self >> 64) as u64
+    }
+
+    fn from_parts(low: u64, high: u64) -> u128 {
+        low as u128 | ((high as u128) << 64)
+    }
+}
+
+impl Int for u64 {
+    fn aborting_div(self, other: u64) -> u64 {
+        <u64>::checked_div(self, other).unwrap()
+    }
+
+    fn aborting_rem(self, other: u64) -> u64 {
+        <u64>::checked_rem(self, other).unwrap()
+    }
+}
+
+pub fn udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 {
+    // NOTE: each diagram shows n over d as (high, low) 64-bit halves; X is unknown, K != 0
+    if n.high() == 0 {
+        if d.high() == 0 {
+            // 0 X
+            // ---
+            // 0 X
+
+            if let Some(rem) = rem {
+                *rem = <u128>::from(n.low().aborting_rem(d.low()));
+            }
+            return <u128>::from(n.low().aborting_div(d.low()))
+        } else {
+            // 0 X
+            // ---
+            // K X
+            if let Some(rem) = rem {
+                *rem = n;
+            }
+            return 0;
+        };
+    }
+
+    let mut sr;
+    let mut q;
+    let mut r;
+
+    if d.low() == 0 {
+        if d.high() == 0 {
+            // K X
+            // ---
+            // 0 0
+            // NOTE This should be unreachable in safe Rust because the program will panic before
+            // this intrinsic is called
+            unreachable!();
+        }
+
+        if n.low() == 0 {
+            // K 0
+            // ---
+            // K 0
+            if let Some(rem) = rem {
+                *rem = <u128>::from_parts(0, n.high().aborting_rem(d.high()));
+            }
+            return <u128>::from(n.high().aborting_div(d.high()))
+        }
+
+        // K K
+        // ---
+        // K 0
+
+        if d.high().is_power_of_two() {
+            if let Some(rem) = rem {
+                *rem = <u128>::from_parts(n.low(), n.high() & (d.high() - 1));
+            }
+            return <u128>::from(n.high() >> d.high().trailing_zeros());
+        }
+
+        sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros());
+
+        // D > N
+        if sr > BITS_HALF - 2 {
+            if let Some(rem) = rem {
+                *rem = n;
+            }
+            return 0;
+        }
+
+        sr += 1;
+
+        // 1 <= sr <= BITS_HALF - 1
+        q = n << (BITS - sr);
+        r = n >> sr;
+    } else if d.high() == 0 {
+        // K X
+        // ---
+        // 0 K
+        if d.low().is_power_of_two() {
+            if let Some(rem) = rem {
+                *rem = <u128>::from(n.low() & (d.low() - 1));
+            }
+
+            if d.low() == 1 {
+                return n;
+            } else {
+                let sr = d.low().trailing_zeros();
+                return n >> sr;
+            };
+        }
+
+        sr = 1 + BITS_HALF + d.low().leading_zeros() - n.high().leading_zeros();
+
+        // 2 <= sr <= BITS - 1
+        q = n << (BITS - sr);
+        r = n >> sr;
+    } else {
+        // K X
+        // ---
+        // K K
+        sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros());
+
+        // D > N
+        if sr > BITS_HALF - 1 {
+            if let Some(rem) = rem {
+                *rem = n;
+            }
+            return 0;
+        }
+
+        sr += 1;
+
+        // 1 <= sr <= BITS_HALF
+        q = n << (BITS - sr);
+        r = n >> sr;
+    }
+
+    // Not a special case
+    // q and r are initialized with
+    // q = n << (BITS - sr)
+    // r = n >> sr
+    // 1 <= sr <= BITS - 1
+    let mut carry = 0;
+
+    // Don't use a range here because ranges may generate references to memcpy in unoptimized code
+    let mut i = 0;
+    while i < sr {
+        i += 1;
+
+        // r:q = ((r:q) << 1) | carry
+        r = (r << 1) | (q >> (BITS - 1));
+        q = (q << 1) | carry as u128;
+
+        // carry = 0
+        // if r >= d {
+        //     r -= d;
+        //     carry = 1;
+        // }
+        let s = (d.wrapping_sub(r).wrapping_sub(1)) as i128 >> (BITS - 1);
+        carry = (s & 1) as u64;
+        r -= d & s as u128;
+    }
+
+    if let Some(rem) = rem {
+        *rem = r;
+    }
+    (q << 1) | carry as u128
+}
+
+#[cfg(test)]
+#[test]
+fn test_udivmodti4() {
+    let primes = [
+          3,   7,  31,  73, 127, 179, 233, 283, 353,
+        419, 467, 547, 607, 661, 739, 811, 877, 947,
+    ];
+
+    for (i, d) in (0..128).cycle().zip(primes.iter().cycle()).take(1_000) {
+        let n = 1u128 << i;
+        let mut rem = 0;
+        let q = udivmodti4(n, *d, Some(&mut rem));
+        assert_eq!(q, n / d);
+        assert_eq!(rem, n % d);
+    }
+}