open-i18n · bors · Aug 13, 2017 · Aug 12, 2017 · Aug 12, 2017 · Aug 12, 2017
diff --git a/unic/char/Cargo.toml b/unic/char/Cargo.toml
@@ -16,3 +16,4 @@ travis-ci = { repository = "behnam/rust-unic", branch = "master" }
 
 [dependencies]
 unic-char-property = { path = "property/", version = "0.5.0" }
+unic-char-range = { path = "range/", version = "0.5.0" }
diff --git a/unic/char/range/Cargo.toml b/unic/char/range/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "unic-char-range"
+version = "0.5.0"
+authors = ["The UNIC Project Developers"]
+repository = "https://github.com/behnam/rust-unic/"
+license = "MIT/Apache-2.0"
+keywords = ["text", "unicode", "iteration"]
+description = "UNIC - Unicode Characters - Character Range and Iteration"
+categories = ["text-processing"]
+
+# No tests/benches that depend on /data/
+exclude = []
+
+[features]
+default = []
+
+# Unstable features
+unstable = [ "fused", "trusted-len" ]
+fused = []
+trusted-len = []
+
+
+[badges]
+travis-ci = { repository = "behnam/rust-unic", branch = "master" }
diff --git a/unic/char/range/benches/benchmarks.rs b/unic/char/range/benches/benchmarks.rs
@@ -0,0 +1,32 @@
+#![feature(test)]
+
+extern crate test;
+extern crate unic_char_range;
+
+use std::char;
+use unic_char_range::CharRange;
+
+/// Benchmark: count every `char` produced by `CharRange::all()`, front to back.
+#[bench]
+fn forward_iteration(b: &mut test::Bencher) {
+    b.iter(|| {
+        let every_char = CharRange::all();
+        every_char.count()
+    })
+}
+
+/// Baseline for `forward_iteration`: walk the raw code point numbers upward and
+/// keep only those that `char::from_u32` accepts.
+#[bench]
+fn forward_iteration_baseline(b: &mut test::Bencher) {
+    b.iter(|| {
+        let code_points = 0..0x110000;
+        code_points.filter_map(char::from_u32).count()
+    })
+}
+
+/// Benchmark: count every `char` produced by `CharRange::all()`, back to front.
+#[bench]
+fn reverse_iteration(b: &mut test::Bencher) {
+    b.iter(|| {
+        let every_char_reversed = CharRange::all().rev();
+        every_char_reversed.count()
+    })
+}
+
+/// Baseline for `reverse_iteration`: walk the raw code point numbers downward and
+/// keep only those that `char::from_u32` accepts.
+#[bench]
+fn reverse_iteration_baseline(b: &mut test::Bencher) {
+    b.iter(|| {
+        let code_points_descending = (0..0x110000).rev();
+        code_points_descending.filter_map(char::from_u32).count()
+    })
+}
+
+/// Benchmark: ask the full range for its length without iterating it.
+#[bench]
+fn range_length(b: &mut test::Bencher) {
+    b.iter(|| {
+        let every_char = CharRange::all();
+        every_char.len()
+    })
+}
diff --git a/unic/char/range/src/lib.rs b/unic/char/range/src/lib.rs
@@ -0,0 +1,41 @@
+//! # Unic - Char - Range
+//!
+//! A simple way to control iteration over a range of characters.
+//!
+//! # Examples
+//!
+//! ```
+//! # #[macro_use] extern crate unic_char_range;
+//! # use unic_char_range::*;
+//! # fn main() {
+//! for character in chars!('a'..='z') {
+//!     // character is each character in the lowercase english alphabet in order
+//! }
+//!
+//! for character in chars!(..) {
+//!     // character is every valid char from lowest codepoint to highest
+//! }
+//! # }
+//! ```
+//!
+//! # Features
+//!
+//! None of these features are included by default; they rely on unstable Rust feature gates.
+//!
+//! - `unstable`: enables all features
+//! - `fused`: impl the [`FusedIterator`] contract
+//! - `trusted-len`: impl the [`TrustedLen`] contract
+//!
+//! [`FusedIterator`]: https://doc.rust-lang.org/std/iter/trait.FusedIterator.html
+//! [`TrustedLen`]: https://doc.rust-lang.org/std/iter/trait.TrustedLen.html
+//!
+#![forbid(bad_style, missing_debug_implementations, unconditional_recursion)]
+#![deny(missing_docs, unsafe_code, unused, future_incompatible)]
+#![cfg_attr(feature = "fused", feature(fused))]
+#![cfg_attr(feature = "trusted-len", feature(trusted_len))]
+
+mod macros;
+mod range;
+mod step;
+
+pub use range::CharRange;
diff --git a/unic/char/range/src/macros.rs b/unic/char/range/src/macros.rs
@@ -0,0 +1,25 @@
+#[macro_export]
+/// Convenience macro for the initialization of `CharRange`s.
+///
+/// # Syntax
+///
+/// ```
+/// # #[macro_use] extern crate unic_char_range;
+/// # fn main() {
+/// chars!('a'..'z'); // Iterate the half open range including 'a' and excluding 'z'
+/// chars!('a'..='z'); // Iterate the closed range including 'a' and including 'z'
+/// chars!(..); // Iterate all characters
+/// # }
+/// ```
+///
+/// `chars!('a'..='z')` and `chars!(..)` expand to a plain struct literal, so they are
+/// constant expressions and can be used where such are required, such as in the
+/// initialization of constant data structures. `chars!('a'..'z')` calls a constructor
+/// function and therefore is not.
+///
+/// Note that because an `expr` capture cannot be followed by a `..`/`..=`,
+/// this macro captures token trees. This means that if you want to pass more than one token,
+/// you must parenthesize it (e.g. `chars!('\0' ..= (char::MAX))`).
+macro_rules! chars {
+    // Half open: `$high` itself is excluded, so go through the stepping constructor.
+    ( $low:tt .. $high:tt ) => ( $crate::CharRange::open_right($low, $high) );
+    // Closed: both ends are included; the struct literal keeps this usable in constants.
+    ( $low:tt ..= $high:tt ) => ( $crate::CharRange { low: $low, high: $high } );
+    // Everything: the closed range from the lowest to the highest `char`.
+    ( .. ) => ( chars!( '\0' ..= (::std::char::MAX) ) );
+}
diff --git a/unic/char/range/src/range.rs b/unic/char/range/src/range.rs
@@ -0,0 +1,203 @@
+use std::char;
+use std::collections::Bound;
+use std::ops::Range;
+use step;
+
+const SURROGATE_RANGE: Range<u32> = 0xD800..0xE000;
+
+/// A range of unicode code points.
+///
+/// The range is described by its two remaining end points, `low` and `high`, both
+/// inclusive. It is considered exhausted (empty) as soon as `low > high`.
+///
+/// Equality is derived field by field, so two exhausted ranges only compare equal
+/// when their end points happen to match.
+///
+/// The members of this struct are public for const initialization by `chars!(..=)` only.
+/// They should be considered unstable private API that may change at any time.
+/// If you decide to use them anyway, make sure to note the safety notes.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct CharRange {
+    /// The lowest uniterated character (inclusive).
+    ///
+    /// Iteration is finished if this is higher than `high`.
+    ///
+    /// # Safety
+    ///
+    /// This is not guaranteed to always be a valid character. Check before using!
+    /// Note that `high` _is_ guaranteed to be a valid character,
+    /// so this will always be a valid character when iteration is not yet finished.
+    #[doc(hidden)]
+    pub low: char,
+
+    /// The highest uniterated character (inclusive).
+    ///
+    /// Iteration is finished if this is lower than `low`.
+    #[doc(hidden)]
+    pub high: char,
+}
+
+/// Constructors
+impl CharRange {
+    /// Build the range that includes both `start` and `stop`.
+    pub fn closed(start: char, stop: char) -> CharRange {
+        CharRange { low: start, high: stop }
+    }
+
+    /// Build the range that includes `start` but excludes `stop`.
+    pub fn open_right(start: char, stop: char) -> CharRange {
+        let mut trimmed = CharRange::closed(start, stop);
+        // Drop `stop` itself from the upper end.
+        trimmed.step_backward();
+        trimmed
+    }
+
+    /// Build the range that excludes `start` but includes `stop`.
+    pub fn open_left(start: char, stop: char) -> CharRange {
+        let mut trimmed = CharRange::closed(start, stop);
+        // Drop `start` itself from the lower end.
+        trimmed.step_forward();
+        trimmed
+    }
+
+    /// Build the range that excludes both `start` and `stop`.
+    pub fn open(start: char, stop: char) -> CharRange {
+        let mut trimmed = CharRange::closed(start, stop);
+        // Drop both end points: first the lower one, then the upper one.
+        trimmed.step_forward();
+        trimmed.step_backward();
+        trimmed
+    }
+
+    /// Build a range from a pair of `Bound`s.
+    ///
+    /// An unbounded start is treated as an inclusive `'\0'`, and an unbounded stop
+    /// as an inclusive `char::MAX`; the remaining combinations dispatch to the
+    /// matching `closed` / `open*` constructor.
+    pub fn bound(start: Bound<char>, stop: Bound<char>) -> CharRange {
+        // Normalize unbounded ends to the inclusive extremes of `char`.
+        let lower = match start {
+            Bound::Unbounded => Bound::Included('\0'),
+            bounded => bounded,
+        };
+        let upper = match stop {
+            Bound::Unbounded => Bound::Included(char::MAX),
+            bounded => bounded,
+        };
+
+        match (lower, upper) {
+            (Bound::Included(lo), Bound::Included(hi)) => CharRange::closed(lo, hi),
+            (Bound::Included(lo), Bound::Excluded(hi)) => CharRange::open_right(lo, hi),
+            (Bound::Excluded(lo), Bound::Included(hi)) => CharRange::open_left(lo, hi),
+            (Bound::Excluded(lo), Bound::Excluded(hi)) => CharRange::open(lo, hi),
+            // Both ends were normalized above, so no `Unbounded` can remain.
+            (Bound::Unbounded, _) | (_, Bound::Unbounded) => unreachable!(),
+        }
+    }
+
+    /// Build the range covering every `char`, from `'\0'` to `char::MAX`.
+    pub fn all() -> CharRange {
+        CharRange {
+            low: '\0',
+            high: char::MAX,
+        }
+    }
+}
+
+impl CharRange {
+    /// Consume the lowest remaining character.
+    ///
+    /// Normally `low` advances to the next scalar value. When `low` is already
+    /// `char::MAX` there is no next scalar value, and building one with the
+    /// unchecked conversion would create an invalid `char` (undefined behavior),
+    /// so the range is emptied by dropping `high` to `'\0'` instead.
+    #[inline]
+    #[allow(unsafe_code)]
+    fn step_forward(&mut self) {
+        if self.low == char::MAX {
+            // `low` (= `char::MAX`) is now greater than `high`, i.e. the range is empty.
+            self.high = '\0';
+        } else {
+            // SAFETY: `self.low < char::MAX`, so its successor is a valid scalar value.
+            self.low = unsafe { step::forward(self.low) }
+        }
+    }
+
+    /// Consume the highest remaining character.
+    ///
+    /// Normally `high` retreats to the previous scalar value. When `high` is
+    /// already `'\0'` that would underflow, so the range is emptied by raising
+    /// `low` to `char::MAX` instead.
+    #[inline]
+    #[allow(unsafe_code)]
+    fn step_backward(&mut self) {
+        if self.high == '\0' {
+            // `low` is now greater than `high` (= `'\0'`), i.e. the range is empty.
+            self.low = char::MAX;
+        } else {
+            // SAFETY: `self.high > '\0'`, so its predecessor exists and is a valid scalar value.
+            self.high = unsafe { step::backward(self.high) }
+        }
+    }
+}
+
+impl CharRange {
+    /// Test whether `ch` lies between the range's two end points (both inclusive).
+    ///
+    /// A range whose low end is above its high end contains nothing.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use unic_char_range::CharRange;
+    /// assert!(   CharRange::closed('a', 'g').contains('d'));
+    /// assert!( ! CharRange::closed('a', 'g').contains('z'));
+    ///
+    /// assert!( ! CharRange:: open ('a', 'a').contains('a'));
+    /// assert!( ! CharRange::closed('z', 'a').contains('g'));
+    /// ```
+    pub fn contains(&self, ch: char) -> bool {
+        let below = ch < self.low;
+        let above = ch > self.high;
+        !(below || above)
+    }
+}
+
+impl Iterator for CharRange {
+    type Item = char;
+
+    /// Yield the lowest remaining character, or `None` once the range is exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<char> {
+        if self.low <= self.high {
+            let front = self.low;
+            self.step_forward();
+            Some(front)
+        } else {
+            None
+        }
+    }
+
+    /// The bounds are exact: both are the value reported by `len()`.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = self.len();
+        (remaining, Some(remaining))
+    }
+
+    /// The final character is simply `high`; no iteration is needed.
+    fn last(self) -> Option<char> {
+        if self.low <= self.high {
+            Some(self.high)
+        } else {
+            None
+        }
+    }
+
+    /// Characters come out in ascending order, so the maximum is the last one.
+    fn max(self) -> Option<char> {
+        self.last()
+    }
+
+    /// Characters come out in ascending order, so the minimum is the first one.
+    fn min(mut self) -> Option<char> {
+        self.next()
+    }
+}
+
+impl DoubleEndedIterator for CharRange {
+    /// Yield the highest remaining character, or `None` once the range is exhausted.
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.low <= self.high {
+            let back = self.high;
+            self.step_backward();
+            Some(back)
+        } else {
+            None
+        }
+    }
+}
+
+impl ExactSizeIterator for CharRange {
+    /// Number of characters left in the range.
+    ///
+    /// This is the distance between the two end points (inclusive), minus the
+    /// surrogate code points when the range stretches across that whole gap.
+    fn len(&self) -> usize {
+        if self.low > self.high {
+            return 0;
+        }
+
+        let (first, last) = (self.low as u32, self.high as u32);
+        let code_points = (last - first) as usize + 1;
+
+        let spans_surrogates = first <= SURROGATE_RANGE.start && last >= SURROGATE_RANGE.end;
+        let skipped = if spans_surrogates {
+            SURROGATE_RANGE.len()
+        } else {
+            0
+        };
+
+        code_points - skipped
+    }
+}
+
+#[cfg(any(feature = "fused", feature = "trusted-len"))]
+use std::iter;
+
+// Opt-in marker (feature `fused`): once `low > high`, `next` and `next_back`
+// return `None` without touching the range, so they keep returning `None`.
+#[cfg(feature = "fused")]
+impl iter::FusedIterator for CharRange {}
+
+// Opt-in marker (feature `trusted-len`).
+// SAFETY: relies on `size_hint` reporting `len()` as both its lower and upper
+// bound, and on `len()` matching the number of characters actually yielded.
+#[allow(unsafe_code)]
+#[cfg(feature = "trusted-len")]
+unsafe impl iter::TrustedLen for CharRange {}
diff --git a/unic/char/range/src/step.rs b/unic/char/range/src/step.rs
@@ -0,0 +1,35 @@
+use std::char;
+
+const BEFORE_SURROGATE: char = '\u{D7FF}';
+const AFTER_SURROGATE: char = '\u{E000}';
+
+#[inline]
+#[allow(unsafe_code)]
+/// Advance a character by one scalar value, towards `char::MAX`.
+///
+/// The character just below the surrogate block maps straight to the one just
+/// above it; every other character maps to the next code point.
+///
+/// # Safety
+///
+/// If the given character is `char::MAX`, the return value is not a valid character.
+pub unsafe fn forward(ch: char) -> char {
+    match ch {
+        BEFORE_SURROGATE => AFTER_SURROGATE,
+        _ => {
+            let successor = ch as u32 + 1;
+            char::from_u32_unchecked(successor)
+        }
+    }
+}
+
+#[inline]
+#[allow(unsafe_code)]
+/// Move a character back by one scalar value, towards `'\0'`.
+///
+/// The character just above the surrogate block maps straight to the one just
+/// below it; every other character maps to the previous code point.
+///
+/// # Safety
+///
+/// If the given character is `'\0'`, this will cause an underflow.
+/// (Thus, it will panic in debug mode, undefined behavior in release mode.)
+pub unsafe fn backward(ch: char) -> char {
+    match ch {
+        AFTER_SURROGATE => BEFORE_SURROGATE,
+        _ => {
+            let predecessor = ch as u32 - 1;
+            char::from_u32_unchecked(predecessor)
+        }
+    }
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,3 +16,4 @@ travis-ci = { repository = "behnam/rust-unic", branch = "master" }

		[dependencies]
		unic-char-property = { path = "property/", version = "0.5.0" }
		unic-char-range = { path = "range/", version = "0.5.0" }