From 4c3448f356dcb04150db20dcef92da688d49a4d5 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Wed, 8 Feb 2017 17:43:48 -0500 Subject: [PATCH 01/11] Add equivalents of C's <ctype.h> functions to AsciiExt. * `is_ascii_alphabetic` * `is_ascii_uppercase` * `is_ascii_lowercase` * `is_ascii_alphanumeric` * `is_ascii_digit` * `is_ascii_hexdigit` * `is_ascii_punctuation` * `is_ascii_graphic` * `is_ascii_whitespace` * `is_ascii_control` This addresses issue #39658. --- src/libstd/ascii.rs | 838 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 838 insertions(+) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 35c388ba076ce..cf965291fa2a6 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -184,6 +184,348 @@ pub trait AsciiExt { /// [`to_ascii_lowercase`]: #tymethod.to_ascii_lowercase #[stable(feature = "ascii", since = "1.9.0")] fn make_ascii_lowercase(&mut self); + + /// Checks if the value is an ASCII alphabetic character: + /// U+0041 'A' ... U+005A 'Z' or U+0061 'a' ... U+007A 'z'. + /// For strings, true if all characters in the string are + /// ASCII alphabetic. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_alphabetic()); + /// assert!(G.is_ascii_alphabetic()); + /// assert!(a.is_ascii_alphabetic()); + /// assert!(g.is_ascii_alphabetic()); + /// assert!(!zero.is_ascii_alphabetic()); + /// assert!(!percent.is_ascii_alphabetic()); + /// assert!(!space.is_ascii_alphabetic()); + /// assert!(!lf.is_ascii_alphabetic()); + /// assert!(!esc.is_ascii_alphabetic()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_alphabetic(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII uppercase character: + /// U+0041 'A' ... U+005A 'Z'. 
+ /// For strings, true if all characters in the string are + /// ASCII uppercase. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_uppercase()); + /// assert!(G.is_ascii_uppercase()); + /// assert!(!a.is_ascii_uppercase()); + /// assert!(!g.is_ascii_uppercase()); + /// assert!(!zero.is_ascii_uppercase()); + /// assert!(!percent.is_ascii_uppercase()); + /// assert!(!space.is_ascii_uppercase()); + /// assert!(!lf.is_ascii_uppercase()); + /// assert!(!esc.is_ascii_uppercase()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_uppercase(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII lowercase character: + /// U+0061 'a' ... U+007A 'z'. + /// For strings, true if all characters in the string are + /// ASCII lowercase. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_lowercase()); + /// assert!(!G.is_ascii_lowercase()); + /// assert!(a.is_ascii_lowercase()); + /// assert!(g.is_ascii_lowercase()); + /// assert!(!zero.is_ascii_lowercase()); + /// assert!(!percent.is_ascii_lowercase()); + /// assert!(!space.is_ascii_lowercase()); + /// assert!(!lf.is_ascii_lowercase()); + /// assert!(!esc.is_ascii_lowercase()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_lowercase(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII alphanumeric character: + /// U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or + /// U+0030 '0' ... U+0039 '9'. 
+ /// For strings, true if all characters in the string are + /// ASCII alphanumeric. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_alphanumeric()); + /// assert!(G.is_ascii_alphanumeric()); + /// assert!(a.is_ascii_alphanumeric()); + /// assert!(g.is_ascii_alphanumeric()); + /// assert!(zero.is_ascii_alphanumeric()); + /// assert!(!percent.is_ascii_alphanumeric()); + /// assert!(!space.is_ascii_alphanumeric()); + /// assert!(!lf.is_ascii_alphanumeric()); + /// assert!(!esc.is_ascii_alphanumeric()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_alphanumeric(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII decimal digit: + /// U+0030 '0' ... U+0039 '9'. + /// For strings, true if all characters in the string are + /// ASCII digits. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_digit()); + /// assert!(!G.is_ascii_digit()); + /// assert!(!a.is_ascii_digit()); + /// assert!(!g.is_ascii_digit()); + /// assert!(zero.is_ascii_digit()); + /// assert!(!percent.is_ascii_digit()); + /// assert!(!space.is_ascii_digit()); + /// assert!(!lf.is_ascii_digit()); + /// assert!(!esc.is_ascii_digit()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_digit(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII hexadecimal digit: + /// U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or + /// U+0061 'a' ... U+0066 'f'. 
+ /// For strings, true if all characters in the string are + /// ASCII hex digits. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_hexdigit()); + /// assert!(!G.is_ascii_hexdigit()); + /// assert!(a.is_ascii_hexdigit()); + /// assert!(!g.is_ascii_hexdigit()); + /// assert!(zero.is_ascii_hexdigit()); + /// assert!(!percent.is_ascii_hexdigit()); + /// assert!(!space.is_ascii_hexdigit()); + /// assert!(!lf.is_ascii_hexdigit()); + /// assert!(!esc.is_ascii_hexdigit()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_hexdigit(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII punctuation character: + /// U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /` + /// U+003A ... U+0040 `: ; < = > ? @` + /// U+005B ... U+0060 `[ \\ ] ^ _ \`` + /// U+007B ... U+007E `{ | } ~` + /// For strings, true if all characters in the string are + /// ASCII punctuation. 
+ /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_punctuation()); + /// assert!(!G.is_ascii_punctuation()); + /// assert!(!a.is_ascii_punctuation()); + /// assert!(!g.is_ascii_punctuation()); + /// assert!(!zero.is_ascii_punctuation()); + /// assert!(percent.is_ascii_punctuation()); + /// assert!(!space.is_ascii_punctuation()); + /// assert!(!lf.is_ascii_punctuation()); + /// assert!(!esc.is_ascii_punctuation()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_punctuation(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII graphic character: + /// U+0021 '!' ... U+007E '~'. + /// For strings, true if all characters in the string are + /// ASCII graphic. + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(A.is_ascii_graphic()); + /// assert!(G.is_ascii_graphic()); + /// assert!(a.is_ascii_graphic()); + /// assert!(g.is_ascii_graphic()); + /// assert!(zero.is_ascii_graphic()); + /// assert!(percent.is_ascii_graphic()); + /// assert!(!space.is_ascii_graphic()); + /// assert!(!lf.is_ascii_graphic()); + /// assert!(!esc.is_ascii_graphic()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_graphic(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII whitespace character: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// For strings, true if all characters in the string are + /// ASCII whitespace. 
+ /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][posix-ctype] + /// includes U+000B VERTICAL TAB as well as all the above + /// characters, but—from the very same specification—[the default + /// rule for "field splitting" in the Bourne shell][field-splitting] + /// considers *only* SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. + /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [posix-ctype]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [field-splitting]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_whitespace()); + /// assert!(!G.is_ascii_whitespace()); + /// assert!(!a.is_ascii_whitespace()); + /// assert!(!g.is_ascii_whitespace()); + /// assert!(!zero.is_ascii_whitespace()); + /// assert!(!percent.is_ascii_whitespace()); + /// assert!(space.is_ascii_whitespace()); + /// assert!(lf.is_ascii_whitespace()); + /// assert!(!esc.is_ascii_whitespace()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_whitespace(&self) -> bool { unimplemented!(); } + + /// Checks if the value is an ASCII control character: + /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE. + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. 
+ /// + /// # Examples + /// + /// ``` + /// use std::ascii::AsciiExt; + /// let A = 'A'; + /// let G = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc = '\u{001b}'; + /// + /// assert!(!A.is_ascii_control()); + /// assert!(!G.is_ascii_control()); + /// assert!(!a.is_ascii_control()); + /// assert!(!g.is_ascii_control()); + /// assert!(!zero.is_ascii_control()); + /// assert!(!percent.is_ascii_control()); + /// assert!(!space.is_ascii_control()); + /// assert!(lf.is_ascii_control()); + /// assert!(esc.is_ascii_control()); + /// ``` + #[unstable(feature = "ascii_ctype", issue = "39658")] + fn is_ascii_control(&self) -> bool { unimplemented!(); } } #[stable(feature = "rust1", since = "1.0.0")] @@ -225,6 +567,56 @@ impl AsciiExt for str { let me: &mut [u8] = unsafe { mem::transmute(self) }; me.make_ascii_lowercase() } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + self.bytes().all(|b| b.is_ascii_alphabetic()) + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + self.bytes().all(|b| b.is_ascii_uppercase()) + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + self.bytes().all(|b| b.is_ascii_lowercase()) + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + self.bytes().all(|b| b.is_ascii_alphanumeric()) + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + self.bytes().all(|b| b.is_ascii_digit()) + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + self.bytes().all(|b| b.is_ascii_hexdigit()) + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + self.bytes().all(|b| b.is_ascii_punctuation()) + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + self.bytes().all(|b| b.is_ascii_graphic()) + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + self.bytes().all(|b| b.is_ascii_whitespace()) + } + + #[inline] + fn is_ascii_control(&self) -> bool { + self.bytes().all(|b| b.is_ascii_control()) + } } 
#[stable(feature = "rust1", since = "1.0.0")] @@ -268,6 +660,56 @@ impl AsciiExt for [u8] { byte.make_ascii_lowercase(); } } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + self.iter().all(|b| b.is_ascii_alphabetic()) + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + self.iter().all(|b| b.is_ascii_uppercase()) + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + self.iter().all(|b| b.is_ascii_lowercase()) + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + self.iter().all(|b| b.is_ascii_alphanumeric()) + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + self.iter().all(|b| b.is_ascii_digit()) + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + self.iter().all(|b| b.is_ascii_hexdigit()) + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + self.iter().all(|b| b.is_ascii_punctuation()) + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + self.iter().all(|b| b.is_ascii_graphic()) + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + self.iter().all(|b| b.is_ascii_whitespace()) + } + + #[inline] + fn is_ascii_control(&self) -> bool { + self.iter().all(|b| b.is_ascii_control()) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -287,6 +729,96 @@ impl AsciiExt for u8 { fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); } #[inline] fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + L|Lx|U|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + U|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + L|Lx => true, + _ => false + } + } + + #[inline] + fn 
is_ascii_alphanumeric(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D|L|Lx|U|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_digit(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D => true, + _ => false + } + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D|Lx|Ux => true, + _ => false + } + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + P => true, + _ => false + } + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + Ux|U|Lx|L|D|P => true, + _ => false + } + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + Cw|W => true, + _ => false + } + } + + #[inline] + fn is_ascii_control(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + C|Cw => true, + _ => false + } + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -324,6 +856,56 @@ impl AsciiExt for char { fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); } #[inline] fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); } + + #[inline] + fn is_ascii_alphabetic(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphabetic() + } + + #[inline] + fn is_ascii_uppercase(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_uppercase() + } + + #[inline] + fn is_ascii_lowercase(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_lowercase() + } + + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphanumeric() + } + + #[inline] + fn 
is_ascii_digit(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_digit() + } + + #[inline] + fn is_ascii_hexdigit(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_hexdigit() + } + + #[inline] + fn is_ascii_punctuation(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_punctuation() + } + + #[inline] + fn is_ascii_graphic(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_graphic() + } + + #[inline] + fn is_ascii_whitespace(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_whitespace() + } + + #[inline] + fn is_ascii_control(&self) -> bool { + (*self as u32 <= 0x7f) && (*self as u8).is_ascii_control() + } } /// An iterator over the escaped version of a byte, constructed via @@ -485,6 +1067,30 @@ static ASCII_UPPERCASE_MAP: [u8; 256] = [ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, ]; +enum AsciiCharacterClass { + C, // control + Cw, // control whitespace + W, // whitespace + D, // digit + L, // lowercase + Lx, // lowercase hex digit + U, // uppercase + Ux, // uppercase hex digit + P, // punctuation +} +use self::AsciiCharacterClass::*; + +static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 128] = [ +// _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _a _b _c _d _e _f + C, C, C, C, C, C, C, C, C, Cw,Cw,C, Cw,Cw,C, C, // 0_ + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 1_ + W, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, // 2_ + D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, P, // 3_ + P, Ux,Ux,Ux,Ux,Ux,Ux,U, U, U, U, U, U, U, U, U, // 4_ + U, U, U, U, U, U, U, U, U, U, U, P, P, P, P, P, // 5_ + P, Lx,Lx,Lx,Lx,Lx,Lx,L, L, L, L, L, L, L, L, L, // 6_ + L, L, L, L, L, L, L, L, L, L, L, P, P, P, P, C, // 7_ +]; #[cfg(test)] mod tests { @@ -606,4 +1212,236 @@ mod tests { let x = "a".to_string(); x.eq_ignore_ascii_case("A"); } + + // Shorthands used by the is_ascii_* tests. + macro_rules! 
assert_all { + ($what:ident, $($str:tt),+) => {{ + $( + for b in $str.chars() { + if !b.$what() { + panic!("expected {}({}) but it isn't", + stringify!($what), b); + } + } + for b in $str.as_bytes().iter() { + if !b.$what() { + panic!("expected {}(0x{:02x})) but it isn't", + stringify!($what), b); + } + } + assert!($str.$what()); + assert!($str.as_bytes().$what()); + )+ + }}; + ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+)) + } + macro_rules! assert_none { + ($what:ident, $($str:tt),+) => {{ + $( + for b in $str.chars() { + if b.$what() { + panic!("expected not-{}({}) but it is", + stringify!($what), b); + } + } + for b in $str.as_bytes().iter() { + if b.$what() { + panic!("expected not-{}(0x{:02x})) but it is", + stringify!($what), b); + } + } + )* + }}; + ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+)) + } + + #[test] + fn test_is_ascii_alphabetic() { + assert_all!(is_ascii_alphabetic, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + ); + assert_none!(is_ascii_alphabetic, + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_uppercase() { + assert_all!(is_ascii_uppercase, + "", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + ); + assert_none!(is_ascii_uppercase, + "abcdefghijklmnopqrstuvwxyz", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_lowercase() { + assert_all!(is_ascii_lowercase, + "abcdefghijklmnopqrstuvwxyz", + ); + assert_none!(is_ascii_lowercase, + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + 
"\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_alphanumeric() { + assert_all!(is_ascii_alphanumeric, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + ); + assert_none!(is_ascii_alphanumeric, + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_digit() { + assert_all!(is_ascii_digit, + "", + "0123456789", + ); + assert_none!(is_ascii_digit, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_hexdigit() { + assert_all!(is_ascii_hexdigit, + "", + "0123456789", + "abcdefABCDEF", + ); + assert_none!(is_ascii_hexdigit, + "ghijklmnopqrstuvwxyz", + "GHIJKLMNOQPRSTUVWXYZ", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_punctuation() { + assert_all!(is_ascii_punctuation, + "", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ); + assert_none!(is_ascii_punctuation, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_graphic() { + assert_all!(is_ascii_graphic, + "", + 
"abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ); + assert_none!(is_ascii_graphic, + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_whitespace() { + assert_all!(is_ascii_whitespace, + "", + " \t\n\x0c\r", + ); + assert_none!(is_ascii_whitespace, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x0b\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_control() { + assert_all!(is_ascii_control, + "", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + assert_none!(is_ascii_control, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " ", + ); + } } From b3d73995dac028f287604dbe763b236be154e787 Mon Sep 17 00:00:00 2001 From: Jeffrey Seyfried Date: Fri, 10 Feb 2017 23:23:11 +0000 Subject: [PATCH 02/11] Fix ICE on certain sequence repetitions. 
--- src/libsyntax/parse/parser.rs | 19 ++++++++++++++----- src/test/compile-fail/issue-39709.rs | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 src/test/compile-fail/issue-39709.rs diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index b051928ff9d3c..2c4fa8e15edf2 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -302,11 +302,20 @@ impl<'a> Parser<'a> { if i + 1 < tts.len() { self.tts.push((tts, i + 1)); } - if let TokenTree::Token(sp, tok) = tt { - TokenAndSpan { tok: tok, sp: sp } - } else { - self.tts.push((tt, 0)); - continue + // FIXME(jseyfried): remove after fixing #39390 in #39419. + if self.quote_depth > 0 { + if let TokenTree::Sequence(sp, _) = tt { + self.span_err(sp, "attempted to repeat an expression containing no \ + syntax variables matched as repeating at this depth"); + } + } + match tt { + TokenTree::Token(sp, tok) => TokenAndSpan { tok: tok, sp: sp }, + _ if tt.len() > 0 => { + self.tts.push((tt, 0)); + continue + } + _ => continue, } } else { TokenAndSpan { tok: token::Eof, sp: self.span } diff --git a/src/test/compile-fail/issue-39709.rs b/src/test/compile-fail/issue-39709.rs new file mode 100644 index 0000000000000..0f66fe8439336 --- /dev/null +++ b/src/test/compile-fail/issue-39709.rs @@ -0,0 +1,15 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + println!("{}", { macro_rules! 
x { ($()*) => {} } 33 }); + //~^ ERROR no syntax variables matched as repeating at this depth +} + From 0340ddeb3bcfcd0cfe6a0c4745293ecf2b733dac Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sat, 11 Feb 2017 17:28:29 -0800 Subject: [PATCH 03/11] travis: Add builders without assertions This commit adds three new builders, one OSX, one Linux, and one MSVC, which will produce "nightlies" with LLVM assertions disabled. Currently all nightly releases have LLVM assertions enabled to catch bugs before they reach the beta/stable channels. The beta/stable channels, however, do not have LLVM assertions enabled. Unfortunately though projects like Servo are stuck on nightlies for the near future at least and are also suffering very long compile times. The purpose of this commit is to provide artifacts to these projects which are not distributed through normal channels (e.g. rustup) but are provided for developers to use locally if need be. Logistically these builds will all be uploaded to `rustc-builds-alt` instead of the `rustc-builds` folder of the `rust-lang-ci` bucket. These builds will stay there forever (until cleaned out if necessary) and there are no plans to integrate this with rustup and/or the official release process. --- .travis.yml | 30 ++++++++++++++++++++++++++++++ appveyor.yml | 22 ++++++++++++++++++++++ src/ci/docker/run.sh | 1 + src/ci/run.sh | 4 +++- 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a776f1b1e1041..537849964ab41 100644 --- a/.travis.yml +++ b/.travis.yml @@ -65,6 +65,20 @@ matrix: osx_image: xcode8.2 install: *osx_install_sccache + # "alternate" deployments, these are "nightlies" but don't have assertions + # turned on, they're deployed to a different location primarily for projects + # which are stuck on nightly and don't want llvm assertions in the artifacts + # that they use. 
+ - env: IMAGE=dist-x86-linux DEPLOY_ALT=1 + - env: > + RUST_CHECK_TARGET=dist + RUST_CONFIGURE_ARGS="--enable-extended" + SRC=. + DEPLOY_ALT=1 + os: osx + osx_image: xcode8.2 + install: *osx_install_sccache + env: global: - SCCACHE_BUCKET=rust-lang-ci-sccache @@ -125,3 +139,19 @@ deploy: on: branch: auto condition: $DEPLOY = 1 + + # this is the same as the above deployment provider except that it uploads to + # a slightly different directory and has a different trigger + - provider: s3 + bucket: rust-lang-ci + skip_cleanup: true + local_dir: deploy + upload_dir: rustc-builds-alt + acl: public_read + region: us-east-1 + access_key_id: AKIAIPQVNYF2T3DTYIWQ + secret_access_key: + secure: "FBqDqOTeIPMu6v/WYPf4CFSlh9rLRZGKVtpLa5KkyuOhXRTrnEzBduEtS8/FMIxdQImvurhSvxWvqRybMOi4qoVfjMqqpHAI7uBbidbrvAcJoHNsx6BgUNVCIoH6a0UsAjTUtm6/YPIpzbHoLZXPL0GrHPMk6Mu04qVSmcYNWn4=" + on: + branch: auto + condition: $DEPLOY_ALT = 1 diff --git a/appveyor.yml b/appveyor.yml index 2183d8da95f89..38781d281c8ec 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -61,6 +61,12 @@ environment: MINGW_DIR: mingw64 DEPLOY: 1 + # "alternate" deployment, see .travis.yml for more info + - MSYS_BITS: 64 + RUST_CONFIGURE_ARGS: --build=x86_64-pc-windows-msvc --enable-extended + SCRIPT: python x.py dist + DEPLOY_ALT: 1 + matrix: fast_finish: true @@ -146,6 +152,22 @@ deploy: branch: auto DEPLOY: 1 + # This provider is the same as the one above except that it has a slightly + # different upload directory and a slightly different trigger + - provider: S3 + skip_cleanup: true + access_key_id: AKIAIPQVNYF2T3DTYIWQ + secret_access_key: + secure: +11jsUNFTQ9dq5Ad1i2+PeUJaXluFJ0zIJAXESE1dFT3Kdjku4/eDdgyjgsB6GnV + bucket: rust-lang-ci + set_public: true + region: us-east-1 + artifact: /.*/ + folder: rustc-builds-alt + on: + branch: auto + DEPLOY_ALT: 1 + # init: # - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) # on_finish: diff --git 
a/src/ci/docker/run.sh b/src/ci/docker/run.sh index 0ddab8c4160f4..892c5baa5c64b 100755 --- a/src/ci/docker/run.sh +++ b/src/ci/docker/run.sh @@ -49,6 +49,7 @@ exec docker \ $args \ --env CARGO_HOME=/cargo \ --env DEPLOY=$DEPLOY \ + --env DEPLOY_ALT=$DEPLOY_ALT \ --env LOCAL_USER_ID=`id -u` \ --volume "$HOME/.cargo:/cargo" \ --rm \ diff --git a/src/ci/run.sh b/src/ci/run.sh index 960acc4de7d87..41230aedbfa01 100755 --- a/src/ci/run.sh +++ b/src/ci/run.sh @@ -30,12 +30,14 @@ RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-manage-submodules" # # FIXME: need a scheme for changing this `nightly` value to `beta` and `stable` # either automatically or manually. -if [ "$DEPLOY" != "" ]; then +if [ "$DEPLOY$DEPLOY_ALT" != "" ]; then RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --release-channel=nightly" RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-llvm-static-stdcpp" if [ "$NO_LLVM_ASSERTIONS" = "1" ]; then RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-llvm-assertions" + elif [ "$DEPLOY_ALT" != "" ]; then + RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-llvm-assertions" fi else RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-debug-assertions" From 30abe7bd583d172414d82357acf62ceabf896730 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 13 Feb 2017 06:44:06 -0800 Subject: [PATCH 04/11] test: Remove sanitizer-thread test Unfortunately it appears to spuriously fail so we can't gate on it --- src/test/run-make/sanitizer-thread/Makefile | 10 ---------- src/test/run-make/sanitizer-thread/racy.rs | 21 --------------------- 2 files changed, 31 deletions(-) delete mode 100644 src/test/run-make/sanitizer-thread/Makefile delete mode 100644 src/test/run-make/sanitizer-thread/racy.rs diff --git a/src/test/run-make/sanitizer-thread/Makefile b/src/test/run-make/sanitizer-thread/Makefile deleted file mode 100644 index 8bb89a241cb05..0000000000000 --- a/src/test/run-make/sanitizer-thread/Makefile +++ /dev/null @@ -1,10 +0,0 @@ --include ../tools.mk - -ifdef 
SANITIZER_SUPPORT -all: - $(RUSTC) -g -Z sanitizer=thread -Z print-link-args racy.rs | grep -q librustc_tsan - $(TMPDIR)/racy 2>&1 | grep -q 'data race' -else -all: - -endif diff --git a/src/test/run-make/sanitizer-thread/racy.rs b/src/test/run-make/sanitizer-thread/racy.rs deleted file mode 100644 index dc929e004a479..0000000000000 --- a/src/test/run-make/sanitizer-thread/racy.rs +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2017 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use std::thread; - -static mut ANSWER: i32 = 0; - -fn main() { - let t1 = thread::spawn(|| unsafe { ANSWER = 42 }); - unsafe { - ANSWER = 24; - } - t1.join().ok(); -} From c2566f638aa065f14f296aaad0666492ae00a636 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Mon, 13 Feb 2017 11:46:29 -0500 Subject: [PATCH 05/11] Squeeze URL lines under 100 chars wide to make tidy happy. --- src/libstd/ascii.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index cf965291fa2a6..2a22e5e7a1136 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -454,19 +454,19 @@ pub trait AsciiExt { /// /// Rust uses the WhatWG Infra Standard's [definition of ASCII /// whitespace][infra-aw]. There are several other definitions in - /// wide use. For instance, [the POSIX locale][posix-ctype] - /// includes U+000B VERTICAL TAB as well as all the above - /// characters, but—from the very same specification—[the default - /// rule for "field splitting" in the Bourne shell][field-splitting] - /// considers *only* SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// wide use. 
For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. /// /// If you are writing a program that will process an existing /// file format, check what that format's definition of whitespace is /// before using this function. /// /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace - /// [posix-ctype]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 - /// [field-splitting]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 /// /// # Examples /// From cc8d4558956e48ff65d5e0d1af13f3f6e8466b84 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 13 Feb 2017 18:11:20 +0100 Subject: [PATCH 06/11] Add filename when running rustdoc --test on a markdown file --- src/librustdoc/markdown.rs | 3 ++- src/librustdoc/test.rs | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/librustdoc/markdown.rs b/src/librustdoc/markdown.rs index 70ef7c597e4d7..c67e2fdc2b027 100644 --- a/src/librustdoc/markdown.rs +++ b/src/librustdoc/markdown.rs @@ -155,7 +155,8 @@ pub fn test(input: &str, cfgs: Vec, libs: SearchPaths, externs: Externs, let mut opts = TestOptions::default(); opts.no_crate_inject = true; let mut collector = Collector::new(input.to_string(), cfgs, libs, externs, - true, opts, maybe_sysroot, None); + true, opts, maybe_sysroot, None, + Some(input.to_owned())); find_testable_code(&input_str, &mut collector, DUMMY_SP); test_args.insert(0, "rustdoctest".to_string()); testing::test_main(&test_args, collector.tests); diff --git 
a/src/librustdoc/test.rs b/src/librustdoc/test.rs index 349bddc87405c..1c37067d7f69d 100644 --- a/src/librustdoc/test.rs +++ b/src/librustdoc/test.rs @@ -104,7 +104,8 @@ pub fn run(input: &str, false, opts, maybe_sysroot, - Some(codemap)); + Some(codemap), + None); { let dep_graph = DepGraph::new(false); @@ -391,12 +392,13 @@ pub struct Collector { maybe_sysroot: Option, position: Span, codemap: Option>, + filename: Option, } impl Collector { pub fn new(cratename: String, cfgs: Vec, libs: SearchPaths, externs: Externs, use_headers: bool, opts: TestOptions, maybe_sysroot: Option, - codemap: Option>) -> Collector { + codemap: Option>, filename: Option) -> Collector { Collector { tests: Vec::new(), names: Vec::new(), @@ -411,6 +413,7 @@ impl Collector { maybe_sysroot: maybe_sysroot, position: DUMMY_SP, codemap: codemap, + filename: filename, } } @@ -483,6 +486,8 @@ impl Collector { pub fn get_filename(&self) -> String { if let Some(ref codemap) = self.codemap { codemap.span_to_filename(self.position) + } else if let Some(ref filename) = self.filename { + filename.clone() } else { "".to_owned() } From 5817351048b7c817720f696dd6a0f7005bd1a7a4 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Mon, 13 Feb 2017 12:33:35 -0500 Subject: [PATCH 07/11] tidy: exempt URLs from the line length restriction The length of a URL is usually not under our control, and Markdown provides no way to split a URL in the middle. Therefore, comment lines consisting _solely_ of a URL (possibly with a Markdown link label in front) should be exempt from the line-length restriction. Inline hyperlink destinations ( `[foo](http://...)` notation ) are _not_ exempt, because it is my arrogant opinion that long lines of that type make the source text illegible. The patch adds dependencies on the `regex` and `lazy_static` crates to the tidy utility. This _appears_ to Just Work, but if you would rather not have that dependency I am willing to provide a hand-written parser instead. 
--- src/tools/tidy/Cargo.toml | 2 ++ src/tools/tidy/src/main.rs | 3 +++ src/tools/tidy/src/style.rs | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/tools/tidy/Cargo.toml b/src/tools/tidy/Cargo.toml index e900bd47fb7bd..39986d592899b 100644 --- a/src/tools/tidy/Cargo.toml +++ b/src/tools/tidy/Cargo.toml @@ -4,3 +4,5 @@ version = "0.1.0" authors = ["Alex Crichton "] [dependencies] +regex = "*" +lazy_static = "*" diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs index 9962c6ec9af12..bbd6c8e87c215 100644 --- a/src/tools/tidy/src/main.rs +++ b/src/tools/tidy/src/main.rs @@ -14,6 +14,9 @@ //! etc. This is run by default on `make check` and as part of the auto //! builders. +extern crate regex; +#[macro_use] extern crate lazy_static; + use std::fs; use std::path::{PathBuf, Path}; use std::env; diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs index c722eb690b8c3..91c5edfd75abc 100644 --- a/src/tools/tidy/src/style.rs +++ b/src/tools/tidy/src/style.rs @@ -26,6 +26,8 @@ use std::fs::File; use std::io::prelude::*; use std::path::Path; +use regex::Regex; + const COLS: usize = 100; const LICENSE: &'static str = "\ Copyright The Rust Project Developers. See the COPYRIGHT @@ -38,6 +40,32 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license option. This file may not be copied, modified, or distributed except according to those terms."; +/// True if LINE is allowed to be longer than the normal limit. +/// +/// Currently there is only one exception: if the line is within a +/// comment, and its entire text is one URL (possibly with a Markdown +/// link label in front), then it's allowed to be overlength. This is +/// because Markdown offers no way to split a line in the middle of a +/// URL, and the length of URLs for external references is beyond our +/// control. +fn long_line_is_ok(line: &str) -> bool { + lazy_static! 
{ + static ref URL_RE: Regex = Regex::new( + // This regexp uses the CommonMark definition of link + // label. It thinks any sequence of nonwhitespace + // characters beginning with "http://" or "https://" is a + // URL. Add more schemas as necessary. + r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$" + ).unwrap(); + } + + if URL_RE.is_match(line) { + return true; + } + + false +} + pub fn check(path: &Path, bad: &mut bool) { let mut contents = String::new(); super::walk(path, &mut super::filter_dirs, &mut |file| { @@ -61,8 +89,9 @@ pub fn check(path: &Path, bad: &mut bool) { println!("{}:{}: {}", file.display(), i + 1, msg); *bad = true; }; - if line.chars().count() > COLS && !skip_length { - err(&format!("line longer than {} chars", COLS)); + if !skip_length && line.chars().count() > COLS + && !long_line_is_ok(line) { + err(&format!("line longer than {} chars", COLS)); } if line.contains("\t") && !skip_tab { err("tab character"); From ff4758c2a0dffef264fe73b90668bd04b1b2fa89 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Mon, 13 Feb 2017 15:44:51 -0500 Subject: [PATCH 08/11] Replace regex-based parser for URL lines with open-coded one. --- src/tools/tidy/Cargo.toml | 2 -- src/tools/tidy/src/main.rs | 3 -- src/tools/tidy/src/style.rs | 66 ++++++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/src/tools/tidy/Cargo.toml b/src/tools/tidy/Cargo.toml index 39986d592899b..e900bd47fb7bd 100644 --- a/src/tools/tidy/Cargo.toml +++ b/src/tools/tidy/Cargo.toml @@ -4,5 +4,3 @@ version = "0.1.0" authors = ["Alex Crichton "] [dependencies] -regex = "*" -lazy_static = "*" diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs index bbd6c8e87c215..9962c6ec9af12 100644 --- a/src/tools/tidy/src/main.rs +++ b/src/tools/tidy/src/main.rs @@ -14,9 +14,6 @@ //! etc. This is run by default on `make check` and as part of the auto //! builders. 
-extern crate regex; -#[macro_use] extern crate lazy_static; - use std::fs; use std::path::{PathBuf, Path}; use std::env; diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs index 91c5edfd75abc..2233f8c352974 100644 --- a/src/tools/tidy/src/style.rs +++ b/src/tools/tidy/src/style.rs @@ -26,8 +26,6 @@ use std::fs::File; use std::io::prelude::*; use std::path::Path; -use regex::Regex; - const COLS: usize = 100; const LICENSE: &'static str = "\ Copyright The Rust Project Developers. See the COPYRIGHT @@ -40,26 +38,54 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license option. This file may not be copied, modified, or distributed except according to those terms."; -/// True if LINE is allowed to be longer than the normal limit. -/// -/// Currently there is only one exception: if the line is within a -/// comment, and its entire text is one URL (possibly with a Markdown -/// link label in front), then it's allowed to be overlength. This is -/// because Markdown offers no way to split a line in the middle of a -/// URL, and the length of URLs for external references is beyond our -/// control. -fn long_line_is_ok(line: &str) -> bool { - lazy_static! { - static ref URL_RE: Regex = Regex::new( - // This regexp uses the CommonMark definition of link - // label. It thinks any sequence of nonwhitespace - // characters beginning with "http://" or "https://" is a - // URL. Add more schemas as necessary. - r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$" - ).unwrap(); +/// Parser states for line_is_url. +#[derive(PartialEq)] +#[allow(non_camel_case_types)] +enum LIUState { EXP_COMMENT_START, + EXP_LINK_LABEL_OR_URL, + EXP_URL, + EXP_END } + +/// True if LINE appears to be a line comment containing an URL, +/// possibly with a Markdown link label in front, and nothing else. +/// The Markdown link label, if present, may not contain whitespace. 
+/// Lines of this form are allowed to be overlength, because Markdown +/// offers no way to split a line in the middle of a URL, and the lengths +/// of URLs to external references are beyond our control. +fn line_is_url(line: &str) -> bool { + use self::LIUState::*; + let mut state: LIUState = EXP_COMMENT_START; + + for tok in line.split_whitespace() { + match (state, tok) { + (EXP_COMMENT_START, "//") => state = EXP_LINK_LABEL_OR_URL, + (EXP_COMMENT_START, "///") => state = EXP_LINK_LABEL_OR_URL, + (EXP_COMMENT_START, "//!") => state = EXP_LINK_LABEL_OR_URL, + + (EXP_LINK_LABEL_OR_URL, w) + if w.len() >= 4 && w.starts_with("[") && w.ends_with("]:") + => state = EXP_URL, + + (EXP_LINK_LABEL_OR_URL, w) + if w.starts_with("http://") || w.starts_with("https://") + => state = EXP_END, + + (EXP_URL, w) + if w.starts_with("http://") || w.starts_with("https://") + => state = EXP_END, + + (_, _) => return false, + } } - if URL_RE.is_match(line) { + state == EXP_END +} + +/// True if LINE is allowed to be longer than the normal limit. +/// Currently there is only one exception, for long URLs, but more +/// may be added in the future. +fn long_line_is_ok(line: &str) -> bool { + if line_is_url(line) { return true; } From 162240c744fa415602dcd56f08895b9583037717 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Mon, 13 Feb 2017 18:44:43 -0500 Subject: [PATCH 09/11] Add feature annotations to the doctests for ascii_ctype. 
--- src/libstd/ascii.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 2a22e5e7a1136..af21d6d906eb5 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -193,6 +193,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -225,6 +227,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -257,6 +261,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -290,6 +296,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -322,6 +330,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -355,6 +365,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -390,6 +402,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -422,6 +436,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; @@ -471,6 +487,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 
'G'; @@ -503,6 +521,8 @@ pub trait AsciiExt { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// # #![allow(non_snake_case)] /// use std::ascii::AsciiExt; /// let A = 'A'; /// let G = 'G'; From 07b3a8bd60ff71c0519fc68068930edbbe767272 Mon Sep 17 00:00:00 2001 From: Colm Seale Date: Sun, 12 Feb 2017 23:16:06 +0000 Subject: [PATCH 10/11] Adding compile fail test for staged_api feature Issue #39059 r? @est31 --- .../compile-fail/feature-gate-staged_api.rs | 24 +++++++++++++++++++ src/tools/tidy/src/features.rs | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 src/test/compile-fail/feature-gate-staged_api.rs diff --git a/src/test/compile-fail/feature-gate-staged_api.rs b/src/test/compile-fail/feature-gate-staged_api.rs new file mode 100644 index 0000000000000..014a0aaaf68e6 --- /dev/null +++ b/src/test/compile-fail/feature-gate-staged_api.rs @@ -0,0 +1,24 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +#![stable(feature = "a", since = "b")] +//~^ ERROR stability attributes may not be used outside of the standard library +mod inner_private_module { + // UnnameableTypeAlias isn't marked as reachable, so no stability annotation is required here + pub type UnnameableTypeAlias = u8; +} + +#[stable(feature = "a", since = "b")] +//~^ ERROR stability attributes may not be used outside of the standard library +pub fn f() -> inner_private_module::UnnameableTypeAlias { + 0 +} + +fn main() {} diff --git a/src/tools/tidy/src/features.rs b/src/tools/tidy/src/features.rs index 707d5da50bf73..cb6e73237d5eb 100644 --- a/src/tools/tidy/src/features.rs +++ b/src/tools/tidy/src/features.rs @@ -167,7 +167,7 @@ pub fn check(path: &Path, bad: &mut bool) { // FIXME get this whitelist empty. let whitelist = vec![ "abi_ptx", "simd", "static_recursion", - "cfg_target_has_atomic", "staged_api", + "cfg_target_has_atomic", "unboxed_closures", "stmt_expr_attributes", "cfg_target_thread_local", "unwind_attributes", "inclusive_range_syntax" From 255b5ed842993d1d71939729c1f99684af8937bf Mon Sep 17 00:00:00 2001 From: Seo Sanghyeon Date: Tue, 14 Feb 2017 19:46:48 +0900 Subject: [PATCH 11/11] Use check_variant for non_camel_case_types lint --- src/librustc_lint/bad_style.rs | 14 +++++--------- .../test-allow-non-camel-case-variant.rs | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 src/test/run-pass/test-allow-non-camel-case-variant.rs diff --git a/src/librustc_lint/bad_style.rs b/src/librustc_lint/bad_style.rs index 05ba262ef90c0..0c86eb42e7acb 100644 --- a/src/librustc_lint/bad_style.rs +++ b/src/librustc_lint/bad_style.rs @@ -117,22 +117,18 @@ impl<'a, 'tcx> LateLintPass<'a, 'tcx> for NonCamelCaseTypes { match it.node { hir::ItemTy(..) | + hir::ItemEnum(..) | hir::ItemStruct(..) | hir::ItemUnion(..) => self.check_case(cx, "type", it.name, it.span), hir::ItemTrait(..) 
=> self.check_case(cx, "trait", it.name, it.span), - hir::ItemEnum(ref enum_definition, _) => { - if has_extern_repr { - return; - } - self.check_case(cx, "type", it.name, it.span); - for variant in &enum_definition.variants { - self.check_case(cx, "variant", variant.node.name, variant.span); - } - } _ => (), } } + fn check_variant(&mut self, cx: &LateContext, v: &hir::Variant, _: &hir::Generics) { + self.check_case(cx, "variant", v.node.name, v.span); + } + fn check_generics(&mut self, cx: &LateContext, it: &hir::Generics) { for gen in it.ty_params.iter() { self.check_case(cx, "type parameter", gen.name, gen.span); diff --git a/src/test/run-pass/test-allow-non-camel-case-variant.rs b/src/test/run-pass/test-allow-non-camel-case-variant.rs new file mode 100644 index 0000000000000..c7073b3a95e12 --- /dev/null +++ b/src/test/run-pass/test-allow-non-camel-case-variant.rs @@ -0,0 +1,18 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![deny(non_camel_case_types)] + +pub enum Foo { + #[allow(non_camel_case_types)] + bar +} + +fn main() {}