From 74ae05ad90d1e809663702f374bba6e62671692c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 5 Aug 2014 15:13:57 -0700 Subject: [PATCH] syntax: Handle \r\n in byte string literals This ended up passing through the lexer but dying later on in parsing when it wasn't handled. The strategy taken was to copy the `str_lit` function, but adapt it for bytes. Closes #16278 --- src/libsyntax/parse/mod.rs | 56 ++++++++++++++++++++++---------- src/test/run-pass/.gitattributes | 1 + src/test/run-pass/issue-16278.rs | 20 ++++++++++++ 3 files changed, 59 insertions(+), 18 deletions(-) create mode 100644 src/test/run-pass/issue-16278.rs diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 8f960e37de2e7..5b70ed609d98f 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -21,6 +21,7 @@ use std::gc::Gc; use std::io::File; use std::rc::Rc; use std::str; +use std::iter; pub mod lexer; pub mod parser; @@ -327,7 +328,7 @@ pub fn str_lit(lit: &str) -> String { let error = |i| format!("lexer should have rejected {} at {}", lit, i); /// Eat everything up to a non-whitespace - fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) { + fn eat<'a>(it: &mut iter::Peekable<(uint, char), str::CharOffsets<'a>>) { loop { match it.peek().map(|x| x.val1()) { Some(' ') | Some('\n') | Some('\r') | Some('\t') => { @@ -471,35 +472,54 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> { // FIXME #8372: This could be a for-loop if it didn't borrow the iterator let error = |i| format!("lexer should have rejected {} at {}", lit, i); + /// Eat everything up to a non-whitespace + fn eat<'a, I: Iterator<(uint, u8)>>(it: &mut iter::Peekable<(uint, u8), I>) { + loop { + match it.peek().map(|x| x.val1()) { + Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => { + it.next(); + }, + _ => { break; } + } + } + } + // binary literals *must* be ASCII, but the escapes don't have to be - let mut chars = 
lit.as_bytes().iter().enumerate().peekable(); + let mut chars = lit.bytes().enumerate().peekable(); loop { match chars.next() { - Some((i, &c)) => { - if c == b'\\' { - if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' { - loop { - // eat everything up to a non-whitespace - match chars.peek().map(|x| *x.val1()) { - Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => { - chars.next(); - }, - _ => { break; } - } + Some((i, b'\\')) => { + let em = error(i); + match chars.peek().expect(em.as_slice()).val1() { + b'\n' => eat(&mut chars), + b'\r' => { + chars.next(); + if chars.peek().expect(em.as_slice()).val1() != b'\n' { + fail!("lexer accepted bare CR"); } - } else { + eat(&mut chars); + } + _ => { // otherwise, a normal escape let (c, n) = byte_lit(lit.slice_from(i)); - for _ in range(0, n - 1) { // we don't need to move past the first \ + // we don't need to move past the first \ + for _ in range(0, n - 1) { chars.next(); } res.push(c); } - } else { - res.push(c); } }, - None => { break; } + Some((i, b'\r')) => { + let em = error(i); + if chars.peek().expect(em.as_slice()).val1() != b'\n' { + fail!("lexer accepted bare CR"); + } + chars.next(); + res.push(b'\n'); + } + Some((_, c)) => res.push(c), + None => break, } } diff --git a/src/test/run-pass/.gitattributes b/src/test/run-pass/.gitattributes index c6a6f23074de0..46db548a8c497 100644 --- a/src/test/run-pass/.gitattributes +++ b/src/test/run-pass/.gitattributes @@ -1 +1,2 @@ lexer-crlf-line-endings-string-literal-doc-comment.rs -text +issue-16278.rs -text diff --git a/src/test/run-pass/issue-16278.rs b/src/test/run-pass/issue-16278.rs new file mode 100644 index 0000000000000..f92426d204c0d --- /dev/null +++ b/src/test/run-pass/issue-16278.rs @@ -0,0 +1,20 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// ignore-tidy-cr + +// this file has some special \r\n endings (use xxd to see them) + +fn main() {assert_eq!(b"", b"\ + "); +assert_eq!(b"\n", b" +"); +} +