Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

syntax: Handle \r\n in byte string literals #16282

Merged
merged 1 commit into from
Aug 6, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 38 additions & 18 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::gc::Gc;
use std::io::File;
use std::rc::Rc;
use std::str;
use std::iter;

pub mod lexer;
pub mod parser;
Expand Down Expand Up @@ -327,7 +328,7 @@ pub fn str_lit(lit: &str) -> String {
let error = |i| format!("lexer should have rejected {} at {}", lit, i);

/// Eat everything up to a non-whitespace
fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
fn eat<'a>(it: &mut iter::Peekable<(uint, char), str::CharOffsets<'a>>) {
loop {
match it.peek().map(|x| x.val1()) {
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
Expand Down Expand Up @@ -471,35 +472,54 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
let error = |i| format!("lexer should have rejected {} at {}", lit, i);

/// Eat everything up to a non-whitespace
///
/// Peeks at the next `(index, byte)` pair of `it` and advances the iterator
/// for as long as the byte is one of space, `\n`, `\r` or `\t`. The first
/// byte that is anything else is left unconsumed, so the caller sees it on
/// its next `next()`/`peek()`. Also returns when the iterator is exhausted.
///
/// NOTE(review): the `'a` lifetime parameter does not appear anywhere else
/// in this signature — presumably a leftover from the `str_lit` variant of
/// this helper; confirm before removing.
fn eat<'a, I: Iterator<(uint, u8)>>(it: &mut iter::Peekable<(uint, u8), I>) {
loop {
// Only the byte (second tuple element) matters here; the index is ignored.
match it.peek().map(|x| x.val1()) {
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
// Whitespace: consume it and keep scanning.
it.next();
},
// A non-whitespace byte or `None` (end of input): stop without consuming.
_ => { break; }
}
}
}

// binary literals *must* be ASCII, but the escapes don't have to be
let mut chars = lit.as_bytes().iter().enumerate().peekable();
let mut chars = lit.bytes().enumerate().peekable();
loop {
match chars.next() {
Some((i, &c)) => {
if c == b'\\' {
if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
loop {
// eat everything up to a non-whitespace
match chars.peek().map(|x| *x.val1()) {
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
chars.next();
},
_ => { break; }
}
Some((i, b'\\')) => {
let em = error(i);
match chars.peek().expect(em.as_slice()).val1() {
b'\n' => eat(&mut chars),
b'\r' => {
chars.next();
if chars.peek().expect(em.as_slice()).val1() != b'\n' {
fail!("lexer accepted bare CR");
}
} else {
eat(&mut chars);
}
_ => {
// otherwise, a normal escape
let (c, n) = byte_lit(lit.slice_from(i));
for _ in range(0, n - 1) { // we don't need to move past the first \
// we don't need to move past the first \
for _ in range(0, n - 1) {
chars.next();
}
res.push(c);
}
} else {
res.push(c);
}
},
None => { break; }
Some((i, b'\r')) => {
let em = error(i);
if chars.peek().expect(em.as_slice()).val1() != b'\n' {
fail!("lexer accepted bare CR");
}
chars.next();
res.push(b'\n');
}
Some((_, c)) => res.push(c),
None => break,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/test/run-pass/.gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
issue-16278.rs -text
20 changes: 20 additions & 0 deletions src/test/run-pass/issue-16278.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// ignore-tidy-cr

// this file has some special \r\n endings (use xxd to see them)

fn main() {assert_eq!(b"", b"\
");
assert_eq!(b"\n", b"
");
}