Skip to content

Commit

Permalink
syntax: Handle \r\n in byte string literals
Browse files Browse the repository at this point in the history
This ended up passing through the lexer but dying later on in parsing when it
wasn't handled. The strategy taken was to copy the `str_lit` function, but adapt
it for bytes.

Closes #16278
  • Loading branch information
alexcrichton committed Aug 6, 2014
1 parent 6da3889 commit 74ae05a
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 18 deletions.
56 changes: 38 additions & 18 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::gc::Gc;
use std::io::File;
use std::rc::Rc;
use std::str;
use std::iter;

pub mod lexer;
pub mod parser;
Expand Down Expand Up @@ -327,7 +328,7 @@ pub fn str_lit(lit: &str) -> String {
let error = |i| format!("lexer should have rejected {} at {}", lit, i);

/// Eat everything up to a non-whitespace
fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
fn eat<'a>(it: &mut iter::Peekable<(uint, char), str::CharOffsets<'a>>) {
loop {
match it.peek().map(|x| x.val1()) {
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
Expand Down Expand Up @@ -471,35 +472,54 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
let error = |i| format!("lexer should have rejected {} at {}", lit, i);

/// Eat everything up to a non-whitespace
///
/// Advances `it` past any leading run of b' ', b'\n', b'\r' or b'\t' bytes.
/// Stops without consuming anything further at the first other byte, or
/// when the iterator is exhausted.
fn eat<'a, I: Iterator<(uint, u8)>>(it: &mut iter::Peekable<(uint, u8), I>) {
loop {
// Look at the u8 half of the next (uint, u8) pair without consuming it.
match it.peek().map(|x| x.val1()) {
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
it.next();
},
// Any other byte, or end of input, ends the skip.
_ => { break; }
}
}
}

// binary literals *must* be ASCII, but the escapes don't have to be
let mut chars = lit.as_bytes().iter().enumerate().peekable();
let mut chars = lit.bytes().enumerate().peekable();
loop {
match chars.next() {
Some((i, &c)) => {
if c == b'\\' {
if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
loop {
// eat everything up to a non-whitespace
match chars.peek().map(|x| *x.val1()) {
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
chars.next();
},
_ => { break; }
}
Some((i, b'\\')) => {
let em = error(i);
match chars.peek().expect(em.as_slice()).val1() {
b'\n' => eat(&mut chars),
b'\r' => {
chars.next();
if chars.peek().expect(em.as_slice()).val1() != b'\n' {
fail!("lexer accepted bare CR");
}
} else {
eat(&mut chars);
}
_ => {
// otherwise, a normal escape
let (c, n) = byte_lit(lit.slice_from(i));
for _ in range(0, n - 1) { // we don't need to move past the first \
// we don't need to move past the first \
for _ in range(0, n - 1) {
chars.next();
}
res.push(c);
}
} else {
res.push(c);
}
},
None => { break; }
Some((i, b'\r')) => {
let em = error(i);
if chars.peek().expect(em.as_slice()).val1() != b'\n' {
fail!("lexer accepted bare CR");
}
chars.next();
res.push(b'\n');
}
Some((_, c)) => res.push(c),
None => break,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/test/run-pass/.gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
issue-16278.rs -text
20 changes: 20 additions & 0 deletions src/test/run-pass/issue-16278.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// ignore-tidy-cr

// this file has some special \r\n endings (use xxd to see them)

// Both assertions use byte string literals that span a line break; per the
// note above, those line breaks are \r\n in this file.
// First assertion: a trailing backslash before the line break is expected to
// produce an empty byte string.
fn main() {assert_eq!(b"", b"\
");
// Second assertion: a line break written directly inside the literal is
// expected to produce the single byte b'\n'.
assert_eq!(b"\n", b"
");
}

0 comments on commit 74ae05a

Please sign in to comment.