Skip to content

Commit

Permalink
[xt] XXX: delay unescaping
Browse files Browse the repository at this point in the history
Note that from_token_lit was looking for errors but never finding them!

- issue-62913.rs: The structure and output changed a bit. Issue #62913
  was about an ICE due to an unterminated string literal, so the new
  version should be good enough.

- literals-are-validated-before-expansion.rs: this tests exactly the
  behaviour that has been changed.
  XXX: insert a new test covering more of that

- XXX: explain the tests that needed to be split

- XXX: tests/ui/parser/unicode-control-codepoints.rs: just reordered errors

- XXX: tests/rustdoc-ui/ignore-block-help.rs: relies on a parsing error
  occurring. The error present was an unescaping error, which is now
  delayed to after parsing. So the commit changes it to an "unterminated
  character literal" error which still occurs during parsing.
  • Loading branch information
nnethercote committed Dec 8, 2023
1 parent 3469136 commit 536b970
Show file tree
Hide file tree
Showing 43 changed files with 562 additions and 410 deletions.
3 changes: 2 additions & 1 deletion compiler/rustc_ast/src/attr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ impl AttrArgsEq {
match self {
AttrArgsEq::Ast(expr) => match expr.kind {
ExprKind::Lit(token_lit) => {
LitKind::from_token_lit(token_lit).ok().and_then(|lit| lit.str())
LitKind::from_token_lit(token_lit).0.ok().and_then(|lit| lit.str())
}
_ => None,
},
Expand Down Expand Up @@ -426,6 +426,7 @@ impl MetaItemKind {
ExprKind::Lit(token_lit) => {
// Turn failures to `None`, we'll get parse errors elsewhere.
MetaItemLit::from_token_lit(token_lit, expr.span)
.0
.ok()
.map(|lit| MetaItemKind::NameValue(lit))
}
Expand Down
289 changes: 192 additions & 97 deletions compiler/rustc_ast/src/util/literal.rs

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions compiler/rustc_ast_lowering/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,14 @@ impl<'hir> LoweringContext<'_, 'hir> {
hir::ExprKind::Unary(op, ohs)
}
ExprKind::Lit(token_lit) => {
let lit_kind = match LitKind::from_token_lit(*token_lit) {
let (result, errs) = LitKind::from_token_lit(*token_lit);
let lit_kind = match result {
Ok(lit_kind) => lit_kind,
Err(err) => {
report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span);
LitKind::Err
}
Err(()) => LitKind::Err,
};
for err in errs {
report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span);
}
let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind));
hir::ExprKind::Lit(lit)
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_ast_lowering/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ fn inline_literals(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> {
&& let ExprKind::Lit(lit) = arg.kind
{
if let token::LitKind::Str | token::LitKind::StrRaw(_) = lit.kind
&& let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit)
&& let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit).0
{
literal = Some(s);
} else if let token::LitKind::Integer = lit.kind
&& let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit)
&& let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit).0
{
literal = Some(Symbol::intern(&n.to_string()));
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast_lowering/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
// In valid code the value always ends up as a single literal. Otherwise, a dummy
// literal suffices because the error is handled elsewhere.
let lit = if let ExprKind::Lit(token_lit) = expr.kind
&& let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span)
&& let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span).0
{
lit
} else {
Expand Down
64 changes: 35 additions & 29 deletions compiler/rustc_builtin_macros/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,48 +19,54 @@ pub fn expand_concat(
let mut has_errors = false;
for e in es {
match e.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
accumulator.push_str(s.as_str());
}
Ok(ast::LitKind::Char(c)) => {
accumulator.push(c);
}
Ok(ast::LitKind::Int(i, _)) => {
accumulator.push_str(&i.to_string());
}
Ok(ast::LitKind::Bool(b)) => {
accumulator.push_str(&b.to_string());
}
Ok(ast::LitKind::CStr(..)) => {
cx.emit_err(errors::ConcatCStrLit { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Err) => {
has_errors = true;
ast::ExprKind::Lit(token_lit) => {
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
match res {
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
accumulator.push_str(s.as_str());
}
Ok(ast::LitKind::Char(c)) => {
accumulator.push(c);
}
Ok(ast::LitKind::Int(i, _)) => {
accumulator.push_str(&i.to_string());
}
Ok(ast::LitKind::Bool(b)) => {
accumulator.push_str(&b.to_string());
}
Ok(ast::LitKind::CStr(..)) => {
cx.emit_err(errors::ConcatCStrLit { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Err) | Err(()) => {
has_errors = true;
}
}
Err(err) => {
// njn: what happens if I remove some of these non-lowering ones?
for err in errs {
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
has_errors = true;
}
},
}
// We also want to allow negative numeric literals.
ast::ExprKind::Unary(ast::UnOp::Neg, ref expr)
if let ast::ExprKind::Lit(token_lit) = expr.kind =>
{
match ast::LitKind::from_token_lit(token_lit) {
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
match res {
Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")),
Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")),
Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
Err(()) => {
has_errors = true;
}
_ => missing_literal.push(e.span),
}
for err in errs {
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
}
}
ast::ExprKind::IncludedBytes(..) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
Expand Down
18 changes: 11 additions & 7 deletions compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ fn invalid_type_err(
ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob,
};
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
match ast::LitKind::from_token_lit(token_lit) {
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
match res {
Ok(ast::LitKind::CStr(_, _)) => {
// Avoid ambiguity in handling of terminal `NUL` by refusing to
// concatenate C string literals as bytes.
Expand Down Expand Up @@ -60,9 +61,11 @@ fn invalid_type_err(
cx.emit_err(ConcatBytesNonU8 { span });
}
Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(),
Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
}
Err(()) => {}
}
for err in errs {
// njn: change to report_lit_error*s*
report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
}
}

Expand All @@ -80,7 +83,8 @@ fn handle_array_element(
*has_errors = true;
None
}
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
// njn: e.g. why doesn't this one have report_lit_error?
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 {
Ok(ast::LitKind::Int(
val,
ast::LitIntType::Unsuffixed | ast::LitIntType::Unsigned(ast::UintTy::U8),
Expand Down Expand Up @@ -141,7 +145,7 @@ pub fn expand_concat_bytes(
ast::ExprKind::Repeat(expr, count) => {
if let ast::ExprKind::Lit(token_lit) = count.value.kind
&& let Ok(ast::LitKind::Int(count_val, _)) =
ast::LitKind::from_token_lit(token_lit)
ast::LitKind::from_token_lit(token_lit).0
{
if let Some(elem) =
handle_array_element(cx, &mut has_errors, &mut missing_literals, expr)
Expand All @@ -154,7 +158,7 @@ pub fn expand_concat_bytes(
cx.emit_err(errors::ConcatBytesBadRepeat { span: count.value.span });
}
}
&ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
&ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 {
Ok(ast::LitKind::Byte(val)) => {
accumulator.push(val);
}
Expand Down
40 changes: 22 additions & 18 deletions compiler/rustc_expand/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1235,26 +1235,30 @@ pub fn expr_to_spanned_string<'a>(
let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr();

Err(match expr.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.struct_span_err(expr.span, err_msg);
let span = expr.span.shrink_to_lo();
err.span_suggestion(
span.with_hi(span.lo() + BytePos(1)),
"consider removing the leading `b`",
"",
Applicability::MaybeIncorrect,
);
Some((err, true))
}
Ok(ast::LitKind::Err) => None,
Err(err) => {
ast::ExprKind::Lit(token_lit) => {
let (lit_kind, errs) = ast::LitKind::from_token_lit(token_lit);
let res = match lit_kind {
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.struct_span_err(expr.span, err_msg);
let span = expr.span.shrink_to_lo();
err.span_suggestion(
span.with_hi(span.lo() + BytePos(1)),
"consider removing the leading `b`",
"",
Applicability::MaybeIncorrect,
);
Some((err, true))
}
Ok(ast::LitKind::Err) => None,
Err(()) => None,
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
};
for err in errs {
parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span);
None
}
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
},
res
}
ast::ExprKind::Err => None,
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
})
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_expand/src/mbe/metavar_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ fn parse_depth<'sess>(
.span_diagnostic
.struct_span_err(span, "meta-variable expression depth must be a literal"));
};
if let Ok(lit_kind) = LitKind::from_token_lit(*lit)
if let Ok(lit_kind) = LitKind::from_token_lit(*lit).0
&& let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind
&& let Ok(n_usize) = usize::try_from(n_u128)
{
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ where
// them in the range computation.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
let res: Result<T, EscapeError> = match c {
'\\' => {
match chars.clone().next() {
Some('\n') => {
Expand All @@ -361,7 +361,7 @@ where
_ => scan_escape::<T>(&mut chars, mode),
}
}
'"' => Err(EscapeError::EscapeOnlyChar),
'"' => Err(EscapeError::EscapeOnlyChar), // njn: is this ever hit?
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(c, chars_should_be_ascii).map(Into::into),
};
Expand Down
48 changes: 24 additions & 24 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span};

mod diagnostics;
mod tokentrees;
mod unescape_error_reporting;
pub(crate) mod unescape_error_reporting;
mod unicode_chars;

use unescape_error_reporting::{emit_unescape_error, escaped_char};
use unescape_error_reporting::escaped_char;

// This type is used a lot. Make sure it doesn't unintentionally get bigger.
//
Expand Down Expand Up @@ -696,47 +696,47 @@ impl<'a> StringReader<'a> {
fn cook_common(
&self,
kind: token::LitKind,
mode: Mode,
mode: Mode, // njn: remove
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
) -> (token::LitKind, Symbol) {
let mut has_fatal_err = false;
let content_start = start + BytePos(prefix_len);
let content_end = end - BytePos(postfix_len);
let lit_content = self.str_from_to(content_start, content_end);
#[allow(unused)]
unescape(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
// njn: temp for comparison, remove eventually
if let Err(err) = result {
// `span` is substring expressed as a span
// `range` is substring expressed as indices
let span_with_quotes = self.mk_sp(start, end);
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
if err.is_fatal() {
has_fatal_err = true;
}
emit_unescape_error(
&self.sess.span_diagnostic,
lit_content,
span_with_quotes,
span,
mode,
range,
err,
);
//if err.is_fatal() {
// //has_fatal_err = true;
//}
// eprintln!(
// "earl_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}",
// lit_content, span_with_quotes, span, mode, range, err
// );
// crate::lexer::unescape_error_reporting::emit_unescape_error(
// &self.sess.span_diagnostic,
// lit_content,
// span_with_quotes,
// span,
// mode,
// range,
// err,
// );
}
});

// We normally exclude the quotes for the symbol, but for errors we
// include it because it results in clearer error messages.
if !has_fatal_err {
(kind, Symbol::intern(lit_content))
} else {
(token::Err, self.symbol_from_to(start, end))
}
(kind, Symbol::intern(lit_content))
}

fn cook_quoted(
Expand Down
6 changes: 3 additions & 3 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ pub(crate) fn emit_unescape_error(
lit: &str,
// full span of the literal, including quotes
span_with_quotes: Span,
// interior span of the literal, without quotes
// interior span of the literal, without quotes // njn: is that wrong?
span: Span,
mode: Mode,
// range of the error inside `lit`
range: Range<usize>,
error: EscapeError,
) {
debug!(
"emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
lit, span_with_quotes, mode, range, error
"emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}",
lit, span_with_quotes, span, mode, range, error
);
let last_char = || {
let c = lit[range.clone()].chars().next_back().unwrap();
Expand Down
Loading

0 comments on commit 536b970

Please sign in to comment.