From 97d0659ce3e3977245ca202770078a9df60849dd Mon Sep 17 00:00:00 2001 From: Brent Westbrook <36778786+ntBre@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:50:50 -0500 Subject: [PATCH] Pass `ParseOptions` to the parser (#16220) ## Summary This is part of the preparation for detecting syntax errors in the parser from https://github.com/astral-sh/ruff/pull/16090/. As suggested in [this comment](https://github.com/astral-sh/ruff/pull/16090/#discussion_r1953084509), I started working on a `ParseOptions` struct that could be stored in the parser. For this initial refactor, I only made it hold the existing `Mode` option, but for syntax errors, we will also need it to have a `PythonVersion`. For that use case, I'm picturing something like a `ParseOptions::with_python_version` method, so you can extend the current calls to something like ```rust ParseOptions::from(mode).with_python_version(settings.target_version) ``` But I thought it was worth adding `ParseOptions` alone without changing any other behavior first. Most of the diff is just updating call sites taking `Mode` to take `ParseOptions::from(Mode)` or those taking `PySourceType`s to take `ParseOptions::from(PySourceType)`. The interesting changes are in the new `parser/options.rs` file and smaller parts of `parser/mod.rs` and `ruff_python_parser/src/lib.rs`. ## Test Plan Existing tests; this should not change any behavior. 
--- crates/ruff_benchmark/benches/formatter.rs | 6 +-- crates/ruff_dev/src/print_ast.rs | 5 +- crates/ruff_graph/src/lib.rs | 4 +- crates/ruff_linter/src/message/mod.rs | 4 +- .../tests/source_order.rs | 4 +- .../tests/visitor.rs | 4 +- crates/ruff_python_codegen/src/generator.rs | 5 +- crates/ruff_python_codegen/src/stylist.rs | 4 +- crates/ruff_python_formatter/src/cli.rs | 4 +- .../ruff_python_formatter/src/comments/mod.rs | 6 +-- crates/ruff_python_formatter/src/lib.rs | 8 ++-- crates/ruff_python_formatter/src/range.rs | 4 +- .../src/string/docstring.rs | 10 ++-- .../ruff_python_formatter/tests/fixtures.rs | 6 +-- crates/ruff_python_parser/src/lib.rs | 47 ++++++++++--------- .../src/parser/expression.rs | 2 +- crates/ruff_python_parser/src/parser/mod.rs | 23 +++++---- .../ruff_python_parser/src/parser/options.rs | 41 ++++++++++++++++ .../src/parser/statement.rs | 4 +- crates/ruff_python_parser/src/parser/tests.rs | 8 ++-- crates/ruff_python_parser/tests/fixtures.rs | 8 ++-- .../tests/block_comments.rs | 16 +++---- .../tests/simple_tokenizer.rs | 4 +- crates/ruff_wasm/src/lib.rs | 10 ++-- .../red_knot_check_invalid_syntax.rs | 4 +- 25 files changed, 148 insertions(+), 93 deletions(-) create mode 100644 crates/ruff_python_parser/src/parser/options.rs diff --git a/crates/ruff_benchmark/benches/formatter.rs b/crates/ruff_benchmark/benches/formatter.rs index 1320cd7228d17..69cb238f2ad3e 100644 --- a/crates/ruff_benchmark/benches/formatter.rs +++ b/crates/ruff_benchmark/benches/formatter.rs @@ -8,7 +8,7 @@ use ruff_benchmark::{ TestCase, LARGE_DATASET, NUMPY_CTYPESLIB, NUMPY_GLOBALS, PYDANTIC_TYPES, UNICODE_PYPINYIN, }; use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions}; -use ruff_python_parser::{parse, Mode}; +use ruff_python_parser::{parse, Mode, ParseOptions}; use ruff_python_trivia::CommentRanges; #[cfg(target_os = "windows")] @@ -48,8 +48,8 @@ fn benchmark_formatter(criterion: &mut Criterion) { &case, |b, case| { // Parse the source. 
- let parsed = - parse(case.code(), Mode::Module).expect("Input should be a valid Python code"); + let parsed = parse(case.code(), ParseOptions::from(Mode::Module)) + .expect("Input should be a valid Python code"); let comment_ranges = CommentRanges::from(parsed.tokens()); diff --git a/crates/ruff_dev/src/print_ast.rs b/crates/ruff_dev/src/print_ast.rs index 35206ca45ec13..b3682e84f60a0 100644 --- a/crates/ruff_dev/src/print_ast.rs +++ b/crates/ruff_dev/src/print_ast.rs @@ -7,7 +7,7 @@ use anyhow::Result; use ruff_linter::source_kind::SourceKind; use ruff_python_ast::PySourceType; -use ruff_python_parser::{parse, AsMode}; +use ruff_python_parser::{parse, ParseOptions}; #[derive(clap::Args)] pub(crate) struct Args { @@ -24,7 +24,8 @@ pub(crate) fn main(args: &Args) -> Result<()> { args.file.display() ) })?; - let python_ast = parse(source_kind.source_code(), source_type.as_mode())?.into_syntax(); + let python_ast = + parse(source_kind.source_code(), ParseOptions::from(source_type))?.into_syntax(); println!("{python_ast:#?}"); Ok(()) } diff --git a/crates/ruff_graph/src/lib.rs b/crates/ruff_graph/src/lib.rs index 0d6a6669bafb6..327b07963471e 100644 --- a/crates/ruff_graph/src/lib.rs +++ b/crates/ruff_graph/src/lib.rs @@ -4,7 +4,7 @@ use anyhow::Result; use ruff_db::system::{SystemPath, SystemPathBuf}; use ruff_python_ast::helpers::to_module_path; -use ruff_python_parser::{parse, Mode}; +use ruff_python_parser::{parse, Mode, ParseOptions}; use crate::collector::Collector; pub use crate::db::ModuleDb; @@ -30,7 +30,7 @@ impl ModuleImports { ) -> Result { // Read and parse the source code. 
let source = std::fs::read_to_string(path)?; - let parsed = parse(&source, Mode::Module)?; + let parsed = parse(&source, ParseOptions::from(Mode::Module))?; let module_path = package.and_then(|package| to_module_path(package.as_std_path(), path.as_std_path())); diff --git a/crates/ruff_linter/src/message/mod.rs b/crates/ruff_linter/src/message/mod.rs index 9ce54301175b1..52de250c4c32d 100644 --- a/crates/ruff_linter/src/message/mod.rs +++ b/crates/ruff_linter/src/message/mod.rs @@ -309,7 +309,7 @@ mod tests { use ruff_diagnostics::{Diagnostic, DiagnosticKind, Edit, Fix}; use ruff_notebook::NotebookIndex; - use ruff_python_parser::{parse_unchecked, Mode}; + use ruff_python_parser::{parse_unchecked, Mode, ParseOptions}; use ruff_source_file::{OneIndexed, SourceFileBuilder}; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -325,7 +325,7 @@ if call(foo "; let locator = Locator::new(source); let source_file = SourceFileBuilder::new("syntax_errors.py", source).finish(); - parse_unchecked(source, Mode::Module) + parse_unchecked(source, ParseOptions::from(Mode::Module)) .errors() .iter() .map(|parse_error| { diff --git a/crates/ruff_python_ast_integration_tests/tests/source_order.rs b/crates/ruff_python_ast_integration_tests/tests/source_order.rs index fdfce78c38877..a2f7574d7e4a2 100644 --- a/crates/ruff_python_ast_integration_tests/tests/source_order.rs +++ b/crates/ruff_python_ast_integration_tests/tests/source_order.rs @@ -4,7 +4,7 @@ use insta::assert_snapshot; use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, BoolOp, CmpOp, Operator, Singleton, UnaryOp}; -use ruff_python_parser::{parse, Mode}; +use ruff_python_parser::{parse, Mode, ParseOptions}; #[test] fn function_arguments() { @@ -147,7 +147,7 @@ fn f_strings() { } fn trace_source_order_visitation(source: &str) -> String { - let parsed = parse(source, Mode::Module).unwrap(); + let parsed = parse(source, 
ParseOptions::from(Mode::Module)).unwrap(); let mut visitor = RecordVisitor::default(); visitor.visit_mod(parsed.syntax()); diff --git a/crates/ruff_python_ast_integration_tests/tests/visitor.rs b/crates/ruff_python_ast_integration_tests/tests/visitor.rs index 128d0c3f12184..8bfe4851f9279 100644 --- a/crates/ruff_python_ast_integration_tests/tests/visitor.rs +++ b/crates/ruff_python_ast_integration_tests/tests/visitor.rs @@ -13,7 +13,7 @@ use ruff_python_ast::{ Expr, FString, FStringElement, Keyword, MatchCase, Operator, Parameter, Parameters, Pattern, Stmt, StringLiteral, TypeParam, UnaryOp, WithItem, }; -use ruff_python_parser::{parse, Mode}; +use ruff_python_parser::{parse, Mode, ParseOptions}; #[test] fn function_arguments() { @@ -156,7 +156,7 @@ fn f_strings() { } fn trace_visitation(source: &str) -> String { - let parsed = parse(source, Mode::Module).unwrap(); + let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap(); let mut visitor = RecordVisitor::default(); walk_module(&mut visitor, parsed.syntax()); diff --git a/crates/ruff_python_codegen/src/generator.rs b/crates/ruff_python_codegen/src/generator.rs index 081a2dbeace25..6df104a02f0b0 100644 --- a/crates/ruff_python_codegen/src/generator.rs +++ b/crates/ruff_python_codegen/src/generator.rs @@ -1435,7 +1435,7 @@ impl<'a> Generator<'a> { #[cfg(test)] mod tests { use ruff_python_ast::{Mod, ModModule}; - use ruff_python_parser::{self, parse_module, Mode}; + use ruff_python_parser::{self, parse_module, Mode, ParseOptions}; use ruff_source_file::LineEnding; use crate::stylist::Indentation; @@ -1467,7 +1467,8 @@ mod tests { fn jupyter_round_trip(contents: &str) -> String { let indentation = Indentation::default(); let line_ending = LineEnding::default(); - let parsed = ruff_python_parser::parse(contents, Mode::Ipython).unwrap(); + let parsed = + ruff_python_parser::parse(contents, ParseOptions::from(Mode::Ipython)).unwrap(); let Mod::Module(ModModule { body, .. 
}) = parsed.into_syntax() else { panic!("Source code didn't return ModModule") }; diff --git a/crates/ruff_python_codegen/src/stylist.rs b/crates/ruff_python_codegen/src/stylist.rs index 20217c279e064..582f1e5ff8b97 100644 --- a/crates/ruff_python_codegen/src/stylist.rs +++ b/crates/ruff_python_codegen/src/stylist.rs @@ -148,7 +148,7 @@ impl Deref for Indentation { #[cfg(test)] mod tests { - use ruff_python_parser::{parse_module, parse_unchecked, Mode}; + use ruff_python_parser::{parse_module, parse_unchecked, Mode, ParseOptions}; use ruff_source_file::{find_newline, LineEnding}; use super::{Indentation, Quote, Stylist}; @@ -215,7 +215,7 @@ x = (  3, ) "; - let parsed = parse_unchecked(contents, Mode::Module); + let parsed = parse_unchecked(contents, ParseOptions::from(Mode::Module)); assert_eq!( Stylist::from_tokens(parsed.tokens(), contents).indentation(), &Indentation(" ".to_string()) diff --git a/crates/ruff_python_formatter/src/cli.rs b/crates/ruff_python_formatter/src/cli.rs index dddd0db5a3a9b..b88d8e20bbef4 100644 --- a/crates/ruff_python_formatter/src/cli.rs +++ b/crates/ruff_python_formatter/src/cli.rs @@ -7,7 +7,7 @@ use clap::{command, Parser, ValueEnum}; use ruff_formatter::SourceCode; use ruff_python_ast::PySourceType; -use ruff_python_parser::{parse, AsMode}; +use ruff_python_parser::{parse, ParseOptions}; use ruff_python_trivia::CommentRanges; use ruff_text_size::Ranged; @@ -48,7 +48,7 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re let source_type = PySourceType::from(source_path); // Parse the AST. 
- let parsed = parse(source, source_type.as_mode()).context("Syntax error in input")?; + let parsed = parse(source, ParseOptions::from(source_type)).context("Syntax error in input")?; let options = PyFormatOptions::from_extension(source_path) .with_preview(if cli.preview { diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs index 40b7f47005788..ce2658aaaa109 100644 --- a/crates/ruff_python_formatter/src/comments/mod.rs +++ b/crates/ruff_python_formatter/src/comments/mod.rs @@ -514,7 +514,7 @@ mod tests { use ruff_formatter::SourceCode; use ruff_python_ast::{Mod, PySourceType}; - use ruff_python_parser::{parse, AsMode, Parsed}; + use ruff_python_parser::{parse, ParseOptions, Parsed}; use ruff_python_trivia::CommentRanges; use crate::comments::Comments; @@ -529,8 +529,8 @@ mod tests { fn from_code(source: &'a str) -> Self { let source_code = SourceCode::new(source); let source_type = PySourceType::Python; - let parsed = - parse(source, source_type.as_mode()).expect("Expect source to be valid Python"); + let parsed = parse(source, ParseOptions::from(source_type)) + .expect("Expect source to be valid Python"); let comment_ranges = CommentRanges::from(parsed.tokens()); CommentsTestCase { diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 86c4236648a92..c6f265792b5a4 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -5,7 +5,7 @@ pub use range::format_range; use ruff_formatter::prelude::*; use ruff_formatter::{format, write, FormatError, Formatted, PrintError, Printed, SourceCode}; use ruff_python_ast::{AnyNodeRef, Mod}; -use ruff_python_parser::{parse, AsMode, ParseError, Parsed}; +use ruff_python_parser::{parse, ParseError, ParseOptions, Parsed}; use ruff_python_trivia::CommentRanges; use ruff_text_size::Ranged; @@ -112,7 +112,7 @@ pub fn format_module_source( options: PyFormatOptions, ) -> Result { let 
source_type = options.source_type(); - let parsed = parse(source, source_type.as_mode())?; + let parsed = parse(source, ParseOptions::from(source_type))?; let comment_ranges = CommentRanges::from(parsed.tokens()); let formatted = format_module_ast(&parsed, &comment_ranges, source, options)?; Ok(formatted.print()?) @@ -154,7 +154,7 @@ mod tests { use insta::assert_snapshot; use ruff_python_ast::PySourceType; - use ruff_python_parser::{parse, AsMode}; + use ruff_python_parser::{parse, ParseOptions}; use ruff_python_trivia::CommentRanges; use ruff_text_size::{TextRange, TextSize}; @@ -199,7 +199,7 @@ def main() -> None: // Parse the AST. let source_path = "code_inline.py"; - let parsed = parse(source, source_type.as_mode()).unwrap(); + let parsed = parse(source, ParseOptions::from(source_type)).unwrap(); let comment_ranges = CommentRanges::from(parsed.tokens()); let options = PyFormatOptions::from_extension(Path::new(source_path)); let formatted = format_module_ast(&parsed, &comment_ranges, source, options).unwrap(); diff --git a/crates/ruff_python_formatter/src/range.rs b/crates/ruff_python_formatter/src/range.rs index c0f5cfa0c3294..07bf957968cfb 100644 --- a/crates/ruff_python_formatter/src/range.rs +++ b/crates/ruff_python_formatter/src/range.rs @@ -6,7 +6,7 @@ use ruff_formatter::{ }; use ruff_python_ast::visitor::source_order::{walk_body, SourceOrderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, Stmt, StmtMatch, StmtTry}; -use ruff_python_parser::{parse, AsMode}; +use ruff_python_parser::{parse, ParseOptions}; use ruff_python_trivia::{ indentation_at_offset, BackwardsTokenizer, CommentRanges, SimpleToken, SimpleTokenKind, }; @@ -73,7 +73,7 @@ pub fn format_range( assert_valid_char_boundaries(range, source); - let parsed = parse(source, options.source_type().as_mode())?; + let parsed = parse(source, ParseOptions::from(options.source_type()))?; let source_code = SourceCode::new(source); let comment_ranges = CommentRanges::from(parsed.tokens()); let 
comments = Comments::from_ast(parsed.syntax(), source_code, &comment_ranges); diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index 5d0a923752bde..ef6aaa190242f 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -11,6 +11,7 @@ use regex::Regex; use ruff_formatter::printer::SourceMapGeneration; use ruff_python_ast::{str::Quote, AnyStringFlags, StringFlags}; +use ruff_python_parser::ParseOptions; use ruff_python_trivia::CommentRanges; use { ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed}, @@ -492,8 +493,6 @@ impl<'src> DocstringLinePrinter<'_, '_, '_, 'src> { &mut self, kind: &mut CodeExampleKind<'_>, ) -> FormatResult>>> { - use ruff_python_parser::AsMode; - let line_width = match self.f.options().docstring_code_line_width() { DocstringCodeLineWidth::Fixed(width) => width, DocstringCodeLineWidth::Dynamic => { @@ -570,7 +569,8 @@ impl<'src> DocstringLinePrinter<'_, '_, '_, 'src> { std::format!(r#""""{}""""#, printed.as_code()) } }; - let result = ruff_python_parser::parse(&wrapped, self.f.options().source_type().as_mode()); + let result = + ruff_python_parser::parse(&wrapped, ParseOptions::from(self.f.options().source_type())); // If the resulting code is not valid, then reset and pass through // the docstring lines as-is. 
if result.is_err() { @@ -1580,10 +1580,8 @@ fn docstring_format_source( docstring_quote_style: Quote, source: &str, ) -> Result { - use ruff_python_parser::AsMode; - let source_type = options.source_type(); - let parsed = ruff_python_parser::parse(source, source_type.as_mode())?; + let parsed = ruff_python_parser::parse(source, ParseOptions::from(source_type))?; let comment_ranges = CommentRanges::from(parsed.tokens()); let source_code = ruff_formatter::SourceCode::new(source); let comments = crate::Comments::from_ast(parsed.syntax(), source_code, &comment_ranges); diff --git a/crates/ruff_python_formatter/tests/fixtures.rs b/crates/ruff_python_formatter/tests/fixtures.rs index 9b94b19c788dc..75bef10def55c 100644 --- a/crates/ruff_python_formatter/tests/fixtures.rs +++ b/crates/ruff_python_formatter/tests/fixtures.rs @@ -11,7 +11,7 @@ use crate::normalizer::Normalizer; use ruff_formatter::FormatOptions; use ruff_python_ast::comparable::ComparableMod; use ruff_python_formatter::{format_module_source, format_range, PreviewMode, PyFormatOptions}; -use ruff_python_parser::{parse, AsMode}; +use ruff_python_parser::{parse, ParseOptions}; use ruff_source_file::{LineIndex, OneIndexed}; use ruff_text_size::{TextRange, TextSize}; @@ -393,14 +393,14 @@ fn ensure_unchanged_ast( let source_type = options.source_type(); // Parse the unformatted code. - let mut unformatted_ast = parse(unformatted_code, source_type.as_mode()) + let mut unformatted_ast = parse(unformatted_code, ParseOptions::from(source_type)) .expect("Unformatted code to be valid syntax") .into_syntax(); Normalizer.visit_module(&mut unformatted_ast); let unformatted_ast = ComparableMod::from(&unformatted_ast); // Parse the formatted code. 
- let mut formatted_ast = parse(formatted_code, source_type.as_mode()) + let mut formatted_ast = parse(formatted_code, ParseOptions::from(source_type)) .expect("Formatted code to be valid syntax") .into_syntax(); Normalizer.visit_module(&mut formatted_ast); diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 53f96a8d73109..61db67ddad4f4 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -68,6 +68,7 @@ use std::iter::FusedIterator; use std::ops::Deref; pub use crate::error::{FStringErrorType, LexicalErrorType, ParseError, ParseErrorType}; +pub use crate::parser::ParseOptions; pub use crate::token::{Token, TokenKind}; use crate::parser::Parser; @@ -110,7 +111,7 @@ pub mod typing; /// assert!(module.is_ok()); /// ``` pub fn parse_module(source: &str) -> Result, ParseError> { - Parser::new(source, Mode::Module) + Parser::new(source, ParseOptions::from(Mode::Module)) .parse() .try_into_module() .unwrap() @@ -133,7 +134,7 @@ pub fn parse_module(source: &str) -> Result, ParseError> { /// assert!(expr.is_ok()); /// ``` pub fn parse_expression(source: &str) -> Result, ParseError> { - Parser::new(source, Mode::Expression) + Parser::new(source, ParseOptions::from(Mode::Expression)) .parse() .try_into_expression() .unwrap() @@ -161,7 +162,7 @@ pub fn parse_expression_range( range: TextRange, ) -> Result, ParseError> { let source = &source[..range.end().to_usize()]; - Parser::new_starts_at(source, Mode::Expression, range.start()) + Parser::new_starts_at(source, range.start(), ParseOptions::from(Mode::Expression)) .parse() .try_into_expression() .unwrap() @@ -187,8 +188,12 @@ pub fn parse_parenthesized_expression_range( range: TextRange, ) -> Result, ParseError> { let source = &source[..range.end().to_usize()]; - let parsed = - Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse(); + let parsed = Parser::new_starts_at( + source, + range.start(), + 
ParseOptions::from(Mode::ParenthesizedExpression), + ) + .parse(); parsed.try_into_expression().unwrap().into_result() } @@ -227,11 +232,11 @@ pub fn parse_string_annotation( } } -/// Parse the given Python source code using the specified [`Mode`]. +/// Parse the given Python source code using the specified [`ParseOptions`]. /// -/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied, -/// it can be used to parse a single expression, a full Python program, an interactive expression -/// or a Python program containing IPython escape commands. +/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied +/// via the [`ParseOptions`], it can be used to parse a single expression, a full Python program, +/// an interactive expression or a Python program containing IPython escape commands. /// /// # Example /// @@ -239,16 +244,16 @@ pub fn parse_string_annotation( /// parsing: /// /// ``` -/// use ruff_python_parser::{Mode, parse}; +/// use ruff_python_parser::{parse, Mode, ParseOptions}; /// -/// let parsed = parse("1 + 2", Mode::Expression); +/// let parsed = parse("1 + 2", ParseOptions::from(Mode::Expression)); /// assert!(parsed.is_ok()); /// ``` /// /// Alternatively, we can parse a full Python program consisting of multiple lines: /// /// ``` -/// use ruff_python_parser::{Mode, parse}; +/// use ruff_python_parser::{parse, Mode, ParseOptions}; /// /// let source = r#" /// class Greeter: @@ -256,39 +261,39 @@ pub fn parse_string_annotation( /// def greet(self): /// print("Hello, world!") /// "#; -/// let parsed = parse(source, Mode::Module); +/// let parsed = parse(source, ParseOptions::from(Mode::Module)); /// assert!(parsed.is_ok()); /// ``` /// /// Additionally, we can parse a Python program containing IPython escapes: /// /// ``` -/// use ruff_python_parser::{Mode, parse}; +/// use ruff_python_parser::{parse, Mode, ParseOptions}; /// /// let source = r#" /// %timeit 1 + 2 /// 
?str.replace /// !ls /// "#; -/// let parsed = parse(source, Mode::Ipython); +/// let parsed = parse(source, ParseOptions::from(Mode::Ipython)); /// assert!(parsed.is_ok()); /// ``` -pub fn parse(source: &str, mode: Mode) -> Result, ParseError> { - parse_unchecked(source, mode).into_result() +pub fn parse(source: &str, options: ParseOptions) -> Result, ParseError> { + parse_unchecked(source, options).into_result() } -/// Parse the given Python source code using the specified [`Mode`]. +/// Parse the given Python source code using the specified [`ParseOptions`]. /// /// This is same as the [`parse`] function except that it doesn't check for any [`ParseError`] /// and returns the [`Parsed`] as is. -pub fn parse_unchecked(source: &str, mode: Mode) -> Parsed { - Parser::new(source, mode).parse() +pub fn parse_unchecked(source: &str, options: ParseOptions) -> Parsed { + Parser::new(source, options).parse() } /// Parse the given Python source code using the specified [`PySourceType`]. pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed { // SAFETY: Safe because `PySourceType` always parses to a `ModModule` - Parser::new(source, source_type.as_mode()) + Parser::new(source, ParseOptions::from(source_type)) .parse() .try_into_module() .unwrap() diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index cc2e38e8d9e3a..31d8b363d4170 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -2265,7 +2265,7 @@ impl<'src> Parser<'src> { value, }; - if self.mode != Mode::Ipython { + if self.options.mode != Mode::Ipython { self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, &command); } diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index d4528c8c3c4a0..951727667f0ce 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ 
b/crates/ruff_python_parser/src/parser/mod.rs @@ -13,8 +13,11 @@ use crate::token_source::{TokenSource, TokenSourceCheckpoint}; use crate::{Mode, ParseError, ParseErrorType, TokenKind}; use crate::{Parsed, Tokens}; +pub use crate::parser::options::ParseOptions; + mod expression; mod helpers; +mod options; mod pattern; mod progress; mod recovery; @@ -32,8 +35,8 @@ pub(crate) struct Parser<'src> { /// Stores all the syntax errors found during the parsing. errors: Vec, - /// Specify the mode in which the code will be parsed. - mode: Mode, + /// Options for how the code will be parsed. + options: ParseOptions, /// The ID of the current token. This is used to track the progress of the parser /// to avoid infinite loops when the parser is stuck. @@ -51,16 +54,20 @@ pub(crate) struct Parser<'src> { impl<'src> Parser<'src> { /// Create a new parser for the given source code. - pub(crate) fn new(source: &'src str, mode: Mode) -> Self { - Parser::new_starts_at(source, mode, TextSize::new(0)) + pub(crate) fn new(source: &'src str, options: ParseOptions) -> Self { + Parser::new_starts_at(source, TextSize::new(0), options) } /// Create a new parser for the given source code which starts parsing at the given offset. - pub(crate) fn new_starts_at(source: &'src str, mode: Mode, start_offset: TextSize) -> Self { - let tokens = TokenSource::from_source(source, mode, start_offset); + pub(crate) fn new_starts_at( + source: &'src str, + start_offset: TextSize, + options: ParseOptions, + ) -> Self { + let tokens = TokenSource::from_source(source, options.mode, start_offset); Parser { - mode, + options, source, errors: Vec::new(), tokens, @@ -73,7 +80,7 @@ impl<'src> Parser<'src> { /// Consumes the [`Parser`] and returns the parsed [`Parsed`]. 
pub(crate) fn parse(mut self) -> Parsed { - let syntax = match self.mode { + let syntax = match self.options.mode { Mode::Expression | Mode::ParenthesizedExpression => { Mod::Expression(self.parse_single_expression()) } diff --git a/crates/ruff_python_parser/src/parser/options.rs b/crates/ruff_python_parser/src/parser/options.rs new file mode 100644 index 0000000000000..27a87a32ba4fb --- /dev/null +++ b/crates/ruff_python_parser/src/parser/options.rs @@ -0,0 +1,41 @@ +use ruff_python_ast::PySourceType; + +use crate::{AsMode, Mode}; + +/// Options for controlling how a source file is parsed. +/// +/// You can construct a [`ParseOptions`] directly from a [`Mode`]: +/// +/// ``` +/// use ruff_python_parser::{Mode, ParseOptions}; +/// +/// let options = ParseOptions::from(Mode::Module); +/// ``` +/// +/// or from a [`PySourceType`] +/// +/// ``` +/// use ruff_python_ast::PySourceType; +/// use ruff_python_parser::ParseOptions; +/// +/// let options = ParseOptions::from(PySourceType::Python); +/// ``` +#[derive(Debug)] +pub struct ParseOptions { + /// Specify the mode in which the code will be parsed. 
+ pub(crate) mode: Mode, +} + +impl From for ParseOptions { + fn from(mode: Mode) -> Self { + Self { mode } + } +} + +impl From for ParseOptions { + fn from(source_type: PySourceType) -> Self { + Self { + mode: source_type.as_mode(), + } + } +} diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index bfa567da41495..e76fc08915e1f 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -304,7 +304,7 @@ impl<'src> Parser<'src> { op, start, )) - } else if self.mode == Mode::Ipython && self.at(TokenKind::Question) { + } else if self.options.mode == Mode::Ipython && self.at(TokenKind::Question) { Stmt::IpyEscapeCommand( self.parse_ipython_help_end_escape_command_statement(&parsed_expr), ) @@ -932,7 +932,7 @@ impl<'src> Parser<'src> { }; let range = self.node_range(start); - if self.mode != Mode::Ipython { + if self.options.mode != Mode::Ipython { self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, range); } diff --git a/crates/ruff_python_parser/src/parser/tests.rs b/crates/ruff_python_parser/src/parser/tests.rs index 09bc41e7f7b66..645ec318d26c8 100644 --- a/crates/ruff_python_parser/src/parser/tests.rs +++ b/crates/ruff_python_parser/src/parser/tests.rs @@ -1,11 +1,11 @@ -use crate::{parse, parse_expression, parse_module, Mode}; +use crate::{parse, parse_expression, parse_module, Mode, ParseOptions}; #[test] fn test_modes() { let source = "a[0][1][2][3][4]"; - assert!(parse(source, Mode::Expression).is_ok()); - assert!(parse(source, Mode::Module).is_ok()); + assert!(parse(source, ParseOptions::from(Mode::Expression)).is_ok()); + assert!(parse(source, ParseOptions::from(Mode::Module)).is_ok()); } #[test] @@ -129,7 +129,7 @@ foo.bar[0].baz[1]?? foo.bar[0].baz[2].egg?? 
" .trim(), - Mode::Ipython, + ParseOptions::from(Mode::Ipython), ) .unwrap(); insta::assert_debug_snapshot!(parsed.syntax()); diff --git a/crates/ruff_python_parser/tests/fixtures.rs b/crates/ruff_python_parser/tests/fixtures.rs index debe45415a461..32ff8ce60f71e 100644 --- a/crates/ruff_python_parser/tests/fixtures.rs +++ b/crates/ruff_python_parser/tests/fixtures.rs @@ -6,7 +6,7 @@ use std::path::Path; use ruff_annotate_snippets::{Level, Renderer, Snippet}; use ruff_python_ast::visitor::source_order::{walk_module, SourceOrderVisitor, TraversalSignal}; use ruff_python_ast::{AnyNodeRef, Mod}; -use ruff_python_parser::{parse_unchecked, Mode, ParseErrorType, Token}; +use ruff_python_parser::{parse_unchecked, Mode, ParseErrorType, ParseOptions, Token}; use ruff_source_file::{LineIndex, OneIndexed, SourceCode}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -34,7 +34,7 @@ fn inline_err() { /// Snapshots the AST. fn test_valid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let parsed = parse_unchecked(&source, Mode::Module); + let parsed = parse_unchecked(&source, ParseOptions::from(Mode::Module)); if !parsed.is_valid() { let line_index = LineIndex::from_source_text(&source); @@ -78,7 +78,7 @@ fn test_valid_syntax(input_path: &Path) { /// Snapshots the AST and the error messages. 
fn test_invalid_syntax(input_path: &Path) { let source = fs::read_to_string(input_path).expect("Expected test file to exist"); - let parsed = parse_unchecked(&source, Mode::Module); + let parsed = parse_unchecked(&source, ParseOptions::from(Mode::Module)); assert!( !parsed.is_valid(), @@ -130,7 +130,7 @@ f'{' f'{foo!r' "; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); println!("AST:\n----\n{:#?}", parsed.syntax()); println!("Tokens:\n-------\n{:#?}", parsed.tokens()); diff --git a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs index 13bec0bc43da6..a39bae973f800 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs @@ -1,4 +1,4 @@ -use ruff_python_parser::{parse_unchecked, Mode}; +use ruff_python_parser::{parse_unchecked, Mode, ParseOptions}; use ruff_python_trivia::CommentRanges; use ruff_text_size::TextSize; @@ -6,7 +6,7 @@ use ruff_text_size::TextSize; fn block_comments_two_line_block_at_start() { // arrange let source = "# line 1\n# line 2\n"; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act @@ -20,7 +20,7 @@ fn block_comments_two_line_block_at_start() { fn block_comments_indented_block() { // arrange let source = " # line 1\n # line 2\n"; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act @@ -34,7 +34,7 @@ fn block_comments_indented_block() { fn block_comments_single_line_is_not_a_block() { // arrange let source = "\n"; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, 
ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act @@ -48,7 +48,7 @@ fn block_comments_single_line_is_not_a_block() { fn block_comments_lines_with_code_not_a_block() { // arrange let source = "x = 1 # line 1\ny = 2 # line 2\n"; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act @@ -62,7 +62,7 @@ fn block_comments_lines_with_code_not_a_block() { fn block_comments_sequential_lines_not_in_block() { // arrange let source = " # line 1\n # line 2\n"; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act @@ -81,7 +81,7 @@ fn block_comments_lines_in_triple_quotes_not_a_block() { # line 2 """ "#; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act @@ -117,7 +117,7 @@ y = 2 # do not form a block comment # therefore do not form a block comment """ "#; - let parsed = parse_unchecked(source, Mode::Module); + let parsed = parse_unchecked(source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); // act diff --git a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs index b609294855be2..1041a9189cb4c 100644 --- a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs +++ b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs @@ -1,6 +1,6 @@ use insta::assert_debug_snapshot; -use ruff_python_parser::{parse_unchecked, Mode}; +use ruff_python_parser::{parse_unchecked, Mode, ParseOptions}; use ruff_python_trivia::{lines_after, lines_before, CommentRanges, 
SimpleToken, SimpleTokenizer}; use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind}; use ruff_text_size::{TextLen, TextRange, TextSize}; @@ -22,7 +22,7 @@ impl TokenizationTestCase { } fn tokenize_reverse(&self) -> Vec<SimpleToken> { - let parsed = parse_unchecked(self.source, Mode::Module); + let parsed = parse_unchecked(self.source, ParseOptions::from(Mode::Module)); let comment_ranges = CommentRanges::from(parsed.tokens()); BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect() } diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 946bdc1520727..cc1d5bdcabc28 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -18,7 +18,9 @@ use ruff_python_ast::{Mod, PySourceType}; use ruff_python_codegen::Stylist; use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, QuoteStyle}; use ruff_python_index::Indexer; -use ruff_python_parser::{parse, parse_unchecked, parse_unchecked_source, Mode, Parsed}; +use ruff_python_parser::{ + parse, parse_unchecked, parse_unchecked_source, Mode, ParseOptions, Parsed, +}; use ruff_python_trivia::CommentRanges; use ruff_source_file::SourceLocation; use ruff_text_size::Ranged; @@ -264,13 +266,13 @@ impl Workspace { /// Parses the content and returns its AST pub fn parse(&self, contents: &str) -> Result<String, Error> { - let parsed = parse_unchecked(contents, Mode::Module); + let parsed = parse_unchecked(contents, ParseOptions::from(Mode::Module)); Ok(format!("{:#?}", parsed.into_syntax())) } pub fn tokens(&self, contents: &str) -> Result<String, Error> { - let parsed = parse_unchecked(contents, Mode::Module); + let parsed = parse_unchecked(contents, ParseOptions::from(Mode::Module)); Ok(format!("{:#?}", parsed.tokens().as_ref())) } @@ -288,7 +290,7 @@ struct ParsedModule<'a> { impl<'a> ParsedModule<'a> { fn from_source(source_code: &'a str) -> Result<Self, Error> { - let parsed = parse(source_code, Mode::Module).map_err(into_error)?; + let parsed = parse(source_code,
ParseOptions::from(Mode::Module)).map_err(into_error)?; let comment_ranges = CommentRanges::from(parsed.tokens()); Ok(Self { source_code, diff --git a/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs index 35de7cdba5efe..61151c10c5a27 100644 --- a/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs +++ b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs @@ -18,7 +18,7 @@ use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem}; use ruff_db::vendored::VendoredFileSystem; use ruff_db::{Db as SourceDb, Upcast}; use ruff_python_ast::PythonVersion; -use ruff_python_parser::{parse_unchecked, Mode}; +use ruff_python_parser::{parse_unchecked, Mode, ParseOptions}; /// Database that can be used for testing. /// @@ -134,7 +134,7 @@ fn do_fuzz(case: &[u8]) -> Corpus { return Corpus::Reject; }; - let parsed = parse_unchecked(code, Mode::Module); + let parsed = parse_unchecked(code, ParseOptions::from(Mode::Module)); if parsed.is_valid() { return Corpus::Reject; }