From cee79c1ea3a8d0493305b689c0dc2ec39596bd9e Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 6 Jul 2023 10:31:41 +0530 Subject: [PATCH] Use Jupyter mode for the parser with Notebook files --- crates/ruff/src/importer/insertion.rs | 6 ++--- crates/ruff/src/jupyter/notebook.rs | 3 ++- crates/ruff/src/linter.rs | 30 +++++++++++++++++++----- crates/ruff/src/rules/pyflakes/mod.rs | 3 ++- crates/ruff/src/test.rs | 10 ++++++-- crates/ruff_benchmark/benches/linter.rs | 1 + crates/ruff_cli/src/diagnostics.rs | 20 ++++++++++++++-- crates/ruff_dev/src/print_tokens.rs | 10 +++++++- crates/ruff_python_ast/src/token_kind.rs | 3 +++ crates/ruff_rustpython/src/lib.rs | 7 +++--- crates/ruff_wasm/src/lib.rs | 3 ++- 11 files changed, 76 insertions(+), 20 deletions(-) diff --git a/crates/ruff/src/importer/insertion.rs b/crates/ruff/src/importer/insertion.rs index 469f7a1d28513..bb893822a05ea 100644 --- a/crates/ruff/src/importer/insertion.rs +++ b/crates/ruff/src/importer/insertion.rs @@ -302,7 +302,7 @@ mod tests { use ruff_text_size::TextSize; use rustpython_parser::ast::Suite; use rustpython_parser::lexer::LexResult; - use rustpython_parser::Parse; + use rustpython_parser::{Mode, Parse}; use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_whitespace::LineEnding; @@ -313,7 +313,7 @@ mod tests { fn start_of_file() -> Result<()> { fn insert(contents: &str) -> Result { let program = Suite::parse(contents, "")?; - let tokens: Vec = ruff_rustpython::tokenize(contents); + let tokens: Vec = ruff_rustpython::tokenize(contents, Mode::Module); let locator = Locator::new(contents); let stylist = Stylist::from_tokens(&tokens, &locator); Ok(Insertion::start_of_file(&program, &locator, &stylist)) @@ -424,7 +424,7 @@ x = 1 #[test] fn start_of_block() { fn insert(contents: &str, offset: TextSize) -> Insertion { - let tokens: Vec = ruff_rustpython::tokenize(contents); + let tokens: Vec = ruff_rustpython::tokenize(contents, Mode::Module); let locator = Locator::new(contents); let stylist = Stylist::from_tokens(&tokens, &locator); Insertion::start_of_block(offset, &locator, &stylist) diff --git a/crates/ruff/src/jupyter/notebook.rs b/crates/ruff/src/jupyter/notebook.rs index 342bc249309b5..0bee249c95ff5 100644 --- a/crates/ruff/src/jupyter/notebook.rs +++ b/crates/ruff/src/jupyter/notebook.rs @@ -6,6 +6,7 @@ use std::path::Path; use itertools::Itertools; use once_cell::sync::OnceCell; +use rustpython_parser::Mode; use serde::Serialize; use serde_json::error::Category; @@ -151,7 +152,7 @@ impl Notebook { ) })?; // Check if tokenizing was successful and the file is non-empty - if (ruff_rustpython::tokenize(&contents)) + if (ruff_rustpython::tokenize(&contents, Mode::Module)) .last() .map_or(true, Result::is_err) { diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index ebdfded081327..b16ee6cb77c7a 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -8,7 +8,7 @@ use itertools::Itertools; use log::error; use rustc_hash::FxHashMap; use rustpython_parser::lexer::LexResult; -use rustpython_parser::ParseError; +use rustpython_parser::{Mode, ParseError}; use ruff_diagnostics::Diagnostic; use ruff_python_ast::imports::ImportMap; @@ -134,7 +134,12 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_imports()); if use_ast || use_imports || use_doc_lines { - match ruff_rustpython::parse_program_tokens(tokens, &path.to_string_lossy()) { + let mode = if source_kind.map_or(false, |kind| kind.is_jupyter()) { + Mode::Jupyter + } else { + Mode::Module + }; + match ruff_rustpython::parse_program_tokens(tokens, mode, &path.to_string_lossy()) { Ok(python_ast) => { if use_ast { diagnostics.extend(check_ast( @@ -255,7 +260,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings let contents = std::fs::read_to_string(path)?; // Tokenize once. - let tokens: Vec = ruff_rustpython::tokenize(&contents); + let tokens: Vec = ruff_rustpython::tokenize(&contents, Mode::Module); // Map row and column locations to byte slices (lazily). let locator = Locator::new(&contents); @@ -320,9 +325,16 @@ pub fn lint_only( package: Option<&Path>, settings: &Settings, noqa: flags::Noqa, + source_kind: Option<&SourceKind>, ) -> LinterResult<(Vec, Option)> { + let mode = if source_kind.map_or(false, |source_kind| source_kind.is_jupyter()) { + Mode::Jupyter + } else { + Mode::Module + }; + // Tokenize once. - let tokens: Vec = ruff_rustpython::tokenize(contents); + let tokens: Vec = ruff_rustpython::tokenize(contents, mode); // Map row and column locations to byte slices (lazily). let locator = Locator::new(contents); @@ -352,7 +364,7 @@ pub fn lint_only( &directives, settings, noqa, - None, + source_kind, ); result.map(|(diagnostics, imports)| { @@ -411,10 +423,16 @@ pub fn lint_fix<'a>( // Track whether the _initial_ source code was parseable. let mut parseable = false; + let mode = if source_kind.is_jupyter() { + Mode::Jupyter + } else { + Mode::Module + }; + // Continuously autofix until the source code stabilizes. loop { // Tokenize once. - let tokens: Vec = ruff_rustpython::tokenize(&transformed); + let tokens: Vec = ruff_rustpython::tokenize(&transformed, mode); // Map row and column locations to byte slices (lazily). let locator = Locator::new(&transformed); diff --git a/crates/ruff/src/rules/pyflakes/mod.rs b/crates/ruff/src/rules/pyflakes/mod.rs index e796ac45639fd..052fb4772f438 100644 --- a/crates/ruff/src/rules/pyflakes/mod.rs +++ b/crates/ruff/src/rules/pyflakes/mod.rs @@ -12,6 +12,7 @@ mod tests { use anyhow::Result; use regex::Regex; use rustpython_parser::lexer::LexResult; + use rustpython_parser::Mode; use test_case::test_case; use ruff_diagnostics::Diagnostic; @@ -499,7 +500,7 @@ mod tests { fn flakes(contents: &str, expected: &[Rule]) { let contents = dedent(contents); let settings = Settings::for_rules(Linter::Pyflakes.rules()); - let tokens: Vec = ruff_rustpython::tokenize(&contents); + let tokens: Vec = ruff_rustpython::tokenize(&contents, Mode::Module); let locator = Locator::new(&contents); let stylist = Stylist::from_tokens(&tokens, &locator); let indexer = Indexer::from_tokens(&tokens, &locator); diff --git a/crates/ruff/src/test.rs b/crates/ruff/src/test.rs index 1c2eb7aefb46a..995197abbf68b 100644 --- a/crates/ruff/src/test.rs +++ b/crates/ruff/src/test.rs @@ -9,6 +9,7 @@ use itertools::Itertools; use ruff_textwrap::dedent; use rustc_hash::FxHashMap; use rustpython_parser::lexer::LexResult; +use rustpython_parser::Mode; use ruff_diagnostics::{AutofixKind, Diagnostic}; use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist}; @@ -97,8 +98,13 @@ pub(crate) fn max_iterations() -> usize { /// A convenient wrapper around [`check_path`], that additionally /// asserts that autofixes converge after a fixed number of iterations. fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec { + let mode = if source_kind.is_jupyter() { + Mode::Jupyter + } else { + Mode::Module + }; let contents = source_kind.content().to_string(); - let tokens: Vec = ruff_rustpython::tokenize(&contents); + let tokens: Vec = ruff_rustpython::tokenize(&contents, mode); let locator = Locator::new(&contents); let stylist = Stylist::from_tokens(&tokens, &locator); let indexer = Indexer::from_tokens(&tokens, &locator); @@ -160,7 +166,7 @@ fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) notebook.update(&source_map, &fixed_contents); }; - let tokens: Vec = ruff_rustpython::tokenize(&fixed_contents); + let tokens: Vec = ruff_rustpython::tokenize(&fixed_contents, mode); let locator = Locator::new(&fixed_contents); let stylist = Stylist::from_tokens(&tokens, &locator); let indexer = Indexer::from_tokens(&tokens, &locator); diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs index 98ea5c1bd3327..7abaa4fdafa25 100644 --- a/crates/ruff_benchmark/benches/linter.rs +++ b/crates/ruff_benchmark/benches/linter.rs @@ -63,6 +63,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &Settings) { None, settings, flags::Noqa::Enabled, + None, ); // Assert that file contains no parse errors diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index f994b7a055e05..1d058b9b98ff1 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -204,12 +204,26 @@ pub(crate) fn lint_path( (result, fixed) } else { // If we fail to autofix, lint the original source code. - let result = lint_only(&contents, path, package, &settings.lib, noqa); + let result = lint_only( + &contents, + path, + package, + &settings.lib, + noqa, + Some(&source_kind), + ); let fixed = FxHashMap::default(); (result, fixed) } } else { - let result = lint_only(&contents, path, package, &settings.lib, noqa); + let result = lint_only( + &contents, + path, + package, + &settings.lib, + noqa, + Some(&source_kind), + ); let fixed = FxHashMap::default(); (result, fixed) }; @@ -316,6 +330,7 @@ pub(crate) fn lint_stdin( package, settings, noqa, + Some(&source_kind), ); let fixed = FxHashMap::default(); @@ -333,6 +348,7 @@ pub(crate) fn lint_stdin( package, settings, noqa, + Some(&source_kind), ); let fixed = FxHashMap::default(); (result, fixed) diff --git a/crates/ruff_dev/src/print_tokens.rs b/crates/ruff_dev/src/print_tokens.rs index 39b05b3a6236e..d8e62051dfde0 100644 --- a/crates/ruff_dev/src/print_tokens.rs +++ b/crates/ruff_dev/src/print_tokens.rs @@ -12,11 +12,19 @@ pub(crate) struct Args { /// Python file for which to generate the AST. #[arg(required = true)] file: PathBuf, + /// Run in Jupyter mode i.e., allow line magics (%), shell commands (!), and help (?). + #[arg(long)] + jupyter: bool, } pub(crate) fn main(args: &Args) -> Result<()> { let contents = fs::read_to_string(&args.file)?; - for (tok, range) in lexer::lex(&contents, Mode::Module).flatten() { + let mode = if args.jupyter { + Mode::Jupyter + } else { + Mode::Module + }; + for (tok, range) in lexer::lex(&contents, mode).flatten() { println!( "{start:#?} {tok:#?} {end:#?}", start = range.start(), diff --git a/crates/ruff_python_ast/src/token_kind.rs b/crates/ruff_python_ast/src/token_kind.rs index 7b460eba20a91..a0745aef55597 100644 --- a/crates/ruff_python_ast/src/token_kind.rs +++ b/crates/ruff_python_ast/src/token_kind.rs @@ -19,6 +19,8 @@ pub enum TokenKind { /// Token value for a newline that is not a logical line break. These are filtered out of /// the token stream prior to parsing. NonLogicalNewline, + /// Token value for a Jupyter magic command. + MagicCommand, /// Token value for an indent. Indent, /// Token value for a dedent. @@ -341,6 +343,7 @@ impl TokenKind { Tok::Comment(_) => TokenKind::Comment, Tok::Newline => TokenKind::Newline, Tok::NonLogicalNewline => TokenKind::NonLogicalNewline, + Tok::MagicCommand(_) => TokenKind::MagicCommand, Tok::Indent => TokenKind::Indent, Tok::Dedent => TokenKind::Dedent, Tok::EndOfFile => TokenKind::EndOfFile, diff --git a/crates/ruff_rustpython/src/lib.rs b/crates/ruff_rustpython/src/lib.rs index c0fcd72e28081..1203ba82b33b7 100644 --- a/crates/ruff_rustpython/src/lib.rs +++ b/crates/ruff_rustpython/src/lib.rs @@ -4,9 +4,9 @@ use rustpython_parser::lexer::LexResult; use rustpython_parser::{lexer, Mode, ParseError}; /// Collect tokens up to and including the first error. -pub fn tokenize(contents: &str) -> Vec { +pub fn tokenize(contents: &str, mode: Mode) -> Vec { let mut tokens: Vec = vec![]; - for tok in lexer::lex(contents, Mode::Module) { + for tok in lexer::lex(contents, mode) { let is_err = tok.is_err(); tokens.push(tok); if is_err { @@ -19,9 +19,10 @@ pub fn tokenize(contents: &str) -> Vec { /// Parse a full Python program from its tokens. pub fn parse_program_tokens( lxr: Vec, + mode: Mode, source_path: &str, ) -> anyhow::Result { - parser::parse_tokens(lxr, Mode::Module, source_path).map(|top| match top { + parser::parse_tokens(lxr, mode, source_path).map(|top| match top { Mod::Module(ModModule { body, .. }) => body, _ => unreachable!(), }) diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 27e3de73c09d6..2fd7ccdbf7fa2 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -1,6 +1,7 @@ use std::path::Path; use rustpython_parser::lexer::LexResult; +use rustpython_parser::Mode; use serde::{Deserialize, Serialize}; use wasm_bindgen::prelude::*; @@ -180,7 +181,7 @@ pub fn check(contents: &str, options: JsValue) -> Result { Settings::from_configuration(configuration, Path::new(".")).map_err(|e| e.to_string())?; // Tokenize once. - let tokens: Vec = ruff_rustpython::tokenize(contents); + let tokens: Vec = ruff_rustpython::tokenize(contents, Mode::Module); // Map row and column locations to byte slices (lazily). let locator = Locator::new(contents);