From b553e88b4b45a6b7ff7eaef182f11d41bdf1a795 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Wed, 26 Jul 2023 12:49:32 +0530
Subject: [PATCH] Use Jupyter mode for the parser with Notebook files

---
 Cargo.lock                                    |  1 +
 .../fixtures/jupyter/cell/cell_magic.json     |  8 ++++
 .../test/fixtures/jupyter/line_magics.ipynb   | 41 ++++++++++++++++++
 .../jupyter/line_magics_expected.ipynb        | 40 +++++++++++++++++
 crates/ruff/src/importer/insertion.rs         |  6 +--
 crates/ruff/src/jupyter/notebook.rs           | 43 +++++++++++--------
 ...jupyter__notebook__tests__line_magics.snap | 21 +++++++++
 crates/ruff/src/linter.rs                     | 26 ++++++++---
 crates/ruff/src/rules/pyflakes/mod.rs         |  3 +-
 crates/ruff/src/test.rs                       | 10 ++++-
 crates/ruff_dev/src/print_ast.rs              | 13 ++++--
 crates/ruff_dev/src/print_tokens.rs           | 10 ++++-
 crates/ruff_python_parser/src/lib.rs          | 12 ++++--
 crates/ruff_shrinking/Cargo.toml              |  1 +
 crates/ruff_shrinking/src/main.rs             |  9 +++--
 crates/ruff_wasm/src/lib.rs                   |  2 +-
 16 files changed, 205 insertions(+), 41 deletions(-)
 create mode 100644 crates/ruff/resources/test/fixtures/jupyter/cell/cell_magic.json
 create mode 100644 crates/ruff/resources/test/fixtures/jupyter/line_magics.ipynb
 create mode 100644 crates/ruff/resources/test/fixtures/jupyter/line_magics_expected.ipynb
 create mode 100644 crates/ruff/src/jupyter/snapshots/ruff__jupyter__notebook__tests__line_magics.snap

diff --git a/Cargo.lock b/Cargo.lock
index 08cdd319176e7..56401f80f73eb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2242,6 +2242,7 @@ dependencies = [
  "ruff_python_ast",
  "ruff_python_parser",
  "rustpython-ast",
+ "rustpython-parser",
  "shlex",
  "tracing",
  "tracing-subscriber",
diff --git a/crates/ruff/resources/test/fixtures/jupyter/cell/cell_magic.json b/crates/ruff/resources/test/fixtures/jupyter/cell/cell_magic.json
new file mode 100644
index 0000000000000..ef68b202e6811
--- /dev/null
+++ b/crates/ruff/resources/test/fixtures/jupyter/cell/cell_magic.json
@@ -0,0 +1,8 @@
+{
+  "execution_count": null,
+  "cell_type": "code",
+  "id": "1",
+  "metadata": {},
+  "outputs": [],
+  "source": ["%%timeit\n", "print('hello world')"]
+}
diff --git a/crates/ruff/resources/test/fixtures/jupyter/line_magics.ipynb b/crates/ruff/resources/test/fixtures/jupyter/line_magics.ipynb
new file mode 100644
index 0000000000000..785a73102ad73
--- /dev/null
+++ b/crates/ruff/resources/test/fixtures/jupyter/line_magics.ipynb
@@ -0,0 +1,41 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eab4754a-d6df-4b41-8ee8-7e23aef440f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "_ = math.pi"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ruff)",
+   "language": "python",
+   "name": "ruff"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/crates/ruff/resources/test/fixtures/jupyter/line_magics_expected.ipynb b/crates/ruff/resources/test/fixtures/jupyter/line_magics_expected.ipynb
new file mode 100644
index 0000000000000..cdf69fa719600
--- /dev/null
+++ b/crates/ruff/resources/test/fixtures/jupyter/line_magics_expected.ipynb
@@ -0,0 +1,40 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cad32845-44f9-4a53-8b8c-a6b1bb3f3378",
+   "metadata": {},
"outputs": [], + "source": [ + "import math\n", + "\n", + "%matplotlib inline\n", + "\n", + "\n", + "_ = math.pi" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (ruff)", + "language": "python", + "name": "ruff" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/crates/ruff/src/importer/insertion.rs b/crates/ruff/src/importer/insertion.rs index 12eb54d9b4a1b..2da0141db5f64 100644 --- a/crates/ruff/src/importer/insertion.rs +++ b/crates/ruff/src/importer/insertion.rs @@ -302,7 +302,7 @@ mod tests { use ruff_text_size::TextSize; use rustpython_ast::Suite; use rustpython_parser::lexer::LexResult; - use rustpython_parser::Parse; + use rustpython_parser::{Mode, Parse}; use ruff_python_codegen::Stylist; use ruff_source_file::{LineEnding, Locator}; @@ -313,7 +313,7 @@ mod tests { fn start_of_file() -> Result<()> { fn insert(contents: &str) -> Result { let program = Suite::parse(contents, "")?; - let tokens: Vec = ruff_python_parser::tokenize(contents); + let tokens: Vec = ruff_rustpython::tokenize(contents, Mode::Module); let locator = Locator::new(contents); let stylist = Stylist::from_tokens(&tokens, &locator); Ok(Insertion::start_of_file(&program, &locator, &stylist)) @@ -424,7 +424,7 @@ x = 1 #[test] fn start_of_block() { fn insert(contents: &str, offset: TextSize) -> Insertion { - let tokens: Vec = ruff_python_parser::tokenize(contents); + let tokens: Vec = ruff_rustpython::tokenize(contents, Mode::Module); let locator = Locator::new(contents); let stylist = Stylist::from_tokens(&tokens, &locator); Insertion::start_of_block(offset, &locator, &stylist) diff --git a/crates/ruff/src/jupyter/notebook.rs b/crates/ruff/src/jupyter/notebook.rs index 50cd68282272f..b96915cdf2342 100644 --- a/crates/ruff/src/jupyter/notebook.rs +++ b/crates/ruff/src/jupyter/notebook.rs @@ -6,6 +6,7 @@ use std::path::Path; use itertools::Itertools; use once_cell::sync::OnceCell; +use rustpython_parser::Mode; use serde::Serialize; use serde_json::error::Category; @@ -23,8 +24,6 @@ use crate::IOError; pub const JUPYTER_NOTEBOOK_EXT: &str = "ipynb"; -const MAGIC_PREFIX: [&str; 3] = ["%", "!", "?"]; - /// Run round-trip source code generation on a given Jupyter notebook file path. pub fn round_trip(path: &Path) -> anyhow::Result { let mut notebook = Notebook::read(path).map_err(|err| { @@ -63,26 +62,21 @@ impl Cell { /// Return `true` if it's a valid code cell. /// /// A valid code cell is a cell where the cell type is [`Cell::Code`] and the - /// source doesn't contain a magic, shell or help command. + /// source doesn't contain a cell magic. fn is_valid_code_cell(&self) -> bool { let source = match self { Cell::Code(cell) => &cell.source, _ => return false, }; - // Ignore a cell if it contains a magic command. There could be valid - // Python code as well, but we'll ignore that for now. - // TODO(dhruvmanila): https://github.com/psf/black/blob/main/src/black/handle_ipynb_magics.py + // Ignore cells containing cell magic. This is different from line magic + // which is allowed and ignored by the parser. 
!match source { - SourceValue::String(string) => string.lines().any(|line| { - MAGIC_PREFIX - .iter() - .any(|prefix| line.trim_start().starts_with(prefix)) - }), - SourceValue::StringArray(string_array) => string_array.iter().any(|line| { - MAGIC_PREFIX - .iter() - .any(|prefix| line.trim_start().starts_with(prefix)) - }), + SourceValue::String(string) => string + .lines() + .any(|line| line.trim_start().starts_with("%%")), + SourceValue::StringArray(string_array) => string_array + .iter() + .any(|line| line.trim_start().starts_with("%%")), } } } @@ -493,9 +487,10 @@ mod tests { } #[test_case(Path::new("markdown.json"), false; "markdown")] - #[test_case(Path::new("only_magic.json"), false; "only_magic")] - #[test_case(Path::new("code_and_magic.json"), false; "code_and_magic")] + #[test_case(Path::new("only_magic.json"), true; "only_magic")] + #[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")] #[test_case(Path::new("only_code.json"), true; "only_code")] + #[test_case(Path::new("cell_magic.json"), false; "cell_magic")] fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> { assert_eq!(read_jupyter_cell(path)?.is_valid_code_cell(), expected); Ok(()) @@ -556,6 +551,18 @@ print("after empty cells") Ok(()) } + #[test] + fn test_line_magics() -> Result<()> { + let path = "line_magics.ipynb".to_string(); + let (diagnostics, source_kind) = test_notebook_path( + &path, + Path::new("line_magics_expected.ipynb"), + &settings::Settings::for_rule(Rule::UnusedImport), + )?; + assert_messages!(diagnostics, path, source_kind); + Ok(()) + } + #[test] fn test_json_consistency() -> Result<()> { let path = "before_fix.ipynb".to_string(); diff --git a/crates/ruff/src/jupyter/snapshots/ruff__jupyter__notebook__tests__line_magics.snap b/crates/ruff/src/jupyter/snapshots/ruff__jupyter__notebook__tests__line_magics.snap new file mode 100644 index 0000000000000..576b938b6b2e0 --- /dev/null +++ b/crates/ruff/src/jupyter/snapshots/ruff__jupyter__notebook__tests__line_magics.snap @@ -0,0 +1,21 @@ +--- +source: crates/ruff/src/jupyter/notebook.rs +--- +line_magics.ipynb:cell 1:5:8: F401 [*] `os` imported but unused + | +3 | %matplotlib inline +4 | +5 | import os + | ^^ F401 + | + = help: Remove unused import: `os` + +ℹ Fix +2 2 | +3 3 | %matplotlib inline +4 4 | +5 |-import os +6 5 | +7 6 | _ = math.pi + + diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index 92589dc427a91..9bc6bca98c563 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -8,7 +8,7 @@ use itertools::Itertools; use log::error; use rustc_hash::FxHashMap; use rustpython_parser::lexer::LexResult; -use rustpython_parser::ParseError; +use rustpython_parser::{Mode, ParseError}; use ruff_diagnostics::Diagnostic; use ruff_python_ast::imports::ImportMap; @@ -138,7 +138,11 @@ pub fn check_path( .iter_enabled() .any(|rule_code| rule_code.lint_source().is_imports()); if use_ast || use_imports || use_doc_lines { - match ruff_python_parser::parse_program_tokens(tokens, &path.to_string_lossy()) { + match ruff_python_parser::parse_program_tokens( + tokens, + &path.to_string_lossy(), + source_kind.map_or(false, SourceKind::is_jupyter), + ) { Ok(python_ast) => { if use_ast { diagnostics.extend(check_ast( @@ -260,7 +264,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings let contents = std::fs::read_to_string(path)?; // Tokenize once. 
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, Mode::Module);
 
     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(&contents);
@@ -327,8 +331,14 @@ pub fn lint_only(
     noqa: flags::Noqa,
     source_kind: Option<&SourceKind>,
 ) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
+    let mode = if source_kind.map_or(false, SourceKind::is_jupyter) {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+
     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, mode);
 
     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(contents);
@@ -417,10 +427,16 @@ pub fn lint_fix<'a>(
     // Track whether the _initial_ source code was parseable.
     let mut parseable = false;
 
+    let mode = if source_kind.is_jupyter() {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+
     // Continuously autofix until the source code stabilizes.
     loop {
         // Tokenize once.
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&transformed);
+        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&transformed, mode);
 
         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(&transformed);
diff --git a/crates/ruff/src/rules/pyflakes/mod.rs b/crates/ruff/src/rules/pyflakes/mod.rs
index 44ae43add7ed5..4ae5cca6029db 100644
--- a/crates/ruff/src/rules/pyflakes/mod.rs
+++ b/crates/ruff/src/rules/pyflakes/mod.rs
@@ -12,6 +12,7 @@ mod tests {
     use anyhow::Result;
     use regex::Regex;
     use rustpython_parser::lexer::LexResult;
+    use rustpython_parser::Mode;
     use test_case::test_case;
 
     use ruff_diagnostics::Diagnostic;
@@ -504,7 +505,7 @@ mod tests {
     fn flakes(contents: &str, expected: &[Rule]) {
         let contents = dedent(contents);
         let settings = Settings::for_rules(Linter::Pyflakes.rules());
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents);
+        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, Mode::Module);
         let locator = Locator::new(&contents);
         let stylist = Stylist::from_tokens(&tokens, &locator);
         let indexer = Indexer::from_tokens(&tokens, &locator);
diff --git a/crates/ruff/src/test.rs b/crates/ruff/src/test.rs
index 64b48996806a7..abf6c16131077 100644
--- a/crates/ruff/src/test.rs
+++ b/crates/ruff/src/test.rs
@@ -8,6 +8,7 @@ use anyhow::Result;
 use itertools::Itertools;
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
+use rustpython_parser::Mode;
 
 use ruff_diagnostics::{AutofixKind, Diagnostic};
 use ruff_python_codegen::Stylist;
@@ -99,8 +100,13 @@ pub(crate) fn max_iterations() -> usize {
 /// A convenient wrapper around [`check_path`], that additionally
 /// asserts that autofixes converge after a fixed number of iterations.
 fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
+    let mode = if source_kind.is_jupyter() {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
     let contents = source_kind.content().to_string();
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, mode);
     let locator = Locator::new(&contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -162,7 +168,7 @@ fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings)
         notebook.update(&source_map, &fixed_contents);
     };
 
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&fixed_contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&fixed_contents, mode);
     let locator = Locator::new(&fixed_contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);
diff --git a/crates/ruff_dev/src/print_ast.rs b/crates/ruff_dev/src/print_ast.rs
index 130f9b5bd0977..95a18f224b10e 100644
--- a/crates/ruff_dev/src/print_ast.rs
+++ b/crates/ruff_dev/src/print_ast.rs
@@ -5,19 +5,26 @@ use std::fs;
 use std::path::PathBuf;
 
 use anyhow::Result;
-use rustpython_ast::Suite;
-use rustpython_parser::Parse;
+use rustpython_parser::{parse, Mode};
 
 #[derive(clap::Args)]
 pub(crate) struct Args {
     /// Python file for which to generate the AST.
     #[arg(required = true)]
     file: PathBuf,
+    /// Run in Jupyter mode, i.e., allow line magics.
+    #[arg(long)]
+    jupyter: bool,
 }
 
 pub(crate) fn main(args: &Args) -> Result<()> {
     let contents = fs::read_to_string(&args.file)?;
-    let python_ast = Suite::parse(&contents, &args.file.to_string_lossy())?;
+    let mode = if args.jupyter {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+    let python_ast = parse(&contents, mode, &args.file.to_string_lossy())?;
     println!("{python_ast:#?}");
     Ok(())
 }
diff --git a/crates/ruff_dev/src/print_tokens.rs b/crates/ruff_dev/src/print_tokens.rs
index 39b05b3a6236e..922c087402623 100644
--- a/crates/ruff_dev/src/print_tokens.rs
+++ b/crates/ruff_dev/src/print_tokens.rs
@@ -12,11 +12,19 @@ pub(crate) struct Args {
     /// Python file for which to generate the AST.
     #[arg(required = true)]
     file: PathBuf,
+    /// Run in Jupyter mode, i.e., allow line magics (`%`, `!`, `?`, `/`, `,`, `;`).
+    #[arg(long)]
+    jupyter: bool,
 }
 
 pub(crate) fn main(args: &Args) -> Result<()> {
     let contents = fs::read_to_string(&args.file)?;
-    for (tok, range) in lexer::lex(&contents, Mode::Module).flatten() {
+    let mode = if args.jupyter {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+    for (tok, range) in lexer::lex(&contents, mode).flatten() {
         println!(
             "{start:#?} {tok:#?} {end:#?}",
             start = range.start(),
diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs
index 93c2f85b8e2f7..5ca5c0be57417 100644
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@@ -9,9 +9,9 @@ pub mod token_kind;
 pub mod typing;
 
 /// Collect tokens up to and including the first error.
-pub fn tokenize(contents: &str) -> Vec<LexResult> {
+pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
     let mut tokens: Vec<LexResult> = vec![];
-    for tok in lexer::lex(contents, Mode::Module) {
+    for tok in lexer::lex(contents, mode) {
         let is_err = tok.is_err();
         tokens.push(tok);
         if is_err {
@@ -25,8 +25,14 @@
 pub fn parse_program_tokens(
     lxr: Vec<LexResult>,
     source_path: &str,
+    is_jupyter_notebook: bool,
 ) -> anyhow::Result<Suite> {
-    parser::parse_tokens(lxr, Mode::Module, source_path).map(|top| match top {
+    let mode = if is_jupyter_notebook {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+    parser::parse_tokens(lxr, mode, source_path).map(|top| match top {
         Mod::Module(ModModule { body, .. }) => body,
         _ => unreachable!(),
     })
diff --git a/crates/ruff_shrinking/Cargo.toml b/crates/ruff_shrinking/Cargo.toml
index e20d8a783c73f..270badf8d873b 100644
--- a/crates/ruff_shrinking/Cargo.toml
+++ b/crates/ruff_shrinking/Cargo.toml
@@ -13,6 +13,7 @@ regex = { workspace = true }
 ruff_python_ast = { path = "../ruff_python_ast" }
 ruff_python_parser = { path = "../ruff_python_parser" }
 rustpython-ast = { workspace = true }
+rustpython-parser = { workspace = true }
 shlex = "1.1.0"
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
diff --git a/crates/ruff_shrinking/src/main.rs b/crates/ruff_shrinking/src/main.rs
index 1808c1e47ad1a..2ed5d0001a660 100644
--- a/crates/ruff_shrinking/src/main.rs
+++ b/crates/ruff_shrinking/src/main.rs
@@ -37,6 +37,7 @@ use ruff_python_ast::statement_visitor::{walk_body, walk_stmt, StatementVisitor}
 use ruff_python_ast::visitor::{walk_expr, Visitor};
 use rustpython_ast::text_size::TextRange;
 use rustpython_ast::{Expr, Ranged, Stmt, Suite};
+use rustpython_parser::Mode;
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use std::process::{Command, ExitCode};
@@ -275,7 +276,7 @@ impl Strategy for StrategyRemoveToken {
         input: &'a str,
         _ast: &'a Suite,
     ) -> Result<Box<dyn ExactSizeStringIter + 'a>> {
-        let token_ranges: Vec<_> = ruff_python_parser::tokenize(input)
+        let token_ranges: Vec<_> = ruff_python_parser::tokenize(input, Mode::Module)
             .into_iter()
             // At this point we know we have valid python code
             .map(Result::unwrap)
@@ -320,9 +325,9 @@ fn minimization_step(
     pattern: &Regex,
     last_strategy_and_idx: Option<(&'static dyn Strategy, usize)>,
 ) -> Result<Option<(&'static dyn Strategy, usize, String)>> {
-    let tokens = ruff_python_parser::tokenize(input);
-    let ast =
-        ruff_python_parser::parse_program_tokens(tokens, "input.py").context("not valid python")?;
+    let tokens = ruff_python_parser::tokenize(input, Mode::Module);
+    let ast = ruff_python_parser::parse_program_tokens(tokens, "input.py", false)
+        .context("not valid python")?;
 
     // Try the last succeeding strategy first, skipping all that failed last time
     if let Some((last_strategy, last_idx)) = last_strategy_and_idx {
diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs
index c937f65aa7f61..f693eee4c02b8 100644
--- a/crates/ruff_wasm/src/lib.rs
+++ b/crates/ruff_wasm/src/lib.rs
@@ -197,7 +197,7 @@ impl Workspace {
 
     pub fn check(&self, contents: &str) -> Result<JsValue, Error> {
         // Tokenize once.
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents);
+        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
 
         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(contents);
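
Note: the behavioral change here is that `Mode::Jupyter` makes the lexer and
parser accept IPython line magics, which `Mode::Module` rejects as syntax
errors. Below is a minimal sketch of that difference, using the same
`rustpython_parser::parse` entry point that `print_ast.rs` above uses; the
`"cell.py"` source path is an arbitrary placeholder, not something from this
patch:

    use rustpython_parser::{parse, Mode};

    fn main() {
        // A line magic followed by ordinary Python, as in line_magics.ipynb.
        let source = "%matplotlib inline\nimport os\n";

        // Module mode: the leading `%` is a syntax error.
        assert!(parse(source, Mode::Module, "cell.py").is_err());

        // Jupyter mode: the line magic is accepted, so the rest of the cell
        // can still be linted.
        assert!(parse(source, Mode::Jupyter, "cell.py").is_ok());
    }

Cell magics (`%%`, see cell_magic.json above) are still skipped entirely via
`is_valid_code_cell`, since their bodies are often not Python at all.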