Commit

Use Jupyter mode for the parser with Notebook files
dhruvmanila committed Jul 27, 2023
1 parent d04367a commit b553e88
Showing 16 changed files with 208 additions and 40 deletions.
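
In short: notebook sources are now tokenized and parsed in the parser's Jupyter mode, so cells containing IPython line magics (e.g. `%matplotlib inline`) are linted rather than skipped wholesale; only cells with cell magics (`%%`) are still excluded. A minimal sketch of the parser behavior this relies on (assuming the `rustpython_parser` fork used at this commit; `parse` is the same function the diff below calls in `print_ast.rs`):

use rustpython_parser::{parse, Mode};

fn main() {
    let cell = "%matplotlib inline\nimport os\n";
    // Under Mode::Module, the line magic is a syntax error...
    assert!(parse(cell, Mode::Module, "<cell>").is_err());
    // ...while Mode::Jupyter parses it, so the rest of the cell can be linted.
    assert!(parse(cell, Mode::Jupyter, "<cell>").is_ok());
}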
1 change: 1 addition & 0 deletions Cargo.lock

8 changes: 8 additions & 0 deletions crates/ruff/resources/test/fixtures/jupyter/cell/cell_magic.json
@@ -0,0 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["%%timeit\n", "print('hello world')"]
}
41 changes: 41 additions & 0 deletions crates/ruff/resources/test/fixtures/jupyter/line_magics.ipynb
@@ -0,0 +1,41 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "eab4754a-d6df-4b41-8ee8-7e23aef440f9",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"%matplotlib inline\n",
"\n",
"import os\n",
"\n",
"_ = math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
40 changes: 40 additions & 0 deletions crates/ruff/resources/test/fixtures/jupyter/line_magics_expected.ipynb
@@ -0,0 +1,40 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "cad32845-44f9-4a53-8b8c-a6b1bb3f3378",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"%matplotlib inline\n",
"\n",
"\n",
"_ = math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
6 changes: 3 additions & 3 deletions crates/ruff/src/importer/insertion.rs
@@ -302,7 +302,7 @@ mod tests {
     use ruff_text_size::TextSize;
     use rustpython_ast::Suite;
     use rustpython_parser::lexer::LexResult;
-    use rustpython_parser::Parse;
+    use rustpython_parser::{Mode, Parse};

     use ruff_python_codegen::Stylist;
     use ruff_source_file::{LineEnding, Locator};
@@ -313,7 +313,7 @@ mod tests {
     fn start_of_file() -> Result<()> {
         fn insert(contents: &str) -> Result<Insertion> {
             let program = Suite::parse(contents, "<filename>")?;
-            let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents);
+            let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
             let locator = Locator::new(contents);
             let stylist = Stylist::from_tokens(&tokens, &locator);
             Ok(Insertion::start_of_file(&program, &locator, &stylist))
@@ -424,7 +424,7 @@ x = 1
     #[test]
     fn start_of_block() {
         fn insert(contents: &str, offset: TextSize) -> Insertion {
-            let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents);
+            let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
             let locator = Locator::new(contents);
             let stylist = Stylist::from_tokens(&tokens, &locator);
             Insertion::start_of_block(offset, &locator, &stylist)
43 changes: 25 additions & 18 deletions crates/ruff/src/jupyter/notebook.rs
@@ -6,6 +6,7 @@ use std::path::Path;

 use itertools::Itertools;
 use once_cell::sync::OnceCell;
+use rustpython_parser::Mode;
 use serde::Serialize;
 use serde_json::error::Category;

@@ -23,8 +24,6 @@ use crate::IOError;

 pub const JUPYTER_NOTEBOOK_EXT: &str = "ipynb";

-const MAGIC_PREFIX: [&str; 3] = ["%", "!", "?"];
-
 /// Run round-trip source code generation on a given Jupyter notebook file path.
 pub fn round_trip(path: &Path) -> anyhow::Result<String> {
     let mut notebook = Notebook::read(path).map_err(|err| {
@@ -63,26 +62,21 @@ impl Cell {
     /// Return `true` if it's a valid code cell.
     ///
     /// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
-    /// source doesn't contain a magic, shell or help command.
+    /// source doesn't contain a cell magic.
     fn is_valid_code_cell(&self) -> bool {
         let source = match self {
             Cell::Code(cell) => &cell.source,
             _ => return false,
         };
-        // Ignore a cell if it contains a magic command. There could be valid
-        // Python code as well, but we'll ignore that for now.
-        // TODO(dhruvmanila): https://github.com/psf/black/blob/main/src/black/handle_ipynb_magics.py
+        // Ignore cells containing cell magic. This is different from line magic
+        // which is allowed and ignored by the parser.
         !match source {
-            SourceValue::String(string) => string.lines().any(|line| {
-                MAGIC_PREFIX
-                    .iter()
-                    .any(|prefix| line.trim_start().starts_with(prefix))
-            }),
-            SourceValue::StringArray(string_array) => string_array.iter().any(|line| {
-                MAGIC_PREFIX
-                    .iter()
-                    .any(|prefix| line.trim_start().starts_with(prefix))
-            }),
+            SourceValue::String(string) => string
+                .lines()
+                .any(|line| line.trim_start().starts_with("%%")),
+            SourceValue::StringArray(string_array) => string_array
+                .iter()
+                .any(|line| line.trim_start().starts_with("%%")),
         }
     }
 }
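
For illustration, the new check reduces to a single scan for the `%%` prefix. A hypothetical standalone helper (not part of this commit) capturing the same logic:

// A cell is skipped only when some line starts with `%%`, i.e. a cell magic
// that changes the meaning of the whole cell body. Line magics (`%`), shell
// escapes (`!`), and help commands (`?`) are no longer filtered here; the
// parser's Jupyter mode handles them.
fn contains_cell_magic<'a>(mut lines: impl Iterator<Item = &'a str>) -> bool {
    lines.any(|line| line.trim_start().starts_with("%%"))
}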
@@ -493,9 +487,10 @@ mod tests {
     }

     #[test_case(Path::new("markdown.json"), false; "markdown")]
-    #[test_case(Path::new("only_magic.json"), false; "only_magic")]
-    #[test_case(Path::new("code_and_magic.json"), false; "code_and_magic")]
+    #[test_case(Path::new("only_magic.json"), true; "only_magic")]
+    #[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")]
     #[test_case(Path::new("only_code.json"), true; "only_code")]
+    #[test_case(Path::new("cell_magic.json"), false; "cell_magic")]
     fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> {
         assert_eq!(read_jupyter_cell(path)?.is_valid_code_cell(), expected);
         Ok(())
@@ -556,6 +551,18 @@ print("after empty cells")
         Ok(())
     }

+    #[test]
+    fn test_line_magics() -> Result<()> {
+        let path = "line_magics.ipynb".to_string();
+        let (diagnostics, source_kind) = test_notebook_path(
+            &path,
+            Path::new("line_magics_expected.ipynb"),
+            &settings::Settings::for_rule(Rule::UnusedImport),
+        )?;
+        assert_messages!(diagnostics, path, source_kind);
+        Ok(())
+    }
+
     #[test]
     fn test_json_consistency() -> Result<()> {
         let path = "before_fix.ipynb".to_string();
21 changes: 21 additions & 0 deletions crates/ruff/src/jupyter/snapshots/ruff__jupyter__notebook__tests__test_line_magics.snap
@@ -0,0 +1,21 @@
---
source: crates/ruff/src/jupyter/notebook.rs
---
line_magics.ipynb:cell 1:5:8: F401 [*] `os` imported but unused
|
3 | %matplotlib inline
4 |
5 | import os
| ^^ F401
|
= help: Remove unused import: `os`

Fix
2 2 |
3 3 | %matplotlib inline
4 4 |
5 |-import os
6 5 |
7 6 | _ = math.pi


26 changes: 21 additions & 5 deletions crates/ruff/src/linter.rs
@@ -8,7 +8,7 @@ use itertools::Itertools;
 use log::error;
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
-use rustpython_parser::ParseError;
+use rustpython_parser::{Mode, ParseError};

 use ruff_diagnostics::Diagnostic;
 use ruff_python_ast::imports::ImportMap;
@@ -138,7 +138,11 @@ pub fn check_path(
         .iter_enabled()
         .any(|rule_code| rule_code.lint_source().is_imports());
     if use_ast || use_imports || use_doc_lines {
-        match ruff_python_parser::parse_program_tokens(tokens, &path.to_string_lossy()) {
+        match ruff_python_parser::parse_program_tokens(
+            tokens,
+            &path.to_string_lossy(),
+            source_kind.map_or(false, SourceKind::is_jupyter),
+        ) {
             Ok(python_ast) => {
                 if use_ast {
                     diagnostics.extend(check_ast(
@@ -260,7 +264,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings
     let contents = std::fs::read_to_string(path)?;

     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, Mode::Module);

     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(&contents);
@@ -327,8 +331,14 @@ pub fn lint_only(
     noqa: flags::Noqa,
     source_kind: Option<&SourceKind>,
 ) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
+    let mode = if source_kind.map_or(false, SourceKind::is_jupyter) {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+
     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, mode);

     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(contents);
@@ -417,10 +427,16 @@ pub fn lint_fix<'a>(
     // Track whether the _initial_ source code was parseable.
     let mut parseable = false;

+    let mode = if source_kind.is_jupyter() {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+
     // Continuously autofix until the source code stabilizes.
     loop {
         // Tokenize once.
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&transformed);
+        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&transformed, mode);

         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(&transformed);
3 changes: 2 additions & 1 deletion crates/ruff/src/rules/pyflakes/mod.rs
@@ -12,6 +12,7 @@ mod tests {
     use anyhow::Result;
     use regex::Regex;
     use rustpython_parser::lexer::LexResult;
+    use rustpython_parser::Mode;
    use test_case::test_case;

     use ruff_diagnostics::Diagnostic;
@@ -504,7 +505,7 @@ mod tests {
     fn flakes(contents: &str, expected: &[Rule]) {
         let contents = dedent(contents);
         let settings = Settings::for_rules(Linter::Pyflakes.rules());
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents);
+        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, Mode::Module);
         let locator = Locator::new(&contents);
         let stylist = Stylist::from_tokens(&tokens, &locator);
         let indexer = Indexer::from_tokens(&tokens, &locator);
10 changes: 8 additions & 2 deletions crates/ruff/src/test.rs
@@ -8,6 +8,7 @@ use anyhow::Result;
 use itertools::Itertools;
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
+use rustpython_parser::Mode;

 use ruff_diagnostics::{AutofixKind, Diagnostic};
 use ruff_python_codegen::Stylist;
@@ -99,8 +100,13 @@ pub(crate) fn max_iterations() -> usize {
 /// A convenient wrapper around [`check_path`], that additionally
 /// asserts that autofixes converge after a fixed number of iterations.
 fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
+    let mode = if source_kind.is_jupyter() {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
     let contents = source_kind.content().to_string();
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, mode);
     let locator = Locator::new(&contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -162,7 +168,7 @@ fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings)
         notebook.update(&source_map, &fixed_contents);
     };

-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&fixed_contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&fixed_contents, mode);
     let locator = Locator::new(&fixed_contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);
13 changes: 10 additions & 3 deletions crates/ruff_dev/src/print_ast.rs
@@ -5,19 +5,26 @@ use std::fs;
 use std::path::PathBuf;

 use anyhow::Result;
-use rustpython_ast::Suite;
-use rustpython_parser::Parse;
+use rustpython_parser::{parse, Mode};

 #[derive(clap::Args)]
 pub(crate) struct Args {
     /// Python file for which to generate the AST.
     #[arg(required = true)]
     file: PathBuf,
+    /// Run in Jupyter mode i.e., allow line magics.
+    #[arg(long)]
+    jupyter: bool,
 }

 pub(crate) fn main(args: &Args) -> Result<()> {
     let contents = fs::read_to_string(&args.file)?;
-    let python_ast = Suite::parse(&contents, &args.file.to_string_lossy())?;
+    let mode = if args.jupyter {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+    let python_ast = parse(&contents, mode, &args.file.to_string_lossy())?;
     println!("{python_ast:#?}");
     Ok(())
 }
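
With the new flag, a file containing line magics can be inspected directly, e.g. `cargo dev print-ast --jupyter cell.py` (an illustrative invocation, assuming the repository's standard `cargo dev` alias).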
10 changes: 9 additions & 1 deletion crates/ruff_dev/src/print_tokens.rs
@@ -12,11 +12,19 @@ pub(crate) struct Args {
     /// Python file for which to generate the AST.
     #[arg(required = true)]
     file: PathBuf,
+    /// Run in Jupyter mode i.e., allow line magics (`%`, `!`, `?`, `/`, `,`, `;`).
+    #[arg(long)]
+    jupyter: bool,
 }

 pub(crate) fn main(args: &Args) -> Result<()> {
     let contents = fs::read_to_string(&args.file)?;
-    for (tok, range) in lexer::lex(&contents, Mode::Module).flatten() {
+    let mode = if args.jupyter {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+    for (tok, range) in lexer::lex(&contents, mode).flatten() {
         println!(
             "{start:#?} {tok:#?} {end:#?}",
             start = range.start(),
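
Likewise, `cargo dev print-tokens --jupyter cell.py` (again an illustrative invocation) would show how the lexer tokenizes magic lines under Jupyter mode.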