Skip to content

Commit

Permalink
Add TokenSource
Browse files (browse the repository at this point in the history)
  • Loading branch information
MichaReiser committed Jan 11, 2024
1 parent 14d3fe6 commit 3112468
Show file tree
Hide file tree
Showing 17 changed files with 163 additions and 80 deletions.
4 changes: 2 additions & 2 deletions crates/ruff_benchmark/benches/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions};
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_python_parser::{parse_tokens, Mode, TokenSource};

#[cfg(target_os = "windows")]
#[global_allocator]
Expand Down Expand Up @@ -65,7 +65,7 @@ fn benchmark_formatter(criterion: &mut Criterion) {
let comment_ranges = comment_ranges.finish();

// Parse the AST.
let module = parse_tokens(tokens, case.code(), Mode::Module)
let module = parse_tokens(TokenSource::new(tokens), case.code(), Mode::Module)
.expect("Input to be a valid python program");

b.iter(|| {
Expand Down
8 changes: 5 additions & 3 deletions crates/ruff_benchmark/benches/linter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::source_kind::SourceKind;
use ruff_linter::{registry::Rule, RuleSelector};
use ruff_python_ast::PySourceType;
use ruff_python_parser::{lexer, parse_program_tokens, Mode};
use ruff_python_parser::{lexer, parse_program_tokens, Mode, TokenSource};

#[cfg(target_os = "windows")]
#[global_allocator]
Expand Down Expand Up @@ -55,10 +55,12 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
&case,
|b, case| {
// Tokenize the source.
let tokens = lexer::lex(case.code(), Mode::Module).collect::<Vec<_>>();
let tokens: Vec<_> = lexer::lex(case.code(), Mode::Module).collect();

// Parse the source.
let ast = parse_program_tokens(tokens.clone(), case.code(), false).unwrap();
let ast =
parse_program_tokens(TokenSource::new(tokens.clone()), case.code(), false)
.unwrap();

b.iter(|| {
let path = case.path();
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/linter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,7 @@ impl<'a> TokenSource<'a> {
) -> Result<AstSource<'a>, ParseError> {
match self {
Self::Tokens(tokens) => Ok(AstSource::Ast(ruff_python_parser::parse_program_tokens(
tokens,
ruff_python_parser::TokenSource::new(tokens),
source_kind.source_code(),
source_type.is_ipynb(),
)?)),
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_python_ast/tests/preorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ fn f_strings() {

fn trace_preorder_visitation(source: &str) -> String {
let tokens = lex(source, Mode::Module);
let parsed = parse_tokens(tokens, source, Mode::Module).unwrap();
let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap();

let mut visitor = RecordVisitor::default();
visitor.visit_mod(&parsed);
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_python_ast/tests/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ fn f_strings() {

fn trace_visitation(source: &str) -> String {
let tokens = lex(source, Mode::Module);
let parsed = parse_tokens(tokens, source, Mode::Module).unwrap();
let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap();

let mut visitor = RecordVisitor::default();
walk_module(&mut visitor, &parsed);
Expand Down
6 changes: 3 additions & 3 deletions crates/ruff_python_formatter/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use clap::{command, Parser, ValueEnum};
use ruff_formatter::SourceCode;
use ruff_python_ast::PySourceType;
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, AsMode};
use ruff_python_parser::{parse_tokens, AsMode, TokenSource};
use ruff_text_size::Ranged;

use crate::comments::collect_comments;
Expand Down Expand Up @@ -50,8 +50,8 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re
.map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;

// Parse the AST.
let module =
parse_ok_tokens(tokens, source, source_type.as_mode()).context("Syntax error in input")?;
let module = parse_tokens(TokenSource::new(tokens), source, source_type.as_mode())
.context("Syntax error in input")?;

let options = PyFormatOptions::from_extension(source_path)
.with_preview(if cli.preview {
Expand Down
7 changes: 3 additions & 4 deletions crates/ruff_python_formatter/src/comments/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,12 @@ use ruff_python_ast::Mod;
use ruff_python_trivia::{CommentRanges, PythonWhitespace};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
pub(crate) use visitor::collect_comments;

use crate::comments::debug::{DebugComment, DebugComments};
use crate::comments::map::{LeadingDanglingTrailing, MultiMap};
use crate::comments::node_key::NodeRefEqualityKey;
use crate::comments::visitor::{CommentsMapBuilder, CommentsVisitor};
pub(crate) use visitor::collect_comments;

mod debug;
pub(crate) mod format;
Expand Down Expand Up @@ -563,8 +563,7 @@ mod tests {
use ruff_formatter::SourceCode;
use ruff_python_ast::{Mod, PySourceType};
use ruff_python_index::tokens_and_ranges;

use ruff_python_parser::{parse_ok_tokens, AsMode};
use ruff_python_parser::{parse_tokens, AsMode, TokenSource};
use ruff_python_trivia::CommentRanges;

use crate::comments::Comments;
Expand All @@ -581,7 +580,7 @@ mod tests {
let source_type = PySourceType::Python;
let (tokens, comment_ranges) =
tokens_and_ranges(source, source_type).expect("Expect source to be valid Python");
let parsed = parse_ok_tokens(tokens, source, source_type.as_mode())
let parsed = parse_tokens(TokenSource::new(tokens), source, source_type.as_mode())
.expect("Expect source to be valid Python");

CommentsTestCase {
Expand Down
8 changes: 4 additions & 4 deletions crates/ruff_python_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, Source
use ruff_python_ast::AstNode;
use ruff_python_ast::Mod;
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, AsMode, ParseError, ParseErrorType};
use ruff_python_parser::{parse_tokens, AsMode, ParseError, ParseErrorType, TokenSource};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::Locator;

Expand Down Expand Up @@ -126,7 +126,7 @@ pub fn format_module_source(
offset: err.location,
error: ParseErrorType::Lexical(err.error),
})?;
let module = parse_ok_tokens(tokens, source, source_type.as_mode())?;
let module = parse_tokens(TokenSource::new(tokens), source, source_type.as_mode())?;
let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
Ok(formatted.print()?)
}
Expand Down Expand Up @@ -169,7 +169,7 @@ mod tests {

use ruff_python_ast::PySourceType;
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, AsMode};
use ruff_python_parser::{parse_tokens, AsMode, TokenSource};

use crate::{format_module_ast, format_module_source, PyFormatOptions};

Expand Down Expand Up @@ -213,7 +213,7 @@ def main() -> None:

// Parse the AST.
let source_path = "code_inline.py";
let module = parse_ok_tokens(tokens, source, source_type.as_mode()).unwrap();
let module = parse_tokens(TokenSource::new(tokens), source, source_type.as_mode()).unwrap();
let options = PyFormatOptions::from_extension(Path::new(source_path));
let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();

Expand Down
5 changes: 3 additions & 2 deletions crates/ruff_python_formatter/src/string/docstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::{borrow::Cow, collections::VecDeque};

use ruff_python_parser::ParseError;
use ruff_python_parser::{ParseError, TokenSource};
use {once_cell::sync::Lazy, regex::Regex};
use {
ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed},
Expand Down Expand Up @@ -1516,7 +1516,8 @@ fn docstring_format_source(
let source_type = options.source_type();
let (tokens, comment_ranges) =
ruff_python_index::tokens_and_ranges(source, source_type).map_err(ParseError::from)?;
let module = ruff_python_parser::parse_ok_tokens(tokens, source, source_type.as_mode())?;
let module =
ruff_python_parser::parse_tokens(TokenSource::new(tokens), source, source_type.as_mode())?;
let source_code = ruff_formatter::SourceCode::new(source);
let comments = crate::Comments::from_ast(&module, source_code, &comment_ranges);
let locator = Locator::new(source);
Expand Down
11 changes: 6 additions & 5 deletions crates/ruff_python_index/src/comment_ranges.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fmt::Debug;

use ruff_python_ast::PySourceType;
use ruff_python_parser::lexer::{lex, LexicalError};
use ruff_python_parser::lexer::{lex, LexResult, LexicalError};
use ruff_python_parser::{AsMode, Tok};
use ruff_python_trivia::CommentRanges;
use ruff_text_size::TextRange;
Expand All @@ -27,15 +27,16 @@ impl CommentRangesBuilder {
pub fn tokens_and_ranges(
source: &str,
source_type: PySourceType,
) -> Result<(Vec<(Tok, TextRange)>, CommentRanges), LexicalError> {
) -> Result<(Vec<LexResult>, CommentRanges), LexicalError> {
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();

for result in lex(source, source_type.as_mode()) {
let (token, range) = result?;
if let Ok((token, range)) = &result {
comment_ranges.visit_token(token, *range);
}

comment_ranges.visit_token(&token, range);
tokens.push((token, range));
tokens.push(result);
}

let comment_ranges = comment_ranges.finish();
Expand Down
4 changes: 2 additions & 2 deletions crates/ruff_python_parser/src/invalid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pub(crate) fn assignment_target(target: &Expr) -> Result<(), LexicalError> {

#[cfg(test)]
mod tests {
use crate::parse_suite;
use crate::{parse_suite};

// First we test, broadly, that various kinds of assignments are now
// rejected by the parser. e.g., `5 = 3`, `5 += 3`, `(5): int = 3`.
Expand Down Expand Up @@ -687,7 +687,7 @@ mod tests {

let src = r"!foo = 42";
let tokens = crate::lexer::lex(src, Mode::Ipython);
let ast = crate::parse_tokens(tokens, src, Mode::Ipython);
let ast = crate::parse_tokens(tokens.collect(), src, Mode::Ipython);
insta::assert_debug_snapshot!(ast);
}

Expand Down
12 changes: 7 additions & 5 deletions crates/ruff_python_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@
//!
//! These tokens can be directly fed into the `ruff_python_parser` to generate an AST:
//!
//! ```
//! use ruff_python_parser::{lexer::lex, Mode, parse_tokens};
//!```
//! use ruff_python_parser::{lexer::lex, Mode, parse_tokens, TokenSource};
//!
//! let python_source = r#"
//! def is_odd(i):
//! return bool(i & 1)
//! "#;
//! let tokens = lex(python_source, Mode::Module);
//! let ast = parse_tokens(tokens, python_source, Mode::Module);
//! let ast = parse_tokens(TokenSource::from_iter(tokens), python_source, Mode::Module);
//!
//! assert!(ast.is_ok());
//! ```
Expand Down Expand Up @@ -118,6 +118,7 @@ pub use string::FStringErrorType;
pub use token::{StringKind, Tok, TokenKind};

use crate::lexer::LexResult;
pub use crate::token_source::TokenSource;

mod function;
// Skip flattening lexer to distinguish from full ruff_python_parser
Expand All @@ -128,6 +129,7 @@ mod parser;
mod soft_keywords;
mod string;
mod token;
mod token_source;
pub mod typing;

/// Collect tokens up to and including the first error.
Expand All @@ -145,7 +147,7 @@ pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {

/// Parse a full Python program from its tokens.
pub fn parse_program_tokens(
lxr: Vec<LexResult>,
tokens: TokenSource,
source: &str,
is_jupyter_notebook: bool,
) -> anyhow::Result<Suite, ParseError> {
Expand All @@ -154,7 +156,7 @@ pub fn parse_program_tokens(
} else {
Mode::Module
};
match parse_tokens(lxr, source, mode)? {
match parse_tokens(tokens, source, mode)? {
Mod::Module(m) => Ok(m.body),
Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
}
Expand Down
Loading

0 comments on commit 3112468

Please sign in to comment.