From f4e23d2dffb9ed798a8e98d996b786ee1def659b Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 3 Jun 2024 18:34:03 +0530 Subject: [PATCH] Use string expression for parsing type annotation (#11717) ## Summary This PR updates the logic for parsing type annotation to accept a `ExprStringLiteral` node instead of the string value and the range. The main motivation of this change is to simplify the implementation of `parse_type_annotation` function with: * Use the `opener_len` and `closer_len` from the string flags to get the raw contents range instead of extracting it via * `str::leading_quote(expression).unwrap().text_len()` * `str::trailing_quote(expression).unwrap().text_len()` * Avoid comparing the string content if we already know that it's implicitly concatenated ## Test Plan `cargo insta test` --- .../ruff_linter/src/checkers/ast/deferred.rs | 5 +- crates/ruff_linter/src/checkers/ast/mod.rs | 41 ++++++----- .../flake8_annotations/rules/definition.rs | 4 +- .../src/rules/ruff/rules/implicit_optional.rs | 8 +-- crates/ruff_linter/src/rules/ruff/typing.rs | 4 +- crates/ruff_python_parser/src/typing.rs | 72 ++++++++++++------- 6 files changed, 78 insertions(+), 56 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/deferred.rs b/crates/ruff_linter/src/checkers/ast/deferred.rs index 7f390e7afd577..01043e77d4505 100644 --- a/crates/ruff_linter/src/checkers/ast/deferred.rs +++ b/crates/ruff_linter/src/checkers/ast/deferred.rs @@ -1,13 +1,12 @@ -use ruff_python_ast::Expr; +use ruff_python_ast::{Expr, ExprStringLiteral}; use ruff_python_semantic::{ScopeId, Snapshot}; -use ruff_text_size::TextRange; /// A collection of AST nodes that are deferred for later visitation. Used to, e.g., store /// functions, whose bodies shouldn't be visited until all module-level definitions have been /// visited. #[derive(Debug, Default)] pub(crate) struct Visit<'a> { - pub(crate) string_type_definitions: Vec<(TextRange, &'a str, Snapshot)>, + pub(crate) string_type_definitions: Vec<(&'a ExprStringLiteral, Snapshot)>, pub(crate) future_type_definitions: Vec<(&'a Expr, Snapshot)>, pub(crate) type_param_definitions: Vec<(&'a Expr, Snapshot)>, pub(crate) functions: Vec, diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index 5f26244df7fff..73b3d607ed878 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -1011,12 +1011,10 @@ impl<'a> Visitor<'a> for Checker<'a> { && self.semantic.future_annotations_or_stub() && (self.semantic.in_annotation() || self.source_type.is_stub()) { - if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = expr { - self.visit.string_type_definitions.push(( - expr.range(), - value.to_str(), - self.semantic.snapshot(), - )); + if let Expr::StringLiteral(string_literal) = expr { + self.visit + .string_type_definitions + .push((string_literal, self.semantic.snapshot())); } else { self.visit .future_type_definitions @@ -1426,13 +1424,11 @@ impl<'a> Visitor<'a> for Checker<'a> { } } } - Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => { + Expr::StringLiteral(string_literal) => { if self.semantic.in_type_definition() && !self.semantic.in_typing_literal() { - self.visit.string_type_definitions.push(( - expr.range(), - value.to_str(), - self.semantic.snapshot(), - )); + self.visit + .string_type_definitions + .push((string_literal, self.semantic.snapshot())); } } Expr::FString(_) => { @@ -2156,22 +2152,25 @@ impl<'a> Checker<'a> { let snapshot = self.semantic.snapshot(); while !self.visit.string_type_definitions.is_empty() { let type_definitions = std::mem::take(&mut self.visit.string_type_definitions); - for (range, value, snapshot) in type_definitions { - if let Ok((expr, kind)) = - parse_type_annotation(value, range, self.locator.contents()) + for (string_expr, snapshot) in type_definitions { + if let Ok((parsed_annotation, kind)) = + parse_type_annotation(string_expr, self.locator.contents()) { - let expr = allocator.alloc(expr); + let parsed_annotation = allocator.alloc(parsed_annotation); + + let annotation = string_expr.value.to_str(); + let range = string_expr.range(); self.semantic.restore(snapshot); if self.semantic.in_annotation() && self.semantic.in_typing_only_annotation() { if self.enabled(Rule::QuotedAnnotation) { - pyupgrade::rules::quoted_annotation(self, value, range); + pyupgrade::rules::quoted_annotation(self, annotation, range); } } if self.source_type.is_stub() { if self.enabled(Rule::QuotedAnnotationInStub) { - flake8_pyi::rules::quoted_annotation_in_stub(self, value, range); + flake8_pyi::rules::quoted_annotation_in_stub(self, annotation, range); } } @@ -2184,14 +2183,14 @@ impl<'a> Checker<'a> { self.semantic.flags |= SemanticModelFlags::TYPE_DEFINITION | type_definition_flag; - self.visit_expr(expr); + self.visit_expr(parsed_annotation); } else { if self.enabled(Rule::ForwardAnnotationSyntaxError) { self.diagnostics.push(Diagnostic::new( pyflakes::rules::ForwardAnnotationSyntaxError { - body: value.to_string(), + body: string_expr.value.to_string(), }, - range, + string_expr.range(), )); } } diff --git a/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs b/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs index 25b0119a656af..ad9e5706624a6 100644 --- a/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs +++ b/crates/ruff_linter/src/rules/flake8_annotations/rules/definition.rs @@ -512,10 +512,10 @@ fn check_dynamically_typed( ) where F: FnOnce() -> String, { - if let Expr::StringLiteral(ast::ExprStringLiteral { range, value }) = annotation { + if let Expr::StringLiteral(string_expr) = annotation { // Quoted annotations if let Ok((parsed_annotation, _)) = - parse_type_annotation(value.to_str(), *range, checker.locator().contents()) + parse_type_annotation(string_expr, checker.locator().contents()) { if type_hint_resolves_to_any( &parsed_annotation, diff --git a/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs b/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs index 180409066f05a..b2012a0111123 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/implicit_optional.rs @@ -177,13 +177,13 @@ pub(crate) fn implicit_optional(checker: &mut Checker, parameters: &Parameters) continue; }; - if let Expr::StringLiteral(ast::ExprStringLiteral { range, value }) = annotation.as_ref() { + if let Expr::StringLiteral(string_expr) = annotation.as_ref() { // Quoted annotation. - if let Ok((annotation, kind)) = - parse_type_annotation(value.to_str(), *range, checker.locator().contents()) + if let Ok((parsed_annotation, kind)) = + parse_type_annotation(string_expr, checker.locator().contents()) { let Some(expr) = type_hint_explicitly_allows_none( - &annotation, + &parsed_annotation, checker.semantic(), checker.locator(), checker.settings.target_version.minor(), diff --git a/crates/ruff_linter/src/rules/ruff/typing.rs b/crates/ruff_linter/src/rules/ruff/typing.rs index 7668a18ebac06..f3422f29abce9 100644 --- a/crates/ruff_linter/src/rules/ruff/typing.rs +++ b/crates/ruff_linter/src/rules/ruff/typing.rs @@ -112,8 +112,8 @@ impl<'a> TypingTarget<'a> { .. }) => Some(TypingTarget::PEP604Union(left, right)), Expr::NoneLiteral(_) => Some(TypingTarget::None), - Expr::StringLiteral(ast::ExprStringLiteral { value, range }) => { - parse_type_annotation(value.to_str(), *range, locator.contents()) + Expr::StringLiteral(string_expr) => { + parse_type_annotation(string_expr, locator.contents()) .map_or(None, |(expr, _)| Some(TypingTarget::ForwardReference(expr))) } _ => semantic.resolve_qualified_name(expr).map_or( diff --git a/crates/ruff_python_parser/src/typing.rs b/crates/ruff_python_parser/src/typing.rs index 02ebf3243c0b3..a848d538dc4b1 100644 --- a/crates/ruff_python_parser/src/typing.rs +++ b/crates/ruff_python_parser/src/typing.rs @@ -3,8 +3,9 @@ use anyhow::Result; use ruff_python_ast::relocate::relocate_expr; -use ruff_python_ast::{str, Expr}; -use ruff_text_size::{TextLen, TextRange}; +use ruff_python_ast::str::raw_contents; +use ruff_python_ast::{Expr, ExprStringLiteral, StringFlags, StringLiteral}; +use ruff_text_size::Ranged; use crate::{parse_expression, parse_expression_range}; @@ -16,37 +17,60 @@ pub enum AnnotationKind { /// expressions within the annotation and apply automatic fixes, which is /// not possible for complex string literals. Simple, + /// The annotation is defined as part of a complex string literal, such as /// a literal containing an implicit concatenation or escaped characters, /// e.g. `x: "List" "[int]" = []`. These are comparatively rare, but valid. Complex, } -/// Parses the value of a string literal node (`parsed_contents`) with `range` as a type -/// annotation. The given `source` is the entire source code. +/// Parses the given string expression node as a type annotation. The given `source` is the entire +/// source code. pub fn parse_type_annotation( - parsed_contents: &str, - range: TextRange, + string_expr: &ExprStringLiteral, source: &str, ) -> Result<(Expr, AnnotationKind)> { - let expression = &source[range]; - - if str::raw_contents(expression).is_some_and(|raw_contents| raw_contents == parsed_contents) { - // The annotation is considered "simple" if and only if the raw representation (e.g., - // `List[int]` within "List[int]") exactly matches the parsed representation. This - // isn't the case, e.g., for implicit concatenations, or for annotations that contain - // escaped quotes. - let leading_quote_len = str::leading_quote(expression).unwrap().text_len(); - let trailing_quote_len = str::trailing_quote(expression).unwrap().text_len(); - let range = range - .add_start(leading_quote_len) - .sub_end(trailing_quote_len); - let expr = parse_expression_range(source, range)?.into_expr(); - Ok((expr, AnnotationKind::Simple)) + let expr_text = &source[string_expr.range()]; + + if let [string_literal] = string_expr.value.as_slice() { + // Compare the raw contents (without quotes) of the expression with the parsed contents + // contained in the string literal. + if raw_contents(expr_text) + .is_some_and(|raw_contents| raw_contents == string_literal.as_str()) + { + parse_simple_type_annotation(string_literal, source) + } else { + // The raw contents of the string doesn't match the parsed content. This could be the + // case for annotations that contain escaped quotes. + parse_complex_type_annotation(string_expr) + } } else { - // Otherwise, consider this a "complex" annotation. - let mut expr = parse_expression(parsed_contents)?.into_expr(); - relocate_expr(&mut expr, range); - Ok((expr, AnnotationKind::Complex)) + // String is implicitly concatenated. + parse_complex_type_annotation(string_expr) } } + +fn parse_simple_type_annotation( + string_literal: &StringLiteral, + source: &str, +) -> Result<(Expr, AnnotationKind)> { + Ok(( + parse_expression_range( + source, + string_literal + .range() + .add_start(string_literal.flags.opener_len()) + .sub_end(string_literal.flags.closer_len()), + )? + .into_expr(), + AnnotationKind::Simple, + )) +} + +fn parse_complex_type_annotation( + string_expr: &ExprStringLiteral, +) -> Result<(Expr, AnnotationKind)> { + let mut parsed = parse_expression(string_expr.value.to_str())?.into_expr(); + relocate_expr(&mut parsed, string_expr.range()); + Ok((parsed, AnnotationKind::Complex)) +}