From 180920fdd932be84b691bfe954482f766a24972d Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 13 Feb 2024 09:56:08 +0530 Subject: [PATCH] Make semantic model aware of docstring (#9960) ## Summary This PR introduces a new semantic model flag `DOCSTRING` which suggests that the model is currently in a module / class / function docstring. This is the first step in eliminating the docstring detection state machine which is prone to bugs as stated in #7595. ## Test Plan ~TODO: Is there a way to add a test case for this?~ I tested this using the following code snippet and adding a print statement in the `string_like` analyzer to print if we're currently in a docstring or not.
Test code snippet:

```python "Docstring" ", still a docstring" "Not a docstring" def foo(): "Docstring" "Not a docstring" if foo: "Not a docstring" pass class Foo: "Docstring" "Not a docstring" foo: int "Unofficial variable docstring" def method(): "Docstring" "Not a docstring" pass def bar(): "Not a docstring".strip() def baz(): _something_else = 1 """Not a docstring""" ```

--- crates/ruff_linter/src/checkers/ast/mod.rs | 51 +++++++++++++++++++++- crates/ruff_python_semantic/src/model.rs | 25 +++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index ce3083f2b1b30..e858b53899e2f 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -40,7 +40,7 @@ use ruff_diagnostics::{Diagnostic, IsolationLevel}; use ruff_notebook::{CellOffsets, NotebookIndex}; use ruff_python_ast::all::{extract_all_names, DunderAllFlags}; use ruff_python_ast::helpers::{ - collect_import_from_member, extract_handled_exceptions, to_module_path, + collect_import_from_member, extract_handled_exceptions, is_docstring_stmt, to_module_path, }; use ruff_python_ast::identifier::Identifier; use ruff_python_ast::str::trailing_quote; @@ -71,6 +71,38 @@ mod analyze; mod annotation; mod deferred; +/// State representing whether a docstring is expected or not for the next statement. +#[derive(Default, Debug, Copy, Clone, PartialEq)] +enum DocstringState { + /// The next statement is expected to be a docstring, but not necessarily so. + /// + /// For example, in the following code: + /// + /// ```python + /// class Foo: + /// pass + /// + /// + /// def bar(x, y): + /// """Docstring.""" + /// return x + y + /// ``` + /// + /// For `Foo`, the state is expected when the checker is visiting the class + /// body but isn't going to be present. While, for `bar` function, the docstring + /// is expected and present. + #[default] + Expected, + Other, +} + +impl DocstringState { + /// Returns `true` if the next statement is expected to be a docstring. + const fn is_expected(self) -> bool { + matches!(self, DocstringState::Expected) + } +} + pub(crate) struct Checker<'a> { /// The [`Path`] to the file under analysis. path: &'a Path, @@ -114,6 +146,8 @@ pub(crate) struct Checker<'a> { pub(crate) flake8_bugbear_seen: Vec, /// The end offset of the last visited statement. last_stmt_end: TextSize, + /// A state describing if a docstring is expected or not. + docstring_state: DocstringState, } impl<'a> Checker<'a> { @@ -153,6 +187,7 @@ impl<'a> Checker<'a> { cell_offsets, notebook_index, last_stmt_end: TextSize::default(), + docstring_state: DocstringState::default(), } } } @@ -350,6 +385,16 @@ where // the node. let flags_snapshot = self.semantic.flags; + // Update the semantic model if it is in a docstring. This should be done after the + // flags snapshot to ensure that it gets reset once the statement is analyzed. + if self.docstring_state.is_expected() { + if is_docstring_stmt(stmt) { + self.semantic.flags |= SemanticModelFlags::DOCSTRING; + } + // Reset the state irrespective of whether the statement is a docstring or not. + self.docstring_state = DocstringState::Other; + } + // Step 1: Binding match stmt { Stmt::AugAssign(ast::StmtAugAssign { @@ -651,6 +696,8 @@ where self.semantic.set_globals(globals); } + // Set the docstring state before visiting the class body. + self.docstring_state = DocstringState::Expected; self.visit_body(body); } Stmt::TypeAlias(ast::StmtTypeAlias { @@ -1961,6 +2008,8 @@ impl<'a> Checker<'a> { }; self.visit_parameters(parameters); + // Set the docstring state before visiting the function body. + self.docstring_state = DocstringState::Expected; self.visit_body(body); } } diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index add977d754357..51a2607cd8125 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -1489,6 +1489,11 @@ impl<'a> SemanticModel<'a> { .intersects(SemanticModelFlags::TYPE_CHECKING_BLOCK) } + /// Return `true` if the model is in a docstring. + pub const fn in_docstring(&self) -> bool { + self.flags.intersects(SemanticModelFlags::DOCSTRING) + } + /// Return `true` if the model has traversed past the "top-of-file" import boundary. pub const fn seen_import_boundary(&self) -> bool { self.flags.intersects(SemanticModelFlags::IMPORT_BOUNDARY) @@ -1853,6 +1858,26 @@ bitflags! { /// ``` const COMPREHENSION_ASSIGNMENT = 1 << 19; + + /// The model is in a module / class / function docstring. + /// + /// For example, the model could be visiting either the module, class, + /// or function docstring in: + /// ```python + /// """Module docstring.""" + /// + /// + /// class Foo: + /// """Class docstring.""" + /// pass + /// + /// + /// def foo(): + /// """Function docstring.""" + /// pass + /// ``` + const DOCSTRING = 1 << 20; + /// The context is in any type annotation. const ANNOTATION = Self::TYPING_ONLY_ANNOTATION.bits() | Self::RUNTIME_EVALUATED_ANNOTATION.bits() | Self::RUNTIME_REQUIRED_ANNOTATION.bits();