Skip to content

Commit

Permalink
Ignore overlong pragma comments when enforcing linter line length (#7692
Browse files Browse the repository at this point in the history
)

## Summary

This PR modifies the `line-too-long` and `doc-line-too-long` rules to
ignore lines that are too long due to the presence of a pragma comment
(e.g., `# type: ignore` or `# noqa`). That is, if a line only exceeds
the limit due to the pragma comment, it will no longer be flagged as
"too long". This behavior mirrors that of the formatter, thus ensuring
that we don't flag lines under E501 that the formatter would otherwise
avoid wrapping.

As a concrete example, given a line length of 88, the following would
_no longer_ be considered an E501 violation:

```python
# The string literal is 88 characters, including quotes.
"shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:sh"  # type: ignore
```

This, however, would:

```python
# The string literal is 89 characters, including quotes.
"shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:shape:sha"  # type: ignore
```

In addition to mirroring the formatter, this also means that adding a
pragma comment (like `# noqa`) won't _cause_ additional violations to
appear (namely, E501). It's very common for users to add a `# type:
ignore` or similar to a line, only to find that they then have to add a
suppression comment _after_ it that was required before, as in `# type:
ignore # noqa: E501`.

Closes #7471.

## Test Plan

`cargo test`
  • Loading branch information
charliermarsh authored Sep 29, 2023
1 parent b519b56 commit 1646939
Show file tree
Hide file tree
Showing 11 changed files with 296 additions and 134 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# TODO: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# TODO comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME: comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
# FIXME(charlie): comments starting with one of the configured task-tags sometimes are longer than line-length so that you can easily find them with `git grep`
11 changes: 11 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa" # type: ignore

# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa"# type: ignore

# OK (88 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaa" # type: ignore

# Error (89 characters)
"shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:" + "shape:aaaa" # type: ignore
4 changes: 2 additions & 2 deletions crates/ruff_linter/src/checkers/physical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub(crate) fn check_physical_lines(
.is_some()
{
if enforce_doc_line_too_long {
if let Some(diagnostic) = doc_line_too_long(&line, settings) {
if let Some(diagnostic) = doc_line_too_long(&line, indexer, settings) {
diagnostics.push(diagnostic);
}
}
Expand All @@ -54,7 +54,7 @@ pub(crate) fn check_physical_lines(
}

if enforce_line_too_long {
if let Some(diagnostic) = line_too_long(&line, settings) {
if let Some(diagnostic) = line_too_long(&line, indexer, settings) {
diagnostics.push(diagnostic);
}
}
Expand Down
85 changes: 2 additions & 83 deletions crates/ruff_linter/src/rules/pycodestyle/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
use unicode_width::UnicodeWidthStr;

use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::parenthesize::parenthesized_range;
use ruff_python_ast::{CmpOp, Expr};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::{Line, Locator};
use ruff_text_size::{Ranged, TextLen, TextRange};

use crate::line_width::{LineLength, LineWidthBuilder, TabSize};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;

pub(super) fn is_ambiguous_name(name: &str) -> bool {
name == "l" || name == "I" || name == "O"
Expand Down Expand Up @@ -64,80 +60,3 @@ pub(super) fn generate_comparison(

contents
}

pub(super) fn is_overlong(
line: &Line,
limit: LineLength,
ignore_overlong_task_comments: bool,
task_tags: &[String],
tab_size: TabSize,
) -> Option<Overlong> {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
let max_width = line.len() * tab_size.as_usize();
if max_width < limit.value() as usize {
return None;
}

let mut width = LineWidthBuilder::new(tab_size);
width = width.add_str(line.as_str());
if width <= limit {
return None;
}

let mut chunks = line.split_whitespace();
let (Some(first_chunk), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter
return None;
};

if first_chunk == "#" {
if ignore_overlong_task_comments {
let second = second_chunk.trim_end_matches(':');
if task_tags.iter().any(|task_tag| task_tag == second) {
return None;
}
}
}

// Do not enforce the line length for lines that end with a URL, as long as the URL
// begins before the limit.
let last_chunk = chunks.last().unwrap_or(second_chunk);
if last_chunk.contains("://") {
if width.get() - last_chunk.width() <= limit.value() as usize {
return None;
}
}

// Obtain the start offset of the part of the line that exceeds the limit
let mut start_offset = line.start();
let mut start_width = LineWidthBuilder::new(tab_size);
for c in line.chars() {
if start_width < limit {
start_offset += c.text_len();
start_width = start_width.add_char(c);
} else {
break;
}
}
Some(Overlong {
range: TextRange::new(start_offset, line.end()),
width: width.get(),
})
}

pub(super) struct Overlong {
range: TextRange,
width: usize,
}

impl Overlong {
pub(super) const fn range(&self) -> TextRange {
self.range
}

pub(super) const fn width(&self) -> usize {
self.width
}
}
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub(crate) mod rules;
pub mod settings;

pub(crate) mod helpers;
pub(super) mod overlong;

#[cfg(test)]
mod tests {
Expand Down Expand Up @@ -30,6 +31,7 @@ mod tests {
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501_3.py"))]
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
#[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402.py"))]
Expand Down
165 changes: 165 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/overlong.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
use std::ops::Deref;

use unicode_width::UnicodeWidthStr;

use ruff_python_index::Indexer;
use ruff_python_trivia::is_pragma_comment;
use ruff_source_file::Line;
use ruff_text_size::{TextLen, TextRange};

use crate::line_width::{LineLength, LineWidthBuilder, TabSize};

#[derive(Debug)]
pub(super) struct Overlong {
range: TextRange,
width: usize,
}

impl Overlong {
/// Returns an [`Overlong`] if the measured line exceeds the configured line length, or `None`
/// otherwise.
pub(super) fn try_from_line(
line: &Line,
indexer: &Indexer,
limit: LineLength,
task_tags: &[String],
tab_size: TabSize,
) -> Option<Self> {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
let max_width = line.len() * tab_size.as_usize();
if max_width < limit.value() as usize {
return None;
}

// Measure the line. If it's already below the limit, exit early.
let width = measure(line.as_str(), tab_size);
if width <= limit {
return None;
}

// Strip trailing comments and re-measure the line, if needed.
let line = StrippedLine::from_line(line, indexer, task_tags);
let width = match &line {
StrippedLine::WithoutPragma(line) => {
let width = measure(line.as_str(), tab_size);
if width <= limit {
return None;
}
width
}
StrippedLine::Unchanged(_) => width,
};

let mut chunks = line.split_whitespace();
let (Some(_), Some(second_chunk)) = (chunks.next(), chunks.next()) else {
// Single word / no printable chars - no way to make the line shorter.
return None;
};

// Do not enforce the line length for lines that end with a URL, as long as the URL
// begins before the limit.
let last_chunk = chunks.last().unwrap_or(second_chunk);
if last_chunk.contains("://") {
if width.get() - last_chunk.width() <= limit.value() as usize {
return None;
}
}

// Obtain the start offset of the part of the line that exceeds the limit.
let mut start_offset = line.start();
let mut start_width = LineWidthBuilder::new(tab_size);
for c in line.chars() {
if start_width < limit {
start_offset += c.text_len();
start_width = start_width.add_char(c);
} else {
break;
}
}

Some(Self {
range: TextRange::new(start_offset, line.end()),
width: width.get(),
})
}

/// Return the range of the overlong portion of the line.
pub(super) const fn range(&self) -> TextRange {
self.range
}

/// Return the measured width of the line, without any trailing pragma comments.
pub(super) const fn width(&self) -> usize {
self.width
}
}

/// A [`Line`] that may have trailing pragma comments stripped.
#[derive(Debug)]
enum StrippedLine<'a> {
/// The [`Line`] was unchanged.
Unchanged(&'a Line<'a>),
/// The [`Line`] was changed such that a trailing pragma comment (e.g., `# type: ignore`) was
/// removed. The stored [`Line`] consists of the portion of the original line that precedes the
/// pragma comment.
WithoutPragma(Line<'a>),
}

impl<'a> StrippedLine<'a> {
/// Strip trailing comments from a [`Line`], if the line ends with a pragma comment (like
/// `# type: ignore`) or, if necessary, a task comment (like `# TODO`).
fn from_line(line: &'a Line<'a>, indexer: &Indexer, task_tags: &[String]) -> Self {
let [comment_range] = indexer.comment_ranges().comments_in_range(line.range()) else {
return Self::Unchanged(line);
};

// Convert from absolute to relative range.
let comment_range = comment_range - line.start();
let comment = &line.as_str()[comment_range];

// Ex) `# type: ignore`
if is_pragma_comment(comment) {
// Remove the pragma from the line.
let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
return Self::WithoutPragma(Line::new(prefix, line.start()));
}

// Ex) `# TODO(charlie): ...`
if !task_tags.is_empty() {
let Some(trimmed) = comment.strip_prefix('#') else {
return Self::Unchanged(line);
};
let trimmed = trimmed.trim_start();
if task_tags
.iter()
.any(|task_tag| trimmed.starts_with(task_tag))
{
// Remove the task tag from the line.
let prefix = &line.as_str()[..usize::from(comment_range.start())].trim_end();
return Self::WithoutPragma(Line::new(prefix, line.start()));
}
}

Self::Unchanged(line)
}
}

impl<'a> Deref for StrippedLine<'a> {
type Target = Line<'a>;

fn deref(&self) -> &Self::Target {
match self {
Self::Unchanged(line) => line,
Self::WithoutPragma(line) => line,
}
}
}

/// Returns the width of a given string, accounting for the tab size.
fn measure(s: &str, tab_size: TabSize) -> LineWidthBuilder {
let mut width = LineWidthBuilder::new(tab_size);
width = width.add_str(s);
width
}
36 changes: 26 additions & 10 deletions crates/ruff_linter/src/rules/pycodestyle/rules/doc_line_too_long.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_source_file::Line;

use crate::rules::pycodestyle::helpers::is_overlong;
use crate::rules::pycodestyle::overlong::Overlong;
use crate::settings::LinterSettings;

/// ## What it does
Expand All @@ -19,10 +20,17 @@ use crate::settings::LinterSettings;
/// of either a standalone comment or a standalone string, like a docstring.
///
/// In the interest of pragmatism, this rule makes a few exceptions when
/// determining whether a line is overlong. Namely, it ignores lines that
/// consist of a single "word" (i.e., without any whitespace between its
/// characters), and lines that end with a URL (as long as the URL starts
/// before the line-length threshold).
/// determining whether a line is overlong. Namely, it:
///
/// 1. Ignores lines that consist of a single "word" (i.e., without any
/// whitespace between its characters).
/// 2. Ignores lines that end with a URL, as long as the URL starts before
/// the line-length threshold.
/// 3. Ignores line that end with a pragma comment (e.g., `# type: ignore`
/// or `# noqa`), as long as the pragma comment starts before the
/// line-length threshold. That is, a line will not be flagged as
/// overlong if a pragma comment _causes_ it to exceed the line length.
/// (This behavior aligns with that of the Ruff formatter.)
///
/// If [`pycodestyle.ignore-overlong-task-comments`] is `true`, this rule will
/// also ignore comments that start with any of the specified [`task-tags`]
Expand Down Expand Up @@ -50,7 +58,7 @@ use crate::settings::LinterSettings;
///
/// [PEP 8]: https://peps.python.org/pep-0008/#maximum-line-length
#[violation]
pub struct DocLineTooLong(pub usize, pub usize);
pub struct DocLineTooLong(usize, usize);

impl Violation for DocLineTooLong {
#[derive_message_formats]
Expand All @@ -61,16 +69,24 @@ impl Violation for DocLineTooLong {
}

/// W505
pub(crate) fn doc_line_too_long(line: &Line, settings: &LinterSettings) -> Option<Diagnostic> {
pub(crate) fn doc_line_too_long(
line: &Line,
indexer: &Indexer,
settings: &LinterSettings,
) -> Option<Diagnostic> {
let Some(limit) = settings.pycodestyle.max_doc_length else {
return None;
};

is_overlong(
Overlong::try_from_line(
line,
indexer,
limit,
settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags,
if settings.pycodestyle.ignore_overlong_task_comments {
&settings.task_tags
} else {
&[]
},
settings.tab_size,
)
.map(|overlong| {
Expand Down
Loading

0 comments on commit 1646939

Please sign in to comment.