Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pyupgrade] Do not report when a UTF-8 comment is followed by a non-UTF-8 one (UP009) #14728

Merged
merged 13 commits into from
Dec 11, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# -*- coding: ascii -*-
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# -*- coding: ascii -*-
# -*- coding: latin -*-
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# -*- coding: ascii -*-
# -*- coding: utf-8 -*-
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# -*- coding: utf8 -*-
print("the following is not a coding comment")
# -*- coding: ascii -*-
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
# -*- coding: ascii -*-
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
InSyncWithFoo marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# -*- coding: ascii -*-
10 changes: 10 additions & 0 deletions crates/ruff_linter/src/rules/pyupgrade/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@ mod tests {
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_8.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_9.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_10.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_other.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_utf8.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_other.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8_other.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_code_other.py"))]
#[test_case(
Rule::UTF8EncodingDeclaration,
Path::new("UP009_hashbang_utf8_other.py")
)]
#[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
#[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
#[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_index::Indexer;
use ruff_python_trivia::CommentRanges;
use ruff_source_file::LineRanges;
use ruff_text_size::{Ranged, TextRange};
use ruff_text_size::TextRange;

use crate::Locator;

Expand Down Expand Up @@ -46,7 +46,17 @@ impl AlwaysFixableViolation for UTF8EncodingDeclaration {

// Regex from PEP 263: optional leading whitespace, a `#`, then `coding:` or `coding=`
// followed by the encoding. The `name` capture group, where present, holds the declared
// encoding name.
static CODING_COMMENT_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*utf-?8").unwrap());
LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*(?<name>[-_.a-zA-Z0-9]+)").unwrap());

/// Classification of a coding comment by the encoding name it declares.
enum CodingComment {
/// The comment declares `utf8` or `utf-8`; carries the ranges needed to report and remove it.
UTF8(CodingCommentRanges),
/// The comment declares any other encoding name.
Other,
}

/// Source ranges associated with a single coding comment.
struct CodingCommentRanges {
/// Range of the comment itself; used as the range of the reported diagnostic.
self_range: TextRange,
/// Full-line range of the line holding the comment; this is the span the fix deletes.
line_range: TextRange,
}

/// UP009
pub(crate) fn unnecessary_coding_comment(
Expand All @@ -55,48 +65,92 @@ pub(crate) fn unnecessary_coding_comment(
indexer: &Indexer,
comment_ranges: &CommentRanges,
) {
// The coding comment must be on one of the first two lines. Since each comment spans at least
// one line, we only need to check the first two comments at most.
for comment_range in comment_ranges.iter().take(2) {
// If leading content is not whitespace then it's not a valid coding comment e.g.
// ```
// print(x) # coding=utf8
// ```
let line_range = locator.full_line_range(comment_range.start());
if !locator
.slice(TextRange::new(line_range.start(), comment_range.start()))
.trim()
.is_empty()
{
continue;
}
// The coding comment must be on one of the first two lines.
// Since each comment spans at least one line,
// we only need to check the first two comments,
// plus a third to make sure it would not become a new coding comment.
let mut coding_comments = comment_ranges
.iter()
.take(3)
.map(|comment_range| coding_comment(locator, indexer, *comment_range));

// If the line is after a continuation then it's not a valid coding comment e.g.
// ```
// x = 1 \
// # coding=utf8
// x = 2
// ```
if indexer
.preceded_by_continuations(line_range.start(), locator.contents())
.is_some()
{
continue;
}
let first = coding_comments.next().flatten();
let second = coding_comments.next().flatten();
let third = coding_comments.next().flatten();

if CODING_COMMENT_REGEX.is_match(locator.slice(line_range)) {
#[allow(deprecated)]
let index = locator.compute_line_index(line_range.start());
if index.to_zero_indexed() > 1 {
continue;
}

let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, *comment_range);
diagnostic.set_fix(Fix::safe_edit(Edit::deletion(
line_range.start(),
line_range.end(),
)));
diagnostics.push(diagnostic);
// Table: https://github.com/astral-sh/ruff/pull/14728#issuecomment-2518114454
match [first, second, third] {
[Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..)), _]
| [None, Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..))] => {
report(diagnostics, ranges.line_range, ranges.self_range);
}
_ => {}
}
}

/// Classifies the comment at `self_range` as a coding comment, if it is one.
///
/// Returns `None` when the comment is not a candidate at all: it shares its line with
/// non-whitespace content, its line follows a continuation, the line does not match
/// [`CODING_COMMENT_REGEX`], or the line lies beyond the third line of the file.
/// Otherwise returns [`CodingComment::UTF8`] for `utf8` / `utf-8` and
/// [`CodingComment::Other`] for every other encoding name.
fn coding_comment(
    locator: &Locator,
    indexer: &Indexer,
    self_range: TextRange,
) -> Option<CodingComment> {
    let line_range = locator.full_line_range(self_range.start());
    let line_start = line_range.start();

    // Anything other than whitespace before the `#` disqualifies the comment, e.g.
    // ```
    // print(x)  # coding=utf8
    // ```
    let leading = locator.slice(TextRange::new(line_start, self_range.start()));
    if !leading.trim().is_empty() {
        return None;
    }

    // A line that follows a continuation is not a valid coding comment either, e.g.
    // ```
    // x = 1 \
    //   # coding=utf8
    // x = 2
    // ```
    let continuation = indexer.preceded_by_continuations(line_start, locator.contents());
    if continuation.is_some() {
        return None;
    }

    let captures = CODING_COMMENT_REGEX.captures(locator.slice(line_range))?;
    let encoding = captures.name("name")?.as_str();

    // Only the first two lines can hold the effective coding comment, but the third is
    // inspected as well so that an overriding declaration is noticed:
    // ```
    // #!/usr/bin/python
    // # -*- coding: utf-8 -*-
    // # -*- coding: ascii -*-
    // ```
    if locator.count_lines_until(line_start) > 2 {
        return None;
    }

    Some(match encoding {
        "utf8" | "utf-8" => CodingComment::UTF8(CodingCommentRanges {
            self_range,
            line_range,
        }),
        _ => CodingComment::Other,
    })
}

/// Pushes a `UTF8EncodingDeclaration` diagnostic covering `comment_range`, with a safe fix
/// that deletes everything in `line_range`.
fn report(diagnostics: &mut Vec<Diagnostic>, line_range: TextRange, comment_range: TextRange) {
    let remove_line = Fix::safe_edit(Edit::deletion(line_range.start(), line_range.end()));

    diagnostics.push(Diagnostic::new(UTF8EncodingDeclaration, comment_range).with_fix(remove_line));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---
UP009_utf8_utf8.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
1 | # -*- coding: utf-8 -*-
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
2 | # -*- coding: utf-8 -*-
|
= help: Remove unnecessary coding comment

ℹ Safe fix
1 1 | # -*- coding: utf-8 -*-
2 |-# -*- coding: utf-8 -*-
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
snapshot_kind: text
---
UP009_utf8_utf8_other.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
1 | # -*- coding: utf-8 -*-
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
2 | # -*- coding: utf-8 -*-
3 | # -*- coding: ascii -*-
|
= help: Remove unnecessary coding comment

ℹ Safe fix
1 1 | # -*- coding: utf-8 -*-
2 |-# -*- coding: utf-8 -*-
3 2 | # -*- coding: ascii -*-
40 changes: 40 additions & 0 deletions crates/ruff_source_file/src/line_ranges.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,46 @@ pub trait LineRanges {
/// ## Panics
/// If the start or end of `range` is out of bounds.
fn full_lines_str(&self, range: TextRange) -> &str;

/// Returns the number of lines whose full end (terminator included) is at or before `offset`.
///
/// For an offset inside a line this is that line's zero-based index. An offset equal to the
/// length of the text counts every line, including a last line without a trailing newline.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_source_file::LineRanges;
///
/// let text = "First line\nsecond line\r\nthird line";
///
/// assert_eq!(text.count_lines_until(TextSize::from(5)), 0);
/// assert_eq!(text.count_lines_until(TextSize::from(23)), 1);
/// assert_eq!(text.count_lines_until(TextSize::from(24)), 2);
/// assert_eq!(text.count_lines_until(TextSize::from(34)), 3);
///
/// let text = "foo\n";
///
/// assert_eq!(text.count_lines_until(TextSize::from(4)), 1);
/// ```
///
/// ## Panics
/// NOTE(review): `offset` is only compared against line ends here and is never passed to
/// `full_line_end`, so an out-of-bounds `offset` does not appear to panic in this method —
/// confirm against the `full_line_end` implementations.
fn count_lines_until(&self, offset: TextSize) -> u32 {
    let mut lines_before = 0;
    let mut cursor = TextSize::default();

    loop {
        let boundary = self.full_line_end(cursor);

        // Stop when no progress is made (end of text) or when the next line ends past `offset`.
        if boundary == cursor || boundary > offset {
            break lines_before;
        }

        lines_before += 1;
        cursor = boundary;
    }
}
}

impl LineRanges for str {
Expand Down
Loading