astral-sh · MichaReiser · Dec 11, 2024 · Dec 2, 2024 · Dec 2, 2024 · Dec 3, 2024
diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_other.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_other.py
@@ -0,0 +1,2 @@
+# -*- coding: ascii -*-
+# -*- coding: latin -*-
diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_utf8.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_utf8.py
@@ -0,0 +1,2 @@
+# -*- coding: ascii -*-
+# -*- coding: utf-8 -*-
diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_other.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_other.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+# -*- coding: ascii -*-
diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8_other.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8_other.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
+# -*- coding: ascii -*-
diff --git a/crates/ruff_linter/src/rules/pyupgrade/mod.rs b/crates/ruff_linter/src/rules/pyupgrade/mod.rs
@@ -77,6 +77,11 @@ mod tests {
     #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_8.py"))]
     #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_9.py"))]
     #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_10.py"))]
+    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_other.py"))]
+    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_utf8.py"))]
+    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_other.py"))]
+    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8.py"))]
+    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8_other.py"))]
     #[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
     #[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
     #[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]

diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs
@@ -7,7 +7,7 @@ use ruff_macros::{derive_message_formats, ViolationMetadata};
 use ruff_python_index::Indexer;
 use ruff_python_trivia::CommentRanges;
 use ruff_source_file::LineRanges;
-use ruff_text_size::{Ranged, TextRange};
+use ruff_text_size::TextRange;
 
 use crate::Locator;
 
@@ -46,7 +46,17 @@ impl AlwaysFixableViolation for UTF8EncodingDeclaration {
 
 // Regex from PEP263.
 static CODING_COMMENT_REGEX: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*utf-?8").unwrap());
+    LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*(?<name>[-_.a-zA-Z0-9]+)").unwrap());
+
+/// Classification of a coding comment by the encoding name it declares.
+enum CodingComment {
+    /// The declared encoding is `utf8` or `utf-8`; carries the ranges needed to report it.
+    UTF8(CodingCommentRanges),
+    /// The comment declares any other encoding name.
+    Other,
+}
+
+/// Source ranges associated with a coding comment.
+struct CodingCommentRanges {
+    /// Range of the comment itself; used as the range of the diagnostic.
+    self_range: TextRange,
+    /// Full range of the line holding the comment; this is what the fix deletes.
+    line_range: TextRange,
+}
 
 /// UP009
 pub(crate) fn unnecessary_coding_comment(
@@ -57,46 +67,80 @@ pub(crate) fn unnecessary_coding_comment(
 ) {
     // The coding comment must be on one of the first two lines. Since each comment spans at least
     // one line, we only need to check the first two comments at most.
-    for comment_range in comment_ranges.iter().take(2) {
-        // If leading content is not whitespace then it's not a valid coding comment e.g.
-        // ```
-        // print(x) # coding=utf8
-        // ```
-        let line_range = locator.full_line_range(comment_range.start());
-        if !locator
-            .slice(TextRange::new(line_range.start(), comment_range.start()))
-            .trim()
-            .is_empty()
-        {
-            continue;
-        }
+    let mut coding_comments = comment_ranges
+        .iter()
+        .take(2)
+        .map(|comment_range| coding_comment(locator, indexer, *comment_range));
 
-        // If the line is after a continuation then it's not a valid coding comment e.g.
-        // ```
-        // x = 1 \
-        //    # coding=utf8
-        // x = 2
-        // ```
-        if indexer
-            .preceded_by_continuations(line_range.start(), locator.contents())
-            .is_some()
-        {
-            continue;
-        }
+    let first = coding_comments.next().flatten();
+    let second = coding_comments.next().flatten();
 
-        if CODING_COMMENT_REGEX.is_match(locator.slice(line_range)) {
-            #[allow(deprecated)]
-            let index = locator.compute_line_index(line_range.start());
-            if index.to_zero_indexed() > 1 {
-                continue;
-            }
-
-            let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, *comment_range);
-            diagnostic.set_fix(Fix::safe_edit(Edit::deletion(
-                line_range.start(),
-                line_range.end(),
-            )));
-            diagnostics.push(diagnostic);
+    match [first, second] {
+        [Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..))]
+        | [None, Some(CodingComment::UTF8(ranges))] => {
+            report(diagnostics, ranges.line_range, ranges.self_range);
         }
+
+        _ => {}
     }
 }
+
+/// Classifies the comment at `self_range` as a coding comment, if it is one.
+///
+/// Returns `None` when the comment shares its line with other content, follows a line
+/// continuation, does not match [`CODING_COMMENT_REGEX`], or starts on a line whose
+/// zero-based index is greater than 1. Otherwise returns [`CodingComment::UTF8`] for the
+/// names `utf8` and `utf-8`, and [`CodingComment::Other`] for every other encoding name.
+fn coding_comment(
+    locator: &Locator,
+    indexer: &Indexer,
+    self_range: TextRange,
+) -> Option<CodingComment> {
+    let line_range = locator.full_line_range(self_range.start());
+
+    // Only whitespace may precede the `#`; a trailing comment is not a coding comment:
+    // ```
+    // print(x) # coding=utf8
+    // ```
+    let leading = locator.slice(TextRange::new(line_range.start(), self_range.start()));
+    if !leading.trim().is_empty() {
+        return None;
+    }
+
+    // A comment on a line that continues the previous one is not a coding comment either:
+    // ```
+    // x = 1 \
+    //    # coding=utf8
+    // x = 2
+    // ```
+    let follows_continuation = indexer
+        .preceded_by_continuations(line_range.start(), locator.contents())
+        .is_some();
+    if follows_continuation {
+        return None;
+    }
+
+    let captures = CODING_COMMENT_REGEX.captures(locator.slice(line_range))?;
+    let encoding = captures.name("name")?.as_str();
+
+    // The declaration only counts on one of the first two lines of the file.
+    if locator.count_lines_until(line_range.start()) > 1 {
+        return None;
+    }
+
+    Some(match encoding {
+        "utf8" | "utf-8" => CodingComment::UTF8(CodingCommentRanges {
+            self_range,
+            line_range,
+        }),
+        _ => CodingComment::Other,
+    })
+}
+
+/// Pushes a [`UTF8EncodingDeclaration`] diagnostic spanning `comment_range`, carrying a safe
+/// fix that deletes `line_range`.
+fn report(diagnostics: &mut Vec<Diagnostic>, line_range: TextRange, comment_range: TextRange) {
+    let deletion = Edit::deletion(line_range.start(), line_range.end());
+
+    diagnostics.push(
+        Diagnostic::new(UTF8EncodingDeclaration, comment_range)
+            .with_fix(Fix::safe_edit(deletion)),
+    );
+}
diff --git a/...rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_other_other.py.snap b/...rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_other_other.py.snap
@@ -0,0 +1,5 @@
+---
+source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
+snapshot_kind: text
+---
+
diff --git a/.../rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_other_utf8.py.snap b/.../rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_other_utf8.py.snap
@@ -0,0 +1,5 @@
+---
+source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
+snapshot_kind: text
+---
+
diff --git a/.../rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_utf8_other.py.snap b/.../rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_utf8_other.py.snap
@@ -0,0 +1,5 @@
+---
+source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
+snapshot_kind: text
+---
+
diff --git a/...c/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_utf8_utf8.py.snap b/...c/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_utf8_utf8.py.snap
@@ -0,0 +1,15 @@
+---
+source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
+snapshot_kind: text
+---
+UP009_utf8_utf8.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
+  |
+1 | # -*- coding: utf-8 -*-
+  | ^^^^^^^^^^^^^^^^^^^^^^^ UP009
+2 | # -*- coding: utf-8 -*-
+  |
+  = help: Remove unnecessary coding comment
+
+ℹ Safe fix
+1 1 | # -*- coding: utf-8 -*-
+2   |-# -*- coding: utf-8 -*-
diff --git a/...s/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_utf8_utf8_other.py.snap b/...s/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_utf8_utf8_other.py.snap
@@ -0,0 +1,17 @@
+---
+source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
+snapshot_kind: text
+---
+UP009_utf8_utf8_other.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
+  |
+1 | # -*- coding: utf-8 -*-
+  | ^^^^^^^^^^^^^^^^^^^^^^^ UP009
+2 | # -*- coding: utf-8 -*-
+3 | # -*- coding: ascii -*-
+  |
+  = help: Remove unnecessary coding comment
+
+ℹ Safe fix
+1 1 | # -*- coding: utf-8 -*-
+2   |-# -*- coding: utf-8 -*-
+3 2 | # -*- coding: ascii -*-
diff --git a/crates/ruff_source_file/src/line_ranges.rs b/crates/ruff_source_file/src/line_ranges.rs
@@ -295,6 +295,40 @@ pub trait LineRanges {
     /// ## Panics
     /// If the start or end of `range` is out of bounds.
     fn full_lines_str(&self, range: TextRange) -> &str;
+
+    /// Returns the zero-based index of the line containing `offset`.
+    ///
+    /// An `offset` at or past the end of the text resolves to the last line.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{Ranged, TextRange, TextSize};
+    /// # use ruff_source_file::LineRanges;
+    ///
+    /// let text = "First line\nsecond line\r\nthird line";
+    ///
+    /// assert_eq!(text.count_lines_until(TextSize::from(5)), 0);
+    /// assert_eq!(text.count_lines_until(TextSize::from(23)), 1);
+    /// assert_eq!(text.count_lines_until(TextSize::from(24)), 2);
+    /// assert_eq!(text.count_lines_until(TextSize::from(34)), 2);
+    /// ```
+    fn count_lines_until(&self, offset: TextSize) -> u32 {
+        let mut count = 0;
+        let mut line_start = TextSize::default();
+
+        loop {
+            let next_line_start = self.full_line_end(line_start);
+            // Stop when `offset` is on this line, or when this line has no terminator (the
+            // last line): `full_line_end` would otherwise return the text length forever.
+            if next_line_start > offset || self.line_end(line_start) == next_line_start {
+                break count;
+            }
+
+            count += 1;
+            line_start = next_line_start;
+        }
+    }
 }
 
 impl LineRanges for str {
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# -*- coding: ascii -*-
		# -*- coding: latin -*-
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# -*- coding: ascii -*-
		# -*- coding: utf-8 -*-