diff --git a/Cargo.lock b/Cargo.lock index 34e7f3e281d96..cc673ff26235c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2033,6 +2033,7 @@ dependencies = [ "textwrap", "thiserror", "toml", + "unicode-width", ] [[package]] diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 6ba7356d767e1..13d8b892e619d 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -58,6 +58,7 @@ rustpython-parser = { workspace = true } schemars = { workspace = true } semver = { version = "1.0.16" } serde = { workspace = true } +serde_json = { workspace = true } shellexpand = { workspace = true } smallvec = { version = "1.10.0" } strum = { workspace = true } @@ -65,7 +66,7 @@ strum_macros = { workspace = true } textwrap = { workspace = true } thiserror = { version = "1.0.38" } toml = { workspace = true } -serde_json = { workspace = true } +unicode-width = "0.1.10" [dev-dependencies] insta = { workspace = true, features = ["yaml", "redactions"] } diff --git a/crates/ruff/src/checkers/physical_lines.rs b/crates/ruff/src/checkers/physical_lines.rs index 83f1a7778be94..1890e9e67e530 100644 --- a/crates/ruff/src/checkers/physical_lines.rs +++ b/crates/ruff/src/checkers/physical_lines.rs @@ -211,7 +211,7 @@ mod tests { flags::Autofix::Enabled, ) }; - assert!(!check_with_max_line_length(6).is_empty()); - assert!(check_with_max_line_length(7).is_empty()); + assert_eq!(check_with_max_line_length(8), vec![]); + assert_eq!(check_with_max_line_length(8), vec![]); } } diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs index d8caae83a0dbd..7ce85dacb34a1 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs @@ -1,6 +1,7 @@ use log::error; use rustc_hash::FxHashSet; use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprContext, ExprKind, Stmt, StmtKind}; +use unicode_width::UnicodeWidthStr; use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; @@ -287,7 +288,7 @@ pub fn nested_if_statements( if fix .content .universal_newlines() - .all(|line| line.len() <= checker.settings.line_length) + .all(|line| line.width() <= checker.settings.line_length) { diagnostic.amend(fix); } @@ -490,7 +491,7 @@ pub fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: Option<& let contents = unparse_stmt(&ternary, checker.stylist); // Don't flag if the resulting expression would exceed the maximum line length. - if stmt.location.column() + contents.len() > checker.settings.line_length { + if stmt.location.column() + contents.width() > checker.settings.line_length { return; } @@ -839,7 +840,7 @@ pub fn use_dict_get_with_default( ); // Don't flag if the resulting expression would exceed the maximum line length. - if stmt.location.column() + contents.len() > checker.settings.line_length { + if stmt.location.column() + contents.width() > checker.settings.line_length { return; } diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs index 1a0379f6e7a0c..49cf6a7355ebb 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs @@ -1,5 +1,6 @@ use log::error; use rustpython_parser::ast::{Located, Stmt, StmtKind, Withitem}; +use unicode_width::UnicodeWidthStr; use ruff_diagnostics::Diagnostic; use ruff_diagnostics::{AutofixKind, Violation}; @@ -117,7 +118,7 @@ pub fn multiple_with_statements( if fix .content .universal_newlines() - .all(|line| line.len() <= checker.settings.line_length) + .all(|line| line.width() <= checker.settings.line_length) { diagnostic.amend(fix); } diff --git a/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs b/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs index f5a372889b8ca..ed6aff53e0a03 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs @@ -1,6 +1,7 @@ use rustpython_parser::ast::{ Cmpop, Comprehension, Constant, Expr, ExprContext, ExprKind, Location, Stmt, StmtKind, Unaryop, }; +use unicode_width::UnicodeWidthStr; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; @@ -211,7 +212,7 @@ pub fn convert_for_loop_to_any_all(checker: &mut Checker, stmt: &Stmt, sibling: ); // Don't flag if the resulting expression would exceed the maximum line length. - if stmt.location.column() + contents.len() > checker.settings.line_length { + if stmt.location.column() + contents.width() > checker.settings.line_length { return; } @@ -288,7 +289,7 @@ pub fn convert_for_loop_to_any_all(checker: &mut Checker, stmt: &Stmt, sibling: ); // Don't flag if the resulting expression would exceed the maximum line length. - if stmt.location.column() + contents.len() > checker.settings.line_length { + if stmt.location.column() + contents.width() > checker.settings.line_length { return; } diff --git a/crates/ruff/src/rules/isort/format.rs b/crates/ruff/src/rules/isort/format.rs index 6c622d3b7116a..d7fd45424d3c6 100644 --- a/crates/ruff/src/rules/isort/format.rs +++ b/crates/ruff/src/rules/isort/format.rs @@ -1,4 +1,5 @@ use ruff_python_ast::source_code::Stylist; +use unicode_width::UnicodeWidthStr; use super::types::{AliasData, CommentSet, ImportFromData, Importable}; @@ -69,9 +70,9 @@ pub fn format_import_from( || aliases.len() == 1 || aliases.iter().all(|(alias, _)| alias.asname.is_none())) { - let (single_line, import_length) = + let (single_line, import_width) = format_single_line(import_from, comments, aliases, is_first, stylist); - if import_length <= line_length || aliases.iter().any(|(alias, _)| alias.name == "*") { + if import_width <= line_length || aliases.iter().any(|(alias, _)| alias.name == "*") { return single_line; } } @@ -90,7 +91,7 @@ fn format_single_line( stylist: &Stylist, ) -> (String, usize) { let mut output = String::with_capacity(CAPACITY); - let mut line_length = 0; + let mut line_width = 0; if !is_first && !comments.atop.is_empty() { output.push_str(stylist.line_ending()); @@ -104,28 +105,28 @@ fn format_single_line( output.push_str("from "); output.push_str(&module_name); output.push_str(" import "); - line_length += 5 + module_name.len() + 8; + line_width += 5 + module_name.width() + 8; for (index, (AliasData { name, asname }, comments)) in aliases.iter().enumerate() { if let Some(asname) = asname { output.push_str(name); output.push_str(" as "); output.push_str(asname); - line_length += name.len() + 4 + asname.len(); + line_width += name.width() + 4 + asname.width(); } else { output.push_str(name); - line_length += name.len(); + line_width += name.width(); } if index < aliases.len() - 1 { output.push_str(", "); - line_length += 2; + line_width += 2; } for comment in &comments.inline { output.push(' '); output.push(' '); output.push_str(comment); - line_length += 2 + comment.len(); + line_width += 2 + comment.width(); } } @@ -133,12 +134,12 @@ fn format_single_line( output.push(' '); output.push(' '); output.push_str(comment); - line_length += 2 + comment.len(); + line_width += 2 + comment.width(); } output.push_str(stylist.line_ending()); - (output, line_length) + (output, line_width) } /// Format an import-from statement in multi-line format. diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs index 0fa6385024604..aaae17b643cf5 100644 --- a/crates/ruff/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff/src/rules/pycodestyle/helpers.rs @@ -25,12 +25,12 @@ static URL_REGEX: Lazy = Lazy::new(|| Regex::new(r"^https?://\S+$").unwra pub fn is_overlong( line: &str, - line_length: usize, + line_width: usize, limit: usize, ignore_overlong_task_comments: bool, task_tags: &[String], ) -> bool { - if line_length <= limit { + if line_width <= limit { return false; } diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs index c5f55d723ce04..a34a4bb485dac 100644 --- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs +++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs @@ -2,6 +2,7 @@ use bitflags::bitflags; use rustpython_parser::ast::Location; use rustpython_parser::lexer::LexResult; use rustpython_parser::Tok; +use unicode_width::UnicodeWidthStr; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; @@ -86,7 +87,7 @@ fn build_line<'a>( // TODO(charlie): "Mute" strings. let s; let text = if let Tok::String { value, .. } = tok { - s = format!("\"{}\"", "x".repeat(value.len()).clone()); + s = format!("\"{}\"", "x".repeat(value.width()).clone()); &s } else { locator.slice(Range { diff --git a/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs b/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs index a608640193944..f6e27bd845d93 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs @@ -1,4 +1,5 @@ use rustpython_parser::ast::Location; +use unicode_width::UnicodeWidthStr; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; @@ -34,8 +35,8 @@ pub struct DocLineTooLong(pub usize, pub usize); impl Violation for DocLineTooLong { #[derive_message_formats] fn message(&self) -> String { - let DocLineTooLong(length, limit) = self; - format!("Doc line too long ({length} > {limit} characters)") + let DocLineTooLong(width, limit) = self; + format!("Doc line too long ({width} > {limit} characters)") } } @@ -45,19 +46,19 @@ pub fn doc_line_too_long(lineno: usize, line: &str, settings: &Settings) -> Opti return None; }; - let line_length = line.chars().count(); + let line_width = line.width(); if is_overlong( line, - line_length, + line_width, limit, settings.pycodestyle.ignore_overlong_task_comments, &settings.task_tags, ) { Some(Diagnostic::new( - DocLineTooLong(line_length, limit), + DocLineTooLong(line_width, limit), Range::new( Location::new(lineno + 1, limit), - Location::new(lineno + 1, line_length), + Location::new(lineno + 1, line.chars().count()), ), )) } else { diff --git a/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs b/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs index 669215c0621e6..431e6939006b8 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs @@ -1,4 +1,5 @@ use rustpython_parser::ast::Location; +use unicode_width::UnicodeWidthStr; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; @@ -31,27 +32,27 @@ pub struct LineTooLong(pub usize, pub usize); impl Violation for LineTooLong { #[derive_message_formats] fn message(&self) -> String { - let LineTooLong(length, limit) = self; - format!("Line too long ({length} > {limit} characters)") + let LineTooLong(width, limit) = self; + format!("Line too long ({width} > {limit} characters)") } } /// E501 pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option { - let line_length = line.chars().count(); + let line_width = line.width(); let limit = settings.line_length; if is_overlong( line, - line_length, + line_width, limit, settings.pycodestyle.ignore_overlong_task_comments, &settings.task_tags, ) { Some(Diagnostic::new( - LineTooLong(line_length, limit), + LineTooLong(line_width, limit), Range::new( Location::new(lineno + 1, limit), - Location::new(lineno + 1, line_length), + Location::new(lineno + 1, line.chars().count()), ), )) } else { diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E501_E501.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E501_E501.py.snap index 82fe82bbf1cce..4819b25ff36c3 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E501_E501.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E501_E501.py.snap @@ -15,6 +15,19 @@ expression: diagnostics column: 123 fix: ~ parent: ~ +- kind: + name: LineTooLong + body: Line too long (95 > 88 characters) + suggestion: ~ + fixable: false + location: + row: 16 + column: 88 + end_location: + row: 16 + column: 88 + fix: ~ + parent: ~ - kind: name: LineTooLong body: Line too long (127 > 88 characters) diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index 33d104c0650a8..76219a5acd4a1 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -32,3 +32,6 @@ mimalloc = "0.1.34" [target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dev-dependencies] tikv-jemallocator = "0.5.0" + +[features] +logical_lines = [ "ruff/logical_lines" ]