Skip to content

Commit

Permalink
Respect tab width in line-length heuristic (astral-sh#6491)
Browse files Browse the repository at this point in the history
## Summary

In astral-sh#5811, I suggested that we add
a heuristic to the overlong-lines check: if the line has fewer bytes than
the character limit, return early. The idea was that one byte per
character is the "worst case", i.e., each byte contributes at most one
column of width. I overlooked that this isn't true for tabs: with tabs,
the "worst case" is that every byte is a tab, and a tab can have a width
greater than 1.

Closes astral-sh#6425.

## Test Plan

`cargo test` with a new fixture borrowed from the issue, plus manual
testing.
  • Loading branch information
charliermarsh authored and durumu committed Aug 12, 2023
1 parent 5c730a5 commit cf9e652
Show file tree
Hide file tree
Showing 8 changed files with 294 additions and 173 deletions.
25 changes: 15 additions & 10 deletions crates/ruff/resources/test/fixtures/pycodestyle/E501_2.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
# aaaa
# aaaaa
# a
# a
# aa
# aaa
# aaaa
# a
# aa
# aaa

b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""

c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""

d = """💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
d = """💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
if True: # noqa: E501
[12]
[12 ]
[1,2]
[1, 2]
4 changes: 2 additions & 2 deletions crates/ruff/src/line_width.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ impl PartialOrd<LineLength> for LineWidth {
/// The size of a tab.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, CacheKey)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct TabSize(pub NonZeroU8);
pub struct TabSize(NonZeroU8);

impl TabSize {
fn as_usize(self) -> usize {
pub(crate) fn as_usize(self) -> usize {
self.0.get() as usize
}
}
Expand Down
7 changes: 5 additions & 2 deletions crates/ruff/src/rules/pycodestyle/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@ pub(super) fn is_overlong(
task_tags: &[String],
tab_size: TabSize,
) -> Option<Overlong> {
// Each character is between 1-4 bytes. If the number of bytes is smaller than the limit, it cannot be overlong.
if line.len() < limit.get() {
// The maximum width of the line is the number of bytes multiplied by the tab size (the
// worst-case scenario is that the line is all tabs). If the maximum width is less than the
// limit, then the line is not overlong.
let max_width = line.len() * tab_size.as_usize();
if max_width < limit.get() {
return None;
}

Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ mod tests {
Path::new("pycodestyle/E501_2.py"),
&settings::Settings {
tab_size: NonZeroU8::new(tab_size).unwrap().into(),
line_length: LineLength::from(6),
..settings::Settings::for_rule(Rule::LineTooLong)
},
)?;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,56 +1,51 @@
---
source: crates/ruff/src/rules/pycodestyle/mod.rs
---
E501_2.py:1:81: E501 Line too long (89 > 88 characters)
E501_2.py:2:7: E501 Line too long (7 > 6 characters)
|
1 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
2 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
1 | # aaaa
2 | # aaaaa
| ^ E501
3 | # a
4 | # a
|

E501_2.py:2:81: E501 Line too long (89 > 88 characters)
E501_2.py:3:7: E501 Line too long (7 > 6 characters)
|
1 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
2 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
3 |
4 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
1 | # aaaa
2 | # aaaaa
3 | # a
| ^ E501
4 | # a
5 | # aa
|

E501_2.py:4:81: E501 Line too long (89 > 88 characters)
E501_2.py:7:7: E501 Line too long (7 > 6 characters)
|
2 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
3 |
4 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
5 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
5 | # aa
6 | # aaa
7 | # aaaa
| ^ E501
8 | # a
9 | # aa
|

E501_2.py:5:81: E501 Line too long (89 > 88 characters)
|
4 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
5 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
6 |
7 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
|

E501_2.py:7:82: E501 Line too long (89 > 88 characters)
|
5 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
6 |
7 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
8 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
|
E501_2.py:10:7: E501 Line too long (7 > 6 characters)
|
8 | # a
9 | # aa
10 | # aaa
| ^ E501
11 |
12 | if True: # noqa: E501
|

E501_2.py:10:82: E501 Line too long (89 > 88 characters)
E501_2.py:16:7: E501 Line too long (7 > 6 characters)
|
8 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
9 |
10 | d = """💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
11 | d = """💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
14 | [12 ]
15 | [1,2]
16 | [1, 2]
| ^ E501
|


Original file line number Diff line number Diff line change
@@ -1,56 +1,90 @@
---
source: crates/ruff/src/rules/pycodestyle/mod.rs
---
E501_2.py:1:81: E501 Line too long (89 > 88 characters)
E501_2.py:2:7: E501 Line too long (7 > 6 characters)
|
1 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
2 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
1 | # aaaa
2 | # aaaaa
| ^ E501
3 | # a
4 | # a
|

E501_2.py:2:81: E501 Line too long (89 > 88 characters)
E501_2.py:3:7: E501 Line too long (7 > 6 characters)
|
1 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
2 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
3 |
4 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
1 | # aaaa
2 | # aaaaa
3 | # a
| ^ E501
4 | # a
5 | # aa
|

E501_2.py:4:81: E501 Line too long (89 > 88 characters)
E501_2.py:6:6: E501 Line too long (7 > 6 characters)
|
2 | a = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
3 |
4 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
5 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
4 | # a
5 | # aa
6 | # aaa
| ^ E501
7 | # aaaa
8 | # a
|

E501_2.py:5:81: E501 Line too long (89 > 88 characters)
E501_2.py:7:6: E501 Line too long (8 > 6 characters)
|
4 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
5 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
6 |
7 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
5 | # aa
6 | # aaa
7 | # aaaa
| ^^ E501
8 | # a
9 | # aa
|

E501_2.py:7:82: E501 Line too long (89 > 88 characters)
|
5 | b = """ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
6 |
7 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
8 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
|
E501_2.py:8:5: E501 Line too long (7 > 6 characters)
|
6 | # aaa
7 | # aaaa
8 | # a
| ^ E501
9 | # aa
10 | # aaa
|

E501_2.py:9:5: E501 Line too long (8 > 6 characters)
|
7 | # aaaa
8 | # a
9 | # aa
| ^^ E501
10 | # aaa
|

E501_2.py:10:5: E501 Line too long (9 > 6 characters)
|
8 | # a
9 | # aa
10 | # aaa
| ^^^ E501
11 |
12 | if True: # noqa: E501
|

E501_2.py:14:6: E501 Line too long (7 > 6 characters)
|
12 | if True: # noqa: E501
13 | [12]
14 | [12 ]
| ^ E501
15 | [1,2]
16 | [1, 2]
|

E501_2.py:10:82: E501 Line too long (89 > 88 characters)
E501_2.py:16:6: E501 Line too long (8 > 6 characters)
|
8 | c = """2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
9 |
10 | d = """💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
| ^ E501
11 | d = """💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A67ß9💣2ℝ4A6"""
14 | [12 ]
15 | [1,2]
16 | [1, 2]
| ^^ E501
|


Loading

0 comments on commit cf9e652

Please sign in to comment.