diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8b8598c432d49..92556526f81c7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,6 +32,8 @@ jobs: # Flag that is raised when any code is changed # This is superset of the linter and formatter code: ${{ steps.changed.outputs.code_any_changed }} + # Flag that is raised when any code that affects the fuzzer is changed + fuzz: ${{ steps.changed.outputs.fuzz_any_changed }} steps: - uses: actions/checkout@v4 with: @@ -79,6 +81,11 @@ jobs: - python/** - .github/workflows/ci.yaml + fuzz: + - fuzz/Cargo.toml + - fuzz/Cargo.lock + - fuzz/fuzz_targets/** + code: - "**/*" - "!**/*.md" @@ -288,7 +295,7 @@ jobs: name: "cargo fuzz build" runs-on: ubuntu-latest needs: determine_changes - if: ${{ github.ref == 'refs/heads/main' }} + if: ${{ github.ref == 'refs/heads/main' || needs.determine_changes.outputs.fuzz == 'true' }} timeout-minutes: 10 steps: - uses: actions/checkout@v4 diff --git a/crates/red_knot_python_semantic/src/types/builder.rs b/crates/red_knot_python_semantic/src/types/builder.rs index 067fccdcf237f..6077dbd9fda74 100644 --- a/crates/red_knot_python_semantic/src/types/builder.rs +++ b/crates/red_knot_python_semantic/src/types/builder.rs @@ -378,7 +378,6 @@ mod tests { use crate::db::tests::TestDb; use crate::program::{Program, SearchPathSettings}; use crate::python_version::PythonVersion; - use crate::stdlib::typing_symbol; use crate::types::{global_symbol, todo_type, KnownClass, UnionBuilder}; use crate::ProgramSettings; use ruff_db::files::system_path_to_file; @@ -626,59 +625,85 @@ mod tests { #[test] fn intersection_negation_distributes_over_union() { - let db = setup_db(); - let st = typing_symbol(&db, "Sized").expect_type().to_instance(&db); - let ht = typing_symbol(&db, "Hashable") + let mut db = setup_db(); + db.write_dedented( + "/src/module.py", + r#" + class A: ... + class B: ... 
+ "#, + ) + .unwrap(); + let module = ruff_db::files::system_path_to_file(&db, "/src/module.py").unwrap(); + + let a = global_symbol(&db, module, "A") + .expect_type() + .to_instance(&db); + let b = global_symbol(&db, module, "B") .expect_type() .to_instance(&db); - // sh_t: Sized & Hashable - let sh_t = IntersectionBuilder::new(&db) - .add_positive(st) - .add_positive(ht) + + // intersection: A & B + let intersection = IntersectionBuilder::new(&db) + .add_positive(a) + .add_positive(b) .build() .expect_intersection(); - assert_eq!(sh_t.pos_vec(&db), &[st, ht]); - assert_eq!(sh_t.neg_vec(&db), &[]); + assert_eq!(intersection.pos_vec(&db), &[a, b]); + assert_eq!(intersection.neg_vec(&db), &[]); - // ~sh_t => ~Sized | ~Hashable - let not_s_h_t = IntersectionBuilder::new(&db) - .add_negative(Type::Intersection(sh_t)) + // ~intersection => ~A | ~B + let negated_intersection = IntersectionBuilder::new(&db) + .add_negative(Type::Intersection(intersection)) .build() .expect_union(); - // should have as elements: (~Sized),(~Hashable) - let not_st = st.negate(&db); - let not_ht = ht.negate(&db); - assert_eq!(not_s_h_t.elements(&db), &[not_st, not_ht]); + // should have as elements ~A and ~B + let not_a = a.negate(&db); + let not_b = b.negate(&db); + assert_eq!(negated_intersection.elements(&db), &[not_a, not_b]); } #[test] fn mixed_intersection_negation_distributes_over_union() { - let db = setup_db(); - let it = KnownClass::Int.to_instance(&db); - let st = typing_symbol(&db, "Sized").expect_type().to_instance(&db); - let ht = typing_symbol(&db, "Hashable") + let mut db = setup_db(); + db.write_dedented( + "/src/module.py", + r#" + class A: ... + class B: ... + "#, + ) + .unwrap(); + let module = ruff_db::files::system_path_to_file(&db, "/src/module.py").unwrap(); + + let a = global_symbol(&db, module, "A") + .expect_type() + .to_instance(&db); + let b = global_symbol(&db, module, "B") .expect_type() .to_instance(&db); - // s_not_h_t: Sized & ~Hashable - let s_not_h_t = IntersectionBuilder::new(&db) - .add_positive(st) - .add_negative(ht) + let int = KnownClass::Int.to_instance(&db); + + // a_not_b: A & ~B + let a_not_b = IntersectionBuilder::new(&db) + .add_positive(a) + .add_negative(b) .build() .expect_intersection(); - assert_eq!(s_not_h_t.pos_vec(&db), &[st]); - assert_eq!(s_not_h_t.neg_vec(&db), &[ht]); - - // let's build int & ~(Sized & ~Hashable) - let tt = IntersectionBuilder::new(&db) - .add_positive(it) - .add_negative(Type::Intersection(s_not_h_t)) + assert_eq!(a_not_b.pos_vec(&db), &[a]); + assert_eq!(a_not_b.neg_vec(&db), &[b]); + + // let's build + // int & ~(A & ~B) + // = int & ~(A & ~B) + // = int & (~A | B) + // = (int & ~A) | (int & B) + let t = IntersectionBuilder::new(&db) + .add_positive(int) + .add_negative(Type::Intersection(a_not_b)) .build(); - - // int & ~(Sized & ~Hashable) - // -> int & (~Sized | Hashable) - // -> (int & ~Sized) | (int & Hashable) - assert_eq!(tt.display(&db).to_string(), "int & ~Sized | int & Hashable"); + assert_eq!(t.display(&db).to_string(), "int & ~A | int & B"); } #[test] diff --git a/crates/ruff_linter/resources/test/fixtures/airflow/AIR001.py b/crates/ruff_linter/resources/test/fixtures/airflow/AIR001.py index 6e8bffcd9754d..d0b6d2443ea98 100644 --- a/crates/ruff_linter/resources/test/fixtures/airflow/AIR001.py +++ b/crates/ruff_linter/resources/test/fixtures/airflow/AIR001.py @@ -1,4 +1,6 @@ from airflow.operators import PythonOperator +from airflow.providers.airbyte.operators.airbyte import AirbyteTriggerSyncOperator +from 
airflow.providers.amazon.aws.operators.appflow import AppflowFlowRunOperator def my_callable(): @@ -6,11 +8,15 @@ def my_callable(): my_task = PythonOperator(task_id="my_task", callable=my_callable) -my_task_2 = PythonOperator(callable=my_callable, task_id="my_task_2") +incorrect_name = PythonOperator(task_id="my_task") # AIR001 -incorrect_name = PythonOperator(task_id="my_task") -incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2") +my_task = AirbyteTriggerSyncOperator(task_id="my_task", callable=my_callable) +incorrect_name = AirbyteTriggerSyncOperator(task_id="my_task") # AIR001 -from my_module import MyClass +my_task = AppflowFlowRunOperator(task_id="my_task", callable=my_callable) +incorrect_name = AppflowFlowRunOperator(task_id="my_task") # AIR001 -incorrect_name = MyClass(task_id="my_task") +# Consider only from the `airflow.operators` (or providers operators) module +from airflow import MyOperator + +incorrect_name = MyOperator(task_id="my_task") diff --git a/crates/ruff_linter/resources/test/fixtures/airflow/AIR302_names.py b/crates/ruff_linter/resources/test/fixtures/airflow/AIR302_names.py index 299c87b9feef1..63205874a8e2d 100644 --- a/crates/ruff_linter/resources/test/fixtures/airflow/AIR302_names.py +++ b/crates/ruff_linter/resources/test/fixtures/airflow/AIR302_names.py @@ -1,12 +1,52 @@ +from airflow.triggers.external_task import TaskStateTrigger +from airflow.www.auth import has_access +from airflow.api_connexion.security import requires_access +from airflow.metrics.validators import AllowListValidator +from airflow.metrics.validators import BlockListValidator from airflow.utils import dates -from airflow.utils.dates import date_range, datetime_to_nano, days_ago +from airflow.utils.dates import ( + date_range, + datetime_to_nano, + days_ago, + infer_time_unit, + parse_execution_date, + round_time, + scale_time_units, +) +from airflow.utils.file import TemporaryDirectory, mkdirs +from airflow.utils.state import SHUTDOWN, terminating_states +from airflow.utils.dag_cycle_tester import test_cycle + + +TaskStateTrigger -date_range -days_ago + +has_access +requires_access + +AllowListValidator +BlockListValidator dates.date_range dates.days_ago +date_range +days_ago +parse_execution_date +round_time +scale_time_units +infer_time_unit + + # This one was not deprecated. 
datetime_to_nano dates.datetime_to_nano + +TemporaryDirectory +mkdirs + +SHUTDOWN +terminating_states + + +test_cycle diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py index 7e60a686d228b..735881301ec5b 100644 --- a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py @@ -57,3 +57,12 @@ f"{{}}+-\d" f"\n{{}}+-\d+" f"\n{{}}�+-\d+" + +# See https://github.com/astral-sh/ruff/issues/11491 +total = 10 +ok = 7 +incomplete = 3 +s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n" + +# Debug text (should trigger) +t = f"{'\InHere'=}" diff --git a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs index 61e1bf43a6356..73c86c2b33bee 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/statement.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/statement.rs @@ -1554,11 +1554,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) { .rules .enabled(Rule::AirflowVariableNameTaskIdMismatch) { - if let Some(diagnostic) = - airflow::rules::variable_name_task_id(checker, targets, value) - { - checker.diagnostics.push(diagnostic); - } + airflow::rules::variable_name_task_id(checker, targets, value); } if checker.settings.rules.enabled(Rule::SelfAssigningVariable) { pylint::rules::self_assignment(checker, assign); diff --git a/crates/ruff_linter/src/rules/airflow/rules/removal_in_3.rs b/crates/ruff_linter/src/rules/airflow/rules/removal_in_3.rs index 527b5cd402275..3f450a7978c81 100644 --- a/crates/ruff_linter/src/rules/airflow/rules/removal_in_3.rs +++ b/crates/ruff_linter/src/rules/airflow/rules/removal_in_3.rs @@ -51,7 +51,7 @@ impl Violation for Airflow3Removal { match replacement { Replacement::None => format!("`{deprecated}` is removed in Airflow 3.0"), Replacement::Name(name) => { - format!("`{deprecated}` is removed in Airflow 3.0; use {name} instead") + format!("`{deprecated}` is removed in Airflow 3.0; use `{name}` instead") } } } @@ -103,13 +103,75 @@ fn removed_name(checker: &mut Checker, expr: &Expr, ranged: impl Ranged) { .semantic() .resolve_qualified_name(expr) .and_then(|qualname| match qualname.segments() { - ["airflow", "utils", "dates", "date_range"] => { + ["airflow", "triggers", "external_task", "TaskStateTrigger"] => { Some((qualname.to_string(), Replacement::None)) } + ["airflow", "www", "auth", "has_access"] => Some(( + qualname.to_string(), + Replacement::Name("airflow.www.auth.has_access_*".to_string()), + )), + ["airflow", "api_connexion", "security", "requires_access"] => Some(( + qualname.to_string(), + Replacement::Name( + "airflow.api_connexion.security.requires_access_*".to_string(), + ), + )), + // airflow.metrics.validators + ["airflow", "metrics", "validators", "AllowListValidator"] => Some(( + qualname.to_string(), + Replacement::Name( + "airflow.metrics.validators.PatternAllowListValidator".to_string(), + ), + )), + ["airflow", "metrics", "validators", "BlockListValidator"] => Some(( + qualname.to_string(), + Replacement::Name( + "airflow.metrics.validators.PatternBlockListValidator".to_string(), + ), + )), + // airflow.utils.dates + ["airflow", "utils", "dates", "date_range"] => Some(( + qualname.to_string(), + Replacement::Name("airflow.timetables.".to_string()), + )), ["airflow", "utils", "dates", "days_ago"] => Some(( qualname.to_string(), - Replacement::Name("datetime.timedelta()".to_string()), + 
Replacement::Name("pendulum.today('UTC').add(days=-N, ...)".to_string()), + )), + ["airflow", "utils", "dates", "parse_execution_date"] => { + Some((qualname.to_string(), Replacement::None)) + } + ["airflow", "utils", "dates", "round_time"] => { + Some((qualname.to_string(), Replacement::None)) + } + ["airflow", "utils", "dates", "scale_time_units"] => { + Some((qualname.to_string(), Replacement::None)) + } + ["airflow", "utils", "dates", "infer_time_unit"] => { + Some((qualname.to_string(), Replacement::None)) + } + // airflow.utils.file + ["airflow", "utils", "file", "TemporaryDirectory"] => { + Some((qualname.to_string(), Replacement::None)) + } + ["airflow", "utils", "file", "mkdirs"] => Some(( + qualname.to_string(), + Replacement::Name("pendulum.today('UTC').add(days=-N, ...)".to_string()), )), + // airflow.utils.state + ["airflow", "utils", "state", "SHUTDOWN"] => { + Some((qualname.to_string(), Replacement::None)) + } + ["airflow", "utils", "state", "terminating_states"] => { + Some((qualname.to_string(), Replacement::None)) + } + // airflow.uilts + ["airflow", "utils", "dag_cycle_tester", "test_cycle"] => { + Some((qualname.to_string(), Replacement::None)) + } + ["airflow", "utils", "decorators", "apply_defaults"] => { + Some((qualname.to_string(), Replacement::None)) + } _ => None, }); if let Some((deprecated, replacement)) = result { diff --git a/crates/ruff_linter/src/rules/airflow/rules/task_variable_name.rs b/crates/ruff_linter/src/rules/airflow/rules/task_variable_name.rs index a9d7a537d1073..dc018d613ee75 100644 --- a/crates/ruff_linter/src/rules/airflow/rules/task_variable_name.rs +++ b/crates/ruff_linter/src/rules/airflow/rules/task_variable_name.rs @@ -45,21 +45,17 @@ impl Violation for AirflowVariableNameTaskIdMismatch { } /// AIR001 -pub(crate) fn variable_name_task_id( - checker: &mut Checker, - targets: &[Expr], - value: &Expr, -) -> Option { +pub(crate) fn variable_name_task_id(checker: &mut Checker, targets: &[Expr], value: &Expr) { if !checker.semantic().seen_module(Modules::AIRFLOW) { - return None; + return; } // If we have more than one target, we can't do anything. let [target] = targets else { - return None; + return; }; let Expr::Name(ast::ExprName { id, .. }) = target else { - return None; + return; }; // If the value is not a call, we can't do anything. @@ -67,33 +63,58 @@ pub(crate) fn variable_name_task_id( func, arguments, .. }) = value else { - return None; + return; }; - // If the function doesn't come from Airflow, we can't do anything. + // If the function doesn't come from Airflow's operators module (builtin or providers), we + // can't do anything. if !checker .semantic() .resolve_qualified_name(func) - .is_some_and(|qualified_name| matches!(qualified_name.segments(), ["airflow", ..])) + .is_some_and(|qualified_name| { + match qualified_name.segments() { + // Match `airflow.operators.*` + ["airflow", "operators", ..] => true, + + // Match `airflow.providers.**.operators.*` + ["airflow", "providers", rest @ ..] => { + // Ensure 'operators' exists somewhere in the middle + if let Some(pos) = rest.iter().position(|&s| s == "operators") { + pos + 1 < rest.len() // Check that 'operators' is not the last element + } else { + false + } + } + + _ => false, + } + }) { - return None; + return; } // If the call doesn't have a `task_id` keyword argument, we can't do anything. 
- let keyword = arguments.find_keyword("task_id")?; + let Some(keyword) = arguments.find_keyword("task_id") else { + return; + }; // If the keyword argument is not a string, we can't do anything. - let ast::ExprStringLiteral { value: task_id, .. } = keyword.value.as_string_literal_expr()?; + let Some(ast::ExprStringLiteral { value: task_id, .. }) = + keyword.value.as_string_literal_expr() + else { + return; + }; // If the target name is the same as the task_id, no violation. if task_id == id.as_str() { - return None; + return; } - Some(Diagnostic::new( + let diagnostic = Diagnostic::new( AirflowVariableNameTaskIdMismatch { task_id: task_id.to_string(), }, target.range(), - )) + ); + checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR001_AIR001.py.snap b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR001_AIR001.py.snap index 9c1d56b6e88f3..1da4d56d20539 100644 --- a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR001_AIR001.py.snap +++ b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR001_AIR001.py.snap @@ -1,21 +1,29 @@ --- source: crates/ruff_linter/src/rules/airflow/mod.rs -snapshot_kind: text --- AIR001.py:11:1: AIR001 Task variable name should match the `task_id`: "my_task" | - 9 | my_task_2 = PythonOperator(callable=my_callable, task_id="my_task_2") -10 | -11 | incorrect_name = PythonOperator(task_id="my_task") +10 | my_task = PythonOperator(task_id="my_task", callable=my_callable) +11 | incorrect_name = PythonOperator(task_id="my_task") # AIR001 | ^^^^^^^^^^^^^^ AIR001 -12 | incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2") +12 | +13 | my_task = AirbyteTriggerSyncOperator(task_id="my_task", callable=my_callable) | -AIR001.py:12:1: AIR001 Task variable name should match the `task_id`: "my_task_2" +AIR001.py:14:1: AIR001 Task variable name should match the `task_id`: "my_task" | -11 | incorrect_name = PythonOperator(task_id="my_task") -12 | incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2") - | ^^^^^^^^^^^^^^^^ AIR001 -13 | -14 | from my_module import MyClass +13 | my_task = AirbyteTriggerSyncOperator(task_id="my_task", callable=my_callable) +14 | incorrect_name = AirbyteTriggerSyncOperator(task_id="my_task") # AIR001 + | ^^^^^^^^^^^^^^ AIR001 +15 | +16 | my_task = AppflowFlowRunOperator(task_id="my_task", callable=my_callable) + | + +AIR001.py:17:1: AIR001 Task variable name should match the `task_id`: "my_task" + | +16 | my_task = AppflowFlowRunOperator(task_id="my_task", callable=my_callable) +17 | incorrect_name = AppflowFlowRunOperator(task_id="my_task") # AIR001 + | ^^^^^^^^^^^^^^ AIR001 +18 | +19 | # Consider only from the `airflow.operators` (or providers operators) module | diff --git a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_args.py.snap b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_args.py.snap index 23725fc463073..0f1114cc34181 100644 --- a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_args.py.snap +++ b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_args.py.snap @@ -1,7 +1,8 @@ --- source: crates/ruff_linter/src/rules/airflow/mod.rs +snapshot_kind: text --- -AIR302_args.py:6:39: AIR302 `schedule_interval` is 
removed in Airflow 3.0; use schedule instead +AIR302_args.py:6:39: AIR302 `schedule_interval` is removed in Airflow 3.0; use `schedule` instead | 4 | DAG(dag_id="class_schedule", schedule="@hourly") 5 | @@ -11,7 +12,7 @@ AIR302_args.py:6:39: AIR302 `schedule_interval` is removed in Airflow 3.0; use s 8 | DAG(dag_id="class_timetable", timetable=NullTimetable()) | -AIR302_args.py:8:31: AIR302 `timetable` is removed in Airflow 3.0; use schedule instead +AIR302_args.py:8:31: AIR302 `timetable` is removed in Airflow 3.0; use `schedule` instead | 6 | DAG(dag_id="class_schedule_interval", schedule_interval="@hourly") 7 | @@ -19,7 +20,7 @@ AIR302_args.py:8:31: AIR302 `timetable` is removed in Airflow 3.0; use schedule | ^^^^^^^^^ AIR302 | -AIR302_args.py:16:6: AIR302 `schedule_interval` is removed in Airflow 3.0; use schedule instead +AIR302_args.py:16:6: AIR302 `schedule_interval` is removed in Airflow 3.0; use `schedule` instead | 16 | @dag(schedule_interval="0 * * * *") | ^^^^^^^^^^^^^^^^^ AIR302 @@ -27,7 +28,7 @@ AIR302_args.py:16:6: AIR302 `schedule_interval` is removed in Airflow 3.0; use s 18 | pass | -AIR302_args.py:21:6: AIR302 `timetable` is removed in Airflow 3.0; use schedule instead +AIR302_args.py:21:6: AIR302 `timetable` is removed in Airflow 3.0; use `schedule` instead | 21 | @dag(timetable=NullTimetable()) | ^^^^^^^^^ AIR302 diff --git a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_names.py.snap b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_names.py.snap index f97e1dc4f110f..13ff8b0fd3f7c 100644 --- a/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_names.py.snap +++ b/crates/ruff_linter/src/rules/airflow/snapshots/ruff_linter__rules__airflow__tests__AIR302_AIR302_names.py.snap @@ -1,38 +1,157 @@ --- source: crates/ruff_linter/src/rules/airflow/mod.rs +snapshot_kind: text --- -AIR302_names.py:4:1: AIR302 `airflow.utils.dates.date_range` is removed in Airflow 3.0 - | -2 | from airflow.utils.dates import date_range, datetime_to_nano, days_ago -3 | -4 | date_range - | ^^^^^^^^^^ AIR302 -5 | days_ago - | - -AIR302_names.py:5:1: AIR302 `airflow.utils.dates.days_ago` is removed in Airflow 3.0; use datetime.timedelta() instead - | -4 | date_range -5 | days_ago - | ^^^^^^^^ AIR302 -6 | -7 | dates.date_range - | - -AIR302_names.py:7:7: AIR302 `airflow.utils.dates.date_range` is removed in Airflow 3.0 - | -5 | days_ago -6 | -7 | dates.date_range - | ^^^^^^^^^^ AIR302 -8 | dates.days_ago - | - -AIR302_names.py:8:7: AIR302 `airflow.utils.dates.days_ago` is removed in Airflow 3.0; use datetime.timedelta() instead - | - 7 | dates.date_range - 8 | dates.days_ago +AIR302_names.py:21:1: AIR302 `airflow.triggers.external_task.TaskStateTrigger` is removed in Airflow 3.0 + | +21 | TaskStateTrigger + | ^^^^^^^^^^^^^^^^ AIR302 + | + +AIR302_names.py:24:1: AIR302 `airflow.www.auth.has_access` is removed in Airflow 3.0; use `airflow.www.auth.has_access_*` instead + | +24 | has_access + | ^^^^^^^^^^ AIR302 +25 | requires_access + | + +AIR302_names.py:25:1: AIR302 `airflow.api_connexion.security.requires_access` is removed in Airflow 3.0; use `airflow.api_connexion.security.requires_access_*` instead + | +24 | has_access +25 | requires_access + | ^^^^^^^^^^^^^^^ AIR302 +26 | +27 | AllowListValidator + | + +AIR302_names.py:27:1: AIR302 `airflow.metrics.validators.AllowListValidator` is removed in Airflow 3.0; use 
`airflow.metrics.validators.PatternAllowListValidator` instead + | +25 | requires_access +26 | +27 | AllowListValidator + | ^^^^^^^^^^^^^^^^^^ AIR302 +28 | BlockListValidator + | + +AIR302_names.py:28:1: AIR302 `airflow.metrics.validators.BlockListValidator` is removed in Airflow 3.0; use `airflow.metrics.validators.PatternBlockListValidator` instead + | +27 | AllowListValidator +28 | BlockListValidator + | ^^^^^^^^^^^^^^^^^^ AIR302 +29 | +30 | dates.date_range + | + +AIR302_names.py:30:7: AIR302 `airflow.utils.dates.date_range` is removed in Airflow 3.0; use `airflow.timetables.` instead + | +28 | BlockListValidator +29 | +30 | dates.date_range + | ^^^^^^^^^^ AIR302 +31 | dates.days_ago + | + +AIR302_names.py:31:7: AIR302 `airflow.utils.dates.days_ago` is removed in Airflow 3.0; use `pendulum.today('UTC').add(days=-N, ...)` instead + | +30 | dates.date_range +31 | dates.days_ago | ^^^^^^^^ AIR302 - 9 | -10 | # This one was not deprecated. +32 | +33 | date_range + | + +AIR302_names.py:33:1: AIR302 `airflow.utils.dates.date_range` is removed in Airflow 3.0; use `airflow.timetables.` instead + | +31 | dates.days_ago +32 | +33 | date_range + | ^^^^^^^^^^ AIR302 +34 | days_ago +35 | parse_execution_date + | + +AIR302_names.py:34:1: AIR302 `airflow.utils.dates.days_ago` is removed in Airflow 3.0; use `pendulum.today('UTC').add(days=-N, ...)` instead + | +33 | date_range +34 | days_ago + | ^^^^^^^^ AIR302 +35 | parse_execution_date +36 | round_time + | + +AIR302_names.py:35:1: AIR302 `airflow.utils.dates.parse_execution_date` is removed in Airflow 3.0 + | +33 | date_range +34 | days_ago +35 | parse_execution_date + | ^^^^^^^^^^^^^^^^^^^^ AIR302 +36 | round_time +37 | scale_time_units + | + +AIR302_names.py:36:1: AIR302 `airflow.utils.dates.round_time` is removed in Airflow 3.0 + | +34 | days_ago +35 | parse_execution_date +36 | round_time + | ^^^^^^^^^^ AIR302 +37 | scale_time_units +38 | infer_time_unit + | + +AIR302_names.py:37:1: AIR302 `airflow.utils.dates.scale_time_units` is removed in Airflow 3.0 + | +35 | parse_execution_date +36 | round_time +37 | scale_time_units + | ^^^^^^^^^^^^^^^^ AIR302 +38 | infer_time_unit + | + +AIR302_names.py:38:1: AIR302 `airflow.utils.dates.infer_time_unit` is removed in Airflow 3.0 + | +36 | round_time +37 | scale_time_units +38 | infer_time_unit + | ^^^^^^^^^^^^^^^ AIR302 + | + +AIR302_names.py:45:1: AIR302 `airflow.utils.file.TemporaryDirectory` is removed in Airflow 3.0 + | +43 | dates.datetime_to_nano +44 | +45 | TemporaryDirectory + | ^^^^^^^^^^^^^^^^^^ AIR302 +46 | mkdirs + | + +AIR302_names.py:46:1: AIR302 `airflow.utils.file.mkdirs` is removed in Airflow 3.0; use `pendulum.today('UTC').add(days=-N, ...)` instead + | +45 | TemporaryDirectory +46 | mkdirs + | ^^^^^^ AIR302 +47 | +48 | SHUTDOWN + | + +AIR302_names.py:48:1: AIR302 `airflow.utils.state.SHUTDOWN` is removed in Airflow 3.0 + | +46 | mkdirs +47 | +48 | SHUTDOWN + | ^^^^^^^^ AIR302 +49 | terminating_states + | + +AIR302_names.py:49:1: AIR302 `airflow.utils.state.terminating_states` is removed in Airflow 3.0 + | +48 | SHUTDOWN +49 | terminating_states + | ^^^^^^^^^^^^^^^^^^ AIR302 + | + +AIR302_names.py:52:1: AIR302 `airflow.utils.dag_cycle_tester.test_cycle` is removed in Airflow 3.0 + | +52 | test_cycle + | ^^^^^^^^^^ AIR302 | diff --git a/crates/ruff_linter/src/rules/flake8_type_checking/rules/runtime_cast_value.rs b/crates/ruff_linter/src/rules/flake8_type_checking/rules/runtime_cast_value.rs index 446cf5afd1708..27593b6a1647e 100644 --- 
a/crates/ruff_linter/src/rules/flake8_type_checking/rules/runtime_cast_value.rs +++ b/crates/ruff_linter/src/rules/flake8_type_checking/rules/runtime_cast_value.rs @@ -8,11 +8,22 @@ use crate::checkers::ast::Checker; use crate::rules::flake8_type_checking::helpers::quote_type_expression; /// ## What it does -/// Checks for an unquoted type expression in `typing.cast()` calls. +/// Checks for unquoted type expressions in `typing.cast()` calls. /// /// ## Why is this bad? -/// `typing.cast()` does not do anything at runtime, so the time spent -/// on evaluating the type expression is wasted. +/// This rule helps enforce a consistent style across your codebase. +/// +/// It's often necessary to quote the first argument passed to `cast()`, +/// as type expressions can involve forward references, or references +/// to symbols which are only imported in `typing.TYPE_CHECKING` blocks. +/// This can lead to a visual inconsistency across different `cast()` calls, +/// where some type expressions are quoted but others are not. By enabling +/// this rule, you ensure that all type expressions passed to `cast()` are +/// quoted, enforcing stylistic consistency across all of your `cast()` calls. +/// +/// In some cases where `cast()` is used in a hot loop, this rule may also +/// help avoid overhead from repeatedly evaluating complex type expressions at +/// runtime. /// /// ## Example /// ```python diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs index 68d13e0a4174c..928f7ff1ebe2f 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs @@ -66,70 +66,75 @@ pub(crate) fn invalid_escape_sequence(checker: &mut Checker, string_like: String if part.flags().is_raw_string() { continue; } - match part { - StringLikePart::String(string_literal) => { - check( - &mut checker.diagnostics, - locator, - string_literal.start(), - string_literal.range(), - AnyStringFlags::from(string_literal.flags), - ); - } - StringLikePart::Bytes(bytes_literal) => { - check( - &mut checker.diagnostics, - locator, - bytes_literal.start(), - bytes_literal.range(), - AnyStringFlags::from(bytes_literal.flags), - ); + let state = match part { + StringLikePart::String(_) | StringLikePart::Bytes(_) => { + analyze_escape_chars(locator, part.range(), part.flags()) } StringLikePart::FString(f_string) => { let flags = AnyStringFlags::from(f_string.flags); + let mut escape_chars_state = EscapeCharsState::default(); + // Whether we suggest converting to a raw string or + // adding backslashes depends on the presence of valid + // escape characters in the entire f-string. Therefore, + // we must analyze escape characters in each f-string + // element before pushing a diagnostic and fix. 
for element in &f_string.elements { match element { FStringElement::Literal(literal) => { - check( - &mut checker.diagnostics, + escape_chars_state.update(analyze_escape_chars( locator, - f_string.start(), literal.range(), flags, - ); + )); } FStringElement::Expression(expression) => { let Some(format_spec) = expression.format_spec.as_ref() else { continue; }; for literal in format_spec.elements.literals() { - check( - &mut checker.diagnostics, + escape_chars_state.update(analyze_escape_chars( locator, - f_string.start(), literal.range(), flags, - ); + )); } } } } + escape_chars_state } - } + }; + check( + &mut checker.diagnostics, + locator, + part.start(), + part.flags(), + state, + ); } } -fn check( - diagnostics: &mut Vec, +#[derive(Default)] +struct EscapeCharsState { + contains_valid_escape_sequence: bool, + invalid_escape_chars: Vec, +} + +impl EscapeCharsState { + fn update(&mut self, other: Self) { + self.contains_valid_escape_sequence |= other.contains_valid_escape_sequence; + self.invalid_escape_chars.extend(other.invalid_escape_chars); + } +} + +/// Traverses string, collects invalid escape characters, and flags if a valid +/// escape character is found. +fn analyze_escape_chars( locator: &Locator, - // Start position of the expression that contains the source range. This is used to generate - // the fix when the source range is part of the expression like in f-string which contains - // other f-string literal elements. - expr_start: TextSize, - // Range in the source code to perform the check on. + // Range in the source code to perform the analysis on. source_range: TextRange, flags: AnyStringFlags, -) { +) -> EscapeCharsState { let source = locator.slice(source_range); let mut contains_valid_escape_sequence = false; let mut invalid_escape_chars = Vec::new(); @@ -225,7 +230,31 @@ fn check( range, }); } + EscapeCharsState { + contains_valid_escape_sequence, + invalid_escape_chars, + } +} +/// Pushes a diagnostic and fix depending on escape characters seen so far. +/// +/// If we have not seen any valid escape characters, we convert to +/// a raw string. If we have seen valid escape characters, +/// we manually add backslashes to each invalid escape character found. +fn check( + diagnostics: &mut Vec, + locator: &Locator, + // Start position of the expression that contains the source range. This is used to generate + // the fix when the source range is part of the expression like in f-string which contains + // other f-string literal elements. + expr_start: TextSize, + flags: AnyStringFlags, + escape_chars_state: EscapeCharsState, +) { + let EscapeCharsState { + contains_valid_escape_sequence, + invalid_escape_chars, + } = escape_chars_state; if contains_valid_escape_sequence { // Escape with backslash. 
for invalid_escape_char in &invalid_escape_chars { diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap index fc4e14b01785e..9e49faa22840e 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap @@ -1,6 +1,5 @@ --- source: crates/ruff_linter/src/rules/pycodestyle/mod.rs -snapshot_kind: text --- W605_1.py:4:11: W605 [*] Invalid escape sequence: `\.` | @@ -243,6 +242,7 @@ W605_1.py:57:9: W605 [*] Invalid escape sequence: `\d` 57 |+rf"{{}}+-\d" 58 58 | f"\n{{}}+-\d+" 59 59 | f"\n{{}}�+-\d+" +60 60 | W605_1.py:58:11: W605 [*] Invalid escape sequence: `\d` | @@ -261,6 +261,8 @@ W605_1.py:58:11: W605 [*] Invalid escape sequence: `\d` 58 |-f"\n{{}}+-\d+" 58 |+f"\n{{}}+-\\d+" 59 59 | f"\n{{}}�+-\d+" +60 60 | +61 61 | # See https://github.com/astral-sh/ruff/issues/11491 W605_1.py:59:12: W605 [*] Invalid escape sequence: `\d` | @@ -268,6 +270,8 @@ W605_1.py:59:12: W605 [*] Invalid escape sequence: `\d` 58 | f"\n{{}}+-\d+" 59 | f"\n{{}}�+-\d+" | ^^ W605 +60 | +61 | # See https://github.com/astral-sh/ruff/issues/11491 | = help: Add backslash to escape sequence @@ -277,3 +281,42 @@ W605_1.py:59:12: W605 [*] Invalid escape sequence: `\d` 58 58 | f"\n{{}}+-\d+" 59 |-f"\n{{}}�+-\d+" 59 |+f"\n{{}}�+-\\d+" +60 60 | +61 61 | # See https://github.com/astral-sh/ruff/issues/11491 +62 62 | total = 10 + +W605_1.py:65:31: W605 [*] Invalid escape sequence: `\I` + | +63 | ok = 7 +64 | incomplete = 3 +65 | s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n" + | ^^ W605 +66 | +67 | # Debug text (should trigger) + | + = help: Add backslash to escape sequence + +ℹ Safe fix +62 62 | total = 10 +63 63 | ok = 7 +64 64 | incomplete = 3 +65 |-s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n" + 65 |+s = f"TOTAL: {total}\nOK: {ok}\\INCOMPLETE: {incomplete}\n" +66 66 | +67 67 | # Debug text (should trigger) +68 68 | t = f"{'\InHere'=}" + +W605_1.py:68:9: W605 [*] Invalid escape sequence: `\I` + | +67 | # Debug text (should trigger) +68 | t = f"{'\InHere'=}" + | ^^ W605 + | + = help: Use a raw string literal + +ℹ Safe fix +65 65 | s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n" +66 66 | +67 67 | # Debug text (should trigger) +68 |-t = f"{'\InHere'=}" + 68 |+t = f"{r'\InHere'=}" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 24c0e57e3e85b..02eb20df65b63 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -17,6 +17,9 @@ libfuzzer = ["libfuzzer-sys/link_libfuzzer"] cargo-fuzz = true [dependencies] +red_knot_python_semantic = { path = "../crates/red_knot_python_semantic" } +red_knot_vendored = { path = "../crates/red_knot_vendored" } +ruff_db = { path = "../crates/ruff_db" } ruff_linter = { path = "../crates/ruff_linter" } ruff_python_ast = { path = "../crates/ruff_python_ast" } ruff_python_codegen = { path = "../crates/ruff_python_codegen" } @@ -26,12 +29,18 @@ ruff_python_formatter = { path = "../crates/ruff_python_formatter"} ruff_text_size = { path = "../crates/ruff_text_size" } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } +salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "254c749b02cde2fd29852a7463a33e800b771758" } similar = { 
version = "2.5.0" } +tracing = { version = "0.1.40" } # Prevent this from interfering with workspaces [workspace] members = ["."] +[[bin]] +name = "red_knot_check_invalid_syntax" +path = "fuzz_targets/red_knot_check_invalid_syntax.rs" + [[bin]] name = "ruff_parse_simple" path = "fuzz_targets/ruff_parse_simple.rs" diff --git a/fuzz/README.md b/fuzz/README.md index 1b91e57d05c1f..5265149b8d8bf 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -74,6 +74,15 @@ Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect them in different ways. While there is implementation-specific documentation in the source code itself, each harness is briefly described below. +### `red_knot_check_invalid_syntax` + +This fuzz harness checks that the type checker (Red Knot) does not panic when checking a source +file with invalid syntax. This rejects any corpus entries that is already valid Python code. +Currently, this is limited to syntax errors that's produced by Ruff's Python parser which means +that it does not cover all possible syntax errors (). +A possible workaround for now would be to bypass the parser and run the type checker on all inputs +regardless of syntax errors. + ### `ruff_parse_simple` This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing diff --git a/fuzz/corpus/red_knot_check_invalid_syntax b/fuzz/corpus/red_knot_check_invalid_syntax new file mode 120000 index 0000000000000..38dc5bc1ea310 --- /dev/null +++ b/fuzz/corpus/red_knot_check_invalid_syntax @@ -0,0 +1 @@ +ruff_fix_validity \ No newline at end of file diff --git a/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs new file mode 100644 index 0000000000000..70e4fc55fab26 --- /dev/null +++ b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs @@ -0,0 +1,143 @@ +//! Fuzzer harness that runs the type checker to catch for panics for source code containing +//! syntax errors. + +#![no_main] + +use std::sync::{Mutex, OnceLock}; + +use libfuzzer_sys::{fuzz_target, Corpus}; + +use red_knot_python_semantic::types::check_types; +use red_knot_python_semantic::{ + Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings, +}; +use ruff_db::files::{system_path_to_file, File, Files}; +use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem}; +use ruff_db::vendored::VendoredFileSystem; +use ruff_db::{Db as SourceDb, Upcast}; +use ruff_python_parser::{parse_unchecked, Mode}; + +/// Database that can be used for testing. +/// +/// Uses an in memory filesystem and it stubs out the vendored files by default. 
+#[salsa::db] +struct TestDb { + storage: salsa::Storage<Self>, + files: Files, + system: TestSystem, + vendored: VendoredFileSystem, + events: std::sync::Arc<Mutex<Vec<salsa::Event>>>, +} + +impl TestDb { + fn new() -> Self { + Self { + storage: salsa::Storage::default(), + system: TestSystem::default(), + vendored: red_knot_vendored::file_system().clone(), + events: std::sync::Arc::default(), + files: Files::default(), + } + } +} + +#[salsa::db] +impl SourceDb for TestDb { + fn vendored(&self) -> &VendoredFileSystem { + &self.vendored + } + + fn system(&self) -> &dyn System { + &self.system + } + + fn files(&self) -> &Files { + &self.files + } +} + +impl DbWithTestSystem for TestDb { + fn test_system(&self) -> &TestSystem { + &self.system + } + + fn test_system_mut(&mut self) -> &mut TestSystem { + &mut self.system + } +} + +impl Upcast<dyn SourceDb> for TestDb { + fn upcast(&self) -> &(dyn SourceDb + 'static) { + self + } + fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) { + self + } +} + +#[salsa::db] +impl SemanticDb for TestDb { + fn is_file_open(&self, file: File) -> bool { + !file.path(self).is_vendored_path() + } +} + +#[salsa::db] +impl salsa::Database for TestDb { + fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) { + let event = event(); + tracing::trace!("event: {:?}", event); + let mut events = self.events.lock().unwrap(); + events.push(event); + } +} + +fn setup_db() -> TestDb { + let db = TestDb::new(); + + let src_root = SystemPathBuf::from("/src"); + db.memory_file_system() + .create_directory_all(&src_root) + .unwrap(); + + Program::from_settings( + &db, + &ProgramSettings { + target_version: PythonVersion::default(), + search_paths: SearchPathSettings::new(src_root), + }, + ) + .expect("Valid search path settings"); + + db +} + +static TEST_DB: OnceLock<Mutex<TestDb>> = OnceLock::new(); + +fn do_fuzz(case: &[u8]) -> Corpus { + let Ok(code) = std::str::from_utf8(case) else { + return Corpus::Reject; + }; + + let parsed = parse_unchecked(code, Mode::Module); + if parsed.is_valid() { + return Corpus::Reject; + } + + let mut db = TEST_DB + .get_or_init(|| Mutex::new(setup_db())) + .lock() + .unwrap(); + + for path in &["/src/a.py", "/src/a.pyi"] { + db.write_file(path, code).unwrap(); + let file = system_path_to_file(&*db, path).unwrap(); + check_types(&*db, file); + db.memory_file_system().remove_file(path).unwrap(); + file.sync(&mut *db); + } + + Corpus::Keep +} + +fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh index 77d4cdfcb30bf..a652897b267d6 100755 --- a/fuzz/init-fuzzer.sh +++ b/fuzz/init-fuzzer.sh @@ -11,16 +11,32 @@ fi if [ ! -d corpus/ruff_fix_validity ]; then mkdir -p corpus/ruff_fix_validity - read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r - echo - cd corpus/ruff_fix_validity - if [[ $REPLY =~ ^[Yy]$ ]]; then - curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz + + ( + cd corpus/ruff_fix_validity + + read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!)
[Y/n] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz + fi + + # Build a smaller corpus in addition to the (optional) larger corpus + curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz + cp -r "../../../crates/red_knot_workspace/resources/test/corpus" "red_knot_workspace" + cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter" + cp -r "../../../crates/ruff_python_formatter/resources/test/fixtures" "ruff_python_formatter" + cp -r "../../../crates/ruff_python_parser/resources" "ruff_python_parser" + + # Delete all non-Python files + find . -type f -not -name "*.py" -delete + ) + + if [[ "$OSTYPE" == "darwin"* ]]; then + cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5 + else + cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5 fi - curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz - cp -r "../../../crates/ruff_linter/resources/test" . - cd - - cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5 fi echo "Done! You are ready to fuzz."