diff --git a/README.md b/README.md index afa9c13220534..c87efcfc173c0 100644 --- a/README.md +++ b/README.md @@ -293,6 +293,7 @@ quality tools, including: - [pep8-naming](https://pypi.org/project/pep8-naming/) - [pydocstyle](https://pypi.org/project/pydocstyle/) - [pygrep-hooks](https://github.com/pre-commit/pygrep-hooks) +- [pylint-airflow](https://pypi.org/project/pylint-airflow/) - [pyupgrade](https://pypi.org/project/pyupgrade/) - [tryceratops](https://pypi.org/project/tryceratops/) - [yesqa](https://pypi.org/project/yesqa/) diff --git a/crates/ruff/resources/test/fixtures/airflow/AIR001.py b/crates/ruff/resources/test/fixtures/airflow/AIR001.py new file mode 100644 index 0000000000000..6e8bffcd9754d --- /dev/null +++ b/crates/ruff/resources/test/fixtures/airflow/AIR001.py @@ -0,0 +1,16 @@ +from airflow.operators import PythonOperator + + +def my_callable(): + pass + + +my_task = PythonOperator(task_id="my_task", callable=my_callable) +my_task_2 = PythonOperator(callable=my_callable, task_id="my_task_2") + +incorrect_name = PythonOperator(task_id="my_task") +incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2") + +from my_module import MyClass + +incorrect_name = MyClass(task_id="my_task") diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 4bacc82baee24..d1cb79e7cd7cb 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -45,7 +45,7 @@ use crate::noqa::NoqaMapping; use crate::registry::{AsRule, Rule}; use crate::rules::flake8_builtins::helpers::AnyShadowing; use crate::rules::{ - flake8_2020, flake8_annotations, flake8_async, flake8_bandit, flake8_blind_except, + airflow, flake8_2020, flake8_annotations, flake8_async, flake8_bandit, flake8_blind_except, flake8_boolean_trap, flake8_bugbear, flake8_builtins, flake8_comprehensions, flake8_datetimez, flake8_debugger, flake8_django, flake8_errmsg, flake8_future_annotations, flake8_gettext, flake8_implicit_str_concat, flake8_import_conventions, flake8_logging_format, flake8_pie, @@ -1636,11 +1636,9 @@ where pycodestyle::rules::lambda_assignment(self, target, value, None, stmt); } } - if self.enabled(Rule::AssignmentToOsEnviron) { flake8_bugbear::rules::assignment_to_os_environ(self, targets); } - if self.enabled(Rule::HardcodedPasswordString) { if let Some(diagnostic) = flake8_bandit::rules::assign_hardcoded_password_string(value, targets) @@ -1648,7 +1646,6 @@ where self.diagnostics.push(diagnostic); } } - if self.enabled(Rule::GlobalStatement) { for target in targets.iter() { if let Expr::Name(ast::ExprName { id, .. }) = target { @@ -1656,7 +1653,6 @@ where } } } - if self.enabled(Rule::UselessMetaclassType) { pyupgrade::rules::useless_metaclass_type(self, stmt, value, targets); } @@ -1673,13 +1669,22 @@ where if self.enabled(Rule::UnpackedListComprehension) { pyupgrade::rules::unpacked_list_comprehension(self, targets, value); } - if self.enabled(Rule::PandasDfVariableName) { if let Some(diagnostic) = pandas_vet::rules::assignment_to_df(targets) { self.diagnostics.push(diagnostic); } } - + if self + .settings + .rules + .enabled(Rule::AirflowVariableNameTaskIdMismatch) + { + if let Some(diagnostic) = + airflow::rules::variable_name_task_id(self, targets, value) + { + self.diagnostics.push(diagnostic); + } + } if self.is_stub { if self.any_enabled(&[ Rule::UnprefixedTypeParam, diff --git a/crates/ruff/src/codes.rs b/crates/ruff/src/codes.rs index 8fc12850a23ec..8e6aa2559364c 100644 --- a/crates/ruff/src/codes.rs +++ b/crates/ruff/src/codes.rs @@ -749,6 +749,9 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Flake8Todos, "006") => (RuleGroup::Unspecified, Rule::InvalidTodoCapitalization), (Flake8Todos, "007") => (RuleGroup::Unspecified, Rule::MissingSpaceAfterTodoColon), + // airflow + (Airflow, "001") => (RuleGroup::Unspecified, Rule::AirflowVariableNameTaskIdMismatch), + _ => return None, }) } diff --git a/crates/ruff/src/registry.rs b/crates/ruff/src/registry.rs index 53458771c463b..39ed117890874 100644 --- a/crates/ruff/src/registry.rs +++ b/crates/ruff/src/registry.rs @@ -668,6 +668,8 @@ ruff_macros::register_rules!( rules::flake8_todos::rules::MissingTodoDescription, rules::flake8_todos::rules::InvalidTodoCapitalization, rules::flake8_todos::rules::MissingSpaceAfterTodoColon, + // airflow + rules::airflow::rules::AirflowVariableNameTaskIdMismatch, ); pub trait AsRule { @@ -838,6 +840,9 @@ pub enum Linter { /// NumPy-specific rules #[prefix = "NPY"] Numpy, + /// [Airflow](https://pypi.org/project/apache-airflow/) + #[prefix = "AIR"] + Airflow, /// Ruff-specific rules #[prefix = "RUF"] Ruff, diff --git a/crates/ruff/src/rules/airflow/mod.rs b/crates/ruff/src/rules/airflow/mod.rs new file mode 100644 index 0000000000000..e6d6ddf804d6a --- /dev/null +++ b/crates/ruff/src/rules/airflow/mod.rs @@ -0,0 +1,25 @@ +//! Airflow-specific rules. +pub(crate) mod rules; + +#[cfg(test)] +mod tests { + use std::path::Path; + + use anyhow::Result; + use test_case::test_case; + + use crate::registry::Rule; + use crate::test::test_path; + use crate::{assert_messages, settings}; + + #[test_case(Rule::AirflowVariableNameTaskIdMismatch, Path::new("AIR001.py"); "AIR001")] + fn rules(rule_code: Rule, path: &Path) -> Result<()> { + let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); + let diagnostics = test_path( + Path::new("airflow").join(path).as_path(), + &settings::Settings::for_rule(rule_code), + )?; + assert_messages!(snapshot, diagnostics); + Ok(()) + } +} diff --git a/crates/ruff/src/rules/airflow/rules/mod.rs b/crates/ruff/src/rules/airflow/rules/mod.rs new file mode 100644 index 0000000000000..0dbd1cf914ea6 --- /dev/null +++ b/crates/ruff/src/rules/airflow/rules/mod.rs @@ -0,0 +1,3 @@ +mod task_variable_name; + +pub(crate) use task_variable_name::{variable_name_task_id, AirflowVariableNameTaskIdMismatch}; diff --git a/crates/ruff/src/rules/airflow/rules/task_variable_name.rs b/crates/ruff/src/rules/airflow/rules/task_variable_name.rs new file mode 100644 index 0000000000000..eae2490138f39 --- /dev/null +++ b/crates/ruff/src/rules/airflow/rules/task_variable_name.rs @@ -0,0 +1,102 @@ +use rustpython_parser::ast; +use rustpython_parser::ast::{Expr, Ranged}; + +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::prelude::Constant; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks that the task variable name matches the `task_id` value for +/// Airflow Operators. +/// +/// ## Why is this bad? +/// When initializing an Airflow Operator, for consistency, the variable +/// name should match the `task_id` value. This makes it easier to +/// follow the flow of the DAG. +/// +/// ## Example +/// ```python +/// from airflow.operators import PythonOperator +/// +/// +/// incorrect_name = PythonOperator(task_id="my_task") +/// ``` +/// +/// Use instead: +/// ```python +/// from airflow.operators import PythonOperator +/// +/// +/// my_task = PythonOperator(task_id="my_task") +/// ``` +#[violation] +pub struct AirflowVariableNameTaskIdMismatch { + task_id: String, +} + +impl Violation for AirflowVariableNameTaskIdMismatch { + #[derive_message_formats] + fn message(&self) -> String { + let AirflowVariableNameTaskIdMismatch { task_id } = self; + format!("Task variable name should match the `task_id`: \"{task_id}\"") + } +} + +/// AIR001 +pub(crate) fn variable_name_task_id( + checker: &mut Checker, + targets: &[Expr], + value: &Expr, +) -> Option { + // If we have more than one target, we can't do anything. + if targets.len() != 1 { + return None; + } + + let target = &targets[0]; + let Expr::Name(ast::ExprName { id, .. }) = target else { + return None; + }; + + // If the value is not a call, we can't do anything. + let Expr::Call(ast::ExprCall { func, keywords, .. }) = value else { + return None; + }; + + // If the function doesn't come from Airflow, we can't do anything. + if !checker + .semantic_model() + .resolve_call_path(func) + .map_or(false, |call_path| matches!(call_path[0], "airflow")) + { + return None; + } + + // If the call doesn't have a `task_id` keyword argument, we can't do anything. + let keyword = keywords + .iter() + .find(|keyword| keyword.arg.as_ref().map_or(false, |arg| arg == "task_id"))?; + + // If the keyword argument is not a string, we can't do anything. + let task_id = match &keyword.value { + Expr::Constant(constant) => match &constant.value { + Constant::Str(value) => value, + _ => return None, + }, + _ => return None, + }; + + // If the target name is the same as the task_id, no violation. + if id == task_id { + return None; + } + + Some(Diagnostic::new( + AirflowVariableNameTaskIdMismatch { + task_id: task_id.to_string(), + }, + target.range(), + )) +} diff --git a/crates/ruff/src/rules/airflow/snapshots/ruff__rules__airflow__tests__AIR001_AIR001.py.snap b/crates/ruff/src/rules/airflow/snapshots/ruff__rules__airflow__tests__AIR001_AIR001.py.snap new file mode 100644 index 0000000000000..be7e9d7af654a --- /dev/null +++ b/crates/ruff/src/rules/airflow/snapshots/ruff__rules__airflow__tests__AIR001_AIR001.py.snap @@ -0,0 +1,22 @@ +--- +source: crates/ruff/src/rules/airflow/mod.rs +--- +AIR001.py:11:1: AIR001 Task variable name should match the `task_id`: "my_task" + | +11 | my_task_2 = PythonOperator(callable=my_callable, task_id="my_task_2") +12 | +13 | incorrect_name = PythonOperator(task_id="my_task") + | ^^^^^^^^^^^^^^ AIR001 +14 | incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2") + | + +AIR001.py:12:1: AIR001 Task variable name should match the `task_id`: "my_task_2" + | +12 | incorrect_name = PythonOperator(task_id="my_task") +13 | incorrect_name_2 = PythonOperator(callable=my_callable, task_id="my_task_2") + | ^^^^^^^^^^^^^^^^ AIR001 +14 | +15 | from my_module import MyClass + | + + diff --git a/crates/ruff/src/rules/mod.rs b/crates/ruff/src/rules/mod.rs index c0fcd71b516fe..0c4e5ef84ce43 100644 --- a/crates/ruff/src/rules/mod.rs +++ b/crates/ruff/src/rules/mod.rs @@ -1,4 +1,5 @@ #![allow(clippy::useless_format)] +pub mod airflow; pub mod eradicate; pub mod flake8_2020; pub mod flake8_annotations; diff --git a/ruff.schema.json b/ruff.schema.json index cc5aee89b6ce3..3edeec7d5fc94 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -1536,6 +1536,10 @@ "A001", "A002", "A003", + "AIR", + "AIR0", + "AIR00", + "AIR001", "ALL", "ANN", "ANN0",