From 4ec8a289b7c084f5aa7995cfb77b1308574c9627 Mon Sep 17 00:00:00 2001 From: David Kunzmann Date: Mon, 9 Oct 2023 15:31:10 +0200 Subject: [PATCH] Modify rule S6742: allow for up to 6 chained operations on pandas (#3218) --- rules/S6742/python/rule.adoc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rules/S6742/python/rule.adoc b/rules/S6742/python/rule.adoc index 7ce39d0182a..f2d4fd36a6b 100644 --- a/rules/S6742/python/rule.adoc +++ b/rules/S6742/python/rule.adoc @@ -1,4 +1,4 @@ -This rule raises an issue when 5 or more commands are applied on a data frame. +This rule raises an issue when 7 or more commands are applied on a data frame. == Why is this an issue? @@ -10,7 +10,8 @@ This allows the user to chain multiple operations together, making it effortless ---- import pandas as pd -joe = pd.read_csv("data.csv", dtype={'user_id':'str', 'name':'str'}).set_index("name").filter(like='jo', axis=0).head() +schema = {'name':str, 'domain': str, 'revenue': 'Int64'} +joe = pd.read_csv("data.csv", dtype=schema).set_index('name').filter(like='joe', axis=0).groupby('domain').mean().round().sample() ---- While this code is correct and concise, @@ -34,7 +35,7 @@ To fix this issue refactor chains of instruction into a function that can be con import pandas as pd def foo(df: pd.DataFrame): - return df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head() # Noncompliant: too many operations happen on this data frame. + return df.set_index('name').filter(like='joe', axis=0).groupby('team').mean().round().sort_values('salary').take([0]) # Noncompliant: too many operations happen on this data frame. 
---- ==== Compliant solution @@ -44,13 +45,13 @@ def foo(df: pd.DataFrame): import pandas as pd def select_joes(df): - return df.set_index("name").filter(like='joe', axis=0) + return df.set_index('name').filter(like='joe', axis=0) def compute_mean_salary_per_team(df): - return df.groupby("team")["salary"].mean() + return df.groupby('team').mean().round() def foo(df: pd.DataFrame): - return df.pipe(select_joes).pipe(compute_mean_salary_per_team).head() # Compliant + return df.pipe(select_joes).pipe(compute_mean_salary_per_team).sort_values('salary').take([0]) # Compliant ----