From 4ec8a289b7c084f5aa7995cfb77b1308574c9627 Mon Sep 17 00:00:00 2001
From: David Kunzmann <david.kunzmann@sonarsource.com>
Date: Mon, 9 Oct 2023 15:31:10 +0200
Subject: [PATCH] Modify rule S6742: allow for up to 6 chained operations on
 pandas (#3218)

---
 rules/S6742/python/rule.adoc | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/rules/S6742/python/rule.adoc b/rules/S6742/python/rule.adoc
index 7ce39d0182a..f2d4fd36a6b 100644
--- a/rules/S6742/python/rule.adoc
+++ b/rules/S6742/python/rule.adoc
@@ -1,4 +1,4 @@
-This rule raises an issue when 5 or more commands are applied on a data frame.
+This rule raises an issue when 7 or more operations are applied to a data frame.
 
 == Why is this an issue?
 
@@ -10,7 +10,8 @@ This allows the user to chain multiple operations together, making it effortless
 ----
 import pandas as pd
 
-joe = pd.read_csv("data.csv", dtype={'user_id':'str', 'name':'str'}).set_index("name").filter(like='jo', axis=0).head()
+schema = {'name':str, 'domain': str, 'revenue': 'Int64'}
+joe = pd.read_csv("data.csv", dtype=schema).set_index('name').filter(like='joe', axis=0).groupby('domain').mean().round().sample()
 ----
 
 While this code is correct and concise, 
@@ -34,7 +35,7 @@ To fix this issue refactor chains of instruction into a function that can be con
 import pandas as pd
 
 def foo(df: pd.DataFrame):
-  return df.set_index("name").filter(like='joe', axis=0).groupby("team")["salary"].mean().head() # Noncompliant: too many operations happen on this data frame.
+  return df.set_index('name').filter(like='joe', axis=0).groupby('team').mean().round().sort_values('salary').take([0]) # Noncompliant: too many operations happen on this data frame.
 ----
 
 ==== Compliant solution
@@ -44,13 +45,13 @@ def foo(df: pd.DataFrame):
 import pandas as pd
 
 def select_joes(df):
-  return df.set_index("name").filter(like='joe', axis=0)
+  return df.set_index('name').filter(like='joe', axis=0)
 
 def compute_mean_salary_per_team(df):
-  return df.groupby("team")["salary"].mean()
+  return df.groupby('team').mean().round()
 
 def foo(df: pd.DataFrame):
-  return df.pipe(select_joes).pipe(compute_mean_salary_per_team).head() # Compliant
+  return df.pipe(select_joes).pipe(compute_mean_salary_per_team).sort_values('salary').take([0]) # Compliant
 ----