Skip to content

Commit

Permalink
fix(linting): code formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
azory-ydata authored and fabclmnt committed Oct 28, 2024
1 parent 3f126b0 commit 886be38
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 23 deletions.
3 changes: 2 additions & 1 deletion src/ydata_profiling/profile_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ def __initialize_dataframe(
) -> Optional[Union[pd.DataFrame, sDataFrame]]:

logger.info_def_report(
df=df, timeseries=report_config.vars.timeseries.active,
df=df,
timeseries=report_config.vars.timeseries.active,
)

if (
Expand Down
30 changes: 17 additions & 13 deletions src/ydata_profiling/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,9 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime:
return datetime(1970, 1, 1) + timedelta(seconds=int(timestamp))


def analytics_features(dataframe: str,
datatype: str,
report_type: str,
ncols: int,
nrows:int,
dbx: str) -> None:
def analytics_features(
dataframe: str, datatype: str, report_type: str, ncols: int, nrows: int, dbx: str
) -> None:
endpoint = "https://packages.ydata.ai/ydata-profiling?"
package_version = __version__

Expand Down Expand Up @@ -136,13 +133,15 @@ def analytics_features(dataframe: str,

requests.get(request_message)


def is_running_in_databricks():
    """Detect whether the code is executing inside a Databricks runtime.

    Databricks clusters export ``DATABRICKS_RUNTIME_VERSION`` in the
    environment of every runtime, so its presence is the detection signal.

    Returns:
        str: the Databricks runtime version string when running on
        Databricks, otherwise the string ``"False"``.
    """
    # Single environment lookup instead of testing membership twice.
    version = os.environ.get("DATABRICKS_RUNTIME_VERSION")
    if version is not None:
        return version
    # Not on Databricks: preserve the historical str(False) return value.
    return "False"


def calculate_nrows(df):
    """Calculate the approximate number of rows of a Spark DataFrame.

    Counts only the rows in partition 0 and multiplies by the number of
    partitions, assuming partitions are roughly uniform in size. This is
    much cheaper than a full ``count()`` on large datasets, at the cost of
    accuracy when partitions are skewed.

    Args:
        df: a Spark DataFrame (anything exposing a compatible ``rdd``
            attribute).

    Returns:
        int: the estimated number of rows, or 0 if the estimate could not
        be computed (e.g. ``df`` has no usable ``rdd``).
    """
    try:
        n_partitions = df.rdd.getNumPartitions()

        # Count rows of the first partition only; every other partition
        # contributes 0, so collect()[0] is the partition-0 row count.
        first_partition_count = df.rdd.mapPartitionsWithIndex(
            lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
        ).collect()[0]
        nrows = first_partition_count * n_partitions
    except Exception:
        # Best-effort metric: narrow from a bare `except:` so system exits
        # and keyboard interrupts still propagate; fall back to 0 rather
        # than failing the report over a telemetry estimate.
        nrows = 0

    return nrows
24 changes: 15 additions & 9 deletions src/ydata_profiling/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

import pandas as pd

from ydata_profiling.utils.common import (calculate_nrows,
analytics_features,
is_running_in_databricks)
from ydata_profiling.utils.common import (
analytics_features,
calculate_nrows,
is_running_in_databricks,
)


class ProfilingLogger(logging.Logger):
Expand All @@ -20,22 +22,26 @@ def info_def_report(self, df, timeseries: bool) -> None: # noqa: ANN001
if isinstance(df, pd.DataFrame):
dataframe = "pandas"
report_type = "regular"
nrows=len(df)
nrows = len(df)
elif df is None:
dataframe = "pandas"
report_type = "compare"
nrows=len(df)
nrows = len(df)
else:
dataframe = "spark"
report_type = "regular"
nrows=calculate_nrows(df)
nrows = calculate_nrows(df)

dbx=is_running_in_databricks()
dbx = is_running_in_databricks()
datatype = "timeseries" if timeseries else "tabular"

analytics_features(
dataframe=dataframe, datatype=datatype, report_type=report_type,
nrows=nrows, ncols=ncols, dbx=dbx
dataframe=dataframe,
datatype=datatype,
report_type=report_type,
nrows=nrows,
ncols=ncols,
dbx=dbx,
)

super().info(
Expand Down

0 comments on commit 886be38

Please sign in to comment.