From 305997ae04c8590e799591bab60640804392c129 Mon Sep 17 00:00:00 2001 From: Fabiana Clemente Date: Mon, 6 May 2024 14:57:49 -0700 Subject: [PATCH 1/5] fix: comment html section. --- .../flavours/html/templates/sequence/sections.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/sections.html b/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/sections.html index de9028099..e722edf60 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/sections.html +++ b/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/sections.html @@ -5,9 +5,9 @@

{{ section.content['name'] }}

- {% if section.content['name'] == 'Overview' %} -

Brought to you by YData

- {% endif %} +
{{ html }} From b6ac3afbf6d7d87d3a21938044bd1d9b13f5f28d Mon Sep 17 00:00:00 2001 From: Fabiana Clemente Date: Wed, 1 May 2024 10:03:40 -0700 Subject: [PATCH 2/5] docs: add telemetry information to documentation --- docs/advanced_settings/analytics.md | 36 +++++++++++++++++++++++++++++ mkdocs.yml | 1 + src/ydata_profiling/utils/logger.py | 0 3 files changed, 37 insertions(+) create mode 100644 docs/advanced_settings/analytics.md create mode 100644 src/ydata_profiling/utils/logger.py diff --git a/docs/advanced_settings/analytics.md b/docs/advanced_settings/analytics.md new file mode 100644 index 000000000..e8b3de7a9 --- /dev/null +++ b/docs/advanced_settings/analytics.md @@ -0,0 +1,36 @@ +# Analytics & Telemetry + +## Overview + +`ydata-profiling` is a powerful library designed to generate profile reports from pandas and Spark DataFrame objects. +As part of our ongoing efforts to improve user experience and functionality, `ydata-profiling` includes a telemetry feature. +This feature collects anonymous usage data, helping us understand how the library is used and identify areas for improvement. + +The primary goal of collecting telemetry data is to: + +- Enhance the functionality and performance of the `ydata-profiling` library +- Prioritize new features based on user engagement +- Identify common issues and bugs to improve overall user experience + +### Data Collected +The telemetry system collects non-personal, anonymous information such as: + +- Python version, operating system, and whether an NVIDIA GPU is detected +- `ydata-profiling` version +- Frequency of use of `ydata-profiling` features (report type, pandas or Spark dataframe, tabular or time-series data) +- Errors or exceptions thrown within the library + +## Disabling usage analytics + +We respect your choice to not participate in our telemetry collection. If you prefer to disable telemetry, you can do so +by setting an environment variable on your system. Disabling telemetry will not affect the functionality of the `ydata-profiling` library, +except for the ability to contribute to its usage analytics. 
+ +### Set an Environment Variable: +Open your terminal or command prompt and set the `YDATA_PROFILING_NO_ANALYTICS` environment variable to `True`. + +### Configure it per ProfileReport +TODO: document how to disable telemetry for a single `ProfileReport`. + + + diff --git a/mkdocs.yml b/mkdocs.yml index b725412d3..174c7d3c2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - General settings: 'advanced_settings/available_settings.md' - Changing settings: 'advanced_settings/changing_settings.md' - Caching: 'advanced_settings/caching.md' + - Analytics: 'advanced_settings/analytics.md' - Integrations: - Other dataframes: 'integrations/other_dataframe_libraries.md' - Pyspark: 'integrations/pyspark.md' diff --git a/src/ydata_profiling/utils/logger.py b/src/ydata_profiling/utils/logger.py new file mode 100644 index 000000000..e69de29bb From dba8fcdde4ea5263535393012365cb1f281341d2 Mon Sep 17 00:00:00 2001 From: Fabiana Clemente Date: Wed, 1 May 2024 15:59:03 -0700 Subject: [PATCH 3/5] feat: add metrics --- src/ydata_profiling/utils/common.py | 35 +++++++++++++++++++++++++ src/ydata_profiling/utils/logger.py | 40 +++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/src/ydata_profiling/utils/common.py b/src/ydata_profiling/utils/common.py index a8f9aa350..1fa9e1c35 100644 --- a/src/ydata_profiling/utils/common.py +++ b/src/ydata_profiling/utils/common.py @@ -1,4 +1,11 @@ """Common util functions (e.g. missing in Python).""" +import os +import subprocess +import platform + +import pandas as pd +import requests + import collections.abc import zipfile from datetime import datetime, timedelta @@ -8,6 +15,7 @@ from pathlib import Path from typing import Mapping +from ydata_profiling.version import __version__ def update(d: dict, u: Mapping) -> dict: """Recursively update a dict. 
@@ -88,3 +96,30 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime: return datetime.fromtimestamp(timestamp) else: return datetime(1970, 1, 1) + timedelta(seconds=int(timestamp)) + +def analytics_features(dataframe, datatype: bool, report_type: bool): + endpoint= "https://packages.ydata.ai/ydata-profiling?" + + if os.getenv("YDATA_PROFILING_NO_ANALYTICS") != True: + package_version = __version__ + try: + subprocess.check_output("nvidia-smi") + gpu_present = True + except Exception: + gpu_present = False + + python_version = ".".join(platform.python_version().split(".")[:2]) + + try: + request_syntax = f"{endpoint}version={package_version}" \ + f"&python_version={python_version}" \ + f"&report_type={report_type}" \ + f"&dataframe={dataframe}" \ + f"&datatype={datatype}" \ + f"&os={platform.system()}" \ + f"&gpu={str(gpu_present)}" + + #requests.get(request_syntax) + print(request_syntax) + except Exception: + pass diff --git a/src/ydata_profiling/utils/logger.py b/src/ydata_profiling/utils/logger.py index e69de29bb..95a28434a 100644 --- a/src/ydata_profiling/utils/logger.py +++ b/src/ydata_profiling/utils/logger.py @@ -0,0 +1,40 @@ +""" + Logger function for ydata-profiling reports +""" +from __future__ import absolute_import, division, print_function + +import logging + +import pandas as pd + +from ydata_profiling.utils.common import analytics_features + +class ProfilingLogger(logging.Logger): + def __init__(self, name, level=logging.INFO): + super().__init__(name, level) + + def info( + self, + msg: object, + ) -> None: + super().info(f'[PROFILING] - {msg}.') + + def info_def_report(self, dataframe, timeseries: bool): + if dataframe == pd.DataFrame: + dataframe = 'pandas' + report_type = 'regular' + elif dataframe == type(None): + dataframe = 'pandas' + report_type='compare' + else: + dataframe = 'spark' + report_type = 'regular' + + datatype='timeseries' if timeseries else 'tabular' + + analytics_features(dataframe=dataframe, + datatype=datatype, + 
report_type=report_type) + + super().info(f'[PROFILING] Calculating profile with the following characteristics ' + f'- {dataframe} | {datatype} | {report_type}.') \ No newline at end of file From d50ec07adf8343bacb703f5b40b8a7eb0112408c Mon Sep 17 00:00:00 2001 From: Fabiana Clemente Date: Wed, 1 May 2024 16:01:05 -0700 Subject: [PATCH 4/5] feat: add request call --- src/ydata_profiling/utils/common.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/ydata_profiling/utils/common.py b/src/ydata_profiling/utils/common.py index 1fa9e1c35..f30fc256a 100644 --- a/src/ydata_profiling/utils/common.py +++ b/src/ydata_profiling/utils/common.py @@ -111,15 +111,14 @@ def analytics_features(dataframe, datatype: bool, report_type: bool): python_version = ".".join(platform.python_version().split(".")[:2]) try: - request_syntax = f"{endpoint}version={package_version}" \ - f"&python_version={python_version}" \ - f"&report_type={report_type}" \ - f"&dataframe={dataframe}" \ - f"&datatype={datatype}" \ - f"&os={platform.system()}" \ - f"&gpu={str(gpu_present)}" - - #requests.get(request_syntax) - print(request_syntax) + request_message = f"{endpoint}version={package_version}" \ + f"&python_version={python_version}" \ + f"&report_type={report_type}" \ + f"&dataframe={dataframe}" \ + f"&datatype={datatype}" \ + f"&os={platform.system()}" \ + f"&gpu={str(gpu_present)}" + + requests.get(request_message) except Exception: pass From ac84eda4b2c646620e790a779504e810057e0fa3 Mon Sep 17 00:00:00 2001 From: Azory YData Bot Date: Mon, 6 May 2024 21:50:54 +0000 Subject: [PATCH 5/5] fix(linting): code formatting --- src/ydata_profiling/utils/common.py | 31 ++++++++++++++----------- src/ydata_profiling/utils/logger.py | 36 +++++++++++++++-------------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/src/ydata_profiling/utils/common.py b/src/ydata_profiling/utils/common.py index f30fc256a..443addba5 100644 --- 
a/src/ydata_profiling/utils/common.py +++ b/src/ydata_profiling/utils/common.py @@ -1,12 +1,8 @@ """Common util functions (e.g. missing in Python).""" +import collections.abc import os -import subprocess import platform - -import pandas as pd -import requests - -import collections.abc +import subprocess import zipfile from datetime import datetime, timedelta @@ -15,8 +11,12 @@ from pathlib import Path from typing import Mapping +import pandas as pd +import requests + from ydata_profiling.version import __version__ + def update(d: dict, u: Mapping) -> dict: """Recursively update a dict. @@ -97,8 +97,9 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime: else: return datetime(1970, 1, 1) + timedelta(seconds=int(timestamp)) + def analytics_features(dataframe, datatype: bool, report_type: bool): - endpoint= "https://packages.ydata.ai/ydata-profiling?" + endpoint = "https://packages.ydata.ai/ydata-profiling?" if os.getenv("YDATA_PROFILING_NO_ANALYTICS") != True: package_version = __version__ @@ -111,13 +112,15 @@ def analytics_features(dataframe, datatype: bool, report_type: bool): python_version = ".".join(platform.python_version().split(".")[:2]) try: - request_message = f"{endpoint}version={package_version}" \ - f"&python_version={python_version}" \ - f"&report_type={report_type}" \ - f"&dataframe={dataframe}" \ - f"&datatype={datatype}" \ - f"&os={platform.system()}" \ - f"&gpu={str(gpu_present)}" + request_message = ( + f"{endpoint}version={package_version}" + f"&python_version={python_version}" + f"&report_type={report_type}" + f"&dataframe={dataframe}" + f"&datatype={datatype}" + f"&os={platform.system()}" + f"&gpu={str(gpu_present)}" + ) requests.get(request_message) except Exception: diff --git a/src/ydata_profiling/utils/logger.py b/src/ydata_profiling/utils/logger.py index 95a28434a..dc70c3208 100644 --- a/src/ydata_profiling/utils/logger.py +++ b/src/ydata_profiling/utils/logger.py @@ -1,7 +1,6 @@ """ Logger function for ydata-profiling reports 
""" -from __future__ import absolute_import, division, print_function import logging @@ -9,32 +8,35 @@ from ydata_profiling.utils.common import analytics_features + class ProfilingLogger(logging.Logger): def __init__(self, name, level=logging.INFO): super().__init__(name, level) def info( - self, - msg: object, - ) -> None: - super().info(f'[PROFILING] - {msg}.') + self, + msg: object, + ) -> None: + super().info(f"[PROFILING] - {msg}.") def info_def_report(self, dataframe, timeseries: bool): if dataframe == pd.DataFrame: - dataframe = 'pandas' - report_type = 'regular' + dataframe = "pandas" + report_type = "regular" elif dataframe == type(None): - dataframe = 'pandas' - report_type='compare' + dataframe = "pandas" + report_type = "compare" else: - dataframe = 'spark' - report_type = 'regular' + dataframe = "spark" + report_type = "regular" - datatype='timeseries' if timeseries else 'tabular' + datatype = "timeseries" if timeseries else "tabular" - analytics_features(dataframe=dataframe, - datatype=datatype, - report_type=report_type) + analytics_features( + dataframe=dataframe, datatype=datatype, report_type=report_type + ) - super().info(f'[PROFILING] Calculating profile with the following characteristics ' - f'- {dataframe} | {datatype} | {report_type}.') \ No newline at end of file + super().info( + f"[PROFILING] Calculating profile with the following characteristics " + f"- {dataframe} | {datatype} | {report_type}." + )