Skip to content

Commit

Permalink
chore(actions): add local execution of pre-commit hook (#1146)
Browse files Browse the repository at this point in the history
  • Loading branch information
vascoalramos authored and aquemy committed Nov 22, 2022
1 parent a91abd1 commit 09b71ab
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 149 deletions.
9 changes: 8 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
files: ^src/|^tests/
default_stages: [commit, push, manual]

repos:
- repo: https://github.com/commitizen-tools/commitizen
rev: v2.37.0
hooks:
- id: commitizen
stages: [commit-msg]
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
Expand Down Expand Up @@ -29,6 +35,7 @@ repos:
hooks:
- id: check-manifest
args: [ "--ignore=src/pandas_profiling/version.py" ]
stages: [manual]
- repo: https://github.com/PyCQA/flake8
rev: "4.0.1"
hooks:
Expand Down
4 changes: 2 additions & 2 deletions docsrc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,6 @@ def _GetApiWrapperVersion():
autodoc_pydantic_model_show_json = False

source_suffix = {
'.rst': 'restructuredtext',
'.md': 'markdown',
".rst": "restructuredtext",
".md": "markdown",
}
14 changes: 14 additions & 0 deletions docsrc/source/pages/support_contrib/contribution_guidelines.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ The following commands are supported:
make clean
Contribution quality standards
------------------------------

To guarantee the quality of contributed code, the project workflow validates the added modifications as well as the introduced commit messages.
The same mechanisms are used locally to find and solve existing issues before submitting a pull request.

To activate the local mechanisms (created using pre-commit hooks), run the following commands:

.. code-block:: console
pip install -r requirements-dev.txt
pre-commit install --hook-type commit-msg --hook-type pre-commit
Git workflow
------------

Expand Down
1 change: 1 addition & 0 deletions examples/features/correlation_auto_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path

import pandas as pd

from pandas_profiling import ProfileReport
Expand Down
142 changes: 18 additions & 124 deletions examples/usaairquality/usaairquality.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "b514dd38-2ebd-4c96-aed5-4e3695e20fa2",
"metadata": {},
"outputs": [],
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "6bcf2de2-58bf-4995-8cf7-3145322b45f7",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -68,15 +68,15 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "b33a26ed-4e1e-4689-93ce-fa0f98f48e89",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"from pandas_profiling.utils.cache import cache_file\n",
"from pandas_profiling import ProfileReport"
"from pandas_profiling import ProfileReport\n",
"from pandas_profiling.utils.cache import cache_file"
]
},
{
Expand All @@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "7dab0b47-537d-4402-af71-1bdfd0cf6cdd",
"metadata": {},
"outputs": [],
Expand All @@ -101,9 +101,9 @@
"\n",
"df = pd.read_csv(file_name, index_col=[0])\n",
"\n",
"#We will only consider the data from Arizone state for this example\n",
"df = df[df['State']=='Arizona']\n",
"df['Date Local']=pd.to_datetime(df['Date Local'])"
"# We will only consider the data from Arizone state for this example\n",
"df = df[df[\"State\"] == \"Arizona\"]\n",
"df[\"Date Local\"] = pd.to_datetime(df[\"Date Local\"])"
]
},
{
Expand Down Expand Up @@ -139,140 +139,34 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "15e613a6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Time'>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1200x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"from pandas_profiling.visualisation.plot import timeseries_heatmap\n",
"\n",
"timeseries_heatmap(dataframe=df, entity_column='Site Num', sortby='Date Local')"
"timeseries_heatmap(dataframe=df, entity_column=\"Site Num\", sortby=\"Date Local\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b29a7e78-d52d-458d-ac9a-e509ffd373d1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating the profile for the Site num: 1011\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f139498017074ec6828e65ca98f293c4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a2d4124d4cbb4e548e727a3e8deab5c6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0828c63867ff43de8654bbebc6b8f283",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Render HTML: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d3b758632d22449392be04fb55cc5b9a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Export report to file: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating the profile for the Site num: 1028\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1a60fe25562d45e0acc08a5024f299d9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"# Return the profile per station\n",
"for group in df.groupby('Site Num'):\n",
" #Running 1 profile per station\n",
"for group in df.groupby(\"Site Num\"):\n",
" # Running 1 profile per station\n",
" profile = ProfileReport(\n",
" group[1],\n",
" tsmode=True,\n",
" sortby=\"Date Local\",\n",
" #title=f\"Air Quality profiling - Site Num: {group[0]}\"\n",
" # title=f\"Air Quality profiling - Site Num: {group[0]}\"\n",
" )\n",
" \n",
" profile.to_file(f'Ts_Profile_{group[0]}.html')"
"\n",
" profile.to_file(f\"Ts_Profile_{group[0]}.html\")"
]
}
],
Expand Down
38 changes: 17 additions & 21 deletions examples/usaairquality/usaairquality.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,30 @@
"""
import pandas as pd

from pandas_profiling import ProfileReport
from pandas_profiling.utils.cache import cache_file
from pandas_profiling.visualisation.plot import timeseries_heatmap
from pandas_profiling import ProfileReport

if __name__ == '__main__':
if __name__ == "__main__":

file_name = cache_file(
"pollution_us_2000_2016.csv",
"https://query.data.world/s/mz5ot3l4zrgvldncfgxu34nda45kvb",
)

df = pd.read_csv(file_name, index_col=[0])
#Prepare the dataset
#We will only consider the data from Arizone state for this example
df = df[df['State']=='Arizona']
df['Date Local']=pd.to_datetime(df['Date Local'])
#Plot the time heatmap distribution for the per entity time-series
timeseries_heatmap(dataframe=df, entity_column='Site Num', sortby='Date Local')

# Prepare the dataset
# We will only consider the data from the state of Arizona for this example
df = df[df["State"] == "Arizona"]
df["Date Local"] = pd.to_datetime(df["Date Local"])

# Plot the time heatmap distribution for the per entity time-series
timeseries_heatmap(dataframe=df, entity_column="Site Num", sortby="Date Local")

# Return the profile per station
for group in df.groupby('Site Num'):
#Running 1 profile per station
profile = ProfileReport(
group[1],
tsmode=True,
sortby="Date Local"
)

profile.to_file(f'Ts_Profile_{group[0]}.html')
for group in df.groupby("Site Num"):
# Running 1 profile per station
profile = ProfileReport(group[1], tsmode=True, sortby="Date Local")

profile.to_file(f"Ts_Profile_{group[0]}.html")
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
version = "dev"

with open(source_root / "src/pandas_profiling/version.py", "w") as version_file:
version_file.write(f'__version__ = \'{version}\'')
version_file.write(f"__version__ = '{version}'")

setup(
name="pandas-profiling",
Expand Down

0 comments on commit 09b71ab

Please sign in to comment.