diff --git a/.github/workflows/databricks_pull_request_tests.yml b/.github/workflows/databricks_pull_request_tests.yml index 15f91de..835b551 100644 --- a/.github/workflows/databricks_pull_request_tests.yml +++ b/.github/workflows/databricks_pull_request_tests.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 15 - steps: + steps: - name: Checkout repo uses: actions/checkout@v4 - name: Run test notebook diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..5b45efe --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,16 @@ +name: linting + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' # caching pip dependencies + - run: pip install pylint + - run: pip install databricks-labs-pylint + - run: pylint --load-plugins=databricks.labs.pylint.all covid_analysis/transforms.py diff --git a/covid_analysis/transforms.py b/covid_analysis/transforms.py index 5ee3a08..e8a36d1 100644 --- a/covid_analysis/transforms.py +++ b/covid_analysis/transforms.py @@ -2,23 +2,23 @@ # Filter by country code. def filter_country(pdf, country="USA"): - pdf = pdf[pdf.iso_code == country] - return pdf + pdf = pdf[pdf.iso_code == country] + return pdf # Pivot by indicator, and fill missing values. def pivot_and_clean(pdf, fillna): - pdf["value"] = pd.to_numeric(pdf["value"]) - pdf = pdf.fillna(fillna).pivot_table( - values="value", columns="indicator", index="date" - ) - return pdf + pdf["value"] = pd.to_numeric(pdf["value"]) + pdf = pdf.fillna(fillna).pivot_table( + values="value", columns="indicator", index="date" + ) + return pdf # Create column names that are compatible with Delta tables. def clean_spark_cols(pdf): - pdf.columns = pdf.columns.str.replace(" ", "_") - return pdf + pdf.columns = pdf.columns.str.replace(" ", "_") + return pdf # Convert index to column (works with pandas API on Spark, too). def index_to_col(df, colname): - df[colname] = df.index - return df \ No newline at end of file + df[colname] = df.index + return df diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6d8a5e9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.pylint.main] +load-plugins = "databricks.labs.pylint.all" diff --git a/requirements.txt b/requirements.txt index f0a03ed..bdbfb41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,5 @@ python-dateutil==2.8.2 pytz==2022.1 six==1.16.0 tomli==2.0.1 -wget==3.2 \ No newline at end of file +wget==3.2 +databricks-labs-pylint