Skip to content

Commit

Permalink
fix(floorist): RHICOMPL-2984 Bugfix for empty prefix floorplan
Browse files Browse the repository at this point in the history
 - Fix a bug for when floorplan file does not provide prefix
 - Add helper function for generating directory names
 - Add unit test coverage
  • Loading branch information
Victoremepunto committed May 24, 2022
1 parent d522496 commit 6dd9d5e
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ CMD python ./app.py

FROM base as test

ADD tests/test_* tests/floorplan_* tests/requirements.txt ./tests/
ADD tests/test_* tests/unit/test_* tests/floorplan_* tests/requirements.txt ./tests/

RUN pip install --no-cache-dir -r tests/requirements.txt
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ python_requires = >=3.6, <4
[options.extras_require]
test =
pytest
pytest-mock
10 changes: 4 additions & 6 deletions src/floorist/floorist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import pandas as pd
import yaml

from floorist.helpers import generate_name


def _configure_loglevel():

LOGLEVEL = environ.get('LOGLEVEL', 'INFO').upper()
Expand Down Expand Up @@ -45,12 +48,7 @@ def main():
logging.debug(f"Dumping #{dump_count}: {row['query']} to {row['prefix']}")

cursor = pd.read_sql(row['query'], conn, chunksize=row.get('chunksize', 1000))

target = '/'.join([
f"s3://{config.bucket_name}",
row['prefix'],
date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
])
target = generate_name(config.bucket_name, row['prefix'])

for data in cursor:
wr.s3.to_parquet(data, target,
Expand Down
11 changes: 11 additions & 0 deletions src/floorist/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from datetime import date


def generate_name(bucket_name, prefix=None):

file_name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
parts = ["s3:/", bucket_name, file_name]
if prefix:
parts.insert(2, prefix)

return '/'.join(parts)
2 changes: 2 additions & 0 deletions tests/floorplan_valid_with_prefix.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- query: SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (num,letter);
prefix: some-prefix
15 changes: 15 additions & 0 deletions tests/test_floorist.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import re

import awswrangler as wr
import boto3
import pytest
Expand All @@ -9,6 +12,8 @@
from sqlalchemy.exc import OperationalError
from tempfile import NamedTemporaryFile

from floorist.helpers import generate_name


class TestFloorist:
@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -177,3 +182,13 @@ def test_floorplan_with_one_failing_dump(self, caplog, session):
assert 'ProgrammingError' in caplog.text
assert 'Dumped 1 from total of 2'
assert wr.s3.list_directories(prefix, boto3_session=session) == [f"{prefix}/numbers/"]

@pytest.mark.parametrize("template,prefix", [["tests/floorplan_valid.yaml", None], ["tests/floorplan_valid_with_prefix.yaml", "some-prefix"]])
def test_target_files_have_expected_names(self, template, prefix, session):
bucket = f"s3://{env['AWS_BUCKET']}"
env['FLOORPLAN_FILE'] = template
filename = generate_name(env['AWS_BUCKET'], prefix)
main()
existing_objects = wr.s3.list_objects(bucket, boto3_session=session)
assert len(existing_objects) == 1
assert re.match(rf"{filename}/[0-z]*\.gz.parquet", existing_objects[0])
28 changes: 28 additions & 0 deletions tests/unit/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from floorist.floorist import main
from floorist.config import Config


def test_floorplan_without_prefix_is_handled_correctly(mocker):
config_mock = mocker.patch('floorist.floorist.get_config')
config_mock.return_value = Config(bucket_name='foo')
awswrangler_mock = mocker.patch('floorist.floorist.wr')
mocker.patch('floorist.floorist.create_engine')
mocker.patch('floorist.floorist.open')
mocker.patch('floorist.floorist.logging')
safe_load_mock = mocker.patch('floorist.floorist.yaml.safe_load')
safe_load_mock.return_value = [{'query': "a query", 'prefix': None}]
pandas_mock = mocker.patch('floorist.floorist.pd')
pandas_mock.read_sql.return_value = ["some-data"]
helper_mock = mocker.patch("floorist.floorist.generate_name")
helper_mock.return_value = "some-name"
main()
helper_mock.assert_called_with("foo", None)
expected_kwargs = {
"index" : False,
"compression" : 'gzip',
"dataset" : True,
"mode" : 'append'
}
awswrangler_mock.s3.to_parquet.assert_called_with("some-data", "some-name", **expected_kwargs)


21 changes: 21 additions & 0 deletions tests/unit/test_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from floorist.helpers import generate_name
from datetime import date


def test_name_without_prefix():
bucket_name = "my_bucket"
actual_name = generate_name("my_bucket")
name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
expected_name = f"s3://{bucket_name}/{name}"

assert actual_name == expected_name


def test_name_with_prefix():
bucket_name = "my_bucket"
prefix = "some-prefix"
actual_name = generate_name(bucket_name, prefix)
name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
expected_name = f"s3://{bucket_name}/{prefix}/{name}"

assert actual_name == expected_name

0 comments on commit 6dd9d5e

Please sign in to comment.