-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add data_frame
* add tests
* fix `description`
* fix tests
- Loading branch information
1 parent
befc67e
commit 83a8e30
Showing 10 changed files with 1,015 additions and 3 deletions.
There are no files selected for viewing
6 changes: 5 additions & 1 deletion
6
python_modules/libraries/dagster-dask/dagster_dask/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,12 @@ | ||
from dagster.core.utils import check_dagster_package_version | ||
|
||
from .data_frame import DataFrame | ||
from .executor import dask_executor | ||
from .version import __version__ | ||
|
||
# Run dagster's package-version check for this library when it is imported.
check_dagster_package_version('dagster-dask', __version__)

# Names exported by `from dagster_dask import *`. Defined once: an earlier
# `__all__ = ['dask_executor']` assignment was immediately overwritten by this
# list and has been dropped.
__all__ = [
    'DataFrame',
    'dask_executor',
]
898 changes: 898 additions & 0 deletions
898
python_modules/libraries/dagster-dask/dagster_dask/data_frame.py
Large diffs are not rendered by default.
Oops, something went wrong.
3 changes: 3 additions & 0 deletions
3
python_modules/libraries/dagster-dask/dagster_dask_tests/num.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
num1,num2 | ||
1,2 | ||
3,4 |
2 changes: 2 additions & 0 deletions
2
python_modules/libraries/dagster-dask/dagster_dask_tests/num.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
{"num1":1,"num2":2} | ||
{"num1":3,"num2":4} |
Binary file not shown.
70 changes: 70 additions & 0 deletions
70
python_modules/libraries/dagster-dask/dagster_dask_tests/test_data_frame.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import shutil | ||
|
||
import dask.dataframe as dd | ||
import pytest | ||
from dagster_dask import DataFrame | ||
from dask.dataframe.utils import assert_eq | ||
|
||
from dagster import InputDefinition, OutputDefinition, execute_solid, file_relative_path, solid | ||
from dagster.utils.test import get_temp_dir | ||
|
||
|
||
def create_dask_df():
    """Return a dask DataFrame read from the `num.csv` file next to this test module."""
    return dd.read_csv(file_relative_path(__file__, 'num.csv'))
|
||
|
||
@pytest.mark.parametrize(
    'file_type,read,kwargs',
    [
        pytest.param('csv', dd.read_csv, {'index': False}, id='csv'),
        pytest.param('parquet', dd.read_parquet, {'write_index': False}, id='parquet'),
        pytest.param('json', dd.read_json, {}, id='json'),
    ],
)
def test_dataframe_outputs(file_type, read, kwargs):
    """Check that a `DataFrame` output can be written through output config.

    A solid returns the fixture frame, the run config asks for the output to be
    written as `file_type` under a temporary path (with any extra writer
    `kwargs`), and the written files are read back with `read` and compared to
    the fixture frame.
    """
    expected = create_dask_df()

    @solid(output_defs=[OutputDefinition(dagster_type=DataFrame, name='output_df')])
    def return_df(_):
        return expected

    with get_temp_dir() as temp_path:
        # Delete the directory the context manager just handed out, so the
        # solid's output is written to a path that does not exist yet.
        shutil.rmtree(temp_path)

        writer_options = {'path': temp_path}
        writer_options.update(kwargs)
        output_config = [{'output_df': {file_type: writer_options}}]
        run_config = {'solids': {'return_df': {'outputs': output_config}}}

        result = execute_solid(return_df, run_config=run_config)
        assert result.success

        # Read every file written under the output path back into one frame.
        actual = read(f"{temp_path}/*")
        assert assert_eq(actual, expected)
|
||
|
||
@pytest.mark.parametrize(
    'file_type',
    [pytest.param(fmt, id=fmt) for fmt in ('csv', 'parquet', 'json')],
)
def test_dataframe_inputs(file_type):
    """Check that a `DataFrame` input can be loaded through input config.

    The run config points the solid's `input_df` at the `num.<file_type>`
    fixture next to this module; the solid passes the loaded frame straight
    through, and the result is compared to the frame read from `num.csv`.
    """

    @solid(input_defs=[InputDefinition(dagster_type=DataFrame, name='input_df')])
    def return_df(_, input_df):
        return input_df

    source_path = file_relative_path(__file__, f"num.{file_type}")
    input_config = {'input_df': {file_type: {'path': source_path}}}
    run_config = {'solids': {'return_df': {'inputs': input_config}}}

    result = execute_solid(return_df, run_config=run_config)
    assert result.success

    loaded = result.output_value()
    assert assert_eq(loaded, create_dask_df())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# we need `pyarrow` for testing read/write parquet files. | ||
pyarrow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters