Skip to content

Commit

Permalink
Move / minimize number of cudf / dask-cudf imports (#480)
Browse files Browse the repository at this point in the history
* Move / minimize number of cudf / dask-cudf imports

* Add tests for GPU-related errors

* Fix unbound local error

* Fix ddf value error
  • Loading branch information
charlesbluca authored Apr 19, 2022
1 parent 95b0dd0 commit 031c04c
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 33 deletions.
9 changes: 3 additions & 6 deletions dask_sql/input_utils/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@

from dask_sql.input_utils.base import BaseInputPlugin

try:
import dask_cudf
except ImportError:
dask_cudf = None


class DaskInputPlugin(BaseInputPlugin):
"""Input Plugin for Dask DataFrames, just keeping them"""
Expand All @@ -27,7 +22,9 @@ def to_dc(
**kwargs
):
if gpu: # pragma: no cover
if not dask_cudf:
try:
import dask_cudf
except ImportError:
raise ModuleNotFoundError(
"Setting `gpu=True` for table creation requires dask_cudf"
)
Expand Down
9 changes: 3 additions & 6 deletions dask_sql/input_utils/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@
from dask_sql.input_utils.base import BaseInputPlugin
from dask_sql.input_utils.convert import InputUtil

try:
import dask_cudf
except ImportError:
dask_cudf = None


class LocationInputPlugin(BaseInputPlugin):
"""Input Plugin for everything, which can be read in from a file (on disk, remote etc.)"""
Expand Down Expand Up @@ -44,7 +39,9 @@ def to_dc(
format = extension.lstrip(".")
try:
if gpu: # pragma: no cover
if not dask_cudf:
try:
import dask_cudf
except ImportError:
raise ModuleNotFoundError(
"Setting `gpu=True` for table creation requires dask-cudf"
)
Expand Down
9 changes: 3 additions & 6 deletions dask_sql/input_utils/pandaslike.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@

from dask_sql.input_utils.base import BaseInputPlugin

try:
import cudf
except ImportError:
cudf = None


class PandasLikeInputPlugin(BaseInputPlugin):
"""Input Plugin for Pandas Like DataFrames, which get converted to dask DataFrames"""
Expand All @@ -30,7 +25,9 @@ def to_dc(
):
npartitions = kwargs.pop("npartitions", 1)
if gpu: # pragma: no cover
if not cudf:
try:
import cudf
except ImportError:
raise ModuleNotFoundError(
"Setting `gpu=True` for table creation requires cudf"
)
Expand Down
7 changes: 1 addition & 6 deletions dask_sql/physical/rel/logical/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@
import pandas as pd
from dask import config as dask_config

try:
import dask_cudf
except ImportError:
dask_cudf = None

from dask_sql.datacontainer import ColumnContainer, DataContainer
from dask_sql.physical.rel.base import BaseRelPlugin
from dask_sql.physical.rex.core.call import IsNullOperation
Expand Down Expand Up @@ -83,7 +78,7 @@ def get_supported_aggregation(self, series):

if pd.api.types.is_string_dtype(series.dtype):
# If dask_cudf strings dtype, return built-in aggregation
if dask_cudf is not None and isinstance(series, dask_cudf.Series):
if "cudf" in str(series._partition_type):
return built_in_aggregation

# With pandas StringDtype built-in aggregations work
Expand Down
10 changes: 1 addition & 9 deletions dask_sql/physical/utils/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@

from dask_sql.utils import make_pickable_without_dask_sql

try:
import dask_cudf
except ImportError:
dask_cudf = None


def apply_sort(
df: dd.DataFrame,
Expand All @@ -35,10 +30,7 @@ def apply_sort(

# dask / dask-cudf don't support lists of ascending / null positions
if len(sort_columns) == 1 or (
dask_cudf is not None
and isinstance(df, dask_cudf.DataFrame)
and single_ascending
and single_null_first
"cudf" in str(df._partition_type) and single_ascending and single_null_first
):
try:
return df.sort_values(
Expand Down
31 changes: 31 additions & 0 deletions tests/integration/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,34 @@ def test_drop(c):

with pytest.raises(dask_sql.utils.ParsingException):
c.sql("SELECT a FROM new_table")


def test_create_gpu_error(c, df, temporary_data_file):
    """Verify that ``gpu=True`` table creation raises when cudf is missing.

    The test is skipped whenever ``cudf`` can be imported. Otherwise it
    expects ``ModuleNotFoundError`` from three table-creation routes: the
    ``df`` fixture passed directly, a dask DataFrame built from it, and a
    ``CREATE TABLE ... WITH (location = ...)`` statement pointing at a CSV
    dump of the same data.
    """
    try:
        import cudf  # noqa: F401
    except ImportError:
        # cudf is unavailable, which is the situation this test targets.
        pass
    else:
        pytest.skip("GPU-related import errors only need to be checked on CPU")

    # Route 1: hand the fixture frame straight to create_table.
    with pytest.raises(ModuleNotFoundError):
        c.create_table("new_table", df, gpu=True)

    # Route 2: same data, wrapped in a dask DataFrame first.
    dask_frame = dd.from_pandas(df, npartitions=2)
    with pytest.raises(ModuleNotFoundError):
        c.create_table("new_table", dask_frame, gpu=True)

    # Route 3: read the data back from a file location through SQL.
    df.to_csv(temporary_data_file, index=False)
    create_statement = f"""
        CREATE TABLE
            new_table
        WITH (
            location = '{temporary_data_file}',
            format = 'csv',
            gpu = True
        )
    """
    with pytest.raises(ModuleNotFoundError):
        c.sql(create_statement)

0 comments on commit 031c04c

Please sign in to comment.