Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move / minimize number of cudf / dask-cudf imports #480

Merged
merged 4 commits into from
Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions dask_sql/input_utils/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@

from dask_sql.input_utils.base import BaseInputPlugin

try:
import dask_cudf
except ImportError:
dask_cudf = None


class DaskInputPlugin(BaseInputPlugin):
"""Input Plugin for Dask DataFrames, just keeping them"""
Expand All @@ -27,7 +22,9 @@ def to_dc(
**kwargs
):
if gpu: # pragma: no cover
if not dask_cudf:
try:
import dask_cudf
except ImportError:
raise ModuleNotFoundError(
"Setting `gpu=True` for table creation requires dask_cudf"
)
Expand Down
9 changes: 3 additions & 6 deletions dask_sql/input_utils/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@
from dask_sql.input_utils.base import BaseInputPlugin
from dask_sql.input_utils.convert import InputUtil

try:
import dask_cudf
except ImportError:
dask_cudf = None


class LocationInputPlugin(BaseInputPlugin):
"""Input Plugin for everything, which can be read in from a file (on disk, remote etc.)"""
Expand Down Expand Up @@ -44,7 +39,9 @@ def to_dc(
format = extension.lstrip(".")
try:
if gpu: # pragma: no cover
if not dask_cudf:
try:
import dask_cudf
except ImportError:
raise ModuleNotFoundError(
"Setting `gpu=True` for table creation requires dask-cudf"
)
Expand Down
9 changes: 3 additions & 6 deletions dask_sql/input_utils/pandaslike.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@

from dask_sql.input_utils.base import BaseInputPlugin

try:
import cudf
except ImportError:
cudf = None


class PandasLikeInputPlugin(BaseInputPlugin):
"""Input Plugin for Pandas Like DataFrames, which get converted to dask DataFrames"""
Expand All @@ -30,7 +25,9 @@ def to_dc(
):
npartitions = kwargs.pop("npartitions", 1)
if gpu: # pragma: no cover
if not cudf:
try:
import cudf
except ImportError:
raise ModuleNotFoundError(
"Setting `gpu=True` for table creation requires cudf"
)
Expand Down
7 changes: 1 addition & 6 deletions dask_sql/physical/rel/logical/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@
import pandas as pd
from dask import config as dask_config

try:
import dask_cudf
except ImportError:
dask_cudf = None

from dask_sql.datacontainer import ColumnContainer, DataContainer
from dask_sql.physical.rel.base import BaseRelPlugin
from dask_sql.physical.rex.core.call import IsNullOperation
Expand Down Expand Up @@ -83,7 +78,7 @@ def get_supported_aggregation(self, series):

if pd.api.types.is_string_dtype(series.dtype):
# If dask_cudf strings dtype, return built-in aggregation
if dask_cudf is not None and isinstance(series, dask_cudf.Series):
if "cudf" in str(series._partition_type):
return built_in_aggregation

# With pandas StringDtype built-in aggregations work
Expand Down
10 changes: 1 addition & 9 deletions dask_sql/physical/utils/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@

from dask_sql.utils import make_pickable_without_dask_sql

try:
import dask_cudf
except ImportError:
dask_cudf = None


def apply_sort(
df: dd.DataFrame,
Expand All @@ -35,10 +30,7 @@ def apply_sort(

# dask / dask-cudf don't support lists of ascending / null positions
if len(sort_columns) == 1 or (
dask_cudf is not None
and isinstance(df, dask_cudf.DataFrame)
and single_ascending
and single_null_first
"cudf" in str(df._partition_type) and single_ascending and single_null_first
):
try:
return df.sort_values(
Expand Down
31 changes: 31 additions & 0 deletions tests/integration/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,34 @@ def test_drop(c):

with pytest.raises(dask_sql.utils.ParsingException):
c.sql("SELECT a FROM new_table")


def test_create_gpu_error(c, df, temporary_data_file):
    """Check that ``gpu=True`` table creation fails cleanly without cudf.

    Three table-creation paths are exercised with ``gpu=True`` and each one
    is expected to raise ``ModuleNotFoundError``:

    * ``c.create_table`` with ``df`` as given,
    * ``c.create_table`` with a dask dataframe built from ``df``,
    * a ``CREATE TABLE ... WITH (location = ...)`` statement pointing at a
      CSV file written from ``df``.

    The whole test is skipped when ``cudf`` can be imported.

    NOTE(review): ``c``, ``df`` and ``temporary_data_file`` are presumably
    pytest fixtures defined elsewhere -- confirm against conftest.
    """
    try:
        import cudf  # noqa: F401
    except ImportError:
        # cudf is unavailable, which is the situation this test targets.
        pass
    else:
        pytest.skip("GPU-related import errors only need to be checked on CPU")

    # Path 1: the dataframe passed in directly.
    with pytest.raises(ModuleNotFoundError):
        c.create_table("new_table", df, gpu=True)

    # Path 2: a dask dataframe derived from the same data.
    with pytest.raises(ModuleNotFoundError):
        dask_frame = dd.from_pandas(df, npartitions=2)
        c.create_table("new_table", dask_frame, gpu=True)

    # Path 3: a file location registered through SQL; write the CSV first.
    df.to_csv(temporary_data_file, index=False)

    with pytest.raises(ModuleNotFoundError):
        create_statement = f"""
CREATE TABLE
new_table
WITH (
location = '{temporary_data_file}',
format = 'csv',
gpu = True
)
"""
        c.sql(create_statement)