Skip to content

Commit

Permalink
Standardize imports. (#10680)
Browse files Browse the repository at this point in the history
This PR standardizes a few imports across the cudf code base. Changes include:

- Remove some non-standard "two letter" import aliases. For example, `import numpy as np` is common, but `import pyorc as po` and `import fastavro as fa` are non-standard and are not the style used in those packages' documentation. `import cupy as cp` is left as is, since both `import cupy` and `import cupy as cp` are prevalent in the code base; the one exception is a file that contained both `import cupy` and `import cupy as cp`, which now uses only `import cupy`.
- Avoid the pattern `from some_package import x as x` -- just write `from some_package import x`
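- Fix some `cimport`s (e.g. use `from cython.operator cimport dereference` rather than `from cython.operator import dereference`)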
- Always use `import datetime` instead of `import datetime as dt` to avoid conflicts with the many other `dt` names in our code (including local names that had the potential to shadow/overwrite the library's name)
- Use `warnings.warn` rather than `from warnings import warn` for consistency across the library
- Remove some legacy Python 2 compatibility code (e.g. `from __future__ import print_function`)

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #10680
  • Loading branch information
bdice authored Apr 18, 2022
1 parent 45c003d commit c322cba
Show file tree
Hide file tree
Showing 20 changed files with 101 additions and 97 deletions.
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import pandas as pd
import rmm

import cudf
import cudf._lib as libcudfxx
import cudf._lib as libcudf
from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
from cudf.core.buffer import Buffer

Expand Down Expand Up @@ -160,7 +160,7 @@ cdef class Column:
if self.base_mask is None or self.offset == 0:
self._mask = self.base_mask
else:
self._mask = libcudfxx.null_mask.copy_bitmask(self)
self._mask = libcudf.null_mask.copy_bitmask(self)
return self._mask

@property
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/_lib/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from enum import Enum

Expand All @@ -8,9 +8,6 @@ from libcpp.utility cimport move
from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

from cudf._lib.column cimport Column

import cudf._lib as libcudfxx

from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.null_mask cimport (
bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/parquet.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

# cython: boundscheck = False

Expand All @@ -17,7 +17,7 @@ except ImportError:
import json

import numpy as np
from cython.operator import dereference
from cython.operator cimport dereference

from cudf.api.types import (
is_categorical_dtype,
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/_lib/rolling.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.

from __future__ import print_function
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import pandas as pd

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/algorithms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
from warnings import warn
import warnings

import cupy as cp
import numpy as np
Expand Down Expand Up @@ -50,7 +50,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
raise NotImplementedError("na_sentinel can not be None.")

if size_hint:
warn("size_hint is not applicable for cudf.factorize")
warnings.warn("size_hint is not applicable for cudf.factorize")

return_cupy_array = isinstance(values, cp.ndarray)

Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

import datetime as dt
import datetime
import locale
import re
from locale import nl_langinfo
Expand Down Expand Up @@ -237,9 +237,9 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:
if isinstance(other, (cudf.Scalar, ColumnBase, cudf.DateOffset)):
return other

if isinstance(other, dt.datetime):
if isinstance(other, datetime.datetime):
other = np.datetime64(other)
elif isinstance(other, dt.timedelta):
elif isinstance(other, datetime.timedelta):
other = np.timedelta64(other)
elif isinstance(other, pd.Timestamp):
other = other.to_datetime64()
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

import warnings
from decimal import Decimal
from typing import Any, Sequence, Tuple, Union, cast
from warnings import warn

import cupy as cp
import numpy as np
Expand Down Expand Up @@ -43,7 +43,7 @@ def as_decimal_column(
isinstance(dtype, cudf.core.dtypes.DecimalDtype)
and dtype.scale < self.dtype.scale
):
warn(
warnings.warn(
"cuDF truncates when downcasting decimals to a lower scale. "
"To round, use Series.round() or DataFrame.round()."
)
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

import datetime as dt
import datetime
from typing import Any, Sequence, cast

import numpy as np
Expand Down Expand Up @@ -211,7 +211,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
def normalize_binop_value(self, other) -> ColumnBinaryOperand:
if isinstance(other, (ColumnBase, cudf.Scalar)):
return other
if isinstance(other, dt.timedelta):
if isinstance(other, datetime.timedelta):
other = np.timedelta64(other)
elif isinstance(other, pd.Timestamp):
other = other.to_datetime64()
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5596,14 +5596,14 @@ def select_dtypes(self, include=None, exclude=None):
@ioutils.doc_to_parquet()
def to_parquet(self, path, *args, **kwargs):
"""{docstring}"""
from cudf.io import parquet as pq
from cudf.io import parquet

return pq.to_parquet(self, path, *args, **kwargs)
return parquet.to_parquet(self, path, *args, **kwargs)

@ioutils.doc_to_feather()
def to_feather(self, path, *args, **kwargs):
"""{docstring}"""
from cudf.io import feather as feather
from cudf.io import feather

feather.to_feather(self, path, *args, **kwargs)

Expand All @@ -5623,7 +5623,7 @@ def to_csv(
**kwargs,
):
"""{docstring}"""
from cudf.io import csv as csv
from cudf.io import csv

return csv.to_csv(
self,
Expand All @@ -5643,7 +5643,7 @@ def to_csv(
@ioutils.doc_to_orc()
def to_orc(self, fname, compression=None, *args, **kwargs):
"""{docstring}"""
from cudf.io import orc as orc
from cudf.io import orc

orc.to_orc(self, fname, compression, *args, **kwargs)

Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/subword_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

from __future__ import annotations

import warnings
from typing import Union
from warnings import warn

import cupy as cp

Expand Down Expand Up @@ -186,7 +186,7 @@ def __call__(
"When truncation is not True, the behaviour currently differs "
"from HuggingFace as cudf always returns overflowing tokens"
)
warn(warning_msg)
warnings.warn(warning_msg)

if padding != "max_length":
error_msg = (
Expand Down
18 changes: 10 additions & 8 deletions python/cudf/cudf/tests/test_api_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import numpy as np
import pandas as pd
import pytest
from pandas.api import types as ptypes
from pandas.api import types as pd_types

import cudf
from cudf.api import types as types
from cudf.api import types


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1035,11 +1035,13 @@ def test_is_decimal_dtype(obj, expect):
),
)
def test_pandas_agreement(obj):
assert types.is_categorical_dtype(obj) == ptypes.is_categorical_dtype(obj)
assert types.is_numeric_dtype(obj) == ptypes.is_numeric_dtype(obj)
assert types.is_integer_dtype(obj) == ptypes.is_integer_dtype(obj)
assert types.is_integer(obj) == ptypes.is_integer(obj)
assert types.is_string_dtype(obj) == ptypes.is_string_dtype(obj)
assert types.is_categorical_dtype(obj) == pd_types.is_categorical_dtype(
obj
)
assert types.is_numeric_dtype(obj) == pd_types.is_numeric_dtype(obj)
assert types.is_integer_dtype(obj) == pd_types.is_integer_dtype(obj)
assert types.is_integer(obj) == pd_types.is_integer(obj)
assert types.is_string_dtype(obj) == pd_types.is_string_dtype(obj)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1115,7 +1117,7 @@ def test_pandas_agreement(obj):
),
)
def test_pandas_agreement_scalar(obj):
assert types.is_scalar(obj) == ptypes.is_scalar(obj)
assert types.is_scalar(obj) == pd_types.is_scalar(obj)


# TODO: Add test of interval.
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/tests/test_contains.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from datetime import datetime as dt
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -41,12 +43,12 @@ def get_string_series():
testdata_all = [
(
cudf_date_series("20010101", "20020215", freq="400h"),
dt.strptime("2001-01-01", "%Y-%m-%d"),
datetime.datetime.strptime("2001-01-01", "%Y-%m-%d"),
True,
),
(
cudf_date_series("20010101", "20020215", freq="400h"),
dt.strptime("2000-01-01", "%Y-%m-%d"),
datetime.datetime.strptime("2000-01-01", "%Y-%m-%d"),
False,
),
(cudf_date_series("20010101", "20020215", freq="400h"), 20000101, False),
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from copy import copy

import cupy
import cupy as cp
import numpy as np
import pandas as pd
import pyarrow as pa
Expand Down Expand Up @@ -7332,7 +7331,7 @@ def test_sample_axis_0(

@pytest.mark.parametrize("replace", [True, False])
@pytest.mark.parametrize(
"random_state_lib", [cp.random.RandomState, np.random.RandomState]
"random_state_lib", [cupy.random.RandomState, np.random.RandomState]
)
def test_sample_reproducibility(replace, random_state_lib):
df = cudf.DataFrame({"a": cupy.arange(0, 1024)})
Expand Down Expand Up @@ -7384,7 +7383,7 @@ def test_oversample_without_replace(n, frac, axis):
)


@pytest.mark.parametrize("random_state", [None, cp.random.RandomState(42)])
@pytest.mark.parametrize("random_state", [None, cupy.random.RandomState(42)])
def test_sample_unsupported_arguments(random_state):
df = cudf.DataFrame({"float": [0.05, 0.2, 0.3, 0.2, 0.25]})
with pytest.raises(
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import datetime
import datetime as dt
import operator
import re

Expand Down Expand Up @@ -219,8 +218,8 @@ def test_sort_datetime():

def test_issue_165():
df_pandas = pd.DataFrame()
start_date = dt.datetime.strptime("2000-10-21", "%Y-%m-%d")
data = [(start_date + dt.timedelta(days=x)) for x in range(6)]
start_date = datetime.datetime.strptime("2000-10-21", "%Y-%m-%d")
data = [(start_date + datetime.timedelta(days=x)) for x in range(6)]
df_pandas["dates"] = data
df_pandas["num"] = [1, 2, 3, 4, 5, 6]
df_cudf = DataFrame.from_pandas(df_pandas)
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_duplicates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import itertools as it
import itertools
import random

import numpy as np
Expand Down Expand Up @@ -280,7 +280,7 @@ def test_drop_duplicates_empty(df):

@pytest.mark.parametrize("num_columns", [3, 4, 5])
def test_dataframe_drop_duplicates_numeric_method(num_columns):
comb = list(it.permutations(range(num_columns), num_columns))
comb = list(itertools.permutations(range(num_columns), num_columns))
shuf = list(comb)
random.Random(num_columns).shuffle(shuf)

Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/tests/test_hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import os
from io import BytesIO

import fastavro as fa
import fastavro
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
from pyarrow import orc as orc
from pyarrow import orc

import cudf
from cudf.testing._utils import assert_eq
Expand Down Expand Up @@ -253,7 +253,7 @@ def test_read_avro(datadir, hdfs, test_url):

got = cudf.read_avro(hd_fpath)
with open(fname, mode="rb") as f:
expect = pd.DataFrame.from_records(fa.reader(f))
expect = pd.DataFrame.from_records(fastavro.reader(f))

for col in expect.columns:
expect[col] = expect[col].astype(got[col].dtype)
Expand Down
Loading

0 comments on commit c322cba

Please sign in to comment.