diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py
index 3ef655560..2d47d2324 100644
--- a/awswrangler/__init__.py
+++ b/awswrangler/__init__.py
@@ -7,7 +7,7 @@
import logging as _logging
-from awswrangler import (  # noqa
+from awswrangler import (
    athena,
    catalog,
    chime,
@@ -34,9 +34,9 @@
    timestream,
    typing,
)
-from awswrangler.__metadata__ import __description__, __license__, __title__, __version__  # noqa
-from awswrangler._config import config  # noqa
-from awswrangler._distributed import EngineEnum, MemoryFormatEnum, engine, memory_format  # noqa
+from awswrangler.__metadata__ import __description__, __license__, __title__, __version__
+from awswrangler._config import config
+from awswrangler._distributed import EngineEnum, MemoryFormatEnum, engine, memory_format
engine.register()
diff --git a/awswrangler/_databases.py b/awswrangler/_databases.py
index d6b78f2a9..10d901c13 100644
--- a/awswrangler/_databases.py
+++ b/awswrangler/_databases.py
@@ -132,8 +132,8 @@ def _convert_params(sql: str, params: Optional[Union[List[Any], Tuple[Any, ...],
    args: List[Any] = [sql]
    if params is not None:
        if hasattr(params, "keys"):
-            return args + [params]
-        return args + [list(params)]
+            return [*args, params]
+        return [*args, list(params)]
    return args
diff --git a/awswrangler/_distributed.py b/awswrangler/_distributed.py
index 36f1bba9d..4b521f536 100644
--- a/awswrangler/_distributed.py
+++ b/awswrangler/_distributed.py
@@ -9,7 +9,7 @@
from enum import Enum, unique
from functools import wraps
from importlib import reload
-from typing import Any, Callable, Dict, Literal, Optional, TypeVar, cast
+from typing import Any, Callable, ClassVar, Dict, Literal, Optional, TypeVar, cast
EngineLiteral = Literal["python", "ray"]
MemoryFormatLiteral = Literal["pandas", "modin"]
@@ -42,7 +42,7 @@ class Engine:
    _engine: Optional[EngineEnum] = EngineEnum[WR_ENGINE.upper()] if WR_ENGINE else None
    _initialized_engine: Optional[EngineEnum] = None
-    _registry: Dict[EngineLiteral, Dict[str, Callable[..., Any]]] = defaultdict(dict)
+    _registry: ClassVar[Dict[EngineLiteral, Dict[str, Callable[..., Any]]]] = defaultdict(dict)
    _lock: threading.RLock = threading.RLock()
    @classmethod
diff --git a/awswrangler/athena/__init__.py b/awswrangler/athena/__init__.py
index 0321a6a93..606fbe77c 100644
--- a/awswrangler/athena/__init__.py
+++ b/awswrangler/athena/__init__.py
@@ -12,13 +12,13 @@
    delete_prepared_statement,
    list_prepared_statements,
)
-from awswrangler.athena._read import (  # noqa
+from awswrangler.athena._read import (
    get_query_results,
    read_sql_query,
    read_sql_table,
    unload,
)
-from awswrangler.athena._utils import (  # noqa
+from awswrangler.athena._utils import (
    create_athena_bucket,
    create_ctas_table,
    describe_table,
diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py
index 42f066dde..18bcc6cde 100644
--- a/awswrangler/athena/_read.py
+++ b/awswrangler/athena/_read.py
@@ -40,7 +40,7 @@
@_utils.check_optional_dependency(shapely_wkt, "shapely")
@_utils.check_optional_dependency(geopandas, "geopandas")
-def _cast_geometry(df: pd.DataFrame, parse_geometry: List[str] = None):
+def _cast_geometry(df: pd.DataFrame, parse_geometry: Optional[List[str]] = None):
    def load_geom_wkt(x):
        """Load geometry from well-known text."""
        return shapely_wkt.loads(x)
@@ -166,7 +166,7 @@ def _fetch_parquet_result(
        ret = _apply_query_metadata(df=ret, query_metadata=query_metadata)
    else:
        ret = _add_query_metadata_generator(dfs=ret, query_metadata=query_metadata)
-    paths_delete: List[str] = paths + [manifest_path, metadata_path]
+    paths_delete: List[str] = [*paths, manifest_path, metadata_path]
    if chunked is False:
        if keep_files is False:
            s3.delete_objects(
diff --git a/awswrangler/catalog/__init__.py b/awswrangler/catalog/__init__.py
index bc7ed096d..d2e600c9a 100644
--- a/awswrangler/catalog/__init__.py
+++ b/awswrangler/catalog/__init__.py
@@ -7,9 +7,7 @@
    add_orc_partitions,
    add_parquet_partitions,
)
-
-# noqa
-from awswrangler.catalog._create import (  # noqa
+from awswrangler.catalog._create import (
    _create_csv_table,
    _create_json_table,
    _create_parquet_table,
@@ -21,14 +19,14 @@
    overwrite_table_parameters,
    upsert_table_parameters,
)
-from awswrangler.catalog._delete import (  # noqa
+from awswrangler.catalog._delete import (
    delete_all_partitions,
    delete_column,
    delete_database,
    delete_partitions,
    delete_table_if_exists,
)
-from awswrangler.catalog._get import (  # noqa
+from awswrangler.catalog._get import (
    _get_table_input,
    databases,
    get_columns_comments,
@@ -48,7 +46,7 @@
    table,
    tables,
)
-from awswrangler.catalog._utils import (  # noqa
+from awswrangler.catalog._utils import (
    does_table_exist,
    drop_duplicated_columns,
    extract_athena_types,
diff --git a/awswrangler/distributed/ray/__init__.py b/awswrangler/distributed/ray/__init__.py
index 0dac9d36b..9765326db 100644
--- a/awswrangler/distributed/ray/__init__.py
+++ b/awswrangler/distributed/ray/__init__.py
@@ -1,6 +1,6 @@
"""Ray Module."""
-from awswrangler.distributed.ray._core import RayLogger, initialize_ray, ray_get, ray_logger, ray_remote  # noqa
+from awswrangler.distributed.ray._core import RayLogger, initialize_ray, ray_get, ray_logger, ray_remote
__all__ = [
    "RayLogger",
diff --git a/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py b/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py
index 8e1b58533..569e55209 100644
--- a/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py
+++ b/awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py
@@ -13,7 +13,7 @@
import numpy as np
# fs required to implicitly trigger S3 subsystem initialization
-import pyarrow.fs  # noqa: F401 pylint: disable=unused-import
+import pyarrow.fs  # pylint: disable=unused-import
from pyarrow.dataset import ParquetFileFragment
from pyarrow.lib import Schema
from ray import cloudpickle
@@ -246,7 +246,7 @@ def __init__(
                _handle_read_os_error(e, paths)
            except pyarrow.ArrowInvalid as ex:
                if "Parquet file size is 0 bytes" in str(ex):
-                    raise exceptions.InvalidFile(f"Invalid Parquet file. {str(ex)}")
+                    raise exceptions.InvalidFile(f"Invalid Parquet file. {ex!s}")
                raise
        self._pq_ds = pq_ds
        self._meta_provider = meta_provider
diff --git a/awswrangler/emr.py b/awswrangler/emr.py
index 2d061eba9..e58844ea8 100644
--- a/awswrangler/emr.py
+++ b/awswrangler/emr.py
@@ -245,7 +245,7 @@ def _build_cluster_args(**pars: Any) -> Dict[str, Any]:  # pylint: disable=too-m
            {
                "Classification": "spark-hive-site",
                "Properties": {
-                    "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"  # noqa
+                    "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                },
                "Configurations": [],
            }
diff --git a/awswrangler/lakeformation/__init__.py b/awswrangler/lakeformation/__init__.py
index 6ab8f46b4..298ce8e72 100644
--- a/awswrangler/lakeformation/__init__.py
+++ b/awswrangler/lakeformation/__init__.py
@@ -1,7 +1,7 @@
"""Amazon Lake Formation Module."""
-from awswrangler.lakeformation._read import read_sql_query, read_sql_table  # noqa
-from awswrangler.lakeformation._utils import (  # noqa
+from awswrangler.lakeformation._read import read_sql_query, read_sql_table
+from awswrangler.lakeformation._utils import (
    _build_table_objects,
    _get_table_objects,
    _update_table_objects,
diff --git a/awswrangler/lakeformation/_utils.py b/awswrangler/lakeformation/_utils.py
index 21f192807..814693e78 100644
--- a/awswrangler/lakeformation/_utils.py
+++ b/awswrangler/lakeformation/_utils.py
@@ -34,9 +34,9 @@ def _build_partition_predicate(
    partition_predicates: List[str] = []
    for col, val in zip(partition_cols, partitions_values):
        if partitions_types[col].startswith(("tinyint", "smallint", "int", "bigint", "float", "double", "decimal")):
-            partition_predicates.append(f"{col}={str(val)}")
+            partition_predicates.append(f"{col}={val!s}")
        else:
-            partition_predicates.append(f"{col}='{str(val)}'")
+            partition_predicates.append(f"{col}='{val!s}'")
    return " AND ".join(partition_predicates)
diff --git a/awswrangler/neptune/_neptune.py b/awswrangler/neptune/_neptune.py
index ff1148458..eae69cc99 100644
--- a/awswrangler/neptune/_neptune.py
+++ b/awswrangler/neptune/_neptune.py
@@ -260,10 +260,10 @@ def to_rdf_graph(
        # build up a query
        if is_quads:
            insert = f"""INSERT DATA {{ GRAPH <{row[graph_column]}> {{<{row[subject_column]}>
-                <{str(row[predicate_column])}> <{row[object_column]}> . }} }}; """
+                <{row[predicate_column]!s}> <{row[object_column]}> . }} }}; """
            query = query + insert
        else:
-            insert = f"""INSERT DATA {{ <{row[subject_column]}> <{str(row[predicate_column])}>
+            insert = f"""INSERT DATA {{ <{row[subject_column]}> <{row[predicate_column]!s}>
                <{row[object_column]}> . }}; """
            query = query + insert
    # run the query
diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py
index 8712492df..0d960d7db 100644
--- a/awswrangler/opensearch/_read.py
+++ b/awswrangler/opensearch/_read.py
@@ -116,7 +116,7 @@ def search(
    if is_scroll:
        if isinstance(filter_path, str):
            filter_path = [filter_path]
-        filter_path = ["_scroll_id", "_shards"] + list(filter_path)  # required for scroll
+        filter_path = ["_scroll_id", "_shards", *list(filter_path)]  # required for scroll
        documents_generator = opensearchpy.helpers.scan(
            client, index=index, query=search_body, filter_path=filter_path, **kwargs
        )
diff --git a/awswrangler/pandas/__init__.py b/awswrangler/pandas/__init__.py
index 232651afe..0b29bbc14 100644
--- a/awswrangler/pandas/__init__.py
+++ b/awswrangler/pandas/__init__.py
@@ -8,7 +8,7 @@
    from pandas import *  # noqa: F403
    # Explicit import because mypy doesn't support forward references to a star import
-    from pandas import (  # noqa: F401
+    from pandas import (
        DataFrame,
        Series,
        concat,
@@ -24,7 +24,7 @@
    from modin.pandas import *  # noqa: F403
    # Explicit import because mypy doesn't support forward references to a star import
-    from modin.pandas import (  # noqa: F401
+    from modin.pandas import (
        DataFrame,
        Series,
        concat,
diff --git a/awswrangler/quicksight/__init__.py b/awswrangler/quicksight/__init__.py
index 8099a6f2c..860637b9b 100644
--- a/awswrangler/quicksight/__init__.py
+++ b/awswrangler/quicksight/__init__.py
@@ -1,8 +1,8 @@
"""Amazon QuickSight Module."""
-from awswrangler.quicksight._cancel import cancel_ingestion  # noqa
-from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion  # noqa
-from awswrangler.quicksight._delete import (  # noqa
+from awswrangler.quicksight._cancel import cancel_ingestion
+from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion
+from awswrangler.quicksight._delete import (
    delete_all_dashboards,
    delete_all_data_sources,
    delete_all_datasets,
@@ -12,14 +12,14 @@
    delete_dataset,
    delete_template,
)
-from awswrangler.quicksight._describe import (  # noqa
+from awswrangler.quicksight._describe import (
    describe_dashboard,
    describe_data_source,
    describe_data_source_permissions,
    describe_dataset,
    describe_ingestion,
)
-from awswrangler.quicksight._get_list import (  # noqa
+from awswrangler.quicksight._get_list import (
    get_dashboard_id,
    get_dashboard_ids,
    get_data_source_arn,
diff --git a/awswrangler/s3/__init__.py b/awswrangler/s3/__init__.py
index c5d80a448..3c53e0034 100644
--- a/awswrangler/s3/__init__.py
+++ b/awswrangler/s3/__init__.py
@@ -1,23 +1,23 @@
"""Amazon S3 Read Module."""
-from awswrangler.s3._copy import copy_objects, merge_datasets  # noqa
-from awswrangler.s3._delete import delete_objects  # noqa
-from awswrangler.s3._describe import describe_objects, get_bucket_region, size_objects  # noqa
-from awswrangler.s3._download import download  # noqa
-from awswrangler.s3._list import does_object_exist, list_buckets, list_directories, list_objects  # noqa
-from awswrangler.s3._read_deltalake import read_deltalake  # noqa
-from awswrangler.s3._read_excel import read_excel  # noqa
-from awswrangler.s3._read_orc import read_orc, read_orc_metadata, read_orc_table  # noqa
-from awswrangler.s3._read_parquet import read_parquet, read_parquet_metadata, read_parquet_table  # noqa
-from awswrangler.s3._read_text import read_csv, read_fwf, read_json  # noqa
+from awswrangler.s3._copy import copy_objects, merge_datasets
+from awswrangler.s3._delete import delete_objects
+from awswrangler.s3._describe import describe_objects, get_bucket_region, size_objects
+from awswrangler.s3._download import download
+from awswrangler.s3._list import does_object_exist, list_buckets, list_directories, list_objects
+from awswrangler.s3._read_deltalake import read_deltalake
+from awswrangler.s3._read_excel import read_excel
+from awswrangler.s3._read_orc import read_orc, read_orc_metadata, read_orc_table
+from awswrangler.s3._read_parquet import read_parquet, read_parquet_metadata, read_parquet_table
+from awswrangler.s3._read_text import read_csv, read_fwf, read_json
from awswrangler.s3._select import select_query
-from awswrangler.s3._upload import upload  # noqa
-from awswrangler.s3._wait import wait_objects_exist, wait_objects_not_exist  # noqa
-from awswrangler.s3._write_deltalake import to_deltalake  # noqa
-from awswrangler.s3._write_excel import to_excel  # noqa
-from awswrangler.s3._write_orc import to_orc  # noqa
-from awswrangler.s3._write_parquet import store_parquet_metadata, to_parquet  # noqa
-from awswrangler.s3._write_text import to_csv, to_json  # noqa
+from awswrangler.s3._upload import upload
+from awswrangler.s3._wait import wait_objects_exist, wait_objects_not_exist
+from awswrangler.s3._write_deltalake import to_deltalake
+from awswrangler.s3._write_excel import to_excel
+from awswrangler.s3._write_orc import to_orc
+from awswrangler.s3._write_parquet import store_parquet_metadata, to_parquet
+from awswrangler.s3._write_text import to_csv, to_json
__all__ = [
    "copy_objects",
diff --git a/awswrangler/s3/_read.py b/awswrangler/s3/_read.py
index 0b5e0d65b..de191b5ae 100644
--- a/awswrangler/s3/_read.py
+++ b/awswrangler/s3/_read.py
@@ -52,7 +52,7 @@ def _get_path_ignore_suffix(path_ignore_suffix: Union[str, List[str], None]) ->
    elif path_ignore_suffix is None:
        path_ignore_suffix = ["/_SUCCESS"]
    else:
-        path_ignore_suffix = path_ignore_suffix + ["/_SUCCESS"]
+        path_ignore_suffix = [*path_ignore_suffix, "/_SUCCESS"]
    return path_ignore_suffix
diff --git a/awswrangler/timestream/_write.py b/awswrangler/timestream/_write.py
index c0c1e33d6..0f8364d3e 100644
--- a/awswrangler/timestream/_write.py
+++ b/awswrangler/timestream/_write.py
@@ -330,7 +330,7 @@ def write(
        _data_types.timestream_type_from_pandas(df.loc[:, measure_cols]) if all(measure_cols) else []
    )
    dimensions_cols = dimensions_cols if dimensions_cols else [dimensions_cols]  # type: ignore[list-item]
-    cols_names: List[Optional[str]] = [time_col] + measure_cols + dimensions_cols
+    cols_names: List[Optional[str]] = [time_col, *measure_cols, *dimensions_cols]
    measure_name = measure_name if measure_name else measure_cols[0]
    common_attributes = _sanitize_common_attributes(common_attributes, version, time_unit, measure_name)
diff --git a/fix.sh b/fix.sh
index 141fc04ed..5f08da8e4 100755
--- a/fix.sh
+++ b/fix.sh
@@ -2,4 +2,4 @@
set -ex
black .
-ruff --fix --select "I001" --select "I002" awswrangler
\ No newline at end of file
+ruff --fix awswrangler
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 7604569ac..a27204dda 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -158,9 +158,9 @@ extend_exclude = '''
'''
[tool.ruff]
-select = ["D", "E", "F", "I001", "I002", "PL", "W", "PD"]
-ignore = ["E501", "PLR2004", "PLR0911", "PLR0912", "PLR0913", "PLR0915", "PD901"]
-fixable = ["I001"]
+select = ["D", "E", "F", "I001", "I002", "PL", "W", "PD", "RUF"]
+ignore = ["E501", "PLR2004", "PLR0911", "PLR0912", "PLR0913", "PLR0915", "PD901", "RUF002", "RUF015"]
+fixable = ["I001", "I002", "RUF100", "RUF010"]
exclude = [
    ".eggs",
    ".git",
diff --git a/validate.sh b/validate.sh
index 228feda92..000209acd 100755
--- a/validate.sh
+++ b/validate.sh
@@ -2,7 +2,7 @@
set -ex
black --check .
-ruff . --ignore "PL" --ignore "D"
+ruff . --ignore "PL" --ignore "D" --ignore "RUF" --ignore "PD"
ruff awswrangler
mypy --install-types --non-interactive awswrangler
pylint -j 0 --disable=all --enable=R0911,R0912,R0913,R0915 awswrangler