Merge pull request #83 from ONSdigital/rename-cdsw-to-cdp
Rename Module: cdsw to cdp
dombean authored May 10, 2024
2 parents f406687 + 2eff3a6 commit d8aa920
Showing 15 changed files with 52 additions and 51 deletions.
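This is a breaking change for downstream code: any import that references the old `rdsa_utils.cdsw` package path must be updated to `rdsa_utils.cdp`. A minimal before/after sketch, using two functions that appear in the diffs below (`load_and_validate_table` and `file_exists`); as far as this diff shows, only the package path changes, not the function names:

```python
# Before this release: imports used the old cdsw package path
# from rdsa_utils.cdsw.io.input import load_and_validate_table
# from rdsa_utils.cdsw.helpers.hdfs_utils import file_exists

# From this release on: the same modules live under rdsa_utils.cdp
from rdsa_utils.cdp.io.input import load_and_validate_table
from rdsa_utils.cdp.helpers.hdfs_utils import file_exists
```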
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -10,7 +10,8 @@ and this project adheres to [semantic versioning](https://semver.org/spec/v2.0.0
### Added

### Changed

+- **Breaking Change**: Renamed module `cdsw` to `cdp` (Cloudera Data Platform).

### Deprecated

### Fixed
12 changes: 6 additions & 6 deletions docs/reference.md
@@ -9,13 +9,13 @@ reference for the technical implementation of the `rdsa-utils` codebase.
::: rdsa_utils.typing
::: rdsa_utils.validation

-## CDSW
+## CDP

-::: rdsa_utils.cdsw.helpers.hdfs_utils
-::: rdsa_utils.cdsw.helpers.impala
-::: rdsa_utils.cdsw.io.pipeline_runlog
-::: rdsa_utils.cdsw.io.input
-::: rdsa_utils.cdsw.io.output
+::: rdsa_utils.cdp.helpers.hdfs_utils
+::: rdsa_utils.cdp.helpers.impala
+::: rdsa_utils.cdp.io.pipeline_runlog
+::: rdsa_utils.cdp.io.input
+::: rdsa_utils.cdp.io.output

## GCP

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion rdsa_utils/cdsw/io/input.py → rdsa_utils/cdp/io/input.py
@@ -1,4 +1,4 @@
"""Read inputs on CDSW."""
"""Read inputs on CDP."""
import logging
from typing import Tuple

6 changes: 3 additions & 3 deletions rdsa_utils/cdsw/io/output.py → rdsa_utils/cdp/io/output.py
@@ -1,4 +1,4 @@
"""Write outputs on CDSW."""
"""Write outputs on CDP."""
import logging
from typing import Union

@@ -7,8 +7,8 @@
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

-from rdsa_utils.cdsw.helpers.hdfs_utils import delete_path, file_exists, rename
-from rdsa_utils.cdsw.io.input import load_and_validate_table
+from rdsa_utils.cdp.helpers.hdfs_utils import delete_path, file_exists, rename
+from rdsa_utils.cdp.io.input import load_and_validate_table
from rdsa_utils.exceptions import (
ColumnNotInDataframeError,
DataframeEmptyError,
@@ -11,7 +11,7 @@
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

-from rdsa_utils.cdsw.helpers.hdfs_utils import create_txt_from_string
+from rdsa_utils.cdp.helpers.hdfs_utils import create_txt_from_string

logger = logging.getLogger(__name__)

@@ -5,7 +5,7 @@

import pytest

-from rdsa_utils.cdsw.helpers.hdfs_utils import (
+from rdsa_utils.cdp.helpers.hdfs_utils import (
_perform,
change_permissions,
copy,
@@ -240,9 +240,9 @@ def test_create_txt_from_string(
):
"""Verify 'echo | hadoop fs -put -' command execution by create_txt_from_string."""
with patch('subprocess.call') as subprocess_mock, patch(
-'rdsa_utils.cdsw.helpers.hdfs_utils.file_exists',
+'rdsa_utils.cdp.helpers.hdfs_utils.file_exists',
) as file_exists_mock, patch(
-'rdsa_utils.cdsw.helpers.hdfs_utils.delete_file',
+'rdsa_utils.cdp.helpers.hdfs_utils.delete_file',
) as delete_file_mock:
file_exists_mock.return_value = (
replace # Assume file exists if replace is True
@@ -1,7 +1,7 @@
"""Tests for impala.py module."""
import subprocess

-from rdsa_utils.cdsw.helpers.impala import invalidate_impala_metadata
+from rdsa_utils.cdp.helpers.impala import invalidate_impala_metadata


class TestInvalidateImpalaMetadata:
@@ -1,11 +1,11 @@
"""Tests for the cdsw/io/input.py module."""
"""Tests for the cdp/io/input.py module."""
from unittest.mock import MagicMock

import pytest
from pyspark.sql import DataFrame as SparkDF

+from rdsa_utils.cdp.io.input import *
from rdsa_utils.exceptions import DataframeEmptyError
-from rdsa_utils.cdsw.io.input import *


class TestGetCurrentDatabase:
@@ -1,12 +1,12 @@
"""Tests for the cdsw/io/output.py module."""
"""Tests for the cdp/io/output.py module."""
from typing import Callable
from unittest.mock import Mock, patch

import pytest
from pyspark.sql import DataFrame as SparkDF
from pyspark.sql import types as T

-from rdsa_utils.cdsw.io.output import *
+from rdsa_utils.cdp.io.output import *


class TestInsertDataFrameToHiveTable:
@@ -137,8 +137,8 @@ def mock_df(self):
mock_df.columns = ['run_id', 'data']
return mock_df

-@patch('rdsa_utils.cdsw.io.output.load_and_validate_table')
-@patch('rdsa_utils.cdsw.io.output.insert_df_to_hive_table')
+@patch('rdsa_utils.cdp.io.output.load_and_validate_table')
+@patch('rdsa_utils.cdp.io.output.insert_df_to_hive_table')
def test_write_and_read_hive_table_success(
self,
mock_insert,
@@ -225,10 +225,10 @@ def mock_df(self) -> Mock:
"""Fixture for mocked Spark DataFrame."""
return Mock(spec=SparkDF)

-@patch('rdsa_utils.cdsw.io.output.logger')
-@patch('rdsa_utils.cdsw.io.output.delete_path')
-@patch('rdsa_utils.cdsw.io.output.rename')
-@patch('rdsa_utils.cdsw.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.logger')
+@patch('rdsa_utils.cdp.io.output.delete_path')
+@patch('rdsa_utils.cdp.io.output.rename')
+@patch('rdsa_utils.cdp.io.output.file_exists')
def test_save_csv_to_hdfs_success(
self,
mock_file_exists,
@@ -251,10 +251,10 @@ def test_save_csv_to_hdfs_success(
mock_delete_path.assert_called_once()
assert mock_logger.info.call_count > 0

-@patch('rdsa_utils.cdsw.io.output.file_exists')
-@patch('rdsa_utils.cdsw.io.output.rename')
-@patch('rdsa_utils.cdsw.io.output.delete_path')
-@patch('rdsa_utils.cdsw.io.output.logger')
+@patch('rdsa_utils.cdp.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.rename')
+@patch('rdsa_utils.cdp.io.output.delete_path')
+@patch('rdsa_utils.cdp.io.output.logger')
def test_overwriting_existing_file(
self,
mock_logger,
@@ -275,7 +275,7 @@ def test_overwriting_existing_file(

mock_rename.assert_called_once()

-@patch('rdsa_utils.cdsw.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.file_exists')
def test_save_csv_to_hdfs_file_exists_error(
self,
mock_file_exists,
@@ -307,10 +307,10 @@ def test_save_csv_to_hdfs_invalid_file_name(self, mock_df):
('/user/hdfs/test/path', '/user/hdfs/test/path/should_write.csv'),
],
)
-@patch('rdsa_utils.cdsw.io.output.file_exists')
-@patch('rdsa_utils.cdsw.io.output.rename')
-@patch('rdsa_utils.cdsw.io.output.delete_path')
-@patch('rdsa_utils.cdsw.io.output.logger')
+@patch('rdsa_utils.cdp.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.rename')
+@patch('rdsa_utils.cdp.io.output.delete_path')
+@patch('rdsa_utils.cdp.io.output.logger')
def test_file_path_schemes(
self,
mock_logger,
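The test changes above highlight a second migration step: `unittest.mock.patch` resolves its target from a dotted-path string at runtime, so hard-coded `'rdsa_utils.cdsw...'` targets break even after the imports are fixed. A short sketch of the updated pattern used throughout these tests (the target string is taken from the diff; the surrounding test body is illustrative only):

```python
from unittest.mock import patch

# Old target string no longer resolves after the rename:
#   patch('rdsa_utils.cdsw.io.output.file_exists')
# Updated target, matching the renamed package path:
with patch('rdsa_utils.cdp.io.output.file_exists') as mock_file_exists:
    mock_file_exists.return_value = True
    # ... exercise the code under test here
```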
@@ -3,7 +3,7 @@
import pytest
from pyspark.sql import DataFrame

-from rdsa_utils.cdsw.io.pipeline_runlog import (
+from rdsa_utils.cdp.io.pipeline_runlog import (
_get_run_ids,
_write_entry,
add_runlog_entry,
@@ -134,7 +134,7 @@ def test_reserve_id_non_empty(self, mocker):

# Mock _write_entry function
mock_write_entry = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Mock pyspark.sql.functions.max
@@ -173,7 +173,7 @@ def test_reserve_id_edge_case(self, mocker):

# Mock _write_entry function
mock_write_entry = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Mock pyspark.sql.functions.max
@@ -299,7 +299,7 @@ def test_get_last_run_id_general_pipeline_non_empty(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[3, 2, 1],
)

@@ -319,7 +319,7 @@ def test_get_last_run_id_specific_pipeline_empty(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[],
)

@@ -352,7 +352,7 @@ def test_penultimate_run_id_non_empty(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[3, 2, 1],
)

@@ -387,7 +387,7 @@ def test_penultimate_run_id_edge_cases(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[1],
)

@@ -409,7 +409,7 @@ def test_penultimate_run_id_edge_cases(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[],
)

@@ -512,16 +512,16 @@ def test_add_runlog_entry(self, mocker):

# Mock reserve_id, create_runlog_entry, _write_entry
reserve_id_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.reserve_id',
+'rdsa_utils.cdp.io.pipeline_runlog.reserve_id',
return_value=1,
)
entry_mock = mocker.Mock()
create_runlog_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.create_runlog_entry',
+'rdsa_utils.cdp.io.pipeline_runlog.create_runlog_entry',
return_value=entry_mock,
)
_write_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Call function
@@ -565,11 +565,11 @@ def test_add_runlog_entry_specified_id(self, mocker):
# Mock create_runlog_entry, _write_entry
entry_mock = mocker.Mock()
create_runlog_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.create_runlog_entry',
+'rdsa_utils.cdp.io.pipeline_runlog.create_runlog_entry',
return_value=entry_mock,
)
_write_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Call function
@@ -613,11 +613,11 @@ def test_write_runlog_file(self, mocker):

# Mock _parse_runlog_as_string and create_txt_from_string
parse_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._parse_runlog_as_string',
+'rdsa_utils.cdp.io.pipeline_runlog._parse_runlog_as_string',
)
parse_mock.return_value = 'test metadata'
create_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.create_txt_from_string',
+'rdsa_utils.cdp.io.pipeline_runlog.create_txt_from_string',
)

# Call function
@@ -639,7 +639,7 @@ def test_write_runlog_file_edge_case(self, mocker):

# Mock _parse_runlog_as_string
parse_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._parse_runlog_as_string',
+'rdsa_utils.cdp.io.pipeline_runlog._parse_runlog_as_string',
)
parse_mock.return_value = 'test metadata'

