Merge pull request #83 from ONSdigital/rename-cdsw-to-cdp
Rename Module: cdsw to cdp
dombean authored May 10, 2024
2 parents f406687 + 2eff3a6 commit d8aa920
Showing 15 changed files with 52 additions and 51 deletions.
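This is a breaking change for downstream code: any import that references the old `rdsa_utils.cdsw` package path must be updated to `rdsa_utils.cdp`. A minimal before/after sketch, using two functions that appear in the diffs below (`load_and_validate_table` and `file_exists`); as far as this diff shows, only the package path changes, not the function names:

```python
# Before this release: imports used the old cdsw package path
# from rdsa_utils.cdsw.io.input import load_and_validate_table
# from rdsa_utils.cdsw.helpers.hdfs_utils import file_exists

# From this release on: the same modules live under rdsa_utils.cdp
from rdsa_utils.cdp.io.input import load_and_validate_table
from rdsa_utils.cdp.helpers.hdfs_utils import file_exists
```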
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -10,7 +10,8 @@ and this project adheres to [semantic versioning](https://semver.org/spec/v2.0.0
### Added

### Changed

+- **Breaking Change**: Renamed module `cdsw` to `cdp` (Cloudera Data Platform).

### Deprecated

### Fixed
12 changes: 6 additions & 6 deletions docs/reference.md
@@ -9,13 +9,13 @@ reference for the technical implementation of the `rdsa-utils` codebase.
::: rdsa_utils.typing
::: rdsa_utils.validation

-## CDSW
+## CDP

-::: rdsa_utils.cdsw.helpers.hdfs_utils
-::: rdsa_utils.cdsw.helpers.impala
-::: rdsa_utils.cdsw.io.pipeline_runlog
-::: rdsa_utils.cdsw.io.input
-::: rdsa_utils.cdsw.io.output
+::: rdsa_utils.cdp.helpers.hdfs_utils
+::: rdsa_utils.cdp.helpers.impala
+::: rdsa_utils.cdp.io.pipeline_runlog
+::: rdsa_utils.cdp.io.input
+::: rdsa_utils.cdp.io.output

## GCP

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion rdsa_utils/cdsw/io/input.py → rdsa_utils/cdp/io/input.py
@@ -1,4 +1,4 @@
"""Read inputs on CDSW."""
"""Read inputs on CDP."""
import logging
from typing import Tuple

6 changes: 3 additions & 3 deletions rdsa_utils/cdsw/io/output.py → rdsa_utils/cdp/io/output.py
@@ -1,4 +1,4 @@
"""Write outputs on CDSW."""
"""Write outputs on CDP."""
import logging
from typing import Union

@@ -7,8 +7,8 @@
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

-from rdsa_utils.cdsw.helpers.hdfs_utils import delete_path, file_exists, rename
-from rdsa_utils.cdsw.io.input import load_and_validate_table
+from rdsa_utils.cdp.helpers.hdfs_utils import delete_path, file_exists, rename
+from rdsa_utils.cdp.io.input import load_and_validate_table
from rdsa_utils.exceptions import (
ColumnNotInDataframeError,
DataframeEmptyError,
@@ -11,7 +11,7 @@
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

-from rdsa_utils.cdsw.helpers.hdfs_utils import create_txt_from_string
+from rdsa_utils.cdp.helpers.hdfs_utils import create_txt_from_string

logger = logging.getLogger(__name__)

@@ -5,7 +5,7 @@

import pytest

-from rdsa_utils.cdsw.helpers.hdfs_utils import (
+from rdsa_utils.cdp.helpers.hdfs_utils import (
_perform,
change_permissions,
copy,
@@ -240,9 +240,9 @@ def test_create_txt_from_string(
):
"""Verify 'echo | hadoop fs -put -' command execution by create_txt_from_string."""
with patch('subprocess.call') as subprocess_mock, patch(
-'rdsa_utils.cdsw.helpers.hdfs_utils.file_exists',
+'rdsa_utils.cdp.helpers.hdfs_utils.file_exists',
) as file_exists_mock, patch(
-'rdsa_utils.cdsw.helpers.hdfs_utils.delete_file',
+'rdsa_utils.cdp.helpers.hdfs_utils.delete_file',
) as delete_file_mock:
file_exists_mock.return_value = (
replace # Assume file exists if replace is True
@@ -1,7 +1,7 @@
"""Tests for impala.py module."""
import subprocess

-from rdsa_utils.cdsw.helpers.impala import invalidate_impala_metadata
+from rdsa_utils.cdp.helpers.impala import invalidate_impala_metadata


class TestInvalidateImpalaMetadata:
@@ -1,11 +1,11 @@
"""Tests for the cdsw/io/input.py module."""
"""Tests for the cdp/io/input.py module."""
from unittest.mock import MagicMock

import pytest
from pyspark.sql import DataFrame as SparkDF

+from rdsa_utils.cdp.io.input import *
from rdsa_utils.exceptions import DataframeEmptyError
-from rdsa_utils.cdsw.io.input import *


class TestGetCurrentDatabase:
@@ -1,12 +1,12 @@
"""Tests for the cdsw/io/output.py module."""
"""Tests for the cdp/io/output.py module."""
from typing import Callable
from unittest.mock import Mock, patch

import pytest
from pyspark.sql import DataFrame as SparkDF
from pyspark.sql import types as T

-from rdsa_utils.cdsw.io.output import *
+from rdsa_utils.cdp.io.output import *


class TestInsertDataFrameToHiveTable:
@@ -137,8 +137,8 @@ def mock_df(self):
mock_df.columns = ['run_id', 'data']
return mock_df

-@patch('rdsa_utils.cdsw.io.output.load_and_validate_table')
-@patch('rdsa_utils.cdsw.io.output.insert_df_to_hive_table')
+@patch('rdsa_utils.cdp.io.output.load_and_validate_table')
+@patch('rdsa_utils.cdp.io.output.insert_df_to_hive_table')
def test_write_and_read_hive_table_success(
self,
mock_insert,
@@ -225,10 +225,10 @@ def mock_df(self) -> Mock:
"""Fixture for mocked Spark DataFrame."""
return Mock(spec=SparkDF)

-@patch('rdsa_utils.cdsw.io.output.logger')
-@patch('rdsa_utils.cdsw.io.output.delete_path')
-@patch('rdsa_utils.cdsw.io.output.rename')
-@patch('rdsa_utils.cdsw.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.logger')
+@patch('rdsa_utils.cdp.io.output.delete_path')
+@patch('rdsa_utils.cdp.io.output.rename')
+@patch('rdsa_utils.cdp.io.output.file_exists')
def test_save_csv_to_hdfs_success(
self,
mock_file_exists,
@@ -251,10 +251,10 @@ def test_save_csv_to_hdfs_success(
mock_delete_path.assert_called_once()
assert mock_logger.info.call_count > 0

-@patch('rdsa_utils.cdsw.io.output.file_exists')
-@patch('rdsa_utils.cdsw.io.output.rename')
-@patch('rdsa_utils.cdsw.io.output.delete_path')
-@patch('rdsa_utils.cdsw.io.output.logger')
+@patch('rdsa_utils.cdp.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.rename')
+@patch('rdsa_utils.cdp.io.output.delete_path')
+@patch('rdsa_utils.cdp.io.output.logger')
def test_overwriting_existing_file(
self,
mock_logger,
@@ -275,7 +275,7 @@ def test_overwriting_existing_file(

mock_rename.assert_called_once()

-@patch('rdsa_utils.cdsw.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.file_exists')
def test_save_csv_to_hdfs_file_exists_error(
self,
mock_file_exists,
@@ -307,10 +307,10 @@ def test_save_csv_to_hdfs_invalid_file_name(self, mock_df):
('/user/hdfs/test/path', '/user/hdfs/test/path/should_write.csv'),
],
)
-@patch('rdsa_utils.cdsw.io.output.file_exists')
-@patch('rdsa_utils.cdsw.io.output.rename')
-@patch('rdsa_utils.cdsw.io.output.delete_path')
-@patch('rdsa_utils.cdsw.io.output.logger')
+@patch('rdsa_utils.cdp.io.output.file_exists')
+@patch('rdsa_utils.cdp.io.output.rename')
+@patch('rdsa_utils.cdp.io.output.delete_path')
+@patch('rdsa_utils.cdp.io.output.logger')
def test_file_path_schemes(
self,
mock_logger,
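The test changes above highlight a second migration step: `unittest.mock.patch` resolves its target from a dotted-path string at runtime, so hard-coded `'rdsa_utils.cdsw...'` targets break even after the imports are fixed. A short sketch of the updated pattern used throughout these tests (the target string is taken from the diff; the surrounding test body is illustrative only):

```python
from unittest.mock import patch

# Old target string no longer resolves after the rename:
#   patch('rdsa_utils.cdsw.io.output.file_exists')
# Updated target, matching the renamed package path:
with patch('rdsa_utils.cdp.io.output.file_exists') as mock_file_exists:
    mock_file_exists.return_value = True
    # ... exercise the code under test here
```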
@@ -3,7 +3,7 @@
import pytest
from pyspark.sql import DataFrame

-from rdsa_utils.cdsw.io.pipeline_runlog import (
+from rdsa_utils.cdp.io.pipeline_runlog import (
_get_run_ids,
_write_entry,
add_runlog_entry,
@@ -134,7 +134,7 @@ def test_reserve_id_non_empty(self, mocker):

# Mock _write_entry function
mock_write_entry = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Mock pyspark.sql.functions.max
@@ -173,7 +173,7 @@ def test_reserve_id_edge_case(self, mocker):

# Mock _write_entry function
mock_write_entry = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Mock pyspark.sql.functions.max
@@ -299,7 +299,7 @@ def test_get_last_run_id_general_pipeline_non_empty(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[3, 2, 1],
)

@@ -319,7 +319,7 @@ def test_get_last_run_id_specific_pipeline_empty(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[],
)

@@ -352,7 +352,7 @@ def test_penultimate_run_id_non_empty(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[3, 2, 1],
)

@@ -387,7 +387,7 @@ def test_penultimate_run_id_edge_cases(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[1],
)

@@ -409,7 +409,7 @@ def test_penultimate_run_id_edge_cases(self, mocker):

# Patch _get_run_ids function and return a Mock object
get_run_ids_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
return_value=[],
)

@@ -512,16 +512,16 @@ def test_add_runlog_entry(self, mocker):

# Mock reserve_id, create_runlog_entry, _write_entry
reserve_id_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.reserve_id',
+'rdsa_utils.cdp.io.pipeline_runlog.reserve_id',
return_value=1,
)
entry_mock = mocker.Mock()
create_runlog_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.create_runlog_entry',
+'rdsa_utils.cdp.io.pipeline_runlog.create_runlog_entry',
return_value=entry_mock,
)
_write_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Call function
@@ -565,11 +565,11 @@ def test_add_runlog_entry_specified_id(self, mocker):
# Mock create_runlog_entry, _write_entry
entry_mock = mocker.Mock()
create_runlog_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.create_runlog_entry',
+'rdsa_utils.cdp.io.pipeline_runlog.create_runlog_entry',
return_value=entry_mock,
)
_write_entry_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
)

# Call function
@@ -613,11 +613,11 @@ def test_write_runlog_file(self, mocker):

# Mock _parse_runlog_as_string and create_txt_from_string
parse_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._parse_runlog_as_string',
+'rdsa_utils.cdp.io.pipeline_runlog._parse_runlog_as_string',
)
parse_mock.return_value = 'test metadata'
create_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog.create_txt_from_string',
+'rdsa_utils.cdp.io.pipeline_runlog.create_txt_from_string',
)

# Call function
@@ -639,7 +639,7 @@ def test_write_runlog_file_edge_case(self, mocker):

# Mock _parse_runlog_as_string
parse_mock = mocker.patch(
-'rdsa_utils.cdsw.io.pipeline_runlog._parse_runlog_as_string',
+'rdsa_utils.cdp.io.pipeline_runlog._parse_runlog_as_string',
)
parse_mock.return_value = 'test metadata'

