From fd3bc5943060d2ac696451a6628c8a16d8af6498 Mon Sep 17 00:00:00 2001
From: dombean <46692370+dombean@users.noreply.github.com>
Date: Fri, 10 May 2024 11:54:39 +0100
Subject: [PATCH 1/4] Rename Module: cdsw to cdp

---
 docs/reference.md                             | 12 +++----
 rdsa_utils/{cdsw => cdp}/__init__.py          |  0
 rdsa_utils/{cdsw => cdp}/helpers/__init__.py  |  0
 .../{cdsw => cdp}/helpers/hdfs_utils.py       |  0
 rdsa_utils/{cdsw => cdp}/helpers/impala.py    |  0
 rdsa_utils/{cdsw => cdp}/io/__init__.py       |  0
 rdsa_utils/{cdsw => cdp}/io/input.py          |  2 +-
 rdsa_utils/{cdsw => cdp}/io/output.py         |  6 ++--
 .../{cdsw => cdp}/io/pipeline_runlog.py       |  2 +-
 tests/cdsw/helpers/test_hdfs_utils.py         |  6 ++--
 tests/cdsw/helpers/test_impala.py             |  2 +-
 tests/cdsw/io/test_cdsw_input.py              |  4 +--
 tests/cdsw/io/test_cdsw_output.py             | 34 +++++++++----------
 tests/cdsw/io/test_pipeline_runlog.py         | 32 ++++++++---------
 14 files changed, 50 insertions(+), 50 deletions(-)
 rename rdsa_utils/{cdsw => cdp}/__init__.py (100%)
 rename rdsa_utils/{cdsw => cdp}/helpers/__init__.py (100%)
 rename rdsa_utils/{cdsw => cdp}/helpers/hdfs_utils.py (100%)
 rename rdsa_utils/{cdsw => cdp}/helpers/impala.py (100%)
 rename rdsa_utils/{cdsw => cdp}/io/__init__.py (100%)
 rename rdsa_utils/{cdsw => cdp}/io/input.py (99%)
 rename rdsa_utils/{cdsw => cdp}/io/output.py (98%)
 rename rdsa_utils/{cdsw => cdp}/io/pipeline_runlog.py (99%)

diff --git a/docs/reference.md b/docs/reference.md
index 92b6c82..e800ccb 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -9,13 +9,13 @@ reference for the technical implementation of the`rdsa-utils` codebase.
 ::: rdsa_utils.typing
 ::: rdsa_utils.validation
 
-## CDSW
+## cdp
 
-::: rdsa_utils.cdsw.helpers.hdfs_utils
-::: rdsa_utils.cdsw.helpers.impala
-::: rdsa_utils.cdsw.io.pipeline_runlog
-::: rdsa_utils.cdsw.io.input
-::: rdsa_utils.cdsw.io.output
+::: rdsa_utils.cdp.helpers.hdfs_utils
+::: rdsa_utils.cdp.helpers.impala
+::: rdsa_utils.cdp.io.pipeline_runlog
+::: rdsa_utils.cdp.io.input
+::: rdsa_utils.cdp.io.output
 
 ## GCP
 
diff --git a/rdsa_utils/cdsw/__init__.py b/rdsa_utils/cdp/__init__.py
similarity index 100%
rename from rdsa_utils/cdsw/__init__.py
rename to rdsa_utils/cdp/__init__.py
diff --git a/rdsa_utils/cdsw/helpers/__init__.py b/rdsa_utils/cdp/helpers/__init__.py
similarity index 100%
rename from rdsa_utils/cdsw/helpers/__init__.py
rename to rdsa_utils/cdp/helpers/__init__.py
diff --git a/rdsa_utils/cdsw/helpers/hdfs_utils.py b/rdsa_utils/cdp/helpers/hdfs_utils.py
similarity index 100%
rename from rdsa_utils/cdsw/helpers/hdfs_utils.py
rename to rdsa_utils/cdp/helpers/hdfs_utils.py
diff --git a/rdsa_utils/cdsw/helpers/impala.py b/rdsa_utils/cdp/helpers/impala.py
similarity index 100%
rename from rdsa_utils/cdsw/helpers/impala.py
rename to rdsa_utils/cdp/helpers/impala.py
diff --git a/rdsa_utils/cdsw/io/__init__.py b/rdsa_utils/cdp/io/__init__.py
similarity index 100%
rename from rdsa_utils/cdsw/io/__init__.py
rename to rdsa_utils/cdp/io/__init__.py
diff --git a/rdsa_utils/cdsw/io/input.py b/rdsa_utils/cdp/io/input.py
similarity index 99%
rename from rdsa_utils/cdsw/io/input.py
rename to rdsa_utils/cdp/io/input.py
index dd85ca8..d62af6d 100644
--- a/rdsa_utils/cdsw/io/input.py
+++ b/rdsa_utils/cdp/io/input.py
@@ -1,4 +1,4 @@
-"""Read inputs on CDSW."""
+"""Read inputs on CDP."""
 import logging
 from typing import Tuple
 
diff --git a/rdsa_utils/cdsw/io/output.py b/rdsa_utils/cdp/io/output.py
similarity index 98%
rename from rdsa_utils/cdsw/io/output.py
rename to rdsa_utils/cdp/io/output.py
index 75845f3..b3c746f 100644
--- a/rdsa_utils/cdsw/io/output.py
+++ b/rdsa_utils/cdp/io/output.py
@@ -1,4 +1,4 @@
-"""Write outputs on CDSW."""
+"""Write outputs on CDP."""
 import logging
 from typing import Union
 
@@ -7,8 +7,8 @@
 from pyspark.sql import functions as F
 from pyspark.sql.utils import AnalysisException
 
-from rdsa_utils.cdsw.helpers.hdfs_utils import delete_path, file_exists, rename
-from rdsa_utils.cdsw.io.input import load_and_validate_table
+from rdsa_utils.cdp.helpers.hdfs_utils import delete_path, file_exists, rename
+from rdsa_utils.cdp.io.input import load_and_validate_table
 from rdsa_utils.exceptions import (
     ColumnNotInDataframeError,
     DataframeEmptyError,
diff --git a/rdsa_utils/cdsw/io/pipeline_runlog.py b/rdsa_utils/cdp/io/pipeline_runlog.py
similarity index 99%
rename from rdsa_utils/cdsw/io/pipeline_runlog.py
rename to rdsa_utils/cdp/io/pipeline_runlog.py
index 3e3b08e..c5d1b3d 100644
--- a/rdsa_utils/cdsw/io/pipeline_runlog.py
+++ b/rdsa_utils/cdp/io/pipeline_runlog.py
@@ -11,7 +11,7 @@
 from pyspark.sql import functions as F
 from pyspark.sql.utils import AnalysisException
 
-from rdsa_utils.cdsw.helpers.hdfs_utils import create_txt_from_string
+from rdsa_utils.cdp.helpers.hdfs_utils import create_txt_from_string
 
 logger = logging.getLogger(__name__)
 
diff --git a/tests/cdsw/helpers/test_hdfs_utils.py b/tests/cdsw/helpers/test_hdfs_utils.py
index 8f004b8..bdb7a5b 100644
--- a/tests/cdsw/helpers/test_hdfs_utils.py
+++ b/tests/cdsw/helpers/test_hdfs_utils.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from rdsa_utils.cdsw.helpers.hdfs_utils import (
+from rdsa_utils.cdp.helpers.hdfs_utils import (
     _perform,
     change_permissions,
     copy,
@@ -240,9 +240,9 @@ def test_create_txt_from_string(
     ):
         """Verify 'echo | hadoop fs -put -' command execution by create_txt_from_string."""
         with patch('subprocess.call') as subprocess_mock, patch(
-            'rdsa_utils.cdsw.helpers.hdfs_utils.file_exists',
+            'rdsa_utils.cdp.helpers.hdfs_utils.file_exists',
         ) as file_exists_mock, patch(
-            'rdsa_utils.cdsw.helpers.hdfs_utils.delete_file',
+            'rdsa_utils.cdp.helpers.hdfs_utils.delete_file',
         ) as delete_file_mock:
             file_exists_mock.return_value = (
                 replace  # Assume file exists if replace is True
diff --git a/tests/cdsw/helpers/test_impala.py b/tests/cdsw/helpers/test_impala.py
index cf2a659..75f0fb7 100644
--- a/tests/cdsw/helpers/test_impala.py
+++ b/tests/cdsw/helpers/test_impala.py
@@ -1,7 +1,7 @@
 """Tests for impala.py module."""
 import subprocess
 
-from rdsa_utils.cdsw.helpers.impala import invalidate_impala_metadata
+from rdsa_utils.cdp.helpers.impala import invalidate_impala_metadata
 
 
 class TestInvalidateImpalaMetadata:
diff --git a/tests/cdsw/io/test_cdsw_input.py b/tests/cdsw/io/test_cdsw_input.py
index 998638d..40d2c28 100644
--- a/tests/cdsw/io/test_cdsw_input.py
+++ b/tests/cdsw/io/test_cdsw_input.py
@@ -1,11 +1,11 @@
-"""Tests for the cdsw/io/input.py module."""
+"""Tests for the cdp/io/input.py module."""
 from unittest.mock import MagicMock
 
 import pytest
 from pyspark.sql import DataFrame as SparkDF
 
+from rdsa_utils.cdp.io.input import *
 from rdsa_utils.exceptions import DataframeEmptyError
-from rdsa_utils.cdsw.io.input import *
 
 
 class TestGetCurrentDatabase:
diff --git a/tests/cdsw/io/test_cdsw_output.py b/tests/cdsw/io/test_cdsw_output.py
index adcf790..70d35fd 100644
--- a/tests/cdsw/io/test_cdsw_output.py
+++ b/tests/cdsw/io/test_cdsw_output.py
@@ -1,4 +1,4 @@
-"""Tests for the cdsw/io/output.py module."""
+"""Tests for the cdp/io/output.py module."""
 from typing import Callable
 from unittest.mock import Mock, patch
 
@@ -6,7 +6,7 @@
 from pyspark.sql import DataFrame as SparkDF
 from pyspark.sql import types as T
 
-from rdsa_utils.cdsw.io.output import *
+from rdsa_utils.cdp.io.output import *
 
 
 class TestInsertDataFrameToHiveTable:
@@ -137,8 +137,8 @@ def mock_df(self):
         mock_df.columns = ['run_id', 'data']
         return mock_df
 
-    @patch('rdsa_utils.cdsw.io.output.load_and_validate_table')
-    @patch('rdsa_utils.cdsw.io.output.insert_df_to_hive_table')
+    @patch('rdsa_utils.cdp.io.output.load_and_validate_table')
+    @patch('rdsa_utils.cdp.io.output.insert_df_to_hive_table')
     def test_write_and_read_hive_table_success(
         self,
         mock_insert,
@@ -225,10 +225,10 @@ def mock_df(self) -> Mock:
         """Fixture for mocked Spark DataFrame."""
         return Mock(spec=SparkDF)
 
-    @patch('rdsa_utils.cdsw.io.output.logger')
-    @patch('rdsa_utils.cdsw.io.output.delete_path')
-    @patch('rdsa_utils.cdsw.io.output.rename')
-    @patch('rdsa_utils.cdsw.io.output.file_exists')
+    @patch('rdsa_utils.cdp.io.output.logger')
+    @patch('rdsa_utils.cdp.io.output.delete_path')
+    @patch('rdsa_utils.cdp.io.output.rename')
+    @patch('rdsa_utils.cdp.io.output.file_exists')
     def test_save_csv_to_hdfs_success(
         self,
         mock_file_exists,
@@ -251,10 +251,10 @@ def test_save_csv_to_hdfs_success(
         mock_delete_path.assert_called_once()
         assert mock_logger.info.call_count > 0
 
-    @patch('rdsa_utils.cdsw.io.output.file_exists')
-    @patch('rdsa_utils.cdsw.io.output.rename')
-    @patch('rdsa_utils.cdsw.io.output.delete_path')
-    @patch('rdsa_utils.cdsw.io.output.logger')
+    @patch('rdsa_utils.cdp.io.output.file_exists')
+    @patch('rdsa_utils.cdp.io.output.rename')
+    @patch('rdsa_utils.cdp.io.output.delete_path')
+    @patch('rdsa_utils.cdp.io.output.logger')
     def test_overwriting_existing_file(
         self,
         mock_logger,
@@ -275,7 +275,7 @@ def test_overwriting_existing_file(
 
         mock_rename.assert_called_once()
 
-    @patch('rdsa_utils.cdsw.io.output.file_exists')
+    @patch('rdsa_utils.cdp.io.output.file_exists')
     def test_save_csv_to_hdfs_file_exists_error(
         self,
         mock_file_exists,
@@ -307,10 +307,10 @@ def test_save_csv_to_hdfs_invalid_file_name(self, mock_df):
             ('/user/hdfs/test/path', '/user/hdfs/test/path/should_write.csv'),
         ],
     )
-    @patch('rdsa_utils.cdsw.io.output.file_exists')
-    @patch('rdsa_utils.cdsw.io.output.rename')
-    @patch('rdsa_utils.cdsw.io.output.delete_path')
-    @patch('rdsa_utils.cdsw.io.output.logger')
+    @patch('rdsa_utils.cdp.io.output.file_exists')
+    @patch('rdsa_utils.cdp.io.output.rename')
+    @patch('rdsa_utils.cdp.io.output.delete_path')
+    @patch('rdsa_utils.cdp.io.output.logger')
     def test_file_path_schemes(
         self,
         mock_logger,
diff --git a/tests/cdsw/io/test_pipeline_runlog.py b/tests/cdsw/io/test_pipeline_runlog.py
index 429c7c6..3cd73c3 100644
--- a/tests/cdsw/io/test_pipeline_runlog.py
+++ b/tests/cdsw/io/test_pipeline_runlog.py
@@ -3,7 +3,7 @@
 import pytest
 from pyspark.sql import DataFrame
 
-from rdsa_utils.cdsw.io.pipeline_runlog import (
+from rdsa_utils.cdp.io.pipeline_runlog import (
     _get_run_ids,
     _write_entry,
     add_runlog_entry,
@@ -134,7 +134,7 @@ def test_reserve_id_non_empty(self, mocker):
 
         # Mock _write_entry function
         mock_write_entry = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+            'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
         )
 
         # Mock pyspark.sql.functions.max
@@ -173,7 +173,7 @@ def test_reserve_id_edge_case(self, mocker):
 
         # Mock _write_entry function
         mock_write_entry = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+            'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
         )
 
         # Mock pyspark.sql.functions.max
@@ -299,7 +299,7 @@ def test_get_last_run_id_general_pipeline_non_empty(self, mocker):
 
         # Patch _get_run_ids function and return a Mock object
         get_run_ids_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+            'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
             return_value=[3, 2, 1],
         )
 
@@ -319,7 +319,7 @@ def test_get_last_run_id_specific_pipeline_empty(self, mocker):
 
         # Patch _get_run_ids function and return a Mock object
         get_run_ids_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+            'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
             return_value=[],
         )
 
@@ -352,7 +352,7 @@ def test_penultimate_run_id_non_empty(self, mocker):
 
         # Patch _get_run_ids function and return a Mock object
         get_run_ids_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+            'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
             return_value=[3, 2, 1],
         )
 
@@ -387,7 +387,7 @@ def test_penultimate_run_id_edge_cases(self, mocker):
 
         # Patch _get_run_ids function and return a Mock object
         get_run_ids_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+            'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
             return_value=[1],
         )
 
@@ -409,7 +409,7 @@ def test_penultimate_run_id_edge_cases(self, mocker):
 
         # Patch _get_run_ids function and return a Mock object
         get_run_ids_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._get_run_ids',
+            'rdsa_utils.cdp.io.pipeline_runlog._get_run_ids',
             return_value=[],
         )
 
@@ -512,16 +512,16 @@ def test_add_runlog_entry(self, mocker):
 
         # Mock reserve_id, create_runlog_entry, _write_entry
         reserve_id_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog.reserve_id',
+            'rdsa_utils.cdp.io.pipeline_runlog.reserve_id',
             return_value=1,
         )
         entry_mock = mocker.Mock()
         create_runlog_entry_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog.create_runlog_entry',
+            'rdsa_utils.cdp.io.pipeline_runlog.create_runlog_entry',
             return_value=entry_mock,
         )
         _write_entry_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+            'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
         )
 
         # Call function
@@ -565,11 +565,11 @@ def test_add_runlog_entry_specified_id(self, mocker):
         # Mock create_runlog_entry, _write_entry
         entry_mock = mocker.Mock()
         create_runlog_entry_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog.create_runlog_entry',
+            'rdsa_utils.cdp.io.pipeline_runlog.create_runlog_entry',
             return_value=entry_mock,
         )
         _write_entry_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._write_entry',
+            'rdsa_utils.cdp.io.pipeline_runlog._write_entry',
         )
 
         # Call function
@@ -613,11 +613,11 @@ def test_write_runlog_file(self, mocker):
 
         # Mock _parse_runlog_as_string and create_txt_from_string
         parse_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._parse_runlog_as_string',
+            'rdsa_utils.cdp.io.pipeline_runlog._parse_runlog_as_string',
         )
         parse_mock.return_value = 'test metadata'
         create_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog.create_txt_from_string',
+            'rdsa_utils.cdp.io.pipeline_runlog.create_txt_from_string',
         )
 
         # Call function
@@ -639,7 +639,7 @@ def test_write_runlog_file_edge_case(self, mocker):
 
         # Mock _parse_runlog_as_string
         parse_mock = mocker.patch(
-            'rdsa_utils.cdsw.io.pipeline_runlog._parse_runlog_as_string',
+            'rdsa_utils.cdp.io.pipeline_runlog._parse_runlog_as_string',
         )
         parse_mock.return_value = 'test metadata'
 

From 11d83447a426562ce5d8a76e5dc67d807e5c19e8 Mon Sep 17 00:00:00 2001
From: dombean <46692370+dombean@users.noreply.github.com>
Date: Fri, 10 May 2024 11:55:39 +0100
Subject: [PATCH 2/4] Update CHANGELOG.md

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3a36813..882d93b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,7 +10,8 @@ and this project adheres to [semantic versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 ### Changed
-  
+- **Breaking Change**: Renamed module `cdsw` to `cdp` (Cloudera Data Platform); imports of `rdsa_utils.cdsw` must be updated to `rdsa_utils.cdp`.
+
 ### Deprecated
 
 ### Fixed

From 6add038da57ee74912f4a0e7b7238b14d79a537b Mon Sep 17 00:00:00 2001
From: dombean <46692370+dombean@users.noreply.github.com>
Date: Fri, 10 May 2024 11:57:27 +0100
Subject: [PATCH 3/4] Capitalise CDP

---
 docs/reference.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference.md b/docs/reference.md
index e800ccb..b1b09d1 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -9,7 +9,7 @@ reference for the technical implementation of the`rdsa-utils` codebase.
 ::: rdsa_utils.typing
 ::: rdsa_utils.validation
 
-## cdp
+## CDP
 
 ::: rdsa_utils.cdp.helpers.hdfs_utils
 ::: rdsa_utils.cdp.helpers.impala

From 2eff3a6dbe375c87b298322c8a2df6985e9794eb Mon Sep 17 00:00:00 2001
From: dombean <46692370+dombean@users.noreply.github.com>
Date: Fri, 10 May 2024 12:01:53 +0100
Subject: [PATCH 4/4] Rename Module in tests/: cdsw to cdp

---
 tests/{cdsw => cdp}/helpers/test_hdfs_utils.py | 0
 tests/{cdsw => cdp}/helpers/test_impala.py     | 0
 tests/{cdsw => cdp}/io/test_cdsw_input.py      | 0
 tests/{cdsw => cdp}/io/test_cdsw_output.py     | 0
 tests/{cdsw => cdp}/io/test_pipeline_runlog.py | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/{cdsw => cdp}/helpers/test_hdfs_utils.py (100%)
 rename tests/{cdsw => cdp}/helpers/test_impala.py (100%)
 rename tests/{cdsw => cdp}/io/test_cdsw_input.py (100%)
 rename tests/{cdsw => cdp}/io/test_cdsw_output.py (100%)
 rename tests/{cdsw => cdp}/io/test_pipeline_runlog.py (100%)

diff --git a/tests/cdsw/helpers/test_hdfs_utils.py b/tests/cdp/helpers/test_hdfs_utils.py
similarity index 100%
rename from tests/cdsw/helpers/test_hdfs_utils.py
rename to tests/cdp/helpers/test_hdfs_utils.py
diff --git a/tests/cdsw/helpers/test_impala.py b/tests/cdp/helpers/test_impala.py
similarity index 100%
rename from tests/cdsw/helpers/test_impala.py
rename to tests/cdp/helpers/test_impala.py
diff --git a/tests/cdsw/io/test_cdsw_input.py b/tests/cdp/io/test_cdsw_input.py
similarity index 100%
rename from tests/cdsw/io/test_cdsw_input.py
rename to tests/cdp/io/test_cdsw_input.py
diff --git a/tests/cdsw/io/test_cdsw_output.py b/tests/cdp/io/test_cdsw_output.py
similarity index 100%
rename from tests/cdsw/io/test_cdsw_output.py
rename to tests/cdp/io/test_cdsw_output.py
diff --git a/tests/cdsw/io/test_pipeline_runlog.py b/tests/cdp/io/test_pipeline_runlog.py
similarity index 100%
rename from tests/cdsw/io/test_pipeline_runlog.py
rename to tests/cdp/io/test_pipeline_runlog.py