-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from ONSdigital/dev_invalidate_impala_metadata
Add CDSW Module: impala.py
- Loading branch information
Showing
3 changed files
with
169 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
"""Utilities for working with Impala.""" | ||
import logging | ||
import subprocess | ||
from typing import Optional | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def invalidate_impala_metadata(
    table: str,
    impalad_address_port: str,
    impalad_ca_cert: str,
    keep_stderr: Optional[bool] = False,
) -> None:
    """Automate the invalidation of a table's metadata using impala-shell.

    This function uses the impala-shell command with the given
    impalad_address_port and impalad_ca_cert, to invalidate a specified
    table's metadata.

    It proves useful during a data pipeline's execution after writing to an
    intermediate Hive table. Using Impala Query Editor in Hue, end-users often
    need to run "INVALIDATE METADATA" command to refresh a table's metadata.
    However, this manual step can be missed, leading to potential use of
    outdated metadata.

    The function automates the "INVALIDATE METADATA" command for a given table,
    ensuring up-to-date metadata for future queries. This reduces manual
    intervention, making outdated metadata issues less likely to occur.

    Parameters
    ----------
    table
        Name of the table for metadata invalidation. The value is
        interpolated verbatim into the statement sent to impala-shell, so it
        must come from a trusted source.
    impalad_address_port
        'address:port' of the impalad instance.
    impalad_ca_cert
        Path to impalad's CA certificate file.
    keep_stderr
        If True, the impala-shell command's stderr output is logged at INFO
        level through this module's logger.

    Returns
    -------
    None

    Notes
    -----
    A non-zero exit status from impala-shell does not raise. It is reported
    as a WARNING log record (including the captured stderr) so that a failed
    invalidation is visible without interrupting the calling pipeline.

    Examples
    --------
    >>> invalidate_impala_metadata(
    ...     'my_table',
    ...     'localhost:21050',
    ...     '/path/to/ca_cert.pem'
    ... )
    >>> invalidate_impala_metadata(
    ...     'my_table',
    ...     'localhost:21050',
    ...     '/path/to/ca_cert.pem',
    ...     keep_stderr=True
    ... )
    """
    # Arguments are passed as a list (no shell), and both output streams are
    # captured so nothing leaks to the parent process' console.
    result = subprocess.run(
        [
            'impala-shell',
            '-k',
            '--ssl',
            '-i',
            impalad_address_port,
            '--ca_cert',
            impalad_ca_cert,
            '-q',
            f'invalidate metadata {table};',
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # errors='replace' keeps undecodable bytes in the tool's output from
    # raising UnicodeDecodeError while we are only trying to report it.
    stderr_text = result.stderr.decode(errors='replace')

    if keep_stderr:
        logger.info(stderr_text)

    # Surface failures instead of silently ignoring the exit status; this is
    # deliberately a warning rather than an exception to stay best-effort.
    if result.returncode != 0:
        logger.warning(
            'impala-shell exited with code %s while invalidating metadata '
            'for table %s: %s',
            result.returncode,
            table,
            stderr_text,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
"""Tests for impala.py module.""" | ||
import subprocess | ||
|
||
from rdsa_utils.cdsw.impala import invalidate_impala_metadata | ||
|
||
|
||
class TestInvalidateImpalaMetadata:
    """Tests for invalidate_impala_metadata function."""

    def test_invalidate_impala_metadata(self, mocker):
        """Check the impala-shell invocation and the optional stderr logging.

        Parameters
        ----------
        mocker : pytest_mock.MockFixture
            Pytest's MockFixture object, used to replace subprocess.run()
            and logging.Logger.info with mocks.

        Notes
        -----
        The test exercises the function twice:

        1. Without keep_stderr, verifying the impala-shell command line and
           the captured output pipes.
        2. With keep_stderr=True and a canned CompletedProcess, verifying the
           same command line and that the decoded stderr is logged exactly
           once via logger.info().
        """
        # Inputs shared by both invocations.
        table = 'test_table'
        impalad_address_port = 'localhost:21050'
        impalad_ca_cert = '/path/to/ca_cert.pem'

        # The command line and keyword arguments expected on every call.
        expected_command = [
            'impala-shell',
            '-k',
            '--ssl',
            '-i',
            impalad_address_port,
            '--ca_cert',
            impalad_ca_cert,
            '-q',
            f'invalidate metadata {table};',
        ]
        expected_pipes = {
            'stdout': subprocess.PIPE,
            'stderr': subprocess.PIPE,
        }

        # Replace the external process call and the logger's info method.
        run_mock = mocker.patch('subprocess.run')
        info_mock = mocker.patch('logging.Logger.info')

        # First scenario: default behaviour, stderr is not logged.
        invalidate_impala_metadata(table, impalad_address_port, impalad_ca_cert)
        run_mock.assert_called_with(expected_command, **expected_pipes)

        # Second scenario: start from a clean call history and hand back a
        # completed process whose stderr carries a known message.
        run_mock.reset_mock()
        run_mock.return_value = subprocess.CompletedProcess(
            args=['dummy'], returncode=0, stdout=b'', stderr=b'Test Error',
        )

        invalidate_impala_metadata(
            table,
            impalad_address_port,
            impalad_ca_cert,
            keep_stderr=True,
        )

        # Same command line as before, and the stderr text reached the logger.
        run_mock.assert_called_with(expected_command, **expected_pipes)
        info_mock.assert_called_once_with('Test Error')