diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 800b0cca3..27a3a5529 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,7 @@
 Unreleased
 ----------
 
+* feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to add additional null values.
 * Add Python 3.12 support.
 
 1.2.0 - October 4, 2023
diff --git a/csvkit/cli.py b/csvkit/cli.py
index 58f386ce7..00e54494a 100644
--- a/csvkit/cli.py
+++ b/csvkit/cli.py
@@ -10,6 +10,7 @@
 from os.path import splitext
 
 import agate
+from agate.data_types.base import DEFAULT_NULL_VALUES
 
 from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError
 
@@ -187,6 +188,11 @@ def _init_common_parser(self):
             self.argparser.add_argument(
                 '--blanks', dest='blanks', action='store_true',
                 help='Do not convert "", "na", "n/a", "none", "null", "." to NULL.')
+        if 'blanks' not in self.override_flags:
+            # 'extend' makes repeated --null-value flags accumulate rather than overwrite.
+            self.argparser.add_argument(
+                '--null-value', dest='null_values', nargs='+', action='extend', default=[],
+                help='Convert this value to NULL. --null-value can be specified multiple times.')
         if 'date-format' not in self.override_flags:
             self.argparser.add_argument(
                 '--date-format', dest='date_format',
@@ -302,9 +308,11 @@ def handler(t, value, traceback):
 
     def get_column_types(self):
         if getattr(self.args, 'blanks', None):
-            type_kwargs = {'null_values': ()}
+            type_kwargs = {'null_values': []}
         else:
-            type_kwargs = {}
+            type_kwargs = {'null_values': list(DEFAULT_NULL_VALUES)}
+        # User-supplied null values apply whether or not --blanks disabled the defaults.
+        type_kwargs['null_values'].extend(getattr(self.args, 'null_values', []))
 
         text_type = agate.Text(**type_kwargs)
 
diff --git a/tests/test_utilities/test_in2csv.py b/tests/test_utilities/test_in2csv.py
index 7eced7560..49543c484 100644
--- a/tests/test_utilities/test_in2csv.py
+++ b/tests/test_utilities/test_in2csv.py
@@ -37,6 +37,39 @@ def test_no_blanks(self):
     def test_blanks(self):
         self.assertConverted('csv', 'examples/blanks.csv', 'examples/blanks.csv', ['--blanks'])
 
+    def test_null_value(self):
+        input_file = StringIO('a,b\nn/a,\\N')
+
+        with stdin_as_string(input_file):
+            self.assertLines(['-f', 'csv', '--null-value', '\\N'], [
+                'a,b',
+                ',',
+            ])
+
+        input_file.close()
+
+    def test_null_value_blanks(self):
+        input_file = StringIO('a,b\nn/a,\\N')
+
+        with stdin_as_string(input_file):
+            self.assertLines(['-f', 'csv', '--null-value', '\\N', '--blanks'], [
+                'a,b',
+                'n/a,',
+            ])
+
+        input_file.close()
+
+    def test_null_value_repeated(self):
+        input_file = StringIO('a,b\nnil,\\N')
+
+        with stdin_as_string(input_file):
+            self.assertLines(['-f', 'csv', '--null-value', 'nil', '--null-value', '\\N'], [
+                'a,b',
+                ',',
+            ])
+
+        input_file.close()
+
     def test_date_format(self):
         self.assertConverted('csv', 'examples/test_date_format.csv', 'examples/test_date_format_converted.csv',
                              ['--date-format', '%d/%m/%Y'])