Skip to content

Commit

Permalink
RM-86: support both the `lineterminator` and `line_terminator` keyword spellings across pandas versions
Browse files (browse the repository at this point in the history)
  • Loading branch information
ryantimjohn committed May 16, 2023
1 parent 61a588f commit 5fd5e3c
Show file tree
Hide file tree
Showing 7 changed files with 338 additions and 148 deletions.
7 changes: 6 additions & 1 deletion records_mover/records/pandas/read_csv_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from records_mover.records.schema import RecordsSchema
import logging
from typing import Set, Dict, Any
from packaging import version
import pandas as pd


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -499,7 +501,10 @@ def day_first(dateish_format: str) -> bool:
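# Character to break file into lines. Only valid with C parser.
# NOTE(review): pandas.read_csv has always spelled this keyword
# `lineterminator`; only DataFrame.to_csv used `line_terminator` before
# pandas 1.5.0 -- confirm read_csv accepts the pre-1.5.0 spelling used below.
#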
if non_standard_record_terminator:
pandas_options['lineterminator'] = hints.record_terminator
if version.parse(pd.__version__) >= version.parse('1.5.0'):
pandas_options['lineterminator'] = hints.record_terminator
else:
pandas_options['line_terminator'] = hints.record_terminator
quiet_remove(unhandled_hints, 'record-terminator')

#
Expand Down
7 changes: 6 additions & 1 deletion records_mover/records/pandas/to_csv_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from records_mover.mover_types import _assert_never
import logging
from typing import Set, Dict
from packaging import version
import pandas as pd


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -111,7 +113,10 @@ def pandas_to_csv_options(records_format: DelimitedRecordsFormat,
pandas_options['sep'] = hints.field_delimiter
quiet_remove(unhandled_hints, 'field-delimiter')

pandas_options['lineterminator'] = hints.record_terminator
if version.parse(pd.__version__) >= version.parse('1.5.0'):
pandas_options['lineterminator'] = hints.record_terminator
else:
pandas_options['line_terminator'] = hints.record_terminator
quiet_remove(unhandled_hints, 'record-terminator')

return pandas_options
43 changes: 30 additions & 13 deletions tests/component/records/test_pandas_read_csv_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from records_mover.records.processing_instructions import ProcessingInstructions
from records_mover.records.records_format import DelimitedRecordsFormat
from records_mover.records.schema import RecordsSchema
from packaging import version
import pandas as pd


class TestPandasReadCsvOptions(unittest.TestCase):
Expand Down Expand Up @@ -131,19 +133,34 @@ def test_pandas_read_csv_options_csv(self):

def test_pandas_read_csv_options_vertica(self):
self.maxDiff = None
expected = {
'dayfirst': False,
'compression': None,
'delimiter': '\x01',
'doublequote': False,
'engine': 'c',
'on_bad_lines': 'error',
'header': None,
'lineterminator': '\x02',
'quotechar': '"',
'quoting': 3,
'parse_dates': [0, 1, 2, 3],
}
if version.parse(pd.__version__) >= version.parse('1.5.0'):
expected = {
'dayfirst': False,
'compression': None,
'delimiter': '\x01',
'doublequote': False,
'engine': 'c',
'on_bad_lines': 'error',
'header': None,
'lineterminator': '\x02',
'quotechar': '"',
'quoting': 3,
'parse_dates': [0, 1, 2, 3],
}
else:
expected = {
'dayfirst': False,
'compression': None,
'delimiter': '\x01',
'doublequote': False,
'engine': 'c',
'on_bad_lines': 'error',
'header': None,
'line_terminator': '\x02',
'quotechar': '"',
'quoting': 3,
'parse_dates': [0, 1, 2, 3],
}
processing_instructions = ProcessingInstructions()
records_format = DelimitedRecordsFormat(hints=vertica_format_hints)
unhandled_hints = set(records_format.hints)
Expand Down
40 changes: 28 additions & 12 deletions tests/component/records/test_pandas_to_csv_options_bluelabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,38 @@
from records_mover.records.pandas import pandas_to_csv_options
from records_mover.records.processing_instructions import ProcessingInstructions
from records_mover.records.records_format import DelimitedRecordsFormat
from packaging import version
import pandas as pd


class TestPandasToCsvOptionsBlueLabs(unittest.TestCase):
def test_pandas_to_csv_options_bluelabs(self):
expected = {
'compression': 'gzip',
'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
'doublequote': False,
'encoding': 'UTF8',
'escapechar': '\\',
'header': False,
'lineterminator': '\n',
'quotechar': '"',
'quoting': 3,
'sep': ',',
}
if version.parse(pd.__version__) >= version.parse('1.5.0'):
expected = {
'compression': 'gzip',
'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
'doublequote': False,
'encoding': 'UTF8',
'escapechar': '\\',
'header': False,
'lineterminator': '\n',
'quotechar': '"',
'quoting': 3,
'sep': ',',
}
else:
expected = {
'compression': 'gzip',
'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
'doublequote': False,
'encoding': 'UTF8',
'escapechar': '\\',
'header': False,
'line_terminator': '\n',
'quotechar': '"',
'quoting': 3,
'sep': ',',
}
processing_instructions = ProcessingInstructions()
records_format = DelimitedRecordsFormat(hints=bluelabs_format_hints)
unhandled_hints = set(records_format.hints)
Expand Down
37 changes: 26 additions & 11 deletions tests/component/records/test_pandas_to_csv_options_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,36 @@
from records_mover.records.pandas import pandas_to_csv_options
from records_mover.records.processing_instructions import ProcessingInstructions
from records_mover.records.records_format import DelimitedRecordsFormat
from packaging import version
import pandas as pd


class TestPandasToCsvOptionsCsv(unittest.TestCase):
def test_pandas_to_csv_options_csv(self):
expected = {
'compression': 'gzip',
'date_format': '%m/%d/%y %H:%M',
'doublequote': True,
'encoding': 'UTF8',
'header': True,
'lineterminator': '\n',
'quotechar': '"',
'quoting': 0,
'sep': ','
}
if version.parse(pd.__version__) >= version.parse('1.5.0'):
expected = {
'compression': 'gzip',
'date_format': '%m/%d/%y %H:%M',
'doublequote': True,
'encoding': 'UTF8',
'header': True,
'lineterminator': '\n',
'quotechar': '"',
'quoting': 0,
'sep': ','
}
else:
expected = {
'compression': 'gzip',
'date_format': '%m/%d/%y %H:%M',
'doublequote': True,
'encoding': 'UTF8',
'header': True,
'line_terminator': '\n',
'quotechar': '"',
'quoting': 0,
'sep': ','
}
processing_instructions =\
ProcessingInstructions(fail_if_cant_handle_hint=True)
records_format = DelimitedRecordsFormat(hints=csv_format_hints)
Expand Down
34 changes: 24 additions & 10 deletions tests/component/records/test_pandas_to_csv_options_vertica.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,34 @@
from records_mover.records.pandas import pandas_to_csv_options
from records_mover.records.processing_instructions import ProcessingInstructions
from records_mover.records.records_format import DelimitedRecordsFormat
from packaging import version
import pandas as pd


class TestPandasToCsvOptionsVertica(unittest.TestCase):
def test_pandas_to_csv_options_vertica(self):
expected = {
'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
'doublequote': False,
'encoding': 'UTF8',
'header': False,
'lineterminator': '\x02',
'quotechar': '"',
'quoting': 3,
'sep': '\x01',
}
if version.parse(pd.__version__) >= version.parse('1.5.0'):
expected = {
'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
'doublequote': False,
'encoding': 'UTF8',
'header': False,
'lineterminator': '\x02',
'quotechar': '"',
'quoting': 3,
'sep': '\x01',
}
else:
expected = {
'date_format': '%Y-%m-%d %H:%M:%S.%f%z',
'doublequote': False,
'encoding': 'UTF8',
'header': False,
'line_terminator': '\x02',
'quotechar': '"',
'quoting': 3,
'sep': '\x01',
}
processing_instructions = ProcessingInstructions()
records_format = DelimitedRecordsFormat(hints=vertica_format_hints)
unhandled_hints = set(records_format.hints)
Expand Down
Loading

0 comments on commit 5fd5e3c

Please sign in to comment.