From 15f479e2d07c54cdae0e343e703e8381c5f2b53e Mon Sep 17 00:00:00 2001 From: Jon Connell Date: Sat, 12 Oct 2024 13:00:37 +0100 Subject: [PATCH] Add --encoding to csv2numbers --- pyproject.toml | 2 +- src/numbers_parser/_csv2numbers.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bafb63d..6acfa27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ name = "numbers-parser" packages = [{include = "numbers_parser", from = "src"}] readme = "README.md" repository = "https://github.com/masaccio/numbers-parser" -version = "4.13.2" +version = "4.13.3" [tool.poetry.scripts] cat-numbers = "numbers_parser._cat_numbers:main" diff --git a/src/numbers_parser/_csv2numbers.py b/src/numbers_parser/_csv2numbers.py index 5634032..4445814 100644 --- a/src/numbers_parser/_csv2numbers.py +++ b/src/numbers_parser/_csv2numbers.py @@ -33,6 +33,7 @@ class Converter: no_header: bool = False reverse: bool = False whitespace: bool = None + encoding: str = "utf-8" def __post_init__(self: Converter) -> None: """Parse CSV file with Pandas and return a dataframe.""" @@ -45,7 +46,7 @@ def _read_csv(self) -> None: dialect = csv.excel dialect.strict = True lineno = 1 - with open(self.input_filename) as csvfile: + with open(self.input_filename, encoding=self.encoding) as csvfile: csvreader = csv.reader(csvfile, dialect=dialect) if self.no_header: self.header = None @@ -333,6 +334,12 @@ def command_line_parser() -> argparse.ArgumentParser: action="store_true", help="dates are represented day first in the CSV file (default: false)", ) + parser.add_argument( + "--encoding", + required=False, + default="utf-8", + help="python-style text encoding of the CSV file (default: utf-8)", + ) parser.add_argument( "--date", metavar="COLUMNS", @@ -400,6 +407,7 @@ def main() -> None: date_columns=args.date, input_filename=input_filename, output_filename=output_filename, + encoding=args.encoding, ) converter.transform_columns(args.transform)