From ba61e2bbb5c085ffb1c3cd30e05af0a1405ccd57 Mon Sep 17 00:00:00 2001 From: Spencer J Rothfuss <108840392+sjrothfuss@users.noreply.github.com> Date: Wed, 7 Aug 2024 18:39:21 -0500 Subject: [PATCH 1/5] Check for include_line_num --- src/starfile/writer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/starfile/writer.py b/src/starfile/writer.py index be8d784..2d2c1ad 100644 --- a/src/starfile/writer.py +++ b/src/starfile/writer.py @@ -163,7 +163,8 @@ def loop_block( separator: str = '\t', na_rep: str = '', quote_character: str = '"', - quote_all_strings: bool = False + quote_all_strings: bool = False, + include_line_num: bool = True, ) -> Generator[str, None, None]: # Header @@ -171,7 +172,7 @@ def loop_block( yield '' yield 'loop_' for idx, column_name in enumerate(df.columns, 1): - yield f'_{column_name} #{idx}' + yield f'_{column_name} #{idx}' if include_line_num else f'_{column_name}' # Data for line in df.map(lambda x: From 3c6f90460745c80b180c13ec794c19df37017060 Mon Sep 17 00:00:00 2001 From: Spencer J Rothfuss Date: Thu, 8 Aug 2024 11:07:40 -0500 Subject: [PATCH 2/5] include_field_num docstring, test, and function calling --- src/starfile/functions.py | 7 +++++++ src/starfile/writer.py | 9 ++++++--- tests/test_writing.py | 18 ++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/starfile/functions.py b/src/starfile/functions.py index 792f666..c21b8c2 100644 --- a/src/starfile/functions.py +++ b/src/starfile/functions.py @@ -55,6 +55,7 @@ def write( na_rep: str = '', quote_character: str = '"', quote_all_strings: bool = False, + include_field_num: bool = True, **kwargs ): """Write data to disk in the STAR format. @@ -72,6 +73,11 @@ def write( Separator between values, will be passed to pandas. na_rep: str Representation of null values, will be passed to pandas. + include_field_num: bool + Whether field numbers should be included after field names in the ouput file. + Default is True which includes field numbers (i.e. `_rlnImageName #1`) and is + compatible with RELION and Python STOPGAP. False excludes field numbers (i.e. + `_motl_idx`) and is compatible with legacy/MATLAB STOPGAP. """ StarWriter( data, @@ -81,6 +87,7 @@ def write( separator=sep, quote_character=quote_character, quote_all_strings=quote_all_strings, + include_field_num=include_field_num, ).write() diff --git a/src/starfile/writer.py b/src/starfile/writer.py index 2d2c1ad..d48397f 100644 --- a/src/starfile/writer.py +++ b/src/starfile/writer.py @@ -27,6 +27,7 @@ def __init__( na_rep: str = '', quote_character: str = '"', quote_all_strings: bool = False, + include_field_num: bool = True, ): # coerce data self.data_blocks = self.coerce_data_blocks(data_blocks) @@ -40,6 +41,7 @@ def __init__( self.na_rep = na_rep self.quote_character = quote_character self.quote_all_strings = quote_all_strings + self.include_field_num = include_field_num self.buffer = TextBuffer() self.backup_if_file_exists() @@ -93,7 +95,8 @@ def data_block_generator(self) -> Generator[str, None, None]: separator=self.sep, na_rep=self.na_rep, quote_character=self.quote_character, - quote_all_strings=self.quote_all_strings + quote_all_strings=self.quote_all_strings, + include_field_num=self.include_field_num, ): yield line @@ -164,7 +167,7 @@ def loop_block( na_rep: str = '', quote_character: str = '"', quote_all_strings: bool = False, - include_line_num: bool = True, + include_field_num: bool = True, ) -> Generator[str, None, None]: # Header @@ -172,7 +175,7 @@ def loop_block( yield '' yield 'loop_' for idx, column_name in enumerate(df.columns, 1): - yield f'_{column_name} #{idx}' if include_line_num else f'_{column_name}' + yield f'_{column_name} #{idx}' if include_field_num else f'_{column_name}' # Data for line in df.map(lambda x: diff --git a/tests/test_writing.py b/tests/test_writing.py index 8e0ee00..3dc80ac 100644 --- a/tests/test_writing.py +++ b/tests/test_writing.py @@ -72,6 +72,24 @@ def test_can_write_non_zero_indexed_one_row_dataframe(): assert (expected in output) +def test_exclude_field_numbers(): + with TemporaryDirectory() as directory: + filename = join_path(directory, "test.star") + StarWriter(test_df, filename, include_field_num=False).write() + with open(filename) as output_file: + output = output_file.read() + + expected = ( + "_Brand\n" + "_Price\n" + "Honda_Civic\t22000\n" + "Toyota_Corolla\t25000\n" + "Ford_Focus\t27000\n" + "Audi_A4\t35000\n" + ) + assert (expected in output) + + @pytest.mark.parametrize("quote_character, quote_all_strings, num_quotes", [('"', False, 6), ('"', True, 8), From 4e92c840ea9c65d3e39fad3cc38086f3bec10fe8 Mon Sep 17 00:00:00 2001 From: Spencer J Rothfuss Date: Thu, 8 Aug 2024 11:20:33 -0500 Subject: [PATCH 3/5] Add include test --- tests/test_writing.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_writing.py b/tests/test_writing.py index 3dc80ac..1434793 100644 --- a/tests/test_writing.py +++ b/tests/test_writing.py @@ -72,21 +72,21 @@ def test_can_write_non_zero_indexed_one_row_dataframe(): assert (expected in output) -def test_exclude_field_numbers(): +@pytest.mark.parametrize("include_field_num, expected", + [ + (True, "_Brand #1\n_Price #2\n"), + (False, "_Brand\n_Price\n"), + ]) +def test_include_exclude_field_numbers(include_field_num, expected): with TemporaryDirectory() as directory: filename = join_path(directory, "test.star") - StarWriter(test_df, filename, include_field_num=False).write() + StarWriter( + test_df, + filename, + include_field_num=include_field_num + ).write() with open(filename) as output_file: output = output_file.read() - - expected = ( - "_Brand\n" - "_Price\n" - "Honda_Civic\t22000\n" - "Toyota_Corolla\t25000\n" - "Ford_Focus\t27000\n" - "Audi_A4\t35000\n" - ) assert (expected in output) From 1fbc48c3dde61167d5f21c94813f619b77b6961a Mon Sep 17 00:00:00 2001 From: Spencer J Rothfuss <108840392+sjrothfuss@users.noreply.github.com> Date: Thu, 8 Aug 2024 12:21:41 -0500 Subject: [PATCH 4/5] Use include_field_number in tests --- tests/test_writing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_writing.py b/tests/test_writing.py index 1434793..065ad55 100644 --- a/tests/test_writing.py +++ b/tests/test_writing.py @@ -72,18 +72,18 @@ def test_can_write_non_zero_indexed_one_row_dataframe(): assert (expected in output) -@pytest.mark.parametrize("include_field_num, expected", +@pytest.mark.parametrize("include_field_number, expected", [ (True, "_Brand #1\n_Price #2\n"), (False, "_Brand\n_Price\n"), ]) -def test_include_exclude_field_numbers(include_field_num, expected): +def test_include_exclude_field_numbers(include_field_number, expected): with TemporaryDirectory() as directory: filename = join_path(directory, "test.star") StarWriter( test_df, filename, - include_field_num=include_field_num + include_field_number=include_field_number ).write() with open(filename) as output_file: output = output_file.read() From a646dfd8a0aea7930932b017aa2fc66df42ce01b Mon Sep 17 00:00:00 2001 From: Spencer J Rothfuss Date: Thu, 8 Aug 2024 12:26:22 -0500 Subject: [PATCH 5/5] Rename to include_field_numBERS --- src/starfile/functions.py | 6 +++--- src/starfile/writer.py | 10 +++++----- tests/test_writing.py | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/starfile/functions.py b/src/starfile/functions.py index c21b8c2..4ba0669 100644 --- a/src/starfile/functions.py +++ b/src/starfile/functions.py @@ -55,7 +55,7 @@ def write( na_rep: str = '', quote_character: str = '"', quote_all_strings: bool = False, - include_field_num: bool = True, + include_field_numbers: bool = True, **kwargs ): """Write data to disk in the STAR format. @@ -73,7 +73,7 @@ def write( Separator between values, will be passed to pandas. na_rep: str Representation of null values, will be passed to pandas. - include_field_num: bool + include_field_numbers: bool Whether field numbers should be included after field names in the ouput file. Default is True which includes field numbers (i.e. `_rlnImageName #1`) and is compatible with RELION and Python STOPGAP. False excludes field numbers (i.e. @@ -87,7 +87,7 @@ def write( separator=sep, quote_character=quote_character, quote_all_strings=quote_all_strings, - include_field_num=include_field_num, + include_field_numbers=include_field_numbers, ).write() diff --git a/src/starfile/writer.py b/src/starfile/writer.py index d48397f..0418857 100644 --- a/src/starfile/writer.py +++ b/src/starfile/writer.py @@ -27,7 +27,7 @@ def __init__( na_rep: str = '', quote_character: str = '"', quote_all_strings: bool = False, - include_field_num: bool = True, + include_field_numbers: bool = True, ): # coerce data self.data_blocks = self.coerce_data_blocks(data_blocks) @@ -41,7 +41,7 @@ def __init__( self.na_rep = na_rep self.quote_character = quote_character self.quote_all_strings = quote_all_strings - self.include_field_num = include_field_num + self.include_field_numbers = include_field_numbers self.buffer = TextBuffer() self.backup_if_file_exists() @@ -96,7 +96,7 @@ def data_block_generator(self) -> Generator[str, None, None]: na_rep=self.na_rep, quote_character=self.quote_character, quote_all_strings=self.quote_all_strings, - include_field_num=self.include_field_num, + include_field_numbers=self.include_field_numbers, ): yield line @@ -167,7 +167,7 @@ def loop_block( na_rep: str = '', quote_character: str = '"', quote_all_strings: bool = False, - include_field_num: bool = True, + include_field_numbers: bool = True, ) -> Generator[str, None, None]: # Header @@ -175,7 +175,7 @@ def loop_block( yield '' yield 'loop_' for idx, column_name in enumerate(df.columns, 1): - yield f'_{column_name} #{idx}' if include_field_num else f'_{column_name}' + yield f'_{column_name} #{idx}' if include_field_numbers else f'_{column_name}' # Data for line in df.map(lambda x: diff --git a/tests/test_writing.py b/tests/test_writing.py index 1434793..135e319 100644 --- a/tests/test_writing.py +++ b/tests/test_writing.py @@ -72,18 +72,18 @@ def test_can_write_non_zero_indexed_one_row_dataframe(): assert (expected in output) -@pytest.mark.parametrize("include_field_num, expected", +@pytest.mark.parametrize("include_field_numbers, expected", [ (True, "_Brand #1\n_Price #2\n"), (False, "_Brand\n_Price\n"), ]) -def test_include_exclude_field_numbers(include_field_num, expected): +def test_include_exclude_field_numbers(include_field_numbers, expected): with TemporaryDirectory() as directory: filename = join_path(directory, "test.star") StarWriter( test_df, filename, - include_field_num=include_field_num + include_field_numbers=include_field_numbers ).write() with open(filename) as output_file: output = output_file.read()