diff --git a/python/biobear/__init__.py b/python/biobear/__init__.py index dd61758..72f1f02 100644 --- a/python/biobear/__init__.py +++ b/python/biobear/__init__.py @@ -27,6 +27,7 @@ from biobear.compression import Compression from .biobear import FileCompressionType +from .biobear import FastaSequenceDataType from .biobear import FASTQReadOptions from .biobear import FASTAReadOptions from .biobear import VCFReadOptions @@ -65,6 +66,7 @@ "compression", "Compression", "FileCompressionType", + "FastaSequenceDataType", "FASTQReadOptions", "FASTAReadOptions", "BCFReadOptions", diff --git a/python/tests/test_session.py b/python/tests/test_session.py index 242caa7..8b139ff 100644 --- a/python/tests/test_session.py +++ b/python/tests/test_session.py @@ -15,7 +15,6 @@ from pathlib import Path import importlib import tempfile -from biobear.biobear import BEDReadOptions import polars as pl import pytest @@ -23,9 +22,11 @@ from biobear import ( BAMReadOptions, connect, + FastaSequenceDataType, FASTQReadOptions, FASTAReadOptions, FileCompressionType, + BEDReadOptions, BCFReadOptions, GFFReadOptions, VCFReadOptions, @@ -192,6 +193,20 @@ def test_fasta_sequence_type(): assert df.get_column("sequence").dtype == pl.List(pl.Int8) +def test_fasta_sequence_type_with_options(): + """Test reading a fasta file.""" + session = connect() + + df = session.read_fasta_file( + str(DATA / "test.fasta"), + options=FASTAReadOptions( + fasta_sequence_data_type=FastaSequenceDataType.INTEGER_ENCODE_DNA + ), + ).to_polars() + + assert df.get_column("sequence").dtype == pl.List(pl.Int8) + + @pytest.mark.skipif( not importlib.util.find_spec("polars"), reason="polars not installed" ) diff --git a/src/datasources/fasta.rs b/src/datasources/fasta.rs index 3b243ad..80a15f7 100644 --- a/src/datasources/fasta.rs +++ b/src/datasources/fasta.rs @@ -21,19 +21,22 @@ const DEFAULT_FASTA_FILE_EXTENSION: &str = "fasta"; #[derive(Debug, Clone)] #[pyclass] pub enum FastaSequenceDataType { - Utf8, - LargeUtf8, - IntegerEncodeDNA, - IntegerEncodeProtein, + UTF8, + #[allow(non_camel_case_types)] + LARGE_UTF8, + #[allow(non_camel_case_types)] + INTEGER_ENCODE_DNA, + #[allow(non_camel_case_types)] + INTEGER_ENCODE_PROTEIN, } impl From for SequenceDataType { fn from(data_type: FastaSequenceDataType) -> Self { match data_type { - FastaSequenceDataType::Utf8 => SequenceDataType::Utf8, - FastaSequenceDataType::LargeUtf8 => SequenceDataType::LargeUtf8, - FastaSequenceDataType::IntegerEncodeDNA => SequenceDataType::IntegerEncodeDNA, - FastaSequenceDataType::IntegerEncodeProtein => SequenceDataType::IntegerEncodeProtein, + FastaSequenceDataType::UTF8 => SequenceDataType::Utf8, + FastaSequenceDataType::LARGE_UTF8 => SequenceDataType::LargeUtf8, + FastaSequenceDataType::INTEGER_ENCODE_DNA => SequenceDataType::IntegerEncodeDNA, + FastaSequenceDataType::INTEGER_ENCODE_PROTEIN => SequenceDataType::IntegerEncodeProtein, } } } @@ -75,7 +78,7 @@ impl Default for FASTAReadOptions { Self { file_extension: String::from(DEFAULT_FASTA_FILE_EXTENSION), file_compression_type: FileCompressionType::UNCOMPRESSED, - fasta_sequence_data_type: FastaSequenceDataType::Utf8, + fasta_sequence_data_type: FastaSequenceDataType::UTF8, } } } @@ -107,7 +110,7 @@ impl FASTAReadOptions { file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED); let fasta_sequence_data_type = - fasta_sequence_data_type.unwrap_or(FastaSequenceDataType::Utf8); + fasta_sequence_data_type.unwrap_or(FastaSequenceDataType::UTF8); Ok(Self { file_compression_type, diff --git a/src/lib.rs b/src/lib.rs index dc2dbab..f937ea9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,7 @@ fn biobear(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?;