Skip to content

Commit

Permalink
refactor: use file options struct
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck committed Jun 21, 2024
1 parent 803000b commit 1ff3502
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 106 deletions.
6 changes: 6 additions & 0 deletions bin/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ function teardown {
}

# Build the code

# Uninstall biobear if it's already installed, before rebuilding below.
# `pip show` is used purely as an "is it installed?" probe: it exits non-zero
# when the package is absent, so its output is discarded and only the exit
# status drives the branch.
if pip show biobear > /dev/null 2>&1; then
    pip uninstall -y biobear
fi

cargo build
maturin develop

Expand Down
110 changes: 12 additions & 98 deletions src/datasources/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{path::Path, str::FromStr};

use crate::{error::BioBearResult, file_compression_type::FileCompressionType};
use crate::{error::BioBearResult, file_compression_type::FileCompressionType, file_options::FileOptions};
use exon::datasources::fasta::{table_provider::ListingFASTATableOptions, SequenceDataType};
use pyo3::{pyclass, pymethods};

Expand Down Expand Up @@ -43,99 +41,6 @@ impl From<FastaSequenceDataType> for SequenceDataType {
}
}

/// Builder that collects optional FASTA read settings and turns them into a
/// `FASTAReadOptions` via `build`.
///
/// Every field is an `Option`; the derived `Default` leaves all of them `None`.
#[derive(Debug, Clone, Default)]
pub struct FASTAReadOptionsBuilder {
// File extension to read with, if one has been chosen.
file_extension: Option<String>,
// Compression applied to the file, if one has been chosen.
file_compression_type: Option<FileCompressionType>,
// How sequence data should be typed, if one has been chosen.
fasta_sequence_data_type: Option<FastaSequenceDataType>,
}

impl FASTAReadOptionsBuilder {
    /// Creates a builder with every option unset.
    pub fn new() -> Self {
        Self::default()
    }

    /// Overlays `other` on top of this builder.
    ///
    /// Any option that is set in `other` replaces the builder's value; options that are
    /// unset in `other` keep whatever the builder already holds.
    pub fn merge(mut self, other: FASTAReadOptions) -> Self {
        self.file_extension = other.file_extension.or(self.file_extension.take());
        self.file_compression_type = other
            .file_compression_type
            .or(self.file_compression_type.take());
        self.fasta_sequence_data_type = other
            .fasta_sequence_data_type
            .or(self.fasta_sequence_data_type.take());

        self
    }

    /// Infers the file extension and compression type from `file_path`.
    ///
    /// * No extension: nothing is set.
    /// * The last extension parses as a compression type: that compression type is set, and
    ///   the extension of the remaining file stem (if any) becomes the file extension.
    /// * Otherwise: the last extension becomes the file extension and no compression is set.
    pub fn from_path(file_path: &str) -> Self {
        let path = Path::new(file_path);

        let last_extension = match path.extension().and_then(|ext| ext.to_str()) {
            Some(ext) => ext,
            None => return Self::new(),
        };

        match FileCompressionType::from_str(last_extension) {
            Ok(compression) => {
                // The trailing extension named a compression scheme, so the "real" extension
                // (if present) is the one on the stem, e.g. `fasta` in `reads.fasta.gz`.
                let inner_extension = path
                    .file_stem()
                    .and_then(|stem| stem.to_str())
                    .and_then(|stem| Path::new(stem).extension())
                    .and_then(|ext| ext.to_str());

                let builder = Self::new().with_file_compression_type(compression);
                match inner_extension {
                    Some(ext) => builder.with_file_extension(ext),
                    None => builder,
                }
            }
            Err(_) => Self::new().with_file_extension(last_extension),
        }
    }

    /// Sets the file extension.
    pub fn with_file_extension(self, file_extension: &str) -> Self {
        Self {
            file_extension: Some(file_extension.to_owned()),
            ..self
        }
    }

    /// Sets the file compression type.
    pub fn with_file_compression_type(
        self,
        file_compression_type: FileCompressionType,
    ) -> Self {
        Self {
            file_compression_type: Some(file_compression_type),
            ..self
        }
    }

    /// Sets the FASTA sequence data type.
    pub fn with_fasta_sequence_data_type(
        self,
        fasta_sequence_data_type: FastaSequenceDataType,
    ) -> Self {
        Self {
            fasta_sequence_data_type: Some(fasta_sequence_data_type),
            ..self
        }
    }

    /// Consumes the builder and produces the finished `FASTAReadOptions`.
    pub fn build(self) -> FASTAReadOptions {
        let Self {
            file_extension,
            file_compression_type,
            fasta_sequence_data_type,
        } = self;

        FASTAReadOptions {
            file_extension,
            file_compression_type,
            fasta_sequence_data_type,
        }
    }
}

#[pyclass]
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -202,8 +107,17 @@ impl FASTAReadOptions {
}

impl FASTAReadOptions {
pub fn builder() -> FASTAReadOptionsBuilder {
FASTAReadOptionsBuilder::new()
/// Fills in read options that are still unset using values inferred from the file path.
///
/// Options the caller supplied explicitly always win; `file_options` only provides
/// fallbacks. This keeps the precedence of the previous builder-based flow, where
/// path-derived values were computed first and caller options were merged on top.
///
/// # Errors
///
/// Returns an error if an inferred compression type is needed (none was set explicitly)
/// and it cannot be converted into this crate's `FileCompressionType`.
pub(crate) fn update_from_file_options(&mut self, file_options: &FileOptions) -> BioBearResult<()> {
    if self.file_extension.is_none() {
        self.file_extension = file_options.file_extension().map(str::to_string);
    }

    if self.file_compression_type.is_none() {
        if let Some(inferred) = file_options.file_compression_type() {
            self.file_compression_type = Some(FileCompressionType::try_from(inferred)?);
        }
    }

    Ok(())
}
}

Expand Down
86 changes: 86 additions & 0 deletions src/file_options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2024 WHERE TRUE Technologies.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{path::Path, str::FromStr};

use datafusion::datasource::file_format::file_compression_type::FileCompressionType;

/// File extension and compression type associated with a file, as derived from its path
/// by the `From<&str>` implementation.
///
/// Both fields are optional; the derived `Default` leaves them `None`.
#[derive(Debug, Clone, Default)]
pub(crate) struct FileOptions {
// Extension of the underlying file, when one could be determined.
file_extension: Option<String>,
// Compression type parsed from the trailing extension, when it names one.
file_compression_type: Option<FileCompressionType>,
}

impl FileOptions {
/// Returns the file extension as a borrowed string slice, or `None` when it is unset.
pub fn file_extension(&self) -> Option<&str> {
self.file_extension.as_deref()
}

/// Returns the compression type by value, or `None` when it is unset.
pub fn file_compression_type(&self) -> Option<FileCompressionType> {
self.file_compression_type
}

}

impl From<&str> for FileOptions {
    /// Derives file options from a path string.
    ///
    /// * No extension: both fields are `None`.
    /// * The last extension parses as a compression type: that type is recorded, and the
    ///   extension of the remaining file stem (if any) becomes the file extension.
    /// * Otherwise: the last extension becomes the file extension with no compression type.
    fn from(s: &str) -> Self {
        let path = Path::new(s);

        let last_extension = match path.extension().and_then(|ext| ext.to_str()) {
            Some(ext) => ext,
            None => return Self::default(),
        };

        match FileCompressionType::from_str(last_extension) {
            Ok(compression) => {
                // Look one level deeper: for `reads.csv.gz` the stem is `reads.csv`,
                // whose own extension is the file's real extension.
                let inner_extension = path
                    .file_stem()
                    .and_then(|stem| stem.to_str())
                    .and_then(|stem| Path::new(stem).extension())
                    .and_then(|ext| ext.to_str())
                    .map(|ext| ext.to_string());

                Self {
                    file_extension: inner_extension,
                    file_compression_type: Some(compression),
                }
            }
            Err(_) => Self {
                file_extension: Some(last_extension.to_string()),
                file_compression_type: None,
            },
        }
    }
}


#[cfg(test)]
mod tests {
    use super::*;

    /// A plain extension is reported as-is with no compression type.
    #[test]
    fn test_from_str() {
        let file_options = FileOptions::from("test.csv");
        assert_eq!(file_options.file_extension(), Some("csv"));
        assert_eq!(file_options.file_compression_type(), None);
    }

    /// A trailing compression extension is split from the inner file extension.
    #[test]
    fn test_from_str_compressed() {
        let file_options = FileOptions::from("test.csv.gz");
        assert_eq!(file_options.file_extension(), Some("csv"));
        assert_eq!(
            file_options.file_compression_type(),
            Some(FileCompressionType::GZIP)
        );
    }

    /// A compression extension with nothing underneath yields only a compression type.
    #[test]
    fn test_from_str_compressed_without_inner_extension() {
        let file_options = FileOptions::from("test.gz");
        assert_eq!(file_options.file_extension(), None);
        assert_eq!(
            file_options.file_compression_type(),
            Some(FileCompressionType::GZIP)
        );
    }

    /// No extension at all leaves both options unset, even if a parent directory has a dot.
    #[test]
    fn test_from_str_no_extension() {
        for path in ["test", "data.v2/test"] {
            let file_options = FileOptions::from(path);
            assert_eq!(file_options.file_extension(), None);
            assert_eq!(file_options.file_compression_type(), None);
        }
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ mod exon_reader;
mod vcf_reader;

mod file_compression_type;
mod file_options;

pub use file_compression_type::FileCompressionType;

Expand Down
13 changes: 5 additions & 8 deletions src/session_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@ use pyo3::prelude::*;

use crate::datasources::bcf::BCFReadOptions;
use crate::datasources::bigwig::BigWigReadOptions;
use crate::datasources::fasta::{FASTAReadOptions, FASTAReadOptionsBuilder};
use crate::datasources::fasta::FASTAReadOptions;
use crate::datasources::fastq::FASTQReadOptions;
use crate::datasources::hmm_dom_tab::HMMDomTabReadOptions;
use crate::datasources::mzml::MzMLReadOptions;
use crate::error;
use crate::execution_result::ExecutionResult;
use crate::file_options::FileOptions;
use crate::runtime::wait_for_future;

#[pyclass]
Expand Down Expand Up @@ -217,13 +218,9 @@ impl BioBearSessionContext {
options: Option<FASTAReadOptions>,
py: Python,
) -> PyResult<ExecutionResult> {
let options = if let Some(options) = options {
let fasta_read_options_builder = FASTAReadOptionsBuilder::from_path(file_path);

fasta_read_options_builder.merge(options).build()
} else {
FASTAReadOptionsBuilder::from_path(file_path).build()
};
let file_options = FileOptions::from(file_path);
let mut options = options.unwrap_or_default();
options.update_from_file_options(&file_options)?;

let result = self.ctx.read_fasta(file_path, options.into());
let df = wait_for_future(py, result).map_err(error::BioBearError::from)?;
Expand Down

0 comments on commit 1ff3502

Please sign in to comment.