Skip to content

Commit

Permalink
feat: finish transition to file options (#166)
Browse files Browse the repository at this point in the history
* refactor: fill out file inference
* feat: finish transition to file options
  • Loading branch information
tshauck authored Aug 1, 2024
1 parent 7efabf0 commit bb20528
Show file tree
Hide file tree
Showing 13 changed files with 139 additions and 111 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ name = "biobear"
[dependencies]
arrow = { version = "52.1.0", features = ["pyarrow"] }
datafusion = "40"
exon = { version = "0.29.1", features = ["default"] }
exon = { version = "0.30.0", features = ["default"] }
pyo3 = "0.21.2"
tokio = { version = "1", features = ["rt"] }
noodles = { version = "0.78", features = ["core"] }
Expand Down
26 changes: 3 additions & 23 deletions src/datasources/bed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
use exon::datasources::bed::table_provider::ListingBEDTableOptions;
use pyo3::{pyclass, pymethods};

use crate::{error::BioBearResult, file_options::FileOptions, FileCompressionType};
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone, Default)]
Expand All @@ -31,6 +31,8 @@ pub struct BEDReadOptions {
file_extension: Option<String>,
}

impl_settable_from_file_options!(BEDReadOptions);

#[pymethods]
impl BEDReadOptions {
#[new]
Expand All @@ -48,28 +50,6 @@ impl BEDReadOptions {
}
}

impl BEDReadOptions {
    /// Fills unset read options from `file_options`.
    ///
    /// `file_extension` and `file_compression_type` are each assigned only
    /// when the corresponding field on `self` is currently `None`; a value
    /// already present on `self` is left untouched.
    ///
    /// # Errors
    ///
    /// Propagates the error from `FileCompressionType::try_from` when the
    /// compression type reported by `file_options` cannot be converted.
    pub(crate) fn update_from_file_options(
        &mut self,
        file_options: &FileOptions,
    ) -> BioBearResult<()> {
        match file_options.file_extension() {
            Some(ext) if self.file_extension.is_none() => {
                self.file_extension = Some(ext.to_string());
            }
            _ => {}
        }

        match file_options.file_compression_type() {
            // The conversion is attempted only when the slot is still empty.
            Some(raw) if self.file_compression_type.is_none() => {
                self.file_compression_type = Some(FileCompressionType::try_from(raw)?);
            }
            _ => {}
        }

        Ok(())
    }
}

impl From<BEDReadOptions> for ListingBEDTableOptions {
fn from(options: BEDReadOptions) -> Self {
let file_compression_type = options
Expand Down
24 changes: 2 additions & 22 deletions src/datasources/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

use crate::{
error::BioBearResult, file_compression_type::FileCompressionType, file_options::FileOptions,
file_compression_type::FileCompressionType, file_options::impl_settable_from_file_options,
};
use exon::datasources::fasta::{table_provider::ListingFASTATableOptions, SequenceDataType};
use pyo3::{pyclass, pymethods};
Expand Down Expand Up @@ -107,27 +107,7 @@ impl FASTAReadOptions {
}
}

impl FASTAReadOptions {
    /// Copies the extension and compression type from `file_options` into
    /// whichever of those two fields is still `None` on `self`.
    ///
    /// Fields that already hold a value are not modified.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `FileCompressionType::try_from` if the
    /// compression type taken from `file_options` cannot be converted.
    pub(crate) fn update_from_file_options(
        &mut self,
        file_options: &FileOptions,
    ) -> BioBearResult<()> {
        // Keep the candidate only while our own slot is empty.
        if let Some(ext) = file_options
            .file_extension()
            .filter(|_| self.file_extension.is_none())
        {
            self.file_extension = Some(ext.to_string());
        }

        if let Some(raw) = file_options
            .file_compression_type()
            .filter(|_| self.file_compression_type.is_none())
        {
            let converted = FileCompressionType::try_from(raw)?;
            self.file_compression_type = Some(converted);
        }

        Ok(())
    }
}
impl_settable_from_file_options!(FASTAReadOptions);

impl From<FASTAReadOptions> for ListingFASTATableOptions {
fn from(options: FASTAReadOptions) -> Self {
Expand Down
22 changes: 3 additions & 19 deletions src/datasources/fastq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

use crate::{
error::BioBearResult, file_compression_type::FileCompressionType, file_options::FileOptions,
file_compression_type::FileCompressionType, file_options::impl_settable_from_file_options,
};
use exon::datasources::fastq::table_provider::ListingFASTQTableOptions;
use pyo3::{pyclass, pymethods};
Expand Down Expand Up @@ -51,6 +51,8 @@ pub struct FASTQReadOptions {
file_compression_type: Option<FileCompressionType>,
}

impl_settable_from_file_options!(FASTQReadOptions);

#[pymethods]
impl FASTQReadOptions {
#[new]
Expand Down Expand Up @@ -84,24 +86,6 @@ impl FASTQReadOptions {
}
}

impl FASTQReadOptions {
    /// Fills unset read options from `file_options`.
    ///
    /// `file_extension` and `file_compression_type` are each assigned only
    /// when the corresponding field on `self` is currently `None`. A value
    /// that is already set on `self` takes precedence and is never replaced
    /// by the one carried in `file_options`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `FileCompressionType::try_from` when the
    /// compression type reported by `file_options` cannot be converted. The
    /// conversion is attempted only when `self.file_compression_type` is
    /// unset.
    pub(crate) fn update_from_file_options(
        &mut self,
        file_options: &FileOptions,
    ) -> BioBearResult<()> {
        if let Some(file_extension) = file_options.file_extension() {
            // Do not clobber an extension that is already set.
            if self.file_extension.is_none() {
                self.file_extension = Some(file_extension.to_string());
            }
        }

        if let Some(file_compression_type) = file_options.file_compression_type() {
            // Same precedence rule for the compression type.
            if self.file_compression_type.is_none() {
                let fct = FileCompressionType::try_from(file_compression_type)?;
                self.file_compression_type = Some(fct);
            }
        }

        Ok(())
    }
}

impl From<FASTQReadOptions> for ListingFASTQTableOptions {
fn from(options: FASTQReadOptions) -> Self {
let file_compression_type = options
Expand Down
17 changes: 13 additions & 4 deletions src/datasources/genbank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,37 @@
use exon::datasources::genbank::table_provider::ListingGenbankTableOptions;
use pyo3::{pyclass, pymethods};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone, Default)]
/// Options for reading GenBank files.
pub struct GenBankReadOptions {
/// The file compression type.
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
/// The file extension.
file_extension: Option<String>,
}

impl_settable_from_file_options!(GenBankReadOptions);

#[pymethods]
impl GenBankReadOptions {
#[new]
fn new(file_compression_type: Option<FileCompressionType>) -> Self {
Self {
file_compression_type: file_compression_type.unwrap_or_default(),
file_compression_type,
file_extension: Some("gb".to_string()),
}
}
}

impl From<GenBankReadOptions> for ListingGenbankTableOptions {
fn from(options: GenBankReadOptions) -> Self {
ListingGenbankTableOptions::new(options.file_compression_type.into())
let c = options
.file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED);

ListingGenbankTableOptions::new(c.into())
}
}
11 changes: 8 additions & 3 deletions src/datasources/gff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ use exon::datasources::gff::table_provider::ListingGFFTableOptions;
use noodles::core::Region;
use pyo3::{pyclass, pymethods, PyResult};

use crate::{error::BioBearResult, file_options::FileOptions, FileCompressionType};
use crate::{
error::BioBearResult,
file_options::{impl_settable_from_file_options, FileOptions},
FileCompressionType,
};

use super::parse_region;

Expand All @@ -28,6 +32,8 @@ pub struct GFFReadOptions {
file_compression_type: Option<FileCompressionType>,
}

impl_settable_from_file_options!(GFFReadOptions);

#[pymethods]
impl GFFReadOptions {
#[new]
Expand Down Expand Up @@ -56,8 +62,7 @@ impl GFFReadOptions {

if let Some(file_compression_type) = options.file_compression_type() {
if self.file_compression_type.is_none() {
let fct = FileCompressionType::try_from(file_compression_type)?;
self.file_compression_type = Some(fct);
self.file_compression_type = Some(file_compression_type);
}
}

Expand Down
21 changes: 15 additions & 6 deletions src/datasources/gtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,44 @@
use exon::datasources::gtf::table_provider::ListingGTFTableOptions;
use pyo3::{pyclass, pymethods};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone)]
pub struct GTFReadOptions {
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
file_extension: Option<String>,
}

impl Default for GTFReadOptions {
fn default() -> Self {
Self {
file_compression_type: FileCompressionType::UNCOMPRESSED,
file_compression_type: Some(FileCompressionType::UNCOMPRESSED),
file_extension: None,
}
}
}

impl_settable_from_file_options!(GTFReadOptions);

#[pymethods]
impl GTFReadOptions {
#[new]
pub fn new(file_compression_type: Option<FileCompressionType>) -> Self {
Self {
file_compression_type: file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED),
file_compression_type,
file_extension: Some("gtf".to_string()),
}
}
}

impl From<GTFReadOptions> for ListingGTFTableOptions {
fn from(options: GTFReadOptions) -> Self {
ListingGTFTableOptions::new(options.file_compression_type.into())
ListingGTFTableOptions::new(
options
.file_compression_type
.map(|c| c.into())
.unwrap_or(datafusion::datasource::file_format::file_compression_type::FileCompressionType::UNCOMPRESSED),
)
}
}
30 changes: 24 additions & 6 deletions src/datasources/mzml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,23 @@
use exon::datasources::mzml::table_provider::ListingMzMLTableOptions;
use pyo3::{pyclass, pymethods};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone)]
/// Options for reading mzML files.
pub struct MzMLReadOptions {
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
file_extension: Option<String>,
}

impl_settable_from_file_options!(MzMLReadOptions);

impl Default for MzMLReadOptions {
fn default() -> Self {
Self {
file_compression_type: FileCompressionType::UNCOMPRESSED,
file_compression_type: Some(FileCompressionType::UNCOMPRESSED),
file_extension: None,
}
}
}
Expand All @@ -37,14 +41,28 @@ impl MzMLReadOptions {
#[new]
fn new(file_compression_type: Option<FileCompressionType>) -> Self {
Self {
file_compression_type: file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED),
file_compression_type: Some(
file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED),
),
file_extension: None,
}
}
}

impl From<MzMLReadOptions> for ListingMzMLTableOptions {
fn from(options: MzMLReadOptions) -> Self {
ListingMzMLTableOptions::new(options.file_compression_type.into())
let file_compression_type = options
.file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED);

let mut new_options = ListingMzMLTableOptions::new(file_compression_type.into());

// let file_extension = options.file_extension;
if let Some(fe) = options.file_extension {
eprintln!("Setting file extension to {}", fe);
new_options = new_options.with_file_extension(fe)
}

new_options
}
}
18 changes: 12 additions & 6 deletions src/datasources/vcf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use exon::datasources::vcf::ListingVCFTableOptions;
use noodles::core::Region;
use pyo3::{pyclass, pymethods, PyResult};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

use super::parse_region;

Expand All @@ -28,15 +28,19 @@ pub struct VCFReadOptions {
/// The region to read.
region: Option<Region>,
/// The file compression type.
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
/// True if the INFO column should be parsed.
parse_info: bool,
/// True if the FORMAT column should be parsed.
parse_formats: bool,
/// The partition fields.
partition_cols: Option<Vec<String>>,
/// The file extension.
file_extension: Option<String>,
}

impl_settable_from_file_options!(VCFReadOptions);

#[pymethods]
impl VCFReadOptions {
#[new]
Expand All @@ -50,22 +54,24 @@ impl VCFReadOptions {
) -> PyResult<Self> {
let region = parse_region(region)?;

let file_compression_type =
file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED);

Ok(Self {
region,
file_compression_type,
parse_info,
parse_formats,
partition_cols,
file_extension: Some("vcf".to_string()),
})
}
}

impl From<VCFReadOptions> for ListingVCFTableOptions {
fn from(options: VCFReadOptions) -> Self {
let mut o = ListingVCFTableOptions::new(options.file_compression_type.into(), false)
let compression = options
.file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED);

let mut o = ListingVCFTableOptions::new(compression.into(), false)
.with_parse_info(options.parse_info)
.with_parse_formats(options.parse_formats);

Expand Down
2 changes: 1 addition & 1 deletion src/file_compression_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use pyo3::prelude::*;
use crate::error::BioBearError;

#[pyclass]
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub enum FileCompressionType {
GZIP,
ZSTD,
Expand Down
Loading

0 comments on commit bb20528

Please sign in to comment.