Skip to content

Commit

Permalink
Merge pull request #3 from mortonjt/monte_carlo_tensor
Browse files Browse the repository at this point in the history
Monte carlo tensor
  • Loading branch information
mortonjt authored May 24, 2021
2 parents 623c9aa + f378386 commit 060acfe
Show file tree
Hide file tree
Showing 10 changed files with 316 additions and 10 deletions.
1 change: 1 addition & 0 deletions ci/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ requirements:
- biom-format >=2.1.5,<2.2.0
- ijson
- h5py
- arviz
- qiime2 {{ release }}.*

test:
Expand Down
7 changes: 5 additions & 2 deletions q2_types/feature_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
AlignedProteinSequencesDirectoryFormat, ProteinSequencesDirectoryFormat,
ProteinFASTAFormat, AlignedProteinFASTAFormat, RNASequencesDirectoryFormat,
RNAFASTAFormat, AlignedRNAFASTAFormat, AlignedRNASequencesDirectoryFormat,
PairedRNASequencesDirectoryFormat)
PairedRNASequencesDirectoryFormat,
MonteCarloTensorFormat, MonteCarloTensorDirectoryFormat)
from ._type import (
FeatureData, Taxonomy, Sequence, PairedEndSequence, AlignedSequence,
Differential, ProteinSequence, AlignedProteinSequence, RNASequence,
AlignedRNASequence, PairedEndRNASequence)
AlignedRNASequence, PairedEndRNASequence, MonteCarloTensor)

# TODO remove these imports when tests are rewritten. Remove from __all__ too
from ._transformer import (
Expand All @@ -37,6 +38,8 @@
'DNASequencesDirectoryFormat', 'PairedDNASequencesDirectoryFormat',
'AlignedDNAFASTAFormat', 'AlignedDNASequencesDirectoryFormat',
'FeatureData', 'Taxonomy', 'Sequence', 'PairedEndSequence',
'MonteCarloTensor', 'MonteCarloTensorFormat',
'MonteCarloTensorDirectoryFormat',
'AlignedSequence', 'NucleicAcidIterator', 'DNAIterator',
'PairedDNAIterator', 'FASTAFormat', 'AlignedDNAIterator', 'Differential',
'DifferentialDirectoryFormat', 'AlignedFASTAFormatMixin',
Expand Down
18 changes: 18 additions & 0 deletions q2_types/feature_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import qiime2.plugin.model as model
from qiime2.plugin import ValidationError
import qiime2
import arviz as az

from ..plugin_setup import plugin

Expand Down Expand Up @@ -335,6 +336,7 @@ def _construct_validator_from_alphabet(alphabet_str):

class DifferentialFormat(model.TextFileFormat):
def validate(self, *args):

try:
md = qiime2.Metadata.load(str(self))
except qiime2.metadata.MetadataFileError as md_exc:
Expand All @@ -352,6 +354,21 @@ def validate(self, *args):
'DifferentialDirectoryFormat', 'differentials.tsv', DifferentialFormat)


class MonteCarloTensorFormat(model.BinaryFileFormat):
    """Binary file that arviz can load through ``InferenceData.from_netcdf``."""

    def sniff(self):
        """Report whether arviz is able to open this file.

        Returns
        -------
        bool
            ``True`` when ``az.InferenceData.from_netcdf`` completes without
            raising, ``False`` when it raises any ``Exception``.
        """
        try:
            az.InferenceData.from_netcdf(str(self))
        except Exception:
            # Any loading failure is treated as "not this format".
            return False
        else:
            return True


# Single-file directory format wrapping MonteCarloTensorFormat; the file
# inside the directory is named 'monte-carlo-samples.az'.
MonteCarloTensorDirectoryFormat = model.SingleFileDirectoryFormat(
'MonteCarloTensorDirectoryFormat', 'monte-carlo-samples.az',
MonteCarloTensorFormat)


class ProteinFASTAFormat(FASTAFormat):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -382,6 +399,7 @@ def __init__(self, *args, **kwargs):
TaxonomyFormat, TaxonomyDirectoryFormat, DNAFASTAFormat,
DNASequencesDirectoryFormat, PairedDNASequencesDirectoryFormat,
AlignedDNAFASTAFormat, AlignedDNASequencesDirectoryFormat,
MonteCarloTensorFormat, MonteCarloTensorDirectoryFormat,
DifferentialFormat, DifferentialDirectoryFormat, ProteinFASTAFormat,
AlignedProteinFASTAFormat, ProteinSequencesDirectoryFormat,
AlignedProteinSequencesDirectoryFormat, RNAFASTAFormat,
Expand Down
15 changes: 14 additions & 1 deletion q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
import biom
import skbio
import qiime2

import arviz as az
from ..plugin_setup import plugin
from ..feature_table import BIOMV210Format
from . import (TaxonomyFormat, HeaderlessTSVTaxonomyFormat, TSVTaxonomyFormat,
DNAFASTAFormat, PairedDNASequencesDirectoryFormat,
MonteCarloTensorFormat,
AlignedDNAFASTAFormat, DifferentialFormat, ProteinFASTAFormat,
AlignedProteinFASTAFormat, RNAFASTAFormat,
AlignedRNAFASTAFormat, PairedRNASequencesDirectoryFormat
Expand Down Expand Up @@ -644,3 +645,15 @@ def _224(data: pd.DataFrame) -> DifferentialFormat:
ff = DifferentialFormat()
qiime2.Metadata(data).save(str(ff))
return ff


@plugin.register_transformer
def _225(ff: MonteCarloTensorFormat) -> az.InferenceData:
    """Load the file behind ``ff`` into an arviz ``InferenceData`` object."""
    netcdf_path = str(ff)
    return az.InferenceData.from_netcdf(netcdf_path)


@plugin.register_transformer
def _226(obj: az.InferenceData) -> MonteCarloTensorFormat:
    """Write ``obj`` to a new ``MonteCarloTensorFormat`` file and return it."""
    tensor_file = MonteCarloTensorFormat()
    destination = str(tensor_file)
    obj.to_netcdf(destination)
    return tensor_file
9 changes: 8 additions & 1 deletion q2_types/feature_data/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from . import (TSVTaxonomyDirectoryFormat, DNASequencesDirectoryFormat,
PairedDNASequencesDirectoryFormat,
AlignedDNASequencesDirectoryFormat,
MonteCarloTensorDirectoryFormat,
DifferentialDirectoryFormat, ProteinSequencesDirectoryFormat,
AlignedProteinSequencesDirectoryFormat,
RNASequencesDirectoryFormat, AlignedRNASequencesDirectoryFormat,
Expand All @@ -35,6 +36,9 @@
AlignedSequence = SemanticType('AlignedSequence',
variant_of=FeatureData.field['type'])

# NOTE: unlike the neighbouring declarations, this type is created without
# ``variant_of=FeatureData.field['type']``.
MonteCarloTensor = SemanticType('MonteCarloTensor')


AlignedRNASequence = SemanticType('AlignedRNASequence',
variant_of=FeatureData.field['type'])

Expand All @@ -49,7 +53,8 @@

plugin.register_semantic_types(FeatureData, Taxonomy, Sequence,
PairedEndSequence, AlignedSequence,
Differential, ProteinSequence,
Differential, MonteCarloTensor,
ProteinSequence,
AlignedProteinSequence, RNASequence,
AlignedRNASequence, PairedEndRNASequence)

Expand All @@ -72,6 +77,8 @@
plugin.register_semantic_type_to_format(
FeatureData[AlignedSequence],
artifact_format=AlignedDNASequencesDirectoryFormat)
# Pass the directory format by keyword, as the neighbouring registrations do.
plugin.register_semantic_type_to_format(
    MonteCarloTensor,
    artifact_format=MonteCarloTensorDirectoryFormat)
plugin.register_semantic_type_to_format(
FeatureData[AlignedRNASequence],
artifact_format=AlignedRNASequencesDirectoryFormat)
Expand Down
Binary file not shown.
201 changes: 201 additions & 0 deletions q2_types/feature_data/tests/data/nan_differential.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
featureid effect
F0
F1 1.01418002973925
F2 1.02456128258909
F3 -0.74363992043225
F4 1.29823896534823
F5 -1.12965055281585
F6 -0.522401797448688
F7 0.327560711072239
F8 -1.3738693465664802
F9 -0.7847891526325621
F10 -0.280063201878434
F11 -0.251269847578052
F12 1.24602780723028
F13 0.665734866338239
F14 -0.889042985114811
F15 -0.811956802515126
F16 0.11591764582945
F17 -0.156195990858492
F18 -0.965770633683909
F19 0.8035240337800391
F20 0.680306950765235
F21 -0.688388077896823
F22 0.7949653982850671
F23 -1.11912925367142
F24 -1.10593563527746
F25 0.678934047810573
F26 -0.937189288219405
F27 -0.99973019311648
F28 -0.31799033232181
F29 -0.66141741897569
F30 0.550511528315366
F31 -0.9325448302932858
F32 -0.34253881283785104
F33 1.52951044529528
F34 -0.0322228434081009
F35 -1.3936520437793
F36 0.8687222580720692
F37 0.6823592821191031
F38 1.37480393072457
F39 0.239227247696375
F40 -0.95692625177732
F41 1.00830368417208
F42 0.8288761564573699
F43 -1.3476440812435702
F44 0.529025296092455
F45 -0.340145172805159
F46 0.964756233144604
F47 -0.7310281367702399
F48 0.5383478597420129
F49 0.29953220850051193
F50 1.00798009601963
F51 -0.0132115389102393
F52 -0.278915712970544
F53 0.06277454002648
F54 0.3516746428630449
F55 0.137951639939762
F56 1.33461560518154
F57 0.43628478179779
F58 1.11453590606852
F59 1.29860068665808
F60 0.0955549779502926
F61 0.495998910273253
F62 -0.7833488766142149
F63 -0.18096581167667
F64 1.30235947734143
F65 0.280013344538779
F66 1.30046727244606
F67 -1.06463133806824
F68 -1.25938109004015
F69 0.264024004173156
F70 -1.2529907420368602
F71 1.4937353010847
F72 0.970455224810024
F73 -1.23960496737832
F74 -0.203565231196184
F75 -0.450539610910928
F76 0.411462370457529
F77 -0.309713881057648
F78 0.9140794729087808
F79 1.31441543849981
F80 -0.387324391709564
F81 -1.2144940378668605
F82 0.981754295455261
F83 0.0289330595561297
F84 1.24311989408419
F85 -0.24182271418096696
F86 0.76870209333065
F87 0.238515379111686
F88 -0.338651978123051
F89 -0.4482656135303739
F90 0.5279056466564099
F91 1.37807956999098
F92 1.04310859627148
F93 -0.117482004574065
F94 0.6232413973258649
F95 0.00403130502610116
F96 -1.34598938813891
F97 0.4549706581338591
F98 1.0192013773894302
F99 -0.291756302796704
F100 -1.12688813864611
F101 0.557769384655089
F102 1.1975908860910902
F103 1.15355341444875
F104 0.946531581472272
F105 1.22600261259885
F106 1.0261210015913198
F107 0.841857769084618
F108 -0.4019324938447151
F109 0.19764363305971105
F110 -0.149480290579409
F111 0.6408321521409511
F112 -0.966639287078902
F113 1.16738708670172
F114 0.174326207230448
F115 -0.5695094365752371
F116 1.20806940422198
F117 -1.24543884840423
F118 -1.15412531725971
F119 -0.8296471214736691
F120 0.736529671228746
F121 -0.660047869199522
F122 0.904640201633404
F123 -0.5986532323341109
F124 0.691079988752513
F125 -0.562592354003712
F126 1.01123956644237
F127 -0.41423096856367103
F128 0.619238453979786
F129 -0.620591765205541
F130 -1.27255930040442
F131 -0.3512359129578449
F132 1.00467461529444
F133 -1.13541106483259
F134 -1.26161085240342
F135 -0.38659519790583097
F136 0.872749204870088
F137 -0.810751128748022
F138 -0.8197848282048691
F139 0.20047162644758895
F140 -0.604549610882948
F141 0.228624680177828
F142 1.28609633871252
F143 -0.1679362832488
F144 -0.257647674436839
F145 0.74101019223122
F146 0.132687409665163
F147 -1.09538523785539
F148 -0.4520741143592221
F149 -1.1796888761929298
F150 1.07271995122987
F151 -0.219846098067332
F152 0.529493500001223
F153 0.6270798132806121
F154 -1.01527420105315
F155 0.746112185723826
F156 -0.686265054564927
F157 -1.3326095821448702
F158 -0.3777879682136721
F159 -1.13401841717297
F160 -0.296439244074386
F161 0.40847742352928296
F162 1.27982997837018
F163 -0.7134413622708241
F164 -0.349087095579928
F165 0.6163069356830501
F166 0.217564847263855
F167 -0.242902200286938
F168 -1.01430232660254
F169 -0.919646912875392
F170 -0.19937212012607394
F171 -1.31129333739411
F172 -0.721872176785536
F173 -1.12678232313294
F174 1.0904935742820798
F175 -1.17644925652159
F176 1.25195089868697
F177 -0.5994841508457149
F178 -1.12642775074296
F179 -0.927361070785748
F180 0.852558007271541
F181 0.740604316851619
F182 -0.504950438684657
F183 0.358858461465484
F184 -0.0652057793033983
F185 -0.210512467576898
F186 0.15407114587584306
F187 0.7501398636830259
F188 -1.38405071916205
F189 -1.3863784044176497
F190 0.35465073693029203
F191 -0.0527678764658988
F192 1.04452771030868
F193 -1.0105215476872
F194 -0.6098963926022061
F195 1.24019945405587
F196 -1.2486190178628
F197 0.28873133501175297
F198 -0.9444328759187892
F199 -0.308547237330663
34 changes: 33 additions & 1 deletion q2_types/feature_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
ProteinFASTAFormat, AlignedProteinFASTAFormat, FASTAFormat,
AlignedProteinSequencesDirectoryFormat, ProteinSequencesDirectoryFormat,
RNAFASTAFormat, RNASequencesDirectoryFormat, AlignedRNAFASTAFormat,
AlignedRNASequencesDirectoryFormat
AlignedRNASequencesDirectoryFormat,
MonteCarloTensorDirectoryFormat
)
from qiime2.plugin.testing import TestPluginBase
from qiime2.plugin import ValidationError
import arviz as az
import numpy as np


class TestTaxonomyFormats(TestPluginBase):
Expand Down Expand Up @@ -538,5 +541,34 @@ def test_aligned_protein_sequences_directory_format(self):
format.validate()


class TestMonteCarloTensorFormat(TestPluginBase):
    """Validation tests for ``MonteCarloTensorDirectoryFormat``."""

    package = 'q2_types.feature_data.tests'

    def test_monte_carlo_format(self):
        """A freshly written arviz netCDF file passes validation."""
        temp_dir = self.temp_dir.name
        # Write the generated samples straight into the per-test temporary
        # directory rather than into the package's test-data directory, so
        # that running the test never modifies files on the data path.
        target = os.path.join(temp_dir, 'monte-carlo-samples.az')

        size = 100
        dataset = az.convert_to_inference_data(np.random.randn(size))
        dataset.to_netcdf(target)

        dir_fmt = MonteCarloTensorDirectoryFormat(temp_dir, mode='r')
        dir_fmt.validate()

    def test_monte_carlo_format_bad(self):
        """A TSV file stored under the expected file name is rejected."""
        filepath = self.get_data_path('nan_differential.tsv')
        temp_dir = self.temp_dir.name
        # Copy the TSV under the file name the directory format looks for.
        # Keeping the original '.tsv' name would presumably make validation
        # fail on the missing member before the file content is ever
        # inspected -- TODO confirm against the framework's validate().
        shutil.copy(filepath,
                    os.path.join(temp_dir, 'monte-carlo-samples.az'))
        dir_fmt = MonteCarloTensorDirectoryFormat(temp_dir, mode='r')
        with self.assertRaisesRegex(ValidationError, 'MonteCarloTensor'):
            dir_fmt.validate()


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit 060acfe

Please sign in to comment.