Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Monte carlo tensor #3

Merged
merged 18 commits into from
May 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ requirements:
- biom-format >=2.1.5,<2.2.0
- ijson
- h5py
- arviz
- qiime2 {{ release }}.*

test:
Expand Down
7 changes: 5 additions & 2 deletions q2_types/feature_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
AlignedProteinSequencesDirectoryFormat, ProteinSequencesDirectoryFormat,
ProteinFASTAFormat, AlignedProteinFASTAFormat, RNASequencesDirectoryFormat,
RNAFASTAFormat, AlignedRNAFASTAFormat, AlignedRNASequencesDirectoryFormat,
PairedRNASequencesDirectoryFormat)
PairedRNASequencesDirectoryFormat,
MonteCarloTensorFormat, MonteCarloTensorDirectoryFormat)
from ._type import (
FeatureData, Taxonomy, Sequence, PairedEndSequence, AlignedSequence,
Differential, ProteinSequence, AlignedProteinSequence, RNASequence,
AlignedRNASequence, PairedEndRNASequence)
AlignedRNASequence, PairedEndRNASequence, MonteCarloTensor)

# TODO remove these imports when tests are rewritten. Remove from __all__ too
from ._transformer import (
Expand All @@ -37,6 +38,8 @@
'DNASequencesDirectoryFormat', 'PairedDNASequencesDirectoryFormat',
'AlignedDNAFASTAFormat', 'AlignedDNASequencesDirectoryFormat',
'FeatureData', 'Taxonomy', 'Sequence', 'PairedEndSequence',
'MonteCarloTensor', 'MonteCarloTensorFormat',
'MonteCarloTensorDirectoryFormat',
'AlignedSequence', 'NucleicAcidIterator', 'DNAIterator',
'PairedDNAIterator', 'FASTAFormat', 'AlignedDNAIterator', 'Differential',
'DifferentialDirectoryFormat', 'AlignedFASTAFormatMixin',
Expand Down
18 changes: 18 additions & 0 deletions q2_types/feature_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import qiime2.plugin.model as model
from qiime2.plugin import ValidationError
import qiime2
import arviz as az

from ..plugin_setup import plugin

Expand Down Expand Up @@ -335,6 +336,7 @@ def _construct_validator_from_alphabet(alphabet_str):

class DifferentialFormat(model.TextFileFormat):
def validate(self, *args):

try:
md = qiime2.Metadata.load(str(self))
except qiime2.metadata.MetadataFileError as md_exc:
Expand All @@ -352,6 +354,21 @@ def validate(self, *args):
'DifferentialDirectoryFormat', 'differentials.tsv', DifferentialFormat)


class MonteCarloTensorFormat(model.BinaryFileFormat):
    """Binary file format for Monte Carlo samples readable by arviz.

    A file is accepted when ``az.InferenceData.from_netcdf`` can read it.
    """

    def sniff(self):
        """Report whether this file loads as an arviz ``InferenceData``.

        Returns
        -------
        bool
            ``True`` if ``az.InferenceData.from_netcdf`` reads the file
            without raising, ``False`` if it raises any ``Exception``.
        """
        try:
            az.InferenceData.from_netcdf(str(self))
        except Exception:
            # Any load failure is treated as "not this format".
            return False
        else:
            return True


# Single-file directory format: holds one MonteCarloTensorFormat file
# under the name 'monte-carlo-samples.az'.
MonteCarloTensorDirectoryFormat = model.SingleFileDirectoryFormat(
'MonteCarloTensorDirectoryFormat', 'monte-carlo-samples.az',
MonteCarloTensorFormat)


class ProteinFASTAFormat(FASTAFormat):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -382,6 +399,7 @@ def __init__(self, *args, **kwargs):
TaxonomyFormat, TaxonomyDirectoryFormat, DNAFASTAFormat,
DNASequencesDirectoryFormat, PairedDNASequencesDirectoryFormat,
AlignedDNAFASTAFormat, AlignedDNASequencesDirectoryFormat,
MonteCarloTensorFormat, MonteCarloTensorDirectoryFormat,
DifferentialFormat, DifferentialDirectoryFormat, ProteinFASTAFormat,
AlignedProteinFASTAFormat, ProteinSequencesDirectoryFormat,
AlignedProteinSequencesDirectoryFormat, RNAFASTAFormat,
Expand Down
15 changes: 14 additions & 1 deletion q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
import biom
import skbio
import qiime2

import arviz as az
from ..plugin_setup import plugin
from ..feature_table import BIOMV210Format
from . import (TaxonomyFormat, HeaderlessTSVTaxonomyFormat, TSVTaxonomyFormat,
DNAFASTAFormat, PairedDNASequencesDirectoryFormat,
MonteCarloTensorFormat,
AlignedDNAFASTAFormat, DifferentialFormat, ProteinFASTAFormat,
AlignedProteinFASTAFormat, RNAFASTAFormat,
AlignedRNAFASTAFormat, PairedRNASequencesDirectoryFormat
Expand Down Expand Up @@ -644,3 +645,15 @@ def _224(data: pd.DataFrame) -> DifferentialFormat:
ff = DifferentialFormat()
qiime2.Metadata(data).save(str(ff))
return ff


@plugin.register_transformer
def _225(ff: MonteCarloTensorFormat) -> az.InferenceData:
    """Read a ``MonteCarloTensorFormat`` file into an ``InferenceData``.

    The file path is taken from ``str(ff)`` and handed to
    ``az.InferenceData.from_netcdf``.
    """
    netcdf_path = str(ff)
    return az.InferenceData.from_netcdf(netcdf_path)


@plugin.register_transformer
def _226(obj: az.InferenceData) -> MonteCarloTensorFormat:
    """Write an ``InferenceData`` object to a new ``MonteCarloTensorFormat``.

    A fresh format instance is created, ``obj.to_netcdf`` writes to the
    path given by ``str()`` of that instance, and the instance is returned.
    """
    target = MonteCarloTensorFormat()
    target_path = str(target)
    obj.to_netcdf(target_path)
    return target
9 changes: 8 additions & 1 deletion q2_types/feature_data/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from . import (TSVTaxonomyDirectoryFormat, DNASequencesDirectoryFormat,
PairedDNASequencesDirectoryFormat,
AlignedDNASequencesDirectoryFormat,
MonteCarloTensorDirectoryFormat,
DifferentialDirectoryFormat, ProteinSequencesDirectoryFormat,
AlignedProteinSequencesDirectoryFormat,
RNASequencesDirectoryFormat, AlignedRNASequencesDirectoryFormat,
Expand All @@ -35,6 +36,9 @@
AlignedSequence = SemanticType('AlignedSequence',
variant_of=FeatureData.field['type'])

# Declared without variant_of=FeatureData.field['type'], unlike the
# neighbouring types, so it is not written as FeatureData[...].
MonteCarloTensor = SemanticType('MonteCarloTensor')


AlignedRNASequence = SemanticType('AlignedRNASequence',
variant_of=FeatureData.field['type'])

Expand All @@ -49,7 +53,8 @@

plugin.register_semantic_types(FeatureData, Taxonomy, Sequence,
PairedEndSequence, AlignedSequence,
Differential, ProteinSequence,
Differential, MonteCarloTensor,
ProteinSequence,
AlignedProteinSequence, RNASequence,
AlignedRNASequence, PairedEndRNASequence)

Expand All @@ -72,6 +77,8 @@
plugin.register_semantic_type_to_format(
FeatureData[AlignedSequence],
artifact_format=AlignedDNASequencesDirectoryFormat)
# Associate the MonteCarloTensor semantic type with its directory format.
plugin.register_semantic_type_to_format(
    MonteCarloTensor,
    artifact_format=MonteCarloTensorDirectoryFormat)
plugin.register_semantic_type_to_format(
FeatureData[AlignedRNASequence],
artifact_format=AlignedRNASequencesDirectoryFormat)
Expand Down
Binary file not shown.
201 changes: 201 additions & 0 deletions q2_types/feature_data/tests/data/nan_differential.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
featureid effect
F0
F1 1.01418002973925
F2 1.02456128258909
F3 -0.74363992043225
F4 1.29823896534823
F5 -1.12965055281585
F6 -0.522401797448688
F7 0.327560711072239
F8 -1.3738693465664802
F9 -0.7847891526325621
F10 -0.280063201878434
F11 -0.251269847578052
F12 1.24602780723028
F13 0.665734866338239
F14 -0.889042985114811
F15 -0.811956802515126
F16 0.11591764582945
F17 -0.156195990858492
F18 -0.965770633683909
F19 0.8035240337800391
F20 0.680306950765235
F21 -0.688388077896823
F22 0.7949653982850671
F23 -1.11912925367142
F24 -1.10593563527746
F25 0.678934047810573
F26 -0.937189288219405
F27 -0.99973019311648
F28 -0.31799033232181
F29 -0.66141741897569
F30 0.550511528315366
F31 -0.9325448302932858
F32 -0.34253881283785104
F33 1.52951044529528
F34 -0.0322228434081009
F35 -1.3936520437793
F36 0.8687222580720692
F37 0.6823592821191031
F38 1.37480393072457
F39 0.239227247696375
F40 -0.95692625177732
F41 1.00830368417208
F42 0.8288761564573699
F43 -1.3476440812435702
F44 0.529025296092455
F45 -0.340145172805159
F46 0.964756233144604
F47 -0.7310281367702399
F48 0.5383478597420129
F49 0.29953220850051193
F50 1.00798009601963
F51 -0.0132115389102393
F52 -0.278915712970544
F53 0.06277454002648
F54 0.3516746428630449
F55 0.137951639939762
F56 1.33461560518154
F57 0.43628478179779
F58 1.11453590606852
F59 1.29860068665808
F60 0.0955549779502926
F61 0.495998910273253
F62 -0.7833488766142149
F63 -0.18096581167667
F64 1.30235947734143
F65 0.280013344538779
F66 1.30046727244606
F67 -1.06463133806824
F68 -1.25938109004015
F69 0.264024004173156
F70 -1.2529907420368602
F71 1.4937353010847
F72 0.970455224810024
F73 -1.23960496737832
F74 -0.203565231196184
F75 -0.450539610910928
F76 0.411462370457529
F77 -0.309713881057648
F78 0.9140794729087808
F79 1.31441543849981
F80 -0.387324391709564
F81 -1.2144940378668605
F82 0.981754295455261
F83 0.0289330595561297
F84 1.24311989408419
F85 -0.24182271418096696
F86 0.76870209333065
F87 0.238515379111686
F88 -0.338651978123051
F89 -0.4482656135303739
F90 0.5279056466564099
F91 1.37807956999098
F92 1.04310859627148
F93 -0.117482004574065
F94 0.6232413973258649
F95 0.00403130502610116
F96 -1.34598938813891
F97 0.4549706581338591
F98 1.0192013773894302
F99 -0.291756302796704
F100 -1.12688813864611
F101 0.557769384655089
F102 1.1975908860910902
F103 1.15355341444875
F104 0.946531581472272
F105 1.22600261259885
F106 1.0261210015913198
F107 0.841857769084618
F108 -0.4019324938447151
F109 0.19764363305971105
F110 -0.149480290579409
F111 0.6408321521409511
F112 -0.966639287078902
F113 1.16738708670172
F114 0.174326207230448
F115 -0.5695094365752371
F116 1.20806940422198
F117 -1.24543884840423
F118 -1.15412531725971
F119 -0.8296471214736691
F120 0.736529671228746
F121 -0.660047869199522
F122 0.904640201633404
F123 -0.5986532323341109
F124 0.691079988752513
F125 -0.562592354003712
F126 1.01123956644237
F127 -0.41423096856367103
F128 0.619238453979786
F129 -0.620591765205541
F130 -1.27255930040442
F131 -0.3512359129578449
F132 1.00467461529444
F133 -1.13541106483259
F134 -1.26161085240342
F135 -0.38659519790583097
F136 0.872749204870088
F137 -0.810751128748022
F138 -0.8197848282048691
F139 0.20047162644758895
F140 -0.604549610882948
F141 0.228624680177828
F142 1.28609633871252
F143 -0.1679362832488
F144 -0.257647674436839
F145 0.74101019223122
F146 0.132687409665163
F147 -1.09538523785539
F148 -0.4520741143592221
F149 -1.1796888761929298
F150 1.07271995122987
F151 -0.219846098067332
F152 0.529493500001223
F153 0.6270798132806121
F154 -1.01527420105315
F155 0.746112185723826
F156 -0.686265054564927
F157 -1.3326095821448702
F158 -0.3777879682136721
F159 -1.13401841717297
F160 -0.296439244074386
F161 0.40847742352928296
F162 1.27982997837018
F163 -0.7134413622708241
F164 -0.349087095579928
F165 0.6163069356830501
F166 0.217564847263855
F167 -0.242902200286938
F168 -1.01430232660254
F169 -0.919646912875392
F170 -0.19937212012607394
F171 -1.31129333739411
F172 -0.721872176785536
F173 -1.12678232313294
F174 1.0904935742820798
F175 -1.17644925652159
F176 1.25195089868697
F177 -0.5994841508457149
F178 -1.12642775074296
F179 -0.927361070785748
F180 0.852558007271541
F181 0.740604316851619
F182 -0.504950438684657
F183 0.358858461465484
F184 -0.0652057793033983
F185 -0.210512467576898
F186 0.15407114587584306
F187 0.7501398636830259
F188 -1.38405071916205
F189 -1.3863784044176497
F190 0.35465073693029203
F191 -0.0527678764658988
F192 1.04452771030868
F193 -1.0105215476872
F194 -0.6098963926022061
F195 1.24019945405587
F196 -1.2486190178628
F197 0.28873133501175297
F198 -0.9444328759187892
F199 -0.308547237330663
34 changes: 33 additions & 1 deletion q2_types/feature_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
ProteinFASTAFormat, AlignedProteinFASTAFormat, FASTAFormat,
AlignedProteinSequencesDirectoryFormat, ProteinSequencesDirectoryFormat,
RNAFASTAFormat, RNASequencesDirectoryFormat, AlignedRNAFASTAFormat,
AlignedRNASequencesDirectoryFormat
AlignedRNASequencesDirectoryFormat,
MonteCarloTensorDirectoryFormat
)
from qiime2.plugin.testing import TestPluginBase
from qiime2.plugin import ValidationError
import arviz as az
import numpy as np


class TestTaxonomyFormats(TestPluginBase):
Expand Down Expand Up @@ -538,5 +541,34 @@ def test_aligned_protein_sequences_directory_format(self):
format.validate()


class TestMonteCarloTensorFormat(TestPluginBase):
    """Validation tests for ``MonteCarloTensorDirectoryFormat``."""

    package = 'q2_types.feature_data.tests'

    def test_monte_carlo_format(self):
        """A directory holding a freshly written arviz file validates."""
        temp_dir = self.temp_dir.name
        # Write the samples straight into the per-test temporary directory
        # instead of overwriting a fixture under the package data folder,
        # so running the test never modifies files on the package path.
        filepath = os.path.join(temp_dir, 'monte-carlo-samples.az')

        size = 100
        # The sample values are arbitrary; only the on-disk format matters.
        dataset = az.convert_to_inference_data(np.random.randn(size))
        dataset.to_netcdf(filepath)

        dir_fmt = MonteCarloTensorDirectoryFormat(temp_dir, mode='r')
        dir_fmt.validate()

    def test_monte_carlo_format_bad(self):
        """A directory holding only a TSV file fails validation."""
        filepath = self.get_data_path('nan_differential.tsv')
        temp_dir = self.temp_dir.name
        shutil.copy(filepath,
                    os.path.join(temp_dir, 'nan_differential.tsv'))
        dir_fmt = MonteCarloTensorDirectoryFormat(temp_dir, mode='r')
        with self.assertRaisesRegex(ValidationError, 'MonteCarloTensor'):
            dir_fmt.validate()


if __name__ == '__main__':
unittest.main()
Loading