-
Notifications
You must be signed in to change notification settings - Fork 80
/
Copy pathstudy_samples.py
258 lines (202 loc) · 8.41 KB
/
study_samples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from collections import defaultdict
import io
from qiita_db.metadata_template.util import load_template_to_dataframe
from tornado.escape import json_encode, json_decode
import pandas as pd
from qiita_db.handlers.oauth2 import authenticate_oauth
from .rest_handler import RESTHandler
def _sample_details(study, samples):
    """Describe each requested sample and the preparations it appears on.

    Parameters
    ----------
    study : object
        The study to query. Only its ``sample_template`` attribute and its
        ``prep_templates()`` method are used here.
    samples : iterable of hashable
        The sample identifiers to look up. The iterable is materialized
        once, so one-shot iterables (e.g., generators) are accepted.

    Returns
    -------
    list of dict
        Records with the keys ``sample_id``, ``sample_found``,
        ``ebi_sample_accession``, ``preparation_id``,
        ``ebi_experiment_accession``, ``preparation_visibility`` and
        ``preparation_type``. In the order of ``samples``:

        - a sample that is not in the study (or any sample when the study
          has no sample template) yields one record with
          ``sample_found=False`` and every other field ``None``;
        - a sample in the study but on no preparation yields one record
          with only the sample level fields filled in;
        - a sample on one or more preparations yields one record per
          preparation.
    """
    def detail_maker(**kwargs):
        # every record carries the same keys; unspecified ones stay None
        base = {'sample_id': None,
                'sample_found': False,
                'ebi_sample_accession': None,
                'preparation_id': None,
                'ebi_experiment_accession': None,
                'preparation_visibility': None,
                'preparation_type': None}

        assert set(kwargs).issubset(set(base)), "Unexpected key to set"

        base.update(kwargs)
        return base

    # samples is walked twice below (set construction and the final loop),
    # so materialize it in case a one-shot iterable was handed in
    samples = list(samples)

    sample_template = study.sample_template
    if sample_template is None:
        # without sample information nothing can be found in the study
        return [detail_maker(sample_id=sample) for sample in samples]

    # cache sample detail for lookup
    study_samples = set(sample_template)
    sample_accessions = sample_template.ebi_sample_accessions

    # cache preparation information that we'll need

    # map of {sample_id: [indices, of, light, prep, info, ...]}
    sample_prep_mapping = defaultdict(list)
    pt_light = []
    incoming_samples = set(samples)
    for pt in study.prep_templates():
        overlap = incoming_samples & set(pt)
        if not overlap:
            # none of the query samples are present on this prep
            continue

        # reduce accessions to only samples of interest
        accessions = pt.ebi_experiment_accessions
        overlap_accessions = {i: accessions[i] for i in overlap}

        # the position this prep is about to take in pt_light; deriving it
        # from the list keeps the index and the list in lockstep
        pt_idx = len(pt_light)

        # store the detail we need
        pt_light.append((pt.id, overlap_accessions,
                         pt.status, pt.data_type()))

        # only care about mapping the incoming samples
        for ptsample in overlap:
            sample_prep_mapping[ptsample].append(pt_idx)

    details = []
    for sample in samples:
        if sample not in study_samples:
            # the sample is not present, let's note it and move on
            details.append(detail_maker(sample_id=sample))
            continue

        sample_acc = sample_accessions.get(sample)

        if sample not in sample_prep_mapping:
            # the sample is not present on any preparations
            details.append(detail_maker(
                sample_id=sample,
                sample_found=True,
                # it would be weird to have an EBI sample accession
                # but not be present on a preparation...?
                ebi_sample_accession=sample_acc))
            continue

        # the sample is present on at least one prep, pull out the detail
        # specific to each of those preparations
        for pt_idx in sample_prep_mapping[sample]:
            ptid, ptacc, ptstatus, ptdtype = pt_light[pt_idx]
            details.append(detail_maker(
                sample_id=sample,
                sample_found=True,
                ebi_sample_accession=sample_acc,
                preparation_id=ptid,
                ebi_experiment_accession=ptacc.get(sample),
                preparation_visibility=ptstatus,
                preparation_type=ptdtype))

    return details
class StudySampleDetailHandler(RESTHandler):
    """REST endpoint reporting the detail of a single sample in a study."""

    @authenticate_oauth
    def get(self, study_id, sample_id):
        """Write the JSON encoded detail records of one sample.

        Parameters
        ----------
        study_id
            Identifier of the study, resolved through ``safe_get_study``.
        sample_id
            Identifier of the sample to describe.

        Notes
        -----
        The response body is the list produced by ``_sample_details`` for
        the single requested sample.
        """
        study = self.safe_get_study(study_id)
        if study is None:
            # safe_get_study can come back empty handed; the other handlers
            # in this module all stop here. Presumably the failure was
            # already reported to the client -- confirm in RESTHandler.
            return

        sample_detail = _sample_details(study, [sample_id])
        self.write(json_encode(sample_detail))
        self.finish()
class StudySamplesDetailHandler(RESTHandler):
    """REST endpoint reporting the detail of several samples in a study."""

    @authenticate_oauth
    def post(self, study_id):
        """Write the JSON encoded detail records of the posted samples.

        The request body must be a JSON object with a ``sample_ids`` key
        holding the sample identifiers to describe; anything else is
        answered with a 400.

        Parameters
        ----------
        study_id
            Identifier of the study, resolved through ``safe_get_study``.
        """
        samples = json_decode(self.request.body)

        # a JSON string or list would pass a bare ``in`` test and then break
        # on the subscript below, so insist on an object first
        if not isinstance(samples, dict) or 'sample_ids' not in samples:
            # NOTE(review): the message names 'sample_id' while the key
            # checked is 'sample_ids'; left untouched in case clients match
            # on the text
            self.fail('Missing sample_id key', 400)
            return

        study = self.safe_get_study(study_id)
        if study is None:
            # safe_get_study can come back empty handed; the other handlers
            # in this module all stop here. Presumably the failure was
            # already reported to the client -- confirm in RESTHandler.
            return

        samples_detail = _sample_details(study, samples['sample_ids'])
        self.write(json_encode(samples_detail))
        self.finish()
class StudySamplesHandler(RESTHandler):
    """REST endpoint to list, add and update the samples of a study."""

    @authenticate_oauth
    def get(self, study_id):
        """Write the JSON list of the sample identifiers in the study.

        An empty list is written when the study has no sample template.
        Nothing is written here when ``safe_get_study`` returns ``None``.
        """
        study = self.safe_get_study(study_id)
        if study is None:
            return

        template = study.sample_template
        sample_ids = [] if template is None else list(template.keys())

        self.write(json_encode(sample_ids))
        self.finish()

    @authenticate_oauth
    def patch(self, study_id):
        """Add new samples and/or update existing ones from a JSON body.

        The body is decoded and turned into a dataframe with
        ``orient='index'``, so its top level keys become the
        ``sample_name`` index. Rows whose index is not yet in the sample
        template are passed to ``sample_template.extend``; the rest are
        passed to ``sample_template.update``.

        Responses set in this method:

        - 404 if the study has no sample template
        - 400 if the body has no rows, or does not cover every category
          already on the sample template
        - 201 if any sample was added
        - 200 if samples were only updated
        - 500 if neither an addition nor an update took place
        """
        study = self.safe_get_study(study_id)
        if study is None:
            return

        if study.sample_template is None:
            self.fail('No sample information found', 404)
            return

        current = study.sample_template.to_dataframe()

        # convert from json into a format that qiita can validate
        payload = json_decode(self.request.body)
        incoming = pd.DataFrame.from_dict(payload, orient='index')
        incoming.index.name = 'sample_name'

        if not len(incoming.index):
            self.fail('No samples provided', 400)
            return

        # serialize as tab separated text so the template loader can read
        # it back; validation happens on load
        tsv = io.StringIO()
        incoming.to_csv(tsv, sep='\t', index=True, header=True)
        tsv.seek(0)
        validated = load_template_to_dataframe(tsv)

        # The payload may carry extra categories (superset) or exactly the
        # existing ones; it may not leave any existing category out.
        required = set(study.sample_template.categories)
        if not required <= set(validated.columns):
            self.fail('Not all sample information categories provided',
                      400)
            return

        known = set(current.index)
        submitted = set(validated.index)
        ids_to_update = submitted & known
        ids_to_add = list(submitted - known)

        # warnings generated are not currently caught
        # see https://github.com/biocore/qiita/issues/2096

        # processing the new ids first allows us to extend the
        # sample_template w/new columns before calling update(). update()
        # will return an error if unexpected columns are found.
        if ids_to_add:
            study.sample_template.extend(validated.loc[ids_to_add])

        if ids_to_update:
            study.sample_template.update(validated.loc[list(ids_to_update)])

        # an addition wins over an update when reporting the outcome
        if ids_to_add:
            status = 201
        elif ids_to_update:
            status = 200
        else:
            status = 500

        self.set_status(status)
        self.finish()
class StudySamplesCategoriesHandler(RESTHandler):
    """REST endpoint returning selected metadata categories per sample."""

    @authenticate_oauth
    def get(self, study_id, categories):
        """Write the values of the requested categories for every sample.

        Parameters
        ----------
        study_id
            Identifier of the study, resolved through ``safe_get_study``.
        categories : str
            Comma separated category names.

        Notes
        -----
        On success the body is a JSON object with ``header`` (the requested
        categories, in request order) and ``samples`` (a mapping from each
        dataframe index label to the list of that row's values, ordered
        like ``header``).

        Failures set in this method: 405 when ``categories`` is empty, 404
        when the study has no sample template, and 404 (with the sorted
        missing names under ``categories_not_found``) when any requested
        category is not on the sample template.
        """
        if not categories:
            self.fail('No categories specified', 405)
            return

        study = self.safe_get_study(study_id)
        if study is None:
            return

        requested = categories.split(',')

        if study.sample_template is None:
            self.fail('Study does not have sample information', 404)
            return

        missing = set(requested) - set(study.sample_template.categories)
        if missing:
            self.fail('Category not found', 404,
                      categories_not_found=sorted(missing))
            return

        frame = study.sample_template.to_dataframe()
        per_sample = {label: list(values)
                      for label, values in frame[requested].iterrows()}

        self.write(json_encode({'header': requested,
                                'samples': per_sample}))
        self.finish()
class StudySamplesInfoHandler(RESTHandler):
    """REST endpoint summarizing the sample information of a study."""

    @authenticate_oauth
    def get(self, study_id):
        """Write the sample count and the category names of the study.

        The body is a JSON object with ``number-of-samples`` and
        ``categories``. A study without a sample template is reported as
        zero samples and no categories. Nothing is written here when
        ``safe_get_study`` returns ``None``.
        """
        study = self.safe_get_study(study_id)
        if study is None:
            return

        template = study.sample_template
        if template is not None:
            summary = {'number-of-samples': len(template),
                       'categories': template.categories}
        else:
            summary = {'number-of-samples': 0,
                       'categories': []}

        self.write(json_encode(summary))
        self.finish()