From 89de2968bcbe758c39975b238b59dee9b9863aa1 Mon Sep 17 00:00:00 2001 From: LTLA Date: Thu, 10 Oct 2024 17:58:41 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20SingleR-?= =?UTF-8?q?inc/singlepp@5c14a6c44ea0cd018f06aeaeb4820a575af6e400=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Intersection_8hpp_source.html | 70 +-- docs/index.html | 6 +- docs/namespacesinglepp.html | 7 +- docs/train__integrated_8hpp_source.html | 562 ++++++++++++------------ docs/train__single_8hpp_source.html | 66 +-- 5 files changed, 356 insertions(+), 355 deletions(-) diff --git a/docs/Intersection_8hpp_source.html b/docs/Intersection_8hpp_source.html index 95f9b49..60b56d3 100644 --- a/docs/Intersection_8hpp_source.html +++ b/docs/Intersection_8hpp_source.html @@ -103,44 +103,44 @@
11
17namespace singlepp {
18
-
30template<typename Index_ = DefaultIndex>
-
31using Intersection = std::vector<std::pair<Index_, Index_> >;
-
32
-
49template<typename Index_, typename Id_>
-
- -
51 std::unordered_map<Id_, Index_> ref_found;
-
52 for (Index_ i = 0; i < ref_nrow; ++i) {
-
53 auto current = ref_id[i];
-
54 auto tfIt = ref_found.find(current);
-
55 if (tfIt == ref_found.end()) { // only using the first occurrence of each ID in ref_id.
- -
57 }
-
58 }
-
59
- -
61 for (Index_ i = 0; i < test_nrow; ++i) {
-
62 auto current = test_id[i];
-
63 auto tfIt = ref_found.find(current);
-
64 if (tfIt != ref_found.end()) {
-
65 output.emplace_back(i, tfIt->second);
-
66 ref_found.erase(tfIt); // only using the first occurrence of each ID in test_id; the next will not enter this clause.
-
67 }
-
68 }
-
69
-
70 // This is implicitly sorted by the test indices... not that it really
-
71 // matters, as subset_to_markers() doesn't care that it's unsorted.
-
72 return output;
-
73}
+
34template<typename Index_ = DefaultIndex>
+
35using Intersection = std::vector<std::pair<Index_, Index_> >;
+
36
+
53template<typename Index_, typename Id_>
+
+ +
55 std::unordered_map<Id_, Index_> ref_found;
+
56 for (Index_ i = 0; i < ref_nrow; ++i) {
+
57 auto current = ref_id[i];
+
58 auto tfIt = ref_found.find(current);
+
59 if (tfIt == ref_found.end()) { // only using the first occurrence of each ID in ref_id.
+ +
61 }
+
62 }
+
63
+ +
65 for (Index_ i = 0; i < test_nrow; ++i) {
+
66 auto current = test_id[i];
+
67 auto tfIt = ref_found.find(current);
+
68 if (tfIt != ref_found.end()) {
+
69 output.emplace_back(i, tfIt->second);
+
70 ref_found.erase(tfIt); // only using the first occurrence of each ID in test_id; the next will not enter this clause.
+
71 }
+
72 }
+
73
+
74 // This is implicitly sorted by the test indices... not that it really
+
75 // matters, as subset_to_markers() doesn't care that it's unsorted.
+
76 return output;
+
77}
-
74
-
75}
-
76
-
77#endif
+
78
+
79}
+
80
+
81#endif
Common definitions for singlepp.
Cell type classification using the SingleR algorithm in C++.
Definition classify_single.hpp:19
-
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:50
-
std::vector< std::pair< Index_, Index_ > > Intersection
Definition Intersection.hpp:31
+
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:54
+
std::vector< std::pair< Index_, Index_ > > Intersection
Definition Intersection.hpp:35
std::vector< std::vector< std::vector< Index_ > > > Markers
Definition Markers.hpp:40
diff --git a/docs/index.html b/docs/index.html index 8ee153d..28d058f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -150,7 +150,7 @@

Intersecting feature sets

ref_markers,
train_opt
);
-
TrainedSingleIntersect< Index_, Float_ > train_single_intersect(const Intersection< Index_ > &intersection, const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, Markers< Index_ > markers, const TrainSingleOptions< Index_, Float_ > &options)
Definition train_single.hpp:309
+
TrainedSingleIntersect< Index_, Float_ > train_single_intersect(const Intersection< Index_ > &intersection, const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, Markers< Index_ > markers, const TrainSingleOptions< Index_, Float_ > &options)
Definition train_single.hpp:311

Then, classify_single_intersect() will perform classification using only the intersection of genes:

test_mat,
@@ -174,8 +174,8 @@

Integrating results across references

auto train_integrated = singlepp::train_integrated(inputs, ti_opt);
TrainIntegratedInput< Value_, Index_, Label_ > prepare_integrated_input(const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, const TrainedSingle< Index_, Float_ > &trained)
Definition train_integrated.hpp:73
-
TrainedIntegrated< Index_ > train_integrated(const std::vector< TrainIntegratedInput< Value_, Index_, Label_ > > &inputs, const TrainIntegratedOptions &options)
Definition train_integrated.hpp:451
-
Options for train_integrated().
Definition train_integrated.hpp:263
+
TrainedIntegrated< Index_ > train_integrated(const std::vector< TrainIntegratedInput< Value_, Index_, Label_ > > &inputs, const TrainIntegratedOptions &options)
Definition train_integrated.hpp:453
+
Options for train_integrated().
Definition train_integrated.hpp:265

And then we can finally run the scoring. For each cell in the test dataset, classify_integrated() picks the best label among the assignments from each individual reference.

auto ires = single.run(test_mat, train_integrated, ci_opt);
diff --git a/docs/namespacesinglepp.html b/docs/namespacesinglepp.html index f6006fa..e37dd19 100644 --- a/docs/namespacesinglepp.html +++ b/docs/namespacesinglepp.html @@ -299,8 +299,9 @@

-

Intersection of genes between two datasets. Each element is a pair of matching genes and contains the row indices of those genes in each dataset. A row index for either matrix should occur no more than once in this object.

+

Intersection of genes between two datasets. Each pair represents a gene that is present in both datasets. The two elements of the pair represent the row indices of that gene in the respective matrices.

Typically, the first element of the pair contains the row index of a gene in the test dataset, while the second element of the pair contains the row index of the same gene in the reference dataset. This convention is used by intersect_genes(), train_single_intersect() and prepare_integrated_input_intersect().

+

A row index for a matrix should occur no more than once in the Intersection object. That is, all the first elements should be unique and all of the second elements should be unique. Pairs may be arbitrarily ordered within the object.

Template Parameters
@@ -1010,7 +1011,7 @@

Parameters

Index_Integer type for the gene (row) indices.
- + @@ -1311,7 +1312,7 @@

Parameters

intersectionVector defining the intersection of genes between the test and reference datasets, see intersect_genes() for details.
intersectionVector defining the intersection of genes between the test and reference datasets. Each pair corresponds to a gene where the first and second elements represent the row indices of that gene in the test and reference matrices, respectively. See intersect_genes() for more details.
refMatrix containing the reference expression values, where rows are genes and columns are reference profiles. The number and identity of genes should be consistent with intersection.
[in]labelsAn array of length equal to the number of columns of ref, containing the label for each sample. Values should be integers in \([0, L)\) where \(L\) is the number of unique labels.
trainedClassifier created by calling train_single_intersect() on intersection, ref and labels.
- + diff --git a/docs/train__integrated_8hpp_source.html b/docs/train__integrated_8hpp_source.html index f920dcf..fc68ee2 100644 --- a/docs/train__integrated_8hpp_source.html +++ b/docs/train__integrated_8hpp_source.html @@ -163,304 +163,304 @@
106}
107
-
127template<typename Index_, typename Value_, typename Label_, typename Float_>
-
- - -
130 const tatami::Matrix<Value_, Index_>& ref,
-
131 const Label_* labels,
- -
133{
- -
135 output.ref = &ref;
-
136 output.labels = labels;
-
137
-
138 // Updating the markers so that they point to rows of the test matrix.
-
139 const auto& old_markers = trained.get_markers();
-
140 size_t nlabels = old_markers.size();
-
141 auto& new_markers = output.markers;
-
142 new_markers.resize(nlabels);
-
143
-
144 const auto& test_subset = trained.get_test_subset();
-
145 std::unordered_set<Index_> unified;
-
146
-
147 for (size_t i = 0; i < nlabels; ++i) {
-
148 const auto& cur_old_markers = old_markers[i];
-
149
-
150 unified.clear();
-
151 for (const auto& x : cur_old_markers) {
-
152 unified.insert(x.begin(), x.end());
-
153 }
-
154
- -
156 cur_new_markers.reserve(unified.size());
-
157 for (auto y : unified) {
-
158 cur_new_markers.push_back(test_subset[y]);
-
159 }
-
160 }
-
161
-
162 output.with_intersection = true;
-
163 output.user_intersection = &intersection;
-
164 return output;
-
165}
+
129template<typename Index_, typename Value_, typename Label_, typename Float_>
+
+ + +
132 const tatami::Matrix<Value_, Index_>& ref,
+
133 const Label_* labels,
+ +
135{
+ +
137 output.ref = &ref;
+
138 output.labels = labels;
+
139
+
140 // Updating the markers so that they point to rows of the test matrix.
+
141 const auto& old_markers = trained.get_markers();
+
142 size_t nlabels = old_markers.size();
+
143 auto& new_markers = output.markers;
+
144 new_markers.resize(nlabels);
+
145
+
146 const auto& test_subset = trained.get_test_subset();
+
147 std::unordered_set<Index_> unified;
+
148
+
149 for (size_t i = 0; i < nlabels; ++i) {
+
150 const auto& cur_old_markers = old_markers[i];
+
151
+
152 unified.clear();
+
153 for (const auto& x : cur_old_markers) {
+
154 unified.insert(x.begin(), x.end());
+
155 }
+
156
+ +
158 cur_new_markers.reserve(unified.size());
+
159 for (auto y : unified) {
+
160 cur_new_markers.push_back(test_subset[y]);
+
161 }
+
162 }
+
163
+
164 output.with_intersection = true;
+
165 output.user_intersection = &intersection;
+
166 return output;
+
167}
-
166
-
193template<typename Index_, typename Id_, typename Value_, typename Label_, typename Float_>
-
- - -
196 const Id_* test_id,
-
197 const tatami::Matrix<Value_, Index_>& ref,
-
198 const Id_* ref_id,
-
199 const Label_* labels,
- -
201{
- - -
204 output.user_intersection = NULL;
-
205 output.auto_intersection.swap(intersection);
-
206 return output;
-
207}
+
168
+
195template<typename Index_, typename Id_, typename Value_, typename Label_, typename Float_>
+
+ + +
198 const Id_* test_id,
+
199 const tatami::Matrix<Value_, Index_>& ref,
+
200 const Id_* ref_id,
+
201 const Label_* labels,
+ +
203{
+ + +
206 output.user_intersection = NULL;
+
207 output.auto_intersection.swap(intersection);
+
208 return output;
+
209}
-
208
-
213template<typename Index_>
-
- -
215public:
-
-
219 size_t num_references() const {
-
220 return markers.size();
-
221 }
+
210
+
215template<typename Index_>
+
+ +
217public:
+
+
221 size_t num_references() const {
+
222 return markers.size();
+
223 }
-
222
-
-
227 size_t num_labels(size_t r) const {
-
228 return markers[r].size();
-
229 }
+
224
+
+
229 size_t num_labels(size_t r) const {
+
230 return markers[r].size();
+
231 }
-
230
-
-
235 size_t num_profiles(size_t r) const {
-
236 size_t n = 0;
-
237 for (const auto& ref : ranked[r]) {
-
238 n += ref.size();
-
239 }
-
240 return n;
-
241 }
+
232
+
+
237 size_t num_profiles(size_t r) const {
+
238 size_t n = 0;
+
239 for (const auto& ref : ranked[r]) {
+
240 n += ref.size();
+
241 }
+
242 return n;
+
243 }
-
242
-
243public:
-
247 // Technically this should be private, but it's a pain to add
-
248 // templated friend functions, so I can't be bothered.
-
249 std::vector<Index_> universe; // To be used by classify_integrated() for indexed extraction.
-
250
-
251 std::vector<uint8_t> check_availability;
-
252 std::vector<std::unordered_set<Index_> > available; // indices to 'universe'
-
253 std::vector<std::vector<std::vector<Index_> > > markers; // indices to 'universe'
-
254 std::vector<std::vector<std::vector<internal::RankedVector<Index_, Index_> > > > ranked; // .second contains indices to 'universe'
-
258};
+
244
+
245public:
+
249 // Technically this should be private, but it's a pain to add
+
250 // templated friend functions, so I can't be bothered.
+
251 std::vector<Index_> universe; // To be used by classify_integrated() for indexed extraction.
+
252
+
253 std::vector<uint8_t> check_availability;
+
254 std::vector<std::unordered_set<Index_> > available; // indices to 'universe'
+
255 std::vector<std::vector<std::vector<Index_> > > markers; // indices to 'universe'
+
256 std::vector<std::vector<std::vector<internal::RankedVector<Index_, Index_> > > > ranked; // .second contains indices to 'universe'
+
260};
-
259
-
- -
268 int num_threads = 1;
-
269};
+
261
+
+ +
270 int num_threads = 1;
+
271};
-
270
-
274namespace internal {
-
275
-
276template<typename Value_, typename Index_, typename Input_>
- -
278 size_t ref_i,
- - -
281 const std::unordered_map<Index_, Index_> remap_to_universe,
- -
283{
-
284 auto curlab = curinput.labels;
-
285 const auto& ref = *(curinput.ref);
-
286
-
287 // Reindexing the markers so that they contain indices into to the universe.
-
288 auto& curmarkers = output.markers[ref_i];
-
289 if constexpr(std::is_const<Input_>::value) {
-
290 curmarkers.swap(curinput.markers);
-
291 } else {
-
292 curmarkers = curinput.markers;
-
293 }
-
294 for (auto& outer : curmarkers) {
-
295 for (auto& x : outer) {
-
296 x = remap_to_universe.find(x)->second;
-
297 }
-
298 }
-
299
-
300 // Pre-allocating the vectors of pre-ranked expression.
-
301 auto& cur_ranked = output.ranked[ref_i];
-
302 std::vector<Index_> positions;
-
303 {
-
304 size_t nlabels = curmarkers.size();
-
305 Index_ NC = ref.ncol();
-
306 positions.reserve(NC);
-
307
-
308 std::vector<Index_> samples_per_label(nlabels);
-
309 for (Index_ c = 0; c < NC; ++c) {
-
310 auto& pos = samples_per_label[curlab[c]];
-
311 positions.push_back(pos);
-
312 ++pos;
-
313 }
-
314
-
315 cur_ranked.resize(nlabels);
-
316 for (size_t l = 0; l < nlabels; ++l) {
- -
318 }
-
319 }
-
320
-
321 if (!curinput.with_intersection) {
-
322 // The universe is guaranteed to be sorted and unique, see its derivation
-
323 // in internal::train_integrated() below. This means we can directly use it
-
324 // for indexed extraction from a tatami::Matrix.
-
325 tatami::VectorPtr<Index_> universe_ptr(tatami::VectorPtr<Index_>{}, &(output.universe));
-
326
-
327 tatami::parallelize([&](int, Index_ start, Index_ len) {
-
328 std::vector<Value_> buffer(output.universe.size());
-
329 internal::RankedVector<Value_, Index_> tmp_ranked;
-
330 tmp_ranked.reserve(output.universe.size());
-
331 auto ext = tatami::consecutive_extractor<false>(&ref, false, start, len, universe_ptr);
-
332
-
333 for (Index_ c = start, end = start + len; c < end; ++c) {
-
334 auto ptr = ext->fetch(buffer.data());
-
335
-
336 tmp_ranked.clear();
-
337 for (int i = 0, end = output.universe.size(); i < end; ++i, ++ptr) {
-
338 tmp_ranked.emplace_back(*ptr, i);
-
339 }
-
340 std::sort(tmp_ranked.begin(), tmp_ranked.end());
-
341
- - -
344 }
-
345 }, ref.ncol(), options.num_threads);
-
346
-
347 } else {
-
348 output.check_availability[ref_i] = 1;
-
349
-
350 // Need to remap from indices on the test matrix to those in the current reference matrix
-
351 // so that we can form an appropriate vector for indexed tatami extraction.
-
352 const auto& intersection = (curinput.user_intersection == NULL ? curinput.auto_intersection : *(curinput.user_intersection));
-
353 std::unordered_map<Index_, Index_> intersection_map;
-
354 intersection_map.reserve(intersection.size());
-
355 for (const auto& in : intersection) {
-
356 intersection_map[in.first] = in.second;
-
357 }
-
358
-
359 std::vector<std::pair<Index_, Index_> > intersection_in_universe;
-
360 intersection_in_universe.reserve(output.universe.size());
-
361 auto& cur_available = output.available[ref_i];
-
362 cur_available.reserve(output.universe.size());
-
363
-
364 for (Index_ i = 0, end = output.universe.size(); i < end; ++i) {
-
365 auto it = intersection_map.find(output.universe[i]);
-
366 if (it != intersection_map.end()) {
-
367 intersection_in_universe.emplace_back(it->second, i); // using 'i' as we want to work with indices into 'universe', not the indices of the universe itself.
-
368 cur_available.insert(i);
-
369 }
-
370 }
-
371 std::sort(intersection_in_universe.begin(), intersection_in_universe.end());
-
372
-
373 std::vector<Index_> to_extract;
-
374 to_extract.reserve(intersection_in_universe.size());
-
375 for (const auto& p : intersection_in_universe) {
-
376 to_extract.push_back(p.first);
-
377 }
-
378 tatami::VectorPtr<Index_> to_extract_ptr(tatami::VectorPtr<Index_>{}, &to_extract);
-
379
-
380 tatami::parallelize([&](int, Index_ start, Index_ len) {
-
381 std::vector<Value_> buffer(to_extract.size());
-
382 internal::RankedVector<Value_, Index_> tmp_ranked;
-
383 tmp_ranked.reserve(to_extract.size());
-
384 auto ext = tatami::consecutive_extractor<false>(&ref, false, start, len, to_extract_ptr);
-
385
-
386 for (size_t c = start, end = start + len; c < end; ++c) {
-
387 auto ptr = ext->fetch(buffer.data());
-
388
-
389 tmp_ranked.clear();
-
390 for (const auto& p : intersection_in_universe) {
-
391 tmp_ranked.emplace_back(*ptr, p.second); // remember, 'p.second' corresponds to indices into the universe.
-
392 ++ptr;
-
393 }
-
394 std::sort(tmp_ranked.begin(), tmp_ranked.end());
-
395
- - -
398 }
-
399 }, ref.ncol(), options.num_threads);
-
400 }
-
401}
-
402
-
403template<typename Value_, typename Index_, typename Inputs_>
-
404TrainedIntegrated<Index_> train_integrated(Inputs_& inputs, const TrainIntegratedOptions& options) {
- -
406 size_t nrefs = inputs.size();
-
407 output.check_availability.resize(nrefs);
-
408 output.available.resize(nrefs);
-
409 output.markers.resize(nrefs);
-
410 output.ranked.resize(nrefs);
-
411
-
412 // Identify the union of all marker genes.
-
413 std::unordered_map<Index_, Index_> remap_to_universe;
-
414 std::unordered_set<Index_> subset_tmp;
-
415 for (const auto& in : inputs) {
-
416 for (const auto& mrk : in.markers) {
-
417 subset_tmp.insert(mrk.begin(), mrk.end());
-
418 }
-
419 }
-
420
-
421 output.universe.insert(output.universe.end(), subset_tmp.begin(), subset_tmp.end());
-
422 std::sort(output.universe.begin(), output.universe.end());
-
423 remap_to_universe.reserve(output.universe.size());
-
424 for (Index_ i = 0, end = output.universe.size(); i < end; ++i) {
-
425 remap_to_universe[output.universe[i]] = i;
-
426 }
-
427
-
428 for (size_t r = 0; r < nrefs; ++r) {
- -
430 }
-
431
-
432 return output;
-
433}
-
434
+
272
+
276namespace internal {
+
277
+
278template<typename Value_, typename Index_, typename Input_>
+ +
280 size_t ref_i,
+ + +
283 const std::unordered_map<Index_, Index_> remap_to_universe,
+ +
285{
+
286 auto curlab = curinput.labels;
+
287 const auto& ref = *(curinput.ref);
+
288
+
289 // Reindexing the markers so that they contain indices into to the universe.
+
290 auto& curmarkers = output.markers[ref_i];
+
291 if constexpr(std::is_const<Input_>::value) {
+
292 curmarkers.swap(curinput.markers);
+
293 } else {
+
294 curmarkers = curinput.markers;
+
295 }
+
296 for (auto& outer : curmarkers) {
+
297 for (auto& x : outer) {
+
298 x = remap_to_universe.find(x)->second;
+
299 }
+
300 }
+
301
+
302 // Pre-allocating the vectors of pre-ranked expression.
+
303 auto& cur_ranked = output.ranked[ref_i];
+
304 std::vector<Index_> positions;
+
305 {
+
306 size_t nlabels = curmarkers.size();
+
307 Index_ NC = ref.ncol();
+
308 positions.reserve(NC);
+
309
+
310 std::vector<Index_> samples_per_label(nlabels);
+
311 for (Index_ c = 0; c < NC; ++c) {
+
312 auto& pos = samples_per_label[curlab[c]];
+
313 positions.push_back(pos);
+
314 ++pos;
+
315 }
+
316
+
317 cur_ranked.resize(nlabels);
+
318 for (size_t l = 0; l < nlabels; ++l) {
+ +
320 }
+
321 }
+
322
+
323 if (!curinput.with_intersection) {
+
324 // The universe is guaranteed to be sorted and unique, see its derivation
+
325 // in internal::train_integrated() below. This means we can directly use it
+
326 // for indexed extraction from a tatami::Matrix.
+
327 tatami::VectorPtr<Index_> universe_ptr(tatami::VectorPtr<Index_>{}, &(output.universe));
+
328
+
329 tatami::parallelize([&](int, Index_ start, Index_ len) {
+
330 std::vector<Value_> buffer(output.universe.size());
+
331 internal::RankedVector<Value_, Index_> tmp_ranked;
+
332 tmp_ranked.reserve(output.universe.size());
+
333 auto ext = tatami::consecutive_extractor<false>(&ref, false, start, len, universe_ptr);
+
334
+
335 for (Index_ c = start, end = start + len; c < end; ++c) {
+
336 auto ptr = ext->fetch(buffer.data());
+
337
+
338 tmp_ranked.clear();
+
339 for (int i = 0, end = output.universe.size(); i < end; ++i, ++ptr) {
+
340 tmp_ranked.emplace_back(*ptr, i);
+
341 }
+
342 std::sort(tmp_ranked.begin(), tmp_ranked.end());
+
343
+ + +
346 }
+
347 }, ref.ncol(), options.num_threads);
+
348
+
349 } else {
+
350 output.check_availability[ref_i] = 1;
+
351
+
352 // Need to remap from indices on the test matrix to those in the current reference matrix
+
353 // so that we can form an appropriate vector for indexed tatami extraction.
+
354 const auto& intersection = (curinput.user_intersection == NULL ? curinput.auto_intersection : *(curinput.user_intersection));
+
355 std::unordered_map<Index_, Index_> intersection_map;
+
356 intersection_map.reserve(intersection.size());
+
357 for (const auto& in : intersection) {
+
358 intersection_map[in.first] = in.second;
+
359 }
+
360
+
361 std::vector<std::pair<Index_, Index_> > intersection_in_universe;
+
362 intersection_in_universe.reserve(output.universe.size());
+
363 auto& cur_available = output.available[ref_i];
+
364 cur_available.reserve(output.universe.size());
+
365
+
366 for (Index_ i = 0, end = output.universe.size(); i < end; ++i) {
+
367 auto it = intersection_map.find(output.universe[i]);
+
368 if (it != intersection_map.end()) {
+
369 intersection_in_universe.emplace_back(it->second, i); // using 'i' as we want to work with indices into 'universe', not the indices of the universe itself.
+
370 cur_available.insert(i);
+
371 }
+
372 }
+
373 std::sort(intersection_in_universe.begin(), intersection_in_universe.end());
+
374
+
375 std::vector<Index_> to_extract;
+
376 to_extract.reserve(intersection_in_universe.size());
+
377 for (const auto& p : intersection_in_universe) {
+
378 to_extract.push_back(p.first);
+
379 }
+
380 tatami::VectorPtr<Index_> to_extract_ptr(tatami::VectorPtr<Index_>{}, &to_extract);
+
381
+
382 tatami::parallelize([&](int, Index_ start, Index_ len) {
+
383 std::vector<Value_> buffer(to_extract.size());
+
384 internal::RankedVector<Value_, Index_> tmp_ranked;
+
385 tmp_ranked.reserve(to_extract.size());
+
386 auto ext = tatami::consecutive_extractor<false>(&ref, false, start, len, to_extract_ptr);
+
387
+
388 for (size_t c = start, end = start + len; c < end; ++c) {
+
389 auto ptr = ext->fetch(buffer.data());
+
390
+
391 tmp_ranked.clear();
+
392 for (const auto& p : intersection_in_universe) {
+
393 tmp_ranked.emplace_back(*ptr, p.second); // remember, 'p.second' corresponds to indices into the universe.
+
394 ++ptr;
+
395 }
+
396 std::sort(tmp_ranked.begin(), tmp_ranked.end());
+
397
+ + +
400 }
+
401 }, ref.ncol(), options.num_threads);
+
402 }
+
403}
+
404
+
405template<typename Value_, typename Index_, typename Inputs_>
+
406TrainedIntegrated<Index_> train_integrated(Inputs_& inputs, const TrainIntegratedOptions& options) {
+ +
408 size_t nrefs = inputs.size();
+
409 output.check_availability.resize(nrefs);
+
410 output.available.resize(nrefs);
+
411 output.markers.resize(nrefs);
+
412 output.ranked.resize(nrefs);
+
413
+
414 // Identify the union of all marker genes.
+
415 std::unordered_map<Index_, Index_> remap_to_universe;
+
416 std::unordered_set<Index_> subset_tmp;
+
417 for (const auto& in : inputs) {
+
418 for (const auto& mrk : in.markers) {
+
419 subset_tmp.insert(mrk.begin(), mrk.end());
+
420 }
+
421 }
+
422
+
423 output.universe.insert(output.universe.end(), subset_tmp.begin(), subset_tmp.end());
+
424 std::sort(output.universe.begin(), output.universe.end());
+
425 remap_to_universe.reserve(output.universe.size());
+
426 for (Index_ i = 0, end = output.universe.size(); i < end; ++i) {
+
427 remap_to_universe[output.universe[i]] = i;
+
428 }
+
429
+
430 for (size_t r = 0; r < nrefs; ++r) {
+ +
432 }
+
433
+
434 return output;
435}
-
450template<typename Value_, typename Index_, typename Label_>
-
- -
452 return internal::train_integrated<Value_, Index_>(inputs, options);
-
453}
+
436
+
437}
+
452template<typename Value_, typename Index_, typename Label_>
+
+ +
454 return internal::train_integrated<Value_, Index_>(inputs, options);
+
455}
-
454
-
465template<typename Value_, typename Index_, typename Label_>
-
- -
467 return internal::train_integrated<Value_, Index_>(inputs, options);
-
468}
-
-
469
+
456
+
467template<typename Value_, typename Index_, typename Label_>
+
+ +
469 return internal::train_integrated<Value_, Index_>(inputs, options);
470}
+
471
-
472#endif
+
472}
+
473
+
474#endif
Create an intersection of genes.
-
Classifier that integrates multiple reference datasets.
Definition train_integrated.hpp:214
-
size_t num_labels(size_t r) const
Definition train_integrated.hpp:227
-
size_t num_profiles(size_t r) const
Definition train_integrated.hpp:235
-
size_t num_references() const
Definition train_integrated.hpp:219
+
Classifier that integrates multiple reference datasets.
Definition train_integrated.hpp:216
+
size_t num_labels(size_t r) const
Definition train_integrated.hpp:229
+
size_t num_profiles(size_t r) const
Definition train_integrated.hpp:237
+
size_t num_references() const
Definition train_integrated.hpp:221
Common definitions for singlepp.
Cell type classification using the SingleR algorithm in C++.
Definition classify_single.hpp:19
-
TrainIntegratedInput< Value_, Index_, Label_ > prepare_integrated_input_intersect(const Intersection< Index_ > &intersection, const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, const TrainedSingleIntersect< Index_, Float_ > &trained)
Definition train_integrated.hpp:128
-
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:50
+
TrainIntegratedInput< Value_, Index_, Label_ > prepare_integrated_input_intersect(const Intersection< Index_ > &intersection, const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, const TrainedSingleIntersect< Index_, Float_ > &trained)
Definition train_integrated.hpp:130
+
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:54
TrainIntegratedInput< Value_, Index_, Label_ > prepare_integrated_input(const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, const TrainedSingle< Index_, Float_ > &trained)
Definition train_integrated.hpp:73
std::vector< std::vector< std::vector< Index_ > > > Markers
Definition Markers.hpp:40
-
TrainedIntegrated< Index_ > train_integrated(const std::vector< TrainIntegratedInput< Value_, Index_, Label_ > > &inputs, const TrainIntegratedOptions &options)
Definition train_integrated.hpp:451
+
TrainedIntegrated< Index_ > train_integrated(const std::vector< TrainIntegratedInput< Value_, Index_, Label_ > > &inputs, const TrainIntegratedOptions &options)
Definition train_integrated.hpp:453
Input to train_integrated().
Definition train_integrated.hpp:34
-
Options for train_integrated().
Definition train_integrated.hpp:263
-
int num_threads
Definition train_integrated.hpp:268
+
Options for train_integrated().
Definition train_integrated.hpp:265
+
int num_threads
Definition train_integrated.hpp:270
Train a classifier from a single reference.
diff --git a/docs/train__single_8hpp_source.html b/docs/train__single_8hpp_source.html index d1c63bc..b57652a 100644 --- a/docs/train__single_8hpp_source.html +++ b/docs/train__single_8hpp_source.html @@ -262,40 +262,40 @@
284};
285
-
308template<typename Index_, typename Value_, typename Label_, typename Float_>
-
- - -
311 const tatami::Matrix<Value_, Index_>& ref,
-
312 const Label_* labels,
- - -
315{
-
316 auto pairs = internal::subset_to_markers(intersection, markers, options.top);
-
317 auto subref = internal::build_references(ref, labels, pairs.second, options);
-
318 return TrainedSingleIntersect<Index_, Float_>(std::move(markers), std::move(pairs.first), std::move(pairs.second), std::move(subref));
-
319}
+
310template<typename Index_, typename Value_, typename Label_, typename Float_>
+
+ + +
313 const tatami::Matrix<Value_, Index_>& ref,
+
314 const Label_* labels,
+ + +
317{
+
318 auto pairs = internal::subset_to_markers(intersection, markers, options.top);
+
319 auto subref = internal::build_references(ref, labels, pairs.second, options);
+
320 return TrainedSingleIntersect<Index_, Float_>(std::move(markers), std::move(pairs.first), std::move(pairs.second), std::move(subref));
+
321}
-
320
-
349template<typename Index_, typename Id_, typename Value_, typename Label_, typename Float_>
-
- - -
352 const Id_* test_id,
-
353 const tatami::Matrix<Value_, Index_>& ref,
-
354 const Id_* ref_id,
-
355 const Label_* labels,
- - -
358{
- - -
361}
-
-
362
+
322
+
351template<typename Index_, typename Id_, typename Value_, typename Label_, typename Float_>
+
+ + +
354 const Id_* test_id,
+
355 const tatami::Matrix<Value_, Index_>& ref,
+
356 const Id_* ref_id,
+
357 const Label_* labels,
+ + +
360{
+ +
363}
+
364
-
365#endif
+
365}
+
366
+
367#endif
Classifier built from an intersection of genes.
Definition train_single.hpp:203
const Markers< Index_ > & get_markers() const
Definition train_single.hpp:235
@@ -311,8 +311,8 @@
Common definitions for singlepp.
Cell type classification using the SingleR algorithm in C++.
Definition classify_single.hpp:19
-
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:50
-
TrainedSingleIntersect< Index_, Float_ > train_single_intersect(const Intersection< Index_ > &intersection, const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, Markers< Index_ > markers, const TrainSingleOptions< Index_, Float_ > &options)
Definition train_single.hpp:309
+
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:54
+
TrainedSingleIntersect< Index_, Float_ > train_single_intersect(const Intersection< Index_ > &intersection, const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, Markers< Index_ > markers, const TrainSingleOptions< Index_, Float_ > &options)
Definition train_single.hpp:311
std::vector< std::vector< std::vector< Index_ > > > Markers
Definition Markers.hpp:40
TrainedSingle< Index_, Float_ > train_single(const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, Markers< Index_ > markers, const TrainSingleOptions< Index_, Float_ > &options)
Definition train_single.hpp:181
Options for train_single() and friends.
Definition train_single.hpp:28
intersectionVector defining the intersection of genes between the test and reference datasets, see intersect_genes() for more details.
intersectionVector defining the intersection of genes between the test and reference datasets. Each pair corresponds to a gene where the first and second elements represent the row indices of that gene in the test and reference matrices, respectively. See intersect_genes() for more details.
refAn expression matrix for the reference expression profiles, where rows are genes and columns are cells. This should have non-zero columns.
[in]labelsAn array of length equal to the number of columns of ref, containing the label for each reference profile. Labels should be integers in \([0, L)\) where \(L\) is the total number of unique labels.
markersA vector of vectors of ranked marker genes for each pairwise comparison between labels, see singlepp::Markers for more details.