Skip to content

Commit

Permalink
write and adapt tests
Browse files Browse the repository at this point in the history
  • Loading branch information
joergi-w authored and Irallia committed Nov 4, 2022
1 parent 919f579 commit c58bdc9
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 35 deletions.
2 changes: 2 additions & 0 deletions test/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ add_api_test (clustering_test.cpp)
# add_api_test (refinement_test.cpp)

add_api_test (snp_indel_test.cpp)
target_use_datasources (snp_indel_test FILES simulated.minimap2.hg19.coordsorted_cutoff.sam)

add_api_test (structures_test.cpp)
5 changes: 3 additions & 2 deletions test/api/input_file_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,11 +337,12 @@ TEST(output_file, output_file_fail)
std::filesystem::path fail_path{tmp_dir/"fail_file.vcf"};
std::filesystem::create_directory(fail_path); // Make a directory with same name as output file name.
std::map<std::string, int32_t> empty_map{};
std::set<Junction> empty_set{};
std::vector<Cluster> empty_vec{};
cmd_arguments empty_args{};

EXPECT_THROW(find_and_output_variants(empty_map, empty_vec, empty_args, fail_path), std::runtime_error);
EXPECT_THROW(find_and_output_variants(empty_map, empty_vec, empty_set, empty_args, fail_path), std::runtime_error);
std::filesystem::remove_all(fail_path);
EXPECT_NO_THROW(find_and_output_variants(empty_map, empty_vec, empty_args, fail_path));
EXPECT_NO_THROW(find_and_output_variants(empty_map, empty_vec, empty_set, empty_args, fail_path));
std::filesystem::remove_all(fail_path);
}
74 changes: 70 additions & 4 deletions test/api/snp_indel_test.cpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#include <gtest/gtest.h>

#include <fstream>
#include <vector>

#include "variant_detection/snp_indel_detection.hpp"

TEST(activity_analysis, input_empty)
TEST(activity_detection, input_empty)
{
Activity activity{};
auto regions = get_active_regions(activity);
EXPECT_TRUE(regions.empty());
}

TEST(activity_analysis, input_zero)
TEST(activity_detection, input_zero)
{
Activity activity{};
activity.values.emplace_back(150, 0U); // 150x 0
Expand All @@ -20,7 +21,7 @@ TEST(activity_analysis, input_zero)
EXPECT_TRUE(regions.front().empty());
}

TEST(activity_analysis, input_active_everywhere)
TEST(activity_detection, input_active_everywhere)
{
Activity activity{};
activity.values.emplace_back(150, 9999U); // 150x 9999
Expand All @@ -30,7 +31,7 @@ TEST(activity_analysis, input_active_everywhere)
EXPECT_EQ(regions.front().front(), (std::pair<int, int>{0, 150}));
}

TEST(activity_analysis, input_interval)
TEST(activity_detection, input_interval)
{
Activity activity{};
activity.values.emplace_back(150, 0U);
Expand All @@ -44,3 +45,68 @@ TEST(activity_analysis, input_interval)
EXPECT_EQ(regions.front()[0], (std::pair<int, int>{30, 50}));
EXPECT_EQ(regions.front()[1], (std::pair<int, int>{131, 149}));
}

// analyze_activity is expected to throw std::runtime_error if the given genome
// does not fit the reference information of the alignment file.
TEST(activity_analysis, incompatible_genome)
{
    using namespace seqan3::literals;

    std::filesystem::path const sam_path = DATADIR"simulated.minimap2.hg19.coordsorted_cutoff.sam";
    Genome ref_genome;

    // Case 1: the genome has too few sequences (0 instead of 1).
    EXPECT_THROW(analyze_activity(sam_path, ref_genome, 20UL), std::runtime_error);

    // Case 2: one sequence is present now, but its length does not match (3 instead of 46709983).
    ref_genome.seqs.push_back("ACG"_dna5);
    ref_genome.names.emplace_back("chr8");
    EXPECT_THROW(analyze_activity(sam_path, ref_genome, 20UL), std::runtime_error);
}
#include <seqan3/core/debug_stream.hpp>
// A reference that is declared in the SAM header but has no aligned read must be skipped:
// its refmap entry is -1 and its activity vector stays empty.
TEST(activity_analysis, skip_ref)
{
    // Create a SAM file with two references (chr1, chr2); the only read is aligned to chr2.
    std::filesystem::path reads{std::filesystem::temp_directory_path()/"reads.sam"};
    {
        std::ofstream samfile(reads);
        samfile << "@HD\tVN:1.6\tSO:coordinate\n"
                << "@SQ\tSN:chr1\tLN:4\n"
                << "@SQ\tSN:chr2\tLN:3\n"
                << "test1\t16\tchr2\t1\t60\t10M\t=\t1\t0\tG\tF\n"; // chr1 skipped
    } // leaving the scope flushes and closes the stream before the file is read

    // The genome matches the header: chr1 has length 4, chr2 has length 3.
    Genome genome;
    using namespace seqan3::literals;
    genome.names.emplace_back("chr1");
    genome.seqs.push_back("ACGT"_dna5);
    genome.names.emplace_back("chr2");
    genome.seqs.push_back("ACG"_dna5);

    auto act = analyze_activity(reads, genome, 20UL);

    // The temporary file is no longer needed; remove it before any fatal assertion can return early.
    // NOTE(review): assumes the returned activity does not read from the file lazily - confirm.
    std::filesystem::remove(reads);

    EXPECT_EQ(act.refmap, (std::vector<int>{-1, 1})); // first unused, second maps to seq 1

    // Fatal check: indexing values[0] and values[1] below would be undefined behaviour otherwise.
    ASSERT_GE(act.values.size(), 2UL);
    EXPECT_TRUE(act.values[0].empty()); // first is unused => empty
    EXPECT_EQ(act.values[1], (std::vector<unsigned>{0, 0, 0})); // second has length 3 (=ref len), no activity added
}

// Storing the same SNP junction twice must not create a duplicate entry in the set;
// the expectations below require that the qualities of both insertions are summed up.
TEST(store_snp, same_junction)
{
    using namespace seqan3::literals;
    std::set<Junction> junctions;
    seqan3::dna5_vector bufR = "ACG"_dna5;
    seqan3::dna5_vector bufH = "CT"_dna5;
    int pos = 30;
    std::string const ref_name = "chr1";

    // The buffers are re-assigned before every call.
    // NOTE(review): presumably store_snp modifies or consumes them - confirm against its signature.
    store_snp(junctions, bufR, bufH, pos, ref_name, 0.1);
    bufR = "ACG"_dna5;
    bufH = "CG"_dna5;
    store_snp(junctions, bufR, bufH, pos, ref_name, 0.6); // this junction compares smaller
    bufR = "ACG"_dna5;
    bufH = "CT"_dna5;
    store_snp(junctions, bufR, bufH, pos, ref_name, 0.3); // add the initial junction again

    // Fatal check: the iterator below is advanced and dereferenced twice, which would be
    // undefined behaviour if fewer than two junctions were stored.
    ASSERT_EQ(junctions.size(), 2UL);
    auto jctIt = junctions.begin(); // check 1st junction
    EXPECT_FLOAT_EQ(jctIt->get_quality(), 0.6F);
    ++jctIt; // go to 2nd junction
    EXPECT_FLOAT_EQ(jctIt->get_quality(), 0.4F); // 0.1 + 0.3
    EXPECT_TRUE(*jctIt != *junctions.begin());
}
68 changes: 51 additions & 17 deletions test/cli/iGenVar_cli_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ std::string const help_page_advanced
" -d, --method (List of detection_methods)\n"
" Choose the detection method(s) to be used. Value must be one of\n"
" (method name or number)\n"
" [0,cigar_string,1,split_read,2,read_pairs,3,read_depth]. Default:\n"
" [cigar_string, split_read, read_pairs, read_depth].\n"
" [0,cigar_string,1,split_read,2,read_pairs,3,read_depth,4,snp_indel].\n"
" Default: [cigar_string,split_read,read_pairs,read_depth,snp_indel].\n"
" -c, --clustering_method (clustering_methods)\n"
" Choose the clustering method to be used. Value must be one of\n"
" (method name or number)\n"
Expand Down Expand Up @@ -178,7 +178,7 @@ std::string const expected_err_default_no_err_2
{
"Done with clustering. Found 2 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};

// VCF output
Expand All @@ -195,7 +195,7 @@ std::string const general_header_lines_1
};

std::string const contig_cutoff_sam = "##contig=<ID=chr21,length=46709983>\n";
std::string const contig_mini_example = "##contig=<ID=chr1,length=482>\n";
std::string const contig_mini_example = "##contig=<ID=chr1,length=610>\n";

size_t filedate_position_0 = general_header_lines_1.size() + 11;
size_t filedate_position_1 = general_header_lines_1.size() + contig_cutoff_sam.size() + 11;
Expand Down Expand Up @@ -340,16 +340,50 @@ TEST_F(iGenVar_cli_test, test_genome_input)
std::string const expected_err =
{
"Detect SNPs and indels in short reads...\n"
"Active regions of chr1: [(6,15),(53,74),(121,130),(176,185),(184,193),(262,304),"
"(311,319),(332,354),(364,373),(381,398),(467,476)]\n"
"Active regions for chr1: [(0,36),(33,95),(101,151),(156,214),(242,419),(447,497),(484,555),(556,610)]\n"
"Start clustering...\n"
"Done with clustering. Found 0 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 31 SNPs/Indels.\n"
};
std::string const expected_out =
{
"chr1\t59\tigenvar_snp_0\tT\tA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t188\tigenvar_ins_1\t.\tA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t190\tigenvar_snp_2\tCGGG\tT\t1\tPASS\t.\tGT\t./.\n"
"chr1\t196\tigenvar_snp_3\tA\tT\t1\tPASS\t.\tGT\t./.\n"
"chr1\t198\tigenvar_snp_4\tA\tT\t1\tPASS\t.\tGT\t./.\n"
"chr1\t337\tigenvar_snp_5\tTAT\tGGGC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t342\tigenvar_del_6\tA\t.\t1\tPASS\t.\tGT\t./.\n"
"chr1\t344\tigenvar_del_7\tGG\t.\t1\tPASS\t.\tGT\t./.\n"
"chr1\t348\tigenvar_del_8\tTT\t.\t1\tPASS\t.\tGT\t./.\n"
"chr1\t353\tigenvar_snp_9\tG\tT\t1\tPASS\t.\tGT\t./.\n"
"chr1\t355\tigenvar_snp_10\tT\tC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t357\tigenvar_ins_11\t.\tA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t358\tigenvar_snp_12\tC\tG\t1\tPASS\t.\tGT\t./.\n"
"chr1\t361\tigenvar_snp_13\tG\tAC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t364\tigenvar_snp_14\tCC\tGT\t1\tPASS\t.\tGT\t./.\n"
"chr1\t368\tigenvar_del_15\tA\t.\t1\tPASS\t.\tGT\t./.\n"
"chr1\t384\tigenvar_ins_16\t.\tC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t471\tigenvar_ins_17\t.\tA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t472\tigenvar_snp_18\tGG\tTA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t476\tigenvar_snp_19\tCT\tGA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t479\tigenvar_snp_20\tAT\tGC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t484\tigenvar_ins_21\t.\tC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t485\tigenvar_ins_22\t.\tA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t489\tigenvar_snp_23\tA\tGC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t491\tigenvar_ins_24\t.\tC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t581\tigenvar_snp_25\tC\tT\t1\tPASS\t.\tGT\t./.\n"
"chr1\t583\tigenvar_snp_26\tA\tGC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t587\tigenvar_del_27\tC\t.\t1\tPASS\t.\tGT\t./.\n"
"chr1\t592\tigenvar_ins_28\t.\tC\t1\tPASS\t.\tGT\t./.\n"
"chr1\t594\tigenvar_snp_29\tCG\tA\t1\tPASS\t.\tGT\t./.\n"
"chr1\t597\tigenvar_snp_30\tT\tC\t1\tPASS\t.\tGT\t./.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.err, expected_err);
EXPECT_EQ(result.out.erase(filedate_position_0, 19), general_header_lines_1 + general_header_lines_2); // erase the filedate
EXPECT_EQ(result.out.erase(filedate_position_2, 19), // erase the filedate
general_header_lines_1 + contig_mini_example + general_header_lines_2 + expected_out);
}

// SV specifications:
Expand Down Expand Up @@ -428,7 +462,7 @@ TEST_F(iGenVar_cli_test, set_min_qual)
{
"Done with clustering. Found 2 junction clusters.\n"
"No refinement was selected.\n"
"Detected 1 SVs.\n"
"Detected 1 SVs and 0 SNPs/Indels.\n"
};
std::string const expected_res
{
Expand Down Expand Up @@ -500,7 +534,7 @@ TEST_F(iGenVar_cli_test, simple_clustering)
{
"Done with clustering. Found 3 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out.erase(filedate_position_1, 19), expected_res_default); // erase the filedate
Expand Down Expand Up @@ -544,7 +578,7 @@ TEST_F(iGenVar_cli_test, self_balancing_binary_tree)
"The self-balancing binary tree clustering method is not yet implemented.\n"
"Done with clustering. Found 0 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out.erase(filedate_position_1, 19), expected_res_default); // erase the filedate
Expand All @@ -562,7 +596,7 @@ TEST_F(iGenVar_cli_test, candidate_selection_based_on_voting)
"The candidate selection based on voting clustering method is not yet implemented.\n"
"Done with clustering. Found 0 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out.erase(filedate_position_1, 19), expected_res_default); // erase the filedate
Expand Down Expand Up @@ -592,7 +626,7 @@ TEST_F(iGenVar_cli_test, sViper_refinement_method)
{
"Done with clustering. Found 2 junction clusters.\n"
"The sViper refinement method is not yet implemented.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out.erase(filedate_position_1, 19), expected_res_default); // erase the filedate
Expand All @@ -609,7 +643,7 @@ TEST_F(iGenVar_cli_test, sVirl_refinement_method)
{
"Done with clustering. Found 2 junction clusters.\n"
"The sVirl refinement method is not yet implemented.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out.erase(filedate_position_1, 19), expected_res_default); // erase the filedate
Expand Down Expand Up @@ -670,7 +704,7 @@ TEST_F(iGenVar_cli_test, test_direct_methods_input)
"Start clustering...\n"
"Done with clustering. Found 3 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.exit_code, 0);
EXPECT_EQ(result.out.erase(filedate_position_1, 19), expected_res_default); // erase the filedate
Expand All @@ -686,7 +720,7 @@ TEST_F(iGenVar_cli_test, test_unknown_argument)
std::string const expected_err
{
"[Error] You have chosen an invalid input value: 9. "
"Please use one of: [0, cigar_string, 1, split_read, 2, read_pairs, 3, read_depth]\n"
"Please use one of: [0, cigar_string, 1, split_read, 2, read_pairs, 3, read_depth, 4, snp_indel]\n"
};
EXPECT_EQ(result.exit_code, 65280);
EXPECT_EQ(result.out, std::string{});
Expand Down Expand Up @@ -748,7 +782,7 @@ TEST_F(iGenVar_cli_test, dataset_paired_end_mini_example)
"Start clustering...\n"
"Done with clustering. Found 0 junction clusters.\n"
"No refinement was selected.\n"
"Detected 0 SVs.\n"
"Detected 0 SVs and 0 SNPs/Indels.\n"
};
EXPECT_EQ(result.err, expected_err);

Expand Down
12 changes: 6 additions & 6 deletions test/data/datasources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,29 @@ declare_datasource (FILE mini_example_reference.fasta
# copies file to <build>/data/paired_end_mini_example.sam
declare_datasource (FILE paired_end_mini_example.sam
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/paired_end_mini_example.sam
URL_HASH SHA256=9dc47068c9a685ae414d9d943b512ab14d1d8de041d805e61c7f8352831a51b3)
URL_HASH SHA256=58961c8f016dbcaa6d38806f9cd3e90dbacda4f40af4693db9540c5544b9367a)

# copies file to <build>/data/single_end_mini_example.sam
declare_datasource (FILE single_end_mini_example.sam
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/single_end_mini_example.sam
URL_HASH SHA256=5dbf1d7f41b392bd34ff765b65ac9be73b08246aad774a2399a83523ca45cf41)
URL_HASH SHA256=b7ad2c43c444e4897e883c01f8efe3bbdd2b185d1c30efaa16be28ca2b04049d)

# copies file to <build>/data/output_err.txt
declare_datasource (FILE output_err.txt
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_err.txt
URL_HASH SHA256=f44b6522c6df97506a70e6cc961c4d2b1caf2cef3b245688d327ce22c7425133)
URL_HASH SHA256=d30107b5222a0768db9cbe6f4f7c4b11b7c530057f99c969d891a56f596f6320)

# copies file to <build>/data/output_res.vcf
declare_datasource (FILE output_res.vcf
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_res.vcf
URL_HASH SHA256=9b70052748679b2e29d4985ad9864380c6f7f66f56c6797f73421dd2598a1455)
URL_HASH SHA256=938d73ef495f4cddccc10eaefbda7c3ec934926798edf5a8db4d1b00ae10e646)

# copies file to <build>/data/output_short_and_long_err.txt
declare_datasource (FILE output_short_and_long_err.txt
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_short_and_long_err.txt
URL_HASH SHA256=568ed8a913fc05f31f91051df6ef17be3b55c0afd79c5f6c657ad6342f897fd2)
URL_HASH SHA256=86dfe165ea871101b52c37dbcb6db8b4ccf86a2484d12277ecdd4b5e75be5cc8)

# copies file to <build>/data/output_short_and_long_res.vcf
declare_datasource (FILE output_short_and_long_res.vcf
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_short_and_long_res.vcf
URL_HASH SHA256=71b92fb734be1fa5b4d5973207920c17e8a2d02d9351a413fb401401eb500ad4)
URL_HASH SHA256=a95fd7e6f882ea977feaef541ec8fc92906b5fda329088433670fcaa05d36a50)
2 changes: 1 addition & 1 deletion test/data/mini_example/output_err.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@ Inverted bases: TAGCAACTCTCCAAAAC
Start clustering...
Done with clustering. Found 21 junction clusters.
No refinement was selected.
Detected 14 SVs.
Detected 14 SVs and 0 SNPs/Indels.
2 changes: 1 addition & 1 deletion test/data/mini_example/output_res.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
##INFO=<ID=iGenVar_SVLEN,Number=1,Type=Integer,Description="Length of SV called.">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of SV called.">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##contig=<ID=chr1,length=482>
##contig=<ID=chr1,length=610>
##filedate=
##source=iGenVarCaller
##ALT=<ID=DEL,Description="Deletion">
Expand Down
2 changes: 1 addition & 1 deletion test/data/mini_example/output_short_and_long_err.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,4 @@ Inverted bases: TAGCAACTCTCCAAAAC
Start clustering...
Done with clustering. Found 21 junction clusters.
No refinement was selected.
Detected 14 SVs.
Detected 14 SVs and 0 SNPs/Indels.
2 changes: 1 addition & 1 deletion test/data/mini_example/output_short_and_long_res.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
##INFO=<ID=iGenVar_SVLEN,Number=1,Type=Integer,Description="Length of SV called.">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of SV called.">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##contig=<ID=chr1,length=482>
##contig=<ID=chr1,length=610>
##filedate=
##source=iGenVarCaller
##ALT=<ID=DEL,Description="Deletion">
Expand Down
2 changes: 1 addition & 1 deletion test/data/mini_example/paired_end_mini_example.sam
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
@HD VN:1.6 SO:coordinate
@SQ SN:1 LN:482
@SQ SN:1 LN:610
read001 99 1 1 60 50M 1 113 148 CGCCCATGCAACTAGCGATGCTAGCTAGCTAGCTTACGACTGGCCATGCG * AS:i:50 NM:i:0
read002 99 1 2 60 50M 1 114 148 GCCCATGCAACTAGCGATGCTAGCTAGCTAGCTTACGACTGGCCATGCGG * AS:i:50 NM:i:0
read003 99 1 3 60 50M 1 118 148 CCCATGCAACTAGCGATGCTAGCTAGCTAGCTTACGACTGGCCATGCGGC * AS:i:50 NM:i:0
Expand Down
2 changes: 1 addition & 1 deletion test/data/mini_example/single_end_mini_example.sam
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
@HD VN:1.6 SO:coordinate
@SQ SN:chr1 LN:482
@SQ SN:chr1 LN:610
read001 0 chr1 1 60 50M * 0 0 CGCCCATGCAACTAGCGATGCTAGCTAGCTAGCTTACGACTGGCCATGCG * AS:i:50 NM:i:0
read002 0 chr1 2 60 50M * 0 0 GCCCATGCAACTAGCGATGCTAGCTAGCTAGCTTACGACTGGCCATGCGG * AS:i:50 NM:i:0
read003 0 chr1 3 60 50M * 0 0 CCCATGCAACTAGCGATGCTAGCTAGCTAGCTTACGACTGGCCATGCGGC * AS:i:50 NM:i:0
Expand Down

0 comments on commit c58bdc9

Please sign in to comment.