diff --git a/src/cli.rs b/src/cli.rs index aef89cd..b598634 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -11,6 +11,8 @@ pub const DEFAULT_KMER: usize = 17; pub const DEFAULT_STRAND: bool = false; /// Default repeat masking behaviour pub const DEFAULT_REPEATMASK: bool = false; +/// Default ambiguous masking behaviour +pub const DEFAULT_AMBIGMASK: bool = false; /// Default minimum k-mer count for FASTQ files pub const DEFAULT_MINCOUNT: u16 = 10; /// Default minimum base quality (PHRED score) for FASTQ files @@ -71,14 +73,16 @@ pub enum FileType { Aln, } -/// Possible variant filters for align +/// Possible variant filters #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] pub enum FilterType { /// Output all variants NoFilter, /// Filter constant bases NoConst, - /// Filter constant bases, and any variants where there are ambiguous bases + /// Filter any site with an ambiguous base + NoAmbig, + /// Filter constant bases, and any ambiguous bases NoAmbigOrConst, } @@ -88,6 +92,7 @@ impl fmt::Display for FilterType { match *self { Self::NoFilter => write!(f, "No filtering"), Self::NoConst => write!(f, "No constant sites"), + Self::NoAmbig => write!(f, "No ambiguous sites"), Self::NoAmbigOrConst => write!(f, "No constant sites or ambiguous bases"), } } @@ -171,6 +176,10 @@ pub enum Commands { #[arg(long, value_enum, default_value_t = FilterType::NoConst)] filter: FilterType, + /// Mask any ambiguous bases in the alignment with 'N' + #[arg(long, default_value_t = DEFAULT_AMBIGMASK)] + ambig_mask: bool, + /// Number of CPU threads #[arg(long, value_parser = valid_cpus, default_value_t = 1)] threads: usize, @@ -191,6 +200,10 @@ pub enum Commands { #[arg(short, long, value_enum, default_value_t = FileType::Aln)] format: FileType, + /// Mask any ambiguous bases in the alignment with 'N' + #[arg(long, default_value_t = DEFAULT_AMBIGMASK)] + ambig_mask: bool, + /// Mask any repeats in the alignment with 'N' #[arg(long, default_value_t = DEFAULT_REPEATMASK)] repeat_mask: bool, @@ -266,6 +279,10 @@ pub enum Commands { /// Filter for constant middle base sites #[arg(long, value_enum, default_value_t = FilterType::NoFilter)] filter: FilterType, + + /// Mask any ambiguous bases in the alignment with 'N' + #[arg(long, default_value_t = DEFAULT_AMBIGMASK)] + ambig_mask: bool, }, /// Get the number of k-mers in a split k-mer file, and other information Nk { diff --git a/src/generic_modes.rs b/src/generic_modes.rs index 2853a16..b02deac 100644 --- a/src/generic_modes.rs +++ b/src/generic_modes.rs @@ -19,13 +19,14 @@ pub fn align UInt<'a>>( ska_array: &mut MergeSkaArray, output: &Option, filter: &FilterType, + mask_ambig: bool, min_freq: f64, ) { // In debug mode (cannot be set from CLI, give details) log::debug!("{ska_array}"); // Apply filters - apply_filters(ska_array, min_freq, filter); + apply_filters(ska_array, min_freq, filter, mask_ambig); // Write out to file/stdout log::info!("Writing alignment"); @@ -102,11 +103,12 @@ pub fn apply_filters UInt<'a>>( ska_array: &mut MergeSkaArray, min_freq: f64, filter: &FilterType, + ambig_mask: bool, ) -> i32 { let update_kmers = false; let filter_threshold = f64::ceil(ska_array.nsamples() as f64 * min_freq) as usize; - log::info!("Applying filters: threshold={filter_threshold} constant_site_filter={filter}"); - ska_array.filter(filter_threshold, filter, update_kmers) + log::info!("Applying filters: threshold={filter_threshold} constant_site_filter={filter} ambig_mask={ambig_mask}"); + ska_array.filter(filter_threshold, filter, ambig_mask, update_kmers) } /// Calculate distances between samples @@ -122,12 +124,13 @@ pub fn distance UInt<'a>>( // In debug mode (cannot be set from CLI, give details) log::debug!("{ska_array}"); - let constant = apply_filters(ska_array, min_freq, &FilterType::NoConst); + let mask_ambig = false; + let constant = apply_filters(ska_array, min_freq, &FilterType::NoConst, mask_ambig); if filt_ambig || (min_freq * ska_array.nsamples() as f64 >= 1.0) { if filt_ambig { - apply_filters(ska_array, min_freq, &FilterType::NoAmbigOrConst); + apply_filters(ska_array, min_freq, &FilterType::NoAmbigOrConst, mask_ambig); } else { - apply_filters(ska_array, min_freq, &FilterType::NoFilter); + apply_filters(ska_array, min_freq, &FilterType::NoFilter, mask_ambig); } } @@ -181,6 +184,7 @@ pub fn weed UInt<'a>>( reverse: bool, min_freq: f64, filter: &FilterType, + ambig_mask: bool, out_file: &str, ) { if let Some(weed_fasta) = weed_file { @@ -189,7 +193,15 @@ pub fn weed UInt<'a>>( ska_array.kmer_len(), ska_array.rc() ); - let ska_weed = RefSka::new(ska_array.kmer_len(), weed_fasta, ska_array.rc(), false); + let repeat_mask = false; + let filter_ambig = false; + let ska_weed = RefSka::new( + ska_array.kmer_len(), + weed_fasta, + ska_array.rc(), + repeat_mask, + filter_ambig, + ); if !reverse { log::info!("Removing weed k-mers"); @@ -200,10 +212,10 @@ pub fn weed UInt<'a>>( } let filter_threshold = f64::floor(ska_array.nsamples() as f64 * min_freq) as usize; - if filter_threshold > 0 || *filter != FilterType::NoFilter { - log::info!("Applying filters: threshold={filter_threshold} constant_site_filter={filter}"); + if filter_threshold > 0 || *filter != FilterType::NoFilter || ambig_mask { + log::info!("Applying filters: threshold={filter_threshold} constant_site_filter={filter} ambig_mask={ambig_mask}"); let update_kmers = true; - ska_array.filter(filter_threshold, filter, update_kmers); + ska_array.filter(filter_threshold, filter, ambig_mask, update_kmers); } log::info!("Saving modified skf file"); diff --git a/src/io_utils.rs b/src/io_utils.rs index c06cd1e..46b284d 100644 --- a/src/io_utils.rs +++ b/src/io_utils.rs @@ -132,3 +132,15 @@ pub fn get_input_list( None => read_input_fastas(seq_files.as_ref().unwrap()), } } + +/// Checks if any input files are fastq +pub fn any_fastq(files: &[InputFastx]) -> bool { + let mut fastq = false; + for file in files { + if file.2.is_some() { + fastq = true; + break; + } + } + fastq +} diff --git a/src/lib.rs b/src/lib.rs index c0fb53d..4b7bbd2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ //! Various optimisations are used to make this as fast as possible. For a more thorough comparison with version 1.0 of SKA, see the //! [github description](https://github.com/bacpop/ska.rust/blob/master/README.md). //! -//! //! *NB As split k-mers are even lengths, it is possible that they are their +//! *NB As split k-mers are even lengths, it is possible that they are their //! own reverse complement. The original version of ska would randomly pick a strand, //! possibly introducing a SNP across samples. This version uses an ambiguous middle //! base (W for A/T; S for C/G) to represent this case.* @@ -50,6 +50,18 @@ //! to stream into another compatible program on STDIN. You can also add an output //! file prefix directly with `-o` (for `ska build` this is required). //! +//! ## Important notes +//! +//! - In this version of ska the k-mer length is the _total_ split k-mer size, +//! whereas in ska1 the k-mer length is half the split length. So the default of +//! k=15 in ska1 corresponds to choosing `-k 31` in this version. +//! - If you are using FASTQ input files (reads), these must be provided as two +//! deinterleaved files using the `-f` option to `ska build`. Providing them as +//! a single file will treat them as FASTA, and not correct sequencing errors. +//! - If you are running `ska map`, it is more memory efficient to run these one at +//! a time, rather than merging a single `.skf` file. A command for doing this +//! in parallel is listed below. +//! //! ## Common options //! //! Version can be viewed by running `ska -V`. @@ -168,6 +180,18 @@ //! ska map ref.fa seq1.fa seq2.fa -f vcf --threads 2 | bgzip -c - > seqs.vcf.gz //! ``` //! +//! ### Efficiency +//! +//! As `ska map` is independent for each input file, the most efficient way to +//! run this on a number of samples is with gnu parallel, for example: +//! ```bash +//! cat ${FILE_LIST} | parallel -j 8 "ID=\$(basename {} | sed 's/_a.*fasta/_temp_skf/g'); +//! ska build -o \$ID -k 31 {} ; +//! MAPOUT=\$(basename {} | sed 's/_a.*fasta/_aln/g'); +//! ska map $REFERENCE_LOC \${ID}.skf -o \${MAPOUT}.aln" +//! cat *.aln > ska_map.aln +//! ``` +//! //! ## ska distance //! //! Use to calculate distances between all samples within an `.skf` file. The output @@ -342,7 +366,8 @@ //! let min_count = 2; //! let update_kmers = false; //! let filter = FilterType::NoConst; -//! ska_array.filter(min_count, &filter, update_kmers); +//! let ambig_mask = false; +//! ska_array.filter(min_count, &filter, update_kmers, ambig_mask); //! //! // Write out to stdout //! let mut out_stream = set_ostream(&None); @@ -485,17 +510,18 @@ pub fn main() { output, min_freq, filter, + ambig_mask, threads, } => { check_threads(*threads); if let Ok(mut ska_array) = load_array::(input, *threads) { // In debug mode (cannot be set from CLI, give details) log::debug!("{ska_array}"); - align(&mut ska_array, output, filter, *min_freq); + align(&mut ska_array, output, filter, *ambig_mask, *min_freq); } else if let Ok(mut ska_array) = load_array::(input, *threads) { // In debug mode (cannot be set from CLI, give details) log::debug!("{ska_array}"); - align(&mut ska_array, output, filter, *min_freq); + align(&mut ska_array, output, filter, *ambig_mask, *min_freq); } else { panic!("Could not read input file(s): {input:?}"); } @@ -505,10 +531,12 @@ pub fn main() { input, output, format, + ambig_mask, repeat_mask, threads, } => { check_threads(*threads); + log::info!("Loading skf as dictionary"); if let Ok(mut ska_array) = load_array::(input, *threads) { log::info!( @@ -520,6 +548,7 @@ pub fn main() { ska_array.kmer_len(), reference, ska_array.rc(), + *ambig_mask, *repeat_mask, ); map(&mut ska_array, &mut ska_ref, output, format, *threads); @@ -533,6 +562,7 @@ pub fn main() { ska_array.kmer_len(), reference, ska_array.rc(), + *ambig_mask, *repeat_mask, ); map(&mut ska_array, &mut ska_ref, output, format, *threads); @@ -607,6 +637,7 @@ pub fn main() { weed_file, reverse, min_freq, + ambig_mask, filter, } => { log::info!("Loading skf file"); @@ -617,6 +648,7 @@ pub fn main() { *reverse, *min_freq, filter, + *ambig_mask, skf_file, ); } else if let Ok(mut ska_array) = MergeSkaArray::::load(skf_file) { @@ -626,6 +658,7 @@ pub fn main() { *reverse, *min_freq, filter, + *ambig_mask, skf_file, ); } else { diff --git a/src/merge_ska_array.rs b/src/merge_ska_array.rs index 78f2115..a23dfe5 100644 --- a/src/merge_ska_array.rs +++ b/src/merge_ska_array.rs @@ -55,8 +55,9 @@ use crate::cli::FilterType; /// // Remove constant sites and save /// let min_count = 1; // no filtering by minor allele frequency /// let filter = FilterType::NoAmbigOrConst; // remove sites with no minor allele +/// let mask_ambiguous = false; // leave ambiguous sites as IUPAC codes /// let update_counts = true; // keep counts updated, as saving -/// ska_array.filter(min_count, &filter, update_counts); +/// ska_array.filter(min_count, &filter, mask_ambiguous, update_counts); /// ska_array.save(&"no_const_sites.skf"); /// /// // Delete a sample @@ -64,7 +65,7 @@ use crate::cli::FilterType; /// /// // Delete k-mers /// let mask_repeats = false; -/// let ska_weed = RefSka::new(ska_array.kmer_len(), &"tests/test_files_in/weed.fa", ska_array.rc(), mask_repeats); +/// let ska_weed = RefSka::new(ska_array.kmer_len(), &"tests/test_files_in/weed.fa", ska_array.rc(), mask_repeats, mask_ambiguous); /// let reverse = false; /// ska_array.weed(&ska_weed, reverse); /// ``` @@ -234,11 +235,18 @@ where /// /// - `min_count` -- minimum number of samples split k-mer found in. /// - `const_sites` -- include sites where all middle bases are the same. + /// - `mask_ambig` -- replace any non-ACGTUN- with N /// - `update_kmers` -- update counts and split k-mers after removing variants. /// /// The default for `update_kmers` should be `true`, but it can be `false` /// if [`write_fasta()`] will be called immediately afterwards. - pub fn filter(&mut self, min_count: usize, filter: &FilterType, update_kmers: bool) -> i32 { + pub fn filter( + &mut self, + min_count: usize, + filter: &FilterType, + mask_ambig: bool, + update_kmers: bool, + ) -> i32 { let total = self.names.len(); let mut filtered_variants = Array2::zeros((0, total)); let mut filtered_counts = Vec::new(); @@ -265,6 +273,16 @@ where } keep } + FilterType::NoAmbig => { + let mut keep = true; + for var in row { + if is_ambiguous(*var) { + keep = false; + break; + } + } + keep + } FilterType::NoAmbigOrConst => { let mut keep = false; let mut first_var = None; @@ -300,6 +318,21 @@ where self.split_kmers = filtered_kmers; } log::info!("Filtering removed {removed} split k-mers"); + + // Replace any ambiguous variants with Ns, if requested + if mask_ambig { + let mut masked = 0; + self.variants.mapv_inplace(|v| { + if is_ambiguous(v) { + masked += 1; + b'N' + } else { + v + } + }); + log::info!("Masked {masked} ambiguous bases (non-A/C/G/T/U/N/-) with 'N'"); + } + removed } diff --git a/src/merge_ska_dict.rs b/src/merge_ska_dict.rs index 19788fe..e5b412b 100644 --- a/src/merge_ska_dict.rs +++ b/src/merge_ska_dict.rs @@ -14,6 +14,7 @@ use hashbrown::HashMap; use indicatif::ProgressIterator; use super::QualOpts; +use crate::io_utils::any_fastq; use crate::ska_dict::bit_encoding::UInt; use crate::ska_dict::SkaDict; @@ -330,7 +331,13 @@ where { // Build indexes log::info!("Building skf dicts from sequence input"); - log::info!("If FASTQ input: {qual}"); + + if any_fastq(input_files) { + log::info!("FASTQ files filtered with: {qual}"); + } else { + log::info!("All input files FASTA (no error filtering)"); + } + if threads > 1 { rayon::ThreadPoolBuilder::new() .num_threads(threads) diff --git a/src/ska_ref.rs b/src/ska_ref.rs index 9fae006..c248c01 100644 --- a/src/ska_ref.rs +++ b/src/ska_ref.rs @@ -21,7 +21,8 @@ //! //! // Index a reference sequence //! let mask_repeats = false; -//! let mut ref_kmers = RefSka::new(ska_dict.kmer_len(), &"tests/test_files_in/test_ref.fa", ska_dict.rc(), mask_repeats); +//! let mask_ambiguous = false; +//! let mut ref_kmers = RefSka::new(ska_dict.kmer_len(), &"tests/test_files_in/test_ref.fa", ska_dict.rc(), mask_repeats, mask_ambiguous); //! //! // Run mapping, output an alignment to stdout //! ref_kmers.map(&ska_dict); @@ -98,6 +99,8 @@ where k: usize, /// Concatenated list of split k-mers split_kmer_pos: Vec>, + /// Replace ambiguous bases with N + ambig_mask: bool, /// Input sequence @@ -158,7 +161,7 @@ where /// - File doesn't exist or can't be opened. /// - File cannot be parsed as FASTA (FASTQ is not supported). /// - If there are no valid split k-mers. - pub fn new(k: usize, filename: &str, rc: bool, repeat_mask: bool) -> Self { + pub fn new(k: usize, filename: &str, rc: bool, ambig_mask: bool, repeat_mask: bool) -> Self { if !(5..=63).contains(&k) || k % 2 == 0 { panic!("Invalid k-mer length"); } @@ -267,6 +270,7 @@ where Self { k, seq, + ambig_mask, chrom_names, split_kmer_pos, repeat_coors, @@ -342,9 +346,15 @@ where if !self.is_mapped() { panic!("No split k-mers mapped to reference"); } + if self.ambig_mask { + log::info!("Masking any ambiguous bases (non-A/C/G/T/U/N/-) with 'N'"); + } let mut seq_writers = - vec![AlnWriter::new(&self.seq, self.k, &self.repeat_coors); self.mapped_names.len()]; + vec![ + AlnWriter::new(&self.seq, self.k, &self.repeat_coors, self.ambig_mask); + self.mapped_names.len() + ]; rayon::ThreadPoolBuilder::new() .num_threads(threads) .build_global() diff --git a/src/ska_ref/aln_writer.rs b/src/ska_ref/aln_writer.rs index fb94efb..7227fb4 100644 --- a/src/ska_ref/aln_writer.rs +++ b/src/ska_ref/aln_writer.rs @@ -39,6 +39,8 @@ pub struct AlnWriter<'a> { finalised: bool, /// Repeats to mask, which happens at finalisation repeat_regions: &'a Vec, + /// Whether to treat all ambiguous bases as Ns + mask_ambig: bool, /// Buffer for amibguous bases, which are written at finalisation. _ambig_out: Vec<(u8, usize)>, } @@ -46,7 +48,12 @@ pub struct AlnWriter<'a> { impl<'a> AlnWriter<'a> { /// Create a new [`AlnWriter`] taking the reference sequence mapped against /// and the k-mer size used for the mapping - pub fn new(ref_seq: &'a Vec>, k: usize, repeat_regions: &'a Vec) -> Self { + pub fn new( + ref_seq: &'a Vec>, + k: usize, + repeat_regions: &'a Vec, + mask_ambig: bool, + ) -> Self { let total_size = ref_seq.iter().map(|x| x.len()).sum(); let half_split_len = (k - 1) / 2; Self { @@ -60,6 +67,7 @@ impl<'a> AlnWriter<'a> { half_split_len, finalised: false, repeat_regions, + mask_ambig, _ambig_out: Vec::new(), } } @@ -117,12 +125,15 @@ impl<'a> AlnWriter<'a> { if mapped_chrom > self.curr_chrom { self.fill_contig(); } + // Ambiguous bases may clash with the flanks, which are copied from + // reference. Deal with these in `finalise`. + if is_ambiguous(base) { + self._ambig_out.push(( + if self.mask_ambig { b'N' } else { base }, + mapped_pos + self.chrom_offset, + )); + } if mapped_pos < self.next_pos { - // Ambiguous bases may clash with the flanks, which are copied from - // reference. Deal with these in `finalise`. - if is_ambiguous(base) { - self._ambig_out.push((base, mapped_pos + self.chrom_offset)); - } self.last_mapped = mapped_pos; } else { // Write bases between last match and this one diff --git a/tests/align.rs b/tests/align.rs index d920e8b..6228c3b 100644 --- a/tests/align.rs +++ b/tests/align.rs @@ -162,7 +162,7 @@ fn filters() { let unfilt_align_out = Command::new(cargo_bin("ska")) .current_dir(sandbox.get_wd()) .arg("align") - .arg(sandbox.file_string("merge_k9.fa", TestDir::Input)) + .arg(sandbox.file_string("merge_k9.skf", TestDir::Input)) .arg("--filter") .arg("no-filter") .arg("-v") @@ -170,9 +170,26 @@ fn filters() { .unwrap() .stdout; + let full_length = 38; // could also get 38 from ska nk let lengths = aln_length(&unfilt_align_out); for length in lengths { - assert_eq!(39, length); // could also get 39 from ska nk + assert_eq!(full_length, length); + } + + let noambig_align_out = Command::new(cargo_bin("ska")) + .current_dir(sandbox.get_wd()) + .arg("align") + .arg(sandbox.file_string("merge_k9.skf", TestDir::Input)) + .arg("--filter") + .arg("no-ambig") + .arg("-v") + .output() + .unwrap() + .stdout; + + let lengths = aln_length(&noambig_align_out); + for length in lengths { + assert_eq!(full_length - 1, length); } let const_filt_align_out = Command::new(cargo_bin("ska")) @@ -186,10 +203,10 @@ fn filters() { .unwrap() .stdout; - let mut correct_aln = HashSet::from([vec!['T', 'A'], vec!['C', 'T'], vec!['S', 'G']]); + let correct_aln = HashSet::from([vec!['T', 'A'], vec!['C', 'T'], vec!['S', 'G']]); assert_eq!(var_hash(&const_filt_align_out), correct_aln); - let no_ambig_align_out = Command::new(cargo_bin("ska")) + let no_ambig_or_const_align_out = Command::new(cargo_bin("ska")) .current_dir(sandbox.get_wd()) .arg("align") .arg(sandbox.file_string("merge_k9.skf", TestDir::Input)) @@ -200,8 +217,23 @@ fn filters() { .unwrap() .stdout; - correct_aln.remove(&vec!['S', 'G']); - assert_eq!(var_hash(&no_ambig_align_out), correct_aln); + let correct_aln = HashSet::from([vec!['T', 'A'], vec!['C', 'T']]); + assert_eq!(var_hash(&no_ambig_or_const_align_out), correct_aln); + + let ambig_filter_align_out = Command::new(cargo_bin("ska")) + .current_dir(sandbox.get_wd()) + .arg("align") + .arg(sandbox.file_string("merge_k9.skf", TestDir::Input)) + .arg("--filter") + .arg("no-const") + .arg("--ambig-mask") + .arg("-v") + .output() + .unwrap() + .stdout; + + let correct_aln = HashSet::from([vec!['T', 'A'], vec!['C', 'T'], vec!['N', 'G']]); + assert_eq!(var_hash(&ambig_filter_align_out), correct_aln); } #[test] diff --git a/tests/map.rs b/tests/map.rs index ed4ea12..339d937 100644 --- a/tests/map.rs +++ b/tests/map.rs @@ -42,6 +42,16 @@ fn map_aln() { .assert() .stdout_eq_path(sandbox.file_string("map_aln_k9.stdout", TestDir::Correct)); + Command::new(cargo_bin("ska")) + .current_dir(sandbox.get_wd()) + .arg("map") + .arg(sandbox.file_string("test_ref.fa", TestDir::Input)) + .arg(sandbox.file_string("merge_k9.skf", TestDir::Input)) + .arg("--ambig-mask") + .arg("-v") + .assert() + .stdout_eq_path(sandbox.file_string("map_aln_k9_filter.stdout", TestDir::Correct)); + Command::new(cargo_bin("ska")) .current_dir(sandbox.get_wd()) .arg("map") diff --git a/tests/skf_ops.rs b/tests/skf_ops.rs index d2f652f..674d2e1 100644 --- a/tests/skf_ops.rs +++ b/tests/skf_ops.rs @@ -202,6 +202,29 @@ fn weed() { .assert() .stdout_matches_path(sandbox.file_string("weed_nk.stdout", TestDir::Correct)); + // Masking ambig sites + Command::new("cp") + .current_dir(sandbox.get_wd()) + .arg(sandbox.file_string("merge_k9.skf", TestDir::Input)) + .arg("merge_k9.skf") + .assert() + .success(); + + Command::new(cargo_bin("ska")) + .current_dir(sandbox.get_wd()) + .arg("weed") + .arg("merge_k9.skf") + .arg("--ambig-mask") + .assert() + .success(); + + Command::new(cargo_bin("ska")) + .current_dir(sandbox.get_wd()) + .arg("nk") + .arg("merge_k9.skf") + .assert() + .stdout_matches_path(sandbox.file_string("weed_nk_k9.stdout", TestDir::Correct)); + // Keep rather than weed Command::new("cp") .current_dir(sandbox.get_wd()) diff --git a/tests/test_results_correct/map_aln_k9_filter.stdout b/tests/test_results_correct/map_aln_k9_filter.stdout new file mode 100644 index 0000000..549e2be --- /dev/null +++ b/tests/test_results_correct/map_aln_k9_filter.stdout @@ -0,0 +1,4 @@ +>test_1 +------------GATTTGAAGGCGTTTATGATATTTAGCTGTACGAGAGTNTTTTAAGAGTNTTTTGATGGTTT------------ +>test_2 +------------GATTTGAAGGCGTTTATGATATATAGCTGTACGAGAGTCTTTTAAAAGTGTTTTGATGGTTT------------ diff --git a/tests/test_results_correct/weed_nk_k9.stdout b/tests/test_results_correct/weed_nk_k9.stdout new file mode 100644 index 0000000..26093c8 --- /dev/null +++ b/tests/test_results_correct/weed_nk_k9.stdout @@ -0,0 +1,9 @@ +ska_version=[..] +k=9 +k_bits=64 +rc=true +k-mers=68 +samples=2 +sample_names=["test_1", "test_2"] +sample_kmers=[53, 53] +