diff --git a/src/bin/ddnnife.rs b/src/bin/ddnnife.rs index 35edd89..32f1c57 100644 --- a/src/bin/ddnnife.rs +++ b/src/bin/ddnnife.rs @@ -4,7 +4,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use clap::{ArgGroup, Parser, Subcommand}; use ddnnf_lib::ddnnf::anomalies::t_wise_sampling::save_sample_to_file; -use ddnnf_lib::parser::util::format_vec; +use ddnnf_lib::util::format_vec; use itertools::Itertools; use std::fs::File; diff --git a/src/ddnnf/anomalies/t_wise_sampling.rs b/src/ddnnf/anomalies/t_wise_sampling.rs index 6b88589..0d1239e 100644 --- a/src/ddnnf/anomalies/t_wise_sampling.rs +++ b/src/ddnnf/anomalies/t_wise_sampling.rs @@ -1,32 +1,27 @@ -pub mod covering_strategies; -pub mod data_structure; -pub mod sample_merger; -pub mod sat_wrapper; -pub mod t_iterator; +mod config_partial_iterator; +mod covering_strategies; +mod data_structure; +mod sample_merger; +mod sampling_result; +mod sat_wrapper; +mod t_iterator; +mod t_wise_sampler; -use std::cmp::min; -use std::collections::{HashMap, HashSet}; -use std::path::Path; -use std::{fmt, fs, io, iter}; - -use crate::ddnnf::anomalies::t_wise_sampling::sample_merger::{AndMerger, OrMerger}; -use crate::ddnnf::anomalies::t_wise_sampling::SamplingResult::ResultWithSample; -use rand::prelude::{SliceRandom, StdRng}; -use rand::SeedableRng; -use streaming_iterator::StreamingIterator; - -use crate::parser::util::format_vec; -use crate::{Ddnnf, NodeType::*}; - -use self::covering_strategies::cover_with_caching; use self::data_structure::Sample; use self::sample_merger::similarity_merger::SimilarityMerger; use self::sample_merger::zipping_merger::ZippingMerger; +use self::sampling_result::SamplingResult; use self::sat_wrapper::SatWrapper; -use self::t_iterator::TInteractionIter; +use self::t_wise_sampler::TWiseSampler; +use crate::util::format_vec; +use crate::Ddnnf; +use std::path::Path; +use std::{fs, io, iter}; impl Ddnnf { + /// Generates samples so that all t-wise interactions between literals are covered. 
pub fn sample_t_wise(&self, t: usize) -> SamplingResult { + // Setup everything needed for the sampling process. let sat_solver = SatWrapper::new(self); let and_merger = ZippingMerger { t, @@ -34,336 +29,9 @@ impl Ddnnf { ddnnf: self, }; let or_merger = SimilarityMerger { t }; - let mut rng = StdRng::seed_from_u64(42); - let mut sampler = TWiseSampler::new(self, and_merger, or_merger); - - for node_id in 0..sampler.ddnnf.nodes.len() { - let partial_sample = sampler.make_partial_sample(node_id, &mut rng); - sampler.partial_samples.insert(node_id, partial_sample); - } - - let root_id = sampler.ddnnf.nodes.len() - 1; - - let sampling_result = sampler - .partial_samples - .remove(&root_id) - .expect("Root sample does not exist!"); - - if let ResultWithSample(mut sample) = sampling_result { - sample = trim_and_resample( - root_id, - sample, - t, - self.number_of_variables as usize, - &sat_solver, - &mut rng, - ); - sampler.complete_partial_configs(&mut sample, root_id, &sat_solver); - ResultWithSample(sample) - } else { - sampling_result - } - } -} - -struct TWiseSampler<'a, A: AndMerger, O: OrMerger> { - ddnnf: &'a Ddnnf, - /// Map that holds the [SamplingResult]s for the nodes. - partial_samples: HashMap, - and_merger: A, - or_merger: O, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum SamplingResult { - /// An empty result that is *valid* (i.e., a regular sample that contains 0 configurations). - /// This is used to indicate that a subgraph evaluates to true. - Empty, - /// An empty result that is *invalid*. - /// This is used to indicate that a subgraph evaluates to false. - Void, - /// A *valid* Result that just has a regular sample, nothing special. 
- ResultWithSample(Sample), -} - -impl SamplingResult { - pub fn get_sample(&self) -> Option<&Sample> { - if let ResultWithSample(sample) = self { - Some(sample) - } else { - None - } - } - - pub fn len(&self) -> usize { - match self { - SamplingResult::Empty | SamplingResult::Void => 0, - ResultWithSample(sample) => sample.len(), - } - } - - pub fn is_empty(&self) -> bool { - match self { - SamplingResult::Empty | SamplingResult::Void => true, - ResultWithSample(sample) => sample.is_empty(), - } - } -} - -const EXPECT_SAMPLE: &str = "children should have a sampling result when sampling their parent"; - -impl fmt::Display for SamplingResult { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - SamplingResult::Empty | SamplingResult::Void => write!(f, ""), - ResultWithSample(sample) => write!(f, "{}", format_vec(sample.iter())), - } - } -} - -impl<'a, A: AndMerger, O: OrMerger> TWiseSampler<'a, A, O> { - /// Constructs a new sampler - fn new(ddnnf: &'a Ddnnf, and_merger: A, or_merger: O) -> Self { - Self { - ddnnf, - partial_samples: HashMap::with_capacity(ddnnf.nodes.len()), - and_merger, - or_merger, - } - } - - /// Generates a sample for the sub-graph rooted at the given node. If the node is an AND or an - /// OR node, then it is assumed that all direct children of the node already have a sample. - /// The caller has to make sure that this is the case (usually by calling this method for the - /// children first). - /// - /// # Panics - /// Panics if one child does not have a [SamplingResult] in [TWiseSampler::partial_samples]. 
- fn make_partial_sample(&mut self, node_id: usize, rng: &mut StdRng) -> SamplingResult { - let node = self.ddnnf.nodes.get(node_id).expect("Node does not exist!"); - match &node.ntype { - Literal { literal } => ResultWithSample(Sample::from_literal( - *literal, - self.ddnnf.number_of_variables as usize, - )), - And { children } => { - let sample = self.sample_and(node_id, self.get_child_results(children), rng); - self.remove_not_needed_samples(node_id, children); - sample - } - Or { children } => { - let sample = self.sample_or(node_id, self.get_child_results(children), rng); - self.remove_not_needed_samples(node_id, children); - sample - } - True => SamplingResult::Empty, - False => SamplingResult::Void, - } - } - - /// Remove samples that are no longer needed to reduce memory usage. A sample is no - /// longer needed if all it's parent nodes have a sample. - fn remove_not_needed_samples(&mut self, node_id: usize, children: &[usize]) { - for child in children { - let node = self.ddnnf.nodes.get(*child).expect(EXPECT_SAMPLE); - if node.parents.iter().all(|parent| *parent <= node_id) { - // delete no longer needed sample - self.partial_samples.remove(child); - } - } - } - - /// Returns a vec with references to [SamplingResult]s for the given children - fn get_child_results(&self, children: &[usize]) -> Vec<&SamplingResult> { - children - .iter() - .map(|child| self.partial_samples.get(child).expect(EXPECT_SAMPLE)) - .collect() + TWiseSampler::new(self, and_merger, or_merger).sample(t) } - - /// Merges the samples of the given children by assuming they are the children of a - /// AND node. 
- fn sample_and( - &self, - node_id: usize, - child_results: Vec<&SamplingResult>, - rng: &mut StdRng, - ) -> SamplingResult { - if child_results - .iter() - .any(|result| matches!(result, SamplingResult::Void)) - { - return SamplingResult::Void; - } - - let child_samples: Vec<&Sample> = child_results - .into_iter() - .filter_map(SamplingResult::get_sample) - .collect(); - - let sample = self.and_merger.merge_all(node_id, &child_samples, rng); - if sample.is_empty() { - SamplingResult::Empty - } else { - ResultWithSample(sample) - } - } - - /// Merges the samples of the given children by assuming they are the children of a - /// OR node. - fn sample_or( - &self, - node_id: usize, - child_results: Vec<&SamplingResult>, - rng: &mut StdRng, - ) -> SamplingResult { - if child_results - .iter() - .all(|result| matches!(result, SamplingResult::Void)) - { - return SamplingResult::Void; - } - - let child_samples: Vec<&Sample> = child_results - .into_iter() - .filter_map(SamplingResult::get_sample) - .collect(); - - let sample = self.or_merger.merge_all(node_id, &child_samples, rng); - if sample.is_empty() { - SamplingResult::Empty - } else { - ResultWithSample(sample) - } - } - - fn complete_partial_configs(&self, sample: &mut Sample, root: usize, sat_solver: &SatWrapper) { - let vars: Vec = (1..=self.ddnnf.number_of_variables as i32).collect(); - for config in sample.partial_configs.iter_mut() { - for &var in vars.iter() { - if config.contains(var) || config.contains(-var) { - continue; - } - - config.update_sat_state(sat_solver, root); - - // clone sat state so that we don't change the state that is cached in the config - let mut sat_state = config - .get_sat_state() - .cloned() - .expect("sat state should exist after calling update_sat_state()"); - - if sat_solver.is_sat_cached(&[var], &mut sat_state) { - config.add(var); - } else { - config.add(-var); - } - } - } - - debug_assert!(sample - .iter() - .all(|config| !config.get_literals().contains(&0))); - } -} - 
-#[inline] -fn trim_and_resample( - node_id: usize, - sample: Sample, - t: usize, - number_of_variables: usize, - sat_solver: &SatWrapper, - rng: &mut StdRng, -) -> Sample { - if sample.is_empty() { - return sample; - } - - let t = min(sample.get_vars().len(), t); - let (ranks, avg_rank) = calc_stats(&sample, t); - - let (mut new_sample, literals_to_resample) = trim_sample(&sample, &ranks, avg_rank); - - let mut literals_to_resample: Vec = literals_to_resample.into_iter().collect(); - literals_to_resample.sort_unstable(); - literals_to_resample.shuffle(rng); - - let mut iter = TInteractionIter::new(&literals_to_resample, t); - while let Some(interaction) = iter.next() { - cover_with_caching( - &mut new_sample, - interaction, - sat_solver, - node_id, - number_of_variables, - ); - } - - if new_sample.len() < sample.len() { - new_sample - } else { - sample - } -} - -#[inline] -fn trim_sample(sample: &Sample, ranks: &[f64], avg_rank: f64) -> (Sample, HashSet) { - let mut literals_to_resample: HashSet = HashSet::new(); - let mut new_sample = Sample::new_from_samples(&[sample]); - let complete_len = sample.complete_configs.len(); - - for (index, config) in sample.iter().enumerate() { - if ranks[index] < avg_rank { - literals_to_resample.extend(config.get_decided_literals()); - } else if index < complete_len { - new_sample.add_complete(config.clone()); - } else { - new_sample.add_partial(config.clone()); - } - } - (new_sample, literals_to_resample) -} - -#[inline] -fn calc_stats(sample: &Sample, t: usize) -> (Vec, f64) { - let mut unique_coverage = vec![0; sample.len()]; - let mut iter = TInteractionIter::new(sample.get_literals(), t); - while let Some(interaction) = iter.next() { - if let Some(conf_index) = find_unique_covering_conf(sample, interaction) { - unique_coverage[conf_index] += 1; - } - } - - let mut ranks = vec![0.0; sample.len()]; - let mut sum: f64 = 0.0; - - for (index, config) in sample.iter().enumerate() { - let config_size = 
config.get_decided_literals().count(); - ranks[index] = unique_coverage[index] as f64 / config_size.pow(t as u32) as f64; - sum += ranks[index]; - } - - let avg_rank = sum / sample.len() as f64; - (ranks, avg_rank) -} - -#[inline] -fn find_unique_covering_conf(sample: &Sample, interaction: &[i32]) -> Option { - let mut result = None; - - for (index, config) in sample.iter().enumerate() { - if config.covers(interaction) { - if result.is_none() { - result = Some(index); - } else { - return None; - } - } - } - - result } pub fn save_sample_to_file(sampling_result: &SamplingResult, file_path: &str) -> io::Result<()> { @@ -383,7 +51,7 @@ pub fn save_sample_to_file(sampling_result: &SamplingResult, file_path: &str) -> */ SamplingResult::Empty => wtr.write_record(iter::once("true"))?, SamplingResult::Void => wtr.write_record(iter::once("false"))?, - ResultWithSample(sample) => { + SamplingResult::ResultWithSample(sample) => { for (index, config) in sample.iter().enumerate() { wtr.write_record([index.to_string(), format_vec(config.get_literals().iter())])?; } @@ -413,7 +81,7 @@ mod test { fn check_validity_samplingresult(ddnnf: &mut Ddnnf, t: usize) { let t_wise_samples = ddnnf.sample_t_wise(t); let configs = t_wise_samples - .get_sample() + .optional() .unwrap() .iter() .map(|config| config.get_literals()) diff --git a/src/ddnnf/anomalies/t_wise_sampling/config_partial_iterator.rs b/src/ddnnf/anomalies/t_wise_sampling/config_partial_iterator.rs new file mode 100644 index 0000000..54ecd38 --- /dev/null +++ b/src/ddnnf/anomalies/t_wise_sampling/config_partial_iterator.rs @@ -0,0 +1,80 @@ +use crate::ddnnf::anomalies::t_wise_sampling::data_structure::Config; + +/// An iterator over all possible partial configurations of given length of a configuration. +#[derive(Debug)] +pub struct ConfigPartialsIterator { + /// The length of partial configurations to return. + length: usize, + /// All available literals of the corresponding config. 
+ literals: Vec, + /// Indices of the next interaction. + indices: Vec, + /// The current index increasing. + current_index: usize, +} + +impl ConfigPartialsIterator { + /// Creates a new iterator for the given config and length. + pub fn new(config: Config, length: usize) -> Self { + let literals = config.get_decided_literals().collect(); + + Self { + length, + literals, + indices: vec![0; length], + current_index: 0, + } + } + + /// Steps the indices vector further. + fn step(&mut self) { + // Move to the next index in case the current is complete. + if self.indices[self.current_index] == self.literals.len() - 1 { + self.current_index += 1; + self.step(); + return; + } + + // Otherwise simply increase the current. + self.indices[self.current_index] += 1; + } + + /// Whether the current indices contain duplicate literals. + fn has_duplicates(&self) -> bool { + self.indices + .iter() + .enumerate() + .any(|(n, index)| self.indices.iter().skip(n + 1).any(|other| index == other)) + } + + /// Whether this iterator has reached the end. + fn is_max(&self) -> bool { + self.indices + .iter() + .all(|&index| index == self.literals.len() - 1) + } +} + +impl Iterator for ConfigPartialsIterator { + type Item = Vec; + + /// The next interaction is retrieved by stepping the indices vector forward. 
+ fn next(&mut self) -> Option { + self.step(); + + while self.has_duplicates() { + self.step(); + + if self.is_max() { + return None; + } + } + + let next = self + .indices + .iter() + .map(|&index| self.literals[index]) + .collect(); + Some(next) + } +} diff --git a/src/ddnnf/anomalies/t_wise_sampling/covering_strategies.rs b/src/ddnnf/anomalies/t_wise_sampling/covering_strategies.rs index 4813006..fa1819f 100644 --- a/src/ddnnf/anomalies/t_wise_sampling/covering_strategies.rs +++ b/src/ddnnf/anomalies/t_wise_sampling/covering_strategies.rs @@ -10,14 +10,73 @@ pub(super) fn cover_with_caching( number_of_vars: usize, ) { debug_assert!(!interaction.iter().any(|x| *x == 0)); + if sample.covers(interaction) { return; // already covered } + let mut interaction_sat_state = sat_solver.new_state(); + if !sat_solver.is_sat_in_subgraph_cached(interaction, node_id, &mut interaction_sat_state) { return; // interaction invalid } + let mut found = cover(sample, interaction, sat_solver, node_id); + + if let Some(index) = found { + // move config to the complete configs if it is complete now + let config = sample.partial_configs.get(index).expect(""); + if sample.is_config_complete(config) { + let config = sample.partial_configs.swap_remove(index); + sample.add_complete(config); + } + } else { + // no config found - create new config + let mut config = Config::from(interaction, number_of_vars); + config.set_sat_state(interaction_sat_state); + sample.add(config); + } +} + +/// Covering strategy that uses the sat state caching. 
+pub(super) fn cover_with_caching_twise( + sample: &mut Sample, + interaction: &[i32], + sat_solver: &SatWrapper, + node_id: usize, + number_of_vars: usize, +) { + debug_assert!(!interaction.iter().any(|x| *x == 0)); + + if sample.covers(interaction) { + return; // already covered + } + + let mut found = cover(sample, interaction, sat_solver, node_id); + + if let Some(index) = found { + // move config to the complete configs if it is complete now + let config = sample.partial_configs.get(index).expect(""); + if sample.is_config_complete(config) { + let config = sample.partial_configs.swap_remove(index); + sample.add_complete(config); + } + } else { + // no config found - create new config + let mut interaction_sat_state = sat_solver.new_state(); + sat_solver.is_sat_in_subgraph_cached(interaction, node_id, &mut interaction_sat_state); + let mut config = Config::from(interaction, number_of_vars); + config.set_sat_state(interaction_sat_state); + sample.add(config); + } +} + +fn cover( + sample: &mut Sample, + interaction: &[i32], + sat_solver: &SatWrapper, + node_id: usize, +) -> Option { let mut found = None; for (index, config) in sample.partial_configs.iter_mut().enumerate() { if config.conflicts_with(interaction) { @@ -41,17 +100,5 @@ pub(super) fn cover_with_caching( } } - if let Some(index) = found { - // move config to the complete configs if it is complete now - let config = sample.partial_configs.get(index).expect(""); - if sample.is_config_complete(config) { - let config = sample.partial_configs.swap_remove(index); - sample.add_complete(config); - } - } else { - // no config found - create new config - let mut config = Config::from(interaction, number_of_vars); - config.set_sat_state(interaction_sat_state); - sample.add(config); - } + found } diff --git a/src/ddnnf/anomalies/t_wise_sampling/data_structure.rs b/src/ddnnf/anomalies/t_wise_sampling/data_structure.rs index 5dca91f..b6e5632 100644 --- a/src/ddnnf/anomalies/t_wise_sampling/data_structure.rs +++ 
b/src/ddnnf/anomalies/t_wise_sampling/data_structure.rs @@ -1,5 +1,5 @@ use crate::ddnnf::anomalies::t_wise_sampling::sat_wrapper::SatWrapper; -use crate::parser::util::format_vec; +use crate::util::format_vec; use std::cmp::Ordering; use std::collections::HashSet; use std::fmt::Display; @@ -9,7 +9,7 @@ use std::iter; #[derive(Debug, Clone, Eq)] pub struct Config { /// A vector of selected features (positive values) and deselected features (negative values) - literals: Vec, + pub literals: Vec, pub sat_state: Option>, sat_state_complete: bool, } diff --git a/src/ddnnf/anomalies/t_wise_sampling/sample_merger.rs b/src/ddnnf/anomalies/t_wise_sampling/sample_merger.rs index 0719d16..709c6a4 100644 --- a/src/ddnnf/anomalies/t_wise_sampling/sample_merger.rs +++ b/src/ddnnf/anomalies/t_wise_sampling/sample_merger.rs @@ -1,6 +1,6 @@ use crate::ddnnf::anomalies::t_wise_sampling::data_structure::{Config, Sample}; +use crate::ddnnf::anomalies::t_wise_sampling::SamplingResult; use crate::Ddnnf; -use rand::prelude::StdRng; pub mod similarity_merger; pub mod zipping_merger; @@ -8,31 +8,28 @@ pub mod zipping_merger; pub(super) trait SampleMerger { /// Creates a new sample by merging two samples. /// The merging follows the behaviour defined by the merger. - fn merge(&self, node_id: usize, left: &Sample, right: &Sample, rng: &mut StdRng) -> Sample; + fn merge(&self, node_id: usize, left: &Sample, right: &Sample) -> Sample; /// Creates a new sample by merging two samples. /// The merging follows the behaviour defined by the merger. /// /// This method only works in place if the used merger actually overrides this method. /// The default implementation calls [Self::merge()] and is therefore not in place. 
- fn merge_in_place( - &self, - node_id: usize, - left: Sample, - right: &Sample, - rng: &mut StdRng, - ) -> Sample { - self.merge(node_id, &left, right, rng) + fn merge_in_place(&self, node_id: usize, left: Sample, right: &Sample) -> Sample { + self.merge(node_id, &left, right) } /// Creates a new sample by merging all given samples. /// The merging follows the behaviour defined by the merger. /// Returns [Sample::empty] if the given slice is empty. - fn merge_all(&self, node_id: usize, samples: &[&Sample], rng: &mut StdRng) -> Sample { + fn merge_all(&self, node_id: usize, samples: &[&Sample]) -> Sample { samples.iter().fold(Sample::default(), |acc, &sample| { - self.merge_in_place(node_id, acc, sample, rng) + self.merge_in_place(node_id, acc, sample) }) } + + /// Determines whether a set of results short-circuits to a void sample under the assumptions of this sampler. + fn is_void(&self, samples: &[&SamplingResult]) -> bool; } /// This is a marker trait that indicates that a [SampleMerger] is for merging the samples @@ -50,7 +47,7 @@ pub(super) struct DummyAndMerger<'a> { } impl SampleMerger for DummyAndMerger<'_> { - fn merge(&self, _node_id: usize, left: &Sample, right: &Sample, _rng: &mut StdRng) -> Sample { + fn merge(&self, _node_id: usize, left: &Sample, right: &Sample) -> Sample { if left.is_empty() { return right.clone(); } else if right.is_empty() { @@ -72,6 +69,10 @@ impl SampleMerger for DummyAndMerger<'_> { sample } + + fn is_void(&self, _samples: &[&SamplingResult]) -> bool { + false + } } impl AndMerger for DummyAndMerger<'_> {} @@ -81,7 +82,7 @@ impl AndMerger for DummyAndMerger<'_> {} pub(super) struct DummyOrMerger {} impl SampleMerger for DummyOrMerger { - fn merge(&self, _node_id: usize, left: &Sample, right: &Sample, _rng: &mut StdRng) -> Sample { + fn merge(&self, _node_id: usize, left: &Sample, right: &Sample) -> Sample { if left.is_empty() { return right.clone(); } else if right.is_empty() { @@ -101,6 +102,10 @@ impl SampleMerger for 
DummyOrMerger { sample } + + fn is_void(&self, _samples: &[&SamplingResult]) -> bool { + false + } } impl OrMerger for DummyOrMerger {} diff --git a/src/ddnnf/anomalies/t_wise_sampling/sample_merger/similarity_merger.rs b/src/ddnnf/anomalies/t_wise_sampling/sample_merger/similarity_merger.rs index 364d536..f0b6d4f 100644 --- a/src/ddnnf/anomalies/t_wise_sampling/sample_merger/similarity_merger.rs +++ b/src/ddnnf/anomalies/t_wise_sampling/sample_merger/similarity_merger.rs @@ -1,11 +1,11 @@ use crate::ddnnf::anomalies::t_wise_sampling::data_structure::{Config, Sample}; use crate::ddnnf::anomalies::t_wise_sampling::sample_merger::{OrMerger, SampleMerger}; use crate::ddnnf::anomalies::t_wise_sampling::t_iterator::TInteractionIter; +use crate::ddnnf::anomalies::t_wise_sampling::SamplingResult; +use crate::util::rng; +use rand::prelude::SliceRandom; use std::cmp::{min, Ordering}; - -use rand::prelude::{SliceRandom, StdRng}; use std::collections::HashSet; - use streaming_iterator::StreamingIterator; #[derive(Debug, Copy, Clone)] @@ -17,13 +17,7 @@ pub struct SimilarityMerger { impl OrMerger for SimilarityMerger {} impl SampleMerger for SimilarityMerger { - fn merge<'a>( - &self, - _node_id: usize, - left: &Sample, - right: &Sample, - rng: &mut StdRng, - ) -> Sample { + fn merge<'a>(&self, _node_id: usize, left: &Sample, right: &Sample) -> Sample { if left.is_empty() { return right.clone(); } else if right.is_empty() { @@ -54,7 +48,7 @@ impl SampleMerger for SimilarityMerger { .map(|(index, _)| index) { let next = candidates.swap_remove(next); - if next.is_t_wise_covered_by(&new_sample, self.t, rng) { + if next.is_t_wise_covered_by(&new_sample, self.t) { continue; } @@ -64,6 +58,13 @@ impl SampleMerger for SimilarityMerger { } new_sample } + + // For an or node, all samples have to be void for the resulting sample to also be void. 
+ fn is_void(&self, samples: &[&SamplingResult]) -> bool { + samples + .iter() + .all(|result| matches!(result, SamplingResult::Void)) + } } fn snd<'a>((_, candidate): &(usize, &'a Candidate<'_>)) -> &'a Candidate<'a> { @@ -121,7 +122,7 @@ impl<'a> Candidate<'a> { } } - fn is_t_wise_covered_by(&self, sample: &Sample, t: usize, rng: &mut StdRng) -> bool { + fn is_t_wise_covered_by(&self, sample: &Sample, t: usize) -> bool { if self.max_intersect == self.literals.len() { return true; } @@ -139,7 +140,7 @@ impl<'a> Candidate<'a> { } let mut literals: Vec = self.config.get_decided_literals().collect(); - literals.shuffle(rng); + literals.shuffle(&mut rng()); debug_assert!(!literals.contains(&0)); TInteractionIter::new(&literals, min(t, literals.len())) @@ -150,16 +151,14 @@ impl<'a> Candidate<'a> { #[cfg(test)] mod test { use super::*; - use rand::SeedableRng; #[test] fn test_similarity_merger() { let merger = SimilarityMerger { t: 2 }; - let mut rng = StdRng::seed_from_u64(42); let left = Sample::new_from_configs(vec![Config::from(&[1], 1)]); let right = Sample::new_from_configs(vec![Config::from(&[1], 1)]); - let merged = merger.merge(0, &left, &right, &mut rng); + let merged = merger.merge(0, &left, &right); assert_eq!( merged, Sample::new_from_configs(vec![Config::from(&[1], 1)]) @@ -171,7 +170,6 @@ mod test { let number_of_variables = 4; let candidate_config = Config::from(&[1, 2, 3, 4], number_of_variables); let mut candidate = Candidate::new(&candidate_config); - let mut rng = StdRng::seed_from_u64(42); let sample = Sample::new_from_configs(vec![ Config::from(&[1, 2, 3], number_of_variables), @@ -184,7 +182,7 @@ mod test { .iter() .for_each(|c| candidate.update(&c.get_decided_literals().collect())); - assert!(candidate.is_t_wise_covered_by(&sample, 2, &mut rng)); + assert!(candidate.is_t_wise_covered_by(&sample, 2)); let mut candidate = Candidate::new(&candidate_config); let sample = Sample::new_from_configs(vec![ @@ -197,6 +195,6 @@ mod test { .iter() 
.for_each(|c| candidate.update(&c.get_decided_literals().collect())); - assert!(!candidate.is_t_wise_covered_by(&sample, 2, &mut rng)); + assert!(!candidate.is_t_wise_covered_by(&sample, 2)); } } diff --git a/src/ddnnf/anomalies/t_wise_sampling/sample_merger/zipping_merger.rs b/src/ddnnf/anomalies/t_wise_sampling/sample_merger/zipping_merger.rs index 3737836..2bed746 100644 --- a/src/ddnnf/anomalies/t_wise_sampling/sample_merger/zipping_merger.rs +++ b/src/ddnnf/anomalies/t_wise_sampling/sample_merger/zipping_merger.rs @@ -1,13 +1,15 @@ -use crate::ddnnf::anomalies::t_wise_sampling::covering_strategies::cover_with_caching; +use crate::ddnnf::anomalies::t_wise_sampling::covering_strategies::{ + cover_with_caching, cover_with_caching_twise, +}; use crate::ddnnf::anomalies::t_wise_sampling::data_structure::{Config, Sample}; use crate::ddnnf::anomalies::t_wise_sampling::sample_merger::{AndMerger, SampleMerger}; use crate::ddnnf::anomalies::t_wise_sampling::sat_wrapper::SatWrapper; use crate::ddnnf::anomalies::t_wise_sampling::t_iterator::TInteractionIter; +use crate::ddnnf::anomalies::t_wise_sampling::SamplingResult; +use crate::util::rng; use crate::Ddnnf; +use rand::prelude::{IteratorRandom, SliceRandom}; use std::cmp::min; - -use rand::prelude::{SliceRandom, StdRng}; - use streaming_iterator::StreamingIterator; #[derive(Debug, Clone)] @@ -21,7 +23,8 @@ pub struct ZippingMerger<'a> { impl AndMerger for ZippingMerger<'_> {} impl SampleMerger for ZippingMerger<'_> { - fn merge(&self, node_id: usize, left: &Sample, right: &Sample, rng: &mut StdRng) -> Sample { + fn merge(&self, node_id: usize, left: &Sample, right: &Sample) -> Sample { + // eprintln!("[zip merge] {:?} + {:?}", left, right); if left.is_empty() { return right.clone(); } else if right.is_empty() { @@ -34,55 +37,61 @@ impl SampleMerger for ZippingMerger<'_> { Iterate over the remaining interactions. Those are all interactions that contain at least one literal of the left and one of the right subgraph. 
*/ - let mut left_literals: Vec = left.get_literals().to_vec(); - let mut right_literals: Vec = right.get_literals().to_vec(); - left_literals.shuffle(rng); - right_literals.shuffle(rng); - - debug_assert!(!left_literals.iter().any(|x| *x == 0)); - debug_assert!(!right_literals.iter().any(|x| *x == 0)); - - for k in 1..self.t { - // take k literals of the left subgraph and t-k literals of the right subgraph - let left_len = min(left_literals.len(), k); - let right_len = min(right_literals.len(), self.t - k); - //let left_iter = t_wise_over(left_literals, left_len); - let mut left_iter = TInteractionIter::new(&left_literals, left_len); - - while let Some(left_part) = left_iter.next() { - //let right_iter = t_wise_over(right_literals, right_len); - let mut right_iter = TInteractionIter::new(&right_literals, right_len); - while let Some(right_part) = right_iter.next() { - let mut interaction = right_part.to_vec(); - interaction.extend_from_slice(left_part); - cover_with_caching( - &mut sample, - &interaction, - self.sat_solver, - node_id, - self.ddnnf.number_of_variables as usize, - ); - } - } - } + + let right_configs: Vec> = right + .iter() + .map(|config| config.get_decided_literals().collect::>()) + .collect(); + + left.iter() + .map(|config| config.get_decided_literals().collect::>()) + .for_each(|left| { + (1..self.t).into_iter().for_each(|k| { + let left_len = min(left.len(), k); + TInteractionIter::new(&left, left_len).for_each(|left| { + right_configs.iter().for_each(|right| { + let right_len = min(right.len(), self.t - left_len); + TInteractionIter::new(&right, right_len).for_each(|right| { + let mut interaction = right.to_vec(); + interaction.extend_from_slice(left); + cover_with_caching( + &mut sample, + &interaction, + self.sat_solver, + node_id, + self.ddnnf.number_of_variables as usize, + ); + }); + }); + }); + }); + }); + sample } - fn merge_all(&self, node_id: usize, samples: &[&Sample], rng: &mut StdRng) -> Sample { + fn merge_all(&self, node_id: 
usize, samples: &[&Sample]) -> Sample { let (singles, mut samples): (Vec<&Sample>, Vec<&Sample>) = samples.iter().partition(|sample| sample.len() <= 1); let single = singles.iter().fold(Sample::default(), |acc, s| { - self.merge_in_place(node_id, acc, s, rng) + self.merge_in_place(node_id, acc, s) }); samples.push(&single); samples.sort_unstable(); samples.iter().fold(Sample::default(), |acc, s| { - self.merge_in_place(node_id, acc, s, rng) + self.merge_in_place(node_id, acc, s) }) } + + // For an and node, a single void sample will render the resulting sample void. + fn is_void(&self, samples: &[&SamplingResult]) -> bool { + samples + .iter() + .any(|result| matches!(result, SamplingResult::Void)) + } } impl ZippingMerger<'_> { @@ -123,7 +132,6 @@ mod test { use crate::parser::build_ddnnf; use super::*; - use rand::SeedableRng; use std::collections::HashSet; #[test] @@ -167,12 +175,11 @@ mod test { ddnnf: &ddnnf, }; - let mut rng = StdRng::seed_from_u64(42); let mut left_sample = new_with_literals(HashSet::from([2, 3]), vec![-2, 3]); left_sample.add_partial(Config::from(&[3], 4)); let right_sample = Sample::new_from_configs(vec![Config::from(&[1, 4], 4)]); - let result = zipping_merger.merge(node, &left_sample, &right_sample, &mut rng); + let result = zipping_merger.merge(node, &left_sample, &right_sample); let expected = Sample::new_from_configs(vec![Config::from(&[-2, 1, 3, 4], 4)]); assert_eq!(result, expected); } diff --git a/src/ddnnf/anomalies/t_wise_sampling/sampling_result.rs b/src/ddnnf/anomalies/t_wise_sampling/sampling_result.rs new file mode 100644 index 0000000..4e524d3 --- /dev/null +++ b/src/ddnnf/anomalies/t_wise_sampling/sampling_result.rs @@ -0,0 +1,62 @@ +use super::Sample; +use crate::ddnnf::anomalies::t_wise_sampling::sat_wrapper::SatWrapper; +use crate::util::format_vec; +use std::fmt; + +/// An abstraction over the result of sampling as it might be invalid or empty. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SamplingResult { + /// An empty result that is *valid* (a regular sample containing 0 configurations). + /// This is used to indicate that a subgraph evaluates to true. + Empty, + /// An empty result that is *invalid*. + /// This is used to indicate that a subgraph evaluates to false. + Void, + /// A *valid* result having a regular sample. + ResultWithSample(Sample), +} + +impl SamplingResult { + /// Converts a sampling result into an optional sample. + pub fn optional(&self) -> Option<&Sample> { + match self { + SamplingResult::ResultWithSample(sample) => Some(sample), + _ => None, + } + } + + /// Determines how many configuration the sample contains. + pub fn len(&self) -> usize { + match self { + SamplingResult::Empty | SamplingResult::Void => 0, + SamplingResult::ResultWithSample(sample) => sample.len(), + } + } + + /// Determines whether the sample contains no configurations. + pub fn is_empty(&self) -> bool { + match self { + SamplingResult::Empty | SamplingResult::Void => true, + SamplingResult::ResultWithSample(sample) => sample.is_empty(), + } + } +} + +impl From for SamplingResult { + fn from(value: Sample) -> Self { + if value.is_empty() { + return SamplingResult::Empty; + } + + SamplingResult::ResultWithSample(value) + } +} + +impl fmt::Display for SamplingResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SamplingResult::Empty | SamplingResult::Void => write!(f, ""), + SamplingResult::ResultWithSample(sample) => write!(f, "{}", format_vec(sample.iter())), + } + } +} diff --git a/src/ddnnf/anomalies/t_wise_sampling/t_wise_sampler.rs b/src/ddnnf/anomalies/t_wise_sampling/t_wise_sampler.rs new file mode 100644 index 0000000..027cb2e --- /dev/null +++ b/src/ddnnf/anomalies/t_wise_sampling/t_wise_sampler.rs @@ -0,0 +1,277 @@ +use crate::ddnnf::anomalies::t_wise_sampling::covering_strategies::cover_with_caching; +use 
crate::ddnnf::anomalies::t_wise_sampling::data_structure::Sample; +use crate::ddnnf::anomalies::t_wise_sampling::sample_merger::{AndMerger, OrMerger, SampleMerger}; +use crate::ddnnf::anomalies::t_wise_sampling::sampling_result::SamplingResult; +use crate::ddnnf::anomalies::t_wise_sampling::sat_wrapper::SatWrapper; +use crate::ddnnf::anomalies::t_wise_sampling::t_iterator::TInteractionIter; +use crate::util::rng; +use crate::Ddnnf; +use crate::NodeType; +use rand::prelude::SliceRandom; +use std::cmp::min; +use std::collections::{HashMap, HashSet}; +use streaming_iterator::StreamingIterator; + +pub struct TWiseSampler<'a, A: AndMerger, O: OrMerger> { + /// The d-DNNF to sample. + ddnnf: &'a Ddnnf, + /// Map that holds the [SamplingResult]s for the nodes. + partial_samples: HashMap<usize, SamplingResult>, + /// The merger for and nodes. + and_merger: A, + /// The merger for or nodes. + or_merger: O, +} + +impl<'a, A: AndMerger, O: OrMerger> TWiseSampler<'a, A, O> { + /// Constructs a new sampler. + pub fn new(ddnnf: &'a Ddnnf, and_merger: A, or_merger: O) -> Self { + Self { + ddnnf, + partial_samples: HashMap::with_capacity(ddnnf.nodes.len()), + and_merger, + or_merger, + } + } + + pub fn sample(&mut self, t: usize) -> SamplingResult { + let sat_solver = SatWrapper::new(self.ddnnf); + + // Sample each node and keep the result as a partial sample. + for node_id in 0..self.ddnnf.nodes.len() { + let partial_sample = self.partial_sample(node_id); + self.partial_samples.insert(node_id, partial_sample); + } + + let root_id = self.ddnnf.nodes.len() - 1; + + // Extract the resulting (root node) sample for further processing. + let result = self + .partial_samples + .remove(&root_id) + .expect("Root sample does not exist!"); + + // Trim and resample as the finishing step (if there is anything to do). 
+ if let SamplingResult::ResultWithSample(mut sample) = result { + sample = trim_and_resample( + root_id, + sample, + t, + self.ddnnf.number_of_variables as usize, + &sat_solver, + ); + + self.complete_partial_configs(&mut sample, root_id, &sat_solver); + return sample.into(); + } + + result + } + + /// Generates a sample for the sub-graph rooted at the given node. + /// + /// If the node is an and or an or node, then it is assumed that all direct children of the node already have a sample. + /// The caller has to make sure that this is the case (usually by calling this method for the children first). + /// + /// # Panics + /// Panics if one child does not have a [SamplingResult] in [TWiseSampler::partial_samples]. + fn partial_sample(&mut self, node_id: usize) -> SamplingResult { + let node = self.ddnnf.nodes.get(node_id).expect("Node does not exist!"); + + match &node.ntype { + NodeType::Literal { literal } => SamplingResult::ResultWithSample( + Sample::from_literal(*literal, self.ddnnf.number_of_variables as usize), + ), + NodeType::And { children } => { + let sample = self.sample_node(&self.and_merger, node_id, children); + self.remove_unneeded(node_id, children); + sample + } + NodeType::Or { children } => { + let sample = self.sample_node(&self.or_merger, node_id, children); + self.remove_unneeded(node_id, children); + sample + } + NodeType::True => SamplingResult::Empty, + NodeType::False => SamplingResult::Void, + } + } + + /// Merges the samples of the given children by using the specified sampler. + fn sample_node<M: SampleMerger>( + &self, + sampler: &M, + id: usize, + children: &[usize], + ) -> SamplingResult { + // Get the samples of all child nodes. + let children: Vec<&SamplingResult> = children + .iter() + .map(|child| { + self.partial_samples + .get(child) + .expect("Samples of child node not present!") + }) + .collect(); + + // Check whether the set of child nodes short-circuits to a void sample. 
+ if sampler.is_void(&children) { + return SamplingResult::Void; + } + + // Only keep samples with a result. + let samples: Vec<&Sample> = children + .iter() + .filter_map(|sample: &&SamplingResult| sample.optional()) + .collect(); + + // Merge the samples using the specified sampler. + sampler.merge_all(id, &samples).into() + } + + /// Removes samples that are no longer needed to reduce memory usage. + /// + /// A sample is no longer needed if all parent nodes have a sample. + fn remove_unneeded(&mut self, node_id: usize, children: &[usize]) { + // Of all children ... + children + .iter() + // ... find the ones which have all parents processed ... + .filter(|&&id| { + let node = self.ddnnf.nodes.get(id).expect("Node does not exist!"); + node.parents.iter().all(|&parent| parent <= node_id) + }) + // ... and remove those. + .for_each(|id| { + self.partial_samples + .remove(id) + .expect("Sample does not exist!"); + }); + } + + fn complete_partial_configs(&self, sample: &mut Sample, root: usize, sat_solver: &SatWrapper) { + let vars: Vec<i32> = (1..=self.ddnnf.number_of_variables as i32).collect(); + for config in sample.partial_configs.iter_mut() { + for &var in vars.iter() { + if config.contains(var) || config.contains(-var) { + continue; + } + + config.update_sat_state(sat_solver, root); + + // clone sat state so that we don't change the state that is cached in the config + let mut sat_state = config + .get_sat_state() + .cloned() + .expect("sat state should exist after calling update_sat_state()"); + + if sat_solver.is_sat_cached(&[var], &mut sat_state) { + config.add(var); + } else { + config.add(-var); + } + } + } + + debug_assert!(sample + .iter() + .all(|config| !config.get_literals().contains(&0))); + } +} + +#[inline] +fn trim_and_resample( + node_id: usize, + sample: Sample, + t: usize, + number_of_variables: usize, + sat_solver: &SatWrapper, +) -> Sample { + if sample.is_empty() { + return sample; + } + + let t = min(sample.get_vars().len(), t); + let (ranks, 
avg_rank) = calc_stats(&sample, t); + + let (mut new_sample, literals_to_resample) = trim_sample(&sample, &ranks, avg_rank); + + let mut literals_to_resample: Vec<i32> = literals_to_resample.into_iter().collect(); + literals_to_resample.sort_unstable(); + literals_to_resample.shuffle(&mut rng()); + + let mut iter = TInteractionIter::new(&literals_to_resample, t); + while let Some(interaction) = iter.next() { + cover_with_caching( + &mut new_sample, + interaction, + sat_solver, + node_id, + number_of_variables, + ); + } + + if new_sample.len() < sample.len() { + new_sample + } else { + sample + } +} + +#[inline] +fn trim_sample(sample: &Sample, ranks: &[f64], avg_rank: f64) -> (Sample, HashSet<i32>) { + let mut literals_to_resample: HashSet<i32> = HashSet::new(); + let mut new_sample = Sample::new_from_samples(&[sample]); + let complete_len = sample.complete_configs.len(); + + for (index, config) in sample.iter().enumerate() { + if ranks[index] < avg_rank { + literals_to_resample.extend(config.get_decided_literals()); + } else if index < complete_len { + new_sample.add_complete(config.clone()); + } else { + new_sample.add_partial(config.clone()); + } + } + (new_sample, literals_to_resample) +} + +#[inline] +fn calc_stats(sample: &Sample, t: usize) -> (Vec<f64>, f64) { + let mut unique_coverage = vec![0; sample.len()]; + let mut iter = TInteractionIter::new(sample.get_literals(), t); + while let Some(interaction) = iter.next() { + if let Some(conf_index) = find_unique_covering_conf(sample, interaction) { + unique_coverage[conf_index] += 1; + } + } + + let mut ranks = vec![0.0; sample.len()]; + let mut sum: f64 = 0.0; + + for (index, config) in sample.iter().enumerate() { + let config_size = config.get_decided_literals().count(); + ranks[index] = unique_coverage[index] as f64 / config_size.pow(t as u32) as f64; + sum += ranks[index]; + } + + let avg_rank = sum / sample.len() as f64; + (ranks, avg_rank) +} + +#[inline] +fn find_unique_covering_conf(sample: &Sample, interaction: &[i32]) -> 
Option<usize> { + let mut result = None; + + for (index, config) in sample.iter().enumerate() { + if config.covers(interaction) { + if result.is_none() { + result = Some(index); + } else { + return None; + } + } + } + + result +} diff --git a/src/ddnnf/stream.rs b/src/ddnnf/stream.rs index 851600f..c302c3d 100644 --- a/src/ddnnf/stream.rs +++ b/src/ddnnf/stream.rs @@ -19,7 +19,7 @@ use nom::IResult; use workctl::WorkQueue; use crate::parser::persisting::{write_cnf_to_file, write_ddnnf_to_file}; -use crate::{parser::util::*, Ddnnf}; +use crate::{util::*, Ddnnf}; impl Ddnnf { /// Initiate the Stream mode. This enables a commincation channel between stdin and stdout. diff --git a/src/lib.rs b/src/lib.rs index 1cd5e0f..ec649ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ extern crate test; pub mod parser; +pub mod util; pub use crate::parser::c2d_lexer; pub use crate::parser::d4_lexer; diff --git a/src/parser.rs b/src/parser.rs index c577ef5..8174ade 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -8,7 +8,6 @@ pub mod from_cnf; use from_cnf::{check_for_cnf_header, CNFToken}; pub mod persisting; -pub mod util; use core::panic; use std::{ diff --git a/src/parser/persisting.rs b/src/parser/persisting.rs index 46b28af..49860fe 100644 --- a/src/parser/persisting.rs +++ b/src/parser/persisting.rs @@ -9,7 +9,7 @@ use rug::Assign; use crate::{Ddnnf, Node, NodeType}; -use super::util::format_vec; +use crate::util::format_vec; /// Takes a CNF and writes the string representation into a file with the provided name pub(crate) fn write_cnf_to_file( diff --git a/src/parser/util.rs b/src/parser/util.rs deleted file mode 100644 index 67d54f5..0000000 --- a/src/parser/util.rs +++ /dev/null @@ -1,15 +0,0 @@ -pub fn format_vec<T: ToString>(vals: impl Iterator<Item = T>) -> String { - vals.map(|v| v.to_string()) - .collect::<Vec<String>>() - .join(" ") -} - -pub fn format_vec_vec<T>(vals: impl Iterator<Item = T>) -> String -where - T: IntoIterator, - T::Item: ToString, -{ - vals.map(|res| format_vec(res.into_iter())) - 
.collect::<Vec<String>>() - .join(";") -} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..7ad2519 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,35 @@ +use rand::Rng; + +#[cfg(any(feature = "deterministic", test))] +use rand::prelude::{SeedableRng, StdRng}; + +#[cfg(not(any(feature = "deterministic", test)))] +use rand::thread_rng; + +pub fn format_vec<T: ToString>(vals: impl Iterator<Item = T>) -> String { + vals.map(|v| v.to_string()) + .collect::<Vec<String>>() + .join(" ") +} + +pub fn format_vec_vec<T>(vals: impl Iterator<Item = T>) -> String +where + T: IntoIterator, + T::Item: ToString, +{ + vals.map(|res| format_vec(res.into_iter())) + .collect::<Vec<String>>() + .join(";") +} + +#[cfg(any(feature = "deterministic", test))] +#[inline] +pub fn rng() -> impl Rng { + StdRng::seed_from_u64(42) +} + +#[cfg(not(any(feature = "deterministic", test)))] +#[inline] +pub fn rng() -> impl Rng { + thread_rng() +}