Skip to content

Commit

Permalink
broken with ICE
Browse files Browse the repository at this point in the history
  • Loading branch information
frankmcsherry committed Nov 23, 2015
1 parent bead9b5 commit 2fa719e
Show file tree
Hide file tree
Showing 9 changed files with 402 additions and 320 deletions.
2 changes: 1 addition & 1 deletion examples/cc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,6 @@ where G::Timestamp: LeastUpperBound+Hash {

inner.join_map_u(&edges, |_k,l,d| (*d,*l))
.concat(&nodes)
.group_u(|_, s, t| { t.push((*s.peek().unwrap().0, 1)); } )
.group_u(|_, mut s, t| { t.push((*s.peek().unwrap().0, 1)); } )
})
}
6 changes: 3 additions & 3 deletions examples/scc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use timely::dataflow::*;
use timely::dataflow::operators::*;

use differential_dataflow::operators::*;
use differential_dataflow::operators::group::{GroupUnsigned, GroupBy};
use differential_dataflow::operators::group::{GroupUnsigned};
use differential_dataflow::operators::join::{JoinUnsigned};
use differential_dataflow::collection::LeastUpperBound;

Expand Down Expand Up @@ -90,9 +90,9 @@ fn _trim_and_flip<G: Scope>(graph: &Stream<G, (Edge, i32)>) -> Stream<G, (Edge,
where G::Timestamp: LeastUpperBound {
graph.iterate(|edges| {
let inner = edges.scope().enter(&graph).map_in_place(|x| x.0 = ((x.0).1, (x.0).0));
edges.map(|((x,_),w)| (x,w))
edges.map(|((x,_),w)| ((x,()),w))
// .threshold(|&x| x, |i| (Vec::new(), i), |_, w| if w > 0 { 1 } else { 0 })
.group_by_u(|x|(x,()), |&x,_| (x, ()), |_,_,target| target.push(((),1)))
.group_u(|_,_,target| target.push(((),1)))
.join_map_u(&inner, |&d,_,&s| (s,d))
})
.map_in_place(|x| x.0 = ((x.0).1, (x.0).0))
Expand Down
153 changes: 76 additions & 77 deletions src/collection/count.rs
Original file line number Diff line number Diff line change
@@ -1,43 +1,17 @@
//! Like `Count` but with the value type specialized to `()`.

use std::fmt::Debug;

use collection::{close_under_lub, LeastUpperBound, Lookup};
use ::Data;
use collection::{LeastUpperBound, Lookup};
use collection::compact::Compact;
use collection::trace::{Traceable, TraceRef};

#[derive(Copy, Clone, Debug)]
pub struct Offset {
dataz: u32,
}

impl Offset {
#[inline(always)]
fn new(offset: usize) -> Offset {
assert!(offset < ((!0u32) as usize)); // note strict inequality
Offset { dataz: (!0u32) - offset as u32 }
}
#[inline(always)]
fn val(&self) -> usize { ((!0u32) - self.dataz) as usize }
}

struct ListEntry {
time: u32,
wgts: i32,
next: Option<Offset>,
}

pub struct Count<K, T, L> {
phantom: ::std::marker::PhantomData<K>,
links: Vec<ListEntry>,
times: Vec<T>,
pub keys: L,
temp: Vec<T>,
}

impl<K, L, T> Count<K, T, L> where K: Ord, L: Lookup<K, Offset>, T: LeastUpperBound+Debug {

impl<K, L, T> Traceable for Count<K, T, L> where K: Data+Ord+'static, L: Lookup<K, Offset>+'static, T: LeastUpperBound+'static {
type Key = K;
type Index = T;
type Value = ();

/// Installs a supplied set of keys and values as the differences for `time`.
pub fn set_difference(&mut self, time: T, accumulation: Compact<K, ()>) {
fn set_difference(&mut self, time: T, accumulation: Compact<K, ()>) {

// extract the relevant fields
let keys = accumulation.keys;
Expand Down Expand Up @@ -78,52 +52,59 @@ impl<K, L, T> Count<K, T, L> where K: Ord, L: Lookup<K, Offset>, T: LeastUpperBo

self.times.push(time);
}
}

impl<'a,K,L,T> TraceRef<'a,K,T,()> for &'a Count<K,T,L> where K: Ord+'a, L: Lookup<K, Offset>+'a, T: LeastUpperBound+'a {
type VIterator = WeightIterator<'a>;
type TIterator = CountIterator<'a,K,T,L>;
fn trace(self, key: &K) -> Self::TIterator {
CountIterator {
trace: self,
next0: self.keys.get_ref(key).map(|&x|x),
// silly: (),
}
}
}

#[derive(Copy, Clone, Debug)]
pub struct Offset {
dataz: u32,
}

/// Enumerates the differences for `key` at `time`.
pub fn get_diff(&self, key: &K, time: &T) -> i32 {
self.trace(key)
.filter(|x| x.0 == time)
.map(|x| x.1)
.next()
.unwrap_or(0)
impl Offset {
#[inline(always)]
fn new(offset: usize) -> Offset {
assert!(offset < ((!0u32) as usize)); // note strict inequality
Offset { dataz: (!0u32) - offset as u32 }
}
#[inline(always)]
fn val(&self) -> usize { ((!0u32) - self.dataz) as usize }
}

struct ListEntry {
time: u32,
wgts: i32,
next: Option<Offset>,
}

pub struct Count<K, T, L> {
phantom: ::std::marker::PhantomData<K>,
links: Vec<ListEntry>,
times: Vec<T>,
pub keys: L,
// temp: Vec<T>,
silly: (),
}

impl<K, L, T> Count<K, T, L> where K: Data+Ord+'static, L: Lookup<K, Offset>+'static, T: LeastUpperBound+'static {

pub fn get_count(&self, key: &K, time: &T) -> i32 {
let mut sum = 0;
for wgt in self.trace(key).filter(|x| x.0 <= time).map(|x| x.1) {
for wgt in Traceable::trace(self, key).filter(|x| x.0 <= time).map(|mut x| x.1.next().unwrap().1) {
sum += wgt;
}
sum
}

// TODO : this could do a better job of returning newly interesting times: those times that are
// TODO : now in the least upper bound, but were not previously so. The main risk is that the
// TODO : easy way to do this computes the LUB before and after, but this can be expensive:
// TODO : the LUB with `index` is often likely to be smaller than the LUB without it.
/// Lists times that are the least upper bound of `time` and any subset of existing times.
pub fn interesting_times<'a>(&'a mut self, key: &K, index: T) -> &'a [T] {
// panic!();
let mut temp = ::std::mem::replace(&mut self.temp, Vec::new());
temp.clear();
temp.push(index);
for (time, _) in self.trace(key) {
let lub = time.least_upper_bound(&temp[0]);
if !temp.contains(&lub) {
temp.push(lub);
}
}
close_under_lub(&mut temp);
::std::mem::replace(&mut self.temp, temp);
&self.temp[..]
}

/// Enumerates pairs of time `&T` and differences `DifferenceIterator<V>` for `key`.
pub fn trace<'a>(&'a self, key: &K) -> CountIterator<'a, K, T, L> {
CountIterator {
trace: self,
next0: self.keys.get_ref(key).map(|&x|x),
}
}
}

impl<K: Eq, L: Lookup<K, Offset>, T> Count<K, T, L> {
Expand All @@ -133,7 +114,8 @@ impl<K: Eq, L: Lookup<K, Offset>, T> Count<K, T, L> {
links: Vec::new(),
times: Vec::new(),
keys: l,
temp: Vec::new(),
// temp: Vec::new(),
silly: (),
}
}
}
Expand All @@ -146,18 +128,35 @@ pub struct CountIterator<'a, K: Eq+'a, T: 'a, L: Lookup<K, Offset>+'a> {
}

impl<'a, K: Eq, T, L> Iterator for CountIterator<'a, K, T, L>
where K: Ord+'a,
T: LeastUpperBound+Debug+'a,
where K: Ord+'a,
T: LeastUpperBound+'a,
L: Lookup<K, Offset>+'a {
type Item = (&'a T, i32);
type Item = (&'a T, WeightIterator<'a>);

#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.next0.map(|position| {
let time_index = self.trace.links[position.val()].time as usize;
let result = (&self.trace.times[time_index], self.trace.links[position.val()].wgts);
let result = (&self.trace.times[time_index], WeightIterator { weight: self.trace.links[position.val()].wgts, silly: &self.trace.silly });
self.next0 = self.trace.links[position.val()].next;
result
})
}
}

pub struct WeightIterator<'a> {
weight: i32,
silly: &'a (),
}

impl<'a> Iterator for WeightIterator<'a> {
type Item = (&'a (), i32);
fn next(&mut self) -> Option<(&'a (), i32)> {
if self.weight == 0 { None }
else {
let result = self.weight;
self.weight = 0;
Some((self.silly, result))
}
}
}
24 changes: 16 additions & 8 deletions src/collection/trace.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
use std::iter::Peekable;

use ::Data;
use collection::{close_under_lub, LeastUpperBound, Lookup};
use collection::compact::Compact;

use iterators::merge::{Merge, MergeIterator};
use iterators::coalesce::{Coalesce, CoalesceIterator};
use collection::compact::Compact;

// Test implementation which uses references rather than clones

pub trait Traceable where for<'a> &'a Self: TraceRef<'a, Self::Key, Self::Index, Self::Value> {

type Key: Ord+'static;
type Key: Data+Ord+'static;
type Index: LeastUpperBound+'static;
type Value: Ord+'static;
type Value: Data+Ord+'static;

// type PartKey: Unsigned; // the keys are partitioned and likely ordered by this unsigned integer

// // indicates the part for a key
// fn part(&self, key: &Self::Key) -> Self::PartKey;

// TODO : Should probably allow the trace to determine how it receives data.
// TODO : Radix sorting and such might live in the trace, rather than in `Arrange`.
/// Introduces differences in `accumulation` at `time`.
fn set_difference(&mut self, time: Self::Index, accumulation: Compact<Self::Key, Self::Value>);

Expand Down Expand Up @@ -46,7 +54,7 @@ pub trait Traceable where for<'a> &'a Self: TraceRef<'a, Self::Key, Self::Index,
}

// TODO : Make sure the right assumptions are made about contents of stash.
fn interesting_times<'a>(&'a mut self, key: &Self::Key, time: &Self::Index, stash: &mut Vec<Self::Index>) {
fn interesting_times<'a>(&'a self, key: &Self::Key, time: &Self::Index, stash: &mut Vec<Self::Index>) {
// add all times, but filter a bit if possible
for iter in self.trace(key) {
let lub = iter.0.least_upper_bound(time);
Expand All @@ -59,14 +67,14 @@ pub trait Traceable where for<'a> &'a Self: TraceRef<'a, Self::Key, Self::Index,
}

pub trait TraceRef<'a,K,T:'a,V:'a> {
type VIterator: Iterator<Item=(&'a V, i32)>;
type TIterator: Iterator<Item=(&'a T, Self::VIterator)>;
type VIterator: Iterator<Item=(&'a V, i32)>+'a;
type TIterator: Iterator<Item=(&'a T, Self::VIterator)>+'a;
fn trace(self, key: &K) -> Self::TIterator;
}

pub type CollectionIterator<VIterator> = Peekable<CoalesceIterator<MergeIterator<VIterator>>>;

impl<K,V,L,T> Traceable for Trace<K, T, V, L> where K: Ord+'static, V: Ord+'static, L: Lookup<K, Offset>+'static, T: LeastUpperBound+'static {
impl<K,V,L,T> Traceable for Trace<K, T, V, L> where K: Data+Ord+'static, V: Data+Ord+'static, L: Lookup<K, Offset>+'static, T: LeastUpperBound+'static {
type Key = K;
type Index = T;
type Value = V;
Expand Down Expand Up @@ -122,7 +130,7 @@ impl<K,V,L,T> Traceable for Trace<K, T, V, L> where K: Ord+'static, V: Ord+'stat
}
}

impl<'a,K,V,L,T> TraceRef<'a,K,T,V> for &'a Trace<K,T,V,L> where K: Ord+'a, V: Ord+'a, L: Lookup<K, Offset>+'a, T: LeastUpperBound+'a {
impl<'a,K,V,L,T> TraceRef<'a,K,T,V> for &'a Trace<K,T,V,L> where K: Data+Ord+'a, V: Data+Ord+'a, L: Lookup<K, Offset>+'a, T: LeastUpperBound+'a {
type VIterator = DifferenceIterator<'a, V>;
type TIterator = TraceIterator<'a,K,T,V,L>;
fn trace(self, key: &K) -> Self::TIterator {
Expand Down
31 changes: 19 additions & 12 deletions src/operators/arrange.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ use collection::count::Count;
use collection::compact::Compact;
use radix_sort::{RadixSorter, Unsigned};

/// A collection of `(K,V)` values as a timely stream and shared trace.
///
/// An `Arranged` performs the task of arranging a keyed collection once,
/// allowing multiple differential operators to use the same trace. This
/// saves on computation and memory, in exchange for some cognitive overhead
/// in writing differential operators: each must pay enough care to signals
/// from the `stream` field to know the subset of `trace` it has logically
/// received.
pub struct Arranged<G: Scope, T: Traceable<Index=G::Timestamp>>
where
T::Key: Data,
Expand All @@ -32,6 +40,7 @@ pub struct Arranged<G: Scope, T: Traceable<Index=G::Timestamp>>
pub trace: Rc<RefCell<T>>,
}

/// Arranges something as `(Key,Val)` pairs.
pub trait ArrangeByKey<G: Scope, K: Data, V: Data> where G::Timestamp: LeastUpperBound {
fn arrange_by_key<
U: Unsigned+Default,
Expand All @@ -42,7 +51,6 @@ pub trait ArrangeByKey<G: Scope, K: Data, V: Data> where G::Timestamp: LeastUppe
}

impl<G: Scope, K: Data, V: Data> ArrangeByKey<G, K, V> for Collection<G, (K, V)> where G::Timestamp: LeastUpperBound {

fn arrange_by_key<
U: Unsigned+Default,
KH: Fn(&K)->U+'static,
Expand Down Expand Up @@ -86,9 +94,6 @@ impl<G: Scope, K: Data, V: Data> ArrangeByKey<G, K, V> for Collection<G, (K, V)>

// 2a. fetch any data associated with this time.
if let Some(mut queue) = inputs.remove_key(&index) {

// println!("got some data for {:?}; updating", index);

// sort things; radix if many, .sort_by if few.
let compact = if queue.len() > 1 {
for element in queue.into_iter() {
Expand Down Expand Up @@ -122,18 +127,19 @@ impl<G: Scope, K: Data, V: Data> ArrangeByKey<G, K, V> for Collection<G, (K, V)>
}


pub struct ArrangedBySelf<G: Scope, K: Data, L: Lookup<K, ::collection::count::Offset>+'static> {
pub stream: Stream<G, (Vec<K>, Vec<u32>, Vec<((), i32)>)>,
pub trace: Rc<RefCell<Count<K, G::Timestamp, L>>>,
}

/// Arranges something as `(Key,())` pairs, logically by `Key`.
///
/// This trait provides an optimized implementation of `ArrangeByKey` in which
/// the underlying trace does not support dynamic numbers of values for each key,
/// which saves on computation and memory.
pub trait ArrangeBySelf<G: Scope, K: Data> {
fn arrange_by_self<
U: Unsigned+Default,
KH: Fn(&K)->U+'static,
Look: Lookup<K, ::collection::count::Offset>+'static,
LookG: Fn(u64)->Look,
>(&self, key_h: KH, look: LookG) -> ArrangedBySelf<G, K, Look>;
>(&self, key_h: KH, look: LookG) -> Arranged<G, Count<K, G::Timestamp, Look>>
where G::Timestamp: LeastUpperBound;
}

impl<G: Scope, K: Data> ArrangeBySelf<G, K> for Collection<G, K> where G::Timestamp: LeastUpperBound {
Expand All @@ -144,7 +150,8 @@ impl<G: Scope, K: Data> ArrangeBySelf<G, K> for Collection<G, K> where G::Timest
Look: Lookup<K, ::collection::count::Offset>+'static,
LookG: Fn(u64)->Look,
>
(&self, key_h: KH, look: LookG) -> ArrangedBySelf<G, K, Look> {
(&self, key_h: KH, look: LookG) -> Arranged<G, Count<K, G::Timestamp, Look>>
where G::Timestamp: LeastUpperBound {

let peers = self.scope().peers();
let mut log_peers = 0;
Expand Down Expand Up @@ -208,6 +215,6 @@ impl<G: Scope, K: Data> ArrangeBySelf<G, K> for Collection<G, K> where G::Timest
}
});

ArrangedBySelf { stream: stream, trace: trace }
Arranged { stream: stream, trace: trace }
}
}
Loading

0 comments on commit 2fa719e

Please sign in to comment.