Docs: Polars GroupBy #1836

Merged · 4 commits · Aug 14, 2024
**docs/requirements_notebooks.txt** (+2 −1)

```diff
@@ -6,4 +6,5 @@ seaborn
 scipy
 scikit-learn
 polars==1.1.0
-pyarrow
+pyarrow
+hvplot
```
Contributor:
I had prepared a PR that gets rid of a separate requirements file for notebooks:

That doesn't need to move forward, but if it did, we'd probably want to be more conservative about adding new dependencies.

**docs/source/getting-started/tabular-data/group-by.ipynb** (+928 −0)

(Large diff not rendered by default.)

**docs/source/getting-started/tabular-data/index.rst** (+8 −10)

```diff
@@ -100,20 +100,18 @@ The specific methods that will be demonstrated are:
 * Quantiles

 * Grouping
+  * Protected Group Keys
-  * Public Group Keys
-  * Public Group Lengths

-* Grouping By Multiple Variables
-* Filtering
+  This section explains strategies for how to release statistics on grouped data.

-* Public vs. Private Grouping Lengths
+* Data Preparation

-  This section will explain the implications and limitations of having public and private keys and/or lengths when grouping.
+  * using ``with_columns``
+  * using ``filter``

-* Data Preparation Limitations

-  * Limitations with ``with_columns``
-  * Limitations with ``filter``

-  This section will explain the limitations and properties of common Polars functions that are unique to their usage in OpenDP.
+  This section explains how to build stable dataframe transformations with Polars.
```

Contributor (review comment on `* Protected Group Keys`):

Suggested change: add a blank line before `* Protected Group Keys`.

Sphinx needs a line break between indent levels for correct rendering.
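To illustrate the reviewer's Sphinx point, a nested bullet in reStructuredText only starts a sub-list when a blank line separates the indent levels:

```rst
* Grouping

  * Protected Group Keys
```

Without the blank line, docutils folds the indented line into the parent item's paragraph instead of rendering a nested list.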
Contributor:
Would it make sense to use RST toctrees here? I could do that, if you don't have all the installs for the doc build.

Member:
Maybe we could do this once the misc notebooks are merged?
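For reference, the toctree the Contributor suggests might look something like the following sketch (the `group-by` entry matches the notebook added in this PR; any other document names would depend on what else lands in the section):

```rst
.. toctree::
   :maxdepth: 1

   group-by
```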


Compositor Overview
-------------------
**docs/source/getting-started/tabular-data/keys.ipynb** (+0 −280)

This file was deleted.
**rust/src/accuracy/polars/mod.rs** (+20 −7)

```diff
@@ -66,6 +66,7 @@
     lazyframe_utility(&lf, alpha)
 }

+#[derive(Clone)]
```
Contributor:
I'd prefer not to edit the Rust in this PR, if at all possible: if the examples in the docs rely on changes here, then we should get another release out before we point people to the nightly docs. It's adding more steps.

(But if this is a change we really need, don't let me block!)

Member:
Broke it out into a separate PR. It also killed the commit history here, unfortunately.

Contributor:
Huh: GitHub seems to be confused. The base PR in the stack is merged; I then checked out main and confirmed that this #[derive(Clone)] is in there... so it seems like it shouldn't be marked as a change here as well? Which makes me wonder how well this UI is representing the other changes in this PR.

I'm going to try diffing this branch with main locally, and will see what that looks like.

```diff
 struct UtilitySummary {
     pub name: String,
     pub aggregate: String,
@@ -188,26 +189,38 @@ fn expr_utility<'a>(
         }]);
     }

-    match expr {
-        Expr::Len => Ok(vec![UtilitySummary {
-            name,
+    Ok(match expr {
+        Expr::Len => vec![UtilitySummary {
+            name: name.clone(),
             aggregate: "Len".to_string(),
             distribution: None,
             scale: None,
             accuracy: alpha.is_some().then_some(0.0),
             threshold: t_value,
-        }]),
+        }],

-        Expr::Function { input, .. } => Ok(input
+        Expr::Function { input, .. } => input
             .iter()
             .map(|e| expr_utility(e, alpha, threshold.clone()))
             .collect::<Fallible<Vec<_>>>()?
             .into_iter()
             .flatten()
-            .collect()),
+            .collect(),

-        _ => fallible!(FailedFunction, "unrecognized primitive"),
+        Expr::BinaryExpr { left, op: _, right } => [
+            expr_utility(&left, alpha, threshold.clone())?,
+            expr_utility(&right, alpha, threshold)?,
+        ]
+        .concat(),
+
+        e => return fallible!(FailedFunction, "unrecognized primitive: {:?}", e),
     }
+    .into_iter()
+    .map(|mut summary| {
+        summary.name = name.clone();
+        summary
+    })
+    .collect())
 }

 fn expr_aggregate(expr: &Expr) -> Fallible<&'static str> {
```
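The shape of this refactor is worth noting: rather than stamping `name` inside every match arm, each arm now produces bare summaries and a single trailing `map` assigns the name uniformly. A minimal standalone sketch of the pattern, using hypothetical `Summary` and `Expr` types rather than the real OpenDP definitions:

```rust
#[derive(Debug, Clone, PartialEq)]
pub struct Summary {
    pub name: String,      // filled in once, after the match
    pub aggregate: String,
}

pub enum Expr {
    Len,
    Binary(Box<Expr>, Box<Expr>),
}

// Each arm yields summaries without setting `name`; the trailing map
// assigns it uniformly, mirroring the PR's expr_utility refactor.
pub fn utility(expr: &Expr, name: &str) -> Result<Vec<Summary>, String> {
    Ok(match expr {
        Expr::Len => vec![Summary {
            name: String::new(),
            aggregate: "Len".to_string(),
        }],
        // A binary expression recurses into both sides and concatenates.
        Expr::Binary(left, right) => {
            [utility(left, name)?, utility(right, name)?].concat()
        }
    }
    .into_iter()
    .map(|mut s| {
        s.name = name.to_string();
        s
    })
    .collect())
}

fn main() {
    let expr = Expr::Binary(Box::new(Expr::Len), Box::new(Expr::Len));
    let out = utility(&expr, "A").unwrap();
    assert_eq!(out.len(), 2);
    assert!(out.iter().all(|s| s.name == "A" && s.aggregate == "Len"));
    println!("{:?}", out);
}
```

The payoff is that new arms (like the `BinaryExpr` case added in this PR) cannot forget to set the name.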
**rust/src/accuracy/polars/test.rs** (+45 −0)

```diff
@@ -64,3 +64,48 @@ fn test_describe_polars_measurement_accuracy() -> Fallible<()> {

     Ok(())
 }
+
+#[test]
+fn test_describe_polars_measurement_accuracy_mean() -> Fallible<()> {
+    let lf_domain = LazyFrameDomain::new(vec![
+        SeriesDomain::new("A", AtomDomain::<i32>::default()),
+        SeriesDomain::new("B", AtomDomain::<f64>::default()),
+    ])?
+    .with_margin::<&str>(
+        &[],
+        Margin::new()
+            .with_public_lengths()
+            .with_max_partition_length(10),
+    )?;
+
+    let lf = df!("A" => &[3, 4, 5], "B" => &[1., 3., 7.])?.lazy();
+
+    let meas = make_private_lazyframe(
+        lf_domain,
+        SymmetricDistance,
+        MaxDivergence::default(),
+        lf.select([col("A").dp().mean((3, 5), Some(1.0))]),
+        None,
+        None,
+    )?;
+
+    let description = describe_polars_measurement_accuracy(meas.clone(), None)?;
+
+    let mut expected = df![
+        "column" => &["A", "A"],
+        "aggregate" => &["Sum", "Len"],
+        "distribution" => &[Some("Integer Laplace"), None],
+        "scale" => &[Some(1.0), None]
+    ]?;
+    println!("{:?}", expected);
+    assert_eq!(expected, description);
+
+    let description = describe_polars_measurement_accuracy(meas.clone(), Some(0.05))?;
+
+    let accuracy = discrete_laplacian_scale_to_accuracy(1.0, 0.05)?;
+    expected.with_column(Series::new("accuracy", &[Some(accuracy), Some(0.0)]))?;
+    println!("{:?}", expected);
+    assert_eq!(expected, description);
+
+    Ok(())
+}
```
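As background for the `discrete_laplacian_scale_to_accuracy(1.0, 0.05)` call in this test, the continuous-Laplace analogue of the accuracy bound is the following (the discrete mechanism adds an integer-support correction, so the exact value returned by the function differs slightly):

```latex
% For Laplace noise X \sim \mathrm{Lap}(s):
\Pr[\lvert X \rvert \ge a] = e^{-a/s}
\quad\Longrightarrow\quad
a(\alpha) = s \ln(1/\alpha)
% e.g. s = 1.0,\ \alpha = 0.05:\ a = \ln 20 \approx 3.00
```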