// mean.go — forked from google/differential-privacy.
//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package codelab
import (
"github.com/google/differential-privacy/privacy-on-beam/v2/pbeam"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"
)
// init registers extractVisitHourAndTimeSpentFn with Beam at package load
// time. NOTE(review): presumably required so the function can be resolved by
// name when the pipeline runs on a distributed runner — confirm against the
// Beam Go SDK documentation for RegisterFunction.
func init() {
beam.RegisterFunction(extractVisitHourAndTimeSpentFn)
}
// MeanTimeSpent computes the mean time spent by visitors, keyed by the hour
// in which they entered the restaurant. The result is a non-anonymized,
// non-private mean; use PrivateMeanTimeSpent to compute it in an anonymized
// way instead.
func MeanTimeSpent(s beam.Scope, col beam.PCollection) beam.PCollection {
	s = s.Scope("MeanTimeSpent")
	// Turn each visit into an (entry hour, minutes spent) pair, then average
	// the minutes for every hour key.
	keyedByHour := beam.ParDo(s, extractVisitHourAndTimeSpentFn, col)
	return stats.MeanPerKey(s, keyedByHour)
}
// extractVisitHourAndTimeSpentFn maps a Visit to a key/value pair: the hour
// component of the visit's TimeEntered as the key and its MinutesSpent as the
// value.
func extractVisitHourAndTimeSpentFn(visit Visit) (int, int) {
	hourEntered := visit.TimeEntered.Hour()
	minutesSpent := visit.MinutesSpent
	return hourEntered, minutesSpent
}
// PrivateMeanTimeSpent computes the mean time spent by visitors, keyed by the
// hour in which they entered the restaurant, in a differentially private way.
func PrivateMeanTimeSpent(s beam.Scope, col beam.PCollection) beam.PCollection {
	s = s.Scope("PrivateMeanTimeSpent")

	// Build the privacy spec (epsilon is declared elsewhere in this package,
	// delta is 0) and wrap col in a PrivatePCollection identified by the
	// "VisitorID" field.
	privacySpec := pbeam.NewPrivacySpec(epsilon, /* delta */ 0)
	privateVisits := pbeam.MakePrivateFromStruct(s, col, privacySpec, "VisitorID")

	// Public output partitions: the restaurant's work hours, from 9 am up to
	// (but not including) 9 pm.
	workHours := beam.CreateList(s, [...]int{9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20})

	// Key every visit by its entry hour, with the minutes spent as the value.
	minutesByHour := pbeam.ParDo(s, extractVisitHourAndTimeSpentFn, privateVisits)

	meanParams := pbeam.MeanParams{
		// A visitor is assumed to visit the restaurant once (one hour) a day.
		MaxPartitionsContributed: 1,
		// A visitor is assumed to visit the restaurant once within an hour.
		MaxContributionsPerPartition: 1,
		// Lower and upper bounds on the time spent per user, in minutes.
		MinValue: 0,
		MaxValue: 60,
		// Visitors only visit during work hours.
		PublicPartitions: workHours,
	}
	return pbeam.MeanPerKey(s, minutesByHour, meanParams)
}