Skip to content

Commit

Permalink
Add PNNSGenerateDatabase and PNNSProcessDatabase (#88)
Browse files Browse the repository at this point in the history
  • Loading branch information
fboemer authored Sep 3, 2024
1 parent 466b67c commit 9b101fd
Show file tree
Hide file tree
Showing 15 changed files with 772 additions and 19 deletions.
20 changes: 20 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ let package = Package(
.executable(name: "PIRGenerateDatabase", targets: ["PIRGenerateDatabase"]),
.executable(name: "PIRProcessDatabase", targets: ["PIRProcessDatabase"]),
.executable(name: "PIRShardDatabase", targets: ["PIRShardDatabase"]),
.executable(name: "PNNSGenerateDatabase", targets: ["PNNSGenerateDatabase"]),
.executable(name: "PNNSProcessDatabase", targets: ["PNNSProcessDatabase"]),
],
dependencies: [
.package(url: "https://github.com/apple/swift-algorithms", from: "1.2.0"),
Expand Down Expand Up @@ -146,6 +148,24 @@ let package = Package(
"PrivateInformationRetrievalProtobuf",
],
swiftSettings: executableSettings),
.executableTarget(
name: "PNNSGenerateDatabase",
dependencies: [
.product(name: "ArgumentParser", package: "swift-argument-parser"),
"HomomorphicEncryption",
"PrivateNearestNeighborsSearchProtobuf",
],
swiftSettings: executableSettings),
.executableTarget(
name: "PNNSProcessDatabase",
dependencies: [
.product(name: "ArgumentParser", package: "swift-argument-parser"),
"HomomorphicEncryptionProtobuf",
"PrivateNearestNeighborsSearchProtobuf",
"HomomorphicEncryption",
.product(name: "Logging", package: "swift-log"),
],
swiftSettings: executableSettings),
.testTarget(
name: "HomomorphicEncryptionTests",
dependencies: [
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ and executables:
* [PIRProcessDatabase](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/pirprocessdatabase)
* [PIRShardDatabase](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/pirsharddatabase)

The documentation is hosted on the [Swift Package Index](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/documentation/homomorphicencryption).

## Background
### Homomorphic Encryption (HE)
Swift Homomorphic Encryption implements a special form of cryptography called homomorphic encryption (HE).
Expand Down
15 changes: 12 additions & 3 deletions Sources/HomomorphicEncryption/Array2d.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,17 @@ public struct Array2d<T: Equatable & AdditiveArithmetic & Sendable>: Equatable,
@usableFromInline package var count: Int { rowCount * columnCount }

@inlinable
package init(data: [[T]]) {
self.init(data: data.flatMap { $0 }, rowCount: data.count, columnCount: data[0].count)
package init(data: [[T]] = []) {
    // Flatten the nested rows into row-major order.
    let elements = data.flatMap { $0 }
    // No rows at all, or only empty rows: build an empty 0 x 0 array.
    guard let firstRow = data.first, !elements.isEmpty else {
        self.init(data: [], rowCount: 0, columnCount: 0)
        return
    }
    // The column count is taken from the first row.
    self.init(data: elements, rowCount: data.count, columnCount: firstRow.count)
}

@inlinable
Expand Down Expand Up @@ -165,7 +174,7 @@ extension Array2d {
/// Appends extra rows to the array.
/// - Parameter rows: The row-major elements to append. Must have count dividing `columnCount`.
@inlinable
mutating func append(rows: [T]) {
package mutating func append(rows: [T]) {
let (newRowCount, leftover) = rows.count.quotientAndRemainder(dividingBy: columnCount)
precondition(leftover == 0)
data.append(contentsOf: rows)
Expand Down
2 changes: 1 addition & 1 deletion Sources/HomomorphicEncryption/HeScheme.swift
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public protocol HeScheme {
/// - Returns: A plaintext encoding `signedValues`.
/// - Throws: Error upon failure to encode.
/// - seealso: ``Context/encode(signedValues:format:)`` for an alternative API.
/// - seealso: ``HeScheme/encode(context:values:format)`` to encode unsigned values.
/// - seealso: ``HeScheme/encode(context:values:format:)`` to encode unsigned values.
static func encode(context: Context<Self>, signedValues: some Collection<SignedScalar>, format: EncodeFormat) throws
-> CoeffPlaintext

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,17 @@ Run `PIRProcessDatabase --help` to get a sample JSON configuration.
There are four required parameters:
1. `rlweParameters` is one of the [PredefinedRlweParameters](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/homomorphicencryption/predefinedrlweparameters),
e.g., `n_4096_logq_27_28_28_logt_5`.
1. `inputDatabase` is the path to the unprocessed input database. It must be a
2. `inputDatabase` is the path to the unprocessed input database. It must be a
serialized [Apple_SwiftHomomorphicEncryption_Pir_V1_KeywordDatabase](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/privateinformationretrievalprotobuf/apple_swifthomomorphicencryption_pir_v1_keyworddatabase).

> Note: The `PIRGenerateDatabase` binary can be used to generate a sample database.
3. `outputDatabase` is the path to where the processed database’s shards will be
written. This string must contain `SHARD_ID`, unless `sharding` is
`shardCount(1)`, and have extension `.txtpb` or `binpb`. `SHARD_ID` will be
replaced with the shard number of each shard.
`shardCount(1)`. `SHARD_ID` will be replaced with the shard number of each shard.
4. `outputPirParameters` is the path to where each shard’s PIR parameters will be
written. This string must contain `SHARD_ID`, unless `sharding` is
`shardCount(1)`, and have extension `.txtpb` or `binpb`. Again, `SHARD_ID` will
`shardCount(1)`, and have extension `.txtpb` or `.binpb`. Again, `SHARD_ID` will
be replaced with the shard number of each shard.

A minimal configuration sample is
Expand Down
70 changes: 70 additions & 0 deletions Sources/PNNSGenerateDatabase/GenerateDatabase.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2024 Apple Inc. and the Swift Homomorphic Encryption project authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import ArgumentParser
import Foundation
import HomomorphicEncryption
import PrivateNearestNeighborsSearch
import PrivateNearestNeighborsSearchProtobuf

/// Command-line choices for how each generated row's vector is populated.
///
/// The raw string values are the spellings accepted on the command line.
enum VectorTypeArguments: String, CaseIterable, ExpressibleByArgument {
/// Every entry of each vector is drawn independently at random from the closed range `[-1.0, 1.0]`.
case random
/// The vector of the i'th row is all 0s except for a single 1 at index `i % vectorDimension`.
case unit
}

/// Command-line tool that generates a sample database and saves it to `outputDatabase`.
///
/// Row `i` receives entry identifier `i`, a vector chosen according to `vectorType`, and
/// `metadataSize` bytes of metadata formed by repeating the decimal digits of `i`.
@main
struct GenerateDatabaseCommand: ParsableCommand {
    static let configuration: CommandConfiguration = .init(
        commandName: "PNNSGenerateDatabase")

    @Option(help: "Path to output database. Must end in '.txtpb' or '.binpb'.")
    var outputDatabase: String

    @Option(help: "Number of rows in the database.")
    var rowCount: Int

    @Option(help: "Number of entries in each row's vector.")
    var vectorDimension: Int

    @Option(help: "Number of bytes of metadata for each row.")
    var metadataSize: Int

    @Option var vectorType: VectorTypeArguments

    /// Rejects argument values that would otherwise trap inside `run()`.
    ///
    /// - Throws: `ValidationError` when a count is negative, or when unit vectors are requested
    ///   with a vector dimension of zero.
    func validate() throws {
        // `0..<rowCount` traps for a negative upper bound.
        guard rowCount >= 0 else {
            throw ValidationError("'row-count' must be non-negative, but was \(rowCount).")
        }
        // `0..<vectorDimension` and `Array(repeating:count:)` trap for a negative count.
        guard vectorDimension >= 0 else {
            throw ValidationError("'vector-dimension' must be non-negative, but was \(vectorDimension).")
        }
        // `prefix(_:)` traps for a negative length.
        guard metadataSize >= 0 else {
            throw ValidationError("'metadata-size' must be non-negative, but was \(metadataSize).")
        }
        // A unit vector needs at least one entry to hold the 1; `rowIndex % 0` would trap.
        if vectorType == .unit, vectorDimension == 0 {
            throw ValidationError("'vector-dimension' must be positive when 'vector-type' is 'unit'.")
        }
    }

    /// Builds `rowCount` rows and writes the resulting database to `outputDatabase`.
    ///
    /// - Throws: Any error raised while converting the database to its protobuf form or saving it.
    mutating func run() throws {
        let rows: [DatabaseRow] = (0..<rowCount).map { rowIndex in
            let vector: [Float]
            switch vectorType {
            case .unit:
                // All zeros except a single 1, whose position cycles through the dimensions.
                var unitVector = [Float](repeating: 0, count: vectorDimension)
                unitVector[rowIndex % vectorDimension] = 1
                vector = unitVector
            case .random:
                vector = (0..<vectorDimension).map { _ in Float.random(in: -1.0...1.0) }
            }

            // Metadata is the row index's decimal digits, repeated and truncated to exactly
            // `metadataSize` bytes (e.g., row 12 with size 5 yields "12121").
            let rowString = String(rowIndex)
            let repeatCount = metadataSize.dividingCeil(rowString.count, variableTime: true)
            let repeatedDigits = [[UInt8]](repeating: Array(rowString.utf8), count: repeatCount).flatMap { $0 }
            let metadata = Array(repeatedDigits.prefix(metadataSize))
            return DatabaseRow(
                entryId: UInt64(rowIndex),
                entryMetadata: metadata,
                vector: vector)
        }
        let database = Database(rows: rows)
        try database.proto().save(to: outputDatabase)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# ``PNNSGenerateDatabase``

Private Nearest Neighbors Search database generation

## Overview

`PNNSGenerateDatabase` is an executable which generates a sample database for testing.
The resulting database can be processed with the `PNNSProcessDatabase` executable.

### Requirements
First ensure that the `~/.swiftpm/bin` directory is on your `$PATH`.
For example, if using the `zsh` shell, add the following line to your `~/.zshrc`
```sh
export PATH="$HOME/.swiftpm/bin:$PATH"
```
Make sure to reload the path via (`source ~/.zshrc`) or by restarting your terminal emulator.

Then, to install the `PNNSGenerateDatabase` executable, run
```sh
swift package experimental-install -c release --product PNNSGenerateDatabase
```

### Example

1. We start by generating a sample database.
```sh
PNNSGenerateDatabase \
--output-database database.txtpb \
--row-count 100 \
--metadata-size 3 \
--vector-dimension 10 \
--vector-type unit
```

This will generate a database of 100 entries, with entry identifiers 0 to 99, 3 byte metadata for each row, and each vector a 10-dimensional unit vector.

The database is a serialized `Apple_SwiftHomomorphicEncryption_Pnns_V1_Database`.
For readability, the `.txtpb` extension ensures the output database will be saved in protocol buffer text format.

> Note: For a more compact format, use the `.binpb` extension to save the database in protocol buffer binary format.
2. We view a few rows from the database with
```sh
head database.txtpb
```
which shows
```json
rows {
entry_metadata: "000"
vector: [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}
rows {
entry_id: 1
entry_metadata: "111"
vector: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}
rows {
```

You can use `PNNSProcessDatabase` to prepare the database for hosting PNNS queries.
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# ``PNNSProcessDatabase``

Private Nearest Neighbors Search database processing

## Overview
PNNS database processing will transform a database in preparation for hosting PNNS queries.
The `PNNSProcessDatabase` binary performs the processing.

### Requirements
First ensure that the `~/.swiftpm/bin` directory is on your `$PATH`.
For example, if using the `zsh` shell, add the following line to your `~/.zshrc`
```sh
export PATH="$HOME/.swiftpm/bin:$PATH"
```
Make sure to reload the path via (`source ~/.zshrc`) or by restarting your terminal emulator.

Then, to install the `PNNSProcessDatabase` executable, run
```sh
swift package experimental-install -c release --product PNNSProcessDatabase
```

### Processing
PNNS database processing is determined by its parameters.
All parameters are set with a configuration `.json` file.
The database is processed by running the `PNNSProcessDatabase` binary using
```sh
PNNSProcessDatabase path/to/config.json
```

Run `PNNSProcessDatabase --help` to get a sample JSON configuration.

#### Required Configuration Parameters

There are three required parameters:
1. `rlweParameters` is one of the [PredefinedRlweParameters](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/homomorphicencryption/predefinedrlweparameters),
e.g., `n_8192_logq_3x55_logt_30`.
2. `inputDatabase` is the path to the unprocessed input database. It must be a
serialized `Apple_SwiftHomomorphicEncryption_Pnns_V1_Database`.

> Note: The `PNNSGenerateDatabase` binary can be used to generate a sample database.
3. `outputDatabase` is the path to where the processed database will be
written. This string should have extension either `.txtpb` or `.binpb`.

A minimal configuration sample is
```json
{
"rlweParameters": "n_8192_logq_3x55_logt_30",
"inputDatabase": "/path/to/input/database.binpb",
"outputDatabase": "/path/to/output/database.binpb"
}
```
The only required parameter which affects performance is
`rlweParameters`. These parameters are picked from a set of [PredefinedRlweParameters](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/homomorphicencryption/predefinedrlweparameters).
See the [EncryptionParameters snippet](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/homomorphicencryption/usingswifthomomorphicencryption#Encryption-Parameters) for more information on encryption parameters.

For small vector dimensions, e.g., 128 or below, `n_4096_logq_27_28_28_logt_16` may be a good choice.
For larger vector dimensions, `n_8192_logq_3x55_logt_30` may be a good choice.

#### Optional Configuration Parameters

* `outputServerConfig`. This is a path to the output server configuration. While not required, it can be useful to see what configuration was used to process the database.

* `distanceMetric`. Specifies the metric to use for distance computation. For instance, to specify cosine similarity, use `"distanceMetric" : { "cosineSimilarity" : { } },`.

* `extraPlaintextModuli`. To increase precision of the distance computation, we can use multiple plaintext moduli. For instance, if `n_4096_logq_27_28_28_logt_16` gives too low precision, you might include `extraPlaintextModuli: [65537]` in your configuration.

* `batchSize`. The maximum number of vectors entries in each client query.

* `scalingFactor`. The amount to scale each query vector entry before rounding. A larger `scalingFactor` will increase precision, but may require a larger plaintext modulus for accurate results. If unspecified, the maximum scaling factor will be used.

* `databasePacking`. How the database should pack entries into a matrix.

* `queryPacking`. How the client should pack entries into a matrix.

* `trials`. How many test queries to run against the processed database.
For each trial, a query is checked for correctness.

* `trialDistanceTolerance`. The absolute value of the distance between the test query and the expected result.

### Example

Our example relies on the `PNNSGenerateDatabase` executable.
To install it, run
```sh
swift package experimental-install -c release --product PNNSGenerateDatabase
```

```sh
PNNSGenerateDatabase \
--output-database database.txtpb \
--row-count 4096 \
--metadata-size 3 \
--vector-dimension 128 \
--vector-type unit
```

To process the data, write the following configuration into a file called `config.json`.
```json
{
"batchSize" : 1,
"databasePacking" : {
"diagonal" : {
"babyStepGiantStep" : {
"babyStep" : 12,
"giantStep" : 11,
"vectorDimension" : 128
}
}
},
"distanceMetric" : {
"cosineSimilarity" : { }
},
"extraPlaintextModuli" : [ ],
"inputDatabase" : "database.txtpb",
"outputDatabase" : "processed-database.binpb",
"outputServerConfig" : "server-config.txtpb",
"queryPacking" : {
"denseRow" : { }
},
"rlweParameters" : "n_4096_logq_27_28_28_logt_17",
"trialTolerance" : 0.01,
"trials" : 10
}
```

Now call the executable.
```sh
PNNSProcessDatabase config.json
```

You might observe logs like the below
```
2024-08-29T12:58:12-0700 info PNNSProcessDatabase : [PNNSProcessDatabase] ValidationResult {
evaluation key size : 170.2 KB (2 keys),
noise budget : 4.1,
query size : 28.2 KB (1 ciphertexts),
response size : 52.1 KB (1 ciphertexts),
runtime (ms) : [10.4, 10.7, 10.8, 10.9, 11.0, 11.1, 11.2, 11.2, 12.3, 12.5]
}
```

The executable should also have saved `server-config.txtpb` and `processed-database.binpb`.
You can then load `processed-database.binpb` to host PNNS queries, for example via

```swift
let databasePath = "processed-database.binpb"
let serializedDatabase = try Apple_SwiftHomomorphicEncryption_Pnns_V1_SerializedProcessedDatabase(
from: databasePath
)
let native: SerializedProcessedDatabase<Scheme> = try serializedDatabase.native()
let database = try ProcessedDatabase<Scheme>(from: native)
let server = try Server<Scheme>(database: database)
```
Loading

0 comments on commit 9b101fd

Please sign in to comment.