-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathasphr_definition.tex
303 lines (274 loc) · 18.2 KB
/
asphr_definition.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
\section{A Secure MPM protocol}
\label{sec:asphr-defn}
In this section, we formally define the core protocol of a messaging system satisfying \cref{defn:messaging-security} under the $B$-bounded friend hypothesis. Our protocol closely resembles the Anysphere core protocol described in our whitepaper \cite{whitepaper}, modified to achieve full prevention against CF attacks.
\subsection{Cryptographic Primitives}
Anysphere relies on two cryptographic primitives: an authenticated encryption (AE) system, and a PIR scheme. We outline formal simulator-based security definitions of the two.
\subsubsection{Authenticated Encryption Scheme}
\label{subsec:AE}
Our authenticated encryption (AE) scheme $\Pi_{\ae}$ consists of three randomized efficient algorithms
$$(\gen, \enc, \dec)$$
with specifications
\begin{itemize}
\item $\gen(1^{\lambda}) \to \sk$,
\item $\enc(\sk, m) \to \ct,$
\item $\dec(\sk, \ct) \to m.$
\end{itemize}
We require the scheme to be correct and EUF-CMA unforgeable. We also require a variation of IK-CCA key privacy defined in \cite{BBDP01keyprivate}. Below are the precise security requirements.
\begin{definition}
\label{defn:AE-correctness}
Let $\sk \leftarrow \gen(1^{\lambda})$. We say that our AE scheme is \textbf{correct} if for any plaintext $m$ of length $L_{\ae}$, we have
$$\dec(\sk, \enc(\sk, m)) = m$$
with probability 1.
\end{definition}
\begin{definition}
\label{defn:AE-eval-oracle}
Given a secret key $\sk$, and a polynomial-time computable function $f: \Sigma^* \times \Sigma^* \to \Sigma^{L_{\ae}}$, the \textbf{Eval oracle}
$$\Eval_{f}(\sk, \sk', \arg_{\ct}, \arg_p) \to \ct'$$ takes as input a set of ciphertexts $\arg_{\ct} = \{\ct_i\}$, and a plaintext argument $\arg_{p}$. It sets
$$m_i \leftarrow \dec(\sk', \ct_i)$$
and outputs $\ct' = \enc(\sk, f(\{m_i\}, \arg_p))$.
\end{definition}
\begin{definition}
\label{defn:AE-eval-security}
Let $N$ and $R$ be polynomial in $\lambda$. Consider the two experiments defined in \cref{expr:AE-real-ideal-world}.
\begin{figure}[ht!]
\begin{framed}
\textbf{Real-World Experiment $\Real_{\Eval}^{\cA}$}
\begin{enumerate}
\item For $i$ from $1$ to $N$, $\sk_i \leftarrow \gen(1^{\lambda}).$
\item For $r$ from $1$ to $R$
\begin{enumerate}
\item $i, j, \arg_\ct, \arg_p \leftarrow \cA(1^{\lambda}).$
\item $\ct^{0}_r \leftarrow \Eval_f(\sk_i, \sk_j, \arg_\ct, \arg_p)$.
\item $\cA$ stores $\ct^{0}_r$.
\end{enumerate}
\end{enumerate}
\vspace{0.2cm}
\textbf{Ideal-World Experiment $\Ideal_{\Eval}^{\cA, \Sim}$}
\begin{enumerate}
\item For $i$ from $1$ to $N$, $\sk_i \leftarrow \gen(1^{\lambda}).$
\item For $r$ from $1$ to $R$
\begin{enumerate}
\item $i, j, \arg_\ct, \arg_p \leftarrow \cA(1^{\lambda}).$
\item $\ct^{1}_r \leftarrow \Sim(1^{\lambda})$.
\item $\cA$ stores $\ct^{1}_r$.
\end{enumerate}
\end{enumerate}
\end{framed}
\caption{Real- and ideal-world experiments for authenticated encryption (AE).}
\label{expr:AE-real-ideal-world}
\end{figure}
Then we say the AE scheme is \textbf{eval-secure} if there exists a p.p.t. simulator $\Sim$ such that for any polynomial-time computable function $f: \Sigma^* \times \Sigma^* \to \Sigma^{L_{\ae}}$ and any p.p.t. adversary with oracle $\cA^O$, the real-world experiment and the ideal-world experiment are computationally indistinguishable.
\end{definition}
We will later show that eval-security is implied by an analogy of IK-CCA \cite[Definition 1]{BBDP01keyprivate} for symmetric-key cryptography. Since the proof is quite lengthy, we delay it to \cref{sec:IND-CCPKA}.
\begin{definition}
\label{defn:AE-unforgability}
Consider the forging experiment described in \cref{expr:AE-forging}.
\begin{figure}[ht!]
\begin{framed}
\textbf{Forging Experiment}
\begin{enumerate}
\item $\sk \leftarrow \gen(1^{\lambda}).$
\item $\ct \leftarrow \cA^{\enc(\sk, \cdot), \dec(\sk, \cdot)}(1^{\lambda}).$
\end{enumerate}
\end{framed}
\caption{Forging experiment for authenticated encryption (AE).}
\label{expr:AE-forging}
\end{figure}
For each $i \in [N]$, let $\ct_{\query}$ be the set of outputs of the $\enc$ oracle. We say the AE scheme is \textbf{EUF-CMA} if for any p.p.t adversary with oracle $\cA^O$, we have
$$\PP(\dec(\sk, \ct) \neq \bot \land \ct \notin \ct_{\query}) = \negl(\lambda).$$
\end{definition}
\subsubsection{Symmetric Key Distribution}
\label{subsec:key-distribution}
In our system, each pair of users shares two secret keys, one for encrypting each direction of traffic. For two users identified by registration info $\reg_0$ and $\reg_1$, user $\reg_0$ decrypts messages from $\reg_1$ using their \textbf{read key} $\sk_r$, and encrypts messages to $\reg_1$ using their \textbf{write key} $\sk_w$. Thus, $\reg_0$'s read key is $\reg_1$'s write key, and vice versa.
Previous MPM systems like Pung and Addra assume each pair of users has a shared secret distributed in advance. In this paper, we assume these keys are independently generated by a trusted third party. For users identified by registration info $\reg_0$ and $\reg_1$, the trusted third party delivers their shared secret keys $(\sk_r, \sk_w)$ to user $\reg_i$ when $\gensharedsecret(\reg_i, \reg_{1 - i})$ is called. The adversary does not have access to the shared secret between trusted users.
In \cite[Section 4]{whitepaper}, we describe separate trust establishment protocols to replace the trusted third party without compromising metadata security.
\subsubsection{PIR Scheme}
\label{subsec:PIR}
Like previous MPM systems, Anysphere relies on a private information retrieval (PIR) scheme $\Pi_{\pir}$. A PIR scheme supports three efficient algorithms
\begin{itemize}
\item $\query(1^{\lambda}, i) \to (\ct, \sk)$.
\item $\answer^{\DB}(1^{\lambda}, \ct) \to a$.
\item $\dec(1^{\lambda}, \sk, a) \to x_i$.
\end{itemize}
where $\DB$ is a database with $N$ entries of length $L_{\pir}$, and $N$ is a parameter bounded by some polynomial $N(\lambda)$. It should satisfy the following standard correctness and security definitions \cite{kushilevitz1997replication}.
\begin{definition}
\label{defn:PIR-correctness}
We say that the PIR scheme is \textbf{correct} if for any database $\DB$ with $N$ entries of length $L_{\pir}$, the experiment
\begin{enumerate}
\item $(\ct, \sk) \leftarrow \query(1^{\lambda}, i)$.
\item $a \leftarrow \answer^{\DB}(1^{\lambda}, \ct)$.
\item $x_i \leftarrow \dec(1^{\lambda}, \sk, a)$.
\end{enumerate}
satisfies $x_i = DB[i]$ with probability $1$.
\end{definition}
\begin{definition}
\label{defn:PIR-SIM-security}
Let $R$ be polynomially bounded in $\lambda$. Consider the experiments in \cref{expr:pir-real-ideal-world}. We say the PIR scheme is \textbf{SIM-secure} if there exists a p.p.t simulator $\Sim$ such that for any stateful p.p.t. adversary $\cA$, the view of $\cA$ under the real-world experiment and the ideal-world experiment are computationally indistinguishable.
\begin{figure}[ht!]
\begin{framed}
\textbf{Real-World Experiment $\Real^{\cA}_{\pir}$}
\begin{enumerate}
\item for $r$ from $1$ to $R$
\begin{enumerate}
\item $i_r \leftarrow \cA(1^{\lambda})$.
\item $\ct_r^0, \sk \leftarrow \query(1^{\lambda}, i_r)$.
\item $\cA$ stores $\ct_r^0$.
\end{enumerate}
\end{enumerate}
\vspace{0.2cm}
\textbf{Ideal-World Experiment $\Ideal^{\cA}_{\pir}$}
\begin{enumerate}
\item for $r$ from $1$ to $R$
\begin{enumerate}
\item $i_r \leftarrow \cA(1^{\lambda})$.
\item $\ct_r^1, \sk \leftarrow \Sim(1^{\lambda})$.
\item $\cA$ stores $\ct_r^1$.
\end{enumerate}
\end{enumerate}
\end{framed}
\caption{Real- and ideal-world experiments for private information retrieval (PIR).}
\label{expr:pir-real-ideal-world}
\end{figure}
\end{definition}
For our implementation, we use Libsodium's key exchange functionality to implement the $\gensharedsecret()$ functions as described in \cite[Section 4]{whitepaper}, and Libsodium's secret key AEAD for the $\enc$ and $\dec$ functions \cite{libsodium}. We use Addra's FastPIR as the PIR protocol. The definition, correctness, and security proof of FastPIR can be found in \cite[Section 4]{ahmad2021addra} and in \cite{angel2018thesis} with more detail.
Throughout the rest of the document, we denote the AE scheme $\Pi_{\ae}$ and the PIR scheme $\Pi_{\pir}$.
\subsection{Messages, Sequence numbers, ACKs}
\label{subsec:ACK}
This section describes how Anysphere adopts TCP's acknowledgement system to satisfy integrity (\cref{defn:messaging-integrity}).
Each client $i$ labels all outgoing messages to another client $j$ with a positive integer called the sequence number. The client transmits the labeled message\footnote{Called chunk in the implementation.} $\msg^{lb} = (k, \msg)$, where $k$ is the sequence number and $\msg$ is the actual message.
Critical to both consistent prefix and eventual consistency are the ACK messages. An ACK message denoted $\ACK(k)$ encodes a single integer $k$.\footnote{In our implementation, the ACK message is slightly more complicated to take chunking into account.} It means ``I have read all messages up to sequence number $k$ from you''. As we will soon define rigorously, user $i$ broadcasts the message with sequence number $k$ until user $j$ sends $\ACK(k)$, in which case they begin broadcasting the message with sequence number $k + 1$. We ensure ACK messages are distinct from labeled messages generated from user input.\footnote{In our implementation, we encode ACKs and labeled messages with different protobuf structs.}
Finally, we deal with the subtlety of the length of messages. Let $L_{\msglb}$ denote the length of a labeled message $\msg^{lb}$, and let $L_{\ct}$ be the length of a ciphertext generated by encoding $\msg^{lb}$ with $\Pi_{\ae}.\enc$. While the input messages have length $L_{\msg}$, the messages the client sent to the server have length $L_{\ct}$. We set parameter $L_{\ae} = L_{\msglb}$ in the AE scheme, and $L_{\pir} = L_{\ct}$ in the PIR scheme.
\textbf{Remark}: To support $T(\lambda)$ rounds of messages between each pair of users, we need to reserve $\log T(\lambda)$ bits for the label. Therefore, $L_{\ae}$ and $L_{\pir}$ depend linearly on $\log \lambda$.
\subsection{The Core Protocol}
\label{subsec:core-protocol}
%\todo{I'm going to stick to our actual implementation as closely as possible. Please point out anything that doesn't agree with the current protocol, greatly appreciate it.}
Recall some notations.
\begin{itemize}
\item $\lambda$ is the security parameter.
\item $N = N(\lambda)$ is the number of users.
\item $T = T(\lambda)$ is the number of timesteps our protocol is run.
\item $L_{\msg}$ is the length of the raw message.
\item $\Pi_{\ae}$ is an AE scheme satisfying \cref{defn:AE-correctness}, \cref{defn:AE-eval-security} and \cref{defn:AE-unforgability}.
\item $\Pi_{\pir}$ is a PIR scheme satisfying \cref{defn:PIR-correctness} and \cref{defn:PIR-SIM-security}.
\end{itemize}
We now define the core protocol. We assume the $B$-bounded friends scenario in \cref{defn:messaging-security-weaker}.
\begin{definition}
\label{defn:asphr-code}
The \textbf{$B$-bounded Anysphere messaging system} $\Pi_{\asphr}$ implements the method signatures in \cref{defn:messaging-scheme} with the pseudocode below. In each method, the caller stores all inputs for future use.
\vspace{10pt}
$\mathbf{\Pi_{\asphr}.C.\mathsf{Register}}(1^{\lambda}, i, N)$
\vspace{5pt}
\hrule
\vspace{5pt}
\begin{enumerate}
\item Initialize empty map $\frienddb$ (database of friends). The map takes registration info $\reg$ as keys and the following fields as values.\footnote{These fields are currently implicit. They are made explicit here for simplicity}
\begin{itemize}
\item $\mathsf{sk}$, the secret keys shared with the friend identified by $\reg$.
\item $\seqs$, the sequence number of the current message being broadcasted to the friend identified by $\reg$.
\item $\seqe$, the largest sequence number ever assigned to messages to the friend identified by $\reg$.
\item $\seqr$, the largest sequence number such that the client has received all messages with sequence number in $1,\cdots,\seqr$ from the friend.
\end{itemize}
\item Initialize empty maps $\inb$ and $\outb$. The maps take registration info $\reg$ as keys and arrays of messages as values.\footnote{They are named Friend, Inbox, Outbox in our code. Our code is slightly more complicated to support features like sending to multiple friends and chunking.}
\item Set a transmission schedule $T_{\trans}$ (the time between messages sent over the network to the server). The user can customize this parameter. We assume $T_{\trans}$ is upper bounded by a constant $T_{\trans}^U$.
\item Initialize a ``dummy'' key $\sk_{\dummy} \leftarrow \Pi_{\ae}.\gen(1^{\lambda})$.
\item Return $\reg = (i)$.
\end{enumerate}
\vspace{10pt}
$\mathbf{\Pi_{\asphr}.S.\mathsf{InitServer}}(1^{\lambda}, N)$.
\vspace{5pt}
\hrule
\vspace{5pt}
Initialize arrays $\msgdb, \ackdb$ of length $N$ with entries of length $L_{\ct}$. Fill them with random strings.
\vspace{10pt}
$\mathbf{\Pi_{\asphr}.C.\Userinput}(t, \cI)$
\vspace{5pt}
\hrule
\vspace{5pt}
This method runs in two phases. Phase 1 handles the user input $\cI$, and phase 2 formulates the request to the server.
\textit{Phase 1.}
If $\cI = \emptyset$, do nothing.
If $\cI = \send(\reg, \msg)$,
\begin{enumerate}
\item Check that $\reg$ is in $\frienddb$. If not, skip to Phase 2.
\item If $\reg$ is the registration of $C_i$ itself, append $\msg$ to $\inb[\reg]$, and skip to Phase 2.\footnote{Skipping this step breaks consistent prefix.}
\item Add $1$ to $\frienddb[\reg].\seqe$.
\item Push $\msg^{lb} = (\frienddb[\reg].\seqe, \msg)$ to $\outb[\reg]$.
\end{enumerate}
If $\cI = \trust(\reg)$.
\begin{enumerate}
\item Check if $\reg$ is in $\frienddb$. If so, skip to Phase 2.
\item $\sk_r, \sk_w \leftarrow \gensharedsecret(\reg_m, \reg).$ Here $\reg_m$ is the client's own registration information.
\item $\frienddb[\reg] \leftarrow \{\sk: (\sk_r, \sk_w), \seqs: 1, \seqe: 0, \\ \seqr: 0\}$.
\end{enumerate}
\textit{Phase 2.}
\begin{enumerate}
\item If $t$ is not divisible by $T_{\trans}$, return $\emptyset$.
\item Let $\{\reg_1, \cdots, \reg_k\}$ be the keys of $\frienddb$, where $k\leq B$ since we assume $B$-bounded friends. Construct
$$S = [\reg_1, \cdots, \reg_k, \reg, \cdots, \reg],$$
where we add $B - k$ copies of $\reg = (-1)$, a dummy registration info. Sample $\reg_s, \reg_r$ uniformly and independently at random from $S$.\footnote{This step is inefficient. We will change this step in \cref{sec:actual-asphr-protocol}.}
\item Let $\msg^{lb}$ be the labeled message with sequence number $\frienddb[\reg_s].\seqs$ in $\outb[\reg_s]$. If $\outb[\reg_s]$ is empty, let $\msg \leftarrow (-1, 0^{L_{\msg}})$.
\item $\sk_w \leftarrow \frienddb[\reg_s].\sk[1]$. If $\reg_s = \reg_0$ or does not exist in $\frienddb$, set $\sk_w \leftarrow \sk_{\dummy}.$
\item $\seqr \leftarrow \frienddb[\reg_s].\seqr$. If $\reg_s = \reg_0$ or does not exist in $\frienddb$, set $\seqr \leftarrow -1.$
\item Encrypt Messages with $\sk_w$.
\begin{itemize}
\item $\ct_{\msg} \leftarrow \Pi_{\ae}.\enc(\sk_w, \msg^{lb}).$
\item $\ct_{\ack} \leftarrow \Pi_{\ae}.\enc(\sk_w, \ACK(\seqr))$.
%\arvid{should we talk about the ACK db here, and the fact that we always send all ACKs to everyone? maybe we shouldn't do that anymore... it was necessary for prioritization, but if we don't want to do prioritization then maybe we shouldn't do it anymore}
\end{itemize}
\item Let $\reg_r = (i_r)$. Formulate a PIR request for index $i_r$. %\arvid{this $r$ is not the same as the $r$ in $\reg_r$? maybe this should be $i_r$ or something}
\begin{itemize}
\item $\ct_{\query}, \sk_{\pir} \leftarrow \Pi_{\pir}.\query(1^{\lambda}, i_r).$
\end{itemize}
\item return $\req = (\ct_{\msg}, \ct_{\ack}, \ct_{\query})$.
\item Remember $\reg_r$ and $\sk_{\pir}$.
\end{enumerate}
\vspace{10pt}
$\mathbf{\Pi_{\asphr}.S.\Clientrpc}(t, \{\req_i\}_{i = 1}^N)$
\vspace{5pt}
\hrule
\vspace{5pt}
\item for $i$ from $1$ to $N$
%\arvid{do we want to deal with authentication tokens here? only if we modify the security definition to include potentially malicious clients, which I'm not sure is worth the trouble...}
%\stzh{I vote for no Authentication token for now.}
\begin{enumerate}
\item If $\req_i = \emptyset$, let $\resp_i = \emptyset$, and continue to next $i$.
\item Parse $\req_i = (\ct_{\msg}, \ct_{\ack}, \ct_{\query})$.
\item $\msgdb[i] \leftarrow \ct_{\msg}, \ackdb[i] \leftarrow \ct_{\ack}$.
\item $a_{\msg} \leftarrow \Pi_{\pir}.\answer^{\msgdb}(1^{\lambda}, \ct_{\query}).$
\item $a_{\ack} \leftarrow \Pi_{\pir}.\answer^{\ackdb}(1^{\lambda}, \ct_{\query}).$
\item $\resp_i \leftarrow (a_{\msg}, a_{\ack}).$
\end{enumerate}
\vspace{10pt}
$\mathbf{\Pi_{\asphr}.C.\Serverrpc}(t, \resp)$
\vspace{5pt}
\hrule
\vspace{5pt}
\begin{enumerate}
\item If $\resp = \emptyset$, return.
\item Parse $\resp = (a_{\msg}, a_{\ack})$. Let $\reg_r, \sk_{\pir}$ be defined in the last call to $\Pi_{\asphr}.C.\Userinput$.
\item $\ct_{\msg} \leftarrow \Pi_{\pir}.\Dec(1^{\lambda}, \sk_{\pir}, a_{\msg}).$
\item $\ct_{\ack} \leftarrow \Pi_{\pir}.\Dec(1^{\lambda}, \sk_{\pir}, a_{\ack}).$
\item $\sk_{r} \leftarrow \frienddb[\reg_r].\sk[0]$.
\item Decipher the message.
\begin{enumerate}
\item $\msg^{lb} \leftarrow \Pi_{\ae}.\Dec(\sk_{r}, \ct_{\msg})$.
\item If $\msg^{lb} = \bot$ or $\msg^{lb}[0] \neq \frienddb[\reg_r].\seqr + 1,$
skip the next two steps.
\item Add $1$ to $\frienddb[\reg_r].\seqr$.
\item $\msg \leftarrow \msg^{lb}[1]$. Push $\msg$ to $\inb[\reg_r]$.
\end{enumerate}
\item Decipher the ACK.
\begin{enumerate}
\item $\ack \leftarrow \Pi_{\ae}.\Dec(\sk_{r}, \ct_{\ack})$.
\item If $\ack = \bot$ or $\ack$ is not the form $\ACK(k)$ for some $k$, return.
\item Let $\ack = \ACK(k)$. If $k < \frienddb[\reg_r].\seqs$, return.
\item $\frienddb[\reg_r].\seqs \leftarrow k + 1$. Remove the message with sequence number $k$ from $\outb[\reg_r]$.
\end{enumerate}
\end{enumerate}
\vspace{10pt}
$\mathbf{\Pi_{\asphr}.C.\mathsf{GetView}}()$
\vspace{5pt}
\hrule
\vspace{5pt}
Let $\cF$ be the set of keys in $\frienddb$. Let $\cM$ be the set $\{(\reg_r, \msg): \msg \in \inb[\reg_r]\}$. Return $(\cF, \cM)$.
\end{definition}