This repository has been archived by the owner on Aug 27, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
averages.tex
270 lines (261 loc) · 11.7 KB
/
averages.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
% This file is part of the GROMACS molecular simulation package.
%
% Copyright (c) 2013, by the GROMACS development team, led by
% David van der Spoel, Berk Hess, Erik Lindahl, and including many
% others, as listed in the AUTHORS file in the top-level source
% directory and at http://www.gromacs.org.
%
% GROMACS is free software; you can redistribute it and/or
% modify it under the terms of the GNU Lesser General Public License
% as published by the Free Software Foundation; either version 2.1
% of the License, or (at your option) any later version.
%
% GROMACS is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
% Lesser General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public
% License along with GROMACS; if not, see
% http://www.gnu.org/licenses, or write to the Free Software Foundation,
% Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%
% If you want to redistribute modifications to GROMACS, please
% consider that scientific software is very special. Version
% control is crucial - bugs must be traceable. We will be happy to
% consider code for inclusion in the official distribution, but
% derived work must not be called official GROMACS. Details are found
% in the README & COPYING files - if they are missing, get the
% official version at http://www.gromacs.org.
%
% To help us fund GROMACS development, we humbly ask that you cite
% the research papers on the package. Check out http://www.gromacs.org
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% AVERAGES AND FLUCTUATIONS
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{Averages and fluctuations}
\section{Formulae for averaging}
{\bf Note:} this section was taken from ref~\cite{Gunsteren94a}.
When analyzing a MD trajectory averages $\left<x\right>$ and fluctuations
\beq
\left<(\Delta x)^2\right>^{\half} ~=~ \left<[x-\left<x\right>]^2\right>^{\half}
\label{eqn:var0}
\eeq
of a quantity $x$ are to be computed.
The variance $\sigma_x$ of a series of N$_x$ values,
\{x$_i$\}, can be computed from
\beq
\sigma_x~=~ \sum_{i=1}^{N_x} x_i^2 ~-~ \frac{1}{N_x}\left(\sum_{i=1}^{N_x}x_i\right)^2
\label{eqn:var1}
\eeq
Unfortunately this formula is numerically not very accurate,
especially when $\sigma_x^{\half}$ is small compared to the values of $x_i$.
The following (equivalent) expression is numerically more accurate
\beq
\sigma_x ~=~ \sum_{i=1}^{N_x} [x_i - \left<x\right>]^2
\eeq
with
\beq
\left<x\right> ~=~ \frac{1}{N_x} \sum_{i=1}^{N_x} x_i
\label{eqn:var2}
\eeq
Using ~\eqnsref{var1}{var2} one has to go
through the series of $x_i$ values twice, once to determine
$\left<x\right>$ and again to
compute $\sigma_x$,
whereas \eqnref{var0} requires only one sequential scan of
the series \{x$_i$\}. However, one may cast \eqnref{var1} in
another form, containing partial sums, which allows for a sequential
update algorithm. Define the partial sum
\beq
X_{n,m} ~=~ \sum_{i=n}^{m} x_i
\eeq
and the partial variance
\beq
\sigma_{n,m} ~=~ \sum_{i=n}^{m} \left[x_i - \frac{X_{n,m}}{m-n+1}\right]^2
\label{eqn:sigma}
\eeq
It can be shown that
\beq
X_{n,m+k} ~=~ X_{n,m} + X_{m+1,m+k}
\label{eqn:Xpartial}
\eeq
and
\bea
\sigma_{n,m+k} &=& \sigma_{n,m} + \sigma_{m+1,m+k} + \left[~\frac {X_{n,m}}{m-n+1} - \frac{X_{n,m+k}}{m+k-n+1}~\right]^2~* \nonumber\\
&& ~\frac{(m-n+1)(m+k-n+1)}{k}
\label{eqn:varpartial}
\eea
For $n=1$ one finds
\beq
\sigma_{1,m+k} ~=~ \sigma_{1,m} + \sigma_{m+1,m+k}~+~
\left[~\frac{X_{1,m}}{m} - \frac{X_{1,m+k}}{m+k}~\right]^2~ \frac{m(m+k)}{k}
\label{eqn:sig1}
\eeq
and for $n=1$ and $k=1$ ~(\eqnref{varpartial}) becomes
\bea
\sigma_{1,m+1} &=& \sigma_{1,m} +
\left[\frac{X_{1,m}}{m} - \frac{X_{1,m+1}}{m+1}\right]^2 m(m+1)\\
&=& \sigma_{1,m} +
\frac {[~X_{1,m} - m x_{m+1}~]^2}{m(m+1)}
\label{eqn:simplevar0}
\eea
where we have used the relation
\beq
X_{1,m+1} ~=~ X_{1,m} + x_{m+1}
\label{eqn:simplevar1}
\eeq
Using formulae~(\eqnref{simplevar0}) and ~(\eqnref{simplevar1}) the average
\beq
\left<x\right> ~=~ \frac{X_{1,N_x}}{N_x}
\eeq
and the fluctuation
\beq
\left<(\Delta x)^2\right>^{\half} = \left[\frac {\sigma_{1,N_x}}{N_x}\right]^{\half}
\eeq
can be obtained by one sweep through the data.
\section{Implementation}
In {\gromacs} the instantaneous
energies $E(m)$ are stored in the \swapindex{energy}{file}, along with the
values of $\sigma_{1,m}$ and $X_{1,m}$. Although the steps are counted from 0,
for the energy and fluctuations steps are counted from 1. This means that the
equations presented here are the ones that are implemented.
We give somewhat lengthy derivations in this section
to simplify checking of code and equations later on.
\subsection{Part of a Simulation}
It is not uncommon to perform a simulation where the first part,
{\eg} 100 ps, is taken as \normindex{equilibration}. However, the
averages and fluctuations as printed in the \swapindex{log}{file}
are computed over the whole simulation. The equilibration time,
which is now part of the simulation, may in such a case invalidate the
averages and fluctuations, because these numbers are now dominated
by the initial drift towards equilibrium.
Using~\eqnsref{Xpartial}{varpartial} the average and
standard deviation over part of the trajectory can be computed as:
\bea
X_{m+1,m+k} &=& X_{1,m+k} - X_{1,m} \\
\sigma_{m+1,m+k} &=& \sigma_{1,m+k}-\sigma_{1,m} - \left[~\frac{X_{1,m}}{m} - \frac{X_{1,m+k}}{m+k}~\right]^{2}~ \frac{m(m+k)}{k}
\eea
or, more generally (with $p \geq 1$ and $q \geq p$):
\bea
X_{p,q} &=& X_{1,q} - X_{1,p-1} \\
\sigma_{p,q} &=& \sigma_{1,q}-\sigma_{1,p-1} - \left[~\frac{X_{1,p-1}}{p-1} - \frac{X_{1,q}}{q}~\right]^{2}~ \frac{(p-1)q}{q-p+1}
\eea
{\bf Note} that implementation of this is not entirely trivial, since energies
are not stored every time step of the simulation. We therefore have to construct
$X_{1,p-1}$ and $\sigma_{1,p-1}$ from the information at time $p$ using
\eqnsref{simplevar0}{simplevar1}:
\bea
X_{1,p-1} &=& X_{1,p} - x_p \\
\sigma_{1,p-1} &=& \sigma_{1,p} - \frac {[~X_{1,p-1} - (p-1) x_{p}~]^2}{(p-1)p}
\eea
\subsection{Combining two simulations}
Another frequently occurring problem is, that the fluctuations of two simulations
must be combined. Consider the following example: we have two simulations
(A) of $n$ and (B) of $m$ steps, in which the second simulation is a
continuation of the first. However, the second simulation starts numbering from 1
instead of from $n+1$. For the partial sum
this is no problem, we have to add $X_{1,n}^A$ from run A:
\beq
X_{1,n+m}^{AB} ~=~ X_{1,n}^A + X_{1,m}^B
\label{eqn:pscomb}
\eeq
When we want to compute the partial variance from the two components we have to
make a correction $\Delta\sigma$:
\beq
\sigma_{1,n+m}^{AB} ~=~ \sigma_{1,n}^A + \sigma_{1,m}^B +\Delta\sigma
\eeq
if we define $x_i^{AB}$ as the combined and renumbered set of data points we can
write:
\beq
\sigma_{1,n+m}^{AB} ~=~ \sum_{i=1}^{n+m} \left[x_i^{AB} - \frac{X_{1,n+m}^{AB}}{n+m}\right]^2
\eeq
and thus
\beq
\sum_{i=1}^{n+m} \left[x_i^{AB} - \frac{X_{1,n+m}^{AB}}{n+m}\right]^2 ~=~
\sum_{i=1}^{n} \left[x_i^{A} - \frac{X_{1,n}^{A}}{n}\right]^2 +
\sum_{i=1}^{m} \left[x_i^{B} - \frac{X_{1,m}^{B}}{m}\right]^2 +\Delta\sigma
\eeq
or
\bea
\sum_{i=1}^{n+m} \left[(x_i^{AB})^2 - 2 x_i^{AB}\frac{X^{AB}_{1,n+m}}{n+m} + \left(\frac{X^{AB}_{1,n+m}}{n+m}\right)^2 \right] &-& \nonumber \\
\sum_{i=1}^{n} \left[(x_i^{A})^2 - 2 x_i^{A}\frac{X^A_{1,n}}{n} + \left(\frac{X^A_{1,n}}{n}\right)^2 \right] &-& \nonumber \\
\sum_{i=1}^{m} \left[(x_i^{B})^2 - 2 x_i^{B}\frac{X^B_{1,m}}{m} + \left(\frac{X^B_{1,m}}{m}\right)^2 \right] &=& \Delta\sigma
\eea
all the $x_i^2$ terms drop out, and the terms independent of the summation
counter $i$ can be simplified:
\bea
\frac{\left(X^{AB}_{1,n+m}\right)^2}{n+m} \,-\,
\frac{\left(X^A_{1,n}\right)^2}{n} \,-\,
\frac{\left(X^B_{1,m}\right)^2}{m} &-& \nonumber \\
2\,\frac{X^{AB}_{1,n+m}}{n+m}\sum_{i=1}^{n+m}x_i^{AB} \,+\,
2\,\frac{X^{A}_{1,n}}{n}\sum_{i=1}^{n}x_i^{A} \,+\,
2\,\frac{X^{B}_{1,m}}{m}\sum_{i=1}^{m}x_i^{B} &=& \Delta\sigma
\eea
we recognize the three partial sums on the second line and use \eqnref{pscomb}
to obtain:
\beq
\Delta\sigma ~=~ \frac{\left(mX^A_{1,n} - nX^B_{1,m}\right)^2}{nm(n+m)}
\eeq
if we check this by inserting $m=1$ we get back \eqnref{simplevar0}
\subsection{Summing energy terms}
The {\tt \normindex{g_energy}} program can also sum energy terms into one, {\eg}
potential + kinetic = total. For the partial averages this is again easy
if we have $S$ energy components $s$:
\beq
X_{m,n}^S ~=~ \sum_{i=m}^n \sum_{s=1}^S x_i^s ~=~ \sum_{s=1}^S \sum_{i=m}^n x_i^s ~=~ \sum_{s=1}^S X_{m,n}^s
\label{eqn:sumterms}
\eeq
For the fluctuations it is less trivial again, considering for example
that the fluctuation in potential and kinetic energy should cancel.
Nevertheless we can try the same approach as before by writing:
\beq
\sigma_{m,n}^S ~=~ \sum_{s=1}^S \sigma_{m,n}^s + \Delta\sigma
\eeq
if we fill in \eqnref{sigma}:
\beq
\sum_{i=m}^n \left[\left(\sum_{s=1}^S x_i^s\right) - \frac{X_{m,n}^S}{m-n+1}\right]^2 ~=~
\sum_{s=1}^S \sum_{i=m}^n \left[\left(x_i^s\right) - \frac{X_{m,n}^s}{m-n+1}\right]^2 + \Delta\sigma
\label{eqn:sigmaterms}
\eeq
which we can expand to:
\bea
&~&\sum_{i=m}^n \left[\sum_{s=1}^S (x_i^s)^2 + \left(\frac{X_{m,n}^S}{m-n+1}\right)^2 -2\left(\frac{X_{m,n}^S}{m-n+1}\sum_{s=1}^S x_i^s + \sum_{s=1}^S \sum_{s'=s+1}^S x_i^s x_i^{s'} \right)\right] \nonumber \\
&-&\sum_{s=1}^S \sum_{i=m}^n \left[(x_i^s)^2 - 2\,\frac{X_{m,n}^s}{m-n+1}\,x_i^s + \left(\frac{X_{m,n}^s}{m-n+1}\right)^2\right] ~=~\Delta\sigma
\eea
the terms with $(x_i^s)^2$ cancel, so that we can simplify to:
\bea
&~&\frac{\left(X_{m,n}^S\right)^2}{m-n+1} -2 \frac{X_{m,n}^S}{m-n+1}\sum_{i=m}^n\sum_{s=1}^S x_i^s -2\sum_{i=m}^n\sum_{s=1}^S \sum_{s'=s+1}^S x_i^s x_i^{s'}\, - \nonumber \\
&~&\sum_{s=1}^S \sum_{i=m}^n \left[- 2\,\frac{X_{m,n}^s}{m-n+1}\,x_i^s + \left(\frac{X_{m,n}^s}{m-n+1}\right)^2\right] ~=~\Delta\sigma
\eea
or
\beq
-\frac{\left(X_{m,n}^S\right)^2}{m-n+1} -2\sum_{i=m}^n\sum_{s=1}^S \sum_{s'=s+1}^S x_i^s x_i^{s'}\, + \sum_{s=1}^S \frac{\left(X_{m,n}^s\right)^2}{m-n+1} ~=~\Delta\sigma
\eeq
If we now expand the first term using \eqnref{sumterms} we obtain:
\beq
-\frac{\left(\sum_{s=1}^SX_{m,n}^s\right)^2}{m-n+1} -2\sum_{i=m}^n\sum_{s=1}^S \sum_{s'=s+1}^S x_i^s x_i^{s'}\, + \sum_{s=1}^S \frac{\left(X_{m,n}^s\right)^2}{m-n+1} ~=~\Delta\sigma
\eeq
which we can reformulate to:
\beq
-2\left[\sum_{s=1}^S \sum_{s'=s+1}^S X_{m,n}^s X_{m,n}^{s'}\,+\sum_{i=m}^n\sum_{s=1}^S \sum_{s'=s+1}^S x_i^s x_i^{s'}\right] ~=~\Delta\sigma
\eeq
or
\beq
-2\left[\sum_{s=1}^S X_{m,n}^s \sum_{s'=s+1}^S X_{m,n}^{s'}\,+\,\sum_{s=1}^S \sum_{i=m}^nx_i^s \sum_{s'=s+1}^S x_i^{s'}\right] ~=~\Delta\sigma
\eeq
which gives
\beq
-2\sum_{s=1}^S \left[X_{m,n}^s \sum_{s'=s+1}^S \sum_{i=m}^n x_i^{s'}\,+\,\sum_{i=m}^n x_i^s \sum_{s'=s+1}^S x_i^{s'}\right] ~=~\Delta\sigma
\eeq
Since we need all data points $i$ to evaluate this, in general this is not possible.
We can then make an estimate of $\sigma_{m,n}^S$ using only the data points that
are available using the left hand side of \eqnref{sigmaterms}. While the average can
be computed using all time steps in the simulation, the accuracy of the
fluctuations is thus limited by the frequency with which energies are saved.
Since this can be easily done with a program such as \normindex{xmgr} this is not
built-in in {\gromacs}.
% LocalWords: Formulae varpartial formulae simplevar Xpartial pscomb mX nX SX
% LocalWords: sumterms nx sigmaterms xmgr ps nm