-
Notifications
You must be signed in to change notification settings - Fork 5
/
combine_cliques_and_write_to_file.m
341 lines (266 loc) · 14.4 KB
/
combine_cliques_and_write_to_file.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
function [ maxFiltration ] = combine_cliques_and_write_to_file(...
symMatrix, maxCliqueSize, maxDensity, filePrefix, writeMaxCliques )
% ----------------------------------------------------------------
% ENUMERATE CLIQUES AND WRITE TO FILE
% written by Chad Giusti, 9/2014
%
% Given a symmetric matrix, use an iterative "combining" method to
% enumerate cliques of a fixed size or smaller in the thresholded family
% of graphs with adjancency matrices obtained through thresholding the
% matrix at successively lower values until the density of above threshold
% entries is above a specified maximum density. Write these cliques along
% with the threshold level to a file for input to Perseus.
%
% ** THE INPUT MATRIX IS CURRENTLY ASSUMED TO HAVE NO REPEATED ENTRIES**
%
% INPUTS:
% symMatrix: the symmetric matrix whose order complex
% we are working with
% maxCliqueSize: maximum size of clique to enumerate
% maxDensity: maximum edge density -- stopping condition
% filePrefix: file prefix for output file
%
% ----------------------------------------------------------------
numVertices = size(symMatrix, 1);
% ----------------------------------------------------------------
% Get a list of the off-diagonal entries of the matrix.
% ----------------------------------------------------------------
offDiagMask = logical(triu(ones(numVertices),1));
offDiagEntries = sort(unique(symMatrix(offDiagMask)), 'descend');
% ----------------------------------------------------------------
% Ensure that the off-diagonal matrix entries are all positive
% except the smallest one, which doesn't matter anyway
% ----------------------------------------------------------------
minEntry = min(offDiagEntries);
shiftedSymMatrix = symMatrix - ...
minEntry * (ones(numVertices) - eye(numVertices));
shiftedOffDiagEntries = offDiagEntries - ...
minEntry * ones(size(offDiagEntries));
% ----------------------------------------------------------------
% Whenever an entire row of entries in the matrix (off the diagonal)
% is above threshold, there can be no further homology. Stop there
% at the latest!
% ----------------------------------------------------------------
specifiedMaxFiltration = floor(nchoosek(numVertices, 2) * maxDensity);
maxMinColEntry = max(min(shiftedSymMatrix +...
eye(numVertices) * max(shiftedOffDiagEntries)));
computedMaxFiltration = find(shiftedOffDiagEntries == maxMinColEntry);
maxFiltration = min(specifiedMaxFiltration, computedMaxFiltration);
% ----------------------------------------------------------------
% Construct an ordered list of edges to add to the matrix based
% on decresing entry size
% ----------------------------------------------------------------
edgeList = zeros(maxFiltration, 2);
for f = 1:maxFiltration
[edgeList(f,1), edgeList(f,2)] = ...
find(shiftedSymMatrix == shiftedOffDiagEntries(f), 1, 'first');
end
% ----------------------------------------------------------------
% Create the output file and add the first line indicating
% clique notation format (see Perseus documentation)
% ----------------------------------------------------------------
cliqueFid = fopen(sprintf('%s_simplices.txt',filePrefix), 'w');
fprintf(cliqueFid, '1\n');
if writeMaxCliques
maxCliqueFid = fopen(sprintf('%s_max_simplices.txt',filePrefix), 'w');
fprintf(maxCliqueFid, '1\n');
end
% ----------------------------------------------------------------
% Initialize clique list to the set of vertices in the graph
% and output them to the file.
% ----------------------------------------------------------------
cliqueMatrix = sparse(eye(numVertices));
nonemptyIntersections = sparse(false(numVertices));
print_clique_matrix_to_perseus_file(cliqueMatrix, maxCliqueSize, [], cliqueFid, 1);
% ----------------------------------------------------------------
% At each filtration level, create a list of newly added cliques
% in the size range (2, maxCliqueSize) and output them to file
% ----------------------------------------------------------------
for f = 1:maxFiltration
% ----------------------------------------------------------------
% Get a list of maximal cliques containing each vertex in the new
% edge, and compute which pairs from these lists intersect,
% thus forming new cliques at this filtration
% ----------------------------------------------------------------
cliqueIndices = [cliqueMatrix(:,edgeList(f,1)) ...
cliqueMatrix(:,edgeList(f,2))];
possibleIntersections = sparse(logical(cliqueIndices(:, 1) ...
* cliqueIndices(:, 2)'));
actualIntersections = nonemptyIntersections & possibleIntersections;
[ints_1, ints_2] = find(actualIntersections);
% ----------------------------------------------------------------
% Some current maximal cliques will be contained in new maximal
% ----------------------------------------------------------------
remIndex = 0;
cliquesToRemove = zeros(2 * (length(ints_1)+1),1);
if ~isempty(ints_1)
% ----------------------------------------------------------------
% The two vertices lie in existing cliques which intersect one
% another. Get a list of unique intersections of cliques containing
% the two vertices.
% ----------------------------------------------------------------
[cliqueInts, ia, ~] = unique(cliqueMatrix(ints_1, :) & ...
cliqueMatrix(ints_2, :), 'rows');
ints_1 = ints_1(ia);
ints_2 = ints_2(ia);
numIntersections = length(ints_1);
if numIntersections > 1
% ------------------------------------------------------------
% It is possible that some of these new intersections are not
% maximal under containment. Detect this and remove smaller
% elements
%
% To do this quickly in matlab, we vector-ize the computation.
% Observe that if one of the intersection sets is properly
% contained in another then the intersection of those
% sets equals the smaller of the two. Since we have a binary
% set membership matrices, we can find intersection size using
% matrix product. The diagonal elements are the sizes of the
% underlying sets, from which we construct comparison matrices
% to do the inequality comparison across the whole matrix
% at once. Lastly, we check which of the "complete" overlaps
% are with larger sets and remove those sets.
% ------------------------------------------------------------
nCliqueInts = double(cliqueInts);
cliqueIntOverlaps = nCliqueInts * nCliqueInts';
cliqueIntSizes = ...
cliqueIntOverlaps(logical(eye(numIntersections)));
sizeCompArray = (cliqueIntSizes * ones(1, numIntersections))';
fullOverlap = (sizeCompArray == cliqueIntOverlaps);
fullOverlap(logical(eye(numIntersections))) = 0;
isMaximal = ...
~any((fullOverlap .* sizeCompArray') > sizeCompArray,1);
cliqueInts = cliqueInts(isMaximal, :);
ints_1 = ints_1(isMaximal);
ints_2 = ints_2(isMaximal);
end
% ----------------------------------------------------------------
% Build the sparse matrix representation of the new cliques
% and add their intersection information to the matrix
% ----------------------------------------------------------------
addedRows = length(ints_1);
numNewEntries = nnz(cliqueInts) + 2 * addedRows;
cliqueColsToAdd = zeros(numNewEntries, 1);
cliqueRowsToAdd = ones(numNewEntries, 1);
maxNumIntersections = max(sum(nonemptyIntersections));
neiOldCliqueColsToAdd = zeros(addedRows * maxNumIntersections, 1);
neiOldCliqueRowsToAdd = ones(addedRows * maxNumIntersections, 1);
addIndex = 0;
remIndex = 0;
neiOldIndex = 0;
for i=1:addedRows
% ------------------------------------------------------------
% For each clique to add, build new sparse matrix rows
% ------------------------------------------------------------
intersectionSize = nnz(cliqueInts(i,:));
cliqueColsToAdd(addIndex+1:addIndex+intersectionSize) = ...
find(cliqueInts(i,:));
cliqueColsToAdd((1:2)+addIndex+intersectionSize) = ...
edgeList(f,:);
cliqueRowsToAdd(addIndex+1:addIndex+intersectionSize + 2) = ...
ones(intersectionSize + 2, 1) * i;
addIndex = addIndex + intersectionSize + 2;
% ------------------------------------------------------------
% Determine which cliques in the graph this new clique
% intersects and record that information for new rows
% in the intersection sparse matrix
% ------------------------------------------------------------
newVerticesInClique = sparse(ones(2,1), edgeList(f,:),...
ones(2,1), 1, numVertices);
newClique = cliqueInts(i,:) | newVerticesInClique;
intersectionVector = nonemptyIntersections(:,ints_1(i)) |...
nonemptyIntersections(:,ints_2(i));
intersectionCliques = find(intersectionVector);
possibleOldInts = cliqueMatrix(intersectionVector, :);
actualOldInts = find(any(possibleOldInts(:, newClique),2));
numOldInts = length(actualOldInts);
neiOldRange = neiOldIndex+1:neiOldIndex+numOldInts;
neiOldCliqueColsToAdd(neiOldRange)= ...
intersectionCliques(actualOldInts);
neiOldCliqueRowsToAdd(neiOldRange) = i;
neiOldIndex = neiOldIndex + numOldInts;
% ------------------------------------------------------------
% For each clique to add, build new sparse matrix rows
% ------------------------------------------------------------
if (intersectionSize == nnz(cliqueMatrix(ints_1(i),:)) - 1)
remIndex = remIndex + 1;
cliquesToRemove(remIndex) = ints_1(i);
end
if (intersectionSize == nnz(cliqueMatrix(ints_2(i),:)) - 1)
remIndex = remIndex + 1;
cliquesToRemove(remIndex) = ints_2(i);
end
end
neiOldCliqueColsToAdd = neiOldCliqueColsToAdd(1:neiOldIndex);
neiOldCliqueRowsToAdd = neiOldCliqueRowsToAdd(1:neiOldIndex);
else
% ----------------------------------------------------------------
% These two vertices share no cliques at all, so insert the edge
% remove either vertex from the clique list if they are still
% singletons, and update the intersection matrix
% ----------------------------------------------------------------
addedRows = 1;
addIndex = 2;
cliqueColsToAdd = edgeList(f, :);
cliqueRowsToAdd = ones(2,1);
neiOldCliqueColsToAdd = find(cliqueIndices(:,1) + cliqueIndices(:,2));
neiOldCliqueRowsToAdd = ones(nnz(neiOldCliqueColsToAdd),1);
% ----------------------------------------------------------------
% Check to see if each vertex is listed as a maximal clique
% and mark it for removal if so
% ----------------------------------------------------------------
for i=1:2
[isRow, rowNum] = ismember(sparse(1, edgeList(f,i), ...
1, 1, numVertices), cliqueMatrix, 'rows');
if isRow
remIndex = remIndex + 1;
cliquesToRemove(remIndex) = rowNum;
end
end
end
% ----------------------------------------------------------------
% Update the clique matrix by adding rows and removing cliques
% that are contained in new maximal cliques
% ----------------------------------------------------------------
numOldCliques = size(cliqueMatrix,1);
cliquesToRemove = cliquesToRemove(1:remIndex);
cliquesToKeep = setdiff(1:numOldCliques, cliquesToRemove);
addedCliques = sparse(cliqueRowsToAdd(1:addIndex), ...
cliqueColsToAdd(1:addIndex), ones(addIndex,1), addedRows, ...
numVertices);
cliqueMatrix = [cliqueMatrix(cliquesToKeep,:); ...
addedCliques];
% ----------------------------------------------------------------
% List intersections of new cliques with old ones
% ----------------------------------------------------------------
oldNewIntersections = sparse(neiOldCliqueRowsToAdd,...
neiOldCliqueColsToAdd, ones(nnz(neiOldCliqueColsToAdd),1), ...
addedRows, size(nonemptyIntersections,1));
% ----------------------------------------------------------------
% All the new cliques added intersect one another because they
% share at least a common edge
% ----------------------------------------------------------------
newIntersections = sparse(ones(addedRows) - eye(addedRows));
exCliquesToKeep = [cliquesToKeep (numOldCliques+1):(numOldCliques+addedRows)];
% ----------------------------------------------------------------
% Update the matrix of clique intersections
% ----------------------------------------------------------------
nonemptyIntersections = [nonemptyIntersections oldNewIntersections';...
oldNewIntersections newIntersections];
nonemptyIntersections = nonemptyIntersections(exCliquesToKeep,...
exCliquesToKeep);
% ----------------------------------------------------------------
% Print the new maximal cliques at this filtraiton level to file
% ----------------------------------------------------------------
print_clique_matrix_to_perseus_file(addedCliques, maxCliqueSize,...
edgeList(f,:), cliqueFid, f);
if writeMaxCliques
print_clique_matrix_to_perseus_file(addedCliques, -1,...
edgeList(f,:), maxCliqueFid, f);
end
end
fclose(cliqueFid);
if writeMaxCliques
fclose(maxCliqueFid);
end
end