-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Apply the review from @bockthom and @clhunsen on the previous changes. This includes compliance with coding conventions, update of copyright headers, and improvement of documentation. Move the functions 'get.author.names.from.networks' and 'get.expanded.adjacency' to the new file 'util-networks-misc.R'. Also add the two functions 'get.author.names.from.data' and 'convert.adjacency.matrix.list.to.array' from the 'dev-network-growth' project to the new file. Signed-off-by: fehnkera <[email protected]>
- Loading branch information
Showing
5 changed files
with
384 additions
and
206 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
## This file is part of coronet, which is free software: you | ||
## can redistribute it and/or modify it under the terms of the GNU General | ||
## Public License as published by the Free Software Foundation, version 2. | ||
## | ||
## This program is distributed in the hope that it will be useful, | ||
## but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
## GNU General Public License for more details. | ||
## | ||
## You should have received a copy of the GNU General Public License along | ||
## with this program; if not, write to the Free Software Foundation, Inc., | ||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
## | ||
## Copyright 2016 by Sofie Kemper <[email protected]> | ||
## Copyright 2016 by Claus Hunsen <[email protected]> | ||
## Copyright 2016-2018 by Thomas Bock <[email protected]> | ||
## Copyright 2017 by Angelika Schmid <[email protected]> | ||
## Copyright 2019 by Jakob Kronawitter <[email protected]> | ||
## Copyright 2019-2020 by Anselm Fehnker <[email protected]> | ||
## All Rights Reserved. | ||
|
||
|
||
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / | ||
## Libraries --------------------------------------------------------------- | ||
|
||
requireNamespace("parallel") # for parallel computation | ||
requireNamespace("igraph") # networks | ||
requireNamespace("Matrix") # for sparse matrices | ||
|
||
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / | ||
## Get active authors ----------------------------------------------------- | ||
|
||
#' Collect the names of all authors that appear as vertices in the given networks.
#'
#' @param networks the list of networks whose vertex names are collected
#' @param globally if \code{TRUE}, return one duplicate-free, alphabetically sorted vector of author names
#'                 over all networks; if \code{FALSE}, return the author names separately for each network
#'                 [default: TRUE]
#'
#' @return the author names, either combined or per network (see \code{globally})
get.author.names.from.networks = function(networks, globally = TRUE) {

    ## the vertex names of a network are the authors that are active in it
    authors.per.network = lapply(networks, function(net) {
        return(igraph::V(net)$name)
    })

    ## per-network result requested: nothing more to do
    if (!globally) {
        return(authors.per.network)
    }

    ## merge the per-network vectors into a single one and drop the element names generated by 'unlist'
    all.authors = unname(unlist(authors.per.network, recursive = FALSE))

    ## keep every author only once, in ascending alphabetical order
    return(sort(unique(all.authors)))
}
|
||
#' Collect the names of all authors that are active in the given data ranges.
#'
#' An author counts as active in a range if the range's data groups at least one artifact
#' (mails or commits, depending on \code{is.mail.analysis}) under the author's name.
#'
#' @param data.ranges the list of the data ranges
#' @param is.mail.analysis if \code{TRUE}, the "mails" data source is used, otherwise the "commits" data source
#' @param globally if \code{TRUE}, return one duplicate-free, alphabetically sorted vector of author names
#'                 over all ranges; if \code{FALSE}, return the author names separately for each range
#'                 [default: TRUE]
#'
#' @return the author names, either combined or per range (see \code{globally})
get.author.names.from.data = function(data.ranges, is.mail.analysis, globally = TRUE) {

    ## for each range, the authors are the group names obtained when grouping by "author.name"
    authors.per.range = lapply(data.ranges, function(range.data) {
        data.source = if (is.mail.analysis) "mails" else "commits"
        return(names(range.data$group.artifacts.by.data.column(data.source, "author.name")))
    })

    ## per-range result requested: nothing more to do
    if (!globally) {
        return(authors.per.range)
    }

    ## merge the per-range vectors into a single one and drop the element names generated by 'unlist'
    all.authors = unname(unlist(authors.per.range, recursive = FALSE))

    ## keep every author only once, in ascending alphabetical order
    return(sort(unique(all.authors)))
}
|
||
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / | ||
## Adjacency matrices ---------------------------------------------------- | ||
|
||
#' Build a sparse adjacency matrix of a network, expanded to a given set of authors.
#'
#' The resulting matrix has one row and one column per entry of \code{authors}. The cells that belong
#' to vertices of \code{network} are filled from the adjacency matrix of the network.
#'
#' @param network the given network
#' @param authors all authors that are wanted in the adjacency matrix
#' @param weighted if \code{TRUE}, the cell values are taken from the edge attribute "weight";
#'                 if \code{FALSE}, every positive cell is set to 1 [default: FALSE]
#'
#' @return the sparse adjacency matrix of the network
get.expanded.adjacency = function(network, authors, weighted = FALSE) {

    ## start from an empty sparse square matrix ("dgTMatrix") that spans all wanted authors
    author.count = length(authors)
    expanded = Matrix::sparseMatrix(i = c(), j = c(), dims = c(author.count, author.count), giveCsparse = FALSE)
    expanded = as(expanded, "dgTMatrix")

    ## label both dimensions with the author names
    rownames(expanded) = authors
    colnames(expanded) = authors

    ## a network without vertices contributes nothing to the matrix
    if (igraph::vcount(network) == 0) {
        return(expanded)
    }

    ## adjacency matrix of the network itself, either weighted or unweighted
    if (weighted) {
        network.adjacency = igraph::get.adjacency(network, attr = "weight")
    } else {
        network.adjacency = igraph::get.adjacency(network)
    }

    ## sort rows and columns by name (skipped for a 1x1 matrix, for which ordering does not work)
    if (nrow(network.adjacency) > 1) {
        row.order = order(rownames(network.adjacency))
        col.order = order(colnames(network.adjacency))
        network.adjacency = network.adjacency[row.order, col.order]
    }

    ## copy the values of the network into the cells of the corresponding authors
    if (nrow(network.adjacency) > 0) {
        expanded[rownames(network.adjacency), colnames(network.adjacency)] = network.adjacency
    }

    ## in the unweighted case, only the presence of an edge is recorded
    if (!weighted) {
        expanded[expanded > 0] = 1
    }

    return(expanded)
}
|
||
#' Calculates a sparse adjacency matrix for each network in the list.
#' All adjacency matrices are expanded to the same set of authors, namely all authors
#' that occur in at least one of the networks.
#'
#' @param networks list of networks
#' @param weighted decides if the adjacency matrices shall be weighted [default: FALSE]
#'
#' @return the list of adjacency matrices, one per network
get.expanded.adjacency.matrices = function(networks, weighted = FALSE) {

    ## collect the authors of all networks once, so that every matrix gets the same dimensions
    authors = get.author.names.from.networks(networks)

    ## compute the expanded adjacency matrix of each network (possibly in parallel)
    adjacency.matrices = parallel::mclapply(networks, get.expanded.adjacency, authors, weighted)

    return(adjacency.matrices)
}
|
||
#' Gets a list of networks, converts them to sparse adjacency matrices, and sums up the adjacency matrices cumulatively.
#' This means that the first entry of the returned list is just the adjacency matrix from the first network,
#' the second entry is the sum of the first and the second adjacency matrix, and so on.
#' For an empty list of networks, an empty list is returned.
#'
#' @param networks list of networks
#' @param weighted decides if the adjacency matrices shall be weighted; if \code{FALSE}, the stored entries
#'                 of each cumulated matrix are reset to 1 after summing up [default: FALSE]
#'
#' @return the list of cumulated adjacency matrices
get.expanded.adjacency.cumulated = function(networks, weighted = FALSE) {

    ## without any network there is nothing to cumulate (and no first matrix to start from)
    if (length(networks) == 0) {
        return(list())
    }

    ## get expanded adjacency matrices first
    matrices = get.expanded.adjacency.matrices(networks, weighted)

    ## pair-wise sum of matrices: m.cumul(m) = m.cumul(m - 1) + matrices(m)
    ## (intermediate results consecutively stored in matrices.cumulated)
    matrices.cumulated = list(matrices[[1]]) # first one is complete already

    if (length(matrices) > 1) {
        for (m in 2:(length(matrices))) {

            matrices.cumulated[[m]] = matrices.cumulated[[m - 1]] + matrices[[m]]

            ## re-apply the row and column names of the previous matrix to the sum
            rownames(matrices.cumulated[[m]]) = rownames(matrices.cumulated[[m - 1]])
            colnames(matrices.cumulated[[m]]) = colnames(matrices.cumulated[[m - 1]])

            if (!weighted) {
                ## search for a non-zero entry and set it to an arbitrary number (e.g., 42)
                ## to force that all non-zero entries are correctly set to 1 afterwards
                ## NOTE(review): presumably the assignment is needed to bring the sparse representation
                ## into a state in which the slots 'i' and 'x' line up -- confirm before simplifying
                not.zero.idxs = which(matrices.cumulated[[m]] >= 1, arr.ind = TRUE)
                if (nrow(not.zero.idxs) > 0) {
                    first.not.zero.idx = not.zero.idxs[1, ]
                    names(first.not.zero.idx) = c("row", "col")
                    matrices.cumulated[[m]][first.not.zero.idx[["row"]], first.not.zero.idx[["col"]]] = 42

                    ## overwrite the values of all stored entries with 1
                    matrices.cumulated[[m]]@x = rep(1, length(matrices.cumulated[[m]]@i))
                }
            }
        }
    }

    return(matrices.cumulated)
}
|
||
#' Converts a list of adjacency matrices to a 3-dimensional array.
#'
#' The first two dimensions of the array are sized by the number of rows of the first matrix in the list,
#' the third dimension has one slice per matrix. Row and column names are taken from the first matrix.
#' All cells that are not non-zero in the respective matrix are 0 in the array.
#'
#' @param adjacency.list the list of adjacency matrices
#'
#' @return the converted array
convert.adjacency.matrix.list.to.array = function(adjacency.list) {

    ## create a 3-dimensional array representing the adjacency matrices (SIENA data format) as result
    first.matrix = adjacency.list[[1]]
    result.array = array(data = 0, dim = c(nrow(first.matrix), nrow(first.matrix), length(adjacency.list)))
    rownames(result.array) = rownames(first.matrix)
    colnames(result.array) = colnames(first.matrix)

    ## copy the activity values from the adjacency matrices in the list to the corresponding array slices
    for (i in seq_along(adjacency.list)) {
        adjacency = adjacency.list[[i]]
        activity.indices = which(adjacency != 0, arr.ind = TRUE)

        ## 'seq_len' yields an empty sequence for a matrix without any non-zero entry,
        ## whereas '1:nrow(...)' would iterate over c(1, 0) and fail
        for (j in seq_len(nrow(activity.indices))) {
            row = as.vector(activity.indices[j, 1])
            col = as.vector(activity.indices[j, 2])
            result.array[row, col, i] = adjacency[row, col]
        }
    }

    return(result.array)
}
Oops, something went wrong.