Skip to content

Commit

Permalink
Add preliminary methods to calculate EDCPTD centrality
Browse files Browse the repository at this point in the history
Methods that enable to calculate the EDCPTD centrality for a project are
added to a provisional file 'util-tensor.R'. This include methods that
retrive the needed single-layer developer networks, a method that builds
a forth-order tensor using this single-layer networks and e method that
calculates the EDCPTD centrality using this tensor.

Signed-off-by: Anselm Fehnker <[email protected]>
  • Loading branch information
Anselm Fehnker authored and fehnkera committed Sep 23, 2020
1 parent 75ae4a5 commit c136b1f
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 1 deletion.
4 changes: 3 additions & 1 deletion install.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ packages = c(
"markovchain",
"lubridate",
"viridis",
"jsonlite"
"jsonlite",
"rTensor",
"Matrix"
)


Expand Down
1 change: 1 addition & 0 deletions util-init.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ source("util-core-peripheral.R")
source("util-networks-metrics.R")
source("util-networks-covariates.R")
source("util-plot-evaluation.R")
source("util-tensor.R")
187 changes: 187 additions & 0 deletions util-tensor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# function that manages the process to calculate EDCPTD centrality for a project
EDCPTD.centrality = function(network.builder){

# get the single-layer developer networks
networks = get.author.networks(network.builder)

# get and order all active developers from the single-layer nteworks
active.developers = get.developers.from.networks(networks)
active.developers = active.developers[order(active.developers)]

# build the tensor representing the single-layer networks
tensor = build.tensor.from.adjacency(networks, active.developers)

# calculate EDCPTD centrality
edcptd = calculate.EDCPTD.centrality(active.developers, tensor)

# return a data frame containing the names of the active developers and their EDCPTD score
return(edcptd)
}



# function that gets the single-layer developer networks for the mail, cochange and issue data for the project.
# A list containing all available single-layer networks is returned.
get.author.networks = function(network.builder){

networks = list()

# get the single-layer mail developer network if available
network.builder$update.network.conf(updated.values = list(author.relation = "mail"))

mail.network = network.builder$get.author.network()


# only if the mail data is available for the project, the network is added to the list
if(vcount(mail.network) > 0){
networks = union(networks, list(mail.network))
}

# get the single-layer cochange developer network if available
network.builder$update.network.conf(updated.values = list(author.relation = "cochange"))

cochange.network = network.builder$get.author.network()

# only if the cochange data is available for the project, the network is added to the list
if(vcount(cochange.network) > 0){
networks = union(networks, list(cochange.network))
}

# get the single-layer issue developer network if available
network.builder$update.network.conf(updated.values = list(author.relation = "issue"))

issue.network = network.builder$get.author.network()

# only if the issue data is available for the project, the network is added to the list
if(vcount(issue.network) > 0){
networks = union(networks, list(issue.network))
}

return(networks)
}

# Get an ordered vector all developers (i.e. their names) who are in at least one of the networks. If globally == FALSE,
# get a seperate list for each network which contains all the authors in the specific network.
get.developers.from.networks = function(networks, globally = TRUE) {

# for each network, get a list of authors that are in this network
active.authors.list = lapply(networks, function(network) {
active.authors = V(network)$name
return (active.authors)
})

if (globally) {
# flatten the list of lists to one list of authors
active.authors = unlist(active.authors.list, recursive = FALSE)

# remove distracting named list members
names(active.authors) = NULL

# remove duplicates and order alphabetically ascending
active.authors = active.authors[!duplicated(active.authors)]
active.authors = active.authors[order(active.authors)]
return (active.authors)
} else {
return (active.authors.list)
}
}

# function that builds a forth-order tensor containing the same information as the single-layer developer networks
build.tensor.from.adjacency = function(networks, active.developers){

# calculate dimensions for the forth-order tensor
number.layers = length(networks)
number.nodes = length(active.developers)

# get the adjacency matrices of the single-layer developer networks
adjacency.matrices = parallel::mclapply(networks, get.expanded.adjacency, active.developers)

# create an array with the size of the forth-order tensor that only contains zeros
array <-array(0, dim = c(number.nodes, number.layers, number.nodes, number.layers))

# the entries from every adjacency matrix are transfered to the array
for (l in 1:length(adjacency.matrices)) {

mat = as(adjacency.matrices[[l]], "dgTMatrix")

for (entry in 1:length(mat@x)) {
array[mat@i[entry]+1, l, mat@j[entry]+1, l] =mat@x[entry]
}
}

# the array is converted into a tensor
tensor <- rTensor::as.tensor(array)

return(tensor)

}


# The expanded adjacency is a 3-dimensional matrix where each slice represents
# one time window (i.e. one network in the given list of networks). The rows
# and vertices are the developers who are active in at
# least one network. It is possible to choose whether to consider edge weights.
#
# If a fields value is 0, the two developers were not linked to each other, if
# it is 1 (any positive integer for the weighted version) both developers were
# active and a link between them existed (with the indicated edge weight).
get.expanded.adjacency = function(network, authors, weighted = FALSE){

# create the matrix for this time step in the appropriate format, adding developer names
matrix <- sparseMatrix(i = c(), j = c(), dims = c(length(authors), length(authors)), giveCsparse = FALSE)
matrix <- as(matrix, "dgTMatrix")
rownames(matrix) <- authors
colnames(matrix) <- authors

if(weighted && vcount(network)>0){
# get the weighted adjacency matrix for the current network
A <- get.adjacency(network, attr = "weight")
}else{
# get the unweighted adjacency matrix for the current network
A <- get.adjacency(network)
}

# order the adjacency matrix
if(nrow(A)>1){ # for a 1x1 matrix ordering doesn't work
A <- A[order(rownames(A)),order(colnames(A))]
}

# save the activity data per developer
if(nrow(A)>0){
matrix[rownames(A), colnames(A)] <- A
}

if(!weighted){
matrix[matrix > 0] <- 1
}

return(matrix)
}

# method that calculates the EDCPTD centrality from the tensor
calculate.EDCPTD.centrality = function(authors, tensor){

# create data frame for results
data = data.frame(
names = authors
)

# decompose tensor
decomposition <-rTensor::cp(tensor, num_components = 1, max_iter = 50, tol = 1e-05)


# calculate EDCPTD centrality

data[["EDCPTD.score"]] = 0

for (y in 1:length(decomposition[["U"]][[2]][,1])) {
data[["EDCPTD.score"]] = data[["EDCPTD.score"]] + abs(decomposition[["U"]][[1]][,1]*decomposition[["U"]][[2]][,1][y]) +abs(decomposition[["U"]][[3]][,1]*decomposition[["U"]][[4]][,1][y])
}

data[["EDCPTD.score"]] = data[["EDCPTD.score"]]/2

return(data)
}



0 comments on commit c136b1f

Please sign in to comment.