Skip to content

Commit

Permalink
Merge pull request #2 from smu-cs-3353/BreadthFirstSearch
Browse files Browse the repository at this point in the history
community detection and file report
  • Loading branch information
djryn authored Apr 11, 2022
2 parents 2ba894a + 5de6699 commit 48702b4
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 57 deletions.
10 changes: 4 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ set(CMAKE_CXX_FLAGS_RELEASE -O3)
add_executable(pa03 src/main.cpp src/Graph_helper.cpp)
# Copy every data file the program reads or writes into the build tree
# unchanged, so the executable can be run from the build directory.
foreach(data_file IN ITEMS
        data/test_data_2.graphml
        data/dataset.graphml
        data/test_data_1.graphml
        data/communities.txt)
    configure_file(${data_file} ${data_file} COPYONLY)
endforeach()



# Hints consumed by the Boost find module: do not ask for static libraries,
# and ask for the multithreaded variants.
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
Expand All @@ -15,9 +19,3 @@ find_package(Boost 1.71.0 REQUIRED COMPONENTS graph system filesystem)
# Log the Boost version that find_package() located.
message(STATUS "Boost version: ${Boost_VERSION}")

# Link the pa03 executable against the Boost graph, system and filesystem targets.
target_link_libraries(pa03 PUBLIC Boost::graph PUBLIC Boost::system PUBLIC Boost::filesystem)

#set(1 file1)

#foreach(file IN LISTS 1)
# configure_file(${file} ${file} COPYONLY)
#endforeach()
Empty file added data/communities.txt
Empty file.
36 changes: 36 additions & 0 deletions data/test_data_1.graphml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
<key id="d0" for="node" attr.name="community" attr.type="int"/>
<graph id="G" edgedefault="undirected">
<node id="n0">
<data key="d0">1</data>
</node>
<node id="n1">
<data key="d0">1</data>
</node>
<node id="n2">
<data key="d0">1</data>
</node>
<node id="n3">
<data key="d0">2</data>
</node>
<node id="n4">
<data key="d0">2</data>
</node>
<node id="n5">
<data key="d0">2</data>
</node>

<edge source="n0" target="n1"/>
<edge source="n0" target="n2"/>
<edge source="n1" target="n2"/>
<edge source="n2" target="n3"/>
<edge source="n3" target="n4"/>
<edge source="n3" target="n5"/>
<edge source="n4" target="n5"/>

</graph>
</graphml>
9 changes: 3 additions & 6 deletions data/test_data_2.graphml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,13 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
<key id="d0" for="node" attr.name="name" attr.type="string"/>
<key id="d1" for="node" attr.name="community" attr.type="int"/>
<key id="d0" for="node" attr.name="community" attr.type="int"/>
<graph id="G" edgedefault="undirected">
<node id="n0">
<data key="d0">A</data>
<data key="d1">1</data>
<data key="d0">1</data>
</node>
<node id="n1">
<data key="d0">B</data>
<data key="d1">1</data>
<data key="d0">1</data>
</node>
<edge source="n0" target="n1"/>
</graph>
Expand Down
215 changes: 212 additions & 3 deletions src/Graph_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@

#include "Graph_helper.h"


void Graph_helper::print_graph() {
boost::print_graph(graph);
}

// Prints every edge of the stored graph to standard output.
// NOTE(review): as shown here the edge range is walked twice; the first loop
// looks like the pre-change version of the second one - confirm which of the
// two is meant to remain.
void Graph_helper::print_edges() {
auto epair = boost::edges(graph);
// First pass: print each edge descriptor.
for(auto iter = epair.first; iter != epair.second; iter++)
std::cout << "edge " << *iter << std::endl;
// Second pass: overwrite each edge's Name with the placeholder "taco" and print it.
// NOTE(review): a print routine that mutates edge data looks like leftover
// debugging - confirm before relying on EdgeProperty::Name elsewhere.
for(auto iter = epair.first; iter != epair.second; iter++) {
graph[*iter].Name = "taco";
std::cout << "edge " << graph[*iter].Name << std::endl;

}
}

void Graph_helper::print_vertices() {
Expand All @@ -37,4 +39,211 @@ void Graph_helper::read_graphml(const char* file) {
std::cout << e.what() << std::endl;
return;
}

}

/**
 * Runs the community detection on the stored graph and writes the result.
 *
 * Edges are removed (via girvan_newman_helper()) until none are left. After
 * every split the modularity of the current partition is computed, and the
 * partition with the highest modularity is remembered in max_comp / best_mod /
 * num_communities. The winning partition is printed to standard output and
 * handed to print_report().
 *
 * The graph is consumed by this call: all of its edges are removed.
 */
void Graph_helper::girvan_newman() {
    numNodes = static_cast<int>(boost::num_vertices(graph));
    numEdges = static_cast<int>(boost::num_edges(graph));

    // Record every vertex's degree in the untouched graph; get_modularity()
    // reads it after edges have been removed.
    auto vpair = boost::vertices(graph);
    for (auto iter = vpair.first; iter != vpair.second; ++iter) {
        graph[*iter].origDegree = static_cast<int>(boost::degree(*iter, graph));
    }

    // Start from a clean slate so neither an earlier run nor an
    // uninitialized member can influence the comparison below.
    max_comp.clear();
    num_communities = 0;
    best_mod = 0;
    bool have_best = false;

    // Checking for remaining edges *before* calling the helper keeps an
    // edgeless graph from ever reaching the edge-removal code.
    while (boost::num_edges(graph) > 0) {
        girvan_newman_helper();
        const double mod = get_modularity();
        if (!have_best || mod > best_mod) {
            best_mod = mod;
            num_communities = static_cast<int>(
                boost::connected_components(graph, boost::make_assoc_property_map(max_comp)));
            have_best = true;
        }
    }

    if (!have_best) {
        // The graph had no edges to begin with: its connected components are
        // the only possible partition, reported with modularity 0.
        num_communities = static_cast<int>(
            boost::connected_components(graph, boost::make_assoc_property_map(max_comp)));
    }

    // One comma-separated member list per community.
    std::vector<std::string> report(num_communities);

    std::cout << "Best Modularity: " << best_mod << std::endl;
    for (auto& c : max_comp) {
        std::cout << c.first << " in community " << c.second << std::endl;
        report[c.second] += std::to_string(c.first);
        report[c.second] += ", ";
    }
    print_report(report);
}

/**
 * Performs one splitting step: removes edges until the number of connected
 * components grows (or no edge is left).
 *
 * Each round runs a BFS from every vertex, counts for every edge how many of
 * the reconstructed shortest paths cross it, removes the edge with the highest
 * count and zeroes all counts for the next round.
 *
 * Does nothing when the graph has no vertices or no edges.
 */
void Graph_helper::girvan_newman_helper() {
    const std::size_t vertex_count = boost::num_vertices(graph);
    if (vertex_count == 0 || boost::num_edges(graph) == 0)
        return;

    std::vector<int> component(vertex_count);
    size_t num_components = boost::connected_components(graph, &component[0]);
    size_t new_num = num_components;
    std::cout << num_components << std::endl;

    // The edge check also stops the loop when removals cannot split anything
    // further (e.g. only self-loops remain), instead of reading an empty
    // edge range.
    while (new_num <= num_components && boost::num_edges(graph) > 0) {
        auto vpair = boost::vertices(graph);

        // reset_tracking_data() marks each processed source via foundPaths and
        // nothing else clears it; without this reset every round after the
        // first would start with all vertices already marked.
        for (auto v = vpair.first; v != vpair.second; ++v)
            graph[*v].foundPaths = false;

        // Breadth first search from every vertex to find shortest paths.
        for (vertexIt iter = vpair.first; iter != vpair.second; iter++) {
            std::cout << "source: " << *iter << std::endl;
            std::cout << "visited: ";

            std::map<vd, vd> prev;

            breadth_first_search(prev, iter);
            reconstruct_paths(prev, iter);
            reset_tracking_data(iter); // Resets tracking data for path construction
        }

        // Find the edge crossed by the most paths and clear all counters.
        auto e = boost::edges(graph);
        auto busiest = *(e.first);
        int maxCount = -1;
        for (auto ed = e.first; ed != e.second; ed++) {
            if (graph[*ed].count > maxCount) {
                busiest = *ed;
                maxCount = graph[*ed].count;
            }
            graph[*ed].count = 0;
        }

        // Remove that edge through the documented descriptor-based API.
        std::cout << "max edge: " << boost::source(busiest, graph) << " to "
                  << boost::target(busiest, graph) << std::endl;
        boost::remove_edge(busiest, graph);
        std::cout << "edge removed" << std::endl;

        new_num = boost::connected_components(graph, &component[0]);
        std::cout << "new comp num: " << new_num << std::endl;
    }
}

/**
 * Computes the modularity of the partition formed by the connected components
 * of the current graph.
 *
 * For each component, compEdges is the sum of its vertices' current degrees
 * and totalEdges the sum of their original degrees. With m = numEdges the
 * component contributes (compEdges - totalEdges^2 / (2m)) / (2m).
 *
 * @return the sum of all component contributions, or 0 when numEdges is not
 *         positive (the formula would otherwise divide by zero).
 */
double Graph_helper::get_modularity () {
    set_degree();

    if (numEdges <= 0) {
        std::cout << "mod: " << 0.0 << std::endl;
        return 0.0;
    }

    std::map<vd, int> components;
    int num_components = boost::connected_components(graph, boost::make_assoc_property_map(components));

    // Accumulate current and original degree sums per component.
    std::vector<component> comp_mod(num_components);
    for (auto& p : components) {
        std::cout << "Vertex: " << p.first << " is in component " << p.second << std::endl;
        comp_mod[p.second].compEdges += graph[p.first].newDegree;
        comp_mod[p.second].totalEdges += graph[p.first].origDegree;
    }

    // 2m, kept as a double so the divisions below never happen in int.
    const double two_m = 2.0 * numEdges;

    double mod = 0;
    for (auto& c : comp_mod) {
        double temp = c.compEdges - ((double)c.totalEdges * c.totalEdges) / two_m;
        temp /= two_m;
        std::cout << temp << std::endl;
        mod += temp;
    }
    std::cout << "mod: " << mod << std::endl;
    return mod;
}

// Refreshes newDegree on every vertex with its degree in the current graph.
void Graph_helper::set_degree() {
    for (auto range = boost::vertices(graph); range.first != range.second; range.first++) {
        const auto vertex = *range.first;
        graph[vertex].newDegree = (int)boost::degree(vertex, graph);
    }
}

void Graph_helper::breadth_first_search(std::map<vd, vd>& prev, vertexIt iter) {
std::queue<vd> q;
int count = -1;

q.push(*iter);
graph[*iter].used = true;
// source not included, when reconstructing use prev.count(source) != 0 as condition
while(!q.empty()) {
vd temp = q.front();
q.pop();
count++;
auto neighbors = boost::adjacent_vertices(temp, graph);
for(auto it = neighbors.first; it != neighbors.second; it++) {
if(!graph[*it].used && !graph[*it].foundPaths) {
graph[*it].used = true;
q.push(*it);
graph[*it].distance = graph[temp].distance + 1;
prev.insert(std::pair<vd, vd>(*it, temp));
std::cout << *it << "(" << temp << ", " << graph[*it].distance << ") ";
}
}
}
std::cout << std::endl;
}

void Graph_helper::reconstruct_paths (std::map<vd, vd>& prev, vertexIt iter) {
for(auto& c : prev) {
std::vector<vd> path;
std::cout << c.first << ", " << c.second << ": ";
if(!graph[c.first].used) {
std::cout << "no path" << std::endl;
} else {
for(auto v = c.first; prev.count(v) != 0; v = prev[v]) {
path.push_back(v);
}
path.push_back(*iter);
std::reverse(path.begin(), path.end());
std::cout << "Path: ";
int v = 0;
std::cout << path[v] << " ";
for(v = 1; v < path.size(); v++) {
std::cout << path[v] << " ";
auto e = boost::edge(path[v-1], path[v], graph);
graph[e.first].count++;
}

std::cout << std::endl;
}
}
std::cout << std::endl;
}

// Flags the vertex `iter` points at as having had its paths found, then
// clears the `used` flag on every vertex of the graph.
void Graph_helper::reset_tracking_data(vertexIt iter) {
    graph[*iter].foundPaths = true;

    for (auto range = boost::vertices(graph); range.first != range.second; range.first++) {
        graph[*range.first].used = false;
    }
}

/**
 * Writes the community report to data/communities.txt.
 *
 * @param report one entry per community: its members as text, each member
 *               followed by ", ". A trailing ", " is dropped before the entry
 *               is written between brackets.
 *
 * The file ends with the best modularity found (best_mod). If the file cannot
 * be opened, a message is printed to standard error and nothing is written.
 */
void Graph_helper::print_report(std::vector<std::string> report) {
    std::ofstream out("data/communities.txt");
    if (!out.is_open()) {
        std::cerr << "Could not open data/communities.txt for writing" << std::endl;
        return;
    }

    out << "Community Report\n";
    for (std::size_t i = 0; i < report.size(); ++i) {
        const std::string& members = report[i];

        // Only strip the separator when it is really there, so entries that
        // do not end in ", " are written in full.
        const bool has_separator =
            members.size() >= 2 && members.compare(members.size() - 2, 2, ", ") == 0;
        const std::size_t keep = has_separator ? members.size() - 2 : members.size();

        out << "community #" << i << ": [";
        out << members.substr(0, keep) << "]\n";
    }
    out << "Modularity: " << best_mod;
}
43 changes: 39 additions & 4 deletions src/Graph_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,64 @@
#include <boost/graph/graphml.hpp>
#include <boost/property_map/dynamic_property_map.hpp>
#include <boost/property_map/property_map.hpp>
#include <boost/graph/connected_components.hpp>
#include <typeinfo>
#include <cxxabi.h>
#include <fstream>
#include <limits>
#include <queue>


// Holds a single community id.
// NOTE(review): nothing in the code visible here refers to this type - confirm it is still needed.
struct GraphData { int community; };
// Per-vertex data bundled into the graph.
// NOTE(review): `community` appears twice below (with and without an
// initializer); this looks like both sides of a change shown together -
// confirm that only one declaration exists in the real header.
struct VertexProperty {
int community;
int community = 0; // presumably the community id from the input file - TODO confirm against read_graphml
bool foundPaths = false; // set to true by reset_tracking_data() for the vertex passed to it
bool used = false; // visited flag set during breadth_first_search(); cleared for all vertices by reset_tracking_data()
int distance = -1; // hop count written by breadth_first_search(); -1 until a search assigns one
int origDegree = 0; // degree recorded by girvan_newman() before any edge is removed
int newDegree = 0; // degree in the current graph, refreshed by set_degree()
};
// Per-edge data bundled into the graph.
struct EdgeProperty {
std::string Name = "m"; // label, defaults to "m"; print_edges() writes and prints it
int count = 0; // number of reconstructed paths crossing this edge; incremented in reconstruct_paths(), zeroed in girvan_newman_helper()
};

// Per-component accumulator used by get_modularity().
struct component {
int compEdges = 0; // sum of newDegree over the component's vertices
int totalEdges = 0; // sum of origDegree over the component's vertices
};

// Undirected graph with set-based edge storage (no parallel edges) and
// vector-based vertex storage, carrying VertexProperty / EdgeProperty bundles.
typedef boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, VertexProperty, EdgeProperty> Graph;
// Iterator over the graph's vertices, taken from the graph's own traits
// rather than spelling out a Boost-internal iterator type, so it always
// matches what boost::vertices(graph) returns on every platform.
typedef boost::graph_traits<Graph>::vertex_iterator vertexIt;
// Handle identifying a single vertex of Graph.
typedef boost::graph_traits<Graph>::vertex_descriptor vd;

// Empty placeholder type.
// NOTE(review): it has no members and no visible users - confirm whether it is still needed.
struct Mapper {

};
struct EdgeProperty { std::string Name; };

using Graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, VertexProperty, EdgeProperty>;

// Owns a graph and implements loading, printing and Girvan-Newman style
// community detection on it.
class Graph_helper {
private:
    Graph graph;

    // Vertex and edge counts of the graph as captured by girvan_newman()
    // before any edge is removed.
    int numNodes = 0;
    int numEdges = 0;

    // Best partition seen so far: vertex -> community index.
    std::map<vd, int> max_comp;
    // Modularity of that partition. Starts at the lowest representable value
    // so the first computed modularity always replaces it.
    double best_mod = std::numeric_limits<double>::lowest();
    // Number of communities in that partition.
    int num_communities = 0;

public:
    // Printing helpers (standard output).
    void print_graph();
    void print_edges();
    void print_vertices();

    // Loads the graph from the GraphML file at the given path.
    void read_graphml(const char*);

    // Refreshes each vertex's newDegree from the current graph.
    void set_degree();
    // Modularity of the partition formed by the current connected components.
    double get_modularity();
    // Full community detection run; removes all edges and writes the report.
    void girvan_newman();
    // One splitting step: removes edges until the component count grows.
    void girvan_newman_helper();
    // BFS from the given source; fills the predecessor map.
    void breadth_first_search(std::map<vd, vd>&, vertexIt);
    // Rebuilds paths from the predecessor map and counts edge usage.
    void reconstruct_paths(std::map<vd, vd>&, vertexIt);
    // Marks the source as processed and clears all visited flags.
    void reset_tracking_data(vertexIt);
    // Writes the per-community member lists and best modularity to a file.
    void print_report(std::vector<std::string>);

};

Expand Down
Loading

0 comments on commit 48702b4

Please sign in to comment.