Skip to content

Commit

Permalink
Checking in the code
Browse files Browse the repository at this point in the history
  • Loading branch information
H. Marmanis committed Dec 3, 2012
1 parent 1d728ca commit 762d794
Show file tree
Hide file tree
Showing 255 changed files with 35,422 additions and 0 deletions.
448 changes: 448 additions & 0 deletions src/org/yooreeka/algos/clustering/dbscan/DBSCANAlgorithm.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* ________________________________________________________________________________________
*
* Y O O R E E K A
* A library for data mining, machine learning, soft computing, and mathematical analysis
* ________________________________________________________________________________________
*
* The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web"
* (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms
* are valuable in any software application.
*
* Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
* Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.
*
* Certain library functions depend on other Open Source software libraries, which are covered
* by different license agreements. See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.yooreeka.algos.clustering.hierarchical;

import org.yooreeka.algos.clustering.model.Cluster;
import org.yooreeka.algos.clustering.model.DataPoint;
import org.yooreeka.algos.clustering.utils.ObjectToIndexMapping;

/**
 * A hierarchical agglomerative clustering algorithm based on the average link.
 *
 * <p>Every data point starts in its own cluster. The algorithm then sweeps a
 * distance threshold upwards (starting at 1.0, in steps of 0.5) and, at each
 * step, merges pairs of clusters whose average pairwise element distance is
 * at or below the threshold. Each step that actually merged something is
 * recorded as a new level of the resulting {@link Dendrogram}.
 *
 * <p>The clusters are kept in instance state that is created in the
 * constructor and filled by {@link #cluster()}, so an instance is meant to be
 * clustered once.
 */
public class AverageLinkAlgorithm {

    /** Small demo: clusters five labelled points from a fixed distance matrix. */
    public static void main(String[] args) {
        // Define data
        DataPoint[] elements = new DataPoint[5];
        elements[0] = new DataPoint("A", new double[] {});
        elements[1] = new DataPoint("B", new double[] {});
        elements[2] = new DataPoint("C", new double[] {});
        elements[3] = new DataPoint("D", new double[] {});
        elements[4] = new DataPoint("E", new double[] {});

        double[][] a = new double[][] { { 0, 1, 2, 2, 3 }, { 1, 0, 2, 4, 3 },
                { 2, 2, 0, 1, 5 }, { 2, 4, 1, 0, 3 }, { 3, 3, 5, 3, 0 } };

        AverageLinkAlgorithm ca = new AverageLinkAlgorithm(elements, a);
        Dendrogram dnd = ca.cluster();
        dnd.printAll();
    }

    /** Data points; {@code elements[i]} corresponds to row/column {@code i} of {@link #a}. */
    private final DataPoint[] elements;

    /** Pairwise distances between the data points; only {@code a[i][j]} with {@code i < j} is read. */
    private final double[][] a;

    /** The current (progressively merged) set of clusters. */
    private final ClusterSet allClusters;

    /**
     * @param elements
     *            the data points to cluster.
     * @param adjacencyMatrix
     *            pairwise distances, indexed consistently with
     *            {@code elements}.
     */
    public AverageLinkAlgorithm(DataPoint[] elements, double[][] adjacencyMatrix) {
        this.elements = elements;
        this.a = adjacencyMatrix;
        this.allClusters = new ClusterSet();
    }

    /**
     * Runs the clustering until a single cluster remains.
     *
     * @return a dendrogram whose first level holds the singleton clusters
     *         (labelled with distance 0.0) and whose later levels hold the
     *         cluster set after each threshold step that produced a merge.
     */
    public Dendrogram cluster() {

        Dendrogram dnd = new Dendrogram("Distance");
        double d = 0.0;

        // initially load all elements as individual clusters
        for (DataPoint e : elements) {
            allClusters.add(new Cluster(e));
        }

        dnd.addLevel(String.valueOf(d), allClusters.getAllClusters());

        d = 1.0;

        while (allClusters.size() > 1) {
            int sizeBeforeMerge = allClusters.size();
            mergeClusters(d);
            // it is possible that there were no clusters to merge for the
            // current d; only record a level when something changed.
            if (sizeBeforeMerge > allClusters.size()) {
                dnd.addLevel(String.valueOf(d), allClusters.getAllClusters());
            }

            d = d + 0.5;
        }
        return dnd;
    }

    /**
     * Performs one merge pass: computes the average link distance between
     * every pair of current clusters and merges pairs whose distance does not
     * exceed the threshold. A cluster takes part in at most one merge per
     * pass.
     *
     * @param distanceThreshold
     *            maximum average distance at which two clusters are merged.
     */
    private void mergeClusters(double distanceThreshold) {
        int nClusters = allClusters.size();

        // Freshly created on every pass; getIndex() is relied upon to hand
        // out the next free index for a cluster it has not seen yet.
        ObjectToIndexMapping<Cluster> idxMapping = new ObjectToIndexMapping<Cluster>();

        // Sum of element-to-element distances for every pair of clusters.
        double[][] distanceSums = new double[nClusters][nClusters];

        for (int i = 0, n = a.length; i < n; i++) {
            // The cluster of element i does not change during this loop.
            Cluster c1 = allClusters.findClusterByElement(elements[i]);
            for (int j = i + 1; j < n; j++) {
                Cluster c2 = allClusters.findClusterByElement(elements[j]);
                if (c1.equals(c2)) {
                    continue;
                }
                // Register both clusters even when the distance is not
                // positive. Otherwise a cluster whose distances to all other
                // clusters are zero would never receive an index and the
                // merge loop below would find no cluster at that index.
                int ci = idxMapping.getIndex(c1);
                int cj = idxMapping.getIndex(c2);
                double d = a[i][j];
                if (d > 0) {
                    distanceSums[ci][cj] += d;
                    distanceSums[cj][ci] += d;
                }
            }
        }

        boolean[] merged = new boolean[nClusters];
        for (int i = 0; i < nClusters; i++) {
            Cluster ci = idxMapping.getObject(i);
            for (int j = i + 1; j < nClusters; j++) {
                Cluster cj = idxMapping.getObject(j);
                // average link: total distance over the number of element pairs
                double averageDistance = distanceSums[i][j] / (ci.size() * cj.size());
                // merge clusters if distance is below the threshold and
                // neither of them was already merged in this pass
                if (!merged[i] && !merged[j] && averageDistance <= distanceThreshold) {
                    allClusters.remove(ci);
                    allClusters.remove(cj);
                    allClusters.add(new Cluster(ci, cj));
                    merged[i] = true;
                    merged[j] = true;
                }
            }
        }
    }
}
83 changes: 83 additions & 0 deletions src/org/yooreeka/algos/clustering/hierarchical/ClusterSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* ________________________________________________________________________________________
*
* Y O O R E E K A
* A library for data mining, machine learning, soft computing, and mathematical analysis
* ________________________________________________________________________________________
*
* The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web"
* (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms
* are valuable in any software application.
*
* Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
* Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.
*
* Certain library functions depend on other Open Source software libraries, which are covered
* by different license agreements. See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.yooreeka.algos.clustering.hierarchical;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.yooreeka.algos.clustering.model.Cluster;
import org.yooreeka.algos.clustering.model.DataPoint;

/**
 * A set of clusters, backed by a {@link HashSet}.
 */
public class ClusterSet {

    private final Set<Cluster> clusters = new HashSet<Cluster>();

    /**
     * Adds a cluster to the set.
     *
     * @param c
     *            cluster to add.
     * @return the result of the underlying {@link Set#add(Object)} call.
     */
    public boolean add(Cluster c) {
        return clusters.add(c);
    }

    /**
     * Looks up a cluster that contains the given data point.
     *
     * @param e
     *            data point to search for.
     * @return the first cluster encountered whose {@code contains(e)} is
     *         true, or {@code null} when no cluster contains the point.
     */
    public Cluster findClusterByElement(DataPoint e) {
        for (Cluster candidate : clusters) {
            if (candidate.contains(e)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * @return a new list holding the clusters currently in the set; changes
     *         to the list do not affect the set.
     */
    public List<Cluster> getAllClusters() {
        List<Cluster> snapshot = new ArrayList<Cluster>(clusters);
        return snapshot;
    }

    /**
     * Removes a cluster from the set.
     *
     * @param c
     *            cluster to remove.
     * @return the result of the underlying {@link Set#remove(Object)} call.
     */
    public boolean remove(Cluster c) {
        return clusters.remove(c);
    }

    /**
     * @return the number of clusters in the set.
     */
    public int size() {
        return clusters.size();
    }
}
162 changes: 162 additions & 0 deletions src/org/yooreeka/algos/clustering/hierarchical/Dendrogram.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* ________________________________________________________________________________________
*
* Y O O R E E K A
* A library for data mining, machine learning, soft computing, and mathematical analysis
* ________________________________________________________________________________________
*
* The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web"
* (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms
* are valuable in any software application.
*
* Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
* Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.
*
* Certain library functions depend on other Open Source software libraries, which are covered
* by different license agreements. See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.yooreeka.algos.clustering.hierarchical;

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

import org.yooreeka.algos.clustering.model.Cluster;
import org.yooreeka.config.YooreekaConfigurator;

/**
 * Records the state of a hierarchical clustering as a sequence of numbered
 * levels. Each level stores a labelled snapshot (copies) of the clusters
 * that existed at that point. Automatically assigned level numbers start
 * at 1.
 */
public class Dendrogram {

    private static final Logger LOG = Logger.getLogger(Dendrogram.class.getName());

    /*
     * Clusters by level.
     */
    private final Map<Integer, ClusterSet> entryMap;

    /* Label (e.g. the distance threshold) by level. */
    private final Map<Integer, String> levelLabels;

    /* Level number that the next addLevel() call will use. */
    private int nextLevel;

    /* Name of the quantity that the level labels represent; used in print(). */
    private final String levelLabelName;

    /**
     * @param levelLabelName
     *            name of the quantity that level labels represent; it is
     *            printed in front of each label by {@link #print(int)}.
     */
    public Dendrogram(String levelLabelName) {

        LOG.setLevel(YooreekaConfigurator.getLevel(Dendrogram.class.getName()));

        entryMap = new LinkedHashMap<Integer, ClusterSet>();
        levelLabels = new LinkedHashMap<Integer, String>();
        nextLevel = 1;
        this.levelLabelName = levelLabelName;
    }

    /**
     * Creates a new dendrogram level that holds a copy of a single cluster.
     *
     * @param label
     *            level description.
     * @param cluster
     *            the only cluster of the level.
     * @return the number assigned to the new level.
     */
    public int addLevel(String label, Cluster cluster) {
        List<Cluster> values = new ArrayList<Cluster>();
        values.add(cluster);
        return addLevel(label, values);
    }

    /**
     * Creates a new dendrogram level using copies of provided clusters.
     *
     * @param label
     *            level description.
     * @param clusters
     *            clusters for the level.
     * @return the number assigned to the new level.
     */
    public int addLevel(String label, Collection<Cluster> clusters) {

        int level = nextLevel;

        entryMap.put(level, copyClusters(clusters));
        levelLabels.put(level, label);

        nextLevel++;
        return level;
    }

    /**
     * @return the numbers of all stored levels, in the order in which the
     *         levels were first stored.
     */
    public List<Integer> getAllLevels() {
        return new ArrayList<Integer>(entryMap.keySet());
    }

    /**
     * @param level
     *            dendrogram level.
     * @return the clusters stored for the level; an empty list when the
     *         level does not exist.
     */
    public List<Cluster> getClustersForLevel(int level) {
        ClusterSet cs = entryMap.get(level);
        if (cs == null) {
            // Unknown level: report "no clusters" instead of failing with a
            // NullPointerException.
            return new ArrayList<Cluster>();
        }
        return cs.getAllClusters();
    }

    /**
     * @param level
     *            dendrogram level.
     * @return the label stored for the level, or {@code null} when the level
     *         does not exist.
     */
    public String getLabelForLevel(int level) {
        return levelLabels.get(level);
    }

    /**
     * @return one less than the level number that will be assigned next;
     *         0 when no level has been stored yet.
     */
    public int getTopLevel() {
        return nextLevel - 1;
    }

    /**
     * Logs the clusters of one level. Only clusters with more than one
     * element are listed. An unknown level is reported with a warning.
     *
     * @param level
     *            dendrogram level.
     */
    public void print(int level) {
        ClusterSet clusters = entryMap.get(level);
        if (clusters == null) {
            LOG.warning("No such dendrogram level: " + level);
            return;
        }
        String label = levelLabels.get(level);
        LOG.info("Clusters for: level=" + level + ", "
                + levelLabelName + "=" + label);
        for (Cluster c : clusters.getAllClusters()) {
            if (c.getElements().size() > 1) {
                LOG.info("____________________________________________________________\n");
                LOG.info(c.getElementsAsString());
                LOG.info("____________________________________________________________\n\n");
            }
        }
    }

    /**
     * Logs every stored level via {@link #print(int)}.
     */
    public void printAll() {
        for (Integer level : entryMap.keySet()) {
            print(level);
        }
    }

    /**
     * Replaces clusters in the specified level. If level doesn't exist it will
     * be created. Copies of the provided clusters are stored.
     *
     * @param level
     *            dendrogram level.
     * @param label
     *            level description.
     * @param clusters
     *            clusters for the level.
     */
    public void setLevel(int level, String label, Collection<Cluster> clusters) {

        LOG.fine("Setting cluster level: " + level);

        entryMap.put(level, copyClusters(clusters));
        levelLabels.put(level, label);

        // keep automatically assigned level numbers above any explicit one
        if (level >= nextLevel) {
            nextLevel = level + 1;
        }
    }

    /**
     * Builds a cluster set from copies of the given clusters. Clusters may
     * change over time, but the dendrogram must keep the state they had when
     * the level was stored.
     */
    private static ClusterSet copyClusters(Collection<Cluster> clusters) {
        ClusterSet clusterSet = new ClusterSet();
        for (Cluster c : clusters) {
            clusterSet.add(c.copy());
        }
        return clusterSet;
    }

}
Loading

0 comments on commit 762d794

Please sign in to comment.