From 762d794eae7b589b883d1bd31dcd1fad1942c51b Mon Sep 17 00:00:00 2001
From: "H. Marmanis" <h@marmanis.com>
Date: Mon, 3 Dec 2012 12:00:06 -0500
Subject: [PATCH] Checking in the code

---
 .../clustering/dbscan/DBSCANAlgorithm.java    | 448 ++++++++++++++
 .../hierarchical/AverageLinkAlgorithm.java    | 145 +++++
 .../clustering/hierarchical/ClusterSet.java   |  83 +++
 .../clustering/hierarchical/Dendrogram.java   | 162 ++++++
 .../algos/clustering/hierarchical/MST.java    | 130 +++++
 .../hierarchical/MSTSingleLinkAlgorithm.java  | 142 +++++
 .../hierarchical/SingleLinkAlgorithm.java     | 126 ++++
 .../algos/clustering/model/Attribute.java     | 119 ++++
 .../algos/clustering/model/Cluster.java       | 197 +++++++
 .../algos/clustering/model/DataPoint.java     | 181 ++++++
 .../partitional/KMeansAlgorithm.java          | 306 ++++++++++
 .../partitional/NearestNeighborAlgorithm.java | 230 ++++++++
 .../algos/clustering/rock/LinkMatrix.java     | 195 +++++++
 .../clustering/rock/MergeGoodnessMeasure.java |  92 +++
 .../algos/clustering/rock/ROCKAlgorithm.java  | 142 +++++
 .../algos/clustering/rock/ROCKClusters.java   | 205 +++++++
 .../algos/clustering/rock/SimilarCluster.java |  85 +++
 .../clustering/test/MyDiggSpaceData.java      | 125 ++++
 .../clustering/test/MyDiggSpaceDataset.java   |  56 ++
 .../algos/clustering/test/SFData.java         | 212 +++++++
 .../algos/clustering/test/SFDataset.java      |  93 +++
 .../algos/clustering/utils/Attributes.java    | 143 +++++
 .../utils/ObjectToIndexMapping.java           |  90 +++
 .../utils/SortedArrayClustering.java          |  71 +++
 .../algos/reco/collab/cache/FileStore.java    | 134 +++++
 .../algos/reco/collab/cache/Store.java        |  72 +++
 .../algos/reco/collab/data/BaseDataset.java   | 431 ++++++++++++++
 .../algos/reco/collab/data/ContentItem.java   |  59 ++
 .../algos/reco/collab/data/DiggData.java      | 361 ++++++++++++
 .../algos/reco/collab/data/HTMLContent.java   |  99 ++++
 .../algos/reco/collab/data/MovieLensData.java |  83 +++
 .../reco/collab/data/MovieLensDataset.java    | 385 +++++++++++++
 .../algos/reco/collab/data/MusicData.java     | 256 ++++++++
 .../algos/reco/collab/data/MusicItem.java     |  71 +++
 .../algos/reco/collab/data/MusicRating.java   |  52 ++
 .../algos/reco/collab/data/MusicUser.java     | 249 ++++++++
 .../algos/reco/collab/data/NewsData.java      | 202 +++++++
 .../algos/reco/collab/data/NewsItem.java      |  54 ++
 .../algos/reco/collab/data/NewsUser.java      |  82 +++
 .../algos/reco/collab/data/RatingBuilder.java |  94 +++
 .../evaluation/EvaluationDataProvider.java    |  44 ++
 .../MovieLensEvaluationDataProvider.java      | 283 +++++++++
 .../reco/collab/evaluation/MovieLensRMSE.java | 104 ++++
 .../reco/collab/evaluation/RMSEEstimator.java | 173 ++++++
 .../reco/collab/evaluation/RMSEResult.java    |  85 +++
 .../algos/reco/collab/model/Content.java      | 182 ++++++
 .../algos/reco/collab/model/Dataset.java      | 142 +++++
 .../algos/reco/collab/model/Item.java         | 171 ++++++
 .../algos/reco/collab/model/Rating.java       | 127 ++++
 .../reco/collab/model/RecommendationType.java |  38 ++
 .../algos/reco/collab/model/SimilarItem.java  | 128 ++++
 .../algos/reco/collab/model/SimilarUser.java  | 134 +++++
 .../algos/reco/collab/model/User.java         | 175 ++++++
 .../algos/reco/collab/recommender/Delphi.java | 545 ++++++++++++++++++
 .../reco/collab/recommender/DiggDelphi.java   | 282 +++++++++
 .../collab/recommender/MovieLensDelphi.java   | 324 +++++++++++
 .../recommender/PredictedItemRating.java      | 152 +++++
 .../reco/collab/recommender/Recommender.java  |  88 +++
 .../movielens/MovieLensItemSimilarity.java    |  92 +++
 .../movielens/MovieLensUserSimilarity.java    | 107 ++++
 .../naive/ImprovedItemBasedSimilarity.java    | 120 ++++
 .../naive/ImprovedUserBasedSimilarity.java    | 129 +++++
 .../similarity/naive/ItemBasedSimilarity.java | 110 ++++
 .../naive/ItemContentBasedSimilarity.java     |  92 +++
 .../naive/ItemPenaltyBasedSimilarity.java     | 161 ++++++
 .../similarity/naive/SimilarityMatrix.java    |  74 +++
 .../naive/SimilarityMatrixImpl.java           | 148 +++++
 .../similarity/naive/UserBasedSimilarity.java | 117 ++++
 .../naive/UserContentBasedSimilarity.java     | 107 ++++
 .../naive/UserItemContentBasedSimilarity.java | 184 ++++++
 .../UpperTriangularSimilarityMatrix.java      |  82 +++
 .../UpperTriangularSimilarityMatrixImpl.java  | 150 +++++
 .../similarity/util/PearsonCorrelation.java   | 170 ++++++
 .../similarity/util/RatingCountMatrix.java    | 131 +++++
 .../util/SimilarityMatrixCache.java           |  71 +++
 .../util/SimilarityMatrixRepository.java      | 173 ++++++
 .../algos/reco/content/digg/DiggCategory.java |  83 +++
 .../algos/reco/content/digg/DiggService.java  | 253 ++++++++
 .../reco/content/digg/DiggStoryItem.java      | 109 ++++
 .../algos/reco/content/digg/DiggUser.java     |  45 ++
 .../algos/search/data/SearchResult.java       | 180 ++++++
 .../search/lucene/LuceneIndexBuilder.java     | 152 +++++
 .../lucene/analyzer/CustomAnalyzer.java       | 113 ++++
 .../lucene/analyzer/TextDocumentTerms.java    |  78 +++
 .../search/ranking/DocRankMatrixBuilder.java  | 197 +++++++
 .../search/ranking/PageRankMatrixBuilder.java |  98 ++++
 .../algos/search/ranking/PageRankMatrixH.java | 184 ++++++
 .../yooreeka/algos/search/ranking/Rank.java   | 294 ++++++++++
 .../algos/search/ranking/RelevanceScore.java  |  78 +++
 .../algos/search/util/TermFreqMapUtils.java   |  93 +++
 .../algos/taxis/bayesian/NaiveBayes.java      | 327 +++++++++++
 .../boosting/BoostingARCX4Classifier.java     | 190 ++++++
 .../taxis/boosting/WeightBasedRandom.java     |  80 +++
 .../algos/taxis/core/AttributeValue.java      | 113 ++++
 .../algos/taxis/core/BaseConcept.java         | 124 ++++
 .../algos/taxis/core/BaseInstance.java        | 239 ++++++++
 .../algos/taxis/core/DoubleAttribute.java     |  96 +++
 .../algos/taxis/core/StringAttribute.java     | 108 ++++
 .../algos/taxis/core/TrainingSet.java         | 173 ++++++
 .../algos/taxis/core/intf/Attribute.java      |  42 ++
 .../algos/taxis/core/intf/Classifier.java     |  51 ++
 .../algos/taxis/core/intf/Concept.java        |  44 ++
 .../algos/taxis/core/intf/Instance.java       |  46 ++
 .../taxis/ensemble/ClassifierEnsemble.java    | 106 ++++
 .../taxis/ensemble/ConceptMajorityVoter.java  |  87 +++
 .../taxis/evaluation/ClassifierResults.java   |  70 +++
 .../algos/taxis/evaluation/CochransQTest.java | 128 ++++
 .../algos/taxis/evaluation/Diff2PropTest.java |  84 +++
 .../algos/taxis/evaluation/FTest.java         | 182 ++++++
 .../algos/taxis/evaluation/McNemarTest.java   | 118 ++++
 .../yooreeka/algos/taxis/evaluation/Test.java | 105 ++++
 .../taxis/networks/neural/XORNetwork.java     | 163 ++++++
 .../taxis/networks/neural/core/BaseLayer.java | 139 +++++
 .../taxis/networks/neural/core/BaseLink.java  |  85 +++
 .../taxis/networks/neural/core/BaseNN.java    | 429 ++++++++++++++
 .../taxis/networks/neural/core/BaseNode.java  | 216 +++++++
 .../networks/neural/core/LinearNode.java      |  59 ++
 .../networks/neural/core/SigmoidNode.java     |  52 ++
 .../networks/neural/core/intf/Layer.java      |  57 ++
 .../taxis/networks/neural/core/intf/Link.java |  53 ++
 .../neural/core/intf/NeuralNetwork.java       |  69 +++
 .../taxis/networks/neural/core/intf/Node.java |  98 ++++
 .../algos/taxis/tree/AttributeDefinition.java | 100 ++++
 .../algos/taxis/tree/AttributeSelector.java   | 145 +++++
 .../algos/taxis/tree/AttributeUtils.java      |  61 ++
 src/org/yooreeka/algos/taxis/tree/Branch.java |  92 +++
 .../algos/taxis/tree/BranchGroup.java         | 130 +++++
 .../algos/taxis/tree/ConceptUtils.java        |  85 +++
 .../taxis/tree/DecisionTreeClassifier.java    | 248 ++++++++
 .../yooreeka/algos/taxis/tree/InfoGain.java   | 151 +++++
 src/org/yooreeka/algos/taxis/tree/Node.java   | 403 +++++++++++++
 .../algos/taxis/tree/SplittingCriterion.java  | 124 ++++
 .../taxis/tree/TrueErrorRateEstimator.java    |  71 +++
 .../yooreeka/config/YooreekaConfigurator.java | 220 +++++++
 .../credit/BaggingCreditClassifier.java       |  79 +++
 .../credit/BoostingCreditClassifier.java      | 131 +++++
 .../examples/credit/CreditConcept.java        |  92 +++
 .../examples/credit/CreditInstance.java       | 121 ++++
 .../examples/credit/DTCreditClassifier.java   | 194 +++++++
 .../examples/credit/NBCreditClassifier.java   | 121 ++++
 .../examples/credit/NNCreditClassifier.java   | 406 +++++++++++++
 .../examples/credit/UserCreditNN.java         | 211 +++++++
 .../examples/credit/data/UseCaseData.java     | 194 +++++++
 .../examples/credit/data/UserDataset.java     |  80 +++
 .../examples/credit/data/UserLoader.java      |  70 +++
 .../credit/data/users/BadUserType.java        |  53 ++
 .../credit/data/users/DangerousUserType.java  |  53 ++
 .../credit/data/users/ExcellentUserType.java  |  53 ++
 .../credit/data/users/GoodUserType.java       |  53 ++
 .../examples/credit/data/users/User.java      | 319 ++++++++++
 .../examples/credit/data/users/UserType.java  | 512 ++++++++++++++++
 .../credit/data/users/VeryGoodUserType.java   |  53 ++
 .../examples/credit/util/AttributeInfo.java   |  68 +++
 .../examples/credit/util/AttributeUtils.java  |  88 +++
 .../util/BootstrapTrainingSetBuilder.java     | 121 ++++
 .../credit/util/ClassifierResults.java        |  70 +++
 .../examples/credit/util/CreditDataUtils.java | 100 ++++
 .../credit/util/CreditErrorEstimator.java     | 231 ++++++++
 .../examples/credit/util/DataGenerator.java   | 130 +++++
 .../credit/util/UserInstanceBuilder.java      | 167 ++++++
 .../examples/fraud/DTFraudClassifier.java     | 136 +++++
 .../examples/fraud/NNFraudClassifier.java     | 356 ++++++++++++
 .../examples/fraud/TransactionConcept.java    |  92 +++
 .../examples/fraud/TransactionInstance.java   |  99 ++++
 .../examples/fraud/TransactionNN.java         | 106 ++++
 .../examples/fraud/data/Transaction.java      | 124 ++++
 .../fraud/data/TransactionDataset.java        | 134 +++++
 .../data/TransactionInstanceBuilder.java      | 224 +++++++
 .../fraud/data/TransactionLoader.java         |  59 ++
 .../fraud/data/TransactionLocation.java       |  94 +++
 .../examples/fraud/util/DataGenerator.java    | 119 ++++
 .../examples/fraud/util/FraudDataUtils.java   | 148 +++++
 .../fraud/util/FraudErrorEstimator.java       | 123 ++++
 .../examples/fraud/util/TenUsersSample.java   | 363 ++++++++++++
 .../fraud/util/TransactionSetProfile.java     | 145 +++++
 .../examples/fraud/util/UserStatistics.java   | 153 +++++
 .../fraud/util/UserStatisticsCalculator.java  | 164 ++++++
 .../examples/newsgroups/NewsCrawler.java      | 195 +++++++
 .../recommender/MovieLensRMSESample.java      |  61 ++
 .../examples/recommender/RatingGrapher.java   | 174 ++++++
 .../examples/recommender/Recommender.java     | 119 ++++
 src/org/yooreeka/examples/search/DocRank.java |  57 ++
 .../examples/search/LuceneIndexer.java        |  87 +++
 .../yooreeka/examples/search/MySearcher.java  | 360 ++++++++++++
 .../yooreeka/examples/search/PageRank.java    |  56 ++
 .../examples/spamfilter/EmailClassifier.java  | 247 ++++++++
 .../examples/spamfilter/EmailInstance.java    |  86 +++
 .../examples/spamfilter/data/Email.java       | 119 ++++
 .../examples/spamfilter/data/EmailData.java   | 223 +++++++
 .../spamfilter/data/EmailDataset.java         | 137 +++++
 src/org/yooreeka/util/C.java                  |  64 ++
 src/org/yooreeka/util/P.java                  |  57 ++
 src/org/yooreeka/util/gui/GraphGui.java       | 152 +++++
 src/org/yooreeka/util/gui/XyGui.java          | 203 +++++++
 .../util/internet/behavior/UserClick.java     | 157 +++++
 .../util/internet/behavior/UserQuery.java     | 159 +++++
 .../crawling/FetchAndProcessCrawler.java      | 310 ++++++++++
 .../util/internet/crawling/YCrawler.java      | 197 +++++++
 .../crawling/core/BasicWebCrawler.java        | 332 +++++++++++
 .../internet/crawling/core/CrawlData.java     |  99 ++++
 .../crawling/core/CrawlDataProcessor.java     |  46 ++
 .../crawling/core/DocumentFilter.java         |  44 ++
 .../internet/crawling/core/URLFilter.java     |  79 +++
 .../internet/crawling/core/URLNormalizer.java |  77 +++
 .../internet/crawling/db/FetchedDocsDB.java   | 305 ++++++++++
 .../util/internet/crawling/db/KnownUrlDB.java | 279 +++++++++
 .../util/internet/crawling/db/PageLinkDB.java | 163 ++++++
 .../internet/crawling/db/ProcessedDocsDB.java | 413 +++++++++++++
 .../crawling/model/FetchedDocument.java       | 143 +++++
 .../crawling/model/KnownUrlEntry.java         |  77 +++
 .../util/internet/crawling/model/Outlink.java |  55 ++
 .../crawling/transport/common/Transport.java  |  43 ++
 .../transport/common/TransportException.java  |  47 ++
 .../transport/file/FileTransport.java         | 134 +++++
 .../file/FileTransportException.java          |  49 ++
 .../transport/http/HTTPTransport.java         | 260 +++++++++
 .../http/HTTPTransportException.java          |  46 ++
 .../crawling/transport/http/HTTPUtils.java    | 142 +++++
 .../crawling/util/DocumentIdUtils.java        |  56 ++
 .../internet/crawling/util/FileUtils.java     | 130 +++++
 .../util/internet/crawling/util/UrlGroup.java |  71 +++
 .../util/internet/crawling/util/UrlUtils.java |  65 +++
 .../crawling/util/ValueToIndexMapping.java    |  93 +++
 .../yooreeka/util/metrics/CosineDistance.java |  58 ++
 .../util/metrics/CosineSimilarity.java        |  76 +++
 .../util/metrics/CosineSimilarityMeasure.java |  56 ++
 .../util/metrics/EuclideanDistance.java       |  55 ++
 .../util/metrics/JaccardCoefficient.java      |  77 +++
 .../util/metrics/JaccardDistance.java         |  57 ++
 .../util/metrics/NumericDistance.java         |  40 ++
 .../util/metrics/SimilarityMeasure.java       |  43 ++
 .../util/metrics/TermFrequencyBuilder.java    |  78 +++
 .../util/parsing/common/AbstractDocument.java |  48 ++
 .../util/parsing/common/DataEntry.java        |  40 ++
 .../util/parsing/common/DataField.java        |  68 +++
 .../util/parsing/common/DataType.java         |  40 ++
 .../util/parsing/common/DocumentParser.java   |  44 ++
 .../common/DocumentParserException.java       |  45 ++
 .../parsing/common/DocumentParserFactory.java |  68 +++
 .../parsing/common/ProcessedDocument.java     | 198 +++++++
 .../util/parsing/csv/CSVDocument.java         |  93 +++
 .../yooreeka/util/parsing/csv/CSVEntry.java   | 108 ++++
 .../yooreeka/util/parsing/csv/CSVFile.java    | 149 +++++
 .../yooreeka/util/parsing/csv/CSVParser.java  | 135 +++++
 .../yooreeka/util/parsing/csv/CSVSchema.java  |  58 ++
 .../util/parsing/html/CompositeFilter.java    |  64 ++
 .../util/parsing/html/ElementNodeFilter.java  |  61 ++
 .../util/parsing/html/HTMLDocumentParser.java | 457 +++++++++++++++
 .../html/HTMLDocumentParserException.java     |  49 ++
 .../util/parsing/html/HTMLWriter.java         | 119 ++++
 .../util/parsing/html/LinkNodeFilter.java     |  58 ++
 .../util/parsing/html/MultiFilter.java        |  61 ++
 .../parsing/msword/MSWordDocumentParser.java  | 103 ++++
 .../msword/MSWordDocumentParserException.java |  49 ++
 .../util/text/AlphabetProjection.java         | 313 ++++++++++
 255 files changed, 35422 insertions(+)
 create mode 100644 src/org/yooreeka/algos/clustering/dbscan/DBSCANAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/hierarchical/AverageLinkAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/hierarchical/ClusterSet.java
 create mode 100644 src/org/yooreeka/algos/clustering/hierarchical/Dendrogram.java
 create mode 100644 src/org/yooreeka/algos/clustering/hierarchical/MST.java
 create mode 100644 src/org/yooreeka/algos/clustering/hierarchical/MSTSingleLinkAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/hierarchical/SingleLinkAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/model/Attribute.java
 create mode 100644 src/org/yooreeka/algos/clustering/model/Cluster.java
 create mode 100644 src/org/yooreeka/algos/clustering/model/DataPoint.java
 create mode 100644 src/org/yooreeka/algos/clustering/partitional/KMeansAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/partitional/NearestNeighborAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/rock/LinkMatrix.java
 create mode 100644 src/org/yooreeka/algos/clustering/rock/MergeGoodnessMeasure.java
 create mode 100644 src/org/yooreeka/algos/clustering/rock/ROCKAlgorithm.java
 create mode 100644 src/org/yooreeka/algos/clustering/rock/ROCKClusters.java
 create mode 100644 src/org/yooreeka/algos/clustering/rock/SimilarCluster.java
 create mode 100644 src/org/yooreeka/algos/clustering/test/MyDiggSpaceData.java
 create mode 100644 src/org/yooreeka/algos/clustering/test/MyDiggSpaceDataset.java
 create mode 100644 src/org/yooreeka/algos/clustering/test/SFData.java
 create mode 100644 src/org/yooreeka/algos/clustering/test/SFDataset.java
 create mode 100644 src/org/yooreeka/algos/clustering/utils/Attributes.java
 create mode 100644 src/org/yooreeka/algos/clustering/utils/ObjectToIndexMapping.java
 create mode 100644 src/org/yooreeka/algos/clustering/utils/SortedArrayClustering.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/cache/FileStore.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/cache/Store.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/BaseDataset.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/ContentItem.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/DiggData.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/HTMLContent.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/MovieLensData.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/MovieLensDataset.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/MusicData.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/MusicItem.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/MusicRating.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/MusicUser.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/NewsData.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/NewsItem.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/NewsUser.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/data/RatingBuilder.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/evaluation/EvaluationDataProvider.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/evaluation/MovieLensEvaluationDataProvider.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/evaluation/MovieLensRMSE.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/evaluation/RMSEEstimator.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/evaluation/RMSEResult.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/Content.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/Dataset.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/Item.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/Rating.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/RecommendationType.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/SimilarItem.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/SimilarUser.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/model/User.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/recommender/Delphi.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/recommender/DiggDelphi.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/recommender/MovieLensDelphi.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/recommender/PredictedItemRating.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/recommender/Recommender.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensItemSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensUserSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedItemBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedUserBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/ItemBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/ItemContentBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/ItemPenaltyBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrix.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrixImpl.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/UserBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/UserContentBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/naive/UserItemContentBasedSimilarity.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrix.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrixImpl.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/util/PearsonCorrelation.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/util/RatingCountMatrix.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixCache.java
 create mode 100644 src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixRepository.java
 create mode 100644 src/org/yooreeka/algos/reco/content/digg/DiggCategory.java
 create mode 100644 src/org/yooreeka/algos/reco/content/digg/DiggService.java
 create mode 100644 src/org/yooreeka/algos/reco/content/digg/DiggStoryItem.java
 create mode 100644 src/org/yooreeka/algos/reco/content/digg/DiggUser.java
 create mode 100644 src/org/yooreeka/algos/search/data/SearchResult.java
 create mode 100644 src/org/yooreeka/algos/search/lucene/LuceneIndexBuilder.java
 create mode 100644 src/org/yooreeka/algos/search/lucene/analyzer/CustomAnalyzer.java
 create mode 100644 src/org/yooreeka/algos/search/lucene/analyzer/TextDocumentTerms.java
 create mode 100644 src/org/yooreeka/algos/search/ranking/DocRankMatrixBuilder.java
 create mode 100644 src/org/yooreeka/algos/search/ranking/PageRankMatrixBuilder.java
 create mode 100644 src/org/yooreeka/algos/search/ranking/PageRankMatrixH.java
 create mode 100644 src/org/yooreeka/algos/search/ranking/Rank.java
 create mode 100644 src/org/yooreeka/algos/search/ranking/RelevanceScore.java
 create mode 100644 src/org/yooreeka/algos/search/util/TermFreqMapUtils.java
 create mode 100644 src/org/yooreeka/algos/taxis/bayesian/NaiveBayes.java
 create mode 100644 src/org/yooreeka/algos/taxis/boosting/BoostingARCX4Classifier.java
 create mode 100644 src/org/yooreeka/algos/taxis/boosting/WeightBasedRandom.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/AttributeValue.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/BaseConcept.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/BaseInstance.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/DoubleAttribute.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/StringAttribute.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/TrainingSet.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/intf/Attribute.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/intf/Classifier.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/intf/Concept.java
 create mode 100644 src/org/yooreeka/algos/taxis/core/intf/Instance.java
 create mode 100644 src/org/yooreeka/algos/taxis/ensemble/ClassifierEnsemble.java
 create mode 100644 src/org/yooreeka/algos/taxis/ensemble/ConceptMajorityVoter.java
 create mode 100644 src/org/yooreeka/algos/taxis/evaluation/ClassifierResults.java
 create mode 100644 src/org/yooreeka/algos/taxis/evaluation/CochransQTest.java
 create mode 100644 src/org/yooreeka/algos/taxis/evaluation/Diff2PropTest.java
 create mode 100644 src/org/yooreeka/algos/taxis/evaluation/FTest.java
 create mode 100644 src/org/yooreeka/algos/taxis/evaluation/McNemarTest.java
 create mode 100644 src/org/yooreeka/algos/taxis/evaluation/Test.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/XORNetwork.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/BaseLayer.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/BaseLink.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/BaseNN.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/BaseNode.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/LinearNode.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/SigmoidNode.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/intf/Layer.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/intf/Link.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/intf/NeuralNetwork.java
 create mode 100644 src/org/yooreeka/algos/taxis/networks/neural/core/intf/Node.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/AttributeDefinition.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/AttributeSelector.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/AttributeUtils.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/Branch.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/BranchGroup.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/ConceptUtils.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/DecisionTreeClassifier.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/InfoGain.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/Node.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/SplittingCriterion.java
 create mode 100644 src/org/yooreeka/algos/taxis/tree/TrueErrorRateEstimator.java
 create mode 100644 src/org/yooreeka/config/YooreekaConfigurator.java
 create mode 100644 src/org/yooreeka/examples/credit/BaggingCreditClassifier.java
 create mode 100644 src/org/yooreeka/examples/credit/BoostingCreditClassifier.java
 create mode 100644 src/org/yooreeka/examples/credit/CreditConcept.java
 create mode 100644 src/org/yooreeka/examples/credit/CreditInstance.java
 create mode 100644 src/org/yooreeka/examples/credit/DTCreditClassifier.java
 create mode 100644 src/org/yooreeka/examples/credit/NBCreditClassifier.java
 create mode 100644 src/org/yooreeka/examples/credit/NNCreditClassifier.java
 create mode 100644 src/org/yooreeka/examples/credit/UserCreditNN.java
 create mode 100644 src/org/yooreeka/examples/credit/data/UseCaseData.java
 create mode 100644 src/org/yooreeka/examples/credit/data/UserDataset.java
 create mode 100644 src/org/yooreeka/examples/credit/data/UserLoader.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/BadUserType.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/DangerousUserType.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/ExcellentUserType.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/GoodUserType.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/User.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/UserType.java
 create mode 100644 src/org/yooreeka/examples/credit/data/users/VeryGoodUserType.java
 create mode 100644 src/org/yooreeka/examples/credit/util/AttributeInfo.java
 create mode 100644 src/org/yooreeka/examples/credit/util/AttributeUtils.java
 create mode 100644 src/org/yooreeka/examples/credit/util/BootstrapTrainingSetBuilder.java
 create mode 100644 src/org/yooreeka/examples/credit/util/ClassifierResults.java
 create mode 100644 src/org/yooreeka/examples/credit/util/CreditDataUtils.java
 create mode 100644 src/org/yooreeka/examples/credit/util/CreditErrorEstimator.java
 create mode 100644 src/org/yooreeka/examples/credit/util/DataGenerator.java
 create mode 100644 src/org/yooreeka/examples/credit/util/UserInstanceBuilder.java
 create mode 100644 src/org/yooreeka/examples/fraud/DTFraudClassifier.java
 create mode 100644 src/org/yooreeka/examples/fraud/NNFraudClassifier.java
 create mode 100644 src/org/yooreeka/examples/fraud/TransactionConcept.java
 create mode 100644 src/org/yooreeka/examples/fraud/TransactionInstance.java
 create mode 100644 src/org/yooreeka/examples/fraud/TransactionNN.java
 create mode 100644 src/org/yooreeka/examples/fraud/data/Transaction.java
 create mode 100644 src/org/yooreeka/examples/fraud/data/TransactionDataset.java
 create mode 100644 src/org/yooreeka/examples/fraud/data/TransactionInstanceBuilder.java
 create mode 100644 src/org/yooreeka/examples/fraud/data/TransactionLoader.java
 create mode 100644 src/org/yooreeka/examples/fraud/data/TransactionLocation.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/DataGenerator.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/FraudDataUtils.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/FraudErrorEstimator.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/TenUsersSample.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/TransactionSetProfile.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/UserStatistics.java
 create mode 100644 src/org/yooreeka/examples/fraud/util/UserStatisticsCalculator.java
 create mode 100644 src/org/yooreeka/examples/newsgroups/NewsCrawler.java
 create mode 100644 src/org/yooreeka/examples/recommender/MovieLensRMSESample.java
 create mode 100644 src/org/yooreeka/examples/recommender/RatingGrapher.java
 create mode 100644 src/org/yooreeka/examples/recommender/Recommender.java
 create mode 100644 src/org/yooreeka/examples/search/DocRank.java
 create mode 100644 src/org/yooreeka/examples/search/LuceneIndexer.java
 create mode 100644 src/org/yooreeka/examples/search/MySearcher.java
 create mode 100644 src/org/yooreeka/examples/search/PageRank.java
 create mode 100644 src/org/yooreeka/examples/spamfilter/EmailClassifier.java
 create mode 100644 src/org/yooreeka/examples/spamfilter/EmailInstance.java
 create mode 100644 src/org/yooreeka/examples/spamfilter/data/Email.java
 create mode 100644 src/org/yooreeka/examples/spamfilter/data/EmailData.java
 create mode 100644 src/org/yooreeka/examples/spamfilter/data/EmailDataset.java
 create mode 100644 src/org/yooreeka/util/C.java
 create mode 100644 src/org/yooreeka/util/P.java
 create mode 100644 src/org/yooreeka/util/gui/GraphGui.java
 create mode 100644 src/org/yooreeka/util/gui/XyGui.java
 create mode 100644 src/org/yooreeka/util/internet/behavior/UserClick.java
 create mode 100644 src/org/yooreeka/util/internet/behavior/UserQuery.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/FetchAndProcessCrawler.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/YCrawler.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/core/BasicWebCrawler.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/core/CrawlData.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/core/CrawlDataProcessor.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/core/DocumentFilter.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/core/URLFilter.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/core/URLNormalizer.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/db/FetchedDocsDB.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/db/KnownUrlDB.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/db/PageLinkDB.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/db/ProcessedDocsDB.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/model/FetchedDocument.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/model/KnownUrlEntry.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/model/Outlink.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/common/Transport.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/common/TransportException.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/file/FileTransport.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/file/FileTransportException.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransport.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransportException.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/transport/http/HTTPUtils.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/util/DocumentIdUtils.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/util/FileUtils.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/util/UrlGroup.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/util/UrlUtils.java
 create mode 100644 src/org/yooreeka/util/internet/crawling/util/ValueToIndexMapping.java
 create mode 100644 src/org/yooreeka/util/metrics/CosineDistance.java
 create mode 100644 src/org/yooreeka/util/metrics/CosineSimilarity.java
 create mode 100644 src/org/yooreeka/util/metrics/CosineSimilarityMeasure.java
 create mode 100644 src/org/yooreeka/util/metrics/EuclideanDistance.java
 create mode 100644 src/org/yooreeka/util/metrics/JaccardCoefficient.java
 create mode 100644 src/org/yooreeka/util/metrics/JaccardDistance.java
 create mode 100644 src/org/yooreeka/util/metrics/NumericDistance.java
 create mode 100644 src/org/yooreeka/util/metrics/SimilarityMeasure.java
 create mode 100644 src/org/yooreeka/util/metrics/TermFrequencyBuilder.java
 create mode 100644 src/org/yooreeka/util/parsing/common/AbstractDocument.java
 create mode 100644 src/org/yooreeka/util/parsing/common/DataEntry.java
 create mode 100644 src/org/yooreeka/util/parsing/common/DataField.java
 create mode 100644 src/org/yooreeka/util/parsing/common/DataType.java
 create mode 100644 src/org/yooreeka/util/parsing/common/DocumentParser.java
 create mode 100644 src/org/yooreeka/util/parsing/common/DocumentParserException.java
 create mode 100644 src/org/yooreeka/util/parsing/common/DocumentParserFactory.java
 create mode 100644 src/org/yooreeka/util/parsing/common/ProcessedDocument.java
 create mode 100644 src/org/yooreeka/util/parsing/csv/CSVDocument.java
 create mode 100644 src/org/yooreeka/util/parsing/csv/CSVEntry.java
 create mode 100644 src/org/yooreeka/util/parsing/csv/CSVFile.java
 create mode 100644 src/org/yooreeka/util/parsing/csv/CSVParser.java
 create mode 100644 src/org/yooreeka/util/parsing/csv/CSVSchema.java
 create mode 100644 src/org/yooreeka/util/parsing/html/CompositeFilter.java
 create mode 100644 src/org/yooreeka/util/parsing/html/ElementNodeFilter.java
 create mode 100644 src/org/yooreeka/util/parsing/html/HTMLDocumentParser.java
 create mode 100644 src/org/yooreeka/util/parsing/html/HTMLDocumentParserException.java
 create mode 100644 src/org/yooreeka/util/parsing/html/HTMLWriter.java
 create mode 100644 src/org/yooreeka/util/parsing/html/LinkNodeFilter.java
 create mode 100644 src/org/yooreeka/util/parsing/html/MultiFilter.java
 create mode 100644 src/org/yooreeka/util/parsing/msword/MSWordDocumentParser.java
 create mode 100644 src/org/yooreeka/util/parsing/msword/MSWordDocumentParserException.java
 create mode 100644 src/org/yooreeka/util/text/AlphabetProjection.java

diff --git a/src/org/yooreeka/algos/clustering/dbscan/DBSCANAlgorithm.java b/src/org/yooreeka/algos/clustering/dbscan/DBSCANAlgorithm.java
new file mode 100644
index 0000000..82dfd83
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/dbscan/DBSCANAlgorithm.java
@@ -0,0 +1,448 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.dbscan;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.utils.ObjectToIndexMapping;
+import org.yooreeka.util.P;
+import org.yooreeka.util.metrics.NumericDistance;
+import org.yooreeka.util.metrics.TermFrequencyBuilder;
+
+/**
+ * Implementation of DBSCAN clustering algorithm.
+ * <p>
+ * Algorithm parameters:
+ * <ol>
+ * <li>Eps - threshold value to determine point neighbors. Two points are
+ * neighbors if the distance between them does not exceed this threshold value.</li>
+ * <li>MinPts - minimum number of points in any cluster.</li>
+ * </ol>
+ * Choice of parameter values depends on the data.
+ * </p>
+ * <p>
+ * Point types:
+ * <ol>
+ * <li>Core point - point that belongs to the core of the cluster. It has at
+ * least MinPts neighboring points.</li>
+ * <li>Border point - is a neighbor to at least one core point but it doesn't
+ * have enough neighbors to be a core point.</li>
+ * <li>Noise point - is a point that doesn't belong to any cluster because it is
+ * not close to any of the core points.</li>
+ * </ol>
+ */
+public class DBSCANAlgorithm {
+
+	private static final Logger LOG = Logger.getLogger(DBSCANAlgorithm.class
+			.getName());
+
+	private static double[][] calculateAdjacencyMatrix(NumericDistance distance,
+			DataPoint[] points, boolean useTermFrequencies) {
+		int n = points.length;
+		double[][] a = new double[n][n];
+		for (int i = 0; i < n; i++) {
+			double[] x = points[i].getNumericAttrValues();
+			for (int j = i + 1; j < n; j++) {
+				double[] y;
+				if (useTermFrequencies) {
+					double[][] tfVectors = TermFrequencyBuilder
+							.buildTermFrequencyVectors(
+									points[i].getTextAttrValues(),
+									points[j].getTextAttrValues());
+					x = tfVectors[0];
+					y = tfVectors[1];
+				} else {
+					y = points[j].getNumericAttrValues();
+				}
+				a[i][j] = distance.getDistance(x, y);
+				a[j][i] = a[i][j];
+			}
+			a[i][i] = 0.0;
+		}
+		return a;
+	}
+
+	public static void main(String[] args) {
+
+		DataPoint[] elements = new DataPoint[5];
+		elements[0] = new DataPoint("A", new double[] {});
+		elements[1] = new DataPoint("B", new double[] {});
+		elements[2] = new DataPoint("C", new double[] {});
+		elements[3] = new DataPoint("D", new double[] {});
+		elements[4] = new DataPoint("E", new double[] {});
+
+		double[][] a = new double[][] { { 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 2 },
+				{ 2, 2, 2, 11, 31 }, { 2, 2, 2, 10, 30 }, { 60, 60, 60, 0, 0 } };
+
+		double eps = 0.5;
+		int minPoints = 2;
+
+		DBSCANAlgorithm dbscan = new DBSCANAlgorithm(elements, a, eps,
+				minPoints);
+		
+		printResults(dbscan.cluster(), eps,minPoints);
+	}
+
+	/*
+	 * Data points for clustering.
+	 */
+	private DataPoint[] points;
+
+	/*
+	 * Adjacency matrix. Contains distances between points.
+	 */
+	private double[][] adjacencyMatrix;
+
+	/*
+	 * Threshold value. Determines which points will be considered as neighbors.
+	 * Two points are neighbors if the distance between them does not exceed
+	 * threshold value.
+	 */
+	private double eps;
+
+	/*
+	 * Identifies a set of Noise points.
+	 */
+	private static final int CLUSTER_ID_NOISE = -1;
+
+	/*
+	 * Identifies a set of Unclassified points.
+	 */
+	private static final int CLUSTER_ID_UNCLASSIFIED = 0;
+
+	/*
+	 * Sequence that is used to generate next cluster id.
+	 */
+	private int nextClusterId = 1;
+
+	/*
+	 * Sets of points. Initially all points will be assigned into Unclassified
+	 * points set.
+	 */
+	private Map<Integer, Set<DataPoint>> clusters = new LinkedHashMap<Integer, Set<DataPoint>>();
+
+	/*
+	 * Number of points that should exist in the neighborhood for a point to be
+	 * a core point.
+	 * 
+	 * Best value for this parameter depends on the data set.
+	 */
+	private int minPoints;
+
+	private ObjectToIndexMapping<DataPoint> idxMapping = new ObjectToIndexMapping<DataPoint>();
+
+	private boolean verbose = true;
+
+	/**
+	 * Initializes algorithm with all data that it needs.
+	 * 
+	 * @param points
+	 *            all points to cluster
+	 * @param distance
+	 *            metric distance function
+	 * @param eps
+	 *            threshold value used to calculate point neighborhood.
+	 * @param minPoints
+	 *            number of neighbors for point to be considered a core point.
+	 */
+	public DBSCANAlgorithm(DataPoint[] points, NumericDistance distance, double eps,
+			int minPoints, boolean useTermFrequencies) {
+
+		init(points, eps, minPoints);
+		this.adjacencyMatrix = calculateAdjacencyMatrix(distance, points,
+				useTermFrequencies);
+	}
+
+	/**
+	 * Initializes algorithm with all data that it needs.
+	 * 
+	 * @param points
+	 *            points to cluster
+	 * @param adjacencyMatrix
+	 *            adjacency matrix with distances
+	 * @param eps
+	 *            distance threshold value
+	 * @param minPoints
+	 *            number of neighbors for point to be considered a core point.
+	 */
+	public DBSCANAlgorithm(DataPoint[] points, double[][] adjacencyMatrix,
+			double eps, int minPoints) {
+		init(points, eps, minPoints);
+		this.adjacencyMatrix = adjacencyMatrix;
+	}
+
+	/* Files p under clusterId after pulling it out of Noise/Unclassified; throws if p is in neither and clusterId is not Unclassified. */
+	private void assignPointToCluster(DataPoint p, int clusterId) {
+		// Remove point from the group that it currently belongs to...
+		if (isNoise(p)) {
+			removePointFromCluster(p, CLUSTER_ID_NOISE);
+		} else if (isUnclassified(p)) {
+			removePointFromCluster(p, CLUSTER_ID_UNCLASSIFIED);
+		} else {
+			if (clusterId != CLUSTER_ID_UNCLASSIFIED) {
+				throw new RuntimeException(
+						"Trying to move point that has already been "
+								+ "assigned to some other cluster. Point: " + p
+								+ ", clusterId=" + clusterId);
+			} else {
+				// do nothing. we are registering a brand new point in
+				// UNCLASSIFIED set.
+			}
+		}
+
+		Set<DataPoint> points = clusters.get(clusterId);
+		if (points == null) {
+			points = new HashSet<DataPoint>();
+			clusters.put(clusterId, points);
+		}
+		points.add(p);
+	}
+
+	private void assignPointToCluster(Set<DataPoint> points, int clusterId) {
+		for (DataPoint p : points) {
+			assignPointToCluster(p, clusterId);
+		}
+	}
+
+	public List<Cluster> cluster() {
+		int clusterId = getNextClusterId();
+
+		for (DataPoint p : points) {
+			if (isUnclassified(p)) {
+
+				boolean isClusterCreated = createCluster(p, clusterId);
+
+				if (isClusterCreated) {
+					// Generate id for the next cluster
+					clusterId = getNextClusterId();
+				}
+			}
+		}
+
+		// Convert sets of points into clusters...
+		List<Cluster> allClusters = new ArrayList<Cluster>();
+
+		for (Map.Entry<Integer, Set<DataPoint>> e : clusters.entrySet()) {
+
+			String label = String.valueOf(e.getKey());
+
+			Set<DataPoint> points = e.getValue();
+
+			if (points != null && !points.isEmpty()) {
+
+				Cluster cluster = new Cluster(label, e.getValue());
+
+				allClusters.add(cluster);
+			}
+		}
+
+		// Group with Noise elements returned as well
+		return allClusters;
+	}
+
+	/*
+	 * Tries to grow a new cluster, labelled clusterId, starting from p.
+	 * Returns true if p is a core point and a cluster was built, false if p
+	 * was filed as Noise instead.
+	 */
+	private boolean createCluster(DataPoint p, Integer clusterId) {
+		boolean isClusterCreated = false;
+
+		Set<DataPoint> nPoints = findNeighbors(p, eps);
+
+		if (nPoints.size() < minPoints) {
+			// Assign point into "Noise" group.
+			// It will have a chance to become a border point later on.
+			assignPointToCluster(p, CLUSTER_ID_NOISE);
+
+			// return false to indicate that we didn't create any cluster
+			isClusterCreated = false;
+
+		} else {
+			// Claim only neighbors that are still Noise or Unclassified. A
+			// neighbor may already be a border point of an earlier cluster;
+			// assignPointToCluster() throws for such points, so they stay put.
+			Set<DataPoint> seeds = new HashSet<DataPoint>();
+			for (DataPoint candidate : nPoints) {
+				if (isNoise(candidate) || isUnclassified(candidate)) {
+					assignPointToCluster(candidate, clusterId);
+					seeds.add(candidate);
+				}
+			}
+			nPoints = seeds;
+
+			// Remove point itself.
+			nPoints.remove(p);
+
+			// Process the rest of the neighbors...
+			while (nPoints.size() > 0) {
+
+				// pick the first neighbor
+				DataPoint nPoint = nPoints.iterator().next();
+
+				// process neighbor
+				Set<DataPoint> nnPoints = findNeighbors(nPoint, eps);
+
+				if (nnPoints.size() >= minPoints) {
+
+					// nPoint is another core point.
+					for (DataPoint nnPoint : nnPoints) {
+
+						if (isNoise(nnPoint)) {
+							// Border point: it was filed as Noise because it
+							// lacks the neighbors to be core. Just add it.
+							assignPointToCluster(nnPoint, clusterId);
+
+						} else if (isUnclassified(nnPoint)) {
+							// Not examined yet: queue it for the core-point
+							// check and assign it to the cluster.
+							nPoints.add(nnPoint);
+							assignPointToCluster(nnPoint, clusterId);
+						}
+					}
+				} else {
+					// do nothing. The neighbor is just a border point.
+				}
+
+				nPoints.remove(nPoint);
+			}
+
+			// return true to indicate that we did create a cluster
+			isClusterCreated = true;
+		}
+
+		return isClusterCreated;
+	}
+
+	private Set<DataPoint> findNeighbors(DataPoint p, double threshold) {
+		Set<DataPoint> neighbors = new HashSet<DataPoint>();
+		int i = idxMapping.getIndex(p);
+		for (int j = 0, n = idxMapping.getSize(); j < n; j++) {
+			if (adjacencyMatrix[i][j] <= threshold) {
+				neighbors.add(idxMapping.getObject(j));
+			}
+		}
+		return neighbors;
+	}
+
+	private int getNextClusterId() {
+		return nextClusterId++;
+	}
+
+	private void init(DataPoint[] points, double neighborThreshold,
+			int minPoints) {
+		// Shared setup for both constructors: store parameters, index the points.
+		LOG.setLevel(Level.FINEST); //YooreekaConfigurator.getLevel(DBSCANAlgorithm.class.getName()));
+
+		this.points = points;
+		this.eps = neighborThreshold;
+		this.minPoints = minPoints;
+
+		for (DataPoint p : points) {
+			// Creating a Point <-> Index mapping for all points
+			idxMapping.getIndex(p);
+			// Assign all points into "Unclassified" group
+			assignPointToCluster(p, CLUSTER_ID_UNCLASSIFIED);
+		}
+	}
+
+	private boolean isNoise(DataPoint p) {
+		return isPointInCluster(p, CLUSTER_ID_NOISE);
+	}
+
+	private boolean isPointInCluster(DataPoint p, int clusterId) {
+		boolean inCluster = false;
+		Set<DataPoint> points = clusters.get(clusterId);
+		if (points != null) {
+			inCluster = points.contains(p);
+		}
+		return inCluster;
+	}
+
+	private boolean isUnclassified(DataPoint p) {
+		return isPointInCluster(p, CLUSTER_ID_UNCLASSIFIED);
+
+	}
+
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	public void printDistances() {
+		LOG.info("Point Similarity matrix:");
+		for (int i = 0; i < adjacencyMatrix.length; i++) {
+			LOG.info(Arrays.toString(adjacencyMatrix[i]));
+		}
+	}
+
+	public static void printResults(List<Cluster> allClusters, double eps, int minPoints) {
+		StringBuilder sb = new StringBuilder();
+		sb.append("DBSCAN Clustering with NeighborThreshold=").append(eps);
+		sb.append(", minPoints=").append(minPoints).append("\n");
+		sb.append("Clusters:\n");
+		String noiseElements = "no noise elements";
+		for (Cluster c : allClusters) {
+			if (String.valueOf(CLUSTER_ID_NOISE).equals(c.getLabel())) {
+				// print noise data at the end
+				noiseElements = c.getElementsAsString();
+			} else {
+				sb.append("____________________________________________________________\n");
+				sb.append(c.getLabel()).append(": \n").append(c.getElementsAsString());
+				sb.append("____________________________________________________________\n\n");
+			}
+		}
+		sb.append("Noise Elements:\n ").append(noiseElements).append("\n");
+		P.println(sb.toString());
+	}
+	private boolean removePointFromCluster(DataPoint p, int clusterId) {
+		boolean removed = false;
+		Set<DataPoint> points = clusters.get(clusterId);
+		if (points != null) {
+			removed = points.remove(p);
+		}
+		return removed;
+	}
+
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/clustering/hierarchical/AverageLinkAlgorithm.java b/src/org/yooreeka/algos/clustering/hierarchical/AverageLinkAlgorithm.java
new file mode 100644
index 0000000..e5963e0
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/hierarchical/AverageLinkAlgorithm.java
@@ -0,0 +1,145 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.hierarchical;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.utils.ObjectToIndexMapping;
+
+/** A hierarchical agglomerative clustering algorithm based on the average link */
+public class AverageLinkAlgorithm {
+
+	public static void main(String[] args) {
+		// Define data
+		DataPoint[] elements = new DataPoint[5];
+		elements[0] = new DataPoint("A", new double[] {});
+		elements[1] = new DataPoint("B", new double[] {});
+		elements[2] = new DataPoint("C", new double[] {});
+		elements[3] = new DataPoint("D", new double[] {});
+		elements[4] = new DataPoint("E", new double[] {});
+
+		double[][] a = new double[][] { { 0, 1, 2, 2, 3 }, { 1, 0, 2, 4, 3 },
+				{ 2, 2, 0, 1, 5 }, { 2, 4, 1, 0, 3 }, { 3, 3, 5, 3, 0 } };
+
+		AverageLinkAlgorithm ca = new AverageLinkAlgorithm(elements, a);
+		Dendrogram dnd = ca.cluster();
+		dnd.printAll();
+	}
+	private DataPoint[] elements;
+	private double[][] a;
+
+	private ClusterSet allClusters;
+
+	public AverageLinkAlgorithm(DataPoint[] elements, double[][] adjacencyMatrix) {
+		this.elements = elements;
+		this.a = adjacencyMatrix;
+		this.allClusters = new ClusterSet();
+	}
+
+	public Dendrogram cluster() {
+
+		Dendrogram dnd = new Dendrogram("Distance");
+		double d = 0.0;
+
+		// initially load all elements as individual clusters
+		for (DataPoint e : elements) {
+			Cluster c = new Cluster(e);
+			allClusters.add(c);
+		}
+
+		dnd.addLevel(String.valueOf(d), allClusters.getAllClusters());
+
+		d = 1.0;
+
+		while (allClusters.size() > 1) {
+			int K = allClusters.size();
+			mergeClusters(d);
+			// it is possible that there were no clusters to merge for current
+			// d.
+			if (K > allClusters.size()) {
+				dnd.addLevel(String.valueOf(d), allClusters.getAllClusters());
+				K = allClusters.size();
+			}
+
+			d = d + 0.5;
+		}
+		return dnd;
+	}
+
+	private void mergeClusters(double distanceThreshold) {
+		int nClusters = allClusters.size();
+
+		ObjectToIndexMapping<Cluster> idxMapping = new ObjectToIndexMapping<Cluster>();
+
+		double[][] clusterDistances = new double[nClusters][nClusters];
+
+		for (int i = 0, n = a.length; i < n; i++) {
+			for (int j = i + 1, k = a.length; j < k; j++) {
+				double d = a[i][j];
+				if (d > 0) {
+					DataPoint e1 = elements[i];
+					DataPoint e2 = elements[j];
+					Cluster c1 = allClusters.findClusterByElement(e1);
+					Cluster c2 = allClusters.findClusterByElement(e2);
+					if (!c1.equals(c2)) {
+						int ci = idxMapping.getIndex(c1);
+						int cj = idxMapping.getIndex(c2);
+						clusterDistances[ci][cj] += d;
+						clusterDistances[cj][ci] += d;
+					}
+				}
+			}
+		}
+
+		boolean[] merged = new boolean[clusterDistances.length];
+		for (int i = 0, n = clusterDistances.length; i < n; i++) {
+			for (int j = i + 1, k = clusterDistances.length; j < k; j++) {
+				Cluster ci = idxMapping.getObject(i);
+				Cluster cj = idxMapping.getObject(j);
+				int ni = ci.size();
+				int nj = cj.size();
+				clusterDistances[i][j] = clusterDistances[i][j] / (ni * nj);
+				clusterDistances[j][i] = clusterDistances[i][j];
+				// merge clusters if distance is below the threshold
+				if (merged[i] == false && merged[j] == false) {
+					if (clusterDistances[i][j] <= distanceThreshold) {
+						allClusters.remove(ci);
+						allClusters.remove(cj);
+						Cluster mergedCluster = new Cluster(ci, cj);
+						allClusters.add(mergedCluster);
+						merged[i] = true;
+						merged[j] = true;
+					}
+				}
+			}
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/hierarchical/ClusterSet.java b/src/org/yooreeka/algos/clustering/hierarchical/ClusterSet.java
new file mode 100644
index 0000000..236fb38
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/hierarchical/ClusterSet.java
@@ -0,0 +1,83 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.hierarchical;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+
+/**
+ * Set of clusters.
+ */
+public class ClusterSet {
+
+	private Set<Cluster> allClusters = new HashSet<Cluster>();
+
+	public boolean add(Cluster c) {
+		return allClusters.add(c);
+	}
+
+	public Cluster findClusterByElement(DataPoint e) {
+		Cluster cluster = null;
+		for (Cluster c : allClusters) {
+			if (c.contains(e)) {
+				cluster = c;
+				break;
+			}
+		}
+		return cluster;
+	}
+
+	public List<Cluster> getAllClusters() {
+		return new ArrayList<Cluster>(allClusters);
+	}
+
+	public boolean remove(Cluster c) {
+		return allClusters.remove(c);
+	}
+
+	public int size() {
+		return allClusters.size();
+	}
+
+	// public ClusterSet copy() {
+	// ClusterSet clusterSet = new ClusterSet();
+	// for(Cluster c : this.allClusters ) {
+	// Cluster clusterCopy = c.copy();
+	// clusterSet.add(clusterCopy);
+	// }
+	// return clusterSet;
+	// }
+}
diff --git a/src/org/yooreeka/algos/clustering/hierarchical/Dendrogram.java b/src/org/yooreeka/algos/clustering/hierarchical/Dendrogram.java
new file mode 100644
index 0000000..bdfd51f
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/hierarchical/Dendrogram.java
@@ -0,0 +1,162 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.hierarchical;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.config.YooreekaConfigurator;
+
+public class Dendrogram {
+
+	private static final Logger LOG = Logger.getLogger(Dendrogram.class.getName());
+
+	/*
+	 * Clusters by level.
+	 */
+	private Map<Integer, ClusterSet> entryMap;
+	private Map<Integer, String> levelLabels;
+	private Integer nextLevel;
+	private String levelLabelName;
+
+	public Dendrogram(String levelLabelName) {
+		
+		LOG.setLevel(YooreekaConfigurator.getLevel(Dendrogram.class.getName()));
+		
+		entryMap = new LinkedHashMap<Integer, ClusterSet>();
+		levelLabels = new LinkedHashMap<Integer, String>();
+		nextLevel = 1;
+		this.levelLabelName = levelLabelName;
+	}
+
+	public int addLevel(String label, Cluster cluster) {
+		List<Cluster> values = new ArrayList<Cluster>();
+		values.add(cluster);
+		return addLevel(label, values);
+	}
+
+	/**
+	 * Creates a new dendrogram level using copies of provided clusters.
+	 */
+	public int addLevel(String label, Collection<Cluster> clusters) {
+
+		ClusterSet clusterSet = new ClusterSet();
+
+		for (Cluster c : clusters) {
+			// copy cluster before adding - over time cluster elements may
+			// change
+			// but for dendrogram we want to keep current state.
+			clusterSet.add(c.copy());
+		}
+
+		int level = nextLevel;
+
+		entryMap.put(level, clusterSet);
+		levelLabels.put(level, label);
+
+		nextLevel++;
+		return level;
+	}
+
+	public List<Integer> getAllLevels() {
+		return new ArrayList<Integer>(entryMap.keySet());
+	}
+
+	public List<Cluster> getClustersForLevel(int level) {
+		ClusterSet cs = entryMap.get(level); // null when this level was never added or set
+		return cs == null ? new ArrayList<Cluster>() : cs.getAllClusters();
+	}
+
+	public String getLabelForLevel(int level) {
+		return levelLabels.get(level);
+	}
+
+	public int getTopLevel() {
+		return nextLevel - 1;
+	}
+
+	public void print(int level) {
+		String label = levelLabels.get(level);
+		ClusterSet clusters = entryMap.get(level);
+		LOG.info("Clusters for: level=" + level + ", "
+				+ levelLabelName + "=" + label);
+		for (Cluster c : clusters.getAllClusters()) {
+			if (c.getElements().size() > 1) {
+				LOG.info("____________________________________________________________\n");
+				LOG.info(c.getElementsAsString());
+				LOG.info("____________________________________________________________\n\n");
+			}
+		}
+	}
+
+	public void printAll() {
+		for (Map.Entry<Integer, ClusterSet> e : entryMap.entrySet()) {
+			Integer level = e.getKey();
+			print(level);
+		}
+	}
+
+	/**
+	 * Replaces clusters in the specified level. If level doesn't exist it will
+	 * be created, and nextLevel moves to {@code level + 1} when level reaches it.
+	 * Clusters are stored via Cluster.copy(), not as the objects passed in.
+	 * 
+	 * @param level
+	 *            dendrogram level.
+	 * @param label
+	 *            level description.
+	 * @param clusters
+	 *            clusters for the level.
+	 */
+	public void setLevel(int level, String label, Collection<Cluster> clusters) {
+
+		ClusterSet clusterSet = new ClusterSet();
+
+		for (Cluster c : clusters) {
+			clusterSet.add(c.copy());
+		}
+
+		LOG.fine("Setting cluster level: " + level);
+
+		entryMap.put(level, clusterSet);
+		levelLabels.put(level, label);
+
+		if (level >= nextLevel) {
+			nextLevel = level + 1;
+		}
+	}
+
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/clustering/hierarchical/MST.java b/src/org/yooreeka/algos/clustering/hierarchical/MST.java
new file mode 100644
index 0000000..6b78307
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/hierarchical/MST.java
@@ -0,0 +1,130 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.hierarchical;
+
+/**
+ * Basic implementation of Prim's algorithm to build Minimal Spanning Tree
+ * (MST).
+ * 
+ */
+public class MST {
+
+	/** Weighted edge from node i (already part of the tree) to node j (being added). */
+	static class Edge {
+
+		private final int i;
+		private final int j;
+		private final double w;
+
+		Edge(int i, int j, double w) {
+			this.i = i;
+			this.j = j;
+			this.w = w;
+		}
+
+		public int getI() {
+			return i;
+		}
+
+		public int getJ() {
+			return j;
+		}
+
+		public double getW() {
+			return w;
+		}
+	}
+
+	/** The adjacency matrix of the graph */
+	private double[][] adjM;
+
+	public MST() {
+	}
+
+	/**
+	 * Builds the MST of the graph described by an adjacency matrix that is
+	 * assumed to be square. The tree is grown starting from node 0.
+	 *
+	 * @param adjM adjM[i][j] is the weight of the edge between nodes i and j.
+	 * @return matrix with the edge weight where two nodes are linked in the MST
+	 *         and -1 everywhere else; an empty matrix for an empty graph.
+	 */
+	public double[][] buildMST(double[][] adjM) {
+		this.adjM = adjM;
+		int n = adjM.length;
+
+		// Adjacency matrix defining MST. -1 means "no edge"; 0 is a valid distance.
+		double[][] mst = new double[n][n];
+		for (int i = 0; i < n; i++) {
+			for (int j = 0; j < n; j++) {
+				mst[i][j] = -1;
+			}
+		}
+		if (n == 0) {
+			return mst; // nothing to span, and allV[0] below would be out of bounds
+		}
+
+		// Marks nodes that belong to MST. Initial MST has only one node.
+		boolean[] allV = new boolean[n];
+		allV[0] = true;
+		Edge e;
+		while ((e = findMinimumEdge(allV)) != null) {
+			allV[e.getJ()] = true;
+			mst[e.getI()][e.getJ()] = e.getW();
+			mst[e.getJ()][e.getI()] = e.getW();
+		}
+		return mst;
+	}
+
+	/**
+	 * Finds the lightest edge leading from a node inside the tree to a node
+	 * outside of it, or returns null when no such edge is left.
+	 */
+	private Edge findMinimumEdge(boolean[] mstV) {
+		double minW = Double.POSITIVE_INFINITY;
+		int minI = -1;
+		int minJ = -1;
+		for (int i = 0, n = adjM.length; i < n; i++) {
+			if (!mstV[i]) {
+				continue; // edges must start at a node that is part of MST
+			}
+			for (int j = 0; j < n; j++) {
+				// strict '>' keeps the first minimum found during the scan
+				if (!mstV[j] && minW > adjM[i][j]) {
+					minW = adjM[i][j];
+					minI = i;
+					minJ = j;
+				}
+			}
+		}
+		return (minI > -1) ? new Edge(minI, minJ, minW) : null;
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/hierarchical/MSTSingleLinkAlgorithm.java b/src/org/yooreeka/algos/clustering/hierarchical/MSTSingleLinkAlgorithm.java
new file mode 100644
index 0000000..2162623
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/hierarchical/MSTSingleLinkAlgorithm.java
@@ -0,0 +1,142 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.hierarchical;
+
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.config.YooreekaConfigurator;
+
+public class MSTSingleLinkAlgorithm {
+
+	private static final Logger LOG = Logger.getLogger(MSTSingleLinkAlgorithm.class.getName());
+
+	public static void main(String[] args) {
+		// Define data
+		DataPoint[] elements = new DataPoint[5];
+		elements[0] = new DataPoint("A", new double[] {});
+		elements[1] = new DataPoint("B", new double[] {});
+		elements[2] = new DataPoint("C", new double[] {});
+		elements[3] = new DataPoint("D", new double[] {});
+		elements[4] = new DataPoint("E", new double[] {});
+
+		double[][] a = new double[][] { { 0, 1, 2, 2, 3 }, { 1, 0, 2, 4, 3 },
+				{ 2, 2, 0, 1, 5 }, { 2, 4, 1, 0, 3 }, { 3, 3, 5, 3, 0 } };
+
+		MSTSingleLinkAlgorithm ca = new MSTSingleLinkAlgorithm(elements, a);
+		Dendrogram dnd = ca.cluster();
+		dnd.printAll();
+	}
+
+	private DataPoint[] elements;
+	private double[][] a;
+	private double[][] m;
+	private ClusterSet allClusters;
+
+	public MSTSingleLinkAlgorithm(DataPoint[] elements,
+			double[][] adjacencyMatrix) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(MSTSingleLinkAlgorithm.class.getName()));
+		this.elements = elements;
+		this.a = adjacencyMatrix;
+		this.allClusters = new ClusterSet();
+	}
+
+	/**
+	 * Builds the dendrogram by merging clusters along the MST edges, shortest
+	 * edge first. Merges that happen at the same distance share one level.
+	 *
+	 * @return dendrogram whose level labels are the merge distances.
+	 */
+	public Dendrogram cluster() {
+		m = (new MST()).buildMST(a);
+		Dendrogram dnd = new Dendrogram("Distance");
+		double d = 0.0;
+		// initially load all elements as individual clusters
+		for (DataPoint e : elements) {
+			allClusters.add(new Cluster(e));
+		}
+		int lastDndLevel = dnd.addLevel(String.valueOf(d),
+				allClusters.getAllClusters());
+		double previousD = d;
+		while (allClusters.size() > 1) {
+			d = mergeTwoClosestClusters();
+			if (Double.isNaN(d)) {
+				// No usable MST edge is left although several clusters remain,
+				// e.g. for infinite distances. Stop instead of looping forever.
+				break;
+			}
+			if (previousD == d) {
+				dnd.setLevel(lastDndLevel, String.valueOf(d),
+						allClusters.getAllClusters());
+			} else {
+				lastDndLevel = dnd.addLevel(String.valueOf(d),
+						allClusters.getAllClusters());
+			}
+			previousD = d;
+		}
+
+		return dnd;
+	}
+
+	/**
+	 * Merges the two clusters joined by the shortest remaining MST edge and
+	 * removes that edge from the MST matrix.
+	 *
+	 * @return weight of the edge that was used, or NaN if no edge was left.
+	 */
+	private double mergeTwoClosestClusters() {
+		int minI = -1;
+		int minJ = -1;
+		double minWeight = Double.POSITIVE_INFINITY;
+		for (int i = 0, n = m.length; i < n; i++) {
+			for (int j = 0; j < n; j++) {
+				if (m[i][j] >= 0 && minWeight > m[i][j]) {
+					minI = i;
+					minJ = j;
+					minWeight = m[i][j];
+				}
+			}
+		}
+		if (minI < 0) {
+			return Double.NaN; // no edge left, nothing was merged
+		}
+		Cluster c1 = allClusters.findClusterByElement(elements[minI]);
+		Cluster c2 = allClusters.findClusterByElement(elements[minJ]);
+		allClusters.remove(c1);
+		allClusters.remove(c2);
+		allClusters.add(new Cluster(c1, c2));
+		// remove link. Using -1 because 0 is a valid distance.
+		m[minI][minJ] = -1;
+		m[minJ][minI] = -1;
+		return minWeight;
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/hierarchical/SingleLinkAlgorithm.java b/src/org/yooreeka/algos/clustering/hierarchical/SingleLinkAlgorithm.java
new file mode 100644
index 0000000..a0090d4
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/hierarchical/SingleLinkAlgorithm.java
@@ -0,0 +1,126 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.hierarchical;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/** A hierarchical agglomerative clustering algorithm based on single link */
+public class SingleLinkAlgorithm {
+
+	private static final Logger LOG = Logger.getLogger(SingleLinkAlgorithm.class.getName());
+
+	public static void main(String[] args) {
+		// Define data
+		DataPoint[] elements = new DataPoint[5];
+		elements[0] = new DataPoint("A", new double[] {});
+		elements[1] = new DataPoint("B", new double[] {});
+		elements[2] = new DataPoint("C", new double[] {});
+		elements[3] = new DataPoint("D", new double[] {});
+		elements[4] = new DataPoint("E", new double[] {});
+		double[][] a = new double[][] { { 0, 1, 2, 2, 3 }, { 1, 0, 2, 4, 3 },
+				{ 2, 2, 0, 1, 5 }, { 2, 4, 1, 0, 3 }, { 3, 3, 5, 3, 0 } };
+		SingleLinkAlgorithm ca = new SingleLinkAlgorithm(elements, a);
+		Dendrogram dnd = ca.cluster();
+		dnd.printAll();
+	}
+
+	private DataPoint[] elements;
+	private double[][] a;
+
+	public SingleLinkAlgorithm(DataPoint[] elements, double[][] adjacencyMatrix) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(SingleLinkAlgorithm.class.getName()));
+		this.elements = elements;
+		this.a = adjacencyMatrix;
+	}
+
+	/**
+	 * Groups the elements for one threshold: every element j joins the cluster
+	 * of the first row i <= j with a[i][j] <= distanceThreshold. NOTE(review):
+	 * links are not followed transitively - confirm that this is intended.
+	 */
+	private List<Cluster> buildClusters(double distanceThreshold) {
+		boolean[] usedElementFlags = new boolean[elements.length];
+		List<Cluster> clusters = new ArrayList<Cluster>();
+		for (int i = 0, n = a.length; i < n; i++) {
+			List<DataPoint> clusterPoints = new ArrayList<DataPoint>();
+			for (int j = i; j < n; j++) {
+				if (a[i][j] <= distanceThreshold && !usedElementFlags[j]) {
+					clusterPoints.add(elements[j]);
+					usedElementFlags[j] = true;
+				}
+			}
+			if (!clusterPoints.isEmpty()) {
+				clusters.add(new Cluster(clusterPoints));
+			}
+		}
+		return clusters;
+	}
+
+	/** Clusters with the thresholds 1, 2, 3, ... and adds a dendrogram level whenever the cluster count changes. */
+	public Dendrogram cluster() {
+		Dendrogram dnd = new Dendrogram("Distance");
+		List<Cluster> initialClusters = new ArrayList<Cluster>();
+		for (DataPoint e : elements) {
+			initialClusters.add(new Cluster(e));
+		}
+		dnd.addLevel(String.valueOf(0.0), initialClusters);
+		double maxDistance = largestFiniteDistance();
+		int k = initialClusters.size();
+		for (double d = 1.0; k > 1; d = d + 1) {
+			List<Cluster> clusters = buildClusters(d);
+			if (clusters.size() != k) {
+				k = clusters.size();
+				dnd.addLevel(String.valueOf(d), clusters);
+			}
+			if (d >= maxDistance) {
+				break; // a larger threshold cannot change the result; avoids looping forever on infinite or NaN distances
+			}
+		}
+		return dnd;
+	}
+
+	/** Returns the largest finite positive entry of the distance matrix, or 0 if there is none. */
+	private double largestFiniteDistance() {
+		double max = 0;
+		for (double[] row : a) {
+			for (double v : row) {
+				max = (v > max && v != Double.POSITIVE_INFINITY) ? v : max;
+			}
+		}
+		return max;
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/model/Attribute.java b/src/org/yooreeka/algos/clustering/model/Attribute.java
new file mode 100644
index 0000000..375f3f1
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/model/Attribute.java
@@ -0,0 +1,119 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.model;
+
+/**
+ * Attribute for text or numeric values.
+ */
+public class Attribute {
+
+	private String name;
+	// Holds a Double for numeric attributes and a String for text attributes.
+	private Object value;
+
+	/** Creates an attribute that carries a numeric value. */
+	public Attribute(String name, Double numericValue) {
+		this.name = name;
+		this.value = numericValue;
+	}
+
+	/** Creates an attribute that carries a text value. */
+	public Attribute(String name, String textValue) {
+		this.name = name;
+		this.value = textValue;
+	}
+
+	/** Attributes are equal when they are of the same class and have equal name and value. */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		final Attribute that = (Attribute) obj;
+		boolean sameName = (name == null) ? that.name == null : name.equals(that.name);
+		if (!sameName) {
+			return false;
+		}
+		return (value == null) ? that.value == null : value.equals(that.value);
+	}
+
+	/** Returns the attribute name. */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * Returns the value as a number. The value is cast without a check, so
+	 * calling this on a text attribute fails with a ClassCastException.
+	 */
+	public Double getNumericValue() {
+		return (Double) value;
+	}
+
+	/**
+	 * Returns the value as text. The value is cast without a check, so
+	 * calling this on a numeric attribute fails with a ClassCastException.
+	 */
+	public String getTextValue() {
+		return (String) value;
+	}
+
+	/** Hash code derived from name and value, consistent with equals(). */
+	@Override
+	public int hashCode() {
+		int nameHash = (name == null) ? 0 : name.hashCode();
+		int valueHash = (value == null) ? 0 : value.hashCode();
+		return 31 * (31 + nameHash) + valueHash;
+	}
+
+	/** Returns true if the value is a Double. */
+	public boolean isNumeric() {
+		return value instanceof java.lang.Double;
+	}
+
+	/** Returns true if the value is a String. */
+	public boolean isText() {
+		return value instanceof java.lang.String;
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder text = new StringBuilder("[name=").append(name);
+		text.append(", value=").append(value);
+		text.append(", isText=").append(isText());
+		text.append(", isNumeric=").append(isNumeric());
+		return text.append("]").toString();
+	}
+
+}
diff --git a/src/org/yooreeka/algos/clustering/model/Cluster.java b/src/org/yooreeka/algos/clustering/model/Cluster.java
new file mode 100644
index 0000000..5038059
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/model/Cluster.java
@@ -0,0 +1,197 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.model;
+
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.Set;
+
+/*
+ * Group of data points.
+ */
+public class Cluster {
+
+	/** Optional descriptive label; not part of equals() or hashCode(). */
+	private String label;
+
+	/** Members of the cluster, kept in insertion order. */
+	private Set<DataPoint> elements;
+
+	/** Creates an empty cluster with an empty label. */
+	public Cluster() {
+		init("");
+	}
+
+	/** Creates a cluster that contains all elements from provided clusters. */
+	public Cluster(Cluster c1, Cluster c2) {
+		init("");
+		add(c1);
+		add(c2);
+	}
+
+	/** Creates a cluster with an empty label that contains the given elements. */
+	public Cluster(Collection<DataPoint> elements) {
+		this("", elements);
+	}
+
+	/** Creates a cluster with an empty label that contains a single element. */
+	public Cluster(DataPoint element) {
+		init("");
+		add(element);
+	}
+
+	/** Creates an empty cluster with the given label. */
+	public Cluster(String label) {
+		init(label);
+	}
+
+	/** Creates a cluster with the given label that contains the given elements. */
+	public Cluster(String label, Collection<DataPoint> elements) {
+		init(label);
+		for (DataPoint e : elements) {
+			add(e);
+		}
+	}
+
+	/**
+	 * Modifies existing cluster by adding all elements from provided cluster.
+	 * @param c
+	 *            cluster whose elements are added.
+	 */
+	public void add(Cluster c) {
+		elements.addAll(c.getElements());
+	}
+
+	/**
+	 * Modifies existing cluster by adding a new element.
+	 * @param e
+	 *            element to add.
+	 */
+	public void add(DataPoint e) {
+		elements.add(e);
+	}
+
+	/** Returns true if every element of the given cluster is also in this one. */
+	public boolean contains(Cluster c) {
+		return elements.containsAll(c.getElements());
+	}
+
+	/** Returns true if the element belongs to this cluster. */
+	public boolean contains(DataPoint e) {
+		return elements.contains(e);
+	}
+
+	/**
+	 * Creates a copy of this cluster with the same label and the same elements.
+	 * @return new cluster backed by its own element set.
+	 */
+	public Cluster copy() {
+		// The label is carried over as well (it used to be dropped). The data
+		// points themselves are shared by both clusters; only the set is new.
+		return new Cluster(label, elements);
+	}
+
+	/** Clusters are equal when they hold equal elements; the label is ignored. */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		final Cluster other = (Cluster) obj;
+		if (elements == null) {
+			return other.elements == null;
+		}
+		return elements.equals(other.elements);
+	}
+
+	/**
+	 * Returns number of attributes used to define points in the cluster.
+	 * @return attribute count of the first element, or 0 for an empty cluster.
+	 */
+	public int getDimensionCount() {
+		if (elements == null || elements.isEmpty()) {
+			return 0;
+		}
+		return elements.iterator().next().getAttributeCount();
+	}
+
+	/** Returns a copy of the element set; changing it does not affect the cluster. */
+	public Set<DataPoint> getElements() {
+		return new LinkedHashSet<DataPoint>(elements);
+	}
+
+	/**
+	 * Formats the element labels as "{label1,\nlabel2,...}".
+	 * @return the labels in insertion order, or "{}" for an empty cluster.
+	 */
+	public String getElementsAsString() {
+		StringBuilder buf = new StringBuilder("{");
+		// Explicit flag: testing the buffer length would skip the separator after an empty label.
+		boolean first = true;
+		for (DataPoint e : elements) {
+			if (!first) {
+				buf.append(",\n");
+			}
+			buf.append(e.getLabel());
+			first = false;
+		}
+		return buf.append("}").toString();
+	}
+
+	public String getLabel() {
+		return label;
+	}
+
+	/** Hash code derived from the elements only, consistent with equals(). */
+	@Override
+	public int hashCode() {
+		return 31 + ((elements == null) ? 0 : elements.hashCode());
+	}
+
+	/** Sets the label and starts with an empty element set. */
+	private void init(String label) {
+		this.label = label;
+		elements = new LinkedHashSet<DataPoint>();
+	}
+
+	/** Returns the number of elements in the cluster. */
+	public int size() {
+		return elements.size();
+	}
+
+	@Override
+	public String toString() {
+		return getElementsAsString();
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/model/DataPoint.java b/src/org/yooreeka/algos/clustering/model/DataPoint.java
new file mode 100644
index 0000000..7ccadc6
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/model/DataPoint.java
@@ -0,0 +1,181 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.model;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.yooreeka.algos.clustering.utils.Attributes;
+import org.yooreeka.util.metrics.EuclideanDistance;
+
+/**
+ * A Thing to be clustered. Defined by a set of attributes.
+ */
+public class DataPoint {
+
+	/**
+	 * Descriptive label or name. We also use it as unique ID for the instance.
+	 */
+	private String label;
+
+	/**
+	 * Collection of attributes that define this point.
+	 */
+	private Attribute[] attributes;
+
+	/*
+	 * Values derived from attributes. The numeric values are null unless all
+	 * attributes are numeric; the text values are null unless all are text.
+	 */
+	private String[] attributeNames;
+	private double[] numericAttributeValues;
+	private String[] textAttributeValues;
+
+	/** Creates a new point from ready-made attributes. */
+	public DataPoint(String label, Attribute[] attributes) {
+		init(label, attributes);
+	}
+
+	/**
+	 * Creates a new point with numerical attributes. Attribute names are
+	 * auto-generated.
+	 */
+	public DataPoint(String label, double[] attrValues) {
+		// create attributes with auto-generated names
+		init(label, Attributes.createAttributes(attrValues));
+	}
+
+	/**
+	 * Creates a new point with text attributes. Attribute names are
+	 * auto-generated.
+	 */
+	public DataPoint(String label, String[] attrValues) {
+		init(label, Attributes.createAttributes(attrValues));
+	}
+
+	/** Points are equal when their attributes and their labels are equal. */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		final DataPoint other = (DataPoint) obj;
+		if (!Arrays.equals(attributes, other.attributes)) {
+			return false;
+		}
+		return (label == null) ? other.label == null : label.equals(other.label);
+	}
+
+	/**
+	 * Returns the number of attributes of this point.
+	 * 
+	 * @return attribute count; also available for points with text attributes.
+	 */
+	public int getAttributeCount() {
+		// Count the attributes themselves: numericAttributeValues is null when the
+		// point is not purely numeric, which used to cause a NullPointerException.
+		return attributes.length;
+	}
+
+	/** Returns the internal array of attribute names (not a copy). */
+	public String[] getAttributeNames() {
+		return attributeNames;
+	}
+
+	/** Returns the internal attribute array (not a copy). */
+	public Attribute[] getAttributes() {
+		return attributes;
+	}
+
+	/** Returns the label of this point. */
+	public String getLabel() {
+		return label;
+	}
+
+	/** Returns the numeric values, or null if not all attributes are numeric. */
+	public double[] getNumericAttrValues() {
+		return numericAttributeValues;
+	}
+
+	/**
+	 * Returns the distance between this point and the origin as computed by
+	 * EuclideanDistance. NOTE(review): the numeric values are null for points
+	 * that are not purely numeric; how getDistance handles that is not visible here.
+	 */
+	public double getR() {
+		// A new double[] is zero-filled by Java, so it already is the origin.
+		double[] origin = new double[attributes.length];
+		return new EuclideanDistance().getDistance(origin, this.numericAttributeValues);
+	}
+
+	/** Returns the text values, or null if not all attributes are text. */
+	public String[] getTextAttrValues() {
+		return textAttributeValues;
+	}
+
+	/** Hash code derived from attributes and label, consistent with equals(). */
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = prime + Arrays.hashCode(attributes);
+		return prime * result + ((label == null) ? 0 : label.hashCode());
+	}
+
+	/** Stores label and attributes and derives the name and value arrays from them. */
+	private void init(String label, Attribute[] attributes) {
+		this.label = label;
+		this.attributes = attributes;
+		this.attributeNames = Attributes.getNames(attributes);
+		this.textAttributeValues = Attributes.allText(attributes)
+				? Attributes.getTextValues(attributes) : null;
+		this.numericAttributeValues = Attributes.allNumeric(attributes)
+				? Attributes.getNumericValues(attributes) : null;
+	}
+
+	/** Returns the label followed by the bare attribute values, e.g. label([v1, v2]). */
+	public String toShortString() {
+		List<String> attrValues = new ArrayList<String>();
+		for (Attribute a : attributes) {
+			attrValues.add(a.isNumeric() ? String.valueOf(a.getNumericValue()) : a.getTextValue());
+		}
+		return label + "(" + attrValues.toString() + ")";
+	}
+
+	@Override
+	public String toString() {
+		return label + "(" + Arrays.toString(attributes) + ")";
+	}
+
+}
diff --git a/src/org/yooreeka/algos/clustering/partitional/KMeansAlgorithm.java b/src/org/yooreeka/algos/clustering/partitional/KMeansAlgorithm.java
new file mode 100644
index 0000000..43b07c1
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/partitional/KMeansAlgorithm.java
@@ -0,0 +1,306 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.partitional;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.utils.Attributes;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Basic k-means clustering over the numeric attribute values of
+ * <CODE>DataPoint</CODE>s.
+ */
+public class KMeansAlgorithm {
+
+	private static final Logger LOG = Logger.getLogger(KMeansAlgorithm.class.getName());
+
+	/** Demo: clusters nine one-dimensional points starting from two fixed means. */
+	public static void main(String[] args) {
+		String[] labels = { "2", "4", "10", "12", "3", "20", "30", "11", "25" };
+		DataPoint[] dataPoints = new DataPoint[labels.length];
+		for (int i = 0; i < labels.length; i++) {
+			// every demo label is the decimal form of its single coordinate
+			double coordinate = Double.parseDouble(labels[i]);
+			dataPoints[i] = new DataPoint(labels[i], new double[] { coordinate });
+		}
+
+		DataPoint[] clusterMeans = new DataPoint[] {
+				new DataPoint("Mean-2", new double[] { 2.0 }),
+				new DataPoint("Mean-4", new double[] { 4.0 }) };
+
+		KMeansAlgorithm kmeans = new KMeansAlgorithm(clusterMeans, dataPoints);
+		kmeans.cluster();
+		kmeans.print();
+	}
+	/**
+	 * Picks <CODE>k</CODE> distinct data points at random and turns each one
+	 * into a centroid labeled "Mean-i(label of the chosen point)".
+	 * @param k number of centroids; must not exceed <CODE>data.length</CODE>
+	 * @param data points to draw the centroids from
+	 * @return array with the k centroid points
+	 * @throws IllegalArgumentException if k is larger than data.length
+	 */
+	public static DataPoint[] pickInitialCentroids(int k, DataPoint[] data) {
+		if (k > data.length) {
+			// Without this check the search for an unused index never ends.
+			throw new IllegalArgumentException("Cannot pick " + k
+					+ " distinct centroids from " + data.length + " points");
+		}
+		Random randGen = new Random();
+		DataPoint[] centroids = new DataPoint[k];
+		// Calculate random mean values for each cluster based on the data
+		/**
+		 * TODO: 4.2 -- Selecting the means for seeding
+		 * 
+		 * In large datasets, the selection of the initial centroids can be
+		 * important from a computational (time) complexity perspective.
+		 * 
+		 * In general, how can we improve the seeding of the initial mean
+		 * values? For example, consider the following heuristic:
+		 * 
+		 * 1. pick randomly one node
+		 * 2. calculate the distance between that node and O (10*k) other nodes
+		 * 3. sort the list of nodes according to their distance from the first node
+		 * 4. pick every 10th node in the sequence
+		 * 5. calculate the mean distance between each one of these nodes and the original node
+		 * 
+		 * This algorithmic choice is as ad hoc as they come, however, it does
+		 * have some key principles embedded in it. What are these principles?
+		 * How can you generalize this algorithm?
+		 * 
+		 * Discuss advantages/disadvantages of the initial seeding with your
+		 * friends.
+		 * 
+		 */
+		Set<Integer> usedIndexes = new HashSet<Integer>();
+		for (int i = 0; i < k; i++) {
+			// redraw until add() accepts the index, i.e. it was not used before
+			int pick = randGen.nextInt(data.length);
+			while (!usedIndexes.add(pick)) {
+				pick = randGen.nextInt(data.length);
+			}
+			// Create DataPoint that will represent the cluster's centroid.
+			DataPoint seed = data[pick];
+			String label = "Mean-" + i + "(" + seed.getLabel() + ")";
+			double[] values = seed.getNumericAttrValues();
+			String[] attrNames = seed.getAttributeNames();
+			centroids[i] = new DataPoint(label, Attributes.createAttributes(
+					attrNames, values));
+		}
+		return centroids;
+	}
+	private int k; // number of clusters, i.e. the number of centroids
+	private DataPoint[] allCentroids; // current mean of each cluster
+	private Cluster[] allClusters; // filled by cluster(); entries are null before
+	private DataPoint[] allDataPoints; // points to be clustered
+
+	/**
+	 * @param initialCentroids
+	 *            starting values for the centroids of each cluster; the array
+	 *            is kept and its entries are replaced while clustering.
+	 * @param dataPoints
+	 *            points to cluster.
+	 */
+	public KMeansAlgorithm(DataPoint[] initialCentroids, DataPoint[] dataPoints) {
+		init(initialCentroids, dataPoints);
+	}
+
+	/**
+	 * @param k
+	 *            desired number of clusters; the starting centroids are picked
+	 *            at random from the data points.
+	 * @param dataPoints
+	 *            points to cluster.
+	 */
+	public KMeansAlgorithm(int k, DataPoint[] dataPoints) {
+		init(KMeansAlgorithm.pickInitialCentroids(k, dataPoints), dataPoints);
+	}
+
+	/**
+	 * Alternates between assigning every point to its closest centroid and
+	 * recomputing each centroid as the mean of its cluster, until a pass
+	 * leaves all centroids exactly unchanged. The centroid of a cluster that
+	 * received no points is kept as it is.
+	 */
+	public void cluster() {
+		boolean centroidsChanged;
+		do {
+			// One empty bucket per centroid.
+			List<Set<DataPoint>> buckets = new ArrayList<Set<DataPoint>>(k);
+			for (int i = 0; i < k; i++) {
+				buckets.add(new HashSet<DataPoint>());
+			}
+
+			// Every point goes into the bucket of its closest centroid.
+			for (DataPoint p : allDataPoints) {
+				buckets.get(findClosestCentroid(allCentroids, p)).add(p);
+			}
+			for (int i = 0; i < k; i++) {
+				allClusters[i] = new Cluster(buckets.get(i));
+			}
+
+			// Recompute the means; any exact difference triggers another pass.
+			centroidsChanged = false;
+			for (int i = 0; i < allClusters.length; i++) {
+				if (buckets.get(i).isEmpty()) {
+					continue; // keep mean unchanged if cluster has no elements
+				}
+				double[] newValues = findCentroid(allClusters[i]);
+				double[] oldValues = allCentroids[i].getNumericAttrValues();
+				if (!Arrays.equals(oldValues, newValues)) {
+					allCentroids[i] = new DataPoint(allCentroids[i].getLabel(),
+							newValues);
+					centroidsChanged = true;
+				}
+			}
+		} while (centroidsChanged);
+	}
+
+	/* Euclidean distance between the numeric attribute values of two points. */
+	private double distance(DataPoint x, DataPoint y) {
+		return distance(x.getNumericAttrValues(), y.getNumericAttrValues());
+	}
+
+	/* Euclidean distance; iterates over the length of x only. */
+	private double distance(double[] x, double[] y) {
+		double squaredSum = 0.0;
+		for (int i = 0; i < x.length; i++) {
+			squaredSum += Math.pow(x[i] - y[i], 2);
+		}
+		return Math.sqrt(squaredSum);
+	}
+
+	/*
+	 * Returns the component-wise mean of the points in the cluster, or an
+	 * empty array when the cluster has no elements.
+	 */
+	private double[] findCentroid(Cluster c) {
+		Set<DataPoint> members = c.getElements();
+		int n = members.size();
+		if (n == 0) {
+			return new double[0];
+		}
+
+		int d = c.getDimensionCount();
+		double[] mean = new double[d];
+		for (DataPoint p : members) {
+			double[] values = p.getNumericAttrValues();
+			for (int i = 0; i < d; i++) {
+				mean[i] += values[i];
+			}
+		}
+		for (int i = 0; i < d; i++) {
+			mean[i] /= n;
+		}
+		return mean;
+	}
+
+	/**
+	 * This method calculates the closest centroid for a given data point
+	 * 
+	 * @param centroids
+	 *            candidate centroids
+	 * @param x
+	 *            is the <CODE>DataPoint</CODE> for which we seek the closest
+	 *            centroid
+	 * @return the index (from the centroids array) of the closest centroid; on
+	 *         equal distances the lowest index wins, and -1 is returned if no
+	 *         centroid has a distance below positive infinity
+	 */
+	private int findClosestCentroid(DataPoint[] centroids, DataPoint x) {
+		int closest = -1;
+		double minDistance = Double.POSITIVE_INFINITY;
+		for (int i = 0; i < centroids.length; i++) {
+			double d = distance(centroids[i], x);
+			// strict comparison: on a tie the earlier centroid is kept
+			if (d < minDistance) {
+				minDistance = d;
+				closest = i;
+			}
+		}
+		return closest;
+	}
+
+	public DataPoint[] getAllCentroids() {
+		return this.allCentroids;
+	}
+
+	public Cluster[] getAllClusters() {
+		return this.allClusters;
+	}
+
+	public int getK() {
+		return this.k;
+	}
+
+	/* Shared constructor logic; k is taken from the number of centroids. */
+	private void init(DataPoint[] initialCentroids, DataPoint[] dataPoints) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(KMeansAlgorithm.class.getName()));
+
+		this.k = initialCentroids.length;
+		this.allCentroids = initialCentroids;
+		this.allDataPoints = dataPoints;
+		this.allClusters = new Cluster[k];
+	}
+
+	/** Prints the elements of every cluster to standard output. */
+	public void print() {
+		System.out.println("Clusters:");
+		for (Cluster c : this.getAllClusters()) {
+			System.out.println(c.getElementsAsString());
+		}
+	}
+
+	/** Prints the clusters, a separator line and then the cluster means. */
+	public void printAll() {
+		print();
+		System.out
+				.println("___________________________________________________");
+		printMeans();
+	}
+
+	/** Prints the current centroid of every cluster to standard output. */
+	public void printMeans() {
+		System.out.println("Cluster means:");
+		for (DataPoint mean : this.allCentroids) {
+			System.out.println(mean);
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/partitional/NearestNeighborAlgorithm.java b/src/org/yooreeka/algos/clustering/partitional/NearestNeighborAlgorithm.java
new file mode 100644
index 0000000..b1a67f6
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/partitional/NearestNeighborAlgorithm.java
@@ -0,0 +1,230 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.partitional;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.utils.ObjectToIndexMapping;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.metrics.NumericDistance;
+import org.yooreeka.util.metrics.EuclideanDistance;
+
+/**
+ * Threshold-based nearest neighbor clustering: each point joins the closest
+ * existing cluster when it is within the threshold, else starts a new one.
+ */
+public class NearestNeighborAlgorithm {
+
+	private static final Logger LOG = Logger.getLogger(NearestNeighborAlgorithm.class.getName());
+
+	/** Demo: clusters five labeled elements using a fixed distance matrix. */
+	public static void main(String[] args) {
+		String[] labels = { "A", "B", "C", "D", "E" };
+		DataPoint[] elements = new DataPoint[labels.length];
+		for (int i = 0; i < labels.length; i++) {
+			elements[i] = new DataPoint(labels[i], new double[] {});
+		}
+
+		double[][] a = new double[][] { { 0, 1, 2, 2, 3 }, { 1, 0, 2, 4, 3 },
+				{ 2, 2, 0, 1, 5 }, { 2, 4, 1, 0, 3 }, { 3, 3, 5, 3, 0 } };
+		double threshold = 2;
+
+		new NearestNeighborAlgorithm(elements, a, threshold).run();
+	}
+
+	/*
+	 * All elements for clustering.
+	 */
+	private DataPoint[] allDataPoints;
+
+	/*
+	 * Matrix with distances between elements.
+	 */
+	private double[][] a;
+
+	/*
+	 * Threshold value that is used to determine if elements will be added to
+	 * one of the existing clusters or if a new cluster will be created.
+	 */
+	private double t = 0.5;
+
+	/*
+	 * List of clusters.
+	 */
+	private List<Cluster> allClusters;
+
+	/*
+	 * Distance metric that will be used to calculate distance between elements.
+	 */
+	private NumericDistance dist = new EuclideanDistance();
+
+	/*
+	 * DataPoint -> Index mapping. Used to access data in distance matrix.
+	 */
+	ObjectToIndexMapping<DataPoint> idxMapping = null;
+
+	private boolean verbose = true; // when true, run() prints the clusters
+
+	public NearestNeighborAlgorithm(DataPoint[] dataPoints, double t) {
+		this(dataPoints, null, t);
+	}
+
+	/**
+	 * 
+	 * @param dataPoints
+	 *            elements to cluster. Element order should correspond to
+	 *            elements in distance matrix. May be null, in which case
+	 *            run() does nothing.
+	 * @param a
+	 *            matrix showing distance between elements. Can be null, in
+	 *            which case run() computes it with the configured metric.
+	 * @param t
+	 *            threshold value for new cluster creation.
+	 */
+	public NearestNeighborAlgorithm(DataPoint[] dataPoints, double[][] a,
+			double t) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(NearestNeighborAlgorithm.class.getName()));
+
+		this.t = t;
+		this.allDataPoints = dataPoints;
+		this.a = a;
+		this.allClusters = new ArrayList<Cluster>();
+
+		// Create DataPoint -> Index mapping for all data points, visiting
+		// them in array order. A null array is tolerated, as it is in run().
+		idxMapping = new ObjectToIndexMapping<DataPoint>();
+		if (dataPoints != null) {
+			for (DataPoint p : dataPoints) {
+				idxMapping.getIndex(p);
+			}
+		}
+	}
+
+	/* Adds x to the closest cluster within the threshold, else to a new one. */
+	private void assignPointToCluster(DataPoint x) {
+		/* find min distance between current point and all clusters */
+		Cluster closestCluster = null;
+		double minNNDist = Double.POSITIVE_INFINITY;
+		for (Cluster c : allClusters) {
+			double nnDist = getNNDistance(c, x);
+			if (nnDist < minNNDist) {
+				minNNDist = nnDist;
+				closestCluster = c;
+			}
+		}
+
+		// closestCluster is still null when no cluster qualified (e.g. none
+		// exist yet); the null check keeps an infinite t from dereferencing it
+		if (closestCluster != null && minNNDist <= t) {
+			closestCluster.add(x);
+		} else {
+			Cluster newCluster = new Cluster();
+			newCluster.add(x);
+			allClusters.add(newCluster);
+		}
+	}
+
+	/* Fills the symmetric matrix a with the pairwise distances given by dist. */
+	private void calculateDistanceMatrix() {
+		int n = allDataPoints.length;
+		a = new double[n][n];
+		for (int i = 0; i < n; i++) {
+			DataPoint x = allDataPoints[i];
+			for (int j = i + 1; j < n; j++) {
+				double d = dist.getDistance(x.getNumericAttrValues(),
+						allDataPoints[j].getNumericAttrValues());
+				a[i][j] = d;
+				a[j][i] = d;
+			}
+		}
+	}
+
+	public List<Cluster> getAllClusters() {
+		return allClusters;
+	}
+
+	/**
+	 * Calculates distance between cluster and element using Nearest Neighbor
+	 * approach: zero if the cluster already contains x, otherwise the
+	 * smallest matrix distance between x and any element of the cluster.
+	 */
+	private double getNNDistance(Cluster c, DataPoint x) {
+		if (c.contains(x)) {
+			return 0.0;
+		}
+
+		double nnDist = Double.POSITIVE_INFINITY;
+		int i = idxMapping.getIndex(x);
+		for (DataPoint y : c.getElements()) {
+			int j = idxMapping.getIndex(y);
+			nnDist = Math.min(nnDist, a[i][j]);
+		}
+		return nnDist;
+	}
+
+	/* Prints the threshold and the elements of every cluster. */
+	private void printResults() {
+		System.out.println("Nearest Neighbor Clustering with t = " + t);
+		System.out.println("Clusters:");
+		for (Cluster c : allClusters) {
+			System.out.println(c.getElementsAsString());
+		}
+	}
+
+	/**
+	 * Clusters all data points in array order and, when verbose, prints the
+	 * result. Does nothing for a null or empty point array. The distance
+	 * matrix is computed first if none was supplied.
+	 */
+	public void run() {
+		if (allDataPoints == null || allDataPoints.length == 0) {
+			return;
+		}
+		if (a == null) {
+			calculateDistanceMatrix();
+		}
+
+		for (DataPoint p : allDataPoints) {
+			assignPointToCluster(p);
+		}
+		if (verbose) {
+			printResults();
+		}
+	}
+
+	public void setDistance(NumericDistance dist) {
+		this.dist = dist;
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/rock/LinkMatrix.java b/src/org/yooreeka/algos/clustering/rock/LinkMatrix.java
new file mode 100644
index 0000000..4728cca
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/rock/LinkMatrix.java
@@ -0,0 +1,195 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.rock;
+
+import java.util.Arrays;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.utils.ObjectToIndexMapping;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.metrics.SimilarityMeasure;
+
+/**
+ * Calculates number of links between data points.
+ */
+public class LinkMatrix {
+
+	private static final Logger LOG = Logger.getLogger(LinkMatrix.class.getName());
+
+	private double th; // similarity threshold at or above which points are neighbors
+	double[][] pointSimilarityMatrix; // similarity for every pair of points
+	int[][] pointNeighborMatrix; // 1 where two points are neighbors, else 0
+	int[][] pointLinkMatrix; // link count for every pair of points
+	private ObjectToIndexMapping<DataPoint> objToIndexMapping;
+
+	public LinkMatrix(DataPoint[] points, double[][] similarityMatrix, double th) {
+		this.init(points, similarityMatrix, th);
+	}
+
+	public LinkMatrix(DataPoint[] points, SimilarityMeasure pointSim, double th) {
+		// Derive the similarity matrix from the measure, then proceed as
+		// when the matrix is supplied directly.
+		double[][] derived = calculatePointSimilarities(points, pointSim);
+		init(points, derived, th);
+	}
+
+	/*
+	 * Builds the symmetric similarity matrix from the text attribute values
+	 * of every pair of points; the diagonal is set to 1.0.
+	 */
+	private double[][] calculatePointSimilarities(DataPoint[] points,
+			SimilarityMeasure pointSim) {
+
+		int n = points.length;
+		double[][] simMatrix = new double[n][n];
+		for (int row = 0; row < n; row++) {
+			simMatrix[row][row] = 1.0;
+			String[] rowAttributes = points[row].getTextAttrValues();
+			// only the upper triangle is computed; the lower one mirrors it
+			for (int col = row + 1; col < n; col++) {
+				String[] colAttributes = points[col].getTextAttrValues();
+				double s = pointSim.similarity(rowAttributes, colAttributes);
+				simMatrix[row][col] = s;
+				simMatrix[col][row] = s;
+			}
+		}
+		return simMatrix;
+	}
+
+	/**
+	 * Calculates number of links between two clusters. Number of links between
+	 * two clusters is the sum of links between all point pairs (p1, p2) where
+	 * p1 belongs to the first cluster and p2 belongs to the other cluster.
+	 * 
+	 * @param clusterX
+	 *            first cluster
+	 * @param clusterY
+	 *            second cluster
+	 * 
+	 * @return link count between two clusters.
+	 */
+	public int getLinks(Cluster clusterX, Cluster clusterY) {
+		Set<DataPoint> itemsX = clusterX.getElements();
+		Set<DataPoint> itemsY = clusterY.getElements();
+		int total = 0;
+		for (DataPoint x : itemsX) {
+			for (DataPoint y : itemsY) {
+				total = total + getLinks(x, y);
+			}
+		}
+		return total;
+	}
+
+	public int getLinks(DataPoint p1, DataPoint p2) { // reads the precomputed link count
+		int row = objToIndexMapping.getIndex(p1);
+		int col = objToIndexMapping.getIndex(p2);
+		return pointLinkMatrix[row][col];
+	}
+
+	/*
+	 * Shared constructor logic: indexes the points, thresholds the similarity
+	 * matrix into a 0/1 neighbor matrix and derives the link counts from it.
+	 */
+	private void init(DataPoint[] points, double[][] similarityMatrix, double th) {
+
+		LOG.setLevel(YooreekaConfigurator.getLevel(LinkMatrix.class.getName()));
+
+		this.th = th;
+		this.pointSimilarityMatrix = similarityMatrix;
+
+		// Create DataPoint <-> Index mapping.
+		this.objToIndexMapping = new ObjectToIndexMapping<DataPoint>();
+		for (int i = 0; i < points.length; i++) {
+			objToIndexMapping.getIndex(points[i]);
+		}
+
+		// Identify neighbors: a[i][j] == 1 if (i,j) are neighbors and 0
+		// otherwise. Every point is a neighbor of itself.
+		int n = points.length;
+		int[][] neighbors = new int[n][n];
+		for (int i = 0; i < n; i++) {
+			neighbors[i][i] = 1;
+			for (int j = i + 1; j < n; j++) {
+				int flag = (similarityMatrix[i][j] >= th) ? 1 : 0;
+				neighbors[i][j] = flag;
+				neighbors[j][i] = flag;
+			}
+		}
+		this.pointNeighborMatrix = neighbors;
+
+		// Calculate number of links between points; each pair is computed
+		// once and mirrored into the other half of the matrix.
+		int[][] links = new int[n][n];
+		for (int i = 0; i < n; i++) {
+			for (int j = i; j < n; j++) {
+				int count = nLinksBetweenPoints(neighbors, i, j);
+				links[i][j] = count;
+				links[j][i] = count;
+			}
+		}
+		this.pointLinkMatrix = links;
+	}
+
+	private int nLinksBetweenPoints(int[][] neighbors, int indexX, int indexY) {
+		// with 0/1 entries a product is 1 only where both flags are set
+		int shared = 0;
+		for (int i = 0; i < neighbors.length; i++) {
+			shared += neighbors[indexX][i] * neighbors[i][indexY];
+		}
+		return shared;
+	}
+
+	public void printPointLinkMatrix() {
+		System.out.println("Point Link matrix (th=" + th + "):");
+		for (int[] row : pointLinkMatrix) {
+			System.out.println(Arrays.toString(row));
+		}
+	}
+
+	// Prints one row of 0/1 neighbor flags per point.
+	public void printPointNeighborMatrix() {
+		System.out.println("Point Neighbor matrix (th=" + th + "):");
+		for (int[] row : pointNeighborMatrix) {
+			System.out.println(Arrays.toString(row));
+		}
+	}
+
+	public void printSimilarityMatrix() {
+		System.out.println("Point Similarity matrix:");
+		for (double[] row : pointSimilarityMatrix) {
+			System.out.println(Arrays.toString(row));
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/algos/clustering/rock/MergeGoodnessMeasure.java b/src/org/yooreeka/algos/clustering/rock/MergeGoodnessMeasure.java
new file mode 100644
index 0000000..21d217a
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/rock/MergeGoodnessMeasure.java
@@ -0,0 +1,92 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.rock;
+
+/**
+ * Goodness measure for merging two clusters.
+ */
+public class MergeGoodnessMeasure {
+
+	/* Threshold value that was used to identify neighbors among points. */
+	private double linkThreshold;
+
+	/*
+	 * Exponent 1 + 2 * f(linkThreshold) used by g(); it depends only on the
+	 * threshold, so it is cached and refreshed whenever the threshold changes.
+	 */
+	private double p;
+
+	public MergeGoodnessMeasure(double th) {
+		this.linkThreshold = th;
+		this.p = 1.0 + 2.0 * f(th);
+	}
+
+	/**
+	 * This is just one of the possible implementations.
+	 * 
+	 * @param th
+	 *            threshold value that was used to identify neighbors among
+	 *            points.
+	 * @return (1 - th) / (1 + th)
+	 */
+	private double f(double th) {
+		/*
+		 * This implementation assumes that linkThreshold was a threshold for
+		 * similarity measure (as opposed to dissimilarity/distance).
+		 */
+		return (1.0 - th) / (1.0 + th);
+	}
+
+	/**
+	 * Goodness of merging two clusters given the links between them.
+	 * @return nLinks divided by (nX + nY)^p - nX^p - nY^p, where p is the
+	 *         exponent derived from the current threshold
+	 */
+	public double g(int nLinks, int nX, int nY) {
+		double denominator = Math.pow(nX + nY, p) - Math.pow(nX, p) - Math.pow(nY, p);
+		return nLinks / denominator;
+	}
+
+	/** @return the linkThreshold */
+	public double getTh() {
+		return linkThreshold;
+	}
+
+	/**
+	 * @param th
+	 *            the linkThreshold to set; the cached exponent used by g() is
+	 *            recomputed so that it stays consistent with the new value
+	 */
+	public void setTh(double th) {
+		this.linkThreshold = th;
+		this.p = 1.0 + 2.0 * f(th);
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/rock/ROCKAlgorithm.java b/src/org/yooreeka/algos/clustering/rock/ROCKAlgorithm.java
new file mode 100644
index 0000000..2932f0c
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/rock/ROCKAlgorithm.java
@@ -0,0 +1,142 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.rock;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.hierarchical.Dendrogram;
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.metrics.JaccardCoefficient;
+import org.yooreeka.util.metrics.SimilarityMeasure;
+
+public class ROCKAlgorithm {
+
+	private static final Logger LOG = Logger.getLogger(ROCKAlgorithm.class.getName());
+
+	/** Demo run on four small text documents. */
+	public static void main(String[] args) {
+		// Define data
+		DataPoint[] elements = new DataPoint[] {
+				new DataPoint("Doc1", new String[] { "book" }),
+				new DataPoint("Doc2", new String[] { "water", "sun", "sand", "swim" }),
+				new DataPoint("Doc3", new String[] { "water", "sun", "swim", "read" }),
+				new DataPoint("Doc4", new String[] { "read", "sand" }) };
+
+		// Ask for a single cluster with a neighbor threshold of 0.2
+		int k = 1;
+		double th = 0.2;
+		ROCKAlgorithm rock = new ROCKAlgorithm(elements, k, th);
+		Dendrogram dnd = rock.cluster();
+		dnd.printAll();
+	}
+	private DataPoint[] points; // data points to cluster
+	private int k; // desired number of clusters
+
+	private double th; // neighbor threshold
+
+	private SimilarityMeasure similarityMeasure;
+
+	private LinkMatrix linkMatrix;
+
+	/**
+	 * @param points
+	 *            data points to cluster.
+	 * @param k
+	 *            desired number of clusters.
+	 * @param th
+	 *            threshold value to identify neighbors among points.
+	 */
+	public ROCKAlgorithm(DataPoint[] points, int k, double th) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(ROCKAlgorithm.class.getName()));
+
+		this.points = points;
+		this.k = k;
+		this.th = th;
+		// The Jaccard coefficient is the similarity measure behind the links.
+		this.similarityMeasure = new JaccardCoefficient();
+		this.linkMatrix = new LinkMatrix(points, this.similarityMeasure, th);
+	}
+
+	/**
+	 * Starts with one cluster per point and keeps merging the best pair
+	 * until k clusters remain or a merge attempt leaves the cluster count
+	 * unchanged. Every stage becomes a dendrogram level labeled with g.
+	 */
+	public Dendrogram cluster() {
+		// Create a new cluster out of every point.
+		List<Cluster> initialClusters = new ArrayList<Cluster>();
+		for (DataPoint point : points) {
+			initialClusters.add(new Cluster(point));
+		}
+
+		double g = Double.POSITIVE_INFINITY;
+		Dendrogram dnd = new Dendrogram("Goodness");
+		dnd.addLevel(String.valueOf(g), initialClusters);
+
+		ROCKClusters allClusters = new ROCKClusters(initialClusters, linkMatrix,
+				new MergeGoodnessMeasure(th));
+
+		for (int remaining = allClusters.size(); remaining > k;) {
+			g = allClusters.mergeBestCandidates();
+			int afterMerge = allClusters.size();
+			if (afterMerge == remaining) {
+				break; // there are no linked clusters to merge
+			}
+			remaining = afterMerge;
+			dnd.addLevel(String.valueOf(g), allClusters.getAllClusters());
+		}
+
+		System.out.println("Number of clusters: "
+				+ allClusters.getAllClusters().size());
+		return dnd;
+	}
+
+	public int getK() {
+		return this.k;
+	}
+
+	public LinkMatrix getLinkMatrix() {
+		return this.linkMatrix;
+	}
+
+	public SimilarityMeasure getSimilarityMeasure() {
+		return this.similarityMeasure;
+	}
+
+	public double getTh() {
+		return this.th;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/clustering/rock/ROCKClusters.java b/src/org/yooreeka/algos/clustering/rock/ROCKClusters.java
new file mode 100644
index 0000000..447cb57
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/rock/ROCKClusters.java
@@ -0,0 +1,205 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.rock;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.Cluster;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Set of clusters and link data for ROCK implementation.
+ */
+public class ROCKClusters {
+
+	private static final Logger LOG = Logger.getLogger(ROCKClusters.class.getName());
+
+	/*
+	 * Next unused cluster ID; incremented by every addCluster() call.
+	 */
+	private int nextKey;
+
+	/*
+	 * Provides ID -> Cluster mapping for the clusters that currently exist.
+	 */
+	private Map<Integer, Cluster> clusterMap;
+
+	/*
+	 * Provides ID -> Similar Clusters mapping; every list is sorted best-first.
+	 */
+	private Map<Integer, List<SimilarCluster>> similarClustersMap;
+
+	/*
+	 * Goodness measure between two clusters. It is used to determine cluster
+	 * eligibility for merge.
+	 */
+	private MergeGoodnessMeasure goodnessMeasure;
+
+	/*
+	 * Links between data points and clusters.
+	 */
+	private LinkMatrix linkMatrix;
+
+	public ROCKClusters(List<Cluster> initialClusters, LinkMatrix linkMatrix,
+			MergeGoodnessMeasure goodnessMeasure) {
+		// The log level for this class is looked up by class name.
+		LOG.setLevel(YooreekaConfigurator.getLevel(ROCKClusters.class.getName()));
+		
+		this.linkMatrix = linkMatrix;
+		clusterMap = new HashMap<Integer, Cluster>();
+		nextKey = 0;
+		this.goodnessMeasure = goodnessMeasure;
+
+		for (Cluster c : initialClusters) {
+			addCluster(c);
+		}
+		calculateClusterSimilarities();
+	}
+
+	/** Stores the cluster under the next unused key and returns that key. */
+	public int addCluster(Cluster c) {
+		int key = nextKey++;
+		clusterMap.put(key, c);
+		return key;
+	}
+
+	/** Rebuilds, for every cluster, its goodness-sorted list of linked clusters. */
+	public void calculateClusterSimilarities() {
+		similarClustersMap = new HashMap<Integer, List<SimilarCluster>>();
+		for (Integer clusterKey : getAllKeys()) {
+			List<SimilarCluster> similarClusters = new LinkedList<SimilarCluster>();
+			Cluster cluster = getCluster(clusterKey);
+			for (Integer similarClusterKey : getAllKeys()) {
+				if (!clusterKey.equals(similarClusterKey)) { // by value, not by reference
+					Cluster similarCluster = getCluster(similarClusterKey);
+					int nLinks = linkMatrix.getLinks(cluster, similarCluster);
+					if (nLinks > 0) {
+						double goodness = goodnessMeasure.g(nLinks,
+								cluster.size(), similarCluster.size());
+						similarClusters.add(new SimilarCluster(similarClusterKey, goodness));
+					}
+				}
+			}
+			setSimilarClusters(clusterKey, similarClusters);
+		}
+	}
+
+	/**
+	 * Returns the best-scoring key pair, or an empty list if nothing is linked.
+	 */
+	public List<Integer> findBestMergeCandidates() {
+		Integer bestKey = null;
+		SimilarCluster bestSimilarCluster = null;
+		double bestGoodness = Double.NEGATIVE_INFINITY;
+		for (Map.Entry<Integer, List<SimilarCluster>> e : similarClustersMap
+				.entrySet()) {
+			List<SimilarCluster> similarClusters = e.getValue();
+			if (similarClusters != null && similarClusters.size() > 0) {
+				SimilarCluster topSimilarCluster = similarClusters.get(0); // list head = best
+				if (topSimilarCluster.getGoodness() > bestGoodness) {
+					bestGoodness = topSimilarCluster.getGoodness();
+					bestKey = e.getKey();
+					bestSimilarCluster = topSimilarCluster;
+				}
+			}
+		}
+		List<Integer> bestMergeCandidates = new ArrayList<Integer>();
+		if (bestKey != null) {
+			bestMergeCandidates.add(bestKey);
+			bestMergeCandidates.add(bestSimilarCluster.getClusterKey());
+		}
+		return bestMergeCandidates;
+	}
+
+	public Collection<Cluster> getAllClusters() {
+		return new ArrayList<Cluster>(clusterMap.values()); // snapshot, not a live view
+	}
+
+	public Set<Integer> getAllKeys() {
+		return new HashSet<Integer>(clusterMap.keySet());
+	}
+
+	public Cluster getCluster(Integer key) {
+		return clusterMap.get(key);
+	}
+
+	public double mergeBestCandidates() {
+		List<Integer> mergeCandidates = findBestMergeCandidates();
+		// NaN is returned when no linked pair was left to merge.
+		double goodness = Double.NaN;
+
+		if (mergeCandidates.size() > 1) {
+			// Read the score before merging: the merge rebuilds the map.
+			Integer key1 = mergeCandidates.get(0);
+			Integer key2 = mergeCandidates.get(1);
+			goodness = similarClustersMap.get(key1).get(0).getGoodness();
+
+			mergeClusters(key1, key2);
+		}
+
+		return goodness;
+	}
+
+	public Integer mergeClusters(Integer key1, Integer key2) {
+		// Replace the two clusters with their union, stored under a fresh key.
+		Cluster cluster1 = getCluster(key1);
+		Cluster cluster2 = getCluster(key2);
+		Cluster cluster3 = new Cluster(cluster1, cluster2);
+		removeCluster(key1);
+		removeCluster(key2);
+		Integer key3 = addCluster(cluster3);
+		// The key set changed, so every similarity list is rebuilt.
+		calculateClusterSimilarities();
+
+		return key3;
+	}
+
+	public Cluster removeCluster(Integer key) {
+		return clusterMap.remove(key); // similarity lists are not updated here
+	}
+
+	private void setSimilarClusters(Integer key, List<SimilarCluster> list) {
+		SimilarCluster.sortByGoodness(list); // sorts the given list itself, best first
+		similarClustersMap.put(key, list);
+	}
+
+	public int size() {
+		return clusterMap.size();
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/rock/SimilarCluster.java b/src/org/yooreeka/algos/clustering/rock/SimilarCluster.java
new file mode 100644
index 0000000..e4ad7dc
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/rock/SimilarCluster.java
@@ -0,0 +1,85 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.rock;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+public class SimilarCluster {
+	/**
+	 * Sorts the list in place by goodness, highest first. Entries whose
+	 * goodness is NaN are placed at the tail of the list.
+	 * 
+	 * @param values
+	 *            list to sort.
+	 */
+	public static void sortByGoodness(List<SimilarCluster> values) {
+		Collections.sort(values, new Comparator<SimilarCluster>() {
+
+			public int compare(SimilarCluster f1, SimilarCluster f2) {
+				double g1 = f1.getGoodness();
+				double g2 = f2.getGoodness();
+				// NaN compares false with everything; rank it last so that the
+				// ordering stays consistent, as the Comparator contract requires.
+				if (Double.isNaN(g1)) {
+					return Double.isNaN(g2) ? 0 : 1;
+				}
+				if (Double.isNaN(g2)) {
+					return -1;
+				}
+				return Double.compare(g2, g1); // descending goodness
+			}
+		});
+	}
+	private Integer clusterKey;
+
+	private Double goodness;
+
+	public SimilarCluster(Integer clusterKey, Double goodness) {
+		this.clusterKey = clusterKey;
+		this.goodness = goodness;
+	}
+
+	public Integer getClusterKey() {
+		return clusterKey;
+	}
+
+	public Double getGoodness() {
+		return goodness;
+	}
+
+	@Override
+	public String toString() {
+		return "[clusterKey=" + this.clusterKey + ",goodness=" + this.goodness
+				+ "]";
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/test/MyDiggSpaceData.java b/src/org/yooreeka/algos/clustering/test/MyDiggSpaceData.java
new file mode 100644
index 0000000..5489bbd
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/test/MyDiggSpaceData.java
@@ -0,0 +1,125 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.test;
+
+import java.util.List;
+
+import org.yooreeka.algos.clustering.dbscan.DBSCANAlgorithm;
+import org.yooreeka.algos.clustering.hierarchical.Dendrogram;
+import org.yooreeka.algos.clustering.model.Attribute;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.rock.ROCKAlgorithm;
+import org.yooreeka.algos.clustering.utils.Attributes;
+import org.yooreeka.algos.reco.collab.data.DiggData;
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.content.digg.DiggStoryItem;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.metrics.CosineDistance;
+
+public class MyDiggSpaceData {
+	// Converts a story into a data point labelled "<id>:<title>".
+	private static DataPoint createDataPoint(DiggStoryItem story, int topNTerms) {
+		String storyLabel = String.valueOf(story.getId() + ":"
+				+ story.getTitle());
+		String storyText = story.getTitle() + " " + story.getDescription();
+		Content content = new Content(storyLabel, storyText, topNTerms);
+		String[] terms = content.getTerms();
+		// Each term serves as both the attribute name and the attribute value.
+		Attribute[] attributes = Attributes.createAttributes(terms, terms);
+		return new DataPoint(storyLabel, attributes);
+	}
+
+	public static MyDiggSpaceDataset createDataset() {
+		return createDataset(10);
+	}
+
+	public static MyDiggSpaceDataset createDataset(int topNTerms) {
+		DiggData.loadData(YooreekaConfigurator.getHome()
+				+ "/data/ch04/ch4_digg_stories.csv");
+		// NOTE(review): relies on loadData having filled DiggData.allStories.
+		List<DiggStoryItem> allStories = DiggData.allStories;
+
+		DataPoint[] allDataPoints = new DataPoint[allStories.size()];
+
+		for (int i = 0, n = allDataPoints.length; i < n; i++) {
+			DiggStoryItem story = allStories.get(i);
+			DataPoint di = createDataPoint(story, topNTerms);
+			allDataPoints[i] = di;
+		}
+		return new MyDiggSpaceDataset(allDataPoints);
+	}
+
+	public static MyDiggSpaceDataset createDataset(int topNTerms,
+			List<DiggStoryItem> allStories) {
+		// Unlike createDataset(int), this loads no file and prints every story.
+		DataPoint[] allDataPoints = new DataPoint[allStories.size()];
+
+		for (int i = 0, n = allDataPoints.length; i < n; i++) {
+
+			DiggStoryItem story = allStories.get(i);
+			story.print();
+
+			DataPoint di = createDataPoint(story, topNTerms);
+			allDataPoints[i] = di;
+		}
+		return new MyDiggSpaceDataset(allDataPoints);
+	}
+
+	public static void main(String[] args) {
+		// Alternative demo: testRockOnDigg();
+		testDBSCAN();
+	}
+
+	private static void testDBSCAN() {
+		MyDiggSpaceDataset ds = MyDiggSpaceData.createDataset(3);
+		double eps = 0.8;
+		int minPts = 2;
+		boolean useTermFreq = true;
+		DBSCANAlgorithm dbscan = new DBSCANAlgorithm(ds.getData(),
+				new CosineDistance(), eps, minPts, useTermFreq);
+
+		dbscan.cluster();
+		// dbscan.printDistances();
+	}
+
+	public static void testRockOnDigg() {
+		MyDiggSpaceDataset ds = MyDiggSpaceData.createDataset(10);
+		ROCKAlgorithm rock = new ROCKAlgorithm(ds.getData(), 4, 0.1);
+		// rock.getLinkMatrix().printSimilarityMatrix();
+		// rock.getLinkMatrix().printPointNeighborMatrix();
+		// rock.getLinkMatrix().printPointLinkMatrix();
+		Dendrogram dnd = rock.cluster();
+		dnd.print(130); // if you get NPE here it means that level doesn't
+						// exist.
+
+		// ROCK stops clustering if there are no links between clusters.
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/test/MyDiggSpaceDataset.java b/src/org/yooreeka/algos/clustering/test/MyDiggSpaceDataset.java
new file mode 100644
index 0000000..a374d6f
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/test/MyDiggSpaceDataset.java
@@ -0,0 +1,56 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.test;
+
+import org.yooreeka.algos.clustering.model.DataPoint;
+
+public class MyDiggSpaceDataset {
+	/** The data points of this dataset, held by reference (no copy is made). */
+	private DataPoint[] data;
+	/** When true, the constructor prints a summary to standard output. */
+	private boolean verbose = true;
+
+	public MyDiggSpaceDataset(DataPoint[] data) {
+		this.data = data;
+		if (!verbose) {
+			return;
+		}
+		System.out.println("\nCreated " + getClass().getSimpleName()
+				+ " dataset with " + data.length + " items:\n");
+		for (int i = 0; i < data.length; i++) {
+			System.out.println(data[i].toShortString());
+		}
+	}
+
+	public DataPoint[] getData() {
+		return data;
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/test/SFData.java b/src/org/yooreeka/algos/clustering/test/SFData.java
new file mode 100644
index 0000000..94e4f42
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/test/SFData.java
@@ -0,0 +1,212 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.test;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.supercsv.io.CsvListReader;
+import org.supercsv.prefs.CsvPreference;
+import org.yooreeka.algos.clustering.model.Attribute;
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.algos.clustering.partitional.NearestNeighborAlgorithm;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.metrics.NumericDistance;
+import org.yooreeka.util.metrics.EuclideanDistance;
+
+public class SFData {
+
+	/*
+	 * All available attributes.
+	 */
+	private static String[] allAvailableAttributeNames = { "Age",
+			"IncomeRange", "Education", "Skills", "Social", "isPaid" };
+
+	public static SFDataset createDataset() {
+		return createDataset(allAvailableAttributeNames);
+	}
+
+	/**
+	 * Creates dataset that uses only attributes with specified names. Other
+	 * attributes will not be loaded.
+	 * 
+	 * @param attrNames
+	 *            attribute names to use.
+	 * @return dataset that uses only specified attributes.
+	 */
+	public static SFDataset createDataset(String[] attrNames) {
+
+		// check that attribute names are valid
+		validateAttrNames(attrNames, allAvailableAttributeNames);
+
+		DataPoint[] allData = loadDataFromFile(YooreekaConfigurator.getHome()
+				+ "/data/ch04/clusteringSF.dat", attrNames);
+
+		NumericDistance dist = new EuclideanDistance();
+		SFDataset sfDataset = new SFDataset(allData, dist);
+		return sfDataset;
+	}
+
+	private static DataPoint[] loadDataFromFile(String filename,
+			String[] attrNames) {
+		List<DataPoint> allData = new ArrayList<DataPoint>();
+		CsvListReader csvReader = null;
+		try {
+			csvReader = new CsvListReader(new BufferedReader(new FileReader(
+					filename)), CsvPreference.EXCEL_PREFERENCE);
+
+			// Load all available headers from CSV file
+			String[] csvHeaders = csvReader.getCSVHeader(true);
+
+			// Map attribute names to field IDs from CSV file using header names
+			int[] attrFieldIndexes = new int[attrNames.length];
+			for (int i = 0; i < attrFieldIndexes.length; i++) {
+				String header = attrNames[i];
+				int csvHeaderId = -1;
+				for (int j = 0; j < csvHeaders.length; j++) {
+					if (header.equalsIgnoreCase(csvHeaders[j])) {
+						csvHeaderId = j;
+						break;
+					}
+				}
+				// If there is no header found it means we have wrong attribute
+				// name or wrong file.
+				if (csvHeaderId == -1) {
+					throw new IllegalStateException(
+							"Attribute name mismatch. "
+									+ "Failed to find attribute name: '"
+									+ header
+									+ "' among cvs file headers. All available headers: "
+									+ Arrays.toString(csvHeaders));
+				} else {
+					attrFieldIndexes[i] = csvHeaderId;
+				}
+			}
+
+			// Read file and include only selected attributes
+			List<String> line = null;
+			while ((line = csvReader.read()) != null) {
+				try {
+					String label = line.get(0); // first column is used as the label
+					Attribute[] attributes = new Attribute[attrNames.length];
+					for (int i = 0, n = attrNames.length; i < n; i++) {
+						int attrFieldIndex = attrFieldIndexes[i];
+						String value = line.get(attrFieldIndex);
+						attributes[i] = new Attribute(attrNames[i],
+								Double.valueOf(value));
+					}
+					DataPoint dataPoint = new DataPoint(label, attributes);
+					allData.add(dataPoint);
+				} catch (Exception e) {
+					throw new RuntimeException("Error while reading line: '"
+							+ line + "'", e);
+				}
+			}
+
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Error while reading SF data from csv file: '" + filename
+							+ "'. ", e);
+		} finally {
+			try {
+				if (csvReader != null) {
+					csvReader.close();
+				}
+			} catch (IOException e) {
+				e.printStackTrace(); // a failed close is reported, not rethrown
+			}
+		}
+		// Summary of what was loaded, printed to standard output.
+		System.out.println("From file: " + filename);
+		System.out.println("Using attribute names: "
+				+ Arrays.toString(attrNames));
+		System.out.println("Loaded " + allData.size() + " data points.");
+
+		return allData.toArray(new DataPoint[allData.size()]);
+	}
+
+	public static void main(String[] args) {
+
+		// Creates dataset that uses all available attributes
+		SFDataset ds = SFData.createDataset();
+
+		// Creates dataset that uses only a subset of available attributes
+		// SFDataset ds = SFData.createDataset(new String[] {"IncomeRange",
+		// "Age"});
+		// SFDataset ds = SFData.createDataset(new String[] {"Age"});
+
+		ds.printDistanceMatrix();
+
+		// Dendrogram dnd = null;
+
+		// Uncomment one of these to run clustering
+
+		// // Run Single Link Clustering
+		// SingleLinkAlgorithm sla = new SingleLinkAlgorithm(ds.getData(),
+		// ds.getDistanceMatrix());
+		// dnd = sla.cluster();
+		// dnd.print();
+
+		// // Run MST Single Link Clustering
+		// MSTSingleLinkAlgorithm msla = new
+		// MSTSingleLinkAlgorithm(ds.getData(), ds.getDistanceMatrix());
+		// dnd = msla.cluster();
+		// dnd.print();
+
+		// // Run Average Link Clustering
+		// AverageLinkAlgorithm ala = new AverageLinkAlgorithm(ds.getData(),
+		// ds.getDistanceMatrix());
+		// dnd = ala.cluster();
+		// dnd.print();
+
+		// double T = 5.0;
+
+		NearestNeighborAlgorithm nna = new NearestNeighborAlgorithm(
+				ds.getData(), ds.getAdjacencyMatrix(), 5.0);
+		nna.run();
+	}
+
+	private static void validateAttrNames(String[] actualAttrNames,
+			String[] validAttrNames) {
+		List<String> validNames = Arrays.asList(validAttrNames);
+		for (String actualAttrName : actualAttrNames) {
+			if (!validNames.contains(actualAttrName)) { // exact, case-sensitive match
+				throw new IllegalArgumentException("Invalid attribute name: '"
+						+ actualAttrName + "'. " + "Valid names are: "
+						+ Arrays.toString(validAttrNames));
+			}
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/test/SFDataset.java b/src/org/yooreeka/algos/clustering/test/SFDataset.java
new file mode 100644
index 0000000..1f65d16
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/test/SFDataset.java
@@ -0,0 +1,93 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.test;
+
+import java.util.Arrays;
+
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.util.metrics.NumericDistance;
+
+public class SFDataset {
+
+	private DataPoint[] data;
+	private NumericDistance distance;
+	private double[][] adjacencyMatrix;
+
+	public SFDataset(DataPoint[] data, NumericDistance distance) {
+		this.data = data;
+		this.distance = distance;
+		this.adjacencyMatrix = calculateAdjacencyMatrix();
+	}
+
+	/**
+	 * Computes the symmetric matrix of pairwise distances between all data
+	 * points, using the distance function supplied at construction time.
+	 * 
+	 * @return n-by-n matrix; element [i][j] is the distance between points i and j
+	 */
+	private double[][] calculateAdjacencyMatrix() {
+		final int size = data.length;
+		double[][] matrix = new double[size][size];
+
+		// Fill the upper triangle and mirror it: the matrix is symmetric.
+		for (int row = 0; row < size; row++) {
+			DataPoint rowPoint = data[row];
+			for (int col = row + 1; col < size; col++) {
+				DataPoint colPoint = data[col];
+				double d = distance.getDistance(
+						rowPoint.getNumericAttrValues(),
+						colPoint.getNumericAttrValues());
+				matrix[row][col] = d;
+				matrix[col][row] = d;
+			}
+			// The distance from a point to itself is zero.
+			matrix[row][row] = 0.0;
+		}
+		return matrix;
+	}
+
+	// The matrix-related methods may eventually move into a separate
+	// class.
+
+	public double[][] getAdjacencyMatrix() {
+		return adjacencyMatrix;
+	}
+
+	public DataPoint[] getData() {
+		return data;
+	}
+
+	public void printDistanceMatrix() {
+		for (double[] matrixRow : adjacencyMatrix) {
+			System.out.println(Arrays.toString(matrixRow));
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/utils/Attributes.java b/src/org/yooreeka/algos/clustering/utils/Attributes.java
new file mode 100644
index 0000000..4234fd5
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/utils/Attributes.java
@@ -0,0 +1,143 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.utils;
+
+import org.yooreeka.algos.clustering.model.Attribute;
+
+/*
+ * Utility methods to simplify operations on attributes.
+ */
+public class Attributes {
+
+	/** Returns true when every attribute is numeric (also for an empty array). */
+	public static boolean allNumeric(Attribute[] attributes) {
+		for (Attribute attribute : attributes) {
+			if (!attribute.isNumeric()) {
+				// One non-numeric attribute settles the answer.
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/** Returns true when every attribute is text (also for an empty array). */
+	public static boolean allText(Attribute[] attributes) {
+		for (Attribute attribute : attributes) {
+			if (!attribute.isText()) {
+				// One non-text attribute settles the answer.
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/** Wraps each value in an attribute named "a-0", "a-1", ... by position. */
+	public static Attribute[] createAttributes(double[] attrValues) {
+		Attribute[] result = new Attribute[attrValues.length];
+		int index = 0;
+		for (double value : attrValues) {
+			result[index] = new Attribute("a-" + index, value);
+			index++;
+		}
+		return result;
+	}
+
+	/** Wraps each value in an attribute named "a-0", "a-1", ... by position. */
+	public static Attribute[] createAttributes(String[] attrValues) {
+		Attribute[] result = new Attribute[attrValues.length];
+		int index = 0;
+		for (String value : attrValues) {
+			result[index] = new Attribute("a-" + index, value);
+			index++;
+		}
+		return result;
+	}
+
+	/** Pairs names[i] with values[i]; one attribute is created per name. */
+	public static Attribute[] createAttributes(String[] names, double[] values) {
+		Attribute[] result = new Attribute[names.length];
+		for (int i = 0; i < result.length; i++) {
+			result[i] = new Attribute(names[i], values[i]);
+		}
+		return result;
+	}
+
+	/** Pairs names[i] with values[i]; one attribute is created per name. */
+	public static Attribute[] createAttributes(String[] names, String[] values) {
+		Attribute[] result = new Attribute[names.length];
+		for (int i = 0; i < result.length; i++) {
+			result[i] = new Attribute(names[i], values[i]);
+		}
+		return result;
+	}
+
+	/** Returns the attribute names, in the same order as the attributes. */
+	public static String[] getNames(Attribute[] attributes) {
+		String[] result = new String[attributes.length];
+		int index = 0;
+		for (Attribute attribute : attributes) {
+			result[index++] = attribute.getName();
+		}
+		return result;
+	}
+
+	/** Extracts the numeric values; throws RuntimeException on a non-numeric one. */
+	public static double[] getNumericValues(Attribute[] attributes) {
+		double[] result = new double[attributes.length];
+		for (int i = 0; i < result.length; i++) {
+			Attribute current = attributes[i];
+			if (!current.isNumeric()) {
+				// Fail fast: no partially filled array is ever returned.
+				throw new RuntimeException(
+						"Non-numeric attribute encountered. " + "Attribute: "
+								+ current.toString());
+			}
+			result[i] = current.getNumericValue();
+		}
+		return result;
+	}
+
+	/** Extracts the text values; throws RuntimeException on a non-text one. */
+	public static String[] getTextValues(Attribute[] attributes) {
+		String[] result = new String[attributes.length];
+		for (int i = 0; i < result.length; i++) {
+			Attribute current = attributes[i];
+			if (!current.isText()) {
+				// Fail fast: no partially filled array is ever returned.
+				throw new RuntimeException("Non-text attribute encountered. "
+						+ "Attribute: " + current.toString());
+			}
+			result[i] = current.getTextValue();
+		}
+		return result;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/clustering/utils/ObjectToIndexMapping.java b/src/org/yooreeka/algos/clustering/utils/ObjectToIndexMapping.java
new file mode 100644
index 0000000..1ff7a1f
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/utils/ObjectToIndexMapping.java
@@ -0,0 +1,90 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.utils;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Maps object values to an index. Index is zero-based.
+ */
+public class ObjectToIndexMapping<T> implements java.io.Serializable {
+
+	private static final long serialVersionUID = 2031098306406708902L;
+
+	/*
+	 * Next unused index; it grows by one for every value seen for the first time.
+	 */
+	private int nextIndex = 0;
+
+	/*
+	 * Forward lookup: value -> assigned index.
+	 */
+	private Map<T, Integer> objMapping = new HashMap<T, Integer>();
+
+	/*
+	 * Reverse lookup: assigned index -> value.
+	 */
+	private Map<Integer, T> indexMapping = new HashMap<Integer, T>();
+
+	public ObjectToIndexMapping() {
+		// nothing to set up beyond the field initializers
+	}
+
+	/**
+	 * Looks up the index of the given value, registering the value under the
+	 * next unused index when it has not been seen before.
+	 */
+	public int getIndex(T value) {
+		Integer known = objMapping.get(value);
+		if (known != null) {
+			return known;
+		}
+		int assigned = nextIndex++;
+		objMapping.put(value, assigned);
+		indexMapping.put(assigned, value);
+		return assigned;
+	}
+
+	/**
+	 * Reverse lookup: the value registered under the index, or null if none.
+	 */
+	public T getObject(int index) {
+		return indexMapping.get(index);
+	}
+
+	/**
+	 * Number of values registered so far.
+	 */
+	public int getSize() {
+		return objMapping.size();
+	}
+}
diff --git a/src/org/yooreeka/algos/clustering/utils/SortedArrayClustering.java b/src/org/yooreeka/algos/clustering/utils/SortedArrayClustering.java
new file mode 100644
index 0000000..87be6e8
--- /dev/null
+++ b/src/org/yooreeka/algos/clustering/utils/SortedArrayClustering.java
@@ -0,0 +1,71 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.clustering.utils;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.clustering.model.DataPoint;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class SortedArrayClustering {
+
+	private static final Logger LOG = Logger.getLogger(SortedArrayClustering.class.getName());
+
+	// Sorts points in place by descending getR() and prints them to stdout.
+	public static void cluster(DataPoint[] points) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(SortedArrayClustering.class.getName()));
+
+		Arrays.sort(points, new Comparator<DataPoint>() {
+			public int compare(DataPoint p1, DataPoint p2) {
+				// A smaller r sorts later, which yields descending order.
+				if (p1.getR() < p2.getR()) {
+					return 1;
+				}
+				if (p1.getR() > p2.getR()) {
+					return -1;
+				}
+				// Neither is strictly larger: treat the two as equal.
+				return 0;
+			}
+		});
+
+		// Dump the sorted points, one println per point.
+		for (DataPoint point : points) {
+			System.out.println(point.toShortString());
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/cache/FileStore.java b/src/org/yooreeka/algos/reco/collab/cache/FileStore.java
new file mode 100644
index 0000000..74feb15
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/cache/FileStore.java
@@ -0,0 +1,134 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.cache;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.logging.Logger;
+
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Implementation of <code>Store</code> interface. Uses files to store objects
+ * using java serialization. Each object instance is stored in a separate file.
+ */
+public class FileStore implements Store {
+
+	private static final Logger LOG = Logger.getLogger(FileStore.class.getName());
+
+	private File dataDir;
+
+	/** Stores objects under the given directory, trying to create it when missing. */
+	public FileStore(File dir) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(FileStore.class.getName()));
+
+		if (!dir.exists()) {
+			dir.mkdirs();
+		}
+		this.dataDir = dir;
+	}
+
+	/**
+	 * Creates a new instance that will use specified directory to store
+	 * objects.
+	 * 
+	 * @param dir
+	 *            directory that should be used to store/retrieve objects.
+	 */
+	public FileStore(String dir) {
+		this(new File(dir));
+	}
+
+	public boolean exists(String key) {
+		File f = getFile(key);
+		return f.exists();
+	}
+
+	/** Returns the object stored under the key, or null if no file exists for it. */
+	public Object get(String key) {
+		try {
+			File f = getFile(key);
+			if (!f.exists()) {
+				return null;
+			}
+			// Opened separately so the finally block can always release the handle.
+			FileInputStream fInStream = new FileInputStream(f);
+			try {
+				return new ObjectInputStream(new BufferedInputStream(fInStream)).readObject();
+			} finally {
+				fInStream.close();
+			}
+		} catch (Exception e) {
+			throw new RuntimeException(
+					"Error while loading data from file (dir: '" + dataDir
+							+ "', filename: '" + key + "').", e);
+		}
+	}
+
+	/*
+	 * Derives filename from the key and returns instance of <code>File</code>
+	 */
+	private File getFile(String key) {
+		// NOTE(review): key is used verbatim as the file name; separators are not rejected
+		return new File(dataDir, key + ".tmp");
+	}
+
+	public void put(String key, Object o) {
+		try {
+			FileOutputStream foutStream = new FileOutputStream(getFile(key));
+			try {
+				ObjectOutputStream objOutputStream = new ObjectOutputStream(
+						new BufferedOutputStream(foutStream));
+				objOutputStream.writeObject(o);
+				objOutputStream.flush(); // drains both wrappers into the file stream
+			} finally {
+				foutStream.close(); // released even when serialization fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Error while saving data into file (dir: '" + dataDir
+							+ "', filename: '" + key + "').", e);
+		}
+	}
+
+	public void remove(String key) {
+		File f = getFile(key);
+		if (f.exists()) {
+			f.delete();
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/cache/Store.java b/src/org/yooreeka/algos/reco/collab/cache/Store.java
new file mode 100644
index 0000000..fdecebd
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/cache/Store.java
@@ -0,0 +1,72 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.cache;
+
+/**
+ * A <code>Store</code> provides service for persisting pre-calculated data.
+ */
+public interface Store {
+	/**
+	 * Checks if key already exists.
+	 *
+	 * @param key
+	 *            object id.
+	 * @return true if the key already exists.
+	 */
+	public boolean exists(String key);
+
+	/**
+	 * Retrieves object by key.
+	 *
+	 * @param key
+	 *            identifies data to retrieve.
+	 * @return the object stored under the key.
+	 */
+	public Object get(String key);
+
+	/**
+	 * Persists object. Overwrites previously stored data with the same id.
+	 *
+	 * @param key
+	 *            id to identify the object.
+	 * @param o
+	 *            object to be stored.
+	 */
+	public void put(String key, Object o);
+
+	/**
+	 * Deletes object.
+	 *
+	 * @param key
+	 *            identifies object to delete.
+	 */
+	public void remove(String key);
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/BaseDataset.java b/src/org/yooreeka/algos/reco/collab/data/BaseDataset.java
new file mode 100644
index 0000000..e57ebac
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/BaseDataset.java
@@ -0,0 +1,431 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * Dataset implementation that we will use to work with sample data.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class BaseDataset implements Serializable, Dataset {
+
+	// private static final Logger logger = Logger.getLogger(BaseDataset.class);
+
+	/**
+	 * Serialization version identifier.
+	 */
+	private static final long serialVersionUID = 8414181723065929475L;
+
+	/** Reads back a dataset written by save(String, BaseDataset); the file must exist. */
+	public static BaseDataset load(String file) {
+		File f = new File(file);
+		if (!f.exists()) {
+			throw new IllegalArgumentException("File doesn't exist: '" + file + "'.");
+		}
+		Object o;
+		try {
+			// Opened separately so the finally block can always release the handle.
+			FileInputStream fInStream = new FileInputStream(f);
+			try {
+				o = new ObjectInputStream(new BufferedInputStream(fInStream))
+						.readObject();
+			} finally {
+				fInStream.close();
+			}
+		} catch (Exception e) {
+			throw new RuntimeException(
+					"Error while loading data from file: '" + file + "'", e);
+		}
+		System.out.println("loaded dataset from file");
+		return (BaseDataset) o;
+	}
+
+	/** Serializes the dataset into the given file, replacing any existing content. */
+	public static void save(String file, BaseDataset o) {
+		try {
+			FileOutputStream foutStream = new FileOutputStream(new File(file));
+			try {
+				ObjectOutputStream objOutputStream = new ObjectOutputStream(
+						new BufferedOutputStream(foutStream));
+				objOutputStream.writeObject(o);
+				objOutputStream.flush(); // drains both wrappers into the file stream
+			} finally {
+				foutStream.close(); // released even when serialization fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while saving data into file: '" + file + "'", e);
+		}
+	}
+
+	/*
+	 * Dataset name
+	 */
+	private String name = getClass().getSimpleName()
+			+ System.currentTimeMillis();
+
+	/*
+	 * All item ratings.
+	 */
+	private List<Rating> allRatings = new ArrayList<Rating>();
+
+	/*
+	 * Map of all users.
+	 */
+	private Map<Integer, User> allUsers = new HashMap<Integer, User>();
+
+	/*
+	 * Map of all items.
+	 */
+	private Map<Integer, Item> allItems = new HashMap<Integer, Item>();
+
+	/*
+	 * Map of item ratings by user id.
+	 */
+	Map<Integer, List<Rating>> ratingsByUserId = new HashMap<Integer, List<Rating>>();
+
+	Set<String> allTermsSet = new HashSet<String>();
+
+	/**
+	 * Auxiliary method for loading users one by one. This is for demonstration
+	 * purposes. Use other kind of loaders for loading data en masse.
+	 * 
+	 * @param u
+	 *            denotes a User who has rated certain items and we want to add
+	 *            his ratings in this dataset
+	 * @return true if no errors occurred and all data have been added.
+	 *         Otherwise, return false but do add whatever we can.
+	 */
+	public boolean add(User u) {
+
+		boolean addedUser = true;
+
+		// Auxiliary
+		Item item;
+
+		// Snapshot the user's ratings
+		Collection<Rating> urc = u.getAllRatings();
+		Rating[] uRatings = urc.toArray(new Rating[urc.size()]);
+
+		// Add the user (and the terms of the user's content) only once per id
+		if (!allUsers.containsKey(u.getId())) {
+			this.allUsers.put(u.getId(), u);
+
+			for (Content content : u.getUserContent()) {
+				updateTerms(content.getTerms());
+			}
+		}
+
+		for (Rating r : uRatings) {
+			if (!this.allRatings.add(r)) {
+				System.out.println("________________________________");
+				System.out.println("ERROR >> Could not add rating! ");
+				System.out.println("      >> User ID: " + r.getUserId());
+				System.out.println("      >> Item ID: " + r.getItemId());
+				System.out.println("      >> Rating : " + r.getRating());
+				System.out.println("________________________________");
+
+				addedUser = false;
+			}
+
+			item = r.getItem();
+
+			/*
+			 * Reuse existing item if it is available. Existing item contains
+			 * ratings of previously added users and we don't want to overwrite
+			 * them in case new item is a different instance with the same id.
+			 */
+			if (!allItems.containsKey(item.getId())) {
+				this.allItems.put(item.getId(), item);
+			}
+
+			// Populate item ratings if item doesn't have them
+			// Note that here we rely on all users/ratings sharing the same
+			// instance of an item.
+			if (item.getUserRating(u.getId()) == null) {
+				item.addUserRating(r);
+			}
+		}
+
+		return addedUser;
+	}
+
+	/*
+	 * Auxiliary method for loading items one by one. This is for demonstration
+	 * purposes. Can be used when we want to link users and items using item
+	 * content instead of rating. In such cases ratings won't be available and
+	 * as a result <code>add(User)</code> won't be able to derive any Items
+	 * through user ratings.
+	 */
+	public boolean addItem(Item item) {
+		boolean addedItem = false;
+		if (!allItems.containsKey(item.getId())) {
+			this.allItems.put(item.getId(), item);
+			addedItem = true;
+
+			Content content = item.getItemContent();
+			updateTerms(content.getTerms());
+		}
+		return addedItem;
+	}
+
+	public Item findItemByName(String name) {
+		Item matchedItem = null;
+		for (Item item : this.allItems.values()) {
+			if (name.equalsIgnoreCase(item.getName())) {
+				matchedItem = item;
+				break;
+			}
+		}
+		return matchedItem;
+
+	}
+
+	public User findUserByName(String name) {
+		User matchedUser = null;
+		for (User user : this.allUsers.values()) {
+			if (name.equalsIgnoreCase(user.getName())) {
+				matchedUser = user;
+				break;
+			}
+		}
+		return matchedUser;
+	}
+
+	public String[] getAllTerms() {
+		return allTermsSet.toArray(new String[allTermsSet.size()]);
+	}
+
+	public double getAverageItemRating(int itemId) {
+		return getItem(itemId).getAverageRating();
+	}
+
+	public double getAverageUserRating(int userId) {
+		return getUser(userId).getAverageRating();
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getItem(java.lang.Integer)
+	 */
+	public Item getItem(Integer itemId) {
+		return allItems.get(itemId);
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getItemCount()
+	 */
+	public int getItemCount() {
+		return allItems.size();
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getItems()
+	 */
+	public Collection<Item> getItems() {
+		return allItems.values();
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getName()
+	 */
+	public String getName() {
+		return name;
+	}
+
+	public List<Item> getRatedItems(Integer userId) {
+		List<Item> ratedItems = new ArrayList<Item>();
+		User user = getUser(userId);
+		Collection<Rating> userRatings = user.getAllRatings();
+		for (Rating r : userRatings) {
+			Item ratedItem = getItem(r.getItemId());
+			ratedItems.add(ratedItem);
+		}
+		return ratedItems;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getRatings()
+	 */
+	public Collection<Rating> getRatings() {
+		return this.allRatings;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getRatingsCount()
+	 */
+	public int getRatingsCount() {
+		return allRatings.size();
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getUser(java.lang.Integer)
+	 */
+	public User getUser(Integer userId) {
+		return allUsers.get(userId);
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getUserCount()
+	 */
+	public int getUserCount() {
+		return allUsers.size();
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.algos.reco.collab.model.Dataset#getUsers()
+	 */
+	public Collection<User> getUsers() {
+		return allUsers.values();
+	}
+
+	public boolean isIdMappingRequired() {
+		return true;
+	}
+
+	public ContentItem pickContentItem(String name) {
+		ContentItem contentItem = null;
+
+		for (Map.Entry<Integer, Item> entry : allItems.entrySet()) {
+			Item anItem = entry.getValue();
+			if (name.equals(anItem.getName())) {
+				contentItem = new ContentItem(entry.getValue());
+				break;
+			}
+		}
+		return contentItem;
+	}
+
+	public Item pickItem(String name) {
+		Item item = null;
+		for (Map.Entry<Integer, Item> entry : allItems.entrySet()) {
+			Item anItem = entry.getValue();
+			if (name.equals(anItem.getName())) {
+				item = entry.getValue();
+				break;
+			}
+		}
+		return item;
+	}
+
+	public User pickUser(String name) {
+		User user = null;
+		for (Map.Entry<Integer, User> entry : allUsers.entrySet()) {
+			User aUser = entry.getValue();
+			if (name.equals(aUser.getName())) {
+				user = entry.getValue();
+				break;
+			}
+		}
+		return user;
+	}
+
+	/**
+	 * Prints all ratings by item.
+	 */
+	public void printItemRatings() {
+		System.out.println("\nItem ratings:\n");
+		for (Item item : allItems.values()) {
+			System.out.println("Item: " + item.getName());
+			for (Rating r : item.getAllRatings()) {
+				User user = this.allUsers.get(r.getUserId());
+				System.out.println(" Rated by " + user.getName() + " as "
+						+ r.getRating());
+			}
+		}
+	}
+
+	/**
+	 * Prints all ratings by user.
+	 */
+	public void printUserRatings() {
+		System.out.println("\nUser ratings:\n");
+		for (User user : allUsers.values()) {
+			System.out.println("User: " + user.getName());
+			for (Rating r : user.getAllRatings()) {
+				Item item = allItems.get(r.getItemId());
+				System.out.println(" Rated " + item.getName() + " as "
+						+ r.getRating());
+			}
+		}
+	}
+
+	public void save(String file) {
+		BaseDataset.save(file, this);
+		System.out.println("saved dataset into file");
+	}
+
+	private void updateTerms(String[] terms) {
+		for (String term : terms) {
+			allTermsSet.add(term);
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/ContentItem.java b/src/org/yooreeka/algos/reco/collab/data/ContentItem.java
new file mode 100644
index 0000000..d754247
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/ContentItem.java
@@ -0,0 +1,59 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Item for news dataset.
+ */
+public class ContentItem extends Item {
+
+	/**
+	 * Serialization version identifier.
+	 */
+	private static final long serialVersionUID = 6349342365379966975L;
+
+	public ContentItem(int id, String name, Content content) {
+		super(id, name, new ArrayList<Rating>(3));
+		this.setItemContent(content);
+	}
+
+	// Copies id, name and content of val; built with a fresh, empty rating list.
+	public ContentItem(Item val) {
+		this(val.getId(), val.getName(), val.getItemContent());
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/DiggData.java b/src/org/yooreeka/algos/reco/collab/data/DiggData.java
new file mode 100644
index 0000000..7da7fad
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/DiggData.java
@@ -0,0 +1,361 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.supercsv.io.CsvListReader;
+import org.supercsv.io.CsvListWriter;
+import org.supercsv.prefs.CsvPreference;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.RecommendationType;
+import org.yooreeka.algos.reco.collab.model.SimilarItem;
+import org.yooreeka.algos.reco.collab.recommender.Delphi;
+import org.yooreeka.algos.reco.content.digg.DiggService;
+import org.yooreeka.algos.reco.content.digg.DiggStoryItem;
+import org.yooreeka.algos.reco.content.digg.DiggUser;
+
+public class DiggData {
+
+	public static List<DiggUser> allUsers = new ArrayList<DiggUser>();
+	public static List<DiggStoryItem> allStories = new ArrayList<DiggStoryItem>();
+
+	private static final String[] CSV_ITEM_HEADERS = new String[] { "id",
+			"username", "title", "category", "topic", "description", "link",
+			"userid" };
+
+	/**
+	 * Builds a dataset from the users and stories currently held in
+	 * {@link #allUsers} and {@link #allStories}. For every story of a user,
+	 * biased ratings are generated for a random 70% of the stories that the
+	 * content-based {@link Delphi} reports as similar (requested with topN = 10).
+	 * A summary line per user and one for the whole dataset go to standard output.
+	 *
+	 * @return the dataset to which every user was added after its ratings were set.
+	 */
+	public static BaseDataset createDataset() {
+
+		final int topN = 10;
+		BaseDataset dataset = new BaseDataset();
+		Delphi contentDelphi = createItemContentDelphi();
+
+		for (DiggUser user : allUsers) {
+			for (DiggStoryItem story : findItemsByUsername(user.getName())) {
+
+				// similar items across all categories
+				SimilarItem[] similar = contentDelphi.findSimilarItems(story,
+						topN);
+
+				// The rating range is biased by the first letter of the user
+				// name: up to 'd' gives 4..5, any other letter gives 1..3
+				boolean upToD = user.getName().toLowerCase().charAt(0) <= 'd';
+				int lowRating = upToD ? 4 : 1;
+				int highRating = upToD ? 5 : 3;
+
+				// rate a random 70% of the similar items
+				Item[] sample = pickRandomItems(similar, 0.7);
+				RatingBuilder builder = new RatingBuilder();
+				for (Rating rating : builder.createBiasedRatings(user.getId(),
+						sample, lowRating, highRating)) {
+					user.addRating(rating);
+				}
+			}
+
+			dataset.add(user);
+			System.out.println("Generated " + user.getAllRatings().size()
+					+ " ratings for user id: " + user.getId() + ", name: "
+					+ user.getName() + ", average rating: "
+					+ user.getAverageRating());
+		}
+
+		System.out.println("Created Dataset with " + dataset.getUserCount()
+				+ " users, " + dataset.getItemCount() + " items, "
+				+ dataset.getRatingsCount() + " ratings.");
+
+		return dataset;
+	}
+
+	private static Delphi createItemContentDelphi() {
+		BaseDataset ds = new BaseDataset();
+		for (DiggUser user : allUsers) {
+			ds.add(user);
+		}
+
+		for (DiggStoryItem item : allStories) {
+			System.out.println("Description:" + item.getDescription());
+			ds.addItem(item);
+		}
+
+		return new Delphi(ds, RecommendationType.ITEM_CONTENT_BASED, true);
+	}
+
+	private static List<DiggStoryItem> findItemsByUsername(String username) {
+		List<DiggStoryItem> items = new ArrayList<DiggStoryItem>();
+		for (DiggStoryItem item : allStories) {
+			if (item.getUsername().equals(username)) {
+				items.add(item);
+			}
+		}
+		return items;
+	}
+
+	/**
+	 * @return first user in allUsers named <code>username</code>, or null.
+	 */
+	private static DiggUser findUserByUsername(String username) {
+		for (DiggUser candidate : allUsers) {
+			if (candidate.getName().equals(username)) {
+				return candidate;
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * Loads users and stories from a csv file, replacing the current lists.
+	 * @param filename csv file with the columns written by saveData.
+	 * @return the result of {@link #createDataset()} for the loaded data.
+	 */
+	public static BaseDataset loadData(String filename) {
+
+		allStories = new ArrayList<DiggStoryItem>();
+		allUsers = new ArrayList<DiggUser>();
+
+		CsvListReader csvReader = null;
+		try {
+			csvReader = new CsvListReader(new BufferedReader(new FileReader(
+					filename)), CsvPreference.EXCEL_PREFERENCE);
+
+			csvReader.getCSVHeader(true);
+
+			List<String> line = null;
+			while ((line = csvReader.read()) != null) {
+				try {
+					int id = Integer.valueOf(line.get(0));
+					String username = line.get(1);
+					String title = line.get(2);
+					String category = line.get(3);
+					String topic = line.get(4);
+					String description = line.get(5);
+					String link = line.get(6);
+					int userid = Integer.valueOf(line.get(7));
+
+					DiggUser user = findUserByUsername(username);
+					if (user == null) {
+						user = new DiggUser(userid, username);
+						allUsers.add(user);
+					}
+
+					DiggStoryItem item = new DiggStoryItem(id, title,
+							description);
+					item.setUsername(username);
+					item.setCategory(category);
+					item.setTopic(topic);
+					item.setLink(link);
+					allStories.add(item);
+
+					// adding item content to the user
+					user.addUserContent(item.getItemContent());
+				} catch (Exception e) {
+					throw new RuntimeException("Error while reading item: ", e);
+				}
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Error while reading digg items from csv file.", e);
+		} finally {
+			try {
+				if (csvReader != null) {
+					csvReader.close();
+				}
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+
+		System.out.println("From file: " + filename);
+		System.out.println("Loaded " + allUsers.size() + " users.");
+		System.out.println("Loaded " + allStories.size() + " stories (items).");
+
+		return DiggData.createDataset();
+	}
+
+	/**
+	 * Loads users and stories through DiggService, saves them into a csv file
+	 * and returns the result of {@link #createDataset()}.
+	 * @param filename file that will be used to save the data.
+	 */
+	public static BaseDataset loadDataFromDigg(String filename) {
+
+		allUsers.clear();
+		allStories.clear();
+
+		Set<String> allKnownUsers = new HashSet<String>();
+		Set<Integer> allKnownStories = new HashSet<Integer>();
+
+		DiggService news = new DiggService();
+		news.setItemCountPerCategory(5);
+		// Top stories across a set of categories (Technology, Sports, ...)
+		List<DiggStoryItem> topStories = news.getAllStories();
+
+		// used to assign unique id to each user
+		int nextUserId = 1;
+
+		// iterate through top stories and collect a set of users
+		for (DiggStoryItem item : topStories) {
+			String username = item.getUsername();
+			if (!allKnownUsers.contains(username)) {
+				allKnownUsers.add(username);
+				int userId = nextUserId++;
+				DiggUser diggUser = new DiggUser(userId, username);
+				allUsers.add(diggUser);
+			}
+		}
+
+		// for every user retrieve up to 5 stories
+		int maxStories = 5;
+		for (DiggUser user : allUsers) {
+			List<DiggStoryItem> userItems = news.getUserStories(user.getName(),
+					maxStories);
+
+			for (DiggStoryItem i : userItems) {
+				if (!allKnownStories.contains(i.getId())) {
+					allStories.add(i);
+					allKnownStories.add(i.getId());
+				} else {
+					System.out.println("Duplicate story: id=" + i.getId()
+							+ ", name=" + i.getName());
+				}
+				// adding item content to the user
+				user.addUserContent(i.getItemContent());
+			}
+		}
+		System.out.println("From Digg:");
+		System.out.println("Loaded " + allUsers.size() + " users.");
+		System.out.println("Loaded " + allStories.size() + " stories (items).");
+
+		DiggData.saveData(filename);
+		return DiggData.createDataset();
+	}
+
+	/** Randomly picks round(percentOfAllItems * items.length) items, never two with the same id. */
+	private static Item[] pickRandomItems(SimilarItem[] items,
+			double percentOfAllItems) {
+		if (percentOfAllItems < 0.0 || percentOfAllItems > 1.0) {
+			throw new IllegalArgumentException(
+					"Value for 'percentOfAllItems' argument should be between 0 and 1.");
+		}
+		Map<Integer, Item> distinct = new HashMap<Integer, Item>();
+		for (SimilarItem similar : items) {
+			distinct.put(similar.getItem().getId(), similar.getItem());
+		}
+		// Drop random surplus items: terminates even when 'items' repeats an id
+		List<Item> pool = new ArrayList<Item>(distinct.values());
+		int sampleSize = (int) Math.round(percentOfAllItems * items.length);
+		Random rand = new Random();
+		while (pool.size() > sampleSize) {
+			pool.remove(rand.nextInt(pool.size()));
+		}
+		return pool.toArray(new Item[pool.size()]);
+	}
+
+	/**
+	 * Saves every story as one csv row (columns as in CSV_ITEM_HEADERS); the
+	 * userid column holds the id of the user whose name is the story's username.
+	 * @param filename csv file to write.
+	 */
+	public static void saveData(String filename) {
+		String[] data = new String[CSV_ITEM_HEADERS.length];
+
+		CsvListWriter csvWriter = null;
+		try {
+			csvWriter = new CsvListWriter(new BufferedWriter(new FileWriter(
+					filename)), CsvPreference.EXCEL_PREFERENCE);
+
+			csvWriter.writeHeader(CSV_ITEM_HEADERS);
+
+			for (DiggStoryItem item : allStories) {
+				try {
+					data[0] = String.valueOf(item.getId());
+					data[1] = item.getUsername();
+					data[2] = item.getTitle();
+					data[3] = item.getCategory();
+					data[4] = item.getTopic();
+					data[5] = item.getDescription();
+					data[6] = item.getLink();
+					DiggUser user = findUserByUsername(item.getUsername());
+					data[7] = String.valueOf(user.getId());
+					csvWriter.write(data);
+				} catch (Exception e) {
+					throw new RuntimeException("Error while writing item "
+							+ item.getName() + ": ", e);
+				}
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Error while writing digg items into csv file.", e);
+		} finally {
+			try {
+				if (csvWriter != null) {
+					csvWriter.close();
+				}
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+
+		System.out.println("Saved data into file: " + filename);
+		System.out.println("saved " + allUsers.size() + " users.");
+		System.out.println("saved " + allStories.size() + " stories (items).");
+
+	}
+
+	public static void showUsers() {
+		System.out.println("All Users:");
+		for (DiggUser user : allUsers) {
+			System.out.println("User id:" + user.getId() + ", name: "
+					+ user.getName());
+		}
+
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/HTMLContent.java b/src/org/yooreeka/algos/reco/collab/data/HTMLContent.java
new file mode 100644
index 0000000..479e35d
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/HTMLContent.java
@@ -0,0 +1,99 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.util.parsing.html.HTMLDocumentParser;
+import org.yooreeka.util.parsing.html.HTMLDocumentParserException;
+
+public class HTMLContent extends Content {
+
+	/** SVUID */
+	private static final long serialVersionUID = -354667863913509004L;
+
+	/**
+	 * Returns the text of the HTML document parsed from the given file.
+	 *
+	 * @throws RuntimeException if the file cannot be read or parsed.
+	 */
+	private static String extractContentFromHtmlDoc(File htmlFile) {
+
+		FileInputStream fis = null;
+
+		try {
+			fis = new FileInputStream(htmlFile);
+			Reader reader = new InputStreamReader(new BufferedInputStream(fis));
+			HTMLDocumentParser htmlParser = new HTMLDocumentParser(reader);
+
+			return htmlParser.getHtmlDoc().getText();
+
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		} catch (HTMLDocumentParserException e) {
+			// fail like the I/O case instead of handing null text to Content
+			throw new RuntimeException("Failed to parse HTML document: "
+					+ htmlFile, e);
+		} finally {
+			if (fis != null) {
+				try {
+					fis.close();
+				} catch (IOException e) {
+					e.printStackTrace();
+				}
+			}
+		}
+	}
+
+	public HTMLContent(String id, File htmlDocFile) {
+		super(id, extractContentFromHtmlDoc(htmlDocFile));
+	}
+
+	public HTMLContent(String id, File htmlDocFile, int topNTerms) {
+		super(id, extractContentFromHtmlDoc(htmlDocFile), topNTerms);
+	}
+
+	public HTMLContent(String id, String htmlDocFilename) {
+		super(id, extractContentFromHtmlDoc(new File(htmlDocFilename)));
+	}
+
+	public HTMLContent(String id, String htmlDocFilename, int topNTerms) {
+		super(id, extractContentFromHtmlDoc(new File(htmlDocFilename)),
+				topNTerms);
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/MovieLensData.java b/src/org/yooreeka/algos/reco/collab/data/MovieLensData.java
new file mode 100644
index 0000000..6ad655c
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/MovieLensData.java
@@ -0,0 +1,83 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.io.File;
+
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Utility class to create MovieLens dataset.
+ */
+public class MovieLensData {
+
+	/**
+	 * Loads MovieLens dataset from the configured directory, without test ratings.
+	 */
+	public static MovieLensDataset createDataset() {
+		int numOfTestRatings = 0;
+		return createDataset(numOfTestRatings);
+	}
+
+	public static MovieLensDataset createDataset(int numOfTestRatings) {
+		return createDataset(
+				YooreekaConfigurator.getProperty("iweb2.movielens.data.dir"),
+				numOfTestRatings);
+	}
+
+	/**
+	 * Loads MovieLens dataset from specified directory.
+	 * 
+	 * @param dataDir directory that contains MovieLens files.
+	 * @param numOfTestRatings passed on to the MovieLensDataset constructor.
+	 * @return the loaded dataset.
+	 */
+	public static MovieLensDataset createDataset(String dataDir,
+			int numOfTestRatings) {
+		File users = new File(dataDir, MovieLensDataset.USERS_FILENAME);
+		File items = new File(dataDir, MovieLensDataset.ITEMS_FILENAME);
+		File ratings = new File(dataDir, MovieLensDataset.RATINGS_FILENAME);
+
+		System.out.println("*** Loading MovieLens dataset...");
+		System.out.println("make sure that you are using at least: -Xmx1024m");
+
+		MovieLensDataset dataSet = new MovieLensDataset(users, items, ratings,
+				numOfTestRatings);
+
+		System.out.println("\n*** Loaded MovieLens dataset.");
+		System.out.println("users: " + dataSet.getUserCount());
+		System.out.println("movies: " + dataSet.getItemCount());
+		System.out.println("ratings: " + dataSet.getRatingsCount());
+		System.out.println("test ratings: " + dataSet.getTestRatings().size());
+
+		return dataSet;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/MovieLensDataset.java b/src/org/yooreeka/algos/reco/collab/data/MovieLensDataset.java
new file mode 100644
index 0000000..74af7cf
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/MovieLensDataset.java
@@ -0,0 +1,385 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * Dataset implementation that we will use to work with MovieLens data. All data
+ * is loaded from three files: users, movies (items), and ratings.
+ */
+public class MovieLensDataset implements Dataset {
+
+	public static final String USERS_FILENAME = "users.dat";
+	public static final String ITEMS_FILENAME = "movies.dat";
+	public static final String RATINGS_FILENAME = "ratings.dat";
+
+	/*
+	 * Delimiter that is used by MovieLens data files.
+	 */
+	private static final String FIELD_DELIMITER = "::";
+
+	/**
+	 * Saves provided ratings into a new file, e.g. to split the ratings of the
+	 * MovieLens data set into separate files for training and testing.
+	 * @param f file to write to.
+	 * @param ratings ratings to save.
+	 * @throws RuntimeException if the file cannot be opened or written.
+	 */
+	public static void createNewRatingsFile(File f, Collection<Rating> ratings) {
+		try {
+			PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(f)));
+			try {
+				for (Rating rating : ratings) {
+					pw.println(rating.getUserId() + FIELD_DELIMITER + rating.getItemId()
+							+ FIELD_DELIMITER + rating.getRating());
+				}
+				if (pw.checkError()) { // flushes; PrintWriter never throws on write
+					throw new IOException("write error reported by PrintWriter");
+				}
+			} finally {
+				pw.close(); // previously leaked when writing a rating failed
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Failed to save rating into file (file: '"
+							+ f.getAbsolutePath() + "').", e);
+		}
+	}
+
+	private static BufferedReader getReader(File f)
+			throws FileNotFoundException {
+		return new BufferedReader(new FileReader(f));
+	}
+
+	/**
+	 * Reads one rating (user id, item id, rating) per line; close failures are only printed.
+	 */
+	public static List<Rating> loadRatings(File f) {
+		List<Rating> loaded = new ArrayList<Rating>();
+		BufferedReader in = null;
+		try {
+			in = getReader(f);
+			for (String row = in.readLine(); row != null; row = in.readLine()) {
+				String[] fields = parseLine(row);
+				int userId = Integer.parseInt(fields[0]);
+				int itemId = Integer.parseInt(fields[1]);
+				int rating = Integer.parseInt(fields[2]);
+				loaded.add(new Rating(userId, itemId, rating));
+			}
+		} catch (IOException ioe) {
+			throw new RuntimeException(
+					"Failed to load rating from file (file: '"
+							+ f.getAbsolutePath() + "'): ", ioe);
+		} finally {
+			if (in != null) {
+				try {
+					in.close();
+				} catch (Exception closeFailure) {
+					System.out.println("ERROR: \n");
+					System.out.println(closeFailure.getMessage()
+							+ "\n while closing file reader (file: '"
+							+ f.getAbsolutePath() + "'): ");
+				}
+			}
+		}
+		return loaded;
+	}
+
+	private static String[] parseLine(String line) {
+		// possible field delimiters: "::", "\t", "|"
+		return line.split("::|\t|\\|");
+	}
+	/*
+	 * All item ratings.
+	 */
+	private List<Rating> allRatings = new ArrayList<Rating>();
+
+	/*
+	 * Map of all users.
+	 */
+	private Map<Integer, User> allUsers = new HashMap<Integer, User>();
+
+	/*
+	 * Map of all items.
+	 */
+	private Map<Integer, Item> allItems = new HashMap<Integer, Item>();
+
+	/*
+	 * Parameters for test dataset
+	 */
+	private int numberOfTestRatings = 0;
+
+	private List<Rating> testRatings = new ArrayList<Rating>();
+
+	/*
+	 * Map of item ratings by item id.
+	 */
+	private Map<Integer, List<Rating>> ratingsByItemId = new HashMap<Integer, List<Rating>>();
+
+	/*
+	 * Map of item ratings by user id.
+	 */
+	Map<Integer, List<Rating>> ratingsByUserId = new HashMap<Integer, List<Rating>>();
+
+	private String name;
+
+	public MovieLensDataset(File users, File movies, File ratings) {
+		name = getClass().getSimpleName() + System.currentTimeMillis();
+		loadData(users, movies, ratings, null);
+	}
+
+	public MovieLensDataset(File users, File movies, File ratings,
+			int numOfTestRatings) {
+		name = getClass().getSimpleName() + System.currentTimeMillis();
+		this.numberOfTestRatings = numOfTestRatings;
+		loadData(users, movies, ratings, null);
+	}
+
+	public MovieLensDataset(String name, File users, File movies, File ratings) {
+
+		this.name = name;
+		loadData(users, movies, ratings, null);
+	}
+
+	public MovieLensDataset(String name, File users, File items,
+			List<Rating> ratings) {
+
+		this.name = name;
+		loadData(users, items, null, ratings);
+	}
+
+	/** Appends rating to the list stored under key, creating that list on first use. */
+	private void addRatingToMap(Map<Integer, List<Rating>> map, Integer key,
+			Rating rating) {
+		List<Rating> bucket = map.get(key);
+		if (bucket == null) {
+			map.put(key, bucket = new ArrayList<Rating>());
+		}
+		bucket.add(rating);
+	}
+
+	/**
+	 * Creates an item with the ratings recorded for its id and links them to it.
+	 */
+	private Item createNewItem(int itemId, String name) {
+		List<Rating> known = ratingsByItemId.get(itemId);
+		List<Rating> itemRatings = (known != null) ? known
+				: new ArrayList<Rating>();
+
+		Item created = new Item(itemId, name, itemRatings);
+		// establish link between rating and item
+		for (Rating linked : itemRatings) {
+			linked.setItem(created);
+		}
+		return created;
+	}
+
+	public String[] getAllTerms() {
+		return new String[0];
+	}
+
+	public double getAverageItemRating(int itemId) {
+		return getItem(itemId).getAverageRating();
+	}
+
+	public double getAverageUserRating(int userId) {
+		return getUser(userId).getAverageRating();
+	}
+
+	public Item getItem(Integer itemId) {
+		return allItems.get(itemId);
+	}
+
+	public int getItemCount() {
+		return allItems.size();
+	}
+
+	public Collection<Item> getItems() {
+		return allItems.values();
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public Collection<Rating> getRatings() {
+		return this.allRatings;
+	}
+
+	public int getRatingsCount() {
+		return allRatings.size();
+	}
+
+	public Collection<Rating> getTestRatings() {
+		return this.testRatings;
+	}
+
+	public User getUser(Integer userId) {
+		return allUsers.get(userId);
+	}
+
+	public int getUserCount() {
+		return allUsers.size();
+	}
+
+	public Collection<User> getUsers() {
+		return allUsers.values();
+	}
+
+	public boolean isIdMappingRequired() {
+		return false;
+	}
+
+	/**
+	 * Populates this dataset: ratings first, then users and items, so that each
+	 * user and item is created together with the ratings relevant to it.
+	 *
+	 * @param ratingsFile read only when <code>ratings</code> is null.
+	 * @param ratings preloaded ratings; used (and stored) as is when not null.
+	 */
+	private void loadData(File usersFile, File itemsFile, File ratingsFile,
+			List<Rating> ratings) {
+		/* Load all available ratings */
+		allRatings = (ratings != null) ? ratings : loadRatings(ratingsFile);
+
+		/* Exclude ratings if needed */
+		withholdRatings();
+
+		/* build maps that provide access to ratings by userId or itemId */
+		for (Rating r : allRatings) {
+			addRatingToMap(ratingsByItemId, r.getItemId(), r);
+			addRatingToMap(ratingsByUserId, r.getUserId(), r);
+		}
+
+		try {
+			allUsers = loadUsers(usersFile);
+			allItems = loadItems(itemsFile);
+		} catch (IOException ioe) {
+			throw new RuntimeException("Failed to load MovieLens data: ", ioe);
+		}
+	}
+
+	/**
+	 * Loads items from the movies file. Ids skipped by the file are filled with
+	 * artificial "Missing-Item-N" items, because user and movie (item) ids are
+	 * assumed to be sequences without gaps that start with 1.
+	 *
+	 * @param moviesFile one movie per line; first field is the id, second the title.
+	 * @throws IOException if the file cannot be opened or read.
+	 */
+	private Map<Integer, Item> loadItems(File moviesFile) throws IOException {
+
+		Map<Integer, Item> items = new HashMap<Integer, Item>();
+
+		BufferedReader reader = getReader(moviesFile);
+		try {
+			String line = null;
+			int lastId = 0;
+			while ((line = reader.readLine()) != null) {
+
+				String[] tokens = parseLine(line);
+
+				/* at the moment we are only interested in movie id and title */
+				int itemId = Integer.parseInt(tokens[0]);
+				String title = tokens[1];
+
+				/* create artificial items for the ids that the file skipped */
+				for (int i = lastId + 1; i < itemId; i++) {
+					Item missingItem = createNewItem(i, "Missing-Item-" + i);
+					items.put(missingItem.getId(), missingItem);
+				}
+
+				Item item = createNewItem(itemId, title);
+
+				items.put(item.getId(), item);
+				lastId = item.getId();
+			}
+		} finally {
+			// the reader used to be left open
+			reader.close();
+		}
+		return items;
+	}
+
+	/** Loads the users file; each user gets the ratings recorded under its id. */
+	private Map<Integer, User> loadUsers(File usersFile) throws IOException {
+		Map<Integer, User> users = new HashMap<Integer, User>();
+		BufferedReader reader = getReader(usersFile);
+		try {
+			String line = null;
+			while ((line = reader.readLine()) != null) {
+				String[] tokens = parseLine(line);
+				/* at the moment we are only interested in user id */
+				int userId = Integer.parseInt(tokens[0]);
+				List<Rating> known = ratingsByUserId.get(userId);
+				User user = new User(userId,
+						(known != null) ? known : new ArrayList<Rating>());
+				users.put(user.getId(), user);
+			}
+		} finally {
+			reader.close(); // the reader used to be left open
+		}
+		return users;
+	}
+
+	public void setTestRatingsCount(int numberOfRatings) {
+		this.numberOfTestRatings = numberOfRatings;
+	}
+
+	/** Moves up to numberOfTestRatings randomly chosen ratings into testRatings. */
+	private void withholdRatings() {
+		Random rnd = new Random();
+		// stop when no ratings are left: nextInt(0) would throw otherwise
+		while (testRatings.size() < numberOfTestRatings && !allRatings.isEmpty()) {
+			testRatings.add(allRatings.remove(rnd.nextInt(allRatings.size())));
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/MusicData.java b/src/org/yooreeka/algos/reco/collab/data/MusicData.java
new file mode 100644
index 0000000..d3682a8
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/MusicData.java
@@ -0,0 +1,256 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Utility class that we use as the source for Music data.
+ */
+public class MusicData {
+
+	public static final String[] USERS = { "Albert", "Alexandra", "Athena",
+			"Aurora", "Babis", "Bill", "Bob", "Carl", "Catherine", "Charlie",
+			"Constantine", "Dmitry", "Elena", "Eric", "Frank", "George",
+			"Jack", "John", "Maria", "Lukas", "Nick", "Terry", "Todd" }; // a name's array index is used as the user id
+
+	public static final String[] MUSIC_SAMPLES = {
+			"You've Lost That Lovin' Feelin'", "Mrs. Robinson",
+			"Wind Beneath My Wings", "Fiddler On The Roof", "La Bamba",
+			"Wizard Of Oz", "White Christmas", "Let It Be", "Yesterday",
+			"Singing In The Rain", "Sunday, Bloody Sunday", "Tears In Heaven",
+			"Beethoven: Symphony No. 9 in D minor",
+			"Bach: The Brandenburg Concerti", "Mozart: Symphony #41 (Jupiter)",
+			"What A Wonderful World", "I Love Rock And Roll",
+			"Albinoni: Adagio In G Minor", "Vivaldi: Four Seasons" }; // a title's array index is used as the item id
+
+	/**
+	 * Builds data set with all the users where each user rates 80% of all the
+	 * songs. User ratings created randomly with bias:
+	 * <ul>
+	 * <li>Users whose name starts from A to D will have ratings between 4 and
+	 * 5.</li>
+	 * <li>Users whose name starts from E to Z will have ratings between 1 and
+	 * 3.</li>
+	 * </ul>
+	 */
+	public static BaseDataset createDataset() {
+		// Builds the music dataset: one MusicUser per entry of USERS, each with
+		// biased random ratings for a random subset of the songs.
+
+		// Fraction of the whole catalogue that every user ends up rating.
+		final double songShare = 0.80;
+
+		BaseDataset dataset = new BaseDataset();
+
+		// The catalogue is built once; each user samples from it below.
+		MusicItem[] catalogue = loadAllMusicItems();
+
+		// A name's position in USERS doubles as that user's id.
+		for (int userId = 0; userId < USERS.length; userId++) {
+			String userName = USERS[userId];
+
+			// Names starting with a letter up to 'd' get the high band
+			// (4 to 5); every other name gets the low band (1 to 3).
+			boolean highBand = userName.toLowerCase().charAt(0) <= 'd';
+			int lowRating = highBand ? 4 : 1;
+			int highRating = highBand ? 5 : 3;
+
+			// An independent random sample of songs for this user.
+			MusicItem[] ratedSongs = pickRandomSongs(catalogue, songShare);
+
+			// Presumably keeps ratings within the band; confirm in RatingBuilder.
+			RatingBuilder builder = new RatingBuilder();
+			List<Rating> ratings = builder.createBiasedRatings(userId,
+					ratedSongs, lowRating, highRating);
+
+			MusicUser user = new MusicUser(userId, userName, ratings);
+			dataset.add(user);
+		}
+		return dataset;
+	}
+
+	private static MusicItem createItem(String song) {
+		// A song's index in MUSIC_SAMPLES is its item id. Matching ignores
+		// case; the item is named with the spelling from MUSIC_SAMPLES.
+
+		for (int idx = 0; idx < MUSIC_SAMPLES.length; idx++) {
+			String knownSong = MUSIC_SAMPLES[idx];
+			if (knownSong.equalsIgnoreCase(song)) {
+				return new MusicItem(idx, knownSong);
+			}
+		}
+
+		// Nothing matched: the name is not one of the predefined songs.
+		throw new IllegalArgumentException("Invalid song name: '" + song
+				+ "'. This song is not on the list of predefined songs.");
+	}
+
+	private static MusicUser createUser(String name) {
+		// A name's index in USERS is the user id. Matching ignores case, but
+		// the user keeps the spelling that the caller passed in.
+
+		for (int idx = 0; idx < USERS.length; idx++) {
+			String knownName = USERS[idx];
+			if (knownName.equalsIgnoreCase(name)) {
+				return new MusicUser(idx, name);
+			}
+		}
+
+		// Nothing matched: the name is not one of the predefined users.
+		throw new IllegalArgumentException("Invalid user name: '" + name
+				+ "'. Name is not on the list of predefined user names.");
+	}
+
+	/**
+	 * Returns array of new MusicItem instances for every song listed in
+	 * <code>MUSIC_SAMPLES</code> array.
+	 */
+	private static MusicItem[] loadAllMusicItems() {
+		MusicItem[] allItems = new MusicItem[MusicData.MUSIC_SAMPLES.length];
+		for (int i = 0, n = allItems.length; i < n; i++) {
+			int id = i;
+			String name = MusicData.MUSIC_SAMPLES[i];
+			MusicItem item = new MusicItem(id, name);
+			allItems[i] = item;
+		}
+		return allItems;
+	}
+
+	public static MusicUser[] loadExample() { // hand-written example with three users
+		MusicUser[] mu = new MusicUser[3];
+		// createUser() derives each id from the name's position in USERS
+		mu[0] = createUser("Frank");
+		mu[1] = createUser("Constantine");
+		mu[2] = createUser("Catherine");
+
+		MusicItem[] mi = new MusicItem[11];
+		// createItem() derives each id from the title's position in MUSIC_SAMPLES
+		mi[0] = createItem("Tears In Heaven");
+		mi[1] = createItem("La Bamba");
+		mi[2] = createItem("Mrs. Robinson");
+		mi[3] = createItem("Yesterday");
+		mi[4] = createItem("Wizard Of Oz");
+		mi[5] = createItem("Mozart: Symphony #41 (Jupiter)");
+		mi[6] = createItem("Beethoven: Symphony No. 9 in D minor");
+		mi[7] = createItem("Fiddler On The Roof");
+		mi[8] = createItem("What A Wonderful World");
+		mi[9] = createItem("Let It Be");
+		mi[10] = createItem("Sunday, Bloody Sunday");
+
+		ArrayList<Rating> mr0 = new ArrayList<Rating>();
+		ArrayList<Rating> mr1 = new ArrayList<Rating>();
+		ArrayList<Rating> mr2 = new ArrayList<Rating>();
+
+		/*
+		 * Frank (mu[0]) rates mi[0]..mi[6]: Tears In Heaven, La Bamba,
+		 * Mrs. Robinson, Yesterday, Wizard Of Oz, Mozart: Symphony #41
+		 * (Jupiter) and Beethoven: Symphony No. 9 in D minor.
+		 */
+		mr0.add(new MusicRating(mu[0].getId(), mi[0].getId(), 5));
+		mr0.add(new MusicRating(mu[0].getId(), mi[1].getId(), 4));
+		mr0.add(new MusicRating(mu[0].getId(), mi[2].getId(), 5));
+		mr0.add(new MusicRating(mu[0].getId(), mi[3].getId(), 4));
+		mr0.add(new MusicRating(mu[0].getId(), mi[4].getId(), 5));
+		mr0.add(new MusicRating(mu[0].getId(), mi[5].getId(), 4));
+		mr0.add(new MusicRating(mu[0].getId(), mi[6].getId(), 5));
+
+		/*
+		 * Constantine (mu[1]) rates mi[0], mi[7], mi[2], mi[8], mi[4], mi[9],
+		 * mi[5]: Tears In Heaven, Fiddler On The Roof, Mrs. Robinson, What A
+		 * Wonderful World, Wizard Of Oz, Let It Be, Mozart: Symphony #41.
+		 */
+
+		mr1.add(new MusicRating(mu[1].getId(), mi[0].getId(), 5));
+		mr1.add(new MusicRating(mu[1].getId(), mi[7].getId(), 5));
+		mr1.add(new MusicRating(mu[1].getId(), mi[2].getId(), 5));
+		mr1.add(new MusicRating(mu[1].getId(), mi[8].getId(), 4));
+		mr1.add(new MusicRating(mu[1].getId(), mi[4].getId(), 4));
+		mr1.add(new MusicRating(mu[1].getId(), mi[9].getId(), 5));
+		mr1.add(new MusicRating(mu[1].getId(), mi[5].getId(), 5));
+
+		/*
+		 * Catherine (mu[2]) rates mi[0], mi[2], mi[3], mi[6], mi[10], mi[3]
+		 * and mi[9]. NOTE(review): Yesterday (mi[3]) is rated twice, 2 then 1;
+		 * which one survives depends on setRatings() -- confirm the intent.
+		 */
+		mr2.add(new MusicRating(mu[2].getId(), mi[0].getId(), 1));
+		mr2.add(new MusicRating(mu[2].getId(), mi[2].getId(), 2));
+		mr2.add(new MusicRating(mu[2].getId(), mi[3].getId(), 2));
+		mr2.add(new MusicRating(mu[2].getId(), mi[6].getId(), 3));
+		mr2.add(new MusicRating(mu[2].getId(), mi[10].getId(), 1));
+		mr2.add(new MusicRating(mu[2].getId(), mi[3].getId(), 1));
+		mr2.add(new MusicRating(mu[2].getId(), mi[9].getId(), 2));
+		// attach each rating list to its user
+		mu[0].setRatings(mr0);
+		mu[1].setRatings(mr1);
+		mu[2].setRatings(mr2);
+
+		return mu;
+	}
+
+	/**
+	 * Returns a random selection of songs.
+	 * 
+	 * @param songs
+	 *            list of songs to pick from
+	 * @param percentOfAllSongs
+	 *            determines size of returned selection.
+	 * 
+	 * @return array of songs.
+	 */
+	private static MusicItem[] pickRandomSongs(MusicItem[] songs,
+			double percentOfAllSongs) {
+
+		if (percentOfAllSongs < 0.0 || percentOfAllSongs > 1.0) {
+			throw new IllegalArgumentException(
+					"Value for 'percentOfAllSongs' argument should be between 0 and 1.");
+		}
+		Random rand = new Random();
+		int sampleSize = (int) Math.round(percentOfAllSongs * songs.length);
+		Map<Integer, MusicItem> pickedItems = new HashMap<Integer, MusicItem>();
+		while (pickedItems.size() < sampleSize) {
+			int songId = rand.nextInt(songs.length);
+			MusicItem song = songs[songId];
+			if (!pickedItems.containsKey(song.getId())) {
+				pickedItems.put(song.getId(), song);
+			}
+		}
+
+		return pickedItems.values().toArray(new MusicItem[pickedItems.size()]);
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/MusicItem.java b/src/org/yooreeka/algos/reco/collab/data/MusicItem.java
new file mode 100644
index 0000000..ab5e823
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/MusicItem.java
@@ -0,0 +1,71 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Item for music dataset.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class MusicItem extends Item {
+
+	/**
+	 * Serial version UID; presumably Item is Serializable -- TODO confirm.
+	 */
+	private static final long serialVersionUID = 3219691524340585231L;
+
+	String artist; // never set by the constructor: null until setArtist() is called
+
+	public MusicItem(int id, String name) { // passes an empty rating list to Item
+		super(id, name, new ArrayList<Rating>(3)); // 3 = initial list capacity
+	}
+
+	/**
+	 * @return the artist, or null if none has been set
+	 */
+	public String getArtist() {
+		return artist;
+	}
+
+	/**
+	 * @param artist
+	 *            the artist to set
+	 */
+	public void setArtist(String artist) {
+		this.artist = artist;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/MusicRating.java b/src/org/yooreeka/algos/reco/collab/data/MusicRating.java
new file mode 100644
index 0000000..9046889
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/MusicRating.java
@@ -0,0 +1,52 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Rating for music dataset.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class MusicRating extends Rating {
+
+	/**
+	 * Serial version UID; presumably Rating is Serializable -- TODO confirm.
+	 */
+	private static final long serialVersionUID = 4015578066768031191L;
+
+	public MusicRating(int userId, int songId, int rating) {
+		// all three values go straight to Rating; songId presumably becomes the item id -- confirm in Rating
+		super(userId, songId, rating);
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/MusicUser.java b/src/org/yooreeka/algos/reco/collab/data/MusicUser.java
new file mode 100644
index 0000000..1c8e89a
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/MusicUser.java
@@ -0,0 +1,249 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.util.gui.XyGui;
+
+/**
+ * User for music dataset.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class MusicUser extends User {
+
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = 4866915806848833932L;
+
+	public MusicUser(int userId, String name) { // delegates to User(userId, name); no ratings are passed
+		super(userId, name);
+	}
+	// Variant that also hands an initial list of ratings to User.
+	public MusicUser(int userId, String name, List<Rating> ratings) {
+		super(userId, name, ratings);
+	}
+
+	public double getSimilarity(MusicUser u, int simType) {
+
+		double sim = 0.0d;
+		int commonItems = 0;
+
+		/**
+		 * TODO: 3.1 -- Types of similarity (Book section 3.1.2)
+		 * 
+		 * In the following switch, we include two types of similarity You can
+		 * extend the functionality of this method by adding more types. For
+		 * example, the Jaccard similarity could be defined as the ratio of the
+		 * intersection over the union of the items between two users. In other
+		 * words, Number of songs in common Jaccard Similarity =
+		 * ------------------------------------------- Number of all songs
+		 * listened by either user
+		 * 
+		 * Are more complicated similarity metrics more accurate?
+		 */
+
+		switch (simType) {
+
+		case 0:
+			for (Rating r : this.ratingsByItemId.values()) {
+				for (Rating r2 : u.ratingsByItemId.values()) {
+
+					// Find the same item
+					if (r.getItemId() == r2.getItemId()) {
+						commonItems++;
+						sim += Math.pow((r.getRating() - r2.getRating()), 2);
+					}
+				}
+			}
+
+			// If there are not common items, we cannot tell whether
+			// the users are similar or not. So, we let it return 0.
+			if (commonItems > 0) {
+
+				// This is the RMSE, which is more like the distance
+				sim = Math.sqrt(sim / commonItems);
+
+				// Similarity should be between 0 and 1
+				// For the value 0, the two users are as dissimilar as they come
+				// For the value 1, their preferences (based on the available
+				// data) are identical.
+				//
+				// Here is a function that accomplishes exactly that
+				sim = 1.0d - Math.tanh(sim);
+			}
+
+			break;
+
+		// ---------------------------------------------------------
+		case 1:
+			for (Rating r : this.ratingsByItemId.values()) {
+				for (Rating r2 : u.ratingsByItemId.values()) {
+
+					// Find the same item
+					if (r.getItemId() == r2.getItemId()) {
+						commonItems++;
+						sim += Math.pow((r.getRating() - r2.getRating()), 2);
+					}
+				}
+			}
+
+			// If there are not common items, we cannot tell whether
+			// the users are similar or not. So, we let it return 0.
+			if (commonItems > 0) {
+				// Same as before (case 0)
+				sim = Math.sqrt(sim / commonItems);
+
+				// Similarity should be between 0 and 1
+				// For the value 0, the two users are as disimilar as they come
+				// For the value 1, their preferences (based on the available
+				// data) are identical.
+				//
+				// Here is a function that accomplishes exactly that
+				sim = 1.0d - Math.tanh(sim);
+
+				// However, the above calculation takes into account only the
+				// common items
+				// It does not account for the number of items that could have
+				// in common
+				// So, let us consider the following
+
+				// This is the maximum number of items that the two users can
+				// have in common
+				int maxCommonItems = Math.min(this.ratingsByItemId.size(),
+						u.ratingsByItemId.size());
+
+				// Adjust the similarity to account for the importance of the
+				// common terms
+				// through the ratio of the common items over the number of all
+				// possible common items
+
+				sim = sim * ((double) commonItems / (double) maxCommonItems);
+			}
+
+			break;
+		}
+
+		// Let us know what it is
+		System.out.print("\n"); // Just for pretty printing in the Shell
+		System.out.print(" User Similarity between");
+		System.out.print(" " + this.getName());
+		System.out.print(" and " + u.getName());
+		System.out.println(" is equal to " + sim);
+		System.out.print("\n"); // Just for pretty printing in the Shell
+
+		return sim;
+	}
+
+	public void plot() {
+		// Plots this user's ratings against their ordinal position.
+		int n = this.ratingsByItemId.size();
+
+		double[] x = new double[n];
+		double[] y = new double[n];
+
+		// advance the position per item so every rating gets its own slot
+		int i = 0;
+		for (Integer itemId : this.ratingsByItemId.keySet()) {
+			x[i] = i;
+			y[i] = this.getItemRating(itemId).getRating();
+			i++;
+		}
+
+		XyGui gui = new XyGui("", x, y);
+		gui.plot();
+	}
+
+	public void plot(MusicUser anotherUser) {
+		// Charts the ratings of this user next to those of anotherUser for
+		// the items that both have rated. Each entry holds the pair
+		// {own rating, other user's rating} for one such item.
+		List<Rating[]> pairs = new ArrayList<Rating[]>();
+
+		for (Rating mine : ratingsByItemId.values()) {
+			Rating theirs = anotherUser.getItemRating(mine.getItemId());
+			if (theirs == null) {
+				// the other user has not rated this item
+				continue;
+			}
+			Rating[] pair = { mine, theirs };
+			pairs.add(pair);
+		}
+
+		// Order the pairs from the closest agreement to the widest
+		// disagreement; pairs with the same gap keep their relative order.
+		Collections.sort(pairs, new Comparator<Rating[]>() {
+			public int compare(Rating[] a, Rating[] b) {
+				// gap = absolute difference between the two users' ratings
+				double gapA = Math.abs(a[0].getRating() - a[1].getRating());
+				double gapB = Math.abs(b[0].getRating() - b[1].getRating());
+
+				if (gapA < gapB) {
+					return -1;
+				}
+				if (gapA > gapB) {
+					return 1;
+				}
+				return 0;
+			}
+		});
+
+		// parallel arrays handed to the chart: labels and one series per user
+		final int count = pairs.size();
+		double[] data1 = new double[count];
+		double[] data2 = new double[count];
+		String[] itemNames = new String[count];
+
+		int slot = 0;
+		for (Rating[] pair : pairs) {
+			// Right now there is no way to get to Item from User or Rating.
+			// Only itemId is available from User or Rating instance, so the
+			// item id is shown on the chart in place of the song name.
+			itemNames[slot] = String.valueOf(pair[0].getItemId());
+			data1[slot] = pair[0].getRating();
+			data2[slot] = pair[1].getRating();
+			slot++;
+		}
+
+		XyGui gui = new XyGui("User Similarity", this.getName(),
+				anotherUser.getName(), itemNames, data1, data2);
+
+		gui.plot();
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/NewsData.java b/src/org/yooreeka/algos/reco/collab/data/NewsData.java
new file mode 100644
index 0000000..4077e68
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/NewsData.java
@@ -0,0 +1,202 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Utility class that we use as the source for News data.
+ */
+public class NewsData {
+
+	public static final String[] USERS = { "Albert", "Alexandra", "Athena",
+			"Aurora", "Babis", "Bill", "Bob", "Carl", "Catherine", "Charlie",
+			"Constantine", "Dmitry", "Elena", "Eric", "Frank", "George",
+			"Jack", "John", "Maria", "Lukas", "Nick", "Terry", "Todd" }; // a name's array index is used as the user id
+
+	public static final String[] DOC_SAMPLES = { "biz-01.html", "biz-02.html",
+			"biz-03.html", "biz-04.html", "biz-05.html", "biz-06.html",
+			"biz-07.html", "sport-01.html", "sport-02.html", "sport-03.html",
+			"usa-01.html", "usa-02.html", "usa-03.html", "usa-04.html",
+			"world-01.html", "world-02.html", "world-03.html", "world-04.html",
+			"world-05.html" }; // array index is the item id; the name prefix is what selectEligibleDocs() matches on
+
+	/**
+	 * Builds data set with all the users where each user is assigned 80% of all
+	 * the eligible content, as defined below:
+	 * <ul>
+	 * <li>Users whose name starts from A to D will have 'business' and 'sport'
+	 * content.</li>
+	 * <li>Users whose name starts from E to Z will have 'usa' and 'world'
+	 * content.</li>
+	 * </ul>
+	 */
+	public static BaseDataset createDataset() {
+		// Builds the news dataset: every sample document plus one NewsUser
+		// per entry of USERS.
+
+		// Share of a user's eligible documents that is assigned to the user.
+		final double docShare = 0.80;
+
+		BaseDataset dataset = new BaseDataset();
+
+		// Every document is registered with the dataset before any user is.
+		ContentItem[] allDocs = loadAllNewsItems();
+		for (int d = 0; d < allDocs.length; d++) {
+			dataset.addItem(allDocs[d]);
+		}
+
+		// A name's position in USERS doubles as that user's id.
+		for (int userId = 0; userId < USERS.length; userId++) {
+			String userName = USERS[userId];
+			// Names starting with a letter up to 'd' get business and sport
+			// documents; every other name gets usa and world documents.
+			String[] topics;
+			if (userName.toLowerCase().charAt(0) <= 'd') {
+				topics = new String[] { "biz", "sport" };
+			} else {
+				topics = new String[] { "usa", "world" };
+			}
+			ContentItem[] eligibleDocs = selectEligibleDocs(allDocs, topics);
+
+			// A random part of the eligible documents ends up with the user.
+			ContentItem[] assigned = pickRandomDocs(eligibleDocs, docShare);
+
+			NewsUser reader = new NewsUser(userId, userName);
+			for (int d = 0; d < assigned.length; d++) {
+				reader.addUserContent(assigned[d].getItemContent());
+			}
+
+			dataset.add(reader);
+		}
+		return dataset;
+	}
+
+	// private static Item createItem(String docName) {
+	// int id = -1;
+	// for(int i = 0, n = DOC_SAMPLES.length; i < n; i++) {
+	// if( DOC_SAMPLES[i].equals(docName)) {
+	// id = i;
+	// break;
+	// }
+	// }
+	//
+	// if( id < 0 ) {
+	// throw new IllegalArgumentException("Invalid document name: '" + docName +
+	// "'. This document is not on the list of predefined documents.");
+	// }
+	//
+	// return createDocItem(id, docName);
+	// }
+
+	private static ContentItem createNewsItem(int docId, String docName) {
+		// The content goes in through the constructor, so no separate
+		// setItemContent(content) call is made.
+		Content body = loadContent(docName);
+		return new ContentItem(docId, docName, body);
+	}
+
+	/**
+	 * Returns array of new ContentItem instances for every document listed in
+	 * <code>DOC_SAMPLES</code> array.
+	 */
+	private static ContentItem[] loadAllNewsItems() {
+		ContentItem[] allItems = new ContentItem[NewsData.DOC_SAMPLES.length];
+		for (int i = 0, n = allItems.length; i < n; i++) {
+			int id = i;
+			String name = NewsData.DOC_SAMPLES[i];
+			ContentItem item = createNewsItem(id, name);
+			allItems[i] = item;
+		}
+		return allItems;
+	}
+
+	private static Content loadContent(String docName) { // sample pages are read from <home>/data/ch02/
+		String path = YooreekaConfigurator.getHome() + "/data/ch02/" + docName;
+		return new HTMLContent(docName, path);
+	}
+
+	/**
+	 * Returns a random selection of documents.
+	 * 
+	 * @param newsItems
+	 *            list of documents to pick from
+	 * @param percentOfDocs
+	 *            determines size of returned selection.
+	 * 
+	 * @return array of documents.
+	 */
+	private static ContentItem[] pickRandomDocs(ContentItem[] newsItems,
+			double percentOfDocs) {
+
+		if (percentOfDocs < 0.0 || percentOfDocs > 1.0) {
+			throw new IllegalArgumentException(
+					"Value for 'percentOfDocs' argument should be "
+							+ "between 0 and 1.");
+		}
+
+		Random rand = new Random();
+		int sampleSize = (int) Math.round(percentOfDocs * newsItems.length);
+		Map<Integer, Item> pickedItems = new HashMap<Integer, Item>();
+		while (pickedItems.size() < sampleSize) {
+			int itemId = rand.nextInt(newsItems.length);
+			Item item = newsItems[itemId];
+			if (!pickedItems.containsKey(item.getId())) {
+				pickedItems.put(item.getId(), item);
+			}
+		}
+
+		return pickedItems.values()
+				.toArray(new ContentItem[pickedItems.size()]);
+	}
+
+	private static ContentItem[] selectEligibleDocs(ContentItem[] docs, String[] prefixes) {
+		List<ContentItem> matches = new ArrayList<ContentItem>(); // kept in input order
+		for (int d = 0; d < docs.length; d++) {
+			boolean hit = false; // stop at the first matching prefix so a document is added once
+			for (int p = 0; p < prefixes.length && !hit; p++) {
+				hit = docs[d].getName().startsWith(prefixes[p]);
+			}
+			if (hit) {
+				matches.add(docs[d]);
+			}
+		}
+		return matches.toArray(new ContentItem[matches.size()]);
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/NewsItem.java b/src/org/yooreeka/algos/reco/collab/data/NewsItem.java
new file mode 100644
index 0000000..7dedef4
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/NewsItem.java
@@ -0,0 +1,54 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Item for news dataset.
+ */
+public class NewsItem extends Item {
+
+	/**
+	 * Serial version UID; presumably Item is Serializable -- TODO confirm.
+	 */
+	private static final long serialVersionUID = 6349342365379966975L;
+
+	public NewsItem(int id, String name, Content content) {
+		super(id, name, new ArrayList<Rating>(3)); // empty rating list, initial capacity 3
+		setItemContent(content); // the content is attached after the Item part is constructed
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/NewsUser.java b/src/org/yooreeka/algos/reco/collab/data/NewsUser.java
new file mode 100644
index 0000000..ae316b5
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/NewsUser.java
@@ -0,0 +1,82 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * User of the news dataset; every constructor forwards its arguments to {@link User}.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class NewsUser extends User implements Serializable {
+
+	/**
+	 * Serial version UID (serialVersionUID) of this class.
+	 */
+	private static final long serialVersionUID = 3415187707158663184L;
+
+	/**
+	 * @param id user identifier, forwarded to the superclass constructor
+	 */
+	public NewsUser(int id) {
+		super(id);
+	}
+
+	/**
+	 * @param id user identifier, forwarded to the superclass constructor
+	 * @param ratings ratings of this user, forwarded to the superclass constructor
+	 */
+	public NewsUser(int id, List<Rating> ratings) {
+		super(id, ratings);
+	}
+
+	/**
+	 * @param id user identifier, forwarded to the superclass constructor
+	 * @param name user name, forwarded to the superclass constructor
+	 */
+	public NewsUser(int id, String name) {
+		super(id, name);
+	}
+
+	/**
+	 * @param id user identifier, forwarded to the superclass constructor
+	 * @param name user name, forwarded to the superclass constructor
+	 * @param ratings ratings of this user, forwarded to the superclass constructor
+	 */
+	public NewsUser(int id, String name, List<Rating> ratings) {
+		super(id, name, ratings);
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/data/RatingBuilder.java b/src/org/yooreeka/algos/reco/collab/data/RatingBuilder.java
new file mode 100644
index 0000000..0491ae2
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/data/RatingBuilder.java
@@ -0,0 +1,94 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.data;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Helper that generates random rating values and ratings built from them.
+ */
+class RatingBuilder {
+
+	private Random rand;
+
+	public RatingBuilder() {
+		this.rand = new Random();
+	}
+
+	/**
+	 * Builds one randomly valued rating per item, all attributed to one user.
+	 *
+	 * @param userId
+	 *            id stored in every generated rating.
+	 * @param items
+	 *            items to rate; each one is attached to its rating.
+	 * @param lowerBias
+	 *            smallest value a rating may take.
+	 * @param upperBias
+	 *            largest value a rating may take.
+	 * @return the generated ratings, in the order of <code>items</code>.
+	 */
+	public List<Rating> createBiasedRatings(int userId, Item[] items,
+			int lowerBias, int upperBias) {
+		List<Rating> generated = new ArrayList<Rating>();
+		for (int i = 0; i < items.length; i++) {
+			int value = getRandomRating(lowerBias, upperBias);
+			Rating generatedRating = new Rating(userId, items[i].getId(), value);
+			generatedRating.setItem(items[i]);
+			generated.add(generatedRating);
+		}
+		return generated;
+	}
+
+	/** Random rating over the default range (upper bound 5). */
+	public int getRandomRating() {
+		return getRandomRating(5);
+	}
+
+	/** Random rating between 1 and <code>upperBias</code>, both inclusive. */
+	public int getRandomRating(int upperBias) {
+		return getRandomRating(1, upperBias);
+	}
+
+	/** Random rating between the two bounds, both inclusive. */
+	public int getRandomRating(int lowerBias, int upperBias) {
+		// nextInt(span) yields 0..span-1, hence the +1 to make upperBias reachable
+		int span = (upperBias - lowerBias) + 1;
+		int offset = rand.nextInt(span);
+		return lowerBias + offset;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/evaluation/EvaluationDataProvider.java b/src/org/yooreeka/algos/reco/collab/evaluation/EvaluationDataProvider.java
new file mode 100644
index 0000000..c742f7a
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/evaluation/EvaluationDataProvider.java
@@ -0,0 +1,44 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.evaluation;
+
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+/**
+ * Access to previously generated evaluation data, addressed by test-set size and sequence number.
+ */
+public interface EvaluationDataProvider {
+	List<Rating> loadTestRatings(int testSize, int testSequence); // ratings of the test set
+
+	List<Rating> loadTrainingRatings(int testSize, int testSequence); // ratings of the training set
+}
diff --git a/src/org/yooreeka/algos/reco/collab/evaluation/MovieLensEvaluationDataProvider.java b/src/org/yooreeka/algos/reco/collab/evaluation/MovieLensEvaluationDataProvider.java
new file mode 100644
index 0000000..70a45f4
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/evaluation/MovieLensEvaluationDataProvider.java
@@ -0,0 +1,283 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.evaluation;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+import org.yooreeka.algos.reco.collab.data.MovieLensDataset;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Rating;
+
+public class MovieLensEvaluationDataProvider implements EvaluationDataProvider {
+
+	/*
+	 * Location for files with test and training data.
+	 */
+	private String evaluationDataDir;
+
+	/*
+	 * Provides data that will be used to produce training and test files.
+	 */
+	private Dataset dataset;
+
+	/*
+	 * Prefix that will be used in filename for files with test ratings.
+	 */
+	private String testFilenamePrefix;
+
+	/*
+	 * Prefix that will be used in filename for files with training ratings.
+	 */
+	private String trainingFilenamePrefix;
+
+	public MovieLensEvaluationDataProvider(Dataset dataset) {
+		this.dataset = dataset;
+	}
+
+	public void createData(int testSize) {
+		createData(testSize, 1);
+	}
+
+	/**
+	 * Creates evaluation data by splitting the original rating set into two
+	 * sets: a test set of randomly selected ratings and a training set made of
+	 * everything that is left. Both sets are passed to <code>saveRatings</code>.
+	 *
+	 * @param testSize
+	 *            number of ratings in test set; must not exceed the number of
+	 *            ratings in the dataset.
+	 * @param sequence
+	 *            allows to generate multiple test sets with the same number of
+	 *            ratings.
+	 */
+	public void createData(int testSize, int sequence) {
+
+		/* start with complete list of all available ratings */
+		List<Rating> allRatings = new ArrayList<Rating>(dataset.getRatings());
+
+		/* extract required number of ratings and use them as testing set */
+		List<Rating> testRatings = removeRatings(allRatings, testSize);
+		/* use the rest of the ratings as a training set */
+		List<Rating> trainingRatings = allRatings;
+
+		String testRatingsFilename = createFilename(testFilenamePrefix,
+				testSize, sequence);
+
+		String trainingRatingsFilename = createFilename(trainingFilenamePrefix,
+				testSize, sequence);
+
+		saveRatings(testRatingsFilename, testRatings);
+		saveRatings(trainingRatingsFilename, trainingRatings);
+	}
+
+	/**
+	 * Builds the unique filename of a file that holds training or test
+	 * ratings: <code>namePrefix + "N" + n + "Rnd" + sequence + ".dat"</code>.
+	 *
+	 * Example: with prefix "MovieLensRatingsTest", 16000 ratings and sequence 2
+	 * the result is MovieLensRatingsTestN16000Rnd2.dat, i.e. the second test
+	 * file with 16000 ratings that were removed from the original ratings.
+	 *
+	 * @param namePrefix
+	 *            identifies source of the data and the purpose (testing or
+	 *            training) of the file.
+	 * @param n
+	 *            number of ratings that were randomly selected from the
+	 *            original set of ratings and put in test file. Both training
+	 *            and test files are identified by this number.
+	 * @param sequence
+	 *            random selection sequence, used when multiple test files with
+	 *            the same number of ratings but a different selection are
+	 *            needed. The overloads without a sequence argument use 1.
+	 * @return the filename (without directory).
+	 */
+	public String createFilename(String namePrefix, int n, int sequence) {
+		return namePrefix + "N" + n + "Rnd" + sequence + ".dat";
+	}
+
+	public String getEvaluationDataDir() {
+		return evaluationDataDir;
+	}
+
+	public String getTestFilenamePrefix() {
+		return testFilenamePrefix;
+	}
+
+	public String getTrainingFilenamePrefix() {
+		return trainingFilenamePrefix;
+	}
+
+	public List<Rating> loadTestRatings(int testSize, int testSequence) {
+		String filename = createFilename(testFilenamePrefix, testSize,
+				testSequence);
+		File f = new File(evaluationDataDir, filename);
+
+		return MovieLensDataset.loadRatings(f);
+	}
+
+	public List<Rating> loadTrainingRatings(int testSize, int testSequence) {
+		String filename = createFilename(trainingFilenamePrefix, testSize,
+				testSequence);
+		File f = new File(evaluationDataDir, filename);
+		return MovieLensDataset.loadRatings(f);
+	}
+
+	/**
+	 * Creates a set of training and test data.
+	 * 
+	 * @param testSize
+	 *            number of ratings that will be used to create testing set.
+	 *            Size of training set is defined as AllAvailableRatings -
+	 *            testSize
+	 */
+	public void prepareTestData(int testSize) {
+		prepareTestData(testSize, 1);
+	}
+
+	/**
+	 * Creates multiple sets of training and test data. Should be used when we
+	 * need to create multiple test files for the same tests.
+	 * 
+	 * @param testSize
+	 *            number of test ratings.
+	 * @param sequence
+	 *            test sequence.
+	 */
+	public void prepareTestData(int testSize, int sequence) {
+		if (!testDataExist(testSize, sequence)) {
+			createData(testSize, sequence);
+		}
+	}
+
+	private void removeFile(String filename) {
+		File f = new File(evaluationDataDir, filename);
+		if (f.exists()) {
+			f.delete();
+		}
+	}
+
+	/**
+	 * Moves <code>n</code> randomly selected ratings out of <code>allRatings</code>
+	 * (which is modified) and returns them.
+	 *
+	 * @throws IllegalArgumentException if fewer than n ratings are available.
+	 */
+	private List<Rating> removeRatings(List<Rating> allRatings, int n) {
+		if (n > allRatings.size()) { // would otherwise drain the list, then fail in nextInt(0)
+			throw new IllegalArgumentException("Cannot select " + n
+					+ " test ratings: only " + allRatings.size() + " available");
+		}
+		List<Rating> removedRatings = new ArrayList<Rating>();
+		Random rnd = new Random();
+		while (removedRatings.size() < n) {
+			int randomIndex = rnd.nextInt(allRatings.size());
+			removedRatings.add(allRatings.remove(randomIndex));
+		}
+		return removedRatings;
+	}
+
+	/**
+	 * Deletes test data. Defaults sequence to 1.
+	 * 
+	 * @param testSize number of test ratings of the data set to delete.
+	 */
+	public void removeTestData(int testSize) {
+		removeTestData(testSize, 1);
+	}
+
+	/**
+	 * Deletes the test file and the training file of a data set, if present.
+	 * 
+	 * @param testSize number of test ratings of the data set to delete.
+	 * @param sequence test sequence of the data set to delete.
+	 */
+	public void removeTestData(int testSize, int sequence) {
+		String testFilename = createFilename(testFilenamePrefix, testSize,
+				sequence);
+		removeFile(testFilename);
+
+		String trainingFilename = createFilename(trainingFilenamePrefix,
+				testSize, sequence);
+		removeFile(trainingFilename);
+	}
+
+	private void saveRatings(String filename, Collection<Rating> ratings) {
+		File f = new File(evaluationDataDir, filename);
+		MovieLensDataset.createNewRatingsFile(f, ratings);
+	}
+
+	public void setEvaluationDataDir(String value) {
+		this.evaluationDataDir = value;
+	}
+
+	public void setTestFilenamePrefix(String testFilenamePrefix) {
+		this.testFilenamePrefix = testFilenamePrefix;
+	}
+
+	public void setTrainingFilenamePrefix(String trainingFilenamePrefix) {
+		this.trainingFilenamePrefix = trainingFilenamePrefix;
+	}
+
+	public boolean testDataExist(int testSize) {
+		return testDataExist(testSize, 1);
+	}
+
+	/**
+	 * Checks if the test set already exists, i.e. if both the test file and the
+	 * training file for the given size and sequence are present. As a side
+	 * effect the evaluation data directory is created when it is missing.
+	 *
+	 * @param testSize
+	 *            number of test ratings.
+	 * @param sequence
+	 *            test sequence.
+	 * @return <code>true</code> only if both files exist.
+	 */
+	public boolean testDataExist(int testSize, int sequence) {
+		// create temporary directory if it doesn't exist yet.
+		File tmpDirFile = new File(evaluationDataDir);
+		if (!tmpDirFile.exists()) {
+			tmpDirFile.mkdirs();
+		}
+
+		String testFilename = createFilename(testFilenamePrefix, testSize,
+				sequence);
+		String trainingFilename = createFilename(trainingFilenamePrefix,
+				testSize, sequence);
+		return new File(evaluationDataDir, testFilename).exists()
+				&& new File(evaluationDataDir, trainingFilename).exists();
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/evaluation/MovieLensRMSE.java b/src/org/yooreeka/algos/reco/collab/evaluation/MovieLensRMSE.java
new file mode 100644
index 0000000..d33c7d7
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/evaluation/MovieLensRMSE.java
@@ -0,0 +1,104 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.evaluation;
+
+import java.io.File;
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.data.MovieLensDataset;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.RecommendationType;
+import org.yooreeka.algos.reco.collab.recommender.Delphi;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * 
+ * @deprecated use <code>RMSEEstimator</code> instead.
+ */
+public class MovieLensRMSE {
+
+	public static void main(String[] args) {
+		MovieLensRMSE rmse = new MovieLensRMSE();
+		rmse.calculate();
+	}
+
+	public MovieLensRMSE() {
+	}
+
+	public double[] calculate() {
+
+		double similarityThreshold = 0.50;
+
+		int N = 5;
+
+		double[] rmse = new double[N];
+
+		RMSEEstimator rmseEstimator = new RMSEEstimator();
+
+		for (int i = 1; i <= N; i++) {
+
+			Dataset ds = createTrainingDataset(i);
+
+			Delphi delphi = new Delphi(ds, RecommendationType.ITEM_BASED);
+			delphi.setSimilarityThreshold(similarityThreshold);
+
+			List<Rating> testRatings = createTestRatings(i);
+
+			double rmseValue = rmseEstimator.calculateRMSE(delphi, testRatings);
+			System.out.println(i + ": rmse = " + rmseValue);
+
+			rmse[i - 1] = rmseValue;
+		}
+
+		return rmse;
+	}
+
+	public List<Rating> createTestRatings(int n) {
+		String dataDir = YooreekaConfigurator
+				.getProperty("iweb2.movielens.data.dir");
+
+		File ratings = new File(dataDir, "u" + n + ".test");
+
+		return MovieLensDataset.loadRatings(ratings);
+	}
+
+	public MovieLensDataset createTrainingDataset(int n) {
+		String dataDir = YooreekaConfigurator
+				.getProperty("iweb2.movielens.data.dir");
+
+		File users = new File(dataDir, MovieLensDataset.USERS_FILENAME);
+		File items = new File(dataDir, MovieLensDataset.ITEMS_FILENAME);
+		File ratings = new File(dataDir, "u" + n + ".base");
+
+		return new MovieLensDataset(users, items, ratings);
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/evaluation/RMSEEstimator.java b/src/org/yooreeka/algos/reco/collab/evaluation/RMSEEstimator.java
new file mode 100644
index 0000000..34b053a
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/evaluation/RMSEEstimator.java
@@ -0,0 +1,173 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.evaluation;
+
+import java.util.Collection;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.reco.collab.data.MovieLensDataset;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.algos.reco.collab.recommender.Recommender;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Calculates Root Mean Squared Error for the recommender.
+ */
+public class RMSEEstimator {
+
+	private static final Logger LOG = Logger.getLogger(RMSEEstimator.class.getName());
+
+	public RMSEEstimator() {
+		LOG.setLevel(YooreekaConfigurator.getLevel(RMSEEstimator.class.getName()));
+	}
+
+	/**
+	 * Calculates Root Mean Squared Error for the recommender. Uses test rating
+	 * values returned by recommender's dataset, which must be a MovieLensDataset.
+	 * 
+	 * @param delphi
+	 *            recommender.
+	 * @return root mean squared error value.
+	 */
+	public double calculateRMSE(Recommender delphi) {
+
+		MovieLensDataset ds = (MovieLensDataset) delphi.getDataset();
+		Collection<Rating> testRatings = ds.getTestRatings();
+
+		return calculateRMSE(delphi, testRatings);
+	}
+
+	/**
+	 * Calculates Root Mean Squared Error for the recommender.
+	 * 
+	 * @param delphi
+	 *            recommender to evaluate.
+	 * @param testRatings
+	 *            ratings that will be used to calculate the error.
+	 * @return root mean squared error; NaN if <code>testRatings</code> is empty.
+	 */
+	public double calculateRMSE(Recommender delphi,
+			Collection<Rating> testRatings) {
+
+		double sum = 0.0;
+
+		Dataset ds = delphi.getDataset();
+
+		int totalSamples = testRatings.size();
+
+		LOG.fine("Calculating RMSE ...");
+		LOG.fine("Training ratings count: " + ds.getRatingsCount());
+		LOG.fine("Test ratings count: " + testRatings.size());
+
+		for (Rating r : testRatings) {
+			User user = ds.getUser(r.getUserId());
+			Item item = ds.getItem(r.getItemId());
+			double predictedItemRating = delphi.predictRating(user, item);
+			// Cap predictions at 5.0; log the raw value before it is overwritten.
+			if (predictedItemRating > 5.0) {
+				LOG.finest("Predicted item rating: " + predictedItemRating);
+				predictedItemRating = 5.0;
+			}
+			LOG.finest(
+					"user: " + r.getUserId() +
+					", item: " + r.getItemId() +
+					", actual rating: " + r.getRating() +
+					", predicted: " + String.valueOf(predictedItemRating));
+
+			sum += Math.pow((predictedItemRating - r.getRating()), 2);
+
+		}
+		double rmse = Math.sqrt(sum / totalSamples);
+
+		LOG.fine("RMSE:" + rmse);
+
+		return rmse;
+	}
+
+	/** Like the two-argument variant, with the test ratings of the recommender's MovieLensDataset. */
+	public void compareRMSEs(Recommender delphi) {
+		MovieLensDataset ds = (MovieLensDataset) delphi.getDataset();
+		Collection<Rating> testRatings = ds.getTestRatings();
+
+		compareRMSEs(delphi, testRatings);
+	}
+
+	/**
+	 * Prints the RMSE of the recommender and of the item-average and user-average predictions.
+	 */
+	public void compareRMSEs(Recommender delphi, Collection<Rating> testRatings) {
+		double sum = 0.0;
+		double sumAvgItem = 0.0;
+		double sumAvgUser = 0.0;
+
+		Dataset ds = delphi.getDataset();
+		int totalSamples = testRatings.size();
+
+		LOG.fine("Calculating RMSE ...");
+		LOG.fine("Training ratings count: " + ds.getRatingsCount());
+		LOG.fine("Test ratings count: " + testRatings.size());
+
+		for (Rating r : testRatings) {
+			User user = ds.getUser(r.getUserId());
+			Item item = ds.getItem(r.getItemId());
+			double predictedItemRating = delphi.predictRating(user, item);
+			double predictedAvgItemRating = delphi
+					.predictBasedOnItemAverage(item);
+			double predictedAvgUserRating = delphi
+					.predictBasedOnUserAverage(user);
+			// Cap predictions at 5.0; log the raw value before it is overwritten.
+			if (predictedItemRating > 5.0) {
+				LOG.finest("Predicted item rating: " + predictedItemRating);
+				predictedItemRating = 5.0;
+			}
+			LOG.finest(
+					"user: " + r.getUserId() +
+					", item: " + r.getItemId() +
+					", actual rating: " + r.getRating() +
+					", predicted: " + String.valueOf(predictedItemRating));
+
+			sum += Math.pow((predictedItemRating - r.getRating()), 2);
+			sumAvgItem += Math.pow((predictedAvgItemRating - r.getRating()), 2);
+			sumAvgUser += Math.pow((predictedAvgUserRating - r.getRating()), 2);
+		}
+
+		double rmse = Math.sqrt(sum / totalSamples);
+		double rmseAvgItem = Math.sqrt(sumAvgItem / totalSamples);
+		double rmseAvgUser = Math.sqrt(sumAvgUser / totalSamples);
+
+		System.out.println("RMSE:" + rmse);
+		System.out.println("RMSE (based on avg. Item rating):" + rmseAvgItem);
+		System.out.println("RMSE (based on avg. User rating):" + rmseAvgUser);
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/evaluation/RMSEResult.java b/src/org/yooreeka/algos/reco/collab/evaluation/RMSEResult.java
new file mode 100644
index 0000000..298b6be
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/evaluation/RMSEResult.java
@@ -0,0 +1,85 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.evaluation;
+
+public class RMSEResult {
+	private String type;
+	private long testSize;
+	private double similarityThreshold;
+	private double error;
+
+	public RMSEResult(String type, long testSize, double simThreshold,
+			double error) {
+		this.type = type;
+		this.testSize = testSize;
+		this.similarityThreshold = simThreshold;
+		this.error = error;
+	}
+
+	public double getError() {
+		return error;
+	}
+
+	public double getSimilarityThreshold() {
+		return similarityThreshold;
+	}
+
+	public long getTestSize() {
+		return testSize;
+	}
+
+	public String getType() {
+		return type;
+	}
+
+	public void setError(double error) {
+		this.error = error;
+	}
+
+	public void setSimilarityThreshold(double similarityThreshold) {
+		this.similarityThreshold = similarityThreshold;
+	}
+
+	public void setTestSize(long testSize) {
+		this.testSize = testSize;
+	}
+
+	public void setType(String type) {
+		this.type = type;
+	}
+
+	@Override
+	public String toString() {
+		return "RMSE (testSize=" + getTestSize() + ", type=" + getType()
+				+ ", similarityThreshold=" + getSimilarityThreshold() + "): "
+				+ getError();
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/Content.java b/src/org/yooreeka/algos/reco/collab/model/Content.java
new file mode 100644
index 0000000..995eca6
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/Content.java
@@ -0,0 +1,182 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.search.lucene.analyzer.TextDocumentTerms;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Text content reduced to its most frequent terms. The term statistics are
+ * computed once, in the constructor, from {@link TextDocumentTerms#getTf()}.
+ */
+public class Content implements java.io.Serializable {
+
+	/**
+	 * SVUID
+	 */
+	private static final long serialVersionUID = 1098727290087922462L;
+	private static final Logger LOG = Logger.getLogger(Content.class.getName());
+
+	private String id;
+	private String text;
+	/* Retained terms; termFrequencies[i] is the frequency of terms[i]. */
+	private String[] terms;
+	private int[] termFrequencies;
+	/* Retained terms only. Mapping: term -> frequency */
+	private Map<String, Integer> tfMap;
+
+	/** Same as {@link #Content(String, String, int)} with topNTerms = 10. */
+	public Content(String id, String text) {
+		this(id, text, 10);
+	}
+
+	/**
+	 * Creates content that retains at most <code>topNTerms</code> terms.
+	 * 
+	 * @param id
+	 *            identifier of this content.
+	 * @param text
+	 *            text to extract the terms from.
+	 * @param topNTerms
+	 *            maximum number of terms to retain; zero or a negative value
+	 *            retains no terms at all.
+	 */
+	public Content(String id, String text, int topNTerms) {
+
+		LOG.setLevel(YooreekaConfigurator.getLevel(Content.class.getName()));
+
+		this.id = id;
+		this.text = text;
+
+		Map<String, Integer> allTermFrequencyMap = (new TextDocumentTerms(text))
+				.getTf();
+		tfMap = getTopNTermFrequencies(allTermFrequencyMap, topNTerms);
+
+		terms = new String[tfMap.size()];
+		termFrequencies = new int[tfMap.size()];
+
+		int i = 0;
+		for (Map.Entry<String, Integer> e : tfMap.entrySet()) {
+			terms[i] = e.getKey();
+			termFrequencies[i] = e.getValue();
+			i++;
+		}
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	/**
+	 * @return frequencies of the retained terms, in the order of
+	 *         {@link #getTerms()}.
+	 */
+	public int[] getTermFrequencies() {
+		return termFrequencies;
+	}
+
+	/**
+	 * @return the retained terms, in no particular order.
+	 */
+	public String[] getTerms() {
+		return terms;
+	}
+
+	/**
+	 * Builds a binary vector that marks which of the given terms are among
+	 * the retained terms of this content.
+	 * 
+	 * @param terms
+	 *            terms that define the vector space.
+	 * @return array as long as <code>terms</code>; element i is 1 when
+	 *         terms[i] is a retained term of this content and 0 otherwise.
+	 */
+	public double[] getTermVector(String[] terms) {
+		double[] termVector = new double[terms.length];
+		for (int i = 0, n = terms.length; i < n; i++) {
+			termVector[i] = tfMap.containsKey(terms[i]) ? 1 : 0;
+		}
+		return termVector;
+	}
+
+	public String getText() {
+		return text;
+	}
+
+	public Map<String, Integer> getTFMap() {
+		return this.tfMap;
+	}
+
+	/**
+	 * Selects the entries with the highest frequencies.
+	 * 
+	 * @param termFrequencyMap
+	 *            maps each term to its frequency, for all terms.
+	 * @param topN
+	 *            maximum number of entries to select.
+	 * @return new map with at most <code>topN</code> entries; it is empty
+	 *         when <code>topN</code> is zero or negative.
+	 */
+	private Map<String, Integer> getTopNTermFrequencies(
+			Map<String, Integer> termFrequencyMap, int topN) {
+
+		List<Map.Entry<String, Integer>> terms = new ArrayList<Map.Entry<String, Integer>>(
+				termFrequencyMap.entrySet());
+
+		// Highest frequency first. Different terms can have the same frequency.
+		Collections.sort(terms, new Comparator<Map.Entry<String, Integer>>() {
+			public int compare(Map.Entry<String, Integer> e1,
+					Map.Entry<String, Integer> e2) {
+				return e2.getValue().compareTo(e1.getValue()); // reverse order
+			}
+		});
+
+		Map<String, Integer> topNTermsFrequencyMap = new HashMap<String, Integer>();
+		for (Map.Entry<String, Integer> term : terms) {
+			// Test the limit before adding, so that topN <= 0 selects nothing.
+			if (topNTermsFrequencyMap.size() >= topN) {
+				break;
+			}
+			topNTermsFrequencyMap.put(term.getKey(), term.getValue());
+		}
+
+		return topNTermsFrequencyMap;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/Dataset.java b/src/org/yooreeka/algos/reco/collab/model/Dataset.java
new file mode 100644
index 0000000..c1256df
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/Dataset.java
@@ -0,0 +1,142 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+import java.util.Collection;
+
+/**
+ * Service contract that gives access to all users, items, and ratings of a
+ * dataset. Recommender and similarity implementations rely on it to read
+ * their data.
+ */
+public interface Dataset {
+
+	/**
+	 * Returns the terms that span the document space. Applies to
+	 * content-based datasets.
+	 * 
+	 * @return array of terms.
+	 */
+	String[] getAllTerms();
+
+	/**
+	 * Provides the average rating for the given item.
+	 * 
+	 * @param itemId id of the item.
+	 * @return average rating for the item.
+	 */
+	double getAverageItemRating(int itemId);
+
+	/**
+	 * Provides the average rating for the given user.
+	 * 
+	 * @param userId id of the user.
+	 * @return average rating for the user.
+	 */
+	double getAverageUserRating(int userId);
+
+	/**
+	 * Looks up one item by its id.
+	 * 
+	 * @param itemId
+	 *            id of the requested item.
+	 * @return the item.
+	 */
+	Item getItem(Integer itemId);
+
+	/**
+	 * Reports how many items are available.
+	 * 
+	 * @return number of items.
+	 */
+	int getItemCount();
+
+	/**
+	 * Gives access to every item.
+	 * 
+	 * @return collection of all items.
+	 */
+	Collection<Item> getItems();
+
+	/**
+	 * Logical name of this dataset instance.
+	 * 
+	 * @return name
+	 */
+	String getName();
+
+	/**
+	 * Gives access to every rating.
+	 * 
+	 * @return collection of ratings.
+	 */
+	Collection<Rating> getRatings();
+
+	/**
+	 * Reports how many item ratings are available.
+	 * 
+	 * @return number of item ratings by users.
+	 */
+	int getRatingsCount();
+
+	/**
+	 * Looks up one user by its id.
+	 * 
+	 * @param userId
+	 *            id of the requested user.
+	 * @return the user.
+	 */
+	User getUser(Integer userId);
+
+	/**
+	 * Reports how many users are available.
+	 * 
+	 * @return number of users.
+	 */
+	int getUserCount();
+
+	/**
+	 * Gives access to every user.
+	 * 
+	 * @return collection of users.
+	 */
+	Collection<User> getUsers();
+
+	/**
+	 * Tells whether user and item ids can double as array indexes.
+	 * 
+	 * @return true if ids aren't in sequence and can't be used as array
+	 *         indexes. false if user or item ids can be treated as sequences
+	 *         that start with 1; the index is then derived from the id:
+	 *         index = id - 1.
+	 */
+	boolean isIdMappingRequired();
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/Item.java b/src/org/yooreeka/algos/reco/collab/model/Item.java
new file mode 100644
index 0000000..1c152ec
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/Item.java
@@ -0,0 +1,171 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Generic representation of product or service that users can rate.
+ */
+public class Item implements java.io.Serializable {
+
+	private static final long serialVersionUID = 6119040388138010186L;
+
+	/**
+	 * @return ids of the users that rated <code>x</code> as well as
+	 *         <code>y</code>; an empty array if there are none.
+	 */
+	public static Integer[] getSharedUserIds(Item x, Item y) {
+		List<Integer> sharedUsers = new ArrayList<Integer>();
+		for (Rating r : x.getAllRatings()) {
+			// same user rated the item
+			if (y.getUserRating(r.getUserId()) != null) {
+				sharedUsers.add(r.getUserId());
+			}
+		}
+		return sharedUsers.toArray(new Integer[sharedUsers.size()]);
+	}
+
+	/* Unique id in the dataset. */
+	private int id;
+
+	/* Name. */
+	private String name;
+
+	/*
+	 * All ratings for this item. Supports only one rating per item for a user.
+	 * Mapping: userId -> rating
+	 */
+	private Map<Integer, Rating> ratingsByUserId;
+
+	private Content itemContent;
+
+	public Item(Integer id, List<Rating> ratings) {
+		this(id, String.valueOf(id), ratings);
+	}
+
+	public Item(Integer id, String name) {
+		this(id, name, new ArrayList<Rating>(3));
+	}
+
+	public Item(Integer id, String name, List<Rating> ratings) {
+		this.id = id;
+		this.name = name;
+		// load ratings into userId -> rating map.
+		ratingsByUserId = new HashMap<Integer, Rating>(ratings.size());
+		for (Rating r : ratings) {
+			ratingsByUserId.put(r.getUserId(), r);
+		}
+	}
+
+	/**
+	 * Updates existing user rating or adds a new user rating for this item.
+	 * 
+	 * @param r
+	 *            rating to add.
+	 */
+	public void addUserRating(Rating r) {
+		ratingsByUserId.put(r.getUserId(), r);
+	}
+
+	/**
+	 * @return all ratings of this item, as a view of the internal rating map.
+	 */
+	public Collection<Rating> getAllRatings() {
+		return ratingsByUserId.values();
+	}
+
+	/** Mean of all ratings of this item, or 2.5 if it has no ratings. */
+	public double getAverageRating() {
+		double allRatingsSum = 0.0;
+		Collection<Rating> allItemRatings = ratingsByUserId.values();
+		for (Rating rating : allItemRatings) {
+			allRatingsSum += rating.getRating();
+		}
+		// use 2.5 if there are no ratings.
+		return allItemRatings.size() > 0 ? allRatingsSum
+				/ allItemRatings.size() : 2.5;
+	}
+
+	public int getId() {
+		return id;
+	}
+
+	public Content getItemContent() {
+		return itemContent;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * Extracts the rating values that the given users gave to this item.
+	 * 
+	 * @return rating values, in the order of <code>userIds</code>.
+	 * @throws IllegalArgumentException
+	 *             if one of the users has no rating for this item.
+	 */
+	public double[] getRatingsForItemList(Integer[] userIds) {
+		double[] ratings = new double[userIds.length];
+		for (int i = 0, n = userIds.length; i < n; i++) {
+			Rating r = getUserRating(userIds[i]);
+			if (r == null) {
+				throw new IllegalArgumentException(
+						"Item doesn't have rating by specified user id ("
+								+ "userId=" + userIds[i] + ", itemId="
+								+ getId() + ")");
+			}
+			ratings[i] = r.getRating();
+		}
+		return ratings;
+	}
+
+	/**
+	 * Returns rating that specified user gave to the item.
+	 * 
+	 * @param userId
+	 *            user
+	 * @return user rating or null if user hasn't rated this item.
+	 */
+	public Rating getUserRating(Integer userId) {
+		return ratingsByUserId.get(userId);
+	}
+
+	public void setItemContent(Content content) {
+		this.itemContent = content;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/Rating.java b/src/org/yooreeka/algos/reco/collab/model/Rating.java
new file mode 100644
index 0000000..2a3f665
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/Rating.java
@@ -0,0 +1,127 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+/**
+ * A rating that a user assigned to an item (product), identified by ids.
+ */
+public class Rating implements java.io.Serializable {
+
+	/**
+	 * SVUID
+	 */
+	private static final long serialVersionUID = 1438346522502387789L;
+
+	protected Item item;
+
+	private int userId;
+	private int itemId;
+	private int rating;
+
+	/** Creates a rating; <code>bookId</code> is stored as the item id. */
+	public Rating(int userId, int bookId, int rating) {
+		this.rating = rating;
+		this.itemId = bookId;
+		this.userId = userId;
+	}
+
+	/**
+	 * Two ratings are equal when they have the same runtime class and the
+	 * same user id, item id and rating value; the item reference is ignored.
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		Rating that = (Rating) obj;
+		return itemId == that.itemId && rating == that.rating
+				&& userId == that.userId;
+	}
+
+	/**
+	 * @return the item given to setItem(Item), or null if none was set.
+	 */
+	public Item getItem() {
+		return item;
+	}
+
+	public int getItemId() {
+		return itemId;
+	}
+
+	public int getRating() {
+		return rating;
+	}
+
+	public int getUserId() {
+		return userId;
+	}
+
+	/** Hashes exactly the three fields that equals() compares. */
+	@Override
+	public int hashCode() {
+		// 31-based fold over itemId, rating and userId, seeded with 1.
+		return 31 * (31 * (31 + itemId) + rating) + userId;
+	}
+
+	/**
+	 * @param item
+	 *            the item to set
+	 */
+	public void setItem(Item item) {
+		this.item = item;
+	}
+
+	public void setItemId(int bookId) {
+		this.itemId = bookId;
+	}
+
+	public void setRating(int rating) {
+		this.rating = rating;
+	}
+
+	public void setUserId(int userId) {
+		this.userId = userId;
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder text = new StringBuilder(getClass().getSimpleName());
+		text.append("[userId: ").append(userId);
+		text.append(", itemId: ").append(itemId);
+		text.append(", rating: ").append(rating);
+		return text.append("]").toString();
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/RecommendationType.java b/src/org/yooreeka/algos/reco/collab/model/RecommendationType.java
new file mode 100644
index 0000000..7a78128
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/RecommendationType.java
@@ -0,0 +1,38 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+/** Enumerates the supported recommendation types. */
+public enum RecommendationType {
+	USER_BASED, ITEM_BASED, IMPROVED_USER_BASED, ITEM_PENALTY_BASED,
+	USER_CONTENT_BASED, ITEM_CONTENT_BASED,
+	USER_ITEM_CONTENT_BASED
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/reco/collab/model/SimilarItem.java b/src/org/yooreeka/algos/reco/collab/model/SimilarItem.java
new file mode 100644
index 0000000..aadd2cc
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/SimilarItem.java
@@ -0,0 +1,128 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Holder for an item and the similarity value that was assigned to it.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class SimilarItem {
+
+	/**
+	 * Returns at most <code>topN</code> items, most similar first. The given
+	 * list is sorted in place as a side effect.
+	 */
+	public static SimilarItem[] getTopSimilarItems(
+			List<SimilarItem> similarItems, int topN) {
+
+		// most similar items first
+		SimilarItem.sort(similarItems);
+
+		// select top N items
+		List<SimilarItem> topItems = new ArrayList<SimilarItem>();
+		for (SimilarItem f : similarItems) {
+			if (topItems.size() >= topN) {
+				// have enough items.
+				break;
+			}
+			topItems.add(f);
+		}
+
+		return topItems.toArray(new SimilarItem[topItems.size()]);
+	}
+
+	public static void printItems(SimilarItem[] items, String header) {
+		System.out.println("\n" + header + "\n");
+		for (SimilarItem f : items) {
+			System.out.printf("name: %-36s, similarity: %f\n", f.getItem()
+					.getName(), f.getSimilarity());
+		}
+	}
+
+	/**
+	 * Sorts the list in place by descending similarity. Items whose
+	 * similarity is NaN are placed after all other items.
+	 */
+	public static void sort(List<SimilarItem> similarItems) {
+
+		Collections.sort(similarItems, new Comparator<SimilarItem>() {
+			public int compare(SimilarItem f1, SimilarItem f2) {
+				double s1 = f1.getSimilarity();
+				double s2 = f2.getSimilarity();
+				if (Double.isNaN(s1) || Double.isNaN(s2)) {
+					// NaN fails both < and >; treating it as equal to every
+					// value is not a consistent ordering, so rank it last.
+					return Double.isNaN(s1) ? (Double.isNaN(s2) ? 0 : 1) : -1;
+				}
+				return s1 < s2 ? 1 : (s1 > s2 ? -1 : 0); // reverse order
+			}
+		});
+	}
+
+	private Item item;
+
+	/* Similarity */
+	private double similarity = -1;
+
+	public SimilarItem(Item item, double sim) {
+		this.item = item;
+		similarity = sim;
+	}
+
+	// ----------------------------------------------
+	// GETTERS / SETTERS
+	// ----------------------------------------------
+
+	/** @return the item */
+	public Item getItem() {
+		return item;
+	}
+
+	/** @return the similarity */
+	public double getSimilarity() {
+		return similarity;
+	}
+
+	/**
+	 * @param item
+	 *            the item to set
+	 */
+	public void setItem(Item item) {
+		this.item = item;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/SimilarUser.java b/src/org/yooreeka/algos/reco/collab/model/SimilarUser.java
new file mode 100644
index 0000000..bda7f25
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/SimilarUser.java
@@ -0,0 +1,134 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Utility class that acts as a holder for user and similarity value that was
+ * assigned to the user.
+ */
+public class SimilarUser {
+
+	/**
+	 * Returns at most <code>topN</code> users, most similar first. The given
+	 * list is sorted in place as a side effect.
+	 */
+	public static SimilarUser[] getTopNFriends(List<SimilarUser> similarUsers,
+			int topN) {
+
+		// most similar users first
+		SimilarUser.sort(similarUsers);
+
+		// select top N friends
+		List<SimilarUser> topFriends = new ArrayList<SimilarUser>();
+		for (SimilarUser f : similarUsers) {
+			if (topFriends.size() >= topN) {
+				// have enough friends.
+				break;
+			}
+			// Skip instances that were already selected (contains() compares
+			// by identity here). This is useful when we compose results from
+			// different recommenders.
+			if (!topFriends.contains(f)) {
+				topFriends.add(f);
+			}
+		}
+
+		return topFriends.toArray(new SimilarUser[topFriends.size()]);
+	}
+
+	/**
+	 * Prints a list of user names with their similarities.
+	 * 
+	 * @param friends
+	 *            similar users
+	 * @param header
+	 *            title that will be printed at the top of the list.
+	 */
+	public static void print(SimilarUser[] friends, String header) {
+		System.out.println("\n" + header + "\n");
+		for (SimilarUser f : friends) {
+			System.out.printf("name: %-36s, similarity: %f\n", f.getName(),
+					f.getSimilarity());
+		}
+	}
+
+	/**
+	 * Sorts the list in place by descending similarity. Users whose
+	 * similarity is NaN are placed after all other users.
+	 */
+	public static void sort(List<SimilarUser> similarUsers) {
+		Collections.sort(similarUsers, new Comparator<SimilarUser>() {
+			public int compare(SimilarUser f1, SimilarUser f2) {
+				double s1 = f1.getSimilarity();
+				double s2 = f2.getSimilarity();
+				if (Double.isNaN(s1) || Double.isNaN(s2)) {
+					// NaN fails both < and >; treating it as equal to every
+					// value is not a consistent ordering, so rank it last.
+					return Double.isNaN(s1) ? (Double.isNaN(s2) ? 0 : 1) : -1;
+				}
+				return s1 < s2 ? 1 : (s1 > s2 ? -1 : 0); // reverse order
+			}
+		});
+	}
+
+	/* The friend User. */
+	private User friend;
+
+	/* Similarity */
+	private double similarity = -1;
+
+	public SimilarUser(User user, double similarity) {
+		friend = user;
+		this.similarity = similarity;
+	}
+
+	public int getId() {
+		return friend.getId();
+	}
+
+	public String getName() {
+		return friend.getName();
+	}
+
+	/** @return the similarity */
+	public double getSimilarity() {
+		return similarity;
+	}
+
+	public User getUser() {
+		return friend;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/model/User.java b/src/org/yooreeka/algos/reco/collab/model/User.java
new file mode 100644
index 0000000..3033672
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/model/User.java
@@ -0,0 +1,175 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.model;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Generic representation of user which rates items.
+ */
+public class User implements java.io.Serializable {
+
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = -1884424246968533858L;
+
+	/**
+	 * Collects the ids of the items that have a rating from both users.
+	 */
+	public static Integer[] getSharedItems(User x, User y) {
+		List<Integer> commonIds = new ArrayList<Integer>();
+		for (Rating ratingByX : x.getAllRatings()) {
+			Integer candidateId = ratingByX.getItemId();
+			if (y.getItemRating(candidateId) != null) {
+				commonIds.add(candidateId);
+			}
+		}
+		return commonIds.toArray(new Integer[commonIds.size()]);
+	}
+	int id;
+
+	String name;
+
+	protected Map<Integer, Rating> ratingsByItemId;
+
+	private List<Content> userContent = new ArrayList<Content>();
+
+	public User(int id) {
+		this(id, String.valueOf(id), new ArrayList<Rating>(3));
+	}
+
+	public User(int id, List<Rating> ratings) {
+		this(id, String.valueOf(id), ratings);
+	}
+
+	public User(int id, String name) {
+		this(id, name, new ArrayList<Rating>(3));
+	}
+
+	public User(int id, String name, List<Rating> ratings) {
+		this.id = id;
+		this.name = name;
+		ratingsByItemId = new HashMap<Integer, Rating>(ratings.size());
+		for (Rating r : ratings) {
+			ratingsByItemId.put(r.getItemId(), r);
+		}
+	}
+
+	public void addRating(Rating rating) {
+		ratingsByItemId.put(rating.getItemId(), rating);
+	}
+
+	public void addUserContent(Content content) {
+		userContent.add(content);
+	}
+
+	public Collection<Rating> getAllRatings() {
+		return ratingsByItemId.values();
+	}
+
+	/** Mean of all ratings of this user; 2.5 when the user has no ratings. */
+	public double getAverageRating() {
+		Collection<Rating> ratings = getAllRatings();
+		double total = 0.0;
+		for (Rating each : ratings) {
+			total += each.getRating();
+		}
+		return ratings.isEmpty() ? 2.5 : total / ratings.size();
+	}
+
+	public int getId() {
+		return id;
+	}
+
+	public Rating getItemRating(Integer itemId) {
+		return ratingsByItemId.get(itemId);
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * Ratings for itemIds, in order; IllegalArgumentException if one is unrated.
+	 */
+	public double[] getRatingsForItemList(Integer[] itemIds) {
+		double[] ratings = new double[itemIds.length];
+		for (int i = 0, n = itemIds.length; i < n; i++) {
+			Rating r = getItemRating(itemIds[i]);
+			if (r == null) {
+				throw new IllegalArgumentException(
+						"User doesn't have specified item id (userId="
+								+ getId() + ", itemId=" + itemIds[i] + ")");
+			}
+			ratings[i] = r.getRating();
+		}
+		return ratings;
+	}
+
+	public List<Content> getUserContent() {
+		return userContent;
+	}
+
+	/** Finds this user's content with the given id; null if there is none. */
+	public Content getUserContent(String contentId) {
+		for (Content candidate : userContent) {
+			// The first match in list order wins.
+			if (candidate.getId().equals(contentId)) {
+				return candidate;
+			}
+		}
+		return null;
+	}
+
+	/** Replaces all of this user's ratings with the given ones. */
+	public void setRatings(List<Rating> ratings) {
+		// Reuse the existing map when there is one, otherwise create it.
+		if (ratingsByItemId != null) {
+			ratingsByItemId.clear();
+		} else {
+			ratingsByItemId = new HashMap<Integer, Rating>(ratings.size());
+		}
+		// Later entries win when two ratings refer to the same item id.
+		for (Rating rating : ratings) {
+			ratingsByItemId.put(rating.getItemId(), rating);
+		}
+	}
+
+	public void setUserContent(List<Content> content) {
+		this.userContent = content;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/recommender/Delphi.java b/src/org/yooreeka/algos/reco/collab/recommender/Delphi.java
new file mode 100644
index 0000000..86175c5
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/recommender/Delphi.java
@@ -0,0 +1,545 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.recommender;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.RecommendationType;
+import org.yooreeka.algos.reco.collab.model.SimilarItem;
+import org.yooreeka.algos.reco.collab.model.SimilarUser;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.algos.reco.collab.similarity.naive.SimilarityMatrix;
+import org.yooreeka.algos.reco.collab.similarity.util.SimilarityMatrixRepository;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Recommender. Has to be initialized with similarity function and data.
+ *
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class Delphi implements Recommender {
+
+	private static final double DEFAULT_SIMILARITY_THRESHOLD = 0.50;
+	private static final double MAX_RATING = 5;
+	private static final Logger LOG = Logger.getLogger(Delphi.class.getName());
+
+	private RecommendationType type;
+	private SimilarityMatrix similarityMatrix;
+	private Dataset dataSet;
+	private boolean verbose = true;
+	private double similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD;
+	private Map<Integer, Double> maxPredictedRating;
+
+	public Delphi(Dataset dataSet, 
+			      RecommendationType type) {
+
+		this(dataSet,type,false);		
+	}
+
+	public Delphi(Dataset dataSet, 
+			      RecommendationType type,	
+			      boolean useSimilarityCache) {
+		
+		this(dataSet,type,useSimilarityCache,null);
+		
+		SimilarityMatrixRepository smRepo = new SimilarityMatrixRepository(useSimilarityCache);
+		setSimilarityMatrix(smRepo.load(type, dataSet));
+	}
+
+	public Delphi(Dataset dataSet, 
+				  RecommendationType type,
+				  boolean useSimilarityCache,
+				  SimilarityMatrix similarityMatrix) {
+
+		LOG.setLevel(YooreekaConfigurator.getLevel(Delphi.class.getName()));
+		
+		this.type = type;
+
+		this.dataSet = dataSet;
+		maxPredictedRating = new HashMap<Integer, Double>(dataSet.getUserCount() / 2);
+		
+		this.similarityMatrix = similarityMatrix;
+	}
+
+	
+	// --------------------------------------------------------------------
+	// ITEM BASED RATING ESTIMATION
+	// --------------------------------------------------------------------
+
+	private double estimateItemBasedRating(User user, Item item) {
+
+		double estimatedRating;
+
+		if (item != null && user != null) {
+
+			estimatedRating = item.getAverageRating();
+
+		} else {
+			if (item == null && user == null) {
+				throw new IllegalArgumentException(
+						"At least, one of the arguments must not be null!");
+			} else {
+				return 3.0d;
+			}
+		}
+
+		int itemId = item.getId();
+		int userId = user.getId();
+		double similaritySum = 0.0;
+		double weightedRatingSum = 0.0;
+
+		// check if the user has already rated the item
+		Rating existingRatingByUser = user.getItemRating(item.getId());
+
+		if (existingRatingByUser != null) {
+
+			estimatedRating = existingRatingByUser.getRating();
+
+		} else {
+
+			double similarityBetweenItems = 0;
+			double weightedRating = 0;
+
+			for (Item anotherItem : dataSet.getItems()) {
+
+				// only consider items that were rated by the user
+				Rating anotherItemRating = anotherItem.getUserRating(userId);
+
+				if (anotherItemRating != null) {
+
+					similarityBetweenItems = similarityMatrix.getValue(itemId,
+							anotherItem.getId());
+
+					if (similarityBetweenItems > similarityThreshold) {
+
+						weightedRating = similarityBetweenItems
+								* anotherItemRating.getRating();
+
+						weightedRatingSum += weightedRating;
+						similaritySum += similarityBetweenItems;
+					}
+				}
+			}
+
+			if (similaritySum > 0.0) {
+
+				estimatedRating = weightedRatingSum / similaritySum;
+			}
+		}
+
+		return estimatedRating;
+	}
+
+	// -----------------------------------------------------------
+	// PRIVATE (AUXILIARY) METHODS
+	// -----------------------------------------------------------
+	private double estimateUserBasedRating(User user, Item item) {
+
+		double estimatedRating = user.getAverageRating();
+
+		int itemId = item.getId();
+		int userId = user.getId();
+
+		double similaritySum = 0.0;
+		double weightedRatingSum = 0.0;
+
+		// check if user has already rated this item
+		Rating existingRatingByUser = user.getItemRating(item.getId());
+
+		if (existingRatingByUser != null) {
+
+			estimatedRating = existingRatingByUser.getRating();
+
+		} else {
+			for (User anotherUser : dataSet.getUsers()) {
+
+				Rating itemRating = anotherUser.getItemRating(itemId);
+
+				// only consider users that rated this item
+				if (itemRating != null) {
+
+					/**
+					 * @todo describe how this generalizes to more accurate
+					 *       similarities
+					 */
+					double similarityBetweenUsers = similarityMatrix.getValue(
+							userId, anotherUser.getId());
+
+					double ratingByNeighbor = itemRating.getRating();
+
+					double weightedRating = similarityBetweenUsers
+							* ratingByNeighbor;
+
+					weightedRatingSum += weightedRating;
+					similaritySum += similarityBetweenUsers;
+				}
+			}
+
+			if (similaritySum > 0.0) {
+				estimatedRating = weightedRatingSum / similaritySum;
+			}
+		}
+
+		return estimatedRating;
+	}
+
+	private List<SimilarUser> findFriendsBasedOnUserSimilarity(User user) {
+
+		List<SimilarUser> similarUsers = new ArrayList<SimilarUser>();
+
+		for (User friend : dataSet.getUsers()) {
+
+			if (user.getId() != friend.getId()) {
+
+				double similarity = similarityMatrix.getValue(user.getId(),
+						friend.getId());
+				similarUsers.add(new SimilarUser(friend, similarity));
+			}
+		}
+
+		return similarUsers;
+	}
+
+	// --------------------------------------------------------------------
+	// ITEM BASED SIMILARITY
+	// --------------------------------------------------------------------
+
+	private List<SimilarItem> findItemsBasedOnItemSimilarity(Item item) {
+
+		List<SimilarItem> similarItems = new ArrayList<SimilarItem>();
+
+		int itemId = item.getId();
+
+		for (Item sItem : dataSet.getItems()) {
+
+			if (itemId != sItem.getId()) {
+
+				double similarity = similarityMatrix.getValue(itemId,
+						sItem.getId());
+				if (similarity > 0.0) {
+					similarItems.add(new SimilarItem(sItem, similarity));
+				}
+			}
+		}
+
+		return similarItems;
+	}
+
+	public SimilarItem[] findSimilarItems(Item item) {
+		return findSimilarItems(item, 5);
+	}
+
+	public SimilarItem[] findSimilarItems(Item item, int topN) {
+
+		List<SimilarItem> similarItems = new ArrayList<SimilarItem>();
+
+		if (!isUserBased()) {
+
+			similarItems = findItemsBasedOnItemSimilarity(item);
+
+		} else {
+
+			LOG.warning("Finding similar items based on User similarity is not supported!");
+		}
+
+		SimilarItem[] topSimilarItems = SimilarItem.getTopSimilarItems(
+				similarItems, topN);
+
+		if (verbose) {
+			SimilarItem.printItems(topSimilarItems,
+					"Items like item " + item.getName() + ":");
+		}
+
+		return topSimilarItems;
+	}
+
+	public SimilarUser[] findSimilarUsers(User user) {
+		SimilarUser[] topFriends = findSimilarUsers(user, 5);
+
+		if (verbose) {
+			SimilarUser.print(topFriends,
+					"Top Friends for user " + user.getName() + ":");
+		}
+
+		return topFriends;
+	}
+
+	public SimilarUser[] findSimilarUsers(User user, int topN) {
+
+		List<SimilarUser> similarUsers = new ArrayList<SimilarUser>();
+
+		if (isUserBased()) {
+
+			similarUsers = findFriendsBasedOnUserSimilarity(user);
+
+		} else {
+
+			/**
+			 * TODO: 3.x: Create an algorithm that would allow you to find
+			 * similar users based on item similarities. What kind of results do
+			 * you get? Is it space efficient? How about execution time?
+			 */
+			LOG.warning("Finding friends based on Item similarity is not supported!");
+		}
+
+		return SimilarUser.getTopNFriends(similarUsers, topN);
+	}
+
+	/**
+	 * @return recommender's dataset.
+	 */
+	public Dataset getDataset() {
+		return this.dataSet;
+	}
+
+	/**
+	 * @return the maximum rating that the last call to recommend stored for
+	 *         the user, or MAX_RATING when nothing is stored for the user.
+	 */
+	public double getMaxPredictedRating(Integer uID) {
+		Double maxPR = maxPredictedRating.get(uID);
+		return (maxPR == null) ? MAX_RATING : maxPR;
+	}
+
+	// --------------------------------------------------------------------
+	// SIMILARITY LOOKUPS
+	// --------------------------------------------------------------------
+
+	public double getSimilarity(Item i1, Item i2) {
+
+		double sim = similarityMatrix.getValue(i1.getId(), i2.getId());
+
+		if (verbose) {
+			System.out.print("Item similarity between");
+			System.out.print(" ItemID: " + i1.getId());
+			System.out.print(" and");
+			System.out.print(" ItemID: " + i2.getId());
+			System.out.println(" is equal to " + sim);
+		}
+
+		return sim;
+	}
+
+	public double getSimilarity(User u1, User u2) {
+
+		double sim = similarityMatrix.getValue(u1.getId(), u2.getId());
+
+		if (verbose) {
+			System.out.print("User Similarity between");
+			System.out.print(" UserID: " + u1.getId());
+			System.out.print(" and");
+			System.out.print(" UserID: " + u2.getId());
+			System.out.println(" is equal to " + sim);
+		}
+
+		return sim;
+	}
+
+	// --------------------------------------------------------------------
+	// AUXILIARY METHODS
+	// --------------------------------------------------------------------
+
+	public SimilarityMatrix getSimilarityMatrix() {
+		return similarityMatrix;
+	}
+
+	public double getSimilarityThreshold() {
+		return similarityThreshold;
+	}
+
+	public RecommendationType getType() {
+		return type;
+	}
+
+	public double getUserItemSimilarity(User user, Item item) {
+
+		if (!isUserItemBased()) {
+			throw new IllegalStateException(
+					"Not valid for current similarity type:" + type);
+		}
+
+		double sim = similarityMatrix.getValue(user.getId(), item.getId());
+
+		if (verbose) {
+			System.out.print("User Item Similarity between");
+			System.out.print(" UserID: " + user.getId());
+			System.out.print(" and");
+			System.out.print(" ItemID: " + item.getId());
+			System.out.println(" is equal to " + sim);
+		}
+
+		return sim;
+	}
+
+	private boolean isContentBased() {
+		return type.toString().contains("CONTENT"); // e.g. USER_CONTENT_BASED
+	}
+
+	private boolean isUserBased() {
+		String typeName = type.toString(); // USER_ITEM types do not count
+		return typeName.contains("USER") && !typeName.contains("USER_ITEM");
+	}
+
+	private boolean isUserItemBased() {
+		return type.toString().contains("USER_ITEM"); // user-to-item types
+	}
+
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	@Override
+	public double predictBasedOnItemAverage(Item item) {
+		return item.getAverageRating();
+	}
+
+	@Override
+	public double predictBasedOnUserAverage(User user) {
+		return user.getAverageRating();
+	}
+
+	public double predictRating(int userId, int itemId) {
+		return predictRating(dataSet.getUser(userId), dataSet.getItem(itemId));
+	}
+
+	/**
+	 * Predicts the rating of the item by the user with the estimator that
+	 * belongs to the recommendation type of this recommender.
+	 */
+	public double predictRating(User user, Item item) {
+		switch (type) {
+		case USER_BASED:
+		case IMPROVED_USER_BASED:
+			return estimateUserBasedRating(user, item);
+		case ITEM_BASED:
+		case ITEM_PENALTY_BASED:
+			return estimateItemBasedRating(user, item);
+		case USER_CONTENT_BASED:
+		case ITEM_CONTENT_BASED:
+			// No rating estimate is implemented for these two types.
+			throw new IllegalStateException(
+					"Not valid for current similarity type:" + type);
+		case USER_ITEM_CONTENT_BASED:
+			// Using similarity between User and Item
+			return MAX_RATING
+					* similarityMatrix.getValue(user.getId(), item.getId());
+		}
+		throw new RuntimeException("Unknown recommendation type:" + type);
+	}
+
+	public List<PredictedItemRating> recommend(Integer userId) {
+		return recommend(dataSet.getUser(userId));
+	}
+
+	// --------------------------------------------------------------------
+	// RECOMMENDATIONS
+	// --------------------------------------------------------------------
+	public List<PredictedItemRating> recommend(User user) {
+		List<PredictedItemRating> recommendedItems = recommend(user, 5);
+		return recommendedItems;
+	}
+
+	public List<PredictedItemRating> recommend(User user, int topN) {
+
+		List<PredictedItemRating> recommendations = new ArrayList<PredictedItemRating>();
+
+		double maxRating = -1.0d;
+
+		for (Item item : dataSet.getItems()) {
+
+			// only consider items that user hasn't rated yet or doesn't own the
+			// content
+			if (!skipItem(user, item)) {
+				double predictedRating = predictRating(user, item);
+
+				if (maxRating < predictedRating) {
+					maxRating = predictedRating;
+				}
+
+				if (!Double.isNaN(predictedRating)) {
+					recommendations.add(new PredictedItemRating(user.getId(),
+							item.getId(), predictedRating));
+				}
+			} else {
+				if (verbose) {
+					System.out.println("Skipping item:" + item.getName());
+				}
+			}
+		}
+
+		this.maxPredictedRating.put(user.getId(), maxRating);
+
+		List<PredictedItemRating> topNRecommendations = PredictedItemRating
+				.getTopNRecommendations(recommendations, topN);
+
+		if (verbose) {
+			PredictedItemRating.printUserRecommendations(user, dataSet,
+					topNRecommendations);
+		}
+
+		return topNRecommendations;
+	}
+
+	public void setSimilarityMatrix(SimilarityMatrix similarityMatrix) {
+		this.similarityMatrix = similarityMatrix;
+	}
+
+	public void setSimilarityThreshold(double similarityThreshold) {
+		this.similarityThreshold = similarityThreshold;
+	}
+
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+	/**
+	 * Tells whether the item has to be left out of the recommendations for
+	 * the user.
+	 */
+	private boolean skipItem(User user, Item item) {
+		if (isContentBased()) {
+			// Content based types: skip content that the user already has.
+			String contentId = item.getItemContent().getId();
+			return user.getUserContent(contentId) != null;
+		}
+		// All other types: skip items that the user has already rated.
+		return user.getItemRating(item.getId()) != null;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/recommender/DiggDelphi.java b/src/org/yooreeka/algos/reco/collab/recommender/DiggDelphi.java
new file mode 100644
index 0000000..5b3c7f2
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/recommender/DiggDelphi.java
@@ -0,0 +1,282 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.recommender;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.yooreeka.algos.reco.collab.data.BaseDataset;
+import org.yooreeka.algos.reco.collab.data.DiggData;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.RecommendationType;
+import org.yooreeka.algos.reco.collab.model.SimilarUser;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.config.YooreekaConfigurator;
+
+public class DiggDelphi {
+
+	public static void main(String[] args) {
+		BaseDataset ds = DiggData.loadData(YooreekaConfigurator.getHome() + "/data/ch03/digg_stories.csv");
+		User user = ds.getUser(1);
+		DiggDelphi delphi = new DiggDelphi(ds);
+		delphi.recommend(user);
+	}
+
+	private Dataset ds;
+	private Delphi delphiUC;
+	private Delphi delphiUIC;
+	private Delphi delphiUR;
+
+	private Delphi delphiIR;
+
+	private boolean verbose = true;
+
+	public DiggDelphi(Dataset ds) {
+		this.ds = ds;
+
+		delphiUC = new Delphi(ds, RecommendationType.USER_CONTENT_BASED);
+
+		delphiUIC = new Delphi(ds, RecommendationType.USER_ITEM_CONTENT_BASED);
+
+		delphiUR = new Delphi(ds, RecommendationType.USER_BASED);
+
+		delphiIR = new Delphi(ds, RecommendationType.ITEM_BASED);
+
+		if (verbose) {
+			System.out
+					.println("Initialized " + this.getClass().getSimpleName());
+		}
+	}
+
+	public SimilarUser[] findSimilarUsers(User user) {
+		SimilarUser[] topFriends = findSimilarUsers(user, 5);
+
+		if (verbose) {
+			SimilarUser.print(topFriends,
+					"Top Friends for user " + user.getName() + ":");
+		}
+
+		return topFriends;
+	}
+
+	/**
+	 * Merges the friends found by the user-content based recommender with
+	 * the friends found by the user based recommender, in that order, and
+	 * passes the merged list to SimilarUser.getTopNFriends.
+	 */
+	public SimilarUser[] findSimilarUsers(User user, int topN) {
+		List<SimilarUser> merged = new ArrayList<SimilarUser>();
+
+		merged.addAll(Arrays.asList(delphiUC.findSimilarUsers(user, topN)));
+		merged.addAll(Arrays.asList(delphiUR.findSimilarUsers(user, topN)));
+
+		return SimilarUser.getTopNFriends(merged, topN);
+	}
+
+	public List<PredictedItemRating> naiveRecommend(User user, int topN) {
+		List<PredictedItemRating> recommendations = new ArrayList<PredictedItemRating>();
+
+		recommendations.addAll(delphiUIC.recommend(user, topN));
+		recommendations.addAll(delphiUR.recommend(user, topN));
+		recommendations.addAll(delphiIR.recommend(user, topN));
+
+		return PredictedItemRating
+				.getTopNRecommendations(recommendations, topN);
+	}
+
+	public List<PredictedItemRating> recommend(User user) {
+		List<PredictedItemRating> recommendedItems = recommend(user, 5);
+		if (verbose) {
+			PredictedItemRating.printUserRecommendations(user, ds,
+					recommendedItems);
+		}
+		return recommendedItems;
+	}
+
+	/**
+	 * Recommends items by combining three recommenders: user-item content
+	 * based, user based and item based. Their ratings are brought to a common
+	 * scale, ratings below an ad hoc threshold are dropped and the remaining
+	 * ones are averaged per item.
+	 * @return the combined list, cut by PredictedItemRating.getTopNRecommendations
+	 */
+	public List<PredictedItemRating> recommend(User user, int topN) {
+		List<PredictedItemRating> recommendations = new ArrayList<PredictedItemRating>();
+
+		// Run the three recommenders first. Each Delphi records the maximum
+		// predicted rating of the user while it recommends, so reading the
+		// maxima before these calls would return stale or default values.
+		List<PredictedItemRating> uicAll = delphiUIC.recommend(user, topN);
+		List<PredictedItemRating> urAll = delphiUR.recommend(user, topN);
+		List<PredictedItemRating> irAll = delphiIR.recommend(user, topN);
+
+		// Get the maximum predicted ratings from each recommender
+		double maxRatingDelphiUIC = delphiUIC.getMaxPredictedRating(user
+				.getId());
+		double maxRatingDelphiUR = delphiUR.getMaxPredictedRating(user.getId());
+		double maxRatingDelphiIR = delphiIR.getMaxPredictedRating(user.getId());
+
+		// The maximum predicted rating across all recommenders establishes
+		// the relative scaling factor of each one of them.
+		double maxR = Math.max(maxRatingDelphiUIC,
+				Math.max(maxRatingDelphiUR, maxRatingDelphiIR));
+
+		// Recommender 1 -- User-to-Item content based.
+		// A recommender without a positive maximum is left unscaled, which
+		// avoids a division by zero.
+		double scaling = (maxRatingDelphiUIC > 0.0d) ? maxR
+				/ maxRatingDelphiUIC : 1.0d;
+
+		// Set an ad hoc threshold and scale it
+		double scaledThreshold = 0.5 * scaling;
+
+		// Ratings that reach the threshold are copied into a new list;
+		// removing the others from a list inside its own for-each loop
+		// throws ConcurrentModificationException.
+		List<PredictedItemRating> uicList = new ArrayList<PredictedItemRating>();
+
+		for (PredictedItemRating pR : uicAll) {
+
+			double scaledRating = pR.getRating(6) * scaling;
+
+			if (!(scaledRating < scaledThreshold)) {
+				pR.setRating(scaledRating);
+				uicList.add(pR);
+			}
+		}
+
+		// Recommender 2 -- User based collaborative filtering
+		scaling = (maxRatingDelphiUR > 0.0d) ? maxR / maxRatingDelphiUR : 1.0d;
+		scaledThreshold = 0.5 * scaling;
+
+		List<PredictedItemRating> urList = new ArrayList<PredictedItemRating>();
+
+		for (PredictedItemRating pR : urAll) {
+
+			double scaledRating = pR.getRating(6) * scaling;
+
+			if (!(scaledRating < scaledThreshold)) {
+				pR.setRating(scaledRating);
+				urList.add(pR);
+			}
+		}
+
+		// Recommender 3 -- Item based collaborative filtering
+		scaling = (maxRatingDelphiIR > 0.0d) ? maxR / maxRatingDelphiIR : 1.0d;
+		scaledThreshold = 0.5 * scaling;
+
+		List<PredictedItemRating> irList = new ArrayList<PredictedItemRating>();
+
+		for (PredictedItemRating pR : irAll) {
+
+			double scaledRating = pR.getRating(6) * scaling;
+
+			if (!(scaledRating < scaledThreshold)) {
+				pR.setRating(scaledRating);
+				irList.add(pR);
+			}
+		}
+
+		/*
+		 * At this point, uicList, urList, and irList contain ratings that are
+		 * scaled and are not below the threshold value.
+		 */
+		// build a set of items produced by all recommenders
+		Set<Integer> allRecommendedItems = new HashSet<Integer>();
+		for (PredictedItemRating pir : urList) {
+			allRecommendedItems.add(pir.getItemId());
+		}
+		for (PredictedItemRating pir : irList) {
+			allRecommendedItems.add(pir.getItemId());
+		}
+		for (PredictedItemRating pir : uicList) {
+			allRecommendedItems.add(pir.getItemId());
+		}
+
+		for (Integer itemId : allRecommendedItems) {
+			// A recommender that did not propose the item votes with zero.
+			double uicRating = 0;
+			double urRating = 0;
+			double irRating = 0;
+
+			// equals() compares the id values whether getItemId() returns an
+			// int or an Integer.
+			for (PredictedItemRating uic : uicList) {
+				if (itemId.equals(uic.getItemId())) {
+					uicRating = uic.getRating(6);
+				}
+			}
+
+			for (PredictedItemRating ur : urList) {
+				if (itemId.equals(ur.getItemId())) {
+					urRating = ur.getRating(6);
+				}
+			}
+
+			for (PredictedItemRating ir : irList) {
+				if (itemId.equals(ir.getItemId())) {
+					irRating = ir.getRating(6);
+				}
+			}
+
+			double vote = (uicRating + urRating + irRating) / 3.0d;
+			recommendations.add(new PredictedItemRating(user.getId(), itemId,
+					vote));
+		}
+
+		// Nothing to rescale when no recommender proposed an item.
+		if (!recommendations.isEmpty()) {
+			rescale(recommendations, maxR);
+		}
+
+		return PredictedItemRating
+				.getTopNRecommendations(recommendations, topN);
+	}
+
+	/** Rescales the ratings in place so that the largest equals scaleRange. */
+	private void rescale(List<PredictedItemRating> recommendations,
+			double scaleRange) {
+		double maxRating = 0.0d;
+		for (PredictedItemRating pir : recommendations) {
+			maxRating = Math.max(maxRating, pir.getRating(6));
+		}
+		// An empty list or a list without a positive rating has no maximum
+		// to scale by; a neutral factor avoids an index error or a NaN.
+		double factor = (maxRating > 0.0d) ? scaleRange / maxRating : 1.0d;
+		for (PredictedItemRating pir : recommendations) {
+			pir.setRating(pir.getRating(6) * factor);
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/recommender/MovieLensDelphi.java b/src/org/yooreeka/algos/reco/collab/recommender/MovieLensDelphi.java
new file mode 100644
index 0000000..c34da2d
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/recommender/MovieLensDelphi.java
@@ -0,0 +1,324 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.recommender;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.SimilarItem;
+import org.yooreeka.algos.reco.collab.model.SimilarUser;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.algos.reco.collab.similarity.movielens.MovieLensItemSimilarity;
+
+/**
+ * Recommender. Has to be initialized with similarity function and dataset.
+ */
+public class MovieLensDelphi implements Recommender {
+
+	private static final double DEFAULT_SIMILARITY_THRESHOLD = 0.35;
+
+	/** Similarity an item must exceed to be reported by findSimilarItems. */
+	private static final double SIMILAR_ITEM_CUTOFF = 0.5;
+
+	/** Ceiling applied to ratings returned by getTopNRecommendations. */
+	private static final double MAX_RATING = 5.0;
+
+	private Dataset dataSet;
+	private boolean verbose = true;
+	private double similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD;
+
+	private MovieLensItemSimilarity itemSimilarityMatrix;
+
+	/**
+	 * Stores the dataset and eagerly builds the item similarity matrix for it,
+	 * reporting progress and elapsed time on standard output.
+	 */
+	public MovieLensDelphi(Dataset ds) {
+		System.out.println("Entering MovieLensDelphi(Dataset) constructor ...");
+
+		this.dataSet = ds;
+
+		// ------------------------------------------------------------------------
+		System.out.println("Calculating item based similarities...");
+		long start = System.currentTimeMillis();
+
+		itemSimilarityMatrix = new MovieLensItemSimilarity(ds);
+
+		System.out.println("Item based similarities calculated in "
+				+ (System.currentTimeMillis() - start) / 1000 + "(sec).");
+		System.out.println("Similarities ready.");
+		// ------------------------------------------------------------------------
+
+		System.out.println("Leaving MovieLensDelphi(Dataset) constructor ...");
+	}
+
+	// --------------------------------------------------------------------
+	// USER BASED SIMILARITY
+	// --------------------------------------------------------------------
+	// Not available: only an item similarity matrix is built, so both
+	// findSimilarUsers variants below reject the call.
+
+	// --------------------------------------------------------------------
+	// ITEM BASED SIMILARITY
+	// --------------------------------------------------------------------
+
+	// -----------------------------------------------------------
+	// PRIVATE (AUXILIARY) METHODS
+	// -----------------------------------------------------------
+	/**
+	 * Estimates the rating the user would give to the item.
+	 * An existing rating by the user is returned as is. Otherwise the item's
+	 * average rating is reduced by the mean of
+	 * <code>similarity * (itemAverage - userRating)</code> taken over every
+	 * item the user rated whose absolute similarity to the target exceeds the
+	 * similarity threshold.
+	 * With no such item the item average is returned unchanged.
+	 *
+	 * @param user
+	 *            user to predict for.
+	 * @param item
+	 *            item whose rating is estimated.
+	 * @return the existing or the estimated rating.
+	 */
+	private double estimateItemBasedRating(User user, Item item) {
+		// check if the user has already rated the item
+		Rating existingRatingByUser = user.getItemRating(item.getId());
+		if (existingRatingByUser != null) {
+			return existingRatingByUser.getRating();
+		}
+
+		int itemId = item.getId();
+		int userId = user.getId();
+		double itemAvgRating = item.getAverageRating();
+
+		double weightedDeltaSum = 0.0;
+		int sumN = 0;
+
+		for (Item anotherItem : dataSet.getItems()) {
+			// only consider items that were rated by the user
+			Rating anotherItemRating = anotherItem.getUserRating(userId);
+			if (anotherItemRating == null) {
+				continue;
+			}
+
+			double similarityBetweenItems = itemSimilarityMatrix.getValue(
+					itemId, anotherItem.getId());
+			if (Math.abs(similarityBetweenItems) > similarityThreshold) {
+				double delta = itemAvgRating - anotherItemRating.getRating();
+				weightedDeltaSum += similarityBetweenItems * delta;
+				sumN++;
+			}
+		}
+
+		if (sumN == 0) {
+			return itemAvgRating;
+		}
+		return itemAvgRating - (weightedDeltaSum / sumN);
+	}
+
+	/**
+	 * Collects every other item whose similarity to the given item exceeds
+	 * the fixed cut-off SIMILAR_ITEM_CUTOFF.
+	 *
+	 * @param item
+	 *            reference item.
+	 * @return matching items in dataset iteration order; possibly empty.
+	 */
+	private List<SimilarItem> findItemsBasedOnItemSimilarity(Item item) {
+		List<SimilarItem> similarItems = new ArrayList<SimilarItem>();
+		int itemId = item.getId();
+
+		for (Item sItem : dataSet.getItems()) {
+			if (itemId != sItem.getId()) {
+				double similarity = itemSimilarityMatrix.getValue(itemId,
+						sItem.getId());
+				if (similarity > SIMILAR_ITEM_CUTOFF) {
+					similarItems.add(new SimilarItem(sItem, similarity));
+				}
+			}
+		}
+
+		return similarItems;
+	}
+
+	/**
+	 * Looks up the top five similar items and prints them when verbose is on.
+	 *
+	 * @param item
+	 *            reference item.
+	 * @return the result of <code>findSimilarItems(item, 5)</code>.
+	 */
+	public SimilarItem[] findSimilarItems(Item item) {
+		SimilarItem[] topItems = findSimilarItems(item, 5);
+
+		if (verbose) {
+			SimilarItem.printItems(topItems,
+					"Items like item " + item.getName() + ":");
+		}
+		return topItems;
+	}
+
+	/**
+	 * Finds the items most similar to the given one.
+	 *
+	 * @param item
+	 *            reference item.
+	 * @param topN
+	 *            number of items requested from SimilarItem.getTopSimilarItems.
+	 * @return the selection made by SimilarItem.getTopSimilarItems.
+	 */
+	public SimilarItem[] findSimilarItems(Item item, int topN) {
+		List<SimilarItem> similarItems = findItemsBasedOnItemSimilarity(item);
+		return SimilarItem.getTopSimilarItems(similarItems, topN);
+	}
+
+	/**
+	 * Always throws UnsupportedOperationException: no user similarity here.
+	 */
+	public SimilarUser[] findSimilarUsers(User user) {
+		throw new UnsupportedOperationException("Not supported.");
+	}
+
+	/**
+	 * Always throws UnsupportedOperationException: no user similarity here.
+	 */
+	public SimilarUser[] findSimilarUsers(User user, int topN) {
+		throw new UnsupportedOperationException("Not supported.");
+	}
+
+	public Dataset getDataset() {
+		return dataSet;
+	}
+
+	// --------------------------------------------------------------------
+	// AUXILIARY METHODS
+	// --------------------------------------------------------------------
+	public double getSimilarityThreshold() {
+		return similarityThreshold;
+	}
+
+	/**
+	 * Sorts the list in place by descending rating and returns its first topN
+	 * entries. When the best rating exceeds 5 the returned entries are scaled
+	 * in place so that the best one becomes 5.
+	 *
+	 * @param recommendations
+	 *            candidates; reordered and possibly rescaled by this call.
+	 * @param topN
+	 *            maximum number of entries to return.
+	 * @return at most topN entries; empty when there are no candidates.
+	 */
+	public List<PredictedItemRating> getTopNRecommendations(
+			List<PredictedItemRating> recommendations, int topN) {
+		List<PredictedItemRating> topRecommendations = new ArrayList<PredictedItemRating>();
+
+		// Nothing to rank, e.g. the user has already rated every item; the
+		// get(0) below would otherwise throw IndexOutOfBoundsException.
+		if (recommendations.isEmpty()) {
+			return topRecommendations;
+		}
+
+		PredictedItemRating.sort(recommendations);
+		double maxR = recommendations.get(0).getRating();
+
+		for (PredictedItemRating r : recommendations) {
+			if (topRecommendations.size() >= topN) {
+				// have enough recommendations.
+				break;
+			}
+			if (maxR > MAX_RATING) {
+				r.setRating(r.getRating() * (MAX_RATING / maxR));
+			}
+			topRecommendations.add(r);
+		}
+
+		return topRecommendations;
+	}
+
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	public double predictBasedOnItemAverage(Item item) {
+		return item.getAverageRating();
+	}
+
+	public double predictBasedOnUserAverage(User user) {
+		return user.getAverageRating();
+	}
+
+	public double predictRating(User user, Item item) {
+		return estimateItemBasedRating(user, item);
+	}
+
+	// --------------------------------------------------------------------
+	// RECOMMENDATIONS
+	// --------------------------------------------------------------------
+	public List<PredictedItemRating> recommend(User user) {
+		List<PredictedItemRating> recommendedItems = recommend(user, 5);
+		if (verbose) {
+			PredictedItemRating.printUserRecommendations(user, dataSet,
+					recommendedItems);
+		}
+		return recommendedItems;
+	}
+
+	public List<PredictedItemRating> recommend(User user, int topN) {
+		List<PredictedItemRating> recommendations = new ArrayList<PredictedItemRating>();
+
+		for (Item item : dataSet.getItems()) {
+
+			// only consider items that user hasn't rated yet
+			if (user.getItemRating(item.getId()) == null) {
+				double predictedRating = estimateItemBasedRating(user, item);
+
+				if (!Double.isNaN(predictedRating)) {
+					recommendations.add(new PredictedItemRating(user.getId(),
+							item.getId(), predictedRating));
+				}
+			}
+		}
+
+		return getTopNRecommendations(recommendations, topN);
+	}
+
+	public void setSimilarityThreshold(double similarityThreshold) {
+		this.similarityThreshold = similarityThreshold;
+	}
+
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/recommender/PredictedItemRating.java b/src/org/yooreeka/algos/reco/collab/recommender/PredictedItemRating.java
new file mode 100644
index 0000000..463e310
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/recommender/PredictedItemRating.java
@@ -0,0 +1,152 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.recommender;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * Represents predicted user rating of an item. Used to return recommendations
+ * for the user.
+ */
+public class PredictedItemRating {
+	/**
+	 * Sorts list of recommendations in descending order and returns the topN
+	 * leading elements. The list passed in is sorted in place.
+	 *
+	 * @param recommendations
+	 *            candidates; reordered by this call.
+	 * @param topN
+	 *            maximum number of elements to return.
+	 * @return a new list with at most topN elements.
+	 */
+	public static List<PredictedItemRating> getTopNRecommendations(
+			List<PredictedItemRating> recommendations, int topN) {
+		PredictedItemRating.sort(recommendations);
+		// clamp so that a negative or oversized topN yields a valid range
+		int count = Math.max(0, Math.min(topN, recommendations.size()));
+		return new ArrayList<PredictedItemRating>(
+				recommendations.subList(0, count));
+	}
+	/**
+	 * Prints a header with the user's name followed by one line per
+	 * recommended item: its name and the rating rounded to 4 decimals.
+	 */
+	public static void printUserRecommendations(User user, Dataset ds,
+			List<PredictedItemRating> recommendedItems) {
+		System.out.println("\nRecommendations for user " + user.getName()
+				+ ":\n");
+		for (PredictedItemRating r : recommendedItems) {
+			System.out.printf("Item: %-36s, predicted rating: %f\n", ds
+					.getItem(r.getItemId()).getName(), r.getRating(4));
+		}
+	}
+	/**
+	 * Sorts list by rating value in descending order. Items with higher ratings
+	 * will be in the head of the list; NaN ratings are moved to the tail.
+	 *
+	 * @param values
+	 *            list to sort.
+	 */
+	public static void sort(List<PredictedItemRating> values) {
+		Collections.sort(values, new Comparator<PredictedItemRating>() {
+
+			public int compare(PredictedItemRating f1, PredictedItemRating f2) {
+				double r1 = f1.getRating();
+				double r2 = f2.getRating();
+				boolean nan1 = Double.isNaN(r1);
+				boolean nan2 = Double.isNaN(r2);
+				if (nan1 || nan2) {
+					// NaN compares false both ways; rank it last to stay transitive
+					return nan1 == nan2 ? 0 : (nan1 ? 1 : -1);
+				}
+				// reverse order
+				return r1 < r2 ? 1 : (r1 > r2 ? -1 : 0);
+			}
+		});
+	}
+
+	private int userId;
+	private int itemId;
+	private double rating;
+
+	public PredictedItemRating(int userId, int itemId, double rating) {
+		this.userId = userId;
+		this.itemId = itemId;
+		this.rating = rating;
+	}
+
+	public int getItemId() {
+		return itemId;
+	}
+
+	public double getRating() {
+		return rating;
+	}
+
+	/**
+	 * Returns rounded rating value with number of digits after decimal point
+	 * specified by <code>scale</code> parameter. NaN and infinite ratings
+	 * cannot be rounded and are returned unchanged.
+	 *
+	 * @param scale
+	 *            number of digits to keep after decimal point.
+	 * @return rounded value.
+	 */
+	public double getRating(int scale) {
+		if (Double.isNaN(rating) || Double.isInfinite(rating)) {
+			return rating;
+		}
+		BigDecimal bd = new BigDecimal(rating);
+		return bd.setScale(scale, RoundingMode.HALF_UP).doubleValue();
+	}
+
+	public int getUserId() {
+		return userId;
+	}
+
+	public void setRating(double val) {
+		this.rating = val;
+	}
+
+	@Override
+	public String toString() {
+		return this.getClass().getSimpleName() + "[userId: " + userId
+				+ ", itemId: " + itemId + ", rating: " + rating + "]";
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/recommender/Recommender.java b/src/org/yooreeka/algos/reco/collab/recommender/Recommender.java
new file mode 100644
index 0000000..7b96e42
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/recommender/Recommender.java
@@ -0,0 +1,88 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.recommender;
+
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.SimilarItem;
+import org.yooreeka.algos.reco.collab.model.SimilarUser;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ *
+ */
+public interface Recommender {
+
+	// Similarities
+
+	/** Finds items similar to the given item. */
+	SimilarItem[] findSimilarItems(Item item);
+
+	/** Finds items similar to the given item, selecting the top N of them. */
+	SimilarItem[] findSimilarItems(Item item, int topN);
+
+	/** Finds users similar to the given user. */
+	SimilarUser[] findSimilarUsers(User user);
+
+	/** Finds users similar to the given user, selecting the top N of them. */
+	SimilarUser[] findSimilarUsers(User user, int topN);
+
+	// Auxiliary
+
+	Dataset getDataset();
+
+	double getSimilarityThreshold();
+
+	void setSimilarityThreshold(double similarityThreshold);
+
+	// Predictions
+
+	double predictBasedOnItemAverage(Item item);
+
+	double predictBasedOnUserAverage(User user);
+
+	double predictRating(User user, Item item);
+
+	/**
+	 * Returns top 5 recommendations for the user.
+	 *
+	 * @param user user to recommend for.
+	 * @return recommended items with predicted ratings.
+	 */
+	List<PredictedItemRating> recommend(User user);
+
+	/** Returns top N recommendations (items with predicted ratings) for the user. */
+	List<PredictedItemRating> recommend(User user, int topN);
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensItemSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensItemSimilarity.java
new file mode 100644
index 0000000..31e0e68
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensItemSimilarity.java
@@ -0,0 +1,92 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.movielens;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.similarity.naive.SimilarityMatrixImpl;
+import org.yooreeka.algos.reco.collab.similarity.util.PearsonCorrelation;
+
+public class MovieLensItemSimilarity extends SimilarityMatrixImpl {
+
+	/**
+	 * SVUID
+	 */
+	private static final long serialVersionUID = 2571216412528879244L;
+
+	/**
+	 * Builds the matrix for the dataset, using this class's simple name as id.
+	 */
+	public MovieLensItemSimilarity(Dataset ds) {
+		this(MovieLensItemSimilarity.class.getSimpleName(), ds);
+	}
+
+	/**
+	 * Records the id and the dataset's id-mapping flag, then runs calculate.
+	 */
+	public MovieLensItemSimilarity(String id, Dataset ds) {
+		this.id = id;
+		this.useObjIdToIndexMapping = ds.isIdMappingRequired();
+		calculate(ds);
+	}
+
+	/**
+	 * Fills the strict upper triangle of the matrix with the value computed
+	 * by PearsonCorrelation for each item pair and puts 1.0 on the diagonal.
+	 * Cells below the diagonal are not written by this method.
+	 */
+	@Override
+	protected void calculate(Dataset dataSet) {
+		final int itemCount = dataSet.getItemCount();
+		similarityValues = new double[itemCount][itemCount];
+
+		// when ids are mapped to indexes, register every item id up front
+		if (useObjIdToIndexMapping) {
+			for (Item each : dataSet.getItems()) {
+				idMapping.getIndex(String.valueOf(each.getId()));
+			}
+		}
+
+		for (int row = 0; row < itemCount; row++) {
+			Item rowItem = dataSet.getItem(getObjIdFromIndex(row));
+
+			// only the cells above the main diagonal are computed
+			for (int col = row + 1; col < itemCount; col++) {
+				Item colItem = dataSet.getItem(getObjIdFromIndex(col));
+				similarityValues[row][col] = new PearsonCorrelation(dataSet,
+						rowItem, colItem).calculate();
+			}
+
+			// an item compared with itself gets 1
+			similarityValues[row][row] = 1.0;
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensUserSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensUserSimilarity.java
new file mode 100644
index 0000000..585da30
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/movielens/MovieLensUserSimilarity.java
@@ -0,0 +1,107 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.movielens;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.algos.reco.collab.similarity.naive.SimilarityMatrixImpl;
+import org.yooreeka.algos.reco.collab.similarity.util.PearsonCorrelation;
+
+public class MovieLensUserSimilarity extends SimilarityMatrixImpl {
+
+	/**
+	 * SVUID
+	 */
+	private static final long serialVersionUID = 8510536889333771002L;
+
+	public MovieLensUserSimilarity(Dataset ds) {
+		this(MovieLensUserSimilarity.class.getSimpleName(), ds);
+	}
+
+	public MovieLensUserSimilarity(String id, Dataset ds) {
+		this.id = id;
+		this.useObjIdToIndexMapping = ds.isIdMappingRequired();
+		calculate(ds);
+	}
+
+	/**
+	 * Fills the upper triangle with the Pearson correlation of the centered
+	 * ratings two users gave to their shared items (0.0 when they share none)
+	 * and sets the diagonal to 1.0, as the item similarity matrices do.
+	 */
+	@Override
+	protected void calculate(Dataset dataSet) {
+		int nUsers = dataSet.getUserCount();
+		similarityValues = new double[nUsers][nUsers];
+
+		// if we want to use mapping from userId to index then generate
+		// index for every userId
+		if (useObjIdToIndexMapping) {
+			for (User u : dataSet.getUsers()) {
+				idMapping.getIndex(String.valueOf(u.getId()));
+			}
+		}
+
+		for (int u = 0; u < nUsers; u++) {
+			int userAId = getObjIdFromIndex(u);
+			User userA = dataSet.getUser(userAId);
+
+			// A user is fully similar to itself. Correlating a user with itself
+			// gave 0.0 for users without ratings, so assign the value directly.
+			similarityValues[u][u] = 1.0;
+			// we only need to calculate elements above the main diagonal.
+			for (int v = u + 1; v < nUsers; v++) {
+				int userBId = getObjIdFromIndex(v);
+				User userB = dataSet.getUser(userBId);
+
+				/* Collect shared ratings */
+				Integer[] sharedItemIds = User.getSharedItems(userA, userB);
+				if (sharedItemIds.length == 0) {
+					// nothing in common: the cell keeps its initial 0.0
+					continue;
+				}
+				double[] ratingsA = userA.getRatingsForItemList(sharedItemIds);
+				double[] ratingsB = userB.getRatingsForItemList(sharedItemIds);
+
+				/* Center ratings by subtracting average */
+				double avgA = userA.getAverageRating();
+				double avgB = userB.getAverageRating();
+				for (int i = 0; i < sharedItemIds.length; i++) {
+					ratingsA[i] = ratingsA[i] - avgA;
+					ratingsB[i] = ratingsB[i] - avgB;
+				}
+				/* Calculate similarity - Pearson Correlation */
+				similarityValues[u][v] = new PearsonCorrelation(ratingsA,
+						ratingsB).calculate();
+			}
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedItemBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedItemBasedSimilarity.java
new file mode 100644
index 0000000..d74ced3
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedItemBasedSimilarity.java
@@ -0,0 +1,120 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+public class ImprovedItemBasedSimilarity extends SimilarityMatrixImpl {
+
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = -8364129617679022295L;
+
+	/**
+	 * Records the id, the keep-matrix flag and the dataset's id-mapping flag,
+	 * then runs calculate.
+	 */
+	public ImprovedItemBasedSimilarity(String id, Dataset dataSet,
+			boolean keepRatingCountMatrix) {
+		this.id = id;
+		this.keepRatingCountMatrix = keepRatingCountMatrix;
+		this.useObjIdToIndexMapping = dataSet.isIdMappingRequired();
+		calculate(dataSet);
+	}
+
+	/**
+	 * Fills the strict upper triangle of the matrix from the rating count
+	 * matrix of each item pair and puts 1.0 on the diagonal. A pair whose
+	 * agreement count is zero scores 0.0; otherwise the score is one minus
+	 * the weighted disagreements over the total count, normalized to [0..1].
+	 */
+	@Override
+	protected void calculate(Dataset dataSet) {
+		final int itemCount = dataSet.getItemCount();
+		final int ratingValueCount = 5;
+
+		similarityValues = new double[itemCount][itemCount];
+		if (keepRatingCountMatrix) {
+			ratingCountMatrix = new RatingCountMatrix[itemCount][itemCount];
+		}
+
+		// when ids are mapped to indexes, register every item id up front
+		if (useObjIdToIndexMapping) {
+			for (Item each : dataSet.getItems()) {
+				idMapping.getIndex(String.valueOf(each.getId()));
+			}
+		}
+
+		for (int row = 0; row < itemCount; row++) {
+			Item rowItem = dataSet.getItem(getObjIdFromIndex(row));
+
+			// only the cells above the main diagonal are computed
+			for (int col = row + 1; col < itemCount; col++) {
+				Item colItem = dataSet.getItem(getObjIdFromIndex(col));
+				RatingCountMatrix counts = new RatingCountMatrix(rowItem,
+						colItem, ratingValueCount);
+
+				int total = counts.getTotalCount();
+				int agreements = counts.getAgreementCount();
+
+				similarityValues[row][col] = (agreements > 0)
+						? normalizedSimilarity(counts, total) : 0.0;
+
+				// For large datasets
+				if (keepRatingCountMatrix) {
+					ratingCountMatrix[row][col] = counts;
+				}
+			}
+
+			// the diagonal has no rating count matrix; it is simply 1
+			similarityValues[row][row] = 1.0;
+		}
+	}
+
+	/**
+	 * Sums the band counts weighted by their band id (1..maxBandId) and maps
+	 * the resulting similarity onto [0..1]. See the
+	 * ImprovedUserBasedSimilarity class for the detailed explanation.
+	 */
+	private static double normalizedSimilarity(RatingCountMatrix counts,
+			int total) {
+		int maxBandId = counts.getMatrix().length - 1;
+		double weightedDisagreements = 0.0;
+		for (int band = 1; band <= maxBandId; band++) {
+			weightedDisagreements += (double) band * counts.getBandCount(band);
+		}
+		double similarity = 1.0 - (weightedDisagreements / total);
+		return (similarity - 1.0 + maxBandId) / maxBandId;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedUserBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedUserBasedSimilarity.java
new file mode 100644
index 0000000..1bfad22
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/ImprovedUserBasedSimilarity.java
@@ -0,0 +1,129 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+public class ImprovedUserBasedSimilarity extends SimilarityMatrixImpl {
+	// Similarity matrix between the users of a Dataset; disagreement counts are weighted by band id.
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = -4225607333671670946L;
+
+	public ImprovedUserBasedSimilarity(Dataset dataSet) {
+		// Defaults: the id is the simple class name and rating count matrices are kept.
+		this(ImprovedUserBasedSimilarity.class.getSimpleName(), dataSet, true);
+	}
+
+	public ImprovedUserBasedSimilarity(String id, Dataset dataSet,
+			boolean keepRatingCountMatrix) {
+		this.id = id;
+		this.keepRatingCountMatrix = keepRatingCountMatrix;
+		this.useObjIdToIndexMapping = dataSet.isIdMappingRequired();
+		calculate(dataSet); // the matrix is filled eagerly, at construction time
+	}
+
+	// Fills similarityValues (and, when requested, ratingCountMatrix) for the
+	// users of the dataset. Matrix indexes are turned into user ids through
+	// getObjIdFromIndex(): when useObjIdToIndexMapping is false the ids are
+	// assumed to be integers that start with 1 and have no gaps in sequence;
+	// otherwise idMapping provides the translation between id and index.
+	@Override
+	protected void calculate(Dataset dataSet) {
+
+		int nUsers = dataSet.getUserCount();
+		int nRatingValues = 5; // handed to RatingCountMatrix; presumably the size of the rating scale - TODO confirm
+
+		similarityValues = new double[nUsers][nUsers];
+		if (keepRatingCountMatrix) {
+			ratingCountMatrix = new RatingCountMatrix[nUsers][nUsers];
+		}
+
+		// When id mapping is in use, generate an index for every userId before
+		// the loops below call getObjIdFromIndex().
+		if (useObjIdToIndexMapping) {
+			for (User u : dataSet.getUsers()) {
+				idMapping.getIndex(String.valueOf(u.getId()));
+			}
+		}
+
+		for (int u = 0; u < nUsers; u++) {
+
+			int userAId = getObjIdFromIndex(u);
+			User userA = dataSet.getUser(userAId);
+
+			// Only the upper triangle (v > u) is computed; entries below the diagonal are never written here.
+			for (int v = u + 1; v < nUsers; v++) {
+
+				int userBId = getObjIdFromIndex(v);
+				User userB = dataSet.getUser(userBId);
+
+				RatingCountMatrix rcm = new RatingCountMatrix(userA, userB,
+						nRatingValues);
+				int totalCount = rcm.getTotalCount();
+				int agreementCount = rcm.getAgreementCount();
+
+				if (agreementCount > 0) {
+					double weightedDisagreements = 0.0;
+					int maxBandId = rcm.getMatrix().length - 1;
+					for (int matrixBandId = 1; matrixBandId <= maxBandId; matrixBandId++) {
+						double bandWeight = matrixBandId; // the weight grows linearly with the band id
+						weightedDisagreements += bandWeight
+								* rcm.getBandCount(matrixBandId);
+					}
+
+					double similarityValue = 1.0 - (weightedDisagreements / totalCount);
+
+					// normalizing to [0..1]: add (maxBandId - 1), then divide by maxBandId
+					double normalizedSimilarityValue = (similarityValue - 1.0 + maxBandId)
+							/ maxBandId;
+
+					similarityValues[u][v] = normalizedSimilarityValue;
+				} else {
+					similarityValues[u][v] = 0.0;
+				}
+
+				// Retain the per-pair RatingCountMatrix only when the caller asked for it.
+				if (keepRatingCountMatrix) {
+					ratingCountMatrix[u][v] = rcm;
+				}
+
+			}
+
+			// Diagonal (u == v): the similarity is set to 1.
+			similarityValues[u][u] = 1.0; // no RatingCountMatrix is
+											// created for this case
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemBasedSimilarity.java
new file mode 100644
index 0000000..9017099
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemBasedSimilarity.java
@@ -0,0 +1,110 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+public class ItemBasedSimilarity extends SimilarityMatrixImpl {
+	// Similarity matrix between the items of a Dataset, based on the agreementCount / totalCount ratio.
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = 3062035062791168163L;
+
+	public ItemBasedSimilarity(String id, Dataset dataSet,
+			boolean keepRatingCountMatrix) {
+		this.id = id;
+		this.keepRatingCountMatrix = keepRatingCountMatrix;
+		this.useObjIdToIndexMapping = dataSet.isIdMappingRequired();
+		calculate(dataSet); // the matrix is filled eagerly, at construction time
+	}
+
+	@Override
+	protected void calculate(Dataset dataSet) {
+
+		int nItems = dataSet.getItemCount();
+		int nRatingValues = 5; // handed to RatingCountMatrix; presumably the size of the rating scale - TODO confirm
+
+		similarityValues = new double[nItems][nItems];
+
+		if (keepRatingCountMatrix) {
+			ratingCountMatrix = new RatingCountMatrix[nItems][nItems];
+		}
+
+		// When id mapping is in use, generate an index for every itemId before
+		// the loops below call getObjIdFromIndex().
+		if (useObjIdToIndexMapping) {
+			for (Item item : dataSet.getItems()) {
+				idMapping.getIndex(String.valueOf(item.getId()));
+			}
+		}
+
+		int totalCount = 0;
+		int agreementCount = 0;
+
+		for (int u = 0; u < nItems; u++) {
+
+			int itemAId = getObjIdFromIndex(u);
+			Item itemA = dataSet.getItem(itemAId);
+
+			// Only elements above the main diagonal (v > u) are calculated and stored.
+			for (int v = u + 1; v < nItems; v++) {
+
+				int itemBId = getObjIdFromIndex(v);
+
+				Item itemB = dataSet.getItem(itemBId);
+
+				RatingCountMatrix rcm = new RatingCountMatrix(itemA, itemB,
+						nRatingValues);
+
+				totalCount = rcm.getTotalCount();
+				agreementCount = rcm.getAgreementCount();
+				// similarity is the ratio agreementCount / totalCount (0.0 when there is no agreement)
+				if (agreementCount > 0) {
+					similarityValues[u][v] = (double) agreementCount
+							/ (double) totalCount;
+				} else {
+					similarityValues[u][v] = 0.0;
+				}
+
+				if (keepRatingCountMatrix) {
+					ratingCountMatrix[u][v] = rcm;
+				}
+			}
+
+			// Diagonal (u == v): the similarity is set to 1; no RatingCountMatrix is stored for it.
+			similarityValues[u][u] = 1.0;
+
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemContentBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemContentBasedSimilarity.java
new file mode 100644
index 0000000..7570b52
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemContentBasedSimilarity.java
@@ -0,0 +1,92 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.util.metrics.CosineSimilarityMeasure;
+
+/**
+ * Similarity between items: cosine similarity of the term vectors of the content associated with items.
+ */
+public class ItemContentBasedSimilarity extends SimilarityMatrixImpl {
+	// No RatingCountMatrix is created by this class; keepRatingCountMatrix is left at its default.
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = -2807190886025734879L;
+
+	public ItemContentBasedSimilarity(String id, Dataset ds) {
+		this.id = id;
+		this.useObjIdToIndexMapping = ds.isIdMappingRequired();
+		calculate(ds); // the matrix is filled eagerly, at construction time
+	}
+
+	@Override
+	protected void calculate(Dataset dataSet) {
+		int nItems = dataSet.getItemCount();
+
+		similarityValues = new double[nItems][nItems];
+
+		// When id mapping is in use, generate an index for every itemId before
+		// the loops below call getObjIdFromIndex().
+		if (useObjIdToIndexMapping) {
+			for (Item item : dataSet.getItems()) {
+				idMapping.getIndex(String.valueOf(item.getId()));
+			}
+		}
+
+		CosineSimilarityMeasure cosineMeasure = new CosineSimilarityMeasure();
+		String[] allTerms = dataSet.getAllTerms(); // both term vectors are built against this same term list
+
+		for (int u = 0; u < nItems; u++) {
+
+			int itemAId = getObjIdFromIndex(u);
+			Item itemA = dataSet.getItem(itemAId);
+
+			// Only elements above the main diagonal (v > u) are calculated and stored.
+			for (int v = u + 1; v < nItems; v++) {
+
+				int itemBId = getObjIdFromIndex(v);
+				Item itemB = dataSet.getItem(itemBId);
+				// NOTE(review): itemA's term vector is rebuilt for every v; it looks hoistable - confirm getTermVector has no side effects
+				similarityValues[u][v] = cosineMeasure.calculate(itemA
+						.getItemContent().getTermVector(allTerms), itemB
+						.getItemContent().getTermVector(allTerms));
+			}
+
+			// Diagonal (u == v): the similarity is set to 1.
+			similarityValues[u][u] = 1.0;
+
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemPenaltyBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemPenaltyBasedSimilarity.java
new file mode 100644
index 0000000..a89a70c
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/ItemPenaltyBasedSimilarity.java
@@ -0,0 +1,161 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+public class ItemPenaltyBasedSimilarity extends SimilarityMatrixImpl {
+	// Similarity matrix between the items of a Dataset; disagreements are penalized with non-linear band weights.
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = -6137735175034641281L;
+
+	public ItemPenaltyBasedSimilarity(Dataset dataSet) {
+		// Defaults: the id is the simple class name and rating count matrices are kept.
+		this(ItemPenaltyBasedSimilarity.class.getSimpleName(), dataSet, true);
+	}
+
+	public ItemPenaltyBasedSimilarity(String id, Dataset dataSet,
+			boolean keepRatingCountMatrix) {
+		this.id = id;
+		this.keepRatingCountMatrix = keepRatingCountMatrix;
+		this.useObjIdToIndexMapping = dataSet.isIdMappingRequired();
+		calculate(dataSet); // the matrix is filled eagerly, at construction time
+	}
+
+	@Override
+	protected void calculate(Dataset dataSet) {
+
+		int nItems = dataSet.getItemCount();
+		int nRatingValues = 5; // handed to RatingCountMatrix; presumably the size of the rating scale - TODO confirm
+
+		/*
+		 * The penalties distort the scale that we use for similarities;
+		 * scaleFactor is an auxiliary variable for scaling back to [0,1].
+		 */
+		double scaleFactor = 0.0; // largest bandWeight seen so far; it is kept across item pairs
+
+		similarityValues = new double[nItems][nItems];
+
+		if (keepRatingCountMatrix) {
+			ratingCountMatrix = new RatingCountMatrix[nItems][nItems];
+		}
+
+		// When id mapping is in use, generate an index for every itemId before
+		// the loops below call getObjIdFromIndex().
+		if (useObjIdToIndexMapping) {
+			for (Item item : dataSet.getItems()) {
+				idMapping.getIndex(String.valueOf(item.getId()));
+			}
+		}
+
+		// Declared once, outside the double loop, and reassigned for every
+		// item pair.
+		int totalCount = 0;
+		int agreementCount = 0;
+
+		for (int u = 0; u < nItems; u++) {
+
+			int itemAId = getObjIdFromIndex(u);
+			Item itemA = dataSet.getItem(itemAId);
+
+			// Only elements above the main diagonal (v > u) are calculated and stored.
+			for (int v = u + 1; v < nItems; v++) {
+
+				int itemBId = getObjIdFromIndex(v);
+
+				Item itemB = dataSet.getItem(itemBId);
+
+				RatingCountMatrix rcm = new RatingCountMatrix(itemA, itemB,
+						nRatingValues);
+
+				totalCount = rcm.getTotalCount();
+				agreementCount = rcm.getAgreementCount();
+
+				if (agreementCount > 0) {
+
+					/*
+					 * Same weighted-disagreement computation as in the
+					 * ImprovedUserBasedSimilarity class, with other band weights.
+					 */
+					double weightedDisagreements = 0.0;
+
+					int maxBandId = rcm.getMatrix().length - 1;
+
+					for (int matrixBandId = 1; matrixBandId <= maxBandId; matrixBandId++) {
+
+						/*
+						 * The following is a heuristic. Can you figure out what
+						 * characteristics are captured in such an expression?
+						 * The numbers 1.8 and 0.4 are arbitrary, however, we
+						 * could define them by solving an optimization problem.
+						 * How would you formulate the problem? How would you
+						 * solve it?
+						 */
+						double bandWeight = 1.8 - Math.exp(1 - matrixBandId); // 0.8 for band 1, increasing with the band id
+						bandWeight = Math.pow(bandWeight, 0.4);
+						// remember the largest weight; it is used for rescaling below
+						if (bandWeight > scaleFactor) {
+							scaleFactor = bandWeight;
+						}
+
+						weightedDisagreements += bandWeight
+								* rcm.getBandCount(matrixBandId);
+					}
+
+					double similarityValue = 1.0 - (weightedDisagreements / totalCount);
+
+					// w is the upper (negative) bound of the weighted
+					// similarity scale: scaleFactor times (totalCount - agreementCount)
+					double w = scaleFactor * (totalCount - agreementCount);
+					// shift by w and divide by (w + 1) to scale the value back
+					similarityValues[u][v] = (w + similarityValue) / (w + 1);
+
+				} else {
+					similarityValues[u][v] = 0.0;
+				}
+
+				if (keepRatingCountMatrix) {
+					ratingCountMatrix[u][v] = rcm;
+				}
+			}
+
+			// Diagonal (u == v): the similarity is set to 1;
+			// no RatingCountMatrix is created for this case.
+			similarityValues[u][u] = 1.0;
+
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrix.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrix.java
new file mode 100644
index 0000000..681742f
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrix.java
@@ -0,0 +1,74 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+/**
+ * Defines a similarity matrix. For user-oriented methods it represents
+ * similarities between users and for item-oriented methods this matrix
+ * represents similarities between items.
+ * Objects are addressed by their Integer ids.
+ */
+public interface SimilarityMatrix extends java.io.Serializable {
+
+	/**
+	 * Similarity matrix id.
+	 * 
+	 * @return the id of this similarity matrix
+	 */
+	public abstract String getId();
+	// Returns the RatingCountMatrix kept for the pair of objects identified by idX and idY.
+	public abstract RatingCountMatrix getRatingCountMatrix(Integer idX,
+			Integer idY);
+
+	/**
+	 * Returns matrix of similarities. For user-oriented methods it represents
+	 * similarities between users and for item-oriented methods the matrix
+	 * represents similarities between items.
+	 * 
+	 * @return similarity matrix
+	 */
+	public abstract double[][] getSimilarityMatrix();
+
+	/**
+	 * Returns similarity value between two objects identified by their IDs.
+	 * 
+	 * @param idX id of the first object (user or item)
+	 * @param idY id of the second object (user or item)
+	 * @return similarity value for the pair
+	 */
+	public abstract double getValue(Integer idX, Integer idY);
+	// Tells whether rating count matrices are available through getRatingCountMatrix().
+	public abstract boolean isRatingCountMatrixAvailable();
+	// Prints the matrix (SimilarityMatrixImpl writes each row to System.out).
+	public void print();
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrixImpl.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrixImpl.java
new file mode 100644
index 0000000..fb4bfbd
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/SimilarityMatrixImpl.java
@@ -0,0 +1,148 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import java.util.Arrays;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+import org.yooreeka.util.internet.crawling.util.ValueToIndexMapping;
+
+public abstract class SimilarityMatrixImpl implements SimilarityMatrix {
+	// Base class: holds the matrix data and the id <-> index translation; subclasses implement calculate().
+	private static final long serialVersionUID = -8119322978934551969L;
+
+	protected String id;
+	protected double similarityValues[][] = null; // null until calculate() allocates it
+	protected RatingCountMatrix ratingCountMatrix[][] = null; // stays null unless a subclass allocates it
+	protected boolean keepRatingCountMatrix = false;
+	// true: ids are translated through idMapping; false: index = id - 1
+	protected boolean useObjIdToIndexMapping = true;
+	protected ValueToIndexMapping idMapping = new ValueToIndexMapping();
+
+	protected SimilarityMatrixImpl() {
+	}
+	// Computes similarityValues (and optionally ratingCountMatrix) for the given dataset.
+	protected abstract void calculate(Dataset dataSet);
+
+	public String getId() {
+		return this.id;
+	}
+
+	/**
+	 * Translates an object id into a matrix index (id - 1 when no mapping is used).
+	 * @param objId
+	 *            user or item id.
+	 * @return index that can be used to access the object in the matrix.
+	 */
+	protected int getIndexFromObjId(Integer objId) {
+		int index = 0;
+		if (useObjIdToIndexMapping) {
+			index = idMapping.getIndex(String.valueOf(objId));
+		} else {
+			index = objId - 1;
+		}
+		return index;
+	}
+	// Reverse translation: matrix index to user or item id (index + 1 when no mapping is used).
+	protected Integer getObjIdFromIndex(int index) {
+		Integer objId;
+		if (useObjIdToIndexMapping) {
+			objId = Integer.parseInt(idMapping.getValue(index));
+		} else {
+			objId = index + 1;
+		}
+		return objId;
+	}
+	// Returns the RatingCountMatrix stored at [index of idX][index of idY].
+	public RatingCountMatrix getRatingCountMatrix(Integer idX, Integer idY) {
+		int x = getIndexFromObjId(idX);
+		int y = getIndexFromObjId(idY);
+		// NOTE(review): ratingCountMatrix may still be null here, and x/y are not reordered as in getValue()
+		return ratingCountMatrix[x][y];
+	}
+
+	public double[][] getSimilarityMatrix() {
+		return similarityValues; // the internal array itself, not a copy
+	}
+
+	public boolean getUseObjIdToIndexMapping() {
+		return useObjIdToIndexMapping;
+	}
+	// Returns the similarity of the two ids; the argument order does not matter.
+	public double getValue(Integer idX, Integer idY) {
+		if (similarityValues == null) {
+			throw new IllegalStateException(
+					"You have to calculate similarities first.");
+		}
+
+		int x = getIndexFromObjId(idX);
+		int y = getIndexFromObjId(idY);
+		// Order the indexes so that the entry with row <= column is always the one read.
+		int i, j;
+		if (x <= y) {
+			i = x;
+			j = y;
+		} else {
+			i = y;
+			j = x;
+		}
+		return similarityValues[i][j];
+	}
+
+	public boolean isRatingCountMatrixAvailable() {
+		return keepRatingCountMatrix;
+	}
+	// Writes every row of similarityValues to System.out; does nothing when the matrix is null.
+	public void print() {
+		if (similarityValues != null) {
+			for (double[] row : this.similarityValues) {
+				System.out.println(Arrays.toString(row));
+			}
+		}
+	}
+	// Writes at most the first nRows rows to System.out; the loop still visits every row.
+	public void print(int nRows) {
+		int count = 0;
+		if (similarityValues != null) {
+			for (double[] row : this.similarityValues) {
+				if (count < nRows) {
+					System.out.println(Arrays.toString(row));
+				}
+				count++;
+			}
+		}
+	}
+
+	public void setUseObjIdToIndexMapping(boolean value) {
+		this.useObjIdToIndexMapping = value;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/UserBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/UserBasedSimilarity.java
new file mode 100644
index 0000000..8c631de
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/UserBasedSimilarity.java
@@ -0,0 +1,117 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+public class UserBasedSimilarity extends SimilarityMatrixImpl {
+	// Similarity matrix between the users of a Dataset, based on the agreementCount / totalCount ratio.
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = 5741616253320567238L;
+
+	public UserBasedSimilarity(Dataset dataSet) {
+		// Defaults: the id is the simple class name and rating count matrices are kept.
+		this(UserBasedSimilarity.class.getSimpleName(), dataSet, true);
+	}
+
+	public UserBasedSimilarity(String id, Dataset dataSet,
+			boolean keepRatingCountMatrix) {
+		this.id = id;
+		this.keepRatingCountMatrix = keepRatingCountMatrix;
+		this.useObjIdToIndexMapping = dataSet.isIdMappingRequired();
+		calculate(dataSet); // the matrix is filled eagerly, at construction time
+	}
+
+	// Fills similarityValues (and, when requested, ratingCountMatrix) for the
+	// users of the dataset. Matrix indexes are turned into user ids through
+	// getObjIdFromIndex(): when useObjIdToIndexMapping is false the ids are
+	// assumed to be integers that start with 1 and have no gaps in sequence;
+	// otherwise idMapping provides the translation between id and index.
+	@Override
+	protected void calculate(Dataset dataSet) {
+
+		int nUsers = dataSet.getUserCount();
+		int nRatingValues = 5; // handed to RatingCountMatrix; presumably the size of the rating scale - TODO confirm
+
+		similarityValues = new double[nUsers][nUsers];
+
+		if (keepRatingCountMatrix) {
+			ratingCountMatrix = new RatingCountMatrix[nUsers][nUsers];
+		}
+
+		// When id mapping is in use, generate an index for every userId before
+		// the loops below call getObjIdFromIndex().
+		if (useObjIdToIndexMapping) {
+			for (User u : dataSet.getUsers()) {
+				idMapping.getIndex(String.valueOf(u.getId()));
+			}
+		}
+
+		for (int u = 0; u < nUsers; u++) {
+
+			int userAId = getObjIdFromIndex(u);
+			User userA = dataSet.getUser(userAId);
+			// Only elements above the main diagonal (v > u) are calculated and stored.
+			for (int v = u + 1; v < nUsers; v++) {
+
+				int userBId = getObjIdFromIndex(v);
+				User userB = dataSet.getUser(userBId);
+
+				RatingCountMatrix rcm = new RatingCountMatrix(userA, userB,
+						nRatingValues);
+
+				int totalCount = rcm.getTotalCount();
+				int agreementCount = rcm.getAgreementCount();
+				// similarity is the ratio agreementCount / totalCount (0.0 when there is no agreement)
+				if (agreementCount > 0) {
+
+					similarityValues[u][v] = (double) agreementCount
+							/ (double) totalCount;
+				} else {
+					similarityValues[u][v] = 0.0;
+				}
+
+				// Retain the per-pair RatingCountMatrix only when the caller asked for it.
+				if (keepRatingCountMatrix) {
+					ratingCountMatrix[u][v] = rcm;
+				}
+			}
+
+			// Diagonal (u == v): the similarity is set to 1;
+			// no RatingCountMatrix is created for this case.
+			similarityValues[u][u] = 1.0;
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/UserContentBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/UserContentBasedSimilarity.java
new file mode 100644
index 0000000..101be1e
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/UserContentBasedSimilarity.java
@@ -0,0 +1,107 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.util.metrics.CosineSimilarityMeasure;
+
+/**
+ * Similarity between users based on the content associated with users.
+ * <p>
+ * For a pair of users (A, B) every content item of A is matched with the
+ * content item of B that yields the highest cosine similarity of term
+ * vectors; the similarity is the average of these best matches over the
+ * content of A. Only the upper triangle of the matrix is calculated and the
+ * diagonal is set to 1.
+ */
+public class UserContentBasedSimilarity extends SimilarityMatrixImpl {
+
+	/**
+	 * SVUID
+	 */
+	private static final long serialVersionUID = 5809078434246172835L;
+
+	/**
+	 * Creates the matrix and immediately calculates all similarity values.
+	 * 
+	 * @param id
+	 *            similarity matrix id.
+	 * @param ds
+	 *            dataset that provides the users, their content and the terms.
+	 */
+	public UserContentBasedSimilarity(String id, Dataset ds) {
+		this.id = id;
+		this.useObjIdToIndexMapping = ds.isIdMappingRequired();
+		calculate(ds);
+	}
+
+	/**
+	 * Fills <code>similarityValues</code> for every pair of users.
+	 * 
+	 * @param dataSet
+	 *            dataset that provides the users, their content and the terms.
+	 */
+	@Override
+	protected void calculate(Dataset dataSet) {
+
+		int nUsers = dataSet.getUserCount();
+
+		similarityValues = new double[nUsers][nUsers];
+
+		// if we want to use mapping from userId to index then generate
+		// index for every userId
+		if (useObjIdToIndexMapping) {
+			for (User u : dataSet.getUsers()) {
+				idMapping.getIndex(String.valueOf(u.getId()));
+			}
+		}
+
+		CosineSimilarityMeasure cosineMeasure = new CosineSimilarityMeasure();
+		String[] allTerms = dataSet.getAllTerms();
+
+		for (int u = 0; u < nUsers; u++) {
+			int userAId = getObjIdFromIndex(u);
+			User userA = dataSet.getUser(userAId);
+
+			for (int v = u + 1; v < nUsers; v++) {
+
+				int userBId = getObjIdFromIndex(v);
+				User userB = dataSet.getUser(userBId);
+
+				// sum of the best match found in B's content for each
+				// content item of A
+				double similarity = 0.0;
+
+				for (Content userAContent : userA.getUserContent()) {
+
+					double bestCosineSimValue = 0.0;
+
+					for (Content userBContent : userB.getUserContent()) {
+						double cosineSimValue = cosineMeasure.calculate(
+								userAContent.getTermVector(allTerms),
+								userBContent.getTermVector(allTerms));
+						bestCosineSimValue = Math.max(bestCosineSimValue,
+								cosineSimValue);
+					}
+
+					similarity += bestCosineSimValue;
+				}
+
+				// A user without any content has nothing to compare: store 0
+				// instead of the NaN that 0.0 / 0 would produce.
+				int userAContentCount = userA.getUserContent().size();
+				similarityValues[u][v] = (userAContentCount > 0) ? similarity
+						/ userAContentCount : 0.0;
+			}
+
+			// for u == v assign 1.
+			similarityValues[u][u] = 1.0;
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/naive/UserItemContentBasedSimilarity.java b/src/org/yooreeka/algos/reco/collab/similarity/naive/UserItemContentBasedSimilarity.java
new file mode 100644
index 0000000..e41f437
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/naive/UserItemContentBasedSimilarity.java
@@ -0,0 +1,184 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.naive;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.util.internet.crawling.util.ValueToIndexMapping;
+import org.yooreeka.util.metrics.CosineSimilarityMeasure;
+
+/**
+ * Similarity between users based on the content associated with users.
+ */
+public class UserItemContentBasedSimilarity extends SimilarityMatrixImpl {
+
+	/**
+	 * SVUID
+	 */
+	private static final long serialVersionUID = -372816966539384847L;
+
+	private ValueToIndexMapping idMappingForUser = new ValueToIndexMapping();
+	private ValueToIndexMapping idMappingForItem = new ValueToIndexMapping();
+
+	public UserItemContentBasedSimilarity(String id, Dataset ds) {
+		this.id = id;
+		this.useObjIdToIndexMapping = ds.isIdMappingRequired();
+		calculate(ds);
+	}
+
+	@Override
+	protected void calculate(Dataset dataSet) {
+
+		int nUsers = dataSet.getUserCount();
+		int nItems = dataSet.getItemCount();
+
+		similarityValues = new double[nUsers][nItems];
+
+		// if we want to use mapping from userId/itemId to matrix index
+		// then we need to generate index for every userId and itemId
+		if (useObjIdToIndexMapping) {
+			for (User u : dataSet.getUsers()) {
+				idMappingForUser.getIndex(String.valueOf(u.getId()));
+			}
+
+			for (Item i : dataSet.getItems()) {
+				idMappingForItem.getIndex(String.valueOf(i.getId()));
+			}
+		}
+
+		CosineSimilarityMeasure cosineMeasure = new CosineSimilarityMeasure();
+		String[] allTerms = dataSet.getAllTerms();
+
+		for (int u = 0; u < nUsers; u++) {
+			int userId = getUserIdForIndex(u);
+			User user = dataSet.getUser(userId);
+
+			for (int v = 0; v < nItems; v++) {
+
+				int itemId = getItemIdFromIndex(v);
+				Item item = dataSet.getItem(itemId);
+
+				double simValue = 0.0;
+				double bestCosineSimValue = 0.0;
+
+				for (Content userContent : user.getUserContent()) {
+
+					simValue = cosineMeasure.calculate(userContent
+							.getTermVector(allTerms), item.getItemContent()
+							.getTermVector(allTerms));
+					bestCosineSimValue = Math.max(bestCosineSimValue, simValue);
+				}
+
+				similarityValues[u][v] = bestCosineSimValue;
+			}
+		}
+	}
+
+	/*
+	 * Utility method to convert itemId into matrix index
+	 */
+	private int getIndexForItemId(Integer itemId) {
+		int index = 0;
+		if (useObjIdToIndexMapping) {
+			index = idMappingForItem.getIndex(String.valueOf(itemId));
+		} else {
+			index = itemId - 1;
+		}
+		return index;
+	}
+
+	/*
+	 * Utility method to convert userId into matrix index.
+	 */
+	private int getIndexForUserId(Integer userId) {
+		int index = 0;
+		if (useObjIdToIndexMapping) {
+			index = idMappingForUser.getIndex(String.valueOf(userId));
+		} else {
+			index = userId - 1;
+		}
+		return index;
+	}
+
+	@Override
+	protected int getIndexFromObjId(Integer objId) {
+		throw new UnsupportedOperationException(
+				"Should not be used. Use user or item specific method istead.");
+	}
+
+	/*
+	 * Utility method to convert matrix index into itemId.
+	 */
+	private Integer getItemIdFromIndex(int index) {
+		Integer objId;
+		if (useObjIdToIndexMapping) {
+			objId = Integer.parseInt(idMappingForItem.getValue(index));
+		} else {
+			objId = index + 1;
+		}
+		return objId;
+	}
+
+	@Override
+	protected Integer getObjIdFromIndex(int index) {
+		throw new UnsupportedOperationException(
+				"Should not be used.  Use user or item specific method istead.");
+	}
+
+	/*
+	 * Utility method to convert matrix index into userId
+	 */
+	private Integer getUserIdForIndex(int index) {
+		Integer objId;
+		if (useObjIdToIndexMapping) {
+			objId = Integer.parseInt(idMappingForUser.getValue(index));
+		} else {
+			objId = index + 1;
+		}
+		return objId;
+	}
+
+	@Override
+	public double getValue(Integer userId, Integer itemId) {
+		if (similarityValues == null) {
+			throw new IllegalStateException(
+					"You have to calculate similarities first.");
+		}
+
+		int x = getIndexForUserId(userId);
+		int y = getIndexForItemId(itemId);
+
+		return similarityValues[x][y];
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrix.java b/src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrix.java
new file mode 100644
index 0000000..55e19cc
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrix.java
@@ -0,0 +1,82 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.triangular;
+
+import java.util.Hashtable;
+
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+
+/**
+ * Defines a similarity matrix, which uses a <code>Hashtable</code>. The
+ * <code>Hashtable</code> stores the upper triangular part of the similarity
+ * matrix.
+ * 
+ * Note: If the similarity matrix is <b>not symmetric</b> then this is not an
+ * appropriate representation. For example, in the case of user-oriented
+ * methods you might want the similarity matrix to reflect the asymmetry
+ * between the tastes of various individuals. Person A may like person B and
+ * consider himself similar to person B. However, person B may not feel the
+ * same way.
+ * 
+ */
+public interface UpperTriangularSimilarityMatrix extends java.io.Serializable {
+
+	/**
+	 * Similarity matrix id.
+	 * 
+	 * @return the id of this similarity matrix
+	 */
+	String getId();
+
+	/**
+	 * Returns the rating count matrix for two objects identified by their IDs.
+	 * 
+	 * @param idX
+	 *            id of the first object
+	 * @param idY
+	 *            id of the second object
+	 * @return rating count matrix for the pair
+	 */
+	RatingCountMatrix getRatingCountMatrix(Integer idX, Integer idY);
+
+	/**
+	 * Returns an upper triangular matrix of similarities. For user-oriented
+	 * methods it represents similarities between users and for item-oriented
+	 * methods the matrix represents similarities between items.
+	 * 
+	 * @return similarity matrix
+	 */
+	Hashtable<Integer, double[]> getSimilarityMatrix();
+
+	/**
+	 * Returns similarity value between two objects identified by their IDs.
+	 * 
+	 * @param idX
+	 *            id of the first object
+	 * @param idY
+	 *            id of the second object
+	 * @return similarity value for the pair
+	 */
+	double getValue(Integer idX, Integer idY);
+
+	/**
+	 * Tells whether rating count matrices can be requested from this matrix.
+	 * 
+	 * @return <code>true</code> if rating count matrices are available
+	 */
+	boolean isRatingCountMatrixAvailable();
+
+	/**
+	 * Prints the similarity matrix; the destination is left to the
+	 * implementation.
+	 */
+	void print();
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrixImpl.java b/src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrixImpl.java
new file mode 100644
index 0000000..192cbc3
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/triangular/UpperTriangularSimilarityMatrixImpl.java
@@ -0,0 +1,150 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.triangular;
+
+import java.util.Arrays;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.similarity.naive.SimilarityMatrix;
+import org.yooreeka.algos.reco.collab.similarity.util.RatingCountMatrix;
+import org.yooreeka.util.internet.crawling.util.ValueToIndexMapping;
+
+/**
+ * Base class for similarity matrices of which only the upper triangular part
+ * is read: {@link #getValue(Integer, Integer)} orders the two indices so that
+ * the row index never exceeds the column index.
+ * <p>
+ * Object ids are converted into matrix indices either through
+ * <code>idMapping</code> or, when the mapping is switched off, by assuming
+ * that index = id - 1.
+ */
+public abstract class UpperTriangularSimilarityMatrixImpl implements
+		SimilarityMatrix {
+
+	private static final long serialVersionUID = -6083265402166050924L;
+
+	protected String id;
+	protected double similarityValues[][] = null;
+	protected RatingCountMatrix ratingCountMatrix[][] = null;
+	protected boolean keepRatingCountMatrix = false;
+
+	protected boolean useObjIdToIndexMapping = true;
+	protected ValueToIndexMapping idMapping = new ValueToIndexMapping();
+
+	protected UpperTriangularSimilarityMatrixImpl() {
+	}
+
+	/**
+	 * Calculates the similarity values for the given dataset.
+	 * 
+	 * @param dataSet
+	 *            dataset to calculate the similarities for.
+	 */
+	protected abstract void calculate(Dataset dataSet);
+
+	public String getId() {
+		return this.id;
+	}
+
+	/**
+	 * 
+	 * @param objId
+	 *            user or item id.
+	 * @return index that can be used to access the object in the matrix.
+	 */
+	protected int getIndexFromObjId(Integer objId) {
+		int index = 0;
+		if (useObjIdToIndexMapping) {
+			index = idMapping.getIndex(String.valueOf(objId));
+		} else {
+			index = objId - 1;
+		}
+		return index;
+	}
+
+	/**
+	 * 
+	 * @param index
+	 *            index of the object in the matrix.
+	 * @return user or item id that corresponds to the index.
+	 */
+	protected Integer getObjIdFromIndex(int index) {
+		Integer objId;
+		if (useObjIdToIndexMapping) {
+			objId = Integer.parseInt(idMapping.getValue(index));
+		} else {
+			objId = index + 1;
+		}
+		return objId;
+	}
+
+	/**
+	 * Returns the rating count matrix stored for the pair (idX, idY). The
+	 * indices are used in the given order; they are not reordered, because
+	 * the element at the swapped position would describe the pair with the
+	 * roles of the two objects exchanged.
+	 * 
+	 * @param idX
+	 *            id of the first object.
+	 * @param idY
+	 *            id of the second object.
+	 * @return the stored rating count matrix.
+	 * @throws IllegalStateException
+	 *             if no rating count matrices were kept; see
+	 *             {@link #isRatingCountMatrixAvailable()}.
+	 */
+	public RatingCountMatrix getRatingCountMatrix(Integer idX, Integer idY) {
+		if (ratingCountMatrix == null) {
+			throw new IllegalStateException(
+					"Rating count matrices are not available.");
+		}
+
+		int x = getIndexFromObjId(idX);
+		int y = getIndexFromObjId(idY);
+
+		return ratingCountMatrix[x][y];
+	}
+
+	public double[][] getSimilarityMatrix() {
+		return similarityValues;
+	}
+
+	public boolean getUseObjIdToIndexMapping() {
+		return useObjIdToIndexMapping;
+	}
+
+	/**
+	 * Returns the similarity value between two objects identified by their
+	 * ids, in either order.
+	 * 
+	 * @param idX
+	 *            id of the first object.
+	 * @param idY
+	 *            id of the second object.
+	 * @return similarity value read from the upper triangle.
+	 * @throws IllegalStateException
+	 *             if the similarities have not been calculated.
+	 */
+	public double getValue(Integer idX, Integer idY) {
+		if (similarityValues == null) {
+			throw new IllegalStateException(
+					"You have to calculate similarities first.");
+		}
+
+		int x = getIndexFromObjId(idX);
+		int y = getIndexFromObjId(idY);
+
+		// always address the upper triangle: row <= column
+		int i, j;
+		if (x <= y) {
+			i = x;
+			j = y;
+		} else {
+			i = y;
+			j = x;
+		}
+		return similarityValues[i][j];
+	}
+
+	public boolean isRatingCountMatrixAvailable() {
+		return keepRatingCountMatrix;
+	}
+
+	/**
+	 * Prints every row of the similarity matrix to standard output.
+	 */
+	public void print() {
+		if (similarityValues != null) {
+			for (double[] row : this.similarityValues) {
+				System.out.println(Arrays.toString(row));
+			}
+		}
+	}
+
+	/**
+	 * Prints the first rows of the similarity matrix to standard output.
+	 * 
+	 * @param nRows
+	 *            maximum number of rows to print.
+	 */
+	public void print(int nRows) {
+		if (similarityValues != null) {
+			// stop as soon as the requested number of rows was printed
+			for (int r = 0; r < similarityValues.length && r < nRows; r++) {
+				System.out.println(Arrays.toString(similarityValues[r]));
+			}
+		}
+	}
+
+	public void setUseObjIdToIndexMapping(boolean value) {
+		this.useObjIdToIndexMapping = value;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/util/PearsonCorrelation.java b/src/org/yooreeka/algos/reco/collab/similarity/util/PearsonCorrelation.java
new file mode 100644
index 0000000..e33e90d
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/util/PearsonCorrelation.java
@@ -0,0 +1,170 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.util;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * Pearson correlation between two series of values of equal length.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class PearsonCorrelation {
+
+	private static final double ZERO = 0.0d;
+
+	int n;
+
+	double[] x;
+	double[] y;
+
+	/**
+	 * Prepares the correlation between two items over the users that the
+	 * items share. Each series holds the rating given by a shared user minus
+	 * the average rating of the respective item.
+	 * 
+	 * @param ds
+	 *            dataset used to look up the shared users.
+	 * @param iA
+	 *            first item.
+	 * @param iB
+	 *            second item.
+	 */
+	public PearsonCorrelation(Dataset ds, Item iA, Item iB) {
+
+		double aAvgR = iA.getAverageRating();
+		double bAvgR = iB.getAverageRating();
+
+		Integer[] uid = Item.getSharedUserIds(iA, iB);
+		n = uid.length;
+
+		x = new double[n];
+		y = new double[n];
+
+		User u;
+		double urA = 0;
+		double urB = 0;
+
+		for (int i = 0; i < n; i++) {
+
+			u = ds.getUser(uid[i]);
+			urA = u.getItemRating(iA.getId()).getRating();
+			urB = u.getItemRating(iB.getId()).getRating();
+
+			x[i] = urA - aAvgR;
+			y[i] = urB - bAvgR;
+		}
+	}
+
+	/**
+	 * Prepares the correlation between two series. The arrays are used as
+	 * they are; no copy is made.
+	 * 
+	 * @param x
+	 *            first series.
+	 * @param y
+	 *            second series.
+	 * @throws java.lang.IllegalArgumentException
+	 *             if the arrays differ in length.
+	 */
+	public PearsonCorrelation(double[] x, double[] y)
+			throws java.lang.IllegalArgumentException {
+
+		if (x.length != y.length) {
+			throw new IllegalArgumentException(
+					"Arrays x and y should have the same length!");
+		}
+
+		n = x.length;
+
+		this.x = x;
+		this.y = y;
+	}
+
+	/**
+	 * Calculates the correlation coefficient of the two series.
+	 * <p>
+	 * Special cases: 0 is returned for empty series; 1 is returned when
+	 * neither series varies (all points are the same). When exactly one
+	 * series is constant the covariance is zero, so the result is 0.
+	 * 
+	 * @return correlation coefficient.
+	 */
+	public double calculate() {
+
+		if (n == 0) {
+			return 0.0;
+		}
+
+		double avgX = getAverage(x);
+		double avgY = getAverage(y);
+
+		double sX = getStdDev(avgX, x);
+		double sY = getStdDev(avgY, y);
+
+		// No variation in X and no variation in Y: all points refer to the
+		// same value. This is a degenerate case of correlation. The standard
+		// deviations are tested directly; signed sums of differences from the
+		// first element can cancel out for data that does vary.
+		if (sX == ZERO && sY == ZERO) {
+			return 1.0;
+		}
+
+		double xy = 0;
+
+		for (int i = 0; i < n; i++) {
+
+			xy += (x[i] - avgX) * (y[i] - avgY);
+		}
+
+		// Either the values of X vary or the values of Y, but not both:
+		// borrow the non-zero deviation so that the division is defined.
+		if (sX == ZERO) {
+			sX = sY;
+		} else if (sY == ZERO) {
+			sY = sX;
+		}
+
+		return xy / (n * (sX * sY));
+	}
+
+	/*
+	 * Arithmetic mean of the values.
+	 */
+	private double getAverage(double[] v) {
+		double avg = 0;
+
+		for (double xi : v) {
+			avg += xi;
+		}
+
+		avg = avg / v.length;
+
+		return avg;
+	}
+
+	/*
+	 * Population standard deviation (divides by the number of values) of the
+	 * values around the given mean m.
+	 */
+	private double getStdDev(double m, double[] v) {
+		double sigma = 0;
+
+		for (double xi : v) {
+			sigma += (xi - m) * (xi - m);
+		}
+
+		sigma = sigma / v.length;
+
+		return Math.sqrt(sigma);
+	}
+
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/util/RatingCountMatrix.java b/src/org/yooreeka/algos/reco/collab/similarity/util/RatingCountMatrix.java
new file mode 100644
index 0000000..1ca5ae8
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/util/RatingCountMatrix.java
@@ -0,0 +1,131 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.util;
+
+import java.io.Serializable;
+
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+
+/**
+ * Square matrix of rating counts for a pair of users or a pair of items.
+ * Element <code>[i][j]</code> counts the shared ratings where the first
+ * object of the pair has rating <code>i + 1</code> and the second one has
+ * rating <code>j + 1</code>.
+ */
+public class RatingCountMatrix implements Serializable {
+
+	/**
+	 * Unique identifier for serialization
+	 */
+	private static final long serialVersionUID = -8216800040843757769L;
+
+	private int matrix[][] = null;
+
+	/**
+	 * Counts the ratings of users that rated both items.
+	 * 
+	 * @param itemA
+	 *            first item.
+	 * @param itemB
+	 *            second item.
+	 * @param nRatingValues
+	 *            size of the matrix; rating values are used as index + 1.
+	 */
+	public RatingCountMatrix(Item itemA, Item itemB, int nRatingValues) {
+		init(nRatingValues);
+		calculate(itemA, itemB);
+	}
+
+	/**
+	 * Counts the ratings of items that both users rated.
+	 * 
+	 * @param userA
+	 *            first user.
+	 * @param userB
+	 *            second user.
+	 * @param nRatingValues
+	 *            size of the matrix; rating values are used as index + 1.
+	 */
+	public RatingCountMatrix(User userA, User userB, int nRatingValues) {
+		init(nRatingValues);
+		calculate(userA, userB);
+	}
+
+	/*
+	 * Populates matrix using user ratings for provided items. We only consider
+	 * users that rated both items.
+	 */
+	private void calculate(Item itemA, Item itemB) {
+		for (Rating ratingForA : itemA.getAllRatings()) {
+			// check if the same user rated itemB
+			Rating ratingForB = itemB.getUserRating(ratingForA.getUserId());
+			if (ratingForB != null) {
+				// element in the matrix is determined by the rating values.
+				int i = ratingForA.getRating() - 1;
+				int j = ratingForB.getRating() - 1;
+				matrix[i][j]++;
+			}
+		}
+	}
+
+	/*
+	 * Populates matrix using ratings for items that the two users share.
+	 */
+	private void calculate(User userA, User userB) {
+
+		for (Rating ratingByA : userA.getAllRatings()) {
+
+			Rating ratingByB = userB.getItemRating(ratingByA.getItemId());
+
+			if (ratingByB != null) {
+
+				int i = ratingByA.getRating() - 1;
+				int j = ratingByB.getRating() - 1;
+				matrix[i][j]++;
+			}
+		}
+	}
+
+	/**
+	 * @return number of counted ratings on the diagonal, i.e. where both
+	 *         ratings have the same value.
+	 */
+	public int getAgreementCount() {
+		int ratingCount = 0;
+		for (int i = 0, n = matrix.length; i < n; i++) {
+			ratingCount += matrix[i][i];
+		}
+		return ratingCount;
+	}
+
+	/**
+	 * Returns the number of counted ratings whose values differ by exactly
+	 * <code>bandId</code>, in either direction.
+	 * 
+	 * @param bandId
+	 *            distance from the diagonal; 0 selects the diagonal itself.
+	 * @return sum of the elements on the band above and below the diagonal.
+	 */
+	public int getBandCount(int bandId) {
+		int bandCount = 0;
+		for (int i = 0, n = matrix.length; (i + bandId) < n; i++) {
+			bandCount += matrix[i][i + bandId];
+			// for band 0 both elements are the same diagonal cell, which
+			// must not be counted twice
+			if (bandId != 0) {
+				bandCount += matrix[i + bandId][i];
+			}
+		}
+		return bandCount;
+	}
+
+	/**
+	 * @return the internal count matrix itself (not a copy).
+	 */
+	public int[][] getMatrix() {
+		return matrix;
+	}
+
+	/**
+	 * @return sum of all elements of the matrix.
+	 */
+	public int getTotalCount() {
+
+		int ratingCount = 0;
+		int n = matrix.length;
+
+		for (int i = 0; i < n; i++) {
+			for (int j = 0; j < n; j++) {
+				ratingCount += matrix[i][j];
+			}
+		}
+		return ratingCount;
+	}
+
+	private void init(int nSize) {
+		// starting point - all elements are zero
+		matrix = new int[nSize][nSize];
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixCache.java b/src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixCache.java
new file mode 100644
index 0000000..ac56519
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixCache.java
@@ -0,0 +1,71 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.util;
+
+import java.io.File;
+
+import org.yooreeka.algos.reco.collab.cache.FileStore;
+import org.yooreeka.algos.reco.collab.cache.Store;
+import org.yooreeka.algos.reco.collab.similarity.naive.SimilarityMatrix;
+
+public class SimilarityMatrixCache {
+
+	private Store store;
+	private String location;
+
+	public SimilarityMatrixCache(File location) {
+		store = new FileStore(location);
+		this.location = location.getAbsolutePath();
+	}
+
+	public SimilarityMatrix get(String id) {
+		SimilarityMatrix s = null;
+		if (store.exists(id)) {
+			s = (SimilarityMatrix) store.get(id);
+		}
+		return s;
+	}
+
+	public String getLocation() {
+		return location;
+	}
+
+	public void put(String id, SimilarityMatrix similarityMatrix) {
+		if (store.exists(id)) {
+			store.remove(id);
+		}
+		store.put(id, similarityMatrix);
+	}
+
+	public void remove(String id) {
+		store.remove(id);
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixRepository.java b/src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixRepository.java
new file mode 100644
index 0000000..48eb933
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/collab/similarity/util/SimilarityMatrixRepository.java
@@ -0,0 +1,173 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.collab.similarity.util;
+
+import java.io.File;
+
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.RecommendationType;
+import org.yooreeka.algos.reco.collab.similarity.naive.ImprovedUserBasedSimilarity;
+import org.yooreeka.algos.reco.collab.similarity.naive.ItemBasedSimilarity;
+import org.yooreeka.algos.reco.collab.similarity.naive.ItemContentBasedSimilarity;
+import org.yooreeka.algos.reco.collab.similarity.naive.ItemPenaltyBasedSimilarity;
+import org.yooreeka.algos.reco.collab.similarity.naive.SimilarityMatrix;
+import org.yooreeka.algos.reco.collab.similarity.naive.UserBasedSimilarity;
+import org.yooreeka.algos.reco.collab.similarity.naive.UserContentBasedSimilarity;
+import org.yooreeka.algos.reco.collab.similarity.naive.UserItemContentBasedSimilarity;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/** Loads {@link SimilarityMatrix} instances, reusing cached ones when a cache is set. */
+public class SimilarityMatrixRepository {
+
+	/**
+	 * Builds the id of a similarity matrix: the simple name of the similarity
+	 * class selected by <code>type</code>, a dash, and the dataset name.
+	 * 
+	 * @param type
+	 *            recommendation type that selects the similarity class
+	 * @param datasetName
+	 *            name of the dataset, appended after the dash
+	 * @return id of the form <code>SimpleClassName-datasetName</code>
+	 * @throws IllegalArgumentException
+	 *             if <code>type</code> is not one of the handled constants
+	 */
+	public static String getId(RecommendationType type, String datasetName) {
+		Class<?> similarityClass;
+		switch (type) {
+		case ITEM_BASED:
+			similarityClass = ItemBasedSimilarity.class;
+			break;
+		case ITEM_PENALTY_BASED:
+			similarityClass = ItemPenaltyBasedSimilarity.class;
+			break;
+		case USER_BASED:
+			similarityClass = UserBasedSimilarity.class;
+			break;
+		case IMPROVED_USER_BASED:
+			similarityClass = ImprovedUserBasedSimilarity.class;
+			break;
+		case USER_CONTENT_BASED:
+			similarityClass = UserContentBasedSimilarity.class;
+			break;
+		case ITEM_CONTENT_BASED:
+			similarityClass = ItemContentBasedSimilarity.class;
+			break;
+		case USER_ITEM_CONTENT_BASED:
+			similarityClass = UserItemContentBasedSimilarity.class;
+			break;
+		default:
+			throw new IllegalArgumentException("Unknown type: " + type);
+		}
+		return similarityClass.getSimpleName() + "-" + datasetName;
+	}
+
+	SimilarityMatrixCache cache;
+
+	/** Creates a cache under the YooreekaConfigurator TEMP_DIR directory if <code>useCache</code> is true. */
+	public SimilarityMatrixRepository(boolean useCache) {
+		SimilarityMatrixCache created = null;
+		if (useCache) {
+			String tempDir = YooreekaConfigurator
+					.getProperty(YooreekaConfigurator.TEMP_DIR);
+			created = new SimilarityMatrixCache(new File(tempDir,
+					"ch3/collaborative/SimilarityCache"));
+		}
+		this.cache = created;
+	}
+
+	/** Uses the given cache; with a null cache nothing is looked up or stored. */
+	public SimilarityMatrixRepository(SimilarityMatrixCache cache) {
+		this.cache = cache;
+	}
+
+	/** Same as the three-argument <code>load</code> with <code>keepRatingCountMatrix</code> set to true. */
+	public SimilarityMatrix load(RecommendationType type, Dataset data) {
+		return load(type, data, true);
+	}
+
+	/**
+	 * Returns the similarity matrix for the given type and dataset. If a cache
+	 * is set it is consulted first; on a miss a new instance is created and
+	 * then put into the cache.
+	 * 
+	 * @param keepRatingCountMatrix
+	 *            forwarded to the item/user rating-based similarity
+	 *            constructors; the content-based ones do not take it
+	 */
+	public SimilarityMatrix load(RecommendationType type, Dataset data,
+			boolean keepRatingCountMatrix) {
+		SimilarityMatrix m = null;
+		String id = getId(type, data.getName());
+		if (cache != null) {
+			m = cache.get(id);
+			String outcome = (m == null)
+					? "similarity matrix instance doesn't exist in cache: "
+					: "similarity matrix instance was loaded from cache: ";
+			System.out.println(outcome + "id: " + id + ", cache: '"
+					+ cache.getLocation() + "'.");
+		}
+		if (m != null) {
+			return m;
+		}
+
+		// cache miss (or no cache): build the matrix for the requested type
+		switch (type) {
+		case ITEM_BASED:
+			m = new ItemBasedSimilarity(id, data, keepRatingCountMatrix);
+			break;
+		case ITEM_PENALTY_BASED:
+			m = new ItemPenaltyBasedSimilarity(id, data, keepRatingCountMatrix);
+			break;
+		case USER_BASED:
+			m = new UserBasedSimilarity(id, data, keepRatingCountMatrix);
+			break;
+		case IMPROVED_USER_BASED:
+			m = new ImprovedUserBasedSimilarity(id, data, keepRatingCountMatrix);
+			break;
+		case USER_CONTENT_BASED:
+			m = new UserContentBasedSimilarity(id, data);
+			break;
+		case ITEM_CONTENT_BASED:
+			m = new ItemContentBasedSimilarity(id, data);
+			break;
+		case USER_ITEM_CONTENT_BASED:
+			m = new UserItemContentBasedSimilarity(id, data);
+			break;
+		default:
+			throw new IllegalArgumentException(
+					"Unsupported recommendation type: " + type.toString());
+		}
+		if (cache != null) {
+			cache.put(id, m);
+		}
+		return m;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/content/digg/DiggCategory.java b/src/org/yooreeka/algos/reco/content/digg/DiggCategory.java
new file mode 100644
index 0000000..367fc68
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/content/digg/DiggCategory.java
@@ -0,0 +1,83 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.content.digg;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.thesuntoucher.jigg.data.Container;
+
+/**
+ * Digg category as a jigg {@link Container}. Every instance registers itself
+ * in a class-level list, which backs the two static lookup methods.
+ */
+public class DiggCategory extends Container {
+	// Declared before the constants below on purpose: their constructor adds to it.
+	private static final List<DiggCategory> allCategories = new ArrayList<DiggCategory>();
+
+	public static final DiggCategory TECHNOLOGY = new DiggCategory(
+			"Technology", "technology");
+	public static final DiggCategory WORLD_AND_BUSINESS = new DiggCategory(
+			"World&Business", "world_business");
+	public static final DiggCategory SPORTS = new DiggCategory("Sports", "sports");
+	public static final DiggCategory SCIENCE = new DiggCategory("Science", "science");
+	public static final DiggCategory GAMING = new DiggCategory("Gaming", "gaming");
+	public static final DiggCategory ENTERTAINMENT = new DiggCategory(
+			"Entertainment", "entertainment");
+	public static final DiggCategory VIDEOS = new DiggCategory("Videos", "videos");
+
+	/** Returns a read-only view of all categories, in the order they were created. */
+	public static List<DiggCategory> getAllCategories() {
+		return java.util.Collections.unmodifiableList(allCategories);
+	}
+
+	/** Returns the category whose name equals <code>name</code> ignoring case, or null if none does. */
+	public static DiggCategory valueOf(String name) {
+		for (DiggCategory c : allCategories) {
+			if (c.getName().equalsIgnoreCase(name)) {
+				return c;
+			}
+		}
+		return null;
+	}
+
+	private DiggCategory(String name, String shortName) {
+		super(name, shortName);
+		allCategories.add(this);
+	}
+
+	// Note that default Container.toString() implementation in jigg library
+	// won't work with digg api call.
+	@Override
+	public String toString() {
+		return getShortName();
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/content/digg/DiggService.java b/src/org/yooreeka/algos/reco/content/digg/DiggService.java
new file mode 100644
index 0000000..5c7d5b1
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/content/digg/DiggService.java
@@ -0,0 +1,253 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.content.digg;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.headzoo.net.services.digg.Rooster;
+import com.headzoo.net.services.digg.exceptions.DiggRequestException;
+
+import de.thesuntoucher.jigg.Jigg;
+import de.thesuntoucher.jigg.args.StoriesArguments;
+import de.thesuntoucher.jigg.data.Container;
+import de.thesuntoucher.jigg.data.Story;
+import de.thesuntoucher.jigg.data.User;
+
+/** Fetches Digg stories via the jigg and Rooster clients as {@link DiggStoryItem}s. */
+public class DiggService {
+
+	private static final int MAX_ITEM_COUNT_PER_REQUEST = 100;
+	private static final int DEFAULT_ITEM_COUNT_PER_CATEGORY = 20;
+	// Key passed to the jigg client. Alternative: "http://code.google.com/p/jigg"
+	private static final String API_KEY = "http://www.manning.com";
+
+	private int itemCountPerCategory = DEFAULT_ITEM_COUNT_PER_CATEGORY;
+	private Jigg jigg;
+
+	public DiggService() {
+		jigg = new Jigg(API_KEY);
+		setItemCountPerCategory(DEFAULT_ITEM_COUNT_PER_CATEGORY);
+	}
+
+	/**
+	 * Fetches stories through the Rooster client. NOTE: the
+	 * <code>container</code> argument is currently ignored - the
+	 * container-specific call is commented out and <code>fetchAll()</code>
+	 * is used instead.
+	 * 
+	 * @throws IOException
+	 * @throws DiggRequestException
+	 */
+	public List<DiggStoryItem> fetchPopular(String container)
+			throws DiggRequestException, IOException {
+		/*
+		 * The first thing you need to do is create an instance of the Rooster
+		 * class. You will need to pass your application key as a constructor
+		 * parameter.
+		 * 
+		 * @link http://apidoc.digg.com/ApplicationKeys
+		 */
+		Rooster rooster = new Rooster("http://www.manning.com/marmanis");
+
+		// com.headzoo.net.services.digg.types.collections.Container c =
+		// getDiggContainer(container);
+		com.headzoo.net.services.digg.types.collections.StoryList stories = rooster
+				.stories().fetchAll(); // .fetchPopularInContainer(c);
+
+		List<DiggStoryItem> storiesList = new ArrayList<DiggStoryItem>(
+				stories.size());
+		for (com.headzoo.net.services.digg.types.Story s : stories) {
+			// NOTE(review): the id is narrowed to int here - confirm ids always fit.
+			DiggStoryItem dsi = new DiggStoryItem((int) s.getId(),
+					s.getTitle(), s.getDescription());
+			if (s.getUser() != null) {
+				dsi.setUsername(s.getUser().getName());
+			}
+			if (s.getLink() != null) {
+				dsi.setLink(s.getLink().toExternalForm());
+			}
+			storiesList.add(dsi);
+		}
+		return storiesList;
+	}
+
+	/**
+	 * Utility method to retrieve a set of stories from each category.
+	 * 
+	 * @return list of stories.
+	 */
+	public List<DiggStoryItem> getAllStories() {
+		List<DiggStoryItem> newsItems = new ArrayList<DiggStoryItem>();
+		for (DiggCategory c : DiggCategory.getAllCategories()) {
+			newsItems.addAll(getStories(c));
+		}
+		return newsItems;
+	}
+
+	/**
+	 * Maps a short alias (compared ignoring case) to a new Rooster container.
+	 * 
+	 * @param val
+	 *            one of tech, world, biz, sci, game, life, fun, sport, offb
+	 * @throws IllegalArgumentException
+	 *             if the alias is not known; the message lists the valid ones
+	 */
+	public com.headzoo.net.services.digg.types.collections.Container getDiggContainer(
+			String val) {
+		if (val.equalsIgnoreCase("tech")) {
+			return newContainer("Technology", "technology");
+		}
+		// "world" and "biz" are two aliases for the same container
+		if (val.equalsIgnoreCase("world") || val.equalsIgnoreCase("biz")) {
+			return newContainer("World & Business", "world_business");
+		}
+		if (val.equalsIgnoreCase("sci")) {
+			return newContainer("Science", "science");
+		}
+		if (val.equalsIgnoreCase("game")) {
+			return newContainer("Gaming", "gaming");
+		}
+		if (val.equalsIgnoreCase("life")) {
+			return newContainer("Lifestyle", "lifestyle");
+		}
+		if (val.equalsIgnoreCase("fun")) {
+			return newContainer("Entertainment", "entertainment");
+		}
+		if (val.equalsIgnoreCase("sport")) {
+			return newContainer("Sports", "sports");
+		}
+		if (val.equalsIgnoreCase("offb")) {
+			return newContainer("Offbeat", "offbeat");
+		}
+
+		// Unknown alias: the exception message carries the list of valid aliases.
+		StringBuilder str = new StringBuilder("Not known Container alias.\n");
+		str.append("Try one of the following: \n");
+		str.append("    tech   -->  Container(\"Technology\", \"technology\")\n");
+		str.append("    world  -->  Container(\"World & Business\", \"world_business\")\n");
+		str.append("    biz    -->  Container(\"World & Business\", \"world_business\")\n");
+		str.append("    sci    -->  Container(\"Science\", \"science\")\n");
+		str.append("    game   -->  Container(\"Gaming\", \"gaming\")\n");
+		str.append("    life   -->  Container(\"Lifestyle\", \"lifestyle\")\n");
+		str.append("    fun    -->  Container(\"Entertainment\", \"entertainment\")\n");
+		str.append("    sport  -->  Container(\"Sports\", \"sports\")\n");
+		str.append("    offb   -->  Container(\"Offbeat\", \"offbeat\")");
+
+		throw new IllegalArgumentException(str.toString());
+	}
+
+	/** Creates a Rooster container; keeps the long qualified type name in one place. */
+	private static com.headzoo.net.services.digg.types.collections.Container newContainer(
+			String name, String shortName) {
+		return new com.headzoo.net.services.digg.types.collections.Container(
+				name, shortName);
+	}
+
+	/** Returns the story count that <code>getStories</code> passes to each request. */
+	public int getItemCountPerCategory() {
+		return this.itemCountPerCategory;
+	}
+
+	/**
+	 * Retrieves popular stories of one category through the jigg client.
+	 * 
+	 * @param category
+	 *            category whose popular stories are requested
+	 * @return one item per story returned by the client; the request is made
+	 *         with the configured per-category count
+	 */
+	public List<DiggStoryItem> getStories(DiggCategory category) {
+
+		StoriesArguments storiesArgs = new StoriesArguments();
+		storiesArgs.setCount(itemCountPerCategory);
+
+		List<Story> stories = jigg.getPopularStories(category, storiesArgs);
+
+		List<DiggStoryItem> items = new ArrayList<DiggStoryItem>();
+
+		for (Story story : stories) {
+			DiggStoryItem item = new DiggStoryItem(story.getId(),
+					story.getTitle(), story.getDescription());
+
+			// additional fields
+			item.setLink(story.getLink());
+			item.setTopic(story.getTopic().getName());
+			if (story.getUser() != null) {
+				item.setUsername(story.getUser().getName());
+			}
+
+			// Print only once the item is populated; printing right after
+			// construction would always show a null topic.
+			item.print();
+
+			items.add(item);
+		}
+		return items;
+	}
+
+	/**
+	 * Retrieves a set of stories submitted by user.
+	 * 
+	 * @param userId
+	 *            Digg username
+	 * @param maxStories
+	 *            max number of stories to retrieve
+	 * @return list of stories or empty list if the user doesn't have any.
+	 */
+	public List<DiggStoryItem> getUserStories(String userId, int maxStories) {
+		User user = new User(userId);
+		StoriesArguments args = new StoriesArguments();
+		args.setCount(maxStories);
+		List<Story> stories = jigg.getStories(user, args);
+		List<DiggStoryItem> items = new ArrayList<DiggStoryItem>();
+		for (Story story : stories) {
+			DiggStoryItem item = new DiggStoryItem(story.getId(),
+					story.getTitle(), story.getDescription());
+			item.setLink(story.getLink());
+			item.setTopic(story.getTopic().getName());
+			Container container = story.getContainer();
+			String categoryName = container.getName();
+			item.setCategory(categoryName);
+			if (story.getUser() != null) {
+				item.setUsername(story.getUser().getName());
+			}
+			items.add(item);
+		}
+		return items;
+	}
+
+	/** Sets the per-category story count, capped at MAX_ITEM_COUNT_PER_REQUEST. */
+	public void setItemCountPerCategory(int count) {
+		this.itemCountPerCategory = Math.min(MAX_ITEM_COUNT_PER_REQUEST, count);
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/content/digg/DiggStoryItem.java b/src/org/yooreeka/algos/reco/content/digg/DiggStoryItem.java
new file mode 100644
index 0000000..0f4a6c8
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/content/digg/DiggStoryItem.java
@@ -0,0 +1,109 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.content.digg;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.reco.collab.model.Item;
+
+/** {@link Item} for a Digg story, carrying link, description, topic, username and category. */
+public class DiggStoryItem extends Item {
+
+	/** SVUID */
+	private static final long serialVersionUID = 1924555535749825404L;
+
+	private String link;
+	private String description;
+	private String topic;
+	private String username;
+	private String category;
+
+	/** Creates the item and sets its content to the title, a space, and the description. */
+	public DiggStoryItem(int storyId, String title, String description) {
+		super(storyId, title);
+		this.description = description;
+		setItemContent(new Content(String.valueOf(storyId), title + " "
+				+ description));
+	}
+
+	public String getCategory() {
+		return category;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public String getLink() {
+		return link;
+	}
+
+	/** Returns the result of <code>getName()</code>; the constructor passes the title to the superclass. */
+	public String getTitle() {
+		return getName();
+	}
+
+	public String getTopic() {
+		return topic;
+	}
+
+	public String getUsername() {
+		return username;
+	}
+
+	/** Writes category, topic, title and description to standard output. */
+	public void print() {
+		final String rule = "_____________________________________________________________________";
+		System.out
+				.println("---------------------------------------------------------------------");
+		System.out.println("Category: " + getCategory()
+				+ "     -- NewsCategory: " + getTopic());
+		System.out.println("Title: " + getTitle());
+		System.out.println(rule);
+		System.out.println("Description:\n" + getDescription());
+		System.out.println(rule);
+	}
+
+	public void setCategory(String category) {
+		this.category = category;
+	}
+
+	public void setLink(String link) {
+		this.link = link;
+	}
+
+	public void setTopic(String topic) {
+		this.topic = topic;
+	}
+
+	public void setUsername(String username) {
+		this.username = username;
+	}
+}
diff --git a/src/org/yooreeka/algos/reco/content/digg/DiggUser.java b/src/org/yooreeka/algos/reco/content/digg/DiggUser.java
new file mode 100644
index 0000000..1563869
--- /dev/null
+++ b/src/org/yooreeka/algos/reco/content/digg/DiggUser.java
@@ -0,0 +1,45 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.reco.content.digg;
+
+import org.yooreeka.algos.reco.collab.model.User;
+
+/** Digg-specific {@link User} subtype; declares no state or behavior of its own. */
+public class DiggUser extends User {
+
+	/** SVUID */
+	private static final long serialVersionUID = 5334812189997430446L;
+
+	/** Passes <code>id</code> and <code>name</code> straight to the {@link User} constructor. */
+	public DiggUser(int id, String name) {
+		super(id, name);
+	}
+}
diff --git a/src/org/yooreeka/algos/search/data/SearchResult.java b/src/org/yooreeka/algos/search/data/SearchResult.java
new file mode 100644
index 0000000..13fd76e
--- /dev/null
+++ b/src/org/yooreeka/algos/search/data/SearchResult.java
@@ -0,0 +1,180 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.data;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Custom wrapper for the search results.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class SearchResult {
+
+	/**
+	 * Orders results by descending score. {@link Double#compare} keeps this a
+	 * consistent total order even for NaN scores (which then sort first);
+	 * comparing with &lt; and &gt; reports NaN as equal to every other value,
+	 * which breaks the comparator contract the sort methods rely on.
+	 */
+	private static final Comparator<SearchResult> BY_SCORE_DESCENDING = new Comparator<SearchResult>() {
+		public int compare(SearchResult r1, SearchResult r2) {
+			// arguments swapped on purpose: the higher score sorts first
+			return Double.compare(r2.getScore(), r1.getScore());
+		}
+	};
+
+	/**
+	 * Sorts list in descending order of score value.
+	 */
+	public static void sortByScore(List<SearchResult> values) {
+		Collections.sort(values, BY_SCORE_DESCENDING);
+	}
+
+	/**
+	 * Sorts array in descending order of score value.
+	 */
+	public static void sortByScore(SearchResult[] values) {
+		Arrays.sort(values, BY_SCORE_DESCENDING);
+	}
+
+	private String docId;
+	private String docType;
+	private String title;
+	private String url;
+	private double score;
+
+	/** Creates a result with every field set from the corresponding argument. */
+	public SearchResult(String docId, String docType, String title, String url,
+			double score) {
+
+		this.docId = docId;
+		this.docType = docType;
+		this.title = title;
+		this.url = url;
+		this.score = score;
+	}
+
+	/**
+	 * @return the docId
+	 */
+	public String getDocId() {
+		return docId;
+	}
+
+	/**
+	 * @return the document type
+	 */
+	public String getDocType() {
+		return docType;
+	}
+
+	/**
+	 * @return the score
+	 */
+	public double getScore() {
+		return score;
+	}
+
+	/**
+	 * @return document title if available
+	 */
+	public String getTitle() {
+		return title;
+	}
+
+	/**
+	 * @return the url
+	 */
+	public String getUrl() {
+		return url;
+	}
+
+	/**
+	 * Formats type, title, url and score as text. Despite the name, nothing
+	 * is written to any stream.
+	 * 
+	 * @return the formatted description, ending with a newline
+	 */
+	public String print() {
+		StringBuilder strB = new StringBuilder();
+		// strB.append("Document ID    : ").append(docId).append("\n");
+		strB.append("Document Type: ").append(docType).append("\n");
+		strB.append("Document Title : ").append(title).append("\n");
+		strB.append("Document URL: ").append(url).append("  -->  ");
+		strB.append("Relevance Score: ").append(score).append("\n");
+		return strB.toString();
+	}
+
+	/**
+	 * @param docId
+	 *            the docId to set
+	 */
+	public void setDocId(String docId) {
+		this.docId = docId;
+	}
+
+	/**
+	 * @param docType
+	 *            the document type to set
+	 */
+	public void setDocType(String docType) {
+		this.docType = docType;
+	}
+
+	/**
+	 * @param score
+	 *            the score to set
+	 */
+	public void setScore(double score) {
+		this.score = score;
+	}
+
+	/**
+	 * @param title
+	 *            document title
+	 */
+	public void setTitle(String title) {
+		this.title = title;
+	}
+
+	/**
+	 * @param url
+	 *            the url to set
+	 */
+	public void setUrl(String url) {
+		this.url = url;
+	}
+}
diff --git a/src/org/yooreeka/algos/search/lucene/LuceneIndexBuilder.java b/src/org/yooreeka/algos/search/lucene/LuceneIndexBuilder.java
new file mode 100644
index 0000000..df86992
--- /dev/null
+++ b/src/org/yooreeka/algos/search/lucene/LuceneIndexBuilder.java
@@ -0,0 +1,152 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.lucene;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+import org.yooreeka.util.internet.crawling.core.CrawlData;
+import org.yooreeka.util.internet.crawling.core.CrawlDataProcessor;
+import org.yooreeka.util.internet.crawling.db.ProcessedDocsDB;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+public class LuceneIndexBuilder implements CrawlDataProcessor {
+
+	public static final String INDEX_FIELD_DOC_ID = "docid";
+	public static final String INDEX_FIELD_DOC_TYPE = "doctype";
+	public static final String INDEX_FIELD_CONTENT = "content";
+	public static final String INDEX_FIELD_TITLE = "title";
+	public static final String INDEX_FIELD_URL = "url";
+
+	private IndexWriter indexWriter;
+	private CrawlData crawlData;
+	private int RamBufferSizeMB = 128;
+
+	public LuceneIndexBuilder(File indexFile, CrawlData crawlData)
+			throws IOException {
+
+		this.crawlData = crawlData;
+
+		try {
+
+			indexWriter = getIndexWriter(indexFile);
+
+		} catch (IOException ioX) {
+			throw new RuntimeException("Error while creating lucene index: ",
+					ioX);
+		}
+	}
+
+	/* PRIVATE METHODS */
+	private void buildLuceneIndex(String groupId,
+			ProcessedDocsDB parsedDocsService) {
+
+		try {
+
+			List<String> docIdList = parsedDocsService.getDocumentIds(groupId);
+
+			for (String docId : docIdList) {
+				indexDocument(indexWriter,
+						parsedDocsService.loadDocument(docId));
+			}
+
+			indexWriter.close();
+
+		} catch (IOException ioX) {
+			throw new RuntimeException("Error while creating lucene index: ",
+					ioX);
+		}
+	}
+
+	private IndexWriter getIndexWriter(File file) throws IOException {
+		FSDirectory dir = FSDirectory.open(file);
+		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40,
+				new StandardAnalyzer(Version.LUCENE_40));
+		config.setOpenMode(OpenMode.CREATE_OR_APPEND);
+		config.setRAMBufferSizeMB(RamBufferSizeMB);
+		return new IndexWriter(dir, config);
+	}
+
+	private void indexDocument(IndexWriter iw, ProcessedDocument parsedDoc)
+			throws IOException {
+
+		org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
+
+		FieldType customType = new FieldType(TextField.TYPE_STORED);
+		customType.setStoreTermVectors(true);
+		customType.setStoreTermVectorPositions(true);
+		customType.setStoreTermVectorOffsets(false);
+
+		doc.add(new Field(INDEX_FIELD_CONTENT, parsedDoc.getText(), customType));
+
+		doc.add(new StringField(INDEX_FIELD_URL, parsedDoc.getDocumentURL(),
+				Field.Store.YES));
+
+		doc.add(new StringField(INDEX_FIELD_DOC_ID, parsedDoc.getDocumentId(),
+				Field.Store.YES));
+
+		doc.add(new TextField(INDEX_FIELD_TITLE, parsedDoc.getDocumentTitle(),
+				Field.Store.YES));
+
+		doc.add(new StringField(INDEX_FIELD_DOC_TYPE, parsedDoc
+				.getDocumentType(), Field.Store.YES));
+
+		/**
+		 * TODO: 2.2 -- The effect of boosting (Book Section 2.1.2)
+		 * 
+		 * Uncomment the lines below to demonstrate the effect of boosting
+		 */
+		// if ( parsedDoc.getDocumentId().equals("g1-d13")) {
+		// doc.setBoost(2);
+		// }
+
+		iw.addDocument(doc);
+	}
+
+	public void run() {
+		List<String> allGroups = crawlData.getProcessedDocsDB()
+				.getAllGroupIds();
+		for (String groupId : allGroups) {
+			buildLuceneIndex(groupId, crawlData.getProcessedDocsDB());
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/search/lucene/analyzer/CustomAnalyzer.java b/src/org/yooreeka/algos/search/lucene/analyzer/CustomAnalyzer.java
new file mode 100644
index 0000000..2bcbb08
--- /dev/null
+++ b/src/org/yooreeka/algos/search/lucene/analyzer/CustomAnalyzer.java
@@ -0,0 +1,113 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.lucene.analyzer;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
+
+/**
+ * Analyzer chain: standard tokenizer, standard filter, lower-casing and
+ * stop-word removal. Without explicit stop words it uses Lucene's English
+ * stop words merged with ADDITIONAL_STOP_WORDS.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class CustomAnalyzer extends StopwordAnalyzerBase {
+	/** Default maximum allowed token length */
+	public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+	private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+	/**
+	 * An unmodifiable set containing some common English words that are usually
+	 * not useful for searching.
+	 */
+	public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+	private static final String[] ADDITIONAL_STOP_WORDS = { "should", "would",
+			"from", "up", "i", "s", "it", "his", "has", "he", "she", "her",
+			"said", "been", "being", "final", "now", "hour", "minute",
+			"second", "stop", "start", "first", "third", "fast", "slow",
+			"large", "small" };
+
+	/** Default stop words: STOP_WORDS_SET merged with ADDITIONAL_STOP_WORDS, ignoring case. */
+	private static CharArraySet MERGED_STOP_WORDS;
+
+	static {
+		MERGED_STOP_WORDS = new CharArraySet(Version.LUCENE_40,
+				STOP_WORDS_SET.size() + ADDITIONAL_STOP_WORDS.length, true);
+		MERGED_STOP_WORDS.addAll(STOP_WORDS_SET);
+		for (String word : ADDITIONAL_STOP_WORDS) {
+			MERGED_STOP_WORDS.add(word);
+		}
+	}
+
+	/** Builds an analyzer that uses the merged default stop words. */
+	public CustomAnalyzer(Version matchVersion) {
+		this(matchVersion, MERGED_STOP_WORDS);
+	}
+
+	/**
+	 * Builds an analyzer with the given stop words.
+	 * @param matchVersion Lucene version to match
+	 * @param stopWords stop words
+	 */
+	public CustomAnalyzer(Version matchVersion, CharArraySet stopWords) {
+		super(matchVersion, stopWords);
+	}
+
+	/** Tokenizes with StandardTokenizer, then lower-cases and drops stop words. */
+	@Override
+	protected TokenStreamComponents createComponents(final String fieldName,
+			final Reader reader) {
+		final StandardTokenizer src = new StandardTokenizer(matchVersion,
+				reader);
+		src.setMaxTokenLength(maxTokenLength);
+		TokenStream tok = new StandardFilter(matchVersion, src);
+		tok = new LowerCaseFilter(matchVersion, tok);
+		tok = new StopFilter(matchVersion, tok, stopwords);
+		return new TokenStreamComponents(src, tok) {
+			@Override
+			protected void setReader(final Reader reader) throws IOException {
+				src.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength);
+				super.setReader(reader);
+			}
+		};
+	}
+}
diff --git a/src/org/yooreeka/algos/search/lucene/analyzer/TextDocumentTerms.java b/src/org/yooreeka/algos/search/lucene/analyzer/TextDocumentTerms.java
new file mode 100644
index 0000000..8a3dd07
--- /dev/null
+++ b/src/org/yooreeka/algos/search/lucene/analyzer/TextDocumentTerms.java
@@ -0,0 +1,78 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.lucene.analyzer;
+
+import java.util.HashMap;
+
+/**
+ * Term-frequency table of a text: the text is split on whitespace and every
+ * distinct token is mapped to the number of times it occurs.
+ *
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class TextDocumentTerms {
+
+	HashMap<String, Integer> tf;
+
+	/**
+	 * Counts the whitespace-separated tokens of the given text.
+	 * @param text the text to analyze; null is treated like an empty text
+	 */
+	public TextDocumentTerms(String text) {
+		tf = new HashMap<String, Integer>();
+		if (text == null) {
+			return;
+		}
+		// Split on whitespace runs. Leading whitespace (or an empty text) still
+		// produces one empty token, which must not be counted as a term.
+		for (String s : text.split("\\s+")) {
+			if (s.isEmpty()) {
+				continue;
+			}
+			Integer f = tf.get(s);
+			tf.put(s, f == null ? 1 : f + 1);
+		}
+	}
+
+	/**
+	 * @return the distinct terms, in the map's own iteration order
+	 */
+	public String[] getTerms() {
+		return tf.keySet().toArray(new String[tf.size()]);
+	}
+
+	/**
+	 * @return the term-to-frequency map itself, not a copy
+	 */
+	public HashMap<String, Integer> getTf() {
+		return tf;
+	}
+}
diff --git a/src/org/yooreeka/algos/search/ranking/DocRankMatrixBuilder.java b/src/org/yooreeka/algos/search/ranking/DocRankMatrixBuilder.java
new file mode 100644
index 0000000..8619953
--- /dev/null
+++ b/src/org/yooreeka/algos/search/ranking/DocRankMatrixBuilder.java
@@ -0,0 +1,197 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.ranking;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.yooreeka.algos.search.lucene.analyzer.TextDocumentTerms;
+import org.yooreeka.util.internet.crawling.core.CrawlDataProcessor;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+public class DocRankMatrixBuilder implements CrawlDataProcessor {
+
+	// private final int TERMS_TO_KEEP = 3;
+
+	private int termsToKeep = 0;
+
+	private String indexDir;
+	private PageRankMatrixH matrixH;
+
+	public DocRankMatrixBuilder(String indexDir) {
+		this.indexDir = indexDir;
+	}
+
+	private PageRankMatrixH buildMatrixH(IndexReader idxR) throws IOException {
+
+		// only consider URLs that with fetched and parsed content
+		List<Integer> allDocs = getProcessedDocs(idxR);
+
+		PageRankMatrixH docMatrix = new PageRankMatrixH(allDocs.size());
+
+		for (int i = 0, n = allDocs.size(); i < n; i++) {
+
+			for (int j = 0, k = allDocs.size(); j < k; j++) {
+
+				double similarity = 0.0d;
+
+				Document docX = idxR.document(i);
+				String xURL = docX.get("url");
+
+				if (i == j) {
+
+					// Avoid shameless self-promotion ;-)
+					docMatrix.addLink(xURL, xURL, similarity);
+
+				} else {
+
+					TextDocumentTerms xDocumentTerms = new TextDocumentTerms(
+							docX.get("content"));
+
+					Document docY = idxR.document(j);
+					TextDocumentTerms yDocumentTerms = new TextDocumentTerms(
+							docY.get("content"));
+
+					similarity = getImportance(xDocumentTerms, yDocumentTerms);
+
+					// add link from docX to docY
+					String yURL = docY.get("url");
+
+					docMatrix.addLink(xURL, yURL, similarity);
+				}
+			}
+		}
+
+		docMatrix.calculate();
+
+		return docMatrix;
+	}
+
+	/*
+	 * Checks if the index entry belongs to the category that we want to use
+	 * DocRank on.
+	 */
+	private boolean eligibleForDocRank(String doctype) {
+		return ProcessedDocument.TYPE_MSWORD.equalsIgnoreCase(doctype);
+	}
+
+	public PageRankMatrixH getH() {
+		return matrixH;
+	}
+
+	/*
+	 * Calculates importance of document Y in the context of document X
+	 */
+	private double getImportance(TextDocumentTerms xTerms,
+			TextDocumentTerms yTerms) {
+
+		// sharedTerms is the intersection of the two sets
+		Set<String> sharedTerms = xTerms.getTf().keySet();
+		sharedTerms.retainAll(yTerms.getTf().keySet());
+
+		double sharedTermsSum = 0.0;
+
+		// Notice that this way of assigning importance is not symmetric.
+		// That is, if you swap X with Y then you get a different value;
+		// unless the frequencies are equal, of course!
+
+		double xF, yF;
+		for (String term : sharedTerms) {
+
+			xF = xTerms.getTf().get(term).doubleValue();
+			yF = yTerms.getTf().get(term).doubleValue();
+
+			sharedTermsSum += Math.round(Math.tanh(yF / xF));
+		}
+
+		return sharedTermsSum;
+	}
+
+	/*
+	 * Collects doc ids from the index for documents with matching doc type.
+	 */
+	private List<Integer> getProcessedDocs(IndexReader idxR) throws IOException {
+		List<Integer> docs = new ArrayList<Integer>();
+		for (int i = 0, n = idxR.maxDoc(); i < n; i++) {
+			if (idxR.hasDeletions() == false) {
+				Document doc = idxR.document(i);
+				if (eligibleForDocRank(doc.get("doctype"))) {
+					docs.add(i);
+				}
+			}
+		}
+		return docs;
+
+	}
+
+	// private Map<String, Integer> buildFreqMap(String[] terms, int[] freq) {
+	//
+	// int topNTermsToKeep = (termsToKeep == 0)? TERMS_TO_KEEP: termsToKeep;
+	//
+	// Map<String, Integer> freqMap =
+	// TermFreqMapUtils.getTopNTermFreqMap(terms, freq, topNTermsToKeep);
+	//
+	// return freqMap;
+	// }
+
+	/**
+	 * @return the termsToKeep
+	 */
+	public int getTermsToKeep() {
+		return termsToKeep;
+	}
+
+	public void run() {
+		try {
+			DirectoryReader idxR = DirectoryReader.open(new SimpleFSDirectory(
+					new File(indexDir)));
+			matrixH = buildMatrixH(idxR);
+		} catch (Exception e) {
+			throw new RuntimeException("Error while building matrix: ", e);
+		}
+	}
+
+	/**
+	 * @param termsToKeep
+	 *            the termsToKeep to set
+	 */
+	public void setTermsToKeep(int termsToKeep) {
+		this.termsToKeep = termsToKeep;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/search/ranking/PageRankMatrixBuilder.java b/src/org/yooreeka/algos/search/ranking/PageRankMatrixBuilder.java
new file mode 100644
index 0000000..0466e35
--- /dev/null
+++ b/src/org/yooreeka/algos/search/ranking/PageRankMatrixBuilder.java
@@ -0,0 +1,98 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.ranking;
+
+import java.util.List;
+import java.util.Set;
+
+import org.yooreeka.util.internet.crawling.core.CrawlData;
+import org.yooreeka.util.internet.crawling.core.CrawlDataProcessor;
+import org.yooreeka.util.internet.crawling.db.KnownUrlDB;
+import org.yooreeka.util.internet.crawling.db.PageLinkDB;
+import org.yooreeka.util.internet.crawling.model.KnownUrlEntry;
+
+/**
+ * Crawl-data processor that assembles the PageRank matrix H from the links
+ * between the successfully processed pages of a crawl.
+ */
+public class PageRankMatrixBuilder implements CrawlDataProcessor {
+
+	private PageRankMatrixH matrixH;
+	private CrawlData crawlData;
+
+	/**
+	 * @param crawlData provides the known-URL and page-link databases
+	 */
+	public PageRankMatrixBuilder(CrawlData crawlData) {
+		this.crawlData = crawlData;
+	}
+
+	/**
+	 * Creates a matrix sized to the successfully processed URLs, records every
+	 * link between two such URLs and finally calls calculate() on the matrix.
+	 */
+	private PageRankMatrixH buildMatrixH(KnownUrlDB knownUrlDB,
+			PageLinkDB pageLinkDB) {
+		// Only URLs whose content was fetched and parsed take part.
+		List<String> processed = knownUrlDB
+				.findProcessedUrls(KnownUrlEntry.STATUS_PROCESSED_SUCCESS);
+		PageRankMatrixH h = new PageRankMatrixH(processed.size());
+
+		for (String source : processed) {
+			// Register the page first so it is indexed even without outlinks.
+			h.addLink(source);
+			Set<String> targets = pageLinkDB.getOutlinks(source);
+			for (String target : targets) {
+				if (!knownUrlDB.isSuccessfullyProcessed(target)) {
+					continue; // links to pages without parsed content are ignored
+				}
+				h.addLink(source, target);
+			}
+		}
+
+		h.calculate();
+		return h;
+	}
+
+	/**
+	 * @return the matrix produced by run(); null as long as run() has not finished
+	 */
+	public PageRankMatrixH getH() {
+		return matrixH;
+	}
+
+	/** Builds the matrix from the crawl's URL and link databases and keeps it. */
+	public void run() {
+		KnownUrlDB knownUrls = crawlData.getKnownUrlsDB();
+		PageLinkDB pageLinks = crawlData.getPageLinkDB();
+		this.matrixH = buildMatrixH(knownUrls, pageLinks);
+	}
+}
diff --git a/src/org/yooreeka/algos/search/ranking/PageRankMatrixH.java b/src/org/yooreeka/algos/search/ranking/PageRankMatrixH.java
new file mode 100644
index 0000000..ed0e248
--- /dev/null
+++ b/src/org/yooreeka/algos/search/ranking/PageRankMatrixH.java
@@ -0,0 +1,184 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.ranking;
+
+import org.yooreeka.util.internet.crawling.util.ValueToIndexMapping;
+
+// Sub-stochastic matrix - some rows will have all zeros
+public class PageRankMatrixH {
+
+	private ValueToIndexMapping indexMapping = new ValueToIndexMapping();
+
+	double[][] matrix;
+
+	private int numberOfPagesWithNoLinks = 0;
+
+	public PageRankMatrixH(int nPages) {
+		matrix = new double[nPages][nPages];
+	}
+
+	/**
+	 * Just associate page url with an index. Used for pages that have no
+	 * outlinks.
+	 */
+	public void addLink(String pageUrl) {
+		indexMapping.getIndex(pageUrl);
+	}
+
+	public void addLink(String fromPageUrl, String toPageUrl) {
+		addLink(fromPageUrl, toPageUrl, 1);
+	}
+
+	public void addLink(String fromPageUrl, String toPageUrl, double weight) {
+		int i = indexMapping.getIndex(fromPageUrl);
+		int j = indexMapping.getIndex(toPageUrl);
+
+		try {
+
+			matrix[i][j] = weight;
+
+		} catch (ArrayIndexOutOfBoundsException e) {
+			System.out.println("fromPageUrl:" + fromPageUrl + ", toPageUrl: "
+					+ toPageUrl);
+		}
+	}
+
+	public void calculate() {
+
+		for (int i = 0, n = matrix.length; i < n; i++) {
+
+			double rowSum = 0;
+
+			for (int j = 0, k = matrix.length; j < k; j++) {
+
+				rowSum += matrix[i][j];
+			}
+
+			if (rowSum > 0) {
+
+				for (int j = 0, k = matrix.length; j < k; j++) {
+
+					if (matrix[i][j] > 0) {
+
+						matrix[i][j] = matrix[i][j] / rowSum;
+					}
+				}
+
+			} else {
+
+				numberOfPagesWithNoLinks++;
+			}
+		}
+	}
+
+	/**
+	 * A <B>dangling node</B> corresponds to a web page that has no outlinks.
+	 * These nodes result in a H row that has all its values equal to 0.
+	 */
+	public int[] getDangling() {
+
+		int n = getSize();
+		int[] d = new int[n];
+
+		boolean foundOne = false;
+
+		for (int i = 0; i < n; i++) {
+
+			for (int j = 0; j < n; j++) {
+
+				if (matrix[i][j] > 0) {
+					foundOne = true;
+					break;
+				}
+			}
+
+			if (foundOne) {
+				d[i] = 0;
+			} else {
+				d[i] = 1;
+			}
+
+			foundOne = false;
+		}
+
+		return d;
+	}
+
+	/**
+	 * @return the indexMapping
+	 */
+	public ValueToIndexMapping getIndexMapping() {
+		return indexMapping;
+	}
+
+	public double[][] getMatrix() {
+		return matrix;
+	}
+
+	public int getNumberOfPagesWithNoLinks() {
+		return this.numberOfPagesWithNoLinks;
+	}
+
+	public int getSize() {
+		return matrix.length;
+	}
+
+	public void print() {
+
+		StringBuilder txt = new StringBuilder("H Matrix\n\n");
+
+		for (int i = 0, n = matrix.length; i < n; i++) {
+			txt.append("Index: ").append(i);
+			txt.append("  -->  ");
+			txt.append("Page ID: ").append(indexMapping.getValue(i));
+			txt.append("\n");
+		}
+
+		txt.append("\n").append("\n");
+
+		for (int i = 0, n = matrix.length; i < n; i++) {
+
+			for (int j = 0, k = matrix.length; j < k; j++) {
+
+				txt.append(" ");
+				txt.append(matrix[i][j]);
+
+				if (j < k - 1) {
+					txt.append(", ");
+				} else {
+					txt.append("\n");
+				}
+			}
+		}
+
+		System.out.println(txt.toString());
+	}
+}
diff --git a/src/org/yooreeka/algos/search/ranking/Rank.java b/src/org/yooreeka/algos/search/ranking/Rank.java
new file mode 100644
index 0000000..1f19d3b
--- /dev/null
+++ b/src/org/yooreeka/algos/search/ranking/Rank.java
@@ -0,0 +1,294 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.ranking;
+
+import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * Base class of the PageRank calculations: a power iteration over the H
+ * matrix that a subclass provides through {@link #getH()}.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public abstract class Rank {
+
+	private static final Logger LOG = Logger.getLogger(Rank.class.getName());
+
+	public static final double DEFAULT_ALPHA   = 0.8;
+	public static final double DEFAULT_EPSILON = 0.001;
+
+	/**
+	 * This is the fraction of time that a random surfer follows the structure
+	 * of the web.
+	 */
+	private double alpha = DEFAULT_ALPHA;
+	/** This is the error tolerance for convergence */
+	private double epsilon = DEFAULT_EPSILON;
+
+	/** PageRank values of the last run, by H index; null before the first run */
+	double[] pR;
+
+	public Rank() {
+		LOG.setLevel(YooreekaConfigurator.getLevel(Rank.class.getName()));
+	}
+
+	/**
+	 * Runs the PageRank calculation with the configured alpha and epsilon;
+	 * the result is then available through {@link #getPageRank(String)}.
+	 */
+	public void build() throws Exception {
+		findPageRank(alpha, epsilon);
+	}
+
+	/**
+	 * Computes the PageRank vector by power iteration and logs the ranking.
+	 * The array returned by <CODE>getH().getMatrix()</CODE> is overwritten
+	 * with the G matrix, so a second call would start from G instead of H.
+	 *
+	 * @param alpha the fraction of time that the surfer follows the links
+	 * @param epsilon the error tolerance for convergence; must be positive
+	 * @throws IllegalArgumentException if epsilon is not positive
+	 */
+	public void findPageRank(double alpha, double epsilon) {
+		// With epsilon <= 0 the loop below could never terminate
+		if (epsilon <= 0) {
+			throw new IllegalArgumentException("epsilon must be > 0: " + epsilon);
+		}
+
+		PageRankMatrixH matrixH = getH();
+
+		// The H matrix has size nxn and the PageRank vector has size n
+		int n = matrixH.getSize();
+		double inv_n = (double) 1 / n;
+
+		// This is the actual nxn matrix of double values
+		double[][] H = matrixH.getMatrix();
+
+		// Holds our error -- arbitrarily set to an initial value of 1
+		double error = 1;
+
+		// The PageRank vector and its copy from the previous iteration
+		pR = new double[n];
+		double[] tmpPR = new double[n];
+
+		// Set the initial values (ad hoc)
+		for (int i = 0; i < n; i++) {
+			pR[i] = inv_n;
+		}
+
+		/*
+		 * Book Section 2.3 -- Altering the H matrix: Dangling nodes. The alpha
+		 * of this call is handed over, so that every row of G adds up to 1.
+		 */
+		double[][] dNodes = getDanglingNodeMatrix(alpha);
+
+		/*
+		 * TODO: 2.5 -- Altering the G matrix: Teleportation (Book Section 2.3)
+		 * 
+		 * The following code defines the contribution of teleportation,
+		 * i.e. jumping randomly on a page that is not connected with the one
+		 * that our surfer is currently viewing
+		 * 
+		 * Notice that it is the same for all pages. An interesting variation of
+		 * the algorithm would introduce a "teleportation" contribution that
+		 * relates the probability of an arbitrary transition to the degree of
+		 * interest that a user has for the content of a page.
+		 * 
+		 * Exercise: Could that be done? If so, how? What problems can you see
+		 * with that variation?
+		 */
+		double tNodes = (1 - alpha) * inv_n;
+
+		// Replace the H matrix with the G matrix
+		for (int i = 0; i < n; i++) {
+			for (int j = 0; j < n; j++) {
+				H[i][j] = alpha * H[i][j] + dNodes[i][j] + tNodes;
+			}
+		}
+
+		// A counter for our iterations
+		int k = 0;
+
+		// Iterate until convergence: we have found the PageRank values if
+		// our error is smaller than epsilon
+		while (error >= epsilon) {
+
+			// Make a copy of the PageRank vector before we update it
+			for (int i = 0; i < n; i++) {
+				tmpPR[i] = pR[i];
+			}
+
+			// Now we get the next point in the iteration
+			for (int i = 0; i < n; i++) {
+				double dummy = 0;
+				for (int j = 0; j < n; j++) {
+					dummy += tmpPR[j] * H[j][i];
+				}
+				pR[i] = dummy;
+			}
+
+			// Get the error, so that we can check convergence
+			error = norm(pR, tmpPR);
+
+			// DEBUG ONLY: Display the progress. isLoggable also works when
+			// the level is inherited from a parent logger.
+			if (LOG.isLoggable(Level.FINE)) {
+				LOG.fine("\n Iteration: " + k
+						+ ",   PageRank convergence error: " + error);
+				for (int i = 0; i < n; i++) {
+					LOG.fine("Index: " + i + " -->  PageRank: " + pR[i]);
+				}
+			}
+			// increase the value of the counter by one
+			k++;
+		}
+
+		// Report the final values
+		List<RelevanceScore> allRankings = new ArrayList<RelevanceScore>();
+		for (int i = 0; i < n; i++) {
+			String url = matrixH.getIndexMapping().getValue(i);
+			RelevanceScore r = new RelevanceScore(url, pR[i]);
+			allRankings.add(r);
+		}
+		RelevanceScore.sort(allRankings);
+		LOG.info("\n______________  Calculation Results  _______________\n");
+		LOG.info("\nIterations: " + k);
+		LOG.info("\n____________________________________________________\n");
+		for (RelevanceScore r : allRankings) {
+			// String.format, because the pattern uses printf-style conversions
+			LOG.info(String.format(
+					"Page URL: %-42s  -->  Rank: %.15f\n", r.getId(),
+					r.getScore()));
+		}
+		LOG.info("\n____________________________________________________\n");
+	}
+
+	/**
+	 * @return the alpha
+	 */
+	public double getAlpha() {
+		return alpha;
+	}
+
+	/**
+	 * TODO: 2.4 -- Altering the G matrix: Dangling nodes (Book Section 2.3)
+	 * 
+	 * The following code defines the contribution of the dangling nodes, i.e.
+	 * nodes that do not link to any other node.
+	 * 
+	 * Notice that the 1/n contribution is arbitrary. Given that we have no
+	 * other information about the random surfer's habits or preferences, the
+	 * 1/n value is fair. However, an interesting variation would take into
+	 * account some statistics related to the number of visits a page gets.
+	 * 
+	 * Exercise: Change the algorithm, so that a dangling node's contribution
+	 * depends on some page visit statistic. You can practice with a small set
+	 * of pages and examine the effect on the ranking of the pages.
+	 *
+	 * @param alpha the alpha of the current calculation; it scales the 1/n
+	 * @return an nxn matrix whose rows are alpha/n for dangling nodes and 0
+	 *         for all other nodes
+	 */
+	private double[][] getDanglingNodeMatrix(double alpha) {
+
+		PageRankMatrixH matrixH = getH();
+		int n = matrixH.getSize();
+		double inv_n = (double) 1 / n;
+
+		// The dangling node vector
+		int[] dangling = matrixH.getDangling();
+
+		// Java zero-initializes the array; only dangling rows need a value
+		double[][] dNodes = new double[n][n];
+
+		for (int i = 0; i < n; i++) {
+			if (dangling[i] != 0) {
+				for (int j = 0; j < n; j++) {
+					dNodes[i][j] = alpha * inv_n;
+				}
+			}
+		}
+
+		return dNodes;
+	}
+
+	/** @return the epsilon, i.e. the error tolerance for convergence */
+	public double getEpsilon() {
+		return epsilon;
+	}
+
+	/** @return the H matrix that the PageRank values are calculated for */
+	public abstract PageRankMatrixH getH();
+
+	/**
+	 * @return the PageRank value that the last calculation gave to the page
+	 * @throws IllegalStateException if no calculation has been run yet
+	 */
+	public double getPageRank(String url) {
+		if (pR == null) {
+			throw new IllegalStateException("Call build() or findPageRank() first.");
+		}
+		return pR[getH().getIndexMapping().getIndex(url)];
+	}
+
+	/** @return the L1 distance, i.e. the sum of |a[i] - b[i]| over a's indices */
+	private double norm(double[] a, double[] b) {
+
+		double norm = 0;
+
+		int n = a.length;
+		for (int i = 0; i < n; i++) {
+			norm += Math.abs(a[i] - b[i]);
+		}
+		return norm;
+	}
+
+	/**
+	 * @param alpha
+	 *            the alpha to set
+	 */
+	public void setAlpha(double alpha) {
+		this.alpha = alpha;
+	}
+
+	/**
+	 * @param epsilon
+	 *            the epsilon to set; findPageRank rejects non-positive values
+	 */
+	public void setEpsilon(double epsilon) {
+		this.epsilon = epsilon;
+	}
+}
diff --git a/src/org/yooreeka/algos/search/ranking/RelevanceScore.java b/src/org/yooreeka/algos/search/ranking/RelevanceScore.java
new file mode 100644
index 0000000..2531d78
--- /dev/null
+++ b/src/org/yooreeka/algos/search/ranking/RelevanceScore.java
@@ -0,0 +1,78 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.ranking;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Utility class that acts as a holder for a double value and the id of the
+ * object that this value corresponds to.
+ */
+public class RelevanceScore {
+	/**
+	 * Sorts the list in place, in descending order of score value. NaN scores
+	 * come first, because Double.compare ranks NaN above every other value.
+	 */
+	public static void sort(List<RelevanceScore> values) {
+		Collections.sort(values, new Comparator<RelevanceScore>() {
+			public int compare(RelevanceScore r1, RelevanceScore r2) {
+				// The swapped arguments give the descending order.
+				// Double.compare is a total order; the operators < and >
+				// report NaN as equal to everything, which is not transitive.
+				return Double.compare(r2.getScore(), r1.getScore());
+			}
+		});
+	}
+
+	private final String id;
+	private final double score;
+
+	/**
+	 * @param id the id of the scored object
+	 * @param rank the score of that object
+	 */
+	public RelevanceScore(String id, double rank) {
+		this.id = id;
+		this.score = rank;
+	}
+
+	/** @return the id of the scored object */
+	public String getId() {
+		return id;
+	}
+
+	/** @return the score */
+	public double getScore() {
+		return score;
+	}
+}
diff --git a/src/org/yooreeka/algos/search/util/TermFreqMapUtils.java b/src/org/yooreeka/algos/search/util/TermFreqMapUtils.java
new file mode 100644
index 0000000..69c2dc4
--- /dev/null
+++ b/src/org/yooreeka/algos/search/util/TermFreqMapUtils.java
@@ -0,0 +1,93 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.search.util;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TermFreqMapUtils {
+
+	/** Maps every key to the value that is stored at the same array index. */
+	public static Map<String, Integer> buildTermFreqMap(String[] keys,
+			int[] values) {
+		int n = keys.length;
+		Map<String, Integer> map = new HashMap<String, Integer>(n);
+
+		for (int i = 0; i < n; i++) {
+			map.put(keys[i], values[i]);
+		}
+		return map;
+	}
+
+	/**
+	 * Keeps only the most frequent terms.
+	 *
+	 * @param topNTerms the maximum number of terms to keep
+	 * @return a new map with at most topNTerms entries; which of several
+	 *         equally frequent terms survive the cut is not specified
+	 */
+	public static Map<String, Integer> getTopNTermFreqMap(String[] terms,
+			int[] frequencies, int topNTerms) {
+
+		Map<String, Integer> tfMap = buildTermFreqMap(terms, frequencies);
+		String[] sortedTerms = sortTermsByFrequencies(tfMap, true);
+		int n = Math.min(sortedTerms.length, topNTerms);
+		Map<String, Integer> topNTermFreqMap = new HashMap<String, Integer>();
+		for (int i = 0; i < n; i++) {
+			topNTermFreqMap.put(sortedTerms[i], tfMap.get(sortedTerms[i]));
+		}
+		return topNTermFreqMap;
+	}
+
+	/**
+	 * Orders the terms of the map by their frequency.
+	 *
+	 * @return the keys of tfMap, sorted by ascending or descending value
+	 */
+	public static String[] sortTermsByFrequencies(
+			final Map<String, Integer> tfMap, final boolean descending) {
+
+		String[] sortedTerms = tfMap.keySet().toArray(new String[tfMap.size()]);
+
+		Arrays.sort(sortedTerms, new Comparator<String>() {
+			public int compare(String key1, String key2) {
+				Integer v1 = tfMap.get(key1);
+				Integer v2 = tfMap.get(key2);
+				// compareTo cannot overflow, unlike the difference v1 - v2
+				return descending ? v2.compareTo(v1) : v1.compareTo(v2);
+			}
+		});
+
+		return sortedTerms;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/bayesian/NaiveBayes.java b/src/org/yooreeka/algos/taxis/bayesian/NaiveBayes.java
new file mode 100644
index 0000000..c2d7f4e
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/bayesian/NaiveBayes.java
@@ -0,0 +1,327 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.bayesian;
+
+import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.yooreeka.algos.taxis.core.AttributeValue;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.config.YooreekaConfigurator;
+
+/**
+ * A basic implementation of the Naive Bayes algorithm.
+ * 
+ * The emphasis is on teaching the algorithm, not optimizing its performance.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class NaiveBayes implements Classifier {
+
+	private static final Logger LOG = Logger.getLogger(NaiveBayes.class.getName());
+
+	/**
+	 * You can use the NaiveBayes classifier on many occasions. So, let's give
+	 * it a name to identify the instance of the Classifier.
+	 */
+	private String name;
+
+	/**
+	 * Every classifier needs a training set. Notice that both the name of the
+	 * classifier and its training set are intentionally set during the
+	 * Construction phase.
+	 * 
+	 * Once you created an instance of the NaiveBayes classifier you cannot set
+	 * its TrainingSet but you can always get the reference to it and add
+	 * instances.
+	 */
+	protected TrainingSet tSet;
+
+	/**
+	 * These are the instance counts for each concept (see getProbability)
+	 */
+	protected Map<Concept, Double> conceptPriors;
+
+	/**
+	 * This structure contains the fundamental calculation elements of the Naive
+	 * Bayes method, i.e. the conditional probabilities.
+	 */
+	protected Map<Concept, Map<Attribute, AttributeValue>> p;
+
+	/**
+	 * These are the names of the attributes that we consider for training
+	 */
+	protected ArrayList<String> attributeList;
+
+	/** An auxiliary variable */
+	protected boolean verbose = false;
+
+	/**
+	 * The only constructor for this classifier takes a name and a training set
+	 * as arguments.
+	 * 
+	 * @param name
+	 *            the name of the classifier
+	 * @param set
+	 *            the training set for this classifier
+	 */
+	public NaiveBayes(String name, TrainingSet set) {
+
+		LOG.setLevel(YooreekaConfigurator.getLevel(NaiveBayes.class.getName()));
+
+		this.name = name;
+		tSet = set;
+
+		conceptPriors = new HashMap<Concept, Double>(tSet.getNumberOfConcepts());
+		verbose = false;
+	}
+
+	/**
+	 * Strictly speaking these are not the prior probabilities but just the
+	 * counts. However, we want to reuse these counts and the priors can be
+	 * obtained by a simple division.
+	 */
+	private void calculateConceptPriors() {
+
+		for (Concept c : tSet.getConceptSet()) {
+
+			// Count the training instances of the concept
+			int totalConceptCount = 0;
+
+			for (Instance i : tSet.getInstances().values()) {
+
+				if (i.getConcept().equals(c)) {
+					totalConceptCount++;
+				}
+			}
+
+			conceptPriors.put(c, Double.valueOf(totalConceptCount));
+		}
+	}
+
+	/**
+	 * Builds, for every concept, the map from an attribute to its counter: the
+	 * first occurrence of an attribute creates the AttributeValue, every
+	 * further occurrence calls count() on it. Only the attributes whose name
+	 * was passed to trainOnAttribute are considered.
+	 */
+	protected void calculateConditionalProbabilities() {
+
+		p = new HashMap<Concept, Map<Attribute, AttributeValue>>();
+
+		for (Instance i : tSet.getInstances().values()) {
+			for (Attribute a : i.getAtrributes()) {
+				if (a == null || !attributeList.contains(a.getName())) {
+					continue;
+				}
+
+				Map<Attribute, AttributeValue> aMap = p.get(i.getConcept());
+				if (aMap == null) {
+					aMap = new HashMap<Attribute, AttributeValue>();
+					p.put(i.getConcept(), aMap);
+				}
+
+				AttributeValue aV = aMap.get(a);
+				if (aV == null) {
+					aMap.put(a, new AttributeValue(a.getValue()));
+				} else {
+					aV.count();
+				}
+			}
+		}
+	}
+
+	/**
+	 * @return the concept with the highest posterior probability for the
+	 *         instance; among equally probable concepts the last one wins
+	 */
+	public Concept classify(Instance instance) {
+		if (tSet == null || tSet.getConceptSet().size() == 0) {
+			throw new IllegalStateException("You have to train classifier first.");
+		}
+
+		LOG.finest("\n*** Classifying instance: " + instance.toString() + "\n");
+
+		Concept bestConcept = null;
+		double bestP = 0.0;
+		for (Concept c : tSet.getConceptSet()) {
+			double prob = getProbability(c, instance);
+			// String.format, because the pattern uses printf-style conversions
+			LOG.fine(String.format("P(%s|%s) = %.15f\n", c.getName(), instance.toString(), prob));
+			if (prob >= bestP) {
+				bestConcept = c;
+				bestP = prob;
+			}
+		}
+		return bestConcept;
+	}
+
+	/**
+	 * @return the name
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * @return the prior of the concept: its instance count over the set size
+	 */
+	public double getProbability(Concept c) {
+		Double trInstanceCount = conceptPriors.get(c);
+		double count = (trInstanceCount == null) ? 0.0 : trInstanceCount;
+		return count / tSet.getSize();
+	}
+
+	/**
+	 * This method calculates the <I>posterior probability</I> that we deal with
+	 * concept <CODE>c</CODE> provided that we observed instance <CODE>i</CODE>.
+	 * This is the application of Bayes theorem.
+	 * 
+	 * @param c
+	 *            is a probable concept for instance <CODE>i</CODE>
+	 * @param i
+	 *            is the observed instance
+	 * @return posterior probability of <CODE>c</CODE> given instance
+	 *         <CODE>i</CODE>
+	 */
+	public double getProbability(Concept c, Instance i) {
+
+		double cP = 0;
+
+		if (tSet.getConceptSet().contains(c)) {
+
+			cP = (getProbability(i, c) * getProbability(c)) / getProbability(i);
+
+		} else {
+			// We have never seen this concept before
+			// assign to it a "reasonable" value
+			cP = 1 / (tSet.getNumberOfConcepts() + 1.0);
+		}
+
+		return cP;
+	}
+
+	/**
+	 * This method calculates the denominator of Bayes theorem
+	 * 
+	 * @param i the observed <CODE>Instance</CODE>
+	 * @return the probability of observing <CODE>Instance</CODE> i
+	 */
+	public double getProbability(Instance i) {
+
+		double cP = 0;
+
+		for (Concept c : getTset().getConceptSet()) {
+
+			cP += getProbability(i, c) * getProbability(c);
+		}
+		return (cP == 0) ? (double) 1 / tSet.getSize() : cP;
+	}
+
+	/**
+	 * @return the product of the per-attribute estimates for the instance given
+	 *         the concept, or 1/(number of concepts) if that product is exactly 1
+	 */
+	public double getProbability(Instance i, Concept c) {
+		double cP = 1;
+
+		for (Attribute a : i.getAtrributes()) {
+			if (a != null && attributeList.contains(a.getName())) {
+				// aMap is null if nothing was ever counted for the concept
+				Map<Attribute, AttributeValue> aMap = p.get(c);
+				AttributeValue aV = (aMap == null) ? null : aMap.get(a);
+				if (aV == null) {
+					// the attribute value was never seen for this concept.
+					// Can you justify the following estimate?
+					// Can you think of a better choice?
+					cP *= ((double) 1 / (tSet.getSize() + 1));
+				} else {
+					cP *= (aV.getCount() / conceptPriors.get(c));
+				}
+			}
+		}
+		return (cP == 1) ? (double) 1 / tSet.getNumberOfConcepts() : cP;
+	}
+
+	/**
+	 * @return the tSet
+	 */
+	public TrainingSet getTset() {
+		return tSet;
+	}
+
+	/**
+	 * Training calculates the concept counts and the conditional counts.
+	 * 
+	 * @return true, when the training has been completed
+	 * @throws IllegalStateException
+	 *             if no attribute was selected with trainOnAttribute
+	 */
+	public boolean train() {
+
+		long t0 = System.currentTimeMillis();
+
+		// Fail fast: without attributes there is nothing to count
+		if (attributeList == null || attributeList.size() == 0) {
+
+			String msg = "Can't train the classifier without specifying the attributes"
+					+ " for training!\n"
+					+ "Use the method --> trainOnAttribute(String aName)";
+			throw new IllegalStateException(msg);
+		}
+
+		// Step 1: count the training instances of every concept
+		calculateConceptPriors();
+
+		// Step 2: count the attribute occurrences per concept
+		calculateConditionalProbabilities();
+
+		LOG.fine("       Naive Bayes training completed in ");
+		LOG.fine((System.currentTimeMillis() - t0) + " (ms)");
+
+		return true;
+	}
+
+	/** Selects the attribute with the given name for the training. */
+	public void trainOnAttribute(String aName) {
+		if (attributeList == null) {
+			attributeList = new ArrayList<String>();
+		}
+
+		attributeList.add(aName);
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/boosting/BoostingARCX4Classifier.java b/src/org/yooreeka/algos/taxis/boosting/BoostingARCX4Classifier.java
new file mode 100644
index 0000000..e19cc93
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/boosting/BoostingARCX4Classifier.java
@@ -0,0 +1,190 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.boosting;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.algos.taxis.ensemble.ClassifierEnsemble;
+import org.yooreeka.algos.taxis.ensemble.ConceptMajorityVoter;
+
+public abstract class BoostingARCX4Classifier extends ClassifierEnsemble {
+	/** The training set that every sample is taken from */
+	private TrainingSet originalTSet;
+	/** The number of base classifiers that train() builds */
+	private int classifierPopulation = 2;
+
+	public BoostingARCX4Classifier(String name, TrainingSet tSet) {
+		super(name);
+		this.originalTSet = tSet;
+	}
+
+	/**
+	 * Fills a sample of w.length instances. Every instance is looked up in the
+	 * training set under the key that a WeightBasedRandom built on w yields.
+	 *
+	 * @return a new training set that is made of the sample
+	 */
+	public TrainingSet buildTSet(TrainingSet tSet, double[] w) {
+		WeightBasedRandom generator = new WeightBasedRandom(w);
+		Instance[] drawn = new Instance[w.length];
+		Map<Integer, Instance> pool = tSet.getInstances();
+
+		for (int slot = 0; slot < drawn.length; slot++) {
+			int key = generator.nextInt();
+			drawn[slot] = pool.get(key);
+		}
+		return new TrainingSet(drawn);
+	}
+
+	/**
+	 * Lets every base classifier vote on the concept of the instance.
+	 *
+	 * @return the winner that the ConceptMajorityVoter reports
+	 */
+	@Override
+	public Concept classify(Instance instance) {
+
+		ConceptMajorityVoter ballot = new ConceptMajorityVoter(instance);
+		for (Classifier member : baseClassifiers) {
+			ballot.addVote(member.classify(instance));
+		}
+
+		if (verbose) {
+			ballot.print();
+		}
+		return ballot.getWinner();
+	}
+
+	/** @return the base classifier that is to be trained on the given set */
+	public abstract Classifier getClassifierForTraining(TrainingSet set);
+
+	/** @return the number of base classifiers that train() builds */
+	public int getClassifierPopulation() {
+		return classifierPopulation;
+	}
+
+	/** @return whether the progress is printed to System.out */
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	/**
+	 * @param classifierPopulation
+	 *            the number of base classifiers that train() builds
+	 */
+	public void setClassifierPopulation(int classifierPopulation) {
+		this.classifierPopulation = classifierPopulation;
+	}
+
+	@Override
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+	/**
+	 * Builds the ensemble. Every round takes a weighted sample of the original
+	 * training set, trains the base classifier that getClassifierForTraining
+	 * returns for it and then updates the weights with that classifier.
+	 *
+	 * @return always true
+	 */
+	@Override
+	public boolean train() {
+		baseClassifiers = new ArrayList<Classifier>();
+
+		final int size = originalTSet.getSize();
+
+		// Weights that define sample selection; all instances start out equal
+		final double[] w = new double[size];
+		Arrays.fill(w, 1.0 / size);
+
+		// Number of times instance was misclassified by classifiers that are
+		// currently in ensemble (a new int array is all zeros already)
+		final int[] m = new int[size];
+
+		int round = 0;
+		while (round < classifierPopulation) {
+			if (verbose) {
+				System.out.println("Instance weights: " + Arrays.toString(w));
+				System.out.println("Instance misclassifications: "
+						+ Arrays.toString(m));
+			}
+
+			Classifier trainee = getClassifierForTraining(buildTSet(originalTSet, w));
+			trainee.train();
+			updateWeights(originalTSet, w, m, trainee);
+			baseClassifiers.add(trainee);
+
+			round++;
+		}
+
+		return true;
+	}
+
+	/**
+	 * Re-weights the instances after a training round. The misclassification
+	 * count m[i] goes up by one if the classifier gets instance i wrong (or
+	 * returns null); afterwards w[i] is set to (1 + m[i]^4), normalized so
+	 * that all the weights add up to 1. Both arrays are modified in place.
+	 */
+	public void updateWeights(TrainingSet tSet, double[] w, int[] m,
+			Classifier baseClassifier) {
+		final int n = w.length;
+		// update misclassification counts with results from latest classifier
+		for (int k = 0; k < n; k++) {
+			Instance sample = tSet.getInstance(k);
+			Concept predicted = baseClassifier.classify(sample);
+			Concept expected = sample.getConcept();
+			boolean hit = predicted != null
+					&& predicted.getName().equals(expected.getName());
+			if (!hit) {
+				m[k]++;
+			}
+		}
+
+		// update weights: first the raw values 1 + m^4 and their total ...
+		double total = 0.0;
+		for (int k = 0; k < n; k++) {
+			w[k] = 1.0 + Math.pow(m[k], 4);
+			total += w[k];
+		}
+		// ... then the normalization
+		for (int k = 0; k < n; k++) {
+			w[k] /= total;
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/boosting/WeightBasedRandom.java b/src/org/yooreeka/algos/taxis/boosting/WeightBasedRandom.java
new file mode 100644
index 0000000..c047577
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/boosting/WeightBasedRandom.java
@@ -0,0 +1,80 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.boosting;
+
+import java.util.Random;
+
+public class WeightBasedRandom {
+
+	/** Selection probability of each index; expected to sum to 1. */
+	private double[] w;
+
+	/** Source of the uniform variates. */
+	private Random rnd;
+
+	/**
+	 * Creates a sampler over the indices of <code>w</code>. The array is
+	 * kept by reference, not copied.
+	 * 
+	 * @param w
+	 *            weights that define distribution. All weights should add up to
+	 *            1.
+	 */
+	public WeightBasedRandom(double[] w) {
+		this.rnd = new Random();
+		this.w = w;
+	}
+
+	/**
+	 * Draws an index in <code>[0, w.length)</code> by walking the running
+	 * sum of the weights until it reaches a uniform draw.
+	 * 
+	 * @return the first index whose cumulative weight is at least the draw;
+	 *         the last index if the total never gets there, and 0 for an
+	 *         empty weight array
+	 */
+	public int nextInt() {
+		// Uniform in [0.0, 1.0).
+		final double target = rnd.nextDouble();
+
+		double running = 0.0;
+		int picked = 0;
+
+		for (int idx = 0; idx < w.length; idx++) {
+			running += w[idx];
+			picked = idx;
+			if (running >= target) {
+				return picked;
+			}
+		}
+		return picked;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/core/AttributeValue.java b/src/org/yooreeka/algos/taxis/core/AttributeValue.java
new file mode 100644
index 0000000..2b89aa9
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/AttributeValue.java
@@ -0,0 +1,113 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class AttributeValue {
+
+	/** The observed value; may be <code>null</code>. */
+	private Object value;
+
+	/** How many times the value has been seen; starts at 1. */
+	private int count;
+
+	/**
+	 * Creates a holder for <code>value</code> with a count of one.
+	 * 
+	 * @param value
+	 *            the value being counted
+	 */
+	public AttributeValue(Object value) {
+		this.value = value;
+		count = 1;
+	}
+
+	/** Records one more occurrence of the value. */
+	public void count() {
+		count++;
+	}
+
+	/**
+	 * Two instances are equal when they are of the same class and hold equal
+	 * values; the count is ignored.
+	 * 
+	 * @see java.lang.Object#equals(java.lang.Object)
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+
+		// Test the type before casting: a foreign object must yield false,
+		// not a ClassCastException.
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+
+		final AttributeValue other = (AttributeValue) obj;
+
+		if (value == null) {
+			return other.value == null;
+		}
+
+		return value.equals(other.value);
+	}
+
+	/**
+	 * @return the count
+	 */
+	public int getCount() {
+		return count;
+	}
+
+	/**
+	 * @return the value
+	 */
+	public Object getValue() {
+		return value;
+	}
+
+	/** Hashes the value only, to stay consistent with {@link #equals}. */
+	@Override
+	public int hashCode() {
+		return (value == null) ? 0 : value.hashCode();
+	}
+
+	/** Describes the value and how often it was seen. */
+	@Override
+	public String toString() {
+		return "Attribute value: " + value + " was found " + count + " times";
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/core/BaseConcept.java b/src/org/yooreeka/algos/taxis/core/BaseConcept.java
new file mode 100644
index 0000000..5b1e263
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/BaseConcept.java
@@ -0,0 +1,124 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core;
+
+import java.util.ArrayList;
+
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class BaseConcept implements Concept {
+
+	/** Name of the concept. */
+	private String name;
+	/** Parent concept; <code>null</code> when none was set. */
+	private BaseConcept parent;
+
+	/** Instances registered through {@link #addInstance(Instance)}. */
+	private ArrayList<Instance> instances = new ArrayList<Instance>();
+
+	/** Creates a concept with the given name and no parent. */
+	public BaseConcept(String name) {
+		this.name = name;
+	}
+
+	/** Creates a concept with the given name and parent. */
+	public BaseConcept(String name, BaseConcept parent) {
+		this.name = name;
+		this.parent = parent;
+	}
+
+	/** Appends <code>i</code> to the instances held by this concept. */
+	public synchronized void addInstance(Instance i) {
+		instances.add(i);
+	}
+
+	/** Concepts are equal when both their names and parents are equal. */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+
+		// Check the type before casting so that a foreign object yields
+		// false instead of a ClassCastException.
+		if (!(obj instanceof BaseConcept)) {
+			return false;
+		}
+		final BaseConcept other = (BaseConcept) obj;
+
+		if (name == null ? other.name != null : !name.equals(other.name)) {
+			return false;
+		}
+		if (parent == null) {
+			return other.parent == null;
+		}
+		return parent.equals(other.parent);
+	}
+
+	/** @return a new array holding the instances added so far */
+	public Instance[] getInstances() {
+		return instances.toArray(new Instance[instances.size()]);
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public Concept getParent() {
+		return parent;
+	}
+
+	/** Combines the hashes of name and parent; consistent with equals. */
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result + ((name == null) ? 0 : name.hashCode());
+		result = prime * result + ((parent == null) ? 0 : parent.hashCode());
+		return result;
+	}
+
+	public void setParent(BaseConcept parent) {
+		this.parent = parent;
+	}
+
+	/** @return the name of this concept */
+	@Override
+	public String toString() {
+		return name;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/core/BaseInstance.java b/src/org/yooreeka/algos/taxis/core/BaseInstance.java
new file mode 100644
index 0000000..796b2fc
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/BaseInstance.java
@@ -0,0 +1,239 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class BaseInstance implements Instance {
+
+	/**
+	 * Builds an instance whose i-th attribute pairs <code>attrNames[i]</code>
+	 * with <code>attrValues[i]</code>, labelled with a new
+	 * {@link BaseConcept} named <code>conceptName</code>.
+	 */
+	public static BaseInstance createInstance(String conceptName,
+			String[] attrNames, String[] attrValues) {
+		int n = attrNames.length;
+		StringAttribute[] attributes = new StringAttribute[n];
+		for (int i = 0; i < n; i++) {
+			attributes[i] = new StringAttribute(attrNames[i], attrValues[i]);
+		}
+		Concept concept = new BaseConcept(conceptName);
+		return new BaseInstance(concept, attributes);
+	}
+
+	/** The concept (label) of this instance; null until one is set. */
+	protected Concept concept;
+	/** The attributes; null after the no-arg constructor. */
+	protected StringAttribute[] attributes;
+
+	/** Creates an instance with no concept and no attributes. */
+	public BaseInstance() {
+		// DO NOTHING
+	}
+
+	/**
+	 * @param concept
+	 * @param attributes
+	 */
+	public BaseInstance(Concept concept, StringAttribute[] attributes) {
+		this.concept = concept;
+		this.attributes = attributes;
+	}
+
+	/**
+	 * Instances are equal when they have the same class, equal concepts and
+	 * pairwise equal attributes (null-safe, same length).
+	 * 
+	 * @see java.lang.Object#equals(java.lang.Object)
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+
+		// Null and type checks come before the cast so that null or a
+		// foreign object yields false rather than an exception.
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+
+		final BaseInstance other = (BaseInstance) obj;
+
+		if (concept == null ? other.concept != null
+				: !concept.equals(other.concept)) {
+			return false;
+		}
+
+		// Arrays must both be absent or have the same length ...
+		if (attributes == null || other.attributes == null) {
+			return attributes == other.attributes;
+		}
+		if (attributes.length != other.attributes.length) {
+			return false;
+		}
+
+		// ... and agree slot by slot (name and value, via StringAttribute).
+		for (int i = 0; i < attributes.length; i++) {
+			StringAttribute mine = attributes[i];
+			StringAttribute theirs = other.attributes[i];
+			if (mine == null ? theirs != null : !mine.equals(theirs)) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	public Attribute[] getAtrributes() {
+		return attributes;
+	}
+
+	public StringAttribute getAttribute(int i) {
+		return attributes[i];
+	}
+
+	/**
+	 * @return the first non-null attribute whose name matches
+	 *         <code>attrName</code> ignoring case, or null if there is none
+	 */
+	public Attribute getAttributeByName(String attrName) {
+		if (attributes != null) {
+			for (Attribute a : attributes) {
+				// Null slots are tolerated by print(), so skip them here too.
+				if (a != null && attrName.equalsIgnoreCase(a.getName())) {
+					return a;
+				}
+			}
+		}
+		return null;
+	}
+
+	public Concept getConcept() {
+		return concept;
+	}
+
+	/**
+	 * Combines the hashes of the concept and of every attribute, so that
+	 * instances that are equal per {@link #equals(Object)} hash alike.
+	 */
+	@Override
+	public int hashCode() {
+		int result = (concept == null) ? 0 : concept.hashCode();
+		if (attributes != null) {
+			for (StringAttribute a : attributes) {
+				result = 31 * result + ((a == null) ? 0 : a.hashCode());
+			}
+		}
+		return result;
+	}
+
+	/**
+	 * Reads comma-separated lines until the reader is exhausted. The last
+	 * field of a line becomes the concept name; the fields before it become
+	 * attributes named "a-0", "a-1", ... The reader is not closed here.
+	 */
+	public BaseInstance[] load(BufferedReader bR) throws IOException {
+		ArrayList<BaseInstance> baseInstances = new ArrayList<BaseInstance>();
+
+		String line;
+		while ((line = bR.readLine()) != null) {
+			String[] data = line.split(",");
+			int n = data.length;
+
+			StringAttribute[] attributes = new StringAttribute[n - 1];
+			for (int i = 0; i < n - 1; i++) {
+				attributes[i] = new StringAttribute("a-" + i, data[i]);
+			}
+
+			// The last value is assumed to be the class/concept
+			baseInstances.add(new BaseInstance(
+					new BaseConcept(data[n - 1]), attributes));
+		}
+		return baseInstances.toArray(new BaseInstance[baseInstances.size()]);
+	}
+
+	/**
+	 * This method loads the training instances for the user clicks.
+	 * 
+	 * @param fileName
+	 *            the name of the file that contains the user clicks
+	 * @throws IOException
+	 *             if the file cannot be opened or read
+	 */
+	public BaseInstance[] load(String fileName) throws IOException {
+		BufferedReader bR = new BufferedReader(new FileReader(
+				new File(fileName)));
+		try {
+			return load(bR);
+		} finally {
+			// Always release the file handle, even when reading fails.
+			bR.close();
+		}
+	}
+
+	/**
+	 * Pretty print the information for this Instance
+	 */
+	public void print() {
+		if (attributes != null) {
+			for (Attribute a : attributes) {
+				if (a == null || a.getName() == null) {
+					System.out.print(" -  <NULL ATTRIBUTE> ");
+				} else if (a.getValue() == null) {
+					System.out.print(" -  <NULL ATTRIBUTE VALUE> ");
+				} else {
+					System.out.print(" -  " + a.getName() + " = "
+							+ a.getValue());
+				}
+			}
+		}
+
+		System.out.println(" -->  " + getConcept().getName());
+	}
+
+	/** @param concept the concept to set */
+	public void setConcept(Concept concept) {
+		this.concept = concept;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/core/DoubleAttribute.java b/src/org/yooreeka/algos/taxis/core/DoubleAttribute.java
new file mode 100644
index 0000000..6ce39d0
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/DoubleAttribute.java
@@ -0,0 +1,96 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+
+public class DoubleAttribute implements Attribute {
+
+	/** Default value constant (0.0); not referenced inside this class. */
+	public static final Double DEFAULT_VALUE = 0.0;
+
+	/** Attribute name; may be null. */
+	String name;
+	/** Attribute value; may be null. */
+	Double value;
+
+	/** Creates an attribute with the given name and value. */
+	public DoubleAttribute(String name, Double value) {
+		this.name = name;
+		this.value = value;
+	}
+
+	/**
+	 * Two attributes are equal when they are of the same class and both name
+	 * and value are equal (two nulls count as equal).
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (obj == this) {
+			return true;
+		}
+		if (obj == null || obj.getClass() != getClass()) {
+			return false;
+		}
+
+		final DoubleAttribute that = (DoubleAttribute) obj;
+		final boolean sameName = (name == null) ? that.name == null
+				: name.equals(that.name);
+		if (!sameName) {
+			return false;
+		}
+		return (value == null) ? that.value == null
+				: value.equals(that.value);
+	}
+
+	/**
+	 * @see org.yooreeka.algos.taxis.core.intf.Attribute#getName()
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * @see org.yooreeka.algos.taxis.core.intf.Attribute#getValue()
+	 */
+	public Object getValue() {
+		return value;
+	}
+
+	/** Combines the hashes of name and value; consistent with equals. */
+	@Override
+	public int hashCode() {
+		final int nameHash = (name == null) ? 0 : name.hashCode();
+		final int valueHash = (value == null) ? 0 : value.hashCode();
+		return 31 * (31 + nameHash) + valueHash;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/core/StringAttribute.java b/src/org/yooreeka/algos/taxis/core/StringAttribute.java
new file mode 100644
index 0000000..2750ef0
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/StringAttribute.java
@@ -0,0 +1,108 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class StringAttribute implements Attribute {
+
+	/** Default value constant ("*"); not referenced inside this class. */
+	public static final String DEFAULT_VALUE = "*";
+
+	/** Attribute name; may be null. */
+	String name;
+	/** Attribute value; may be null. */
+	String value;
+
+	/** Creates an attribute with the given name and value. */
+	public StringAttribute(String name, String value) {
+		this.name = name;
+		this.value = value;
+	}
+
+	/**
+	 * Two attributes are equal when they are of the same class and both name
+	 * and value are equal (two nulls count as equal).
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (obj == this) {
+			return true;
+		}
+		if (obj == null || obj.getClass() != getClass()) {
+			return false;
+		}
+
+		final StringAttribute that = (StringAttribute) obj;
+		final boolean sameName = (name == null) ? that.name == null
+				: name.equals(that.name);
+		if (!sameName) {
+			return false;
+		}
+		return (value == null) ? that.value == null
+				: value.equals(that.value);
+	}
+
+	/**
+	 * @see org.yooreeka.algos.taxis.core.intf.Attribute#getName()
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * @see org.yooreeka.algos.taxis.core.intf.Attribute#getValue()
+	 */
+	public Object getValue() {
+		return value;
+	}
+
+	/** Combines the hashes of name and value; consistent with equals. */
+	@Override
+	public int hashCode() {
+		final int nameHash = (name == null) ? 0 : name.hashCode();
+		final int valueHash = (value == null) ? 0 : value.hashCode();
+		return 31 * (31 + nameHash) + valueHash;
+	}
+
+	/**
+	 * @param value
+	 *            the value to set
+	 */
+	public void setValue(String value) {
+		this.value = value;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/core/TrainingSet.java b/src/org/yooreeka/algos/taxis/core/TrainingSet.java
new file mode 100644
index 0000000..b678ff8
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/TrainingSet.java
@@ -0,0 +1,173 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+public class TrainingSet implements Serializable {
+
+	/**
+	 * A unique ID, just in case that we want to serialize our training
+	 * instanceSet.
+	 */
+	private static final long serialVersionUID = 4754213130190809633L;
+
+	/** @return the serialVersionUID */
+	public static long getSerialVersionUID() {
+		return serialVersionUID;
+	}
+
+	/** Guards the summary printed by the array constructor; off by default. */
+	private boolean verbose = false;
+	/**
+	 * TODO: 5.x -- Training set management (Book Section 2.4.1 and 5.7)
+	 * 
+	 * For large training sets, it may be beneficial to serialize them and store
+	 * them because loading a large training instanceSet is computationally
+	 * expensive.
+	 * 
+	 * How would you go about merging two training sets? What problems do you
+	 * foresee?
+	 */
+	private HashMap<Integer, Instance> instanceSet;
+	/** Distinct concepts of the loaded instances. */
+	private HashSet<Concept> conceptSet;
+	/** Distinct attribute names of the loaded instances. */
+	private HashSet<String> attributeNameSet;
+
+	/**
+	 * Creates an empty training set. All three collections are initialised,
+	 * so getters such as getNumberOfConcepts() work on an empty set.
+	 */
+	public TrainingSet() {
+		this(new Instance[0]);
+	}
+
+	/**
+	 * Creates a training set from <code>instances</code>. Ids are assigned
+	 * sequentially from 0 in array order; the concepts and the attribute
+	 * names seen along the way are collected as well.
+	 */
+	public TrainingSet(Instance[] instances) {
+
+		instanceSet = new HashMap<Integer, Instance>();
+		conceptSet = new HashSet<Concept>();
+		attributeNameSet = new HashSet<String>();
+
+		int instanceId = 0;
+		for (Instance i : instances) {
+
+			instanceSet.put(instanceId, i);
+			instanceId++;
+
+			// A set ignores duplicates, so no contains() pre-check is needed.
+			conceptSet.add(i.getConcept());
+
+			Attribute[] attributes = i.getAtrributes();
+			if (attributes == null) {
+				// Instance without attributes: nothing to record.
+				continue;
+			}
+			for (Attribute a : attributes) {
+				if (a != null) {
+					attributeNameSet.add(a.getName());
+				}
+			}
+		}
+
+		if (verbose) {
+			System.out
+					.println("-------------------------------------------------------------");
+			System.out.print("Loaded " + getSize()
+					+ " instances that belong into ");
+			System.out.println(this.getNumberOfConcepts() + " concepts");
+			System.out
+					.println("-------------------------------------------------------------");
+		}
+	}
+
+	/** @return the names of all attributes seen at construction */
+	public HashSet<String> getAttributeNameSet() {
+		return attributeNameSet;
+	}
+
+	/** @return the conceptSet */
+	public HashSet<Concept> getConceptSet() {
+		return conceptSet;
+	}
+
+	/** @return the instance stored under <code>index</code>, or null */
+	public Instance getInstance(int index) {
+		return instanceSet.get(index);
+	}
+
+	/** @return the instanceSet */
+	public HashMap<Integer, Instance> getInstances() {
+		return instanceSet;
+	}
+
+	/** @return the number of distinct concepts */
+	public int getNumberOfConcepts() {
+		return conceptSet.size();
+	}
+
+	/** @return the size of the instanceSet */
+	public int getSize() {
+		return instanceSet.size();
+	}
+
+	/** @return the verbose */
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	/** Prints every instance via {@link Instance#print()}. */
+	public void print() {
+
+		for (Instance i : instanceSet.values()) {
+			i.print();
+		}
+	}
+
+	/**
+	 * @param verbose
+	 *            the verbose to set
+	 */
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/core/intf/Attribute.java b/src/org/yooreeka/algos/taxis/core/intf/Attribute.java
new file mode 100644
index 0000000..d77d5f4
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/intf/Attribute.java
@@ -0,0 +1,42 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core.intf;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public interface Attribute {
+	/** @return the name of this attribute */
+	public String getName();
+	/** @return the value of this attribute */
+	public Object getValue();
+}
diff --git a/src/org/yooreeka/algos/taxis/core/intf/Classifier.java b/src/org/yooreeka/algos/taxis/core/intf/Classifier.java
new file mode 100644
index 0000000..238ffad
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/intf/Classifier.java
@@ -0,0 +1,51 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core.intf;
+
+/**
+ * Every classifier must be:
+ * <UL>
+ * <LI>able to load a <CODE>TrainingSet</CODE> (training is started by {@link #train()}), and</LI>
+ * <LI>able to classify an <CODE>Instance</CODE> (see {@link #classify(Instance)})</LI>
+ * </UL>
+ * 
+ * This interface reflects these two elementary methods and adds a name.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public interface Classifier {
+	/** @return the {@link Concept} that this classifier assigns to the instance */
+	Concept classify(Instance instance);
+	/** @return the name of this classifier */
+	String getName();
+	/** Trains this classifier; the flag presumably signals success (TODO confirm). */
+	boolean train();
+}
diff --git a/src/org/yooreeka/algos/taxis/core/intf/Concept.java b/src/org/yooreeka/algos/taxis/core/intf/Concept.java
new file mode 100644
index 0000000..6db245b
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/intf/Concept.java
@@ -0,0 +1,44 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core.intf;
+
+/**
+ * A class label that a {@link Classifier} can assign to an {@link Instance}.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public interface Concept {
+	/** @return the instances associated with this concept */
+	Instance[] getInstances();
+	/** @return the name of this concept */
+	String getName();
+	/** @return the parent concept; NOTE(review): what a root returns is not visible here */
+	Concept getParent();
+}
diff --git a/src/org/yooreeka/algos/taxis/core/intf/Instance.java b/src/org/yooreeka/algos/taxis/core/intf/Instance.java
new file mode 100644
index 0000000..4917912
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/core/intf/Instance.java
@@ -0,0 +1,46 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.core.intf;
+
+/**
+ * A data item described by {@link Attribute}s and tied to a {@link Concept}.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public interface Instance {
+	/** @return the attributes of this instance (the misspelled name is part of the API) */
+	Attribute[] getAtrributes();
+	/** @return the attribute called <CODE>attrName</CODE>; the result for an unknown name is up to the implementation */
+	Attribute getAttributeByName(String attrName);
+	/** @return the concept associated with this instance */
+	Concept getConcept();
+	/** Prints this instance; the destination is chosen by the implementation. */
+	void print();
+}
diff --git a/src/org/yooreeka/algos/taxis/ensemble/ClassifierEnsemble.java b/src/org/yooreeka/algos/taxis/ensemble/ClassifierEnsemble.java
new file mode 100644
index 0000000..3fdc717
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/ensemble/ClassifierEnsemble.java
@@ -0,0 +1,106 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.ensemble;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * Base implementation for bagging classifier: members vote and the majority wins.
+ */
+public abstract class ClassifierEnsemble implements Classifier {
+	/** Kinds of base classifiers; not referenced anywhere else in this class. */
+	public enum ClassifierMemberType {
+		NEURAL_NETWORK, DECISION_TREE, NAIVE_BAYES
+	}
+	/** Name reported by {@link #getName()}. */
+	protected String name;
+	/** When true, {@link #classify(Instance)} prints the vote tally. */
+	protected boolean verbose = false;
+	/** Ensemble members, in insertion order. */
+	protected List<Classifier> baseClassifiers = new ArrayList<Classifier>();
+
+	public ClassifierEnsemble(String name) {
+		this.name = name;
+	}
+	/** Appends a member to the ensemble. */
+	public void addMember(Classifier baseClassifier) {
+		baseClassifiers.add(baseClassifier);
+	}
+	/** Returns the concept that collected the most member votes. */
+	public Concept classify(Instance instance) {
+
+		ConceptMajorityVoter voter = new ConceptMajorityVoter(instance);
+
+		for (Classifier baseClassifier : baseClassifiers) {
+			// every member casts one vote for the concept it predicts
+			Concept c = baseClassifier.classify(instance);
+
+			voter.addVote(c);
+		}
+
+		if (verbose) {
+			voter.print();
+		}
+
+		return voter.getWinner();
+	}
+	/** Returns the number of members currently in the ensemble. */
+	public int getEnsemblePopulation() {
+		return baseClassifiers.size();
+	}
+
+	public String getName() {
+		return name;
+	}
+	/** Removes the first occurrence of the given member, if present. */
+	public void removeMember(Classifier c) {
+		baseClassifiers.remove(c);
+	}
+
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+	/** Trains every member; returns true only if each member's train() returned true. */
+	public boolean train() {
+		boolean allTrained = true;
+		for (Classifier c : baseClassifiers) {
+			// c.train() is evaluated first so later members are trained even after a failure
+			allTrained = c.train() && allTrained;
+		}
+
+		return allTrained;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/ensemble/ConceptMajorityVoter.java b/src/org/yooreeka/algos/taxis/ensemble/ConceptMajorityVoter.java
new file mode 100644
index 0000000..3cc3bcf
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/ensemble/ConceptMajorityVoter.java
@@ -0,0 +1,87 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.ensemble;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+public class ConceptMajorityVoter {
+	/** Number of votes received so far, keyed by concept. */
+	private Map<Concept, Integer> votes = new HashMap<Concept, Integer>();
+	/** Instance being voted on; only used by {@link #print()}. */
+	private Instance i;
+
+	public ConceptMajorityVoter(Instance i) {
+		this.i = i;
+	}
+	/** Registers one vote for concept <CODE>c</CODE>. */
+	public void addVote(Concept c) {
+
+		Integer conceptVoteCount = votes.get(c);
+
+		if (conceptVoteCount == null) {
+			conceptVoteCount = Integer.valueOf(1); // first vote for c
+		} else {
+			conceptVoteCount = conceptVoteCount + 1;
+
+		}
+		votes.put(c, conceptVoteCount);
+	}
+	/** Returns the concept with the most votes, or null when no vote was cast. */
+	public Concept getWinner() {
+		// strict '>' below: on a tie the entry that the map iterates over first wins
+		int winnerVoteCount = 0;
+		Concept winnerConcept = null;
+
+		for (Map.Entry<Concept, Integer> e : votes.entrySet()) {
+			if (e.getValue() > winnerVoteCount) {
+				winnerConcept = e.getKey();
+				winnerVoteCount = e.getValue();
+			}
+		}
+
+		return winnerConcept;
+	}
+	/** Returns the winner's vote count, or 0 (rather than an NPE) when no vote was cast. */
+	public int getWinnerVoteCount() {
+		Integer count = votes.get(getWinner());
+		return (count == null) ? 0 : count.intValue();
+	}
+
+	public void print() {
+		System.out.println("Votes for instace [" + i + "] : " + votes);
+		System.out.println("Winner concept: " + getWinner());
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/evaluation/ClassifierResults.java b/src/org/yooreeka/algos/taxis/evaluation/ClassifierResults.java
new file mode 100644
index 0000000..cf4053b
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/evaluation/ClassifierResults.java
@@ -0,0 +1,70 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.evaluation;
+
+public class ClassifierResults {
+	private String classifierId;
+	private boolean[] results; // one flag per sample; true entries are the ones counted in nCorrect
+	private int nCorrect; // number of true entries in results
+	/** Creates a result sheet for <CODE>n</CODE> samples, all initially false. */
+	public ClassifierResults(String classifierId, int n) {
+		this.classifierId = classifierId;
+		this.results = new boolean[n];
+		this.nCorrect = 0;
+	}
+	/** Returns nCorrect / n; the result is NaN when n is 0. */
+	public double getAccuracy() {
+		return (double) nCorrect / (double) results.length;
+	}
+
+	public String getClassifierId() {
+		return classifierId;
+	}
+	/** Returns the number of samples this sheet was created for. */
+	public int getN() {
+		return results.length;
+	}
+	/** Returns how many samples are currently flagged true. */
+	public int getNCorrect() {
+		return nCorrect;
+	}
+
+	public boolean getResult(int i) {
+		return results[i];
+	}
+	/** Records the outcome for sample <CODE>i</CODE>; overwriting a sample keeps the tally exact. */
+	public void setResult(int i, boolean value) {
+		if (results[i] != value) {
+			nCorrect += value ? 1 : -1; // only a real change moves the tally
+			results[i] = value;
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/evaluation/CochransQTest.java b/src/org/yooreeka/algos/taxis/evaluation/CochransQTest.java
new file mode 100644
index 0000000..dc354a8
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/evaluation/CochransQTest.java
@@ -0,0 +1,128 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.evaluation;
+
+public class CochransQTest extends Test {
+	/** Cochran's Q statistic, filled in by {@link #calculate()}. */
+	private double q = 0.0;
+
+	private ClassifierResults c1;
+	private ClassifierResults c2;
+	private ClassifierResults c3;
+	/** Number of classifiers being compared. */
+	private double L = 3.0;
+
+	public CochransQTest(ClassifierResults c1, ClassifierResults c2,
+			ClassifierResults c3) {
+		this.c1 = c1;
+		this.c2 = c2;
+		this.c3 = c3;
+
+		setStatisticSymbol("Q");
+
+		// Confidence interval: 0.05
+		// Null hypothesis: classifiers are the same
+		// Degrees of freedom L - 1 = 2
+		// Rejected if q > 5.991
+		setThreshold(5.991);
+
+		calculate();
+	}
+	/** Computes Q = (L - 1) * (L * sum(G_i^2) - T^2) / (L * T - sum(L_j^2)). */
+	@Override
+	protected void calculate() {
+		int n = c1.getN();
+
+		/*
+		 * Total number of correct classifications among all classifiers.
+		 */
+		double T = calculateT();
+		// T2 accumulates, per sample, the squared number of classifiers that were correct
+		double T2 = 0.0;
+
+		for (int i = 0; i < n; i++) {
+			double x = 0.0;
+
+			if (c1.getResult(i)) {
+				x++;
+			}
+			if (c2.getResult(i)) {
+				x++;
+			}
+			if (c3.getResult(i)) {
+				x++;
+			}
+
+			T2 += (x * x);
+		}
+
+		// sum of the squared per-classifier correct counts (G_i^2)
+		double sum = (double) c1.getNCorrect() * c1.getNCorrect()
+				+ (double) c2.getNCorrect() * c2.getNCorrect()
+				+ (double) c3.getNCorrect() * c3.getNCorrect();
+
+		double a = L * sum;
+		double denominator = L * T - T2;
+		// zero when each sample is hit by all or by none: nothing to tell apart, so Q = 0, not 0/0
+		q = (denominator == 0.0) ? 0.0 : (L - 1) * (a - T * T) / denominator;
+		setStatisticValue(q);
+	}
+
+	/*
+	 * Calculates total number of correct classifications among all classifiers.
+	 */
+	private int calculateT() {
+		return c1.getNCorrect() + c2.getNCorrect() + c3.getNCorrect();
+	}
+
+	/**
+	 * Prints the classifiers' accuracies and the fixed test parameters, then
+	 * hands over to printResult() like the other tests in this package.
+	 */
+	@Override
+	public void evaluate() {
+		print("_____________________________________________________");
+
+		print("Evaluating classifiers " + c1.getClassifierId() + ", "
+				+ c2.getClassifierId() + ", " + c3.getClassifierId() + ":");
+		print("_____________________________________________________");
+		print(c1.getClassifierId() + " accuracy: " + c1.getAccuracy());
+		print(c2.getClassifierId() + " accuracy: " + c2.getAccuracy());
+		print(c3.getClassifierId() + " accuracy: " + c3.getAccuracy());
+		print("_____________________________________________________");
+
+		print("Confidence Interval             : 0.05");
+		print("Degrees of Freedom              : 2");
+		print("Statistic threshold (chi-square): 5.991");
+
+		printResult();
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/evaluation/Diff2PropTest.java b/src/org/yooreeka/algos/taxis/evaluation/Diff2PropTest.java
new file mode 100644
index 0000000..91bedd7
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/evaluation/Diff2PropTest.java
@@ -0,0 +1,84 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.evaluation;
+
+public class Diff2PropTest extends Test {
+	/** Signed z statistic; the statistic handed to the base class is its absolute value. */
+	double z = 0.0;
+
+	private ClassifierResults c1;
+	private ClassifierResults c2;
+
+	public Diff2PropTest(ClassifierResults c1, ClassifierResults c2) {
+		this.c1 = c1;
+		this.c2 = c2;
+
+		setStatisticSymbol("|z|");
+
+		/*
+		 * Confidence interval: 0.05 Null hypothesis - classifiers are the same
+		 * Null hypothesis is rejected if |z| > 1.96
+		 */
+		setThreshold(1.96);
+
+		calculate();
+	}
+	/** Computes z = (p1 - p2) / sqrt(2 * p * (1 - p) / n), where p is the mean of p1 and p2. */
+	@Override
+	protected void calculate() {
+		// n is taken from c1 only; c2 is assumed to cover the same number of samples
+		double n = c1.getN();
+		double p = 0.5 * (c1.getAccuracy() + c2.getAccuracy());
+		double a = c1.getAccuracy() - c2.getAccuracy();
+		double b = (2.0 * p * (1 - p)) / n;
+		// b == 0 means both accuracies are 0 or both are 1: no difference, so avoid 0/0 = NaN
+		z = (b == 0.0) ? 0.0 : a / Math.sqrt(b);
+		setStatisticValue(Math.abs(z));
+	}
+
+	@Override
+	public void evaluate() {
+
+		print("_____________________________________________________");
+		print("Evaluating classifiers " + c1.getClassifierId() + " and "
+				+ c2.getClassifierId() + ":");
+
+		print("_____________________________________________________");
+		print(c1.getClassifierId() + " accuracy: " + c1.getAccuracy());
+		print(c2.getClassifierId() + " accuracy: " + c2.getAccuracy());
+		print("_____________________________________________________");
+
+		print("Confidence Interval             : 0.05");
+		print("Statistic threshold (Std Normal): 1.96");
+
+		printResult();
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/evaluation/FTest.java b/src/org/yooreeka/algos/taxis/evaluation/FTest.java
new file mode 100644
index 0000000..5714e8c
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/evaluation/FTest.java
@@ -0,0 +1,182 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.evaluation;
+
+public class FTest extends Test {
+	/** F statistic, filled in by {@link #calculate()}. */
+	private double F = 0.0;
+
+	private ClassifierResults c1;
+	private ClassifierResults c2;
+	private ClassifierResults c3;
+	/** Number of classifiers being compared. */
+	private double L = 3.0;
+
+	public FTest(ClassifierResults c1, ClassifierResults c2,
+			ClassifierResults c3) {
+		this.c1 = c1;
+		this.c2 = c2;
+		this.c3 = c3;
+
+		setStatisticSymbol("F");
+
+		// For test size: 500
+		// Confidence interval: 0.05
+		// Null hypothesis: classifiers are the same
+		// Degrees of freedom: L - 1 = 2, (L - 1) * (N - 1) = 2 * 499 = 998
+		// F Distribution
+		// Rejected if F > 3.08
+		//
+		// Tabulated values can be found at:
+		// http://www.itl.nist.gov/div898/handbook/eda/section3/eda3673.htm
+
+		setThreshold(3.08);
+
+		calculate();
+	}
+	/** Computes F = MSA / MSAB from the sums of squares SSA, SSB and SST. */
+	@Override
+	protected void calculate() {
+
+		/*
+		 * Classifier accuracies: <Number of Correct classifications> / N
+		 */
+		double p1 = c1.getAccuracy();
+		double p2 = c2.getAccuracy();
+		double p3 = c3.getAccuracy();
+
+		/*
+		 * Combined accuracy across all classifiers: T / (L * N)
+		 */
+		double p = calculateCombinedAccuracy();
+
+		/*
+		 * Number of test samples.
+		 */
+		double N = c1.getN();
+
+		/*
+		 * SSA = N * sum_i(p_i^2) - N * L * p^2
+		 */
+		double SSA = N * (p1 * p1 + p2 * p2 + p3 * p3 - L * p * p);
+
+		/*
+		 * SSB = (1 / L) * sum_j(L_j^2) - L * N * p^2 (p is squared, as in SSA)
+		 */
+		double sumOfjL2 = calculateSumOfjL2();
+		double SSB = sumOfjL2 / L - L * N * p * p;
+
+		/*
+		 * SST
+		 */
+		double SST = N * L * p * (1 - p);
+
+		/*
+		 * SSAB
+		 */
+		double SSAB = SST - SSA - SSB;
+
+		/*
+		 * MSA
+		 */
+		double MSA = SSA / (L - 1);
+		/*
+		 * MSAB
+		 */
+		double MSAB = SSAB / ((L - 1) * (N - 1));
+
+		/*
+		 * F
+		 */
+		F = MSA / MSAB;
+
+		setStatisticValue(F);
+	}
+
+	/*
+	 * Accuracy based on combined results from all classifiers.
+	 */
+	private double calculateCombinedAccuracy() {
+		double nCorrect = c1.getNCorrect() + c2.getNCorrect()
+				+ c3.getNCorrect();
+		double nAll = c1.getN() + c2.getN() + c3.getN();
+		return nCorrect / nAll;
+	}
+
+	/*
+	 * Calculates sum of jL squares. Where jL is the number of classifiers that
+	 * correctly classified instance j.
+	 */
+	private double calculateSumOfjL2() {
+		int n = c1.getN();
+
+		double sumjL2 = 0.0;
+
+		for (int j = 0; j < n; j++) {
+			double jL = 0.0;
+
+			if (c1.getResult(j)) {
+				jL++;
+			}
+			if (c2.getResult(j)) {
+				jL++;
+			}
+			if (c3.getResult(j)) {
+				jL++;
+			}
+
+			sumjL2 += (jL * jL);
+		}
+
+		return sumjL2;
+	}
+
+	@Override
+	public void evaluate() {
+		print("_____________________________________________________");
+		print("Evaluating classifiers " + c1.getClassifierId() + ", "
+				+ c2.getClassifierId() + ", " + c3.getClassifierId() + ":");
+
+		print("_____________________________________________________");
+		print(c1.getClassifierId() + " accuracy: " + c1.getAccuracy());
+		print(c2.getClassifierId() + " accuracy: " + c2.getAccuracy());
+		print(c3.getClassifierId() + " accuracy: " + c3.getAccuracy());
+		print("_____________________________________________________");
+
+		print("Confidence Interval     : 0.05");
+		print("Degrees of Freedom (1st): 2");
+		print("Degrees of Freedom (2nd): " + (long) ((L - 1) * (c1.getN() - 1)));
+		print("Statistic threshold     : 3.08");
+
+		printResult();
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/evaluation/McNemarTest.java b/src/org/yooreeka/algos/taxis/evaluation/McNemarTest.java
new file mode 100644
index 0000000..23d6d39
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/evaluation/McNemarTest.java
@@ -0,0 +1,118 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.evaluation;
+
+public class McNemarTest extends Test {
+	private double chi2 = 0.0;
+
+	private ClassifierResults c1;
+	private ClassifierResults c2;
+
+	/*
+	 * Using 'n??' notation. First '?' represents result for first classifier.
+	 * Second '?' represents result for the second classifier. 0 -
+	 * misclassification, 1 - correct classification.
+	 */
+	private int n11 = 0; // both classifiers were correct
+	private int n10 = 0; // first is correct, second incorrect
+	private int n01 = 0; // first incorrect, second correct
+	private int n00 = 0; // both incorrect
+
+	public McNemarTest(ClassifierResults c1, ClassifierResults c2) {
+		this.c1 = c1;
+		this.c2 = c2;
+
+		setStatisticSymbol("Chi^2");
+
+		// using level of significance 0.05, 1 degree of freedom:
+		// reject null hypothesis if chi2 > 3.841
+		setThreshold(3.841);
+
+		calculate();
+	}
+	/** Tallies the 2x2 table and computes chi2 = (|n01 - n10| - 1)^2 / (n01 + n10). */
+	@Override
+	protected void calculate() {
+		int n = c1.getN();
+		n11 = n10 = n01 = n00 = 0; // start from an empty table if this runs more than once
+		for (int i = 0; i < n; i++) {
+			if (c1.getResult(i) && c2.getResult(i)) {
+				n11++;
+			} else if (c1.getResult(i) && !c2.getResult(i)) {
+				n10++;
+			} else if (!c1.getResult(i) && c2.getResult(i)) {
+				n01++;
+			} else {
+				n00++;
+			}
+		}
+		int discordant = n01 + n10;
+		double a = Math.abs(n01 - n10) - 1;
+		// no discordant samples: the classifiers never disagree, so use 0 instead of 1/0 = Infinity
+		chi2 = (discordant == 0) ? 0.0 : a * a / discordant;
+		setStatisticValue(chi2);
+	}
+
+	@Override
+	public void evaluate() {
+		print("_____________________________________________________");
+		print("Evaluating classifiers " + c1.getClassifierId() + " and "
+				+ c2.getClassifierId() + ":");
+		print("_____________________________________________________");
+		print(c1.getClassifierId() + " accuracy: " + c1.getAccuracy());
+		print(c2.getClassifierId() + " accuracy: " + c2.getAccuracy());
+		print("N = " + c1.getN() + ", n00=" + n00 + ", n10=" + n10 + ", n01="
+				+ n01 + ", n11=" + n11);
+		print("_____________________________________________________");
+
+		print("Confidence Interval             : 0.05");
+		print("Degrees of Freedom              : 1");
+		print("Statistic threshold (Chi-square): 3.841");
+
+		printResult();
+	}
+
+	public int getN00() {
+		return n00;
+	}
+
+	public int getN01() {
+		return n01;
+	}
+
+	public int getN10() {
+		return n10;
+	}
+
+	public int getN11() {
+		return n11;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/evaluation/Test.java b/src/org/yooreeka/algos/taxis/evaluation/Test.java
new file mode 100644
index 0000000..fb43549
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/evaluation/Test.java
@@ -0,0 +1,105 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.evaluation;
+
+/**
+ * Base class for statistical tests that compare two classifiers.
+ */
+public abstract class Test {
+
+	private String statisticSymbol; // label printed with the statistic
+	protected double statisticValue; // computed value of the statistic
+	private double threshold; // value the statistic is compared with
+
+	public Test() {
+		super();
+	}
+
+	/** Computes the test statistic; implemented by subclasses. */
+	protected abstract void calculate();
+
+	/** Presents the outcome of the test; implemented by subclasses. */
+	public abstract void evaluate();
+
+	public String getStatisticSymbol() {
+		return statisticSymbol;
+	}
+
+	public double getStatisticValue() {
+		return statisticValue;
+	}
+
+	public double getThreshold() {
+		return threshold;
+	}
+
+	/** Returns true only when the statistic strictly exceeds the threshold. */
+	protected boolean isDifferent(double statistic, double threshold) {
+		return statistic > threshold;
+	}
+
+	/** Writes one line to standard output behind a six-space indent. */
+	protected void print(String val) {
+		System.out.print("      ");
+		System.out.println(val);
+	}
+
+	/**
+	 * Prints the statistic next to the threshold, followed by the verdict.
+	 */
+	protected void printResult() {
+		boolean different = isDifferent(statisticValue, threshold);
+		String relation = different ? ">" : "<=";
+
+		print("________________________________________________________");
+
+		// Spaces are part of the literals so that the value, the words and the
+		// relation do not run together in the printed line.
+		print(statisticSymbol + " value is " + statisticValue + " which is "
+				+ relation + " " + threshold);
+
+		print("The two classifiers are different: "
+				+ String.valueOf(different).toUpperCase());
+	}
+
+	protected void setStatisticSymbol(String statisticSymbol) {
+		this.statisticSymbol = statisticSymbol;
+	}
+
+	protected void setStatisticValue(double statisticValue) {
+		this.statisticValue = statisticValue;
+	}
+
+	protected void setThreshold(double threshold) {
+		this.threshold = threshold;
+	}
+
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/XORNetwork.java b/src/org/yooreeka/algos/taxis/networks/neural/XORNetwork.java
new file mode 100644
index 0000000..3dd2cc7
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/XORNetwork.java
@@ -0,0 +1,163 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural;
+
+import java.util.Arrays;
+
+import org.yooreeka.algos.taxis.networks.neural.core.BaseNN;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Layer;
+
+/**
+ * Demonstration network that is trained on the XOR function; running
+ * main() prints its answers before and after training.
+ */
+public class XORNetwork extends BaseNN {
+
+	private static final long serialVersionUID = -511246579251846775L;
+
+	/** Upper bound on the size of the jitter added to the training inputs. */
+	private static final double TINY_NUMBER = 0.00001d;
+
+	/** The four XOR inputs, in the order in which they are printed. */
+	private static final double[][] CORNERS = { { 0, 0 }, { 0, 1 }, { 1, 0 },
+			{ 1, 1 } };
+
+	/**
+	 * Builds the network, prints its output for the four XOR inputs, trains it
+	 * on slightly perturbed copies of those inputs and prints the output again.
+	 *
+	 * @param args
+	 *            not used
+	 */
+	public static void main(String[] args) {
+		XORNetwork nn = new XORNetwork("XOR Test");
+
+		nn.create();
+
+		System.out.println("Classification using untrained network:");
+		printClassifications(nn);
+
+		System.out.println("Training...");
+
+		double nearZero = 0;
+		for (int i = 0; i < 16 * 1024; i++) {
+
+			nn.train(new double[] { nearZero, nearZero }, new double[] { 0.0 });
+			nn.train(new double[] { 1 + nearZero, 1 + nearZero },
+					new double[] { 0.0 });
+			nn.train(new double[] { 1 + nearZero, nearZero },
+					new double[] { 1.0 });
+			nn.train(new double[] { nearZero, 1 + nearZero },
+					new double[] { 1.0 });
+
+			// Perturb the next pass so the samples are not always identical.
+			nearZero = nextJitter();
+		}
+
+		System.out.println("Trained");
+
+		System.out.println("Classification using trained network:");
+		printClassifications(nn);
+	}
+
+	/**
+	 * Draws the offset applied to the inputs of the next training pass: a
+	 * value of random sign whose magnitude stays below TINY_NUMBER, so every
+	 * training sample remains next to a corner of the XOR truth table.
+	 *
+	 * @return the offset to add to each training input
+	 */
+	private static double nextJitter() {
+		boolean positive = Math.random() < 0.5;
+		double magnitude = Math.random() * TINY_NUMBER;
+		return positive ? magnitude : -magnitude;
+	}
+
+	/**
+	 * Prints the network output for each of the four XOR inputs.
+	 *
+	 * @param nn
+	 *            the network to query
+	 */
+	private static void printClassifications(XORNetwork nn) {
+		for (double[] x : CORNERS) {
+			double[] y = nn.classify(x);
+			System.out.println(Arrays.toString(x) + " -> " + Arrays.toString(y));
+		}
+	}
+
+	public XORNetwork(String name) {
+		super(name);
+	}
+
+	/*
+	 * Creates: 2 -> 3 -> 1 network.
+	 */
+	public void create() {
+
+		// 1. Define Layers, Nodes and Node Biases
+		Layer inputLayer = createInputLayer(0, // layer id
+				2 // number of nodes
+		);
+
+		Layer hiddenLayer = createHiddenLayer(1, // layer id
+				3, // number of nodes
+				new double[] { 1, 1, 1 } // node biases
+		);
+
+		Layer outputLayer = createOutputLayer(2, // layer id
+				1, // number of nodes
+				new double[] { 2.25 } // node biases
+		);
+
+		setInputLayer(inputLayer);
+		setOutputLayer(outputLayer);
+		addHiddenLayer(hiddenLayer);
+
+		// 2. Define links and weights between nodes
+		// Id format: <layerId:nodeIdwithinLayer>
+		setLink("0:0", "1:0", 0.25);
+		setLink("0:0", "1:1", 0.5);
+		setLink("0:0", "1:2", 0.25);
+
+		setLink("0:1", "1:0", 0.25);
+		setLink("0:1", "1:1", 0.5);
+		setLink("0:1", "1:2", 0.25);
+
+		setLink("1:0", "2:0", 0.8);
+		setLink("1:1", "2:0", 0.4);
+		setLink("1:2", "2:0", 0.8);
+
+		System.out.println("NN created");
+
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/BaseLayer.java b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseLayer.java
new file mode 100644
index 0000000..699ad9a
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseLayer.java
@@ -0,0 +1,139 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Layer;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Link;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Node;
+
+/** List-backed {@link Layer} that forwards every operation to its nodes. */
+public class BaseLayer implements Layer {
+
+	private static final long serialVersionUID = -1482668413756729940L;
+
+	private int layerId;
+	private List<Node> nodes;
+
+	public BaseLayer(int layerId) {
+		this.nodes = new ArrayList<Node>();
+		this.layerId = layerId;
+	}
+
+	public void addNode(Node n) {
+		nodes.add(n);
+	}
+
+	public void calculate() {
+		for (Node each : nodes) {
+			each.calculate();
+		}
+	}
+
+	public void calculateWeightAdjustments() {
+		for (Node each : nodes) {
+			each.calculateWeightAdjustments();
+		}
+	}
+
+	public int getId() {
+		return layerId;
+	}
+
+	public List<Node> getNodes() {
+		return nodes;
+	}
+
+	public String getType() {
+		return "";
+	}
+
+	public double[] getValues() {
+		double[] outputs = new double[nodes.size()];
+		int pos = 0;
+		for (Node each : nodes) {
+			outputs[pos++] = each.getOutput();
+		}
+		return outputs;
+	}
+
+	// Prints one "from->to:weight" line per incoming link.
+	public void printWeights() {
+		for (Node target : nodes) {
+			for (Link in : target.getInlinks()) {
+				String source = in.getFromNode().getNodeId();
+				System.out.println(source + "->" + target.getNodeId() + ":"
+						+ in.getWeight());
+			}
+		}
+	}
+
+	public void propagate() {
+		for (Node each : nodes) {
+			each.propagate();
+		}
+	}
+
+	// Hands d[i] to the i-th node as its expected output.
+	public void setExpectedOutputValues(double[] d) {
+		requireOnePerNode(d.length);
+		int pos = 0;
+		for (Node each : nodes) {
+			each.setExpectedOutput(d[pos++]);
+		}
+	}
+
+	// Writes x[i] as the value of the first inlink of the i-th node.
+	public void setInputValues(double[] x) {
+		requireOnePerNode(x.length);
+		int pos = 0;
+		for (Node each : nodes) {
+			each.getInlinks().get(0).setValue(x[pos++]);
+		}
+	}
+
+	public void updateWeights() {
+		for (Node each : nodes) {
+			each.updateWeights();
+		}
+	}
+
+	// Rejects an array whose length differs from the number of nodes.
+	private void requireOnePerNode(int count) {
+		if (nodes.size() != count) {
+			throw new RuntimeException("Invalid layer configuration. "
+					+ "Layer id: " + layerId + ", Expected number of nodes: "
+					+ count + ", Actual number of nodes: " + nodes.size());
+		}
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/BaseLink.java b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseLink.java
new file mode 100644
index 0000000..0b9bf1d
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseLink.java
@@ -0,0 +1,85 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core;
+
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Link;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Node;
+
+public class BaseLink implements Link {
+
+	private static final long serialVersionUID = 6462508677299269035L;
+
+	private Node fromNode; // node the link starts at
+	private Node toNode; // node the link ends at
+	private double value; // value currently carried by the link
+	private double weight; // current weight
+	private double weightDelta; // weight change not yet applied
+
+	public Node getFromNode() {
+		return this.fromNode;
+	}
+
+	public void setFromNode(Node unit) {
+		fromNode = unit;
+	}
+
+	public Node getToNode() {
+		return this.toNode;
+	}
+
+	public void setToNode(Node unit) {
+		toNode = unit;
+	}
+
+	public double getValue() {
+		return this.value;
+	}
+
+	public void setValue(double x) {
+		value = x;
+	}
+
+	public double getWeight() {
+		return this.weight;
+	}
+
+	public void setWeight(double w) {
+		weight = w;
+	}
+
+	public double getWeightDelta() {
+		return this.weightDelta;
+	}
+
+	public void setWeightDelta(double dw) {
+		this.weightDelta = dw;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/BaseNN.java b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseNN.java
new file mode 100644
index 0000000..a824ade
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseNN.java
@@ -0,0 +1,429 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Layer;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Link;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.NeuralNetwork;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Node;
+
+public class BaseNN implements NeuralNetwork, java.io.Serializable {
+
+	private static final long serialVersionUID = -7859066535923217638L;
+
+	private static final double ERROR_THRESHOLD = 0.001d;
+	private static final double CONVERGENCE_THRESHOLD = 1E-10;
+	private static final double LEARNING_RATE = 0.25;
+
+	private boolean verbose = false;
+
+	/*
+	 * Name given to this network.
+	 */
+	private String name;
+
+	/*
+	 * Layer that receives the values passed to classify().
+	 */
+	private Layer inputLayer;
+
+	/*
+	 * Layer whose node outputs are returned by classify().
+	 */
+	private Layer outputLayer;
+
+	/*
+	 * Hidden layers, evaluated in the order in which they were added.
+	 */
+	private List<Layer> hiddenLayers;
+
+	private double learningRate = LEARNING_RATE;
+
+	/*
+	 * Every registered node, keyed by node id; used to resolve setLink().
+	 */
+	private Map<String, Node> allNodes;
+
+	public BaseNN(String name) {
+		this.allNodes = new HashMap<String, Node>();
+		this.hiddenLayers = new ArrayList<Layer>();
+		this.name = name;
+	}
+
+	/*
+	 * Appends a hidden layer and registers each of its nodes.
+	 *
+	 * Fails with a RuntimeException when one of the node ids is already
+	 * registered with this network.
+	 */
+	public void addHiddenLayer(Layer hiddenLayer) {
+		hiddenLayers.add(hiddenLayer);
+		for (Node member : hiddenLayer.getNodes()) {
+			addNode(member);
+		}
+	}
+
+	private void addNode(Node node) {
+		String id = node.getNodeId();
+		if (allNodes.containsKey(id)) {
+			throw new RuntimeException("Duplicate nodeId: " + id);
+		}
+		allNodes.put(id, node);
+	}
+
+	/*
+	 * Feeds x through the network and returns the output layer values.
+	 *
+	 * Layers are evaluated front to back: input, hidden, then output.
+	 */
+	public double[] classify(double[] x) {
+
+		inputLayer.setInputValues(x);
+		inputLayer.calculate();
+		inputLayer.propagate();
+
+		for (Layer hidden : hiddenLayers) {
+			hidden.calculate();
+			hidden.propagate();
+		}
+
+		outputLayer.calculate();
+		// The output layer is the last stage, so nothing is propagated.
+		return outputLayer.getValues();
+	}
+
+	/*
+	 * Builds (without attaching it) a hidden layer of nNodes nodes.
+	 *
+	 * Node ids have the form "<layerId>:<index>"; bias[i] becomes the bias of
+	 * node i, so bias must hold exactly nNodes values.
+	 */
+	public Layer createHiddenLayer(int layerId, int nNodes, double[] bias) {
+		if (bias.length != nNodes) {
+			throw new RuntimeException("Each node should have bias defined.");
+		}
+		BaseLayer layer = new BaseLayer(layerId);
+		for (int idx = 0; idx < nNodes; idx++) {
+			Node node = createHiddenNode(layerId + ":" + idx);
+			node.setBias(bias[idx]);
+			layer.addNode(node);
+		}
+		return layer;
+	}
+
+	protected Node createHiddenNode(String nodeId) {
+		Node hidden = new SigmoidNode(nodeId);
+		hidden.setLearningRate(learningRate);
+		return hidden;
+	}
+
+	/*
+	 * Builds (without attaching it) an input layer of nNodes nodes.
+	 *
+	 * Each node gets one inlink of weight 1.0 through which its value is fed.
+	 */
+	public Layer createInputLayer(int layerId, int nNodes) {
+
+		BaseLayer layer = new BaseLayer(layerId);
+		for (int idx = 0; idx < nNodes; idx++) {
+			Node node = createInputNode(layerId + ":" + idx);
+			Link feed = new BaseLink();
+			feed.setWeight(1.0);
+			feed.setFromNode(node);
+			node.addInlink(feed);
+			layer.addNode(node);
+		}
+
+		return layer;
+	}
+
+	protected Node createInputNode(String nodeId) {
+		Node input = new LinearNode(nodeId);
+		input.setLearningRate(learningRate);
+		return input;
+	}
+
+	/*
+	 * Builds (without attaching it) an output layer of nNodes nodes.
+	 *
+	 * Node ids have the form "<layerId>:<index>"; bias[i] becomes the bias of
+	 * node i, so bias must hold exactly nNodes values.
+	 */
+	public Layer createOutputLayer(int layerId, int nNodes, double[] bias) {
+		if (bias.length != nNodes) {
+			throw new RuntimeException("Each node should have bias defined.");
+		}
+
+		BaseLayer layer = new BaseLayer(layerId);
+		for (int idx = 0; idx < nNodes; idx++) {
+			Node node = createOutputNode(layerId + ":" + idx);
+			node.setBias(bias[idx]);
+			layer.addNode(node);
+		}
+		return layer;
+	}
+
+	protected Node createOutputNode(String nodeId) {
+		Node output = new LinearNode(nodeId);
+		output.setLearningRate(learningRate);
+		return output;
+	}
+
+	// Half of the summed squared differences between actual and expected.
+	private double error(double[] expectedY, double[] actualY) {
+		double total = 0.0;
+		int idx = 0;
+		while (idx < expectedY.length) {
+			total += Math.pow(actualY[idx] - expectedY[idx], 2.0);
+			idx++;
+		}
+		return total / 2;
+	}
+
+	/*
+	 * Number of nodes in the input layer.
+	 *
+	 * Returns 0 while no input layer has been set.
+	 */
+	public int getInputNodeCount() {
+		return getNodeCount(inputLayer);
+	}
+
+	public double getLearningRate() {
+		return this.learningRate;
+	}
+
+	/*
+	 * Name of this network.
+	 *
+	 * It is the constructor argument unless setName() replaced it.
+	 */
+	public String getName() {
+		return this.name;
+	}
+
+	// Number of nodes held by the given layer; a layer that has not been
+	// set (null) counts as empty.
+	private int getNodeCount(Layer layer) {
+		if (layer == null) {
+			return 0;
+		}
+
+		return layer.getNodes().size();
+	}
+
+	/*
+	 * Number of nodes in the output layer.
+	 *
+	 * Returns 0 while no output layer has been set.
+	 */
+	public int getOutputNodeCount() {
+		return getNodeCount(outputLayer);
+	}
+
+	/**
+	 * @return true when train() prints a summary line on completion
+	 */
+	public boolean isVerbose() {
+		return this.verbose;
+	}
+
+	/*
+	 * Prints the link weights of every hidden layer and of the output layer.
+	 *
+	 * Each layer is introduced by a line holding its id and a colon.
+	 */
+	public void printWeights() {
+		for (Layer hidden : hiddenLayers) {
+			System.out.println(hidden.getId() + ":");
+			hidden.printWeights();
+		}
+		System.out.println(outputLayer.getId() + ":");
+		outputLayer.printWeights();
+	}
+
+	public void removeAllNodesAndLayers() {
+		this.inputLayer = null;
+		this.outputLayer = null;
+		this.hiddenLayers.clear();
+		this.allNodes.clear();
+	}
+
+	/*
+	 * Installs the input layer and registers each of its nodes.
+	 *
+	 * Fails with a RuntimeException when one of the node ids is already
+	 * registered with this network.
+	 */
+	public void setInputLayer(Layer inputLayer) {
+		this.inputLayer = inputLayer;
+		for (Node member : inputLayer.getNodes()) {
+			addNode(member);
+		}
+	}
+
+	public void setLearningRate(double learningRate) {
+		this.learningRate = learningRate; // read when nodes are created
+	}
+
+	// Connects two registered nodes with a new link of weight w.
+	public void setLink(String fromNodeId, String toNodeId, double w) {
+		Node source = allNodes.get(fromNodeId);
+		if (source == null) {
+			throw new RuntimeException("Unknown node id: " + fromNodeId);
+		}
+		Node target = allNodes.get(toNodeId);
+		if (target == null) {
+			throw new RuntimeException("Unknown node id: " + toNodeId);
+		}
+
+		Link link = new BaseLink();
+		link.setFromNode(source);
+		link.setToNode(target);
+		link.setWeight(w);
+		source.addOutlink(link);
+		target.addInlink(link);
+	}
+
+	/*
+	 * Replaces the name of this network.
+	 *
+	 * The new value is what getName() returns from now on.
+	 */
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public void setOutputLayer(Layer outputLayer) {
+		this.outputLayer = outputLayer;
+		for (Node member : outputLayer.getNodes()) {
+			addNode(member);
+		}
+	}
+
+	/**
+	 * @param verbose
+	 *            true to make train() print a summary line on completion
+	 */
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+	/**
+	 * Trains the network on a single sample.
+	 *
+	 * Each pass classifies tX, measures the error against tY and, unless a
+	 * stop condition holds, adjusts the output layer and the hidden layers.
+	 * Training stops when the error is at most ERROR_THRESHOLD, or when it
+	 * changed by at most CONVERGENCE_THRESHOLD since the previous pass (the
+	 * first pass is compared with 0.0). The number of passes is not capped.
+	 * In verbose mode a summary line is printed when training stops.
+	 *
+	 * @param tX
+	 *            input values of the sample
+	 * @param tY
+	 *            expected output values of the sample
+	 * @throws RuntimeException
+	 *             if the error evaluates to infinity or NaN
+	 */
+	public void train(double[] tX, double[] tY) {
+
+		// No previous pass yet: the first change is measured against zero.
+		double previousError = 0.0;
+		for (int pass = 1;; pass++) {
+			// Evaluate sample
+			double err = error(tY, classify(tX));
+
+			if (Double.isInfinite(err) || Double.isNaN(err)) {
+				// Couldn't even evaluate the error. Stop.
+				throw new RuntimeException(
+						"Training failed. Couldn't evaluate the error: "
+								+ err
+								+ ". Try some other NN configuration, parameters.");
+			}
+
+			double convergence = Math.abs(err - previousError);
+			boolean goodEnough = err <= ERROR_THRESHOLD;
+			boolean stalled = convergence <= CONVERGENCE_THRESHOLD;
+
+			if (goodEnough || stalled) {
+				// Either the sample is learned or another pass would barely
+				// change the error: leave the weights as they are.
+				if (verbose) {
+					reportProgress(err, pass, convergence);
+				}
+				break;
+			}
+
+			previousError = err;
+
+			// Set expected values so that we can determine the error
+			outputLayer.setExpectedOutputValues(tY);
+
+			// Compute every adjustment before any weight is changed ...
+			outputLayer.calculateWeightAdjustments();
+
+			for (Layer hidden : hiddenLayers) {
+				hidden.calculateWeightAdjustments();
+			}
+
+			// ... then apply them; the layer order doesn't matter here.
+			outputLayer.updateWeights();
+
+			for (Layer hidden : hiddenLayers) {
+				hidden.updateWeights();
+			}
+		}
+	}
+
+	/**
+	 * Prints the one-line summary that train() emits in verbose mode.
+	 *
+	 * @param err
+	 *            error measured on the final pass
+	 * @param pass
+	 *            number of passes made, counted from 1
+	 * @param convergence
+	 *            absolute change of the error since the previous pass
+	 */
+	private void reportProgress(double err, int pass, double convergence) {
+		System.out.print("Error Threshold: " + ERROR_THRESHOLD);
+		System.out.print(" |  Error Achieved: " + err);
+		System.out.print(" |  Number of Iterations: " + pass);
+		System.out.println(" |  Absolute convergence: " + convergence);
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/BaseNode.java b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseNode.java
new file mode 100644
index 0000000..26a26a7
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/BaseNode.java
@@ -0,0 +1,216 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Link;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Node;
+
+abstract class BaseNode implements Node {
+
+	private static final long serialVersionUID = 9035029651203728480L;
+
+	protected String nodeId;
+	protected double x; // activation stored by calculate()
+	protected double y; // output value (set by calculate() or setOutput())
+	protected double bias;
+	protected double biasDelta; // bias change not yet applied
+	protected List<Link> inlinks;
+	protected List<Link> outlinks;
+
+	protected double learningRate;
+
+	/*
+	 * Target output; read by getNodeError() for nodes without outlinks.
+	 */
+	private double expectedOutput;
+
+	public BaseNode(String nodeId) {
+		this.inlinks = new ArrayList<Link>();
+		this.outlinks = new ArrayList<Link>();
+		this.nodeId = nodeId;
+	}
+
+	public void addInlink(Link inlink) {
+		this.inlinks.add(inlink);
+	}
+
+	public void addOutlink(Link outlink) {
+		this.outlinks.add(outlink);
+	}
+
+	public void calculate() {
+		x = calculateActivation();
+		y = fireNeuron();
+	}
+
+	// Bias plus the weighted sum of the values on the inlinks.
+	public double calculateActivation() {
+		double total = bias;
+		for (Link in : inlinks) {
+			total += in.getWeight() * in.getValue();
+		}
+		return x = total;
+	}
+
+	// Accumulates the pending deltas; updateWeights() applies them later.
+	public void calculateWeightAdjustments() {
+		double err = getNodeError();
+		for (Link in : getInlinks()) {
+			double input = in.getValue();
+			double step = learningRate * input * err;
+			in.setWeightDelta(in.getWeightDelta() + step);
+		}
+		// The bias behaves like a weight on a constant input of 1.
+		double biasStep = learningRate * 1 * err;
+		setBiasDelta(getBiasDelta() + biasStep);
+	}
+
+	public abstract double fireNeuron(); // calculate() stores the result in y
+
+	public abstract double fireNeuronDerivative(); // used by getNodeError()
+
+	public double getBias() {
+		return this.bias;
+	}
+
+	public double getBiasDelta() {
+		return this.biasDelta;
+	}
+
+	public List<Link> getInlinks() {
+		return this.inlinks;
+	}
+
+	public double getLastInput() {
+		return this.x;
+	}
+
+	public double getLastOutput() {
+		return this.y;
+	}
+
+	public double getLearningRate() {
+		return learningRate;
+	}
+
+	/*
+	 * Error term of this node used by back-propagation.
+	 *
+	 * A node without outlinks is treated as an output node and compares its
+	 * output with the expected output; any other node sums the error terms
+	 * of the nodes it feeds, each multiplied by the weight of the link.
+	 */
+	public double getNodeError() {
+		boolean isOutputNode = outlinks == null || outlinks.isEmpty();
+		if (isOutputNode) {
+			// Assuming E = 1/2 * ( d - y )^2
+			return (expectedOutput - y) * fireNeuronDerivative();
+		}
+
+		double downstream = 0.0;
+		for (Link out : outlinks) {
+			double targetError = out.getToNode().getNodeError();
+			downstream += targetError * out.getWeight();
+		}
+
+		return fireNeuronDerivative() * downstream;
+	}
+
+	public String getNodeId() {
+		return this.nodeId;
+	}
+
+	public List<Link> getOutlinks() {
+		return this.outlinks;
+	}
+
+	public double getOutput() {
+		return this.y;
+	}
+
+	public double getOutputValue() {
+		return this.y;
+	}
+
+	// Should it be at the link level?
+	// Bias plus weighted inputs; the current output when no inputs are given.
+	public double inputF(List<Link> inputs) {
+		if (inputs == null || inputs.isEmpty()) {
+			return y;
+		}
+		double total = bias;
+		for (Link in : inputs) {
+			total += in.getWeight() * in.getValue();
+		}
+
+		return total;
+	}
+
+	public void propagate() {
+		for (Link out : outlinks) {
+			out.setValue(y); // hand the current output to each outlink
+		}
+	}
+
+	public void setBias(double b) {
+		bias = b;
+	}
+
+	public void setBiasDelta(double db) {
+		biasDelta = db;
+	}
+
+	public void setExpectedOutput(double d) {
+		expectedOutput = d;
+	}
+
+	public void setLearningRate(double learningRate) {
+		this.learningRate = learningRate;
+	}
+
+	public void setOutput(double y) {
+		this.y = y;
+	}
+
+	// Applies the pending deltas to weights and bias, then clears them.
+	public void updateWeights() {
+		for (Link in : getInlinks()) {
+			double adjusted = in.getWeight() + in.getWeightDelta();
+			in.setWeight(adjusted);
+			in.setWeightDelta(0.0);
+		}
+
+		setBias(getBias() + getBiasDelta());
+		setBiasDelta(0.0);
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/LinearNode.java b/src/org/yooreeka/algos/taxis/networks/neural/core/LinearNode.java
new file mode 100644
index 0000000..6a285e1
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/LinearNode.java
@@ -0,0 +1,59 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core;
+
+/** Node whose activation function is linear: f(x) = a * x + b. */
+public class LinearNode extends BaseNode {
+	private static final long serialVersionUID = -6052548906001921511L;
+
+	private double a; // slope of the activation function
+	private double b; // intercept of the activation function
+
+	public LinearNode(String nodeId) {
+		this(nodeId, 1.0, 0.0); // identity activation: f(x) = x
+	}
+
+	public LinearNode(String nodeId, double a, double b) {
+		super(nodeId);
+		this.a = a;
+		this.b = b;
+	}
+
+	@Override
+	public double fireNeuron() {
+		return b + a * x;
+	}
+
+	@Override
+	public double fireNeuronDerivative() {
+		return a;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/SigmoidNode.java b/src/org/yooreeka/algos/taxis/networks/neural/core/SigmoidNode.java
new file mode 100644
index 0000000..74a1f8d
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/SigmoidNode.java
@@ -0,0 +1,52 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core;
+
+/** Node with a hyperbolic tangent (tanh) activation function. */
+public class SigmoidNode extends BaseNode {
+	private static final long serialVersionUID = 5289776407864851871L;
+
+	public SigmoidNode(String nodeId) {
+		super(nodeId);
+	}
+
+	@Override
+	public double fireNeuron() {
+		// Sigmoid (tanh); the result is kept in y for fireNeuronDerivative()
+		y = Math.tanh(x);
+		return y;
+	}
+
+	@Override
+	public double fireNeuronDerivative() {
+		return 1.0 - y * y; // 1 - tanh(x)^2, assuming y still holds tanh(x)
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Layer.java b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Layer.java
new file mode 100644
index 0000000..5455825
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Layer.java
@@ -0,0 +1,57 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core.intf;
+
+import java.util.List;
+
+/** A layer of a neural network, i.e. a group of {@link Node}s. */
+public interface Layer extends java.io.Serializable {
+	void calculate();
+
+	void calculateWeightAdjustments();
+
+	int getId();
+
+	List<Node> getNodes();
+
+	String getType(); // input, output, hidden
+
+	double[] getValues();
+
+	void printWeights();
+
+	void propagate();
+
+	void setExpectedOutputValues(double[] x);
+	void setInputValues(double[] x);
+
+	void updateWeights();
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Link.java b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Link.java
new file mode 100644
index 0000000..0353aee
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Link.java
@@ -0,0 +1,53 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core.intf;
+
+/** A weighted, directed connection from one node to another. */
+public interface Link extends java.io.Serializable {
+
+	// Endpoints of the link
+	Node getFromNode();
+	Node getToNode();
+	void setFromNode(Node fromNode);
+	void setToNode(Node toNode);
+
+	// Value currently carried by the link
+	double getValue();
+	void setValue(double x);
+
+	// Weight that is applied to the carried value
+	double getWeight();
+	void setWeight(double w);
+
+	// Pending change of the weight
+	double getWeightDelta();
+	void setWeightDelta(double dw);
+}
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/intf/NeuralNetwork.java b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/NeuralNetwork.java
new file mode 100644
index 0000000..6f1430b
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/NeuralNetwork.java
@@ -0,0 +1,69 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core.intf;
+
+/** Contract for a layered neural network that can be trained and queried. */
+public interface NeuralNetwork {
+
+	void addHiddenLayer(Layer hiddenLayer);
+
+	double[] classify(double[] x);
+
+	Layer createHiddenLayer(int layerId, int nNodes, double[] bias);
+
+	Layer createInputLayer(int layerId, int nNodes);
+
+	Layer createOutputLayer(int layerId, int nNodes, double[] bias);
+
+	int getInputNodeCount();
+
+	/**
+	 * @return the name
+	 */
+	String getName();
+
+	int getOutputNodeCount();
+
+	void printWeights();
+
+	void setInputLayer(Layer inputLayer);
+
+	/**
+	 * @param name
+	 *            the name to set
+	 */
+	void setName(String name);
+
+	/**
+	 * Trains the network with one training sample at a time.
+	 */
+	void train(double[] tX, double[] tY);
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Node.java b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Node.java
new file mode 100644
index 0000000..c9e6b16
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/networks/neural/core/intf/Node.java
@@ -0,0 +1,98 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.networks.neural.core.intf;
+
+import java.util.List;
+
+/** A node (neuron) of a neural network, wired to other nodes through links. */
+public interface Node extends java.io.Serializable {
+	void addInlink(Link inlink);
+	void addOutlink(Link outlink);
+
+	void calculate();
+
+	/**
+	 * Net Activation
+	 *
+	 * @return the net activation of this node
+	 */
+	double calculateActivation();
+
+	void calculateWeightAdjustments();
+
+	/**
+	 * Activation function
+	 *
+	 * @return the value of the activation function
+	 */
+	double fireNeuron();
+
+	/**
+	 * Activation function derivative
+	 *
+	 * @return the value of the derivative of the activation function
+	 */
+	double fireNeuronDerivative();
+
+	double getBias();
+
+	double getBiasDelta();
+
+	List<Link> getInlinks();
+
+	/**
+	 * Learning rate that will be used in training
+	 */
+	double getLearningRate();
+
+	double getNodeError();
+
+	String getNodeId();
+
+	List<Link> getOutlinks();
+
+	double getOutput();
+
+	void propagate();
+
+	void setBias(double b);
+
+	void setBiasDelta(double bd);
+
+	/**
+	 * For backpropagation
+	 */
+	void setExpectedOutput(double d);
+
+	void setLearningRate(double learningRate);
+
+	void updateWeights();
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/AttributeDefinition.java b/src/org/yooreeka/algos/taxis/tree/AttributeDefinition.java
new file mode 100644
index 0000000..c549568
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/AttributeDefinition.java
@@ -0,0 +1,100 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class allows us to associate type with the attribute.
+ */
+public class AttributeDefinition implements java.io.Serializable {
+
+	private static final long serialVersionUID = -8446442452030956318L;
+
+	/**
+	 * Creates a shallow copy of a list of attribute definitions.
+	 * 
+	 * @param attrs
+	 *            original list of attributes.
+	 * @return new list that holds the same definition objects.
+	 */
+	public static List<AttributeDefinition> copyAttributeDefs(
+			List<AttributeDefinition> attrs) {
+		List<AttributeDefinition> copy = new ArrayList<AttributeDefinition>();
+		copy.addAll(attrs);
+		return copy;
+	}
+
+	/**
+	 * Removes the first definition whose name equals attrName, ignoring case.
+	 * 
+	 * @param attrName
+	 *            attribute name to remove; null removes nothing.
+	 * @param attributes
+	 *            list to remove from.
+	 */
+	public static void removeAttributeDef(String attrName,
+			List<AttributeDefinition> attributes) {
+		if (attrName == null) {
+			return;
+		}
+		for (int idx = 0; idx < attributes.size(); idx++) {
+			String candidate = attributes.get(idx).getName();
+			if (attrName.equalsIgnoreCase(candidate)) {
+				attributes.remove(idx);
+				return;
+			}
+		}
+	}
+
+	/** Attribute name. */
+	private String name;
+
+	/*
+	 * Attribute can be described as continuous (has numeric values) or discrete
+	 * (has nominal/categorical values).
+	 */
+	private boolean isDiscrete;
+
+	public AttributeDefinition(String name, boolean isDiscrete) {
+		this.name = name;
+		this.isDiscrete = isDiscrete;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public boolean isDiscrete() {
+		return isDiscrete;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/AttributeSelector.java b/src/org/yooreeka/algos/taxis/tree/AttributeSelector.java
new file mode 100644
index 0000000..01476e9
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/AttributeSelector.java
@@ -0,0 +1,145 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+public class AttributeSelector implements java.io.Serializable {
+
+	private static final long serialVersionUID = 1722498208605607524L;
+
+	public AttributeSelector() {
+
+	}
+
+	/**
+	 * Evaluates all candidate attributes and chooses one that provides the best
+	 * split of the data.
+	 * 
+	 * @param data
+	 *            data that will be used to evaluate split quality.
+	 * @param candidateAttributes
+	 *            attributes to choose from.
+	 * 
+	 * @return information about selected attribute along with the data for
+	 *         every branch produced by this split.
+	 */
+	public SplittingCriterion apply(List<Instance> data,
+			List<AttributeDefinition> candidateAttributes) {
+
+		int n = candidateAttributes.size();
+		// NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, so a
+		// gain ratio of 0 is never selected below -- confirm this is intended.
+		double bestGainRatio = Double.MIN_VALUE;
+		SplittingCriterion splitCriterion = new SplittingCriterion();
+
+		/* Calculate Gain Ratio for every available attribute. */
+		for (int i = 0; i < n; i++) {
+			AttributeDefinition attrDef = candidateAttributes.get(i);
+			String attrName = attrDef.getName();
+			Double splitPoint = null;
+
+			BranchGroup branches = null;
+
+			if (attrDef.isDiscrete()) {
+				/*
+				 * For discrete attribute we split all data into subsets based
+				 * on attribute values.
+				 */
+				branches = BranchGroup.createBranchesFromDiscreteAttr(data,
+						attrName);
+			} else {
+				/*
+				 * For continuous attribute we pick a value that is in the
+				 * middle of min and max attribute values that are present in
+				 * the data.
+				 */
+				splitPoint = pickSplitPoint(data, attrName);
+
+				/*
+				 * All data will be split into two groups: group with values x
+				 * <= splitPoint and group with values x > splitPoint
+				 */
+				branches = BranchGroup.createBranchesFromContiniuousAttr(data,
+						attrName, splitPoint);
+			}
+
+			// Only consider attributes that split the data into more than one
+			// branch
+			if (branches.getBranches().size() > 1) {
+				Double gainRatio = calculateGainRatio(data, branches);
+
+				if (gainRatio > bestGainRatio) {
+					bestGainRatio = gainRatio;
+					splitCriterion.setSplitAttributeName(attrName);
+					splitCriterion.setSplitPoint(splitPoint);
+					splitCriterion.setSplitData(branches);
+				}
+			}
+		}
+
+		return splitCriterion;
+	}
+
+	private Double calculateGainRatio(List<Instance> allData,
+			BranchGroup branches) {
+
+		List<List<Instance>> dataByBranch = branches.getData();
+
+		InfoGain infoGain = new InfoGain();
+
+		return infoGain.gainRatio(allData, dataByBranch);
+	}
+
+	/*
+	 * Calculates a value to split on for continuous valued attributes: the
+	 * midpoint of the smallest and largest numeric value (0.0 if there is none).
+	 */
+	private Double pickSplitPoint(List<Instance> data, String attrName) {
+		double minValue = Double.MAX_VALUE;
+		// Lowest finite double; Double.MIN_VALUE is the smallest POSITIVE one.
+		double maxValue = -Double.MAX_VALUE;
+
+		for (Instance i : data) {
+			Attribute a = i.getAttributeByName(attrName);
+			Double value = AttributeUtils.toDouble(a.getValue());
+			if (value != null) {
+				minValue = (value < minValue) ? value : minValue;
+				maxValue = (value > maxValue) ? value : maxValue;
+			}
+		}
+		// Midpoint; halve before adding so that huge values cannot overflow.
+		return minValue / 2.0 + maxValue / 2.0;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/AttributeUtils.java b/src/org/yooreeka/algos/taxis/tree/AttributeUtils.java
new file mode 100644
index 0000000..3b6def7
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/AttributeUtils.java
@@ -0,0 +1,61 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+public class AttributeUtils {
+
+	/**
+	 * Converts an attribute value to a Double. Any Number is converted with
+	 * doubleValue(), a String is parsed; null or any other type yields null.
+	 *
+	 * @throws NumberFormatException if a String value is not a valid number
+	 */
+	public static Double toDouble(Object o) {
+		if (o instanceof Double) {
+			return (Double) o;
+		}
+		if (o instanceof Number) {
+			return Double.valueOf(((Number) o).doubleValue());
+		}
+		if (o instanceof String) {
+			return Double.valueOf((String) o);
+		}
+		return null;
+	}
+
+	/** Text form of a String or Number value; null for anything else. */
+	public static String toString(Object o) {
+		if (o instanceof String) {
+			return (String) o;
+		}
+		return (o instanceof Number) ? String.valueOf(o) : null;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/Branch.java b/src/org/yooreeka/algos/taxis/tree/Branch.java
new file mode 100644
index 0000000..a4e16d7
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/Branch.java
@@ -0,0 +1,92 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/** A named subset of instances produced by a split. */
+public class Branch {
+	/**
+	 * Adds an instance to the branch registered under branchName, creating and
+	 * registering that branch first if the map does not contain it yet.
+	 */
+	public static void addInstance(Map<String, Branch> branches,
+			String branchName, Instance i) {
+		Branch target = branches.get(branchName);
+		if (target == null) {
+			target = new Branch(branchName);
+			branches.put(branchName, target);
+		}
+		target.add(i);
+	}
+
+	private String branchName;
+
+	private List<Instance> data;
+
+	public Branch() {
+		this(null);
+	}
+
+	public Branch(String name) {
+		this.branchName = name;
+		this.data = new ArrayList<Instance>();
+	}
+
+	public void add(Instance instance) {
+		data.add(instance);
+	}
+
+	public void add(List<Instance> multipleInstances) {
+		data.addAll(multipleInstances);
+	}
+
+	/** Returns the list of instances itself, not a copy. */
+	public List<Instance> getData() {
+		return data;
+	}
+
+	public String getName() {
+		return branchName;
+	}
+
+	public void setData(List<Instance> data) {
+		this.data = data;
+	}
+
+	public void setName(String name) {
+		this.branchName = name;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/BranchGroup.java b/src/org/yooreeka/algos/taxis/tree/BranchGroup.java
new file mode 100644
index 0000000..534d506
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/BranchGroup.java
@@ -0,0 +1,130 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * Branches obtained by splitting a list of instances on one attribute. The
+ * group carries the attribute name; its branches are keyed by branch name.
+ */
+public class BranchGroup {
+	/**
+	 * Branch names that identify the two data subsets when the split is done on
+	 * a continuous value.
+	 */
+	public static class BinaryBranchNames {
+		public static final String TRUE_BRANCH = "true";
+		public static final String FALSE_BRANCH = "false";
+
+		private BinaryBranchNames() {
+			// constants only, never instantiated
+		}
+	}
+
+	/** Name given to the group; the factory methods pass the attribute name. */
+	private String name;
+
+	/** Branches of the group keyed by branch name. */
+	private Map<String, Branch> branches;
+
+	/**
+	 * Distributes the instances over branches named by
+	 * SplittingCriterion.getBranchName(value, splitPoint), where value is the
+	 * instance's attribute value converted with AttributeUtils.toDouble.
+	 */
+	public static BranchGroup createBranchesFromContiniuousAttr(
+			List<Instance> data, String attrName, Double splitPoint) {
+		BranchGroup group = new BranchGroup(attrName);
+		for (Instance instance : data) {
+			Double numeric = AttributeUtils.toDouble(instance
+					.getAttributeByName(attrName).getValue());
+			group.add(SplittingCriterion.getBranchName(numeric, splitPoint),
+					instance);
+		}
+		return group;
+	}
+
+	/**
+	 * Distributes the instances over branches named by
+	 * SplittingCriterion.getBranchName(value), where value is the instance's
+	 * attribute value converted with AttributeUtils.toString.
+	 */
+	public static BranchGroup createBranchesFromDiscreteAttr(
+			List<Instance> data, String attrName) {
+		BranchGroup group = new BranchGroup(attrName);
+		for (Instance instance : data) {
+			String text = AttributeUtils.toString(instance
+					.getAttributeByName(attrName).getValue());
+			group.add(SplittingCriterion.getBranchName(text), instance);
+		}
+		return group;
+	}
+
+	public BranchGroup(String name) {
+		this.name = name;
+		this.branches = new HashMap<String, Branch>();
+	}
+
+	/** Adds the instance to the named branch, creating the branch on demand. */
+	public void add(String branchName, Instance i) {
+		Branch.addInstance(branches, branchName, i);
+	}
+
+	/** Returns the branch with that name, or null when there is none. */
+	public Branch getBranch(String branchName) {
+		return branches.get(branchName);
+	}
+
+	/** Returns a new list holding the current branches. */
+	public List<Branch> getBranches() {
+		return new ArrayList<Branch>(branches.values());
+	}
+
+	/** Returns the instance list of every branch, one entry per branch. */
+	public List<List<Instance>> getData() {
+		List<List<Instance>> perBranch = new ArrayList<List<Instance>>();
+		for (Branch branch : branches.values()) {
+			perBranch.add(branch.getData());
+		}
+		return perBranch;
+	}
+
+	public String getName() {
+		return name;
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/ConceptUtils.java b/src/org/yooreeka/algos/taxis/tree/ConceptUtils.java
new file mode 100644
index 0000000..4d1139a
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/ConceptUtils.java
@@ -0,0 +1,85 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/** Static helpers working on the concept names of a list of instances. */
+public class ConceptUtils {
+
+	/**
+	 * Counts how many instances carry each concept name.
+	 *
+	 * @return map from concept name to its number of occurrences.
+	 */
+	public static Map<String, Integer> countConcepts(List<Instance> instances) {
+		Map<String, Integer> tally = new HashMap<String, Integer>();
+		for (Instance instance : instances) {
+			String label = instance.getConcept().getName();
+			Integer seen = tally.get(label);
+			tally.put(label, seen == null ? 1 : seen + 1);
+		}
+		return tally;
+	}
+
+	/**
+	 * Returns the concept name with the highest count, or null when the list is
+	 * empty. Ties go to whichever name the count map iterates first.
+	 */
+	public static String findMostFrequentConcept(List<Instance> instances) {
+		String winner = null;
+		int best = 0;
+		for (Map.Entry<String, Integer> entry : countConcepts(instances)
+				.entrySet()) {
+			int occurrences = entry.getValue();
+			if (occurrences > best) {
+				best = occurrences;
+				winner = entry.getKey();
+			}
+		}
+		return winner;
+	}
+
+	/** Returns the distinct concept names found in the list, in no set order. */
+	public static String[] getUniqueConcepts(List<Instance> instances) {
+		Set<String> names = new HashSet<String>();
+		for (Instance instance : instances) {
+			names.add(instance.getConcept().getName());
+		}
+		return names.toArray(new String[names.size()]);
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/DecisionTreeClassifier.java b/src/org/yooreeka/algos/taxis/tree/DecisionTreeClassifier.java
new file mode 100644
index 0000000..c966eec
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/DecisionTreeClassifier.java
@@ -0,0 +1,248 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.BaseConcept;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * Decision tree classifier: train() grows the tree from the training set over
+ * the attributes registered with trainOnAttribute(); classify() walks it.
+ */
+public class DecisionTreeClassifier implements Classifier, java.io.Serializable {
+
+	private static final long serialVersionUID = -3360341002492465102L;
+
+	private String name;
+
+	protected boolean verbose = false;
+
+	/* Selects best attribute to split on. */
+	private AttributeSelector attributeSelector;
+
+	protected Node rootTreeNode;
+
+	/* No need to keep training data for serialization. */
+	private transient TrainingSet trainingData;
+
+	/* Attributes that should be considered for tree training. */
+	private List<AttributeDefinition> availableAttributes;
+
+	public DecisionTreeClassifier(String name, TrainingSet trainingData) {
+		this.name = name;
+		rootTreeNode = null;
+		attributeSelector = new AttributeSelector();
+		this.trainingData = trainingData;
+		this.availableAttributes = new ArrayList<AttributeDefinition>();
+	}
+
+	public DecisionTreeClassifier(TrainingSet trainingData) {
+		this(DecisionTreeClassifier.class.getSimpleName(), trainingData);
+	}
+
+	/* Turns the node into a leaf that predicts the given concept. */
+	private Node asLeaf(Node node, String conceptName) {
+		node.setLeaf(true);
+		node.setConceptName(conceptName);
+		node.setAttributeName(null);
+		return node;
+	}
+
+	/**
+	 * Builds subtree using provided data and attributes.
+	 * 
+	 * @param data
+	 *            training instances that should be considered for subtree.
+	 * @param candidateAttributes
+	 *            available attributes.
+	 * @return root node of the subtree.
+	 */
+	private Node buildTree(List<Instance> data,
+			List<AttributeDefinition> candidateAttributes) {
+
+		/* Node that will represent the subtree. */
+		Node node = new Node();
+
+		String[] concepts = ConceptUtils.getUniqueConcepts(data);
+		String mostFrequentConcept = ConceptUtils.findMostFrequentConcept(data);
+		node.setMostFrequentConceptName(mostFrequentConcept);
+		node.setNodeTrainingData(data);
+
+		/* No need to split if there is only one concept left. */
+		if (concepts.length == 1) {
+			return asLeaf(node, concepts[0]);
+		}
+
+		/*
+		 * We've run out of attributes to split on. Just use the most frequent
+		 * concept.
+		 */
+		if (candidateAttributes == null || candidateAttributes.size() == 0) {
+			return asLeaf(node, mostFrequentConcept);
+		}
+
+		/* Determines the best attribute to split on. */
+		SplittingCriterion bestSplitCriterion = attributeSelector.apply(data,
+				candidateAttributes);
+
+		/*
+		 * No usable split was found. Fall back to the most frequent concept
+		 * instead of an arbitrary element of the unordered concept set.
+		 */
+		if (bestSplitCriterion == null
+				|| bestSplitCriterion.getSplitAttributeName() == null) {
+			return asLeaf(node, mostFrequentConcept);
+		}
+
+		/* For non-leaf nodes we don't have the class label. */
+		node.setConceptName(null);
+		node.setAttributeName(bestSplitCriterion.getSplitAttributeName());
+		node.setSplitValue(bestSplitCriterion.getSplitPoint());
+
+		boolean discreteSplit = bestSplitCriterion.isDiscreteValueSplit();
+		BranchGroup branches = bestSplitCriterion.getSplitData();
+		for (Branch branch : branches.getBranches()) {
+			// every child gets its own copy of the attribute list
+			List<AttributeDefinition> childNodeAttrs = AttributeDefinition
+					.copyAttributeDefs(candidateAttributes);
+			if (discreteSplit) {
+				// a discrete split attribute is removed from consideration;
+				// a continuous one stays in the list for the child nodes
+				AttributeDefinition.removeAttributeDef(
+						bestSplitCriterion.getSplitAttributeName(),
+						childNodeAttrs);
+			}
+			Node childNode = buildTree(branch.getData(), childNodeAttrs);
+			node.addChild(branch.getName(), childNode);
+		}
+
+		return node;
+	}
+
+	/**
+	 * Classifies the instance with the trained tree.
+	 * 
+	 * @throws IllegalStateException
+	 *             if there is no tree yet, i.e. train() has not been called.
+	 */
+	public Concept classify(Instance i) {
+		String category = requireTree().classify(i);
+		return createConcept(category);
+	}
+
+	/* Allows subclasses to provide specific implementation of the concept. */
+	protected Concept createConcept(String category) {
+		return new BaseConcept(category);
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * @return the verbose
+	 */
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	/**
+	 * Prints information about tree.
+	 */
+	public void printTree() {
+		Node root = requireTree();
+		System.out.println("--- Tree ---");
+		root.print(0);
+		System.out.println("------------");
+	}
+
+	public void pruneTree() {
+		requireTree().prune();
+	}
+
+	/*
+	 * Returns the root node, failing with a descriptive IllegalStateException
+	 * (instead of a bare NullPointerException) when there is no tree yet.
+	 */
+	private Node requireTree() {
+		if (rootTreeNode == null) {
+			throw new IllegalStateException(
+					"Decision tree is not available; call train() first.");
+		}
+		return rootTreeNode;
+	}
+
+	/**
+	 * @param verbose
+	 *            the verbose to set
+	 */
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+	/**
+	 * Builds the tree from all instances of the training set.
+	 * 
+	 * @return always true.
+	 */
+	public boolean train() {
+
+		long t0 = System.currentTimeMillis();
+
+		HashMap<Integer, Instance> instances = trainingData.getInstances();
+		ArrayList<Instance> trainingInstances = new ArrayList<Instance>(
+				instances.values());
+
+		rootTreeNode = buildTree(trainingInstances, availableAttributes);
+
+		if (verbose) {
+			System.out.print("       Decision tree training completed in ");
+			System.out.println((System.currentTimeMillis() - t0) + " (ms)");
+		}
+
+		return true;
+	}
+
+	/**
+	 * Registers an attribute to be considered when the tree is trained.
+	 */
+	public void trainOnAttribute(String name, boolean isDiscrete) {
+		AttributeDefinition attrDef = new AttributeDefinition(name, isDiscrete);
+		availableAttributes.add(attrDef);
+	}
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/InfoGain.java b/src/org/yooreeka/algos/taxis/tree/InfoGain.java
new file mode 100644
index 0000000..353e6ab
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/InfoGain.java
@@ -0,0 +1,151 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * Entropy-based measures used to score how a split divides a set of instances.
+ */
+public class InfoGain {
+
+	public InfoGain() {
+	}
+
+	/**
+	 * Entropy of the dataset: minus the sum of p * log2(p) over the relative
+	 * frequency p of every concept counted by ConceptUtils.countConcepts.
+	 * 
+	 * @param data
+	 *            instances whose concept distribution is measured.
+	 * @return entropy of the concept distribution.
+	 */
+	public Double entropy(List<Instance> data) {
+		// How many times each class (category) occurs in the data.
+		Map<String, Integer> instanceCountByClassMap = ConceptUtils
+				.countConcepts(data);
+		double n = data.size();
+		double sum = 0.0;
+		for (Integer count : instanceCountByClassMap.values()) {
+			sum += plogp(count / n);
+		}
+		return -sum;
+	}
+
+	/**
+	 * Expected information of a split: the entropy of every subset weighted by
+	 * the subset's share of allData.
+	 * 
+	 * @param allData
+	 *            initial set of instances.
+	 * @param allDataSubsets
+	 *            initial set split into subsets.
+	 * @return weighted sum of the subset entropies.
+	 */
+	public Double expectedInformation(List<Instance> allData,
+			List<List<Instance>> allDataSubsets) {
+		double n = allData.size();
+		double sum = 0.0;
+		for (List<Instance> dataSubset : allDataSubsets) {
+			// An empty subset carries no weight; skipping it also avoids 0/0
+			// when allData itself is empty.
+			if (!dataSubset.isEmpty()) {
+				sum += dataSubset.size() / n * entropy(dataSubset);
+			}
+		}
+		return sum;
+	}
+
+	/**
+	 * Information gain for a given split.
+	 * 
+	 * @param allData
+	 *            initial set of instances.
+	 * @param allDataSubsets
+	 *            initial set split into subsets.
+	 * 
+	 * @return information gain.
+	 */
+	public Double gain(List<Instance> allData,
+			List<List<Instance>> allDataSubsets) {
+		return entropy(allData) - expectedInformation(allData, allDataSubsets);
+	}
+
+	/**
+	 * Gain ratio: information gain divided by the split information.
+	 * 
+	 * @param allData
+	 *            initial set of instances.
+	 * @param allDataSubsets
+	 *            initial set split into subsets.
+	 * 
+	 * @return gain ratio, or 0.0 when the split information is zero.
+	 */
+	public Double gainRatio(List<Instance> allData,
+			List<List<Instance>> allDataSubsets) {
+		double info = splitInfo(allData, allDataSubsets);
+		if (info == 0.0) {
+			// e.g. every instance landed in one subset: dividing would give
+			// NaN or Infinity instead of a comparable score.
+			return 0.0;
+		}
+		return gain(allData, allDataSubsets) / info;
+	}
+
+	private double log2(double d) {
+		return Math.log(d) / Math.log(2.0);
+	}
+
+	/*
+	 * p * log2(p) with the limit value 0 for p == 0; the direct product
+	 * 0 * -Infinity would be NaN.
+	 */
+	private double plogp(double p) {
+		return p > 0.0 ? p * log2(p) : 0.0;
+	}
+
+	/**
+	 * Split information: -sum(r * log2(r)) over each subset's size ratio r.
+	 */
+	public Double splitInfo(List<Instance> allData,
+			List<List<Instance>> allDataSubsets) {
+		double n = allData.size();
+		double sum = 0.0;
+		for (List<Instance> dataSubset : allDataSubsets) {
+			sum += plogp(dataSubset.size() / n);
+		}
+		return -sum;
+	}
+
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/algos/taxis/tree/Node.java b/src/org/yooreeka/algos/taxis/tree/Node.java
new file mode 100644
index 0000000..78d9ac6
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/Node.java
@@ -0,0 +1,403 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * Decision tree node.
+ */
+class Node implements java.io.Serializable {
+
+	private static final long serialVersionUID = -4282027910521283908L;
+
+	/**
+	 * Auxiliary variable for printing
+	 */
+	private boolean isVerbose = false;
+
+	/*
+	 * Instance attribute name that this node will use to choose the branch.
+	 */
+	private String attributeName;
+
+	/*
+	 * Map of child nodes keyed by branch name. Branch name depends on type of
+	 * attribute. For discrete attribute actual attribute value will be used and
+	 * for continuous-valued attributes we only have two branches that split all
+	 * values into two subsets.
+	 */
+	private Map<String, Node> childNodesByBranchName;
+
+	/*
+	 * Indicates that this node is a leaf node.
+	 */
+	private boolean isLeaf;
+
+	/*
+	 * Predicted class (concept) name.
+	 */
+	private String conceptName;
+
+	/*
+	 * Split value that will be used to determine child node when evaluating
+	 * continuous-valued attribute.
+	 */
+	private Double splitValue;
+
+	/*
+	 * This value will be used as a result of classification whenever non-leaf
+	 * node can not choose next tree node. This can happen when there are no
+	 * available attributes anymore but the node is not a leaf node.
+	 */
+	private String mostFrequentConceptName;
+
+	/*
+	 * Training instances that reached this node. Only used by pruning during
+	 * the training phase; transient, so absent on a deserialized tree.
+	 */
+	private transient List<Instance> nodeTrainingData;
+
+	public Node() {
+		childNodesByBranchName = new HashMap<String, Node>();
+	}
+
+	public void addChild(String value, Node node) {
+		this.childNodesByBranchName.put(value, node);
+	}
+
+	public String classify(Instance i) {
+		Node subtree = this;
+
+		while (subtree.isLeaf() == false) {
+			Node childNode = subtree.selectChild(i);
+
+			if (childNode == null) {
+				// Decision tree couldn't choose next child
+				break;
+			}
+
+			subtree = childNode;
+		}
+
+		String category = null;
+
+		if (subtree.isLeaf()) {
+			category = subtree.getConceptName();
+		} else {
+			category = subtree.getMostFrequentConceptName();
+		}
+
+		return category;
+	}
+
+	private double estimateErrorRate(int n, int e) {
+		TrueErrorRateEstimator ter = new TrueErrorRateEstimator();
+		/* Using default confidence range: 25% (z = 0.69) */
+		return ter.errorRate(n, e);
+	}
+
+	private double estimateLeafErrorRate() {
+		/*
+		 * Count training errors made when the most frequent category is used
+		 * as classification result of this node, then estimate the error rate.
+		 */
+		int n = nodeTrainingData.size();
+		int e = 0;
+		for (Instance i : nodeTrainingData) {
+			if (!sameConcept(mostFrequentConceptName, i)) {
+				e++;
+			}
+		}
+		return estimateErrorRate(n, e);
+	}
+
+	private double estimateTreeErrorRate(Node subtree, List<Instance> data) {
+		/*
+		 * Count errors made on the given data when the subtree classifies
+		 * every instance, then estimate the error rate.
+		 */
+		int n = data.size();
+		int e = 0;
+		for (Instance i : data) {
+			if (!sameConcept(subtree.classify(i), i)) {
+				e++;
+			}
+		}
+		return estimateErrorRate(n, e);
+	}
+
+	/*
+	 * Null-safe, case-sensitive match between a predicted concept name and the
+	 * concept of the instance. Both error estimates must compare the same way.
+	 */
+	private static boolean sameConcept(String predicted, Instance i) {
+		return predicted != null && predicted.equals(i.getConcept().getName());
+	}
+
+	public String getAttributeName() {
+		return attributeName;
+	}
+
+	public String getConceptName() {
+		return conceptName;
+	}
+
+	public String getMostFrequentConceptName() {
+		return mostFrequentConceptName;
+	}
+
+	public List<Instance> getNodeTrainingData() {
+		return nodeTrainingData;
+	}
+
+	public Double getSplitValue() {
+		return splitValue;
+	}
+
+	public boolean isLeaf() {
+		return isLeaf;
+	}
+
+	/**
+	 * @return the isVerbose
+	 */
+	public boolean isVerbose() {
+		return isVerbose;
+	}
+
+	public void print(int level) {
+
+		String padding = StringUtils.leftPad("", level * 5);
+
+		String nodeInfo = "Node:" + "attrName=" + this.attributeName
+				+ ",isLeaf=" + this.isLeaf + ",concept=" + this.conceptName;
+
+		System.out.println(padding + nodeInfo);
+		for (Map.Entry<String, Node> e : childNodesByBranchName.entrySet()) {
+			if (splitValue == null) {
+				System.out.println(padding + "-> Branch: [" + attributeName
+						+ "=" + e.getKey() + "]");
+			} else {
+				String condition;
+				if (BranchGroup.BinaryBranchNames.TRUE_BRANCH
+						.equalsIgnoreCase(e.getKey())) {
+					condition = "<=";
+				} else {
+					condition = ">";
+				}
+				System.out.println(padding + "-> Branch: " + e.getKey() + " ["
+						+ attributeName + condition + this.splitValue + "]");
+
+			}
+			e.getValue().print(level + 1);
+		}
+	}
+
+	/**
+	 * Prunes this subtree bottom-up: after pruning the children, the node is
+	 * replaced by a leaf or by its most popular child subtree when the
+	 * estimated error rate does not get worse.
+	 */
+	public void prune() {
+		if (isLeaf) {
+			return;
+		}
+
+		/*
+		 * First prune all child nodes (child subtrees).
+		 */
+		for (Node childNode : childNodesByBranchName.values()) {
+			childNode.prune();
+		}
+
+		// find most popular subtree
+		Node mostPopularSubtree = selectMostFrequentSubtree();
+
+		/*
+		 * Error rates can only be estimated from training data. It is transient,
+		 * so a deserialized tree has none; leave such a node untouched instead
+		 * of failing with a NullPointerException.
+		 */
+		if (nodeTrainingData == null || mostPopularSubtree == null) {
+			return;
+		}
+
+		/*
+		 * Estimate error rate for the case when we use the most frequent
+		 * concept from the node training set.
+		 */
+		double leafErrorRate = estimateLeafErrorRate();
+
+		/*
+		 * Estimate error rate using current tree
+		 */
+		double nodeErrorRate = estimateTreeErrorRate(this, nodeTrainingData);
+
+		/*
+		 * Estimate error rate for most popular subtree
+		 */
+		double mostPopularSubtreeErrorRate = estimateTreeErrorRate(
+				mostPopularSubtree, nodeTrainingData);
+
+		if (isVerbose) {
+			// attribute name is passed as an argument so that a '%' in it
+			// cannot be mistaken for a format specifier
+			System.out.printf("Pruning: %s, tree error rate: %.5f"
+					+ ", subtree error rate: %.5f, leaf error rate: %.5f\n",
+					this.attributeName, nodeErrorRate,
+					mostPopularSubtreeErrorRate, leafErrorRate);
+		}
+
+		if (nodeErrorRate >= leafErrorRate
+				|| nodeErrorRate >= mostPopularSubtreeErrorRate) {
+			// We can get better error rate after pruning
+			if (leafErrorRate <= mostPopularSubtreeErrorRate) {
+				if (isVerbose) {
+					System.out.println("Replacing current node with leaf node");
+				}
+
+				// replace current node with leaf node.
+				this.setLeaf(true);
+				this.childNodesByBranchName.clear();
+				this.conceptName = this.mostFrequentConceptName;
+				this.splitValue = null;
+			} else {
+				if (isVerbose) {
+					System.out.println("Replacing current node with subtree");
+				}
+
+				// replace current node with subtree
+				this.attributeName = mostPopularSubtree.getAttributeName();
+				this.isLeaf = mostPopularSubtree.isLeaf();
+				this.childNodesByBranchName = mostPopularSubtree.childNodesByBranchName;
+				this.conceptName = mostPopularSubtree.conceptName;
+				this.splitValue = mostPopularSubtree.splitValue;
+				// Note: we are keeping current training data of the node and
+				// most frequent concept name that is based on training data.
+			}
+		}
+	}
+
+	/**
+	 * Returns next node from the tree that fits provided instance.
+	 * 
+	 * @param t
+	 *            instance that we are trying to classify.
+	 * 
+	 * @return next tree node or null.
+	 */
+	public Node selectChild(Instance t) {
+
+		Node child = null;
+
+		Attribute a = t.getAttributeByName(attributeName);
+
+		if (a != null) {
+
+			String branchName = null;
+
+			if (splitValue != null) {
+				Double attrValue = AttributeUtils.toDouble(a.getValue());
+				branchName = SplittingCriterion.getBranchName(attrValue,
+						splitValue);
+			} else {
+				String attrValue = AttributeUtils.toString(a.getValue());
+				branchName = SplittingCriterion.getBranchName(attrValue);
+			}
+			child = childNodesByBranchName.get(branchName);
+		}
+
+		// can be null if instance attribute is missing or has value that we
+		// haven't seen during training (for discrete attributes)
+
+		return child;
+	}
+
+	/**
+	 * Selects child node (subtree) that is most frequent outcome of the current
+	 * node (has the most training samples).
+	 */
+	private Node selectMostFrequentSubtree() {
+		Node selectedNode = null;
+		int maxTrainingSamples = 0;
+		for (Node childNode : childNodesByBranchName.values()) {
+			if (childNode.getNodeTrainingData() != null) {
+				int n = childNode.getNodeTrainingData().size();
+				if (n > maxTrainingSamples) {
+					selectedNode = childNode;
+					maxTrainingSamples = n;
+				}
+			}
+		}
+
+		return selectedNode;
+	}
+
+	public void setAttributeName(String attributeName) {
+		this.attributeName = attributeName;
+	}
+
+	public void setConceptName(String conceptName) {
+		this.conceptName = conceptName;
+	}
+
+	public void setLeaf(boolean isLeaf) {
+		this.isLeaf = isLeaf;
+	}
+
+	public void setMostFrequentConceptName(String mostFrequentConceptName) {
+		this.mostFrequentConceptName = mostFrequentConceptName;
+	}
+
+	public void setNodeTrainingData(List<Instance> nodeTrainingData) {
+		this.nodeTrainingData = nodeTrainingData;
+	}
+
+	public void setSplitValue(Double splitValue) {
+		this.splitValue = splitValue;
+	}
+
+	/**
+	 * @param isVerbose
+	 *            the isVerbose to set
+	 */
+	public void setVerbose(boolean isVerbose) {
+		this.isVerbose = isVerbose;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/SplittingCriterion.java b/src/org/yooreeka/algos/taxis/tree/SplittingCriterion.java
new file mode 100644
index 0000000..a853183
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/SplittingCriterion.java
@@ -0,0 +1,124 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+/**
+ * Holds the information about a split: the attribute to split on, the split
+ * point (continuous attributes only) and the training data grouped by branch.
+ */
+public class SplittingCriterion {
+
+	/**
+	 * Resolves the branch for a continuous attribute value.
+	 * 
+	 * @param attrValue
+	 *            attribute value to evaluate.
+	 * @param splitValue
+	 *            split point the value is compared against.
+	 * 
+	 * @return the TRUE branch name when {@code attrValue <= splitValue},
+	 *         otherwise the FALSE branch name.
+	 */
+	public static String getBranchName(Double attrValue, Double splitValue) {
+		boolean atOrBelowSplit = attrValue <= splitValue;
+		return atOrBelowSplit ? BranchGroup.BinaryBranchNames.TRUE_BRANCH
+				: BranchGroup.BinaryBranchNames.FALSE_BRANCH;
+	}
+
+	/**
+	 * Resolves the branch for a discrete attribute value. Every discrete value
+	 * gets a branch of its own, so the value itself serves as the branch name.
+	 * 
+	 * @param attrValue
+	 *            attribute value to evaluate.
+	 * 
+	 * @return the value that was passed in, unchanged.
+	 */
+	public static String getBranchName(String attrValue) {
+		return attrValue;
+	}
+
+	/** Name of the attribute to split on. */
+	private String splitAttributeName;
+
+	/**
+	 * Value used to decide between the true and the false branch. Only
+	 * relevant for continuous attributes; null means a discrete split.
+	 */
+	private Double splitPoint;
+
+	/**
+	 * Data by branch: each branch holds the subset of the instances that
+	 * reached the node. Kept so it need not be recalculated per branch.
+	 */
+	private BranchGroup splitData;
+
+	/** @return the name of the attribute to split on. */
+	public String getSplitAttributeName() {
+		return this.splitAttributeName;
+	}
+
+	public BranchGroup getSplitData() {
+		return this.splitData;
+	}
+
+	/** @return the split point, or null for a discrete split. */
+	public Double getSplitPoint() {
+		return this.splitPoint;
+	}
+
+	/**
+	 * @return true when a split point is set (split on a continuous value).
+	 */
+	public boolean isContinuousValueSplit() {
+		return this.splitPoint != null;
+	}
+
+	/**
+	 * @return true when no split point is set (split on discrete values).
+	 */
+	public boolean isDiscreteValueSplit() {
+		return this.splitPoint == null;
+	}
+
+	public void setSplitAttributeName(String splitAttributeName) {
+		this.splitAttributeName = splitAttributeName;
+	}
+
+	public void setSplitData(BranchGroup splitData) {
+		this.splitData = splitData;
+	}
+
+	public void setSplitPoint(Double splitPoint) {
+		this.splitPoint = splitPoint;
+	}
+
+}
diff --git a/src/org/yooreeka/algos/taxis/tree/TrueErrorRateEstimator.java b/src/org/yooreeka/algos/taxis/tree/TrueErrorRateEstimator.java
new file mode 100644
index 0000000..5174e39
--- /dev/null
+++ b/src/org/yooreeka/algos/taxis/tree/TrueErrorRateEstimator.java
@@ -0,0 +1,71 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.algos.taxis.tree;
+
+/**
+ * Estimates the true error rate of a tree node for pruning, following the
+ * C4.5 heuristic of using an upper confidence limit on the observed error.
+ */
+public class TrueErrorRateEstimator {
+
+	/** z-value used in the confidence limit; 0.69 is for confidence 0.25 (25%). */
+	private double z = 0.69;
+
+	/**
+	 * Computes the upper confidence limit of the error rate from the error
+	 * observed on the training data; C4.5 uses that limit to represent the
+	 * true error rate of a node.
+	 * 
+	 * @param n
+	 *            total number of training samples at the node
+	 * @param e
+	 *            number of misclassified samples at the node
+	 * @return the estimated error rate; for n = 0 the divisions below yield
+	 *         NaN instead of a usable rate.
+	 */
+	public double errorRate(double n, double e) {
+		final double zSquared = z * z;
+
+		// Error rate actually observed on the training data.
+		final double observed = e / n;
+
+		// Term under the square root of the confidence-limit formula.
+		final double radicand = observed / n - (observed * observed) / n
+				+ zSquared / (4 * n * n);
+
+		final double upper = observed + zSquared / (2 * n) + z
+				* Math.sqrt(radicand);
+		final double scale = 1 + zSquared / n;
+
+		// Upper confidence limit, used as the estimate of the error rate.
+		return upper / scale;
+	}
+}
diff --git a/src/org/yooreeka/config/YooreekaConfigurator.java b/src/org/yooreeka/config/YooreekaConfigurator.java
new file mode 100644
index 0000000..bce24ee
--- /dev/null
+++ b/src/org/yooreeka/config/YooreekaConfigurator.java
@@ -0,0 +1,220 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.config;
+
+import java.io.InputStream;
+import java.util.Properties;
+import java.util.logging.Level;
+
+import org.yooreeka.util.P;
+
+/**
+ * Central place to access application properties.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class YooreekaConfigurator {
+
+	public static final String DATA_DIR = "iweb2.data.dir";
+	public static final String CRAWL_DATA_DIR = "iweb2.crawl.dir";
+	public static final String TEMP_DIR = "iweb2.temp.dir";
+	public static final String MOVIELENS_DATA_DIR = "iweb2.movielens.data.dir";
+	public static final String MOVIELENSTEST_DATA_DIR = "iweb2.ch3.movielenstest.data.dir";
+
+	public static final String LOG_LEVEL_SEVERE = "SEVERE";
+	public static final String LOG_LEVEL_WARNING = "WARNING";
+	public static final String LOG_LEVEL_INFO = "INFO";
+	public static final String LOG_LEVEL_CONFIG = "CONFIG";
+	public static final String LOG_LEVEL_FINE = "FINE";
+	public static final String LOG_LEVEL_FINER = "FINER";
+	public static final String LOG_LEVEL_FINEST = "FINEST";
+
+	/* System property that can name a properties resource to load instead. */
+	private static String systemPropertyName = "iweb2.configuration";
+
+	/* Resource that is loaded when that system property is not set. */
+	private static String defaultResourceName = "/iweb2.properties";
+
+	private static Properties props = new Properties();
+
+	// NOTE(review): nothing in this class ever fills logProps, so getLevel()
+	// currently always falls back to WARNING.
+	private static Properties logProps = new Properties();
+
+	static {
+		String resourceName = System.getProperty(systemPropertyName);
+		if (resourceName == null) {
+			// No override given: use the default resource.
+			resourceName = defaultResourceName;
+		} else {
+			System.out
+					.println("Loading configuration from resource defined through system property: "
+							+ systemPropertyName + "=" + resourceName);
+		}
+
+		props = readProperties(resourceName);
+	}
+
+	/** @return the configured value of iweb2.home, or null if it is not set. */
+	public static String getHome() {
+		return props.getProperty("iweb2.home");
+	}
+
+	/**
+	 * Resolves the logging level configured for the given name.
+	 * 
+	 * @param cName
+	 *            name appended to "log.level." to form the property key.
+	 * @return the matching java.util.logging level; WARNING when the property
+	 *         is missing or holds an unrecognized value.
+	 */
+	public static Level getLevel(String cName) {
+		String logLevel = getLogProperty("log.level." + cName);
+		if (logLevel == null) {
+			logLevel = LOG_LEVEL_WARNING;
+		}
+
+		switch (logLevel) {
+		case LOG_LEVEL_SEVERE:
+			return Level.SEVERE;
+		case LOG_LEVEL_WARNING:
+			return Level.WARNING;
+		case LOG_LEVEL_INFO:
+			return Level.INFO;
+		case LOG_LEVEL_CONFIG:
+			return Level.CONFIG;
+		case LOG_LEVEL_FINE:
+			return Level.FINE;
+		case LOG_LEVEL_FINER:
+			return Level.FINER;
+		case LOG_LEVEL_FINEST:
+			return Level.FINEST;
+		default:
+			return Level.WARNING;
+		}
+	}
+
+	/** @return the logging property stored under key, or null if absent. */
+	public static String getLogProperty(String key) {
+		return logProps.getProperty(key);
+	}
+
+	/**
+	 * First checks if there is a system property with the same key. Then
+	 * falls back to the property from the configuration file.
+	 * @return null if property not found.
+	 */
+	public static String getProperty(String key) {
+		// allow to override property using -D<property name>=<property value>
+		return System.getProperty(key, props.getProperty(key));
+	}
+
+	/**
+	 * First checks if there is a system property with the same key. Then
+	 * falls back to the property from the configuration file.
+	 * 
+	 * @param key
+	 *            identifies property.
+	 * @param defaultValue
+	 *            default value that will be used if property is not found.
+	 * @return property value or default value.
+	 */
+	public static String getProperty(String key, String defaultValue) {
+		// allow to override property using -D<property name>=<property value>
+		return System.getProperty(key, props.getProperty(key, defaultValue));
+	}
+
+	/**
+	 * Loads properties from the given classpath resource. When the resource
+	 * cannot be found, a warning is printed and the built-in default values
+	 * are returned instead.
+	 * 
+	 * @param resourceName
+	 *            resource path as understood by Class.getResourceAsStream.
+	 * @return the loaded (or default) properties; never null.
+	 * @throws RuntimeException if the resource exists but cannot be read.
+	 */
+	public static Properties readProperties(String resourceName) {
+		Properties loaded = new Properties();
+		// try-with-resources closes the stream; a null resource is skipped.
+		try (InputStream inStream = YooreekaConfigurator.class
+				.getResourceAsStream(resourceName)) {
+			if (inStream != null) {
+				loaded.load(inStream);
+			} else {
+				printNoPropertiesFound();
+				// Fill the object we return: defaults written to the static
+				// field would be lost when the caller assigns the result.
+				putDefaults(loaded);
+			}
+		} catch (Exception e) {
+			String message = "Failed to load properties from resource: '"
+					+ resourceName + "'.";
+			System.out.println("ERROR:\n" + message + "\n" + e.getMessage());
+			throw new RuntimeException(message, e);
+		}
+		return loaded;
+	}
+
+	/**
+	 * Writes the default (MS Windows only) values of the "Download" distro
+	 * into the active configuration, overwriting any existing entries:
+	 * <pre>
+	 *   iweb2.home=C:/iWeb2
+	 *   iweb2.data.dir=C:/iWeb2/data
+	 *   iweb2.crawl.dir=C:/iWeb2/data/crawls
+	 *   iweb2.temp.dir=C:/iWeb2/deploy/temp
+	 *   iweb2.movielens.data.dir=C:/iWeb2/data/ch03/MovieLens
+	 * </pre>
+	 */
+	public static void setStaticProperties() {
+		putDefaults(props);
+	}
+
+	/* Puts the C:/iWeb2 based default values into the given properties. */
+	private static void putDefaults(Properties target) {
+		target.put("iweb2.home", "C:/iWeb2");
+		target.put("iweb2.data.dir", "C:/iWeb2/data");
+		target.put("iweb2.crawl.dir", "C:/iWeb2/data/crawls");
+		target.put("iweb2.temp.dir", "C:/iWeb2/deploy/temp");
+		target.put("iweb2.movielens.data.dir", "C:/iWeb2/data/ch03/MovieLens");
+	}
+
+	private static void printNoPropertiesFound() {
+		P.hline();
+		P.println("  Oops!");
+		P.println("  The file __ iweb2.properties __ was not found!");
+		P.println("  Did you set up the system properly?");
+		P.hline();
+		P.println("  WARNING: Loading DEFAULT property values ...");
+		P.hline();
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/BaggingCreditClassifier.java b/src/org/yooreeka/examples/credit/BaggingCreditClassifier.java
new file mode 100644
index 0000000..9ffccdb
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/BaggingCreditClassifier.java
@@ -0,0 +1,79 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.ensemble.ClassifierEnsemble;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.util.BootstrapTrainingSetBuilder;
+import org.yooreeka.examples.credit.util.UserInstanceBuilder;
+
+public class BaggingCreditClassifier extends ClassifierEnsemble {
+
+	/* Turns users and datasets into instances and training sets. */
+	private UserInstanceBuilder instanceBuilder;
+	/* Produces bootstrap sets from the original training set. */
+	private BootstrapTrainingSetBuilder bootstrapTSetBuilder;
+
+	/**
+	 * Builds the original training set from the given dataset; that set is
+	 * the one later used to generate the bootstrap sets.
+	 */
+	public BaggingCreditClassifier(UserDataset ds) {
+		super(BaggingCreditClassifier.class.getSimpleName());
+		this.instanceBuilder = new UserInstanceBuilder(false);
+		this.bootstrapTSetBuilder = new BootstrapTrainingSetBuilder(
+				this.instanceBuilder.createTrainingSet(ds));
+	}
+
+	/**
+	 * Classifies a user by first converting it into an instance. When verbose
+	 * is set, the user is echoed to standard output beforehand.
+	 */
+	public Concept classify(User user) {
+		if (verbose) {
+			System.out.println("User:\n  >> " + user.toString());
+		}
+		return classify(instanceBuilder.createInstance(user));
+	}
+
+	/** Delegates to the bootstrap builder to produce a bootstrap set. */
+	public TrainingSet getBootstrapSet() {
+		return this.bootstrapTSetBuilder.buildBootstrapSet();
+	}
+
+	public UserInstanceBuilder getInstanceBuilder() {
+		return this.instanceBuilder;
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/BoostingCreditClassifier.java b/src/org/yooreeka/examples/credit/BoostingCreditClassifier.java
new file mode 100644
index 0000000..d77ec31
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/BoostingCreditClassifier.java
@@ -0,0 +1,131 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import org.yooreeka.algos.taxis.boosting.BoostingARCX4Classifier;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.util.UserInstanceBuilder;
+
+public class BoostingCreditClassifier extends BoostingARCX4Classifier {
+
+	private UserInstanceBuilder instanceBuilder;
+
+	/* Null until setClassifierType() accepts a name. */
+	private ClassifierMemberType classifierType;
+
+	public BoostingCreditClassifier(String name, UserDataset ds,
+			UserInstanceBuilder instanceBuilder) {
+		this(name, instanceBuilder, instanceBuilder.createTrainingSet(ds));
+	}
+
+	public BoostingCreditClassifier(String name,
+			UserInstanceBuilder instanceBuilder, TrainingSet tSet) {
+		super(name, tSet);
+		this.instanceBuilder = instanceBuilder;
+	}
+
+	public BoostingCreditClassifier(UserDataset ds) {
+		this(BoostingCreditClassifier.class.getSimpleName(), ds,
+				new UserInstanceBuilder(false));
+	}
+
+	/** Converts the user into an instance and classifies that instance. */
+	public Concept classify(User user) {
+		if (verbose) {
+			System.out.println("User:\n  >> " + user.toString());
+		}
+		return classify(instanceBuilder.createInstance(user));
+	}
+
+	/**
+	 * Creates the base classifier to train on the given set, according to
+	 * the configured classifier type.
+	 * 
+	 * @throws IllegalStateException if no classifier type has been set.
+	 */
+	@Override
+	public Classifier getClassifierForTraining(TrainingSet set) {
+		if (classifierType == null) {
+			// switch on a null enum would only give a bare NullPointerException
+			throw new IllegalStateException(
+					"Classifier member type is not set; call setClassifierType() first.");
+		}
+		switch (classifierType) {
+		case NEURAL_NETWORK:
+			NNCreditClassifier nnClassifier = new NNCreditClassifier(set);
+			nnClassifier.setLearningRate(0.01);
+			nnClassifier.useDefaultAttributes();
+			return nnClassifier;
+		case DECISION_TREE:
+			DTCreditClassifier dtClassifier = new DTCreditClassifier(set);
+			dtClassifier.useDefaultAttributes();
+			dtClassifier.setPruneAfterTraining(true);
+			return dtClassifier;
+		case NAIVE_BAYES:
+			NBCreditClassifier nbClassifier = new NBCreditClassifier(set);
+			nbClassifier.useDefaultAttributes();
+			return nbClassifier;
+		default:
+			throw new RuntimeException("Invalid classifier member type!");
+		}
+	}
+
+	/** @return the classifierType, or null if none has been set. */
+	public ClassifierMemberType getClassifierType() {
+		return classifierType;
+	}
+
+	public UserInstanceBuilder getInstanceBuilder() {
+		return instanceBuilder;
+	}
+
+	/**
+	 * Selects the type of base classifier by name. The name is matched,
+	 * ignoring case, against "decision tree", "neural network" and
+	 * "naive bayes"; any other name leaves the current type unchanged.
+	 * 
+	 * @param type
+	 *            name of the classifier type; must not be null.
+	 */
+	public void setClassifierType(String type) {
+		if (type.equalsIgnoreCase("decision tree")) {
+			this.classifierType = ClassifierMemberType.DECISION_TREE;
+		} else if (type.equalsIgnoreCase("neural network")) {
+			this.classifierType = ClassifierMemberType.NEURAL_NETWORK;
+		} else if (type.equalsIgnoreCase("naive bayes")) {
+			this.classifierType = ClassifierMemberType.NAIVE_BAYES;
+		}
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/CreditConcept.java b/src/org/yooreeka/examples/credit/CreditConcept.java
new file mode 100644
index 0000000..5f897a9
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/CreditConcept.java
@@ -0,0 +1,92 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import org.yooreeka.algos.taxis.core.BaseConcept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.credit.data.users.UserType;
+
+public class CreditConcept extends BaseConcept {
+
+	public static final String CONCEPT_LABEL_EX = UserType.EXCELLENT;
+	public static final String CONCEPT_LABEL_VG = UserType.VERY_GOOD;
+	public static final String CONCEPT_LABEL_GD = UserType.GOOD;
+	public static final String CONCEPT_LABEL_BD = UserType.BAD;
+	public static final String CONCEPT_LABEL_DN = UserType.DANGEROUS;
+
+	/**
+	 * Maps a concept label to its index: EX=0, VG=1, GD=2, BD=3, DN=4.
+	 * 
+	 * @throws IllegalArgumentException if val is none of the five labels.
+	 */
+	public static int getIndex(String val) {
+		String[] labelsByIndex = { CONCEPT_LABEL_EX, CONCEPT_LABEL_VG,
+				CONCEPT_LABEL_GD, CONCEPT_LABEL_BD, CONCEPT_LABEL_DN };
+		for (int i = 0; i < labelsByIndex.length; i++) {
+			if (val.equals(labelsByIndex[i])) {
+				return i;
+			}
+		}
+		throw new IllegalArgumentException("Unknown CreditConcept name!");
+	}
+
+	/**
+	 * Maps an index back to its concept label: 0=EX, 1=VG, 2=GD, 3=BD, 4=DN.
+	 * 
+	 * @throws IllegalArgumentException if val is outside 0..4.
+	 */
+	public static String getLabel(int val) {
+		switch (val) {
+		case 0:
+			return CONCEPT_LABEL_EX;
+		case 1:
+			return CONCEPT_LABEL_VG;
+		case 2:
+			return CONCEPT_LABEL_GD;
+		case 3:
+			return CONCEPT_LABEL_BD;
+		case 4:
+			return CONCEPT_LABEL_DN;
+		default:
+			throw new IllegalArgumentException(
+					"Unknown CreditConcept index for label!");
+		}
+	}
+
+	public CreditConcept(String name) {
+		super(name);
+	}
+
+	@Override
+	public Instance[] getInstances() {
+		throw new UnsupportedOperationException("not implemented.");
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/CreditInstance.java b/src/org/yooreeka/examples/credit/CreditInstance.java
new file mode 100644
index 0000000..77f176b
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/CreditInstance.java
@@ -0,0 +1,121 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+public class CreditInstance implements Instance {
+
+	public static final String ATTR_NAME_USERID = "userid";
+	public static final String ATTR_NAME_JOB_CLASS = "jobClass";
+	public static final String ATTR_NAME_INCOME_TYPE = "incomeType";
+	public static final String ATTR_NAME_CAR_OWNERSHIP = "carOwnership";
+	public static final String ATTR_NAME_MOTOR_BICYCLE_OWNERSHIP = "motorBicycleOwnership";
+	public static final String ATTR_NAME_OTHER_PROPERTY_OWNERSHIP = "otherPropertyOwnership";
+	public static final String ATTR_NAME_RETIREMENT_ACCOUNT = "retirementAccount";
+	public static final String ATTR_NAME_CREDIT_SCORE = "creditScore";
+	public static final String ATTR_NAME_AGE = "age";
+	public static final String ATTR_NAME_MORTGAGE_DOWN_PAYMENT = "mortgageDownPayment";
+	public static final String ATTR_NAME_BANKRUPTCY = "priorDeclaredBankruptcy";
+	public static final String ATTR_NAME_CRIMINAL_RECORD = "priorCriminalRecord";
+
+	protected CreditConcept concept;
+	protected Attribute[] attributes;
+
+	public CreditInstance(CreditConcept c, Attribute[] attrs) {
+		this.concept = c;
+		this.attributes = attrs;
+	}
+
+	public CreditInstance(CreditConcept c, List<Attribute> attrs) {
+		this(c, attrs.toArray(new Attribute[attrs.size()]));
+	}
+
+	public Attribute[] getAtrributes() {
+		return attributes;
+	}
+
+	/** Returns the first attribute whose name matches (ignoring case), or null. */
+	public Attribute getAttributeByName(String attrName) {
+		if (attributes != null) {
+			for (Attribute a : attributes) {
+				// null entries are tolerated here just as print() tolerates them
+				if (a != null && attrName.equalsIgnoreCase(a.getName())) {
+					return a;
+				}
+			}
+		}
+		return null;
+	}
+
+	public CreditConcept getConcept() {
+		return concept;
+	}
+
+	/** Prints this instance to standard output. */
+	public void print() {
+		PrintWriter out = new PrintWriter(System.out);
+		print(out);
+		// A PrintWriter over an OutputStream buffers and does not auto-flush;
+		// without this flush the text never reaches System.out.
+		out.flush();
+	}
+
+	/** Writes all attributes, then the concept name, ending the line. */
+	public void print(PrintWriter writer) {
+		if (attributes != null) {
+			for (Attribute a : attributes) {
+				if (a == null || a.getName() == null) {
+					writer.print(" -  <NULL ATTRIBUTE> ");
+				} else if (a.getValue() == null) {
+					writer.print(" -  <NULL ATTRIBUTE VALUE> ");
+				} else {
+					writer.print(" -  " + a.getName() + " = "
+							+ a.getValue());
+				}
+			}
+		}
+		writer.println(" -->  " + getConcept().getName());
+	}
+
+	@Override
+	public String toString() {
+		StringWriter sw = new StringWriter();
+		print(new PrintWriter(sw));
+		return sw.toString();
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/DTCreditClassifier.java b/src/org/yooreeka/examples/credit/DTCreditClassifier.java
new file mode 100644
index 0000000..959ff75
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/DTCreditClassifier.java
@@ -0,0 +1,194 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.tree.DecisionTreeClassifier;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.util.UserInstanceBuilder;
+
+public class DTCreditClassifier extends DecisionTreeClassifier {
+
+	private static final long serialVersionUID = 5491106283513021975L;
+
+	private static String createDefaultClassifierName() {
+		return DTCreditClassifier.class.getSimpleName();
+	}
+
+	private static UserInstanceBuilder createDefaultInstanceBuilder() {
+		// using Instance Builder configured to produce instances with String
+		// attributes
+		return new UserInstanceBuilder(false);
+	}
+
+	public static DTCreditClassifier loadClassifier(String filename) {
+		// Deserializes a DTCreditClassifier from 'filename'. Throws
+		// IllegalArgumentException when the file is missing and
+		// RuntimeException (wrapping the cause) when reading fails.
+		File f = new File(filename);
+		if (!f.exists()) {
+			throw new IllegalArgumentException("File doesn't exist: '"
+					+ filename + "'.");
+		}
+		Object o = null;
+		try {
+			FileInputStream fInStream = new FileInputStream(f);
+			try {
+				ObjectInputStream objInStream = new ObjectInputStream(
+						new BufferedInputStream(fInStream));
+				o = objInStream.readObject();
+			} finally {
+				// release the file handle even if deserialization fails
+				fInStream.close();
+			}
+		} catch (Exception e) {
+			throw new RuntimeException(
+					"Error while loading data from file: '" + filename
+							+ "'", e);
+		}
+		System.out.println("loaded classifier from file: " + filename);
+		return (DTCreditClassifier) o;
+	}
+
+	public static void saveClassifier(String filename, DTCreditClassifier o) {
+		// Serializes 'o' into 'filename'; I/O failures surface as RuntimeException.
+		try {
+			FileOutputStream foutStream = new FileOutputStream(filename);
+			try {
+				ObjectOutputStream objOutputStream = new ObjectOutputStream(
+						new BufferedOutputStream(foutStream));
+				objOutputStream.writeObject(o);
+				objOutputStream.flush();
+			} finally {
+				// release the file handle even if serialization fails
+				foutStream.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while saving data into file: '"
+					+ filename + "'", e);
+		}
+		System.out.println("saved classifier in file: " + filename);
+	}
+
+	private UserInstanceBuilder instanceBuilder;
+
+	private boolean pruneAfterTraining;
+
+	public DTCreditClassifier(String name, TrainingSet ts,
+			UserInstanceBuilder instanceBuilder) {
+
+		super(name, ts);
+
+		this.instanceBuilder = instanceBuilder;
+		// pruning after training is on by default; see setPruneAfterTraining()
+		this.pruneAfterTraining = true;
+	}
+
+	public DTCreditClassifier(String name, UserDataset ds) {
+		// the default builder produces instances with String attributes
+		this(name, ds, createDefaultInstanceBuilder());
+	}
+
+	public DTCreditClassifier(String name, UserDataset ds,
+			UserInstanceBuilder instanceBuilder) {
+		// the training set is derived from the dataset by the given builder
+		this(name, instanceBuilder.createTrainingSet(ds), instanceBuilder);
+	}
+
+	public DTCreditClassifier(TrainingSet ts) {
+		this(createDefaultClassifierName(), ts, createDefaultInstanceBuilder());
+	}
+
+	public DTCreditClassifier(UserDataset ds) {
+		this(createDefaultClassifierName(), ds);
+	}
+
+	public Concept classify(User u) {
+		return classify(instanceBuilder.createInstance(u));
+	}
+
+	public Concept classify(User u, boolean print) {
+		Concept c = classify(u);
+		if (print) {
+			System.out.println("Actual ---> " + u.getCategory()
+					+ "\nAssigned -> " + c.getName());
+		}
+		return c;
+	}
+
+	public UserInstanceBuilder getInstanceBuilder() {
+		return this.instanceBuilder;
+	}
+
+	public boolean isPruneAfterTraining() {
+		return pruneAfterTraining;
+	}
+
+	public void setPruneAfterTraining(boolean pruneAfterTraining) {
+		this.pruneAfterTraining = pruneAfterTraining;
+	}
+
+	@Override
+	public boolean train() {
+		boolean result = super.train();
+		if (result && pruneAfterTraining) {
+			this.pruneTree();
+		}
+		return result;
+	}
+
+	public void useDefaultAttributes() {
+		trainOnAttribute(CreditInstance.ATTR_NAME_JOB_CLASS, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_INCOME_TYPE, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_AGE, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_CAR_OWNERSHIP, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_CREDIT_SCORE, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_MORTGAGE_DOWN_PAYMENT, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_MOTOR_BICYCLE_OWNERSHIP, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_OTHER_PROPERTY_OWNERSHIP,
+				true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_CRIMINAL_RECORD, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_BANKRUPTCY, true);
+		trainOnAttribute(CreditInstance.ATTR_NAME_RETIREMENT_ACCOUNT, true);
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/NBCreditClassifier.java b/src/org/yooreeka/examples/credit/NBCreditClassifier.java
new file mode 100644
index 0000000..1d57be5
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/NBCreditClassifier.java
@@ -0,0 +1,121 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import org.yooreeka.algos.taxis.bayesian.NaiveBayes;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.util.UserInstanceBuilder;
+
+public class NBCreditClassifier extends NaiveBayes {
+
+	private static String createDefaultClassifierName() {
+		return NBCreditClassifier.class.getSimpleName();
+	}
+
+	private static UserInstanceBuilder createDefaultInstanceBuilder() {
+		// builder configured to produce instances with String attributes
+		UserInstanceBuilder stringAttributeBuilder = new UserInstanceBuilder(false);
+		return stringAttributeBuilder;
+	}
+
+	private UserInstanceBuilder instanceBuilder;
+
+	/** Creates a named classifier over <code>ts</code> that converts users with the given builder. */
+	public NBCreditClassifier(String name, TrainingSet ts,
+			UserInstanceBuilder instanceBuilder) {
+		super(name, ts);
+		// kept so that classify(User) can turn users into instances
+		this.instanceBuilder = instanceBuilder;
+	}
+
+	public NBCreditClassifier(String name, UserDataset ds) {
+		this(name, ds, createDefaultInstanceBuilder());
+	}
+
+	public NBCreditClassifier(String name, UserDataset ds,
+			UserInstanceBuilder instanceBuilder) {
+		// the builder turns the dataset into the training set and is then
+		// retained for classifying individual users
+		this(name, instanceBuilder.createTrainingSet(ds), instanceBuilder);
+	}
+
+	public NBCreditClassifier(TrainingSet ts) {
+		// default name, default builder; the three-argument constructor
+		// performs the same super(name, ts) call and builder assignment
+		this(createDefaultClassifierName(), ts, createDefaultInstanceBuilder());
+	}
+
+	public NBCreditClassifier(UserDataset ds) {
+		this(createDefaultClassifierName(), ds);
+	}
+
+	@Override
+	public Concept classify(Instance instance) {
+		return super.classify(instance);
+	}
+
+	public Concept classify(User user) {
+		return classify(instanceBuilder.createInstance(user));
+	}
+
+	public Concept classify(User u, boolean print) {
+		final Concept assigned = classify(u);
+		if (!print) {
+			return assigned;
+		}
+		System.out.println("Actual ---> " + u.getCategory()
+				+ "\nAssigned -> " + assigned.getName());
+		return assigned;
+	}
+
+	public UserInstanceBuilder getInstanceBuilder() {
+		return instanceBuilder;
+	}
+
+	public void useDefaultAttributes() {
+		String[] defaultNames = { CreditInstance.ATTR_NAME_JOB_CLASS,
+				CreditInstance.ATTR_NAME_INCOME_TYPE, CreditInstance.ATTR_NAME_AGE,
+				CreditInstance.ATTR_NAME_CAR_OWNERSHIP, CreditInstance.ATTR_NAME_CREDIT_SCORE,
+				CreditInstance.ATTR_NAME_MORTGAGE_DOWN_PAYMENT,
+				CreditInstance.ATTR_NAME_MOTOR_BICYCLE_OWNERSHIP,
+				CreditInstance.ATTR_NAME_OTHER_PROPERTY_OWNERSHIP,
+				CreditInstance.ATTR_NAME_CRIMINAL_RECORD, CreditInstance.ATTR_NAME_BANKRUPTCY,
+				CreditInstance.ATTR_NAME_RETIREMENT_ACCOUNT };
+		for (String attributeName : defaultNames) {
+			trainOnAttribute(attributeName);
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/NNCreditClassifier.java b/src/org/yooreeka/examples/credit/NNCreditClassifier.java
new file mode 100644
index 0000000..d9d6c3a
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/NNCreditClassifier.java
@@ -0,0 +1,406 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.DoubleAttribute;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.util.UserInstanceBuilder;
+
+public class NNCreditClassifier implements Classifier, java.io.Serializable {
+
+	private static final long serialVersionUID = 8584476885427513654L;
+
+	// File.separator keeps this path usable on platforms other than Windows
+	private static final String SERIALIZATION_PATH = YooreekaConfigurator.getHome()
+			+ File.separator + "data" + File.separator + "ch06" + File.separator;
+
+	private static String createDefaultClassifierName() {
+		return NNCreditClassifier.class.getSimpleName();
+	}
+
+	private static UserInstanceBuilder createDefaultInstanceBuilder() {
+		// using Instance Builder configured to produce instances with Double
+		// attributes
+		return new UserInstanceBuilder(true);
+	}
+
+	public static NNCreditClassifier load(String filename) {
+		// 'filename' is resolved relative to SERIALIZATION_PATH. Throws
+		// IllegalArgumentException when the file is missing and
+		// RuntimeException (wrapping the cause) when reading fails.
+		File f = new File(SERIALIZATION_PATH + filename);
+		if (!f.exists()) {
+			throw new IllegalArgumentException("File doesn't exist: '"
+					+ filename + "'.");
+		}
+		Object o = null;
+		try {
+			FileInputStream fInStream = new FileInputStream(f);
+			try {
+				ObjectInputStream objInStream = new ObjectInputStream(
+						new BufferedInputStream(fInStream));
+				o = objInStream.readObject();
+			} finally {
+				// release the file handle even if deserialization fails
+				fInStream.close();
+			}
+		} catch (Exception e) {
+			throw new RuntimeException(
+					"Error while loading data from file: '" + filename
+							+ "'", e);
+		}
+		System.out.println("loaded classifier from file: " + filename);
+		return (NNCreditClassifier) o;
+	}
+
+	private boolean verbose = false;
+
+	private String name;
+
+	/*
+	 * Neural Network that will be used by this classifier.
+	 */
+	private UserCreditNN nn;
+
+	private int DEFAULT_TRAINING_ITERATIONS = 10;
+
+	/*
+	 * Number of times to feed training instances into the network during
+	 * training.
+	 */
+	private int nTrainingIterations = DEFAULT_TRAINING_ITERATIONS;
+
+	private double DEFAULT_LEARNING_RATE = 0.025;
+
+	/*
+	 * Learning rate that will be used in NN training.
+	 */
+	private double learningRate = DEFAULT_LEARNING_RATE;
+
+	private transient TrainingSet ts;
+
+	private UserInstanceBuilder instanceBuilder;
+
+	/*
+	 * Attribute names that should be used as Neural Network inputs.
+	 */
+	private List<String> availableAttributeNames;
+
+	String[] categories = new String[] { CreditConcept.CONCEPT_LABEL_EX,
+			CreditConcept.CONCEPT_LABEL_VG, CreditConcept.CONCEPT_LABEL_GD,
+			CreditConcept.CONCEPT_LABEL_BD, CreditConcept.CONCEPT_LABEL_DN };
+
+	public NNCreditClassifier(String name, TrainingSet ts,
+			UserInstanceBuilder instanceBuilder) {
+		this.name = name;
+		this.ts = ts;
+		this.instanceBuilder = instanceBuilder;
+		// input attributes are registered later via trainOnAttribute()
+		this.availableAttributeNames = new ArrayList<String>();
+
+		nn = createNeuralNetwork();
+	}
+
+	public NNCreditClassifier(String name, UserDataset ds) {
+		// using Instance Builder configured to produce instances with Double
+		// attributes
+		this(name, ds, createDefaultInstanceBuilder());
+	}
+
+	public NNCreditClassifier(String name, UserDataset ds,
+			UserInstanceBuilder instanceBuilder) {
+		this(name, instanceBuilder.createTrainingSet(ds), instanceBuilder);
+	}
+
+	public NNCreditClassifier(TrainingSet ts) {
+		this(createDefaultClassifierName(), ts, createDefaultInstanceBuilder());
+	}
+
+	public NNCreditClassifier(UserDataset ds) {
+		this(createDefaultClassifierName(), ds);
+	}
+
+	public Concept classify(Instance instance) {
+
+		double[] x = createNNInputs(instance);
+
+		double[] y = nn.classify(x);
+
+		Concept c = createConceptFromNNOutput(y);
+
+		if (verbose) {
+			System.out.println("\nAssessment:\n  >> This is a " + c.getName());
+		}
+		return c;
+	}
+
+	public Concept classify(User user) {
+		if (verbose) {
+			System.out.println("User:\n  >> " + user.toString());
+		}
+		return classify(instanceBuilder.createInstance(user));
+	}
+
+	public Concept classify(User u, boolean print) {
+		Concept c = classify(u);
+		if (print) {
+			System.out.println("Actual ---> " + u.getCategory()
+					+ "\nAssigned -> " + c.getName());
+		}
+		return c;
+	}
+
+	private Concept createConceptFromNNOutput(double[] y) {
+		// picks the category with the largest output value (first one wins on ties)
+		int categoryIndex = 0;
+		for (int i = 1, n = y.length; i < n; i++) {
+			if (y[i] > y[categoryIndex]) {
+				categoryIndex = i;
+			}
+		}
+
+		return new CreditConcept(categories[categoryIndex]);
+	}
+
+	private UserCreditNN createNeuralNetwork() {
+
+		String nnName = "NNUserCreditClassifierNN";
+
+		UserCreditNN nn = new UserCreditNN(nnName);
+		// set custom parameters and recreate the network
+		nn.setLearningRate(learningRate);
+		nn.removeAllNodesAndLayers();
+		nn.create();
+		return nn;
+	}
+
+	public double[] createNNInputs(Instance instance) {
+		/*
+		 * Converting all String attributes into Double attributes.
+		 */
+		Instance convertedInstance = instanceBuilder.createInstance(instance);
+
+		int nInputNodes = nn.getInputNodeCount();
+		// descriptive failure instead of IndexOutOfBoundsException from get(i) below
+		if (availableAttributeNames.size() < nInputNodes) {
+			throw new RuntimeException(
+					"Not enough attributes for the input nodes. Attributes: "
+							+ availableAttributeNames.size()
+							+ ", Input nodes: " + nInputNodes);
+		}
+		double[] x = new double[nInputNodes];
+		for (int i = 0; i < nInputNodes; i++) {
+			String attrName = this.availableAttributeNames.get(i);
+			Attribute a = convertedInstance.getAttributeByName(attrName);
+			if (a instanceof DoubleAttribute) {
+				x[i] = (Double) a.getValue();
+			} else {
+				if (a == null) {
+					throw new RuntimeException(
+							"Failed to find attribute with name: '" + attrName
+									+ "'. Instance: "
+									+ convertedInstance.toString());
+				} else {
+					throw new RuntimeException(
+							"Invalid attribute type. Only "
+									+ DoubleAttribute.class.getSimpleName()
+									+ " attribute"
+									+ " types can be used in NN. Actual attribute type: "
+									+ a.getClass().getSimpleName());
+				}
+			}
+		}
+		return x;
+	}
+
+	public double[] createNNOutputs(Instance i) {
+		// expected output: 1.0 at the position of the instance's category,
+		// 0.0 everywhere else (see getOutputValue)
+		int nOutputNodes = nn.getOutputNodeCount();
+		String category = i.getConcept().getName();
+
+		double[] y = new double[nOutputNodes];
+		for (int n = 0; n < nOutputNodes; n++) {
+			y[n] = getOutputValue(n, category);
+		}
+		return y;
+	}
+
+	public UserInstanceBuilder getInstanceBuilder() {
+		return this.instanceBuilder;
+	}
+
+	public double getLearningRate() {
+		return learningRate;
+	}
+
+	/**
+	 * @return the name
+	 */
+	public String getName() {
+		return name;
+	}
+
+	private double getOutputValue(int i, String category) {
+		if (categories[i].equals(category)) {
+			return 1.0;
+		} else {
+			return 0.0;
+		}
+	}
+
+	/**
+	 * @return the verbose
+	 */
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	public void save() {
+		// Serializes this classifier to SERIALIZATION_PATH + getName(); 'ts' is transient.
+		String filename = SERIALIZATION_PATH + this.getName();
+		try {
+			FileOutputStream foutStream = new FileOutputStream(filename);
+			try {
+				ObjectOutputStream objOutputStream = new ObjectOutputStream(
+						new BufferedOutputStream(foutStream));
+				objOutputStream.writeObject(this);
+				objOutputStream.flush();
+			} finally {
+				// release the file handle even if serialization fails
+				foutStream.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while saving data into file: '"
+					+ filename + "'", e);
+		}
+		System.out.println("saved classifier in file: " + filename);
+	}
+
+	public void setLearningRate(double learningRate) {
+		// NOTE(review): only updates the field; 'nn' got its rate once, in
+		// createNeuralNetwork() at construction - confirm this is intended.
+		this.learningRate = learningRate;
+	}
+
+	public void setNTrainingIterations(int trainingIterations) {
+		nTrainingIterations = trainingIterations;
+	}
+
+	/**
+	 * @param verbose
+	 *            the verbose to set
+	 */
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+	public boolean train() {
+		// always returns true; failures are signalled with RuntimeException
+		long t0 = System.currentTimeMillis();
+		// 'ts' is transient, so a classifier restored by load() arrives here with null
+		if (ts == null) {
+			throw new RuntimeException(
+					"Can't train classifier - training dataset is null.");
+		}
+
+		if (nn.getInputNodeCount() != availableAttributeNames.size()) {
+			throw new RuntimeException(
+					"Number of attributes doesn't match with the number of input nodes. "
+							+ "Attributes: " + availableAttributeNames.size()
+							+ ", Input nodes: " + nn.getInputNodeCount());
+		}
+
+		trainNeuralNetwork(nTrainingIterations);
+
+		System.out.print("       Neural network training completed in ");
+		System.out.println((System.currentTimeMillis() - t0) + " (ms)");
+
+		return true;
+	}
+
+	private void trainNeuralNetwork(int nIterations) {
+
+		for (int i = 1; i <= nIterations; i++) {
+			for (Instance instance : ts.getInstances().values()) {
+				double[] nnInput = createNNInputs(instance);
+				double[] nnExpectedOutput = createNNOutputs(instance);
+
+				nn.train(nnInput, nnExpectedOutput);
+			}
+
+			if (verbose) {
+				System.out.println("finished training pass: " + i + " out of "
+						+ nIterations);
+			}
+		}
+	}
+
+	public void trainOnAttribute(String name) {
+		availableAttributeNames.add(name);
+	}
+
+	/**
+	 * This method facilitates the loading of training attributes
+	 */
+	public void useDefaultAttributes() {
+		trainOnAttribute(CreditInstance.ATTR_NAME_JOB_CLASS);
+		trainOnAttribute(CreditInstance.ATTR_NAME_INCOME_TYPE);
+		trainOnAttribute(CreditInstance.ATTR_NAME_AGE);
+		trainOnAttribute(CreditInstance.ATTR_NAME_CAR_OWNERSHIP);
+		trainOnAttribute(CreditInstance.ATTR_NAME_CREDIT_SCORE);
+		trainOnAttribute(CreditInstance.ATTR_NAME_MORTGAGE_DOWN_PAYMENT);
+		trainOnAttribute(CreditInstance.ATTR_NAME_MOTOR_BICYCLE_OWNERSHIP);
+		trainOnAttribute(CreditInstance.ATTR_NAME_OTHER_PROPERTY_OWNERSHIP);
+		trainOnAttribute(CreditInstance.ATTR_NAME_CRIMINAL_RECORD);
+		trainOnAttribute(CreditInstance.ATTR_NAME_BANKRUPTCY);
+		trainOnAttribute(CreditInstance.ATTR_NAME_RETIREMENT_ACCOUNT);
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/UserCreditNN.java b/src/org/yooreeka/examples/credit/UserCreditNN.java
new file mode 100644
index 0000000..17090e1
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/UserCreditNN.java
@@ -0,0 +1,211 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit;
+
+import org.yooreeka.algos.taxis.networks.neural.core.BaseNN;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Layer;
+
+public class UserCreditNN extends BaseNN {
+
+	private static final long serialVersionUID = 5049921699478904263L;
+
+	public UserCreditNN(String name) {
+		super(name);
+		// NOTE(review): create() is public and overridable, yet it is invoked from the constructor
+		create();
+	}
+
+	public void create() {
+		createNN_11_7_5(); // 11 input, 7 hidden and 5 output nodes
+	}
+
+	private void createNN_11_7_5() {
+		// Builds the three layers with hard-coded biases, then links every node to every node of the next layer.
+		// 1. Define Layers, Nodes and Node Biases
+		Layer inputLayer = createInputLayer(0, // layer id
+				11 // number of nodes
+		);
+
+		Layer hiddenLayer = createHiddenLayer(1, // layer id
+				7, // number of nodes
+				new double[] { 0.5, -1, 1.5, 0.5, 1, -0.2, 0.1 } // node biases
+		);
+
+		Layer outputLayer = createOutputLayer(2, // layer id
+				5, // number of nodes
+				new double[] { -1.5, 0.5, -1, 0.5, 1 } // node biases
+		);
+
+		setInputLayer(inputLayer);
+		setOutputLayer(outputLayer);
+		addHiddenLayer(hiddenLayer);
+
+		// 2. Define links and weights between nodes
+		// Id format: <layerId:nodeIdwithinLayer>
+
+		// Weights for links from Input Layer to Hidden Layer
+		setLink("0:0", "1:0", 0.25);
+		setLink("0:0", "1:1", -0.7);
+		setLink("0:0", "1:2", 0.25);
+		setLink("0:0", "1:3", 0.25);
+		setLink("0:0", "1:4", -0.3);
+		setLink("0:0", "1:5", 0.25);
+		setLink("0:0", "1:6", -0.5);
+
+		setLink("0:1", "1:0", 0.25);
+		setLink("0:1", "1:1", -0.5);
+		setLink("0:1", "1:2", 0.25);
+		setLink("0:1", "1:3", 0.25);
+		setLink("0:1", "1:4", 0.5);
+		setLink("0:1", "1:5", 0.25);
+		setLink("0:1", "1:6", 0.5);
+
+		setLink("0:2", "1:0", 0.25);
+		setLink("0:2", "1:1", -0.5);
+		setLink("0:2", "1:2", 0.25);
+		setLink("0:2", "1:3", 0.25);
+		setLink("0:2", "1:4", -0.5);
+		setLink("0:2", "1:5", 0.25);
+		setLink("0:2", "1:6", -0.5);
+
+		setLink("0:3", "1:0", 0.25);
+		setLink("0:3", "1:1", -0.5);
+		setLink("0:3", "1:2", -0.25);
+		setLink("0:3", "1:3", -0.25);
+		setLink("0:3", "1:4", -0.5);
+		setLink("0:3", "1:5", 0.25);
+		setLink("0:3", "1:6", 0.5);
+
+		setLink("0:4", "1:0", 0.25);
+		setLink("0:4", "1:1", -0.5);
+		setLink("0:4", "1:2", 0.25);
+		setLink("0:4", "1:3", 0.25);
+		setLink("0:4", "1:4", -0.5);
+		setLink("0:4", "1:5", 0.25);
+		setLink("0:4", "1:6", -0.5);
+
+		setLink("0:5", "1:0", 0.25);
+		setLink("0:5", "1:1", -0.5);
+		setLink("0:5", "1:2", 0.25);
+		setLink("0:5", "1:3", 0.25);
+		setLink("0:5", "1:4", -0.5);
+		setLink("0:5", "1:5", 0.25);
+		setLink("0:5", "1:6", -0.5);
+
+		setLink("0:6", "1:0", -0.25);
+		setLink("0:6", "1:1", 0.5);
+		setLink("0:6", "1:2", -0.25);
+		setLink("0:6", "1:3", 0.25);
+		setLink("0:6", "1:4", -0.5);
+		setLink("0:6", "1:5", 0.25);
+		setLink("0:6", "1:6", 0.5);
+
+		setLink("0:7", "1:0", 0.25);
+		setLink("0:7", "1:1", -0.5);
+		setLink("0:7", "1:2", 0.25);
+		setLink("0:7", "1:3", 0.25);
+		setLink("0:7", "1:4", -0.5);
+		setLink("0:7", "1:5", 0.25);
+		setLink("0:7", "1:6", -0.5);
+
+		setLink("0:8", "1:0", 0.25);
+		setLink("0:8", "1:1", -0.5);
+		setLink("0:8", "1:2", 0.25);
+		setLink("0:8", "1:3", 0.25);
+		setLink("0:8", "1:4", -0.5);
+		setLink("0:8", "1:5", 0.25);
+		setLink("0:8", "1:6", 0.8);
+
+		setLink("0:9", "1:0", 0.25);
+		setLink("0:9", "1:1", 0.5);
+		setLink("0:9", "1:2", -0.25);
+		setLink("0:9", "1:3", -0.25);
+		setLink("0:9", "1:4", 0.5);
+		setLink("0:9", "1:5", 0.25);
+		setLink("0:9", "1:6", 0.5);
+
+		setLink("0:10", "1:0", 0.25);
+		setLink("0:10", "1:1", -0.5);
+		setLink("0:10", "1:2", 0.25);
+		setLink("0:10", "1:3", 0.25);
+		setLink("0:10", "1:4", 0.5);
+		setLink("0:10", "1:5", 0.25);
+		setLink("0:10", "1:6", -0.5);
+
+		// Weights for links from Hidden Layer to Output Layer
+
+		setLink("1:0", "2:0", -0.5);
+		setLink("1:1", "2:0", 0.5);
+		setLink("1:2", "2:0", 0.5);
+		setLink("1:3", "2:0", 0.5);
+		setLink("1:4", "2:0", 0.5);
+		setLink("1:5", "2:0", -0.5);
+		setLink("1:6", "2:0", 0.5);
+
+		setLink("1:0", "2:1", -0.5);
+		setLink("1:1", "2:1", 0.5);
+		setLink("1:2", "2:1", -0.5);
+		setLink("1:3", "2:1", -0.5);
+		setLink("1:4", "2:1", 0.5);
+		setLink("1:5", "2:1", -0.5);
+		setLink("1:6", "2:1", 0.5);
+
+		setLink("1:0", "2:2", -0.5);
+		setLink("1:1", "2:2", 0.5);
+		setLink("1:2", "2:2", -0.5);
+		setLink("1:3", "2:2", -0.5);
+		setLink("1:4", "2:2", 0.5);
+		setLink("1:5", "2:2", -0.5);
+		setLink("1:6", "2:2", 0.5);
+
+		setLink("1:0", "2:3", -0.5);
+		setLink("1:1", "2:3", 0.5);
+		setLink("1:2", "2:3", -0.5);
+		setLink("1:3", "2:3", -0.5);
+		setLink("1:4", "2:3", 0.5);
+		setLink("1:5", "2:3", -0.5);
+		setLink("1:6", "2:3", 0.5);
+
+		setLink("1:0", "2:4", -0.5);
+		setLink("1:1", "2:4", 0.5);
+		setLink("1:2", "2:4", -0.5);
+		setLink("1:3", "2:4", -0.5);
+		setLink("1:4", "2:4", 0.5);
+		setLink("1:5", "2:4", -0.5);
+		setLink("1:6", "2:4", 0.5);
+
+		if (isVerbose()) {
+			System.out.println("NN created");
+		}
+
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/data/UseCaseData.java b/src/org/yooreeka/examples/credit/data/UseCaseData.java
new file mode 100644
index 0000000..5baa5f8
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/UseCaseData.java
@@ -0,0 +1,194 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.credit.data.users.BadUserType;
+import org.yooreeka.examples.credit.data.users.DangerousUserType;
+import org.yooreeka.examples.credit.data.users.ExcellentUserType;
+import org.yooreeka.examples.credit.data.users.GoodUserType;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.data.users.UserType;
+import org.yooreeka.examples.credit.data.users.VeryGoodUserType;
+import org.yooreeka.examples.credit.util.CreditDataUtils;
+import org.yooreeka.examples.credit.util.DataGenerator;
+
+/**
+ * Example of how to configure and generate the files of synthetic users (a
+ * training set and a test set) for the credit worthiness use case.
+ */
+public class UseCaseData {
+
+	/* Generated training users are saved into this file. */
+	public static String TRAINING_USERS_FILENAME = YooreekaConfigurator
+			.getHome() + "/data/ch06/generated-training-users.txt";
+
+	/* Generated test users are saved into this file. */
+	public static String TEST_USERS_FILENAME = YooreekaConfigurator.getHome()
+			+ "/data/ch06/generated-test-users.txt";
+
+	/** Generates a training set of 100000 users and a test set of 50000. */
+	public static void main(String[] args) {
+		UseCaseData useCaseData = new UseCaseData(100000, 50000);
+		useCaseData.create();
+	}
+
+	// INSTANCE VARIABLES
+	DataGenerator dataGenerator = new DataGenerator();
+
+	int nTrainingUsers;
+
+	int nTestUsers;
+
+	/** Stores the requested sizes of the training set and of the test set. */
+	public UseCaseData(int nTrainingUsers, int nTestUsers) {
+		this.nTrainingUsers = nTrainingUsers;
+		this.nTestUsers = nTestUsers;
+	}
+
+	/**
+	 * Generates and saves the training users (user id sequence starting at 1)
+	 * and then, with the generator's noise switched on, the test users (user
+	 * id sequence starting at 500000).
+	 */
+	public void create() {
+		System.out
+				.println("Creating data for the credit worthiness (score) use case:");
+		System.out.println("   Number of users in the training set: "
+				+ nTrainingUsers);
+		System.out.println("    Number of users in the testing set: "
+				+ nTestUsers);
+		System.out
+				.println("___________________________________________________________");
+
+		generateUsers(TRAINING_USERS_FILENAME, 1,
+				createUserTypes(nTrainingUsers));
+
+		// Only the test set is generated with noise. NOTE(review): noise is
+		// never switched back off here, so a second create() on this instance
+		// would presumably produce noisy training data too -- confirm.
+		dataGenerator.setNoiseOn(true);
+
+		generateUsers(TEST_USERS_FILENAME, 500000, createUserTypes(nTestUsers));
+
+		System.out.println("Done!");
+	}
+
+	/**
+	 * Same as create(); when overwrite is true the static file names are first
+	 * reassigned to "training-users.txt" and "test-users.txt" (and stay so).
+	 */
+	public void create(boolean overwrite) {
+		if (overwrite) {
+			TRAINING_USERS_FILENAME = YooreekaConfigurator.getHome()
+					+ "/data/ch06/training-users.txt";
+			TEST_USERS_FILENAME = YooreekaConfigurator.getHome()
+					+ "/data/ch06/test-users.txt";
+		}
+		create();
+	}
+
+	/**
+	 * Splits nUsers over the five user types: 5% excellent, 15% very good, 50%
+	 * good, 25% bad and 5% dangerous credit users.
+	 *
+	 * @param nUsers
+	 *            total number of users to distribute
+	 * @return the five user types, each carrying its number of users
+	 */
+	public List<UserType> createUserTypes(int nUsers) {
+		int nExcellent = (int) (nUsers * 0.05);
+		int nVeryGood = (int) (nUsers * 0.15);
+		int nGood = (int) (nUsers * 0.50);
+		int nBad = (int) (nUsers * 0.25);
+		int nDangerous = (int) (nUsers * 0.05);
+
+		// The casts truncate, so the shares can add up to less than nUsers
+		// (10 users -> 0 + 1 + 5 + 2 + 0 = 8). Hand the leftover users to the
+		// largest group so that the requested total is actually generated.
+		int leftover = nUsers
+				- (nExcellent + nVeryGood + nGood + nBad + nDangerous);
+		if (leftover > 0) {
+			nGood += leftover;
+		}
+
+		List<UserType> allUserTypes = new ArrayList<UserType>();
+		allUserTypes.add(sized(new ExcellentUserType(), nExcellent));
+		allUserTypes.add(sized(new VeryGoodUserType(), nVeryGood));
+		allUserTypes.add(sized(new GoodUserType(), nGood));
+		allUserTypes.add(sized(new BadUserType(), nBad));
+		allUserTypes.add(sized(new DangerousUserType(), nDangerous));
+		return allUserTypes;
+	}
+
+	/** Sets the number of users on the given type and returns that type. */
+	private static UserType sized(UserType userType, int nUsers) {
+		userType.setNUsers(nUsers);
+		return userType;
+	}
+
+	/**
+	 * Generates the users described by userTypes, with the generator's next
+	 * user id set to nextUserId, and saves them into the file named filename.
+	 */
+	public void generateUsers(String filename, int nextUserId,
+			List<UserType> userTypes) {
+
+		dataGenerator.setNextUserId(nextUserId);
+		System.out.println("Generating users...");
+		List<User> allUsers = dataGenerator.generateUsers(userTypes);
+		System.out.println("Saving users into '" + filename + "'");
+		CreditDataUtils.saveUsers(filename, allUsers);
+	}
+
+	/** @return the nTestUsers */
+	public int getTestUsers() {
+		return nTestUsers;
+	}
+
+	/** @return the nTrainingUsers */
+	public int getTrainingUsers() {
+		return nTrainingUsers;
+	}
+
+	/** @param n the nTestUsers to set */
+	public void setTestUsers(int n) {
+		nTestUsers = n;
+	}
+
+	/** @param n the nTrainingUsers to set */
+	public void setTrainingUsers(int n) {
+		nTrainingUsers = n;
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/data/UserDataset.java b/src/org/yooreeka/examples/credit/data/UserDataset.java
new file mode 100644
index 0000000..9e7f3be
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/UserDataset.java
@@ -0,0 +1,80 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.examples.credit.data.users.User;
+
+public class UserDataset {
+
+	private Map<String, User> usersByUsernameMap;
+
+	/** Indexes every user of the given list by its username. */
+	public UserDataset(List<User> userList) {
+		usersByUsernameMap = new HashMap<String, User>(userList.size());
+		for (User user : userList) {
+			usersByUsernameMap.put(user.getUsername(), user);
+		}
+	}
+
+	/** Returns the user stored under the given username, or null if none. */
+	public User findUserByUsername(String username) {
+		return usersByUsernameMap.get(username);
+	}
+
+	/** Returns the number of users held by this dataset. */
+	public int getSize() {
+		return usersByUsernameMap.size();
+	}
+
+	/** Returns a new list holding the users of this dataset. */
+	public List<User> getUsers() {
+		return new ArrayList<User>(usersByUsernameMap.values());
+	}
+
+	/** Prints every user of this dataset to standard output, one per line. */
+	public void printAll() {
+		for (User user : usersByUsernameMap.values()) {
+			System.out.println(user);
+		}
+	}
+
+	/** Prints the user with the given username, or a not-found message. */
+	public void printUser(String username) {
+		User user = findUserByUsername(username);
+		System.out.println(user != null ? user.toString()
+				: "User not found (username: '" + username + "')");
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/data/UserLoader.java b/src/org/yooreeka/examples/credit/data/UserLoader.java
new file mode 100644
index 0000000..14a2dcf
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/UserLoader.java
@@ -0,0 +1,70 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data;
+
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.util.CreditDataUtils;
+
+public class UserLoader {
+
+	public static final String TRAINING_USERS_FILE = YooreekaConfigurator
+			.getHome() + "/data/ch06/training-users.txt";
+	public static final String TEST_USERS_FILE = YooreekaConfigurator.getHome()
+			+ "/data/ch06/test-users.txt";
+
+	/** Loads the test dataset from TEST_USERS_FILE. */
+	public static UserDataset loadTestDataset() {
+		return loadTestDataset(TEST_USERS_FILE);
+	}
+
+	/** Loads a test dataset from the given file. */
+	public static UserDataset loadTestDataset(String filename) {
+		return new UserDataset(loadUsers(filename));
+	}
+
+	/** Loads the training dataset from TRAINING_USERS_FILE. */
+	public static UserDataset loadTrainingDataset() {
+		return loadTrainingDataset(TRAINING_USERS_FILE);
+	}
+
+	/** Loads a training dataset from the given file. */
+	public static UserDataset loadTrainingDataset(String filename) {
+		return new UserDataset(loadUsers(filename));
+	}
+
+	/** Returns the users that CreditDataUtils loads from the given file. */
+	public static List<User> loadUsers(String filename) {
+		return CreditDataUtils.loadUsers(filename);
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/data/users/BadUserType.java b/src/org/yooreeka/examples/credit/data/users/BadUserType.java
new file mode 100644
index 0000000..da776d9
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/BadUserType.java
@@ -0,0 +1,53 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+
+public class BadUserType extends UserType {
+	/** Passes the value sets of the BAD user type to the UserType setters. */
+	public BadUserType() {
+		setAge(new int[] { 1, 8, 9, 10 });
+		setBancruptcy(new int[] { 0, 1 });
+		setCarOwnership(new int[] { 0, 1 });
+		setCreditScore(new int[] { 1, 2, 3, 4 });
+		setCriminalRecord(new int[] { 0 });
+		setDownPayment(new int[] { 1, 2 });
+		setIncome(new int[] { 3, 4, 5, 6 });
+		setJobClass(new int[] { 4, 5 });
+		setMotorcycleOwnership(new int[] { 0, 1 });
+		setPropertyOwnership(new int[] { 0 });
+		setRetirementAccounts(new int[] { 1, 2 });
+	}
+
+	@Override
+	public String getUserType() {
+		return BAD;
+	}
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/examples/credit/data/users/DangerousUserType.java b/src/org/yooreeka/examples/credit/data/users/DangerousUserType.java
new file mode 100644
index 0000000..8483a2f
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/DangerousUserType.java
@@ -0,0 +1,53 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+
+public class DangerousUserType extends UserType {
+	/** Passes the value sets of the DANGEROUS user type to the UserType setters. */
+	public DangerousUserType() {
+		setAge(new int[] { 1, 2, 9, 10 });
+		setBancruptcy(new int[] { 1 });
+		setCarOwnership(new int[] { 0 });
+		setCreditScore(new int[] { 1, 2 });
+		setCriminalRecord(new int[] { 1 });
+		setDownPayment(new int[] { 1, 2 });
+		setIncome(new int[] { 1, 2, 3 });
+		setJobClass(new int[] { 4, 5 });
+		setMotorcycleOwnership(new int[] { 0 });
+		setPropertyOwnership(new int[] { 0 });
+		setRetirementAccounts(new int[] { 1 });
+	}
+
+	@Override
+	public String getUserType() {
+		return DANGEROUS;
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/data/users/ExcellentUserType.java b/src/org/yooreeka/examples/credit/data/users/ExcellentUserType.java
new file mode 100644
index 0000000..d938a16
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/ExcellentUserType.java
@@ -0,0 +1,53 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+
+public class ExcellentUserType extends UserType {
+	/** Passes the value sets of the EXCELLENT user type to the UserType setters. */
+	public ExcellentUserType() {
+		setAge(new int[] { 1, 8, 9, 10 });
+		setBancruptcy(new int[] { 0 });
+		setCarOwnership(new int[] { 1 });
+		setCreditScore(new int[] { 6, 7, 8 });
+		setCriminalRecord(new int[] { 0 });
+		setDownPayment(new int[] { 4 });
+		setIncome(new int[] { 7, 8, 9, 10 });
+		setJobClass(new int[] { 2, 3, 4, 5 });
+		setMotorcycleOwnership(new int[] { 0, 1 });
+		setPropertyOwnership(new int[] { 1 });
+		setRetirementAccounts(new int[] { 5, 6, 7, 8 });
+	}
+
+	@Override
+	public String getUserType() {
+		return EXCELLENT;
+	}
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/examples/credit/data/users/GoodUserType.java b/src/org/yooreeka/examples/credit/data/users/GoodUserType.java
new file mode 100644
index 0000000..7b0927d
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/GoodUserType.java
@@ -0,0 +1,53 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+
+public class GoodUserType extends UserType {
+	/** Passes the value sets of the GOOD user type to the UserType setters. */
+	public GoodUserType() {
+		setAge(new int[] { 2, 3, 4, 5, 6, 7, 8 });
+		setBancruptcy(new int[] { 0 });
+		setCarOwnership(new int[] { 1 });
+		setCreditScore(new int[] { 3, 4, 5, 6 });
+		setCriminalRecord(new int[] { 0 });
+		setDownPayment(new int[] { 2, 3 });
+		setIncome(new int[] { 5, 6, 7, 8 });
+		setJobClass(new int[] { 2, 3, 4, 5 });
+		setMotorcycleOwnership(new int[] { 0, 1 });
+		setPropertyOwnership(new int[] { 0, 1 });
+		setRetirementAccounts(new int[] { 1, 2, 3, 4 });
+	}
+
+	@Override
+	public String getUserType() {
+		return GOOD;
+	}
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/examples/credit/data/users/User.java b/src/org/yooreeka/examples/credit/data/users/User.java
new file mode 100644
index 0000000..693d51f
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/User.java
@@ -0,0 +1,319 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+
+/**
+ * A user of the credit example, described by a username and eleven integer
+ * attributes. A user can be written to, and read back from, one text line.
+ */
+public class User {
+
+	/** Number of ':'-separated fields written by toExternalString(). */
+	private static final int EXTERNAL_FIELD_COUNT = 12;
+
+	private String username;
+	private int jobClass;
+	private int carOwnership;
+	private int bicycleOwnership;
+	private int propertyOwnership;
+	private int retirementAccount;
+	private int creditScore;
+	private int age;
+	private int downPayment;
+	private int bankruptcy;
+	private int criminalRecord;
+	private int income;
+
+	public User() {
+		// empty
+	}
+
+	/**
+	 * Two users are equal when they have the same class, the same username and
+	 * the same value for each of the eleven integer attributes.
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		final User other = (User) obj;
+		if (age != other.age)
+			return false;
+		if (carOwnership != other.carOwnership)
+			return false;
+		if (creditScore != other.creditScore)
+			return false;
+		if (income != other.income)
+			return false;
+		if (jobClass != other.jobClass)
+			return false;
+		if (downPayment != other.downPayment)
+			return false;
+		if (bicycleOwnership != other.bicycleOwnership)
+			return false;
+		if (propertyOwnership != other.propertyOwnership)
+			return false;
+		if (criminalRecord != other.criminalRecord)
+			return false;
+		if (bankruptcy != other.bankruptcy)
+			return false;
+		if (retirementAccount != other.retirementAccount)
+			return false;
+		if (username == null) {
+			if (other.username != null)
+				return false;
+		} else if (!username.equals(other.username))
+			return false;
+		return true;
+	}
+
+	/** @return the age */
+	public int getAge() {
+		return age;
+	}
+
+	/** @return the bankruptcy */
+	public int getBankruptcy() {
+		return bankruptcy;
+	}
+
+	/** @return the bicycleOwnership */
+	public int getBicycleOwnership() {
+		return bicycleOwnership;
+	}
+
+	/** @return the carOwnership */
+	public int getCarOwnership() {
+		return carOwnership;
+	}
+
+	/**
+	 * Returns the first two characters of the username.
+	 *
+	 * @return the two-character prefix of the username
+	 * @throws NullPointerException
+	 *             if the username is null
+	 * @throws StringIndexOutOfBoundsException
+	 *             if the username has fewer than two characters
+	 */
+	public String getCategory() {
+		return username.substring(0, 2);
+	}
+
+	/** @return the creditScore */
+	public int getCreditScore() {
+		return creditScore;
+	}
+
+	/** @return the criminalRecord */
+	public int getCriminalRecord() {
+		return criminalRecord;
+	}
+
+	/** @return the downPayment */
+	public int getDownPayment() {
+		return downPayment;
+	}
+
+	/** @return the income */
+	public int getIncome() {
+		return income;
+	}
+
+	/** @return the jobClass */
+	public int getJobClass() {
+		return jobClass;
+	}
+
+	/** @return the propertyOwnership */
+	public int getPropertyOwnership() {
+		return propertyOwnership;
+	}
+
+	/** @return the retirementAccount */
+	public int getRetirementAccount() {
+		return retirementAccount;
+	}
+
+	/** @return the username */
+	public String getUsername() {
+		return username;
+	}
+
+	/**
+	 * Hash code built from the same twelve fields that equals(Object) compares.
+	 */
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result + age;
+		result = prime * result + carOwnership;
+		result = prime * result + creditScore;
+		result = prime * result + income;
+		result = prime * result + jobClass;
+		result = prime * result + downPayment;
+		result = prime * result + bicycleOwnership;
+		result = prime * result + propertyOwnership;
+		result = prime * result + criminalRecord;
+		result = prime * result + bankruptcy;
+		result = prime * result + retirementAccount;
+		result = prime * result
+				+ ((username == null) ? 0 : username.hashCode());
+		return result;
+	}
+
+	/**
+	 * Loads all fields from the ':'-separated form written by
+	 * toExternalString(): the username followed by the eleven integer
+	 * attributes. Fields beyond the twelfth are ignored. Nothing is modified
+	 * unless the whole line parses, so a malformed line cannot leave this
+	 * user half-updated.
+	 *
+	 * @param text
+	 *            the username followed by the eleven integer attributes
+	 * @throws ArrayIndexOutOfBoundsException
+	 *             if text has fewer than twelve fields
+	 * @throws NumberFormatException
+	 *             if one of the integer attributes is not a valid int
+	 */
+	public void loadFromExternalString(String text) {
+
+		String[] values = text.split(":");
+
+		// Parse every number before assigning anything (all-or-nothing load).
+		int[] parsed = new int[EXTERNAL_FIELD_COUNT];
+		for (int i = 1; i < EXTERNAL_FIELD_COUNT; i++) {
+			if (i >= values.length) {
+				throw new ArrayIndexOutOfBoundsException("Expected "
+						+ EXTERNAL_FIELD_COUNT + " fields but found "
+						+ values.length + " in '" + text + "'");
+			}
+			parsed[i] = Integer.parseInt(values[i]);
+		}
+
+		username = values[0];
+		jobClass = parsed[1];
+		carOwnership = parsed[2];
+		bicycleOwnership = parsed[3];
+		propertyOwnership = parsed[4];
+		retirementAccount = parsed[5];
+		creditScore = parsed[6];
+		age = parsed[7];
+		downPayment = parsed[8];
+		bankruptcy = parsed[9];
+		criminalRecord = parsed[10];
+		income = parsed[11];
+	}
+
+	/** @param age the age to set */
+	public void setAge(int age) {
+		this.age = age;
+	}
+
+	/** @param bankruptcy the bankruptcy to set */
+	public void setBankruptcy(int bankruptcy) {
+		this.bankruptcy = bankruptcy;
+	}
+
+	/** @param bicycleOwnership the bicycleOwnership to set */
+	public void setBicycleOwnership(int bicycleOwnership) {
+		this.bicycleOwnership = bicycleOwnership;
+	}
+
+	/** @param carOwnership the carOwnership to set */
+	public void setCarOwnership(int carOwnership) {
+		this.carOwnership = carOwnership;
+	}
+
+	/** @param creditScore the creditScore to set */
+	public void setCreditScore(int creditScore) {
+		this.creditScore = creditScore;
+	}
+
+	/** @param criminalRecord the criminalRecord to set */
+	public void setCriminalRecord(int criminalRecord) {
+		this.criminalRecord = criminalRecord;
+	}
+
+	/** @param downPayment the downPayment to set */
+	public void setDownPayment(int downPayment) {
+		this.downPayment = downPayment;
+	}
+
+	/** @param incomeType the income to set */
+	public void setIncome(int incomeType) {
+		this.income = incomeType;
+	}
+
+	/** @param jobClass the jobClass to set */
+	public void setJobClass(int jobClass) {
+		this.jobClass = jobClass;
+	}
+
+	/** @param propertyOwnership the propertyOwnership to set */
+	public void setPropertyOwnership(int propertyOwnership) {
+		this.propertyOwnership = propertyOwnership;
+	}
+
+	/** @param retirementAccount the retirementAccount to set */
+	public void setRetirementAccount(int retirementAccount) {
+		this.retirementAccount = retirementAccount;
+	}
+
+	/** @param username the username to set */
+	public void setUsername(String username) {
+		this.username = username;
+	}
+
+	/**
+	 * Returns this user as one ':'-separated line, the form that
+	 * loadFromExternalString(String) reads back.
+	 */
+	public String toExternalString() {
+		return username + ":" + jobClass + ":" + carOwnership + ":"
+				+ bicycleOwnership + ":" + propertyOwnership + ":"
+				+ retirementAccount + ":" + creditScore + ":" + age + ":"
+				+ downPayment + ":" + bankruptcy + ":" + criminalRecord + ":"
+				+ income;
+	}
+
+	/** Same text as toExternalString(). */
+	@Override
+	public String toString() {
+		return toExternalString();
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/data/users/UserType.java b/src/org/yooreeka/examples/credit/data/users/UserType.java
new file mode 100644
index 0000000..75f4de6
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/UserType.java
@@ -0,0 +1,512 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+
+import java.util.HashMap;
+import java.util.Random;
+
+public abstract class UserType {
+
+	public static final String EXCELLENT = "EX";
+	public static final String VERY_GOOD = "VG";
+	public static final String GOOD = "GD";
+	public static final String BAD = "BD";
+	public static final String DANGEROUS = "DN";
+
+	private static volatile HashMap<String, Double[]> noiseLevels;
+
+	/**
+	 * Registers (or, with a console warning, replaces) the noise levels of a credit type.
+	 * 
+	 * @param type credit type key, e.g. {@link #EXCELLENT}
+	 * @param levels thresholds for that type; {@link #getNoisyType()} reads indices 0 to 3
+	 */
+	public static void addNoiseLevel(String type, Double[] levels) {
+
+		if (noiseLevels.containsKey(type)) {
+			System.out.println("WARN: Replacing noise levels for credit type: "
+					+ type);
+		}
+		UserType.noiseLevels.put(type, levels);
+	}
+
+	/**
+	 * This method returns the noise levels by credit type
+	 * 
+	 * @return the noiseLevels
+	 */
+	public static HashMap<String, Double[]> getNoiseLevels() {
+		return UserType.noiseLevels;
+	}
+
+	/**
+	 * This method allows the insertion of custom noise levels in bulk
+	 * 
+	 * @param noiseLevels
+	 *            the noiseLevels to set
+	 */
+	public static void setNoiseLevels(HashMap<String, Double[]> noiseLevels) {
+		UserType.noiseLevels = noiseLevels;
+	}
+	private Random rnd = new Random();
+	private int nUsers;
+	private int[] jobClass;
+	private int[] carOwnership;
+	private int[] motorcycleOwnership;
+	private int[] propertyOwnership;
+	private int[] retirementAccounts;
+	private int[] creditScore;
+	private int[] age;
+	private int[] downPayment;
+
+	private int[] bancruptcy;
+
+	private int[] criminalRecord;
+
+	private int[] income;
+
+	static {
+		// Set the default noise levels
+		noiseLevels = new HashMap<String, Double[]>();
+
+		Double[] exLevels = new Double[] { 1.0d, 3.0d, 7.5d, 10.0d };
+		Double[] vgLevels = new Double[] { 1.0d, 3.0d, 6.0d, 10.0d };
+		Double[] gdLevels = new Double[] { 1.0d, 3.0d, 4.0d, 8.0d };
+		Double[] bdLevels = new Double[] { 1.0d, 3.0d, 7.5d, 10.0d };
+		Double[] dnLevels = new Double[] { 1.0d, 4.5d, 9.0d, 13.5d };
+
+		noiseLevels.put(EXCELLENT, exLevels);
+		noiseLevels.put(VERY_GOOD, vgLevels);
+		noiseLevels.put(GOOD, gdLevels);
+		noiseLevels.put(BAD, bdLevels);
+		noiseLevels.put(DANGEROUS, dnLevels);
+	}
+
+	public UserType() {
+		// empty
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		final UserType other = (UserType) obj;
+		if (getUserType() == null) {
+			if (other.getUserType() != null)
+				return false;
+		} else if (!getUserType().equals(other.getUserType()))
+			return false;
+		return true;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the age
+	 */
+	public int[] getAge() {
+		return age;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the bancruptcy
+	 */
+	public int[] getBancruptcy() {
+		return bancruptcy;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the carOwnership
+	 */
+	public int[] getCarOwnership() {
+		return carOwnership;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the creditScore
+	 */
+	public int[] getCreditScore() {
+		return creditScore;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the criminalRecord
+	 */
+	public int[] getCriminalRecord() {
+		return criminalRecord;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the downPayment
+	 */
+	public int[] getDownPayment() {
+		return downPayment;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the income
+	 */
+	public int[] getIncome() {
+		return income;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the jobClass
+	 */
+	public int[] getJobClass() {
+		return jobClass;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the motorcycleOwnership
+	 */
+	public int[] getMotorcycleOwnership() {
+		return motorcycleOwnership;
+	}
+	/** Returns the credit type label after applying Gaussian noise; null if getUserType() is not a known type. */
+	public String getNoisyType() {
+		// A single standard-normal draw decides which band, and so which label, is returned.
+		double gaussian = rnd.nextGaussian();
+
+		String noisyType = null;
+		// Resolve the type once; the constant-first equals() calls below are null-safe.
+		String userType = getUserType();
+		// Thresholds registered for this type; indices 0 to 3 are read below.
+		Double[] nLevels = noiseLevels.get(userType);
+		// Every else-if is reached only when the draw exceeded the previous threshold.
+		if (EXCELLENT.equals(userType)) {
+
+			if (gaussian <= nLevels[0]) {
+
+				noisyType = EXCELLENT;
+
+			} else if (gaussian <= nLevels[1]) {
+
+				noisyType = VERY_GOOD;
+
+			} else if (gaussian <= nLevels[2]) {
+
+				noisyType = GOOD;
+
+			} else if (gaussian <= nLevels[3]) {
+
+				noisyType = BAD;
+
+			} else {
+
+				noisyType = DANGEROUS;
+			}
+
+		} else if (VERY_GOOD.equals(userType)) {
+
+			if (gaussian <= nLevels[0]) {
+
+				noisyType = VERY_GOOD;
+
+			} else if (gaussian <= nLevels[1]) {
+
+				noisyType = GOOD;
+
+			} else if (gaussian <= nLevels[2]) {
+
+				noisyType = EXCELLENT;
+
+			} else if (gaussian <= nLevels[3]) {
+
+				noisyType = BAD;
+
+			} else {
+
+				noisyType = DANGEROUS;
+			}
+
+		} else if (GOOD.equals(userType)) {
+
+			if (gaussian <= nLevels[0]) {
+
+				noisyType = GOOD;
+
+			} else if (gaussian <= nLevels[1]) {
+
+				noisyType = VERY_GOOD;
+
+			} else if (gaussian <= nLevels[2]) {
+
+				noisyType = EXCELLENT;
+
+			} else if (gaussian <= nLevels[3]) {
+
+				noisyType = BAD;
+
+			} else {
+
+				noisyType = DANGEROUS;
+			}
+
+		} else if (BAD.equals(userType)) {
+
+			if (gaussian <= nLevels[0]) {
+
+				noisyType = BAD;
+
+			} else if (gaussian <= nLevels[1]) {
+
+				noisyType = GOOD;
+
+			} else if (gaussian <= nLevels[2]) {
+
+				noisyType = DANGEROUS;
+
+			} else if (gaussian <= nLevels[3]) {
+
+				noisyType = VERY_GOOD;
+
+			} else {
+
+				noisyType = EXCELLENT;
+			}
+
+		} else if (DANGEROUS.equals(userType)) {
+
+			if (gaussian <= nLevels[0]) {
+
+				noisyType = DANGEROUS;
+
+			} else if (gaussian <= nLevels[1]) {
+
+				noisyType = BAD;
+
+			} else if (gaussian <= nLevels[2]) {
+
+				noisyType = GOOD;
+
+			} else if (gaussian <= nLevels[3]) {
+
+				noisyType = VERY_GOOD;
+
+			} else {
+
+				noisyType = EXCELLENT;
+			}
+		}
+		// Still null here when the type is none of the five known constants.
+		return noisyType;
+	}
+
+	public int getNUsers() {
+		return nUsers;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the propertyOwnership
+	 */
+	public int[] getPropertyOwnership() {
+		return propertyOwnership;
+	}
+
+	// -----------------------------------------------------------------
+	/**
+	 * @return the retirementAccounts
+	 */
+	public int[] getRetirementAccounts() {
+		return retirementAccounts;
+	}
+
+	public abstract String getUserType();
+
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result
+				+ ((getUserType() == null) ? 0 : getUserType().hashCode());
+		return result;
+	}
+
+	public int pickAge() {
+		return age[rnd.nextInt(age.length)];
+	}
+
+	public int pickBancruptcy() {
+		return bancruptcy[rnd.nextInt(bancruptcy.length)];
+	}
+
+	public int pickCarOwnership() {
+		return carOwnership[rnd.nextInt(carOwnership.length)];
+	}
+
+	public int pickCreditScore() {
+		return creditScore[rnd.nextInt(creditScore.length)];
+	}
+
+	public int pickCriminalRecord() {
+		return criminalRecord[rnd.nextInt(criminalRecord.length)];
+	}
+
+	public int pickDownPayment() {
+		return downPayment[rnd.nextInt(downPayment.length)];
+	}
+
+	public int pickIncome() {
+		return income[rnd.nextInt(income.length)];
+	}
+
+	/**
+	 * This method, and the other "pickX()" methods in this class, select a
+	 * random value from the set of eligible values for a particular
+	 * <code>UserType</code>. Hence, clearly, the returned values will be
+	 * different for the different <code>UserType</code>s.
+	 * 
+	 * @return a random selection from the set of eligible job classes.
+	 */
+	public int pickJobClass() {
+		return jobClass[rnd.nextInt(jobClass.length)];
+	}
+
+	public int pickMotorcycleOwnership() {
+		return motorcycleOwnership[rnd.nextInt(motorcycleOwnership.length)];
+	}
+
+	public int pickPropertyOwnership() {
+		return propertyOwnership[rnd.nextInt(propertyOwnership.length)];
+	}
+
+	public int pickRetirementAccounts() {
+		return retirementAccounts[rnd.nextInt(retirementAccounts.length)];
+	}
+
+	/**
+	 * @param age
+	 *            the age to set
+	 */
+	public void setAge(int[] age) {
+		this.age = age;
+	}
+
+	/**
+	 * @param bancruptcy
+	 *            the bancruptcy to set
+	 */
+	public void setBancruptcy(int[] bancruptcy) {
+		this.bancruptcy = bancruptcy;
+	}
+
+	/**
+	 * @param carOwnership
+	 *            the carOwnership to set
+	 */
+	public void setCarOwnership(int[] carOwnership) {
+		this.carOwnership = carOwnership;
+	}
+
+	/**
+	 * @param creditScore
+	 *            the creditScore to set
+	 */
+	public void setCreditScore(int[] creditScore) {
+		this.creditScore = creditScore;
+	}
+
+	/**
+	 * @param criminalRecord
+	 *            the criminalRecord to set
+	 */
+	public void setCriminalRecord(int[] criminalRecord) {
+		this.criminalRecord = criminalRecord;
+	}
+
+	/**
+	 * @param downPayment
+	 *            the downPayment to set
+	 */
+	public void setDownPayment(int[] downPayment) {
+		this.downPayment = downPayment;
+	}
+
+	// -----------------------------------------------------------------
+
+	/**
+	 * @param income
+	 *            the income to set
+	 */
+	public void setIncome(int[] income) {
+		this.income = income;
+	}
+
+	/**
+	 * @param jobClass
+	 *            the jobClass to set
+	 */
+	public void setJobClass(int[] jobClass) {
+		this.jobClass = jobClass;
+	}
+
+	/**
+	 * @param bicycleOwnership
+	 *            the motorcycleOwnership to set
+	 */
+	public void setMotorcycleOwnership(int[] bicycleOwnership) {
+		this.motorcycleOwnership = bicycleOwnership;
+	}
+
+	public void setNUsers(int nUsers) {
+		this.nUsers = nUsers;
+	}
+
+	/**
+	 * @param propertyOwnership
+	 *            the propertyOwnership to set
+	 */
+	public void setPropertyOwnership(int[] propertyOwnership) {
+		this.propertyOwnership = propertyOwnership;
+	}
+
+	/**
+	 * @param retirementAccounts
+	 *            the retirementAccounts to set
+	 */
+	public void setRetirementAccounts(int[] retirementAccounts) {
+		this.retirementAccounts = retirementAccounts;
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/data/users/VeryGoodUserType.java b/src/org/yooreeka/examples/credit/data/users/VeryGoodUserType.java
new file mode 100644
index 0000000..608f28d
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/data/users/VeryGoodUserType.java
@@ -0,0 +1,53 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.data.users;
+/** {@link UserType} reporting {@code VERY_GOOD}; pre-loads the value pools the inherited pick methods draw from. */
+public class VeryGoodUserType extends UserType {
+	// Instance initializer: each array below is the set of eligible coded values for one attribute.
+	{
+		setAge(new int[] { 1, 2, 3, 4, 5, 6, 7, 8 });
+		setBancruptcy(new int[] { 0 });
+		setCarOwnership(new int[] { 1 });
+		setCreditScore(new int[] { 5, 6, 7 });
+		setCriminalRecord(new int[] { 0 });
+		setDownPayment(new int[] { 3, 4 });
+		setIncome(new int[] { 4, 5, 6, 7 });
+		setJobClass(new int[] { 2, 3, 4, 5 });
+		setMotorcycleOwnership(new int[] { 0, 1 });
+		setPropertyOwnership(new int[] { 1 });
+		setRetirementAccounts(new int[] { 3, 4, 5 });
+	}
+
+	@Override
+	public String getUserType() {
+		return UserType.VERY_GOOD;
+	}
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/examples/credit/util/AttributeInfo.java b/src/org/yooreeka/examples/credit/util/AttributeInfo.java
new file mode 100644
index 0000000..4645d3c
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/AttributeInfo.java
@@ -0,0 +1,68 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+/** Named attribute together with the integer minimum and maximum of its value range. */
+public class AttributeInfo {
+	private String name;
+	private int minValue;
+	private int maxValue;
+	/** Creates an attribute description from its name and its min / max values. */
+	public AttributeInfo(String name, int min, int max) {
+		this.name = name;
+		this.minValue = min;
+		this.maxValue = max;
+	}
+	/** @return the maximum value stored for this attribute */
+	public int getMaxValue() {
+		return maxValue;
+	}
+	/** @return the minimum value stored for this attribute */
+	public int getMinValue() {
+		return minValue;
+	}
+	/** @return the attribute name */
+	public String getName() {
+		return name;
+	}
+	/** @param maxValue the maximum value to store */
+	public void setMaxValue(int maxValue) {
+		this.maxValue = maxValue;
+	}
+	/** @param minValue the minimum value to store */
+	public void setMinValue(int minValue) {
+		this.minValue = minValue;
+	}
+	/** @param name the attribute name to store */
+	public void setName(String name) {
+		this.name = name;
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/util/AttributeUtils.java b/src/org/yooreeka/examples/credit/util/AttributeUtils.java
new file mode 100644
index 0000000..53fc69e
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/AttributeUtils.java
@@ -0,0 +1,88 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.yooreeka.examples.credit.CreditInstance;
+
+public class AttributeUtils {
+
+	private static Map<String, AttributeInfo> attributeInfoMap = new HashMap<String, AttributeInfo>();
+
+	static {
+		AttributeInfo ai = null;
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_AGE, 1, 10);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_CAR_OWNERSHIP, 0, 1);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_CREDIT_SCORE, 1, 8);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_INCOME_TYPE, 1, 10);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_JOB_CLASS, 1, 5);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_MORTGAGE_DOWN_PAYMENT,
+				1, 4);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(
+				CreditInstance.ATTR_NAME_MOTOR_BICYCLE_OWNERSHIP, 0, 1);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(
+				CreditInstance.ATTR_NAME_OTHER_PROPERTY_OWNERSHIP, 0, 1);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_CRIMINAL_RECORD, 0, 1);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_BANKRUPTCY, 0, 1);
+		attributeInfoMap.put(ai.getName(), ai);
+
+		ai = new AttributeInfo(CreditInstance.ATTR_NAME_RETIREMENT_ACCOUNT, 1,
+				8);
+		attributeInfoMap.put(ai.getName(), ai);
+	}
+	/** Returns (value - min) / (max - min) for the range registered under attrName; unknown names are rejected. */
+	public static double getNormalizedValue(String attrName, double value) {
+		AttributeInfo ai = attributeInfoMap.get(attrName);
+		if (ai == null) throw new IllegalArgumentException("Unknown attribute: " + attrName);
+		return (value - ai.getMinValue()) / (ai.getMaxValue() - ai.getMinValue());
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/util/BootstrapTrainingSetBuilder.java b/src/org/yooreeka/examples/credit/util/BootstrapTrainingSetBuilder.java
new file mode 100644
index 0000000..80d8750
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/BootstrapTrainingSetBuilder.java
@@ -0,0 +1,121 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+
+import java.util.Map;
+import java.util.Random;
+
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/**
+ * Builds bootstrap training sets from the original training set.
+ */
+public class BootstrapTrainingSetBuilder {
+
+	private TrainingSet originalTrainingSet;
+
+	/**
+	 * Creates a builder bound to one source training set. No sample size is
+	 * taken: {@link #buildBootstrapSet()} always draws as many instances as
+	 * the source set's {@code getSize()} reports.
+	 * 
+	 * @param originalTrainingSet
+	 *            bootstrap training sets will be derived from this training set.
+	 */
+	public BootstrapTrainingSetBuilder(TrainingSet originalTrainingSet) {
+
+		this.originalTrainingSet = originalTrainingSet;
+	}
+	/** Builds a same-sized training set by sampling, with replacement, around periodically re-drawn centers. */
+	public TrainingSet buildBootstrapSet() {
+
+		int N = originalTrainingSet.getSize();
+
+		Map<Integer, Instance> instances = originalTrainingSet.getInstances();
+
+		Instance[] selectedInstances = new Instance[N];
+		/*
+		 * Building a new training set of size N by sampling N instances from
+		 * the original data set with replacement. As a result, some instances
+		 * from the original data set will be missing and some will be
+		 * duplicated.
+		 */
+		Random rnd = new Random();
+
+		// pick a center
+		int center = rnd.nextInt(N);
+		int countN = 0;
+		// Re-pick the center every N / 5 draws; at least 1, since N / 5 is 0 for N < 5 and "% 0" throws.
+		int recenterEvery = Math.max(1, N / 5);
+
+		while (countN < N) {
+
+			if (countN % recenterEvery == 0) {
+				center = rnd.nextInt(N);
+			}
+
+			int selectedInstanceId = pickInstanceId(N, center);
+			// NOTE(review): assumes the instance map is keyed 0..N-1 - confirm against TrainingSet.
+			Instance selectedInstance = instances.get(selectedInstanceId);
+			selectedInstances[countN] = selectedInstance;
+			countN++;
+		}
+
+		TrainingSet tS = new TrainingSet(selectedInstances);
+
+		return tS;
+	}
+	/** Draws an id in [0, N) from a Gaussian around {@code center}, redrawing until it is in range. */
+	private int pickInstanceId(int N, int center) {
+
+		Random rnd = new Random();
+		int selectedInstanceId;
+
+		// create the scale factor: one standard deviation is (N / 2) / 4, so
+		// four deviations cover roughly N/2 on either side of the center
+		double scale = (N / 2) / 4.0d;
+
+		do {
+
+			// center the distribution to be N/2 left and right of the center
+			// with almost certainty; the primitive cast truncates toward zero
+			// exactly as the deprecated new Double(..).intValue() did
+			double candidate = center + rnd.nextGaussian() * scale;
+			selectedInstanceId = (int) candidate;
+
+			// do not leave the loop unless we found a valid instance id,
+			// i.e. one inside [0, N)
+		} while (selectedInstanceId < 0 || selectedInstanceId >= N);
+
+		return selectedInstanceId;
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/util/ClassifierResults.java b/src/org/yooreeka/examples/credit/util/ClassifierResults.java
new file mode 100644
index 0000000..65c59d1
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/ClassifierResults.java
@@ -0,0 +1,70 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+/** Per-instance correctness flags of one classifier, with a running count of the correct ones. */
+public class ClassifierResults {
+	private String classifierId;
+	private boolean[] results;
+	private int nCorrect;
+	/** Creates n result slots, all initially false, with zero counted as correct. */
+	public ClassifierResults(String classifierId, int n) {
+		this.classifierId = classifierId;
+		this.results = new boolean[n];
+		this.nCorrect = 0;
+	}
+	/** Returns nCorrect divided by the number of slots, as a double; NaN when there are no slots. */
+	public double getAccuracy() {
+		return (double) nCorrect / (double) results.length;
+	}
+
+	public String getClassifierId() {
+		return classifierId;
+	}
+
+	public int getN() {
+		return results.length;
+	}
+
+	public int getNCorrect() {
+		return nCorrect;
+	}
+
+	public boolean getResult(int i) {
+		return results[i];
+	}
+	/** Stores the flag for slot i; nCorrect moves only when the stored value actually changes. */
+	public void setResult(int i, boolean value) {
+		if (results[i] != value) {
+			nCorrect += value ? 1 : -1;
+		}
+		results[i] = value;
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/util/CreditDataUtils.java b/src/org/yooreeka/examples/credit/util/CreditDataUtils.java
new file mode 100644
index 0000000..3e62db8
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/CreditDataUtils.java
@@ -0,0 +1,100 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.examples.credit.data.users.User;
+
+public class CreditDataUtils {
+	/** Reads one User per non-blank line of the file; any I/O failure is rethrown as a RuntimeException. */
+	public static List<User> loadUsers(String filename) {
+		List<User> users = new ArrayList<User>();
+
+		BufferedReader reader = null;
+		try {
+			// A missing file surfaces here as FileNotFoundException (an
+			// IOException) and is reported like any other read failure,
+			// instead of falling through to a NullPointerException.
+			reader = new BufferedReader(new FileReader(filename));
+			String line = null;
+			while ((line = reader.readLine()) != null) {
+				if (line.trim().length() > 0) {
+					User user = new User();
+					user.loadFromExternalString(line);
+					users.add(user);
+				}
+			}
+		} catch (IOException ioX) {
+			throw new RuntimeException("Failed to load users from file: '"
+					+ filename + "' ", ioX);
+		} finally {
+			// Always release the file handle, even when reading or parsing fails.
+			if (reader != null) {
+				try {
+					reader.close();
+				} catch (IOException ioX) {
+					ioX.printStackTrace();
+				}
+			}
+		}
+
+		return users;
+	}
+	/** Writes one toExternalString() line per user to the file, wrapping any I/O failure in a RuntimeException. */
+	public static void saveUsers(String filename, List<User> users) {
+		try {
+			BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
+			try {
+				for (User user : users) {
+					writer.write(user.toExternalString());
+					writer.write("\n");
+				}
+			} finally {
+				writer.close(); // close() flushes first and now also runs when a write fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to save users in file: '" + filename + "' ", e);
+		}
+	}
+
+	private CreditDataUtils() {
+		// empty
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/util/CreditErrorEstimator.java b/src/org/yooreeka/examples/credit/util/CreditErrorEstimator.java
new file mode 100644
index 0000000..2d6b339
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/CreditErrorEstimator.java
@@ -0,0 +1,231 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.credit.BaggingCreditClassifier;
+import org.yooreeka.examples.credit.BoostingCreditClassifier;
+import org.yooreeka.examples.credit.CreditConcept;
+import org.yooreeka.examples.credit.DTCreditClassifier;
+import org.yooreeka.examples.credit.NBCreditClassifier;
+import org.yooreeka.examples.credit.NNCreditClassifier;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+
+/**
+ * Runs a credit classifier over a test dataset and records how often the
+ * predicted concept name equals the name of each user's own category,
+ * together with a 5x5 confusion matrix of actual versus predicted labels.
+ */
+public class CreditErrorEstimator {
+
+	private Classifier classifier;
+	private UserInstanceBuilder instanceBuilder;
+	private UserDataset testDS;
+	private ClassifierResults classifierResults;
+
+	/** Counts indexed as [actual label index][predicted label index]. */
+	int[][] confusionMatrix = new int[5][5];
+
+	private int correctCount = 0;
+	private int misclassifiedInstanceCount = 0;
+	/** When true, {@link #run()} prints a summary to standard output. */
+	private boolean verbose = true;
+
+	/** Shared initialisation behind the classifier-specific constructors. */
+	private CreditErrorEstimator(UserDataset testDS, Classifier classifier,
+			UserInstanceBuilder instanceBuilder, ClassifierResults results) {
+		this.testDS = testDS;
+		this.classifier = classifier;
+		this.instanceBuilder = instanceBuilder;
+		this.classifierResults = results;
+	}
+
+	/** Creates an estimator for a bagging classifier. */
+	public CreditErrorEstimator(UserDataset testDS,
+			BaggingCreditClassifier classifier) {
+		this(testDS, classifier, classifier.getInstanceBuilder(),
+				new ClassifierResults(classifier.getName(), testDS.getSize()));
+	}
+
+	/** Creates an estimator for a boosting classifier. */
+	public CreditErrorEstimator(UserDataset testDS,
+			BoostingCreditClassifier classifier) {
+		this(testDS, classifier, classifier.getInstanceBuilder(),
+				new ClassifierResults(classifier.getName(), testDS.getSize()));
+	}
+
+	/** Creates an estimator for a decision-tree classifier. */
+	public CreditErrorEstimator(UserDataset testDS,
+			DTCreditClassifier classifier) {
+		this(testDS, classifier, classifier.getInstanceBuilder(),
+				new ClassifierResults(classifier.getName(), testDS.getSize()));
+	}
+
+	/** Creates an estimator for an NB credit classifier. */
+	public CreditErrorEstimator(UserDataset testDS,
+			NBCreditClassifier classifier) {
+		this(testDS, classifier, classifier.getInstanceBuilder(),
+				new ClassifierResults(classifier.getName(), testDS.getSize()));
+	}
+
+	/** Creates an estimator for an NN credit classifier. */
+	public CreditErrorEstimator(UserDataset testDS,
+			NNCreditClassifier classifier) {
+		this(testDS, classifier, classifier.getInstanceBuilder(),
+				new ClassifierResults(classifier.getName(), testDS.getSize()));
+	}
+
+	/** Fraction of the test dataset classified correctly by the last run. */
+	public double getAccuracy() {
+		double total = testDS.getSize();
+		return correctCount / total;
+	}
+
+	/**
+	 * @return the confusionMatrix
+	 */
+	public int[][] getConfusionMatrix() {
+		return confusionMatrix;
+	}
+
+	/** Number of users whose predicted label matched in the last run. */
+	public int getCorrectCount() {
+		return correctCount;
+	}
+
+	/** Number of users whose predicted label differed in the last run. */
+	public int getMisclassifiedInstanceCount() {
+		return misclassifiedInstanceCount;
+	}
+
+	/** Per-instance outcomes recorded by {@link #run()}. */
+	public ClassifierResults getResults() {
+		return classifierResults;
+	}
+
+	/** Whether {@link #run()} prints its summary. */
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	/**
+	 * Classifies every user of the test dataset, rebuilding the counters, the
+	 * confusion matrix and the per-instance results, and prints a summary
+	 * when verbose.
+	 */
+	public void run() {
+		correctCount = 0;
+		misclassifiedInstanceCount = 0;
+
+		// Start from a clean matrix so repeated runs do not accumulate.
+		for (int[] cells : confusionMatrix) {
+			for (int c = 0; c < cells.length; c++) {
+				cells[c] = 0;
+			}
+		}
+
+		long tStart = System.currentTimeMillis();
+
+		int position = 0;
+		for (User user : testDS.getUsers()) {
+			Instance instance = instanceBuilder.createInstance(user);
+			Concept predicted = classifier.classify(instance);
+			Concept expected = new CreditConcept(user.getCategory());
+
+			String actualCreditLabel = expected.getName();
+			String predictedCreditLabel = predicted.getName();
+
+			// Rows are actual labels, columns are predicted labels.
+			int actualIdx = CreditConcept.getIndex(actualCreditLabel);
+			int predictedIdx = CreditConcept.getIndex(predictedCreditLabel);
+			confusionMatrix[actualIdx][predictedIdx]++;
+
+			boolean matched = actualCreditLabel.equals(predictedCreditLabel);
+			if (matched) {
+				correctCount++;
+			} else {
+				// To see the details of a misclassification, print
+				// predicted.getName() and call instance.print() here.
+				misclassifiedInstanceCount++;
+			}
+			classifierResults.setResult(position, matched);
+			position++;
+		}
+
+		if (verbose) {
+			printSummary(System.currentTimeMillis() - tStart);
+		}
+	}
+
+	/** Prints timing, counts, accuracy and the confusion matrix to stdout. */
+	private void printSummary(long elapsedMillis) {
+		final String rule =
+				"___________________________________________________________\n";
+
+		// SUMMARY
+		System.out.println(" Classification completed in " + 0.001
+				* elapsedMillis + " seconds.\n");
+		int totalCount = testDS.getSize();
+		System.out.println(" Total test dataset txns: " + totalCount);
+		System.out.println("    Classified correctly: " + getCorrectCount()
+				+ ", Misclassified: " + getMisclassifiedInstanceCount());
+		System.out.println("                Accuracy: " + getAccuracy());
+		System.out.println(rule);
+
+		// DETAILS
+		System.out.println("                CONFUSION MATRIX");
+		System.out.println(rule);
+
+		System.out.printf("%4s", "");
+		for (int col = 0; col < 5; col++) {
+			System.out.printf("%7s", CreditConcept.getLabel(col));
+		}
+		System.out.println();
+
+		for (int row = 0; row < 5; row++) {
+			System.out.printf("%4s", CreditConcept.getLabel(row));
+			for (int col = 0; col < 5; col++) {
+				System.out.printf("%7s", confusionMatrix[row][col]);
+			}
+			System.out.println();
+		}
+		System.out.println(rule);
+	}
+
+	/** Turns the summary printed by {@link #run()} on or off. */
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+}
diff --git a/src/org/yooreeka/examples/credit/util/DataGenerator.java b/src/org/yooreeka/examples/credit/util/DataGenerator.java
new file mode 100644
index 0000000..9263ae3
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/DataGenerator.java
@@ -0,0 +1,130 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.yooreeka.examples.credit.data.users.User;
+import org.yooreeka.examples.credit.data.users.UserType;
+
+/**
+ * Generates {@link User} records whose attribute values are picked by a
+ * {@link UserType} and whose usernames carry a sequential id.
+ */
+public class DataGenerator {
+
+	private long nextUserId = 1;
+
+	private boolean isNoiseOn = false;
+	private HashMap<UserType, Integer> userTypeDistributions;
+
+	public DataGenerator() {
+		userTypeDistributions = new HashMap<UserType, Integer>();
+	}
+
+	/** Returns the current id and advances the counter by one. */
+	private long generateNextUniqueUserId() {
+		long id = nextUserId;
+		nextUserId = id + 1;
+		return id;
+	}
+
+	/**
+	 * Builds one user whose attributes come from the <code>pick*</code>
+	 * methods of <code>userType</code>. The username is the type name (the
+	 * noisy variant when noise is on) followed by the next user id.
+	 */
+	public User generateUser(UserType userType) {
+		User user = new User();
+		long userId = generateNextUniqueUserId();
+
+		String prefix = isNoiseOn ? userType.getNoisyType()
+				: userType.getUserType();
+		user.setUsername(prefix + userId);
+
+		user.setAge(userType.pickAge());
+		user.setCarOwnership(userType.pickCarOwnership());
+		user.setCreditScore(userType.pickCreditScore());
+		user.setIncome(userType.pickIncome());
+		user.setJobClass(userType.pickJobClass());
+		user.setDownPayment(userType.pickDownPayment());
+		user.setBicycleOwnership(userType.pickMotorcycleOwnership());
+		user.setPropertyOwnership(userType.pickPropertyOwnership());
+		user.setCriminalRecord(userType.pickCriminalRecord());
+		user.setBankruptcy(userType.pickBancruptcy());
+		user.setRetirementAccount(userType.pickRetirementAccounts());
+
+		return user;
+	}
+
+	/** Generates <code>getNUsers()</code> users for each type, in list order. */
+	public List<User> generateUsers(List<UserType> userTypes) {
+		List<User> allUsers = new ArrayList<User>();
+		for (UserType userType : userTypes) {
+			List<User> batch = generateUsers(userType, userType.getNUsers());
+			allUsers.addAll(batch);
+		}
+		return allUsers;
+	}
+
+	/** Generates <code>n</code> users of one type and records that count. */
+	public List<User> generateUsers(UserType userType, int n) {
+		userTypeDistributions.put(userType, n);
+
+		List<User> users = new ArrayList<User>();
+		for (int remaining = n; remaining > 0; remaining--) {
+			users.add(generateUser(userType));
+		}
+		return users;
+	}
+
+	/**
+	 * @return whether usernames are built from the noisy type name
+	 */
+	public boolean isNoiseOn() {
+		return isNoiseOn;
+	}
+
+	/** Sets the id that the next generated user will receive. */
+	public void setNextUserId(long nextUserId) {
+		this.nextUserId = nextUserId;
+	}
+
+	/**
+	 * @param isNoiseOn
+	 *            true to build usernames from the noisy type name
+	 */
+	public void setNoiseOn(boolean isNoiseOn) {
+		this.isNoiseOn = isNoiseOn;
+	}
+}
diff --git a/src/org/yooreeka/examples/credit/util/UserInstanceBuilder.java b/src/org/yooreeka/examples/credit/util/UserInstanceBuilder.java
new file mode 100644
index 0000000..9b8cb73
--- /dev/null
+++ b/src/org/yooreeka/examples/credit/util/UserInstanceBuilder.java
@@ -0,0 +1,167 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.credit.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.DoubleAttribute;
+import org.yooreeka.algos.taxis.core.StringAttribute;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.credit.CreditConcept;
+import org.yooreeka.examples.credit.CreditInstance;
+import org.yooreeka.examples.credit.data.UserDataset;
+import org.yooreeka.examples.credit.data.users.User;
+
+/**
+ * Converts {@link User} records into classifier {@link Instance}s, using
+ * either string-valued attributes or double-valued ones obtained through
+ * <code>AttributeUtils.getNormalizedValue</code>.
+ */
+public class UserInstanceBuilder {
+
+	private boolean useDoubleAttributes;
+
+	/** Creates a builder that produces string attributes. */
+	public UserInstanceBuilder() {
+		this(false);
+	}
+
+	/**
+	 * Creates a builder with an explicit attribute representation.
+	 * 
+	 * @param useDoubleAttributes
+	 *            <code>true</code> to produce double attributes,
+	 *            <code>false</code> to keep string attributes
+	 */
+	public UserInstanceBuilder(boolean useDoubleAttributes) {
+		this.useDoubleAttributes = useDoubleAttributes;
+	}
+
+	/**
+	 * Copies a credit instance, replacing every string attribute by a double
+	 * attribute that holds its parsed and normalized value.
+	 */
+	private CreditInstance convertToDoubleAttributes(Instance instance) {
+		CreditInstance source = (CreditInstance) instance;
+		List<Attribute> converted = new ArrayList<Attribute>();
+		for (Attribute a : source.getAtrributes()) {
+			if (a instanceof StringAttribute) {
+				String name = a.getName();
+				double raw = Double.valueOf((String) a.getValue());
+				double scaled = AttributeUtils.getNormalizedValue(name, raw);
+				converted.add(new DoubleAttribute(name, scaled));
+			} else if (a instanceof DoubleAttribute) {
+				// Already numeric: reused as is, without normalization.
+				converted.add((DoubleAttribute) a);
+			} else {
+				throw new RuntimeException("Unexpected attribute type: "
+						+ a.getClass().getSimpleName() + ", attribute name: "
+						+ a.getName() + ", attribute value: " + a.getValue());
+			}
+		}
+		return new CreditInstance(source.getConcept(), converted);
+	}
+
+	/** Returns <code>i</code> itself, or its double-valued copy when enabled. */
+	public Instance createInstance(Instance i) {
+		return useDoubleAttributes ? convertToDoubleAttributes(i) : i;
+	}
+
+	/**
+	 * Builds a credit instance from a user: one string attribute per user
+	 * property, labelled with the user's category, then passed through
+	 * {@link #createInstance(Instance)}.
+	 */
+	public Instance createInstance(User u) {
+		List<Attribute> attributes = new ArrayList<Attribute>();
+
+		String jobClass = String.valueOf(u.getJobClass());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_JOB_CLASS, jobClass));
+
+		String income = String.valueOf(u.getIncome());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_INCOME_TYPE, income));
+
+		String age = String.valueOf(u.getAge());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_AGE, age));
+
+		String carOwnership = String.valueOf(u.getCarOwnership());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_CAR_OWNERSHIP, carOwnership));
+
+		String creditScore = String.valueOf(u.getCreditScore());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_CREDIT_SCORE, creditScore));
+
+		String downPayment = String.valueOf(u.getDownPayment());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_MORTGAGE_DOWN_PAYMENT, downPayment));
+
+		String bicycle = String.valueOf(u.getBicycleOwnership());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_MOTOR_BICYCLE_OWNERSHIP, bicycle));
+
+		String property = String.valueOf(u.getPropertyOwnership());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_OTHER_PROPERTY_OWNERSHIP, property));
+
+		String criminalRecord = String.valueOf(u.getCriminalRecord());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_CRIMINAL_RECORD, criminalRecord));
+
+		String bankruptcy = String.valueOf(u.getBankruptcy());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_BANKRUPTCY, bankruptcy));
+
+		String retirement = String.valueOf(u.getRetirementAccount());
+		attributes.add(new StringAttribute(
+				CreditInstance.ATTR_NAME_RETIREMENT_ACCOUNT, retirement));
+
+		CreditConcept c = new CreditConcept(u.getCategory());
+		return createInstance(new CreditInstance(c, attributes));
+	}
+
+	/** Builds a training set holding one instance per user of the dataset. */
+	public TrainingSet createTrainingSet(UserDataset ds) {
+		List<User> users = ds.getUsers();
+		Instance[] instances = new Instance[users.size()];
+		int slot = 0;
+		for (User u : users) {
+			instances[slot++] = createInstance(u);
+		}
+		return new TrainingSet(instances);
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/DTFraudClassifier.java b/src/org/yooreeka/examples/fraud/DTFraudClassifier.java
new file mode 100644
index 0000000..c402c7a
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/DTFraudClassifier.java
@@ -0,0 +1,136 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.tree.DecisionTreeClassifier;
+import org.yooreeka.examples.fraud.data.Transaction;
+import org.yooreeka.examples.fraud.data.TransactionDataset;
+import org.yooreeka.examples.fraud.data.TransactionInstanceBuilder;
+
+/** Decision-tree fraud classifier built from a {@link TransactionDataset}. */
+public class DTFraudClassifier extends DecisionTreeClassifier {
+
+	private static final long serialVersionUID = 5491106283513021975L;
+
+	/**
+	 * Deserializes a classifier from <code>filename</code>. Throws
+	 * IllegalArgumentException if the file is missing and RuntimeException
+	 * if it cannot be read. NOTE(review): only load trusted files.
+	 */
+	public static DTFraudClassifier loadClassifier(String filename) {
+		File f = new File(filename);
+		if (!f.exists()) {
+			throw new IllegalArgumentException("File doesn't exist: '"
+					+ filename + "'.");
+		}
+		Object o = null;
+		try {
+			FileInputStream fInStream = new FileInputStream(f);
+			try {
+				ObjectInputStream objInStream = new ObjectInputStream(
+						new BufferedInputStream(fInStream));
+				o = objInStream.readObject();
+			} finally {
+				fInStream.close(); // released even when reading fails
+			}
+		} catch (Exception e) {
+			throw new RuntimeException(
+					"Error while loading data from file: '" + filename
+							+ "'", e);
+		}
+		System.out.println("loaded classifier from file: " + filename);
+		return (DTFraudClassifier) o;
+	}
+
+	/** Serializes <code>o</code> into the file named <code>filename</code>. */
+	public static void saveClassifier(String filename, DTFraudClassifier o) {
+		try {
+			FileOutputStream foutStream = new FileOutputStream(filename);
+			try {
+				ObjectOutputStream objOutputStream = new ObjectOutputStream(
+						new BufferedOutputStream(foutStream));
+				objOutputStream.writeObject(o);
+				objOutputStream.flush();
+			} finally {
+				foutStream.close(); // released even when writing fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while saving data into file: '"
+					+ filename + "'", e);
+		}
+		System.out.println("saved classifier in file: " + filename);
+	}
+
+	private TransactionInstanceBuilder instanceBuilder;
+
+	public DTFraudClassifier(String name, TransactionDataset ds) {
+		super(name, ds.createTrainingDataset());
+		this.instanceBuilder = ds.getInstanceBuilder();
+	}
+
+	public DTFraudClassifier(TransactionDataset ds) {
+		this(DTFraudClassifier.class.getSimpleName(), ds);
+	}
+
+	/** Classifies a transaction after converting it to an instance. */
+	public Concept classify(Transaction t) {
+		return classify(instanceBuilder.createInstance(t));
+	}
+
+	@Override
+	protected Concept createConcept(String category) {
+		return new TransactionConcept(category);
+	}
+
+	public TransactionInstanceBuilder getInstanceBuilder() {
+		return instanceBuilder;
+	}
+
+	public void setInstanceBuilder(TransactionInstanceBuilder instanceBuilder) {
+		this.instanceBuilder = instanceBuilder;
+	}
+
+	public void useDefaultAttributes() {
+		trainOnAttribute(TransactionInstance.ATTR_NAME_N_DESCRIPTION, false);
+		trainOnAttribute(TransactionInstance.ATTR_NAME_N_LOCATION, false);
+		trainOnAttribute(TransactionInstance.ATTR_NAME_N_TXN_AMT, false);
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/NNFraudClassifier.java b/src/org/yooreeka/examples/fraud/NNFraudClassifier.java
new file mode 100644
index 0000000..ee4a7a0
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/NNFraudClassifier.java
@@ -0,0 +1,356 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.algos.taxis.core.DoubleAttribute;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.fraud.data.Transaction;
+import org.yooreeka.examples.fraud.data.TransactionDataset;
+import org.yooreeka.examples.fraud.data.TransactionInstanceBuilder;
+
+/**
+ * Fraud classifier backed by a {@link TransactionNN} neural network with a
+ * single output node; outputs of 0.5 or more are classified as fraud.
+ */
+public class NNFraudClassifier implements Classifier, java.io.Serializable {
+
+	private static final long serialVersionUID = -1567098614540042277L;
+
+	// Built with File.separator instead of hard-coded backslashes, which are
+	// path separators on Windows only.
+	private static final String SERIALIZATION_PATH = YooreekaConfigurator
+			.getHome() + File.separator + "data" + File.separator + "ch05"
+			+ File.separator;
+
+	private static final int DEFAULT_TRAINING_ITERATIONS = 10;
+
+	/**
+	 * Deserializes a classifier from <code>filename</code> inside the
+	 * serialization directory. Throws IllegalArgumentException if the file is
+	 * missing and RuntimeException if it cannot be read.
+	 * NOTE(review): native deserialization is only safe for trusted files.
+	 */
+	public static NNFraudClassifier load(String filename) {
+		File f = new File(SERIALIZATION_PATH + filename);
+		if (!f.exists()) {
+			throw new IllegalArgumentException("File doesn't exist: '"
+					+ filename + "'.");
+		}
+
+		Object o = null;
+		try {
+			FileInputStream fInStream = new FileInputStream(f);
+			try {
+				ObjectInputStream objInStream = new ObjectInputStream(
+						new BufferedInputStream(fInStream));
+				o = objInStream.readObject();
+			} finally {
+				// Release the file handle even when reading fails.
+				fInStream.close();
+			}
+		} catch (Exception e) {
+			throw new RuntimeException(
+					"Error while loading data from file: '" + filename
+							+ "'", e);
+		}
+
+		System.out.println("loaded classifier from file: " + filename);
+		return (NNFraudClassifier) o;
+	}
+
+	private boolean verbose = false;
+
+	private String name;
+
+	/** Neural Network that will be used by this classifier. */
+	private TransactionNN nn;
+
+	/** Number of times the training instances are fed into the network. */
+	private int nTrainingIterations = DEFAULT_TRAINING_ITERATIONS;
+
+	private TransactionDataset ds;
+
+	// Transient: null on an instance returned by load(), so train() rejects it.
+	private transient TrainingSet ts;
+
+	private TransactionInstanceBuilder instanceBuilder;
+
+	/** Attribute names that should be used as Neural Network inputs. */
+	private List<String> availableAttributeNames;
+
+	/**
+	 * Creates a classifier over <code>ds</code>. Inputs are selected with
+	 * {@link #trainOnAttribute(String)} and the network is trained by
+	 * {@link #train()}.
+	 */
+	public NNFraudClassifier(String name, TransactionDataset ds) {
+		this.name = name;
+		this.ds = ds;
+		this.ts = ds.createTrainingDataset();
+		this.instanceBuilder = ds.getInstanceBuilder();
+		this.availableAttributeNames = new ArrayList<String>();
+		this.nn = createNeuralNetwork();
+	}
+
+	/** Creates a classifier named after this class. */
+	public NNFraudClassifier(TransactionDataset ds) {
+		this(NNFraudClassifier.class.getSimpleName(), ds);
+	}
+
+	/**
+	 * Classifies an instance by feeding its attribute values to the network
+	 * and thresholding the first output.
+	 */
+	public Concept classify(Instance instance) {
+		double[] x = createNNInputs(instance);
+		double[] y = nn.classify(x);
+		Concept c = createConceptFromNNOutput(y);
+
+		if (verbose) {
+			System.out.println("\nAssessment:\n  >> This is a " + c.getName());
+		}
+		return c;
+	}
+
+	/**
+	 * Classifies the dataset transaction with the given id. Note that this
+	 * switches verbose output on and leaves it on.
+	 */
+	public Concept classify(String transactionId) {
+		setVerbose(true);
+		Transaction t = ds.findTransactionById(transactionId);
+		return classify(t);
+	}
+
+	/** Classifies a transaction after converting it to an instance. */
+	public Concept classify(Transaction t) {
+		if (verbose) {
+			System.out.println("Transaction:\n  >> " + t.toString());
+		}
+		return classify(instanceBuilder.createInstance(t));
+	}
+
+	/** Maps the first network output to FRAUD (0.5 or more) or VALID. */
+	private Concept createConceptFromNNOutput(double[] y) {
+		double threshold = 0.5;
+		String label = (y[0] >= threshold)
+				? TransactionConcept.CONCEPT_LABEL_FRAUD
+				: TransactionConcept.CONCEPT_LABEL_VALID;
+		return new TransactionConcept(label);
+	}
+
+	private TransactionNN createNeuralNetwork() {
+		return new TransactionNN(TransactionNN.class.getSimpleName());
+	}
+
+	/**
+	 * Collects, in selection order, the values of the attributes chosen with
+	 * {@link #trainOnAttribute(String)}, one per network input node. Throws
+	 * RuntimeException if one is missing or is not a DoubleAttribute.
+	 */
+	public double[] createNNInputs(Instance instance) {
+		int nInputNodes = nn.getInputNodeCount();
+		double[] x = new double[nInputNodes];
+
+		for (int i = 0; i < nInputNodes; i++) {
+			String attrName = this.availableAttributeNames.get(i);
+			Attribute a = instance.getAttributeByName(attrName);
+
+			if (a == null) {
+				throw new RuntimeException(
+						"Failed to find attribute with name: '" + attrName
+								+ "'. Instance: " + instance.toString());
+			}
+			if (!(a instanceof DoubleAttribute)) {
+				throw new RuntimeException(
+						"Invalid attribute type. Only "
+								+ DoubleAttribute.class.getSimpleName()
+								+ " attribute"
+								+ " types can be used in NN. Actual attribute type: "
+								+ a.getClass().getSimpleName());
+			}
+			x[i] = (Double) a.getValue();
+		}
+		return x;
+	}
+
+	/** Target output for an instance: 1 for a fraud concept, otherwise 0. */
+	public double[] createNNOutputs(Instance i) {
+		double[] y = new double[nn.getOutputNodeCount()];
+		boolean fraud = TransactionConcept.CONCEPT_LABEL_FRAUD.equals(i
+				.getConcept().getName());
+		y[0] = fraud ? 1 : 0;
+		return y;
+	}
+
+	public TransactionInstanceBuilder getInstanceBuilder() {
+		return this.instanceBuilder;
+	}
+
+	/**
+	 * @return the name
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * @return the verbose
+	 */
+	public boolean isVerbose() {
+		return verbose;
+	}
+
+	/**
+	 * Serializes this classifier into the serialization directory, using its
+	 * name as the file name. Throws RuntimeException if writing fails.
+	 */
+	public void save() {
+		String filename = SERIALIZATION_PATH + this.getName();
+		try {
+			FileOutputStream foutStream = new FileOutputStream(filename);
+			try {
+				ObjectOutputStream objOutputStream = new ObjectOutputStream(
+						new BufferedOutputStream(foutStream));
+				objOutputStream.writeObject(this);
+				// Push buffered bytes to the file before it is closed below.
+				objOutputStream.flush();
+			} finally {
+				foutStream.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while saving data into file: '"
+					+ filename + "'", e);
+		}
+
+		System.out.println("saved classifier in file: " + filename);
+	}
+
+	/**
+	 * @param name
+	 *            the name to set
+	 */
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public void setNTrainingIterations(int trainingIterations) {
+		nTrainingIterations = trainingIterations;
+	}
+
+	/**
+	 * @param verbose
+	 *            the verbose to set
+	 */
+	public void setVerbose(boolean verbose) {
+		this.verbose = verbose;
+	}
+
+	/**
+	 * Validates the configuration and trains the network; returns true, or
+	 * throws RuntimeException if the training set or network is missing or
+	 * the node counts do not fit the selected attributes.
+	 */
+	public boolean train() {
+		if (ts == null) {
+			throw new RuntimeException(
+					"Can't train classifier - training dataset is null.");
+		}
+
+		if (nn == null) {
+			throw new RuntimeException(
+					"No Neural Network found. Can't proceed.");
+		}
+
+		if (nn.getInputNodeCount() != availableAttributeNames.size()) {
+			throw new RuntimeException(
+					"Number of attributes doesn't match with the number of input nodes."
+							+ "Attributes: " + availableAttributeNames.size()
+							+ ", Input nodes: " + nn.getInputNodeCount());
+		}
+
+		if (nn.getOutputNodeCount() != 1) {
+			throw new RuntimeException("NN has " + nn.getOutputNodeCount()
+					+ " output nodes. "
+					+ "Classifier expects network with only one output node.");
+		}
+
+		// Build and train NN
+		trainNeuralNetwork(nTrainingIterations);
+
+		return true;
+	}
+
+	/** Presents every training instance to the network, nIterations times. */
+	private void trainNeuralNetwork(int nIterations) {
+		for (int i = 1; i <= nIterations; i++) {
+			for (Instance instance : ts.getInstances().values()) {
+				double[] nnInput = createNNInputs(instance);
+				double[] nnExpectedOutput = createNNOutputs(instance);
+				nn.train(nnInput, nnExpectedOutput);
+			}
+
+			if (verbose) {
+				System.out.println("finished training pass: " + i + " out of "
+						+ nIterations);
+			}
+		}
+	}
+
+	public void trainOnAttribute(String name) {
+		availableAttributeNames.add(name);
+	}
+
+	/**
+	 * Selects the three default input attributes, in network input order.
+	 */
+	public void useDefaultAttributes() {
+		trainOnAttribute(TransactionInstance.ATTR_NAME_N_TXN_AMT);
+		trainOnAttribute(TransactionInstance.ATTR_NAME_N_LOCATION);
+		trainOnAttribute(TransactionInstance.ATTR_NAME_N_DESCRIPTION);
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/TransactionConcept.java b/src/org/yooreeka/examples/fraud/TransactionConcept.java
new file mode 100644
index 0000000..6c8d68b
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/TransactionConcept.java
@@ -0,0 +1,92 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud;
+
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/** Concept used to label a transaction; identified solely by its name. */
+public class TransactionConcept implements Concept {
+
+	public static final String CONCEPT_LABEL_FRAUD = "FRAUD_TXN";
+	public static final String CONCEPT_LABEL_VALID = "VALID_TXN";
+
+	/** Label of this concept; the only state consulted by equals and hashCode. */
+	private String name;
+
+	/** Picks the fraud label when isFraud is true, the valid label otherwise. */
+	public TransactionConcept(boolean isFraud) {
+		this.name = isFraud ? CONCEPT_LABEL_FRAUD : CONCEPT_LABEL_VALID;
+	}
+
+	/** Uses the given label as is; no validation is performed. */
+	public TransactionConcept(String name) {
+		this.name = name;
+	}
+
+	/**
+	 * Concepts are equal when they have the same runtime class and the same
+	 * name; two null names are considered equal.
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		final TransactionConcept that = (TransactionConcept) obj;
+		return (name == null) ? that.name == null : name.equals(that.name);
+	}
+
+	/** Not supported here: always throws UnsupportedOperationException. */
+	public Instance[] getInstances() {
+		throw new UnsupportedOperationException("not implemented.");
+	}
+
+	/** Returns the label given at construction. */
+	public String getName() {
+		return name;
+	}
+
+	/** Always null: this concept reports no parent. */
+	public Concept getParent() {
+		return null;
+	}
+
+	/** Hash computed from the name only, in line with equals. */
+	@Override
+	public int hashCode() {
+		return 31 + ((name == null) ? 0 : name.hashCode());
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/TransactionInstance.java b/src/org/yooreeka/examples/fraud/TransactionInstance.java
new file mode 100644
index 0000000..c8b306a
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/TransactionInstance.java
@@ -0,0 +1,99 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud;
+
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+
+/** Instance wrapping a transaction's concept label and its attribute array. */
+public class TransactionInstance implements Instance {
+
+	public static final String ATTR_NAME_N_TXN_AMT = "n_txnamt";
+	public static final String ATTR_NAME_N_LOCATION = "n_location";
+	public static final String ATTR_NAME_N_DESCRIPTION = "n_description";
+	public static final String ATTR_NAME_USERID = "userid";
+	public static final String ATTR_NAME_TXNID = "txnid";
+	public static final String ATTR_NAME_TXN_AMT = "txnamt";
+	public static final String ATTR_NAME_LOCATION_X = "location_x";
+	public static final String ATTR_NAME_LOCATION_Y = "location_y";
+	public static final String ATTR_NAME_DESCRIPTION = "description";
+
+	protected TransactionConcept concept;
+	protected Attribute[] attributes;
+
+	public TransactionInstance(TransactionConcept c, Attribute[] attrs) {
+		this.concept = c;
+		this.attributes = attrs;
+	}
+
+	public Attribute[] getAtrributes() {
+		return attributes;
+	}
+
+	/**
+	 * Case-insensitive lookup, null if absent; null-safe for name and entries.
+	 */
+	public Attribute getAttributeByName(String attrName) {
+		if (attributes != null && attrName != null) {
+			for (Attribute a : attributes) {
+				if (a != null && attrName.equalsIgnoreCase(a.getName())) {
+					return a;
+				}
+			}
+		}
+		return null;
+	}
+
+	public TransactionConcept getConcept() {
+		return concept;
+	}
+
+	/** Prints every attribute as name = value (with placeholders for nulls), then the concept name. */
+	public void print() {
+		if (attributes != null) {
+			for (Attribute a : attributes) {
+				if (a == null || a.getName() == null) {
+					System.out.print(" -  <NULL ATTRIBUTE> ");
+				} else {
+					if (a.getValue() == null) {
+						System.out.print(" -  <NULL ATTRIBUTE VALUE> ");
+					} else {
+						System.out.print(" -  " + a.getName() + " = "
+								+ a.getValue());
+					}
+				}
+			}
+		}
+
+		System.out.println(" -->  " + getConcept().getName());
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/TransactionNN.java b/src/org/yooreeka/examples/fraud/TransactionNN.java
new file mode 100644
index 0000000..b35640c
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/TransactionNN.java
@@ -0,0 +1,106 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud;
+
+import org.yooreeka.algos.taxis.networks.neural.core.BaseNN;
+import org.yooreeka.algos.taxis.networks.neural.core.intf.Layer;
+
+public class TransactionNN extends BaseNN {
+	// Network with a fixed 3 -> 5 -> 1 layout; layers, biases and link weights are all set up by the constructor.
+	private static final long serialVersionUID = -3840865001527729603L;
+
+	public TransactionNN(String name) {
+		super(name);
+		// build the fixed topology right after the base class constructor has run
+		createNN351();
+	}
+
+	/*
+	 * Creates: 3 -> 5 -> 1 network (3 input, 5 hidden, 1 output node) with hard-coded biases and link weights.
+	 */
+	private void createNN351() {
+
+		// 1. Define Layers, Nodes and Node Biases
+		Layer inputLayer = createInputLayer(0, // layer id
+				3 // number of nodes
+		);
+
+		Layer hiddenLayer = createHiddenLayer(1, // layer id
+				5, // number of nodes
+				new double[] { 1, 1.5, 1, 0.5, 1 } // node biases
+		);
+
+		Layer outputLayer = createOutputLayer(2, // layer id
+				1, // number of nodes
+				new double[] { 1.5 } // node biases
+		);
+
+		setInputLayer(inputLayer);
+		setOutputLayer(outputLayer);
+		addHiddenLayer(hiddenLayer);
+
+		// 2. Define links and weights between nodes
+		// Id format: <layerId:nodeIdwithinLayer>
+
+		// Weights for links from Input Layer to Hidden Layer
+		setLink("0:0", "1:0", 0.25);
+		setLink("0:0", "1:1", -0.5);
+		setLink("0:0", "1:2", 0.25);
+		setLink("0:0", "1:3", 0.25);
+		setLink("0:0", "1:4", -0.5);
+
+		setLink("0:1", "1:0", 0.25);
+		setLink("0:1", "1:1", -0.5);
+		setLink("0:1", "1:2", 0.25);
+		setLink("0:1", "1:3", 0.25);
+		setLink("0:1", "1:4", -0.5);
+
+		setLink("0:2", "1:0", 0.25);
+		setLink("0:2", "1:1", -0.5);
+		setLink("0:2", "1:2", 0.25);
+		setLink("0:2", "1:3", 0.25);
+		setLink("0:2", "1:4", -0.5);
+
+		// Weights for links from Hidden Layer to Output Layer
+
+		setLink("1:0", "2:0", -0.5);
+		setLink("1:1", "2:0", 0.5);
+		setLink("1:2", "2:0", -0.5);
+		setLink("1:3", "2:0", -0.5);
+		setLink("1:4", "2:0", 0.5);
+
+		if (isVerbose()) { // NOTE(review): evaluated during construction, so this depends on BaseNN's initial verbose state - confirm
+			System.out.println("NN created");
+		}
+
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/data/Transaction.java b/src/org/yooreeka/examples/fraud/data/Transaction.java
new file mode 100644
index 0000000..f219ae5
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/data/Transaction.java
@@ -0,0 +1,124 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.data;
+
+/** A transaction (user, id, description, amount, location, fraud flag) with a colon-separated text form. */
+public class Transaction implements java.io.Serializable {
+
+	private static final long serialVersionUID = -4537757080789309552L;
+
+	private String description;
+	private TransactionLocation location;
+	private double amount;
+	private boolean fraud;
+	private int userId;
+	private long txnId;
+
+	public Transaction() {
+	}
+
+	public double getAmount() {
+		return amount;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public TransactionLocation getLocation() {
+		return location;
+	}
+
+	public long getTxnId() {
+		return txnId;
+	}
+
+	public int getUserId() {
+		return userId;
+	}
+
+	public boolean isFraud() {
+		return fraud;
+	}
+
+	/**
+	 * Parses userId:txnId:description:amount:x:y:fraud (see toExternalString).
+	 * The fraud flag is trimmed because Boolean.parseBoolean silently returns
+	 * false for padded text such as "true ".
+	 */
+	public void loadFromExternalString(String text) {
+		String[] values = text.split(":");
+		userId = Integer.parseInt(values[0]);
+		txnId = Long.parseLong(values[1]);
+		description = values[2];
+		amount = Double.parseDouble(values[3]);
+		double x = Double.parseDouble(values[4]);
+		double y = Double.parseDouble(values[5]);
+		location = new TransactionLocation(x, y);
+		fraud = Boolean.parseBoolean(values[6].trim());
+	}
+
+	public void setAmount(double amount) {
+		this.amount = amount;
+	}
+
+	public void setDescription(String description) {
+		this.description = description;
+	}
+
+	public void setFraud(boolean fraud) {
+		this.fraud = fraud;
+	}
+
+	public void setLocation(TransactionLocation location) {
+		this.location = location;
+	}
+
+	public void setTxnId(long txnId) {
+		this.txnId = txnId;
+	}
+
+	public void setUserId(int userId) {
+		this.userId = userId;
+	}
+
+	/** Serializes as userId:txnId:description:amount:x:y:fraud; the location must already be set. */
+	public String toExternalString() {
+		return userId + ":" + txnId + ":" + description + ":" + amount + ":"
+				+ location.getX() + ":" + location.getY() + ":" + fraud;
+	}
+
+	@Override
+	public String toString() {
+		return toExternalString();
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/data/TransactionDataset.java b/src/org/yooreeka/examples/fraud/data/TransactionDataset.java
new file mode 100644
index 0000000..b817d96
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/data/TransactionDataset.java
@@ -0,0 +1,134 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.examples.fraud.util.UserStatisticsCalculator;
+
+public class TransactionDataset implements java.io.Serializable {
+
+	private static final long serialVersionUID = 3061645520644719411L;
+
+	private Map<Integer, List<Transaction>> txnsByUserIdMap;
+	private Map<String, Transaction> txnsByTxnIdMap;
+	private Integer maxUserId;
+	private TransactionInstanceBuilder instanceBuilder;
+
+	public TransactionDataset(List<Transaction> txnsList) {
+		this.txnsByUserIdMap = new HashMap<Integer, List<Transaction>>();
+		this.txnsByTxnIdMap = new HashMap<String, Transaction>(txnsList.size());
+
+		for (Transaction e : txnsList) {
+
+			txnsByTxnIdMap.put(String.valueOf(e.getTxnId()), e);
+
+			Integer userId = e.getUserId();
+			List<Transaction> userTxns = txnsByUserIdMap.get(userId);
+			if (userTxns == null) {
+				userTxns = new ArrayList<Transaction>();
+				txnsByUserIdMap.put(userId, userTxns);
+			}
+
+			if (maxUserId == null || e.getUserId() > maxUserId) {
+				maxUserId = e.getUserId();
+			}
+
+			userTxns.add(e);
+		}
+
+		instanceBuilder = new TransactionInstanceBuilder();
+
+	}
+
+	public void calculateUserStats() {
+		UserStatisticsCalculator userStatsCalculator = new UserStatisticsCalculator();
+
+		instanceBuilder.setUserStatisticsMap(userStatsCalculator
+				.calculateStatistics(this));
+	}
+
+	public TrainingSet createTrainingDataset() {
+		return instanceBuilder.createTrainingSet(this);
+	}
+
+	public Transaction findTransactionById(String id) {
+		return txnsByTxnIdMap.get(id);
+	}
+
+	public List<Transaction> findUserTxns(Integer userId) {
+		return new ArrayList<Transaction>(txnsByUserIdMap.get(userId));
+	}
+
+	/**
+	 * @return the instanceBuilder
+	 */
+	public TransactionInstanceBuilder getInstanceBuilder() {
+		return instanceBuilder;
+	}
+
+	public Integer getMaxUserId() {
+		return maxUserId;
+	}
+
+	public int getSize() {
+		return txnsByTxnIdMap.size();
+	}
+
+	public List<Transaction> getTransactions() {
+		return new ArrayList<Transaction>(txnsByTxnIdMap.values());
+	}
+
+	public List<Integer> getUsers() {
+		return new ArrayList<Integer>(txnsByUserIdMap.keySet());
+	}
+
+	public void printAll() {
+		for (Map.Entry<String, Transaction> e : txnsByTxnIdMap.entrySet()) {
+			Transaction t = e.getValue();
+			System.out.println(t);
+		}
+	}
+
+	public void printTransaction(String id) {
+		Transaction e = findTransactionById(id);
+		if (e != null) {
+			System.out.println(e.toString());
+		} else {
+			System.out.println("Transaction not found (txn id: '" + id + "')");
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/data/TransactionInstanceBuilder.java b/src/org/yooreeka/examples/fraud/data/TransactionInstanceBuilder.java
new file mode 100644
index 0000000..64aea1d
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/data/TransactionInstanceBuilder.java
@@ -0,0 +1,224 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.yooreeka.algos.taxis.core.DoubleAttribute;
+import org.yooreeka.algos.taxis.core.StringAttribute;
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.fraud.TransactionConcept;
+import org.yooreeka.examples.fraud.TransactionInstance;
+import org.yooreeka.examples.fraud.util.UserStatistics;
+import org.yooreeka.util.metrics.JaccardCoefficient;
+import org.yooreeka.util.metrics.SimilarityMeasure;
+
+/** Builds TransactionInstance objects from transactions, normalized with per-user statistics. */
+public class TransactionInstanceBuilder implements java.io.Serializable {
+
+	private static final long serialVersionUID = -2334221990318430678L;
+
+	/*
+	 * For every user we keep a set of user-specific values to normalize data.
+	 */
+	private Map<Integer, UserStatistics> userStatisticsMap;
+
+	/*
+	 * Similarity measure that will be used to evaluate similarity between
+	 * transaction descriptions.
+	 */
+	private SimilarityMeasure descriptionSim;
+
+	public TransactionInstanceBuilder() {
+		userStatisticsMap = new HashMap<Integer, UserStatistics>();
+		descriptionSim = new JaccardCoefficient();
+	}
+
+	/**
+	 * Returns the best similarity between the description and any description
+	 * known for the user (0.0 if none scores higher). Token arrays of known
+	 * descriptions are cached on the UserStatistics object.
+	 */
+	private Double calculateDescriptionSimilarity(String txnDescription,
+			UserStatistics u) {
+		String[] termsX = tokenizeTxnDescription(txnDescription);
+		Set<String> validTxnDescriptions = u.getDescriptions();
+		double bestSim = 0.0;
+		for (String valueY : validTxnDescriptions) {
+			String[] termsY = u.getDescriptionTokens(valueY);
+			if (termsY == null) {
+				termsY = tokenizeTxnDescription(valueY);
+				u.setDescriptionTokens(valueY, termsY);
+			}
+			double sim = descriptionSim.similarity(termsX, termsY);
+			if (sim > bestSim) {
+				bestSim = sim;
+			}
+		}
+		return bestSim;
+	}
+
+	/**
+	 * Builds the instance for one transaction: three normalized NN attributes
+	 * plus informational copies of the raw fields. Throws RuntimeException when the user has no statistics.
+	 */
+	public TransactionInstance createInstance(Transaction t) {
+		int userId = t.getUserId();
+		UserStatistics userStats = getUserStatistics(userId);
+		if (userStats == null) {
+			throw new RuntimeException(
+					"Can't create instance. There are no statistics for user: "
+							+ userId);
+		}
+
+		/*
+		 * Calculate distance between user location centroid and instance
+		 * location
+		 */
+		TransactionLocation nLocation = normalizeLocation(t.getLocation(),
+				userStats);
+		TransactionLocation nCentroid = normalizeLocation(
+				userStats.getLocationCentroid(), userStats);
+		double nLocationDistance = nCentroid.distance(nLocation);
+		double nAmt = normalizeAmount(t.getAmount(), userStats);
+		double nDescriptionSim = calculateDescriptionSimilarity(
+				t.getDescription(), userStats);
+		// NOTE(review): widened to double, so the user id attribute reads like "42.0" - confirm consumers expect that.
+		double nUserId = t.getUserId();
+
+		List<Attribute> attributes = new ArrayList<Attribute>();
+		// Attributes that will be used by NN
+		attributes.add(new DoubleAttribute(
+				TransactionInstance.ATTR_NAME_N_TXN_AMT, nAmt));
+		attributes.add(new DoubleAttribute(
+				TransactionInstance.ATTR_NAME_N_LOCATION, nLocationDistance));
+		attributes.add(new DoubleAttribute(
+				TransactionInstance.ATTR_NAME_N_DESCRIPTION, nDescriptionSim));
+
+		// Adding informational attributes
+		attributes.add(new StringAttribute(
+				TransactionInstance.ATTR_NAME_USERID, String.valueOf(nUserId)));
+		attributes.add(new StringAttribute(TransactionInstance.ATTR_NAME_TXNID,
+				String.valueOf(t.getTxnId())));
+		attributes.add(new DoubleAttribute(
+				TransactionInstance.ATTR_NAME_TXN_AMT, t.getAmount()));
+		attributes.add(new DoubleAttribute(
+				TransactionInstance.ATTR_NAME_LOCATION_X, t.getLocation()
+						.getX()));
+		attributes.add(new DoubleAttribute(
+				TransactionInstance.ATTR_NAME_LOCATION_Y, t.getLocation()
+						.getY()));
+		attributes.add(new StringAttribute(
+				TransactionInstance.ATTR_NAME_DESCRIPTION, t.getDescription()));
+		TransactionConcept c = null;
+		if (t.isFraud()) {
+			c = new TransactionConcept(TransactionConcept.CONCEPT_LABEL_FRAUD);
+		} else {
+			c = new TransactionConcept(TransactionConcept.CONCEPT_LABEL_VALID);
+		}
+
+		return new TransactionInstance(c, attributes.toArray(new Attribute[0]));
+	}
+
+	/** Converts every transaction of the dataset into an instance and wraps them in a TrainingSet. */
+	public TrainingSet createTrainingSet(TransactionDataset data) {
+		List<Transaction> txns = data.getTransactions();
+		int nTxns = txns.size();
+		Instance[] instances = new Instance[nTxns];
+		for (int i = 0; i < nTxns; i++) {
+			Transaction t = txns.get(i);
+			instances[i] = createInstance(t);
+		}
+		return new TrainingSet(instances);
+	}
+
+	public Map<Integer, UserStatistics> getUserStatistics() {
+		return userStatisticsMap;
+	}
+
+	public UserStatistics getUserStatistics(int userId) {
+		return userStatisticsMap.get(userId);
+	}
+
+	/** @return the userStatisticsMap */
+	public Map<Integer, UserStatistics> getUserStatisticsMap() {
+		return userStatisticsMap;
+	}
+
+	private Double normalizeAmount(Double amt, UserStatistics u) {
+		// Valid values should fall into [0..1] and fraud outside.
+		return scale(amt, u.getTxnAmtMin(), u.getTxnAmtMax());
+	}
+
+	private TransactionLocation normalizeLocation(TransactionLocation location,
+			UserStatistics u) {
+		double nX = scale(location.getX(), u.getLocationMinX(), u.getLocationMaxX());
+		double nY = scale(location.getY(), u.getLocationMinY(), u.getLocationMaxY());
+		return new TransactionLocation(nX, nY);
+	}
+
+	/**
+	 * Min-max scaling: (value - min) / (max - min). When the range is empty
+	 * and the value sits exactly on it, returns 0.0 instead of the NaN that
+	 * 0/0 would produce; all other results are unchanged.
+	 */
+	private static double scale(double value, double min, double max) {
+		double range = max - min;
+		if (range == 0.0 && value == min) {
+			return 0.0;
+		}
+		return (value - min) / range;
+	}
+
+	/** Prints the statistics stored for the user, or "null" when there are none. */
+	public void printUserStats(int userId) {
+		UserStatistics userProps = userStatisticsMap.get(userId);
+		System.out.println("Properties for userId: " + userId
+				+ " calculated from training data:");
+		System.out.println(String.valueOf(userProps));
+	}
+
+	/** @param userStatisticsMap the userStatisticsMap to set */
+	public void setUserStatisticsMap(
+			Map<Integer, UserStatistics> userStatisticsMap) {
+		this.userStatisticsMap = userStatisticsMap;
+	}
+
+	private String[] tokenizeTxnDescription(String description) {
+		return description.split("\\s");
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/data/TransactionLoader.java b/src/org/yooreeka/examples/fraud/data/TransactionLoader.java
new file mode 100644
index 0000000..b107668
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/data/TransactionLoader.java
@@ -0,0 +1,59 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.data;
+
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.fraud.util.FraudDataUtils;
+
+public class TransactionLoader {
+
+	public static final String TRAINING_TXNS_FILE = YooreekaConfigurator
+			.getHome() + "/data/ch05/fraud/training-txns.txt";
+
+	public static final String TEST_TXNS_FILE = YooreekaConfigurator.getHome()
+			+ "/data/ch05/fraud/test-txns.txt";
+
+	/** Reads the file named by TEST_TXNS_FILE and wraps its transactions in a dataset. */
+	public static TransactionDataset loadTestDataset() {
+		return new TransactionDataset(loadTxns(TEST_TXNS_FILE));
+	}
+
+	/** Reads the file named by TRAINING_TXNS_FILE and wraps its transactions in a dataset. */
+	public static TransactionDataset loadTrainingDataset() {
+		return new TransactionDataset(loadTxns(TRAINING_TXNS_FILE));
+	}
+
+	public static List<Transaction> loadTxns(String filename) {
+		return FraudDataUtils.loadTransactions(filename);
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/data/TransactionLocation.java b/src/org/yooreeka/examples/fraud/data/TransactionLocation.java
new file mode 100644
index 0000000..a8c5d95
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/data/TransactionLocation.java
@@ -0,0 +1,94 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.data;
+
+/** Serializable (x, y) point used as the location of a transaction. */
+public class TransactionLocation implements java.io.Serializable {
+
+	/**
+	 * Version stamp used by Java serialization for this class.
+	 */
+	private static final long serialVersionUID = 7742289669577088001L;
+
+	private double x;
+	private double y;
+
+	public TransactionLocation(double x, double y) {
+		this.x = x;
+		this.y = y;
+	}
+
+	/** Computes the Euclidean distance between this point and the given one. */
+	public double distance(TransactionLocation location) {
+		final double dx = x - location.getX();
+		final double dy = y - location.getY();
+		return Math.sqrt(dx * dx + dy * dy);
+	}
+
+	/**
+	 * Two locations are equal when they have the same class and both
+	 * coordinates hold the same value; NaN matches NaN, and 0.0 differs
+	 * from -0.0.
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj) {
+			return true;
+		}
+		if (obj == null || getClass() != obj.getClass()) {
+			return false;
+		}
+		final TransactionLocation that = (TransactionLocation) obj;
+		return Double.compare(x, that.x) == 0 && Double.compare(y, that.y) == 0;
+	}
+
+	public double getX() {
+		return x;
+	}
+
+	public double getY() {
+		return y;
+	}
+
+	/** Combines the standard Double hash of each coordinate with factor 31. */
+	@Override
+	public int hashCode() {
+		final int xHash = Double.valueOf(x).hashCode();
+		final int yHash = Double.valueOf(y).hashCode();
+		return 31 * (31 + xHash) + yHash;
+	}
+
+	@Override
+	public String toString() {
+		return "[x=" + x + ", y=" + y + "]";
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/util/DataGenerator.java b/src/org/yooreeka/examples/fraud/util/DataGenerator.java
new file mode 100644
index 0000000..c0abfe5
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/DataGenerator.java
@@ -0,0 +1,119 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.examples.fraud.data.Transaction;
+import org.yooreeka.examples.fraud.data.TransactionLocation;
+
+/**
+ * Produces synthetic transactions from transaction-set profiles, numbering
+ * them with consecutive ids.
+ */
+public class DataGenerator {
+
+	/** Id handed to the next generated transaction. */
+	private long nextTxnId;
+
+	public DataGenerator() {
+		// ids start at 1 unless the caller picks another starting point
+		this.setNextTxnId(1);
+	}
+
+	/** Draws an amount using the profile's mean and standard deviation. */
+	private double generateAmt(TransactionSetProfile user) {
+		double mean = user.getTxnAmtMean();
+		double std = user.getTxnAmtStd();
+		return FraudDataUtils.nextTxnAmount(mean, std);
+	}
+
+	/** Picks one of the profile's descriptions at random. */
+	private String generateDescription(TransactionSetProfile userParams) {
+		final String[] candidates = userParams.getTxnDescriptions();
+		return candidates[FraudDataUtils.randomInt(candidates.length)];
+	}
+
+	/** Picks random integer coordinates within the profile's X and Y ranges. */
+	private TransactionLocation generateLocation(
+			TransactionSetProfile userParams) {
+		// x is drawn first, then y
+		int x = FraudDataUtils.randomInt(userParams.getLocationMinX(),
+				userParams.getLocationMaxX());
+		int y = FraudDataUtils.randomInt(userParams.getLocationMinY(),
+				userParams.getLocationMaxY());
+		return new TransactionLocation(x, y);
+	}
+
+	/** Returns the current id and advances the counter by one. */
+	private long generateNextUniqueTxnId() {
+		final long id = nextTxnId;
+		nextTxnId = id + 1;
+		return id;
+	}
+
+	/** Creates a single transaction populated from the given profile. */
+	private Transaction generateTxn(TransactionSetProfile userParams) {
+		Transaction txn = new Transaction();
+		txn.setUserId(userParams.getUserId());
+		txn.setTxnId(generateNextUniqueTxnId());
+		txn.setAmount(generateAmt(userParams));
+		txn.setDescription(generateDescription(userParams));
+		txn.setLocation(generateLocation(userParams));
+		txn.setFraud(userParams.isFraud());
+		return txn;
+	}
+
+	/**
+	 * Generates the transactions of every profile, in array order.
+	 *
+	 * @param allUsers profiles to generate from; each contributes as many
+	 *            transactions as its getNTxns() reports
+	 * @return all generated transactions in generation order
+	 */
+	public List<Transaction> generateTxns(TransactionSetProfile[] allUsers) {
+		List<Transaction> generated = new ArrayList<Transaction>();
+		for (TransactionSetProfile profile : allUsers) {
+			int produced = 0;
+			while (produced < profile.getNTxns()) {
+				generated.add(generateTxn(profile));
+				produced++;
+			}
+		}
+		return generated;
+	}
+
+	/** Sets the id that the next generated transaction will receive. */
+	public void setNextTxnId(long nextTxnId) {
+		this.nextTxnId = nextTxnId;
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/util/FraudDataUtils.java b/src/org/yooreeka/examples/fraud/util/FraudDataUtils.java
new file mode 100644
index 0000000..53fb086
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/FraudDataUtils.java
@@ -0,0 +1,148 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.yooreeka.examples.fraud.data.Transaction;
+
+public class FraudDataUtils {
+
+	private static Random rnd = new Random();
+
+	private static Random txnAmountRnd = new Random();
+
+	/** Loads one transaction per non-blank line; I/O failures surface as RuntimeException. */
+	public static List<Transaction> loadTransactions(String filename) {
+		List<Transaction> txns = new ArrayList<Transaction>();
+		try {
+			BufferedReader reader = new BufferedReader(new FileReader(filename));
+			try {
+				String line;
+				while ((line = reader.readLine()) != null) {
+					if (line.trim().length() > 0) {
+						Transaction txn = new Transaction();
+						txn.loadFromExternalString(line);
+						txns.add(txn);
+					}
+				}
+			} finally {
+				reader.close(); // runs even when a line fails to load
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to load transactions from file: '"
+					+ filename + "' ", e);
+		}
+		return txns;
+	}
+
+	/**
+	 * Loads the non-blank lines of a text file, in file order.
+	 * @param filename path of the file to read
+	 * @return the non-blank lines, exactly as read (not trimmed)
+	 * @throws RuntimeException if the file cannot be opened or read
+	 */
+	static String[] loadTxnDescriptions(String filename) {
+		List<String> descriptions = new ArrayList<String>();
+		BufferedReader reader = null;
+		try {
+			// Opening inside the try reports a missing file like any other read failure.
+			reader = new BufferedReader(new FileReader(filename));
+			String line;
+			while ((line = reader.readLine()) != null) {
+				if (line.trim().length() > 0) {
+					descriptions.add(line);
+				}
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Failed to load descriptions from file: '" + filename
+							+ "' ", e);
+		} finally {
+			if (reader != null) {
+				try {
+					reader.close();
+				} catch (IOException ioX) {
+					ioX.printStackTrace(); // a failed close is reported, not fatal
+				}
+			}
+		}
+		return descriptions.toArray(new String[descriptions.size()]);
+	}
+
+	/** Samples mean + std * N(0,1) until positive, then rounds half-up to two decimals. */
+	public static double nextTxnAmount(double mean, double std) {
+		if (std == 0.0 && mean <= 0.0) {
+			// every sample would equal mean, so the retry loop below could never end
+			throw new IllegalArgumentException("mean must be > 0 when std is 0");
+		}
+		double amt;
+		do {
+			amt = txnAmountRnd.nextGaussian() * std + mean;
+		} while (amt <= 0.0);
+		return new BigDecimal(amt).setScale(2, java.math.RoundingMode.HALF_UP).doubleValue();
+	}
+
+	static int randomInt(int n) {
+		return randomInt(0, n); // two-argument form with min = 0
+	}
+
+	static int randomInt(int min, int max) {
+		return rnd.nextInt(max - min) + min; // uniform over [min, max)
+	}
+
+	/** Writes each transaction's external form on its own line of the named file. */
+	static void saveTransactions(String filename, List<Transaction> txns) {
+		try {
+			BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
+			try {
+				for (Transaction txn : txns) {
+					writer.write(txn.toExternalString());
+					writer.write("\n");
+				}
+			} finally {
+				writer.close(); // flushes, and runs even when a write fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to save transactions to file: '" + filename + "' ", e);
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/examples/fraud/util/FraudErrorEstimator.java b/src/org/yooreeka/examples/fraud/util/FraudErrorEstimator.java
new file mode 100644
index 0000000..347aa41
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/FraudErrorEstimator.java
@@ -0,0 +1,123 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+import org.yooreeka.algos.taxis.core.intf.Classifier;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.fraud.DTFraudClassifier;
+import org.yooreeka.examples.fraud.NNFraudClassifier;
+import org.yooreeka.examples.fraud.TransactionConcept;
+import org.yooreeka.examples.fraud.data.Transaction;
+import org.yooreeka.examples.fraud.data.TransactionDataset;
+import org.yooreeka.examples.fraud.data.TransactionInstanceBuilder;
+
+/** Tallies a fraud classifier's correct and incorrect results on a test dataset. */
+public class FraudErrorEstimator {
+	private final Classifier classifier;
+	private final TransactionInstanceBuilder instanceBuilder;
+	private final TransactionDataset testDS;
+
+	private int correctCount = 0;
+	private int incorrectValidCount = 0;
+	private int incorrectFraudCount = 0;
+	private int totalFraudTxnsCount = 0;
+
+	public FraudErrorEstimator(TransactionDataset testDS,
+			DTFraudClassifier classifier) {
+		this.testDS = testDS;
+		this.classifier = classifier;
+		this.instanceBuilder = classifier.getInstanceBuilder();
+	}
+
+	public FraudErrorEstimator(TransactionDataset testDS,
+			NNFraudClassifier classifier) {
+		this.testDS = testDS;
+		// Silence a verbose classifier; the setting is not restored afterwards.
+		if (classifier.isVerbose()) {
+			classifier.setVerbose(false);
+		}
+		this.classifier = classifier;
+		this.instanceBuilder = classifier.getInstanceBuilder();
+	}
+
+	public int getCorrectCount() {
+		return correctCount;
+	}
+
+	public int getIncorrectFraudCount() {
+		return incorrectFraudCount;
+	}
+
+	public int getIncorrectValidCount() {
+		return incorrectValidCount;
+	}
+
+	public int getTotalFraudTxnsCount() {
+		return totalFraudTxnsCount;
+	}
+
+	/**
+	 * Classifies every test transaction, printing misclassified instances and
+	 * a summary. Tallies are cleared first, so repeated calls do not accumulate.
+	 */
+	public void run() {
+		correctCount = 0;
+		incorrectValidCount = 0;
+		incorrectFraudCount = 0;
+		totalFraudTxnsCount = 0;
+
+		for (Transaction txn : testDS.getTransactions()) {
+			Instance i = instanceBuilder.createInstance(txn);
+			Concept concept = classifier.classify(i);
+			Concept expectedConcept = new TransactionConcept(txn.isFraud());
+			if (txn.isFraud()) {
+				totalFraudTxnsCount++;
+			}
+			if (concept.getName().equals(expectedConcept.getName())) {
+				correctCount++;
+			} else {
+				i.print(); // show the misclassified instance
+				if (txn.isFraud()) {
+					incorrectFraudCount++;
+				} else {
+					incorrectValidCount++;
+				}
+			}
+		}
+
+		System.out.println("Total test dataset txns: " + testDS.getSize()
+				+ ", Number of fraud txns:" + getTotalFraudTxnsCount());
+		System.out.println("Classified correctly: " + getCorrectCount()
+				+ ", Misclassified valid txns: " + getIncorrectValidCount()
+				+ ", Misclassified fraud txns: " + getIncorrectFraudCount());
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/util/TenUsersSample.java b/src/org/yooreeka/examples/fraud/util/TenUsersSample.java
new file mode 100644
index 0000000..27f4801
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/TenUsersSample.java
@@ -0,0 +1,363 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.fraud.data.Transaction;
+
+/**
+ * Example of how to configure and generate a file of transactions.
+ */
+public class TenUsersSample {
+
+	/*
+	 * File with descriptions to be used for valid transactions.
+	 */
+	public static String TXN_DESC_FILENAME = YooreekaConfigurator.getHome()
+			+ "/data/ch05/fraud/descriptions.txt";
+
+	/*
+	 * File with descriptions to be used for fraud transactions.
+	 */
+	public static String FRAUD_TXN_DESC_FILENAME = YooreekaConfigurator
+			.getHome() + "/data/ch05/fraud/fraud-descriptions.txt";
+
+	/*
+	 * Generated transactions will be saved into this file.
+	 */
+	public static String TRAINING_TXN_FILENAME = YooreekaConfigurator.getHome()
+			+ "/data/ch05/fraud/generated-training-txns.txt";
+
+	public static String TEST_TXN_FILENAME = YooreekaConfigurator.getHome()
+			+ "/data/ch05/fraud/generated-test-txns.txt";
+
+	/**
+	 * Builds the profiles for the test set: users 21 through 30, two users per
+	 * user type, each with 100 valid and 10 fraud transactions.
+	 *
+	 * @return the profiles in ascending user id order
+	 */
+	public static TransactionSetProfile[] createUsersForTest() {
+		List<TransactionSetProfile> profiles = new ArrayList<TransactionSetProfile>();
+		String[] validDescs = FraudDataUtils
+				.loadTxnDescriptions(TXN_DESC_FILENAME);
+		String[] fraudDescs = FraudDataUtils
+				.loadTxnDescriptions(FRAUD_TXN_DESC_FILENAME);
+
+		// Each user gets a set of valid and a set of fraud txns. The ids are
+		// those of the training users that have no fraud txns.
+		for (int userId = 21; userId <= 30; userId++) {
+			List<TransactionSetProfile> pair;
+			switch ((userId - 21) / 2) { // consecutive id pairs share a type
+			case 0:
+				pair = createUserType1(userId, 100, 10, validDescs, fraudDescs);
+				break;
+			case 1:
+				pair = createUserType2(userId, 100, 10, validDescs, fraudDescs);
+				break;
+			case 2:
+				pair = createUserType3(userId, 100, 10, validDescs, fraudDescs);
+				break;
+			case 3:
+				pair = createUserType4(userId, 100, 10, validDescs, fraudDescs);
+				break;
+			default:
+				pair = createUserType5(userId, 100, 10, validDescs, fraudDescs);
+				break;
+			}
+			profiles.addAll(pair);
+		}
+		return profiles.toArray(new TransactionSetProfile[0]);
+	}
+
+	/**
+	 * Builds the profiles for the training set.
+	 * <p>
+	 * Users 1-10 (two per user type) get valid and fraud transactions; users
+	 * 21-30 (two per user type) get valid transactions only and are the same
+	 * user ids that createUsersForTest() builds test profiles for.
+	 *
+	 * @return the profiles, in the order of the rows of the plan below
+	 */
+	public static TransactionSetProfile[] createUsersForTraining() {
+		List<TransactionSetProfile> profiles = new ArrayList<TransactionSetProfile>();
+		String[] validDescs = FraudDataUtils
+				.loadTxnDescriptions(TXN_DESC_FILENAME);
+		String[] fraudDescs = FraudDataUtils
+				.loadTxnDescriptions(FRAUD_TXN_DESC_FILENAME);
+
+		// One row per pair of consecutive user ids:
+		// { first user id, user type, valid txn count, fraud txn count }
+		int[][] plan = {
+				// We have 5 types/profiles of users; first, 2 users of each
+				// type with fraud txns.
+				{ 1, 1, 300, 25 },
+				{ 3, 2, 400, 15 },
+				{ 5, 3, 300, 30 },
+				{ 7, 4, 300, 10 },
+				{ 9, 5, 600, 20 },
+				// Then 2 users of each type without fraud txns; these users
+				// are used in the test dataset as well.
+				{ 21, 1, 400, 0 },
+				{ 23, 2, 400, 0 },
+				{ 25, 3, 400, 0 },
+				{ 27, 4, 500, 0 },
+				{ 29, 5, 600, 0 },
+				// NOTE(review): users 29-30 get a second, identical pair of
+				// profiles here, doubling their generated txns -- confirm
+				// that this is intended.
+				{ 29, 5, 600, 0 } };
+
+		for (int[] row : plan) {
+			int firstUserId = row[0];
+			int userType = row[1];
+			int nValid = row[2];
+			int nFraud = row[3];
+			for (int userId = firstUserId; userId <= firstUserId + 1; userId++) {
+				List<TransactionSetProfile> pair;
+				switch (userType) {
+				case 1:
+					pair = createUserType1(userId, nValid, nFraud,
+							validDescs, fraudDescs);
+					break;
+				case 2:
+					pair = createUserType2(userId, nValid, nFraud,
+							validDescs, fraudDescs);
+					break;
+				case 3:
+					pair = createUserType3(userId, nValid, nFraud,
+							validDescs, fraudDescs);
+					break;
+				case 4:
+					pair = createUserType4(userId, nValid, nFraud,
+							validDescs, fraudDescs);
+					break;
+				default:
+					pair = createUserType5(userId, nValid, nFraud,
+							validDescs, fraudDescs);
+					break;
+				}
+				profiles.addAll(pair);
+			}
+		}
+
+		return profiles.toArray(new TransactionSetProfile[0]);
+	}
+
+	/**
+	 * Transaction sequence configuration for Type 1 User.
+	 *
+	 * @return fixed-size list: valid-txn profile first, fraud-txn profile second
+	 */
+	public static List<TransactionSetProfile> createUserType1(int userId,
+			int nValidTxns, int nFraudTxns, String[] txnDescriptions,
+			String[] fraudTxnDescriptions) {
+		TransactionSetProfile valid = new TransactionSetProfile();
+		TransactionSetProfile fraud = new TransactionSetProfile();
+
+		// Profile of this user's valid transactions
+		valid.setUserId(userId);
+		valid.setNTxns(nValidTxns);
+		valid.setTxnDescriptions(txnDescriptions);
+		valid.setLocations(700, 700, 1000, 1000);
+		valid.setTxnAmtMean(50);
+		valid.setTxnAmtStd(20);
+		valid.setFraud(false);
+
+		// Profile of the same user's fraud transactions
+		fraud.setUserId(userId);
+		fraud.setNTxns(nFraudTxns);
+		fraud.setTxnAmtMean(4000);
+		fraud.setTxnAmtStd(100);
+		fraud.setLocations(50, 50, 200, 200);
+		fraud.setTxnDescriptions(fraudTxnDescriptions);
+		fraud.setFraud(true);
+
+		return Arrays.asList(valid, fraud);
+	}
+
+	/**
+	 * Transaction sequence configuration for Type 2 User.
+	 *
+	 * @return fixed-size list: valid-txn profile first, fraud-txn profile second
+	 */
+	public static List<TransactionSetProfile> createUserType2(int userId,
+			int nValidTxns, int nFraudTxns, String[] txnDescriptions,
+			String[] fraudTxnDescriptions) {
+		TransactionSetProfile valid = new TransactionSetProfile();
+		TransactionSetProfile fraud = new TransactionSetProfile();
+
+		// Profile of this user's valid transactions
+		valid.setUserId(userId);
+		valid.setNTxns(nValidTxns);
+		valid.setTxnDescriptions(txnDescriptions);
+		valid.setLocations(500, 500, 1000, 1000);
+		valid.setTxnAmtMean(60);
+		valid.setTxnAmtStd(20);
+		valid.setFraud(false);
+
+		// Profile of the same user's fraud transactions
+		fraud.setUserId(userId);
+		fraud.setNTxns(nFraudTxns);
+		fraud.setTxnAmtMean(1000);
+		fraud.setTxnAmtStd(100);
+		fraud.setLocations(100, 100, 600, 600);
+		fraud.setTxnDescriptions(fraudTxnDescriptions);
+		fraud.setFraud(true);
+
+		return Arrays.asList(valid, fraud);
+	}
+
+	/**
+	 * Transaction sequence configuration for Type 3 User.
+	 *
+	 * @return fixed-size list: valid-txn profile first, fraud-txn profile second
+	 */
+	public static List<TransactionSetProfile> createUserType3(int userId,
+			int nValidTxns, int nFraudTxns, String[] txnDescriptions,
+			String[] fraudTxnDescriptions) {
+		TransactionSetProfile valid = new TransactionSetProfile();
+		TransactionSetProfile fraud = new TransactionSetProfile();
+
+		// Profile of this user's valid transactions
+		valid.setUserId(userId);
+		valid.setNTxns(nValidTxns);
+		valid.setTxnDescriptions(txnDescriptions);
+		valid.setLocations(500, 500, 800, 800);
+		valid.setTxnAmtMean(80);
+		valid.setTxnAmtStd(20);
+		valid.setFraud(false);
+
+		// Profile of the same user's fraud transactions
+		fraud.setUserId(userId);
+		fraud.setNTxns(nFraudTxns);
+		fraud.setTxnAmtMean(800);
+		fraud.setTxnAmtStd(50);
+		fraud.setLocations(100, 100, 400, 400);
+		fraud.setTxnDescriptions(fraudTxnDescriptions);
+		fraud.setFraud(true);
+
+		return Arrays.asList(valid, fraud);
+	}
+
+	/**
+	 * Transaction sequence configuration for Type 4 User.
+	 *
+	 * @return fixed-size list: valid-txn profile first, fraud-txn profile second
+	 */
+	public static List<TransactionSetProfile> createUserType4(int userId,
+			int nValidTxns, int nFraudTxns, String[] txnDescriptions,
+			String[] fraudTxnDescriptions) {
+		TransactionSetProfile valid = new TransactionSetProfile();
+		TransactionSetProfile fraud = new TransactionSetProfile();
+		// Profile of this user's valid transactions
+		valid.setUserId(userId);
+		valid.setNTxns(nValidTxns);
+		valid.setTxnDescriptions(txnDescriptions);
+		valid.setLocations(100, 100, 400, 400);
+		valid.setTxnAmtMean(200);
+		valid.setTxnAmtStd(20);
+		valid.setFraud(false);
+
+		// Profile of the same user's fraud transactions
+		fraud.setUserId(userId);
+		fraud.setNTxns(nFraudTxns);
+		fraud.setTxnAmtMean(2000);
+		fraud.setTxnAmtStd(100);
+		fraud.setLocations(600, 600, 800, 800);
+		fraud.setTxnDescriptions(fraudTxnDescriptions);
+		fraud.setFraud(true);
+
+		return Arrays.asList(valid, fraud);
+	}
+
+	/**
+	 * Transaction sequence configuration for Type 5 User.
+	 *
+	 * @return fixed-size list: valid-txn profile first, fraud-txn profile second
+	 */
+	public static List<TransactionSetProfile> createUserType5(int userId,
+			int nValidTxns, int nFraudTxns, String[] txnDescriptions,
+			String[] fraudTxnDescriptions) {
+		TransactionSetProfile valid = new TransactionSetProfile();
+		TransactionSetProfile fraud = new TransactionSetProfile();
+		// Profile of this user's valid transactions
+		valid.setUserId(userId);
+		valid.setNTxns(nValidTxns);
+		valid.setTxnAmtMean(700);
+		valid.setTxnAmtStd(500);
+		valid.setTxnDescriptions(txnDescriptions);
+		valid.setLocations(100, 100, 400, 400);
+		valid.setFraud(false);
+
+		// Profile of the same user's fraud transactions
+		fraud.setUserId(userId);
+		fraud.setNTxns(nFraudTxns);
+		fraud.setTxnAmtMean(700);
+		fraud.setTxnAmtStd(100);
+		fraud.setLocations(500, 500, 700, 700);
+		fraud.setTxnDescriptions(fraudTxnDescriptions);
+		fraud.setFraud(true);
+		return Arrays.asList(valid, fraud);
+	}
+
+	public static void generateTxns(String txnFilename, int startTxnId,
+			TransactionSetProfile[] allUsers) {
+		final DataGenerator generator = new DataGenerator();
+		generator.setNextTxnId(startTxnId); // ids are handed out from here on
+		System.out.println("Generating transactions...");
+		final List<Transaction> generated = generator.generateTxns(allUsers);
+		System.out.println("Saving transactions into '" + txnFilename + "'");
+		FraudDataUtils.saveTransactions(txnFilename, generated);
+	}
+
+	/** Regenerates the training file (ids from 1) and the test file (ids from 500000). */
+	public static void main(String[] args) {
+		TransactionSetProfile[] training = createUsersForTraining();
+		generateTxns(TRAINING_TXN_FILENAME, 1, training);
+		TransactionSetProfile[] test = createUsersForTest();
+		generateTxns(TEST_TXN_FILENAME, 500000, test);
+	}
+
+	/** Loads the transactions stored in the file and prints them one per line. */
+	public static void printTxns(String txnFilename) {
+		System.out.println("Loading transactions from '" + txnFilename + "'");
+		List<Transaction> loaded = FraudDataUtils.loadTransactions(txnFilename);
+		System.out.println("Printing loaded transactions:");
+		for (int idx = 0; idx < loaded.size(); idx++) {
+			System.out.println(loaded.get(idx));
+		}
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/util/TransactionSetProfile.java b/src/org/yooreeka/examples/fraud/util/TransactionSetProfile.java
new file mode 100644
index 0000000..a4f8061
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/TransactionSetProfile.java
@@ -0,0 +1,145 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+/**
+ * Configuration properties to control generation of user transactions.
+ */
+public class TransactionSetProfile {
+
+	/** Identifies the credit card user. */
+	private int userId;
+
+	/** Presumably the number of transactions to generate (TODO confirm). */
+	private int nTxns;
+
+	/** Mean value for transaction amount. */
+	private double txnAmtMean;
+
+	/** Standard deviation for transaction amount. */
+	private double txnAmtStd;
+
+	/** Location coordinates; all four are assigned by setLocations. */
+	private int locationMinX;
+	private int locationMinY;
+	private int locationMaxX;
+	private int locationMaxY;
+
+	/** Descriptions that will be used for the transactions of this set. */
+	private String[] txnDescriptions;
+
+	/** Flag kept by setFraud and reported by isFraud. */
+	private boolean isFraud;
+
+	/** @return the user id */
+	public int getUserId() {
+		return userId;
+	}
+
+	/** @param userId the user id to store */
+	public void setUserId(int userId) {
+		this.userId = userId;
+	}
+
+	/** @return the value last passed to setNTxns */
+	public int getNTxns() {
+		return nTxns;
+	}
+
+	/** @param txns the new value of nTxns */
+	public void setNTxns(int txns) {
+		this.nTxns = txns;
+	}
+
+	/** @return the mean transaction amount */
+	public double getTxnAmtMean() {
+		return txnAmtMean;
+	}
+
+	/** @param txnAmtMean the mean transaction amount */
+	public void setTxnAmtMean(double txnAmtMean) {
+		this.txnAmtMean = txnAmtMean;
+	}
+
+	/** @return the standard deviation of the transaction amount */
+	public double getTxnAmtStd() {
+		return txnAmtStd;
+	}
+
+	/** @param txnAmtStd the standard deviation of the transaction amount */
+	public void setTxnAmtStd(double txnAmtStd) {
+		this.txnAmtStd = txnAmtStd;
+	}
+
+	public int getLocationMinX() {
+		return locationMinX;
+	}
+
+	public int getLocationMinY() {
+		return locationMinY;
+	}
+
+	public int getLocationMaxX() {
+		return locationMaxX;
+	}
+
+	public int getLocationMaxY() {
+		return locationMaxY;
+	}
+
+	/** Stores all four location bounds in one call. */
+	public void setLocations(int minX, int minY, int maxX, int maxY) {
+		this.locationMinX = minX;
+		this.locationMaxX = maxX;
+		this.locationMinY = minY;
+		this.locationMaxY = maxY;
+	}
+
+	/** @return the stored array itself, not a copy */
+	public String[] getTxnDescriptions() {
+		return txnDescriptions;
+	}
+
+	/** @param txnDescriptions array that is stored by reference */
+	public void setTxnDescriptions(String[] txnDescriptions) {
+		this.txnDescriptions = txnDescriptions;
+	}
+
+	/** @return the fraud flag */
+	public boolean isFraud() {
+		return isFraud;
+	}
+
+	/** @param isFraud the fraud flag */
+	public void setFraud(boolean isFraud) {
+		this.isFraud = isFraud;
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/util/UserStatistics.java b/src/org/yooreeka/examples/fraud/util/UserStatistics.java
new file mode 100644
index 0000000..47983c7
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/UserStatistics.java
@@ -0,0 +1,153 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.yooreeka.examples.fraud.data.TransactionLocation;
+
+/**
+ * Holds user-specific statistics that are calculated from training data.
+ */
+public class UserStatistics implements java.io.Serializable {
+
+	private static final long serialVersionUID = -7537387975282866317L;
+
+	private int userId;
+	private double txnAmtMin;
+	private double txnAmtMax;
+	private Map<String, String[]> descriptionTokensMap; // description -> tokens
+	private TransactionLocation locationCentroid; // null until explicitly set
+	private double locationMinX;
+	private double locationMaxX;
+	private double locationMinY;
+	private double locationMaxY;
+
+	public UserStatistics() {
+		descriptionTokensMap = new HashMap<String, String[]>();
+	}
+
+	public Set<String> getDescriptions() {
+		return descriptionTokensMap.keySet(); // live view of the map's keys
+	}
+
+	public String[] getDescriptionTokens(String d) {
+		return this.descriptionTokensMap.get(d); // null if none were stored
+	}
+
+	public TransactionLocation getLocationCentroid() {
+		return locationCentroid;
+	}
+
+	public double getLocationMaxX() {
+		return locationMaxX;
+	}
+
+	public double getLocationMaxY() {
+		return locationMaxY;
+	}
+
+	public double getLocationMinX() {
+		return locationMinX;
+	}
+
+	public double getLocationMinY() {
+		return locationMinY;
+	}
+
+	public Double getTxnAmtMax() {
+		return txnAmtMax;
+	}
+
+	public Double getTxnAmtMin() {
+		return txnAmtMin;
+	}
+
+	public int getUserId() {
+		return userId;
+	}
+
+	public void setDescriptions(Set<String> descriptions) {
+		descriptionTokensMap.clear(); // drops earlier descriptions and tokens
+		for (String d : descriptions) {
+			this.descriptionTokensMap.put(d, null); // no tokens stored yet
+		}
+	}
+
+	public void setDescriptionTokens(String d, String[] tokens) {
+		this.descriptionTokensMap.put(d, tokens);
+	}
+
+	public void setLocationCentroid(TransactionLocation locationCentroid) {
+		this.locationCentroid = locationCentroid;
+	}
+
+	public void setLocationMaxX(double locationMaxX) {
+		this.locationMaxX = locationMaxX;
+	}
+
+	public void setLocationMaxY(double locationMaxY) {
+		this.locationMaxY = locationMaxY;
+	}
+
+	public void setLocationMinX(double locationMinX) {
+		this.locationMinX = locationMinX;
+	}
+
+	public void setLocationMinY(double locationMinY) {
+		this.locationMinY = locationMinY;
+	}
+
+	public void setTxnAmtMax(Double txnAmountMax) {
+		this.txnAmtMax = txnAmountMax; // unboxes: null throws a NullPointerException
+	}
+
+	public void setTxnAmtMin(Double txnAmountMin) {
+		this.txnAmtMin = txnAmountMin; // unboxes: null throws a NullPointerException
+	}
+
+	public void setUserId(int userId) {
+		this.userId = userId;
+	}
+
+	@Override
+	public String toString() {
+		// Plain concatenation prints an unset (null) centroid as "null".
+		return "[userId=" + userId + ", txnAmtMin=" + txnAmtMin
+				+ ", txnAmtMax=" + txnAmtMax + ", locationMinX=" + locationMinX
+				+ ", locationMaxX=" + locationMaxX + ", locationMinY="
+				+ locationMinY + ", locationMaxY=" + locationMaxY
+				+ ", descriptions=" + descriptionTokensMap.keySet()
+				+ ", locationCentroid=" + locationCentroid + "]";
+	}
+}
diff --git a/src/org/yooreeka/examples/fraud/util/UserStatisticsCalculator.java b/src/org/yooreeka/examples/fraud/util/UserStatisticsCalculator.java
new file mode 100644
index 0000000..4dc95b3
--- /dev/null
+++ b/src/org/yooreeka/examples/fraud/util/UserStatisticsCalculator.java
@@ -0,0 +1,164 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.fraud.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.yooreeka.examples.fraud.data.Transaction;
+import org.yooreeka.examples.fraud.data.TransactionDataset;
+import org.yooreeka.examples.fraud.data.TransactionLocation;
+
+public class UserStatisticsCalculator {
+
+	/**
+	 * Calculates statistics for every user of the training data.
+	 *
+	 * @param trainingData
+	 *            supplies the user ids and the transactions of each user
+	 * @return map from user id to the statistics calculated for that user
+	 */
+	public Map<Integer, UserStatistics> calculateStatistics(
+			TransactionDataset trainingData) {
+
+		Map<Integer, UserStatistics> statsByUserMap = new HashMap<Integer, UserStatistics>();
+
+		for (Integer userId : trainingData.getUsers()) {
+			List<Transaction> userTxns = trainingData.findUserTxns(userId);
+			statsByUserMap.put(userId,
+					calculateUserProperties(userId, userTxns));
+		}
+
+		return statsByUserMap;
+	}
+
+	/**
+	 * Calculates the statistics of one user. Fraud transactions are skipped;
+	 * only non-fraud transactions contribute.
+	 *
+	 * When there is no non-fraud transaction, the amount and location ranges
+	 * keep the defaults of a new <code>UserStatistics</code>. Passing the
+	 * still-null range values to the setters would fail with a
+	 * NullPointerException during unboxing.
+	 */
+	private UserStatistics calculateUserProperties(Integer userId,
+			List<Transaction> userTxns) {
+
+		UserStatistics props = new UserStatistics();
+		props.setUserId(userId);
+
+		// Unique descriptions of this user's non-fraud transactions.
+		Set<String> descriptions = new HashSet<String>();
+
+		// All locations of this user's non-fraud transactions.
+		List<TransactionLocation> locations = new ArrayList<TransactionLocation>();
+
+		// Ranges stay null until the first non-fraud transaction is seen.
+		Double minAmount = null;
+		Double maxAmount = null;
+		Double locationMinX = null;
+		Double locationMaxX = null;
+		Double locationMinY = null;
+		Double locationMaxY = null;
+
+		for (Transaction t : userTxns) {
+			if (t.isFraud()) {
+				// do not use fraud transactions to calculate user statistics
+				continue;
+			}
+
+			descriptions.add(t.getDescription());
+			locations.add(t.getLocation());
+
+			double x = t.getLocation().getX();
+			double y = t.getLocation().getY();
+
+			// update min/max values for location
+			if (locationMinX == null || x < locationMinX) {
+				locationMinX = x;
+			}
+			if (locationMinY == null || y < locationMinY) {
+				locationMinY = y;
+			}
+			if (locationMaxX == null || x > locationMaxX) {
+				locationMaxX = x;
+			}
+			if (locationMaxY == null || y > locationMaxY) {
+				locationMaxY = y;
+			}
+
+			// update min/max values for amount
+			double amt = t.getAmount();
+			if (minAmount == null || amt < minAmount) {
+				minAmount = amt;
+			}
+			if (maxAmount == null || amt > maxAmount) {
+				maxAmount = amt;
+			}
+		}
+
+		props.setDescriptions(descriptions);
+		props.setLocationCentroid(centroid(locations));
+
+		if (!locations.isEmpty()) {
+			// At least one non-fraud transaction: all ranges are non-null.
+			props.setTxnAmtMin(minAmount);
+			props.setTxnAmtMax(maxAmount);
+			props.setLocationMaxX(locationMaxX);
+			props.setLocationMinX(locationMinX);
+			props.setLocationMaxY(locationMaxY);
+			props.setLocationMinY(locationMinY);
+		}
+
+		return props;
+	}
+
+	/**
+	 * Averages the x and the y coordinates of the given locations. For an
+	 * empty list both coordinates are 0.0 / 0.0, that is NaN.
+	 */
+	private TransactionLocation centroid(List<TransactionLocation> locations) {
+		double x = 0.0;
+		double y = 0.0;
+		for (TransactionLocation location : locations) {
+			x += location.getX();
+			y += location.getY();
+		}
+
+		double n = locations.size();
+		return new TransactionLocation(x / n, y / n);
+	}
+
+}
diff --git a/src/org/yooreeka/examples/newsgroups/NewsCrawler.java b/src/org/yooreeka/examples/newsgroups/NewsCrawler.java
new file mode 100644
index 0000000..55a8bf5
--- /dev/null
+++ b/src/org/yooreeka/examples/newsgroups/NewsCrawler.java
@@ -0,0 +1,195 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.newsgroups;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.internet.crawling.core.BasicWebCrawler;
+import org.yooreeka.util.internet.crawling.core.CrawlData;
+import org.yooreeka.util.internet.crawling.core.URLFilter;
+import org.yooreeka.util.internet.crawling.core.URLNormalizer;
+
+/**
+ * A basic news crawler.
+ *
+ * Remember to use <tt>setOffline(true)</tt>, if you want to use the local
+ * files
+ *
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class NewsCrawler {
+
+	public static final int DEFAULT_MAX_DEPTH = 1;
+	public static final int DEFAULT_MAX_DOCS = 1000;
+
+	private BasicWebCrawler webCrawler;
+
+	private String crawlDataDir;
+
+	private int maxDepth = DEFAULT_MAX_DEPTH;
+
+	private int maxDocs = DEFAULT_MAX_DOCS;
+
+	private List<String> seedUrls;
+
+	/**
+	 * This variable determines whether we will crawl the Internet (false, the
+	 * default) or local files (true). Remember to use
+	 * <tt>setOffline(true)</tt>, if you want to use the local files
+	 */
+	private boolean isOffline = false;
+
+	/*
+	 * Directory that contains "previously unseen" documents.
+	 */
+	public static final String TEST_FILES_DIR_CH7 = YooreekaConfigurator
+			.getHome() + "/data/ch07/test";
+
+	/**
+	 * Creates a crawler and hands the name of a new, time-stamped
+	 * sub-directory of <code>rootDir</code> to the BasicWebCrawler.
+	 */
+	public NewsCrawler(String rootDir, int maxDepth, int maxDocs) {
+		this.crawlDataDir = buildUniqueDirectoryName(rootDir);
+		this.maxDepth = maxDepth;
+		this.maxDocs = maxDocs;
+
+		seedUrls = new ArrayList<String>();
+		webCrawler = new BasicWebCrawler(crawlDataDir);
+	}
+
+	public void addSeedUrl(String val) {
+		URLNormalizer urlNormalizer = new URLNormalizer();
+		seedUrls.add(urlNormalizer.normalizeUrl(val));
+	}
+
+	private String buildUniqueDirectoryName(String rootDir) {
+		return rootDir + File.separator + "crawl-"
+				+ System.currentTimeMillis();
+	}
+
+	public CrawlData getCrawlData() {
+		return webCrawler.getCrawlData();
+	}
+
+	/**
+	 * @return the crawl data directory (a sub-directory of the root directory)
+	 */
+	public String getCrawlDataDir() {
+		return crawlDataDir;
+	}
+
+	public List<String> getSeedUrls() {
+		return seedUrls;
+	}
+
+	/**
+	 * @return true if local files are crawled instead of the Internet
+	 */
+	public boolean isOffline() {
+		return isOffline;
+	}
+
+	/** Lists the <tt>.html</tt> files found in a directory as URL strings. */
+	private List<String> loadFileUrls(String dir) {
+		List<String> fileUrls = new ArrayList<String>();
+		File[] docs = new File(dir).listFiles(new FilenameFilter() {
+			public boolean accept(File dir, String name) {
+				return name.endsWith(".html");
+			}
+		});
+
+		// listFiles returns null if dir is not a directory or on an I/O error
+		if (docs == null) {
+			throw new RuntimeException("Cannot list the files of: " + dir);
+		}
+
+		try {
+			for (File f : docs) {
+				fileUrls.add(f.toURI().toURL().toExternalForm());
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Error while converting filename into URL: ", e);
+		}
+
+		return fileUrls;
+	}
+
+	/**
+	 * Runs the crawl over the seed URLs: only file URLs are allowed when
+	 * offline, only HTTP URLs otherwise. Prints the elapsed time in seconds.
+	 */
+	public void run() {
+
+		webCrawler.addSeedUrls(getSeedUrls());
+
+		URLFilter urlFilter = new URLFilter();
+		// offline: allow file URLs only; online: allow HTTP URLs only
+		boolean offline = isOffline();
+		urlFilter.setAllowFileUrls(offline);
+		urlFilter.setAllowHttpUrls(!offline);
+		webCrawler.setURLFilter(urlFilter);
+
+		long t0 = System.currentTimeMillis();
+
+		/* run crawl - crawler will fetch and parse the documents */
+		webCrawler.fetchAndProcess(maxDepth, maxDocs);
+
+		System.out.println("Timer (s): [Crawler processed data] --> "
+				+ (System.currentTimeMillis() - t0) * 0.001);
+	}
+
+	/** Replaces the seed URLs with the .html files of TEST_FILES_DIR_CH7. */
+	public void setAllSeedUrls() {
+
+		seedUrls.clear();
+
+		List<String> fileUrls = loadFileUrls(TEST_FILES_DIR_CH7);
+		for (String url : fileUrls) {
+			addSeedUrl(url);
+		}
+	}
+
+	/**
+	 * @param isOffline
+	 *            true to crawl local files, false to crawl the Internet
+	 */
+	public void setOffline(boolean isOffline) {
+		this.isOffline = isOffline;
+	}
+}
diff --git a/src/org/yooreeka/examples/recommender/MovieLensRMSESample.java b/src/org/yooreeka/examples/recommender/MovieLensRMSESample.java
new file mode 100644
index 0000000..d819fa0
--- /dev/null
+++ b/src/org/yooreeka/examples/recommender/MovieLensRMSESample.java
@@ -0,0 +1,61 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.recommender;
+
+import org.yooreeka.algos.reco.collab.data.MovieLensData;
+import org.yooreeka.algos.reco.collab.data.MovieLensDataset;
+import org.yooreeka.algos.reco.collab.evaluation.RMSEEstimator;
+import org.yooreeka.algos.reco.collab.recommender.MovieLensDelphi;
+
+public class MovieLensRMSESample {
+
+	/**
+	 * Calls compareRMSEs for 25 similarity thresholds (0.05, 0.06, ...).
+	 *
+	 * @param args
+	 *            args[0] is the test size, an integer; it is required
+	 */
+	public static void main(String[] args) throws Exception {
+		if (args.length < 1) {
+			throw new IllegalArgumentException(
+					"Usage: MovieLensRMSESample <testSize>");
+		}
+		int testSize = Integer.parseInt(args[0]);
+		MovieLensDataset ds = MovieLensData.createDataset(testSize);
+		MovieLensDelphi delphi = new MovieLensDelphi(ds);
+		RMSEEstimator rmseEstimator = new RMSEEstimator();
+
+		for (int i = 0; i < 25; i++) {
+			delphi.setSimilarityThreshold(0.05d + i * 0.01d);
+			rmseEstimator.compareRMSEs(delphi);
+		}
+	}
+}
diff --git a/src/org/yooreeka/examples/recommender/RatingGrapher.java b/src/org/yooreeka/examples/recommender/RatingGrapher.java
new file mode 100644
index 0000000..f0698c0
--- /dev/null
+++ b/src/org/yooreeka/examples/recommender/RatingGrapher.java
@@ -0,0 +1,174 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.recommender;
+
+import java.io.File;
+import java.util.Collection;
+
+import org.yooreeka.algos.reco.collab.data.MovieLensDataset;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.Item;
+import org.yooreeka.algos.reco.collab.model.Rating;
+import org.yooreeka.algos.reco.collab.model.User;
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.gui.XyGui;
+
+public class RatingGrapher {
+
+	/** Builds the MovieLens dataset from the configured data directory. */
+	private static Dataset getMovieLensData() {
+		String dataDir = YooreekaConfigurator
+				.getProperty(YooreekaConfigurator.MOVIELENS_DATA_DIR);
+		File usersFile = new File(dataDir, MovieLensDataset.USERS_FILENAME);
+		File itemsFile = new File(dataDir, MovieLensDataset.ITEMS_FILENAME);
+		File ratingsFile = new File(dataDir, MovieLensDataset.RATINGS_FILENAME);
+		return new MovieLensDataset("MovieLensDataset", usersFile, itemsFile,
+				ratingsFile);
+	}
+
+	/** Plots the distribution of all ratings; other plots are disabled. */
+	public static void main(String[] args) {
+		// RatingGrapher.plotAverageItemRating();
+		// RatingGrapher.plotAverageUserRating();
+		plotRatingsDistribution();
+	}
+
+	/**
+	 * Plots the average rating of every item against the item id.
+	 */
+	public static void plotAverageItemRating() {
+		Dataset ds = getMovieLensData();
+		Collection<Item> items = ds.getItems();
+		int count = items.size();
+		double[] ids = new double[count];
+		double[] averages = new double[count];
+		int next = 0;
+		for (Item item : items) {
+			ids[next] = item.getId();
+			averages[next] = item.getAverageRating();
+			next++;
+		}
+		// x values: item ids, y values: average ratings
+		new XyGui(ds.getName(), ids, averages).plot();
+	}
+
+	/**
+	 * Plots the average rating of every user against the user id.
+	 */
+	public static void plotAverageUserRating() {
+		Dataset ds = getMovieLensData();
+		Collection<User> users = ds.getUsers();
+		int count = users.size();
+		double[] ids = new double[count];
+		double[] averages = new double[count];
+		int next = 0;
+		for (User user : users) {
+			ids[next] = user.getId();
+			averages[next] = user.getAverageRating();
+			next++;
+		}
+		// x values: user ids, y values: average ratings
+		new XyGui(ds.getName(), ids, averages).plot();
+	}
+
+	/** Plots, for every item, the number of its ratings against its id. */
+	public static void plotNumberOfRatingsPerItem() {
+		Dataset ds = getMovieLensData();
+		Collection<Item> items = ds.getItems();
+		int count = items.size();
+		double[] ids = new double[count];
+		double[] ratingCounts = new double[count];
+		int next = 0;
+		for (Item item : items) {
+			ids[next] = item.getId();
+			ratingCounts[next] = item.getAllRatings().size();
+			next++;
+		}
+		new XyGui(ds.getName(), ids, ratingCounts).plot();
+	}
+
+	/** Plots, for every user, the number of its ratings against its id. */
+	public static void plotNumberOfRatingsPerUser() {
+		Dataset ds = getMovieLensData();
+		Collection<User> users = ds.getUsers();
+		int count = users.size();
+		double[] ids = new double[count];
+		double[] ratingCounts = new double[count];
+		int next = 0;
+		for (User user : users) {
+			ids[next] = user.getId();
+			ratingCounts[next] = user.getAllRatings().size();
+			next++;
+		}
+		new XyGui(ds.getName(), ids, ratingCounts).plot();
+	}
+
+	public static void plotRatingsDistribution() {
+		Dataset ds = getMovieLensData();
+		String title = "Ratings for all items by all users, n="
+				+ ds.getRatingsCount();
+		plotRatingsDistribution(title, ds.getRatings());
+	}
+
+	/** Plots which fraction of the given ratings has each value 1 to 5. */
+	private static void plotRatingsDistribution(String plotName,
+			Collection<Rating> ratings) {
+		double[] values = { 1, 2, 3, 4, 5 };
+		double[] fractions = new double[values.length];
+		if (ratings != null && !ratings.isEmpty()) {
+			// count the ratings per value; value v is counted in slot v - 1
+			for (Rating r : ratings) {
+				fractions[r.getRating() - 1]++;
+			}
+			// turn the counts into fractions of the total
+			int total = ratings.size();
+			for (int slot = 0; slot < fractions.length; slot++) {
+				fractions[slot] /= total;
+			}
+		}
+		new XyGui(plotName, values, fractions).plot();
+	}
+
+	public static void plotRatingsDistributionForItem(int itemId) {
+		Collection<Rating> itemRatings = getMovieLensData().getItem(itemId)
+				.getAllRatings();
+		plotRatingsDistribution("Ratings distribution for item: " + itemId
+				+ ", n=" + itemRatings.size(), itemRatings);
+	}
+
+	public static void plotRatingsDistributionForUser(int userId) {
+		Collection<Rating> userRatings = getMovieLensData().getUser(userId)
+				.getAllRatings();
+		plotRatingsDistribution("Ratings distribution for user: " + userId
+				+ ", n=" + userRatings.size(), userRatings);
+	}
+
+}
diff --git a/src/org/yooreeka/examples/recommender/Recommender.java b/src/org/yooreeka/examples/recommender/Recommender.java
new file mode 100644
index 0000000..1ed7cdc
--- /dev/null
+++ b/src/org/yooreeka/examples/recommender/Recommender.java
@@ -0,0 +1,119 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.recommender;
+
+import java.io.File;
+import java.util.List;
+
+import org.yooreeka.algos.reco.collab.data.MovieLensDataset;
+import org.yooreeka.algos.reco.collab.model.Dataset;
+import org.yooreeka.algos.reco.collab.model.RecommendationType;
+import org.yooreeka.algos.reco.collab.recommender.Delphi;
+import org.yooreeka.algos.reco.collab.recommender.PredictedItemRating;
+
+/**
+ * @deprecated not used at the moment.
+ */
+@Deprecated
+class Recommender {
+
+	// private static final Logger logger = Logger.getLogger(Recommender.class);
+
+	public static void main(String[] args) throws Exception {
+		Recommender m = new Recommender(args[0]);
+		boolean useSimilarityCacheWhenAvailable = true;
+		m.recommendOnMovieLens(useSimilarityCacheWhenAvailable);
+	}
+
+	private Dataset dataset;
+
+	private Recommender(String dataDir) {
+		// Load MovieLens dataset
+		File users = new File(dataDir, MovieLensDataset.USERS_FILENAME);
+		File items = new File(dataDir, MovieLensDataset.ITEMS_FILENAME);
+		File ratings = new File(dataDir, MovieLensDataset.RATINGS_FILENAME);
+		this.dataset = new MovieLensDataset("MovieLensDataset", users, items,
+				ratings);
+	}
+
+	/** Prints at most the first printNum entries of the list. */
+	private void printFirstN(List<PredictedItemRating> sortedRecommendations,
+			int printNum) {
+		for (int i = 0, n = sortedRecommendations.size(); i < n && i < printNum; i++) {
+			System.out.println(sortedRecommendations.get(i));
+		}
+	}
+
+	/**
+	 * Prints the item id and rating of the lowest and of the highest rated
+	 * prediction. The search starts from the sentinels 6.0 (minimum) and 0.0
+	 * (maximum), which are printed with id 0 when nothing replaces them.
+	 */
+	private void printMinMax(List<PredictedItemRating> c) {
+		int minId = 0;
+		double minIdRating = 6.0;
+		int maxId = 0;
+		double maxIdRating = 0.0;
+		for (PredictedItemRating r : c) {
+			if (r.getRating() < minIdRating) {
+				minId = r.getItemId();
+				minIdRating = r.getRating();
+			}
+			if (r.getRating() > maxIdRating) {
+				maxId = r.getItemId();
+				maxIdRating = r.getRating();
+			}
+		}
+		System.out.println("minId=" + minId + ",minIdRating=" + minIdRating
+				+ ",maxId=" + maxId + ",maxIdRating=" + maxIdRating);
+	}
+
+	/**
+	 * Builds an item-penalty-based Delphi, prints how long that took, and
+	 * prints the recommendations for the ids 4, 3, 100 and 50.
+	 */
+	private void recommendOnMovieLens(boolean useSimilarityCache)
+			throws Exception {
+
+		long start = System.currentTimeMillis();
+		Delphi delphi = new Delphi(dataset,
+				RecommendationType.ITEM_PENALTY_BASED, useSimilarityCache);
+		System.out.println("Time:" + (System.currentTimeMillis() - start)
+				/ 1000 + "(sec)");
+
+		for (int id : new int[] { 4, 3, 100, 50 }) { // presumably user ids
+			List<PredictedItemRating> r = delphi.recommend(id);
+			System.out.println(id + ": size: " + r.size());
+			printMinMax(r);
+			printFirstN(r, 3);
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/examples/search/DocRank.java b/src/org/yooreeka/examples/search/DocRank.java
new file mode 100644
index 0000000..df50dd0
--- /dev/null
+++ b/src/org/yooreeka/examples/search/DocRank.java
@@ -0,0 +1,57 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.search;
+
+import org.yooreeka.algos.search.ranking.DocRankMatrixBuilder;
+import org.yooreeka.algos.search.ranking.PageRankMatrixH;
+import org.yooreeka.algos.search.ranking.Rank;
+
+/**
+ * A PageRank-like algorithm for documents.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class DocRank extends Rank {
+
+	DocRankMatrixBuilder docRankBuilder;
+
+	public DocRank(String luceneIndexDir, int termsToKeep) {
+		docRankBuilder = new DocRankMatrixBuilder(luceneIndexDir);
+		docRankBuilder.setTermsToKeep(termsToKeep);
+		docRankBuilder.run();
+	}
+
+	@Override
+	public PageRankMatrixH getH() {
+		return docRankBuilder.getH();
+	}
+}
diff --git a/src/org/yooreeka/examples/search/LuceneIndexer.java b/src/org/yooreeka/examples/search/LuceneIndexer.java
new file mode 100644
index 0000000..66e0c73
--- /dev/null
+++ b/src/org/yooreeka/examples/search/LuceneIndexer.java
@@ -0,0 +1,87 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.search;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.yooreeka.algos.search.lucene.LuceneIndexBuilder;
+import org.yooreeka.util.internet.crawling.core.CrawlData;
+import org.yooreeka.util.internet.crawling.core.CrawlDataProcessor;
+import org.yooreeka.util.internet.crawling.util.FileUtils;
+
+public class LuceneIndexer {
+
+	private String baseDir;
+
+	private String luceneIndexDir;
+
+	public LuceneIndexer(String dir) {
+		baseDir = dir;
+		luceneIndexDir = baseDir + File.separator + "lucene-index";
+	}
+
+	public String getLuceneDir() {
+		return luceneIndexDir;
+	}
+
+	/**
+	 * Rebuilds the Lucene index from the crawl data under the base directory,
+	 * deleting any existing index directory first.
+	 * 
+	 * @throws IllegalStateException if the index builder cannot be created
+	 */
+	public void run() {
+		// load existing data
+		CrawlData crawlData = new CrawlData(baseDir);
+		crawlData.init();
+
+		File luceneIndexRootDir = new File(getLuceneDir());
+
+		// Delete the index directory, if it exists
+		FileUtils.deleteDir(luceneIndexRootDir);
+		luceneIndexRootDir.mkdirs();
+
+		CrawlDataProcessor luceneIndexBuilder;
+		try {
+			luceneIndexBuilder = new LuceneIndexBuilder(luceneIndexRootDir, crawlData);
+		} catch (IOException e) {
+			// Without a builder there is nothing to run: fail with the cause
+			// rather than with a NullPointerException a few lines below.
+			throw new IllegalStateException("Cannot create Lucene index in "
+					+ luceneIndexRootDir, e);
+		}
+
+		System.out.print("Starting the indexing ... ");
+		luceneIndexBuilder.run();
+		System.out.println("Indexing completed! \n");
+	}
+}
diff --git a/src/org/yooreeka/examples/search/MySearcher.java b/src/org/yooreeka/examples/search/MySearcher.java
new file mode 100644
index 0000000..c66a174
--- /dev/null
+++ b/src/org/yooreeka/examples/search/MySearcher.java
@@ -0,0 +1,360 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
+import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.yooreeka.algos.search.data.SearchResult;
+import org.yooreeka.algos.search.lucene.LuceneIndexBuilder;
+import org.yooreeka.algos.search.ranking.Rank;
+import org.yooreeka.algos.taxis.bayesian.NaiveBayes;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.util.internet.behavior.UserClick;
+import org.yooreeka.util.internet.behavior.UserQuery;
+
+public class MySearcher {
+
+	/**
+	 * An arbitrary small value
+	 */
+	public static final double EPSILON = 0.0001;
+
+	private static final String PRETTY_LINE = "_______________________________________________________________________";
+
+	private File indexFile;
+	private NaiveBayes learner = null;
+
+	private boolean verbose = true;
+
+	public MySearcher(String indexDir) { // indexDir: path of the Lucene index directory to search
+		indexFile = new File(indexDir);
+	}
+
+	public boolean isVerbose() { // when true, every search prints its results to standard output
+		return verbose;
+	}
+
+	/**
+	 * Prints a header, the optional query line and one entry per result to
+	 * standard output. Does nothing unless verbose is set.
+	 * @param values results to print; null is treated like an empty array
+	 * @param showDocTitle if true, each entry also shows its title and a separator line
+	 */
+	private void printResults(String header, String query,
+			SearchResult[] values, boolean showDocTitle) {
+		if (!verbose) {
+			return;
+		}
+		StringWriter sw = new StringWriter();
+		PrintWriter pw = new PrintWriter(sw);
+
+		pw.print("\n");
+		pw.println(header);
+		if (query != null) {
+			pw.println(query);
+		}
+		pw.print("\n");
+		// Tolerate a missing array instead of failing with a NullPointerException.
+		int n = (values == null) ? 0 : values.length;
+		for (int i = 0; i < n; i++) {
+			if (values[i] == null) {
+				pw.printf("Document: %s\n", "Not available, values[i] is NULL");
+				continue;
+			}
+			if (showDocTitle) {
+				pw.printf("Document Title: %s\n", values[i].getTitle());
+			}
+			pw.printf("Document URL: %-46s  -->  Relevance Score: %.15f\n",
+					values[i].getUrl(), values[i].getScore());
+			if (showDocTitle) { // multiple lines per entry: separate the entries
+				pw.print(PRETTY_LINE);
+				pw.print("\n");
+			}
+		}
+		if (!showDocTitle) {
+			pw.print(PRETTY_LINE);
+		}
+		System.out.println(sw.toString());
+	}
+
+	/**
+	 * Runs the query against the Lucene index and prints the hits.
+	 * 
+	 * @param query
+	 *            the query text; parsed against the indexed content field
+	 * @param numberOfMatches
+	 *            the maximum number of hits to retrieve
+	 * @return the hits in the order Lucene returned them; an empty array (never
+	 *         null) if the index could not be opened or the query not parsed.
+	 *         Trailing entries are null if reading a hit failed part-way.
+	 */
+	public SearchResult[] search(String query, int numberOfMatches) {
+
+		// Never hand null to callers: the other overloads dereference the result.
+		SearchResult[] docResults = new SearchResult[0];
+
+		Directory dir = null;
+		DirectoryReader dirReader = null;
+		try {
+			dir = FSDirectory.open(indexFile);
+			dirReader = DirectoryReader.open(dir);
+			IndexSearcher is = new IndexSearcher(dirReader);
+
+			StandardQueryParser queryParserHelper = new StandardQueryParser();
+			Query q = queryParserHelper.parse(query,
+					LuceneIndexBuilder.INDEX_FIELD_CONTENT);
+
+			TopDocs hits = is.search(q, numberOfMatches);
+
+			docResults = new SearchResult[hits.scoreDocs.length];
+
+			for (int i = 0; i < hits.scoreDocs.length; i++) {
+
+				Document hitDoc = is.doc(hits.scoreDocs[i].doc);
+
+				docResults[i] = new SearchResult(hitDoc.get("docid"),
+						hitDoc.get("doctype"), hitDoc.get("title"),
+						hitDoc.get("url"), hits.scoreDocs[i].score);
+			}
+
+		} catch (IOException ioX) {
+			System.out.println("ERROR: " + ioX.getMessage());
+		} catch (Exception e) {
+			// includes the QueryNodeException thrown by the query parser
+			e.printStackTrace();
+		} finally {
+			// Release the index even if the search failed part-way; the reader
+			// is closed before the directory it was opened on.
+			try {
+				if (dirReader != null) {
+					dirReader.close();
+				}
+				if (dir != null) {
+					dir.close();
+				}
+			} catch (IOException ioX) {
+				System.out.println("ERROR: " + ioX.getMessage());
+			}
+		}
+
+		String header = "Search results using Lucene index scores:";
+		boolean showTitle = true;
+		printResults(header, "Query: " + query, docResults, showTitle);
+
+		return docResults;
+	}
+
+	/**
+	 * Re-scores the hits of the plain index search with their PageRank value:
+	 * each score is multiplied by pageRank^m, where m = 1 - 1/n and n is the
+	 * size of the matrix H. The array is then handed to
+	 * SearchResult.sortByScore and printed.
+	 * @param query
+	 *            the query text
+	 * @param numberOfMatches
+	 *            the maximum number of hits requested from the index
+	 * @param pR
+	 *            supplies the matrix H and the PageRank value of each URL
+	 * @return the re-scored results
+	 */
+	public SearchResult[] search(String query, int numberOfMatches, Rank pR) {
+
+		SearchResult[] docResults = search(query, numberOfMatches);
+
+		/*
+		 * TODO: 2.3 -- The PageRank scaling factor m (Book Section 2.3)
+		 * 
+		 * When the number of pages in your graph are few, the PageRank values
+		 * need some boosting. As the number of pages increases m approaches the
+		 * value 1 quickly because 1/n goes to zero.
+		 */
+		int n = pR.getH().getSize();
+		double m = 1 - (double) 1 / n;
+
+		// Only the leading run of non-null results is re-scored.
+		for (int idx = 0; idx < docResults.length; idx++) {
+			SearchResult hit = docResults[idx];
+			if (hit == null) {
+				break;
+			}
+			String url = hit.getUrl();
+			double luceneScore = hit.getScore();
+			hit.setScore(luceneScore * Math.pow(pR.getPageRank(url), m));
+		}
+
+		// sort results by score
+		SearchResult.sortByScore(docResults);
+
+		String header = "Search results using combined Lucene scores and page rank scores:";
+		printResults(header, "Query: " + query, docResults, false);
+
+		return docResults;
+	}
+
+	/**
+	 * A method that combines the score of an index based search and the score
+	 * of the PageRank algorithm to achieve better relevance results, while
+	 * personalizing the result set based on past user clicks on the same or
+	 * similar queries.
+	 * 
+	 * NOTE: You would typically refactor all these search methods in order to
+	 * consider it production quality code. Here, we repeat the code of the
+	 * previous method, so that it is easier to read.
+	 * 
+	 * @param uQuery
+	 *            carries the query text and the id of the user who issues it
+	 * @param numberOfMatches
+	 *            defines the maximum number of desired matches
+	 * @param pR
+	 *            queried for the PageRank value of each result URL
+	 * @return the result set; when no user learner has been set, every result
+	 *         is scored as if there were no click information for it
+	 */
+	public SearchResult[] search(UserQuery uQuery, int numberOfMatches, Rank pR) {
+
+		SearchResult[] docResults = search(uQuery.getQueryString(),
+				numberOfMatches);
+		if (docResults == null) {
+			// the index search produced no array at all; nothing to re-score
+			return new SearchResult[0];
+		}
+
+		int loop = Math.min(docResults.length, numberOfMatches);
+
+		for (int i = 0; i < loop; i++) {
+
+			if (docResults[i] == null) {
+				continue; // empty slot left by the index search
+			}
+
+			String url = docResults[i].getUrl();
+
+			UserClick uClick = new UserClick(uQuery, url);
+
+			/*
+			 * TODO: 2.6 -- Weighing the scores to meet your needs (Book
+			 * Section 2.4.2)
+			 * 
+			 * At this point, we have three scores of relevance. The
+			 * relevance score that is based on the index search, the
+			 * PageRank score, and the score that is based on the user's
+			 * prior selections. There is no golden formula for everybody.
+			 * Below we are selecting a formula that we think would make
+			 * sense for most people.
+			 * 
+			 * Feel free to change the formula, experiment with different
+			 * weighting factors, to find out the choices that are most
+			 * appropriate for your own site.
+			 * 
+			 */
+			double indexScore = docResults[i].getScore();
+
+			double pageRankScore = pR.getPageRank(url);
+
+			// Stays 0 when there is no learner or no concept named like the URL.
+			double userClickScore = 0.0;
+
+			if (learner != null) {
+				for (Concept bC : learner.getTset().getConceptSet()) {
+					if (bC.getName().equalsIgnoreCase(url)) {
+						userClickScore = learner.getProbability(bC, uClick);
+					}
+				}
+			}
+
+			// Create the final score
+			double hScore;
+
+			if (userClickScore == 0) {
+				hScore = indexScore * pageRankScore * EPSILON;
+			} else {
+				hScore = indexScore * pageRankScore * userClickScore;
+			}
+
+			// Update the score of the results
+			docResults[i].setScore(hScore);
+
+			/*
+			 * Uncomment this block to show the various scores in the
+			 * BeanShell
+			 * 
+			 * StringBuilder b = new StringBuilder();
+			 * 
+			 * System.out.println(
+			 * "________________________________________________________________________________"
+			 * );
+			 * 
+			 * b.append("Document      : ").append(docResults[i].getUrl()).
+			 * append("\n");
+			 * b.append("UserClick URL :").append(uClick.getUrl
+			 * ()).append("\n"); b.append("\n");
+			 * b.append("Index score: ").append(indexScore).append(", ");
+			 * b.append
+			 * ("PageRank score: ").append(pageRankScore).append(", ");
+			 * b.append("User click score: ").append(userClickScore);
+			 * System.out.println(b.toString());
+			 */
+		}
+
+		// Sort array of results
+		SearchResult.sortByScore(docResults);
+
+		String header = "Search results using combined Lucene scores, "
+				+ "page rank scores and user clicks:";
+		String query = "Query: user=" + uQuery.getUid() + ", query text="
+				+ uQuery.getQueryString();
+		boolean showTitle = false;
+		printResults(header, query, docResults, showTitle);
+
+		return docResults;
+	}
+
+	public void setUserLearner(NaiveBayes nb) { // learner queried by the UserQuery search for click-based scores
+		learner = nb;
+	}
+
+	public void setVerbose(boolean verbose) { // turns the printing of search results on or off
+		this.verbose = verbose;
+	}
+
+}
diff --git a/src/org/yooreeka/examples/search/PageRank.java b/src/org/yooreeka/examples/search/PageRank.java
new file mode 100644
index 0000000..a571237
--- /dev/null
+++ b/src/org/yooreeka/examples/search/PageRank.java
@@ -0,0 +1,56 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.search;
+
+import org.yooreeka.algos.search.ranking.PageRankMatrixBuilder;
+import org.yooreeka.algos.search.ranking.PageRankMatrixH;
+import org.yooreeka.algos.search.ranking.Rank;
+import org.yooreeka.util.internet.crawling.core.CrawlData;
+
+public class PageRank extends Rank {
+
+	PageRankMatrixBuilder pageRankBuilder;
+
+	public PageRank(CrawlData crawlData) {
+		// Best effort: a failure is only printed, so the builder may be missing or not fully run.
+		try {
+			this.pageRankBuilder = new PageRankMatrixBuilder(crawlData);
+			this.pageRankBuilder.run();
+		} catch (Exception buildFailure) {
+			buildFailure.printStackTrace();
+		}
+	}
+
+	@Override
+	public PageRankMatrixH getH() {
+		return this.pageRankBuilder.getH();
+	}
+}
diff --git a/src/org/yooreeka/examples/spamfilter/EmailClassifier.java b/src/org/yooreeka/examples/spamfilter/EmailClassifier.java
new file mode 100644
index 0000000..a1993f9
--- /dev/null
+++ b/src/org/yooreeka/examples/spamfilter/EmailClassifier.java
@@ -0,0 +1,247 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.spamfilter;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.bayesian.NaiveBayes;
+import org.yooreeka.algos.taxis.core.AttributeValue;
+import org.yooreeka.algos.taxis.core.intf.Attribute;
+import org.yooreeka.algos.taxis.core.intf.Concept;
+import org.yooreeka.algos.taxis.core.intf.Instance;
+import org.yooreeka.examples.spamfilter.data.Email;
+import org.yooreeka.examples.spamfilter.data.EmailData;
+import org.yooreeka.examples.spamfilter.data.EmailDataset;
+import org.yooreeka.util.metrics.JaccardCoefficient;
+
+public class EmailClassifier extends NaiveBayes {
+
+	private EmailDataset emailDataset;
+	private int topNTerms;
+	private boolean verbose = true;
+	private double jaccardThreshold = 0.25;
+
+	public EmailClassifier(EmailDataset emailDataset, int topNTerms) {
+		super("EmailClassifier", emailDataset.getTrainingSet(topNTerms)); // training set comes from the dataset
+		this.emailDataset = emailDataset;
+		this.topNTerms = topNTerms; // reused when classify(Email) converts an email into an instance
+	}
+
+	@Override
+	protected void calculateConditionalProbabilities() {
+
+		p = new HashMap<Concept, Map<Attribute, AttributeValue>>();
+
+		for (Instance instance : tSet.getInstances().values()) {
+
+			// In this specific implementation we have exactly one attribute
+			// In general, you need a loop over the attributes
+			Attribute attribute = instance.getAtrributes()[0];
+
+			// Lazily create the per-concept map of representative attributes.
+			Concept concept = instance.getConcept();
+			Map<Attribute, AttributeValue> representatives = p.get(concept);
+			if (representatives == null) {
+				representatives = new HashMap<Attribute, AttributeValue>();
+				p.put(concept, representatives);
+			}
+
+			/*
+			 * TODO: 5.3
+			 */
+			AttributeValue match = findBestAttributeValue(representatives,
+					attribute);
+			if (match == null) {
+				// register attribute as representative attribute
+				representatives.put(attribute,
+						new AttributeValue(attribute.getValue()));
+			} else {
+				// a matching representative exists: record this instance on it
+				match.count();
+			}
+		}
+	}
+
+	public String classify(Email email) {
+		// Convert with the same topNTerms that was used for the training set.
+		Concept c = classify(emailDataset.toEmailInstance(email, topNTerms));
+		String conceptName = c.getName();
+		if (verbose) {
+			System.out.println("Classified " + email.getId() + " as " + conceptName);
+		}
+		return conceptName;
+	}
+
+	@Override
+	public Concept classify(Instance instance) { // plain delegation to the base class
+		return super.classify(instance);
+	}
+
+	/**
+	 * Finds best match for attribute value among existing attribute value
+	 * representatives.
+	 * 
+	 * @param aMap
+	 *            map of all attribute representatives; may be null or empty,
+	 *            in which case there is nothing to match against
+	 * @param a
+	 *            new attribute to compare against
+	 * @return representative attribute value that is the best match for the
+	 *         new attribute, or null if no satisfactory match was found
+	 */
+	private AttributeValue findBestAttributeValue(
+			Map<Attribute, AttributeValue> aMap, Attribute a) {
+		// A concept without registered representatives cannot match anything.
+		if (aMap == null || aMap.isEmpty()) {
+			return null;
+		}
+		JaccardCoefficient jaccardCoeff = new JaccardCoefficient();
+		String[] aTerms = ((String) a.getValue()).split(" ");
+		AttributeValue bestMatch = null;
+		double bestSim = 0.0;
+
+		/*
+		 * Here we only check representative attribute values. Other attribute
+		 * values associated with representative attribute values will be
+		 * ignored by this implementation.
+		 */
+		for (Map.Entry<Attribute, AttributeValue> entry : aMap.entrySet()) {
+			String[] attrTerms = ((String) entry.getKey().getValue()).split(" ");
+			double sim = jaccardCoeff.similarity(aTerms, attrTerms);
+			if (sim > jaccardThreshold && sim > bestSim) {
+				bestSim = sim;
+				bestMatch = entry.getValue();
+			}
+		}
+		return bestMatch;
+	}
+
+	/**
+	 * @return the similarity a representative must exceed to count as a match
+	 */
+	public double getJaccardThreshold() {
+		return jaccardThreshold;
+	}
+
+	@Override
+	public double getProbability(Instance i, Concept c) {
+
+		double cP = 1;
+
+		for (Attribute a : i.getAtrributes()) {
+
+			// Skip missing attributes and those whose name is not in attributeList.
+			if (a == null || !attributeList.contains(a.getName())) {
+				continue;
+			}
+
+			AttributeValue bestAttributeValue = findBestAttributeValue(
+					p.get(c), a);
+
+			if (bestAttributeValue != null) {
+				cP *= (bestAttributeValue.getCount() / conceptPriors.get(c));
+			} else {
+				// the specific attribute value is not present for the
+				// current concept.
+				// Can you justify the following estimate?
+				// Can you think of a better choice?
+				cP *= ((double) 1 / (tSet.getSize() + 1));
+			}
+		}
+
+		// cP is still exactly 1 when no attribute contributed a factor (or the
+		// factors happened to multiply to 1): use 1 / number of concepts.
+		return (cP == 1) ? (double) 1 / tSet.getNumberOfConcepts() : cP;
+	}
+
+	public void sample() {
+
+		Email email;
+		// TRAINING SET
+		System.out.println("________________________________________________");
+		System.out.println("Validating with emails from the training dataset");
+		System.out.println("________________________________________________");
+		email = emailDataset.findEmailById("biz-04.html");
+		classify(email);
+
+		email = emailDataset.findEmailById("usa-03.html");
+		classify(email);
+
+		// TEST SET
+		System.out.println("_______________________________________________");
+		System.out.println("Testing with unseen emails");
+		System.out.println("_______________________________________________");
+
+		EmailDataset testEmailDS = EmailData.createTestDataset();
+		email = testEmailDS.findEmailById("biz-01.html");
+		classify(email);
+
+		email = testEmailDS.findEmailById("sport-01.html");
+		classify(email);
+
+		email = testEmailDS.findEmailById("usa-01.html");
+		classify(email);
+
+		email = testEmailDS.findEmailById("world-01.html");
+		classify(email);
+
+		email = testEmailDS.findEmailById("spam-biz-01.html");
+		classify(email);
+	}
+
+	/**
+	 * @param jaccardThreshold
+	 *            similarity a representative must exceed to count as a match
+	 */
+	public void setJaccardThreshold(double jaccardThreshold) {
+		this.jaccardThreshold = jaccardThreshold;
+	}
+
+	/**
+	 * Registers every attribute name of the training set, then trains.
+	 * @return false if the dataset is empty, else the result of the base class
+	 */
+	@Override
+	public boolean train() {
+		if (emailDataset.getSize() == 0) {
+			System.out.println("Can't train classifier - training dataset is empty.");
+			return false;
+		}
+
+		for (String attrName : getTset().getAttributeNameSet()) {
+			trainOnAttribute(attrName);
+		}
+		// Propagate the base class outcome instead of always claiming success.
+		return super.train();
+	}
+
+}
diff --git a/src/org/yooreeka/examples/spamfilter/EmailInstance.java b/src/org/yooreeka/examples/spamfilter/EmailInstance.java
new file mode 100644
index 0000000..85436c6
--- /dev/null
+++ b/src/org/yooreeka/examples/spamfilter/EmailInstance.java
@@ -0,0 +1,86 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.spamfilter;
+
+import java.util.Map;
+
+import org.yooreeka.algos.reco.collab.model.Content;
+import org.yooreeka.algos.taxis.core.BaseConcept;
+import org.yooreeka.algos.taxis.core.BaseInstance;
+import org.yooreeka.algos.taxis.core.StringAttribute;
+import org.yooreeka.examples.spamfilter.data.Email;
+
+/**
+ * Instance for classification. It carries a single string attribute built
+ * from the terms extracted from the email's subject and text body.
+ */
+public class EmailInstance extends BaseInstance {
+
+	private static final int DEFAULT_TOP_N_TERMS = 10;
+
+	private String id;
+
+	public EmailInstance(String emailCategory, Email email) {
+		this(emailCategory, email, DEFAULT_TOP_N_TERMS);
+	}
+
+	public EmailInstance(String emailCategory, Email email, int topNTerms) {
+		super();
+		this.id = email.getId();
+		// email category is our concept/class
+		this.setConcept(new BaseConcept(emailCategory));
+
+		/*
+		 * TODO: 5.3 -- Considering more attributes as part of the EmailInstance
+		 * 
+		 * -- Separate "subject" and "body" -- timestamp -- "from" -- "to" --
+		 * "to" cardinality
+		 */
+		// extract top N terms from email content and subject
+		String text = email.getSubject() + " " + email.getTextBody();
+		Content content = new Content(email.getId(), text, topNTerms);
+
+		// Each term is preceded by one space, so a non-empty value starts with a space.
+		StringBuilder attrValue = new StringBuilder();
+		for (String term : content.getTFMap().keySet()) {
+			attrValue.append(' ').append(term);
+		}
+
+		attributes = new StringAttribute[1];
+		attributes[0] = new StringAttribute("Email_Text_Attribute",
+				attrValue.toString());
+	}
+
+	@Override
+	public String toString() {
+		return id;
+	}
+}
diff --git a/src/org/yooreeka/examples/spamfilter/data/Email.java b/src/org/yooreeka/examples/spamfilter/data/Email.java
new file mode 100644
index 0000000..6432704
--- /dev/null
+++ b/src/org/yooreeka/examples/spamfilter/data/Email.java
@@ -0,0 +1,119 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.spamfilter.data;
+
+/**
+ * Represents one email document: id, sender, recipient, subject and text body.
+ */
+public class Email {
+
+	/**
+	 * ID that we will use to identify email.
+	 */
+	private String id;
+
+	/**
+	 * Email subject line
+	 */
+	private String subject;
+
+	/**
+	 * Email Text body
+	 */
+	private String textBody;
+
+	private String from; // sender address
+
+	private String to; // recipient address
+
+	int ruleFired = 0; // 0 until setRuleFired() stores a rule number
+
+	public Email() {
+		// intentionally empty: the fields are filled in through the setters
+	}
+
+	public String getFrom() {
+		return from;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public int getRuleFired() {
+		return ruleFired;
+	}
+
+	public String getSubject() {
+		return subject;
+	}
+
+	public String getTextBody() {
+		return textBody;
+	}
+
+	public String getTo() {
+		return to;
+	}
+
+	public void setFrom(String from) {
+		this.from = from;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public void setRuleFired(int ruleNum) {
+		System.out.println("Invoked " + this.getClass().getSimpleName()
+				+ ".setRuleFired(" + ruleNum + "), current value ruleFired="
+				+ this.ruleFired + ", emailId: " + id);
+		this.ruleFired = ruleNum;
+	}
+
+	public void setSubject(String subject) {
+		this.subject = subject;
+	}
+
+	public void setTextBody(String textBody) {
+		this.textBody = textBody;
+	}
+
+	public void setTo(String to) {
+		this.to = to;
+	}
+
+	@Override
+	public String toString() {
+		return "id: " + id + "\n" + "from: " + from + "\n" + "to: " + to + "\n"
+				+ "subject: " + subject + "\n" + textBody + "\n";
+	}
+}
diff --git a/src/org/yooreeka/examples/spamfilter/data/EmailData.java b/src/org/yooreeka/examples/spamfilter/data/EmailData.java
new file mode 100644
index 0000000..6da1161
--- /dev/null
+++ b/src/org/yooreeka/examples/spamfilter/data/EmailData.java
@@ -0,0 +1,223 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.spamfilter.data;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.examples.spamfilter.EmailClassifier;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+import org.yooreeka.util.parsing.html.HTMLDocumentParser;
+
+public class EmailData {
+
+	/*
+	 * Html files that we will treat as emails; each row is { file path, "from", "to" }.
+	 */
+	public static String[][] TRAINING_DATA = new String[][] {
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-02.html",
+					"A@sengerhost", "1@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-03.html",
+					"B@sengerhost", "2@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-04.html",
+					"C@sengerhost", "3@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-05.html",
+					"D@sengerhost", "4@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-06.html",
+					"E@sengerhost", "5@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-07.html",
+					"F@sengerhost", "6@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/sport-02.html",
+					"G@sengerhost", "7@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/sport-03.html",
+					"H@sengerhost", "8@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/usa-02.html",
+					"I@sengerhost", "9@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/usa-03.html",
+					"J@sengerhost", "10@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/usa-04.html",
+					"K@sengerhost", "11@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/world-02.html",
+					"L@sengerhost", "12@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/world-03.html",
+					"M@sengerhost", "13@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/world-04.html",
+					"N@sengerhost", "14@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/world-05.html",
+					"O@sengerhost", "15@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/spam-biz-02.html",
+					"P@sengerhost", "16@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/spam-biz-03.html",
+					"Q@sengerhost", "17@host" } };
+
+	public static String[][] TEST_DATA = new String[][] {
+			{ YooreekaConfigurator.getHome() + "/data/ch02/biz-01.html",
+					"aa@senderhost", "100@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/sport-01.html",
+					"bb@senderhost", "101@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/usa-01.html",
+					"cc@senderhost", "102@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/world-01.html",
+					"dd@senderhost", "103@host" },
+			{ YooreekaConfigurator.getHome() + "/data/ch02/spam-biz-01.html",
+					"friend@senderhost", "104@host" } };
+
+	public static EmailDataset createTestDataset() {
+		List<Email> allEmails = loadEmails(TEST_DATA);
+		return new EmailDataset(allEmails);
+	}
+
+	public static EmailDataset createTrainingDataset() {
+		List<Email> allEmails = loadEmails(TRAINING_DATA);
+		return new EmailDataset(allEmails);
+	}
+
+	public static Email loadEmailFromHtml(String htmlFile) {
+
+		ProcessedDocument htmlDoc = processHtmlDoc(htmlFile);
+		Email email = new Email();
+		email.setSubject(htmlDoc.getDocumentTitle());
+		email.setTextBody(htmlDoc.getText());
+
+		return email;
+	}
+
+	public static List<Email> loadEmails(String[][] allEmails) {
+		// Each row of allEmails is { html file path, "from" address, "to" address }.
+		List<Email> emailList = new ArrayList<Email>();
+		for (String[] emailData : allEmails) {
+			String filename = emailData[0];
+			Email email = loadEmailFromHtml(filename);
+			email.setFrom(emailData[1]);
+			email.setTo(emailData[2]);
+			// use the file name (the text after the last '/') as unique id
+			String id = filename.substring(filename.lastIndexOf("/") + 1);
+			email.setId(id);
+
+			emailList.add(email);
+		}
+
+		return emailList;
+	}
+
+	public static void main(String[] args) {
+		// // Create and train classifier
+		// EmailDataset trainEmailDS = EmailData.createTrainingDataset();
+		// EmailClassifier emailClassifier = new EmailClassifier(trainEmailDS,
+		// 10);
+		// emailClassifier.train();
+		//
+		// // Let's classify some emails from training set. If we can't get them
+		// right
+		// // then we are in trouble :-)
+		// Email email = null;
+		// email = trainEmailDS.findEmailById("biz-04.html");
+		// emailClassifier.classify(email);
+		//
+		// email = trainEmailDS.findEmailById("usa-03.html");
+		// emailClassifier.classify(email);
+		//
+		// // Now, let's classify previously unseen emails
+		//
+		// EmailDataset testEmailDS = EmailData.createTestDataset();
+		// email = testEmailDS.findEmailById("biz-01.html");
+		// emailClassifier.classify(email);
+		//
+		// email = testEmailDS.findEmailById("sport-01.html");
+		// emailClassifier.classify(email);
+		//
+		// email = testEmailDS.findEmailById("usa-01.html");
+		// emailClassifier.classify(email);
+		//
+		// email = testEmailDS.findEmailById("world-01.html");
+		// emailClassifier.classify(email);
+		//
+		// email = testEmailDS.findEmailById("spam-biz-01.html");
+		// emailClassifier.classify(email);
+
+		// Create and train classifier
+		EmailDataset trainEmailDS = EmailData.createTrainingDataset();
+		EmailClassifier spamFilter = new EmailClassifier(trainEmailDS, 10);
+		spamFilter.train();
+
+		// Let's classify some emails from training set. If we can't get them
+		// right
+		// then we are in trouble :-)
+		Email email = null;
+		email = trainEmailDS.findEmailById("biz-04.html");
+		spamFilter.classify(email);
+
+		email = trainEmailDS.findEmailById("usa-03.html");
+		spamFilter.classify(email);
+
+		// Now, let's classify previously unseen emails
+
+		EmailDataset testEmailDS = EmailData.createTestDataset();
+		email = testEmailDS.findEmailById("biz-01.html");
+		spamFilter.classify(email);
+
+		email = testEmailDS.findEmailById("sport-01.html");
+		spamFilter.classify(email);
+
+		email = testEmailDS.findEmailById("usa-01.html");
+		spamFilter.classify(email);
+
+		email = testEmailDS.findEmailById("world-01.html");
+		spamFilter.classify(email);
+
+		email = testEmailDS.findEmailById("spam-biz-01.html");
+		spamFilter.classify(email);
+
+	}
+
+	private static ProcessedDocument processHtmlDoc(String htmlFile) {
+		// Parses the file as UTF-8 html; the file handle is released even when parsing fails.
+		InputStream inputStream = null;
+		try {
+			inputStream = new BufferedInputStream(new FileInputStream(htmlFile));
+			Reader reader = new InputStreamReader(inputStream, "UTF-8");
+			return new HTMLDocumentParser().parse(reader);
+		} catch (Exception e) {
+			throw new RuntimeException("Failed to parse html from file: "
+					+ htmlFile, e);
+		} finally {
+			if (inputStream != null) {
+				try { inputStream.close(); } catch (Exception ignored) { /* best-effort close */ }
+			}
+		}
+	}
+}
diff --git a/src/org/yooreeka/examples/spamfilter/data/EmailDataset.java b/src/org/yooreeka/examples/spamfilter/data/EmailDataset.java
new file mode 100644
index 0000000..3382ce0
--- /dev/null
+++ b/src/org/yooreeka/examples/spamfilter/data/EmailDataset.java
@@ -0,0 +1,137 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.examples.spamfilter.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.algos.taxis.core.TrainingSet;
+import org.yooreeka.examples.spamfilter.EmailInstance;
+
+public class EmailDataset {
+
+	private Map<String, Email> emails;
+
+	// By default we set up an email dataset for binary classification
+	private boolean isBinary = true;
+
+	public EmailDataset(List<Email> emailList) {
+		this.emails = new HashMap<String, Email>(emailList.size());
+		for (Email e : emailList) {
+			emails.put(e.getId(), e);
+		}
+	}
+
+	private List<EmailInstance> createEmailInstances(int topNTerms) {
+		List<EmailInstance> allInstances = new ArrayList<EmailInstance>();
+		for (Email email : getEmails()) {
+			EmailInstance i = toEmailInstance(email, topNTerms);
+			allInstances.add(i);
+		}
+		return allInstances;
+	}
+
+	public Email findEmailById(String id) {
+		return emails.get(id);
+	}
+
+	private String getEmailCategory(Email email) {
+		// binary mode: "SPAM" / "NOT SPAM" by id prefix; otherwise the id prefix itself, upper-cased
+		if (isBinary()) {
+			if (email.getId().startsWith("spam-")) {
+				return "SPAM";
+			} else {
+				return "NOT SPAM";
+			}
+		} else {
+			// relying on id to have pattern: "biz-???", "world-???", ...
+			String[] parts = email.getId().split("-");
+			if (parts.length < 2) {
+				throw new RuntimeException(
+						"Unsupported id format. Expected id format: '<category>-???'");
+			}
+			return parts[0].toUpperCase(java.util.Locale.ENGLISH); // fixed locale: no Turkish dotted I
+		}
+	}
+
+	public List<Email> getEmails() {
+		return new ArrayList<Email>(emails.values());
+	}
+
+	public int getSize() {
+		return emails.size();
+	}
+
+	public TrainingSet getTrainingSet(int topNTerms) {
+		List<EmailInstance> allInstances = createEmailInstances(topNTerms);
+		EmailInstance[] instances = allInstances
+				.toArray(new EmailInstance[allInstances.size()]);
+		return new TrainingSet(instances);
+	}
+
+	/**
+	 * @return the isBinary
+	 */
+	public boolean isBinary() {
+		return isBinary;
+	}
+
+	public void printAll() {
+		for (Map.Entry<String, Email> e : emails.entrySet()) {
+			Email email = e.getValue();
+			System.out.println(email);
+		}
+	}
+
+	public void printEmail(String id) {
+		Email e = findEmailById(id);
+		if (e != null) {
+			System.out.println(e.toString());
+		} else {
+			System.out.println("Email not found (email id: '" + id + "')");
+		}
+	}
+
+	/**
+	 * @param isBinary
+	 *            the isBinary to set
+	 */
+	public void setBinary(boolean isBinary) {
+		this.isBinary = isBinary;
+	}
+
+	public EmailInstance toEmailInstance(Email email, int topNTerms) {
+		String emailCategory = getEmailCategory(email);
+		return new EmailInstance(emailCategory, email, topNTerms);
+	}
+}
diff --git a/src/org/yooreeka/util/C.java b/src/org/yooreeka/util/C.java
new file mode 100644
index 0000000..89c70da
--- /dev/null
+++ b/src/org/yooreeka/util/C.java
@@ -0,0 +1,64 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009    Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *
+ */
+package org.yooreeka.util;
+
+/**
+ * Shared numeric and string constants, so that code can refer to a name
+ * instead of repeating a literal value.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class C {
+
+	/*
+	 * Numeric constants: zero and one as int, long and double.
+	 */
+	public static final int ZERO_INT = 0;
+	public static final long ZERO_LONG = 0L;
+	public static final double ZERO_DOUBLE = 0.0d;
+
+	public static final int ONE_INT = 1;
+	public static final long ONE_LONG = 1L;
+	public static final double ONE_DOUBLE = 1.0d;
+
+	/*
+	 * String constants: the empty string, the line feed and common separators.
+	 */
+	public static final String EMPTY_STRING = "";
+	public static final String LINE_FEED = "\n";
+	public static final String UNDERSCORE = "_";
+	public static final String DASH = "-";
+	public static final String SEMICOLON = ";";
+	public static final String COMMA = ",";
+	public static final String COLON = ":";
+	public static final String DOT = ".";
+
+}
diff --git a/src/org/yooreeka/util/P.java b/src/org/yooreeka/util/P.java
new file mode 100644
index 0000000..6a43ac1
--- /dev/null
+++ b/src/org/yooreeka/util/P.java
@@ -0,0 +1,57 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util;
+
+import java.nio.charset.Charset;
+
+/**
+ * Shortcuts for printing to standard output.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class P {
+
+	/**
+	 * Print a horizontal line: six groups of ten dashes separated by single spaces (65 characters).
+	 */
+	public static void hline() {
+		println("---------- ---------- ---------- ---------- ---------- ----------");
+	}
+
+	public static void main(String[] args) {
+		println(Charset.defaultCharset().displayName());
+		println("" + P.class.getName());
+	}
+
+	public static void println(String s) {
+		System.out.println(s);
+	}
+}
diff --git a/src/org/yooreeka/util/gui/GraphGui.java b/src/org/yooreeka/util/gui/GraphGui.java
new file mode 100644
index 0000000..ad01801
--- /dev/null
+++ b/src/org/yooreeka/util/gui/GraphGui.java
@@ -0,0 +1,152 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.gui;
+
+import java.awt.Color;
+import java.awt.geom.Point2D;
+import java.awt.geom.Rectangle2D;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.swing.BorderFactory;
+import javax.swing.JFrame;
+import javax.swing.JScrollPane;
+import javax.swing.WindowConstants;
+
+import org.jgraph.JGraph;
+import org.jgraph.graph.DefaultCellViewFactory;
+import org.jgraph.graph.DefaultEdge;
+import org.jgraph.graph.DefaultGraphCell;
+import org.jgraph.graph.DefaultGraphModel;
+import org.jgraph.graph.GraphConstants;
+import org.jgraph.graph.GraphLayoutCache;
+import org.jgraph.graph.GraphModel;
+
+public class GraphGui {
+
+	private double nodeWidth = 500;
+	private double nodeHeight = 20;
+
+	private JGraph graph = null;
+
+	private Map<String, DefaultGraphCell> nodeCells = new HashMap<String, DefaultGraphCell>();
+	private List<DefaultGraphCell> edgeCells = new ArrayList<DefaultGraphCell>();
+
+	public GraphGui() {
+		createGraph();
+	}
+
+	public void addEdge(String sourceNodeName, String targetNodeName) {
+		DefaultGraphCell sourceNodeCell = getNodeForEdge(sourceNodeName);
+		DefaultGraphCell targetNodeCell = getNodeForEdge(targetNodeName);
+		DefaultGraphCell edgeCell = createEdge(sourceNodeCell, targetNodeCell);
+		edgeCells.add(edgeCell);
+	}
+
+	public void addNode(String name, String extraText, double x, double y) {
+		String nodeLabel = name;
+		if (extraText != null) {
+			nodeLabel += " (" + extraText + ")";
+		}
+		DefaultGraphCell nodeCell = createCell(nodeLabel, x, y);
+		nodeCells.put(name, nodeCell);
+	}
+
+	private DefaultGraphCell createCell(String name, double x, double y) {
+		DefaultGraphCell cell = new DefaultGraphCell(name);
+		GraphConstants.setBounds(cell.getAttributes(), new Rectangle2D.Double(
+				x, y, nodeWidth, nodeHeight));
+		GraphConstants.setBorder(cell.getAttributes(),
+				BorderFactory.createRaisedBevelBorder());
+		GraphConstants.setOpaque(cell.getAttributes(), true);
+		GraphConstants.setGradientColor(cell.getAttributes(), Color.orange);
+		cell.addPort(new Point2D.Double(0, 0));
+		return cell;
+	}
+
+	private DefaultGraphCell createEdge(DefaultGraphCell source,
+			DefaultGraphCell target) {
+		DefaultEdge edge = new DefaultEdge();
+		source.addPort();
+		edge.setSource(source.getChildAt(source.getChildCount() - 1));
+		target.addPort();
+		edge.setTarget(target.getChildAt(target.getChildCount() - 1));
+		GraphConstants.setLabelAlongEdge(edge.getAttributes(), true);
+		GraphConstants.setLineEnd(edge.getAttributes(),
+				GraphConstants.ARROW_CLASSIC);
+		// GraphConstants.setRouting(edge.getAttributes(),
+		// GraphConstants.ROUTING_DEFAULT);
+		// GraphConstants.setRouting(edge.getAttributes(),
+		// GraphConstants.ROUTING_SIMPLE);
+		return edge;
+	}
+
+	private void createGraph() {
+		GraphModel model = new DefaultGraphModel();
+		GraphLayoutCache view = new GraphLayoutCache(model,
+				new DefaultCellViewFactory());
+		graph = new JGraph(model, view);
+	}
+
+	private DefaultGraphCell getNodeForEdge(String nodeName) {
+		DefaultGraphCell nodeCell = nodeCells.get(nodeName);
+		if (nodeCell == null) {
+			throw new RuntimeException("Node doesn't exist " + "(nodeName="
+					+ nodeName + ").");
+		}
+		return nodeCell;
+	}
+
+	private void insertAllCells() {
+		List<DefaultGraphCell> allCells = new ArrayList<DefaultGraphCell>();
+		allCells.addAll(nodeCells.values());
+		allCells.addAll(edgeCells);
+		// size the array for nodes AND edges so that toArray() can fill it directly
+		DefaultGraphCell[] cells = allCells
+				.toArray(new DefaultGraphCell[allCells.size()]);
+
+		graph.getGraphLayoutCache().insert(cells);
+		graph.setEditable(false);
+	}
+
+	public void showGraph() {
+		insertAllCells();
+		JFrame frame = new JFrame();
+		frame.getContentPane().add(new JScrollPane(graph));
+		// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+		frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
+		frame.pack();
+		frame.setVisible(true);
+	}
+
+}
diff --git a/src/org/yooreeka/util/gui/XyGui.java b/src/org/yooreeka/util/gui/XyGui.java
new file mode 100644
index 0000000..e5a6ae6
--- /dev/null
+++ b/src/org/yooreeka/util/gui/XyGui.java
@@ -0,0 +1,203 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.gui;
+
+import java.awt.event.WindowEvent;
+
+import org.jfree.chart.ChartFactory;
+import org.jfree.chart.ChartPanel;
+import org.jfree.chart.JFreeChart;
+import org.jfree.chart.plot.PlotOrientation;
+import org.jfree.chart.util.ApplicationFrame;
+import org.jfree.chart.util.RefineryUtilities;
+import org.jfree.data.category.DefaultCategoryDataset;
+import org.jfree.data.xy.XYSeries;
+import org.jfree.data.xy.XYSeriesCollection;
+
+/**
+ * Convenience class for doing basic XY plots. Here is how it would be used
+ * within the Bean Shell interpreter:
+ * 
+ * <pre>
+ * bsh % double[] x = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
+ * bsh % double[] y = {1.0, 4.0, 9.0, 16.0, 20.0, 29.0, 35, 40., 42.0};
+ * bsh % gui = new org.yooreeka.util.gui.XyGui("A plot", x, y); gui.plot();
+ * </pre>
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class XyGui extends ApplicationFrame {
+
+	/**
+	 * 
+	 */
+	private static final long serialVersionUID = 2878334413514645876L;
+
+	private StringBuilder errMsg;
+	private int loopInt;
+
+	public XyGui(String title, double[] x, double[] y) {
+		super(title);
+
+		errMsg = new StringBuilder();
+		// validate x BEFORE reading x.length, so that a null array is reported instead of throwing
+		boolean xIsValid = checkX(x);
+		setLoopInt(xIsValid ? x.length : 0);
+		if (xIsValid && checkY(x.length, y)) {
+			XYSeries xydata = new XYSeries("X-Y Plot");
+			for (int i = 0; i < loopInt; i++) {
+				xydata.add(x[i], y[i]);
+			}
+
+			XYSeriesCollection xycollection = new XYSeriesCollection(xydata);
+
+			final JFreeChart chart = ChartFactory.createXYLineChart(
+					"XY Series", "X", "Y", xycollection,
+					PlotOrientation.VERTICAL, true, true, false);
+
+			final ChartPanel chartPanel = new ChartPanel(chart);
+			chartPanel.setPreferredSize(new java.awt.Dimension(500, 270));
+			setContentPane(chartPanel);
+		}
+		// errMsg holds fatal errors and also non-fatal length warnings: report whatever was collected
+		if (errMsg.length() > 0) {
+			System.err.println(errMsg.toString());
+		}
+	}
+
+	/**
+	 * Builds a line chart of two data series plotted over the same items.
+	 * 
+	 * @param title frame title (passed to the superclass constructor)
+	 * @param nameForData1 identifier for the first data group/series
+	 * @param nameForData2 identifier for the second data group/series
+	 * @param items values/categories that correspond to data values
+	 * @param data1 values of the first series; read at every index of items
+	 * @param data2 values of the second series; read at every index of items
+	 */
+	public XyGui(String title, String nameForData1, String nameForData2,
+			String[] items, double[] data1, double[] data2) {
+
+		super(title);
+		DefaultCategoryDataset dataset = new DefaultCategoryDataset();
+		for (int i = 0, n = items.length; i < n; i++) {
+			dataset.addValue(data1[i], nameForData1, items[i]);
+			dataset.addValue(data2[i], nameForData2, items[i]);
+		}
+
+		final JFreeChart chart = ChartFactory.createLineChart(
+				"User Similarity", "Items", "Rating", dataset,
+				PlotOrientation.VERTICAL, true, true, false);
+
+		final ChartPanel chartPanel = new ChartPanel(chart);
+		chartPanel.setPreferredSize(new java.awt.Dimension(500, 270));
+		setContentPane(chartPanel);
+	}
+
+	private boolean checkX(double[] val) {
+
+		boolean isOK = true;
+
+		if (val == null || val.length <= 0) {
+
+			errMsg.append("The array of data for the X-axis is null or does not contain data!");
+			isOK = false;
+		}
+
+		return isOK;
+	}
+
+	private boolean checkY(int n, double[] val) {
+
+		// A null/empty Y array is fatal; a length mismatch only warns and the common prefix is plotted.
+		if (val == null || val.length <= 0) {
+			errMsg.append("---------------------------------------------------------------------\n");
+			errMsg.append("ERROR:\n");
+			errMsg.append("The array of data for the Y-axis is null or does not contain data!\n");
+			errMsg.append("---------------------------------------------------------------------\n");
+			// return at once: val may be null, so the length checks below would throw
+			return false;
+		}
+
+		if (val.length > n) {
+
+			errMsg.append("---------------------------------------------------------------------\n");
+			errMsg.append("WARNING: \n");
+			errMsg.append("     The length of the array for the Y-axis data is greater than \n");
+			errMsg.append(" the length of the array for the X-axis data. \n");
+			errMsg.append(" Only the first " + n
+					+ " points will be considered in the plot.\n");
+			errMsg.append("---------------------------------------------------------------------\n");
+
+		} else if (val.length < n) {
+
+			errMsg.append("---------------------------------------------------------------------\n");
+			errMsg.append("WARNING:\n");
+			errMsg.append("     The length of the array for the Y-axis data is less than \n");
+			errMsg.append(" the length of the array for the X-axis data. \n");
+			errMsg.append(" Only the first " + val.length
+					+ " points of the X-axis data will be considered in the plot.\n");
+			errMsg.append("---------------------------------------------------------------------\n");
+			setLoopInt(val.length);
+		}
+
+		return true;
+	}
+
+	public void plot() {
+		this.pack();
+		RefineryUtilities.centerFrameOnScreen(this);
+		this.setVisible(true);
+	}
+
+	private void setLoopInt(int val) {
+		loopInt = val;
+	}
+
+	/**
+	 * Listens for the main window closing, and shuts down the application.
+	 * 
+	 * @param event
+	 *            information about the window event.
+	 */
+	@Override
+	public void windowClosing(WindowEvent event) {
+		if (event.getWindow() == this) {
+			dispose();
+
+			// Overriding the ApplicationFrame behavior
+			// Do not shutdown the JVM
+			// System.exit(0);
+			// -----------------------------------------
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/util/internet/behavior/UserClick.java b/src/org/yooreeka/util/internet/behavior/UserClick.java
new file mode 100644
index 0000000..66311d4
--- /dev/null
+++ b/src/org/yooreeka/util/internet/behavior/UserClick.java
@@ -0,0 +1,157 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.behavior;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.yooreeka.algos.taxis.core.BaseConcept;
+import org.yooreeka.algos.taxis.core.BaseInstance;
+import org.yooreeka.algos.taxis.core.StringAttribute;
+
+/**
+ * Auxiliary class that captures a user click.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class UserClick extends BaseInstance {
+
+	UserQuery userQuery;
+	String url;
+
+	public UserClick() {
+		super();
+	}
+
+	/**
+	 * Creates a click whose concept is the clicked URL and whose attributes
+	 * are the user name followed by one attribute per query term.
+	 */
+	public UserClick(UserQuery uQ, String url) {
+
+		super();
+
+		this.userQuery = uQ;
+		this.url = url;
+		this.setConcept(new BaseConcept(url));
+
+		String[] terms = uQ.getQueryTerms();
+
+		attributes = new StringAttribute[terms.length + 1];
+		attributes[0] = new StringAttribute("UserName", uQ.getUid());
+
+		for (int j = 1; j <= terms.length; j++) {
+			attributes[j] = new StringAttribute("QueryTerm_" + j, terms[j - 1]);
+		}
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null || getClass() != obj.getClass())
+			return false;
+		final UserClick other = (UserClick) obj;
+		return sameOrBothNull(getUrl(), other.getUrl())
+				&& sameOrBothNull(userQuery, other.userQuery);
+	}
+
+	/**
+	 * The concept of a user click is its URL
+	 * 
+	 * @return the url, or <tt>null</tt> when no concept has been set
+	 */
+	public String getUrl() {
+		return (getConcept() == null) ? null : getConcept().getName();
+	}
+
+	/**
+	 * @return the userQuery
+	 */
+	public UserQuery getUserQuery() {
+		return userQuery;
+	}
+
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result
+				+ ((getUrl() == null) ? 0 : getUrl().hashCode());
+		result = prime * result
+				+ ((userQuery == null) ? 0 : userQuery.hashCode());
+		return result;
+	}
+
+	/**
+	 * Reads one click per line, formatted as <tt>uid,query,url</tt>. The
+	 * first and the last character of the third field are dropped
+	 * (presumably surrounding quotes). Blank lines are skipped.
+	 * 
+	 * @throws IOException
+	 *             on a read failure, or for a line that has fewer than
+	 *             three fields or a third field shorter than two characters
+	 */
+	@Override
+	public UserClick[] load(BufferedReader bR) throws IOException {
+
+		ArrayList<UserClick> userClicks = new ArrayList<UserClick>();
+
+		String line;
+		while ((line = bR.readLine()) != null) {
+
+			if (line.trim().length() == 0) {
+				continue;
+			}
+
+			String[] data = line.split(",");
+			if (data.length < 3 || data[2].length() < 2) {
+				throw new IOException("Malformed user click record: '"
+						+ line + "'");
+			}
+
+			UserQuery uQ = new UserQuery(data[0], data[1]);
+			UserClick userClick = new UserClick(uQ, data[2].substring(1,
+					data[2].length() - 1));
+
+			userClick.print();
+			userClicks.add(userClick);
+		}
+
+		return userClicks.toArray(new UserClick[userClicks.size()]);
+	}
+
+	private static boolean sameOrBothNull(Object a, Object b) {
+		return (a == null) ? (b == null) : a.equals(b);
+	}
+}
diff --git a/src/org/yooreeka/util/internet/behavior/UserQuery.java b/src/org/yooreeka/util/internet/behavior/UserQuery.java
new file mode 100644
index 0000000..1a974f9
--- /dev/null
+++ b/src/org/yooreeka/util/internet/behavior/UserQuery.java
@@ -0,0 +1,159 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.behavior;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * This is a class that encapsulates a personalized query
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class UserQuery {
+
+	private String uid;
+	private String queryString;
+	private String[] queryTerms;
+	private Query query;
+
+	/**
+	 * Builds a phrase query over the "content" field from <tt>q</tt>, keeps
+	 * it as this instance's query, and records the text of its terms.
+	 * 
+	 * NOTE(review): the whole string is added as a single Term, so
+	 * <tt>queryTerms</tt> presumably holds one element -- confirm whether
+	 * tokenization was intended.
+	 */
+	public UserQuery(String uid, String q) throws IOException {
+
+		this.uid = uid;
+		this.queryString = q;
+
+		PhraseQuery phraseQuery = new PhraseQuery();
+		phraseQuery.add(new Term("content", q));
+
+		// Store the query in the field so that getQuery() returns it.
+		this.query = phraseQuery;
+
+		Term[] terms = phraseQuery.getTerms();
+		queryTerms = new String[terms.length];
+		for (int i = 0; i < terms.length; i++) {
+			queryTerms[i] = terms[i].text();
+		}
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null || getClass() != obj.getClass())
+			return false;
+		final UserQuery other = (UserQuery) obj;
+		return sameOrBothNull(queryString, other.queryString)
+				&& Arrays.equals(queryTerms, other.queryTerms)
+				&& sameOrBothNull(uid, other.uid);
+	}
+
+	public String getName() {
+		return UserQuery.class.getCanonicalName();
+	}
+
+	public Query getQuery() {
+		return query;
+	}
+
+	/**
+	 * @return the raw query string
+	 */
+	public String getQueryString() {
+		return queryString;
+	}
+
+	/**
+	 * @return the queryTerms
+	 */
+	public String[] getQueryTerms() {
+		return queryTerms;
+	}
+
+	/**
+	 * @return the uid
+	 */
+	public String getUid() {
+		return uid;
+	}
+
+	public UserQuery getValue() {
+		return this;
+	}
+
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result
+				+ ((queryString == null) ? 0 : queryString.hashCode());
+		result = prime * result + Arrays.hashCode(queryTerms);
+		result = prime * result + ((uid == null) ? 0 : uid.hashCode());
+		return result;
+	}
+
+	public void setQuery(Query query) {
+		this.query = query;
+	}
+
+	/**
+	 * @param query
+	 *            the query string to set
+	 */
+	public void setQueryString(String query) {
+		this.queryString = query;
+	}
+
+	/**
+	 * @param uid
+	 *            the uid to set
+	 */
+	public void setUid(String uid) {
+		this.uid = uid;
+	}
+
+	private static boolean sameOrBothNull(Object a, Object b) {
+		return (a == null) ? (b == null) : a.equals(b);
+	}
+
+}
diff --git a/src/org/yooreeka/util/internet/crawling/FetchAndProcessCrawler.java b/src/org/yooreeka/util/internet/crawling/FetchAndProcessCrawler.java
new file mode 100644
index 0000000..0342962
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/FetchAndProcessCrawler.java
@@ -0,0 +1,310 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.internet.crawling.core.BasicWebCrawler;
+import org.yooreeka.util.internet.crawling.core.CrawlData;
+import org.yooreeka.util.internet.crawling.core.URLFilter;
+import org.yooreeka.util.internet.crawling.core.URLNormalizer;
+
+public class FetchAndProcessCrawler {
+
+	public static final int DEFAULT_MAX_DEPTH = 3;
+	public static final int DEFAULT_MAX_DOCS = 1000;
+
+	// Names of the sample document groups under <home>/data/ch02 and the
+	// number of files available for each one (same index in both arrays).
+	private static final String[] TOPICS = { "biz", "sport", "usa", "world" };
+	private static final int[] TOPIC_SIZES = { 7, 3, 4, 5 };
+
+	// INSTANCE VARIABLES
+	// A reference to the crawler
+	BasicWebCrawler webCrawler;
+
+	// The location where we will store the fetched data
+	String rootDir;
+
+	// Total number of crawlers
+	int numberOfCrawlers = 4;
+
+	// total number of iterations
+	int maxDepth = DEFAULT_MAX_DEPTH;
+
+	// max number of pages that will be fetched within every crawl/iteration.
+	int maxDocs = DEFAULT_MAX_DOCS;
+
+	List<String> seedUrls;
+
+	URLFilter urlFilter;
+
+	/**
+	 * Creates a crawler that stores its data in a fresh
+	 * <tt>crawl-&lt;timestamp&gt;</tt> directory below <tt>dir</tt>.
+	 * 
+	 * @param dir
+	 *            parent directory for the crawl data; when <tt>null</tt> or
+	 *            blank, the <tt>data</tt> directory under the
+	 *            <tt>iweb2.home</tt> system property is used
+	 * @param maxDepth
+	 *            total number of iterations
+	 * @param maxDocs
+	 *            max number of pages fetched within every iteration
+	 */
+	public FetchAndProcessCrawler(String dir, int maxDepth, int maxDocs) {
+
+		final String separator = System.getProperty("file.separator");
+
+		rootDir = dir;
+
+		// If the root directory is not set or if its length is zero
+		if (rootDir == null || rootDir.trim().length() == 0) {
+
+			// Create a default location for storing the data, relative to the
+			// IWEB2_HOME location
+			rootDir = System.getProperty("iweb2.home") + separator + "data";
+		}
+
+		rootDir = rootDir + separator + "crawl-" + System.currentTimeMillis();
+
+		this.maxDepth = maxDepth;
+		this.maxDocs = maxDocs;
+		this.seedUrls = new ArrayList<String>();
+
+		/* default url filter configuration */
+		this.urlFilter = new URLFilter();
+		urlFilter.setAllowFileUrls(true);
+		urlFilter.setAllowHttpUrls(true);
+
+		webCrawler = new BasicWebCrawler(rootDir);
+	}
+
+	/**
+	 * Adds the three <tt>spam-biz-NN.doc</tt> sample documents as seeds.
+	 */
+	public void addDocSpam() {
+		addNumberedUrls("spam-biz", 3, "doc");
+	}
+
+	/**
+	 * Normalizes the given URL and adds it to the list of seed URLs.
+	 * 
+	 * @param val
+	 *            the URL to add
+	 */
+	public void addUrl(String val) {
+		URLNormalizer urlNormalizer = new URLNormalizer();
+		seedUrls.add(urlNormalizer.normalizeUrl(val));
+	}
+
+	/**
+	 * @return the crawl data held by the underlying web crawler
+	 */
+	public CrawlData getCrawlData() {
+		return webCrawler.getCrawlData();
+	}
+
+	/**
+	 * @return the maxNumberOfCrawls
+	 */
+	public int getMaxNumberOfCrawls() {
+		return maxDepth;
+	}
+
+	/**
+	 * @return the maxNumberOfDocsPerCrawl
+	 */
+	public int getMaxNumberOfDocsPerCrawl() {
+		return maxDocs;
+	}
+
+	/**
+	 * @return the rootDir
+	 */
+	public String getRootDir() {
+		return rootDir;
+	}
+
+	/**
+	 * @return the live list of seed URLs (not a copy)
+	 */
+	public List<String> getSeedUrls() {
+		return seedUrls;
+	}
+
+	/**
+	 * Hands the seed URLs and the URL filter to the web crawler, runs the
+	 * fetch-and-process cycle, and prints the elapsed time in seconds.
+	 */
+	public void run() {
+
+		webCrawler.addSeedUrls(getSeedUrls());
+
+		webCrawler.setURLFilter(urlFilter);
+
+		long t0 = System.currentTimeMillis();
+
+		/* run crawl */
+		webCrawler.fetchAndProcess(maxDepth, maxDocs);
+
+		System.out.println("Timer (s): [Crawler processed data] --> "
+				+ (System.currentTimeMillis() - t0) * 0.001);
+
+	}
+
+	/**
+	 * Adds the default seed URLs followed by all the HTML spam pages.
+	 */
+	public void setAllUrls() {
+
+		setDefaultUrls();
+
+		// Include the spam pages ... all of them!
+		addNumberedUrls("spam", 1, "html");
+		addNumberedUrls("spam-biz", 3, "html");
+	}
+
+	/**
+	 * Adds the HTML sample pages of every topic to the seed URLs (existing
+	 * seeds are kept) and restricts the URL filter to <tt>file://</tt> URLs.
+	 */
+	public void setDefaultUrls() {
+
+		for (int t = 0; t < TOPICS.length; t++) {
+			addNumberedUrls(TOPICS[t], TOPIC_SIZES[t], "html");
+		}
+
+		setFilesOnlyUrlFilter();
+	}
+
+	private void setFilesOnlyUrlFilter() {
+		/* configure url filter to accept only file:// urls */
+		URLFilter filesOnly = new URLFilter();
+		filesOnly.setAllowFileUrls(true);
+		filesOnly.setAllowHttpUrls(false);
+		setUrlFilter(filesOnly);
+	}
+
+	/**
+	 * @param maxNumberOfCrawls
+	 *            the maxNumberOfCrawls to set
+	 */
+	public void setMaxNumberOfCrawls(int maxNumberOfCrawls) {
+		this.maxDepth = maxNumberOfCrawls;
+	}
+
+	/**
+	 * @param maxNumberOfDocsPerCrawl
+	 *            the maxNumberOfDocsPerCrawl to set
+	 */
+	public void setMaxNumberOfDocsPerCrawl(int maxNumberOfDocsPerCrawl) {
+		this.maxDocs = maxNumberOfDocsPerCrawl;
+	}
+
+	/**
+	 * NOTE: the web crawler was already created with the directory chosen in
+	 * the constructor; this setter only updates the field reported by
+	 * <tt>getRootDir()</tt>.
+	 * 
+	 * @param rootDir
+	 *            the rootDir to set
+	 */
+	public void setRootDir(String rootDir) {
+		this.rootDir = rootDir;
+	}
+
+	/**
+	 * @param urlFilter
+	 *            the filter handed to the web crawler when run() is called
+	 */
+	public void setUrlFilter(URLFilter urlFilter) {
+		this.urlFilter = urlFilter;
+	}
+
+	/**
+	 * Replaces the seed URLs with the sample documents of a single group and
+	 * restricts the URL filter to <tt>file://</tt> URLs.
+	 * 
+	 * @param val
+	 *            one of "biz", "sport", "usa", "world" (.html pages) or the
+	 *            same name followed by "-docs" (.doc files); case is ignored
+	 * @throws IllegalArgumentException
+	 *             if <tt>val</tt> is <tt>null</tt> or not one of the above
+	 */
+	public void setUrls(String val) {
+
+		setFilesOnlyUrlFilter();
+
+		this.seedUrls.clear();
+
+		for (int t = 0; t < TOPICS.length; t++) {
+
+			if (TOPICS[t].equalsIgnoreCase(val)) {
+				addNumberedUrls(TOPICS[t], TOPIC_SIZES[t], "html");
+				return;
+			}
+
+			if ((TOPICS[t] + "-docs").equalsIgnoreCase(val)) {
+				addNumberedUrls(TOPICS[t], TOPIC_SIZES[t], "doc");
+				return;
+			}
+		}
+
+		throw new IllegalArgumentException("Unknown value: '" + val + "'");
+	}
+
+	/**
+	 * Adds <tt>file:///&lt;home&gt;/data/ch02/&lt;name&gt;-NN.&lt;ext&gt;</tt>
+	 * as a seed URL for NN = 01 .. count, where home is the value returned by
+	 * <tt>YooreekaConfigurator.getHome()</tt>.
+	 * 
+	 * @param name
+	 *            file name prefix, e.g. "biz"
+	 * @param count
+	 *            number of consecutively numbered files
+	 * @param extension
+	 *            file extension without the dot, e.g. "html"
+	 */
+	private void addNumberedUrls(String name, int count, String extension) {
+
+		String prefix = "file:///" + YooreekaConfigurator.getHome()
+				+ "/data/ch02/" + name;
+
+		for (int i = 1; i <= count; i++) {
+			// two-digit, zero-padded sequence number
+			addUrl(prefix + (i < 10 ? "-0" : "-") + i + "." + extension);
+		}
+	}
+
+}
diff --git a/src/org/yooreeka/util/internet/crawling/YCrawler.java b/src/org/yooreeka/util/internet/crawling/YCrawler.java
new file mode 100644
index 0000000..58f062a
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/YCrawler.java
@@ -0,0 +1,197 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import edu.uci.ics.crawler4j.crawler.CrawlConfig;
+import edu.uci.ics.crawler4j.crawler.CrawlController;
+import edu.uci.ics.crawler4j.crawler.Page;
+import edu.uci.ics.crawler4j.crawler.WebCrawler;
+import edu.uci.ics.crawler4j.fetcher.PageFetcher;
+import edu.uci.ics.crawler4j.parser.HtmlParseData;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
+import edu.uci.ics.crawler4j.url.WebURL;
+
+/**
+ * A general crawler based on the Crawler4J library.
+ * 
+ * @see <a href="http://code.google.com/p/crawler4j/">crawler4j</a>
+ * 
+ */
+public class YCrawler extends WebCrawler {
+
+	// PUBLIC STATIC CONSTANTS
+	public final static int CONNECTION_TIMEOUT = 5000;
+
+	// PRIVATE STATIC CONSTANTS
+	private final static Pattern FILTERS = Pattern
+			.compile(".*(\\.(css|js|bmp|gif|jpe?g"
+					+ "|png|tiff?|mid|mp2|mp3|mp4"
+					+ "|wav|avi|mov|mpeg|ram|m4v"
+					+ "|rm|smil|wmv|swf|wma|zip|rar|gz))$");
+
+	// INSTANCE VARIABLES
+
+	public static void main(String[] args) throws Exception {
+
+		YCrawler crawler = new YCrawler();
+
+		// To change the root dir you can invoke setRootDir() here
+		// before the setup()
+		CrawlController controller = crawler.setup();
+
+		/*
+		 * Start the crawl. This is a blocking operation, meaning that your code
+		 * will reach the line after this only when crawling is finished.
+		 */
+		controller.start(YCrawler.class, crawler.getNumberOfCrawlers());
+	}
+
+	/**
+	 * The location where we will store the fetched data. Note that this is a
+	 * location for all the crawls of this class. If you would like to change it
+	 * use the <tt>setRootDir()</tt> method.
+	 */
+	private String rootDir;
+
+	private int numberOfCrawlers = 5;
+
+	private int connectionTimeout = CONNECTION_TIMEOUT;
+
+	private int getNumberOfCrawlers() {
+		return numberOfCrawlers;
+	}
+
+	/**
+	 * Builds the storage folder for one crawl without modifying the field.
+	 */
+	private String getRootDir() {
+
+		String separator = System.getProperty("file.separator");
+		String base = rootDir;
+
+		// If the root directory is not set or if its length is zero, use a
+		// default location relative to the IWEB2_HOME location
+		if (base == null || base.trim().length() == 0) {
+			base = System.getProperty("iweb2.home") + separator + "data";
+		}
+
+		return base + separator + "crawl-" + System.currentTimeMillis();
+	}
+
+	public void setNumberOfCrawlers(int numberOfCrawlers) {
+		this.numberOfCrawlers = numberOfCrawlers;
+	}
+
+	public void setRootDir(String rootDir) {
+		this.rootDir = rootDir;
+	}
+
+	/**
+	 * Creates and configures the crawl controller and registers the seed URL.
+	 * 
+	 * @throws IllegalStateException
+	 *             if the controller cannot be created
+	 */
+	private CrawlController setup() {
+
+		CrawlConfig crawlConfiguration = new CrawlConfig();
+		crawlConfiguration.setConnectionTimeout(connectionTimeout);
+		crawlConfiguration.setCrawlStorageFolder(getRootDir());
+		crawlConfiguration.setFollowRedirects(true);
+		crawlConfiguration.setIncludeBinaryContentInCrawling(true);
+		crawlConfiguration.setIncludeHttpsPages(true);
+
+		// The default value is 100
+		crawlConfiguration.setMaxConnectionsPerHost(32);
+
+		// Try 32 Mb; the default is 1 Mb
+		crawlConfiguration.setMaxDownloadSize(32 * 1024 * 1024);
+
+		// LIMIT THE MAX NUMBER OF PAGES!!!
+		// Unless you know what you are doing, technically and business wise ...
+		crawlConfiguration.setMaxPagesToFetch(64);
+
+		PageFetcher pageFetcher = new PageFetcher(crawlConfiguration);
+		RobotstxtServer robotsTextServer = new RobotstxtServer(
+				new RobotstxtConfig(), pageFetcher);
+
+		CrawlController controller;
+		try {
+			controller = new CrawlController(crawlConfiguration, pageFetcher,
+					robotsTextServer);
+		} catch (Exception e) {
+			// Fail fast: continuing would dereference a null controller.
+			throw new IllegalStateException(
+					"Unable to create the crawl controller", e);
+		}
+
+		// Seed URL: the first page fetched; the crawler then follows the
+		// links found in it.
+		controller.addSeed("http://arxiv.org/");
+		return controller;
+	}
+
+	/**
+	 * Accepts URLs under http://www.ics.uci.edu/ that FILTERS does not match.
+	 * NOTE(review): the seed in setup() is on arxiv.org -- confirm the host.
+	 */
+	@Override
+	public boolean shouldVisit(WebURL url) {
+		String href = url.getURL().toLowerCase(java.util.Locale.ENGLISH);
+		return !FILTERS.matcher(href).matches()
+				&& href.startsWith("http://www.ics.uci.edu/");
+	}
+
+	/**
+	 * This function is called when a page is fetched and ready to be processed
+	 * by your program.
+	 */
+	@Override
+	public void visit(Page page) {
+		String url = page.getWebURL().getURL();
+		System.out.println("URL: " + url);
+
+		if (page.getParseData() instanceof HtmlParseData) {
+			HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
+			String text = htmlParseData.getText();
+			String html = htmlParseData.getHtml();
+			List<WebURL> links = htmlParseData.getOutgoingUrls();
+
+			System.out.println("Text length: " + text.length());
+			System.out.println("Html length: " + html.length());
+			System.out.println("Number of outgoing links: " + links.size());
+		}
+	}
+}
\ No newline at end of file
diff --git a/src/org/yooreeka/util/internet/crawling/core/BasicWebCrawler.java b/src/org/yooreeka/util/internet/crawling/core/BasicWebCrawler.java
new file mode 100644
index 0000000..b320b62
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/core/BasicWebCrawler.java
@@ -0,0 +1,332 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.core;
+
+import java.util.List;
+
+import org.yooreeka.util.P;
+import org.yooreeka.util.internet.crawling.db.FetchedDocsDB;
+import org.yooreeka.util.internet.crawling.db.KnownUrlDB;
+import org.yooreeka.util.internet.crawling.db.ProcessedDocsDB;
+import org.yooreeka.util.internet.crawling.model.FetchedDocument;
+import org.yooreeka.util.internet.crawling.model.KnownUrlEntry;
+import org.yooreeka.util.internet.crawling.model.Outlink;
+import org.yooreeka.util.internet.crawling.transport.common.Transport;
+import org.yooreeka.util.internet.crawling.transport.file.FileTransport;
+import org.yooreeka.util.internet.crawling.transport.http.HTTPTransport;
+import org.yooreeka.util.internet.crawling.util.DocumentIdUtils;
+import org.yooreeka.util.internet.crawling.util.UrlGroup;
+import org.yooreeka.util.internet.crawling.util.UrlUtils;
+import org.yooreeka.util.parsing.common.AbstractDocument;
+import org.yooreeka.util.parsing.common.DocumentParser;
+import org.yooreeka.util.parsing.common.DocumentParserFactory;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+public class BasicWebCrawler {
+
+	private CrawlData crawlData;
+
+	private URLFilter urlFilter;
+
+	private static final int DEFAULT_MAX_BATCH_SIZE = 50;
+
+	private static final long DEFAULT_PAUSE_IN_MILLIS = 500;
+	private long pauseBetweenFetchesInMillis = DEFAULT_PAUSE_IN_MILLIS;
+
+	/*
+	 * Number of URLs to fetch and parse at a time.
+	 */
+	private int maxBatchSize = DEFAULT_MAX_BATCH_SIZE;
+
+	/*
+	 * Number of fetched and parsed URLs so far.
+	 */
+	private int processedUrlCount = 0;
+
+	public BasicWebCrawler(String rootDir) {
+		this.crawlData = new CrawlData(rootDir);
+	}
+
+	public void addSeedUrls(List<String> seedUrls) {
+		// Seeds are the starting points of the crawl: registered at depth 0.
+		final KnownUrlDB db = crawlData.getKnownUrlsDB();
+		for (String seed : seedUrls) {
+			db.addNewUrl(seed, 0);
+		}
+	}
+
+	/**
+	 * Crawls level by level, starting at depth 0: each batch of unprocessed
+	 * urls is fetched, parsed and mined for outlinks, which are registered at
+	 * the next depth.
+	 * @param maxDepth number of depth levels to crawl
+	 * @param maxDocs the crawl ends after the batch in which the total number
+	 *            of handled urls reaches this value
+	 */
+	public void fetchAndProcess(int maxDepth, int maxDocs) {
+
+		// set once the running total of processed urls reaches maxDocs
+		boolean docLimitHit = false;
+		// every batch gets its own, monotonically increasing group number
+		int groupNo = 1;
+
+		crawlData.init();
+
+		if (maxBatchSize <= 0) {
+			throw new RuntimeException("Invalid value for maxBatchSize = "
+					+ maxBatchSize);
+		}
+
+		// Level by level: drain every unprocessed url of one depth in batches
+		// before moving on to the next depth.
+		for (int depth = 0; depth < maxDepth; depth++) {
+
+			int doneAtThisDepth = 0;
+			boolean depthExhausted = false;
+
+			while (!docLimitHit && !depthExhausted) {
+
+				System.out.println("Starting url group: " + groupNo
+						+ ", current depth: " + depth + ", total known urls: "
+						+ crawlData.getKnownUrlsDB().getTotalUrlCount()
+						+ ", maxDepth: " + maxDepth + ", maxDocs: " + maxDocs
+						+ ", maxDocs per group: " + maxBatchSize
+						+ ", pause between docs: "
+						+ pauseBetweenFetchesInMillis + "(ms)");
+
+				List<String> batch = selectNextBatchOfUrlsToCrawl(
+						maxBatchSize, depth);
+
+				/* each batch of urls is stored as its own document group */
+				String groupId = String.valueOf(groupNo);
+				fetchPages(batch, crawlData.getFetchedDocsDB(), groupId);
+
+				// parse what has just been downloaded
+				processPages(groupId, crawlData.getProcessedDocsDB(),
+						crawlData.getFetchedDocsDB());
+
+				// register the outlinks of the parsed docs one level deeper
+				processLinks(groupId, depth + 1,
+						crawlData.getProcessedDocsDB());
+
+				int batchSize = batch.size();
+				processedUrlCount += batchSize;
+				doneAtThisDepth += batchSize;
+
+				System.out.println("Finished url group: " + groupNo
+						+ ", urls processed in this group: " + batchSize
+						+ ", current depth: " + depth
+						+ ", total urls processed: " + processedUrlCount);
+
+				groupNo++;
+
+				docLimitHit = processedUrlCount >= maxDocs;
+				depthExhausted = batchSize == 0;
+			}
+
+			// nothing left at this depth, or the document budget is spent
+			if (doneAtThisDepth == 0 || docLimitHit) {
+				break;
+			}
+		}
+	}
+
+	private void fetchPages(List<String> urls, FetchedDocsDB fetchedDocsDB,
+			String groupId) {
+		// Sequence numbers run across all url groups of the batch; a failed
+		// fetch still consumes its number.
+		DocumentIdUtils idUtils = new DocumentIdUtils();
+		int seq = 1;
+		for (UrlGroup group : UrlUtils.groupByProtocolAndHost(urls)) {
+			Transport transport = getTransport(group.getProtocol());
+			try {
+				transport.init();
+				for (String url : group.getUrls()) {
+					try {
+						FetchedDocument fetched = transport.fetch(url);
+						fetched.setDocumentId(idUtils.getDocumentId(groupId, seq));
+						fetchedDocsDB.saveDocument(fetched);
+						if (transport.pauseRequired()) {
+							pause();
+						}
+					} catch (Exception e) {
+						// a bad url must not abort the batch: report, flag, move on
+						System.out.println("Failed to fetch document from url: '"
+								+ url + "'.\n" + e.getMessage());
+						crawlData.getKnownUrlsDB().updateUrlStatus(url,
+								KnownUrlEntry.STATUS_PROCESSED_ERROR);
+					}
+					seq++;
+				}
+			} finally {
+				// always release the transport, even if init() failed
+				transport.clear();
+			}
+		}
+	}
+
+	public CrawlData getCrawlData() {
+		return this.crawlData;
+	}
+
+	public long getPauseBetweenFetchesInMillis() {
+		return this.pauseBetweenFetchesInMillis;
+	}
+
+	private Transport getTransport(String protocol) {
+		// Only the http and file protocols are mapped to a transport.
+		if ("http".equalsIgnoreCase(protocol)) {
+			return new HTTPTransport();
+		}
+		if ("file".equalsIgnoreCase(protocol)) {
+			return new FileTransport();
+		}
+		throw new RuntimeException("Unsupported protocol: '" + protocol + "'.");
+	}
+
+	public URLFilter getURLFilter() {
+		return this.urlFilter;
+	}
+
+	public void pause() {
+		try {
+			Thread.sleep(pauseBetweenFetchesInMillis);
+		} catch (InterruptedException e) {
+			Thread.currentThread().interrupt(); // keep the interrupt visible to callers
+		}
+	}
+
+	private void processLinks(String groupId, int currentDepth,
+			ProcessedDocsDB parsedDocs) {
+		URLNormalizer normalizer = new URLNormalizer();
+		if (urlFilter == null) {
+			urlFilter = new URLFilter();
+			urlFilter.setAllowFileUrls(true);
+			urlFilter.setAllowHttpUrls(false);
+			System.out
+					.println("Using default URLFilter configuration that only accepts 'file://' urls");
+		}
+
+		KnownUrlDB knownUrls = crawlData.getKnownUrlsDB();
+		for (String docId : parsedDocs.getDocumentIds(groupId)) {
+			ProcessedDocument doc = parsedDocs.loadDocument(docId);
+			String sourceUrl = doc.getDocumentURL();
+			// register url without any outlinks first
+			crawlData.getPageLinkDB().addLink(sourceUrl);
+			for (Outlink outlink : doc.getOutlinks()) {
+				String target = outlink.getLinkUrl();
+				// NOTE(review): filtered on the normalized url but stored raw -- intended?
+				if (!urlFilter.accept(normalizer.normalizeUrl(target))) {
+					continue;
+				}
+				knownUrls.addNewUrl(target, currentDepth);
+				crawlData.getPageLinkDB().addLink(sourceUrl, target);
+			}
+		}
+		knownUrls.save();
+		crawlData.getPageLinkDB().save();
+	}
+
+	/*
+	 * Parses every fetched document of the group and stores the result. The
+	 * source url is flagged as processed on success and as failed on error.
+	 */
+	private void processPages(String groupId,
+			ProcessedDocsDB parsedDocsService, FetchedDocsDB fetchedDocsDB) {
+
+		List<String> docIds = fetchedDocsDB.getDocumentIds(groupId);
+
+		for (String id : docIds) {
+			AbstractDocument doc = null;
+			try {
+				doc = fetchedDocsDB.getDocument(id);
+				String url = doc.getDocumentURL();
+
+				String contentType = doc.getContentType();
+
+				DocumentParser docParser = DocumentParserFactory.getInstance()
+						.getDocumentParser(contentType);
+
+				// DEBUG
+				P.println(docParser.toString());
+				P.println(doc.toString());
+
+				ProcessedDocument parsedDoc = docParser.parse(doc);
+
+				parsedDocsService.saveDocument(parsedDoc);
+
+				crawlData.getKnownUrlsDB().updateUrlStatus(url,
+						KnownUrlEntry.STATUS_PROCESSED_SUCCESS);
+
+			} catch (Exception e) {
+				System.out.println("ERROR:\n");
+				System.out.println("Unexpected exception while processing: '"
+						+ id + "', ");
+				if (doc != null) {
+					System.out.println("   URL='" + doc.getDocumentURL()
+							+ "'\n");
+				}
+				System.out.println("Exception message: " + e.getMessage());
+				if (doc != null && doc.getDocumentURL() != null) {
+					// same convention as fetchPages: flag the url as failed
+					crawlData.getKnownUrlsDB().updateUrlStatus(
+							doc.getDocumentURL(),
+							KnownUrlEntry.STATUS_PROCESSED_ERROR);
+				}
+			}
+		}
+	}
+
+	private List<String> selectNextBatchOfUrlsToCrawl(int maxBatchSize,
+			int depth) {
+		KnownUrlDB knownUrls = crawlData.getKnownUrlsDB();
+		return knownUrls.findUnprocessedUrls(maxBatchSize, depth);
+	}
+
+	/**
+	 * Delegates to the depth-unaware {@code findUnprocessedUrls(maxDocs)}.
+	 * @param maxDocs value passed through to the known-urls database
+	 * @return whatever the known-urls database reports as unprocessed
+	 * @deprecated use method that uses depth
+	 */
+	@Deprecated
+	public List<String> selectURLsForNextCrawl(int maxDocs) {
+		return crawlData.getKnownUrlsDB().findUnprocessedUrls(maxDocs);
+	}
+
+	public void setPauseBetweenFetchesInMillis(long pauseBetweenFetchesInMillis) {
+		this.pauseBetweenFetchesInMillis = pauseBetweenFetchesInMillis; // slept by pause()
+	}
+
+	public void setURLFilter(URLFilter urlFilter) {
+		this.urlFilter = urlFilter; // if null, processLinks() installs a file-only default
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/core/CrawlData.java b/src/org/yooreeka/util/internet/crawling/core/CrawlData.java
new file mode 100644
index 0000000..c35ca61
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/core/CrawlData.java
@@ -0,0 +1,99 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.core;
+
+import java.io.File;
+
+import org.yooreeka.util.internet.crawling.db.FetchedDocsDB;
+import org.yooreeka.util.internet.crawling.db.KnownUrlDB;
+import org.yooreeka.util.internet.crawling.db.PageLinkDB;
+import org.yooreeka.util.internet.crawling.db.ProcessedDocsDB;
+
+/** Groups the on-disk databases of a single crawl under one root directory. */
+public class CrawlData {
+
+	private final File crawlRootDir;
+
+	private final FetchedDocsDB fetchedDocsDB;
+	private final ProcessedDocsDB processedDocsDB;
+	private final KnownUrlDB knownUrlsDB;
+	private final PageLinkDB pageLinkDB;
+
+	/**
+	 * Creates the crawl root directory if needed and binds each database to
+	 * its own sub-directory: fetched, processed, knownurls and pagelinks.
+	 */
+	public CrawlData(String rootDir) {
+		crawlRootDir = new File(rootDir);
+		crawlRootDir.mkdirs();
+
+		fetchedDocsDB = new FetchedDocsDB(new File(crawlRootDir, "fetched"));
+		processedDocsDB = new ProcessedDocsDB(new File(crawlRootDir, "processed"));
+		knownUrlsDB = new KnownUrlDB(new File(crawlRootDir, "knownurls"));
+		pageLinkDB = new PageLinkDB(new File(crawlRootDir, "pagelinks"));
+	}
+
+	/** Calls delete() on each of the four databases. */
+	public void delete() {
+		fetchedDocsDB.delete();
+		processedDocsDB.delete();
+		knownUrlsDB.delete();
+		pageLinkDB.delete();
+	}
+
+	public File getCrawlRootDir() {
+		return crawlRootDir;
+	}
+
+	public FetchedDocsDB getFetchedDocsDB() {
+		return fetchedDocsDB;
+	}
+
+	public KnownUrlDB getKnownUrlsDB() {
+		return knownUrlsDB;
+	}
+
+	public PageLinkDB getPageLinkDB() {
+		return pageLinkDB;
+	}
+
+	public ProcessedDocsDB getProcessedDocsDB() {
+		return processedDocsDB;
+	}
+
+	/** Calls init() on each of the four databases. */
+	public void init() {
+		fetchedDocsDB.init();
+		processedDocsDB.init();
+		knownUrlsDB.init();
+		pageLinkDB.init();
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/core/CrawlDataProcessor.java b/src/org/yooreeka/util/internet/crawling/core/CrawlDataProcessor.java
new file mode 100644
index 0000000..3ad785f
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/core/CrawlDataProcessor.java
@@ -0,0 +1,46 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.core;
+
+/**
+ * A module that performs some processing based on the results of a crawl.
+ * <p>
+ * Examples of such modules:
+ * </p>
+ * <ul>
+ * <li>Build Lucene index</li>
+ * <li>Build matrix H for html pages</li>
+ * <li>Build matrix H for documents</li>
+ * </ul>
+ */
+public interface CrawlDataProcessor {
+	void run();
+}
diff --git a/src/org/yooreeka/util/internet/crawling/core/DocumentFilter.java b/src/org/yooreeka/util/internet/crawling/core/DocumentFilter.java
new file mode 100644
index 0000000..23d252e
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/core/DocumentFilter.java
@@ -0,0 +1,44 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.core;
+
+import org.yooreeka.util.internet.crawling.model.FetchedDocument;
+
+public class DocumentFilter {
+
+	/**
+	 * Intended to detect whether a document with the same content has already
+	 * been processed through some other url. Not implemented: always false.
+	 */
+	public boolean duplicateContentExists(FetchedDocument doc) {
+		return false;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/core/URLFilter.java b/src/org/yooreeka/util/internet/crawling/core/URLFilter.java
new file mode 100644
index 0000000..5ed6ad0
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/core/URLFilter.java
@@ -0,0 +1,79 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.core;
+
+/**
+ * Performs url filtering before url is registered in 'known urls' database.
+ */
+public class URLFilter {
+
+	private boolean allowFileUrls = true;
+	private boolean allowHttpUrls = true;
+
+	public URLFilter() {
+		// empty
+	}
+
+	/**
+	 * Basic implementation of url filter. Only allows urls whose scheme is
+	 * 'http:' or 'file:' (matched case-insensitively) and is still enabled.
+	 * <p>
+	 * Other features that can be added are:
+	 * <ul>
+	 * <li>extract host from the url and check against robots.txt</li>
+	 * <li>check against the list of excluded urls</li>
+	 * <li>user defined criteria</li>
+	 * </ul>
+	 * @param url the url to test; {@code null} is rejected
+	 * @return {@code true} if the url may be registered
+	 */
+	public boolean accept(String url) {
+		boolean acceptUrl = (allowFileUrls && hasScheme(url, "file:"))
+				|| (allowHttpUrls && hasScheme(url, "http:"));
+		if (!acceptUrl) {
+			System.out.println("DEBUG: Filtered url: '" + url + "'");
+		}
+		return acceptUrl;
+	}
+
+	private static boolean hasScheme(String url, String scheme) {
+		return url != null
+				&& url.regionMatches(true, 0, scheme, 0, scheme.length());
+	}
+
+	public void setAllowFileUrls(boolean flag) {
+		this.allowFileUrls = flag;
+	}
+
+	public void setAllowHttpUrls(boolean flag) {
+		this.allowHttpUrls = flag;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/core/URLNormalizer.java b/src/org/yooreeka/util/internet/crawling/core/URLNormalizer.java
new file mode 100644
index 0000000..eab2de9
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/core/URLNormalizer.java
@@ -0,0 +1,77 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.core;
+
+import java.net.URL;
+
+/**
+ * Performs url normalization.
+ */
+public class URLNormalizer {
+	public URLNormalizer() {
+		// empty
+	}
+
+	/* Re-serializes a file url through java.net.URL. */
+	private String normalizeFileUrl(String fileUrl) {
+		try {
+			return new URL(fileUrl).toExternalForm();
+		} catch (Exception e) {
+			throw new RuntimeException("URL Normalization error: ", e);
+		}
+	}
+
+	/**
+	 * Returns the url unchanged unless it starts with 'file://'; such urls
+	 * are parsed with {@link URL} and returned in their external form.
+	 * 
+	 * <p>
+	 * Other features that can be added are:
+	 * </p>
+	 * <ul>
+	 * <li>convert IP address into DNS name</li>
+	 * <li>lower-case DNS name</li>
+	 * <li>extract session id from the URL</li>
+	 * <li>process escape sequences</li>
+	 * <li>remove default port</li>
+	 * <li>remove fragment portion from the url</li>
+	 * <li>sort variables</li>
+	 * <li>...and a lot more</li>
+	 * </ul>
+	 * 
+	 * @param url the url to normalize, must not be null
+	 * @return the normalized url
+	 * @throws RuntimeException if a 'file://' url cannot be parsed
+	 */
+	public String normalizeUrl(String url) {
+		return url.startsWith("file://") ? normalizeFileUrl(url) : url;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/db/FetchedDocsDB.java b/src/org/yooreeka/util/internet/crawling/db/FetchedDocsDB.java
new file mode 100644
index 0000000..b221ac3
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/db/FetchedDocsDB.java
@@ -0,0 +1,305 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.db;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.util.internet.crawling.model.FetchedDocument;
+import org.yooreeka.util.internet.crawling.util.DocumentIdUtils;
+import org.yooreeka.util.internet.crawling.util.FileUtils;
+
+public class FetchedDocsDB {
+
+	private File rootDirFile;
+	private Map<String, File> groupFiles; // populated by init()
+	private DocumentIdUtils docIdUtils = new DocumentIdUtils();
+
+	public FetchedDocsDB(File rootDirFile) {
+		this.rootDirFile = rootDirFile;
+	}
+
+	/*
+	 * Creates directories for a new group if they don't exist yet.
+	 */
+	private void createGroup(String groupId) {
+		if (groupFiles.get(groupId) != null) {
+			return;
+		}
+		File groupDir = new File(rootDirFile, String.valueOf(groupId));
+		groupDir.mkdir();
+		groupFiles.put(groupDir.getName(), groupDir);
+	}
+
+	public void delete() {
+		FileUtils.deleteDir(this.rootDirFile);
+	}
+
+	private String geFetchedFilePropertiesExt() {
+		return ".meta"; // extension of the per-document properties (metadata) file
+	}
+
+	public List<String> getAllGroupIds() {
+		List<String> sortedIds = new ArrayList<String>(groupFiles.keySet());
+		Collections.sort(sortedIds); // natural (lexicographic) String order
+		return sortedIds;
+	}
+
+	private File getDataFile(String documentId) {
+		return getDocumentFile(documentId, getFetchedFileExt()); // the ".fetched" file
+	}
+
+	/*
+	 * Loads content and metadata; the document id encodes its group ("set").
+	 */
+	public FetchedDocument getDocument(String documentId) {
+		File contentFile = getDataFile(documentId);
+		if (!contentFile.exists()) {
+			throw new RuntimeException("Document with id: '" + documentId
+					+ "' doesn't exist.");
+		}
+		FetchedDocument result = new FetchedDocument();
+		result.setDocumentId(documentId);
+		result.setDocumentContent(readData(contentFile));
+
+		File metaFile = getPropertiesFile(documentId);
+		if (!metaFile.exists()) {
+			throw new RuntimeException("Properties for document with id: '"
+					+ documentId + "' don't exist.");
+		}
+		readMetaData(metaFile, result);
+
+		return result;
+	}
+
+	private File getDocumentFile(String documentId, String ext) {
+		// layout: <root>/<group id>/<sequence><ext>
+		File groupDir = new File(rootDirFile,
+				docIdUtils.getDocumentGroupId(documentId));
+		String fileName = docIdUtils.getDocumentSequence(documentId) + ext;
+		return new File(groupDir, fileName);
+	}
+
+	public List<String> getDocumentIds() {
+		List<String> allIds = new ArrayList<String>();
+		for (File groupDir : groupFiles.values()) {
+			allIds.addAll(getDocumentIds(groupDir));
+		}
+		return allIds;
+	}
+
+	/*
+	 * Lists the ids of the documents whose content file sits in the directory.
+	 */
+	private List<String> getDocumentIds(File setFile) {
+		final String fetchedExt = getFetchedFileExt();
+		File[] dataFiles = setFile.listFiles(new FilenameFilter() {
+			public boolean accept(File dir, String name) {
+				return name.endsWith(fetchedExt);
+			}
+		});
+
+		List<String> ids = new ArrayList<String>();
+		if (dataFiles == null) {
+			// listFiles() yields null when setFile is not a readable directory
+			return ids;
+		}
+		String groupId = setFile.getName();
+		for (File dataFile : dataFiles) {
+			String fileName = dataFile.getName();
+			// the part before the first dot is the sequence within the group
+			String sequence = fileName.substring(0, fileName.indexOf("."));
+			ids.add(docIdUtils.getDocumentId(groupId, sequence));
+		}
+		return ids;
+	}
+
+	public List<String> getDocumentIds(String groupId) {
+		return getDocumentIds(new File(rootDirFile, groupId)); // group dir = <root>/<groupId>
+	}
+
+	private String getFetchedFileExt() {
+		return ".fetched"; // extension of the file that holds the document content
+	}
+
+	private File getPropertiesFile(String documentId) {
+		return getDocumentFile(documentId, geFetchedFilePropertiesExt()); // the ".meta" file
+	}
+
+	public void init() {
+		init(true); // true = keep the groups that already exist on disk
+	}
+
+	private void init(boolean keepExistingData) {
+		groupFiles = new HashMap<String, File>();
+		if (!rootDirFile.exists()) {
+			rootDirFile.mkdirs();
+		} else if (!keepExistingData) {
+			/* discard everything stored so far and start with an empty root */
+			FileUtils.deleteDir(rootDirFile);
+			rootDirFile.mkdirs();
+		} else {
+			/* Load information about existing groups */
+			File[] existing = rootDirFile.listFiles(new FileFilter() {
+				public boolean accept(File f) {
+					return f.isDirectory();
+				}
+			});
+			// listFiles() returns null if the root is not a readable directory
+			if (existing != null) {
+				for (File groupDirFile : existing) {
+					groupFiles.put(groupDirFile.getName(), groupDirFile);
+				}
+			}
+		}
+	}
+
+	private byte[] readData(File f) {
+		byte[] data = new byte[(int) f.length()];
+		try {
+			BufferedInputStream in = new BufferedInputStream(
+					new FileInputStream(f));
+			try {
+				// a single read() may deliver fewer bytes than requested, so
+				// keep reading until the buffer is full or the stream ends
+				int off = 0;
+				int n;
+				while ((n = in.read(data, off, data.length - off)) > 0) {
+					off += n;
+				}
+			} finally {
+				in.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while reading file: '"
+					+ f.getAbsolutePath() + "'", e);
+		}
+		return data;
+	}
+
+	private void readMetaData(File f, FetchedDocument doc) {
+		try {
+			BufferedReader reader = new BufferedReader(new InputStreamReader(
+					new FileInputStream(f), "UTF-8"));
+			try {
+				Map<String, String> metadata = new HashMap<String, String>();
+				String line = null;
+				while ((line = reader.readLine()) != null) {
+					// "key:value"; empty lines and lines without ':' are skipped
+					String[] values = line.split(":", 2);
+					if (values.length < 2) {
+						continue;
+					}
+					String key = values[0];
+					String value = values[1];
+					if ("url".equalsIgnoreCase(key)) {
+						doc.setDocumentURL(value);
+					} else if ("host".equalsIgnoreCase(key)) {
+						// skip, do nothing
+					} else if ("Content-Type".equalsIgnoreCase(key)) {
+						doc.setContentType(value);
+					} else if ("Charset".equalsIgnoreCase(key)) {
+						doc.setContentCharset(value);
+					} else {
+						metadata.put(key, value);
+					}
+				}
+				doc.setDocumentMetadata(metadata);
+			} finally {
+				reader.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while reading metadata from file: '"
+					+ f.getAbsolutePath() + "'", e);
+		}
+	}
+
+	private void saveContent(File f, byte[] content) {
+		try {
+			BufferedOutputStream bout = new BufferedOutputStream(
+					new FileOutputStream(f));
+			try {
+				bout.write(content);
+			} finally {
+				bout.close(); // close() flushes, and runs even if write() fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	public void saveDocument(FetchedDocument doc) {
+		String documentId = doc.getDocumentId();
+
+		/* create directory for current group if it doesn't exist yet. */
+		createGroup(docIdUtils.getDocumentGroupId(documentId));
+
+		// the content and its metadata are stored in two separate files
+		saveContent(getDataFile(documentId), doc.getDocumentContent());
+
+		saveMetadata(getPropertiesFile(documentId), doc);
+	}
+
+	private void saveMetadata(File f, FetchedDocument doc) {
+		try {
+			BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+					new FileOutputStream(f), "UTF-8"));
+			try {
+				// the three fixed properties first, then the free-form ones
+				writeProperty(bw, "url", doc.getDocumentURL());
+				writeProperty(bw, "Content-Type", doc.getContentType());
+				writeProperty(bw, "Charset", doc.getContentCharset());
+				Map<String, String> metadata = doc.getDocumentMetadata();
+				for (Map.Entry<String, String> entry : metadata.entrySet()) {
+					writeProperty(bw, entry.getKey(), entry.getValue());
+				}
+			} finally {
+				bw.close(); // close() flushes, and runs even if a write fails
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	private void writeProperty(BufferedWriter w, String key, String value)
+			throws IOException {
+		// one "key:value" line per property; a null value leaves the part
+		// after the colon empty
+		w.write(key);
+		w.write(":");
+		w.write(value == null ? "" : value);
+		w.newLine();
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/db/KnownUrlDB.java b/src/org/yooreeka/util/internet/crawling/db/KnownUrlDB.java
new file mode 100644
index 0000000..29a4b4a
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/db/KnownUrlDB.java
@@ -0,0 +1,279 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.db;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.util.internet.crawling.model.KnownUrlEntry;
+import org.yooreeka.util.internet.crawling.util.FileUtils;
+
+/**
+ * File-backed registry of the URLs a crawl has seen.
+ * <p>
+ * Every URL lives in exactly one of two in-memory maps: one for entries that
+ * still have to be processed and one for entries that were processed (with
+ * success or with an error). {@link #init()} reads the registry from
+ * <code>knownurlsdb.dat</code> inside the root directory and {@link #save()}
+ * writes it back, one <code>status|depth|url</code> record per line, encoded
+ * as UTF-8.
+ */
+public class KnownUrlDB {
+
+	private static final String DB_FILENAME = "knownurlsdb.dat";
+
+	/* separates status, depth and url within one record line */
+	private static final String FIELD_DELIMITER = "|";
+
+	/* entries with a "processed" or "error" status, keyed by url */
+	private Map<String, KnownUrlEntry> processedURLs = new HashMap<String, KnownUrlEntry>();
+
+	/* entries that are still waiting to be processed, keyed by url */
+	private Map<String, KnownUrlEntry> unprocessedURLs = new HashMap<String, KnownUrlEntry>();
+
+	private File rootDir = null;
+
+	/* set by init(); load() and save() must not be used before that */
+	private File dbFile = null;
+
+	/**
+	 * @param f
+	 *            directory that holds (or will hold) the registry file
+	 */
+	public KnownUrlDB(File f) {
+		this.rootDir = f;
+	}
+
+	/**
+	 * Registers a url as unprocessed unless it is already known.
+	 * 
+	 * @param url
+	 *            url to register
+	 * @param depth
+	 *            depth stored with the entry
+	 * @return <code>true</code> if the url was added, <code>false</code> if it
+	 *         was already present in either map
+	 */
+	public boolean addNewUrl(String url, int depth) {
+		if (isKnownUrl(url)) {
+			return false;
+		}
+
+		KnownUrlEntry r = new KnownUrlEntry();
+		r.setUrl(url);
+		r.setStatus(KnownUrlEntry.STATUS_UNPROCESSED);
+		r.setDepth(depth);
+		unprocessedURLs.put(url, r);
+		return true;
+	}
+
+	/**
+	 * Deletes the root directory through <code>FileUtils.deleteDir</code>. The
+	 * in-memory maps are left untouched.
+	 */
+	public void delete() {
+		FileUtils.deleteDir(rootDir);
+	}
+
+	/**
+	 * @return a new list with all unprocessed urls followed by all processed
+	 *         urls
+	 */
+	public List<String> findAllKnownUrls() {
+		List<String> allUrls = new ArrayList<String>(getTotalUrlCount());
+		allUrls.addAll(unprocessedURLs.keySet());
+		allUrls.addAll(processedURLs.keySet());
+		return allUrls;
+	}
+
+	/**
+	 * Selects the processed urls that carry the given status.
+	 * 
+	 * @param status
+	 *            status to match, compared case-insensitively; must not be
+	 *            <code>null</code>
+	 * @return a new list with the matching urls
+	 */
+	public List<String> findProcessedUrls(String status) {
+		List<String> selectedUrls = new ArrayList<String>();
+		for (KnownUrlEntry urlEntry : processedURLs.values()) {
+			if (status.equalsIgnoreCase(urlEntry.getStatus())) {
+				selectedUrls.add(urlEntry.getUrl());
+			}
+		}
+		return selectedUrls;
+	}
+
+	/**
+	 * @return a new list with every unprocessed url
+	 */
+	public List<String> findUnprocessedUrls() {
+		return new ArrayList<String>(unprocessedURLs.keySet());
+	}
+
+	/**
+	 * Same as {@link #findUnprocessedUrls(int, int)} with a depth of 0.
+	 * 
+	 * @deprecated will be removed. Use
+	 *             {@link #findUnprocessedUrls(int, int)} instead.
+	 * 
+	 * @param maxDocs
+	 *            maximum number of urls to return
+	 * @return at most <code>maxDocs</code> unprocessed urls of depth 0
+	 */
+	@Deprecated
+	public List<String> findUnprocessedUrls(int maxDocs) {
+		return findUnprocessedUrls(maxDocs, 0);
+	}
+
+	/**
+	 * Selects unprocessed urls of one depth. The map that backs the selection
+	 * is a <code>HashMap</code>, so which urls are picked when more than
+	 * <code>maxDocs</code> qualify is not defined.
+	 * 
+	 * @param maxDocs
+	 *            maximum number of urls to return
+	 * @param depth
+	 *            depth an entry must have to be selected
+	 * @return a new list with at most <code>maxDocs</code> urls
+	 */
+	public List<String> findUnprocessedUrls(int maxDocs, int depth) {
+		List<String> selectedUrls = new ArrayList<String>();
+
+		for (KnownUrlEntry ku : unprocessedURLs.values()) {
+			if (selectedUrls.size() >= maxDocs) {
+				break;
+			}
+			if (ku.getDepth() == depth) {
+				selectedUrls.add(ku.getUrl());
+			}
+		}
+
+		return selectedUrls;
+	}
+
+	/**
+	 * @return number of processed plus unprocessed urls
+	 */
+	public int getTotalUrlCount() {
+		return unprocessedURLs.size() + processedURLs.size();
+	}
+
+	/**
+	 * Creates the root directory and the registry file when they are missing
+	 * and then loads every record of the file into memory.
+	 * 
+	 * @throws RuntimeException
+	 *             if the file cannot be created or its content cannot be
+	 *             loaded
+	 */
+	public void init() {
+		rootDir.mkdirs();
+
+		this.dbFile = new File(rootDir, DB_FILENAME);
+		try {
+
+			// creates a new file if the file doesn't exist
+			dbFile.createNewFile();
+
+		} catch (IOException e) {
+			throw new RuntimeException("Can't create db file: '"
+					+ dbFile.getAbsolutePath() + "'.", e);
+		}
+
+		load();
+	}
+
+	public boolean inProcessedUrl(String url) {
+		return processedURLs.containsKey(url);
+	}
+
+	public boolean inUnprocessedUrl(String url) {
+		return unprocessedURLs.containsKey(url);
+	}
+
+	/**
+	 * @return <code>true</code> if the url is registered, whatever its status
+	 */
+	public boolean isKnownUrl(String url) {
+		return processedURLs.containsKey(url)
+				|| unprocessedURLs.containsKey(url);
+	}
+
+	/**
+	 * @return <code>true</code> only if the url is registered as processed
+	 *         with the success status
+	 */
+	public boolean isSuccessfullyProcessed(String url) {
+		KnownUrlEntry r = processedURLs.get(url);
+		return r != null
+				&& KnownUrlEntry.STATUS_PROCESSED_SUCCESS.equalsIgnoreCase(r
+						.getStatus());
+	}
+
+	/*
+	 * Reads the registry file line by line and registers every record. Empty
+	 * lines are skipped.
+	 */
+	private void load() {
+		try {
+			BufferedReader br = new BufferedReader(new InputStreamReader(
+					new FileInputStream(dbFile), "UTF-8"));
+			try {
+				String line = null;
+				while ((line = br.readLine()) != null) {
+					if (line.length() == 0) {
+						continue;
+					}
+					loadRecord(line);
+				}
+			} finally {
+				// release the file handle even when a record is rejected
+				br.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to load data: ", e);
+		}
+	}
+
+	/*
+	 * Splits one "status|depth|url" line and registers the entry. Only the
+	 * first two delimiters are significant, so the url itself may contain the
+	 * delimiter character.
+	 */
+	private void loadRecord(String line) {
+		int delimiterIndex = line.indexOf(FIELD_DELIMITER);
+		int secondDelimiterIndex = -1;
+		if (delimiterIndex >= 0) {
+			secondDelimiterIndex = line.indexOf(FIELD_DELIMITER,
+					delimiterIndex + FIELD_DELIMITER.length());
+		}
+		if (secondDelimiterIndex < 0) {
+			throw new RuntimeException("Malformed record in '"
+					+ dbFile.getAbsolutePath() + "': '" + line + "'");
+		}
+
+		String status = line.substring(0, delimiterIndex);
+		String depthText = line.substring(delimiterIndex
+				+ FIELD_DELIMITER.length(), secondDelimiterIndex);
+		String url = line.substring(secondDelimiterIndex
+				+ FIELD_DELIMITER.length());
+
+		int depth;
+		try {
+			depth = Integer.parseInt(depthText);
+		} catch (NumberFormatException e) {
+			throw new RuntimeException("Invalid depth in record: '" + line
+					+ "'", e);
+		}
+		loadUrl(url, status, depth);
+	}
+
+	/*
+	 * Registers an entry read from the file in the map that matches its
+	 * status. Duplicate urls and unsupported status values are rejected.
+	 */
+	private void loadUrl(String url, String status, int depth) {
+		if (isKnownUrl(url)) {
+			throw new RuntimeException("Duplicate url: '" + url + "'");
+		}
+
+		KnownUrlEntry r = new KnownUrlEntry();
+		r.setUrl(url);
+		r.setStatus(status);
+		r.setDepth(depth);
+		if (isProcessedStatus(status)) {
+			processedURLs.put(url, r);
+		} else if (KnownUrlEntry.STATUS_UNPROCESSED.equalsIgnoreCase(status)) {
+			unprocessedURLs.put(url, r);
+		} else {
+			throw new RuntimeException("Unsupported status value: '" + status
+					+ "', url: '" + url + "'.");
+		}
+	}
+
+	/* true for the two status values that mark an entry as processed */
+	private boolean isProcessedStatus(String status) {
+		return KnownUrlEntry.STATUS_PROCESSED_SUCCESS.equalsIgnoreCase(status)
+				|| KnownUrlEntry.STATUS_PROCESSED_ERROR
+						.equalsIgnoreCase(status);
+	}
+
+	/**
+	 * Writes all entries to the registry file, replacing its previous
+	 * content. {@link #init()} must have been called before.
+	 * 
+	 * @throws RuntimeException
+	 *             wrapping the <code>IOException</code> when writing fails
+	 */
+	public void save() {
+		try {
+			BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+					new FileOutputStream(dbFile), "UTF-8"));
+			try {
+				for (KnownUrlEntry r : unprocessedURLs.values()) {
+					writeRecord(bw, r);
+				}
+				for (KnownUrlEntry r : processedURLs.values()) {
+					writeRecord(bw, r);
+				}
+				bw.flush();
+			} finally {
+				// release the file handle even when a write fails
+				bw.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to save data: ", e);
+		}
+	}
+
+	/**
+	 * Changes the status of a known url and moves its entry to the map that
+	 * matches the new status. An entry that already sits in that map (for
+	 * example when an error status is replaced by the success status) only
+	 * gets its status updated. Status values other than the three
+	 * <code>KnownUrlEntry</code> constants are ignored.
+	 * 
+	 * @param url
+	 *            url of the entry to update
+	 * @param status
+	 *            new status, compared case-insensitively
+	 * @throws RuntimeException
+	 *             if the status is supported but the url is not registered
+	 */
+	public void updateUrlStatus(String url, String status) {
+		final boolean toProcessed = isProcessedStatus(status);
+		if (!toProcessed
+				&& !KnownUrlEntry.STATUS_UNPROCESSED.equalsIgnoreCase(status)) {
+			// unsupported status: nothing to do
+			return;
+		}
+
+		Map<String, KnownUrlEntry> target = toProcessed ? processedURLs
+				: unprocessedURLs;
+		Map<String, KnownUrlEntry> other = toProcessed ? unprocessedURLs
+				: processedURLs;
+
+		KnownUrlEntry r = other.remove(url);
+		if (r == null) {
+			// not in the other map; it may already be in the target map
+			r = target.get(url);
+		}
+		if (r == null) {
+			throw new RuntimeException("Unknown url: '" + url + "'");
+		}
+		r.setStatus(status);
+		target.put(url, r);
+	}
+
+	/* writes one "status|depth|url" line */
+	private void writeRecord(BufferedWriter w, KnownUrlEntry ku)
+			throws IOException {
+
+		w.write(ku.getStatus() + FIELD_DELIMITER
+				+ String.valueOf(ku.getDepth()) + FIELD_DELIMITER + ku.getUrl());
+		w.newLine();
+
+	}
+
+}
diff --git a/src/org/yooreeka/util/internet/crawling/db/PageLinkDB.java b/src/org/yooreeka/util/internet/crawling/db/PageLinkDB.java
new file mode 100644
index 0000000..fce1cd0
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/db/PageLinkDB.java
@@ -0,0 +1,163 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.db;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.yooreeka.util.internet.crawling.util.FileUtils;
+
+/**
+ * File-backed store of the link structure between pages.
+ * <p>
+ * Outgoing links are kept per page and the incoming links are derived from
+ * them. {@link #init()} reads the data from <code>pagelinkdb.dat</code>
+ * inside the root directory and {@link #save()} writes it back as UTF-8
+ * text: a <code>page|url</code> line followed by one
+ * <code>outlink|url</code> line for each outgoing link of that page.
+ */
+public class PageLinkDB {
+	private static final String DB_FILENAME = "pagelinkdb.dat";
+
+	/* separates the record type from the url within one line */
+	private static final String FIELD_DELIMITER = "|";
+
+	private static final String TYPE_PAGE = "page";
+	private static final String TYPE_OUTLINK = "outlink";
+
+	private Map<String, Set<String>> pageOutLinks = new HashMap<String, Set<String>>();
+	private Map<String, Set<String>> pageInLinks = new HashMap<String, Set<String>>();
+
+	private File rootDir = null;
+
+	/* set by init(); load() and save() must not be used before that */
+	private File dbFile = null;
+
+	/**
+	 * @param f
+	 *            directory that holds (or will hold) the data file
+	 */
+	public PageLinkDB(File f) {
+		this.rootDir = f;
+	}
+
+	/**
+	 * Registers a page without adding any link. A page that is already
+	 * registered keeps its links.
+	 * 
+	 * @param pageUrl
+	 *            url of the page
+	 */
+	public void addLink(String pageUrl) {
+		if (!pageOutLinks.containsKey(pageUrl)) {
+			pageOutLinks.put(pageUrl, new TreeSet<String>());
+		}
+	}
+
+	/**
+	 * Records a link from one page to another, updating both the outgoing
+	 * links of the source and the incoming links of the target.
+	 * 
+	 * @param pageUrl
+	 *            url of the page that contains the link
+	 * @param outlinkUrl
+	 *            url the link points to
+	 */
+	public void addLink(String pageUrl, String outlinkUrl) {
+		Set<String> outLinks = pageOutLinks.get(pageUrl);
+		if (outLinks == null) {
+			outLinks = new TreeSet<String>();
+			pageOutLinks.put(pageUrl, outLinks);
+		}
+		outLinks.add(outlinkUrl);
+
+		Set<String> inLinks = pageInLinks.get(outlinkUrl);
+		if (inLinks == null) {
+			inLinks = new TreeSet<String>();
+			pageInLinks.put(outlinkUrl, inLinks);
+		}
+		inLinks.add(pageUrl);
+	}
+
+	/**
+	 * Deletes the root directory through <code>FileUtils.deleteDir</code>. The
+	 * in-memory maps are left untouched.
+	 */
+	public void delete() {
+		FileUtils.deleteDir(rootDir);
+	}
+
+	/**
+	 * @return the urls of the pages that link to the given url. For a url
+	 *         with recorded links this is the internal set, not a copy;
+	 *         otherwise a new empty set.
+	 */
+	public Set<String> getInlinks(String url) {
+		Set<String> result = pageInLinks.get(url);
+		return result != null ? result : new TreeSet<String>();
+	}
+
+	/**
+	 * @return the urls the given page links to. For a registered page this is
+	 *         the internal set, not a copy; otherwise a new empty set.
+	 */
+	public Set<String> getOutlinks(String url) {
+		Set<String> result = pageOutLinks.get(url);
+		return result != null ? result : new TreeSet<String>();
+	}
+
+	/**
+	 * Creates the root directory and the data file when they are missing and
+	 * then loads every record of the file into memory.
+	 * 
+	 * @throws RuntimeException
+	 *             if the file cannot be created or its content cannot be
+	 *             loaded
+	 */
+	public void init() {
+		rootDir.mkdirs();
+
+		this.dbFile = new File(rootDir, DB_FILENAME);
+		try {
+			// creates a new file if the file doesn't exist
+			dbFile.createNewFile();
+		} catch (IOException e) {
+			throw new RuntimeException("Can't create db file: '"
+					+ dbFile.getAbsolutePath() + "'.", e);
+		}
+
+		load();
+	}
+
+	/*
+	 * Reads the data file line by line. A "page" record selects the current
+	 * page; every other record is treated as an outlink of the current page.
+	 * Empty lines are skipped.
+	 */
+	private void load() {
+		try {
+			BufferedReader br = new BufferedReader(new InputStreamReader(
+					new FileInputStream(dbFile), "UTF-8"));
+			try {
+				String line = null;
+				String currentPage = null;
+				while ((line = br.readLine()) != null) {
+					if (line.length() == 0) {
+						continue;
+					}
+					int delimiterIndex = line.indexOf(FIELD_DELIMITER);
+					if (delimiterIndex < 0) {
+						throw new RuntimeException("Malformed record in '"
+								+ dbFile.getAbsolutePath() + "': '" + line
+								+ "'");
+					}
+					String type = line.substring(0, delimiterIndex);
+					String value = line.substring(delimiterIndex
+							+ FIELD_DELIMITER.length());
+					if (TYPE_PAGE.equalsIgnoreCase(type)) {
+						currentPage = value;
+						// register the page right away so that pages
+						// without outlinks survive a save/load round trip
+						addLink(currentPage);
+					} else {
+						if (currentPage == null) {
+							throw new RuntimeException(
+									"Outlink record without a preceding page record in '"
+											+ dbFile.getAbsolutePath()
+											+ "': '" + line + "'");
+						}
+						addLink(currentPage, value);
+					}
+				}
+			} finally {
+				// release the file handle even when a record is rejected
+				br.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to load data: ", e);
+		}
+	}
+
+	/**
+	 * Writes every registered page and its outgoing links to the data file,
+	 * replacing its previous content. {@link #init()} must have been called
+	 * before.
+	 * 
+	 * @throws RuntimeException
+	 *             wrapping the <code>IOException</code> when writing fails
+	 */
+	public void save() {
+		try {
+			BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+					new FileOutputStream(dbFile), "UTF-8"));
+			try {
+				for (Map.Entry<String, Set<String>> mapEntry : pageOutLinks
+						.entrySet()) {
+					writeRecord(bw, TYPE_PAGE, mapEntry.getKey());
+					for (String outlink : mapEntry.getValue()) {
+						writeRecord(bw, TYPE_OUTLINK, outlink);
+					}
+				}
+				bw.flush();
+			} finally {
+				// release the file handle even when a write fails
+				bw.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Failed to save data: ", e);
+		}
+	}
+
+	/* writes one "type|value" line */
+	private void writeRecord(BufferedWriter w, String id, String value)
+			throws IOException {
+		w.write(id + FIELD_DELIMITER + value);
+		w.newLine();
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/db/ProcessedDocsDB.java b/src/org/yooreeka/util/internet/crawling/db/ProcessedDocsDB.java
new file mode 100644
index 0000000..6424090
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/db/ProcessedDocsDB.java
@@ -0,0 +1,413 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.db;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.yooreeka.util.internet.crawling.model.Outlink;
+import org.yooreeka.util.internet.crawling.util.DocumentIdUtils;
+import org.yooreeka.util.internet.crawling.util.FileUtils;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+/**
+ * File-system store for processed documents.
+ * <p>
+ * Documents are organised in groups. Each group is a sub-directory of the
+ * root directory and contains one sub-directory per file type (content,
+ * txt, properties, outlinks). A document is stored as four files, one per
+ * type, that share the document's sequence as base name. All text is
+ * encoded as UTF-8.
+ * <p>
+ * {@link #init()} has to be called before any other method that works with
+ * groups, because it creates the group registry.
+ */
+public class ProcessedDocsDB {
+
+	/* the kinds of files kept per document: file extension + directory name */
+	private enum FileType {
+		CONTENT(".content", "content"), TXT(".txt", "txt"), PROPERTIES(
+				".properties", "properties"), OUTLINKS(".outlinks", "outlinks");
+
+		private final String ext;
+		private final String dir;
+
+		FileType(String ext, String dir) {
+			this.ext = ext;
+			this.dir = dir;
+		}
+
+		public String getDir() {
+			return dir;
+		}
+
+		public String getExt() {
+			return ext;
+		}
+	}
+
+	private File rootDirFile = null;
+
+	/* group id -> group directory; created by init() */
+	private Map<String, File> groupFiles = null;
+
+	private DocumentIdUtils docIdUtils = new DocumentIdUtils();
+
+	/**
+	 * @param rootDir
+	 *            directory that holds (or will hold) all group directories
+	 */
+	public ProcessedDocsDB(File rootDir) {
+		this.rootDirFile = rootDir;
+	}
+
+	/* returns parent/dirName, creating the directory when it is missing */
+	private File createDir(File parent, String dirName) {
+		File newDir = new File(parent, dirName);
+		if (!newDir.exists()) {
+			newDir.mkdir();
+		}
+		return newDir;
+	}
+
+	/*
+	 * Creates directories for a new group if they don't exist yet.
+	 */
+	private void createGroup(String groupId) {
+		File groupFile = groupFiles.get(groupId);
+		if (groupFile == null) {
+			groupFile = new File(rootDirFile, String.valueOf(groupId));
+			groupFile.mkdir();
+			createDir(groupFile, FileType.CONTENT.getDir());
+			createDir(groupFile, FileType.PROPERTIES.getDir());
+			createDir(groupFile, FileType.OUTLINKS.getDir());
+			createDir(groupFile, FileType.TXT.getDir());
+			groupFiles.put(groupFile.getName(), groupFile);
+		}
+	}
+
+	/**
+	 * Deletes the root directory through <code>FileUtils.deleteDir</code>. The
+	 * in-memory group registry is left untouched.
+	 */
+	public void delete() {
+		FileUtils.deleteDir(rootDirFile);
+	}
+
+	/**
+	 * @return a new list with the ids of all known groups
+	 */
+	public List<String> getAllGroupIds() {
+		return new ArrayList<String>(groupFiles.keySet());
+	}
+
+	/* UTF-8 bytes of the text; null is stored as an empty file */
+	private byte[] getBytes(String text) {
+		if (text == null) {
+			return new byte[0];
+		}
+		try {
+			return text.getBytes("UTF-8");
+		} catch (UnsupportedEncodingException e) {
+			throw new RuntimeException("Error while saving data: ", e);
+		}
+	}
+
+	private File getContentFile(String documentId) {
+		return getDocumentFile(documentId, FileType.CONTENT);
+	}
+
+	/*
+	 * Builds root/<group>/<type dir>/<sequence><type ext> for the document.
+	 * Group and sequence are derived from the document id by docIdUtils.
+	 */
+	private File getDocumentFile(String documentId, FileType type) {
+		String groupId = docIdUtils.getDocumentGroupId(documentId);
+		File groupDirFile = new File(rootDirFile, groupId);
+		File docDirFile = new File(groupDirFile, type.getDir());
+		String itemId = docIdUtils.getDocumentSequence(documentId);
+		return new File(docDirFile, itemId + type.getExt());
+	}
+
+	/**
+	 * @return a new list with the ids of all documents of all known groups
+	 */
+	public List<String> getDocumentIds() {
+		List<String> documentIds = new ArrayList<String>();
+		for (File groupFile : groupFiles.values()) {
+			documentIds.addAll(getDocumentIds(groupFile));
+		}
+		return documentIds;
+	}
+
+	/*
+	 * Lists the documents of one group directory by looking at the files in
+	 * its content directory. A null group or a missing content directory
+	 * yields an empty list.
+	 */
+	private List<String> getDocumentIds(File setFile) {
+		List<String> documentIds = new ArrayList<String>();
+		if (setFile == null) {
+			return documentIds;
+		}
+		final FileType type = FileType.CONTENT;
+		File contentDir = new File(setFile, type.getDir());
+		File[] dataFiles = contentDir.listFiles(new FilenameFilter() {
+			public boolean accept(File parent, String name) {
+				return name.endsWith(type.getExt());
+			}
+		});
+		// listFiles() returns null when the directory does not exist or
+		// cannot be read
+		if (dataFiles == null) {
+			return documentIds;
+		}
+
+		String groupId = setFile.getName();
+		for (File f : dataFiles) {
+			String name = f.getName();
+			// strip the extension that getDocumentFile() appended
+			String itemId = name.substring(0, name.length()
+					- type.getExt().length());
+			documentIds.add(docIdUtils.getDocumentId(groupId, itemId));
+		}
+		return documentIds;
+	}
+
+	/**
+	 * @param groupId
+	 *            id of the group
+	 * @return a new list with the ids of the documents of that group; empty
+	 *         for an unknown group
+	 */
+	public List<String> getDocumentIds(String groupId) {
+		return getDocumentIds(groupFiles.get(groupId));
+	}
+
+	private File getOutlinksFile(String documentId) {
+		return getDocumentFile(documentId, FileType.OUTLINKS);
+	}
+
+	private File getPropertiesFile(String documentId) {
+		return getDocumentFile(documentId, FileType.PROPERTIES);
+	}
+
+	/* decodes UTF-8 bytes */
+	private String getText(byte[] data) {
+		try {
+			return new String(data, "UTF-8");
+		} catch (UnsupportedEncodingException e) {
+			throw new RuntimeException("Error loading data: ", e);
+		}
+	}
+
+	private File getTextFile(String documentId) {
+		return getDocumentFile(documentId, FileType.TXT);
+	}
+
+	/**
+	 * Prepares the store, keeping any data that already exists: every
+	 * directory found in the root directory is registered as a group. The
+	 * root directory is created when it is missing.
+	 */
+	public void init() {
+		init(true);
+	}
+
+	/*
+	 * Builds the group registry. With keepExistingData == false the root
+	 * directory is wiped and recreated instead of being scanned.
+	 */
+	private void init(boolean keepExistingData) {
+		groupFiles = new HashMap<String, File>();
+
+		if (!rootDirFile.exists()) {
+			rootDirFile.mkdirs();
+			return;
+		}
+
+		if (!keepExistingData) {
+			/* delete all existing data and create brand new directory */
+			FileUtils.deleteDir(rootDirFile);
+			rootDirFile.mkdirs();
+			return;
+		}
+
+		/* load all existing file groups */
+		File[] existingFileGroups = rootDirFile.listFiles(new FileFilter() {
+			public boolean accept(File f) {
+				return f.isDirectory();
+			}
+		});
+		// listFiles() returns null when the root is not a readable directory
+		if (existingFileGroups != null) {
+			for (File groupDirFile : existingFileGroups) {
+				groupFiles.put(groupDirFile.getName(), groupDirFile);
+			}
+		}
+	}
+
+	/**
+	 * @param groupId
+	 *            id of the group
+	 * @return a new list with every document of the group, fully loaded
+	 */
+	public List<ProcessedDocument> loadAllDocumentsInGroup(String groupId) {
+		List<ProcessedDocument> allDocsInGroup = new ArrayList<ProcessedDocument>();
+
+		for (String docId : getDocumentIds(groupId)) {
+			allDocsInGroup.add(loadDocument(docId));
+		}
+
+		return allDocsInGroup;
+	}
+
+	private String loadContent(File f) {
+		return getText(loadData(f));
+	}
+
+	/*
+	 * Reads the whole file into memory. A single read() call is not
+	 * guaranteed to fill the buffer, so reading continues until the buffer
+	 * is full or the end of the file is reached.
+	 */
+	private byte[] loadData(File f) {
+		byte[] data = new byte[(int) f.length()];
+		try {
+			BufferedInputStream in = new BufferedInputStream(
+					new FileInputStream(f));
+			try {
+				int offset = 0;
+				while (offset < data.length) {
+					int count = in.read(data, offset, data.length - offset);
+					if (count < 0) {
+						break;
+					}
+					offset += count;
+				}
+				if (offset < data.length) {
+					// the file turned out shorter than reported: drop the
+					// unused tail instead of returning zero padding
+					byte[] actual = new byte[offset];
+					System.arraycopy(data, 0, actual, 0, offset);
+					data = actual;
+				}
+			} finally {
+				in.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException("Error while reading file: '"
+					+ f.getAbsolutePath() + "'", e);
+		}
+		return data;
+	}
+
+	/**
+	 * Loads previously saved document details.
+	 * 
+	 * @param documentId
+	 *            id of the document to load
+	 * @return the document assembled from its properties, content, text and
+	 *         outlinks files
+	 * @throws RuntimeException
+	 *             if one of the files cannot be read
+	 */
+	public ProcessedDocument loadDocument(String documentId) {
+		Map<String, String> properties = loadProperties(
+				getPropertiesFile(documentId), ":");
+		String content = loadContent(getContentFile(documentId));
+		String text = loadText(getTextFile(documentId));
+		List<Outlink> outlinks = loadOutlinks(getOutlinksFile(documentId));
+
+		ProcessedDocument doc = new ProcessedDocument();
+		doc.setDocumentType(properties.get("doctype"));
+		doc.setDocumentURL(properties.get("url"));
+		doc.setText(text);
+		doc.setContent(content);
+		doc.setOutlinks(outlinks);
+		doc.setDocumentId(documentId);
+		doc.setDocumentTitle(properties.get("title"));
+
+		return doc;
+	}
+
+	/* reads "url|anchor text" lines; the result order is not defined */
+	private List<Outlink> loadOutlinks(File f) {
+		List<Outlink> outlinks = new ArrayList<Outlink>();
+		for (Map.Entry<String, String> entry : loadProperties(f, "|")
+				.entrySet()) {
+			outlinks.add(new Outlink(entry.getKey(), entry.getValue()));
+		}
+		return outlinks;
+	}
+
+	/*
+	 * Reads "key<delimiter>value" lines from a UTF-8 file. Only the first
+	 * occurrence of the delimiter splits a line, so values may contain the
+	 * delimiter. Empty lines are skipped; a line without delimiter is an
+	 * error.
+	 */
+	private Map<String, String> loadProperties(File f, String delimiter) {
+		Map<String, String> props = new HashMap<String, String>();
+		try {
+			BufferedReader reader = new BufferedReader(new InputStreamReader(
+					new FileInputStream(f), "UTF-8"));
+			try {
+				String line = null;
+				while ((line = reader.readLine()) != null) {
+					if (line.length() == 0) {
+						continue;
+					}
+
+					int delimiterIndex = line.indexOf(delimiter);
+					if (delimiterIndex < 0) {
+						throw new RuntimeException("Malformed line in file '"
+								+ f.getAbsolutePath() + "': '" + line + "'");
+					}
+					String key = line.substring(0, delimiterIndex);
+					String value = line.substring(delimiterIndex
+							+ delimiter.length());
+					props.put(key, value);
+				}
+			} finally {
+				// release the file handle even when a line is rejected
+				reader.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(
+					"Error while reading metadata from file: '"
+							+ f.getAbsolutePath() + "'", e);
+		}
+		return props;
+	}
+
+	private String loadText(File f) {
+		return getText(loadData(f));
+	}
+
+	private void saveContent(File f, String content) {
+		saveData(f, getBytes(content));
+	}
+
+	/* writes the bytes to the file, replacing its previous content */
+	private void saveData(File f, byte[] content) {
+		try {
+			BufferedOutputStream bout = new BufferedOutputStream(
+					new FileOutputStream(f));
+			try {
+				bout.write(content);
+				bout.flush();
+			} finally {
+				// release the file handle even when the write fails
+				bout.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Persists the document: content, text, properties (url, title, doctype)
+	 * and outlinks each go to their own file. The directories of the
+	 * document's group are created when needed.
+	 * 
+	 * @param doc
+	 *            document to store
+	 * @throws RuntimeException
+	 *             wrapping the <code>IOException</code> when writing fails
+	 */
+	public void saveDocument(ProcessedDocument doc) {
+		final String documentId = doc.getDocumentId();
+		createGroup(docIdUtils.getDocumentGroupId(documentId));
+
+		saveContent(getContentFile(documentId), doc.getContent());
+		saveText(getTextFile(documentId), doc.getText());
+
+		Map<String, String> props = new HashMap<String, String>();
+		props.put("url", doc.getDocumentURL());
+		props.put("title", doc.getDocumentTitle());
+		props.put("doctype", doc.getDocumentType());
+		saveProperties(getPropertiesFile(documentId), props, ":");
+
+		saveOutlinks(getOutlinksFile(documentId), doc.getOutlinks());
+	}
+
+	/*
+	 * Writes one "url|anchor text" line per distinct link url. A null list
+	 * is stored as an empty file.
+	 */
+	private void saveOutlinks(File f, List<Outlink> outlinks) {
+		Map<String, String> props = new HashMap<String, String>();
+		if (outlinks != null) {
+			for (Outlink outlink : outlinks) {
+				props.put(outlink.getLinkUrl(), outlink.getText());
+			}
+		}
+		saveProperties(f, props, "|");
+	}
+
+	/* writes the map as "key<delimiter>value" lines, encoded as UTF-8 */
+	private void saveProperties(File f, Map<String, String> props,
+			String delimiter) {
+		try {
+			BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+					new FileOutputStream(f), "UTF-8"));
+			try {
+				for (Map.Entry<String, String> entry : props.entrySet()) {
+					writeProperty(bw, entry.getKey(), entry.getValue(),
+							delimiter);
+				}
+				bw.flush();
+			} finally {
+				// release the file handle even when a write fails
+				bw.close();
+			}
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	private void saveText(File f, String text) {
+		saveData(f, getBytes(text));
+	}
+
+	/* writes one line; a null value leaves the part after the delimiter empty */
+	private void writeProperty(BufferedWriter w, String key, String value,
+			String delimiter) throws IOException {
+		w.write(key);
+		w.write(delimiter);
+		if (value != null) {
+			w.write(value);
+		}
+		w.newLine();
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/model/FetchedDocument.java b/src/org/yooreeka/util/internet/crawling/model/FetchedDocument.java
new file mode 100644
index 0000000..f62974c
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/model/FetchedDocument.java
@@ -0,0 +1,143 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.model;
+
+import java.nio.charset.Charset;
+import java.util.Map;
+
+import org.yooreeka.util.P;
+import org.yooreeka.util.parsing.common.AbstractDocument;
+
+/**
+ * Collection of raw (unprocessed) data about crawled/fetched document.
+ * <p>
+ * This is a plain mutable holder: a new instance has every field unset
+ * (<code>null</code>) until the corresponding setter is called, and the
+ * getters hand out the stored references without copying them.
+ */
+public class FetchedDocument implements AbstractDocument {
+
+	/* charset used by print() when the document's own charset is unusable */
+	private static final String FALLBACK_CHARSET = "UTF-8";
+
+	/*
+	 * Document id that was assigned by the FetcherModule.
+	 */
+	private String documentId;
+
+	/*
+	 * Document URL. URL that was used to fetch the document.
+	 */
+	private String url;
+
+	/*
+	 * MIME content type that was derived from transport protocol (HTTP
+	 * headers), document content or document URL.
+	 */
+	private String contentType;
+
+	/*
+	 * Character encoding that was derived from transport protocol (HTTP
+	 * headers), document content.
+	 */
+	private String contentCharset;
+
+	/*
+	 * Raw document content.
+	 */
+	private byte[] documentContent;
+
+	/*
+	 * Various optional meta data about the document that was extracted from the
+	 * protocol.
+	 */
+	private Map<String, String> documentMetadata;
+
+	public FetchedDocument() {
+	}
+
+	public String getContentCharset() {
+		return contentCharset;
+	}
+
+	/**
+	 * @return number of bytes of the raw content; 0 when no content has been
+	 *         set
+	 */
+	public long getContentLength() {
+		return documentContent == null ? 0 : documentContent.length;
+	}
+
+	public String getContentType() {
+		return contentType;
+	}
+
+	/**
+	 * @return the stored content array itself (not a copy), or
+	 *         <code>null</code> when no content has been set
+	 */
+	public byte[] getDocumentContent() {
+		return documentContent;
+	}
+
+	public String getDocumentId() {
+		return documentId;
+	}
+
+	/**
+	 * @return the stored metadata map itself (not a copy), or
+	 *         <code>null</code> when no metadata has been set
+	 */
+	public Map<String, String> getDocumentMetadata() {
+		return documentMetadata;
+	}
+
+	public String getDocumentURL() {
+		return url;
+	}
+
+	/**
+	 * Prints the id, url, content type and charset of the document followed
+	 * by its content decoded as text. Missing content is printed as empty
+	 * text; a missing or unsupported charset falls back to UTF-8.
+	 */
+	public void print() {
+		P.println("Document ID    : " + this.documentId);
+		P.println("Content URL    : " + this.url);
+		P.println("Content Type   : " + this.contentType);
+		P.println("Content Charset: " + this.contentCharset);
+		P.hline();
+		P.println("CONTENT\n" + decodeContent());
+		P.hline();
+	}
+
+	/* decodes the raw content for display purposes */
+	private String decodeContent() {
+		if (documentContent == null) {
+			return "";
+		}
+		Charset charset;
+		try {
+			charset = Charset.forName(contentCharset);
+		} catch (IllegalArgumentException e) {
+			// thrown for a null, malformed or unsupported charset name
+			charset = Charset.forName(FALLBACK_CHARSET);
+		}
+		return new String(documentContent, charset);
+	}
+
+	public void setContentCharset(String contentCharset) {
+		this.contentCharset = contentCharset;
+	}
+
+	public void setContentType(String contentType) {
+		this.contentType = contentType;
+	}
+
+	public void setDocumentContent(byte[] data) {
+		this.documentContent = data;
+	}
+
+	public void setDocumentId(String documentId) {
+		this.documentId = documentId;
+	}
+
+	public void setDocumentMetadata(Map<String, String> metadata) {
+		this.documentMetadata = metadata;
+	}
+
+	public void setDocumentURL(String url) {
+		this.url = url;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/model/KnownUrlEntry.java b/src/org/yooreeka/util/internet/crawling/model/KnownUrlEntry.java
new file mode 100644
index 0000000..3c0c1e2
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/model/KnownUrlEntry.java
@@ -0,0 +1,77 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.model;
+
+/** Mutable holder for one URL together with a status string and a depth value. */
+public class KnownUrlEntry {
+	/* Status strings; presumably the values intended for the status field. */
+	public static final String STATUS_UNPROCESSED = "unprocessed";
+	public static final String STATUS_PROCESSED_SUCCESS = "processed";
+	public static final String STATUS_PROCESSED_ERROR = "error";
+
+	private String url;
+	private String status;
+	private int depth;
+
+	public KnownUrlEntry() { // leaves url and status null and depth 0
+		this(null, null, 0);
+	}
+
+	public KnownUrlEntry(final String url, final String status, final int depth) {
+		this.depth = depth;
+		this.status = status;
+		this.url = url;
+	}
+
+	public String getUrl() {
+		return this.url;
+	}
+
+	public void setUrl(final String url) {
+		this.url = url;
+	}
+
+	public String getStatus() { // whatever string was stored; not checked against STATUS_*
+		return this.status;
+	}
+
+	public void setStatus(final String status) {
+		this.status = status;
+	}
+
+	public int getDepth() {
+		return this.depth;
+	}
+
+	public void setDepth(final int depth) {
+		this.depth = depth;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/model/Outlink.java b/src/org/yooreeka/util/internet/crawling/model/Outlink.java
new file mode 100644
index 0000000..b73943a
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/model/Outlink.java
@@ -0,0 +1,55 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.model;
+
+/** Immutable pair of a link URL and the text associated with it. */
+public class Outlink {
+	private final String linkUrl;
+	private final String text;
+
+	public Outlink(final String linkUrl, final String text) { // both stored as given
+		this.text = text;
+		this.linkUrl = linkUrl;
+	}
+
+	public String getLinkUrl() {
+		return this.linkUrl;
+	}
+
+	public String getText() {
+		return this.text;
+	}
+
+	@Override
+	public String toString() { // renders as [link:<linkUrl>, text:<text>]
+		return "[link:" + this.linkUrl + ", text:" + this.text + "]";
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/common/Transport.java b/src/org/yooreeka/util/internet/crawling/transport/common/Transport.java
new file mode 100644
index 0000000..4b50f5b
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/common/Transport.java
@@ -0,0 +1,43 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.common;
+
+import org.yooreeka.util.internet.crawling.model.FetchedDocument;
+
+/** Contract shared by the crawler transports (file and HTTP implementations exist). */
+public interface Transport {
+	void init(); // set up whatever the implementation needs before fetching
+
+	/* Retrieves the document at url; failures are reported as TransportException. */
+	FetchedDocument fetch(String url) throws TransportException;
+	boolean pauseRequired(); // NOTE(review): presumably "pause between fetches" - confirm
+	void clear(); // discard state held by the implementation
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/common/TransportException.java b/src/org/yooreeka/util/internet/crawling/transport/common/TransportException.java
new file mode 100644
index 0000000..f20d037
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/common/TransportException.java
@@ -0,0 +1,47 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.common;
+
+/** Checked exception used to report a failure inside a crawler transport. */
+public class TransportException extends Exception {
+	/* Distinct SVUID for the org.yooreeka classes */
+	private static final long serialVersionUID = -2821101482190551697L;
+
+	/** Creates the exception with a message and the underlying cause. */
+	public TransportException(final String message, final Throwable t) {
+		super(message, t);
+	}
+
+	/** Creates the exception with a message only; no cause is recorded. */
+	public TransportException(final String message) {
+		super(message);
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/file/FileTransport.java b/src/org/yooreeka/util/internet/crawling/transport/file/FileTransport.java
new file mode 100644
index 0000000..69493c1
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/file/FileTransport.java
@@ -0,0 +1,134 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.file;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.HashMap;
+
+import org.yooreeka.util.internet.crawling.model.FetchedDocument;
+import org.yooreeka.util.internet.crawling.transport.common.Transport;
+import org.yooreeka.util.internet.crawling.transport.common.TransportException;
+
+/** Transport that loads documents from local files addressed by URL. */
+public class FileTransport implements Transport {
+	/* Maximum document length, in bytes, handed to loadData. */
+	private static final int MAX_DOCUMENT_LENGTH = 512 * 1024; // 512K
+	private static final String DEFAULT_CONTENT_TYPE = "text/html";
+	private static final String DEFAULT_CONTENT_CHARSET = "UTF-8";
+
+	public FileTransport() {
+	}
+
+	public void clear() {
+		// DO NOTHING
+	}
+
+	/**
+	 * Builds a FetchedDocument from the file named by targetURL: ".doc" files get
+	 * type "application/msword", all others "text/html"; charset is always "UTF-8".
+	 */
+	private FetchedDocument createDocument(String targetURL)
+			throws IOException, FileTransportException {
+		File f;
+		try {
+			f = new File(new URL(targetURL).toURI());
+		} catch (URISyntaxException e) {
+			throw new FileTransportException(
+					"Error while converting url to file path: ", e);
+		}
+
+		/* IOException will be thrown for documents that exceed max length */
+		byte[] data = loadData(f, MAX_DOCUMENT_LENGTH);
+
+		FetchedDocument doc = new FetchedDocument();
+		doc.setContentType(f.getName().endsWith(".doc")
+				? "application/msword" : DEFAULT_CONTENT_TYPE);
+		doc.setDocumentURL(targetURL);
+		doc.setContentCharset(DEFAULT_CONTENT_CHARSET);
+		doc.setDocumentContent(data);
+		doc.setDocumentMetadata(new HashMap<String, String>());
+		return doc;
+	}
+
+	/**
+	 * Loads documentUrl; a failure is printed and rethrown as FileTransportException.
+	 */
+	public FetchedDocument fetch(String documentUrl) throws TransportException {
+		try {
+			return createDocument(documentUrl);
+		} catch (Exception eX) {
+			System.out.println("ERROR:\n" + eX.getMessage());
+			throw new FileTransportException("Failed to fetch url: '"
+					+ documentUrl + "': ", eX);
+		}
+	}
+
+	public void init() {
+		// DO NOTHING
+	}
+
+	/**
+	 * Reads the file fully into a byte array, closing the stream even on failure.
+	 * Throws IOException when the file is longer than maxLength bytes.
+	 */
+	private byte[] loadData(File f, int maxLength) throws IOException {
+		if (f.length() > maxLength) {
+			throw new IOException("The document is too long (doc: "
+					+ f.getCanonicalPath() + ", size: " + f.length()
+					+ ", max size: " + maxLength + ")");
+		}
+
+		byte[] data = new byte[(int) f.length()];
+		InputStream in = new BufferedInputStream(new FileInputStream(f));
+		try {
+			int offset = 0;
+			int i;
+			while ((offset < data.length)
+					&& (i = in.read(data, offset, data.length - offset)) >= 0) {
+				offset += i;
+			}
+		} finally {
+			// Runs when read() throws too, so the file handle is never leaked.
+			in.close();
+		}
+		return data;
+	}
+
+	public boolean pauseRequired() { // this transport never asks for a pause
+		return false;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/file/FileTransportException.java b/src/org/yooreeka/util/internet/crawling/transport/file/FileTransportException.java
new file mode 100644
index 0000000..6181fd8
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/file/FileTransportException.java
@@ -0,0 +1,49 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.file;
+
+import org.yooreeka.util.internet.crawling.transport.common.TransportException;
+
+/** TransportException subtype thrown by the file-based transport. */
+public class FileTransportException extends TransportException {
+	/* Unique identifier for serialization */
+	private static final long serialVersionUID = -6380601992826152509L;
+
+	/** Creates the exception with a message and the underlying cause. */
+	public FileTransportException(final String msg, final Throwable t) {
+		super(msg, t);
+	}
+
+	/** Creates the exception with a message only. */
+	public FileTransportException(final String msg) {
+		super(msg);
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransport.java b/src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransport.java
new file mode 100644
index 0000000..3554f1a
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransport.java
@@ -0,0 +1,260 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.http;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.CookieStore;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.protocol.ClientContext;
+import org.apache.http.cookie.Cookie;
+import org.apache.http.impl.client.BasicCookieStore;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.protocol.BasicHttpContext;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.util.EntityUtils;
+import org.yooreeka.util.internet.crawling.model.FetchedDocument;
+import org.yooreeka.util.internet.crawling.transport.common.Transport;
+import org.yooreeka.util.internet.crawling.transport.common.TransportException;
+
+/**
+ * Transport that fetches documents over HTTP using Apache HttpClient.
+ * init() must run before fetch(): fetch() dereferences the client, cookie
+ * store and context that init() creates.
+ */
+public class HTTPTransport implements Transport {
+
+	/*
+	 * Maximum document length that transport will attempt to download
+	 * without issuing a warning ...
+	 */
+	private static final int MAX_DOCUMENT_LENGTH = 8 * 1024 * 1024; // 8Mb
+	private static final String DEFAULT_CONTENT_TYPE = "text/html";
+	private static final String DEFAULT_CONTENT_CHARSET = "UTF-8";
+
+	HttpClient httpclient = null;
+	CookieStore cookieStore = null;
+	HttpContext localContext = null;
+
+	public HTTPTransport() {
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.util.internet.crawling.transport.common.Transport#clear()
+	 */
+	public void clear() {
+		httpclient = null;
+		// initialState = null;
+	}
+
+	/**
+	 * Builds a FetchedDocument from a non-null response entity: reads the body,
+	 * runs it through HTTPUtils.decodeContent if a Content-Encoding header is set,
+	 * and stores the MIME type and charset picked by HTTPUtils (or the defaults).
+	 */
+	private FetchedDocument createDocument(String targetURL, HttpEntity entity)
+			throws IOException, HTTPTransportException {
+		if (entity.getContentLength() > MAX_DOCUMENT_LENGTH)
+			System.out.println("WARNING: Retrieved document larger than "
+					+ MAX_DOCUMENT_LENGTH + " [bytes]");
+
+		/*
+		 * Read the whole body. The length reported by the entity is not used
+		 * to size a buffer: it is negative when the server does not announce
+		 * one, and copying fixed-size chunks regardless of how many bytes a
+		 * read() returned would corrupt or overflow the result.
+		 */
+		byte[] data = EntityUtils.toByteArray(entity);
+		if (data == null) {
+			data = new byte[0]; // entity without a content stream
+		}
+
+		/*
+		 * Check if server sent content in compressed form and uncompress the
+		 * content if necessary.
+		 */
+		Header contentEncodingHeader = entity.getContentEncoding();
+		if (contentEncodingHeader != null) {
+			data = HTTPUtils.decodeContent(contentEncodingHeader.getValue(),
+					data);
+		}
+
+		/* 'Content-Type' HTTP header value */
+		String contentTypeHeaderValue = null;
+		Header header = entity.getContentType();
+		if (header != null) {
+			contentTypeHeaderValue = header.getValue();
+		}
+
+		/*
+		 * Determine MIME type of the document.
+		 * 
+		 * It is easy if we have Content-Type http header. In cases when this
+		 * header is missing or for protocols that don't pass metadata about the
+		 * documents (ftp://, file://) we would have to resort to url and/or
+		 * content analysis to determine MIME type.
+		 */
+		String contentType = HTTPUtils.getContentType(contentTypeHeaderValue,
+				targetURL, data);
+		if (contentType == null) {
+			contentType = DEFAULT_CONTENT_TYPE;
+		}
+
+		/*
+		 * Determine Character encoding used in the document. In some cases it
+		 * may be specified in the http header, in html file itself or we have
+		 * to perform content analysis to choose the encoding.
+		 */
+		String contentCharset = HTTPUtils.getCharset(contentTypeHeaderValue,
+				contentType, data);
+		if (contentCharset == null) {
+			contentCharset = DEFAULT_CONTENT_CHARSET;
+		}
+
+		FetchedDocument doc = new FetchedDocument();
+		doc.setContentType(contentType);
+		doc.setDocumentURL(targetURL);
+		doc.setContentCharset(contentCharset);
+		doc.setDocumentContent(data);
+		doc.setDocumentMetadata(new HashMap<String, String>());
+		return doc;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.util.internet.crawling.transport.common.Transport
+	 * #fetch(java.lang.String)
+	 */
+	public FetchedDocument fetch(String documentUrl) throws TransportException {
+
+		FetchedDocument doc = null;
+
+		HttpGet httpget = new HttpGet(documentUrl);
+
+		System.out.println("executing request " + httpget.getURI());
+
+		// Pass local context as a parameter
+		HttpResponse response;
+		try {
+			response = httpclient.execute(httpget, localContext);
+		} catch (IOException e1) {
+			// Report the failed request to the caller; carrying on would only
+			// dereference a null response below.
+			throw new TransportException("Failed to fetch url: '" + documentUrl
+					+ "': ", e1);
+		}
+		HttpEntity entity = response.getEntity();
+
+		System.out.println("----------------------------------------");
+		System.out.println(response.getStatusLine());
+		if (entity != null) {
+			System.out.println("Response content length: "
+					+ entity.getContentLength());
+		}
+		List<Cookie> cookies = cookieStore.getCookies();
+		for (int i = 0; i < cookies.size(); i++) {
+			System.out.println("Local cookie: " + cookies.get(i));
+		}
+
+		try {
+			if (entity == null) {
+				throw new HTTPTransportException("No content returned for url: '"
+						+ documentUrl + "'");
+			}
+			doc = createDocument(documentUrl, entity);
+		} catch (IOException e) {
+			throw new TransportException("Failed to fetch url: '" + documentUrl
+					+ "': ", e);
+		} finally {
+			// Consume response content
+			try {
+				EntityUtils.consume(entity);
+			} catch (IOException e) {
+				// Best effort: the document was already built, or has failed.
+				e.printStackTrace();
+			}
+
+			System.out.println("----------------------------------------");
+
+			// When HttpClient instance is no longer needed,
+			// shut down the connection manager to ensure
+			// immediate deallocation of all system resources
+			// NOTE(review): this runs after every fetch, so the client looks
+			// single-use; confirm callers re-run init() before fetching again.
+			httpclient.getConnectionManager().shutdown();
+		}
+
+		return doc;
+	}
+
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.yooreeka.util.internet.crawling.transport.common.Transport#init()
+	 */
+	public void init() {
+
+		System.out.println("Initializing HTTPTransport ...");
+
+		httpclient = new DefaultHttpClient();
+
+		// Create a local instance of cookie store
+		cookieStore = new BasicCookieStore();
+
+		// Create local HTTP context
+		localContext = new BasicHttpContext();
+
+		// Bind custom cookie store to the local context
+		localContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);
+
+		// Settings that earlier, commented-out HttpClient-3.x-style code applied
+		// here are not configured, so the client's defaults are in effect:
+		// - connection and socket timeouts of 30000 ms
+		// - CookiePolicy.BROWSER_COMPATIBILITY
+		// - at most 1 connection per host
+		// - at most 10 connections in total
+	}
+
+	public boolean pauseRequired() { // this transport always asks for a pause
+		return true;
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransportException.java b/src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransportException.java
new file mode 100644
index 0000000..497f5c6
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/http/HTTPTransportException.java
@@ -0,0 +1,46 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.http;
+
+import org.yooreeka.util.internet.crawling.transport.common.TransportException;
+
+/** TransportException subtype for failures of the HTTP transport. */
+public class HTTPTransportException extends TransportException {
+	private static final long serialVersionUID = 546574708933803471L;
+
+	public HTTPTransportException(final String msg, final Throwable t) { // message + cause
+		super(msg, t);
+	}
+
+	public HTTPTransportException(final String msg) { // message only
+		super(msg);
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/transport/http/HTTPUtils.java b/src/org/yooreeka/util/internet/crawling/transport/http/HTTPUtils.java
new file mode 100644
index 0000000..80ab133
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/transport/http/HTTPUtils.java
@@ -0,0 +1,142 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.transport.http;
+
+class HTTPUtils {
+
+	/**
+	 * Decodes content according to its Content-Encoding. This is just a place
+	 * holder: nothing is decoded, the known compressed encodings are rejected
+	 * and content with any other (or no) encoding is returned as is.
+	 * 
+	 * @param contentEncoding
+	 *            value of the Content-Encoding header; may be null.
+	 * @param encodedContent
+	 *            content received from the server
+	 * @return the encodedContent array itself.
+	 * @throws HTTPTransportException
+	 *             if the encoding is 'gzip', 'deflate' or 'compress'.
+	 */
+	public static byte[] decodeContent(String contentEncoding,
+			byte[] encodedContent) throws HTTPTransportException {
+		if ("gzip".equalsIgnoreCase(contentEncoding)) {
+			throw new HTTPTransportException(
+					"Content-Encoding 'gzip' is not supported.");
+		} else if ("deflate".equalsIgnoreCase(contentEncoding)) {
+			throw new HTTPTransportException(
+					"Content-Encoding 'deflate' is not supported.");
+		} else if ("compress".equalsIgnoreCase(contentEncoding)) {
+			throw new HTTPTransportException(
+					"Content-Encoding 'compress' is not supported.");
+		}
+		return encodedContent;
+	}
+
+	/**
+	 * Extracts the value of the 'charset' parameter from a Content-Type header
+	 * value, without any parameters that follow it or surrounding quotes.
+	 * 
+	 * @return upper-cased charset name, or null if the header value is null or
+	 *         has no charset parameter.
+	 */
+	private static String getCharset(String contentTypeHeaderValue) {
+		final String ATTR_NAME = "charset=";
+		if (contentTypeHeaderValue == null) {
+			return null;
+		}
+		// Locale.ROOT keeps the match independent of the default locale.
+		int i = contentTypeHeaderValue.toLowerCase(java.util.Locale.ROOT)
+				.indexOf(ATTR_NAME);
+		if (i < 0) {
+			return null;
+		}
+		String charset = contentTypeHeaderValue.substring(i + ATTR_NAME.length());
+		// Drop parameters that follow, e.g. "utf-8; boundary=x".
+		int end = charset.indexOf(';');
+		if (end > -1) {
+			charset = charset.substring(0, end);
+		}
+		// Drop whitespace and quotes, e.g. charset="utf-8".
+		charset = charset.replace('"', ' ').replace('\'', ' ').trim();
+		return charset.toUpperCase(java.util.Locale.ROOT);
+	}
+
+	/**
+	 * Extracts charset from the Content-Type HTTP header. If the header does
+	 * not name a charset, an attempt could be made to determine it from the
+	 * content type and data (e.g. the 'meta' tag of a 'text/html' document,
+	 * see http://www.w3.org/TR/html4/charset.html#h-5.2.2); that detection
+	 * is not implemented, so contentType and data are currently unused.
+	 * 
+	 * @param contentTypeHeaderValue
+	 *            value of the Content-Type header; may be null.
+	 * @param contentType
+	 *            type of data. Can be used to interpret the data.
+	 * @param data
+	 *            document content
+	 * @return upper-cased charset, or null if the header has no charset.
+	 */
+	public static String getCharset(String contentTypeHeaderValue,
+			String contentType, byte[] data) {
+		// Placeholder: charset detection based on content analysis can be
+		// added here for the case where the header gives no charset.
+		return getCharset(contentTypeHeaderValue);
+	}
+
+	/**
+	 * Extracts MIME type. Ideally the value should be extracted from HTTP
+	 * header. But if it is missing an attempt can be made to determine content
+	 * type based on URL and/or data; that fallback is not implemented, so url
+	 * and data are currently unused.
+	 * 
+	 * @param contentTypeHeaderValue
+	 *            value of the Content-Type header; may be null.
+	 * @param url
+	 *            document URL.
+	 * @param data
+	 *            document content
+	 * @return MIME type without parameters and surrounding whitespace, or
+	 *         null if the header value is null or blank.
+	 */
+	public static String getContentType(String contentTypeHeaderValue,
+			String url, byte[] data) {
+		if (contentTypeHeaderValue == null
+				|| contentTypeHeaderValue.trim().length() == 0) {
+			// Placeholder: url and content could be used to guess the type.
+			return null;
+		}
+		int i = contentTypeHeaderValue.indexOf(';');
+		String type = (i > -1) ? contentTypeHeaderValue.substring(0, i)
+				: contentTypeHeaderValue;
+		// "text/html ; charset=x" must yield "text/html", not "text/html ".
+		return type.trim();
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/util/DocumentIdUtils.java b/src/org/yooreeka/util/internet/crawling/util/DocumentIdUtils.java
new file mode 100644
index 0000000..b45359e
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/util/DocumentIdUtils.java
@@ -0,0 +1,56 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.util;
+
+public class DocumentIdUtils {
+	private static final String GROUP_PREFIX = "g";
+	private static final String SEQUENCE_PREFIX = "d";
+	private static final String ID_COMPONENTS_DELIMITER = "-";
+	/** Group part of an id; splits at the last "-d" so group ids containing '-' survive. */
+	public String getDocumentGroupId(String documentId) {
+		int i = documentId.lastIndexOf(ID_COMPONENTS_DELIMITER + SEQUENCE_PREFIX);
+		return documentId.substring(GROUP_PREFIX.length(), i < 0 ? documentId.length() : i);
+	}
+	/** Builds a document id from a group id and a numeric sequence. */
+	public String getDocumentId(String docGroupId, int docSequence) {
+		return getDocumentId(docGroupId, String.valueOf(docSequence));
+	}
+	/** Builds {@code "g" + group + "-d" + sequence} from the shared constants. */
+	public String getDocumentId(String docGroupId, String docSequence) {
+		return GROUP_PREFIX + docGroupId + ID_COMPONENTS_DELIMITER + SEQUENCE_PREFIX + docSequence;
+	}
+	/** Sequence part of an id; throws IllegalArgumentException if the id has none. */
+	public String getDocumentSequence(String documentId) {
+		int i = documentId.lastIndexOf(ID_COMPONENTS_DELIMITER + SEQUENCE_PREFIX);
+		if (i < 0) { throw new IllegalArgumentException("No sequence in document id: '" + documentId + "'"); }
+		return documentId.substring(i + ID_COMPONENTS_DELIMITER.length() + SEQUENCE_PREFIX.length());
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/util/FileUtils.java b/src/org/yooreeka/util/internet/crawling/util/FileUtils.java
new file mode 100644
index 0000000..c8bb618
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/util/FileUtils.java
@@ -0,0 +1,130 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.util;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+
+/**
+ * Utility methods for files and directories.
+ */
+public class FileUtils {
+
+	/**
+	 * Deletes directory with its content.
+	 * 
+	 * @param dir
+	 *            directory to delete.
+	 * @return true if delete was successful; false if dir is null, is not a
+	 *         directory, cannot be listed, or an entry could not be deleted
+	 *         (entries deleted before the failure stay deleted).
+	 */
+	public static boolean deleteDir(java.io.File dir) {
+		if (dir == null || !dir.isDirectory()) {
+			return false;
+		}
+		// listFiles() returns null on an I/O error or when the directory
+		// cannot be read; report that as a failure instead of throwing NPE.
+		File[] entries = dir.listFiles();
+		if (entries == null) {
+			return false;
+		}
+		for (File f : entries) {
+			boolean success = f.isDirectory() ? deleteDir(f) : f.delete();
+			if (!success) {
+				return false;
+			}
+		}
+		return dir.delete();
+	}
+
+	/**
+	 * Deletes directory with its content.
+	 * 
+	 * @param dir
+	 *            path of the directory to delete.
+	 * @return true if delete was successful; false if the path does not exist
+	 *         or is not a directory.
+	 */
+	public static boolean deleteDir(String dir) {
+		// deleteDir(File) itself rejects anything but an existing directory.
+		return deleteDir(new File(dir));
+	}
+
+	/**
+	 * Finds files that start with specified prefix.
+	 * 
+	 * @param directory
+	 *            directory with files to search
+	 * @param filenamePrefix
+	 *            defines files that will be returned.
+	 * @return files with names that start with specified prefix, or null when
+	 *         File.listFiles returns null for the directory.
+	 */
+	public static File[] findMatchingFiles(final File directory,
+			final String filenamePrefix) {
+		return directory.listFiles(new FilenameFilter() {
+			public boolean accept(File dir, String name) {
+				return name.startsWith(filenamePrefix);
+			}
+		});
+	}
+
+	/**
+	 * Makes sure the directory exists, optionally starting from scratch.
+	 * 
+	 * @param dir
+	 *            directory to prepare.
+	 * @param useExisting
+	 *            if false, an existing directory is deleted with its content
+	 *            and created again.
+	 * @throws IOException
+	 *             if the directory could not be deleted or created.
+	 */
+	public static void prepareDir(File dir, boolean useExisting)
+			throws IOException {
+		if (dir.exists() && !useExisting && !FileUtils.deleteDir(dir)) {
+			throw new IOException("Failed to delete directory: '"
+					+ dir.getAbsolutePath() + "'");
+		}
+		if (!dir.exists() && !dir.mkdir()) {
+			throw new IOException("Failed to create directory: '"
+					+ dir.getAbsolutePath() + "'");
+		}
+	}
+
+	/* All methods are static. There should be no instances of this class. */
+	private FileUtils() {
+		// empty
+	}
+
+}
diff --git a/src/org/yooreeka/util/internet/crawling/util/UrlGroup.java b/src/org/yooreeka/util/internet/crawling/util/UrlGroup.java
new file mode 100644
index 0000000..147db6e
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/util/UrlGroup.java
@@ -0,0 +1,71 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Group of URLs for specific host and protocol.
+ */
+public class UrlGroup {
+	private String protocol;
+	private String host;
+	private List<String> urls = new ArrayList<String>();
+	/** Creates an empty group for the given protocol and host. */
+	public UrlGroup(String protocol, String host) {
+		this.host = host;
+		this.protocol = protocol;
+	}
+	/** Appends a URL; it is not checked against the protocol or host. */
+	public void addUrl(String url) {
+		this.urls.add(url);
+	}
+	/** Returns the host given at construction. */
+	public String getHost() {
+		return this.host;
+	}
+	/** Returns the protocol given at construction. */
+	public String getProtocol() {
+		return this.protocol;
+	}
+	/** Returns the internal list of added URLs (not a copy). */
+	public List<String> getUrls() {
+		return this.urls;
+	}
+	/** Summarizes the group as protocol, host and number of URLs. */
+	@Override
+	public String toString() {
+		StringBuilder text = new StringBuilder("[protocol: ");
+		text.append(protocol).append(", host: ").append(host);
+		return text.append(", url count: ").append(urls.size()).append("]").toString();
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/util/UrlUtils.java b/src/org/yooreeka/util/internet/crawling/util/UrlUtils.java
new file mode 100644
index 0000000..cca56e2
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/util/UrlUtils.java
@@ -0,0 +1,65 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.util;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class UrlUtils {
+	/**
+	 * Partitions URLs into groups that share the same protocol and host. The
+	 * order of the returned groups is that of a HashMap's values.
+	 * 
+	 * @throws RuntimeException
+	 *             wrapping MalformedURLException for the first invalid URL.
+	 */
+	public static List<UrlGroup> groupByProtocolAndHost(List<String> urls) {
+		Map<String, UrlGroup> groupsByKey = new HashMap<String, UrlGroup>();
+		for (String url : urls) {
+			URL parsed;
+			try {
+				parsed = new URL(url);
+			} catch (MalformedURLException e) {
+				throw new RuntimeException("Invalid url format url: '" + url + "': ", e);
+			}
+			String key = parsed.getProtocol() + "|" + parsed.getHost();
+			if (!groupsByKey.containsKey(key)) {
+				groupsByKey.put(key, new UrlGroup(parsed.getProtocol(), parsed.getHost()));
+			}
+			groupsByKey.get(key).addUrl(url);
+		}
+		return new ArrayList<UrlGroup>(groupsByKey.values());
+	}
+}
diff --git a/src/org/yooreeka/util/internet/crawling/util/ValueToIndexMapping.java b/src/org/yooreeka/util/internet/crawling/util/ValueToIndexMapping.java
new file mode 100644
index 0000000..c10050d
--- /dev/null
+++ b/src/org/yooreeka/util/internet/crawling/util/ValueToIndexMapping.java
@@ -0,0 +1,93 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.internet.crawling.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Maps string values to an index. This class is used for mapping strings to
+ * arrays or matrices. Index is zero-based.
+ */
+public class ValueToIndexMapping implements java.io.Serializable {
+	/** Unique identifier for serialization. */
+	private static final long serialVersionUID = -2077767183898369580L;
+
+	/** Index that the next previously unseen value will receive. */
+	private int nextIndex;
+
+	/** Lookup from value to its assigned index. */
+	private Map<String, Integer> valueMapping;
+
+	/** Reverse lookup from index back to the value. */
+	private Map<Integer, String> indexMapping;
+
+	/** Creates an empty mapping; the first value will get index 0. */
+	public ValueToIndexMapping() {
+		this.nextIndex = 0;
+		this.valueMapping = new HashMap<String, Integer>();
+		this.indexMapping = new HashMap<Integer, String>();
+	}
+
+	/**
+	 * Looks up the index of a value, registering the value first if it has
+	 * not been seen before.
+	 * 
+	 * @param value
+	 *            value to look up.
+	 * @return the zero-based index assigned to the value.
+	 */
+	public int getIndex(String value) {
+		Integer known = valueMapping.get(value);
+		if (known != null) {
+			return known;
+		}
+		int assigned = nextIndex++;
+		valueMapping.put(value, assigned);
+		indexMapping.put(assigned, value);
+		return assigned;
+	}
+
+	/**
+	 * Number of distinct values registered so far.
+	 */
+	public int getSize() {
+		return valueMapping.size();
+	}
+
+	/**
+	 * Looks up the value registered under an index.
+	 * @return the value, or null if no value has that index.
+	 */
+	public String getValue(int index) {
+		return indexMapping.get(index);
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/CosineDistance.java b/src/org/yooreeka/util/metrics/CosineDistance.java
new file mode 100644
index 0000000..d1db320
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/CosineDistance.java
@@ -0,0 +1,58 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+import java.util.Arrays;
+
+/**
+ * Distance derived from cosine similarity: 1 - cos(x, y).
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class CosineDistance implements NumericDistance {
+	private CosineSimilarity cosin = new CosineSimilarity();
+
+	/**
+	 * @return 1 - similarity, never negative; NaN if the similarity is NaN.
+	 * @throws RuntimeException if the cosine similarity of x and y is negative.
+	 */
+	public double getDistance(double[] x, double[] y) {
+		double sim = cosin.sim(x, y);
+		if (sim < 0.0) {
+			throw new RuntimeException(
+					"Can't use this value to calculate distance." + "x[]="
+							+ Arrays.toString(x) + ", y[]="
+							+ Arrays.toString(y) + ", cosin.sim(x,y)=" + sim);
+		}
+		// Rounding can push sim of parallel vectors just above 1.0; clamp at 0.
+		return Math.max(0.0, 1.0 - sim);
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/CosineSimilarity.java b/src/org/yooreeka/util/metrics/CosineSimilarity.java
new file mode 100644
index 0000000..4c42533
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/CosineSimilarity.java
@@ -0,0 +1,76 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+public class CosineSimilarity implements SimilarityMeasure {
+	private static final long serialVersionUID = -3470234210362615980L;
+
+	/** Sum of the component products, taken over the length of v1. */
+	private double getDotProduct(double[] v1, double[] v2) {
+		double total = 0.0;
+		int k = 0;
+		while (k < v1.length) {
+			total += v1[k] * v2[k];
+			k++;
+		}
+		return total;
+	}
+
+	/** Euclidean length of the vector. */
+	private double getNorm(double[] v) {
+		double squares = 0.0;
+		for (double component : v) {
+			squares += component * component;
+		}
+		return Math.sqrt(squares);
+	}
+
+	/**
+	 * Cosine of the angle between two numeric vectors: the dot product divided
+	 * by the product of the norms. The result is NaN when a norm is zero.
+	 */
+	public double sim(double[] v1, double[] v2) {
+		double numerator = getDotProduct(v1, v2);
+		double denominator = getNorm(v1) * getNorm(v2);
+		return numerator / denominator;
+	}
+
+	/**
+	 * Cosine similarity of two term arrays. Both arrays are first turned into
+	 * term frequency vectors by TermFrequencyBuilder and those vectors are
+	 * then compared with sim. Unlike for numerical vectors, this definition
+	 * of similarity is to a large extent arbitrary.
+	 */
+	public double similarity(String[] x, String[] y) {
+		double[][] vectors = TermFrequencyBuilder.buildTermFrequencyVectors(x, y);
+		return sim(vectors[0], vectors[1]);
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/CosineSimilarityMeasure.java b/src/org/yooreeka/util/metrics/CosineSimilarityMeasure.java
new file mode 100644
index 0000000..874fe1f
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/CosineSimilarityMeasure.java
@@ -0,0 +1,56 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+public class CosineSimilarityMeasure {
+	/**
+	 * Cosine of the angle between v1 and v2: their dot product divided by the
+	 * product of their Euclidean norms. NaN when either norm is zero.
+	 */
+	public double calculate(double[] v1, double[] v2) {
+		return getDotProduct(v1, v2) / (getNorm(v1) * getNorm(v2));
+	}
+	/** Sum of the component products, taken over the length of v1. */
+	private double getDotProduct(double[] v1, double[] v2) {
+		double total = 0.0;
+		for (int k = 0; k < v1.length; k++) {
+			total += v1[k] * v2[k];
+		}
+		return total;
+	}
+	private double getNorm(double[] v) {
+		double squares = 0.0;
+		for (double component : v) {
+			squares += component * component;
+		}
+		return Math.sqrt(squares);
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/EuclideanDistance.java b/src/org/yooreeka/util/metrics/EuclideanDistance.java
new file mode 100644
index 0000000..b0e245b
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/EuclideanDistance.java
@@ -0,0 +1,55 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+public class EuclideanDistance implements NumericDistance {
+	public EuclideanDistance() {
+		// empty
+	}
+	/** Square root of the summed squared differences, over the length of x. */
+	public double getDistance(double[] x, double[] y) {
+		double squaredSum = 0.0;
+		int k = 0;
+		while (k < x.length) {
+			squaredSum += Math.pow(x[k] - y[k], 2);
+			k++;
+		}
+		return Math.sqrt(squaredSum);
+	}
+	/** Same computation for boxed values; a null element fails on unboxing. */
+	public double getDistance(Double[] x, Double[] y) {
+		double squaredSum = 0.0;
+		for (int k = 0; k < x.length; k++) {
+			squaredSum += Math.pow(x[k] - y[k], 2);
+		}
+		return Math.sqrt(squaredSum);
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/JaccardCoefficient.java b/src/org/yooreeka/util/metrics/JaccardCoefficient.java
new file mode 100644
index 0000000..7a49a3f
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/JaccardCoefficient.java
@@ -0,0 +1,77 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Calculates the Jaccard coefficient of two collections of strings: the number
+ * of distinct items they share divided by the number of distinct items overall.
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class JaccardCoefficient implements SimilarityMeasure {
+
+	private static final long serialVersionUID = -5051498381470492495L;
+
+	public JaccardCoefficient() {
+		// empty
+	}
+
+	/** Returns |X n Y| / |X u Y| over distinct items, or 0.0 if either list is empty. */
+	public double similarity(List<String> x, List<String> y) {
+		if (x.isEmpty() || y.isEmpty()) {
+			return 0.0;
+		}
+		Set<String> distinctX = new HashSet<String>(x);
+		// Hash y once: retainAll(List) would scan the whole list per element of x.
+		Set<String> distinctY = new HashSet<String>(y);
+
+		Set<String> intersectionXY = new HashSet<String>(distinctX);
+		intersectionXY.retainAll(distinctY);
+
+		// |X u Y| = |X| + |Y| - |X n Y|, so the union need not be materialised.
+		int unionSize = distinctX.size() + distinctY.size() - intersectionXY.size();
+		return (double) intersectionXY.size() / (double) unionSize;
+	}
+
+	/** Array overload; rejects null arrays and the case where both arrays are empty. */
+	public double similarity(String[] x, String[] y) {
+		if (x == null || y == null || (x.length == 0 && y.length == 0)) {
+			throw new IllegalArgumentException(
+					"The arguments x and y must be not NULL and either x or y must be non-empty.");
+		}
+		return similarity(Arrays.asList(x), Arrays.asList(y));
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/JaccardDistance.java b/src/org/yooreeka/util/metrics/JaccardDistance.java
new file mode 100644
index 0000000..6da33a6
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/JaccardDistance.java
@@ -0,0 +1,57 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009    Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *
+ */
+package org.yooreeka.util.metrics;
+
+import java.util.List;
+
+import org.yooreeka.util.C;
+
+/**
+ * Jaccard distance between two lists of strings, computed as
+ * <code>C.ONE_DOUBLE</code> minus their {@link JaccardCoefficient} similarity.
+ *
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class JaccardDistance {
+
+	// Similarity measure this distance is derived from (package-visible, as before).
+	JaccardCoefficient jc;
+
+	public JaccardDistance() {
+		this.jc = new JaccardCoefficient();
+	}
+
+	/** Returns the complement of the Jaccard similarity of <code>x</code> and <code>y</code>. */
+	public double getDistance(List<String> x, List<String> y) {
+		final double similarity = this.jc.similarity(x, y);
+		return C.ONE_DOUBLE - similarity;
+	}
+}
diff --git a/src/org/yooreeka/util/metrics/NumericDistance.java b/src/org/yooreeka/util/metrics/NumericDistance.java
new file mode 100644
index 0000000..7d5218f
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/NumericDistance.java
@@ -0,0 +1,40 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+/**
+ * Contract for a distance computed between two <code>double</code> arrays.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public interface NumericDistance {
+	/** Returns the distance between the vectors <code>x</code> and <code>y</code>. */
+	double getDistance(double[] x, double[] y);
+}
diff --git a/src/org/yooreeka/util/metrics/SimilarityMeasure.java b/src/org/yooreeka/util/metrics/SimilarityMeasure.java
new file mode 100644
index 0000000..d00a032
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/SimilarityMeasure.java
@@ -0,0 +1,43 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+/**
+ * Interface for similarity measures; implementations are {@link java.io.Serializable}.
+ */
+public interface SimilarityMeasure extends java.io.Serializable {
+
+	/**
+	 * Calculates the similarity value between two sets. Each set is represented
+	 * by an array of strings; the arrays may have different lengths.
+	 */
+	public double similarity(String[] x, String[] y);
+}
diff --git a/src/org/yooreeka/util/metrics/TermFrequencyBuilder.java b/src/org/yooreeka/util/metrics/TermFrequencyBuilder.java
new file mode 100644
index 0000000..a4c5dca
--- /dev/null
+++ b/src/org/yooreeka/util/metrics/TermFrequencyBuilder.java
@@ -0,0 +1,78 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.metrics;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class TermFrequencyBuilder {
+
+	// Bit flags recording which of the two inputs a term was seen in.
+	private static final int IN_X = 0x01;
+	private static final int IN_Y = 0x02;
+
+	/**
+	 * Calculates term frequency vectors based on two sets of terms. Both vectors
+	 * have one component per distinct term of <code>x</code> and <code>y</code>
+	 * combined; a component is 1.0 if the term occurs in that input and 0.0
+	 * otherwise, so repeated terms are not counted more than once.
+	 *
+	 * @param x terms of the first set
+	 * @param y terms of the second set
+	 * @return two rows: row 0 is the vector for <code>x</code>, row 1 the vector
+	 *         for <code>y</code>; both follow the same (unspecified) term order
+	 */
+	public static double[][] buildTermFrequencyVectors(String[] x, String[] y) {
+		Map<String, Integer> allAttributes = new HashMap<String, Integer>();
+		for (String s : x) {
+			allAttributes.put(s, IN_X);
+		}
+		for (String s : y) {
+			// OR the flag in instead of testing containsKey(): a term repeated in
+			// y[] but absent from x[] must not end up flagged as a term of x[].
+			Integer seen = allAttributes.get(s);
+			allAttributes.put(s, seen == null ? IN_Y : (seen | IN_Y));
+		}
+
+		// create term frequency vectors
+		int n = allAttributes.size();
+		double[] termFrequencyForX = new double[n];
+		double[] termFrequencyForY = new double[n];
+		int i = 0;
+		for (int flags : allAttributes.values()) {
+			termFrequencyForX[i] = flags & IN_X;
+			termFrequencyForY[i] = (flags & IN_Y) >> 1;
+			i++;
+		}
+
+		return new double[][] { termFrequencyForX, termFrequencyForY };
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/common/AbstractDocument.java b/src/org/yooreeka/util/parsing/common/AbstractDocument.java
new file mode 100644
index 0000000..d9c6553
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/AbstractDocument.java
@@ -0,0 +1,48 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+/**
+ * Accessors describing a document: its identifier, URL, content type, content
+ * charset and content bytes. {@link DocumentParser#parse(AbstractDocument)} takes one as input.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public interface AbstractDocument {
+	public String getContentCharset();
+
+	public String getContentType();
+
+	public byte[] getDocumentContent();
+
+	public String getDocumentId();
+
+	public String getDocumentURL();
+}
diff --git a/src/org/yooreeka/util/parsing/common/DataEntry.java b/src/org/yooreeka/util/parsing/common/DataEntry.java
new file mode 100644
index 0000000..c313dd5
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/DataEntry.java
@@ -0,0 +1,40 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+/**
+ * Base type for data entries; see {@link DocumentParser#getDataEntry(int)}.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public abstract class DataEntry {
+	/** Implemented by subclasses to supply a <code>DataEntry</code>. */
+	public abstract DataEntry getDataEntry();
+}
diff --git a/src/org/yooreeka/util/parsing/common/DataField.java b/src/org/yooreeka/util/parsing/common/DataField.java
new file mode 100644
index 0000000..b6fde6d
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/DataField.java
@@ -0,0 +1,68 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+/**
+ * A named field together with the {@link DataType} of its values.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public class DataField {
+
+	private String name;
+	private DataType dataType;
+
+	public DataField(String name, DataType dataType) {
+		this.dataType = dataType;
+		this.name = name;
+	}
+
+	public String getName() {
+		return this.name;
+	}
+
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public DataType getDataType() {
+		return this.dataType;
+	}
+
+	public void setDataType(DataType dataType) {
+		this.dataType = dataType;
+	}
+
+	/** Placeholder check: no validation is performed, so every input is accepted. */
+	public boolean validate(String s) {
+		// Nothing is inspected here; the argument is currently unused.
+		return true;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/common/DataType.java b/src/org/yooreeka/util/parsing/common/DataType.java
new file mode 100644
index 0000000..67abe91
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/DataType.java
@@ -0,0 +1,40 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+/**
+ * Data types that can be assigned to a {@link DataField}.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ */
+public enum DataType {
+
+	INTEGER, LONG, FLOAT, DOUBLE, STRING, DATE
+}
diff --git a/src/org/yooreeka/util/parsing/common/DocumentParser.java b/src/org/yooreeka/util/parsing/common/DocumentParser.java
new file mode 100644
index 0000000..eae3a1e
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/DocumentParser.java
@@ -0,0 +1,44 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+/**
+ * Interface for parsing a document that was retrieved/fetched during the
+ * collection phase.
+ */
+public interface DocumentParser {
+	/** NOTE(review): presumably the i-th entry produced by this parser - confirm the index base. */
+	public DataEntry getDataEntry(int i);
+	/** Parses <code>doc</code> into a {@link ProcessedDocument}. */
+	public ProcessedDocument parse(AbstractDocument doc)
+			throws DocumentParserException;
+
+}
diff --git a/src/org/yooreeka/util/parsing/common/DocumentParserException.java b/src/org/yooreeka/util/parsing/common/DocumentParserException.java
new file mode 100644
index 0000000..533aac2
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/DocumentParserException.java
@@ -0,0 +1,45 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+/** Checked exception declared by DocumentParser.parse and thrown by DocumentParserFactory. */
+public class DocumentParserException extends Exception {
+	// Distinct SVUID for the org.yooreeka.* classes
+	private static final long serialVersionUID = 4938858042489090351L;
+	/** Creates the exception with a detail message and the underlying cause. */
+	public DocumentParserException(String msg, Throwable t) {
+		super(msg, t);
+	}
+	/** Creates the exception with a detail message only. */
+	public DocumentParserException(String msg) {
+		super(msg);
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/common/DocumentParserFactory.java b/src/org/yooreeka/util/parsing/common/DocumentParserFactory.java
new file mode 100644
index 0000000..9103e24
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/DocumentParserFactory.java
@@ -0,0 +1,68 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+import org.yooreeka.util.parsing.html.HTMLDocumentParser;
+import org.yooreeka.util.parsing.msword.MSWordDocumentParser;
+
+public class DocumentParserFactory {
+
+	private static DocumentParserFactory instance = new DocumentParserFactory();
+
+	private DocumentParserFactory() {
+		// instances are obtained through getInstance() only
+	}
+
+	/** Returns the factory instance created when this class was initialised. */
+	public static DocumentParserFactory getInstance() {
+		return instance;
+	}
+
+	/**
+	 * Creates a new <code>DocumentParser</code> for the given document type,
+	 * which is compared case-insensitively with the supported type names.
+	 * 
+	 * @param type
+	 *            document type.
+	 * @return a new HTML or MS Word document parser.
+	 * @throws DocumentParserException
+	 *             if <code>type</code> is not one of the supported types.
+	 */
+	public DocumentParser getDocumentParser(String type) throws DocumentParserException {
+		if (ProcessedDocument.TYPE_HTML.equalsIgnoreCase(type)) {
+			return new HTMLDocumentParser();
+		}
+		if (ProcessedDocument.TYPE_MSWORD.equalsIgnoreCase(type)) {
+			return new MSWordDocumentParser();
+		}
+		throw new DocumentParserException("Unsupported document type: '" + type + "'.");
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/common/ProcessedDocument.java b/src/org/yooreeka/util/parsing/common/ProcessedDocument.java
new file mode 100644
index 0000000..6dbe7b2
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/common/ProcessedDocument.java
@@ -0,0 +1,198 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.common;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.mozilla.universalchardet.UniversalDetector;
+import org.yooreeka.util.P;
+import org.yooreeka.util.internet.crawling.model.Outlink;
+
+/**
+ * Represents Processed document with attributes that we are interested in.
+ */
+public class ProcessedDocument implements AbstractDocument {
+
+	public static final String TYPE_TEXT = "text/plain";
+	public static final String TYPE_HTML = "text/html";
+	public static final String TYPE_MSWORD = "application/msword";
+
+	/** Unique document id. */
+	private String documentId;
+
+	/** All document outlinks (links that document has to other documents). */
+	private List<Outlink> outlinks = new ArrayList<Outlink>();
+
+	/** URL that was used to retrieve the document. */
+	private String documentURL;
+
+	/** Document title. */
+	private String title;
+
+	/**
+	 * Processed document content. In case of HTML doc it can be HTML with only
+	 * relevant tags ({@code <P>}, {@code <B>},..) preserved.
+	 */
+	private String content;
+
+	/** Text extracted from the document with all formatting removed. */
+	private String text;
+
+	/** Document type. */
+	private String documentType;
+
+	public ProcessedDocument() {
+	}
+
+	public String getContent() {
+		return this.content;
+	}
+
+	/**
+	 * Guesses the character encoding of the content with a
+	 * <code>UniversalDetector</code> and prints the outcome via <code>P.println</code>.
+	 *
+	 * @return the detected charset name, or <code>null</code> if none was detected.
+	 */
+	@Override
+	public String getContentCharset() {
+		byte[] buf = new byte[4096];
+
+		ByteArrayInputStream fis = new ByteArrayInputStream(getContent().getBytes());
+
+		// (1) Create the detector.
+		UniversalDetector detector = new UniversalDetector(null);
+
+		// (2) Feed it data until the input ends or the detector reports it is done.
+		int nread;
+		try {
+			while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
+				detector.handleData(buf, 0, nread);
+			}
+		} catch (IOException e) {
+			// Not expected for an in-memory stream; report it and carry on.
+			e.printStackTrace();
+		}
+		// (3) Tell the detector that there is no more data.
+		detector.dataEnd();
+
+		// (4) Fetch the result; null means that nothing was detected.
+		String encoding = detector.getDetectedCharset();
+		if (encoding != null) {
+			P.println("Detected encoding = " + encoding);
+		} else {
+			P.println("No encoding detected.");
+		}
+
+		// (5) Reset the detector's state.
+		detector.reset();
+		return encoding;
+	}
+
+	@Override
+	public String getContentType() {
+		return getDocumentType();
+	}
+
+	/**
+	 * Encodes the content with the detected charset. When none was detected the
+	 * platform default charset (the one the detection itself ran on) is used
+	 * instead of letting <code>Charset.forName(null)</code> throw.
+	 */
+	@Override
+	public byte[] getDocumentContent() {
+		String charsetName = getContentCharset();
+		Charset charset = (charsetName == null) ? Charset.defaultCharset() : Charset.forName(charsetName);
+		return getContent().getBytes(charset);
+	}
+
+	public String getDocumentId() {
+		return documentId;
+	}
+
+	public String getDocumentTitle() {
+		return this.title;
+	}
+
+	public String getDocumentType() {
+		return documentType;
+	}
+
+	public String getDocumentURL() {
+		return documentURL;
+	}
+
+	public List<Outlink> getOutlinks() {
+		return outlinks;
+	}
+
+	public String getText() {
+		return text;
+	}
+
+	public void setContent(String content) {
+		this.content = content;
+	}
+
+	public void setDocumentId(String docId) {
+		this.documentId = docId;
+	}
+
+	public void setDocumentTitle(String title) {
+		this.title = title;
+	}
+
+	public void setDocumentType(String docType) {
+		this.documentType = docType;
+	}
+
+	public void setDocumentURL(String documentURL) {
+		this.documentURL = documentURL;
+	}
+
+	public void setOutlinks(List<Outlink> outlinks) {
+		this.outlinks = outlinks;
+	}
+
+	public void setText(String text) {
+		this.text = text;
+	}
+
+	@Override
+	public String toString() {
+		return "[docId: " + documentId + ", type: " + documentType + ", url: "
+				+ documentURL + "]";
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/csv/CSVDocument.java b/src/org/yooreeka/util/parsing/csv/CSVDocument.java
new file mode 100644
index 0000000..31f3006
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/csv/CSVDocument.java
@@ -0,0 +1,93 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.csv;
+
+import java.util.ArrayList;
+
+import org.yooreeka.util.P;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+/**
+ * A <tt>CSVDocument</tt> is an <tt>ArrayList</tt> of <tt>CSVEntry</tt>s
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class CSVDocument extends ProcessedDocument {
+
+	private CSVEntry headers;
+	private ArrayList<CSVEntry> csvData;
+	private boolean hasHeaders;
+
+	public CSVDocument() {
+		csvData = new ArrayList<CSVEntry>();
+	}
+	
+	public CSVDocument(ArrayList<CSVEntry> data) {
+		csvData = data;
+	}
+
+	public CSVEntry getHeaders() {
+		return headers;
+	}
+	
+	public boolean hasHeaders() {
+		return	hasHeaders;
+	}
+	
+	public void hasHeaders(boolean val) {
+		hasHeaders = val;
+	}
+
+	/**
+	 * @return the csvData
+	 */
+	public ArrayList<CSVEntry> getCsvData() {
+		return csvData;
+	}
+	
+	public void print(String printSeparator) {
+		P.hline();
+		P.println(getHeaders().toString(printSeparator));
+		P.hline();
+		for (CSVEntry e : csvData) {
+			P.println(e.toString(printSeparator));
+		}
+		P.hline();
+	}
+
+	/**
+	 * @param headers the headers to set
+	 */
+	public void setHeaders(CSVEntry headers) {
+		this.headers = headers;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/csv/CSVEntry.java b/src/org/yooreeka/util/parsing/csv/CSVEntry.java
new file mode 100644
index 0000000..7578f38
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/csv/CSVEntry.java
@@ -0,0 +1,108 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.csv;
+
+import org.yooreeka.util.parsing.common.DataEntry;
+
+/**
+ * A <tt>CSVEntry</tt> is simply an array of <tt>String</tt>s. The default
+ * separator is the comma character, i.e. ",".
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class CSVEntry extends DataEntry {
+
+	public static final String DEFAULT_SEPARATOR = ",";
+	private String separator;
+
+	private String[] data;
+
+	public CSVEntry(String csvLine) {
+		this(csvLine, null);
+	}
+
+	/**
+	 * Splits <code>csvLine</code> into its values.
+	 *
+	 * @param csvLine one line of CSV text.
+	 * @param sepChar the separator, taken literally; <code>null</code> selects
+	 *            {@link #DEFAULT_SEPARATOR}.
+	 */
+	public CSVEntry(String csvLine, String sepChar) {
+		setSeparator(sepChar == null ? CSVEntry.DEFAULT_SEPARATOR : sepChar);
+
+		// String.split expects a regex, so the separator is quoted to keep
+		// characters such as "|" or "." literal; the -1 limit keeps trailing
+		// empty values, so a line like "a,b,," still yields four columns.
+		data = csvLine.split(java.util.regex.Pattern.quote(getSeparator()), -1);
+	}
+
+	public String[] getData() {
+		return data;
+	}
+
+	@Override
+	public DataEntry getDataEntry() {
+		return this;
+	}
+
+	@Override
+	public String toString() {
+		return toString(CSVEntry.DEFAULT_SEPARATOR);
+	}
+
+	/**
+	 * Joins the values of this entry.
+	 * @param printSeparator text placed between consecutive values.
+	 * @return the joined values, without a trailing separator.
+	 */
+	public String toString(String printSeparator) {
+		StringBuilder sb = new StringBuilder();
+		for (int i = 0; i < data.length; i++) {
+			if (i > 0) {
+				sb.append(printSeparator);
+			}
+			sb.append(data[i]);
+		}
+		return sb.toString();
+	}
+
+	/** @return the separator */
+	public String getSeparator() {
+		return separator;
+	}
+
+	/** @param separatorChar the separator to set */
+	public void setSeparator(String separatorChar) {
+		this.separator = separatorChar;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/csv/CSVFile.java b/src/org/yooreeka/util/parsing/csv/CSVFile.java
new file mode 100644
index 0000000..833c472
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/csv/CSVFile.java
@@ -0,0 +1,149 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.csv;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+import org.yooreeka.util.parsing.common.DataField;
+import org.yooreeka.util.parsing.common.DataType;
+
+/**
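+ * Describes a CSV file (path, separator, header flag) and reads it into a
+ * <tt>CSVDocument</tt> through a <tt>CSVParser</tt>.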
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class CSVFile {
+
+	/** File to read. */
+	private File file;
+
+	/** Value separator; <code>null</code> until {@link #setSeparator} is called. */
+	private String separator;
+
+	/** Result of the most recent {@link #read()}; <code>null</code> before that. */
+	private CSVDocument doc;
+
+	// Whether a CSV file has Headers
+	private boolean hasHeaders;
+
+	/**
+	 * @param fileName path of the CSV file.
+	 * @param hasHeaders whether the file has headers.
+	 * @param schema accepted but not used by this class at present.
+	 */
+	public CSVFile(String fileName, boolean hasHeaders, CSVSchema schema) {
+		this.hasHeaders = hasHeaders;
+		file = new File(fileName);
+	}
+
+	/**
+	 * @return the headers of the document that was read, or <code>null</code>
+	 *         if nothing has been read yet or the document has no headers.
+	 */
+	public CSVEntry getHeaders() {
+		if (doc == null || !doc.hasHeaders()) {
+			return null;
+		}
+		return doc.getHeaders();
+	}
+
+	public boolean hasHeaders() {
+		return hasHeaders;
+	}
+
+	/**
+	 * Parses the file into a {@link CSVDocument} and remembers the result.
+	 *
+	 * @return the parsed document.
+	 * @throws IOException if the file cannot be opened or read.
+	 */
+	public CSVDocument read() throws IOException {
+		// try-with-resources closes the reader even when parsing fails
+		try (BufferedReader bReader = new BufferedReader(new FileReader(file))) {
+			CSVDocument parsed = new CSVParser(this).parse(bReader);
+			// Propagate this file's header flag to the document so that
+			// getHeaders() reflects what the file was declared to contain.
+			parsed.hasHeaders(hasHeaders);
+			doc = parsed;
+		}
+		return doc;
+	}
+
+	/**
+	 * @return the doc
+	 */
+	public CSVDocument getDoc() {
+		return doc;
+	}
+
+	/**
+	 * Demo: reads the CSV file named by the first argument.
+	 *
+	 * @param args <code>args[0]</code> is the path of the file to read.
+	 * @throws IOException if the file cannot be read.
+	 */
+	public static void main(String[] args) throws IOException {
+		if (args.length < 1) {
+			System.err.println("Usage: CSVFile <csv-file>");
+			return;
+		}
+
+		CSVSchema s = new CSVSchema();
+		s.addColumn(new DataField("Customer Id", DataType.LONG));
+		s.addColumn(new DataField("Customer Status", DataType.STRING));
+		s.addColumn(new DataField("Total Order  amt, USD", DataType.DOUBLE));
+		s.addColumn(new DataField("Content Id", DataType.STRING));
+		s.addColumn(new DataField("Title/Journal Id", DataType.LONG));
+		s.addColumn(new DataField("Title/Journal Name", DataType.STRING));
+		s.addColumn(new DataField("Title/Journal Publisher", DataType.STRING));
+
+		CSVFile f = new CSVFile(args[0], true, s);
+		f.read();
+	}
+
+	/**
+	 * @return the separator
+	 */
+	public String getSeparator() {
+		return separator;
+	}
+
+	/**
+	 * @param val the separator to set
+	 */
+	public void setSeparator(String val) {
+		separator = val;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/csv/CSVParser.java b/src/org/yooreeka/util/parsing/csv/CSVParser.java
new file mode 100644
index 0000000..6808a14
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/csv/CSVParser.java
@@ -0,0 +1,135 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.csv;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.Charset;
+
+import org.yooreeka.util.parsing.common.AbstractDocument;
+import org.yooreeka.util.parsing.common.DataEntry;
+import org.yooreeka.util.parsing.common.DocumentParser;
+import org.yooreeka.util.parsing.common.DocumentParserException;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+/**
+ * Parses CSV text, line by line, into a <tt>CSVDocument</tt>.
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class CSVParser implements DocumentParser {
+
+	/** Document produced by the most recent parse; <code>null</code> before that. */
+	private CSVDocument d;
+
+	/** Supplies the separator and the header flag used while parsing. */
+	private CSVFile csvFile;
+
+	/** Number of lines read by the most recent parse, header line included. */
+	private long linesParsed = 0;
+
+	/**
+	 * @param f the file description consulted for the separator and header flag.
+	 */
+	public CSVParser(CSVFile f) {
+		this.csvFile = f;
+	}
+
+	@Override
+	public DataEntry getDataEntry(int i) {
+		return d.getCsvData().get(i);
+	}
+
+	public long getLinesParsed() {
+		return linesParsed;
+	}
+
+	/**
+	 * Decodes the document bytes and parses them as CSV text.
+	 *
+	 * @param abstractDocument source of the raw bytes and their charset name.
+	 * @return the parsed {@link CSVDocument}.
+	 * @throws DocumentParserException if reading the decoded text fails.
+	 */
+	@Override
+	public ProcessedDocument parse(AbstractDocument abstractDocument)
+			throws DocumentParserException {
+		byte[] raw = abstractDocument.getDocumentContent();
+		// A document may report no charset; Charset.forName(null) would throw.
+		String charsetName = abstractDocument.getContentCharset();
+		Charset charset = (charsetName == null) ? Charset.defaultCharset()
+				: Charset.forName(charsetName);
+		String content = new String(raw, charset);
+
+		try {
+			// Hand the parsed document back to the caller.
+			return parse(new BufferedReader(new StringReader(content)));
+		} catch (IOException e) {
+			throw new DocumentParserException("Failed to parse CSV content: " + e);
+		}
+	}
+
+	/**
+	 * Reads every line from the reader into a new {@link CSVDocument}. When the
+	 * CSV file is declared to have headers the first line becomes the header
+	 * entry; all other lines become data entries.
+	 *
+	 * @param bR the reader to consume; it is not closed here.
+	 * @return the document that was built.
+	 * @throws IOException if reading a line fails.
+	 */
+	public CSVDocument parse(BufferedReader bR) throws IOException {
+		d = new CSVDocument();
+		linesParsed = 0;
+
+		String line;
+		while ((line = bR.readLine()) != null) {
+			CSVEntry csvEntry = new CSVEntry(line, getSeparator());
+			if (linesParsed == 0 && csvFile.hasHeaders()) {
+				d.setHeaders(csvEntry);
+				d.hasHeaders(true);
+			} else {
+				d.getCsvData().add(csvEntry);
+			}
+			linesParsed++;
+		}
+
+		return d;
+	}
+
+	/**
+	 * @return the separator
+	 */
+	public String getSeparator() {
+		return csvFile.getSeparator();
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/csv/CSVSchema.java b/src/org/yooreeka/util/parsing/csv/CSVSchema.java
new file mode 100644
index 0000000..b38042d
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/csv/CSVSchema.java
@@ -0,0 +1,58 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.csv;
+
+import java.util.HashMap;
+
+import org.yooreeka.util.parsing.common.DataField;
+
+/**
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class CSVSchema {
+
+	private int column = 0; // position that the next added field will receive
+	private HashMap<Integer, DataField> columnMap = new HashMap<>(); // fields by position
+
+	public CSVSchema() {
+	}
+
+	/** Registers <code>field</code> as the next column of the schema. */
+	public void addColumn(DataField field) {
+		columnMap.put(column++, field);
+	}
+
+	/** @return how many columns have been added. */
+	public int getNumberOfColumns() {
+		return columnMap.size();
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/html/CompositeFilter.java b/src/org/yooreeka/util/parsing/html/CompositeFilter.java
new file mode 100644
index 0000000..0382168
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/CompositeFilter.java
@@ -0,0 +1,64 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.traversal.NodeFilter;
+
+/*
+ * Combines multiple filters into one using OR logic.
+ */
+class CompositeFilter implements NodeFilter {
+
+	List<NodeFilter> acceptFilters = new ArrayList<NodeFilter>();
+
+	public CompositeFilter() {
+	}
+
+	/** Returns FILTER_ACCEPT once any nested filter accepts, else the last filter's answer (FILTER_SKIP if none). */
+	public short acceptNode(Node n) {
+		short last = FILTER_SKIP;
+		for (NodeFilter nested : acceptFilters) {
+			last = nested.acceptNode(n);
+			if (last == FILTER_ACCEPT) {
+				return FILTER_ACCEPT;
+			}
+		}
+		return last;
+	}
+
+	public void addAcceptFilter(NodeFilter nestedFilter) {
+		acceptFilters.add(nestedFilter);
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/html/ElementNodeFilter.java b/src/org/yooreeka/util/parsing/html/ElementNodeFilter.java
new file mode 100644
index 0000000..d308823
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/ElementNodeFilter.java
@@ -0,0 +1,61 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.traversal.NodeFilter;
+
+/**
+ * Filter for nodes that are elements with specified name and attribute.
+ */
+class ElementNodeFilter implements NodeFilter {
+	private String elementName;
+	private String attributeName;
+
+	public ElementNodeFilter(String elementName, String attributeName) {
+		this.elementName = elementName;
+		this.attributeName = attributeName;
+	}
+
+	/** Accepts elements whose name matches (ignoring case) and that carry the attribute; skips the rest. */
+	public short acceptNode(Node n) {
+		if (Node.ELEMENT_NODE != n.getNodeType()) {
+			return FILTER_SKIP;
+		}
+		Element candidate = (Element) n;
+		boolean nameMatches = candidate.getNodeName().equalsIgnoreCase(elementName);
+		if (nameMatches && candidate.getAttributeNode(attributeName) != null) {
+			return FILTER_ACCEPT;
+		}
+		return FILTER_SKIP;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/html/HTMLDocumentParser.java b/src/org/yooreeka/util/parsing/html/HTMLDocumentParser.java
new file mode 100644
index 0000000..d7e39e6
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/HTMLDocumentParser.java
@@ -0,0 +1,457 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.xerces.xni.parser.XMLDocumentFilter;
+import org.cyberneko.html.filters.ElementRemover;
+import org.cyberneko.html.parsers.DOMParser;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.traversal.DocumentTraversal;
+import org.w3c.dom.traversal.NodeFilter;
+import org.w3c.dom.traversal.NodeIterator;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.yooreeka.util.P;
+import org.yooreeka.util.internet.crawling.model.Outlink;
+import org.yooreeka.util.parsing.common.AbstractDocument;
+import org.yooreeka.util.parsing.common.DataEntry;
+import org.yooreeka.util.parsing.common.DocumentParser;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+/**
+ * Parser for HTML documents.
+ */
+public class HTMLDocumentParser implements DocumentParser {
+
+	ProcessedDocument htmlDoc;
+
+	/**
+	 * Creates a parser without parsing anything. {@link #getHtmlDoc()} returns
+	 * {@code null} until a document is stored with
+	 * {@link #setHtmlDoc(ProcessedDocument)}.
+	 */
+	public HTMLDocumentParser() {
+		// NOTHING YET
+	}
+
+	/**
+	 * Creates a parser and immediately parses the HTML supplied by
+	 * {@code reader}; the result is available from {@link #getHtmlDoc()}.
+	 * 
+	 * @param reader
+	 *            character stream with the HTML to parse
+	 * @throws HTMLDocumentParserException
+	 *             if parsing fails
+	 */
+	public HTMLDocumentParser(Reader reader) throws HTMLDocumentParserException {
+		// The actual parsing is delegated to a separate, plain parser instance.
+		this.htmlDoc = new HTMLDocumentParser().parse(reader);
+	}
+
+	/**
+	 * Builds an absolute URL for a link found in a document.
+	 * <p>
+	 * A link that already carries a protocol (see {@link #extractProtocol}) is
+	 * returned unchanged. Anything else is resolved with {@link URL} instead
+	 * of string concatenation, so root-relative links, {@code ../} segments
+	 * and file-style base URLs are handled correctly. The link is resolved
+	 * against {@code baseUrl} when one is given, otherwise against
+	 * {@code documentUrl}; a relative {@code baseUrl} is itself resolved
+	 * against {@code documentUrl} first.
+	 * 
+	 * @param href
+	 *            link target as written in the document
+	 * @param baseUrl
+	 *            value of the document's {@code <base href>}, or {@code null}
+	 * @param documentUrl
+	 *            URL of the document that contains the link
+	 * @return the absolute URL, or {@code null} when {@code href} is
+	 *         {@code null} or cannot be resolved (malformed context URL, or a
+	 *         scheme {@link URL} does not know, e.g. {@code javascript:})
+	 */
+	private String buildUrl(String href, String baseUrl, String documentUrl) {
+		if (href == null) {
+			return null;
+		}
+		if (extractProtocol(href) != null) {
+			return href;
+		}
+
+		try {
+			URL context = (documentUrl != null) ? new URL(documentUrl) : null;
+			if (baseUrl != null) {
+				context = new URL(context, baseUrl.trim());
+			}
+			return new URL(context, href.trim()).toString();
+		} catch (MalformedURLException e) {
+			// Not resolvable; the caller skips links whose URL is null.
+			return null;
+		}
+	}
+
+	/**
+	 * Normalizes whitespace in three passes: every run of spaces and tabs
+	 * becomes one space, a space or tab directly followed by a line-break
+	 * character becomes a single line feed, and every run of CR/LF characters
+	 * becomes a single line feed.
+	 * 
+	 * @param text
+	 *            text to clean, may be {@code null}
+	 * @return the cleaned text, or {@code null} if {@code text} is
+	 *         {@code null}
+	 */
+	private String cleanText(String text) {
+		if (text == null) {
+			return null;
+		}
+		return text.replaceAll("[ \t]+", " ") // squeeze blanks
+				.replaceAll("[ \t][\r\n]", "\n") // blank before a line break
+				.replaceAll("[\r\n]+", "\n"); // squeeze line breaks
+	}
+
+	/**
+	 * Collects the outgoing links found under {@code node}.
+	 * <p>
+	 * Nothing is collected when the document's robots meta tag contains
+	 * "nofollow". Links marked with rel="nofollow" and links for which no URL
+	 * can be built are left out.
+	 * 
+	 * @param node
+	 *            root of the subtree to scan
+	 * @param docUrl
+	 *            URL of the document, used to resolve relative links
+	 * @param baseUrl
+	 *            value of the document's {@code <base href>}, or {@code null}
+	 * @return the links in iteration order; never {@code null}
+	 */
+	private List<Outlink> extractLinks(Node node, String docUrl, String baseUrl) {
+		List<Outlink> outlinks = new ArrayList<Outlink>();
+		if (isNoFollowForDocument(node)) {
+			return outlinks;
+		}
+
+		DocumentTraversal traversableDoc = (DocumentTraversal) getDocumentNode(node);
+		NodeIterator iterator = traversableDoc.createNodeIterator(node,
+				NodeFilter.SHOW_ELEMENT, getLinkNodeFilter(), true);
+
+		Node currentNode = null;
+		while ((currentNode = iterator.nextNode()) != null) {
+			// Test the element being visited, not the traversal root: a
+			// <base href> element is not a link to follow.
+			if ("BASE".equalsIgnoreCase(currentNode.getNodeName())
+					|| isNoFollowPresent(currentNode)) {
+				continue;
+			}
+			Node hrefAttr = currentNode.getAttributes().getNamedItem("href");
+			if (hrefAttr == null) {
+				continue;
+			}
+			String url = buildUrl(hrefAttr.getNodeValue(), baseUrl, docUrl);
+			if (url != null) {
+				outlinks.add(new Outlink(url, getAnchorText(currentNode)));
+			}
+		}
+
+		return outlinks;
+	}
+
+	/**
+	 * Returns the "directory" part of a URL: everything up to and including
+	 * the last "/" of its path. A query string or fragment is ignored, so a
+	 * "/" inside it is not mistaken for a path separator. A URL without any
+	 * path ("http://host") yields the host root ("http://host/") instead of
+	 * being cut back to "http://".
+	 * 
+	 * @param url
+	 *            URL to take the parent of
+	 * @return the parent ending in "/", or {@code url} unchanged when it has
+	 *         neither a "/" nor a "://"
+	 */
+	private String extractParent(String url) {
+		// Only look at the part before any query string or fragment.
+		int end = url.length();
+		int query = url.indexOf('?');
+		if (query > -1) {
+			end = query;
+		}
+		int fragment = url.indexOf('#');
+		if (fragment > -1 && fragment < end) {
+			end = fragment;
+		}
+		String path = url.substring(0, end);
+
+		int schemeSeparator = path.indexOf("://");
+		int pathStart = (schemeSeparator > -1) ? schemeSeparator + "://".length() : 0;
+		int lastSlash = path.lastIndexOf('/');
+		if (lastSlash >= pathStart) {
+			return path.substring(0, lastSlash + 1);
+		}
+		if (schemeSeparator > -1) {
+			// "http://host": the only slashes belong to "://".
+			return path + "/";
+		}
+		return url;
+	}
+
+	/**
+	 * Extracts the url protocol if present. Handles two cases:
+	 * <ol>
+	 * <li>{@code <protocol>://<host>}</li>
+	 * <li>{@code mailto:<email address>} (matched case-insensitively)</li>
+	 * </ol>
+	 * The text before "://" only counts as a protocol when it is a valid
+	 * scheme name (a letter followed by letters, digits, "+", "-" or "."), so
+	 * a relative link such as {@code redirect?to=http://x} is not mistaken for
+	 * an absolute one.
+	 * 
+	 * @param url
+	 *            link to inspect, may be {@code null}
+	 * @return the protocol as written in {@code url}, "mailto" for mail links,
+	 *         or {@code null} when no protocol is present
+	 */
+	private String extractProtocol(String url) {
+		if (url == null) {
+			return null;
+		}
+		if (url.regionMatches(true, 0, "mailto:", 0, "mailto:".length())) {
+			return "mailto";
+		}
+
+		int i = url.indexOf("://");
+		if (i < 1) {
+			return null;
+		}
+		for (int k = 0; k < i; k++) {
+			char c = url.charAt(k);
+			boolean letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+			boolean digitOrSign = (c >= '0' && c <= '9') || c == '+'
+					|| c == '-' || c == '.';
+			// The first character must be a letter.
+			if (!letter && !(k > 0 && digitOrSign)) {
+				return null;
+			}
+		}
+		return url.substring(0, i);
+	}
+
+	/**
+	 * Returns the text contained in a link element as a single line: every
+	 * CR, LF and tab character is replaced by a space and the result is
+	 * trimmed.
+	 * 
+	 * @param currentNode
+	 *            link element whose text is wanted
+	 * @return the anchor text, or {@code null} if no text could be obtained
+	 */
+	private String getAnchorText(Node currentNode) {
+		String rawText = getText(currentNode);
+		if (rawText == null) {
+			return null;
+		}
+		return rawText.replaceAll("[\r\n\t]", " ").trim();
+	}
+
+	/**
+	 * Looks up the {@code href} of the first {@code base} element of the
+	 * document that {@code node} belongs to.
+	 * 
+	 * @param node
+	 *            any node of the document, may be {@code null}
+	 * @return the base URL, or {@code null} when {@code node} is {@code null}
+	 *         or the document has no {@code base} element with an
+	 *         {@code href}
+	 */
+	private String getBaseUrl(Node node) {
+		if (node == null) {
+			return null;
+		}
+		Node baseNode = getDocumentNode(node).getElementsByTagName("base")
+				.item(0);
+		if (baseNode == null) {
+			return null;
+		}
+		NamedNodeMap attrs = baseNode.getAttributes();
+		Node href = (attrs != null) ? attrs.getNamedItem("href") : null;
+		return (href != null) ? href.getNodeValue() : null;
+	}
+
+	/**
+	 * Not implemented for HTML documents: always returns {@code null},
+	 * whatever the index.
+	 */
+	@Override
+	public DataEntry getDataEntry(int i) {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+	/**
+	 * Returns the DOM document for a node: the node itself when it is a
+	 * document node, its owner document otherwise.
+	 * 
+	 * @param node
+	 *            any DOM node, may be {@code null}
+	 * @return the document, or {@code null} when {@code node} is {@code null}
+	 */
+	private org.w3c.dom.Document getDocumentNode(Node node) {
+		if (node == null) {
+			return null;
+		}
+		boolean isDocument = node.getNodeType() == Node.DOCUMENT_NODE;
+		return isDocument ? (org.w3c.dom.Document) node : node
+				.getOwnerDocument();
+	}
+
+	/**
+	 * @return the document stored in this parser by the {@code Reader}
+	 *         constructor or by {@link #setHtmlDoc(ProcessedDocument)};
+	 *         {@code null} if neither has happened. The {@code parse} methods
+	 *         return their result and do not store it here.
+	 */
+	public ProcessedDocument getHtmlDoc() {
+		return htmlDoc;
+	}
+
+	/**
+	 * Builds the filter that decides which elements are treated as links
+	 * during link extraction. Currently only {@code a} elements that have an
+	 * {@code href} attribute are accepted.
+	 * 
+	 * @return a composite filter holding one accept filter for
+	 *         {@code a[href]}
+	 */
+	private NodeFilter getLinkNodeFilter() {
+		CompositeFilter linkFilter = new CompositeFilter();
+		// For now doing the simplest thing possible - only consider <A>
+		// elements
+		linkFilter.addAcceptFilter(new ElementNodeFilter("a", "href"));
+		/*
+		 * Other elements to consider:
+		 * 
+		 * linkFilter.addAcceptFilter(new LinkNodeFilter("frame", "src"));
+		 * linkFilter.addAcceptFilter(new LinkNodeFilter("link", "href"));
+		 */
+		return linkFilter;
+	}
+
+	/**
+	 * Returns the {@code content} value of the first {@code meta} element
+	 * whose {@code name} is "robots" (ignoring case) and that has a
+	 * {@code content} attribute.
+	 * 
+	 * @param node
+	 *            any node of the document, may be {@code null}
+	 * @return the robots directive, or {@code null} when {@code node} is
+	 *         {@code null} or no such {@code meta} element exists
+	 */
+	private String getRobotsMeta(Node node) {
+		if (node == null) {
+			return null;
+		}
+		NodeList metaTags = getDocumentNode(node).getElementsByTagName("meta");
+		if (metaTags == null) {
+			return null;
+		}
+		int count = metaTags.getLength();
+		for (int i = 0; i < count; i++) {
+			NamedNodeMap attrs = metaTags.item(i).getAttributes();
+			if (attrs == null) {
+				continue;
+			}
+			Node content = attrs.getNamedItem("content");
+			Node name = attrs.getNamedItem("name");
+			if (name == null || content == null) {
+				continue;
+			}
+			if ("ROBOTS".equalsIgnoreCase(name.getNodeValue())) {
+				return content.getNodeValue();
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * Concatenates the values of all text nodes found under {@code node}, in
+	 * the order the node iterator returns them.
+	 * 
+	 * @param node
+	 *            root of the subtree to read, may be {@code null}
+	 * @return the concatenated text; an empty string when {@code node} is
+	 *         {@code null}
+	 */
+	private String getText(Node node) {
+		if (node == null) {
+			return "";
+		}
+
+		DocumentTraversal traversable = (DocumentTraversal) getDocumentNode(node);
+		NodeIterator textNodes = traversable.createNodeIterator(node,
+				NodeFilter.SHOW_TEXT, null, true);
+
+		StringBuilder collected = new StringBuilder();
+		for (Node t = textNodes.nextNode(); t != null; t = textNodes.nextNode()) {
+			collected.append(t.getNodeValue());
+		}
+		return collected.toString();
+	}
+
+	/**
+	 * Returns the text of the document's first {@code title} element as a
+	 * single line: CR, LF and tab characters are replaced by spaces and the
+	 * result is trimmed.
+	 * 
+	 * @param node
+	 *            any node of the document, may be {@code null}
+	 * @return the title; an empty string when {@code node} is {@code null},
+	 *         and {@code null} when the document has no {@code title} element
+	 *         or that element has no text content
+	 */
+	private String getTitle(Node node) {
+		if (node == null) {
+			return "";
+		}
+
+		Node titleNode = getDocumentNode(node).getElementsByTagName("title")
+				.item(0);
+		if (titleNode == null) {
+			return null;
+		}
+		String rawTitle = titleNode.getTextContent();
+		if (rawTitle == null) {
+			return null;
+		}
+		return rawTitle.replaceAll("[\r\n\t]", " ").trim();
+	}
+
+	/**
+	 * Tells whether the document as a whole asks robots not to follow its
+	 * links, i.e. whether its robots meta tag contains "nofollow" (ignoring
+	 * case).
+	 * 
+	 * @param node
+	 *            any node of the document
+	 * @return {@code true} if a robots meta tag with "nofollow" is present
+	 */
+	private boolean isNoFollowForDocument(Node node) {
+		// Check <META name="robots" content="..."/>
+		String robotsMeta = getRobotsMeta(node);
+		if (robotsMeta == null) {
+			return false;
+		}
+		return robotsMeta.toLowerCase().contains("nofollow");
+	}
+
+	/**
+	 * Checks for presence of the "nofollow" keyword in the {@code rel}
+	 * attribute. {@code rel} holds a whitespace-separated list of keywords,
+	 * so "nofollow" is matched as one keyword among possibly several (for
+	 * example rel="external nofollow"), ignoring case.
+	 * 
+	 * @param currentNode
+	 *            element to inspect
+	 * @return {@code true} if the element's {@code rel} attribute lists
+	 *         "nofollow"
+	 */
+	private boolean isNoFollowPresent(Node currentNode) {
+		NamedNodeMap attrs = currentNode.getAttributes();
+		Node relAttr = (attrs != null) ? attrs.getNamedItem("rel") : null;
+		if (relAttr == null || relAttr.getNodeValue() == null) {
+			return false;
+		}
+		for (String keyword : relAttr.getNodeValue().trim().split("\\s+")) {
+			if ("nofollow".equalsIgnoreCase(keyword)) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Parses a fetched HTML document given as raw bytes plus a charset name.
+	 * <p>
+	 * The bytes are decoded with the document's declared charset. When the
+	 * document declares none (null or blank), UTF-8 is used instead of
+	 * failing with a {@code NullPointerException}.
+	 * 
+	 * @param doc
+	 *            document to parse; supplies id, URL, content bytes and
+	 *            charset
+	 * @return a new processed document of type
+	 *         {@code ProcessedDocument.TYPE_HTML}
+	 * @throws HTMLDocumentParserException
+	 *             if the charset is not supported or the HTML cannot be
+	 *             parsed; the original exception is kept as the cause
+	 */
+	public ProcessedDocument parse(AbstractDocument doc)
+			throws HTMLDocumentParserException {
+		P.println("Entering HTMLDocumentParser.parse(FetchedDocument doc) ...");
+		ProcessedDocument parsedDoc = new ProcessedDocument();
+		parsedDoc.setDocumentType(ProcessedDocument.TYPE_HTML);
+		parsedDoc.setDocumentId(doc.getDocumentId());
+		parsedDoc.setDocumentURL(doc.getDocumentURL());
+
+		String documentCharset = doc.getContentCharset();
+		if (documentCharset == null || documentCharset.trim().length() == 0) {
+			// No declared charset: fall back to UTF-8.
+			documentCharset = "UTF-8";
+		}
+
+		P.println("Converting the content bytes into a string ...");
+
+		InputStream contentBytes = new ByteArrayInputStream(
+				doc.getDocumentContent());
+		try {
+			/*
+			 * Up to this point document content was treated as byte array. Here
+			 * we convert byte array into character based stream. Processed
+			 * document will be stored using UTF-8 encoding.
+			 */
+			InputStreamReader characterStream = new InputStreamReader(
+					contentBytes, documentCharset.trim());
+			InputSource inputSource = new InputSource();
+			inputSource.setCharacterStream(characterStream);
+			parseHTML(parsedDoc, inputSource);
+		} catch (UnsupportedEncodingException e) {
+			// The cause travels with the exception; no extra stack trace dump.
+			throw new HTMLDocumentParserException("Document parsing error: ", e);
+		}
+		return parsedDoc;
+	}
+
+	/**
+	 * Parses HTML supplied as a character stream. The resulting document has
+	 * neither an id nor a URL; because the URL is {@code null}, no outgoing
+	 * links are extracted for it.
+	 * 
+	 * @param reader
+	 *            character stream with the HTML to parse
+	 * @return a new processed document of type
+	 *         {@code ProcessedDocument.TYPE_HTML}
+	 * @throws HTMLDocumentParserException
+	 *             if the HTML cannot be parsed
+	 */
+	public ProcessedDocument parse(Reader reader)
+			throws HTMLDocumentParserException {
+		P.println("Entering HTMLDocumentParser.parse(Reader reader) ...");
+
+		ProcessedDocument result = new ProcessedDocument();
+		result.setDocumentType(ProcessedDocument.TYPE_HTML);
+		result.setDocumentId(null);
+		result.setDocumentURL(null);
+
+		InputSource source = new InputSource();
+		source.setCharacterStream(reader);
+
+		parseHTML(result, source);
+		return result;
+	}
+
+	/**
+	 * Runs the NekoHTML parser over {@code inputSource} and fills
+	 * {@code htmlDoc} with the cleaned-up HTML, the plain text and the title.
+	 * Outgoing links are added only when the document has a URL.
+	 * 
+	 * @param htmlDoc
+	 *            document to fill in
+	 * @param inputSource
+	 *            source of the HTML to parse
+	 * @throws HTMLDocumentParserException
+	 *             if the parser rejects the filter property or parsing fails
+	 */
+	private void parseHTML(ProcessedDocument htmlDoc, InputSource inputSource)
+			throws HTMLDocumentParserException {
+		// NekoHTML parser
+		DOMParser nekoParser = new DOMParser();
+
+		// Filter chain: strip unwanted elements, then serialize what is left.
+		ElementRemover remover = newElementRemover();
+		StringWriter cleanedMarkup = new StringWriter();
+		XMLDocumentFilter serializer = new HTMLWriter(cleanedMarkup, "UTF-8");
+		XMLDocumentFilter[] pipeline = { remover, serializer };
+
+		try {
+			nekoParser.setProperty(
+					"http://cyberneko.org/html/properties/filters", pipeline);
+		} catch (SAXException e) {
+			e.printStackTrace();
+			throw new HTMLDocumentParserException("Property is not supported",
+					e);
+		}
+
+		try {
+			nekoParser.parse(inputSource);
+		} catch (SAXException e) {
+			e.printStackTrace();
+			throw new HTMLDocumentParserException("Parsing error: ", e);
+		} catch (IOException e) {
+			e.printStackTrace();
+			throw new HTMLDocumentParserException("Parsing error: ", e);
+		}
+
+		// cleaned up html.
+		htmlDoc.setContent(cleanText(cleanedMarkup.toString()));
+
+		// just the text
+		Node root = nekoParser.getDocument();
+		htmlDoc.setText(cleanText(getText(root)));
+
+		// content of <title/>
+		htmlDoc.setDocumentTitle(getTitle(root));
+
+		if (htmlDoc.getDocumentURL() == null) {
+			return;
+		}
+
+		// links to other pages
+		String baseUrl = getBaseUrl(root);
+		htmlDoc.setOutlinks(extractLinks(root, htmlDoc.getDocumentURL(),
+				baseUrl));
+	}
+
+	/**
+	 * Creates the filter that keeps only the elements (and attributes) needed
+	 * for text and link extraction and drops script and style elements
+	 * completely.
+	 */
+	private ElementRemover newElementRemover() {
+		ElementRemover remover = new ElementRemover();
+		// keep only a subset of elements (text and links)
+		remover.acceptElement("html", null);
+		remover.acceptElement("meta", new String[] { "name", "content" });
+		remover.acceptElement("title", null);
+		remover.acceptElement("body", null);
+		remover.acceptElement("base", new String[] { "href" });
+		remover.acceptElement("b", null);
+		remover.acceptElement("i", null);
+		remover.acceptElement("u", null);
+		remover.acceptElement("p", null);
+		remover.acceptElement("br", null);
+		remover.acceptElement("a", new String[] { "href", "rel" });
+		// completely remove these elements
+		remover.removeElement("script");
+		remover.removeElement("style");
+		return remover;
+	}
+
+	/**
+	 * Stores a processed document in this parser, replacing any previous one.
+	 * 
+	 * @param htmlDoc
+	 *            document later returned by {@link #getHtmlDoc()}
+	 */
+	public void setHtmlDoc(ProcessedDocument htmlDoc) {
+		this.htmlDoc = htmlDoc;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/html/HTMLDocumentParserException.java b/src/org/yooreeka/util/parsing/html/HTMLDocumentParserException.java
new file mode 100644
index 0000000..a839e98
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/HTMLDocumentParserException.java
@@ -0,0 +1,49 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import org.yooreeka.util.parsing.common.DocumentParserException;
+
+/**
+ * Exception thrown by {@code HTMLDocumentParser} when an HTML document
+ * cannot be parsed.
+ */
+public class HTMLDocumentParserException extends DocumentParserException {
+
+	/**
+	 * Distinct SVUID for the org.yooreeka.* classes
+	 */
+	private static final long serialVersionUID = 3397930132653232196L;
+
+	/**
+	 * @param msg
+	 *            description of the failure
+	 */
+	public HTMLDocumentParserException(String msg) {
+		super(msg);
+	}
+
+	/**
+	 * @param msg
+	 *            description of the failure
+	 * @param t
+	 *            underlying cause
+	 */
+	public HTMLDocumentParserException(String msg, Throwable t) {
+		super(msg, t);
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/html/HTMLWriter.java b/src/org/yooreeka/util/parsing/html/HTMLWriter.java
new file mode 100644
index 0000000..2c36cd2
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/HTMLWriter.java
@@ -0,0 +1,119 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.cyberneko.html.filters.Writer;
+
+/**
+ * Extends the NekoHTML {@code Writer} filter to override the way start tags
+ * are printed (the stock behavior is most probably a bug, see
+ * {@link #printStartElement}).
+ */
+public class HTMLWriter extends Writer {
+
+	/**
+	 * @param writer
+	 *            destination of the serialized HTML
+	 * @param encoding
+	 *            encoding name passed on to the NekoHTML writer
+	 */
+	public HTMLWriter(java.io.Writer writer, String encoding) {
+		super(writer, encoding);
+	}
+
+	/**
+	 * This code was copied from
+	 * org.cyberneko.html.filters.Writer.printStartElement. It overrides the
+	 * original version with a minor adjustment for a bug fix.
+	 * <p>
+	 * The original version would wipe out the value of the 'content'
+	 * attribute in meta elements. In our case we are interested in:
+	 * {@code <meta name="robots" content="...."/>}
+	 * <p>
+	 * Only {@code <meta http-equiv="content-type">} has its content value
+	 * rewritten (to announce the output encoding) while the tag is printed;
+	 * the original value is restored afterwards. Names are compared without
+	 * relying on the default locale, and a missing attribute list or content
+	 * value is tolerated.
+	 * 
+	 * @param element
+	 *            name of the element being opened
+	 * @param attributes
+	 *            attributes of the element, may be {@code null}
+	 */
+	@Override
+	protected void printStartElement(QName element, XMLAttributes attributes) {
+		int attrCount = attributes != null ? attributes.getLength() : 0;
+
+		// modify META[@http-equiv='content-type']/@content value
+		int contentIndex = -1;
+		String originalContent = null;
+		boolean contentRewritten = false;
+		if ("meta".equalsIgnoreCase(element.rawname)) {
+			String httpEquiv = null;
+			for (int i = 0; i < attrCount; i++) {
+				String aname = attributes.getQName(i);
+				if ("http-equiv".equalsIgnoreCase(aname)) {
+					httpEquiv = attributes.getValue(i);
+				} else if ("content".equalsIgnoreCase(aname)) {
+					contentIndex = i;
+				}
+			}
+			if ("content-type".equalsIgnoreCase(httpEquiv)) {
+				fSeenHttpEquiv = true;
+				if (contentIndex != -1) {
+					originalContent = attributes.getValue(contentIndex);
+				}
+				if (originalContent != null) {
+					String content = originalContent
+							.toLowerCase(java.util.Locale.ENGLISH);
+					int charsetIndex = content.indexOf("charset=");
+					if (charsetIndex != -1) {
+						content = content.substring(0, charsetIndex
+								+ "charset=".length());
+					} else {
+						content += ";charset=";
+					}
+					content += fEncoding;
+					attributes.setValue(contentIndex, content);
+					contentRewritten = true;
+				}
+			}
+			// this is the difference from original code: any other meta
+			// element keeps its content value untouched
+		}
+
+		// print element
+		fPrinter.print('<');
+		fPrinter.print(element.rawname);
+		for (int i = 0; i < attrCount; i++) {
+			String aname = attributes.getQName(i);
+			String avalue = attributes.getValue(i);
+			fPrinter.print(' ');
+			fPrinter.print(aname);
+			fPrinter.print("=\"");
+			printAttributeValue(avalue);
+			fPrinter.print('"');
+		}
+		fPrinter.print('>');
+		fPrinter.flush();
+
+		// return original META[@http-equiv]/@content value
+		if (contentRewritten) {
+			attributes.setValue(contentIndex, originalContent);
+		}
+
+	} // printStartElement(QName,XMLAttributes)
+}
diff --git a/src/org/yooreeka/util/parsing/html/LinkNodeFilter.java b/src/org/yooreeka/util/parsing/html/LinkNodeFilter.java
new file mode 100644
index 0000000..ac0522d
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/LinkNodeFilter.java
@@ -0,0 +1,58 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.traversal.NodeFilter;
+
+/**
+ * Filter for link-carrying elements: accepts elements with the given name
+ * (ignoring case) that have the given attribute. The matching rule is the
+ * one implemented by {@link ElementNodeFilter}, which is reused here instead
+ * of being duplicated.
+ */
+class LinkNodeFilter extends ElementNodeFilter {
+
+	/**
+	 * @param elementName
+	 *            element name to match, compared case-insensitively
+	 * @param attributeName
+	 *            attribute that must be present on the element
+	 */
+	public LinkNodeFilter(String elementName, String attributeName) {
+		super(elementName, attributeName);
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/html/MultiFilter.java b/src/org/yooreeka/util/parsing/html/MultiFilter.java
new file mode 100644
index 0000000..e977c47
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/html/MultiFilter.java
@@ -0,0 +1,61 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.html;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.traversal.NodeFilter;
+
+/**
+ * {@link NodeFilter} that combines several nested filters: a node is accepted
+ * as soon as one nested filter accepts it.
+ */
+class MultiFilter implements NodeFilter {
+
+	List<NodeFilter> acceptFilters = new ArrayList<NodeFilter>();
+
+	public MultiFilter() {
+	}
+
+	/**
+	 * Asks the nested filters in the order they were added.
+	 * 
+	 * @param n
+	 *            node to test
+	 * @return {@code FILTER_ACCEPT} if any nested filter accepts the node;
+	 *         otherwise the answer of the last nested filter, or
+	 *         {@code FILTER_SKIP} when there are no nested filters
+	 */
+	public short acceptNode(Node n) {
+		short verdict = NodeFilter.FILTER_SKIP;
+		for (NodeFilter nested : acceptFilters) {
+			verdict = nested.acceptNode(n);
+			if (verdict == NodeFilter.FILTER_ACCEPT) {
+				return verdict;
+			}
+		}
+		return verdict;
+	}
+
+	/**
+	 * Appends a filter to the list of nested filters.
+	 * 
+	 * @param nestedFilter
+	 *            filter to consult in {@link #acceptNode(Node)}
+	 */
+	public void addAcceptFilter(NodeFilter nestedFilter) {
+		acceptFilters.add(nestedFilter);
+	}
+
+}
diff --git a/src/org/yooreeka/util/parsing/msword/MSWordDocumentParser.java b/src/org/yooreeka/util/parsing/msword/MSWordDocumentParser.java
new file mode 100644
index 0000000..311786c
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/msword/MSWordDocumentParser.java
@@ -0,0 +1,103 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-${year} Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.msword;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+
+import org.textmining.extraction.TextExtractor;
+import org.textmining.extraction.word.WordTextExtractorFactory;
+import org.yooreeka.util.parsing.common.AbstractDocument;
+import org.yooreeka.util.parsing.common.DataEntry;
+import org.yooreeka.util.parsing.common.DocumentParser;
+import org.yooreeka.util.parsing.common.DocumentParserException;
+import org.yooreeka.util.parsing.common.ProcessedDocument;
+
+/**
+ * Parser for MS Word documents: extracts the plain text with the text-mining
+ * extractor and uses the first non-empty line as the title.
+ */
+public class MSWordDocumentParser implements DocumentParser {
+
+	/**
+	 * Result of the most recent successful {@link #parse} call (an empty
+	 * document before the first call).
+	 */
+	ProcessedDocument wordDoc = new ProcessedDocument();
+
+	/**
+	 * Not implemented for Word documents: always returns {@code null}.
+	 */
+	@Override
+	public DataEntry getDataEntry(int i) {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+	/*
+	 * Finds the first non-empty line in the document. Returns null for null
+	 * text and an empty string when every line is blank.
+	 */
+	private String getTitle(String text) throws IOException {
+		if (text == null) {
+			return null;
+		}
+
+		BufferedReader r = new BufferedReader(new StringReader(text));
+		String line = null;
+		while ((line = r.readLine()) != null) {
+			String trimmed = line.trim();
+			if (trimmed.length() > 0) {
+				return trimmed;
+			}
+		}
+		return "";
+	}
+
+	/**
+	 * Extracts text and title from a Word document.
+	 * <p>
+	 * Every call builds and returns a new {@link ProcessedDocument}, so a
+	 * result handed out earlier is not overwritten by a later parse.
+	 * 
+	 * @param doc
+	 *            document to parse; supplies id, URL and content bytes
+	 * @return a new processed document of type
+	 *         {@code ProcessedDocument.TYPE_MSWORD}
+	 * @throws DocumentParserException
+	 *             if text extraction fails; the original exception is kept as
+	 *             the cause
+	 */
+	public ProcessedDocument parse(AbstractDocument doc)
+			throws DocumentParserException {
+
+		ProcessedDocument parsed = new ProcessedDocument();
+		parsed.setDocumentType(ProcessedDocument.TYPE_MSWORD);
+		parsed.setDocumentId(doc.getDocumentId());
+		parsed.setDocumentURL(doc.getDocumentURL());
+
+		InputStream contentData = new ByteArrayInputStream(
+				doc.getDocumentContent());
+		WordTextExtractorFactory wteFactory = new WordTextExtractorFactory();
+
+		try {
+			TextExtractor txtExtractor = wteFactory.textExtractor(contentData);
+			String text = txtExtractor.getText();
+			parsed.setText(text);
+			// using the same value as text
+			parsed.setContent(text);
+			parsed.setDocumentTitle(getTitle(text));
+		} catch (Exception e) {
+			throw new MSWordDocumentParserException(
+					"MSWord Document parsing error: ", e);
+		}
+
+		wordDoc = parsed;
+		return parsed;
+	}
+}
diff --git a/src/org/yooreeka/util/parsing/msword/MSWordDocumentParserException.java b/src/org/yooreeka/util/parsing/msword/MSWordDocumentParserException.java
new file mode 100644
index 0000000..aacdd94
--- /dev/null
+++ b/src/org/yooreeka/util/parsing/msword/MSWordDocumentParserException.java
@@ -0,0 +1,49 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *   
+ */
+package org.yooreeka.util.parsing.msword;
+
+import org.yooreeka.util.parsing.common.DocumentParserException;
+
+public class MSWordDocumentParserException extends DocumentParserException {
+
+	/** Serialization version id, distinct for the org.yooreeka.* classes. */
+	private static final long serialVersionUID = -3005082246637918030L;
+
+	/** Creates the exception with a message and the underlying cause. */
+	public MSWordDocumentParserException(String msg, Throwable t) {
+		super(msg, t);
+	}
+
+	/** Creates the exception with a message only. */
+	public MSWordDocumentParserException(String msg) {
+		super(msg);
+	}
+}
diff --git a/src/org/yooreeka/util/text/AlphabetProjection.java b/src/org/yooreeka/util/text/AlphabetProjection.java
new file mode 100644
index 0000000..268010e
--- /dev/null
+++ b/src/org/yooreeka/util/text/AlphabetProjection.java
@@ -0,0 +1,313 @@
+/*
+ *   ________________________________________________________________________________________
+ *   
+ *   Y O O R E E K A
+ *   A library for data mining, machine learning, soft computing, and mathematical analysis
+ *   ________________________________________________________________________________________ 
+ *    
+ *   The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web " 
+ *   (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms 
+ *   are valuable in any software application.
+ *  
+ *   Copyright (c) 2007-2009    Haralambos Marmanis & Dmitry Babenko
+ *   Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.  
+ * 
+ *   Certain library functions depend on other Open Source software libraries, which are covered 
+ *   by different license agreements. See the NOTICE file distributed with this work for additional 
+ *   information regarding copyright ownership and licensing.
+ * 
+ *   Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License"); 
+ *   you may not use this file except in compliance with the License.  
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software distributed under 
+ *   the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+ *   either express or implied. See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *
+ */
+package org.yooreeka.util.text;
+
+import java.util.logging.Logger;
+
+import org.yooreeka.config.YooreekaConfigurator;
+import org.yooreeka.util.C;
+import org.yooreeka.util.P;
+import org.yooreeka.util.metrics.EuclideanDistance;
+
+import com.wcohen.ss.JaroWinkler;
+import com.wcohen.ss.Level2Jaro;
+import com.wcohen.ss.MongeElkan;
+import com.wcohen.ss.NeedlemanWunsch;
+import com.wcohen.ss.api.StringDistance;
+
+/**
+ * 
+ * @author <a href="mailto:babis@marmanis.com">Babis Marmanis</a>
+ * 
+ */
+public class AlphabetProjection {
+
+	private static final Logger LOG = Logger.getLogger(AlphabetProjection.class.getName());
+
+	/** Default number of <tt>String</tt> base vectors used by the projection. */
+	public static final int DEFAULT_DIMENSIONALITY = 10;
+	private int dimensionality;
+
+	/** Default length of each <tt>String</tt> base vector. */
+	public static final int DEFAULT_BASELENGTH = 10;
+	private int baselength;
+
+	// TODO: This covers only the English language. Create a separate character basis class
+	//       that has all the character bases and invoke them statically as needed.
+
+	public static final char[] DEFAULT_CHARACTER_BASIS = { 'e', 't', 'a', 'o', 'n', 'r', 'i', 's',
+			'h', 'd', 'l', 'f', 'c', 'm', 'u', 'g', 'y', 'p', 'w', 'b', 'v',
+			'k', 'x', 'j', 'q', 'z' };
+	private char[] characterBasis;
+
+	/** One base vector for each of the first <tt>dimensionality</tt> basis characters. */
+	private String[] projectionBasis = null;
+
+	// TODO: These should be passed to the projection class. Take them out and define an
+	//       appropriate encapsulation
+
+	// String Edit Distance Metrics
+	private NeedlemanWunsch needlemanWunch;
+	private JaroWinkler jaroWinkler;
+	private Level2Jaro level2Jaro;
+	private MongeElkan mongeElkan;
+
+	// --------------------------------------------------------------------------------
+	// CONSTRUCTORS
+	// --------------------------------------------------------------------------------
+	/**
+	 * Creates a projection; invalid arguments fall back to the defaults.
+	 * @param dim
+	 *            number of base vectors; non-positive selects the default and
+	 *            values beyond the character basis size are reduced to it
+	 * @param length
+	 *            length of each base vector; non-positive selects the default
+	 * @param charBasis
+	 *            characters generating the base vectors; <code>null</code> or
+	 *            empty selects {@link #DEFAULT_CHARACTER_BASIS}
+	 */
+	public AlphabetProjection(int dim, int length, char[] charBasis) {
+		LOG.setLevel(YooreekaConfigurator.getLevel(AlphabetProjection.class.getName()));
+
+		dimensionality = dim;
+		baselength = length;
+		characterBasis = charBasis;
+
+		// Validate the settings and initialize the projection
+		initProjection();
+
+		// Initialize the String edit distance metrics
+		initMetrics();
+	}
+
+	// --------------------------------------------------------------------------------
+	// INITIALIZATION
+	// --------------------------------------------------------------------------------
+	/**
+	 * Validates the settings and (re)builds the projection basis. Invoked by
+	 * the constructor and by every setter, so the basis always matches them.
+	 */
+	private void initProjection() {
+		if (characterBasis == null || characterBasis.length == 0) {
+			characterBasis = AlphabetProjection.DEFAULT_CHARACTER_BASIS;
+		}
+		if (baselength <= 0) {
+			baselength = AlphabetProjection.DEFAULT_BASELENGTH;
+		}
+		if (dimensionality <= 0) {
+			dimensionality = AlphabetProjection.DEFAULT_DIMENSIONALITY;
+		}
+		// One base vector per basis character, so never more vectors than characters
+		if (dimensionality > characterBasis.length) {
+			LOG.warning("Dimensionality reduced to the character basis size: " + characterBasis.length);
+			dimensionality = characterBasis.length;
+		}
+
+		String[] basis = new String[dimensionality];
+		for (int i = 0; i < dimensionality; i++) {
+			basis[i] = getEigenvector(characterBasis[i]);
+		}
+		projectionBasis = basis;
+	}
+
+	/** Creates the String edit distance metrics averaged by {@link #project(String)}. */
+	private void initMetrics() {
+		needlemanWunch = new NeedlemanWunsch();
+		jaroWinkler    = new JaroWinkler();
+		level2Jaro     = new Level2Jaro();
+		mongeElkan     = new MongeElkan();
+	}
+
+	// --------------------------------------------------------------------------------
+	// PROJECTION METHODS
+	// --------------------------------------------------------------------------------
+	/**
+	 * Projects a String onto the base vectors, averaging the scores of the
+	 * four built-in String distances.
+	 * @param target
+	 *            the String that we want to project onto the base vectors;
+	 *            <code>null</code> is treated as the empty String
+	 * @return the projections of the lowercased <CODE>target</CODE> onto each
+	 *         one of the base vectors
+	 */
+	public double[] project(String target) throws IllegalArgumentException {
+		String text = normalize(target);
+
+		StringDistance jaroWinklerDistance    = jaroWinkler.getDistance();
+		StringDistance level2JaroDistance     = level2Jaro.getDistance();
+		StringDistance mongeElkanDistance     = mongeElkan.getDistance();
+		StringDistance needlemanWunchDistance = needlemanWunch.getDistance();
+
+		double[] projections = new double[projectionBasis.length];
+		for (int i = 0; i < projections.length; i++) {
+			double p = jaroWinklerDistance.score(projectionBasis[i], text);
+			p += level2JaroDistance.score(projectionBasis[i], text);
+			p += mongeElkanDistance.score(projectionBasis[i], text);
+			p += needlemanWunchDistance.score(projectionBasis[i], text);
+
+			projections[i] = p * 0.25;
+		}
+
+		return projections;
+	}
+
+	/**
+	 * Projects a String onto the base vectors with the given String distance.
+	 * @param target
+	 *            the String that we want to project onto the base vectors;
+	 *            <code>null</code> is treated as the empty String
+	 * @param d
+	 *            the distance that scores the target against each base vector
+	 * @return the projections of the lowercased <CODE>target</CODE> onto each
+	 *         one of the base vectors
+	 * @throws IllegalArgumentException
+	 *             if <code>d</code> is <code>null</code>
+	 */
+	public double[] project(String target, StringDistance d) throws IllegalArgumentException {
+		if (d == null) {
+			throw new IllegalArgumentException("The String distance must not be null");
+		}
+		String text = normalize(target);
+
+		double[] projections = new double[projectionBasis.length];
+		for (int i = 0; i < projections.length; i++) {
+			projections[i] = d.score(projectionBasis[i], text);
+		}
+
+		return projections;
+	}
+
+	/** Maps <code>null</code> to the empty String and lowercases everything else. */
+	private static String normalize(String target) {
+		if (target == null) {
+			return C.EMPTY_STRING;
+		}
+		// Strings are immutable: the lowercased copy must be kept, not discarded
+		return target.toLowerCase(java.util.Locale.ROOT);
+	}
+
+	// --------------------------------------------------------------------------------
+	// AUXILIARY METHODS
+	// --------------------------------------------------------------------------------
+	/**
+	 * Creates an instance with the default parameters (suitable when you are
+	 * unaware of the best parameters for the constructor).
+	 * 
+	 * @return instance with default parameters applied.
+	 */
+	public static AlphabetProjection getDefault() {
+		return new AlphabetProjection(DEFAULT_DIMENSIONALITY, DEFAULT_BASELENGTH, DEFAULT_CHARACTER_BASIS);
+	}
+
+	/**
+	 * @return the Euclidean distance between the projections of the two values
+	 */
+	public double distance(String val1, String val2) {
+
+		EuclideanDistance euclid = new EuclideanDistance();
+
+		return euclid.getDistance(project(val1), project(val2));
+	}
+
+	/**
+	 * @param val
+	 *            the single character of the base vector
+	 * 
+	 * @return the base vector for the <tt>val</tt> character, i.e. that
+	 *         character repeated <tt>baselength</tt> times
+	 */
+	public String getEigenvector(char val) {
+
+		StringBuilder buf = new StringBuilder(baselength);
+
+		for (int i = 0; i < baselength; i++) {
+			buf.append(val);
+		}
+
+		return buf.toString();
+	}
+
+	// --------------------------------------------------------------------------------
+	// MAIN METHOD
+	// --------------------------------------------------------------------------------
+	public static void main(String[] args) throws Exception {
+
+		AlphabetProjection aProjection = new AlphabetProjection(10,10,AlphabetProjection.DEFAULT_CHARACTER_BASIS);
+
+		final String TEST_STRING_1 = "Андре́й Никола́евич Колмого́ров";//"Andrei Nikolaevitch Kolmogorov";
+		final String TEST_STRING_2 = "Колмого́ров Андре́й Никола́евич";//"Kolmogorov Andrei Nikolaevitch";
+		final String TEST_STRING_3 = "Nikolai";
+
+		P.println("d[T1,T2] = "
+				+ aProjection.distance(TEST_STRING_1, TEST_STRING_2));
+		P.println("d[T1,T3] = "
+				+ aProjection.distance(TEST_STRING_1, TEST_STRING_3));
+	}
+
+	// --------------------------------------------------------------------------------
+	// GETTERS -- SETTERS
+	// --------------------------------------------------------------------------------
+
+	public static int[] getDefaultProjectionProperties() {
+		return new int[] { DEFAULT_DIMENSIONALITY, DEFAULT_BASELENGTH };
+	}
+
+	public int getBaselength() {
+		return baselength;
+	}
+
+	/** Sets the base vector length (non-positive selects the default) and rebuilds the basis. */
+	public void setBaselength(int baselength) {
+		this.baselength = baselength;
+		initProjection();
+	}
+
+	public int getDimensionality() {
+		return dimensionality;
+	}
+
+	/** Sets the number of base vectors (validated as in the constructor) and rebuilds the basis. */
+	public void setDimensionality(int dimensionality) {
+		this.dimensionality = dimensionality;
+		initProjection();
+	}
+
+	/** @return the characterBasis */
+	public char[] getCharacterBasis() {
+		return characterBasis;
+	}
+
+	/** @param characterBasis the characterBasis to set; the projection basis is rebuilt from it */
+	public void setCharacterBasis(char[] characterBasis) {
+		this.characterBasis = characterBasis;
+		initProjection();
+	}
+}