Skip to content

Commit

Permalink
Synchronizing the GitHub version with the Google Code SVN
Browse files Browse the repository at this point in the history
  • Loading branch information
marmanis committed Feb 26, 2013
1 parent 6391f57 commit cc25979
Show file tree
Hide file tree
Showing 41 changed files with 5,265 additions and 0 deletions.
6 changes: 6 additions & 0 deletions deploy/conf/yooreeka.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
yooreeka.home=C:/code/GoogleCode/yooreeka
yooreeka.data.dir=C:/code/GoogleCode/yooreeka/data
yooreeka.crawl.dir=C:/code/GoogleCode/yooreeka/data/crawls
yooreeka.temp.dir=C:/code/GoogleCode/yooreeka/deploy/temp
yooreeka.movielens.data.dir=C:/code/GoogleCode/yooreeka/data/ch03/MovieLens
yooreeka.movielenstest.data.dir=C:/code/GoogleCode/yooreeka/data/ch03/MovieLens/test
89 changes: 89 additions & 0 deletions src/org/yooreeka/algos/taxis/rules/RuleEngine.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package org.yooreeka.algos.taxis.rules;

import java.util.Collection;

import org.drools.KnowledgeBase;
import org.drools.KnowledgeBaseFactory;
import org.drools.builder.KnowledgeBuilder;
import org.drools.builder.KnowledgeBuilderFactory;
import org.drools.builder.ResourceType;
import org.drools.definition.KnowledgePackage;
import org.drools.io.Resource;
import org.drools.io.ResourceFactory;
import org.drools.runtime.StatefulKnowledgeSession;
import org.yooreeka.examples.spamfilter.ClassificationResult;
import org.yooreeka.examples.spamfilter.data.Email;
import org.yooreeka.util.P;

/**
 * Compiles a DRL rules file into a Drools {@link KnowledgeBase} and runs the
 * compiled rules against individual emails.
 */
public class RuleEngine {

    private KnowledgeBase kbase;

    /**
     * Parses and compiles the given DRL file and deploys the resulting
     * knowledge packages into a new knowledge base.
     *
     * @param rulesFile path of the DRL file to load
     * @throws RuleEngineException if the file cannot be loaded or compiled;
     *         the underlying failure is attached as the cause
     */
    public RuleEngine(String rulesFile) throws RuleEngineException {

        try {
            /*
             * A <tt>KnowledgeBuilder</tt> is used to turn a DRL source file
             * into <tt>Package</tt> objects which the Knowledge Base can
             * consume.
             */
            KnowledgeBuilder kbuilder = KnowledgeBuilderFactory.newKnowledgeBuilder();

            // this will parse and compile the DRL file
            Resource r = ResourceFactory.newFileResource(rulesFile);
            kbuilder.add(r, ResourceType.DRL);

            // Check the builder for errors
            if (kbuilder.hasErrors()) {
                P.println(kbuilder.getErrors().toString());
                // Caught by the handler below and wrapped in a RuleEngineException.
                throw new RuntimeException("Unable to compile the DRL file: "
                        + rulesFile);
            }

            // get the compiled packages
            final Collection<KnowledgePackage> pkgs = kbuilder.getKnowledgePackages();

            // add the packages to a KnowledgeBase (deploy the knowledge
            // packages).
            kbase = KnowledgeBaseFactory.newKnowledgeBase();
            kbase.addKnowledgePackages(pkgs);

        } catch (Exception e) {
            throw new RuleEngineException("Could not load/compile rules from DRL file: '"
                    + rulesFile + "' ", e);
        }
    }

    /**
     * Fires all rules for a single email in a fresh stateful session.
     * The session is disposed afterwards, even when rule execution fails,
     * so that repeated calls do not accumulate undisposed sessions.
     *
     * @param classificationResult object exposed to the rules as the global
     *        named <tt>classificationResult</tt>
     * @param email the fact inserted into the session
     */
    public void executeRules(ClassificationResult classificationResult, Email email) {

        final StatefulKnowledgeSession ksession = kbase.newStatefulKnowledgeSession();
        try {
            ksession.setGlobal("classificationResult", classificationResult);
            ksession.insert(email);
            ksession.fireAllRules();
        } finally {
            // Stateful sessions hold resources until explicitly disposed.
            ksession.dispose();
        }
    }
}
15 changes: 15 additions & 0 deletions src/org/yooreeka/algos/taxis/rules/RuleEngineException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.yooreeka.algos.taxis.rules;

/**
 * Unchecked exception raised by the rule engine, e.g. when a rules file
 * cannot be loaded or compiled.
 */
public class RuleEngineException extends RuntimeException {

    private static final long serialVersionUID = 4267289121996977169L;

    /**
     * Creates an exception that carries only a description of the failure.
     *
     * @param message description of what went wrong
     */
    public RuleEngineException(String message) {
        super(message);
    }

    /**
     * Creates an exception that also records the underlying failure.
     *
     * @param message description of what went wrong
     * @param cause the original exception that triggered this one
     */
    public RuleEngineException(String message, Throwable cause) {
        super(message, cause);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package org.yooreeka.examples.newsgroups.classification;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.yooreeka.algos.taxis.core.intf.Concept;

/**
 * Pairs a {@link Concept} with the score it received.
 * <p>
 * Equality and hashing take only the concept name into account; the score
 * is ignored.
 */
public class ClassificationResult {
    private Concept concept;
    private double score;

    /**
     * @param concept the concept this result refers to (may be <tt>null</tt>)
     * @param score the score associated with the concept
     */
    public ClassificationResult(Concept concept, double score) {
        this.concept = concept;
        this.score = score;
    }

    public Concept getConcept() {
        return concept;
    }

    public void setConcept(Concept concept) {
        this.concept = concept;
    }

    public double getScore() {
        return score;
    }

    public void setScore(double score) {
        this.score = score;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        // only take into account concept name
        result = prime * result + ((concept == null) ? 0 : concept.getName().hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ClassificationResult)) {
            // also covers obj == null
            return false;
        }
        final ClassificationResult other = (ClassificationResult) obj;
        // only take into account concept name
        if (concept == null) {
            return other.concept == null;
        }
        if (other.concept == null) {
            // Guard: previously this case dereferenced other.concept and threw
            // a NullPointerException.
            return false;
        }
        return concept.getName().equals(other.concept.getName());
    }

    @Override
    public String toString() {
        String name = null;
        if (concept != null) {
            name = concept.getName();
        }
        return "[" + name + "->" + score + "]";
    }

    /**
     * Sorts the given list in place by score, highest score first.
     * <p>
     * Uses {@link Double#compare(double, double)} so that the ordering is a
     * consistent total order even when scores are NaN (NaN scores sort
     * before all other values).
     *
     * @param results the list to sort; it is modified
     */
    public static void sort(List<ClassificationResult> results) {

        Collections.sort(results, new Comparator<ClassificationResult>() {

            public int compare(ClassificationResult f1, ClassificationResult f2) {
                // Arguments are swapped to obtain descending order.
                return Double.compare(f2.getScore(), f1.getScore());
            }
        });
    }

    /**
     * Returns up to <tt>topN</tt> results with the highest scores.
     * <p>
     * Note that the input list is sorted in place as a side effect.
     *
     * @param results candidate results; sorted in place by descending score
     * @param topN maximum number of results to return
     * @return a new list with at most <tt>topN</tt> results, best score first
     */
    public static List<ClassificationResult> getTopResults(
            List<ClassificationResult> results, int topN) {

        // sort results by descending score
        ClassificationResult.sort(results);

        // select the top N results
        List<ClassificationResult> bestScores = new ArrayList<ClassificationResult>();
        for (ClassificationResult f : results) {
            if (bestScores.size() >= topN) {
                // have enough items.
                break;
            }
            bestScores.add(f);
        }

        return bestScores;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.yooreeka.examples.newsgroups.classification;

import org.yooreeka.examples.newsgroups.core.NewsStory;
import org.yooreeka.examples.newsgroups.core.NewsStoryGroup;

/**
 * Strategy for assigning topics to news stories and to groups of stories.
 */
public interface ClassificationStrategy {

    /**
     * Assigns a topic to the given group of stories.
     *
     * @param cluster the story group to label
     */
    void assignTopicToCluster(NewsStoryGroup cluster);

    /**
     * Assigns a topic to a single story.
     *
     * @param newsStory the story to label
     */
    void assignTopicToStory(NewsStory newsStory);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package org.yooreeka.examples.newsgroups.classification;

import java.util.Arrays;
import java.util.List;

import org.yooreeka.algos.taxis.core.intf.Concept;
import org.yooreeka.algos.taxis.core.intf.Instance;
import org.yooreeka.examples.newsgroups.core.NewsCategory;
import org.yooreeka.examples.newsgroups.core.NewsStory;
import org.yooreeka.examples.newsgroups.core.NewsStoryGroup;

/**
 * {@link ClassificationStrategy} that delegates classification to an
 * {@link NBStoryClassifier}.
 */
public class ClassificationStrategyImpl implements ClassificationStrategy {

    private NBStoryClassifier storyClassifier;

    private boolean isVerbose = false;

    public ClassificationStrategyImpl() {
        // EMPTY
    }

    public NBStoryClassifier getStoryClassifier() {
        return storyClassifier;
    }

    public void setStoryClassifier(NBStoryClassifier storyClassifier) {
        this.storyClassifier = storyClassifier;
    }

    /**
     * The selection of a representative story can be construed in many ways.
     * This implementation delegates its job to the <tt>selectLongestStory</tt>.
     *
     * @param newsStories candidate stories
     * @return the representative story, or <tt>null</tt> if the list is empty
     */
    private NewsStory selectRepresentativeStory(List<NewsStory> newsStories) {

        return selectLongestStory(newsStories);
    }

    /**
     * Picks the story whose content text is longest; the first such story
     * wins on ties.
     *
     * @param newsStories candidate stories
     * @return the longest story, or <tt>null</tt> if the list is empty
     */
    private NewsStory selectLongestStory(List<NewsStory> newsStories) {

        NewsStory representativeStory = null;

        // Start below any possible length so that a story is still selected
        // when every story has empty text (starting at 0 returned null then).
        int maxContentLength = -1;

        for (NewsStory newsStory : newsStories) {

            int storyContentLength = newsStory.getContent().getText().length();

            if (storyContentLength > maxContentLength) {
                maxContentLength = storyContentLength;
                representativeStory = newsStory;
            }
        }

        return representativeStory;
    }

    /**
     * Classifies the cluster's representative story and stores both the
     * resulting topic and the representative story on the cluster. When
     * verbose mode is on, the match is also printed to standard output.
     *
     * @param cluster the story group to label
     */
    public void assignTopicToCluster(NewsStoryGroup cluster) {

        List<NewsStory> newsStories = cluster.getStories();

        // NOTE(review): this is null for a cluster without stories and is
        // passed to the classifier as is - confirm callers never do that.
        NewsStory representativeStory = selectRepresentativeStory(newsStories);

        NewsCategory bestTopic = selectBestMatchingTopic(representativeStory);

        cluster.setTopic(bestTopic);
        cluster.setRepresentativeStory(representativeStory);

        if (isVerbose) {
            boolean skipValidMatches = false;
            evaluateAndPrintResult(representativeStory, representativeStory.getTopic(),
                    bestTopic, skipValidMatches);
        }
    }

    /**
     * Classifies the story, stores the resulting topic on it and prints how
     * the result compares to the topic the story carried beforehand.
     *
     * @param newsStory the story to label
     */
    public void assignTopicToStory(NewsStory newsStory) {

        Instance instance = storyClassifier.toInstance(newsStory);
        Concept concept = storyClassifier.classify(instance);

        NewsCategory bestTopic = storyClassifier.toTopic(concept);

        // Remember the topic the story had before it is overwritten below;
        // reading it back afterwards would compare the prediction with itself.
        NewsCategory actualTopic = newsStory.getTopic();
        newsStory.setTopic(bestTopic);

        // for debugging purposes
        boolean skipValidMatches = false;
        evaluateAndPrintResult(newsStory, actualTopic, bestTopic, skipValidMatches);
    }

    private NewsCategory selectBestMatchingTopic(NewsStory newsStory) {
        Instance instance = storyClassifier.toInstance(newsStory);
        Concept concept = storyClassifier.classify(instance);
        return storyClassifier.toTopic(concept);
    }

    /**
     * Compares the matched topic with the story's original topic and prints
     * a one-line report including the classifier's top scores.
     *
     * @param newsStory the classified story
     * @param actualTopic topic the story had before classification
     *        (per the original comment: assigned during loading, derived
     *        from the document name)
     * @param matchedTopic topic chosen by the classifier
     * @param skipValidMatches when <tt>true</tt>, nothing is printed for
     *        correct matches
     * @return <tt>true</tt> if <tt>actualTopic</tt> is non-null and equals
     *         <tt>matchedTopic</tt>
     */
    private boolean evaluateAndPrintResult(NewsStory newsStory, NewsCategory actualTopic,
            NewsCategory matchedTopic, boolean skipValidMatches) {

        boolean isCorrect = actualTopic != null && actualTopic.equals(matchedTopic);

        if (skipValidMatches && isCorrect) {
            return isCorrect;
        }

        // Null-safe: a missing matched topic is printed as "null".
        String matchedTopicName = (matchedTopic == null) ? null : matchedTopic.getName();

        System.out.print(isCorrect + " " +
                newsStory.getTitle() + " -> " + matchedTopicName);

        // Show topN scores
        int topN = 4;
        List<ClassificationResult> scores = storyClassifier.getTopNScores(topN);
        System.out.print(" ( Top " + topN + " scores: ");
        for (ClassificationResult r : scores) {
            System.out.printf("%s -> %.3f, ", r.getConcept().getName(), r.getScore());
        }

        System.out.print(") ");
        System.out.println("Doc terms: " + Arrays.toString(newsStory.getTopNTerms()));

        return isCorrect;
    }

    /**
     * @return the isVerbose
     */
    public boolean isVerbose() {
        return isVerbose;
    }

    /**
     * @param isVerbose the isVerbose to set
     */
    public void setVerbose(boolean isVerbose) {
        this.isVerbose = isVerbose;
    }

}
Loading

0 comments on commit cc25979

Please sign in to comment.