forked from marmanis/yooreeka
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Synchronizing the GitHub version with the Google Code SVN
- Loading branch information
Showing
41 changed files
with
5,265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
yooreeka.home=C:/code/GoogleCode/yooreeka | ||
yooreeka.data.dir=C:/code/GoogleCode/yooreeka/data | ||
yooreeka.crawl.dir=C:/code/GoogleCode/yooreeka/data/crawls | ||
yooreeka.temp.dir=C:/code/GoogleCode/yooreeka/deploy/temp | ||
yooreeka.movielens.data.dir=C:/code/GoogleCode/yooreeka/data/ch03/MovieLens | ||
yooreeka.movielenstest.data.dir=C:/code/GoogleCode/yooreeka/data/ch03/MovieLens/test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package org.yooreeka.algos.taxis.rules; | ||
|
||
import java.util.Collection; | ||
|
||
import org.drools.KnowledgeBase; | ||
import org.drools.KnowledgeBaseFactory; | ||
import org.drools.builder.KnowledgeBuilder; | ||
import org.drools.builder.KnowledgeBuilderFactory; | ||
import org.drools.builder.ResourceType; | ||
import org.drools.definition.KnowledgePackage; | ||
import org.drools.io.Resource; | ||
import org.drools.io.ResourceFactory; | ||
import org.drools.runtime.StatefulKnowledgeSession; | ||
import org.yooreeka.examples.spamfilter.ClassificationResult; | ||
import org.yooreeka.examples.spamfilter.data.Email; | ||
import org.yooreeka.util.P; | ||
|
||
public class RuleEngine { | ||
|
||
//private RuleBase rules; | ||
private KnowledgeBase kbase; | ||
|
||
public RuleEngine(String rulesFile) throws RuleEngineException { | ||
|
||
try { | ||
// TODO: Remove. This is the old code that corresponded to Drools | ||
// 4.x | ||
// Reader source = new InputStreamReader( | ||
// new BufferedInputStream(new FileInputStream(rulesFile))); | ||
// | ||
// // switch to JANINO compiler | ||
// Properties properties = new Properties(); | ||
// properties.setProperty( "drools.dialect.java.compiler", | ||
// "JANINO" ); | ||
// PackageBuilderConfiguration cfg = | ||
// new PackageBuilderConfiguration( properties ); | ||
|
||
/* | ||
* A <tt>KnowledgeBuilder</tt> is used to turn a DRL source file | ||
* into <tt>Package</tt> objects which the Knowledge Base can | ||
* consume. | ||
*/ | ||
KnowledgeBuilder kbuilder = KnowledgeBuilderFactory.newKnowledgeBuilder(); | ||
|
||
// this will parse and compile the DRL file | ||
Resource r = ResourceFactory.newFileResource(rulesFile); | ||
kbuilder.add(r, ResourceType.DRL); | ||
|
||
// Check the builder for errors | ||
if (kbuilder.hasErrors()) { | ||
P.println(kbuilder.getErrors().toString()); | ||
throw new RuntimeException("Unable to compile the DRL file: " | ||
+ rulesFile); | ||
} | ||
|
||
// get the compiled packages | ||
final Collection<KnowledgePackage> pkgs = kbuilder.getKnowledgePackages(); | ||
|
||
// add the packages to a KnowledgeBase (deploy the knowledge | ||
// packages). | ||
kbase = KnowledgeBaseFactory.newKnowledgeBase(); | ||
kbase.addKnowledgePackages(pkgs); | ||
|
||
// // build a rule package | ||
// PackageBuilder builder = new PackageBuilder(cfg); | ||
// | ||
// // parse and compile rules | ||
// builder.addPackageFromDrl(source); | ||
// | ||
// Package pkg = builder.getPackage(); | ||
// | ||
// rules = RuleBaseFactory.newRuleBase(); | ||
// rules.addPackage(pkg); | ||
|
||
} catch (Exception e) { | ||
throw new RuleEngineException("Could not load/compile rules from DRL file: '" | ||
+ rulesFile+ "' ", e); | ||
} | ||
} | ||
|
||
public void executeRules(ClassificationResult classificationResult, Email email) { | ||
|
||
final StatefulKnowledgeSession ksession = kbase.newStatefulKnowledgeSession(); | ||
|
||
ksession.setGlobal("classificationResult", classificationResult); | ||
ksession.insert(email); | ||
ksession.fireAllRules(); | ||
} | ||
} |
15 changes: 15 additions & 0 deletions
15
src/org/yooreeka/algos/taxis/rules/RuleEngineException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package org.yooreeka.algos.taxis.rules; | ||
|
||
/**
 * Unchecked exception raised when rules cannot be loaded, compiled or run.
 */
public class RuleEngineException extends RuntimeException {

    private static final long serialVersionUID = 4267289121996977169L;

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message description of what went wrong
     * @param cause the failure that triggered this exception
     */
    public RuleEngineException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a message and no cause.
     *
     * @param message description of what went wrong
     */
    public RuleEngineException(String message) {
        super(message);
    }
}
113 changes: 113 additions & 0 deletions
113
src/org/yooreeka/examples/newsgroups/classification/ClassificationResult.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
package org.yooreeka.examples.newsgroups.classification; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
|
||
import org.yooreeka.algos.taxis.core.intf.Concept; | ||
|
||
public class ClassificationResult { | ||
private Concept concept; | ||
private double score; | ||
|
||
public ClassificationResult(Concept concept, double score) { | ||
this.concept = concept; | ||
this.score = score; | ||
} | ||
|
||
public Concept getConcept() { | ||
return concept; | ||
} | ||
|
||
public void setConcept(Concept concept) { | ||
this.concept = concept; | ||
} | ||
|
||
public double getScore() { | ||
return score; | ||
} | ||
|
||
public void setScore(double score) { | ||
this.score = score; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
final int prime = 31; | ||
int result = 1; | ||
// only take into account concept name | ||
result = prime * result + ((concept == null) ? 0 : concept.getName().hashCode()); | ||
return result; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object obj) { | ||
// only take into account concept name | ||
if (this == obj) | ||
return true; | ||
if (obj == null) | ||
return false; | ||
if (! (obj instanceof ClassificationResult)) | ||
return false; | ||
final ClassificationResult other = (ClassificationResult) obj; | ||
// only take into account concept name | ||
if (concept == null) { | ||
if (other.concept != null) | ||
return false; | ||
} else if (!concept.getName().equals(other.concept.getName())) | ||
return false; | ||
return true; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
String name = null; | ||
if( concept != null ) { | ||
name = concept.getName(); | ||
} | ||
return "[" + name + "->" + score + "]"; | ||
} | ||
|
||
public static void sort(List<ClassificationResult> results) { | ||
|
||
Collections.sort(results, new Comparator<ClassificationResult>() { | ||
|
||
public int compare(ClassificationResult f1, ClassificationResult f2) { | ||
|
||
int result = 0; | ||
if( f1.getScore() < f2.getScore() ) { | ||
result = 1; | ||
} | ||
else if( f1.getScore() > f2.getScore() ) { | ||
result = -1; | ||
} | ||
else { | ||
result = 0; | ||
} | ||
return result; | ||
} | ||
}); | ||
} | ||
|
||
public static List<ClassificationResult> getTopResults( | ||
List<ClassificationResult> results, int topN) { | ||
|
||
// sort friends based on itemAgreement | ||
ClassificationResult.sort(results); | ||
|
||
// select top N friends | ||
List<ClassificationResult> bestScores = new ArrayList<ClassificationResult>(); | ||
for(ClassificationResult f : results) { | ||
if( bestScores.size() >= topN ) { | ||
// have enough items. | ||
break; | ||
} | ||
bestScores.add(f); | ||
} | ||
|
||
return bestScores; | ||
} | ||
|
||
|
||
} |
9 changes: 9 additions & 0 deletions
9
src/org/yooreeka/examples/newsgroups/classification/ClassificationStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package org.yooreeka.examples.newsgroups.classification; | ||
|
||
import org.yooreeka.examples.newsgroups.core.NewsStory; | ||
import org.yooreeka.examples.newsgroups.core.NewsStoryGroup; | ||
|
||
public interface ClassificationStrategy { | ||
public void assignTopicToCluster(NewsStoryGroup cluster); | ||
public void assignTopicToStory(NewsStory newsStory); | ||
} |
143 changes: 143 additions & 0 deletions
143
src/org/yooreeka/examples/newsgroups/classification/ClassificationStrategyImpl.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
package org.yooreeka.examples.newsgroups.classification; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
import org.yooreeka.algos.taxis.core.intf.Concept; | ||
import org.yooreeka.algos.taxis.core.intf.Instance; | ||
import org.yooreeka.examples.newsgroups.core.NewsCategory; | ||
import org.yooreeka.examples.newsgroups.core.NewsStory; | ||
import org.yooreeka.examples.newsgroups.core.NewsStoryGroup; | ||
|
||
public class ClassificationStrategyImpl implements ClassificationStrategy { | ||
|
||
private NBStoryClassifier storyClassifier; | ||
|
||
private boolean isVerbose=false; | ||
|
||
public ClassificationStrategyImpl() { | ||
// EMPTY | ||
} | ||
|
||
public NBStoryClassifier getStoryClassifier() { | ||
return storyClassifier; | ||
} | ||
|
||
|
||
public void setStoryClassifier(NBStoryClassifier storyClassifier) { | ||
this.storyClassifier = storyClassifier; | ||
} | ||
|
||
/** | ||
* The selection of a representative story can be construed in many ways. | ||
* This implementation delegates its job to the <tt>selectLongestStory</tt>. | ||
* | ||
* @param newsStories | ||
* @return | ||
* | ||
*/ | ||
private NewsStory selectRepresentativeStory(List<NewsStory> newsStories) { | ||
|
||
return selectLongestStory(newsStories); | ||
} | ||
|
||
private NewsStory selectLongestStory(List<NewsStory> newsStories) { | ||
|
||
NewsStory representativeStory = null; | ||
|
||
int maxContentLength = 0; | ||
|
||
for(NewsStory newsStory : newsStories) { | ||
|
||
int storyContentLength = newsStory.getContent().getText().length(); | ||
|
||
if( storyContentLength > maxContentLength ) { | ||
maxContentLength = storyContentLength; | ||
representativeStory = newsStory; | ||
} | ||
} | ||
|
||
return representativeStory; | ||
} | ||
|
||
public void assignTopicToCluster(NewsStoryGroup cluster) { | ||
|
||
List<NewsStory> newsStories = cluster.getStories(); | ||
|
||
NewsStory representativeStory = selectRepresentativeStory(newsStories); | ||
|
||
NewsCategory bestTopic = selectBestMatchingTopic(representativeStory); | ||
|
||
cluster.setTopic(bestTopic); | ||
cluster.setRepresentativeStory(representativeStory); | ||
|
||
if (isVerbose) { | ||
boolean skipValidMatches = false; | ||
evaluateAndPrintResult(representativeStory, bestTopic, skipValidMatches); | ||
} | ||
} | ||
|
||
public void assignTopicToStory(NewsStory newsStory) { | ||
|
||
Instance instance = storyClassifier.toInstance(newsStory); | ||
Concept concept = storyClassifier.classify(instance); | ||
|
||
NewsCategory bestTopic = storyClassifier.toTopic(concept); | ||
newsStory.setTopic(bestTopic); | ||
|
||
// for debugging purposes | ||
boolean skipValidMatches = false; | ||
evaluateAndPrintResult(newsStory, bestTopic, skipValidMatches); | ||
} | ||
|
||
private NewsCategory selectBestMatchingTopic(NewsStory newsStory) { | ||
Instance instance = storyClassifier.toInstance(newsStory); | ||
Concept concept = storyClassifier.classify(instance); | ||
return storyClassifier.toTopic(concept); | ||
} | ||
|
||
private boolean evaluateAndPrintResult(NewsStory newsStory, NewsCategory matchedTopic, boolean skipValidMatches) { | ||
// NewsCategory that was assigned to story during loading. | ||
// Derived from document name. | ||
NewsCategory actualTopic = newsStory.getTopic(); | ||
boolean isCorrect = false; | ||
if( actualTopic != null && actualTopic.equals(matchedTopic) ) { | ||
isCorrect = true; | ||
} | ||
|
||
if( skipValidMatches && isCorrect ) { | ||
return isCorrect; | ||
} | ||
|
||
System.out.print(isCorrect + " " + | ||
newsStory.getTitle() + " -> " + matchedTopic.getName()); | ||
|
||
// Show topN scores | ||
int topN = 4; | ||
List<ClassificationResult> scores = storyClassifier.getTopNScores(topN); | ||
System.out.print(" ( Top " + topN + " scores: "); | ||
for(ClassificationResult r : scores) { | ||
System.out.printf("%s -> %.3f, ", r.getConcept().getName(), r.getScore()); | ||
} | ||
|
||
System.out.print(") "); | ||
System.out.println("Doc terms: " + Arrays.toString(newsStory.getTopNTerms())); | ||
|
||
return isCorrect; | ||
} | ||
|
||
/** | ||
* @return the isVerbose | ||
*/ | ||
public boolean isVerbose() { | ||
return isVerbose; | ||
} | ||
|
||
/** | ||
* @param isVerbose the isVerbose to set | ||
*/ | ||
public void setVerbose(boolean isVerbose) { | ||
this.isVerbose = isVerbose; | ||
} | ||
|
||
} |
Oops, something went wrong.