Skip to content

Commit

Permalink
Parity with the Google Code project
Browse files Browse the repository at this point in the history
  • Loading branch information
H. Marmanis committed Jan 1, 2013
1 parent 5c38f00 commit 5bc3d2c
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.yooreeka.algos.reco.collab.similarity.naive.UserContentBasedSimilarity;
import org.yooreeka.algos.reco.collab.similarity.naive.UserItemContentBasedSimilarity;
import org.yooreeka.config.YooreekaConfigurator;
import org.yooreeka.util.P;

public class SimilarityMatrixRepository {

Expand Down Expand Up @@ -115,15 +116,13 @@ public SimilarityMatrix load(RecommendationType type, Dataset data,
if (cache != null) {
m = cache.get(id);
if (m == null) {
System.out
.println("similarity matrix instance doesn't exist in cache: "
P.println("similarity matrix instance doesn't exist in cache: "
+ "id: "
+ id
+ ", cache: '"
+ cache.getLocation() + "'.");
} else {
System.out
.println("similarity matrix instance was loaded from cache: "
P.println("similarity matrix instance was loaded from cache: "
+ "id: "
+ id
+ ", cache: '"
Expand Down
25 changes: 23 additions & 2 deletions src/org/yooreeka/util/P.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,33 @@
public class P {

/**
 * Prints a 65-character horizontal rule to the output used by
 * {@code println}: six groups of ten dashes separated by single spaces.
 */
public static void hline() {
    final String rule = "---------- ---------- ---------- ---------- ---------- ----------";
    println(rule);
}



/**
 * Prints the current wall-clock time, prefixed with {@code "Time: "}.
 * The value is whatever {@code System.currentTimeMillis()} returns,
 * i.e. a millisecond count; see that method's documentation for details.
 */
public static void time() {
    final long now = System.currentTimeMillis();
    println("Time: " + now);
}

/**
 * Prints the number of milliseconds elapsed since a reference moment,
 * prefixed with {@code "Time: "}.
 *
 * @param t the reference moment, expressed on the same scale as
 *          {@code System.currentTimeMillis()}; the printed value is
 *          the current time minus {@code t}
 */
public static void time(long t) {
    final long elapsed = System.currentTimeMillis() - t;
    println("Time: " + elapsed);
}

public static void main(String[] args) {
println(Charset.defaultCharset().displayName());
println("" + P.class.getName());
Expand Down
86 changes: 86 additions & 0 deletions src/org/yooreeka/util/parsing/html/BookmarkParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* ________________________________________________________________________________________
*
* Y O O R E E K A
* A library for data mining, machine learning, soft computing, and mathematical analysis
* ________________________________________________________________________________________
*
* The Yooreeka project started with the code of the book "Algorithms of the Intelligent Web "
* (Manning 2009). Although the term "Web" prevailed in the title, in essence, the algorithms
* are valuable in any software application.
*
* Copyright (c) 2007-2009 Haralambos Marmanis & Dmitry Babenko
* Copyright (c) 2009-2012 Marmanis Group LLC and individual contributors as indicated by the @author tags.
*
* Certain library functions depend on other Open Source software libraries, which are covered
* by different license agreements. See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* Marmanis Group LLC licenses this file to You under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.yooreeka.util.parsing.html;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;

import org.yooreeka.util.parsing.common.ProcessedDocument;

/**
 * An {@link HTMLDocumentParser} subclass intended for bookmark files.
 * At present it adds no parsing behavior of its own; it only exposes the
 * superclass constructors and a small command-line driver.
 *
 * @author haris
 */
public class BookmarkParser extends HTMLDocumentParser {

    /**
     * Creates a parser using the superclass's no-argument construction.
     */
    public BookmarkParser() {
        // Nothing to initialize beyond what the superclass sets up.
    }

    /**
     * Creates a parser by delegating to the superclass constructor that
     * accepts a reader.
     *
     * @param reader character source passed through to
     *               {@link HTMLDocumentParser}
     * @throws HTMLDocumentParserException if the superclass constructor
     *                                     reports a failure
     */
    public BookmarkParser(Reader reader) throws HTMLDocumentParserException {
        super(reader);
    }

    /**
     * Command-line driver: parses the HTML file named by the first
     * argument, decoding it as UTF-8. The underlying file stream is
     * always closed, whether or not parsing succeeds.
     *
     * @param args {@code args[0]} must be the path of the HTML file to parse
     * @throws IllegalArgumentException if no file name is supplied
     * @throws RuntimeException if the file cannot be opened or parsed;
     *                          the original failure is kept as the cause
     */
    public static void main(String[] args) {

        if (args == null || args.length == 0) {
            throw new IllegalArgumentException(
                    "Usage: BookmarkParser <html-file>");
        }

        String filename = args[0];
        InputStream inputStream = null;
        try {
            BookmarkParser bookParser = new BookmarkParser();
            inputStream = new BufferedInputStream(
                    new FileInputStream(filename));
            Reader reader = new InputStreamReader(inputStream, "UTF-8");
            // The parsed document is not consumed yet; it is kept in a
            // local as the hook for inspecting or printing the result.
            ProcessedDocument doc = bookParser.parse(reader);
        } catch (Exception e) {
            throw new RuntimeException("Failed to parse html from file: "
                    + filename, e);
        } finally {
            // Closing the underlying stream releases the file handle even
            // when constructing the reader or parsing fails.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (java.io.IOException ignored) {
                    // A failure while closing a read-only stream cannot be
                    // acted upon and must not mask a parse failure.
                }
            }
        }

    }

}
10 changes: 0 additions & 10 deletions test/org/yooreeka/test/TestSandbox.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
*/
package org.yooreeka.test;

import org.yooreeka.config.YooreekaConfigurator;
import org.yooreeka.examples.newsgroups.NewsCrawler;


/**
Expand All @@ -49,13 +47,5 @@ public class TestSandbox {
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// Looks up the CRAWL_DATA_DIR property from the Yooreeka configuration;
// presumably the root directory for crawl data — verify against YooreekaConfigurator.
String rootDir = YooreekaConfigurator.getProperty(YooreekaConfigurator.CRAWL_DATA_DIR);

// NOTE(review): the meaning of the numeric arguments (2, 10) is not visible
// here — confirm against the NewsCrawler constructor.
NewsCrawler crawler = new NewsCrawler(rootDir, 2, 10);

// Registers a single seed URL with the crawler before running it.
crawler.addSeedUrl("http://www.manning.com/");

crawler.run();

}
}

0 comments on commit 5bc3d2c

Please sign in to comment.