Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#178 - Provide method to retrieve a page's categories by page title #179

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ long __getId() {
}

/**
* Returns a unique page id.
* @return A unique page id.
*/
public int getPageId() {
Expand All @@ -161,7 +160,6 @@ public int getPageId() {
}

/**
* Returns a set containing parents (supercategories) of this category.
* @return A set containing parents (supercategories) of this category.
*/
public Set<Category> getParents() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,6 @@ public int getPageId()
}

/**
* Returns a set of categories that this page belongs to.
*
* @return A set of categories that this page belongs to.
*/
public Set<Category> getCategories()
Expand Down Expand Up @@ -473,9 +471,7 @@ public Title getTitle()
}

/**
* Returns the set of strings that are redirects to this page.
*
* @return The set of redirect strings.
* @return The set of strings that are redirects to this page.
*/
public Set<String> getRedirects()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,8 @@
*******************************************************************************/
package de.tudarmstadt.ukp.wikipedia.api;

import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.*;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Star import :)

Copy link
Contributor Author

@mawiesne mawiesne Jul 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

;) - I disabled the auto-optimization feature already.

import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
Expand Down Expand Up @@ -403,10 +395,7 @@ protected Map<Page, Double> getSimilarPages(String pPattern, int pSize) throws W

Session session = this.__getHibernateSession();
session.beginTransaction();
Iterator results = session.createQuery(
"select pml.pageID, pml.name from PageMapLine as pml")
.list()
.iterator();
Iterator results = session.createQuery("select pml.pageID, pml.name from PageMapLine as pml").list().iterator();
while (results.hasNext()) {
Object[] row = (Object[]) results.next();
int pageID = (Integer) row[0];
Expand Down Expand Up @@ -496,6 +485,37 @@ public Iterable<Category> getCategories() {
return new CategoryIterable(this);
}


/**
 * Gets the {@link Category categories} for a given {@link Page} identified by its {@code pageTitle}.
 * <p>
 * The title is compared against the stored page name exactly as given.
 *
 * @param pageTitle The title of a {@link Page}, not a category.
 * @return The category objects which are associated with the given {@code pageTitle}. Never {@code null};
 *         empty if the page exists but has no (loadable) categories.
 * @throws WikiPageNotFoundException Thrown if {@code pageTitle} is {@code null} or empty, or if no
 *         {@link Page} exists for the given {@code pageTitle}.
 */
public Set<Category> getCategories(String pageTitle) throws WikiPageNotFoundException
{
    if (pageTitle == null || pageTitle.isEmpty()) {
        throw new WikiPageNotFoundException();
    }

    Session session = this.__getHibernateSession();
    session.beginTransaction();
    List<Integer> categoryHibernateIds = session.createQuery(
            "select c from Page p left join p.categories c where p.name = :pageTitle", Integer.class)
            .setParameter("pageTitle", pageTitle).list();
    session.getTransaction().commit();

    // Because of the left join, a matching page always produces at least one row
    // (a single null entry when it has no categories). Zero rows therefore means
    // that no page with this title exists, which the contract reports as an exception.
    if (categoryHibernateIds.isEmpty()) {
        throw new WikiPageNotFoundException();
    }

    Set<Category> categorySet = new HashSet<Category>(categoryHibernateIds.size());
    for (Integer boxedId : categoryHibernateIds) {
        if (boxedId == null) {
            // Null placeholder row of the left join: the page has no category here.
            continue;
        }
        // Unbox explicitly so the same Category constructor overload is selected as before.
        int hibernateId = boxedId.intValue();
        try {
            categorySet.add(new Category(this, hibernateId));
        } catch (WikiPageNotFoundException e) {
            // Best effort: skip categories that cannot be loaded, but keep the cause in the log.
            logger.warn("Could not load Category by it's HibernateId = '"+hibernateId+"'", e);
        }
    }
    return categorySet;
}

/**
* Get all wikipedia {@link Category categories}.
* Returns only an iterable, as a collection may not fit into memory for a large wikipedia.
Expand Down Expand Up @@ -778,7 +798,6 @@ public String getWikipediaId() {
sb.append(this.getDatabaseConfiguration().getLanguage());
return sb.toString();
}

}

class ValueComparator implements Comparator<Map.Entry<Integer,Double>> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,49 @@ public void testGetCategoryInvalid2() {
assertNull(wiki.getCategory(Integer.MAX_VALUE));
}

/**
 * Looks up the categories of {@code A_FAMOUS_PAGE} by its title and checks that exactly one
 * category is returned, carrying the expected page id and title.
 */
@Test
public void testGetCategoriesByPageTitle() {
    final String wantedTitle = "Publications of UKP";
    final int wantedPageId = 9;
    try {
        Set<Category> found = wiki.getCategories(A_FAMOUS_PAGE);

        // the result must be a non-empty set holding a single category
        assertNotNull(found);
        assertFalse(found.isEmpty());
        assertEquals(1, found.size());

        // that single category must be the expected one
        Category single = found.iterator().next();
        assertNotNull(single);
        assertEquals(wantedPageId, single.getPageId());
        assertEquals(wantedTitle, single.getTitle().toString());
    } catch (WikiTitleParsingException titleProblem) {
        fail("A WikiTitleParsingException occurred while getting the categories of a page by its title");
    } catch (WikiPageNotFoundException missingPage) {
        fail("A WikiPageNotFoundException occurred while getting the categories of a page by its title");
    }
}

/**
 * Verifies that an empty page title is rejected with a {@link WikiPageNotFoundException}.
 */
@Test
public void testGetCategoriesByPageTitleInvalid1() {
    try {
        wiki.getCategories("");
        // Without this, the test would pass silently when no exception is thrown at all.
        fail("Expected a WikiPageNotFoundException for an empty page title, yet no exception was thrown");
    } catch (WikiPageNotFoundException wpnfe) {
        // this is expected here
    } catch (RuntimeException re) {
        fail("Expected a WikiPageNotFoundException, yet encountered RuntimeException: " + re.getLocalizedMessage());
    }
}

/**
 * Verifies that a {@code null} page title is rejected with a {@link WikiPageNotFoundException}.
 */
@Test
public void testGetCategoriesByPageTitleInvalid2() {
    try {
        wiki.getCategories(null);
        // Without this, the test would pass silently when no exception is thrown at all.
        fail("Expected a WikiPageNotFoundException for a null page title, yet no exception was thrown");
    } catch (WikiPageNotFoundException wpnfe) {
        // this is expected here
    } catch (RuntimeException re) {
        fail("Expected a WikiPageNotFoundException, yet encountered RuntimeException: " + re.getLocalizedMessage());
    }
}


@Test
public void testGetLanguage() {
assertNotNull(wiki.getLanguage());
Expand Down