From 066de9708ab11e31780a0dfc4080fb2c563c8e45 Mon Sep 17 00:00:00 2001 From: Martin Wiesner Date: Mon, 16 Jul 2018 16:21:51 +0200 Subject: [PATCH] #178 - Provide method to retrieve a page's categories by page title - Fixes this issue by introducing `Set getCategories(String pageTitle)` in Wikipedia - Adds new test cases to `WikipediaTest` to cover the new method - Sanitizes some JavaDoc in Category and Page --- .../ukp/wikipedia/api/Category.java | 9 +--- .../tudarmstadt/ukp/wikipedia/api/Page.java | 6 +-- .../ukp/wikipedia/api/Wikipedia.java | 47 +++++++++++++------ .../ukp/wikipedia/api/WikipediaTest.java | 43 +++++++++++++++++ 4 files changed, 78 insertions(+), 27 deletions(-) diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java index fe5d6782..b842e273 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Category.java @@ -79,7 +79,7 @@ public Category(Wikipedia wiki, String pName) throws WikiApiException { } /** - * @see de.tudarmstadt.ukp.wikipedia.api.Category#Category(long) + * @see de.tudarmstadt.ukp.wikipedia.api.Category#Category(Wikipedia, long) */ private void createCategory(long id) throws WikiPageNotFoundException { Session session = this.wiki.__getHibernateSession(); @@ -144,7 +144,6 @@ public long __getId() { } /** - * Returns a unique page id. * @return A unique page id. */ public int getPageId() { @@ -157,7 +156,6 @@ public int getPageId() { } /** - * Returns a set containing parents (supercategories) of this category. * @return A set containing parents (supercategories) of this category. */ public Set getParents() { @@ -208,7 +206,6 @@ public Set getParentIDs() { } /** - * Returns a set containing the children (subcategories) of this category. 
* @return A set containing the children (subcategories) of this category. */ public Set getChildren() { @@ -259,7 +256,6 @@ public Set getChildrenIDs() { } /** - * Returns the title of the category. * @return The title of the category. * @throws WikiTitleParsingException */ @@ -275,7 +271,6 @@ public Title getTitle() throws WikiTitleParsingException { /** - * Returns the set of pages that are categorized under this category. * @return The set of pages that are categorized under this category. * @throws WikiApiException * @deprecated Use {@link #getArticles()} instead. @@ -296,7 +291,6 @@ public Set getPages() throws WikiApiException { } /** - * Returns the set of articles that are categorized under this category. * @return The set of articles that are categorized under this category. * @throws WikiApiException */ @@ -315,7 +309,6 @@ public Set getArticles() throws WikiApiException { } /** - * Returns the set of article ids that are categorized under this category. * @return The set of article ids that are categorized under this category. */ public Set getArticleIds() { diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java index fbc27b46..32e1121f 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Page.java @@ -260,8 +260,6 @@ public int getPageId() } /** - * Returns a set of categories that this page belongs to. - * * @return The a set of categories that this page belongs to. */ public Set getCategories() @@ -471,9 +469,7 @@ public Title getTitle() } /** - * Returns the set of strings that are redirects to this page. - * - * @return The set of redirect strings. + * @return The set of strings that are redirects to this page. 
*/ public Set getRedirects() { diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java index 06ed06da..8d3140e8 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/main/java/de/tudarmstadt/ukp/wikipedia/api/Wikipedia.java @@ -17,16 +17,8 @@ *******************************************************************************/ package de.tudarmstadt.ukp.wikipedia.api; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; -import java.util.Set; -import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -403,10 +395,7 @@ protected Map getSimilarPages(String pPattern, int pSize) throws W Session session = this.__getHibernateSession(); session.beginTransaction(); - Iterator results = session.createQuery( - "select pml.pageID, pml.name from PageMapLine as pml") - .list() - .iterator(); + Iterator results = session.createQuery("select pml.pageID, pml.name from PageMapLine as pml").list().iterator(); while (results.hasNext()) { Object[] row = (Object[]) results.next(); int pageID = (Integer) row[0]; @@ -496,6 +485,37 @@ public Iterable getCategories() { return new CategoryIterable(this); } + + /** + * Gets the {@link Category categories} for a given {@link Page} identified by its {@code pageTitle}. + * @param pageTitle The title of a {@link Page}, not a category. + * @return The category objects which are associated with the given {@code pageTitle}. + * @throws WikiPageNotFoundException Thrown if the given {@code pageTitle} is {@code null} or empty.
+ */ + public Set getCategories(String pageTitle) throws WikiPageNotFoundException + { + if (pageTitle == null || pageTitle.length() == 0) { + throw new WikiPageNotFoundException(); + } + + Session session = this.__getHibernateSession(); + session.beginTransaction(); + List categoryHibernateIds = session.createQuery( + "select c from Page p left join p.categories c where p.name = :pageTitle", Integer.class) + .setParameter("pageTitle", pageTitle).list(); + session.getTransaction().commit(); + + Set categorySet = new HashSet(categoryHibernateIds.size()); + for (int hibernateId : categoryHibernateIds) { + try { + categorySet.add(new Category(this, hibernateId)); + } catch (WikiPageNotFoundException e) { + logger.warn("Could not load Category by it's HibernateId = '"+hibernateId+"'"); + } + } + return categorySet; + } + /** * Get all wikipedia {@link Category categories}. * Returns only an iterable, as a collection may not fit into memory for a large wikipedia. @@ -778,7 +798,6 @@ public String getWikipediaId() { sb.append(this.getDatabaseConfiguration().getLanguage()); return sb.toString(); } - } class ValueComparator implements Comparator> { diff --git a/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/WikipediaTest.java b/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/WikipediaTest.java index 5f923dfd..a4628cfc 100644 --- a/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/WikipediaTest.java +++ b/de.tudarmstadt.ukp.wikipedia.api/src/test/java/de/tudarmstadt/ukp/wikipedia/api/WikipediaTest.java @@ -322,6 +322,49 @@ public void testGetCategoryInvalid2() { assertNull(wiki.getCategory(Integer.MAX_VALUE)); } + @Test + public void testGetCategoriesByPageTitle() { + int expectedCategoryPageId = 9; + String expectedCategoryTitle = "Publications of UKP"; + try { + Set categories = wiki.getCategories(A_FAMOUS_PAGE); + assertNotNull(categories); + assertFalse(categories.isEmpty()); 
+ assertEquals(1, categories.size()); + Category c = categories.iterator().next(); + assertNotNull(c); + assertEquals(expectedCategoryPageId, c.getPageId()); + assertEquals(expectedCategoryTitle, c.getTitle().toString()); + } catch (WikiTitleParsingException e) { + fail("A WikiTitleParsingException occurred while getting the categories of a page by its title"); + } catch (WikiPageNotFoundException e) { + fail("A WikiPageNotFoundException occurred while getting the categories of a page by its title"); + } + } + + @Test + public void testGetCategoriesByPageTitleInvalid1() { + try { + wiki.getCategories(""); + } catch (WikiPageNotFoundException wpnfe) { + // this is expected here + } catch (RuntimeException re) { + fail("Expected a WikiPageNotFoundException, yet encountered RuntimeException: " + re.getLocalizedMessage()); + } + } + + @Test + public void testGetCategoriesByPageTitleInvalid2() { + try { + wiki.getCategories(null); + } catch (WikiPageNotFoundException wpnfe) { + // this is expected here + } catch (RuntimeException re) { + fail("Expected a WikiPageNotFoundException, yet encountered RuntimeException: " + re.getLocalizedMessage()); + } + } + + @Test public void testGetLanguage() { assertNotNull(wiki.getLanguage());