Skip to content

Commit

Permalink
Fix deprecation warnings in Wikipedia and WikipediaInfo
Browse files Browse the repository at this point in the history
Adds related unit tests
  • Loading branch information
rzo1 committed Dec 1, 2023
1 parent 3d50052 commit c0441c8
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 66 deletions.
32 changes: 10 additions & 22 deletions dkpro-jwpl-api/src/main/java/org/dkpro/jwpl/api/Wikipedia.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,8 @@
package org.dkpro.jwpl.api;

import java.lang.invoke.MethodHandles;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiInitializationException;
Expand Down Expand Up @@ -463,8 +455,7 @@ public Iterable<Page> getDiscussionArchives(Page articlePage) throws WikiApiExce
* @throws WikiApiException
* Thrown if errors occurred.
*/
//// I do not want to make this public at the moment (TZ, March, 2007)
protected Map<Page, Double> getSimilarPages(String pPattern, int pSize) throws WikiApiException
public Map<Page, Double> getSimilarPages(String pPattern, int pSize) throws WikiApiException
{
Title title = new Title(pPattern);
String pattern = title.getWikiStyleTitle();
Expand All @@ -476,20 +467,17 @@ protected Map<Page, Double> getSimilarPages(String pPattern, int pSize) throws W
// holds a mapping of the best distance values to page IDs
Map<Integer, Double> distanceMap = new HashMap<>();

final LevenshteinStringDistance lsd = new LevenshteinStringDistance();
Session session = this.__getHibernateSession();
session.beginTransaction();
for (Object o : session.createQuery("select pml.pageID, pml.name from PageMapLine as pml")
for (PageTuple o : session.createQuery("select new org.dkpro.jwpl.api.Wikipedia$PageTuple(pml.pageID, pml.name) from PageMapLine as pml", PageTuple.class)
.list()) {
Object[] row = (Object[]) o;
int pageID = (Integer) row[0];
String pageName = (String) row[1];

// this returns a similarity - if we want to use it, we have to change the semantics the
// ordering of the results
// double distance = new Levenshtein().getSimilarity(pageName, pPattern);
double distance = new LevenshteinStringDistance().distance(pageName, pattern);
double distance = lsd.distance(o.name(), pattern);

distanceMap.put(pageID, distance);
distanceMap.put(o.id(), distance);

// if there are more than "pSize" entries in the map remove the last one (it has the
// biggest distance)
Expand All @@ -512,10 +500,7 @@ protected Map<Page, Double> getSimilarPages(String pPattern, int pSize) throws W
page = this.getPage(pageID);
}
catch (WikiPageNotFoundException e) {
logger.error("Page with pageID " + pageID
+ " could not be found. Fatal error. Terminating.");
e.printStackTrace();
System.exit(1);
logger.error("Page with pageID {} could not be found. Fatal error. Terminating.", pageID, e);
}
pageMap.put(page, distanceMap.get(pageID));
}
Expand Down Expand Up @@ -938,4 +923,7 @@ public int compare(Entry<Integer, Double> e1, Entry<Integer, Double> e2)
}
}

/**
 * Immutable (pageID, name) pair holding one {@code PageMapLine} row as selected
 * by the typed HQL query in {@code getSimilarPages}; replaces the former
 * untyped {@code Object[]} row handling there.
 */
private record PageTuple(int id, String name) {

}
}
61 changes: 19 additions & 42 deletions dkpro-jwpl-api/src/main/java/org/dkpro/jwpl/api/WikipediaInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,7 @@
package org.dkpro.jwpl.api;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
Expand All @@ -51,7 +45,7 @@ public class WikipediaInfo
private Map<Integer, Integer> degreeDistribution;
private Set<Integer> categorizedArticleSet;

private Wikipedia wiki;
private final Wikipedia wiki;

/**
* Get infos for the whole wikipedia.
Expand All @@ -63,25 +57,29 @@ public class WikipediaInfo
*/
public WikipediaInfo(Wikipedia pWiki) throws WikiApiException
{
this.wiki = pWiki;
new WikipediaInfo(this.wiki.getPages());

this(pWiki.getPages(), pWiki);
}

/**
* Get infos only for a subset of articles.
*
* @param pPages
* A set of pages. Only this subset of wiki pages is used in the info object.
*
* @param pWiki
* The wiki object.
* @throws WikiApiException Thrown if errors occurred.
*/
public WikipediaInfo(Iterable<Page> pPages) throws WikiApiException
public WikipediaInfo(Iterable<Page> pPages, Wikipedia pWiki) throws WikiApiException
{
if (pPages == null) {
throw new WikiApiException("The page set has to be initialized.");
}

if (pWiki == null) {
throw new WikiApiException("The wiki instance is not set.");
}

wiki = pWiki;
pages = pPages;
averageFanOut = -1.0; // lazy initialization => it is computed and stored when it is
// accessed
Expand All @@ -91,11 +89,11 @@ public WikipediaInfo(Iterable<Page> pPages) throws WikiApiException

// get number of pages
numberOfPages = 0;
while (pages.iterator().hasNext()) {
Iterator<Page> it = pages.iterator();
while (it.hasNext()) {
numberOfPages++;
pages.iterator().next();
it.next();
}

}

/**
Expand All @@ -109,34 +107,14 @@ public WikipediaInfo(Iterable<Page> pPages) throws WikiApiException
private double computeAverageFanOut(Iterable<Page> pages)
{

Set<Integer> pageIDs = new HashSet<>();
while (pages.iterator().hasNext()) {
pageIDs.add(pages.iterator().next().getPageId());
}
final Iterator<Page> it = pages.iterator();

if (pageIDs.isEmpty()) {
logger.warn("Cannot compute average fan-out of an empty page set.");
return 0.0;
double sum = 0;
while (it.hasNext()) {
sum += it.next().getOutlinks().size();
}

int fanOutCounter = 0;

Session session = this.wiki.__getHibernateSession();
session.beginTransaction();
for (Object o : session.createQuery("select page.outLinks, page.pageId from Page as page")
.list()) {
Object[] row = (Object[]) o;
Set outLinks = (Set) row[0];
Integer pageId = (Integer) row[1];

// if the current page ID is in the desired result set => add outlink value
if (pageIDs.contains(pageId)) {
fanOutCounter += outLinks.size();
}
}
session.getTransaction().commit();

return (double) fanOutCounter / this.getNumberOfPages();
return sum / this.getNumberOfPages();
}

/**
Expand Down Expand Up @@ -489,5 +467,4 @@ private int computeShortestPathLenghts(int pStartNode, CategoryGraph catGraph)
}
return shortestPathLengthSum;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ public double distance(String s, String t)
cost = 1;
}
// Step 6
d[i][j] = Minimum(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
d[i][j] = minimum(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
}
}
// Step 7
return Integer.valueOf(d[n][m]).doubleValue();
}

private int Minimum(int a, int b, int c)
private int minimum(int a, int b, int c)
{
int min;
min = a;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.api;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

public class WikipediaInfoTest
    extends BaseJWPLTest
{

    // Shared across all tests; populated once in the static fixture below.
    private static WikipediaInfo info;

    /**
     * Kept static so that subsequent tests do not run at all when the setup
     * assumption fails. (With a per-test {@code @BeforeEach} the tests would be
     * skipped yet reported as passed.) Revisit once JUnit ignores tests after
     * failed assumptions.
     */
    @BeforeAll
    public static void setupWikipedia()
    {
        final DatabaseConfiguration db = obtainHSDLDBConfiguration();
        try {
            info = new WikipediaInfo(new Wikipedia(db));
        }
        catch (Exception e) {
            fail("WikipediaInfo could not be initialized: " + e.getLocalizedMessage());
        }
    }

    @Test
    public void testGetAverageFanOut() {
        final double first = info.getAverageFanOut();
        assertTrue(first > 0);
        assertEquals(1.1176470588235294d, first);
        // A second call must yield the identical (lazily cached) value.
        final double second = info.getAverageFanOut();
        assertEquals(1.1176470588235294d, second);
    }

}
18 changes: 18 additions & 0 deletions dkpro-jwpl-api/src/test/java/org/dkpro/jwpl/api/WikipediaTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,18 @@

import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -437,6 +441,20 @@ public void testGetLanguage()
assertNotNull(wiki.getLanguage());
}

@ParameterizedTest
@ValueSource(strings = {
    "Wikipedia_AP",
    "Wikipedia_API"
})
public void testGetSimilarPages(String val) throws WikiApiException {
    // Request exactly one candidate page for the given title pattern.
    final Map<Page, Double> result = wiki.getSimilarPages(val, 1);
    assertNotNull(result);
    assertEquals(1, result.size());

    final Map.Entry<Page, Double> closest = result.entrySet().iterator().next();
    // The single match must share the query prefix, and its distance value
    // must not exceed 1 for these inputs.
    final String rawTitle = closest.getKey().getTitle().getRawTitleText();
    assertTrue(rawTitle.startsWith(val));
    assertTrue(closest.getValue() <= 1);
}

/* INTERNAL TEST HELPER METHODS */

private void getNotExistingPage(String title)
Expand Down

0 comments on commit c0441c8

Please sign in to comment.