Skip to content

Commit

Permalink
Merge pull request #466 from ClearTK/refactoring/465-Remove-use-of-de…
Browse files Browse the repository at this point in the history
…precated-OSS-Sonatype-parent-POM

Issue #465: Remove use of deprecated OSS Sonatype parent POM
  • Loading branch information
reckart authored Nov 28, 2022
2 parents 6b48058 + 07777a9 commit 2dff806
Show file tree
Hide file tree
Showing 21 changed files with 215 additions and 329 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
.factorypath
.checkstyle
target
.apt_generated/
.apt_generated_tests/
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
* <p>
*
* @author Lee Becker
*
*/
@Beta
public abstract class Tokenizer_ImplBase<TOKEN_TYPE extends Annotation, SENTENCE_TYPE extends Annotation> extends
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,17 @@
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.cleartk.util.ViewUriUtil;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;

import com.google.common.annotations.Beta;

Expand Down Expand Up @@ -80,20 +80,18 @@ public class PennTreebankReader extends JCasCollectionReader_ImplBase {
public static final String PARAM_CORPUS_DIRECTORY_NAME = "corpusDirectoryName";

private static final String CORPUS_DIRECTORY_DESCRIPTION = "Specifies the location of WSJ/PennTreebank treebank files. "
+ "The directory should contain subdirectories corresponding to the sections (e.g. '00', '01', etc.) "
+ "That is, if a local copy of PennTreebank sits at C:/Data/PTB/wsj/mrg, then the the subdirectory C:/Data/PTB/wsj/mrg/00 should exist. "
+ "There are 24 sections in PTB corresponding to the directories 00, 01, 02, ... 24. ";
+ "The directory should contain subdirectories corresponding to the sections (e.g. '00', '01', etc.) "
+ "That is, if a local copy of PennTreebank sits at C:/Data/PTB/wsj/mrg, then the the subdirectory C:/Data/PTB/wsj/mrg/00 should exist. "
+ "There are 24 sections in PTB corresponding to the directories 00, 01, 02, ... 24. ";

@ConfigurationParameter(
name = PARAM_CORPUS_DIRECTORY_NAME,
mandatory = true, description = CORPUS_DIRECTORY_DESCRIPTION)
@ConfigurationParameter(name = PARAM_CORPUS_DIRECTORY_NAME, mandatory = true, description = CORPUS_DIRECTORY_DESCRIPTION)
private String corpusDirectoryName;

public static final String PARAM_SECTIONS_SPECIFIER = "sectionsSpecifier";

private static final String SECTIONS_DESCRIPTION = "specifies which sections of PTB to read in. "
+ "The required format for values of this parameter allows for comma-separated section numbers and section ranges, "
+ "for example '02,07-12,16'.";
+ "The required format for values of this parameter allows for comma-separated section numbers and section ranges, "
+ "for example '02,07-12,16'.";

@ConfigurationParameter(name = PARAM_SECTIONS_SPECIFIER, defaultValue = "00-24", description = SECTIONS_DESCRIPTION)
private String sectionsSpecifier;
Expand All @@ -119,7 +117,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
}

/**
* This will add all the <tt>.mrg</tt> files in the given WSJ sections to <em>treebankFiles</em>.
* This will add all the {@code .mrg} files in the given WSJ sections to <em>treebankFiles</em>.
*
* @param wsjDirectory
* The top level of the WSJ part of Treebank. Underneath here are the section
Expand All @@ -130,22 +128,23 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
* The set of sections to include.
*/
@Beta
public static void collectSections(
File wsjDirectory,
List<File> treebankFiles,
ListSpecification wsjSections) {
if (!wsjDirectory.isDirectory())
public static void collectSections(File wsjDirectory, List<File> treebankFiles,
ListSpecification wsjSections) {
if (!wsjDirectory.isDirectory()) {
return;
}

for (File subFile : wsjDirectory.listFiles()) {
if (!subFile.isDirectory())
if (!subFile.isDirectory()) {
continue;
}

try {
int section = Integer.valueOf(subFile.getName());

if (!wsjSections.contains(section))
if (!wsjSections.contains(section)) {
continue;
}
} catch (NumberFormatException e) {
continue;
}
Expand All @@ -167,37 +166,37 @@ static void collectFiles(File file, List<File> treebankFiles) {
/**
* Reads the next file and stores its text in <b>cas</b> as the "TreebankView" SOFA.
*/
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
File treebankFile = files.removeFirst();
getUimaContext().getLogger().log(
Level.FINEST,
"reading treebank file: " + treebankFile.getPath());
getUimaContext().getLogger().log(Level.FINEST,
"reading treebank file: " + treebankFile.getPath());
ViewUriUtil.setURI(jCas, treebankFile.toURI());
try {
JCas treebankView = ViewCreatorAnnotator.createViewSafely(
jCas,
TREEBANK_VIEW);
JCas treebankView = ViewCreatorAnnotator.createViewSafely(jCas, TREEBANK_VIEW);
treebankView.setSofaDataString(FileUtils.file2String(treebankFile), "text/plain");
} catch (AnalysisEngineProcessException aepe) {
throw new CollectionException(aepe);
}
}

@Override
public void close() throws IOException {
}

@Override
public Progress[] getProgress() {
return new Progress[] { new ProgressImpl(
numberOfFiles - files.size(),
numberOfFiles,
Progress.ENTITIES) };
return new Progress[] {
new ProgressImpl(numberOfFiles - files.size(), numberOfFiles, Progress.ENTITIES) };
}

@Override
public boolean hasNext() throws IOException, CollectionException {
if (files.size() > 0)
if (files.size() > 0) {
return true;
else
} else {
return false;
}
}

@Beta
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
*
*
* <p>
* PropbankCollectionReader reads all <tt>.mrg</tt> files of the WSJ part of Treebank in lexical
* PropbankCollectionReader reads all {@code .mrg} files of the WSJ part of Treebank in lexical
* order, then reads the corresponding Propbank entries, and populates the "TreebankView" and
* "PropbankView" SOFAs.
* </p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*
* <p>
* A <em>Propbank object</em> represents one entry in Propbank. It closely reflects the structure of
* one line in <tt>prop.txt</tt>.
* one line in {@code prop.txt}.
* </p>
*
* @author Philipp Wetzler, Philip Ogren, Steven Bethard
Expand All @@ -57,7 +57,7 @@ public class Propbank {
* Parses one Propbank entry and returns its representation as a <em>Propbank</em> object.
*
* @param propTxt
* one line from <tt>prop.txt</tt>
* one line from {@code prop.txt}
*
* @return a <em>Propbank</em> object representing <b>propTxt</b>
*/
Expand Down Expand Up @@ -104,10 +104,10 @@ public static Propbank fromString(String propTxt) {

/**
* A convenience function to quickly read only the filename portion of a line from
* <tt>prop.txt</tt>.
* {@code prop.txt}.
*
* @param propTxt
* one line from <tt>prop.txt</tt>
* one line from {@code prop.txt}
*
* @return the filename part of <b>propTxt</b>
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public abstract class PropbankRelation {
* <em>PropbankRelation</em> object.
*
* @param s
* the textual representation of a relation taken from <tt>prop.txt</tt>
* the textual representation of a relation taken from {@code prop.txt}
*
* @return a <em>PropbankRelation</em> object representing <b>s</b>
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@
import java.util.List;
import java.util.Set;

import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.srl.type.SemanticArgument;
import org.cleartk.syntax.constituent.type.TopTreebankNode;
import org.cleartk.util.AnnotationUtil;
import org.apache.uima.fit.util.FSCollectionFactory;

import com.google.common.annotations.Beta;

Expand All @@ -58,7 +58,7 @@ public class Proplabel {
* <em>Proplabel</em> object.
*
* @param lblTxt
* one label part of one line from <tt>prop.txt</tt>
* one label part of one line from {@code prop.txt}
*
* @return a <em>Proplabel</em> object representing <b>lblTxt</b>
*/
Expand Down Expand Up @@ -103,16 +103,13 @@ static Proplabel fromString(String lblTxt) {

// throw some exceptions for bad input
if (columns.length != expectedLength) {
throw new PropbankFormatException(String.format(
"Expected %d items, found %d",
expectedLength,
columns.length));
throw new PropbankFormatException(
String.format("Expected %d items, found %d", expectedLength, columns.length));
}
if (Proplabel.labelsRequiringFeatures.contains(columns[1])) {
if (proplabel.getFeature() == null) {
throw new PropbankFormatException(String.format(
"Label %s requires a feature",
proplabel.getLabel()));
throw new PropbankFormatException(
String.format("Label %s requires a feature", proplabel.getLabel()));
}
}
return proplabel;
Expand Down Expand Up @@ -247,26 +244,29 @@ public String toString() {

buffer.append(getRelation().toString());
buffer.append("-" + getLabel());
if (getFeature() != null)
if (getFeature() != null) {
buffer.append("-" + getFeature());
if (getHyphenTag() != null)
}
if (getHyphenTag() != null) {
buffer.append("-" + getHyphenTag());
if (getPreposition() != null)
}
if (getPreposition() != null) {
buffer.append("-" + getPreposition());
}

return buffer.toString();
}

private static final Set<String> labels = new HashSet<String>(
Arrays.asList("rel|Support|ARG0|ARG1|ARG2|ARG3|ARG4|ARG5|ARGA|ARGM".split("\\|")));
Arrays.asList("rel|Support|ARG0|ARG1|ARG2|ARG3|ARG4|ARG5|ARGA|ARGM".split("\\|")));

private static final Set<String> labelsRequiringFeatures = new HashSet<String>(
Arrays.asList(new String[] { "ARGM" }));
Arrays.asList(new String[] { "ARGM" }));

private static final Set<String> features = new HashSet<String>(
Arrays.asList("ADV|CAU|DIR|DIS|EXT|LOC|MNR|MOD|NEG|PNC|PRD|REC|TMP".split("\\|")));
Arrays.asList("ADV|CAU|DIR|DIS|EXT|LOC|MNR|MOD|NEG|PNC|PRD|REC|TMP".split("\\|")));

private static final Set<String> hyphenTags = new HashSet<String>(
Arrays.asList("H0|H1|H2|H3|H4|H5|H6|H7|H8|H9|XX".split("\\|")));
Arrays.asList("H0|H1|H2|H3|H4|H5|H6|H7|H8|H9|XX".split("\\|")));

}
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public Evaluation_ImplBase(File baseDirectory) {
* @return The statistics that result from testing the model.
*/
public STATS_TYPE trainAndTest(List<ITEM_TYPE> trainItems, List<ITEM_TYPE> testItems)
throws Exception {
throws Exception {
File subDirectory = new File(this.baseDirectory, "train_and_test");
subDirectory.mkdirs();
this.train(this.getCollectionReader(trainItems), subDirectory);
Expand Down Expand Up @@ -120,7 +120,8 @@ public List<STATS_TYPE> crossValidation(List<ITEM_TYPE> items, int nFolds) throw
* @param nFolds
* The total number of folds in this cross validation.
* @param fold
* The index of the fold (0 <= fold < nFolds) whose training items are to be selected.
* The index of the fold (0 &lt;= fold &lt; nFolds) whose training items are to be
* selected.
* @return The items that should be used for training.
*/
protected List<ITEM_TYPE> selectFoldTrainItems(List<ITEM_TYPE> items, int nFolds, int fold) {
Expand All @@ -144,7 +145,7 @@ protected List<ITEM_TYPE> selectFoldTrainItems(List<ITEM_TYPE> items, int nFolds
* @param nFolds
* The total number of folds in this cross validation.
* @param fold
* The index of the fold (0 <= fold < nFolds) whose test items are to be selected.
* The index of the fold (0 &lt;= fold &lt; nFolds) whose test items are to be selected.
* @return The items that should be used for testing.
*/
protected List<ITEM_TYPE> selectFoldTestItems(List<ITEM_TYPE> items, int nFolds, int fold) {
Expand Down Expand Up @@ -192,6 +193,6 @@ protected List<ITEM_TYPE> selectFoldTestItems(List<ITEM_TYPE> items, int nFolds,
* @return The statistics that result from testing the model
*/
protected abstract STATS_TYPE test(CollectionReader collectionReader, File directory)
throws Exception;
throws Exception;

}
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ public class SyntacticPathExtractor implements FeatureExtractor2<TreebankNode, T
* @param pathMemberExtractor
* this extractor will be used to get a feature for every node on the path, which will
* then be combined to form a single string. The extractor should preferably generate
* exactly one <tt>StringFeature</tt>, but must generate at least one
* <tt>StringFeature</tt>, <tt>LongFeature</tt>, <tt>DoubleFeature</tt>, or
* <tt>BooleanFeature</tt>. Only the first feature will then be used and naively
* exactly one {@code StringFeature}, but must generate at least one
* {@code StringFeature}, {@code LongFeature}, {@code DoubleFeature}, or
* {@code BooleanFeature}. Only the first feature will then be used and naively
* converted to a string.
* @param partial
* if true, generate a partial path only, i.e. from the first node up to the lowest
Expand Down
Loading

0 comments on commit 2dff806

Please sign in to comment.