Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented csv export. #1385

Merged
merged 13 commits into from
Jan 12, 2024
2 changes: 2 additions & 0 deletions cli/src/main/java/de/jplag/cli/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ public static void main(String[] args) {
JPlagResult result = JPlag.run(options);
ReportObjectFactory reportObjectFactory = new ReportObjectFactory();
reportObjectFactory.createAndSaveReport(result, cli.getResultFolder());
TwoOfTwelve marked this conversation as resolved.
Show resolved Hide resolved

OutputFileGenerator.generateCsvOutput(result, new File(cli.getResultFolder()), cli.options);
}
} catch (ExitException exception) {
logger.error(exception.getMessage()); // do not pass exception here to keep log clean
Expand Down
3 changes: 3 additions & 0 deletions cli/src/main/java/de/jplag/cli/CliOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ public static class Advanced {
"--similarity-threshold"}, description = "Comparison similarity threshold [0.0-1.0]: All comparisons above this threshold will "
+ "be saved (default: 0.0)%n")
public double similarityThreshold = JPlagOptions.DEFAULT_SIMILARITY_THRESHOLD;

@Option(names = "--csv-export", description = "If present, a csv export will be generated in addition to the zip file.")
public boolean csvExport = false;
}

public static class Clustering {
Expand Down
36 changes: 36 additions & 0 deletions cli/src/main/java/de/jplag/cli/OutputFileGenerator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package de.jplag.cli;

import java.io.File;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.JPlagResult;
import de.jplag.csv.comparisons.CsvComparisonOutput;

public final class OutputFileGenerator {
    private static final Logger LOGGER = LoggerFactory.getLogger(OutputFileGenerator.class);

    private OutputFileGenerator() {
        // Utility class, not meant to be instantiated
    }

    /**
     * Writes the comparisons of the given result as csv files below the output root, but only when the csv export has been
     * requested through the cli options. Two files are produced: a full one and an anonymized one. IO failures are logged
     * as a warning and do not abort the run.
     * @param result The result to export
     * @param outputRoot The root folder for the output
     * @param options The cli options
     */
    public static void generateCsvOutput(JPlagResult result, File outputRoot, CliOptions options) {
        boolean exportRequested = options.advanced.csvExport;
        if (!exportRequested) {
            return;
        }

        try {
            // The full export comes first; if it fails, the anonymized export is not attempted either.
            CsvComparisonOutput.writeCsvResults(result.getAllComparisons(), false, outputRoot, "results");
            CsvComparisonOutput.writeCsvResults(result.getAllComparisons(), true, outputRoot, "results-anonymous");
        } catch (IOException ioException) {
            LOGGER.warn("Could not write csv results", ioException);
        }
    }
}
22 changes: 22 additions & 0 deletions core/src/main/java/de/jplag/csv/CsvDataMapper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package de.jplag.csv;

import java.util.Optional;

/**
* Provides mappings for csv rows and optionally names for the columns. Needs to always return the same number of
* columns, i.e. every array returned by {@link #provideData(Object)} and the title row (if present) should have the same
* length.
* @param <T> The type of data that is mapped
*/
public interface CsvDataMapper<T> {
/**
* Provides the cell values for one row. Each array element corresponds to one cell, in column order.
* @param value The original object
* @return The cell values
*/
String[] provideData(T value);

/**
* Provides the optional header row of the csv.
* @return The names of the columns if present, otherwise an empty {@link Optional}
*/
Optional<String[]> getTitleRow();
}
144 changes: 144 additions & 0 deletions core/src/main/java/de/jplag/csv/CsvPrinter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package de.jplag.csv;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;

import de.jplag.util.FileUtils;

/**
* Prints a csv according to the specification in
* <a href="https://datatracker.ietf.org/doc/html/rfc4180#section-2">...</a>. If you need to deviate from this
* definition slightly you can modify the line end and separator characters.
* @param <T>
*/
public class CsvPrinter<T> {
private static final char DEFAULT_SEPARATOR = ',';
private static final String DEFAULT_LINE_END = "\r\n"; // not System.lineSeparator(), because of csv specification
private static final char LITERAL = '"';

private final CsvDataMapper<T> dataSource;
private final List<String[]> data;

private char separator;
private String lineEnd;

/**
* @param dataSource The data source used to map the given object to rows.
*/
public CsvPrinter(CsvDataMapper<T> dataSource) {
this.dataSource = dataSource;
this.data = new ArrayList<>();

this.separator = DEFAULT_SEPARATOR;
this.lineEnd = DEFAULT_LINE_END;
}

/**
* Adds a new row to this csv
* @param value the value to add
*/
public void addRow(T value) {
this.data.add(this.dataSource.provideData(value));
}

/**
* Adds multiple rows to this csv
* @param values The values to add
*/
public void addRows(Collection<T> values) {
values.forEach(this::addRow);
}

/**
* Changes the separator between cells
* @param separator The new separator
*/
public void setSeparator(char separator) {
this.separator = separator;
}

/**
* Sets the string to separate lines with
* @param lineEnd the new line end
*/
public void setLineEnd(String lineEnd) {
this.lineEnd = lineEnd;
}

/**
* Prints this csv with all current data to a file
* @param file The file to write
* @throws IOException on io errors
*/
public void printToFile(File file) throws IOException {
try (Writer writer = FileUtils.openFileWriter(file)) {
this.printCsv(writer);
}
}

public String printToString() throws IOException {
TwoOfTwelve marked this conversation as resolved.
Show resolved Hide resolved
String csv;

try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
try (Writer writer = new OutputStreamWriter(outputStream)) {
this.printCsv(writer);
}

csv = outputStream.toString();
}

return csv;
}

private void printCsv(Writer writer) throws IOException {
this.writeTitleRow(writer);

for (String[] datum : this.data) {
this.printRow(writer, datum);
}
}

private void writeTitleRow(Writer writer) throws IOException {
Optional<String[]> titleRow = this.dataSource.getTitleRow();
if (titleRow.isPresent()) {
this.printRow(writer, titleRow.get());
}
}

private void printRow(Writer writer, String[] data) throws IOException {
Iterator<String> dataIterator = Arrays.stream(data).iterator();

if (dataIterator.hasNext()) {
printCell(writer, dataIterator.next());
}

while (dataIterator.hasNext()) {
writer.write(this.separator);
printCell(writer, dataIterator.next());
}

writer.write(this.lineEnd);
}

private void printCell(Writer writer, String cellValue) throws IOException {
boolean literalsNeeded = cellValue.contains(String.valueOf(LITERAL));
String actualValue = cellValue;
if (literalsNeeded) {
writer.write(LITERAL);
actualValue = actualValue.replace("\"", "\"\"");
}
writer.write(actualValue);
if (literalsNeeded) {
writer.write(LITERAL);
}
}
}
18 changes: 18 additions & 0 deletions core/src/main/java/de/jplag/csv/CsvValue.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package de.jplag.csv;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
* Used with {@link ReflectiveCsvDataMapper} to identify fields and methods, that should be used for the csv. The
* annotation is retained at runtime, because the mapper reads it reflectively. Annotated methods must not declare
* parameters, otherwise the mapper rejects the type.
*/
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface CsvValue {
/**
* The index of the csv field. Has to be used as the compiler sometimes changes the order of fields/methods. The
* {@link ReflectiveCsvDataMapper} sorts the columns in ascending order of this value.
*/
int value();
}
58 changes: 58 additions & 0 deletions core/src/main/java/de/jplag/csv/HardcodedCsvDataMapper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package de.jplag.csv;

import java.util.Optional;
import java.util.function.Function;

/**
 * A {@link CsvDataMapper} whose mapping is supplied by the caller as a function. Every mapped value is converted to its
 * string representation.
 * @param <T> The mapped type.
 */
public class HardcodedCsvDataMapper<T> implements CsvDataMapper<T> {
    private final Function<T, Object[]> mappingFunction;
    private final int columnCount;
    private final String[] titles;

    /**
     * Creates a mapper without a title row.
     * @param columnCount The number of columns
     * @param mappingFunction The function returning the column values. Must return as many values as specified in
     * columnCount
     */
    public HardcodedCsvDataMapper(int columnCount, Function<T, Object[]> mappingFunction) {
        this(columnCount, mappingFunction, null);
    }

    /**
     * Creates a mapper with the given title row.
     * @param columnCount The number of columns
     * @param mappingFunction The function returning the column values. Must return as many values as specified in
     * columnCount
     * @param titles The titles for the csv
     */
    public HardcodedCsvDataMapper(int columnCount, Function<T, Object[]> mappingFunction, String[] titles) {
        this.mappingFunction = mappingFunction;
        this.columnCount = columnCount;
        this.titles = titles;
    }

    @Override
    public String[] provideData(T value) {
        Object[] mapped = this.mappingFunction.apply(value);
        if (mapped.length != this.columnCount) {
            throw new IllegalStateException("You need to return the appropriate number of columns");
        }

        String[] cells = new String[mapped.length];
        int column = 0;
        for (Object cell : mapped) {
            cells[column] = String.valueOf(cell);
            column++;
        }
        return cells;
    }

    @Override
    public Optional<String[]> getTitleRow() {
        return Optional.ofNullable(this.titles);
    }
}
89 changes: 89 additions & 0 deletions core/src/main/java/de/jplag/csv/ReflectiveCsvDataMapper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package de.jplag.csv;

import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;

import org.apache.commons.math3.util.Pair;

/**
 * Maps data automatically based on the exposed fields and methods. Only members returned by {@link Class#getFields()} and
 * {@link Class#getMethods()} are considered. The columns are ordered by the index given in the {@link CsvValue}
 * annotation.
 * @param <T> The mapped type. Mark included methods and fields with @{@link CsvValue}
 */
public class ReflectiveCsvDataMapper<T> implements CsvDataMapper<T> {
    private final List<Pair<Integer, GetterFunction<T>>> values;
    private String[] titles;

    /**
     * Creates a mapper without a title row.
     * @param type The mapped type.
     * @throws IllegalStateException If an annotated method declares parameters or returns void
     */
    public ReflectiveCsvDataMapper(Class<T> type) {
        this.values = new ArrayList<>();

        this.collectAnnotatedFields(type);
        this.collectAnnotatedMethods(type);

        this.values.sort(Comparator.comparing(Pair::getKey));
        this.titles = null;
    }

    /**
     * Creates a mapper with the given title row.
     * @param type The mapped type
     * @param titles The titles for the csv. Must be as many as @{@link CsvValue} annotation in the given type.
     * @throws IllegalArgumentException If the number of titles does not match the number of annotated members
     * @throws IllegalStateException If an annotated method declares parameters or returns void
     */
    public ReflectiveCsvDataMapper(Class<T> type, String[] titles) {
        this(type);

        if (this.values.size() != titles.length) {
            throw new IllegalArgumentException("Csv data must have the same number of titles and values per row.");
        }

        this.titles = titles;
    }

    @Override
    public String[] provideData(T value) {
        String[] data = new String[this.values.size()];

        for (int i = 0; i < data.length; i++) {
            try {
                data[i] = String.valueOf(this.values.get(i).getValue().get(value));
            } catch (IllegalAccessException | InvocationTargetException e) {
                throw new IllegalStateException(e);
            }
        }

        return data;
    }

    @Override
    public Optional<String[]> getTitleRow() {
        return Optional.ofNullable(this.titles);
    }

    /**
     * Registers a getter for every field of the type that carries the {@link CsvValue} annotation.
     */
    private void collectAnnotatedFields(Class<T> type) {
        for (Field field : type.getFields()) {
            CsvValue annotation = field.getAnnotation(CsvValue.class);
            if (annotation != null) {
                this.values.add(new Pair<>(annotation.value(), field::get));
            }
        }
    }

    /**
     * Registers a getter for every method of the type that carries the {@link CsvValue} annotation. Such methods must be
     * callable without arguments and must produce a value.
     */
    private void collectAnnotatedMethods(Class<T> type) {
        for (Method method : type.getMethods()) {
            CsvValue annotation = method.getAnnotation(CsvValue.class);
            if (annotation == null) {
                continue;
            }

            if (method.getParameters().length != 0) {
                throw new IllegalStateException(
                        String.format("Method %s in %s must not have parameters to be a csv value", method.getName(), type.getName()));
            }

            // A method declared as "void" reports void.class (Void.TYPE), not the wrapper type Void.class. The wrapper
            // is still rejected as well, since it cannot carry a value either.
            Class<?> returnType = method.getReturnType();
            if (returnType == void.class || returnType == Void.class) {
                throw new IllegalStateException(
                        String.format("Method %s in %s must not return void to be a csv value", method.getName(), type.getName()));
            }

            this.values.add(new Pair<>(annotation.value(), method::invoke));
        }
    }

    /**
     * Reads one csv value from an instance, either by accessing a field or by invoking a method.
     */
    @FunctionalInterface
    private interface GetterFunction<T> {
        Object get(T instance) throws IllegalAccessException, InvocationTargetException;
    }
}
Loading
Loading