diff --git a/jplag/src/main/java/jplag/AllMatches.java b/jplag/src/main/java/jplag/AllMatches.java index db64c957ae..0be3ba909c 100644 --- a/jplag/src/main/java/jplag/AllMatches.java +++ b/jplag/src/main/java/jplag/AllMatches.java @@ -13,10 +13,6 @@ public class AllMatches extends Matches implements Comparator { public Submission subA; public Submission subB; - public AllBasecodeMatches bcmatchesA = null; - public AllBasecodeMatches bcmatchesB = null; - - public AllMatches(Submission subA, Submission subB) { super(); this.subA = subA; @@ -92,9 +88,9 @@ public final float roundedPercent() { } public final float percent() { float sa, sb; - if(bcmatchesB != null && bcmatchesA != null){ - sa = subA.size() - subA.files.length - bcmatchesA.tokensMatched(); - sb = subB.size() - subB.files.length - bcmatchesB.tokensMatched(); + if(subA.bcMatches != null && subB.bcMatches != null){ + sa = subA.size() - subA.files.length - subA.bcMatches.tokensMatched(); + sb = subB.size() - subB.files.length - subB.bcMatches.tokensMatched(); } else{ sa = subA.size() - subA.files.length; @@ -104,14 +100,22 @@ public final float percent() { } public final float percentA() { int divisor; - if(bcmatchesA != null) divisor = subA.size()-subA.files.length-bcmatchesA.tokensMatched(); - else divisor = subA.size()-subA.files.length; + + if (subA.bcMatches != null) + divisor = subA.size() - subA.files.length - subA.bcMatches.tokensMatched(); + else + divisor = subA.size() - subA.files.length; + return (divisor == 0 ? 0f : (tokensMatched()*100 / (float) divisor)); } public final float percentB() { int divisor; - if(bcmatchesB != null) divisor = subB.size()-subB.files.length-bcmatchesB.tokensMatched(); - else divisor = subB.size()-subB.files.length; + + if (subB.bcMatches != null) + divisor = subB.size() - subB.files.length - subB.bcMatches.tokensMatched(); + else + divisor = subB.size() - subB.files.length; + return (divisor == 0 ? 0f : (tokensMatched()*100 / (float) divisor)); } @@ -139,11 +143,11 @@ public final float percentMinAB() { public final float percentBasecodeA(){ float sa = subA.size() - subA.files.length; - return bcmatchesA.tokensMatched() * 100 / sa; + return subA.bcMatches.tokensMatched() * 100 / sa; } public final float percentBasecodeB(){ float sb = subB.size() - subB.files.length; - return bcmatchesB.tokensMatched() * 100 / sb; + return subB.bcMatches.tokensMatched() * 100 / sb; } public final float roundedPercentBasecodeA() { float percent = percentBasecodeA(); @@ -154,6 +158,14 @@ public final float roundedPercentBasecodeB() { return ((int)(percent * 10)) / (float)10; } + public AllBasecodeMatches getBcMatchesA() { + return subA.bcMatches; + } + + public AllBasecodeMatches getBcMatchesB() { + return subB.bcMatches; + } + /* Returns the name of the submissions which were compared * Parameter: i == 0 submission A, * i != 0 submission B. diff --git a/jplag/src/main/java/jplag/GSTiling.java b/jplag/src/main/java/jplag/GSTiling.java index 35efdf42ff..dd6941d61d 100644 --- a/jplag/src/main/java/jplag/GSTiling.java +++ b/jplag/src/main/java/jplag/GSTiling.java @@ -75,7 +75,13 @@ public void create_hashes(Structure s, int hashLength, boolean makeTable) { } public final AllMatches compare(Submission subA, Submission subB) { + if (subA.struct == null || subB.struct == null) { + return null; + } + Submission A, B, tmp; + AllMatches matches; + if (subA.struct.size() > subB.struct.size()) { A = subB; B = subA; } else { @@ -83,12 +89,13 @@ public final AllMatches compare(Submission subA, Submission subB) { } // if hashtable exists in first but not in second structure: flip around! if (B.struct.table == null && A.struct.table != null) { - tmp = A; - A = B; - B = tmp; + matches = compare(B, A, this.program.get_min_token_match()); + } else { + matches = compare(A, B, this.program.get_min_token_match()); } - return compare(A, B, this.program.get_min_token_match()); + System.out.println("Comparing " + subA.name + "-" + subB.name + ": " + matches.percent()); + return matches; } // first parameter should contain the smaller sequence!!! @@ -180,19 +187,19 @@ private final AllMatches compare(Submission subA, Submission subB, int mml) { public final AllBasecodeMatches compareWithBasecode(Submission subA, Submission subB) { Submission A, B, tmp; + if (subA.struct.size() > subB.struct.size()) { A = subB; B = subA; } else { - A = subB; B = subA; + A = subA; B = subB; } + // if hashtable exists in first but not in second structure: flip around! if (B.struct.table == null && A.struct.table != null) { - tmp = A; - A = B; - B = tmp; + return compareWithBasecode(B, A, program.get_min_token_match()); + } else { + return compareWithBasecode(A, B, program.get_min_token_match()); } - - return compareWithBasecode(A, B, this.program.get_min_token_match()); } private final AllBasecodeMatches compareWithBasecode(Submission subA, Submission subB, int mml) { diff --git a/jplag/src/main/java/jplag/Program.java b/jplag/src/main/java/jplag/Program.java index e16afd1564..1665ad03dd 100644 --- a/jplag/src/main/java/jplag/Program.java +++ b/jplag/src/main/java/jplag/Program.java @@ -8,13 +8,13 @@ import java.io.IOException; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; +import java.awt.Color; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; import java.util.Enumeration; import java.util.HashSet; -import java.util.Hashtable; import java.util.Iterator; import java.util.Properties; import java.util.TimeZone; @@ -85,8 +85,6 @@ else if (normal != null) protected GSTiling gSTiling = new GSTiling(this); - private Hashtable htBasecodeMatches = new Hashtable(30); - private Vector included = null; // experiment end @@ -99,7 +97,7 @@ else if (normal != null) private Runtime runtime = Runtime.getRuntime(); - private Vector submissions; + private Vector submissions, archivalSubmissions; private FileWriter writer = null; @@ -139,6 +137,17 @@ protected int validSubmissions() { return size; } + protected int validArchivalSubmissions() { + if (archivalSubmissions == null) + return 0; + int archSize = 0; + for (int i = archivalSubmissions.size() - 1; i >= 0; i--) { + if (!archivalSubmissions.elementAt(i).errors) + archSize++; + } + return archSize; + } + /** * Like the validSubmissions(), but this time all the submissions are * returned as a string, separated by "separator". @@ -157,6 +166,20 @@ protected String allValidSubmissions(String separator) { return res; } + protected String allValidArchivalSubmissions(String separator) { + String res = ""; + int size = archivalSubmissions.size(); + boolean firsterr = true; + for (int i = 0; i < size; i++) { + Submission archSubm = archivalSubmissions.elementAt(i); + if (!archSubm.errors) { + res += ((!firsterr) ? separator : "") + archSubm.name; + firsterr = false; + } + } + return res; + } + /** * Returns a " - " separated list of invalid submission names */ @@ -174,15 +197,20 @@ public void closeWriter() { writer = null; } - private void throwNotEnoughSubmissions() throws jplag.ExitException { - StringBuilder errorStr = new StringBuilder(); - for (String str : errorVector) { - errorStr.append(str); - errorStr.append('\n'); - } + private void terminateIfNotEnoughSubmissions() throws jplag.ExitException { + int allValidSubmissions = validSubmissions() + validArchivalSubmissions(); - throw new ExitException("Not enough valid submissions! (only " + validSubmissions() + " " - + (validSubmissions() != 1 ? "are" : "is") + " valid):\n" + errorStr.toString(), ExitException.NOT_ENOUGH_SUBMISSIONS_ERROR); + if (validSubmissions() == 0 || allValidSubmissions < 2) { + StringBuilder errorStr = new StringBuilder(); + for (String str : errorVector) { + errorStr.append(str); + errorStr.append('\n'); + } + + throw new ExitException("Not enough valid submissions! (only " + allValidSubmissions + " " + + (allValidSubmissions != 1 ? "are" : "is") + " valid):\n" + errorStr.toString(), + ExitException.NOT_ENOUGH_SUBMISSIONS_ERROR); + } } private void throwBadBasecodeSubmission() throws jplag.ExitException { @@ -201,90 +229,56 @@ private void throwBadBasecodeSubmission() throws jplag.ExitException { * Now the actual comparison: All submissions are compared pairwise. */ private void compare() throws jplag.ExitException { - int size = submissions.size(); + int size = submissions.size(), archSize = 0, dist[] = new int[10]; + SortedVector avgmatches = new SortedVector(new AllMatches.AvgComparator()), + maxmatches = new SortedVector(new AllMatches.MaxComparator()); - SortedVector avgmatches, maxmatches; - int[] dist = new int[10]; - - // Result vector - avgmatches = new SortedVector(new AllMatches.AvgComparator()); - maxmatches = new SortedVector(new AllMatches.MaxComparator()); - - long msec; - - AllBasecodeMatches bcmatch; - Submission s1, s2; + if (archivalSubmissions != null) + archSize = archivalSubmissions.size(); options.setState(Options.COMPARING); - options.setProgress(0); + compareWithBasecode(size + archSize); - if (this.options.useBasecode) { - // print("\nComparing with Basecode:\n", validSubmissions() - // + " submissions"); - int countBC = 0; - // System.out.println("BC size: "+basecodeSubmission.size()); - msec = System.currentTimeMillis(); - for (int i = 0; i < (size); i++) { - s1 = submissions.elementAt(i); - // System.out.println("basecode recognition for: "+s1.name); - bcmatch = this.gSTiling.compareWithBasecode(s1, basecodeSubmission); - htBasecodeMatches.put(s1.name, bcmatch); - this.gSTiling.resetBaseSubmission(basecodeSubmission); - countBC++; - options.setProgress(countBC * 100 / size); - } - long timebc = System.currentTimeMillis() - msec; - print("\n\n", "\nTime for comparing with Basecode: " + ((timebc / 3600000 > 0) ? (timebc / 3600000) + " h " : "") - + ((timebc / 60000 > 0) ? ((timebc / 60000) % 60000) + " min " : "") + (timebc / 1000 % 60) + " sec\n" - + "Time per basecode comparison: " + (timebc / size) + " msec\n\n"); - } - - // print("\nComparing:\n", validSubmissions() + " submissions"); - - int totalcomps = (size - 1) * size / 2; - int i, j, anz = 0, count = 0; + long startMillis = System.currentTimeMillis(); + int compsTotal = (size - 1) * size / 2 + size * archSize, + i, j, compsDone = 0, count = 0; + Submission s1, s2; AllMatches match; options.setProgress(0); - msec = System.currentTimeMillis(); - for (i = 0; i < (size - 1); i++) { - s1 = submissions.elementAt(i); - if (s1.struct == null) { - count += (size - i - 1); - continue; - } + for (i = 0; i < archSize; i++) { + for (j = 0; j < size; j++) { + match = gSTiling.compare(archivalSubmissions.elementAt(i), submissions.elementAt(j)); - for (j = (i + 1); j < size; j++) { - s2 = submissions.elementAt(j); - if (s2.struct == null) { - count++; - continue; + if (match != null) { + compsDone++; + registerMatch(match, dist, avgmatches, maxmatches, null, i, j); } - match = this.gSTiling.compare(s1, s2); - - anz++; + options.setProgress(count++ * 100 / compsTotal); + } + } - System.out.println("Comparing " + s1.name + "-" + s2.name + ": " + match.percent()); + for (i = 0; i < (size - 1); i++) { + for (j = (i + 1); j < size; j++) { + match = gSTiling.compare(submissions.elementAt(i), submissions.elementAt(j)); - // histogram: - if (options.useBasecode) { - match.bcmatchesA = htBasecodeMatches.get(match.subA.name); - match.bcmatchesB = htBasecodeMatches.get(match.subB.name); + if (match != null) { + compsDone++; + registerMatch(match, dist, avgmatches, maxmatches, null, i, j); } - - registerMatch(match, dist, avgmatches, maxmatches, null, i, j); - count++; - options.setProgress(count * 100 / totalcomps); + + options.setProgress(count++ * 100 / compsTotal); } } + options.setProgress(100); - long time = System.currentTimeMillis() - msec; + long time = System.currentTimeMillis() - startMillis; print("\n", "Total time for comparing submissions: " + ((time / 3600000 > 0) ? (time / 3600000) + " h " : "") + ((time / 60000 > 0) ? ((time / 60000) % 60000) + " min " : "") + (time / 1000 % 60) + " sec\n" + "Time per comparison: " - + (time / anz) + " msec\n"); + + (time / compsDone) + " msec\n"); Cluster cluster = null; if (options.clustering) @@ -320,8 +314,7 @@ private void revisionCompare() throws jplag.ExitException { msec = System.currentTimeMillis(); for (int i = 0; i < size; i++) { s1 = submissions.elementAt(i); - bcmatch = gSTiling.compareWithBasecode(s1, basecodeSubmission); - htBasecodeMatches.put(s1.name, bcmatch); + s1.bcMatches = gSTiling.compareWithBasecode(s1, basecodeSubmission); gSTiling.resetBaseSubmission(basecodeSubmission); options.setProgress((i + 1) * 100 / size); } @@ -358,16 +351,6 @@ private void revisionCompare() throws jplag.ExitException { anz++; - /* - * System.out.println("Comparing "+s1.name+"-"+s2.name+": "+ - * match.percent()); - */ - // histogram: - if (options.useBasecode) { - match.bcmatchesA = htBasecodeMatches.get(match.subA.name); - match.bcmatchesB = htBasecodeMatches.get(match.subB.name); - } - registerMatch(match, dist, avgmatches, maxmatches, minmatches, i, j); count++; options.setProgress(count * 100 / totalcomps); @@ -388,12 +371,8 @@ private void revisionCompare() throws jplag.ExitException { writeResults(dist, avgmatches, maxmatches, minmatches, cluster); } - private void createSubmissions() throws jplag.ExitException { - submissions = new Vector(); - File f = new File(options.root_dir); - if (f == null || !f.isDirectory()) { - throw new jplag.ExitException("\"" + options.root_dir + "\" is not a directory!"); - } + private Vector createSubmissions(File f, SubmissionType type) throws jplag.ExitException { + Vector result = new Vector(); String[] list = null; try { list = f.list(); @@ -419,27 +398,45 @@ private void createSubmissions() throws jplag.ExitException { if (!ok) continue; - submissions.addElement(new Submission(name, f, this, get_language())); + result.addElement(new Submission(name, f, this, get_language(), type)); continue; } - if (options.exp && excludeFile(subm_dir.toString())) { // EXPERIMENT - // !! + if (options.exp && excludeFile(subm_dir.toString())) { // EXPERIMENT !! System.err.println("excluded: " + subm_dir); continue; } - File file_dir = ((options.sub_dir == null) ? // - S option - subm_dir - : new File(subm_dir, options.sub_dir)); + File file_dir = ((options.sub_dir == null) ? // -S option + subm_dir : new File(subm_dir, options.sub_dir)); if (file_dir.isDirectory()) { - if (options.basecode.equals(subm_dir.getName())) { - basecodeSubmission = new Submission(subm_dir.getName(), file_dir, options.read_subdirs, this, get_language()); - } else { - submissions.addElement(new Submission(subm_dir.getName(), file_dir, options.read_subdirs, this, get_language())); // -s + System.out.println(subm_dir.getName()); + if (options.root_dir.equals(options.archivalSubmissions + File.separator + subm_dir.getName()) + || options.archivalSubmissions.equals(options.root_dir + File.separator + subm_dir.getName()) + || options.basecode.equals(subm_dir.getName())) { + /* TODO These checks can be fooled e.g. by mixing relative and absolute paths + when specifying root_dir/-a/-bc. AFAIK getCanonicalPath() should do better */ + continue; } + + result.addElement(new Submission(subm_dir.getName(), file_dir, options.read_subdirs, this, + get_language(), type)); // -s option } else throw new ExitException("Cannot find directory: " + file_dir.toString()); } + + return result; + } + + private Submission createBasecodeSubmission() throws jplag.ExitException { + if (!options.useBasecode) + return null; + + File basecode_dir = new File(options.root_dir + File.separator + options.basecode); + File file_dir = ((options.sub_dir == null) ? // - S option + basecode_dir : new File(basecode_dir, options.sub_dir)); + + return new Submission(basecode_dir.getName(), file_dir, options.read_subdirs, this, + get_language(), SubmissionType.BASECODE); } /** @@ -468,7 +465,8 @@ private void createSubmissionsExp() throws jplag.ExitException { subm_dir : new File(subm_dir, options.sub_dir)); if (file_dir != null && file_dir.isDirectory()) - submissions.addElement(new Submission(subm_dir.getName(), file_dir, options.read_subdirs, this, this.get_language())); // -s + submissions.addElement(new Submission(subm_dir.getName(), file_dir, options.read_subdirs, this, + this.get_language(), SubmissionType.REGULAR)); // -s option else if (options.sub_dir == null) { throw new jplag.ExitException(options.root_dir + " is not a directory!"); } @@ -755,6 +753,34 @@ private int fillMemory(int from, int size) { return index; } + void compareWithBasecode(int allSize) throws jplag.ExitException { + if (options.useBasecode) { + int count = 0; + long startMillis = System.currentTimeMillis(), timebc; + + options.setProgress(0); + + if (options.useArchivalSubmissions) { + for (Submission as : archivalSubmissions) { + as.bcMatches = gSTiling.compareWithBasecode(as, basecodeSubmission); + gSTiling.resetBaseSubmission(basecodeSubmission); + options.setProgress(++count * 100 / allSize); + } + } + + for (Submission s : submissions) { + s.bcMatches = gSTiling.compareWithBasecode(s, basecodeSubmission); + gSTiling.resetBaseSubmission(basecodeSubmission); + options.setProgress(++count * 100 / allSize); + } + + timebc = System.currentTimeMillis() - startMillis; + print("\n\n", "\nTime for comparing with Basecode: " + ((timebc / 3600000 > 0) ? (timebc / 3600000) + " h " : "") + + ((timebc / 60000 > 0) ? ((timebc / 60000) % 60000) + " min " : "") + (timebc / 1000 % 60) + " sec\n" + + "Time per basecode comparison: " + (timebc / allSize) + " msec\n\n"); + } + } + public String get_basecode() { return this.options.basecode; } @@ -885,14 +911,8 @@ private void myWrite(String str) { /* * Compiles all "submissions" */ - private void parseAll() throws jplag.ExitException { - if (submissions == null) { - System.out.println(" Nothing to parse!"); - return; - } - // lets go:) + private void parseAll(Vector submissions) throws jplag.ExitException { int count = 0; - int totalcount = submissions.size(); options.setState(Options.PARSING); options.setProgress(0); long msec = System.currentTimeMillis(); @@ -907,7 +927,7 @@ private void parseAll() throws jplag.ExitException { Submission subm = iter.next(); print(null, "------ Parsing submission: " + subm.name + "\n"); currentSubmissionName = subm.name; - options.setProgress(count * 100 / totalcount); + options.setProgress(count * 100 / submissions.size()); if (!(ok = subm.parse())) errors++; @@ -1125,18 +1145,27 @@ public void run() throws jplag.ExitException { // this file contains all files names which are excluded readExclusionFile(); if (options.include_file == null) { - createSubmissions(); - System.out.println(submissions.size() + " submissions"); + submissions = createSubmissions(new File(options.root_dir), SubmissionType.REGULAR); + basecodeSubmission = createBasecodeSubmission(); + System.out.println(submissions.size() + " submissions\n"); } else createSubmissionsExp(); + if (options.useArchivalSubmissions) { + archivalSubmissions = createSubmissions(new File(options.archivalSubmissions), + SubmissionType.ARCHIVAL); + System.out.println(archivalSubmissions.size() + " archival submissions\n"); + } + if (!options.skipParse) { try { - parseAll(); + parseAll(submissions); + if (options.useArchivalSubmissions) + parseAll(archivalSubmissions); System.gc(); parseBasecodeSubmission(); } catch (OutOfMemoryError e) { - submissions = null; + submissions = archivalSubmissions = null; System.gc(); System.out.println("[" + new Date() + "] OutOfMemoryError " + "during parsing of submission \"" + currentSubmissionName + "\""); @@ -1152,9 +1181,7 @@ public void run() throws jplag.ExitException { } else print("Skipping parsing...\n", null); - if (validSubmissions() < 2) { - throwNotEnoughSubmissions(); - } + terminateIfNotEnoughSubmissions(); errorVector = null; // errorVector is not needed anymore if (options.clustering) { @@ -1162,31 +1189,22 @@ public void run() throws jplag.ExitException { options.similarity = new SimilarityMatrix(submissions.size()); } System.gc(); - if (options.exp) { // EXPERIMENT - expCompare(); - } else if (options.externalSearch) { - try { + + try { + if (options.exp) // EXPERIMENT + expCompare(); + else if (options.externalSearch) externalCompare(); - } catch (OutOfMemoryError e) { - e.printStackTrace(); - } - } else { - if (options.compare > 0) + else if (options.compare > 0) specialCompare(); // compare every submission to x others - else { - switch (options.comparisonMode) { - case Options.COMPMODE_NORMAL: - compare(); - break; - - case Options.COMPMODE_REVISION: - revisionCompare(); - break; - - default: - throw new ExitException("Illegal comparison mode: \"" + options.comparisonMode + "\""); - } - } + else if (options.comparisonMode == Options.COMPMODE_NORMAL) + compare(); + else if (options.comparisonMode == Options.COMPMODE_REVISION) + revisionCompare(); + else + throw new ExitException("Illegal comparison mode: \"" + options.comparisonMode + "\""); + } catch (OutOfMemoryError e) { + e.printStackTrace(); } closeWriter(); @@ -1200,6 +1218,8 @@ public void run() throws jplag.ExitException { str += " n_of_programs = " + sp + submissions.size() + sp; str += " errors = " + sp + get_language().errorsCount() + sp; str += " path_to_files = " + sp + toUTF8((options.sub_dir != null) ? options.sub_dir : "") + sp; + str += " archival_dir = " + sp + toUTF8((options.archivalSubmissions != null) + ? options.archivalSubmissions : "") + sp; str += " basecode_dir = " + sp + toUTF8((options.basecode != null) ? options.basecode : "") + sp; str += " read_subdirs = " + sp + this.options.read_subdirs + sp; str += " clustertype = " + sp + this.options.getClusterTyp() + sp; @@ -1295,15 +1315,15 @@ private void specialCompare() throws jplag.ExitException { for (Iterator iter = matches.iterator(); iter.hasNext();) { match = iter.next(); if (once) { - f.println("" + s1.name + f.println("" + s1.name + "->"); once = false; } int other = (match.subName(0).equals(s1.name) ? 1 : 0); - f.println(" " + match.subName(other) + "
(" + match.roundedPercent() + "%)"); + + report.color(match.percent(), Color.BLACK, Color.RED) + "\">(" + match.roundedPercent() + "%)"); this.report.writeMatch(root, matchIndex++, match); } f.println(""); @@ -1379,6 +1399,10 @@ public boolean use_verbose_quiet() { return this.options.verbose_quiet; } + public boolean useArchivalSubmissions() { + return this.options.useArchivalSubmissions; + } + public boolean useBasecode() { return this.options.useBasecode; } diff --git a/jplag/src/main/java/jplag/Report.java b/jplag/src/main/java/jplag/Report.java index eac40bdade..c69bc4065d 100644 --- a/jplag/src/main/java/jplag/Report.java +++ b/jplag/src/main/java/jplag/Report.java @@ -5,6 +5,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.awt.Color; import java.util.Comparator; import java.util.Date; import java.util.Enumeration; @@ -125,8 +126,8 @@ private void writeDistribution(HTMLFile f) { f.println("

" + this.msg.getString("Report.Distribution") + ":

\n
"); f.println(""); for (int i = 9; i >= 0; i--) { - f.print("
" + (i * 10) + "% - " - + (i * 10 + 10) + "%" + "" + dist[i] + ""); + f.print("
" + + (i * 10) + "% - " + (i * 10 + 10) + "%" + "" + dist[i] + ""); for (int j = (dist[i] * bar_length / max); j > 0; j--) f.print("#"); if (dist[i] * bar_length / max == 0) { @@ -153,54 +154,58 @@ abstract class MatchesHelper { public abstract float getPercent(AllMatches matches); } - private void writeLinksToMatches(HTMLFile f, SortedVector matches, MatchesHelper helper, String headerStr) { - // output all the matches - // Set namesPrinted = new Set(); + private void writeLinksToMatches(HTMLFile f, SortedVector matches, MatchesHelper helper, + String headerStr) { + Set matchesPrinted = new HashSet(); - f.println(headerStr + " (" - + msg.getString("Report.WhatIsThis") + "):"); + f.println(headerStr + " (" + + "" + msg.getString("Report.WhatIsThis") + "):"); f.println(""); int anz = matches.size(); for (int i = 0; ((i < anz) && (matchesPrinted.size() != anz)); i++) { AllMatches match = matches.elementAt(i); if (!matchesPrinted.contains(match)) { - // !namesPrinted.contains(match.subName(j))) { - int a = 0, b = 0; - String nameA = match.subName(0); - String nameB = match.subName(1); - // Which of both submissions is referenced more often in "matches"? + Submission subA = match.subA, subB = match.subB; + + // Swap if needed, so that A is referenced more often in not printed matches than B + int totalA = 0, totalB = 0; for (int x = 0; x < anz; x++) { AllMatches tmp = matches.elementAt(x); if (tmp != match && !matchesPrinted.contains(tmp)) { - String tmpA = tmp.subName(0); - String tmpB = tmp.subName(1); - if (nameA.equals(tmpA) || nameA.equals(tmpB)) - a += helper.getPercent(tmp); - if (nameB.equals(tmpA) || nameB.equals(tmpB)) - b += helper.getPercent(tmp); + Submission subC = tmp.subA, subD = tmp.subB; + if (subA == subC || subA == subD) + totalA += helper.getPercent(tmp); + if (subB == subC || subB == subD) + totalB += helper.getPercent(tmp); } } - String name = (a >= b ? nameA : nameB); + + if (totalA < totalB) { + Submission subTemp = subA; + subA = subB; + subB = subTemp; + } + boolean header = false; - // namesPrinted.put(name); + for (int x = i; x < anz; x++) { + AllMatches output = matches.elementAt(x); + if (!matchesPrinted.contains(output) && (output.subA == subA || output.subB == subA)) { + Submission subOther = (subA == output.subA ? output.subB : output.subA); - AllMatches output; - for (int x = 0; x < anz; x++) { - output = matches.elementAt(x); - if (!matchesPrinted.contains(output) && (output.subName(0).equals(name) || output.subName(1).equals(name))) { - matchesPrinted.add(output); - int other = (output.subName(0).equals(name) ? 1 : 0); if (!header) { // only print header when necessary! + f.print(""); + if (program.useArchivalSubmissions() && program.validArchivalSubmissions() > 0) { + f.println(""); + } f.println(""); f.print(""); + + "" }) + ")"); + } + f.println(""); + if (program.getErrors() != 0) { f.println(""); } - f.println(""); if (this.program.useBasecode()) { f.print("" + ""); @@ -443,17 +465,21 @@ private int writeClusters(Cluster clustering) throws jplag.ExitException { } /* - * Two colors, represented by Rl,Gl,Bl and Rh,Gh,Bh respectively are mixed - * according to the percentage "percent" + * Two Colors are mixed according to the percentage "percent" and converted to HTML format */ - public final String color(float percent, int Rl, int Rh, int Gl, int Gh, int Bl, int Bh) { - int farbeR = (int) (Rl + (Rh - Rl) * percent / 100); - int farbeG = (int) (Gl + (Gh - Gl) * percent / 100); - int farbeB = (int) (Bl + (Bh - Bl) * percent / 100); - String helpR = (farbeR < 16 ? "0" : "") + Integer.toHexString(farbeR); - String helpG = (farbeG < 16 ? "0" : "") + Integer.toHexString(farbeG); - String helpB = (farbeB < 16 ? "0" : "") + Integer.toHexString(farbeB); - return "#" + helpR + helpG + helpB; + public static String color(float percent, Color l, Color h) { + int red = (int) (l.getRed() + (h.getRed() - l.getRed()) * percent / 100), + green = (int) (l.getGreen() + (h.getGreen() - l.getGreen()) * percent / 100), + blue = (int) (l.getBlue() + (h.getBlue() - l.getBlue()) * percent / 100); + + return "#" + + (red < 16 ? "0" : "") + Integer.toHexString(red) + + (green < 16 ? "0" : "") + Integer.toHexString(green) + + (blue < 16 ? "0" : "") + Integer.toHexString(blue); + } + + public static String color(float percent, SubmissionType type) { + return color(percent, type.lColor, type.hColor); } // MATCHES @@ -614,8 +640,8 @@ private int writeNormalSubmission(int i, AllMatches match, int j) throws jplag.E } } - if (this.program.useBasecode() && match.bcmatchesA != null && match.bcmatchesB != null) { - AllBasecodeMatches bcmatch = (j == 0 ? match.bcmatchesA : match.bcmatchesB); + if (program.useBasecode()) { + AllBasecodeMatches bcmatch = (j == 0 ? match.getBcMatchesA() : match.getBcMatchesB()); for (int x = 0; x < bcmatch.size(); x++) { onematch = bcmatch.matches[x]; Token start = tokens[onematch.startA]; @@ -788,8 +814,8 @@ else if (col1 < col2) } } - if (this.program.useBasecode() && match.bcmatchesA != null && match.bcmatchesB != null) { - AllBasecodeMatches bcmatch = (j == 0 ? match.bcmatchesA : match.bcmatchesB); + if (program.useBasecode()) { + AllBasecodeMatches bcmatch = (j == 0 ? match.getBcMatchesA() : match.getBcMatchesB()); for (int x = 0; x < bcmatch.size(); x++) { Match onematch = bcmatch.matches[x]; Token start = tokens[onematch.startA]; diff --git a/jplag/src/main/java/jplag/Submission.java b/jplag/src/main/java/jplag/Submission.java index 342439adab..9bc7e7e82d 100644 --- a/jplag/src/main/java/jplag/Submission.java +++ b/jplag/src/main/java/jplag/Submission.java @@ -9,9 +9,27 @@ import java.io.FilenameFilter; import java.io.IOException; import java.net.URL; +import java.awt.Color; import java.text.DecimalFormat; import java.util.Vector; +/* + * Following enumeration serves as a type-tag, distinguishing archival submissions (loaded with + * the -a option) from regular ones + */ +enum SubmissionType { + REGULAR(new Color(128, 128, 255), new Color(192, 192, 255)), + ARCHIVAL(new Color(144, 144, 176), new Color(208, 208, 208)), + BASECODE(Color.BLACK, Color.BLACK); + + final Color lColor, hColor; + + SubmissionType(Color lColor, Color hColor) { + this.lColor = lColor; + this.hColor = hColor; + } +} + /* * Everything about a single submission is stored in this object. (directory, * files, ...) @@ -40,12 +58,20 @@ public class Submission implements Comparable { public DecimalFormat format = new DecimalFormat("0000"); - public Submission(String name, File dir, boolean readSubDirs, Program p, Language language) { + public SubmissionType type; + + AllBasecodeMatches bcMatches = null; + + public Submission(String name, File dir, boolean readSubDirs, Program p, Language language, + SubmissionType type) { + this.program = p; this.language = language; this.dir = dir; this.name = name; this.readSubDirs = readSubDirs; + this.type = type; + try { lookupDir(dir, ""); } catch (Throwable b) { @@ -57,12 +83,13 @@ public Submission(String name, File dir, boolean readSubDirs, Program p, Languag } } - public Submission(String name, File dir, Program p, Language language) { + public Submission(String name, File dir, Program p, Language language, SubmissionType type) { this.language = language; this.program = p; this.dir = dir; this.name = name; this.readSubDirs = false; + this.type = type; files = new String[1]; files[0] = name; diff --git a/jplag/src/main/java/jplag/options/CommandLineOptions.java b/jplag/src/main/java/jplag/options/CommandLineOptions.java index 37268162e9..30e644048a 100644 --- a/jplag/src/main/java/jplag/options/CommandLineOptions.java +++ b/jplag/src/main/java/jplag/options/CommandLineOptions.java @@ -123,10 +123,13 @@ private int scanOption(String[] args, int i) } else if (arg.equals("-o") && i + 1 < args.length) { output_file = args[i + 1]; i++; + } else if (arg.equals("-a") && i + 1 < args.length) { + useArchivalSubmissions = true; + archivalSubmissions = args[i + 1]; // will be validated in initializeSecondStep() + i++; } else if (arg.equals("-bc") && i + 1 < args.length) { - // Will be validated later as root_dir is not set yet useBasecode = true; - basecode = args[i + 1]; + basecode = args[i + 1]; // will be validated in initializeSecondStep() i++; } else if (arg.equals("-d") && i + 1 < args.length) { // original directory - when used in the server environment. @@ -368,39 +371,75 @@ public void initializeSecondStep(Program program) throws jplag.ExitException { this.min_token_match = this.language.min_token_match(); if (!suffixes_set) this.suffixes = this.language.suffixes(); - checkBasecodeOption(); + + checkDirectories(); } - + + private void checkDirectories() throws jplag.ExitException { + File f = new File(root_dir); + + if (!f.exists() || !f.isDirectory()) { + throw new jplag.ExitException("\"" + root_dir + "\" does not exist or is not a" + + " directory!", + ExitException.BAD_PARAMETER); + } + + if (useBasecode) + checkBasecodeOption(); + if (useArchivalSubmissions) + checkArchivalSubmissionsOption(); + } + /** * This method checks whether the basecode directory value is valid */ private void checkBasecodeOption() throws jplag.ExitException { - if (useBasecode) { - if (basecode == null || basecode.equals("")) { - throw new ExitException("Basecode option used but none " + - "specified!",ExitException.BAD_PARAMETER); - } - String baseC = root_dir + File.separator + basecode; - if (!(new File(root_dir)).exists()) { - throw new ExitException("Root directory \"" + root_dir - + "\" doesn't exist!",ExitException.BAD_PARAMETER); - } - File f = new File(baseC); - if (!f.exists()) { // Basecode dir doesn't exist. - throw new ExitException("Basecode directory \"" + baseC - + "\" doesn't exist!",ExitException.BAD_PARAMETER); - } - if(sub_dir != null && sub_dir.length()!=0) { - f = new File(baseC, sub_dir); - if(!f.exists()) { - throw new ExitException("Basecode directory doesn't contain" - + " the subdirectory \"" + sub_dir + "\"!", - ExitException.BAD_PARAMETER); - } - } - System.out.println("Basecode directory \"" + baseC - + "\" will be used"); + if (basecode == null || basecode.equals("")) { + throw new ExitException("Basecode option used but none " + + "specified!", ExitException.BAD_PARAMETER); } + + String baseC = root_dir + File.separator + basecode; + File f = new File(baseC); + if (!f.exists() || !f.isDirectory()) { // Basecode dir doesn't exist. + throw new ExitException("Basecode directory \"" + baseC + + "\" doesn't exist!", ExitException.BAD_PARAMETER); + } + if(sub_dir != null && sub_dir.length()!=0) { + f = new File(baseC, sub_dir); + if(!f.exists()) { + throw new ExitException("Basecode directory doesn't contain" + + " the subdirectory \"" + sub_dir + "\"!", + ExitException.BAD_PARAMETER); + } + } + System.out.println("Basecode directory \"" + baseC + + "\" will be used"); } + private void checkArchivalSubmissionsOption() throws jplag.ExitException { + if (archivalSubmissions == null || archivalSubmissions.equals("")) { + throw new ExitException("Archival submissions option used but none specified!", + ExitException.BAD_PARAMETER); + } + + File f = new File(archivalSubmissions); + if (!f.exists()) { + throw new ExitException("Archival submissions directory \"" + archivalSubmissions + + "\" doesn't exist!", + ExitException.BAD_PARAMETER); + } + + if (sub_dir != null && sub_dir.length() != 0) { + f = new File(archivalSubmissions, sub_dir); + if (!f.exists()) { + throw new ExitException("Archival submissions directory doesn't contain" + + " the subdirectory \"" + sub_dir + "\"!", + ExitException.BAD_PARAMETER); + } + } + + System.out.println("Archival submissions directory \"" + archivalSubmissions + + "\" will be used"); + } } diff --git a/jplag/src/main/java/jplag/options/Options.java b/jplag/src/main/java/jplag/options/Options.java index 3cf761f830..53c1006522 100644 --- a/jplag/src/main/java/jplag/options/Options.java +++ b/jplag/src/main/java/jplag/options/Options.java @@ -102,6 +102,8 @@ public abstract class Options { public String basecode = ""; + public String archivalSubmissions = ""; + public String commandLine = ""; // "Ronald Kostoff" specials @@ -113,6 +115,8 @@ public abstract class Options { public boolean useBasecode = false; + public boolean useArchivalSubmissions = false; + public String languageName = null; public String countryTag = "en"; @@ -182,7 +186,7 @@ public static void usage() { System.out.print(Program.name_long + ", Copyright (c) 2004-2017 KIT - IPD Tichy, Guido Malpohl, and others.\n" + "Usage: JPlag [ options ] \n" - + " The root-directory that contains all submissions\n\n" + + " The root-directory that contains all submissions\n\n" + "options are:\n" + " -v[qlpd] (Verbose)\n" + " q: (Quiet) no output\n" @@ -203,7 +207,9 @@ public static void usage() { + " -m

% All matches with more than

% similarity will be saved.\n" + " -r

(Result) Name of directory in which the web pages will be\n" + " stored (default: result)\n" - + " -bc Name of the directory which contains the basecode (common framework)\n" + + " -a (Archival) Directory containing archived submissions, which\n" + + " will be only compared against\n" + + " -bc Directory which contains the basecode (common framework)\n" + " -l (Language) Supported Languages:\n "); for (int i = 0; i < languages.length - 2; i += 2) System.out.print(languages[i] + (i == 0 ? " (default), " : ", ")); diff --git a/jplag/src/main/resources/jplag/options/util/messages_de.properties b/jplag/src/main/resources/jplag/options/util/messages_de.properties index 2f9fe8faee..d0bbf2b8c2 100644 --- a/jplag/src/main/resources/jplag/options/util/messages_de.properties +++ b/jplag/src/main/resources/jplag/options/util/messages_de.properties @@ -11,9 +11,11 @@ Report.Clustering_Results=Clustering Ergebnisse Report.Title=Titel Report.Directory=Verzeichnis Report.Programs=Quellen +Report.ArchivalPrograms=Archival programs Report.Language=Sprache Report.Not_available=Nicht verfügbar Report.Submissions=Eingaben +Report.archival=archival Report.Invalid_submissions=Ungültige Eingaben Report.see_LOGBEG_log_file_LOGEND=(siehe {1_LOGBEG}Log-Datei{2_LOGEND}) Report.Basecode_submission=Referenzeingabe diff --git a/jplag/src/main/resources/jplag/options/util/messages_en.properties b/jplag/src/main/resources/jplag/options/util/messages_en.properties index 5fde9beb8e..2117a86ea0 100644 --- a/jplag/src/main/resources/jplag/options/util/messages_en.properties +++ b/jplag/src/main/resources/jplag/options/util/messages_en.properties @@ -11,9 +11,11 @@ Report.Clustering_Results=Clustering Results Report.Title=Title Report.Directory=Directory Report.Programs=Programs +Report.ArchivalPrograms=Archival programs Report.Language=Language Report.Not_available=Not available Report.Submissions=Submissions +Report.archival=archival Report.Invalid_submissions=Invalid submissions Report.see_LOGBEG_log_file_LOGEND=(see {1_LOGBEG}log file{2_LOGEND}) Report.Basecode_submission=Basecode submission diff --git a/jplag/src/main/resources/jplag/options/util/messages_es.properties b/jplag/src/main/resources/jplag/options/util/messages_es.properties index d1efe7f2c6..4ab8e0945a 100644 --- a/jplag/src/main/resources/jplag/options/util/messages_es.properties +++ b/jplag/src/main/resources/jplag/options/util/messages_es.properties @@ -11,9 +11,11 @@ Report.Clustering_Results=Resultados del Clustering Report.Title=T�tulo Report.Directory=Directorio Report.Programs=Programas +Report.ArchivalPrograms=Archival programs Report.Language=Lenguaje Report.Not_available=No disponible Report.Submissions=Solicitudes +Report.archival=archival Report.Invalid_submissions=Solicitudes no v�lidas Report.see_LOGBEG_log_file_LOGEND=(ver {1_LOGBEG}archivo de informe{2_LOGEND}) Report.Basecode_submission=Solicitud del c�digo base diff --git a/jplag/src/main/resources/jplag/options/util/messages_fr.properties b/jplag/src/main/resources/jplag/options/util/messages_fr.properties index 8d22678c50..633bf612f4 100644 --- a/jplag/src/main/resources/jplag/options/util/messages_fr.properties +++ b/jplag/src/main/resources/jplag/options/util/messages_fr.properties @@ -11,9 +11,11 @@ Report.Clustering_Results=Resultats pour les reseaux Report.Title=Title Report.Directory=Repertoire Report.Programs=Code source +Report.ArchivalPrograms=Archival programs Report.Language=Languages Report.Not_available=Invisible Report.Submissions=Differents codes source +Report.archival=archival Report.Invalid_submissions=Invalid submissions Report.see_LOGBEG_log_file_LOGEND=(see {1_LOGBEG}log file{2_LOGEND}) Report.Basecode_submission=Code source de reference diff --git a/jplag/src/main/resources/jplag/options/util/messages_pt.properties b/jplag/src/main/resources/jplag/options/util/messages_pt.properties index 5fde9beb8e..2117a86ea0 100644 --- a/jplag/src/main/resources/jplag/options/util/messages_pt.properties +++ b/jplag/src/main/resources/jplag/options/util/messages_pt.properties @@ -11,9 +11,11 @@ Report.Clustering_Results=Clustering Results Report.Title=Title Report.Directory=Directory Report.Programs=Programs +Report.ArchivalPrograms=Archival programs Report.Language=Language Report.Not_available=Not available Report.Submissions=Submissions +Report.archival=archival Report.Invalid_submissions=Invalid submissions Report.see_LOGBEG_log_file_LOGEND=(see {1_LOGBEG}log file{2_LOGEND}) Report.Basecode_submission=Basecode submission diff --git a/jplag/src/main/resources/jplag/options/util/messages_ptbr.properties b/jplag/src/main/resources/jplag/options/util/messages_ptbr.properties index 56f0b9d32c..e0788aafca 100644 --- a/jplag/src/main/resources/jplag/options/util/messages_ptbr.properties +++ b/jplag/src/main/resources/jplag/options/util/messages_ptbr.properties @@ -11,9 +11,11 @@ Report.Clustering_Results=Resultado dos agrupamentos Report.Title=T�tulo Report.Directory=Diret�rio Report.Programs=Programas +Report.ArchivalPrograms=Archival programs Report.Language=Linguagem Report.Not_available=N�o dispon�vel Report.Submissions=Submiss�es +Report.archival=archival Report.Invalid_submissions=Submiss�es inv�lidas Report.see_LOGBEG_log_file_LOGEND=(veja {1_LOGBEG}registro arquivo{2_LOGEND}) Report.Basecode_submission=Basecode submission
" + + subA.name + "->"); header = true; - f.print("
" + name - + "->"); } - float percent = helper.getPercent(output); - f.print("" + output.subName(other) + "
(" + (((int) (percent * 10)) / (float) 10) + "%)"); + + float p = helper.getPercent(output); + f.printf("
" + subOther.name + "
(%.1f%%)", p); + + matchesPrinted.add(output); } } if (header) @@ -289,18 +294,35 @@ public void writeIndexBegin(HTMLFile f, String title) { f.println("
" + msg.getString("Report.Programs") + ":"); f.println("" + program.allValidSubmissions(" - ") + "
" + msg.getString("Report.ArchivalPrograms") + ":"); + f.println("" + program.allValidArchivalSubmissions(" - ") + "
" + msg.getString("Report.Language") + ":" + this.language.name() + "
" + msg.getString("Report.Submissions") + ":" + this.program.validSubmissions()); + if (program.useArchivalSubmissions()) { + f.print(" (" + this.program.validArchivalSubmissions() + " " + msg.getString("Report.archival")); + + if (program.getErrors() == 0) + f.printf(")"); + else + f.printf(", "); + } if (program.getErrors() != 0) { + if (!this.program.useArchivalSubmissions()) + f.printf(" ("); + if (this.program.getErrors() == 1) - f.print(" (" + msg.getString("Report.1_has_not_been_parsed_successfully") + ")"); + f.print("" + msg.getString("Report.1_has_not_been_parsed_successfully") + ")"); else if (this.program.getErrors() > 1) - f.print(" (" + f.print("" + TagParser.parse(msg.getString("Report.X_have_not_been_parsed_successfully"), new String[] { program.getErrors() - + "" }) + ")"); - f.println("
" + msg.getString("Report.Invalid_submissions")); if (options.output_file != null) { f.println(" " @@ -310,8 +332,8 @@ else if (this.program.getErrors() > 1) } f.println(":"); f.println("" + this.program.allInvalidSubmissions() + ""); + f.println("
" + msg.getString("Report.Basecode_submission") + ":" + this.program.get_basecode() + "