From c44839d8aa305ccdcf1f0f01b849496a315de6f6 Mon Sep 17 00:00:00 2001 From: fakeAuthor Date: Wed, 20 Nov 2019 18:40:08 +0800 Subject: [PATCH] Add comments and refactor code --- .../analyzer/AuthorshipAnalyzer.java | 98 +++++++++++-------- .../authorship/model/CandidateLine.java | 2 +- .../authorship/AuthorshipAnalyzerTest.java | 6 +- 3 files changed, 62 insertions(+), 44 deletions(-) diff --git a/src/main/java/reposense/authorship/analyzer/AuthorshipAnalyzer.java b/src/main/java/reposense/authorship/analyzer/AuthorshipAnalyzer.java index 6b325d186c..531cb2f7e1 100644 --- a/src/main/java/reposense/authorship/analyzer/AuthorshipAnalyzer.java +++ b/src/main/java/reposense/authorship/analyzer/AuthorshipAnalyzer.java @@ -28,9 +28,9 @@ public class AuthorshipAnalyzer { Pattern.compile("\n(-){3} a?/(?.*)\n(\\+){3} b?/(?.*)\n"); private static final String PRE_IMAGE_FILE_PATH_GROUP_NAME = "preImageFilePath"; private static final String POST_IMAGE_FILE_PATH_GROUP_NAME = "postImageFilePath"; - private static final String NONEXISTENT_FILE_SYMBOL = "/dev/null"; - private static final String LINE_CHUNKS_SEPARATOR = "\n@@ "; - private static final int LINE_CHANGED_HEADER_INDEX = 0; + private static final String FILE_ADDED_SYMBOL = "dev/null"; + private static final String HUNK_SEPARATOR = "\n@@ "; + private static final int LINES_CHANGED_HEADER_INDEX = 0; private static final Pattern STARTING_LINE_NUMBER_PATTERN = Pattern.compile( "-(?\\d+),?\\d* \\+\\d+,?\\d* @@"); private static final String PREIMAGE_START_LINE_GROUP_NAME = "preImageStartLine"; @@ -45,32 +45,34 @@ public class AuthorshipAnalyzer { /** * Analyzes the authorship of {@code lineContent} in {@code filePath}. - * Returns {@code true} if {@code currentAuthor} should be assigned full credit, false otherwise. + * Returns {@code true} if {@code currentAuthor} should be assigned full credit, {@code false} otherwise. */ public static boolean analyzeAuthorship( RepoConfiguration config, String filePath, String lineContent, String commitHash, Author currentAuthor) { + // Empty lines are ignored and given full credit if (lineContent.length() == 0) { return true; } CandidateLine deletedLine = getDeletedLineWithHighestSimilarity(config, filePath, lineContent, commitHash); - if (deletedLine == null) { + // Give full credit if there are no deleted lines found or deleted line is less than similarity threshold + if (deletedLine == null || deletedLine.getSimilarityScore() < SIMILARITY_THRESHOLD) { return true; } - GitBlameLineInfo gitBlameInfo = getGitBlameResults(config, deletedLine); + GitBlameLineInfo deletedLineInfo = getGitBlameLineInfo(config, deletedLine); - if (gitBlameInfo.getAuthor().equals(Author.UNKNOWN_AUTHOR) - || gitBlameInfo.getTimestampMilliseconds() < config.getSinceDate().getTime() - || CommitHash.isInsideCommitList(gitBlameInfo.getCommitHash(), config.getIgnoreCommitList()) - || gitBlameInfo.getAuthor().getIgnoreGlobMatcher().matches(Paths.get(deletedLine.getFilePath()))) { + if (deletedLineInfo.getAuthor().equals(Author.UNKNOWN_AUTHOR) + || deletedLineInfo.getTimestampMilliseconds() < config.getSinceDate().getTime() + || CommitHash.isInsideCommitList(deletedLineInfo.getCommitHash(), config.getIgnoreCommitList()) + || deletedLineInfo.getAuthor().getIgnoreGlobMatcher().matches(Paths.get(deletedLine.getFilePath()))) { return true; } - if (!gitBlameInfo.getAuthor().equals(currentAuthor)) { + // Give partial credit if currentAuthor is not the author of the previous version + if (!currentAuthor.equals(deletedLineInfo.getAuthor())) { return false; - } else { - return analyzeAuthorship(config, deletedLine.getFilePath(), deletedLine.getLineContent(), - gitBlameInfo.getCommitHash(), gitBlameInfo.getAuthor()); } + return analyzeAuthorship(config, deletedLine.getFilePath(), deletedLine.getLineContent(), + deletedLineInfo.getCommitHash(), deletedLineInfo.getAuthor()); } /** @@ -84,6 +86,7 @@ private static CandidateLine getDeletedLineWithHighestSimilarity( CandidateLine highestSimilarityLine = null; for (String parentCommit : parentCommits) { + // Generate diff between commit and parent commit String gitDiffResult = GitDiff.diffCommits(config.getRepoRoot(), parentCommit, commitHash); String[] fileDiffResultList = gitDiffResult.split(DIFF_FILE_CHUNK_SEPARATOR); @@ -94,7 +97,9 @@ private static CandidateLine getDeletedLineWithHighestSimilarity( } String preImageFilePath = filePathMatcher.group(PRE_IMAGE_FILE_PATH_GROUP_NAME); String postImageFilePath = filePathMatcher.group(POST_IMAGE_FILE_PATH_GROUP_NAME); - if (preImageFilePath.equals(NONEXISTENT_FILE_SYMBOL) || !postImageFilePath.equals(filePath)) { + + // If file was added in the commit or file name does not match + if (preImageFilePath.equals(FILE_ADDED_SYMBOL) || !postImageFilePath.equals(filePath)) { continue; } CandidateLine candidateLine = getDeletedLineWithHighestSimilarityInDiff( @@ -113,25 +118,30 @@ private static CandidateLine getDeletedLineWithHighestSimilarity( */ private static CandidateLine getDeletedLineWithHighestSimilarityInDiff( String fileDiffResult, String lineContent, String commitHash, String filePath) { - CandidateLine mostSimilarLine = null; + CandidateLine highestSimilarityLine = null; + + String[] hunks = fileDiffResult.split(HUNK_SEPARATOR); + + // skip the diff header, index starts from 1 + for (int index = 1; index < hunks.length; index++) { + String hunk = hunks[index]; - String[] linesChangedChunk = fileDiffResult.split(LINE_CHUNKS_SEPARATOR); - for (int sectionIndex = 1; sectionIndex < linesChangedChunk.length; sectionIndex++) { - String linesChangedInSection = linesChangedChunk[sectionIndex]; - if (!linesChangedInSection.contains(ADDED_LINE_SYMBOL + lineContent)) { + // skip hunk if lines added in the hunk does not include lineContent + if (!hunk.contains(ADDED_LINE_SYMBOL + lineContent)) { continue; } - String[] linesChanged = linesChangedInSection.split("\n"); - int currentPreImageLineNumber = getPreImageStartLineNumber(linesChanged[LINE_CHANGED_HEADER_INDEX]); + String[] linesChanged = hunk.split("\n"); + int currentPreImageLineNumber = getPreImageStartingLineNumber(linesChanged[LINES_CHANGED_HEADER_INDEX]); + // skip the lines changed header, index starts from 1 for (int lineIndex = 1; lineIndex < linesChanged.length; lineIndex++) { String lineChanged = linesChanged[lineIndex]; + if (lineChanged.startsWith(DELETED_LINE_SYMBOL)) { - String deletedLineContent = lineChanged.substring(1); + String deletedLineContent = lineChanged.substring(DELETED_LINE_SYMBOL.length()); double similarityScore = similarityScore(lineContent, deletedLineContent); - if (similarityScore >= SIMILARITY_THRESHOLD - && (mostSimilarLine == null || similarityScore > mostSimilarLine.getSimilarityScore())) { - mostSimilarLine = new CandidateLine( + if (highestSimilarityLine == null || similarityScore > highestSimilarityLine.getSimilarityScore()) { + highestSimilarityLine = new CandidateLine( currentPreImageLineNumber, deletedLineContent, filePath, commitHash, similarityScore); } } @@ -140,22 +150,22 @@ private static CandidateLine getDeletedLineWithHighestSimilarityInDiff( } } } - return mostSimilarLine; + return highestSimilarityLine; } /** - * Returns the pre-image starting line number within the file diff result, by matching the pattern inside - * {@code linesChanged}. + * Returns the pre-image starting line number by matching the pattern inside {@code linesChangedHeader}. */ - private static int getPreImageStartLineNumber(String linesChanged) { - Matcher chunkHeaderMatcher = STARTING_LINE_NUMBER_PATTERN.matcher(linesChanged); + private static int getPreImageStartingLineNumber(String linesChangedHeader) { + Matcher linesChangedHeaderMatcher = STARTING_LINE_NUMBER_PATTERN.matcher(linesChangedHeader); - if (!chunkHeaderMatcher.find()) { - logger.severe(String.format(MATCH_GROUP_FAIL_MESSAGE_FORMAT, "line changed", linesChanged)); - throw new AssertionError("Should not have error matching line number pattern inside chunk header!"); + if (!linesChangedHeaderMatcher.find()) { + logger.severe( + String.format(MATCH_GROUP_FAIL_MESSAGE_FORMAT, PREIMAGE_START_LINE_GROUP_NAME, linesChangedHeader)); + throw new AssertionError("Should not have error matching line number pattern inside lines changed header!"); } - return Integer.parseInt(chunkHeaderMatcher.group(PREIMAGE_START_LINE_GROUP_NAME)); + return Integer.parseInt(linesChangedHeaderMatcher.group(PREIMAGE_START_LINE_GROUP_NAME)); } /** @@ -167,10 +177,13 @@ private static double similarityScore(String s, String baseString) { } /** - * Calculates the Levenshtein Distance between {@code s} and {@code t} using Dynamic Programming. + * Calculates the Levenshtein Distance between two strings using Dynamic Programming. */ private static int getLevenshteinDistance(String s, String t) { + // dp[i][j] stores the distance between s.substring(0, i) and t.substring(0, j) -> distance(s[:i], t[:j]) int[][] dp = new int[s.length() + 1][t.length() + 1]; + + // Distance between a string and an empty string is the length of the string for (int i = 0; i <= s.length(); i++) { dp[i][0] = i; } @@ -179,9 +192,14 @@ private static int getLevenshteinDistance(String s, String t) { } for (int i = 1; i <= s.length(); i++) { for (int j = 1; j <= t.length(); j++) { + // If s[i-1] and t[j-1] are equal, distance(s[:i], t[:j]) equals to distance(s[:i-1], t[:j-1]) if (s.charAt(i - 1) == t.charAt(j - 1)) { dp[i][j] = dp[i - 1][j - 1]; } else { + // distance(s[:i], t[:j]) is the minimum of: + // 1) distance(s[:i-1], t[:j]) + 1 -> add s[i] + // 2) distance(s[:i], t[:j-1]) + 1 -> add t[j] + // 3) distance(s[:i-1], t[:j-1]) + 1 -> substitute s[i] with t[j] dp[i][j] = Math.min(dp[i - 1][j], Math.min(dp[i][j - 1], dp[i - 1][j - 1])) + 1; } } @@ -190,11 +208,11 @@ private static int getLevenshteinDistance(String s, String t) { } /** - * Returns the git blame results for {@code line}. + * Returns the git blame line info for {@code line}. */ - private static GitBlameLineInfo getGitBlameResults(RepoConfiguration config, CandidateLine line) { - String blameResults = GitBlame.blameLine(config.getRepoRoot(), line.getGitBlameCommitHash(), - line.getFilePath(), line.getLineNumber()); + private static GitBlameLineInfo getGitBlameLineInfo(RepoConfiguration config, CandidateLine line) { + String blameResults = GitBlame.blameLine( + config.getRepoRoot(), line.getGitBlameCommitHash(), line.getFilePath(), line.getLineNumber()); String[] blameResultLines = blameResults.split("\n"); String commitHash = blameResultLines[0].substring(0, FULL_COMMIT_HASH_LENGTH); diff --git a/src/main/java/reposense/authorship/model/CandidateLine.java b/src/main/java/reposense/authorship/model/CandidateLine.java index 9b30c75108..ee745ed6c8 100644 --- a/src/main/java/reposense/authorship/model/CandidateLine.java +++ b/src/main/java/reposense/authorship/model/CandidateLine.java @@ -1,7 +1,7 @@ package reposense.authorship.model; /** - * Stores the information of a candidate line used in {@code LineCreditAnalyzer}. + * Stores the information of a candidate line used in {@code AuthorshipAnalyzer}. */ public class CandidateLine { private int lineNumber; diff --git a/src/test/java/reposense/authorship/AuthorshipAnalyzerTest.java b/src/test/java/reposense/authorship/AuthorshipAnalyzerTest.java index 4199e060d7..a6c3d84765 100644 --- a/src/test/java/reposense/authorship/AuthorshipAnalyzerTest.java +++ b/src/test/java/reposense/authorship/AuthorshipAnalyzerTest.java @@ -30,7 +30,7 @@ public void analyzeAuthorship_partialCredit_success() { Assert.assertTrue(fileInfoFull.getLine(1).isFullCredit()); - // Partial credit given as author only appended a full stop to the line + // Partial credit given as author only appended a full stop to the line Assert.assertFalse(fileInfoFull.getLine(2).isFullCredit()); Assert.assertFalse(fileInfoFull.getLine(3).isFullCredit()); } @@ -67,7 +67,7 @@ public void analyzeAuthorship_beforeSinceDate_success() { Assert.assertTrue(fileInfoFull.getLine(1).isFullCredit()); - // Full credit given since previous version was made in a commit before the sinceDate + // Full credit given since previous version was made in a commit before the sinceDate Assert.assertTrue(fileInfoFull.getLine(2).isFullCredit()); Assert.assertTrue(fileInfoFull.getLine(3).isFullCredit()); } @@ -96,7 +96,7 @@ public void analyzeAuthorship_emptyLine_success() { FileInfo fileInfoFull = generateAnalyzeAuthorshipTestFileInfo("analyzeAuthorshipTest1.java"); FileInfoAnalyzer.analyzeFile(config, fileInfoFull, true); - // Empty line is given full credit + // Empty line is given full credit Assert.assertEquals(new Author(FAKE_AUTHOR_NAME), fileInfoFull.getLine(1).getAuthor()); Assert.assertTrue(fileInfoFull.getLine(1).isFullCredit()); }