Skip to content

Commit

Permalink
Add comments and refactor code
Browse files Browse the repository at this point in the history
  • Loading branch information
myteo committed Nov 21, 2019
1 parent 6a13d6e commit c44839d
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 44 deletions.
98 changes: 58 additions & 40 deletions src/main/java/reposense/authorship/analyzer/AuthorshipAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ public class AuthorshipAnalyzer {
Pattern.compile("\n(-){3} a?/(?<preImageFilePath>.*)\n(\\+){3} b?/(?<postImageFilePath>.*)\n");
private static final String PRE_IMAGE_FILE_PATH_GROUP_NAME = "preImageFilePath";
private static final String POST_IMAGE_FILE_PATH_GROUP_NAME = "postImageFilePath";
private static final String NONEXISTENT_FILE_SYMBOL = "/dev/null";
private static final String LINE_CHUNKS_SEPARATOR = "\n@@ ";
private static final int LINE_CHANGED_HEADER_INDEX = 0;
private static final String FILE_ADDED_SYMBOL = "dev/null";
private static final String HUNK_SEPARATOR = "\n@@ ";
private static final int LINES_CHANGED_HEADER_INDEX = 0;
private static final Pattern STARTING_LINE_NUMBER_PATTERN = Pattern.compile(
"-(?<preImageStartLine>\\d+),?\\d* \\+\\d+,?\\d* @@");
private static final String PREIMAGE_START_LINE_GROUP_NAME = "preImageStartLine";
Expand All @@ -45,32 +45,34 @@ public class AuthorshipAnalyzer {

/**
* Analyzes the authorship of {@code lineContent} in {@code filePath}.
* Returns {@code true} if {@code currentAuthor} should be assigned full credit, false otherwise.
* Returns {@code true} if {@code currentAuthor} should be assigned full credit, {@code false} otherwise.
*/
public static boolean analyzeAuthorship(
RepoConfiguration config, String filePath, String lineContent, String commitHash, Author currentAuthor) {
// Empty lines are ignored and given full credit
if (lineContent.length() == 0) {
return true;
}
CandidateLine deletedLine = getDeletedLineWithHighestSimilarity(config, filePath, lineContent, commitHash);

if (deletedLine == null) {
// Give full credit if there are no deleted lines found or deleted line is less than similarity threshold
if (deletedLine == null || deletedLine.getSimilarityScore() < SIMILARITY_THRESHOLD) {
return true;
}
GitBlameLineInfo gitBlameInfo = getGitBlameResults(config, deletedLine);
GitBlameLineInfo deletedLineInfo = getGitBlameLineInfo(config, deletedLine);

if (gitBlameInfo.getAuthor().equals(Author.UNKNOWN_AUTHOR)
|| gitBlameInfo.getTimestampMilliseconds() < config.getSinceDate().getTime()
|| CommitHash.isInsideCommitList(gitBlameInfo.getCommitHash(), config.getIgnoreCommitList())
|| gitBlameInfo.getAuthor().getIgnoreGlobMatcher().matches(Paths.get(deletedLine.getFilePath()))) {
if (deletedLineInfo.getAuthor().equals(Author.UNKNOWN_AUTHOR)
|| deletedLineInfo.getTimestampMilliseconds() < config.getSinceDate().getTime()
|| CommitHash.isInsideCommitList(deletedLineInfo.getCommitHash(), config.getIgnoreCommitList())
|| deletedLineInfo.getAuthor().getIgnoreGlobMatcher().matches(Paths.get(deletedLine.getFilePath()))) {
return true;
}
if (!gitBlameInfo.getAuthor().equals(currentAuthor)) {
// Give partial credit if currentAuthor is not the author of the previous version
if (!currentAuthor.equals(deletedLineInfo.getAuthor())) {
return false;
} else {
return analyzeAuthorship(config, deletedLine.getFilePath(), deletedLine.getLineContent(),
gitBlameInfo.getCommitHash(), gitBlameInfo.getAuthor());
}
return analyzeAuthorship(config, deletedLine.getFilePath(), deletedLine.getLineContent(),
deletedLineInfo.getCommitHash(), deletedLineInfo.getAuthor());
}

/**
Expand All @@ -84,6 +86,7 @@ private static CandidateLine getDeletedLineWithHighestSimilarity(
CandidateLine highestSimilarityLine = null;

for (String parentCommit : parentCommits) {
// Generate diff between commit and parent commit
String gitDiffResult = GitDiff.diffCommits(config.getRepoRoot(), parentCommit, commitHash);
String[] fileDiffResultList = gitDiffResult.split(DIFF_FILE_CHUNK_SEPARATOR);

Expand All @@ -94,7 +97,9 @@ private static CandidateLine getDeletedLineWithHighestSimilarity(
}
String preImageFilePath = filePathMatcher.group(PRE_IMAGE_FILE_PATH_GROUP_NAME);
String postImageFilePath = filePathMatcher.group(POST_IMAGE_FILE_PATH_GROUP_NAME);
if (preImageFilePath.equals(NONEXISTENT_FILE_SYMBOL) || !postImageFilePath.equals(filePath)) {

// If file was added in the commit or file name does not match
if (preImageFilePath.equals(FILE_ADDED_SYMBOL) || !postImageFilePath.equals(filePath)) {
continue;
}
CandidateLine candidateLine = getDeletedLineWithHighestSimilarityInDiff(
Expand All @@ -113,25 +118,30 @@ private static CandidateLine getDeletedLineWithHighestSimilarity(
*/
private static CandidateLine getDeletedLineWithHighestSimilarityInDiff(
String fileDiffResult, String lineContent, String commitHash, String filePath) {
CandidateLine mostSimilarLine = null;
CandidateLine highestSimilarityLine = null;

String[] hunks = fileDiffResult.split(HUNK_SEPARATOR);

// skip the diff header, index starts from 1
for (int index = 1; index < hunks.length; index++) {
String hunk = hunks[index];

String[] linesChangedChunk = fileDiffResult.split(LINE_CHUNKS_SEPARATOR);
for (int sectionIndex = 1; sectionIndex < linesChangedChunk.length; sectionIndex++) {
String linesChangedInSection = linesChangedChunk[sectionIndex];
if (!linesChangedInSection.contains(ADDED_LINE_SYMBOL + lineContent)) {
// skip hunk if lines added in the hunk does not include lineContent
if (!hunk.contains(ADDED_LINE_SYMBOL + lineContent)) {
continue;
}
String[] linesChanged = linesChangedInSection.split("\n");
int currentPreImageLineNumber = getPreImageStartLineNumber(linesChanged[LINE_CHANGED_HEADER_INDEX]);
String[] linesChanged = hunk.split("\n");
int currentPreImageLineNumber = getPreImageStartingLineNumber(linesChanged[LINES_CHANGED_HEADER_INDEX]);

// skip the lines changed header, index starts from 1
for (int lineIndex = 1; lineIndex < linesChanged.length; lineIndex++) {
String lineChanged = linesChanged[lineIndex];

if (lineChanged.startsWith(DELETED_LINE_SYMBOL)) {
String deletedLineContent = lineChanged.substring(1);
String deletedLineContent = lineChanged.substring(DELETED_LINE_SYMBOL.length());
double similarityScore = similarityScore(lineContent, deletedLineContent);
if (similarityScore >= SIMILARITY_THRESHOLD
&& (mostSimilarLine == null || similarityScore > mostSimilarLine.getSimilarityScore())) {
mostSimilarLine = new CandidateLine(
if (highestSimilarityLine == null || similarityScore > highestSimilarityLine.getSimilarityScore()) {
highestSimilarityLine = new CandidateLine(
currentPreImageLineNumber, deletedLineContent, filePath, commitHash, similarityScore);
}
}
Expand All @@ -140,22 +150,22 @@ private static CandidateLine getDeletedLineWithHighestSimilarityInDiff(
}
}
}
return mostSimilarLine;
return highestSimilarityLine;
}

/**
* Returns the pre-image starting line number within the file diff result, by matching the pattern inside
* {@code linesChanged}.
* Returns the pre-image starting line number by matching the pattern inside {@code linesChangedHeader}.
*/
private static int getPreImageStartLineNumber(String linesChanged) {
Matcher chunkHeaderMatcher = STARTING_LINE_NUMBER_PATTERN.matcher(linesChanged);
private static int getPreImageStartingLineNumber(String linesChangedHeader) {
Matcher linesChangedHeaderMatcher = STARTING_LINE_NUMBER_PATTERN.matcher(linesChangedHeader);

if (!chunkHeaderMatcher.find()) {
logger.severe(String.format(MATCH_GROUP_FAIL_MESSAGE_FORMAT, "line changed", linesChanged));
throw new AssertionError("Should not have error matching line number pattern inside chunk header!");
if (!linesChangedHeaderMatcher.find()) {
logger.severe(
String.format(MATCH_GROUP_FAIL_MESSAGE_FORMAT, PREIMAGE_START_LINE_GROUP_NAME, linesChangedHeader));
throw new AssertionError("Should not have error matching line number pattern inside lines changed header!");
}

return Integer.parseInt(chunkHeaderMatcher.group(PREIMAGE_START_LINE_GROUP_NAME));
return Integer.parseInt(linesChangedHeaderMatcher.group(PREIMAGE_START_LINE_GROUP_NAME));
}

/**
Expand All @@ -167,10 +177,13 @@ private static double similarityScore(String s, String baseString) {
}

/**
* Calculates the Levenshtein Distance between {@code s} and {@code t} using Dynamic Programming.
* Calculates the Levenshtein Distance between two strings using Dynamic Programming.
*/
private static int getLevenshteinDistance(String s, String t) {
// dp[i][j] stores the distance between s.substring(0, i) and t.substring(0, j) -> distance(s[:i], t[:j])
int[][] dp = new int[s.length() + 1][t.length() + 1];

// Distance between a string and an empty string is the length of the string
for (int i = 0; i <= s.length(); i++) {
dp[i][0] = i;
}
Expand All @@ -179,9 +192,14 @@ private static int getLevenshteinDistance(String s, String t) {
}
for (int i = 1; i <= s.length(); i++) {
for (int j = 1; j <= t.length(); j++) {
// If s[i-1] and t[j-1] are equal, distance(s[:i], t[:j]) equals to distance(s[:i-1], t[:j-1])
if (s.charAt(i - 1) == t.charAt(j - 1)) {
dp[i][j] = dp[i - 1][j - 1];
} else {
// distance(s[:i], t[:j]) is the minimum of:
// 1) distance(s[:i-1], t[:j]) + 1 -> add s[i]
// 2) distance(s[:i], t[:j-1]) + 1 -> add t[j]
// 3) distance(s[:i-1], t[:j-1]) + 1 -> substitute s[i] with t[j]
dp[i][j] = Math.min(dp[i - 1][j], Math.min(dp[i][j - 1], dp[i - 1][j - 1])) + 1;
}
}
Expand All @@ -190,11 +208,11 @@ private static int getLevenshteinDistance(String s, String t) {
}

/**
* Returns the git blame results for {@code line}.
* Returns the git blame line info for {@code line}.
*/
private static GitBlameLineInfo getGitBlameResults(RepoConfiguration config, CandidateLine line) {
String blameResults = GitBlame.blameLine(config.getRepoRoot(), line.getGitBlameCommitHash(),
line.getFilePath(), line.getLineNumber());
private static GitBlameLineInfo getGitBlameLineInfo(RepoConfiguration config, CandidateLine line) {
String blameResults = GitBlame.blameLine(
config.getRepoRoot(), line.getGitBlameCommitHash(), line.getFilePath(), line.getLineNumber());
String[] blameResultLines = blameResults.split("\n");

String commitHash = blameResultLines[0].substring(0, FULL_COMMIT_HASH_LENGTH);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package reposense.authorship.model;

/**
* Stores the information of a candidate line used in {@code LineCreditAnalyzer}.
* Stores the information of a candidate line used in {@code AuthorshipAnalyzer}.
*/
public class CandidateLine {
private int lineNumber;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public void analyzeAuthorship_partialCredit_success() {

Assert.assertTrue(fileInfoFull.getLine(1).isFullCredit());

// Partial credit given as author only appended a full stop to the line
// Partial credit given as author only appended a full stop to the line
Assert.assertFalse(fileInfoFull.getLine(2).isFullCredit());
Assert.assertFalse(fileInfoFull.getLine(3).isFullCredit());
}
Expand Down Expand Up @@ -67,7 +67,7 @@ public void analyzeAuthorship_beforeSinceDate_success() {

Assert.assertTrue(fileInfoFull.getLine(1).isFullCredit());

// Full credit given since previous version was made in a commit before the sinceDate
// Full credit given since previous version was made in a commit before the sinceDate
Assert.assertTrue(fileInfoFull.getLine(2).isFullCredit());
Assert.assertTrue(fileInfoFull.getLine(3).isFullCredit());
}
Expand Down Expand Up @@ -96,7 +96,7 @@ public void analyzeAuthorship_emptyLine_success() {
FileInfo fileInfoFull = generateAnalyzeAuthorshipTestFileInfo("analyzeAuthorshipTest1.java");
FileInfoAnalyzer.analyzeFile(config, fileInfoFull, true);

// Empty line is given full credit
// Empty line is given full credit
Assert.assertEquals(new Author(FAKE_AUTHOR_NAME), fileInfoFull.getLine(1).getAuthor());
Assert.assertTrue(fileInfoFull.getLine(1).isFullCredit());
}
Expand Down

0 comments on commit c44839d

Please sign in to comment.