Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[engine] git log 파싱에 사용되는 separator 개선 #770

Merged
merged 12 commits into from
Oct 22, 2024
2 changes: 0 additions & 2 deletions packages/analysis-engine/src/constant.ts

This file was deleted.

3 changes: 1 addition & 2 deletions packages/analysis-engine/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ export class AnalysisEngine {
if (this.isDebugMode) console.log("stemDict: ", stemDict);
const csmDict = buildCSMDict(commitDict, stemDict, this.baseBranchName, pullRequests);
if (this.isDebugMode) console.log("csmDict: ", csmDict);
const nodes = stemDict.get(this.baseBranchName)?.nodes?.map(({commit}) => commit);
const nodes = stemDict.get(this.baseBranchName)?.nodes?.map(({ commit }) => commit);
const geminiCommitSummary = await getSummary(nodes ? nodes?.slice(-10) : []);
if (this.isDebugMode) console.log("GeminiCommitSummary: ", geminiCommitSummary);

Expand All @@ -92,4 +92,3 @@ export class AnalysisEngine {
}

export default AnalysisEngine;
export { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant";
61 changes: 32 additions & 29 deletions packages/analysis-engine/src/parser.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { getCommitMessageType } from "./commit.util";
import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

드디어 사라졌군요!!! 🎆🎆🎆

import getCommitRaws from "./parser";
import type { CommitRaw, DifferenceStatistic } from "./types";

Expand Down Expand Up @@ -36,11 +35,15 @@ describe("commit message type", () => {
});

describe("getCommitRaws", () => {
const fakeAuthorAndCommitter = `${GIT_LOG_SEPARATOR}John Park${GIT_LOG_SEPARATOR}[email protected]${GIT_LOG_SEPARATOR}Sun Sep 4 20:17:59 2022 +0900${GIT_LOG_SEPARATOR}John Park 2${GIT_LOG_SEPARATOR}[email protected]${GIT_LOG_SEPARATOR}Sun Sep 5 20:17:59 2022 +0900`;
const fakeCommitMessage = `${GIT_LOG_SEPARATOR}commit message${GIT_LOG_SEPARATOR}`;
const fakeCommitMessageAndBody = `${GIT_LOG_SEPARATOR}commit message title\n\ncommit message body${GIT_LOG_SEPARATOR}`;
const fakeCommitHash = `a${GIT_LOG_SEPARATOR}b`;
const fakeCommitRef = `${GIT_LOG_SEPARATOR}HEAD`;
const FRONT_NEW_LINE = "\n\n";
const INDENTATION = " ";

const fakeAuthor = "John Park\[email protected]\nSun Sep 4 20:17:59 2022 +0900";
const fakeCommitter = `John Park 2\[email protected]\nSun Sep 5 20:17:59 2022 +0900`;
const fakeCommitMessage = `commit message\n${INDENTATION}`;
const fakeCommitMessageAndBody = `commit message title\n${INDENTATION}\n${INDENTATION}commit message body`;
const fakeCommitHash = "a\nb";
const fakeCommitRef = "HEAD";
const fakeCommitFileChange = "10\t0\ta.ts\n1\t0\tREADME.md";

const commonExpectatedResult: CommitRaw = {
Expand Down Expand Up @@ -73,23 +76,23 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${`a${GIT_LOG_SEPARATOR}`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${"a\n"}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
id: "a",
parents: [""],
parents: [],
},
],
[
`${COMMIT_SEPARATOR}${`c${GIT_LOG_SEPARATOR}b`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${"c\nd"}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
id: "c",
parents: ["b"],
parents: ["d"],
},
],
[
`${COMMIT_SEPARATOR}${`d${GIT_LOG_SEPARATOR}e f`}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${"d\ne f"}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
id: "d",
Expand All @@ -103,47 +106,47 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD"],
tags: [],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD -> main, origin/main, origin/HEAD`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD -> main, origin/main, origin/HEAD"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD", "main", "origin/main", "origin/HEAD"],
tags: [],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD, tag: v1.0.0`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD, tag: v1.0.0"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD"],
tags: ["v1.0.0"],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD -> main, origin/main, origin/HEAD, tag: v2.0.0"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD", "main", "origin/main", "origin/HEAD"],
tags: ["v2.0.0"],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${`${GIT_LOG_SEPARATOR}HEAD, tag: v2.0.0, tag: v1.4`}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${"HEAD, tag: v2.0.0, tag: v1.4"}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: ["HEAD"],
tags: ["v2.0.0", "v1.4"],
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${GIT_LOG_SEPARATOR}${fakeAuthorAndCommitter}${fakeCommitMessage}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${""}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`,
{
...commonExpectatedResult,
branches: [],
Expand All @@ -157,7 +160,7 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"10\t0\ta.ts\n1\t0\tREADME.md"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"10\t0\ta.ts\n1\t0\tREADME.md"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -171,7 +174,7 @@ describe("getCommitRaws", () => {
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"3\t3\ta.ts"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"3\t3\ta.ts"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -182,7 +185,7 @@ describe("getCommitRaws", () => {
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"4\t0\ta.ts"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"4\t0\ta.ts"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -193,7 +196,7 @@ describe("getCommitRaws", () => {
},
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${"0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts"}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${"0\t6\ta.ts\n2\t0\tb.ts\n3\t3\tc.ts"}`,
{
...commonExpectatedResult,
differenceStatistic: {
Expand All @@ -213,7 +216,7 @@ describe("getCommitRaws", () => {
});

it(`should parse gitlog to commitRaw(multiple commits)`, () => {
const mockLog = `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}\n${fakeCommitFileChange}${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessage}`;
const mockLog = `${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}\n${fakeCommitFileChange}\n\n\n\n${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessage}`;
const result = getCommitRaws(mockLog);
const expectedResult = [
{ ...commonExpectatedResult, differenceStatistic: expectedFileChange },
Expand All @@ -225,23 +228,23 @@ describe("getCommitRaws", () => {

it.each([
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}`,
{ ...commonExpectatedResult, message: "commit message title" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\ncommit message${GIT_LOG_SEPARATOR}`}`,
{ ...commonExpectatedResult, message: "commit message title\ncommit message" },
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}${"commit message body"}`,
{ ...commonExpectatedResult, message: "commit message title\ncommit message body" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\n\ncommit message body${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}\n${INDENTATION}${"commit message body"}`,
{ ...commonExpectatedResult, message: "commit message title\n\ncommit message body" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}commit message title\n\n\ncommit message body${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${"commit message title"}\n${INDENTATION}\n${INDENTATION}\n${INDENTATION}${"commit message body"}`,
{ ...commonExpectatedResult, message: "commit message title\n\n\ncommit message body" },
],
[
`${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${`${GIT_LOG_SEPARATOR}${GIT_LOG_SEPARATOR}`}`,
`${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n\n${INDENTATION}`,
{ ...commonExpectatedResult, message: "" },
],
])("should parse gitlog to commitRaw(commit message)", (mockLog, expectedResult) => {
Expand All @@ -250,7 +253,7 @@ describe("getCommitRaws", () => {
});

it(`should parse gitlog to commitRaw(commit message body and file change)`, () => {
const mockLog = `${COMMIT_SEPARATOR}${fakeCommitHash}${fakeCommitRef}${fakeAuthorAndCommitter}${fakeCommitMessageAndBody}\n${fakeCommitFileChange}`;
const mockLog = `${FRONT_NEW_LINE}${fakeCommitHash}\n${fakeCommitRef}\n${fakeAuthor}\n${fakeCommitter}\n${fakeCommitMessageAndBody}\n${fakeCommitFileChange}`;
const result = getCommitRaws(mockLog);
const expectedResult = {
...commonExpectatedResult,
Expand Down
85 changes: 47 additions & 38 deletions packages/analysis-engine/src/parser.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import { getCommitMessageType } from "./commit.util";
import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "./constant";
import type { CommitRaw } from "./types";
import type { CommitRaw, DifferenceStatistic } from "./types";

export default function getCommitRaws(log: string) {
if (!log) return [];
const EOL_REGEX = /\r?\n/;
const COMMIT_SEPARATOR = new RegExp(`${EOL_REGEX.source}{4}`);
const INDENTATION = " ";

// step 0: Split log into commits
const commits = log.split(COMMIT_SEPARATOR);
const commits = log.substring(2).split(COMMIT_SEPARATOR);
const commitRaws: CommitRaw[] = [];
// skip the first empty element
for (let commitIdx = 1; commitIdx < commits.length; commitIdx += 1) {
// step 1: Extract commitData from the first line of the commit
const commitData = commits[commitIdx].split(GIT_LOG_SEPARATOR);
for (let commitIdx = 0; commitIdx < commits.length; commitIdx += 1) {
// step 1: Extract commitData
const commitData = commits[commitIdx].split(EOL_REGEX);
const [
id,
parents,
Expand All @@ -23,10 +23,9 @@ export default function getCommitRaws(log: string) {
committerName,
committerEmail,
committerDate,
message,
diffStats,
...messageAndDiffStats
] = commitData;
// Extract branch and tag data from refs
// step 2: Extract branch and tag data from refs
const refsArray = refs.replace(" -> ", ", ").split(", ");
const [branches, tags]: string[][] = refsArray.reduce(
([branches, tags], ref) => {
Expand All @@ -41,11 +40,45 @@ export default function getCommitRaws(log: string) {
[new Array<string>(), new Array<string>()]
);

// make base commitRaw object
// step 3: Extract message and diffStats
let messageSubject = "";
let messageBody = "";
const diffStats: DifferenceStatistic = {
totalInsertionCount: 0,
totalDeletionCount: 0,
fileDictionary: {},
};
for (let idx = 0; idx < messageAndDiffStats.length; idx++) {
const line = messageAndDiffStats[idx];
if (idx === 0)
// message subject
messageSubject = line;
else if (line.startsWith(INDENTATION)) {
// message body (add newline if not first line)
messageBody += idx === 1 ? line.trim() : `\n${line.trim()}`;
} else if (line === "")
// pass empty line
continue;
else {
// diffStats
const [insertions, deletions, path] = line.split("\t");
const numberedInsertions = insertions === "-" ? 0 : Number(insertions);
const numberedDeletions = deletions === "-" ? 0 : Number(deletions);
diffStats.totalInsertionCount += numberedInsertions;
diffStats.totalDeletionCount += numberedDeletions;
diffStats.fileDictionary[path] = {
insertionCount: numberedInsertions,
deletionCount: numberedDeletions,
};
}
}

const message = messageBody === "" ? messageSubject : `${messageSubject}\n${messageBody}`;
// step 4: Construct commitRaw
const commitRaw: CommitRaw = {
sequence: commitIdx - 1,
sequence: commitIdx,
id,
parents: parents.split(" "),
parents: parents.length === 0 ? [] : parents.split(" "),
branches,
tags,
author: {
Expand All @@ -60,32 +93,8 @@ export default function getCommitRaws(log: string) {
committerDate: new Date(committerDate),
message,
commitMessageType: getCommitMessageType(message),
differenceStatistic: {
totalInsertionCount: 0,
totalDeletionCount: 0,
fileDictionary: {},
},
differenceStatistic: diffStats,
};

// step 2: Extract diffStats from the rest of the commit
if (!diffStats) {
commitRaws.push(commitRaw);
continue;
}
const diffStatsArray = diffStats.split(EOL_REGEX);
// pass the first empty element
for (let diffIdx = 1; diffIdx < diffStatsArray.length; diffIdx += 1) {
if (diffStatsArray[diffIdx] === "") continue;
const [insertions, deletions, path] = diffStatsArray[diffIdx].split("\t");
const numberedInsertions = insertions === "-" ? 0 : Number(insertions);
const numberedDeletions = deletions === "-" ? 0 : Number(deletions);
commitRaw.differenceStatistic.totalInsertionCount += numberedInsertions;
commitRaw.differenceStatistic.totalDeletionCount += numberedDeletions;
commitRaw.differenceStatistic.fileDictionary[path] = {
insertionCount: numberedInsertions,
deletionCount: numberedDeletions,
};
}
commitRaws.push(commitRaw);
}

Expand Down
15 changes: 7 additions & 8 deletions packages/vscode/src/utils/git.util.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { COMMIT_SEPARATOR, GIT_LOG_SEPARATOR } from "@githru-vscode-ext/analysis-engine";
import * as cp from "child_process";
import * as fs from "fs";
import * as path from "path";
Expand Down Expand Up @@ -112,7 +111,7 @@
if (await isExecutable(file)) {
try {
return await getGitExecutable(file);
} catch (_) {}

Check warning on line 114 in packages/vscode/src/utils/git.util.ts

View workflow job for this annotation

GitHub Actions / build (20.x)

Empty block statement
}
}
return Promise.reject<GitExecutable>();
Expand Down Expand Up @@ -148,7 +147,7 @@
for (let i = 0; i < paths.length; i++) {
try {
return await getGitExecutable(paths[i]);
} catch (_) {}

Check warning on line 150 in packages/vscode/src/utils/git.util.ts

View workflow job for this annotation

GitHub Actions / build (20.x)

Empty block statement
}
throw new Error("None of the provided paths are a Git executable");
}
Expand All @@ -156,20 +155,20 @@
export async function getGitLog(gitPath: string, currentWorkspacePath: string): Promise<string> {
return new Promise((resolve, reject) => {
const gitLogFormat =
COMMIT_SEPARATOR +
"%n%n" +
[
"%H", // commit hash (id)
"%P", // parent hashes
"%D", // ref names (branches, tags)
"%an", // author name
"%ae", // author email
"%ad", // author date
"%cn",
"%ce",
"%cd", // committer name, committer email and committer date
"%B", // commit message (subject and body)
].join(GIT_LOG_SEPARATOR) +
GIT_LOG_SEPARATOR;
"%cn", // committer name
"%ce", // committer email
"%cd", // committer date
"%w(0,0,4)%s", // commit message subject
"%b", // commit message body
].join("%n");
const args = [
"--no-pager",
"log",
Expand Down
Loading