Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmarking framework hot-fixes #49

Merged
merged 9 commits
Nov 27, 2024
1 change: 1 addition & 0 deletions etc/docs/benchmark/BENCHMARKING_FRAMEWORK_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ This experiment must be run within the workspace environment, which the user activates
cd dataset/imm
nix-build # builds the 'imm' project
nix-shell # activates its environment
export COQ_LSP_PATH=$(which coq-lsp) # sets up the proper coq-lsp server path
cd ../..
npm run single-workspace-benchmark
```
Expand Down
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -397,11 +397,13 @@
"preclean-test": "npm run clean && npm run rebuild-test-resources && npm run compile && npm run lint",
"clean-test": "npm run test-only",
"prebenchmark": "npm run preclean-test",
"premulti-workspace-benchmark": "npm run prebenchmark",
"premulti-workspaces-benchmark": "npm run prebenchmark",
"multi-workspaces-benchmark": "node ./out/benchmark/multiWorkspacesSetup.js",
"presingle-workspace-benchmark": "npm run prebenchmark",
"single-workspace-benchmark": "npm run test-executables-unsafe -- -g=\"[SourceExecutable] Single Workspace Benchmark\"",
"benchmark": "npm run single-workspace-benchmark",
"prebenchmark-test": "rm -rf benchmarksOutput",
"benchmark-test": "npm run benchmark",
"preteamcity-benchmark-setup": "npm run prebenchmark",
"teamcity-benchmark-setup": "node ./out/benchmark/teamCitySetup.js",
"teamcity-benchmark-agent": "npm run test-executables-unsafe -- -g=\"[SourceExecutable] Team City Benchmark Agent\"",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { AbstractExperiment, ExecutionContext } from "./abstractExperiment";
* In summary, `MultiWorkspacesExperiment` is designed to work with multiple workspaces per run,
* making it better suited for large benchmarking experiments.
*/
// CRITICAL TODO: support COQ_LSP_PATH switch for the processes of different workspaces
export class MultiWorkspacesExperiment extends AbstractExperiment {
constructor(
bundles: InputBenchmarkingBundle[] = [],
Expand Down
55 changes: 28 additions & 27 deletions src/benchmark/framework/parseDataset/cacheHandlers/cacheUpdater.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import {
ParsedFileHolder,
ParsedFileTarget,
} from "../coqProjectParser/implementation/parsedWorkspaceHolder";
import { throwOnTheoremWithoutInitialGoal } from "../utils/invariantFailedErrors";

export function updateWorkspaceCache(
workspaceCache: WorkspaceCacheHolder,
Expand Down Expand Up @@ -84,32 +83,30 @@ namespace UpdateCacheHolders {
}
cachedFile.updateDocumentVersion(parsedFile.documentVersion);

const newlyInitializedCachedTheorems = new Set<string>();
for (const fileTarget of parsedFileHolder.targets()) {
let cachedTheorem = cachedFile.getCachedTheorem(
fileTarget.sourceTheorem.name
);
const theoremName = fileTarget.sourceTheorem.name;
let cachedTheorem = cachedFile.getCachedTheorem(theoremName);
if (cachedTheorem === undefined) {
newlyInitializedCachedTheorems.add(theoremName);
cacheUpdaterLogger.debug(
`+ cached new theorem: "${fileTarget.sourceTheorem.name}"`
);
cachedTheorem = new CacheHolderData.CachedTheoremData(
fileTarget.sourceTheorem
);
buildInitialTargets(
cachedTheorem,
cacheUpdaterLogger,
parsedFile.filePath
);
buildInitialTargets(cachedTheorem, cacheUpdaterLogger);
cachedFile.addCachedTheorem(cachedTheorem);
} else {
cacheUpdaterLogger.debug(
`* updated theorem: "${fileTarget.sourceTheorem.name}"`
);
}

updateCachedTheoremData(
updateCachedTheoremWithRequestedTarget(
cachedTheorem,
fileTarget,
newlyInitializedCachedTheorems.has(theoremName),
cachedFile.workspacePath,
cacheUpdaterLogger
);
Expand Down Expand Up @@ -138,18 +135,15 @@ namespace UpdateCacheHolders {
const cachedTheorem = new CacheHolderData.CachedTheoremData(
theoremData
);
buildInitialTargets(
cachedTheorem,
cachedFileUpdateLogger,
parsedFile.filePath
);
buildInitialTargets(cachedTheorem, cachedFileUpdateLogger);

for (const fileTarget of parsedFileTargetsByTheorem.get(
theoremData.name
) ?? []) {
updateCachedTheoremData(
updateCachedTheoremWithRequestedTarget(
cachedTheorem,
fileTarget,
true,
workspacePath,
cachedFileUpdateLogger
);
Expand All @@ -167,8 +161,7 @@ namespace UpdateCacheHolders {

export function buildInitialTargets(
cachedTheorem: CacheHolderData.CachedTheoremData,
cachedFileUpdateLogger: AsOneRecordLogsBuilder,
sourceFilePath: string
cachedFileUpdateLogger: AsOneRecordLogsBuilder
) {
if (!cachedTheorem.hasNoTargets()) {
cachedFileUpdateLogger
Expand Down Expand Up @@ -205,17 +198,10 @@ namespace UpdateCacheHolders {
const sourceTheoremData = cachedTheorem.theoremData;

// PROVE_THEOREM target
const initialGoal = sourceTheoremData.sourceTheorem.initial_goal;
if (initialGoal === null) {
throwOnTheoremWithoutInitialGoal(
sourceTheoremData.name,
sourceFilePath
);
}
initializeCachedTarget(
TargetType.PROVE_THEOREM,
extractTheoremFisrtProofStep(sourceTheoremData),
initialGoal
sourceTheoremData.sourceTheorem.initial_goal ?? undefined
);

// ADMIT target
Expand All @@ -224,9 +210,15 @@ namespace UpdateCacheHolders {
);
}

export function updateCachedTheoremData(
/**
* _Note:_ additionally, this method checks two invariants.
* 1. `CachedTheoremData` should have its targets initialized before updating them with parsed ones.
* 2. Parsed target should not have been requested, if it had been already present in cache.
*/
export function updateCachedTheoremWithRequestedTarget(
cachedTheorem: CacheHolderData.CachedTheoremData,
parsedTarget: ParsedFileTarget,
isNewlyInitialized: boolean,
workspacePath: string,
cachedFileUpdateLogger: AsOneRecordLogsBuilder
) {
Expand All @@ -251,6 +243,15 @@ namespace UpdateCacheHolders {
`Cache building invariant failed: \`CachedTheoremData\` is built incorrectly`
);
} else {
const parsedTargetWasBuiltFromInitialGoal =
parsedTarget.targetType === TargetType.PROVE_THEOREM &&
parsedTarget.sourceTheorem.sourceTheorem.initial_goal !== null;
const cachedTargetIsInitializedComplete =
isNewlyInitialized && parsedTargetWasBuiltFromInitialGoal;
if (cachedTargetIsInitializedComplete) {
return;
}

if (cachedTargetToUpdate.hasCachedGoal()) {
cachedFileUpdateLogger
.error(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ export namespace ParseCoqProjectInternalSignature {
export type FilePathToFileTargets = { [key: string]: FileTarget[] };

export interface FileTarget {
// TODO: optimize - `PROVE_THEOREM` targets don't need to be requested explicitly,
// they will be always parsed together with the theorems
requestType: TargetRequestType;
/**
* If `specificTheoremName` is undefined, this target means request on all file theorems.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
SerializedGoal,
serializeGoal,
} from "../../../utils/coqUtils/goalParser";
import { extractSerializedTheoremFisrtProofStep } from "../../../utils/coqUtils/proofTargetExtractor";
import { LogsIPCSender } from "../../../utils/subprocessUtils/ipc/onParentProcessCallExecutor/logsIpcSender";

import { ParseCoqProjectInternalSignature } from "./internalSignature";
Expand Down Expand Up @@ -99,15 +100,12 @@ export namespace ParseCoqProjectImpl {
logger: Logger
): Promise<SerializedParsedCoqFile> {
const mockDocumentVersion = 1;
// TODO: [@Gleb Solovev] Do not create this Abort Controller but pass
// the one created at the top
// TODO + check coq-lsp creation in benchmarks
const abortController = new AbortController();
const sourceFileEnvironment = await createSourceFileEnvironment(
mockDocumentVersion,
Uri.fromPath(filePath),
coqLspClient,
abortController.signal
new AbortController().signal, // abort behaviour is not supported at the parsing stage
true // TODO: pass `ContextTheoremsRanker.needsUnwrappedNotations` here to improve performance
);
const serializedParsedFile: SerializedParsedCoqFile = {
serializedTheoremsByNames: packIntoMappedObject(
Expand Down Expand Up @@ -141,10 +139,12 @@ export namespace ParseCoqProjectImpl {
for (const fileTarget of fileTargets) {
if (fileTarget.specificTheoremName === undefined) {
// all theorems requests
for (const theorem of mappedObjectValues(theoremsMapping)) {
for (const serializedTheorem of mappedObjectValues(
theoremsMapping
)) {
const parsedTargetsFromTheorem =
await extractTargetsFromTheorem(
theorem,
serializedTheorem,
fileTarget.requestType,
serializedParsedFile,
coqLspClient,
Expand Down Expand Up @@ -176,7 +176,7 @@ export namespace ParseCoqProjectImpl {
}

async function extractTargetsFromTheorem(
theorem: SerializedTheorem,
serializedTheorem: SerializedTheorem,
requestType: TargetRequestType,
serializedParsedFile: SerializedParsedCoqFile,
coqLspClient: CoqLspClient,
Expand All @@ -192,7 +192,7 @@ export namespace ParseCoqProjectImpl {
knownGoal
) => {
return {
theoremName: theorem.name,
theoremName: serializedTheorem.name,
targetType: targetType,
goalToProve:
knownGoal ??
Expand All @@ -207,12 +207,19 @@ export namespace ParseCoqProjectImpl {
};
switch (requestType) {
case TargetRequestType.THEOREM_PROOF:
// THEOREM_PROOF goals are already parsed within the theorems,
// so `ParsedFileTarget`-s for them are redundant
return [];
const theoremProofTarget = await targetBuilder(
extractSerializedTheoremFisrtProofStep(serializedTheorem),
TargetType.PROVE_THEOREM,
serializedTheorem.initial_goal
);
if (serializedTheorem.initial_goal === undefined) {
serializedTheorem.initial_goal =
theoremProofTarget.goalToProve;
}
return [theoremProofTarget];
case TargetRequestType.ALL_ADMITS:
const parsedTargets = [];
for (const holeProofStep of theorem.proof!.holes) {
for (const holeProofStep of serializedTheorem.proof!.holes) {
parsedTargets.push(
await targetBuilder(
holeProofStep,
Expand Down
1 change: 1 addition & 0 deletions src/benchmark/framework/structures/common/inputTargets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ export abstract class FileTarget implements EqualTo<FileTarget> {
abstract equalTo(other: FileTarget): boolean;
abstract hash(): number;
abstract toString(linePrefix: string, itemizeString: string): string;
// TODO: support `needsTheoremInitialGoals` here to improve parsing performance
}

export class SpecificTheoremTarget extends FileTarget {
Expand Down
4 changes: 2 additions & 2 deletions src/core/inspectSourceFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export async function inspectSourceFile(
fileUri: Uri,
client: CoqLspClient,
abortSignal: AbortSignal,
needsTheoremInitialGoals: boolean = true,
needsTheoremInitialGoals: boolean,
eventLogger?: EventLogger
): Promise<AnalyzedFile> {
const sourceFileEnvironment = await createSourceFileEnvironment(
Expand Down Expand Up @@ -82,7 +82,7 @@ export async function createSourceFileEnvironment(
fileUri: Uri,
client: CoqLspClient,
abortSignal: AbortSignal,
needsTheoremInitialGoals: boolean = true,
needsTheoremInitialGoals: boolean,
eventLogger?: EventLogger
): Promise<SourceFileEnvironment> {
const fileTheorems = await parseCoqFile(
Expand Down
6 changes: 5 additions & 1 deletion src/llm/llmServices/grazie/grazieService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,11 @@ class GrazieServiceInternal extends LLMServiceInternal<
chat: ChatHistory,
modelParams: GrazieModelParams
): GrazieFormattedHistory {
const o1CompatibleChatHistory = toO1CompatibleChatHistory(chat, modelParams.modelName);
const o1CompatibleChatHistory = toO1CompatibleChatHistory(
chat,
modelParams.modelName,
"grazie"
);

return o1CompatibleChatHistory.map((message: ChatMessage) => {
const grazieRoleName =
Expand Down
6 changes: 3 additions & 3 deletions src/llm/llmServices/openai/openAiService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import { GeneratedProofImpl } from "../generatedProof";
import { LLMServiceImpl } from "../llmService";
import { LLMServiceInternal } from "../llmServiceInternal";
import { OpenAiModelParams } from "../modelParams";
import { toO1CompatibleChatHistory } from "../utils/o1ClassModels";

import { OpenAiModelParamsResolver } from "./openAiModelParamsResolver";
import { toO1CompatibleChatHistory } from "../utils/o1ClassModels";

export class OpenAiService extends LLMServiceImpl<
OpenAiUserModelParams,
Expand Down Expand Up @@ -219,9 +219,9 @@ class OpenAiServiceInternal extends LLMServiceInternal<
private static readonly connectionErrorPattern = /^Connection error\.$/;

private formatChatHistory(
chat: ChatHistory,
chat: ChatHistory,
modelParams: OpenAiModelParams
): ChatHistory {
return toO1CompatibleChatHistory(chat, modelParams.modelName);
return toO1CompatibleChatHistory(chat, modelParams.modelName, "openai");
}
}
19 changes: 14 additions & 5 deletions src/llm/llmServices/utils/o1ClassModels.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
import { ChatHistory, ChatMessage } from "../commonStructures/chat";

const o1ClassModels = ['o1-preview', 'o1-preview-2024-09-12', 'o1-mini', 'o1-mini-2024-09-12'];
const o1ClassModelsOpenAI = [
"o1-preview",
"o1-preview-2024-09-12",
"o1-mini",
"o1-mini-2024-09-12",
];
const o1ClassModelsGrazie = ["openai-o1", "openai-o1-mini"];

/**
* As of November 2024, o1 model requires a different format of chat history.
* It doesn't support the system prompt, therefore we manually
* change system prompt to a user's message in case of this specific
* change system prompt to a user's message in case of this specific
* model usage.
*/
export function toO1CompatibleChatHistory(
chatHistory: ChatHistory,
modelName: string
chatHistory: ChatHistory,
modelName: string,
service: "openai" | "grazie"
): ChatHistory {
const o1ClassModels =
service === "openai" ? o1ClassModelsOpenAI : o1ClassModelsGrazie;
if (o1ClassModels.includes(modelName)) {
return chatHistory.map((message: ChatMessage) => {
return {
Expand All @@ -22,4 +31,4 @@ export function toO1CompatibleChatHistory(
} else {
return chatHistory;
}
}
}
3 changes: 2 additions & 1 deletion src/test/commonTestFunctions/prepareEnvironment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ export async function withPreparedEnvironment<T>(
(_hole) => true,
fileUri,
client,
new AbortController().signal
new AbortController().signal,
true // to support any ranker
)
);
const preparedEnvironment = {
Expand Down
Loading