diff --git a/etc/docs/benchmark/BENCHMARKING_FRAMEWORK_GUIDE.md b/etc/docs/benchmark/BENCHMARKING_FRAMEWORK_GUIDE.md index ca8e2d7..7e5524c 100644 --- a/etc/docs/benchmark/BENCHMARKING_FRAMEWORK_GUIDE.md +++ b/etc/docs/benchmark/BENCHMARKING_FRAMEWORK_GUIDE.md @@ -40,6 +40,7 @@ This experiment must be run within the workspace environment, which the user act cd dataset/imm nix-build # builds the 'imm' project nix-shell # activates its environment +export COQ_LSP_PATH=$(which coq-lsp) # sets up the proper coq-lsp server path cd ../.. npm run single-workspace-benchmark ``` diff --git a/package.json b/package.json index 64bed75..350780b 100644 --- a/package.json +++ b/package.json @@ -397,11 +397,13 @@ "preclean-test": "npm run clean && npm run rebuild-test-resources && npm run compile && npm run lint", "clean-test": "npm run test-only", "prebenchmark": "npm run preclean-test", - "premulti-workspace-benchmark": "npm run prebenchmark", + "premulti-workspaces-benchmark": "npm run prebenchmark", "multi-workspaces-benchmark": "node ./out/benchmark/multiWorkspacesSetup.js", "presingle-workspace-benchmark": "npm run prebenchmark", "single-workspace-benchmark": "npm run test-executables-unsafe -- -g=\"[SourceExecutable] Single Workspace Benchmark\"", "benchmark": "npm run single-workspace-benchmark", + "prebenchmark-test": "rm -rf benchmarksOutput", + "benchmark-test": "npm run benchmark", "preteamcity-benchmark-setup": "npm run prebenchmark", "teamcity-benchmark-setup": "node ./out/benchmark/teamCitySetup.js", "teamcity-benchmark-agent": "npm run test-executables-unsafe -- -g=\"[SourceExecutable] Team City Benchmark Agent\"", diff --git a/src/benchmark/framework/experiment/multiWorkspacesExperiment.ts b/src/benchmark/framework/experiment/multiWorkspacesExperiment.ts index 20088b7..db890f7 100644 --- a/src/benchmark/framework/experiment/multiWorkspacesExperiment.ts +++ b/src/benchmark/framework/experiment/multiWorkspacesExperiment.ts @@ -19,6 +19,7 @@ import { AbstractExperiment, ExecutionContext } from "./abstractExperiment"; * In summary, `MultiWorkspacesExperiment` is designed to work with multiple workspaces per run, * making it better suited for large benchmarking experiments. */ +// CRITICAL TODO: support COQ_LSP_PATH switch for the processes of different workspaces export class MultiWorkspacesExperiment extends AbstractExperiment { constructor( bundles: InputBenchmarkingBundle[] = [], diff --git a/src/benchmark/framework/parseDataset/cacheHandlers/cacheUpdater.ts b/src/benchmark/framework/parseDataset/cacheHandlers/cacheUpdater.ts index fc1da51..3db7df4 100644 --- a/src/benchmark/framework/parseDataset/cacheHandlers/cacheUpdater.ts +++ b/src/benchmark/framework/parseDataset/cacheHandlers/cacheUpdater.ts @@ -20,7 +20,6 @@ import { ParsedFileHolder, ParsedFileTarget, } from "../coqProjectParser/implementation/parsedWorkspaceHolder"; -import { throwOnTheoremWithoutInitialGoal } from "../utils/invariantFailedErrors"; export function updateWorkspaceCache( workspaceCache: WorkspaceCacheHolder, @@ -84,22 +83,19 @@ namespace UpdateCacheHolders { } cachedFile.updateDocumentVersion(parsedFile.documentVersion); + const newlyInitializedCachedTheorems = new Set(); for (const fileTarget of parsedFileHolder.targets()) { - let cachedTheorem = cachedFile.getCachedTheorem( - fileTarget.sourceTheorem.name - ); + const theoremName = fileTarget.sourceTheorem.name; + let cachedTheorem = cachedFile.getCachedTheorem(theoremName); if (cachedTheorem === undefined) { + newlyInitializedCachedTheorems.add(theoremName); cacheUpdaterLogger.debug( `+ cached new theorem: "${fileTarget.sourceTheorem.name}"` ); cachedTheorem = new CacheHolderData.CachedTheoremData( fileTarget.sourceTheorem ); - buildInitialTargets( - cachedTheorem, - cacheUpdaterLogger, - parsedFile.filePath - ); + buildInitialTargets(cachedTheorem, cacheUpdaterLogger); cachedFile.addCachedTheorem(cachedTheorem); } else { cacheUpdaterLogger.debug( @@ -107,9 +103,10 @@ namespace UpdateCacheHolders { ); } - updateCachedTheoremData( + updateCachedTheoremWithRequestedTarget( cachedTheorem, fileTarget, + newlyInitializedCachedTheorems.has(theoremName), cachedFile.workspacePath, cacheUpdaterLogger ); @@ -138,18 +135,15 @@ namespace UpdateCacheHolders { const cachedTheorem = new CacheHolderData.CachedTheoremData( theoremData ); - buildInitialTargets( - cachedTheorem, - cachedFileUpdateLogger, - parsedFile.filePath - ); + buildInitialTargets(cachedTheorem, cachedFileUpdateLogger); for (const fileTarget of parsedFileTargetsByTheorem.get( theoremData.name ) ?? []) { - updateCachedTheoremData( + updateCachedTheoremWithRequestedTarget( cachedTheorem, fileTarget, + true, workspacePath, cachedFileUpdateLogger ); @@ -167,8 +161,7 @@ namespace UpdateCacheHolders { export function buildInitialTargets( cachedTheorem: CacheHolderData.CachedTheoremData, - cachedFileUpdateLogger: AsOneRecordLogsBuilder, - sourceFilePath: string + cachedFileUpdateLogger: AsOneRecordLogsBuilder ) { if (!cachedTheorem.hasNoTargets()) { cachedFileUpdateLogger @@ -205,17 +198,10 @@ namespace UpdateCacheHolders { const sourceTheoremData = cachedTheorem.theoremData; // PROVE_THEOREM target - const initialGoal = sourceTheoremData.sourceTheorem.initial_goal; - if (initialGoal === null) { - throwOnTheoremWithoutInitialGoal( - sourceTheoremData.name, - sourceFilePath - ); - } initializeCachedTarget( TargetType.PROVE_THEOREM, extractTheoremFisrtProofStep(sourceTheoremData), - initialGoal + sourceTheoremData.sourceTheorem.initial_goal ?? undefined ); // ADMIT target @@ -224,9 +210,15 @@ namespace UpdateCacheHolders { ); } - export function updateCachedTheoremData( + /** + * _Note:_ additionally, this method checks two invariants. + * 1. `CachedTheoremData` should have its targets initialized before updating them with parsed ones. + * 2. Parsed target should not have been requested, if it had been already present in cache. + */ + export function updateCachedTheoremWithRequestedTarget( cachedTheorem: CacheHolderData.CachedTheoremData, parsedTarget: ParsedFileTarget, + isNewlyInitialized: boolean, workspacePath: string, cachedFileUpdateLogger: AsOneRecordLogsBuilder ) { @@ -251,6 +243,15 @@ namespace UpdateCacheHolders { `Cache building invariant failed: \`CachedTheoremData\` is built incorrectly` ); } else { + const parsedTargetWasBuiltFromInitialGoal = + parsedTarget.targetType === TargetType.PROVE_THEOREM && + parsedTarget.sourceTheorem.sourceTheorem.initial_goal !== null; + const cachedTargetIsInitializedComplete = + isNewlyInitialized && parsedTargetWasBuiltFromInitialGoal; + if (cachedTargetIsInitializedComplete) { + return; + } + if (cachedTargetToUpdate.hasCachedGoal()) { cachedFileUpdateLogger .error( diff --git a/src/benchmark/framework/parseDataset/coqProjectParser/implementation/internalSignature.ts b/src/benchmark/framework/parseDataset/coqProjectParser/implementation/internalSignature.ts index ce2c876..98d1a8f 100644 --- a/src/benchmark/framework/parseDataset/coqProjectParser/implementation/internalSignature.ts +++ b/src/benchmark/framework/parseDataset/coqProjectParser/implementation/internalSignature.ts @@ -33,8 +33,6 @@ export namespace ParseCoqProjectInternalSignature { export type FilePathToFileTargets = { [key: string]: FileTarget[] }; export interface FileTarget { - // TODO: optimize - `PROVE_THEOREM` targets don't need to be requested explicitly, - // they will be always parsed together with the theorems requestType: TargetRequestType; /** * If `specificTheoremName` is undefined, this target means request on all file theorems. diff --git a/src/benchmark/framework/parseDataset/coqProjectParser/implementation/parseCoqProject.ts b/src/benchmark/framework/parseDataset/coqProjectParser/implementation/parseCoqProject.ts index 45e532e..ab5c963 100644 --- a/src/benchmark/framework/parseDataset/coqProjectParser/implementation/parseCoqProject.ts +++ b/src/benchmark/framework/parseDataset/coqProjectParser/implementation/parseCoqProject.ts @@ -25,6 +25,7 @@ import { SerializedGoal, serializeGoal, } from "../../../utils/coqUtils/goalParser"; +import { extractSerializedTheoremFisrtProofStep } from "../../../utils/coqUtils/proofTargetExtractor"; import { LogsIPCSender } from "../../../utils/subprocessUtils/ipc/onParentProcessCallExecutor/logsIpcSender"; import { ParseCoqProjectInternalSignature } from "./internalSignature"; @@ -99,15 +100,12 @@ export namespace ParseCoqProjectImpl { logger: Logger ): Promise { const mockDocumentVersion = 1; - // TODO: [@Gleb Solovev] Do not create this Abort Controller but pass - // the one created at the top - // TODO + check coq-lsp creation in benchmarks - const abortController = new AbortController(); const sourceFileEnvironment = await createSourceFileEnvironment( mockDocumentVersion, Uri.fromPath(filePath), coqLspClient, - abortController.signal + new AbortController().signal, // abort behaviour is not supported at the parsing stage + true // TODO: pass `ContextTheoremsRanker.needsUnwrappedNotations` here to improve performance ); const serializedParsedFile: SerializedParsedCoqFile = { serializedTheoremsByNames: packIntoMappedObject( @@ -141,10 +139,12 @@ export namespace ParseCoqProjectImpl { for (const fileTarget of fileTargets) { if (fileTarget.specificTheoremName === undefined) { // all theorems requests - for (const theorem of mappedObjectValues(theoremsMapping)) { + for (const serializedTheorem of mappedObjectValues( + theoremsMapping + )) { const parsedTargetsFromTheorem = await extractTargetsFromTheorem( - theorem, + serializedTheorem, fileTarget.requestType, serializedParsedFile, coqLspClient, @@ -176,7 +176,7 @@ export namespace ParseCoqProjectImpl { } async function extractTargetsFromTheorem( - theorem: SerializedTheorem, + serializedTheorem: SerializedTheorem, requestType: TargetRequestType, serializedParsedFile: SerializedParsedCoqFile, coqLspClient: CoqLspClient, @@ -192,7 +192,7 @@ export namespace ParseCoqProjectImpl { knownGoal ) => { return { - theoremName: theorem.name, + theoremName: serializedTheorem.name, targetType: targetType, goalToProve: knownGoal ?? @@ -207,12 +207,19 @@ export namespace ParseCoqProjectImpl { }; switch (requestType) { case TargetRequestType.THEOREM_PROOF: - // THEOREM_PROOF goals are already parsed within the theorems, - // so `ParsedFileTarget`-s for them are redundant - return []; + const theoremProofTarget = await targetBuilder( + extractSerializedTheoremFisrtProofStep(serializedTheorem), + TargetType.PROVE_THEOREM, + serializedTheorem.initial_goal + ); + if (serializedTheorem.initial_goal === undefined) { + serializedTheorem.initial_goal = + theoremProofTarget.goalToProve; + } + return [theoremProofTarget]; case TargetRequestType.ALL_ADMITS: const parsedTargets = []; - for (const holeProofStep of theorem.proof!.holes) { + for (const holeProofStep of serializedTheorem.proof!.holes) { parsedTargets.push( await targetBuilder( holeProofStep, diff --git a/src/benchmark/framework/structures/common/inputTargets.ts b/src/benchmark/framework/structures/common/inputTargets.ts index 7b3ea97..f65e3d4 100644 --- a/src/benchmark/framework/structures/common/inputTargets.ts +++ b/src/benchmark/framework/structures/common/inputTargets.ts @@ -259,6 +259,7 @@ export abstract class FileTarget implements EqualTo { abstract equalTo(other: FileTarget): boolean; abstract hash(): number; abstract toString(linePrefix: string, itemizeString: string): string; + // TODO: support `needsTheoremInitialGoals` here to improve parsing performance } export class SpecificTheoremTarget extends FileTarget { diff --git a/src/core/inspectSourceFile.ts b/src/core/inspectSourceFile.ts index 28c38c3..ac338f3 100644 --- a/src/core/inspectSourceFile.ts +++ b/src/core/inspectSourceFile.ts @@ -18,7 +18,7 @@ export async function inspectSourceFile( fileUri: Uri, client: CoqLspClient, abortSignal: AbortSignal, - needsTheoremInitialGoals: boolean = true, + needsTheoremInitialGoals: boolean, eventLogger?: EventLogger ): Promise { const sourceFileEnvironment = await createSourceFileEnvironment( @@ -82,7 +82,7 @@ export async function createSourceFileEnvironment( fileUri: Uri, client: CoqLspClient, abortSignal: AbortSignal, - needsTheoremInitialGoals: boolean = true, + needsTheoremInitialGoals: boolean, eventLogger?: EventLogger ): Promise { const fileTheorems = await parseCoqFile( diff --git a/src/llm/llmServices/grazie/grazieService.ts b/src/llm/llmServices/grazie/grazieService.ts index c913db6..10e9e7c 100644 --- a/src/llm/llmServices/grazie/grazieService.ts +++ b/src/llm/llmServices/grazie/grazieService.ts @@ -120,7 +120,11 @@ class GrazieServiceInternal extends LLMServiceInternal< chat: ChatHistory, modelParams: GrazieModelParams ): GrazieFormattedHistory { - const o1CompatibleChatHistory = toO1CompatibleChatHistory(chat, modelParams.modelName); + const o1CompatibleChatHistory = toO1CompatibleChatHistory( + chat, + modelParams.modelName, + "grazie" + ); return o1CompatibleChatHistory.map((message: ChatMessage) => { const grazieRoleName = diff --git a/src/llm/llmServices/openai/openAiService.ts b/src/llm/llmServices/openai/openAiService.ts index f363b2e..77c9f37 100644 --- a/src/llm/llmServices/openai/openAiService.ts +++ b/src/llm/llmServices/openai/openAiService.ts @@ -14,9 +14,9 @@ import { GeneratedProofImpl } from "../generatedProof"; import { LLMServiceImpl } from "../llmService"; import { LLMServiceInternal } from "../llmServiceInternal"; import { OpenAiModelParams } from "../modelParams"; +import { toO1CompatibleChatHistory } from "../utils/o1ClassModels"; import { OpenAiModelParamsResolver } from "./openAiModelParamsResolver"; -import { toO1CompatibleChatHistory } from "../utils/o1ClassModels"; export class OpenAiService extends LLMServiceImpl< OpenAiUserModelParams, @@ -219,9 +219,9 @@ class OpenAiServiceInternal extends LLMServiceInternal< private static readonly connectionErrorPattern = /^Connection error\.$/; private formatChatHistory( - chat: ChatHistory, + chat: ChatHistory, modelParams: OpenAiModelParams ): ChatHistory { - return toO1CompatibleChatHistory(chat, modelParams.modelName); + return toO1CompatibleChatHistory(chat, modelParams.modelName, "openai"); } } diff --git a/src/llm/llmServices/utils/o1ClassModels.ts b/src/llm/llmServices/utils/o1ClassModels.ts index ba9cbdd..e6f6069 100644 --- a/src/llm/llmServices/utils/o1ClassModels.ts +++ b/src/llm/llmServices/utils/o1ClassModels.ts @@ -1,17 +1,26 @@ import { ChatHistory, ChatMessage } from "../commonStructures/chat"; -const o1ClassModels = ['o1-preview', 'o1-preview-2024-09-12', 'o1-mini', 'o1-mini-2024-09-12']; +const o1ClassModelsOpenAI = [ + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", +]; +const o1ClassModelsGrazie = ["openai-o1", "openai-o1-mini"]; /** * As of November 2024, o1 model requires a different format of chat history. * It doesn't support the system prompt, therefore we manually - * change system prompt to a user's message in case of this specific + * change system prompt to a user's message in case of this specific * model usage. */ export function toO1CompatibleChatHistory( - chatHistory: ChatHistory, - modelName: string + chatHistory: ChatHistory, + modelName: string, + service: "openai" | "grazie" ): ChatHistory { + const o1ClassModels = + service === "openai" ? o1ClassModelsOpenAI : o1ClassModelsGrazie; if (o1ClassModels.includes(modelName)) { return chatHistory.map((message: ChatMessage) => { return { @@ -22,4 +31,4 @@ export function toO1CompatibleChatHistory( } else { return chatHistory; } -} \ No newline at end of file +} diff --git a/src/test/commonTestFunctions/prepareEnvironment.ts b/src/test/commonTestFunctions/prepareEnvironment.ts index 54d1d5e..1f9404b 100644 --- a/src/test/commonTestFunctions/prepareEnvironment.ts +++ b/src/test/commonTestFunctions/prepareEnvironment.ts @@ -46,7 +46,8 @@ export async function withPreparedEnvironment( (_hole) => true, fileUri, client, - new AbortController().signal + new AbortController().signal, + true // to support any ranker ) ); const preparedEnvironment = { diff --git a/src/test/legacyBenchmark/benchmarkingFramework.ts b/src/test/legacyBenchmark/benchmarkingFramework.ts index 5a20d33..4b3cd3b 100644 --- a/src/test/legacyBenchmark/benchmarkingFramework.ts +++ b/src/test/legacyBenchmark/benchmarkingFramework.ts @@ -40,6 +40,7 @@ import { consoleLog, consoleLogSeparatorLine } from "./utils/loggingUtils"; export interface TestBenchmarkOptions extends TestBenchmarkOptionsWithDefaults { filePath: string; + // TODO: support ranker inputModelsParams: InputModelsParams; relativePathToFile: string; } @@ -451,7 +452,8 @@ async function prepareForBenchmarkCompletions( mockDocumentVersion, shouldCompleteHole, fileUri, - coqLspClient + coqLspClient, + true // TODO: pass `ranker.needsUnwrappedNotations` here ); const llmServices: LLMServices = { openAiService: new OpenAiService(eventLogger), @@ -479,14 +481,16 @@ async function extractCompletionTargets( documentVersion: number, shouldCompleteHole: (hole: ProofStep) => boolean, fileUri: Uri, - client: CoqLspClient + client: CoqLspClient, + rankerNeedsUnwrappedNotations: boolean ): Promise<[BenchmarkingCompletionTargets, SourceFileEnvironment]> { const abortController = new AbortController(); const sourceFileEnvironment = await createSourceFileEnvironment( documentVersion, fileUri, client, - abortController.signal + abortController.signal, + rankerNeedsUnwrappedNotations ); const completionTargets = await createCompletionTargets( documentVersion,