Skip to content

Commit

Permalink
feat(evals): Support file based evals in "runNewEvaluation" #1579
Browse files (browse the repository at this point in the history)
  • Loading branch information
ssbushi authored Jan 2, 2025
1 parent 9aad90e commit 99eb347
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 22 deletions.
2 changes: 1 addition & 1 deletion genkit-tools/cli/src/commands/eval-flow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ export const evalFlow = new Command('eval:flow')
const evalDataset = await runInference({
manager,
actionRef,
evalFlowInput,
evalInferenceInput: evalFlowInput,
auth: options.auth,
});

Expand Down
52 changes: 32 additions & 20 deletions genkit-tools/common/src/eval/evaluate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,25 +66,38 @@ export async function runNewEvaluation(
manager: RuntimeManager,
request: RunNewEvaluationRequest
): Promise<EvalRunKey> {
const { datasetId, actionRef, evaluators } = request;
const datasetStore = await getDatasetStore();
logger.info(`Fetching dataset ${datasetId}...`);
const dataset = await datasetStore.getDataset(datasetId);
const datasetMetadatas = await datasetStore.listDatasets();
const targetDatasetMetadata = datasetMetadatas.find(
(d) => d.datasetId === datasetId
);
const datasetVersion = targetDatasetMetadata?.version;
const { dataSource, actionRef, evaluators } = request;
const { datasetId, data } = dataSource;
if (!datasetId && !data) {
throw new Error(`Either 'data' or 'datasetId' must be provided`);
}

let evalInferenceInput: EvalInferenceInput;
let metadata = {};
if (datasetId) {
const datasetStore = await getDatasetStore();
logger.info(`Fetching dataset ${datasetId}...`);
const dataset = await datasetStore.getDataset(datasetId);
if (dataset.length === 0) {
throw new Error(`Dataset ${datasetId} is empty`);
}
evalInferenceInput = EvalInferenceInputSchema.parse(dataset);

if (dataset.length === 0) {
throw new Error(`Dataset ${datasetId} is empty`);
const datasetMetadatas = await datasetStore.listDatasets();
const targetDatasetMetadata = datasetMetadatas.find(
(d) => d.datasetId === datasetId
);
const datasetVersion = targetDatasetMetadata?.version;
metadata = { datasetId, datasetVersion };
} else {
evalInferenceInput = data!;
}

logger.info('Running inference...');
const evalDataset = await runInference({
manager,
actionRef,
evalFlowInput: EvalInferenceInputSchema.parse(dataset),
evalInferenceInput,
auth: request.options?.auth,
actionConfig: request.options?.actionConfig,
});
Expand All @@ -98,9 +111,8 @@ export async function runNewEvaluation(
evaluatorActions,
evalDataset,
augments: {
...metadata,
actionRef,
datasetId,
datasetVersion,
actionConfig: request.options?.actionConfig,
},
});
Expand All @@ -111,19 +123,19 @@ export async function runNewEvaluation(
export async function runInference(params: {
manager: RuntimeManager;
actionRef: string;
evalFlowInput: EvalInferenceInput;
evalInferenceInput: EvalInferenceInput;
auth?: string;
actionConfig?: any;
}): Promise<EvalInput[]> {
const { manager, actionRef, evalFlowInput, auth, actionConfig } = params;
const { manager, actionRef, evalInferenceInput, auth, actionConfig } = params;
if (!isSupportedActionRef(actionRef)) {
throw new Error('Inference is only supported on flows and models');
}

const evalDataset: EvalInput[] = await bulkRunAction({
manager,
actionRef,
evalFlowInput,
evalInferenceInput,
auth,
actionConfig,
});
Expand Down Expand Up @@ -210,13 +222,13 @@ export async function getMatchingEvaluatorActions(
async function bulkRunAction(params: {
manager: RuntimeManager;
actionRef: string;
evalFlowInput: EvalInferenceInput;
evalInferenceInput: EvalInferenceInput;
auth?: string;
actionConfig?: any;
}): Promise<EvalInput[]> {
const { manager, actionRef, evalFlowInput, auth, actionConfig } = params;
const { manager, actionRef, evalInferenceInput, auth, actionConfig } = params;
const isModelAction = actionRef.startsWith('/model');
let testCases: TestCase[] = evalFlowInput.map((c) => ({
let testCases: TestCase[] = evalInferenceInput.map((c) => ({
input: c.input,
reference: c.reference,
testCaseId: c.testCaseId ?? generateTestCaseId(),
Expand Down
5 changes: 4 additions & 1 deletion genkit-tools/common/src/types/apis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ export const UpdateDatasetRequestSchema = z.object({
export type UpdateDatasetRequest = z.infer<typeof UpdateDatasetRequestSchema>;

export const RunNewEvaluationRequestSchema = z.object({
datasetId: z.string(),
dataSource: z.object({
datasetId: z.string().optional(),
data: EvalInferenceInputSchema.optional(),
}),
actionRef: z.string(),
evaluators: z.array(z.string()).optional(),
options: z
Expand Down

0 comments on commit 99eb347

Please sign in to comment.