-
Notifications
You must be signed in to change notification settings - Fork 357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: SearchFlatRuns api call for flat runs table support #8852
Changes from 28 commits
dc052e0
4ac3696
4263146
c2e0857
3086c8f
dd9d486
a063d6e
d3039da
aa95c84
0a44bb4
ce3893f
314d44f
45dfd39
afbcf97
3fdf9f2
7632812
ee8a4fe
3bca919
cb5ea71
26ec913
f094fe9
ce8aaee
b02671d
790f8a3
a743c12
25218fc
931691b
c5d1fa4
d38c961
e6f95ee
6684cda
8d72386
52476db
3d08e46
d2eafff
3cddf6b
34daed5
b844fd5
c98fe20
d1eb522
9bfc5bf
6c3ee9f
676d8e7
f5b2ff1
cbd184e
a0b9bae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,14 +2,27 @@ package internal | |
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"fmt" | ||
"strings" | ||
|
||
"github.com/uptrace/bun" | ||
"google.golang.org/grpc/codes" | ||
"google.golang.org/grpc/status" | ||
|
||
"github.com/determined-ai/determined/master/internal/db" | ||
"github.com/determined-ai/determined/master/internal/db/bunutils" | ||
"github.com/determined-ai/determined/master/internal/experiment" | ||
"github.com/determined-ai/determined/master/internal/grpcutil" | ||
"github.com/determined-ai/determined/master/internal/storage" | ||
"github.com/determined-ai/determined/master/internal/trials" | ||
"github.com/determined-ai/determined/master/pkg/ptrs" | ||
"github.com/determined-ai/determined/master/pkg/schemas/expconf" | ||
"github.com/determined-ai/determined/proto/pkg/apiv1" | ||
"github.com/determined-ai/determined/proto/pkg/projectv1" | ||
"github.com/determined-ai/determined/proto/pkg/rbacv1" | ||
"github.com/determined-ai/determined/proto/pkg/runv1" | ||
"github.com/determined-ai/determined/proto/pkg/trialv1" | ||
) | ||
|
||
func (a *apiServer) RunPrepareForReporting( | ||
|
@@ -43,3 +56,178 @@ func (a *apiServer) RunPrepareForReporting( | |
StorageId: storageID, | ||
}, nil | ||
} | ||
|
||
func (a *apiServer) SearchRuns( | ||
ctx context.Context, req *apiv1.SearchRunsRequest, | ||
) (*apiv1.SearchRunsResponse, error) { | ||
resp := &apiv1.SearchRunsResponse{Runs: []*runv1.FlatRun{}} | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
query := db.Bun().NewSelect(). | ||
Model(&resp.Runs). | ||
ModelTableExpr("runs AS r"). | ||
Apply(getRunsColumns) | ||
|
||
curUser, _, err := grpcutil.GetUser(ctx) | ||
if err != nil { | ||
return nil, status.Errorf(codes.Internal, "failed to get the user: %s", err) | ||
} | ||
var proj *projectv1.Project | ||
if req.ProjectId != nil { | ||
proj, err = a.GetProjectByID(ctx, *req.ProjectId, *curUser) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
query = query.Where("e.project_id = ?", req.ProjectId) | ||
} | ||
if query, err = experiment.AuthZProvider.Get(). | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
FilterExperimentsQuery(ctx, *curUser, proj, query, | ||
[]rbacv1.PermissionType{rbacv1.PermissionType_PERMISSION_TYPE_VIEW_EXPERIMENT_METADATA}, | ||
); err != nil { | ||
return nil, err | ||
} | ||
|
||
if req.Filter != nil { | ||
var efr experimentFilterRoot | ||
err := json.Unmarshal([]byte(*req.Filter), &efr) | ||
if err != nil { | ||
return nil, err | ||
} | ||
query = query.WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery { | ||
_, err = efr.toSQL(q) | ||
return q | ||
}).WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery { | ||
if !efr.ShowArchived { | ||
return q.Where(`e.archived = false`) | ||
} | ||
return q | ||
}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
} | ||
|
||
if req.Sort != nil { | ||
err = sortRuns(req.Sort, query) | ||
if err != nil { | ||
return nil, err | ||
} | ||
} else { | ||
query.OrderExpr("id ASC") | ||
} | ||
|
||
pagination, err := runPagedBunExperimentsQuery(ctx, query, int(req.Offset), int(req.Limit)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is another point that also applies to but allow filtering on any field effectively means some of these columns won't have indexes on them leading to poor performance for larger customers. Is there any plans in place to mitigate this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently no -- as you noted, theoretically any column, metric, or hyperparameter can act as a sort or filter. furthermore, those sorts and filters can be on multiple columns, meaning there isn't a straightforward set of indices that will satisfy the product needs while also allowing for performance. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to change the product needs then? @mackrorysd what do you think? |
||
resp.Pagination = pagination | ||
if err != nil { | ||
return nil, err | ||
} | ||
return resp, nil | ||
} | ||
|
||
func getRunsColumns(q *bun.SelectQuery) *bun.SelectQuery { | ||
return q. | ||
Column("r.id"). | ||
ColumnExpr("proto_time(r.start_time) AS start_time"). | ||
ColumnExpr("proto_time(r.end_time) AS end_time"). | ||
ColumnExpr(bunutils.ProtoStateDBCaseString(trialv1.State_value, "r.state", "state", | ||
"STATE_")). | ||
Column("r.tags"). | ||
Column("r.checkpoint_size"). | ||
Column("r.checkpoint_count"). | ||
Column("r.external_run_id"). | ||
ColumnExpr( | ||
"(SELECT COUNT(*) FROM runs r WHERE e.id = r.experiment_id) AS num_runs"). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a weird column to me. Is there a product ask around it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was initially done so that frontend could know if the run is part of a multi-trial experiment. Instead I have renamed and turned the val into a bool, where it is true if > 1. Makes the field easier to understand and moves the logic off the frontend |
||
ColumnExpr("extract(epoch FROM coalesce(r.end_time, now()) - r.start_time)::int AS duration"). | ||
ColumnExpr("COALESCE(u.display_name, u.username) as display_name"). | ||
ColumnExpr("r.hparams AS hyperparameters"). | ||
ColumnExpr("r.summary_metrics AS summary_metrics"). | ||
ColumnExpr("CASE WHEN e.parent_id IS NULL THEN NULL ELSE " + | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is confusing to me Like are we returning an experiment ID as the column forked from? Someone might assume that it is a runID that is forked from. Do we have a specific product ask for this column? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general we want all experiment fields available except ones that deal with "best trial" |
||
"json_build_object('value', e.parent_id) END AS forked_from"). | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ColumnExpr("CASE WHEN e.progress IS NULL THEN NULL ELSE " + | ||
"json_build_object('value', e.progress) END AS progress"). | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ColumnExpr("e.id AS experiment_id"). | ||
Column("e.owner_id"). | ||
ColumnExpr("e.config->>'description' AS description"). | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ColumnExpr("e.config->'resources'->>'resource_pool' AS resource_pool"). | ||
ColumnExpr("e.config->'searcher'->>'name' AS searcher_type"). | ||
ColumnExpr("e.config->'searcher'->>'metric' AS searcher_metric"). | ||
ColumnExpr("e.config->>'name' as experiment_name"). | ||
ColumnExpr("w.id AS workspace_id"). | ||
ColumnExpr("w.name AS workspace_name"). | ||
ColumnExpr("(w.archived OR p.archived) AS parent_archived"). | ||
Column("e.unmanaged"). | ||
ColumnExpr("p.name AS project_name"). | ||
NicholasBlaskey marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Join("LEFT JOIN experiments AS e ON r.experiment_id=e.id"). | ||
Join("LEFT JOIN users u ON e.owner_id = u.id"). | ||
Join("LEFT JOIN projects p ON e.project_id = p.id"). | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Join("LEFT JOIN workspaces w ON p.workspace_id = w.id") | ||
} | ||
|
||
func sortRuns(sortString *string, runQuery *bun.SelectQuery) error { | ||
if sortString == nil { | ||
return nil | ||
} | ||
sortByMap := map[string]string{ | ||
"asc": "ASC", | ||
"desc": "DESC NULLS LAST", | ||
} | ||
orderColMap := map[string]string{ | ||
"id": "id", | ||
"description": "description", | ||
"name": "name", | ||
"searcherType": "searcher_type", | ||
"searcherMetric": "searcher_metric", | ||
"startTime": "r.start_time", | ||
"endTime": "r.end_time", | ||
"state": "r.state", | ||
"progress": "COALESCE(progress, 0)", | ||
"user": "display_name", | ||
"forkedFrom": "e.parent_id", | ||
"resourcePool": "resource_pool", | ||
"projectId": "e.project_id", | ||
"checkpointSize": "checkpoint_size", | ||
"checkpointCount": "checkpoint_count", | ||
"duration": "duration", | ||
"searcherMetricsVal": "r.searcher_metric_val", | ||
"externalExperimentId": "e.external_experiment_id", | ||
"externalRunId": "r.external_run_id", | ||
"experimentId": "e.id", | ||
"numRuns": "num_runs", | ||
} | ||
sortParams := strings.Split(*sortString, ",") | ||
hasIDSort := false | ||
for _, sortParam := range sortParams { | ||
paramDetail := strings.Split(sortParam, "=") | ||
if len(paramDetail) != 2 { | ||
return status.Errorf(codes.InvalidArgument, "invalid sort parameter: %s", sortParam) | ||
} | ||
if _, ok := sortByMap[paramDetail[1]]; !ok { | ||
return status.Errorf(codes.InvalidArgument, "invalid sort direction: %s", paramDetail[1]) | ||
} | ||
sortDirection := sortByMap[paramDetail[1]] | ||
switch { | ||
case strings.HasPrefix(paramDetail[0], "hp."): | ||
param := strings.ReplaceAll(paramDetail[0], "'", "") | ||
hps := strings.ReplaceAll(strings.TrimPrefix(param, "hp."), ".", "'->'") | ||
runQuery.OrderExpr( | ||
fmt.Sprintf("r.hparams->'%s' %s", hps, sortDirection)) | ||
AmanuelAaron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
case strings.Contains(paramDetail[0], "."): | ||
metricGroup, metricName, metricQualifier, err := parseMetricsName(paramDetail[0]) | ||
if err != nil { | ||
return err | ||
} | ||
runQuery.OrderExpr("r.summary_metrics->?->?->>? ?", | ||
metricGroup, metricName, metricQualifier, bun.Safe(sortDirection)) | ||
default: | ||
if _, ok := orderColMap[paramDetail[0]]; !ok { | ||
return status.Errorf(codes.InvalidArgument, "invalid sort col: %s", paramDetail[0]) | ||
} | ||
hasIDSort = hasIDSort || paramDetail[0] == "id" | ||
runQuery.OrderExpr( | ||
fmt.Sprintf("%s %s", orderColMap[paramDetail[0]], sortDirection)) | ||
} | ||
} | ||
if !hasIDSort { | ||
runQuery.OrderExpr("id ASC") | ||
} | ||
return nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: declare variables closer to when they are used
Its kinda weird how we have
curUser
after these declarationsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what you mean here.
resp
,curUser
, andquery
are all used in the following lines for getting the project and doing the auth checkThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it just looks off to me since most other API calls start with getting the user
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was following the same order that exists in SearchExperiments. I can move the get user stuff to the top though