From 0af43a764893028a0931c146afc439f62b42e2bd Mon Sep 17 00:00:00 2001 From: Martin Martinez Rivera Date: Fri, 26 Apr 2019 16:34:58 -0700 Subject: [PATCH] Use initial schema during bulk load. Use the initial schema during bulk load; otherwise, data might not be loaded correctly for the predicates defined in that schema. In particular, I observed that when loading the 21million dataset with dgraph.type triples added to it, queries would not respond and would eventually time out. I figured this was because the index for that predicate was not built during the bulk load. I reloaded the dataset with my changes included and queries responded immediately. --- dgraph/cmd/bulk/schema.go | 10 ++++++++++ schema/schema.go | 18 +++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/dgraph/cmd/bulk/schema.go b/dgraph/cmd/bulk/schema.go index dcf28c1e9b0..369a1bfbf2f 100644 --- a/dgraph/cmd/bulk/schema.go +++ b/dgraph/cmd/bulk/schema.go @@ -25,6 +25,7 @@ import ( "github.com/dgraph-io/badger" "github.com/dgraph-io/dgraph/posting" "github.com/dgraph-io/dgraph/protos/pb" + "github.com/dgraph-io/dgraph/schema" wk "github.com/dgraph-io/dgraph/worker" "github.com/dgraph-io/dgraph/x" ) @@ -45,6 +46,15 @@ func newSchemaStore(initial []*pb.SchemaUpdate, opt options, state *state) *sche }, state: state, } + + // Load all initial predicates. Some predicates that might not be used when + // the alpha is started (e.g. ACL predicates) might be included but it's + // better to include them in case the input data contains triples with these + // predicates. 
+ for _, update := range schema.CompleteInitialSchema() { + s.m[update.Predicate] = update + } + if opt.StoreXids { s.m["xid"] = &pb.SchemaUpdate{ ValueType: pb.Posting_STRING, diff --git a/schema/schema.go b/schema/schema.go index 27213ce4c2e..20cedde9a9d 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -408,7 +408,23 @@ func LoadTypesFromDb() error { return nil } +// InitialSchema returns the schema updates to insert at the beginning of +// Dgraph's execution. It looks at the worker options to determine which +// attributes to insert. func InitialSchema() []*pb.SchemaUpdate { + return initialSchemaInternal(false) +} + +// CompleteInitialSchema returns all the schema updates regardless of the worker +// options. This is useful in situations where the worker options are not known +// in advance and it's better to create all the reserved predicates and remove +// them later than to miss some of them. An example of such a situation is bulk +// loading. +func CompleteInitialSchema() []*pb.SchemaUpdate { + return initialSchemaInternal(true) +} + +func initialSchemaInternal(all bool) []*pb.SchemaUpdate { var initialSchema []*pb.SchemaUpdate // propose the schema for _predicate_ @@ -428,7 +444,7 @@ func InitialSchema() []*pb.SchemaUpdate { List: true, }) - if x.WorkerConfig.AclEnabled { + if all || x.WorkerConfig.AclEnabled { // propose the schema update for acl predicates initialSchema = append(initialSchema, []*pb.SchemaUpdate{ {