Skip to content

Commit

Permalink
Add support of CREATE|DROP VECTOR INDEX syntax (#84)
Browse files Browse the repository at this point in the history
* Add support of CREATE|DROP VECTOR INDEX syntax

For the syntax documentation, please refer to:

https://cloud.google.com/spanner/docs/reference/standard-sql/data-definition-language#vector_index_statements

* Add more comments

* Move the comment
  • Loading branch information
git-hulk authored Aug 15, 2024
1 parent f34cb10 commit 2482e84
Show file tree
Hide file tree
Showing 14 changed files with 424 additions and 0 deletions.
67 changes: 67 additions & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@ func (CreateTable) isStatement() {}
// The empty isStatement methods below are markers: they have no behavior and
// exist only so that each receiver type carries an isStatement method.
// NOTE(review): presumably this is what makes the type satisfy the Statement
// interface — confirm against the interface declaration.

func (CreateSequence) isStatement() {}
func (CreateView) isStatement() {}
func (CreateIndex) isStatement() {}
func (CreateVectorIndex) isStatement() {}
func (CreateRole) isStatement() {}
func (AlterTable) isStatement() {}
func (AlterIndex) isStatement() {}
func (DropTable) isStatement() {}
func (DropIndex) isStatement() {}
func (DropVectorIndex) isStatement() {}
func (DropRole) isStatement() {}
func (Insert) isStatement() {}
func (Delete) isStatement() {}
Expand Down Expand Up @@ -226,8 +228,10 @@ func (CreateSequence) isDDL() {}
// The empty isDDL methods below are markers: they have no behavior and exist
// only so that each receiver type carries an isDDL method.
// NOTE(review): presumably this is what makes the type satisfy the DDL
// interface — confirm against the interface declaration.

func (AlterTable) isDDL() {}
func (DropTable) isDDL() {}
func (CreateIndex) isDDL() {}
func (CreateVectorIndex) isDDL() {}
func (AlterIndex) isDDL() {}
func (DropIndex) isDDL() {}
func (DropVectorIndex) isDDL() {}
func (CreateRole) isDDL() {}
func (DropRole) isDDL() {}
func (Grant) isDDL() {}
Expand Down Expand Up @@ -1829,6 +1833,56 @@ type CreateIndex struct {
InterleaveIn *InterleaveIn // optional
}

// CreateVectorIndex is CREATE VECTOR INDEX statement node.
//
//	CREATE VECTOR INDEX {{if .IfNotExists}}IF NOT EXISTS{{end}} {{.Name | sql}}
//	ON {{.TableName | sql}}({{.ColumnName | sql}})
//	{{.Where | sqlOpt}}
//	{{.Options | sql}}
type CreateVectorIndex struct {
	// pos = Create
	// end = Options.end

	Create token.Pos // position of "CREATE" keyword

	IfNotExists bool // optional
	Name *Ident
	TableName *Ident
	ColumnName *Ident

	// Only `WHERE column_name IS NOT NULL` is allowed for now. The parser still reuses the
	// generic WHERE clause parsing instead of enforcing that shape, so that additional
	// conditions can be supported in the future without changing this node.
	//
	// Reference: https://cloud.google.com/spanner/docs/reference/standard-sql/data-definition-language#vector_index_statements
	Where *Where // optional
	Options *VectorIndexOptions
}

// VectorIndexOptions is OPTIONS clause node in CREATE VECTOR INDEX.
//
//	OPTIONS ({{.Records | sqlJoin ", "}})
type VectorIndexOptions struct {
	// pos = Options
	// end = Rparen + 1

	Options token.Pos // position of "OPTIONS" keyword
	Rparen token.Pos // position of ")"

	Records []*VectorIndexOption // len(Records) > 0
}

// VectorIndexOption is OPTIONS record node.
//
//	{{.Key | sql}}={{.Value | sql}}
type VectorIndexOption struct {
	// pos = Key.pos
	// end = Value.end

	Key *Ident // option name, the left-hand side of "="
	Value Expr // option value, the right-hand side of "="
}

// CreateChangeStream is CREATE CHANGE STREAM statement node.
//
// CREATE CHANGE STREAM {{.Name | sql}} {{.For | sqlOpt}} {{.Options | sqlOpt}}
Expand Down Expand Up @@ -2000,6 +2054,19 @@ type DropIndex struct {
Name *Ident
}

// DropVectorIndex is DROP VECTOR INDEX statement node.
//
//	DROP VECTOR INDEX {{if .IfExists}}IF EXISTS{{end}} {{.Name | sql}}
type DropVectorIndex struct {
	// pos = Drop
	// end = Name.end

	Drop token.Pos // position of "DROP" keyword

	IfExists bool // true when the statement carries IF EXISTS
	Name *Ident // name of the vector index to drop
}

// CreateRole is CREATE ROLE statement node.
//
// CREATE ROLE {{.Name | sql}}
Expand Down
3 changes: 3 additions & 0 deletions ast/ast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func TestStatement(t *testing.T) {
Statement(&AlterIndex{}).isStatement()
Statement(&DropTable{}).isStatement()
Statement(&DropIndex{}).isStatement()
Statement(&DropVectorIndex{}).isStatement()
Statement(&DropRole{}).isStatement()
Statement(&Insert{}).isStatement()
Statement(&Delete{}).isStatement()
Expand Down Expand Up @@ -124,13 +125,15 @@ func TestDDL(t *testing.T) {
DDL(&CreateDatabase{}).isDDL()
DDL(&CreateTable{}).isDDL()
DDL(&CreateIndex{}).isDDL()
DDL(&CreateVectorIndex{}).isDDL()
DDL(&CreateSequence{}).isDDL()
DDL(&CreateView{}).isDDL()
DDL(&AlterTable{}).isDDL()
DDL(&DropTable{}).isDDL()
DDL(&CreateIndex{}).isDDL()
DDL(&AlterIndex{}).isDDL()
DDL(&DropIndex{}).isDDL()
DDL(&DropVectorIndex{}).isDDL()
DDL(&CreateRole{}).isDDL()
DDL(&DropRole{}).isDDL()
DDL(&Grant{}).isDDL()
Expand Down
11 changes: 11 additions & 0 deletions ast/pos.go
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,14 @@ func (c *CreateIndex) End() token.Pos {
return c.Rparen + 1
}

// Pos returns the start of the statement, i.e. the position of its "CREATE" keyword.
func (c *CreateVectorIndex) Pos() token.Pos {
	return c.Create
}

// End returns the position immediately after the ")" that closes the OPTIONS
// clause, which is the final clause of the statement.
func (c *CreateVectorIndex) End() token.Pos {
	return c.Options.Rparen + 1
}

// Pos returns the start of the statement, as recorded in the Create field.
func (c *CreateChangeStream) Pos() token.Pos {
	return c.Create
}
Expand Down Expand Up @@ -704,6 +712,9 @@ func (a *DropStoredColumn) End() token.Pos { return a.Name.End() }
// Pos returns the start of the statement, as recorded in the Drop field.
func (d *DropIndex) Pos() token.Pos { return d.Drop }

// End returns the end of the statement, which coincides with the end of the index name.
func (d *DropIndex) End() token.Pos { return d.Name.End() }

// Pos returns the start of the statement, i.e. the position of its "DROP" keyword.
func (d *DropVectorIndex) Pos() token.Pos { return d.Drop }

// End returns the end of the statement, which coincides with the end of the index name.
func (d *DropVectorIndex) End() token.Pos { return d.Name.End() }

// Pos returns the start of the statement, as recorded in the Create field.
func (c *CreateRole) Pos() token.Pos { return c.Create }

// End returns the end of the statement, which coincides with the end of the role name.
func (c *CreateRole) End() token.Pos { return c.Name.End() }

Expand Down
38 changes: 38 additions & 0 deletions ast/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,36 @@ func (c *CreateIndex) SQL() string {
return sql
}

// SQL renders the node as a single-line CREATE VECTOR INDEX statement.
//
// The optional pieces (IF NOT EXISTS and the WHERE clause) are emitted only
// when present; the OPTIONS clause is always rendered last.
func (c *CreateVectorIndex) SQL() string {
	head := "CREATE VECTOR INDEX "
	if c.IfNotExists {
		head += "IF NOT EXISTS "
	}
	head += c.Name.SQL()

	target := " ON " + c.TableName.SQL() + " (" + c.ColumnName.SQL() + ") "

	// The WHERE node renders its own text; only the separating space is added here.
	filter := ""
	if c.Where != nil {
		filter = c.Where.SQL() + " "
	}

	return head + target + filter + c.Options.SQL()
}

// SQL renders the clause as `OPTIONS (k1=v1, k2=v2, ...)`, with the records in
// their stored order and separated by ", ".
func (v *VectorIndexOptions) SQL() string {
	out := "OPTIONS ("
	// The separator starts empty so that nothing precedes the first record.
	sep := ""
	for _, rec := range v.Records {
		out += sep + rec.SQL()
		sep = ", "
	}
	return out + ")"
}

// SQL renders a single option record as `key=value`, with no spaces around "=".
func (v *VectorIndexOption) SQL() string {
	key := v.Key.SQL()
	value := v.Value.SQL()
	return key + "=" + value
}

func (c *CreateChangeStream) SQL() string {
sql := "CREATE CHANGE STREAM " + c.Name.SQL()
if c.For != nil {
Expand Down Expand Up @@ -1077,6 +1107,14 @@ func (d *DropIndex) SQL() string {
return sql + d.Name.SQL()
}

// SQL renders the node as a DROP VECTOR INDEX statement, including IF EXISTS
// when the flag is set.
func (d *DropVectorIndex) SQL() string {
	if d.IfExists {
		return "DROP VECTOR INDEX IF EXISTS " + d.Name.SQL()
	}
	return "DROP VECTOR INDEX " + d.Name.SQL()
}

// SQL renders the node as `CREATE ROLE <name>`.
func (c *CreateRole) SQL() string {
	roleName := c.Name.SQL()
	return "CREATE ROLE " + roleName
}
Expand Down
65 changes: 65 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -1987,6 +1987,8 @@ func (p *Parser) parseDDL() ast.DDL {
return p.parseCreateView(pos)
case p.Token.IsKeywordLike("INDEX") || p.Token.IsKeywordLike("UNIQUE") || p.Token.IsKeywordLike("NULL_FILTERED"):
return p.parseCreateIndex(pos)
case p.Token.IsKeywordLike("VECTOR"):
return p.parseCreateVectorIndex(pos)
case p.Token.IsKeywordLike("ROLE"):
return p.parseCreateRole(pos)
case p.Token.IsKeywordLike("CHANGE"):
Expand All @@ -2011,6 +2013,8 @@ func (p *Parser) parseDDL() ast.DDL {
return p.parseDropTable(pos)
case p.Token.IsKeywordLike("INDEX"):
return p.parseDropIndex(pos)
case p.Token.IsKeywordLike("VECTOR"):
return p.parseDropVectorIndex(pos)
case p.Token.IsKeywordLike("ROLE"):
return p.parseDropRole(pos)
case p.Token.IsKeywordLike("CHANGE"):
Expand Down Expand Up @@ -2454,6 +2458,55 @@ func (p *Parser) parseOnDeleteAction() (onDelete ast.OnDeleteAction, onDeleteEnd
return
}

// parseVectorIndexOptions parses the OPTIONS clause of CREATE VECTOR INDEX:
//
//	OPTIONS ( key = expr { , key = expr } )
//
// At least one record is required, because a key is parsed before the closing
// ")" is ever looked for. Any token other than "," or ")" after a record is
// reported through panicfAtToken.
func (p *Parser) parseVectorIndexOptions() *ast.VectorIndexOptions {
	optionsPos := p.expectKeywordLike("OPTIONS").Pos
	p.expect("(")

	var records []*ast.VectorIndexOption
	for {
		key := p.parseIdent()
		p.expect("=")
		value := p.parseExpr()
		records = append(records, &ast.VectorIndexOption{Key: key, Value: value})

		switch p.Token.Kind {
		case ",":
			// Another record follows.
			p.nextToken()
		case ")":
			// Remember where the clause closes before consuming the token.
			rparen := p.Token.Pos
			p.nextToken()
			return &ast.VectorIndexOptions{
				Options: optionsPos,
				Rparen:  rparen,
				Records: records,
			}
		default:
			p.panicfAtToken(&p.Token, "expected expr or , or ), but: %s", p.Token.AsString)
		}
	}
}

// parseCreateVectorIndex parses the remainder of a CREATE VECTOR INDEX
// statement, starting at the VECTOR keyword. pos is the position of the
// already-consumed CREATE keyword and becomes the node's Create field.
//
// Grammar handled here:
//
//	VECTOR INDEX [IF NOT EXISTS] name ON table ( column ) [WHERE ...] OPTIONS ( ... )
func (p *Parser) parseCreateVectorIndex(pos token.Pos) *ast.CreateVectorIndex {
	p.expectKeywordLike("VECTOR")
	p.expectKeywordLike("INDEX")

	// The node is filled in field by field, in the order the tokens appear.
	node := &ast.CreateVectorIndex{Create: pos}
	node.IfNotExists = p.parseIfNotExists()
	node.Name = p.parseIdent()

	p.expect("ON")
	node.TableName = p.parseIdent()

	p.expect("(")
	node.ColumnName = p.parseIdent()
	p.expect(")")

	// WHERE is optional; the OPTIONS clause is mandatory.
	node.Where = p.tryParseWhere()
	node.Options = p.parseVectorIndexOptions()

	return node
}

func (p *Parser) parseCreateIndex(pos token.Pos) *ast.CreateIndex {
unique := false
if p.Token.IsKeywordLike("UNIQUE") {
Expand Down Expand Up @@ -2923,6 +2976,18 @@ func (p *Parser) parseDropIndex(pos token.Pos) *ast.DropIndex {
}
}

// parseDropVectorIndex parses the remainder of a DROP VECTOR INDEX statement,
// starting at the VECTOR keyword. pos is the position of the already-consumed
// DROP keyword and becomes the node's Drop field.
//
// Grammar handled here:
//
//	VECTOR INDEX [IF EXISTS] name
func (p *Parser) parseDropVectorIndex(pos token.Pos) *ast.DropVectorIndex {
	p.expectKeywordLike("VECTOR")
	p.expectKeywordLike("INDEX")

	node := &ast.DropVectorIndex{Drop: pos}
	node.IfExists = p.parseIfExists()
	node.Name = p.parseIdent()
	return node
}

func (p *Parser) parseCreateRole(pos token.Pos) *ast.CreateRole {
p.expectKeywordLike("ROLE")
name := p.parseIdent()
Expand Down
2 changes: 2 additions & 0 deletions testdata/input/ddl/create_vector_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CREATE VECTOR INDEX IF NOT EXISTS hello_vector_index ON hello(embedding)
OPTIONS(distance_type = 'COSINE')
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CREATE VECTOR INDEX hello_vector_index ON hello(embedding)
WHERE embedding IS NOT NULL
OPTIONS(distance_type = 'COSINE')
1 change: 1 addition & 0 deletions testdata/input/ddl/drop_vector_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DROP VECTOR INDEX hello_vector_index
45 changes: 45 additions & 0 deletions testdata/result/ddl/create_vector_index.sql.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
--- create_vector_index.sql
CREATE VECTOR INDEX IF NOT EXISTS hello_vector_index ON hello(embedding)
OPTIONS(distance_type = 'COSINE')
--- AST
&ast.CreateVectorIndex{
Create: 0,
IfNotExists: true,
Name: &ast.Ident{
NamePos: 34,
NameEnd: 52,
Name: "hello_vector_index",
},
TableName: &ast.Ident{
NamePos: 56,
NameEnd: 61,
Name: "hello",
},
ColumnName: &ast.Ident{
NamePos: 62,
NameEnd: 71,
Name: "embedding",
},
Where: (*ast.Where)(nil),
Options: &ast.VectorIndexOptions{
Options: 73,
Rparen: 105,
Records: []*ast.VectorIndexOption{
&ast.VectorIndexOption{
Key: &ast.Ident{
NamePos: 81,
NameEnd: 94,
Name: "distance_type",
},
Value: &ast.StringLiteral{
ValuePos: 97,
ValueEnd: 105,
Value: "COSINE",
},
},
},
},
}

--- SQL
CREATE VECTOR INDEX IF NOT EXISTS hello_vector_index ON hello (embedding) OPTIONS (distance_type="COSINE")
Loading

0 comments on commit 2482e84

Please sign in to comment.