diff --git a/pkg/geo/geo.go b/pkg/geo/geo.go index 26328352c8ef..a675587b07a0 100644 --- a/pkg/geo/geo.go +++ b/pkg/geo/geo.go @@ -122,6 +122,11 @@ func (g *Geometry) AsGeography() (*Geography, error) { return NewGeography(geopb.EWKB(ret)), nil } +// AsGeomT returns the geometry as a geom.T object. +func (g *Geometry) AsGeomT() (geom.T, error) { + return ewkb.Unmarshal(g.ewkb) +} + // Geography is a spherical spatial object. type Geography struct { spatialObjectBase diff --git a/pkg/geo/geo_test.go b/pkg/geo/geo_test.go index 16fcc4695132..d7a4f1776ef6 100644 --- a/pkg/geo/geo_test.go +++ b/pkg/geo/geo_test.go @@ -14,7 +14,6 @@ import ( "fmt" "testing" - "github.com/cockroachdb/cockroach/pkg/geo/geopb" "github.com/cockroachdb/cockroach/pkg/geo/geos" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/datadriven" @@ -187,7 +186,7 @@ func TestClipEWKBByRect(t *testing.T) { d.ScanArgs(t, "xmax", &xMax) d.ScanArgs(t, "ymax", &yMax) ewkb, err := geos.ClipEWKBByRect( - geopb.WKB(g.ewkb), float64(xMin), float64(yMin), float64(xMax), float64(yMax)) + g.ewkb, float64(xMin), float64(yMin), float64(xMax), float64(yMax)) if err != nil { return err.Error() } diff --git a/pkg/geo/geoindex/config.pb.go b/pkg/geo/geoindex/config.pb.go new file mode 100644 index 000000000000..3ded6685b48e --- /dev/null +++ b/pkg/geo/geoindex/config.pb.go @@ -0,0 +1,1050 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: geo/geoindex/config.proto + +package geoindex + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" + +import encoding_binary "encoding/binary" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. 
+// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +// Config is the information used to tune one instance of a geospatial index. +// Each SQL index will have its own config. +// +// At the moment, only one major indexing strategy is implemented (S2 cells). +type Config struct { + S2Geography *S2GeographyConfig `protobuf:"bytes,1,opt,name=s2_geography,json=s2Geography,proto3" json:"s2_geography,omitempty"` + S2Geometry *S2GeometryConfig `protobuf:"bytes,2,opt,name=s2_geometry,json=s2Geometry,proto3" json:"s2_geometry,omitempty"` +} + +func (m *Config) Reset() { *m = Config{} } +func (m *Config) String() string { return proto.CompactTextString(m) } +func (*Config) ProtoMessage() {} +func (*Config) Descriptor() ([]byte, []int) { + return fileDescriptor_config_01f672ff9c2aee8f, []int{0} +} +func (m *Config) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *Config) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil +} +func (dst *Config) XXX_Merge(src proto.Message) { + xxx_messageInfo_Config.Merge(dst, src) +} +func (m *Config) XXX_Size() int { + return m.Size() +} +func (m *Config) XXX_DiscardUnknown() { + xxx_messageInfo_Config.DiscardUnknown(m) +} + +var xxx_messageInfo_Config proto.InternalMessageInfo + +// S2Config is the required information to tune one instance of an S2 cell +// backed geospatial index. For advanced users only -- the defaults should be +// good enough. +// +// TODO(sumeer): Based on experiments, reduce the knobs below by making the +// covering self-tuning. +type S2Config struct { + // MinLevel is the minimum cell level stored in the index. If left unset, it + // defaults to 0. 
+ MinLevel int32 `protobuf:"varint,1,opt,name=min_level,json=minLevel,proto3" json:"min_level,omitempty"` + // MaxLevel is the maximum cell level stored in the index. If left unset, it + // defaults to 30. + MaxLevel int32 `protobuf:"varint,2,opt,name=max_level,json=maxLevel,proto3" json:"max_level,omitempty"` + // `MaxLevel-MinLevel` must be an exact multiple of LevelMod. If left unset, + // it defaults to 1. + LevelMod int32 `protobuf:"varint,3,opt,name=level_mod,json=levelMod,proto3" json:"level_mod,omitempty"` + // MaxCells is a soft hint for the maximum number of entries used to store a + // single geospatial object. If left unset, it defaults to 4. + MaxCells int32 `protobuf:"varint,4,opt,name=max_cells,json=maxCells,proto3" json:"max_cells,omitempty"` +} + +func (m *S2Config) Reset() { *m = S2Config{} } +func (m *S2Config) String() string { return proto.CompactTextString(m) } +func (*S2Config) ProtoMessage() {} +func (*S2Config) Descriptor() ([]byte, []int) { + return fileDescriptor_config_01f672ff9c2aee8f, []int{1} +} +func (m *S2Config) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *S2Config) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil +} +func (dst *S2Config) XXX_Merge(src proto.Message) { + xxx_messageInfo_S2Config.Merge(dst, src) +} +func (m *S2Config) XXX_Size() int { + return m.Size() +} +func (m *S2Config) XXX_DiscardUnknown() { + xxx_messageInfo_S2Config.DiscardUnknown(m) +} + +var xxx_messageInfo_S2Config proto.InternalMessageInfo + +type S2GeographyConfig struct { + S2Config *S2Config `protobuf:"bytes,1,opt,name=s2_config,json=s2Config,proto3" json:"s2_config,omitempty"` +} + +func (m *S2GeographyConfig) Reset() { *m = S2GeographyConfig{} } +func (m *S2GeographyConfig) String() string { return proto.CompactTextString(m) } +func (*S2GeographyConfig) ProtoMessage() {} +func (*S2GeographyConfig) Descriptor() 
([]byte, []int) { + return fileDescriptor_config_01f672ff9c2aee8f, []int{2} +} +func (m *S2GeographyConfig) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *S2GeographyConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil +} +func (dst *S2GeographyConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_S2GeographyConfig.Merge(dst, src) +} +func (m *S2GeographyConfig) XXX_Size() int { + return m.Size() +} +func (m *S2GeographyConfig) XXX_DiscardUnknown() { + xxx_messageInfo_S2GeographyConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_S2GeographyConfig proto.InternalMessageInfo + +type S2GeometryConfig struct { + // The rectangle bounds of the plane that will be efficiently indexed. Shapes + // should rarely exceed these bounds. + MinX float64 `protobuf:"fixed64,1,opt,name=min_x,json=minX,proto3" json:"min_x,omitempty"` + MaxX float64 `protobuf:"fixed64,2,opt,name=max_x,json=maxX,proto3" json:"max_x,omitempty"` + MinY float64 `protobuf:"fixed64,3,opt,name=min_y,json=minY,proto3" json:"min_y,omitempty"` + MaxY float64 `protobuf:"fixed64,4,opt,name=max_y,json=maxY,proto3" json:"max_y,omitempty"` + S2Config *S2Config `protobuf:"bytes,5,opt,name=s2_config,json=s2Config,proto3" json:"s2_config,omitempty"` +} + +func (m *S2GeometryConfig) Reset() { *m = S2GeometryConfig{} } +func (m *S2GeometryConfig) String() string { return proto.CompactTextString(m) } +func (*S2GeometryConfig) ProtoMessage() {} +func (*S2GeometryConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_config_01f672ff9c2aee8f, []int{3} +} +func (m *S2GeometryConfig) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *S2GeometryConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil +} +func (dst *S2GeometryConfig) XXX_Merge(src 
proto.Message) { + xxx_messageInfo_S2GeometryConfig.Merge(dst, src) +} +func (m *S2GeometryConfig) XXX_Size() int { + return m.Size() +} +func (m *S2GeometryConfig) XXX_DiscardUnknown() { + xxx_messageInfo_S2GeometryConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_S2GeometryConfig proto.InternalMessageInfo + +func init() { + proto.RegisterType((*Config)(nil), "cockroach.geo.geoindex.Config") + proto.RegisterType((*S2Config)(nil), "cockroach.geo.geoindex.S2Config") + proto.RegisterType((*S2GeographyConfig)(nil), "cockroach.geo.geoindex.S2GeographyConfig") + proto.RegisterType((*S2GeometryConfig)(nil), "cockroach.geo.geoindex.S2GeometryConfig") +} +func (m *Config) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Config) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.S2Geography != nil { + dAtA[i] = 0xa + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.S2Geography.Size())) + n1, err := m.S2Geography.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + } + if m.S2Geometry != nil { + dAtA[i] = 0x12 + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.S2Geometry.Size())) + n2, err := m.S2Geometry.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n2 + } + return i, nil +} + +func (m *S2Config) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *S2Config) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.MinLevel != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.MinLevel)) + } + if m.MaxLevel != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.MaxLevel)) + } + if m.LevelMod != 0 { + dAtA[i] = 0x18 + i++ + i = encodeVarintConfig(dAtA, i, 
uint64(m.LevelMod)) + } + if m.MaxCells != 0 { + dAtA[i] = 0x20 + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.MaxCells)) + } + return i, nil +} + +func (m *S2GeographyConfig) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *S2GeographyConfig) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.S2Config != nil { + dAtA[i] = 0xa + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.S2Config.Size())) + n3, err := m.S2Config.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n3 + } + return i, nil +} + +func (m *S2GeometryConfig) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *S2GeometryConfig) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.MinX != 0 { + dAtA[i] = 0x9 + i++ + encoding_binary.LittleEndian.PutUint64(dAtA[i:], uint64(math.Float64bits(float64(m.MinX)))) + i += 8 + } + if m.MaxX != 0 { + dAtA[i] = 0x11 + i++ + encoding_binary.LittleEndian.PutUint64(dAtA[i:], uint64(math.Float64bits(float64(m.MaxX)))) + i += 8 + } + if m.MinY != 0 { + dAtA[i] = 0x19 + i++ + encoding_binary.LittleEndian.PutUint64(dAtA[i:], uint64(math.Float64bits(float64(m.MinY)))) + i += 8 + } + if m.MaxY != 0 { + dAtA[i] = 0x21 + i++ + encoding_binary.LittleEndian.PutUint64(dAtA[i:], uint64(math.Float64bits(float64(m.MaxY)))) + i += 8 + } + if m.S2Config != nil { + dAtA[i] = 0x2a + i++ + i = encodeVarintConfig(dAtA, i, uint64(m.S2Config.Size())) + n4, err := m.S2Config.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n4 + } + return i, nil +} + +func encodeVarintConfig(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = 
uint8(v) + return offset + 1 +} +func (m *Config) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.S2Geography != nil { + l = m.S2Geography.Size() + n += 1 + l + sovConfig(uint64(l)) + } + if m.S2Geometry != nil { + l = m.S2Geometry.Size() + n += 1 + l + sovConfig(uint64(l)) + } + return n +} + +func (m *S2Config) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.MinLevel != 0 { + n += 1 + sovConfig(uint64(m.MinLevel)) + } + if m.MaxLevel != 0 { + n += 1 + sovConfig(uint64(m.MaxLevel)) + } + if m.LevelMod != 0 { + n += 1 + sovConfig(uint64(m.LevelMod)) + } + if m.MaxCells != 0 { + n += 1 + sovConfig(uint64(m.MaxCells)) + } + return n +} + +func (m *S2GeographyConfig) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.S2Config != nil { + l = m.S2Config.Size() + n += 1 + l + sovConfig(uint64(l)) + } + return n +} + +func (m *S2GeometryConfig) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.MinX != 0 { + n += 9 + } + if m.MaxX != 0 { + n += 9 + } + if m.MinY != 0 { + n += 9 + } + if m.MaxY != 0 { + n += 9 + } + if m.S2Config != nil { + l = m.S2Config.Size() + n += 1 + l + sovConfig(uint64(l)) + } + return n +} + +func sovConfig(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozConfig(x uint64) (n int) { + return sovConfig(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *Config) GetValue() interface{} { + if this.S2Geography != nil { + return this.S2Geography + } + if this.S2Geometry != nil { + return this.S2Geometry + } + return nil +} + +func (this *Config) SetValue(value interface{}) bool { + switch vt := value.(type) { + case *S2GeographyConfig: + this.S2Geography = vt + case *S2GeometryConfig: + this.S2Geometry = vt + default: + return false + } + return true +} +func (m *Config) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift 
:= uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Config: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Config: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field S2Geography", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthConfig + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.S2Geography == nil { + m.S2Geography = &S2GeographyConfig{} + } + if err := m.S2Geography.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field S2Geometry", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthConfig + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.S2Geometry == nil { + m.S2Geometry = &S2GeometryConfig{} + } + if err := m.S2Geometry.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipConfig(dAtA[iNdEx:]) + if err != nil { + return 
err + } + if skippy < 0 { + return ErrInvalidLengthConfig + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *S2Config) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: S2Config: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: S2Config: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MinLevel", wireType) + } + m.MinLevel = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MinLevel |= (int32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxLevel", wireType) + } + m.MaxLevel = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MaxLevel |= (int32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field LevelMod", wireType) + } + m.LevelMod = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.LevelMod |= (int32(b) & 0x7F) << shift + if b < 
0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxCells", wireType) + } + m.MaxCells = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.MaxCells |= (int32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipConfig(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthConfig + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *S2GeographyConfig) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: S2GeographyConfig: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: S2GeographyConfig: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field S2Config", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthConfig + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.S2Config == nil { + m.S2Config = &S2Config{} + } + if err := 
m.S2Config.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipConfig(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthConfig + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *S2GeometryConfig) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: S2GeometryConfig: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: S2GeometryConfig: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field MinX", wireType) + } + var v uint64 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + v = uint64(encoding_binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 + m.MinX = float64(math.Float64frombits(v)) + case 2: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxX", wireType) + } + var v uint64 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + v = uint64(encoding_binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 + m.MaxX = float64(math.Float64frombits(v)) + case 3: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field MinY", wireType) + } + var v uint64 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + v = uint64(encoding_binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 + m.MinY = 
float64(math.Float64frombits(v)) + case 4: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxY", wireType) + } + var v uint64 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + v = uint64(encoding_binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 + m.MaxY = float64(math.Float64frombits(v)) + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field S2Config", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowConfig + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthConfig + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.S2Config == nil { + m.S2Config = &S2Config{} + } + if err := m.S2Config.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipConfig(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthConfig + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipConfig(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowConfig + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowConfig + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + 
return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowConfig + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthConfig + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowConfig + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipConfig(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthConfig = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowConfig = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("geo/geoindex/config.proto", fileDescriptor_config_01f672ff9c2aee8f) } + +var fileDescriptor_config_01f672ff9c2aee8f = []byte{ + // 367 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x92, 0xbf, 0x4e, 0xeb, 0x30, + 0x18, 0xc5, 0xe3, 0xde, 0xb6, 0x4a, 0xdd, 0x3b, 0xdc, 0x9b, 0x7b, 0x85, 0x02, 0x48, 0xa6, 0xca, + 0x54, 0x18, 0x52, 0x29, 0x6c, 0x48, 0x2c, 0x74, 0x40, 0x48, 0x65, 0x49, 0x97, 0x96, 0x25, 0x0a, + 0xa9, 0x71, 0x23, 0x92, 0xb8, 0x4a, 0x2a, 0xe4, 0x2c, 0x3c, 0x03, 0x8f, 0xc0, 0x82, 0x78, 0x95, + 0x8e, 0x1d, 0x3b, 0x42, 0xfa, 0x22, 0x28, 0x9f, 0x9d, 0x22, 0xfe, 0x4a, 0x6c, 0xf6, 0x39, 0xbf, + 0xef, 0xe8, 0x3b, 0x96, 0xf1, 0x36, 0xa3, 
0xbc, 0xc7, 0x28, 0x0f, 0x93, 0x09, 0x15, 0xbd, 0x80, + 0x27, 0x57, 0x21, 0xb3, 0x67, 0x29, 0x9f, 0x73, 0x63, 0x2b, 0xe0, 0xc1, 0x75, 0xca, 0xfd, 0x60, + 0x6a, 0x33, 0xca, 0xed, 0x0a, 0xda, 0xf9, 0xcf, 0x38, 0xe3, 0x80, 0xf4, 0xca, 0x93, 0xa4, 0xad, + 0x47, 0x84, 0x9b, 0x7d, 0x18, 0x37, 0x06, 0xf8, 0x77, 0xe6, 0x78, 0x8c, 0x72, 0x96, 0xfa, 0xb3, + 0x69, 0x6e, 0xa2, 0x0e, 0xea, 0xb6, 0x9d, 0x7d, 0xfb, 0xf3, 0x3c, 0x7b, 0xe8, 0x9c, 0x56, 0xa8, + 0x0c, 0x70, 0xdb, 0xd9, 0xab, 0x64, 0x9c, 0xe1, 0xb6, 0x4c, 0x8b, 0xe9, 0x3c, 0xcd, 0xcd, 0x1a, + 0x84, 0x75, 0xbf, 0x0d, 0x03, 0x52, 0x65, 0xe1, 0x6c, 0xa3, 0x1c, 0xd5, 0x17, 0xf7, 0x7b, 0xc8, + 0xba, 0xc5, 0xfa, 0xd0, 0x51, 0xab, 0xee, 0xe2, 0x56, 0x1c, 0x26, 0x5e, 0x44, 0x6f, 0x68, 0x04, + 0x7b, 0x36, 0x5c, 0x3d, 0x0e, 0x93, 0x41, 0x79, 0x07, 0xd3, 0x17, 0xca, 0xac, 0x29, 0xd3, 0x17, + 0x1b, 0x13, 0x0c, 0x2f, 0xe6, 0x13, 0xf3, 0x97, 0x34, 0x41, 0x38, 0xe7, 0x93, 0x6a, 0x32, 0xa0, + 0x51, 0x94, 0x99, 0xf5, 0xcd, 0x64, 0xbf, 0xbc, 0x5b, 0x2e, 0xfe, 0xfb, 0xa1, 0xb2, 0x71, 0x8c, + 0x5b, 0x99, 0xe3, 0xc9, 0xf7, 0x57, 0x0f, 0xd6, 0xf9, 0xba, 0xa3, 0xea, 0xa6, 0x67, 0xea, 0x64, + 0x3d, 0x20, 0xfc, 0xe7, 0x7d, 0x75, 0xe3, 0x1f, 0x6e, 0x94, 0xe5, 0x04, 0xe4, 0x21, 0xb7, 0x1e, + 0x87, 0xc9, 0x08, 0x44, 0x5f, 0x78, 0x02, 0x0a, 0x95, 0xa2, 0x2f, 0x46, 0x15, 0x99, 0x43, 0x11, + 0x49, 0x8e, 0x2b, 0x32, 0x87, 0x02, 0x92, 0x1c, 0xbf, 0xdd, 0xb3, 0xf1, 0xd3, 0x3d, 0x4f, 0x0e, + 0x16, 0xcf, 0x44, 0x5b, 0x14, 0x04, 0x2d, 0x0b, 0x82, 0x56, 0x05, 0x41, 0x4f, 0x05, 0x41, 0x77, + 0x6b, 0xa2, 0x2d, 0xd7, 0x44, 0x5b, 0xad, 0x89, 0x76, 0xa1, 0x57, 0xe3, 0x97, 0x4d, 0xf8, 0x58, + 0x87, 0x2f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x82, 0x60, 0x2f, 0x36, 0xa3, 0x02, 0x00, 0x00, +} diff --git a/pkg/geo/geoindex/config.proto b/pkg/geo/geoindex/config.proto new file mode 100644 index 000000000000..59568646d023 --- /dev/null +++ b/pkg/geo/geoindex/config.proto @@ -0,0 +1,61 @@ +// Copyright 2020 The Cockroach Authors. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +syntax = "proto3"; +package cockroach.geo.geoindex; +option go_package = "geoindex"; + +import "gogoproto/gogo.proto"; + +// Config is the information used to tune one instance of a geospatial index. +// Each SQL index will have its own config. +// +// At the moment, only one major indexing strategy is implemented (S2 cells). +message Config { + option (gogoproto.onlyone) = true; + S2GeographyConfig s2_geography = 1; + S2GeometryConfig s2_geometry = 2; +} + +// S2Config is the required information to tune one instance of an S2 cell +// backed geospatial index. For advanced users only -- the defaults should be +// good enough. +// +// TODO(sumeer): Based on experiments, reduce the knobs below by making the +// covering self-tuning. +message S2Config { + // MinLevel is the minimum cell level stored in the index. If left unset, it + // defaults to 0. + int32 min_level = 1; + // MaxLevel is the maximum cell level stored in the index. If left unset, it + // defaults to 30. + int32 max_level = 2; + // `MaxLevel-MinLevel` must be an exact multiple of LevelMod. If left unset, + // it defaults to 1. + int32 level_mod = 3; + // MaxCells is a soft hint for the maximum number of entries used to store a + // single geospatial object. If left unset, it defaults to 4. + int32 max_cells = 4; +} + +message S2GeographyConfig { + S2Config s2_config = 1; +} + +message S2GeometryConfig { + // The rectangle bounds of the plane that will be efficiently indexed. Shapes + // should rarely exceed these bounds. 
+ double min_x = 1; + double max_x = 2; + double min_y = 3; + double max_y = 4; + + S2Config s2_config = 5; +} diff --git a/pkg/geo/geoindex/index.go b/pkg/geo/geoindex/index.go new file mode 100644 index 000000000000..0a010ca987e6 --- /dev/null +++ b/pkg/geo/geoindex/index.go @@ -0,0 +1,528 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geoindex + +import ( + "context" + "fmt" + "math" + "strings" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/golang/geo/s2" +) + +// Interfaces for accelerating certain spatial operations, by allowing the +// caller to use an externally stored index. +// +// An index interface has methods that specify what to write or read from the +// stored index. It is the caller's responsibility to do the actual writes and +// reads. Index keys are represented using uint64 which implies that the index +// is transforming the 2D space (spherical or planar) to 1D space, say using a +// space-filling curve. The index can return false positives so query +// execution must use an exact filtering step after using the index. The +// current implementations use the S2 geometry library. +// +// The externally stored index must support key-value semantics and range +// queries over keys. The return values from Covers/CoveredBy/Intersects are +// specialized to the computation that needs to be performed: +// - Intersects needs to union (a) all the ranges corresponding to subtrees +// rooted at the covering of g, (b) all the parent nodes of the covering +// of g. The individual entries in (b) are representable as ranges of +// length 1. All of this is represented as UnionKeySpans. 
Covers, which +// is the shape that g covers, currently delegates to Intersects so +// returns the same. +// +// - CoveredBy, which are the shapes that g is covered-by, needs to compute +// on the paths from each covering node to the root. For example, consider +// a quad-tree approach to dividing the space, where the nodes/cells +// covering g are c53, c61, c64. The corresponding ancestor path sets are +// {c53, c13, c3, c0}, {c61, c15, c3, c0}, {c64, c15, c3, c0}. Let I(c53) +// represent the index entries for c53. Then, +// I(c53) \union I(c13) \union I(c3) \union I(c0) +// represents all the shapes that cover cell c53. Similar union expressions +// can be constructed for all these paths. The computation needs to +// intersect these unions since all these cells need to be covered by a +// shape that covers g. One can extract the common sub-expressions to give +// I(c0) \union I(c3) \union +// ((I(c13) \union I(c53)) \intersection +// (I(c15) \union (I(c61) \intersection I(c64))) +// CoveredBy returns this factored expression in Reverse Polish notation. + +// GeographyIndex is an index over the unit sphere. +type GeographyIndex interface { + // InvertedIndexKeys returns the keys to store this object under when adding + // it to the index. + InvertedIndexKeys(c context.Context, g *geo.Geography) ([]Key, error) + + // Acceleration for topological relationships (see + // https://postgis.net/docs/reference.html#Spatial_Relationships). Distance + // relationships can be accelerated by adjusting g before calling these + // functions. Bounding box operators are not accelerated since we do not + // index the bounding box -- bounding box queries are an implementation + // detail of a particular indexing approach in PostGIS and are not part of + // the OGC or SQL/MM specs. + + // Covers returns the index spans to read and union for the relationship + // ST_Covers(g, x), where x are the indexed geometries. 
+ Covers(c context.Context, g *geo.Geography) (UnionKeySpans, error) + + // CoveredBy returns the index entries to read and the expression to compute + // for ST_CoveredBy(g, x), where x are the indexed geometries. + CoveredBy(c context.Context, g *geo.Geography) (RPKeyExpr, error) + + // Intersects returns the index spans to read and union for the relationship + // ST_Intersects(g, x), where x are the indexed geometries. + Intersects(c context.Context, g *geo.Geography) (UnionKeySpans, error) + + // testingInnerCovering returns an inner covering of g. + testingInnerCovering(g *geo.Geography) s2.CellUnion +} + +// GeometryIndex is an index over 2D cartesian coordinates. +type GeometryIndex interface { + // InvertedIndexKeys returns the keys to store this object under when adding + // it to the index. + InvertedIndexKeys(c context.Context, g *geo.Geometry) ([]Key, error) + + // Acceleration for topological relationships (see + // https://postgis.net/docs/reference.html#Spatial_Relationships). Distance + // relationships can be accelerated by adjusting g before calling these + // functions. Bounding box operators are not accelerated since we do not + // index the bounding box -- bounding box queries are an implementation + // detail of a particular indexing approach in PostGIS and are not part of + // the OGC or SQL/MM specs. + + // Covers returns the index spans to read and union for the relationship + // ST_Covers(g, x), where x are the indexed geometries. + Covers(c context.Context, g *geo.Geometry) (UnionKeySpans, error) + + // CoveredBy returns the index entries to read and the expression to compute + // for ST_CoveredBy(g, x), where x are the indexed geometries. + CoveredBy(c context.Context, g *geo.Geometry) (RPKeyExpr, error) + + // Intersects returns the index spans to read and union for the relationship + // ST_Intersects(g, x), where x are the indexed geometries. 
+ Intersects(c context.Context, g *geo.Geometry) (UnionKeySpans, error) + + // testingInnerCovering returns an inner covering of g. + testingInnerCovering(g *geo.Geometry) s2.CellUnion +} + +// Key is one entry under which a geospatial shape is stored on behalf of an +// Index. The index is of the form (Key, Primary Key). +type Key uint64 + +// rpExprElement implements the RPExprElement interface. +func (k Key) rpExprElement() {} + +func (k Key) String() string { + c := s2.CellID(k) + if !c.IsValid() { + return "spilled" + } + var b strings.Builder + b.WriteByte('F') + b.WriteByte("012345"[c.Face()]) + fmt.Fprintf(&b, "/L%d/", c.Level()) + for level := 1; level <= c.Level(); level++ { + b.WriteByte("0123"[c.ChildPosition(level)]) + } + return b.String() +} + +// KeySpan represents a range of Keys. +type KeySpan struct { + // Both Start and End are inclusive, i.e., [Start, End]. + Start, End Key +} + +// UnionKeySpans is the set of indexed spans to retrieve and combine via set +// union. The spans are guaranteed to be non-overlapping. Duplicate primary +// keys will not be retrieved by any individual key, but they may be present +// if more than one key is retrieved (including duplicates in a single span +// where End - Start > 1). +type UnionKeySpans []KeySpan + +func (s UnionKeySpans) String() string { + return s.toString(math.MaxInt32) +} + +func (s UnionKeySpans) toString(wrap int) string { + b := newStringBuilderWithWrap(&strings.Builder{}, wrap) + for i, span := range s { + if span.Start == span.End { + fmt.Fprintf(b, "%s", span.Start) + } else { + fmt.Fprintf(b, "[%s, %s]", span.Start, span.End) + } + if i != len(s)-1 { + b.WriteString(", ") + } + b.tryWrap() + } + return b.String() +} + +// RPExprElement is an element in the Reverse Polish notation expression. +// It is implemented by Key and RPSetOperator. +type RPExprElement interface { + rpExprElement() +} + +// RPSetOperator is a set operator in the Reverse Polish notation expression. 
+type RPSetOperator int + +const ( + // RPSetUnion is the union operator. + RPSetUnion RPSetOperator = iota + 1 + + // RPSetIntersection is the intersection operator. + RPSetIntersection +) + +// rpExprElement implements the RPExprElement interface. +func (o RPSetOperator) rpExprElement() {} + +// RPKeyExpr is an expression to evaluate over primary keys retrieved for +// index keys. If we view each index key as a posting list of primary keys, +// the expression involves union and intersection over the sets represented by +// each posting list. For S2, this expression represents an intersection of +// ancestors of different keys (cell ids) and is likely to contain many common +// keys. This special structure allows us to efficiently and easily eliminate +// common sub-expressions, hence the interface presents the factored +// expression. The expression is represented in Reverse Polish notation. +type RPKeyExpr []RPExprElement + +func (x RPKeyExpr) String() string { + var elements []string + for _, e := range x { + switch elem := e.(type) { + case Key: + elements = append(elements, elem.String()) + case RPSetOperator: + switch elem { + case RPSetUnion: + elements = append(elements, `\U`) + case RPSetIntersection: + elements = append(elements, `\I`) + } + } + } + return strings.Join(elements, " ") +} + +// Helper functions for index implementations that use the S2 geometry +// library. + +func covering(rc *s2.RegionCoverer, regions []s2.Region) s2.CellUnion { + // TODO(sumeer): Add a max cells constraint for the whole covering, + // to respect the index configuration. + var u s2.CellUnion + for _, r := range regions { + u = append(u, rc.Covering(r)...) + } + // Ensure the cells are non-overlapping. + u.Normalize() + return u +} + +// The "inner covering", for shape s, represented by the regions parameter, is +// used to find shapes that contain shape s. 
A regular covering that includes +// a cell c not completely covered by shape s could result in false negatives, +// since shape x that covers shape s could use a finer cell covering (using +// cells below c). For example, consider a portion of the cell quad-tree +// below: +// +// c0 +// | +// c3 +// | +// +---+---+ +// | | +// c13 c15 +// | | +// c53 +--+--+ +// | | +// c61 c64 +// +// Shape s could have a regular covering c15, c53, where c15 has 4 child cells +// c61..c64, and shape s only intersects with c61, c64. A different shape x +// that covers shape s may have a covering c61, c64, c53. That is, it has used +// the finer cells c61, c64. If we used both regular coverings it is hard to +// know that x covers g. Hence, we compute the "inner covering" of g (defined +// below). +// +// The interior covering of shape s includes only cells covered by s. This is +// computed by RegionCoverer.InteriorCovering() and is intuitively what we +// need. But the interior covering is naturally empty for points and lines +// (and can be empty for polygons too), and an empty covering is not useful +// for constraining index lookups. We observe that leaf cells that intersect +// shape s can be used in the covering, since the covering of shape x must +// also cover these cells. This allows us to compute non-empty coverings for +// all shapes. Since this is not technically the interior covering, we use the +// term "inner covering". +func innerCovering(rc *s2.RegionCoverer, regions []s2.Region) s2.CellUnion { + var u s2.CellUnion + for _, r := range regions { + switch region := r.(type) { + case s2.Point: + cellID := s2.CellFromPoint(region).ID() + if !cellID.IsLeaf() { + panic("bug in S2") + } + u = append(u, cellID) + case *s2.Polyline: + // TODO(sumeer): for long lines could also pick some intermediate + // points along the line. Decide based on experiments. 
+ for _, p := range *region { + cellID := s2.CellFromPoint(p).ID() + u = append(u, cellID) + } + case *s2.Polygon: + // Iterate over all exterior points + if region.NumLoops() > 0 { + loop := region.Loop(0) + for _, p := range loop.Vertices() { + cellID := s2.CellFromPoint(p).ID() + u = append(u, cellID) + } + // Arbitrary threshold value. This is to avoid computing an expensive + // region covering for regions with small area. + // TODO(sumeer): Improve this heuristic: + // - Area() may be expensive. + // - For large area regions, put an upper bound on the + // level used for cells. + // Decide based on experiments. + const smallPolygonLevelThreshold = 25 + if region.Area() > s2.AvgAreaMetric.Value(smallPolygonLevelThreshold) { + u = append(u, rc.InteriorCovering(region)...) + } + } + default: + panic("bug: code should not be producing unhandled Region type") + } + } + // Ensure the cells are non-overlapping. + u.Normalize() + + // TODO(sumeer): if the number of cells is too many, make the list sparser. + // u[len(u)-1] - u[0] / len(u) is the mean distance between cells. Pick a + // target distance based on the goal to reduce to k cells: target_distance + // := mean_distance * k / len(u) Then iterate over u and for every sequence + // of cells that are within target_distance, replace by median cell or by + // largest cell. Decide based on experiments. + + return u +} + +// ancestorCells returns the set of cells containing these cells, not +// including the given cells. +// +// TODO(sumeer): use the MinLevel and LevelMod of the RegionCoverer used +// for the index to constrain the ancestors set. 
+func ancestorCells(cells []s2.CellID) []s2.CellID { + var ancestors []s2.CellID + var seen map[s2.CellID]struct{} + if len(cells) > 1 { + seen = make(map[s2.CellID]struct{}) + } + for _, c := range cells { + for l := c.Level() - 1; l >= 0; l-- { + p := c.Parent(l) + if seen != nil { + if _, ok := seen[p]; ok { + break + } + seen[p] = struct{}{} + } + ancestors = append(ancestors, p) + } + } + return ancestors +} + +// Helper for InvertedIndexKeys. +func invertedIndexKeys(_ context.Context, rc *s2.RegionCoverer, r []s2.Region) []Key { + covering := covering(rc, r) + keys := make([]Key, len(covering)) + for i, cid := range covering { + keys[i] = Key(cid) + } + return keys +} + +// TODO(sumeer): examine RegionCoverer carefully to see if we can strengthen +// the covering invariant, which would increase the efficiency of covers() and +// remove the need for testingInnerCovering(). +// +// Helper for Covers. +func covers(c context.Context, rc *s2.RegionCoverer, r []s2.Region) UnionKeySpans { + // We use intersects since geometries covered by r may have been indexed + // using cells that are ancestors of the covering of r. We could avoid + // reading ancestors if we had a stronger covering invariant, such as by + // indexing inner coverings. + return intersects(c, rc, r) +} + +// Helper for Intersects. Returns spans in sorted order for convenience of +// scans. +func intersects(_ context.Context, rc *s2.RegionCoverer, r []s2.Region) UnionKeySpans { + covering := covering(rc, r) + querySpans := make([]KeySpan, len(covering)) + for i, cid := range covering { + querySpans[i] = KeySpan{Start: Key(cid.RangeMin()), End: Key(cid.RangeMax())} + } + for _, cid := range ancestorCells(covering) { + querySpans = append(querySpans, KeySpan{Start: Key(cid), End: Key(cid)}) + } + return querySpans +} + +// Helper for CoveredBy. 
+func coveredBy(_ context.Context, rc *s2.RegionCoverer, r []s2.Region) RPKeyExpr { + covering := innerCovering(rc, r) + ancestors := ancestorCells(covering) + + // The covering is normalized so no 2 cells are such that one is an ancestor + // of another. Arrange these cells and their ancestors in a quad-tree. Any cell + // with more than one child needs to be unioned with the intersection of the + // expressions corresponding to each child. See the detailed comment in + // generateRPExprForTree(). + + // It is sufficient to represent the tree(s) using presentCells since the ids + // of all possible 4 children of a cell can be computed and checked for + // presence in the map. + presentCells := make(map[s2.CellID]struct{}, len(covering)+len(ancestors)) + for _, c := range covering { + presentCells[c] = struct{}{} + } + for _, c := range ancestors { + presentCells[c] = struct{}{} + } + + // Construct the reverse polish expression. Note that there are up to 6 + // trees corresponding to the 6 faces in S2. The expressions for the + // trees need to be intersected with each other. + expr := make([]RPExprElement, 0, len(presentCells)*2) + numFaces := 0 + for face := 0; face < 6; face++ { + rootID := s2.CellIDFromFace(face) + if _, ok := presentCells[rootID]; !ok { + continue + } + expr = append(expr, generateRPExprForTree(rootID, presentCells)...) + numFaces++ + if numFaces > 1 { + expr = append(expr, RPSetIntersection) + } + } + return expr +} + +// The quad-trees stored in presentCells together represent a set expression. +// This expression specifies: +// - the path for each leaf to the root of that quad-tree. The index entries +// on each such path represent the shapes that cover that leaf. Hence these +// index entries for a single path need to be unioned to give the shapes +// that cover the leaf. +// - The full expression specifies the shapes that cover all the leaves, so +// the union expressions for the paths must be intersected with each other. 
+// +// Reusing an example from earlier in this file, say the quad-tree is: +// c0 +// | +// c3 +// | +// +---+---+ +// | | +// c13 c15 +// | | +// c53 +--+--+ +// | | +// c61 c64 +// +// This tree represents the following expression (where I(c) are the index +// entries stored at cell c): +// (I(c64) \union I(c15) \union I(c3) \union I(c0)) \intersection +// (I(c61) \union I(c15) \union I(c3) \union I(c0)) \intersection +// (I(c53) \union I(c13) \union I(c3) \union I(c0)) +// In this example all the union sub-expressions have the same number of terms +// but that does not need to be true. +// +// The above expression can be factored to eliminate repetition of the +// same cell. The factored expression for this example is: +// I(c0) \union I(c3) \union +// ((I(c13) \union I(c53)) \intersection +// (I(c15) \union (I(c61) \intersection I(c64)))) +// +// This function generates this factored expression represented in reverse +// polish notation. +// +// One can generate the factored expression in reverse polish notation using +// a post-order traversal of this tree: +// Step A. append the expression for the subtree rooted at c3 +// Step B. append c0 and the union operator +// For Step A: +// - append the expression for the subtree rooted at c13 +// - append the expression for the subtree rooted at c15 +// - append the intersection operator +// - append c3 +// - append the union operator +func generateRPExprForTree(rootID s2.CellID, presentCells map[s2.CellID]struct{}) []RPExprElement { + expr := []RPExprElement{Key(rootID)} + if rootID.IsLeaf() { + return expr + } + numChildren := 0 + for _, childCellID := range rootID.Children() { + if _, ok := presentCells[childCellID]; !ok { + continue + } + expr = append(expr, generateRPExprForTree(childCellID, presentCells)...) 
+ numChildren++ + if numChildren > 1 { + expr = append(expr, RPSetIntersection) + } + } + if numChildren > 0 { + expr = append(expr, RPSetUnion) + } + return expr +} + +// stringBuilderWithWrap is a strings.Builder that approximately wraps at a +// certain number of characters. Newline characters should only be +// written using tryWrap and doWrap. +type stringBuilderWithWrap struct { + *strings.Builder + wrap int + lastWrap int +} + +func newStringBuilderWithWrap(b *strings.Builder, wrap int) *stringBuilderWithWrap { + return &stringBuilderWithWrap{ + Builder: b, + wrap: wrap, + lastWrap: b.Len(), + } +} + +func (b *stringBuilderWithWrap) tryWrap() { + if b.Len()-b.lastWrap > b.wrap { + b.doWrap() + } +} + +func (b *stringBuilderWithWrap) doWrap() { + fmt.Fprintln(b) + b.lastWrap = b.Len() +} diff --git a/pkg/geo/geoindex/s2_geography_index.go b/pkg/geo/geoindex/s2_geography_index.go new file mode 100644 index 000000000000..ca8495c00b0c --- /dev/null +++ b/pkg/geo/geoindex/s2_geography_index.go @@ -0,0 +1,87 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geoindex + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/golang/geo/s2" +) + +// s2GeographyIndex is an implementation of GeographyIndex that uses the S2 geometry +// library. +type s2GeographyIndex struct { + rc *s2.RegionCoverer +} + +var _ GeographyIndex = (*s2GeographyIndex)(nil) + +// NewS2GeographyIndex returns an index with the given configuration. The +// configuration of an index cannot be changed without rewriting the index +// since deletes could miss some index entries. 
Currently, reads could use a +// different configuration, but that is subject to change if we manage to +// strengthen the covering invariants (see the todo in covers() in index.go). +func NewS2GeographyIndex(cfg S2GeographyConfig) GeographyIndex { + // TODO(sumeer): Sanity check cfg. + return &s2GeographyIndex{ + rc: &s2.RegionCoverer{ + MinLevel: int(cfg.S2Config.MinLevel), + MaxLevel: int(cfg.S2Config.MaxLevel), + LevelMod: int(cfg.S2Config.LevelMod), + MaxCells: int(cfg.S2Config.MaxCells), + }, + } +} + +// InvertedIndexKeys implements the GeographyIndex interface. +func (i *s2GeographyIndex) InvertedIndexKeys(c context.Context, g *geo.Geography) ([]Key, error) { + r, err := g.AsS2() + if err != nil { + return nil, err + } + return invertedIndexKeys(c, i.rc, r), nil +} + +// Covers implements the GeographyIndex interface. +func (i *s2GeographyIndex) Covers(c context.Context, g *geo.Geography) (UnionKeySpans, error) { + r, err := g.AsS2() + if err != nil { + return nil, err + } + return covers(c, i.rc, r), nil +} + +// CoveredBy implements the GeographyIndex interface. +func (i *s2GeographyIndex) CoveredBy(c context.Context, g *geo.Geography) (RPKeyExpr, error) { + r, err := g.AsS2() + if err != nil { + return nil, err + } + return coveredBy(c, i.rc, r), nil +} + +// Intersects implements the GeographyIndex interface. +func (i *s2GeographyIndex) Intersects(c context.Context, g *geo.Geography) (UnionKeySpans, error) { + r, err := g.AsS2() + if err != nil { + return nil, err + } + return intersects(c, i.rc, r), nil +} + +func (i *s2GeographyIndex) testingInnerCovering(g *geo.Geography) s2.CellUnion { + r, _ := g.AsS2() + if r == nil { + return nil + } + return innerCovering(i.rc, r) +} diff --git a/pkg/geo/geoindex/s2_geography_index_test.go b/pkg/geo/geoindex/s2_geography_index_test.go new file mode 100644 index 000000000000..bd52b152b81c --- /dev/null +++ b/pkg/geo/geoindex/s2_geography_index_test.go @@ -0,0 +1,62 @@ +// Copyright 2020 The Cockroach Authors. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geoindex + +import ( + "context" + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/datadriven" +) + +// TODO(sumeer): applies to this and the geometry test. The current test is +// verbose in printing out the actual expressions, which are useful as a +// sanity check, but hard to validate. +// - Add datadriven cases that test relationships between shapes. +// - Add randomized tests + +func TestS2GeographyIndexBasic(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + var index GeographyIndex + shapes := make(map[string]*geo.Geography) + datadriven.RunTest(t, "testdata/s2_geography", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "init": + cfg := s2Config(t, d) + index = NewS2GeographyIndex(S2GeographyConfig{S2Config: &cfg}) + return "" + case "geometry": + g, err := geo.ParseGeography(d.Input) + if err != nil { + return err.Error() + } + shapes[nameArg(t, d)] = g + return "" + case "index-keys": + return keysToString(index.InvertedIndexKeys(ctx, shapes[nameArg(t, d)])) + case "inner-covering": + return cellUnionToString(index.testingInnerCovering(shapes[nameArg(t, d)])) + case "covers": + return spansToString(index.Covers(ctx, shapes[nameArg(t, d)])) + case "intersects": + return spansToString(index.Intersects(ctx, shapes[nameArg(t, d)])) + case "covered-by": + return checkExprAndToString(index.CoveredBy(ctx, shapes[nameArg(t, d)])) + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git 
a/pkg/geo/geoindex/s2_geometry_index.go b/pkg/geo/geoindex/s2_geometry_index.go new file mode 100644 index 000000000000..8eb851a4adaf --- /dev/null +++ b/pkg/geo/geoindex/s2_geometry_index.go @@ -0,0 +1,312 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geoindex + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/cockroachdb/cockroach/pkg/geo/geos" + "github.com/cockroachdb/errors" + "github.com/golang/geo/r3" + "github.com/golang/geo/s2" + "github.com/twpayne/go-geom" +) + +// s2GeometryIndex is an implementation of GeometryIndex that uses the S2 geometry +// library. +type s2GeometryIndex struct { + rc *s2.RegionCoverer + minX, maxX, minY, maxY float64 +} + +var _ GeometryIndex = (*s2GeometryIndex)(nil) + +// NewS2GeometryIndex returns an index with the given configuration. All reads and +// writes on this index must use the same config. Writes must use the same +// config to correctly process deletions. Reads must use the same config since +// the bounds affect when a read needs to look at the exceedsBoundsCellID. +func NewS2GeometryIndex(cfg S2GeometryConfig) GeometryIndex { + // TODO(sumeer): Sanity check cfg. + return &s2GeometryIndex{ + rc: &s2.RegionCoverer{ + MinLevel: int(cfg.S2Config.MinLevel), + MaxLevel: int(cfg.S2Config.MaxLevel), + LevelMod: int(cfg.S2Config.LevelMod), + MaxCells: int(cfg.S2Config.MaxCells), + }, + minX: cfg.MinX, + maxX: cfg.MaxX, + minY: cfg.MinY, + maxY: cfg.MaxY, + } +} + +// A cell id unused by S2. We use it to index geometries that exceed the +// configured bounds. 
+const exceedsBoundsCellID = s2.CellID(^uint64(0)) + +// TODO(sumeer): adjust code to handle precision issues with floating point +// arithmetic. + +// InvertedIndexKeys implements the GeometryIndex interface. +func (s *s2GeometryIndex) InvertedIndexKeys(c context.Context, g *geo.Geometry) ([]Key, error) { + // If the geometry exceeds the bounds, we index the clipped geometry in + // addition to the special cell, so that queries for geometries that don't + // exceed the bounds don't need to query the special cell (which would + // become a hotspot in the key space). + gt, clipped, err := s.convertToGeomTAndTryClip(g) + if err != nil { + return nil, err + } + var keys []Key + if gt != nil { + r := s.s2RegionsFromPlanarGeom(gt) + keys = invertedIndexKeys(c, s.rc, r) + } + if clipped { + keys = append(keys, Key(exceedsBoundsCellID)) + } + return keys, nil +} + +// Covers implements the GeometryIndex interface. +func (s *s2GeometryIndex) Covers(c context.Context, g *geo.Geometry) (UnionKeySpans, error) { + return s.Intersects(c, g) +} + +// CoveredBy implements the GeometryIndex interface. +func (s *s2GeometryIndex) CoveredBy(c context.Context, g *geo.Geometry) (RPKeyExpr, error) { + // If the geometry exceeds the bounds, we use the clipped geometry to + // restrict the search within the bounds. + gt, clipped, err := s.convertToGeomTAndTryClip(g) + if err != nil { + return nil, err + } + var expr RPKeyExpr + if gt != nil { + r := s.s2RegionsFromPlanarGeom(gt) + expr = coveredBy(c, s.rc, r) + } + if clipped { + // Intersect with the shapes that exceed the bounds. + expr = append(expr, Key(exceedsBoundsCellID)) + if len(expr) > 1 { + expr = append(expr, RPSetIntersection) + } + } + return expr, nil +} + +// Intersects implements the GeometryIndex interface. +func (s *s2GeometryIndex) Intersects(c context.Context, g *geo.Geometry) (UnionKeySpans, error) { + // If the geometry exceeds the bounds, we use the clipped geometry to + // restrict the search within the bounds. 
+ gt, clipped, err := s.convertToGeomTAndTryClip(g) + if err != nil { + return nil, err + } + var spans UnionKeySpans + if gt != nil { + r := s.s2RegionsFromPlanarGeom(gt) + spans = intersects(c, s.rc, r) + } + if clipped { + // And lookup all shapes that exceed the bounds. + spans = append(spans, KeySpan{Start: Key(exceedsBoundsCellID), End: Key(exceedsBoundsCellID)}) + } + return spans, nil +} + +// Converts to geom.T and clips to the rectangle bounds of the index. +func (s *s2GeometryIndex) convertToGeomTAndTryClip(g *geo.Geometry) (geom.T, bool, error) { + gt, err := g.AsGeomT() + if err != nil { + return nil, false, err + } + clipped := false + if s.geomExceedsBounds(gt) { + clipped = true + clippedEWKB, err := + geos.ClipEWKBByRect(g.EWKB(), s.minX, s.minY, s.maxX, s.maxY) + if err != nil { + return nil, false, err + } + gt = nil + if clippedEWKB != nil { + g = geo.NewGeometry(clippedEWKB) + if g == nil { + return nil, false, errors.Errorf("internal error: clippedWKB cannot be parsed") + } + gt, err = g.AsGeomT() + if err != nil { + return nil, false, err + } + } + } + return gt, clipped, nil +} + +// Returns true if the point represented by (x, y) exceeds the rectangle +// bounds of the index. +func (s *s2GeometryIndex) xyExceedsBounds(x float64, y float64) bool { + if x < s.minX || x > s.maxX { + return true + } + if y < s.minY || y > s.maxY { + return true + } + return false +} + +// Returns true if g exceeds the rectangle bounds of the index. 
+func (s *s2GeometryIndex) geomExceedsBounds(g geom.T) bool { + switch repr := g.(type) { + case *geom.Point: + return s.xyExceedsBounds(repr.X(), repr.Y()) + case *geom.LineString: + for i := 0; i < repr.NumCoords(); i++ { + p := repr.Coord(i) + if s.xyExceedsBounds(p.X(), p.Y()) { + return true + } + } + case *geom.Polygon: + if repr.NumLinearRings() > 0 { + lr := repr.LinearRing(0) + for i := 0; i < lr.NumCoords(); i++ { + if s.xyExceedsBounds(lr.Coord(i).X(), lr.Coord(i).Y()) { + return true + } + } + } + case *geom.GeometryCollection: + for _, geom := range repr.Geoms() { + if s.geomExceedsBounds(geom) { + return true + } + } + case *geom.MultiPoint: + for i := 0; i < repr.NumPoints(); i++ { + if s.geomExceedsBounds(repr.Point(i)) { + return true + } + } + case *geom.MultiLineString: + for i := 0; i < repr.NumLineStrings(); i++ { + if s.geomExceedsBounds(repr.LineString(i)) { + return true + } + } + case *geom.MultiPolygon: + for i := 0; i < repr.NumPolygons(); i++ { + if s.geomExceedsBounds(repr.Polygon(i)) { + return true + } + } + } + return false +} + +// stToUV() and face0UVToXYZPoint() are adapted from unexported methods in +// github.com/golang/geo/s2/stuv.go + +// stToUV converts an s or t value to the corresponding u or v value. +// This is a non-linear transformation from [-1,1] to [-1,1] that +// attempts to make the cell sizes more uniform. +// This uses what the C++ version calls 'the quadratic transform'. +func stToUV(s float64) float64 { + if s >= 0.5 { + return (1 / 3.) * (4*s*s - 1) + } + return (1 / 3.) 
* (1 - 4*(1-s)*(1-s)) +} + +// Specialized version of faceUVToXYZ() for face 0 +func face0UVToXYZPoint(u, v float64) s2.Point { + return s2.Point{Vector: r3.Vector{X: 1, Y: u, Z: v}} +} + +func (s *s2GeometryIndex) planarPointToS2Point(x float64, y float64) s2.Point { + ss := (x - s.minX) / (s.maxX - s.minX) + tt := (y - s.minY) / (s.maxY - s.minY) + u := stToUV(ss) + v := stToUV(tt) + return face0UVToXYZPoint(u, v) +} + +// TODO(sumeer): this is similar to s2RegionsFromGeom() but needs to do +// a different point conversion. If these functions do not diverge further, +// and turn out not to be performance critical, merge the two implementations. +func (s *s2GeometryIndex) s2RegionsFromPlanarGeom(geomRepr geom.T) []s2.Region { + var regions []s2.Region + switch repr := geomRepr.(type) { + case *geom.Point: + regions = []s2.Region{ + s.planarPointToS2Point(repr.X(), repr.Y()), + } + case *geom.LineString: + points := make([]s2.Point, repr.NumCoords()) + for i := 0; i < repr.NumCoords(); i++ { + p := repr.Coord(i) + points[i] = s.planarPointToS2Point(p.X(), p.Y()) + } + pl := s2.Polyline(points) + regions = []s2.Region{&pl} + case *geom.Polygon: + loops := make([]*s2.Loop, repr.NumLinearRings()) + // The first ring is a "shell", which is represented as CCW. + // Following rings are "holes", which are CW. For S2, they are CCW and automatically figured out. 
+ for ringIdx := 0; ringIdx < repr.NumLinearRings(); ringIdx++ { + linearRing := repr.LinearRing(ringIdx) + points := make([]s2.Point, linearRing.NumCoords()) + for pointIdx := 0; pointIdx < linearRing.NumCoords(); pointIdx++ { + p := linearRing.Coord(pointIdx) + pt := s.planarPointToS2Point(p.X(), p.Y()) + if ringIdx == 0 { + points[pointIdx] = pt + } else { + points[len(points)-pointIdx-1] = pt + } + } + loops[ringIdx] = s2.LoopFromPoints(points) + } + regions = []s2.Region{ + s2.PolygonFromLoops(loops), + } + case *geom.GeometryCollection: + for _, geom := range repr.Geoms() { + regions = append(regions, s.s2RegionsFromPlanarGeom(geom)...) + } + case *geom.MultiPoint: + for i := 0; i < repr.NumPoints(); i++ { + regions = append(regions, s.s2RegionsFromPlanarGeom(repr.Point(i))...) + } + case *geom.MultiLineString: + for i := 0; i < repr.NumLineStrings(); i++ { + regions = append(regions, s.s2RegionsFromPlanarGeom(repr.LineString(i))...) + } + case *geom.MultiPolygon: + for i := 0; i < repr.NumPolygons(); i++ { + regions = append(regions, s.s2RegionsFromPlanarGeom(repr.Polygon(i))...) + } + } + return regions +} + +func (s *s2GeometryIndex) testingInnerCovering(g *geo.Geometry) s2.CellUnion { + gt, _, err := s.convertToGeomTAndTryClip(g) + if err != nil || gt == nil { + return nil + } + r := s.s2RegionsFromPlanarGeom(gt) + return innerCovering(s.rc, r) +} diff --git a/pkg/geo/geoindex/s2_geometry_index_test.go b/pkg/geo/geoindex/s2_geometry_index_test.go new file mode 100644 index 000000000000..337c593dc505 --- /dev/null +++ b/pkg/geo/geoindex/s2_geometry_index_test.go @@ -0,0 +1,67 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package geoindex + +import ( + "context" + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/datadriven" +) + +func TestS2GeometryIndexBasic(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + var index GeometryIndex + shapes := make(map[string]*geo.Geometry) + datadriven.RunTest(t, "testdata/s2_geometry", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "init": + cfg := s2Config(t, d) + var minX, minY, maxX, maxY int + d.ScanArgs(t, "minx", &minX) + d.ScanArgs(t, "miny", &minY) + d.ScanArgs(t, "maxx", &maxX) + d.ScanArgs(t, "maxy", &maxY) + index = NewS2GeometryIndex(S2GeometryConfig{ + MinX: float64(minX), + MinY: float64(minY), + MaxX: float64(maxX), + MaxY: float64(maxY), + S2Config: &cfg, + }) + return "" + case "geometry": + g, err := geo.ParseGeometry(d.Input) + if err != nil { + return err.Error() + } + shapes[nameArg(t, d)] = g + return "" + case "index-keys": + return keysToString(index.InvertedIndexKeys(ctx, shapes[nameArg(t, d)])) + case "inner-covering": + return cellUnionToString(index.testingInnerCovering(shapes[nameArg(t, d)])) + case "covers": + return spansToString(index.Covers(ctx, shapes[nameArg(t, d)])) + case "intersects": + return spansToString(index.Intersects(ctx, shapes[nameArg(t, d)])) + case "covered-by": + return checkExprAndToString(index.CoveredBy(ctx, shapes[nameArg(t, d)])) + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git a/pkg/geo/geoindex/testdata/s2_geography b/pkg/geo/geoindex/testdata/s2_geography new file mode 100644 index 000000000000..1b25045632d9 --- /dev/null +++ b/pkg/geo/geoindex/testdata/s2_geography @@ -0,0 +1,305 @@ +geometry name=point1 +MULTIPOINT((1.0 5.0), (3.0 4.0)) +---- + +geometry name=shortline1 +LINESTRING (-86.609955 32.703682,-86.609836 32.703659) +---- + +geometry name=nystate 
+POLYGON((-79.7624 42.5142,-79.0672 42.7783,-78.9313 42.8508,-78.9024 42.9061,-78.9313 42.9554,-78.9656 42.9584,-79.0219 42.9886,-79.0027 43.0568,-79.0727 43.0769,-79.0713 43.1220,-79.0302 43.1441,-79.0576 43.1801,-79.0604 43.2482,-79.0837 43.2812,-79.2004 43.4509,-78.6909 43.6311,-76.7958 43.6321,-76.4978 43.9987,-76.4388 44.0965,-76.3536 44.1349,-76.3124 44.1989,-76.2437 44.2049,-76.1655 44.2413,-76.1353 44.2973,-76.0474 44.3327,-75.9856 44.3553,-75.9196 44.3749,-75.8730 44.3994,-75.8221 44.4308,-75.8098 44.4740,-75.7288 44.5425,-75.5585 44.6647,-75.4088 44.7672,-75.3442 44.8101,-75.3058 44.8383,-75.2399 44.8676,-75.1204 44.9211,-74.9995 44.9609,-74.9899 44.9803,-74.9103 44.9852,-74.8856 45.0017,-74.8306 45.0153,-74.7633 45.0046,-74.7070 45.0027,-74.5642 45.0007,-74.1467 44.9920,-73.7306 45.0037,-73.4203 45.0085,-73.3430 45.0109,-73.3547 44.9874,-73.3379 44.9648,-73.3396 44.9160,-73.3739 44.8354,-73.3324 44.8013,-73.3667 44.7419,-73.3873 44.6139,-73.3736 44.5787,-73.3049 44.4916,-73.2953 44.4289,-73.3365 44.3513,-73.3118 44.2757,-73.3818 44.1980,-73.4079 44.1142,-73.4367 44.0511,-73.4065 44.0165,-73.4079 43.9375,-73.3749 43.8771,-73.3914 43.8167,-73.3557 43.7790,-73.4244 43.6460,-73.4340 43.5893,-73.3969 43.5655,-73.3818 43.6112,-73.3049 43.6271,-73.3063 43.5764,-73.2582 43.5675,-73.2445 43.5227,-73.2582 43.2582,-73.2733 42.9715,-73.2898 42.8004,-73.2664 42.7460,-73.3708 42.4630,-73.5095 42.0840,-73.4903 42.0218,-73.4999 41.8808,-73.5535 41.2953,-73.4834 41.2128,-73.7275 41.1011,-73.6644 41.0237,-73.6578 40.9851,-73.6132 40.9509,-72.4823 41.1869,-72.0950 41.2551,-71.9714 41.3005,-71.9193 41.3108,-71.7915 41.1838,-71.7929 41.1249,-71.7517 41.0462,-72.9465 40.6306,-73.4628 40.5368,-73.8885 40.4887,-73.9490 40.5232,-74.2271 40.4772,-74.2532 40.4861,-74.1866 40.6468,-74.0547 40.6556,-74.0156 40.7618,-73.9421 40.8699,-73.8934 40.9980,-73.9854 41.0343,-74.6274 41.3268,-74.7084 41.3583,-74.7101 41.3811,-74.8265 41.4386,-74.9913 41.5075,-75.0668 41.6000,-75.0366 
41.6719,-75.0545 41.7672,-75.1945 41.8808,-75.3552 42.0013,-75.4266 42.0003,-77.0306 42.0013,-79.7250 41.9993,-79.7621 42.0003,-79.7621 42.1827,-79.7621 42.5146,-79.7624 42.5142)) +---- + +init minlevel=0 maxlevel=30 maxcells=2 +---- + +index-keys name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333 + +covers name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333, +F0/L29/20002321013311200010223231310, F0/L28/2000232101331120001022323131, +F0/L27/200023210133112000102232313, F0/L26/20002321013311200010223231, +F0/L25/2000232101331120001022323, F0/L24/200023210133112000102232, +F0/L23/20002321013311200010223, F0/L22/2000232101331120001022, +F0/L21/200023210133112000102, F0/L20/20002321013311200010, F0/L19/2000232101331120001, +F0/L18/200023210133112000, F0/L17/20002321013311200, F0/L16/2000232101331120, +F0/L15/200023210133112, F0/L14/20002321013311, F0/L13/2000232101331, +F0/L12/200023210133, F0/L11/20002321013, F0/L10/2000232101, F0/L9/200023210, +F0/L8/20002321, F0/L7/2000232, F0/L6/200023, F0/L5/20002, F0/L4/2000, +F0/L3/200, F0/L2/20, F0/L1/2, F0/L0/, F0/L29/20003322302133013000110100233, +F0/L28/2000332230213301300011010023, F0/L27/200033223021330130001101002, +F0/L26/20003322302133013000110100, F0/L25/2000332230213301300011010, +F0/L24/200033223021330130001101, F0/L23/20003322302133013000110, +F0/L22/2000332230213301300011, F0/L21/200033223021330130001, +F0/L20/20003322302133013000, F0/L19/2000332230213301300, F0/L18/200033223021330130, +F0/L17/20003322302133013, F0/L16/2000332230213301, F0/L15/200033223021330, +F0/L14/20003322302133, F0/L13/2000332230213, F0/L12/200033223021, +F0/L11/20003322302, F0/L10/2000332230, F0/L9/200033223, F0/L8/20003322, +F0/L7/2000332, F0/L6/200033, F0/L5/20003 + +intersects name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333, +F0/L29/20002321013311200010223231310, F0/L28/2000232101331120001022323131, 
+F0/L27/200023210133112000102232313, F0/L26/20002321013311200010223231, +F0/L25/2000232101331120001022323, F0/L24/200023210133112000102232, +F0/L23/20002321013311200010223, F0/L22/2000232101331120001022, +F0/L21/200023210133112000102, F0/L20/20002321013311200010, F0/L19/2000232101331120001, +F0/L18/200023210133112000, F0/L17/20002321013311200, F0/L16/2000232101331120, +F0/L15/200023210133112, F0/L14/20002321013311, F0/L13/2000232101331, +F0/L12/200023210133, F0/L11/20002321013, F0/L10/2000232101, F0/L9/200023210, +F0/L8/20002321, F0/L7/2000232, F0/L6/200023, F0/L5/20002, F0/L4/2000, +F0/L3/200, F0/L2/20, F0/L1/2, F0/L0/, F0/L29/20003322302133013000110100233, +F0/L28/2000332230213301300011010023, F0/L27/200033223021330130001101002, +F0/L26/20003322302133013000110100, F0/L25/2000332230213301300011010, +F0/L24/200033223021330130001101, F0/L23/20003322302133013000110, +F0/L22/2000332230213301300011, F0/L21/200033223021330130001, +F0/L20/20003322302133013000, F0/L19/2000332230213301300, F0/L18/200033223021330130, +F0/L17/20003322302133013, F0/L16/2000332230213301, F0/L15/200033223021330, +F0/L14/20003322302133, F0/L13/2000332230213, F0/L12/200033223021, +F0/L11/20003322302, F0/L10/2000332230, F0/L9/200033223, F0/L8/20003322, +F0/L7/2000332, F0/L6/200033, F0/L5/20003 + +inner-covering name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333 + +covered-by name=point1 +---- +0: F0/L30/200033223021330130001101002333, F0/L29/20003322302133013000110100233, +F0/L28/2000332230213301300011010023, F0/L27/200033223021330130001101002, +F0/L26/20003322302133013000110100, F0/L25/2000332230213301300011010, +F0/L24/200033223021330130001101, F0/L23/20003322302133013000110, +F0/L22/2000332230213301300011, F0/L21/200033223021330130001, +F0/L20/20003322302133013000, F0/L19/2000332230213301300, F0/L18/200033223021330130, +F0/L17/20003322302133013, F0/L16/2000332230213301, F0/L15/200033223021330, +F0/L14/20003322302133, F0/L13/2000332230213, 
F0/L12/200033223021, +F0/L11/20003322302, F0/L10/2000332230, F0/L9/200033223, F0/L8/20003322, +F0/L7/2000332, F0/L6/200033, F0/L5/20003, F0/L4/2000, F0/L3/200, +F0/L2/20, F0/L1/2, F0/L0/ +1: F0/L30/200023210133112000102232313101, F0/L29/20002321013311200010223231310, +F0/L28/2000232101331120001022323131, F0/L27/200023210133112000102232313, +F0/L26/20002321013311200010223231, F0/L25/2000232101331120001022323, +F0/L24/200023210133112000102232, F0/L23/20002321013311200010223, +F0/L22/2000232101331120001022, F0/L21/200023210133112000102, +F0/L20/20002321013311200010, F0/L19/2000232101331120001, F0/L18/200023210133112000, +F0/L17/20002321013311200, F0/L16/2000232101331120, F0/L15/200023210133112, +F0/L14/20002321013311, F0/L13/2000232101331, F0/L12/200023210133, +F0/L11/20002321013, F0/L10/2000232101, F0/L9/200023210, F0/L8/20002321, +F0/L7/2000232, F0/L6/200023, F0/L5/20002, F0/L4/2000, F0/L3/200, +F0/L2/20, F0/L1/2, F0/L0/ + +index-keys name=shortline1 +---- +F4/L19/1010131200020223230, F4/L21/101013120002022323312 + +covers name=shortline1 +---- +[F4/L30/101013120002022323000000000000, F4/L30/101013120002022323033333333333], +[F4/L30/101013120002022323312000000000, F4/L30/101013120002022323312333333333], +F4/L18/101013120002022323, F4/L17/10101312000202232, F4/L16/1010131200020223, +F4/L15/101013120002022, F4/L14/10101312000202, F4/L13/1010131200020, +F4/L12/101013120002, F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, +F4/L8/10101312, F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, +F4/L3/101, F4/L2/10, F4/L1/1, F4/L0/, F4/L20/10101312000202232331, +F4/L19/1010131200020223233 + +intersects name=shortline1 +---- +[F4/L30/101013120002022323000000000000, F4/L30/101013120002022323033333333333], +[F4/L30/101013120002022323312000000000, F4/L30/101013120002022323312333333333], +F4/L18/101013120002022323, F4/L17/10101312000202232, F4/L16/1010131200020223, +F4/L15/101013120002022, F4/L14/10101312000202, F4/L13/1010131200020, +F4/L12/101013120002, 
F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, +F4/L8/10101312, F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, +F4/L3/101, F4/L2/10, F4/L1/1, F4/L0/, F4/L20/10101312000202232331, +F4/L19/1010131200020223233 + +inner-covering name=shortline1 +---- +F4/L30/101013120002022323031030033032, F4/L30/101013120002022323312001023033 + +covered-by name=shortline1 +---- +0: F4/L30/101013120002022323312001023033, F4/L29/10101312000202232331200102303, +F4/L28/1010131200020223233120010230, F4/L27/101013120002022323312001023, +F4/L26/10101312000202232331200102, F4/L25/1010131200020223233120010, +F4/L24/101013120002022323312001, F4/L23/10101312000202232331200, +F4/L22/1010131200020223233120, F4/L21/101013120002022323312, +F4/L20/10101312000202232331, F4/L19/1010131200020223233, F4/L18/101013120002022323, +F4/L17/10101312000202232, F4/L16/1010131200020223, F4/L15/101013120002022, +F4/L14/10101312000202, F4/L13/1010131200020, F4/L12/101013120002, +F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, F4/L8/10101312, +F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, F4/L3/101, +F4/L2/10, F4/L1/1, F4/L0/ +1: F4/L30/101013120002022323031030033032, F4/L29/10101312000202232303103003303, +F4/L28/1010131200020223230310300330, F4/L27/101013120002022323031030033, +F4/L26/10101312000202232303103003, F4/L25/1010131200020223230310300, +F4/L24/101013120002022323031030, F4/L23/10101312000202232303103, +F4/L22/1010131200020223230310, F4/L21/101013120002022323031, +F4/L20/10101312000202232303, F4/L19/1010131200020223230, F4/L18/101013120002022323, +F4/L17/10101312000202232, F4/L16/1010131200020223, F4/L15/101013120002022, +F4/L14/10101312000202, F4/L13/1010131200020, F4/L12/101013120002, +F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, F4/L8/10101312, +F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, F4/L3/101, +F4/L2/10, F4/L1/1, F4/L0/ + +# TODO(sumeer): investigate and fix this broad covering. 
+ +index-keys name=nystate +---- +F0/L0/, F1/L0/, F2/L0/, F3/L0/, F4/L0/, F5/L0/ + +covers name=nystate +---- +[F0/L30/000000000000000000000000000000, F0/L30/333333333333333333333333333333], +[F1/L30/000000000000000000000000000000, F1/L30/333333333333333333333333333333], +[F2/L30/000000000000000000000000000000, F2/L30/333333333333333333333333333333], +[F3/L30/000000000000000000000000000000, F3/L30/333333333333333333333333333333], +[F4/L30/000000000000000000000000000000, F4/L30/333333333333333333333333333333], +[F5/L30/000000000000000000000000000000, F5/L30/333333333333333333333333333333] + +intersects name=nystate +---- +[F0/L30/000000000000000000000000000000, F0/L30/333333333333333333333333333333], +[F1/L30/000000000000000000000000000000, F1/L30/333333333333333333333333333333], +[F2/L30/000000000000000000000000000000, F2/L30/333333333333333333333333333333], +[F3/L30/000000000000000000000000000000, F3/L30/333333333333333333333333333333], +[F4/L30/000000000000000000000000000000, F4/L30/333333333333333333333333333333], +[F5/L30/000000000000000000000000000000, F5/L30/333333333333333333333333333333] + +init minlevel=0 maxlevel=30 maxcells=1 +---- + +index-keys name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333 + +covers name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333, +F0/L29/20002321013311200010223231310, F0/L28/2000232101331120001022323131, +F0/L27/200023210133112000102232313, F0/L26/20002321013311200010223231, +F0/L25/2000232101331120001022323, F0/L24/200023210133112000102232, +F0/L23/20002321013311200010223, F0/L22/2000232101331120001022, +F0/L21/200023210133112000102, F0/L20/20002321013311200010, F0/L19/2000232101331120001, +F0/L18/200023210133112000, F0/L17/20002321013311200, F0/L16/2000232101331120, +F0/L15/200023210133112, F0/L14/20002321013311, F0/L13/2000232101331, +F0/L12/200023210133, F0/L11/20002321013, F0/L10/2000232101, F0/L9/200023210, +F0/L8/20002321, 
F0/L7/2000232, F0/L6/200023, F0/L5/20002, F0/L4/2000, +F0/L3/200, F0/L2/20, F0/L1/2, F0/L0/, F0/L29/20003322302133013000110100233, +F0/L28/2000332230213301300011010023, F0/L27/200033223021330130001101002, +F0/L26/20003322302133013000110100, F0/L25/2000332230213301300011010, +F0/L24/200033223021330130001101, F0/L23/20003322302133013000110, +F0/L22/2000332230213301300011, F0/L21/200033223021330130001, +F0/L20/20003322302133013000, F0/L19/2000332230213301300, F0/L18/200033223021330130, +F0/L17/20003322302133013, F0/L16/2000332230213301, F0/L15/200033223021330, +F0/L14/20003322302133, F0/L13/2000332230213, F0/L12/200033223021, +F0/L11/20003322302, F0/L10/2000332230, F0/L9/200033223, F0/L8/20003322, +F0/L7/2000332, F0/L6/200033, F0/L5/20003 + +intersects name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333, +F0/L29/20002321013311200010223231310, F0/L28/2000232101331120001022323131, +F0/L27/200023210133112000102232313, F0/L26/20002321013311200010223231, +F0/L25/2000232101331120001022323, F0/L24/200023210133112000102232, +F0/L23/20002321013311200010223, F0/L22/2000232101331120001022, +F0/L21/200023210133112000102, F0/L20/20002321013311200010, F0/L19/2000232101331120001, +F0/L18/200023210133112000, F0/L17/20002321013311200, F0/L16/2000232101331120, +F0/L15/200023210133112, F0/L14/20002321013311, F0/L13/2000232101331, +F0/L12/200023210133, F0/L11/20002321013, F0/L10/2000232101, F0/L9/200023210, +F0/L8/20002321, F0/L7/2000232, F0/L6/200023, F0/L5/20002, F0/L4/2000, +F0/L3/200, F0/L2/20, F0/L1/2, F0/L0/, F0/L29/20003322302133013000110100233, +F0/L28/2000332230213301300011010023, F0/L27/200033223021330130001101002, +F0/L26/20003322302133013000110100, F0/L25/2000332230213301300011010, +F0/L24/200033223021330130001101, F0/L23/20003322302133013000110, +F0/L22/2000332230213301300011, F0/L21/200033223021330130001, +F0/L20/20003322302133013000, F0/L19/2000332230213301300, F0/L18/200033223021330130, +F0/L17/20003322302133013, 
F0/L16/2000332230213301, F0/L15/200033223021330, +F0/L14/20003322302133, F0/L13/2000332230213, F0/L12/200033223021, +F0/L11/20003322302, F0/L10/2000332230, F0/L9/200033223, F0/L8/20003322, +F0/L7/2000332, F0/L6/200033, F0/L5/20003 + +inner-covering name=point1 +---- +F0/L30/200023210133112000102232313101, F0/L30/200033223021330130001101002333 + +covered-by name=point1 +---- +0: F0/L30/200033223021330130001101002333, F0/L29/20003322302133013000110100233, +F0/L28/2000332230213301300011010023, F0/L27/200033223021330130001101002, +F0/L26/20003322302133013000110100, F0/L25/2000332230213301300011010, +F0/L24/200033223021330130001101, F0/L23/20003322302133013000110, +F0/L22/2000332230213301300011, F0/L21/200033223021330130001, +F0/L20/20003322302133013000, F0/L19/2000332230213301300, F0/L18/200033223021330130, +F0/L17/20003322302133013, F0/L16/2000332230213301, F0/L15/200033223021330, +F0/L14/20003322302133, F0/L13/2000332230213, F0/L12/200033223021, +F0/L11/20003322302, F0/L10/2000332230, F0/L9/200033223, F0/L8/20003322, +F0/L7/2000332, F0/L6/200033, F0/L5/20003, F0/L4/2000, F0/L3/200, +F0/L2/20, F0/L1/2, F0/L0/ +1: F0/L30/200023210133112000102232313101, F0/L29/20002321013311200010223231310, +F0/L28/2000232101331120001022323131, F0/L27/200023210133112000102232313, +F0/L26/20002321013311200010223231, F0/L25/2000232101331120001022323, +F0/L24/200023210133112000102232, F0/L23/20002321013311200010223, +F0/L22/2000232101331120001022, F0/L21/200023210133112000102, +F0/L20/20002321013311200010, F0/L19/2000232101331120001, F0/L18/200023210133112000, +F0/L17/20002321013311200, F0/L16/2000232101331120, F0/L15/200023210133112, +F0/L14/20002321013311, F0/L13/2000232101331, F0/L12/200023210133, +F0/L11/20002321013, F0/L10/2000232101, F0/L9/200023210, F0/L8/20002321, +F0/L7/2000232, F0/L6/200023, F0/L5/20002, F0/L4/2000, F0/L3/200, +F0/L2/20, F0/L1/2, F0/L0/ + +index-keys name=shortline1 +---- +F4/L18/101013120002022323 + +covers name=shortline1 +---- 
+[F4/L30/101013120002022323000000000000, F4/L30/101013120002022323333333333333], +F4/L17/10101312000202232, F4/L16/1010131200020223, F4/L15/101013120002022, +F4/L14/10101312000202, F4/L13/1010131200020, F4/L12/101013120002, +F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, F4/L8/10101312, +F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, F4/L3/101, +F4/L2/10, F4/L1/1, F4/L0/ + +intersects name=shortline1 +---- +[F4/L30/101013120002022323000000000000, F4/L30/101013120002022323333333333333], +F4/L17/10101312000202232, F4/L16/1010131200020223, F4/L15/101013120002022, +F4/L14/10101312000202, F4/L13/1010131200020, F4/L12/101013120002, +F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, F4/L8/10101312, +F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, F4/L3/101, +F4/L2/10, F4/L1/1, F4/L0/ + +inner-covering name=shortline1 +---- +F4/L30/101013120002022323031030033032, F4/L30/101013120002022323312001023033 + +covered-by name=shortline1 +---- +0: F4/L30/101013120002022323312001023033, F4/L29/10101312000202232331200102303, +F4/L28/1010131200020223233120010230, F4/L27/101013120002022323312001023, +F4/L26/10101312000202232331200102, F4/L25/1010131200020223233120010, +F4/L24/101013120002022323312001, F4/L23/10101312000202232331200, +F4/L22/1010131200020223233120, F4/L21/101013120002022323312, +F4/L20/10101312000202232331, F4/L19/1010131200020223233, F4/L18/101013120002022323, +F4/L17/10101312000202232, F4/L16/1010131200020223, F4/L15/101013120002022, +F4/L14/10101312000202, F4/L13/1010131200020, F4/L12/101013120002, +F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, F4/L8/10101312, +F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, F4/L3/101, +F4/L2/10, F4/L1/1, F4/L0/ +1: F4/L30/101013120002022323031030033032, F4/L29/10101312000202232303103003303, +F4/L28/1010131200020223230310300330, F4/L27/101013120002022323031030033, +F4/L26/10101312000202232303103003, F4/L25/1010131200020223230310300, +F4/L24/101013120002022323031030, 
F4/L23/10101312000202232303103, +F4/L22/1010131200020223230310, F4/L21/101013120002022323031, +F4/L20/10101312000202232303, F4/L19/1010131200020223230, F4/L18/101013120002022323, +F4/L17/10101312000202232, F4/L16/1010131200020223, F4/L15/101013120002022, +F4/L14/10101312000202, F4/L13/1010131200020, F4/L12/101013120002, +F4/L11/10101312000, F4/L10/1010131200, F4/L9/101013120, F4/L8/10101312, +F4/L7/1010131, F4/L6/101013, F4/L5/10101, F4/L4/1010, F4/L3/101, +F4/L2/10, F4/L1/1, F4/L0/ diff --git a/pkg/geo/geoindex/testdata/s2_geometry b/pkg/geo/geoindex/testdata/s2_geometry new file mode 100644 index 000000000000..696a31b3cb3c --- /dev/null +++ b/pkg/geo/geoindex/testdata/s2_geometry @@ -0,0 +1,61 @@ +# TODO(sumeer): add more test cases + +geometry name=point1 +MULTIPOINT((12 12), (21 21)) +---- + +init minlevel=0 maxlevel=30 maxcells=1 minx=10 miny=10 maxx=20 maxy=20 +---- + +# Point exceeds the defined bounds. + +index-keys name=point1 +---- +F0/L30/002200220022002200220022002200, spilled + +covers name=point1 +---- +F0/L30/002200220022002200220022002200, F0/L29/00220022002200220022002200220, +F0/L28/0022002200220022002200220022, F0/L27/002200220022002200220022002, +F0/L26/00220022002200220022002200, F0/L25/0022002200220022002200220, +F0/L24/002200220022002200220022, F0/L23/00220022002200220022002, +F0/L22/0022002200220022002200, F0/L21/002200220022002200220, +F0/L20/00220022002200220022, F0/L19/0022002200220022002, F0/L18/002200220022002200, +F0/L17/00220022002200220, F0/L16/0022002200220022, F0/L15/002200220022002, +F0/L14/00220022002200, F0/L13/0022002200220, F0/L12/002200220022, +F0/L11/00220022002, F0/L10/0022002200, F0/L9/002200220, F0/L8/00220022, +F0/L7/0022002, F0/L6/002200, F0/L5/00220, F0/L4/0022, F0/L3/002, +F0/L2/00, F0/L1/0, F0/L0/, spilled + +intersects name=point1 +---- +F0/L30/002200220022002200220022002200, F0/L29/00220022002200220022002200220, +F0/L28/0022002200220022002200220022, F0/L27/002200220022002200220022002, 
+F0/L26/00220022002200220022002200, F0/L25/0022002200220022002200220, +F0/L24/002200220022002200220022, F0/L23/00220022002200220022002, +F0/L22/0022002200220022002200, F0/L21/002200220022002200220, +F0/L20/00220022002200220022, F0/L19/0022002200220022002, F0/L18/002200220022002200, +F0/L17/00220022002200220, F0/L16/0022002200220022, F0/L15/002200220022002, +F0/L14/00220022002200, F0/L13/0022002200220, F0/L12/002200220022, +F0/L11/00220022002, F0/L10/0022002200, F0/L9/002200220, F0/L8/00220022, +F0/L7/0022002, F0/L6/002200, F0/L5/00220, F0/L4/0022, F0/L3/002, +F0/L2/00, F0/L1/0, F0/L0/, spilled + +inner-covering name=point1 +---- +F0/L30/002200220022002200220022002200 + +covered-by name=point1 +---- +0: spilled +1: F0/L30/002200220022002200220022002200, F0/L29/00220022002200220022002200220, +F0/L28/0022002200220022002200220022, F0/L27/002200220022002200220022002, +F0/L26/00220022002200220022002200, F0/L25/0022002200220022002200220, +F0/L24/002200220022002200220022, F0/L23/00220022002200220022002, +F0/L22/0022002200220022002200, F0/L21/002200220022002200220, +F0/L20/00220022002200220022, F0/L19/0022002200220022002, F0/L18/002200220022002200, +F0/L17/00220022002200220, F0/L16/0022002200220022, F0/L15/002200220022002, +F0/L14/00220022002200, F0/L13/0022002200220, F0/L12/002200220022, +F0/L11/00220022002, F0/L10/0022002200, F0/L9/002200220, F0/L8/00220022, +F0/L7/0022002, F0/L6/002200, F0/L5/00220, F0/L4/0022, F0/L3/002, +F0/L2/00, F0/L1/0, F0/L0/ diff --git a/pkg/geo/geoindex/utils_test.go b/pkg/geo/geoindex/utils_test.go new file mode 100644 index 000000000000..608aa0c7544a --- /dev/null +++ b/pkg/geo/geoindex/utils_test.go @@ -0,0 +1,146 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geoindex + +import ( + "fmt" + "sort" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/golang/geo/s2" +) + +func nameArg(t *testing.T, d *datadriven.TestData) string { + var name string + d.ScanArgs(t, "name", &name) + return name +} + +func s2Config(t *testing.T, d *datadriven.TestData) S2Config { + var minLevel, maxLevel, maxCells int + d.ScanArgs(t, "minlevel", &minLevel) + d.ScanArgs(t, "maxlevel", &maxLevel) + d.ScanArgs(t, "maxcells", &maxCells) + return S2Config{ + MinLevel: int32(minLevel), + MaxLevel: int32(maxLevel), + LevelMod: 1, + MaxCells: int32(maxCells), + } +} + +func keysToString(keys []Key, err error) string { + if err != nil { + return err.Error() + } + var cells []string + for _, k := range keys { + cells = append(cells, k.String()) + } + return strings.Join(cells, ", ") +} + +func cellUnionToString(cells s2.CellUnion) string { + var strs []string + for _, c := range cells { + strs = append(strs, Key(c).String()) + } + return strings.Join(strs, ", ") +} + +func spansToString(spans UnionKeySpans, err error) string { + if err != nil { + return err.Error() + } + return spans.toString(60) +} + +// Intersection of unioned keys. +type evaluatedExpr [][]Key + +type unionSorter []Key + +func (s unionSorter) Len() int { + return len(s) +} + +func (s unionSorter) Less(i, j int) bool { + return s2.CellID(s[i]).Level() > s2.CellID(s[j]).Level() +} + +func (s unionSorter) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// Checks that the factored expression does not repeat a cell +// and prints out the unfactored expression with each unioned +// sub-expression in high-to-low level order, so that it is +// easy to read and validate. 
+func checkExprAndToString(expr RPKeyExpr, err error) string { + if err != nil { + return err.Error() + } + keys := make(map[Key]struct{}) + for _, elem := range expr { + switch k := elem.(type) { + case Key: + if _, ok := keys[k]; ok { + return fmt.Sprintf("duplicate key: %s", k) + } + keys[k] = struct{}{} + } + } + var stack []evaluatedExpr + for _, elem := range expr { + switch e := elem.(type) { + case Key: + stack = append(stack, evaluatedExpr{[]Key{e}}) + case RPSetOperator: + op0, op1 := stack[len(stack)-1], stack[len(stack)-2] + stack = stack[:len(stack)-2] + switch e { + case RPSetIntersection: + op0 = append(op0, op1...) + case RPSetUnion: + if len(op1) != 1 { + op0, op1 = op1, op0 + } + if len(op1) != 1 { + return "error in expression" + } + for i := range op0 { + op0[i] = append(op0[i], op1[0]...) + } + } + stack = append(stack, op0) + } + } + if len(stack) != 1 { + return fmt.Sprintf("stack has unexpected length: %d", len(stack)) + } + b := newStringBuilderWithWrap(&strings.Builder{}, 60) + for i, cells := range stack[0] { + sort.Sort(unionSorter(cells)) + fmt.Fprintf(b, "%d: ", i) + for i, c := range cells { + fmt.Fprintf(b, "%s", c) + if i != len(cells)-1 { + b.WriteString(", ") + } + b.tryWrap() + } + b.doWrap() + } + + return b.String() +} diff --git a/pkg/geo/geos/geos_unix.go b/pkg/geo/geos/geos_unix.go index c149c11c6b71..0821981dd166 100644 --- a/pkg/geo/geos/geos_unix.go +++ b/pkg/geo/geos/geos_unix.go @@ -185,7 +185,7 @@ func WKTToEWKB(wkt geopb.WKT, srid geopb.SRID) (geopb.EWKB, error) { // ClipEWKBByRect clips a WKB to the specified rectangle. 
func ClipEWKBByRect( - wkb geopb.WKB, xMin float64, yMin float64, xMax float64, yMax float64, + wkb geopb.EWKB, xMin float64, yMin float64, xMax float64, yMax float64, ) (geopb.EWKB, error) { g, err := ensureInit(EnsureInitErrorDisplayPrivate) if err != nil { diff --git a/pkg/jobs/registry.go b/pkg/jobs/registry.go index 47c851e891f8..2617173738e8 100644 --- a/pkg/jobs/registry.go +++ b/pkg/jobs/registry.go @@ -769,11 +769,17 @@ func (r *Registry) createResumer(job *Job, settings *cluster.Settings) (Resumer, type retryJobError string +// retryJobErrorSentinel exists so the errors returned from NewRetryJobError can +// be marked with it, allowing more robust detection of retry errors even if +// they are wrapped, etc. This was originally introduced to deal with injected +// retry errors from testing knobs. +var retryJobErrorSentinel = retryJobError("") + // NewRetryJobError creates a new error that, if returned by a Resumer, // indicates to the jobs registry that the job should be restarted in the // background. func NewRetryJobError(s string) error { - return retryJobError(s) + return errors.Mark(retryJobError(s), retryJobErrorSentinel) } func (r retryJobError) Error() string { @@ -817,8 +823,8 @@ func (r *Registry) stepThroughStateMachine( // TODO(spaskob): enforce a limit on retries. // TODO(spaskob,lucy): Add metrics on job retries. Consider having a backoff // mechanism (possibly combined with a retry limit). - if e, ok := err.(retryJobError); ok { - return errors.Errorf("job %d: %s: restarting in background", *job.ID(), e) + if errors.Is(err, retryJobErrorSentinel) { + return errors.Errorf("job %d: %s: restarting in background", *job.ID(), err) } if err, ok := errors.Cause(err).(*InvalidStatusError); ok { if err.status != StatusCancelRequested && err.status != StatusPauseRequested { @@ -878,8 +884,8 @@ func (r *Registry) stepThroughStateMachine( // mark the job as failed because it can be resumed by another node. 
return errors.Errorf("job %d: node liveness error: restarting in background", *job.ID()) } - if e, ok := err.(retryJobError); ok { - return errors.Errorf("job %d: %s: restarting in background", *job.ID(), e) + if errors.Is(err, retryJobErrorSentinel) { + return errors.Errorf("job %d: %s: restarting in background", *job.ID(), err) } if err, ok := errors.Cause(err).(*InvalidStatusError); ok { if err.status != StatusPauseRequested { diff --git a/pkg/sql/schema_change_migrations_test.go b/pkg/sql/schema_change_migrations_test.go index 367f22ec3de7..83f42ab0b33c 100644 --- a/pkg/sql/schema_change_migrations_test.go +++ b/pkg/sql/schema_change_migrations_test.go @@ -554,7 +554,27 @@ func setupTestingKnobs( } case AfterBackfill: if shouldCancel { - knobs.RunAfterOnFailOrCancel = blockFn + // This is a special case where (1) RunAfterBackfill within Resume() needs + // to call cancelFn() to cancel the job, (2) RunBeforeOnFailOrCancel needs + // to set hasCanceled, and (3) RunAfterBackfill, running for the 2nd time + // within OnFailOrCancel(), needs to read the value of hasCanceled (which + // is true) and run BlockFn(). + knobs.RunBeforeOnFailOrCancel = func(jobID int64) error { + mu.Lock() + defer mu.Unlock() + hasCanceled = true + return nil + } + knobs.RunAfterBackfill = func(jobID int64) error { + mu.Lock() + hasCanceled := hasCanceled + mu.Unlock() + if hasCanceled { + return blockFn(jobID) + } else { + return cancelFn(jobID) + } + } } else { knobs.RunAfterBackfill = blockFn } @@ -813,6 +833,10 @@ func TestMigrateSchemaChanges(t *testing.T) { blockState := blockState shouldCancel := shouldCancel + // Rollbacks of DROP CONSTRAINT are broken. See #47323. 
+			if schemaChange.kind == DropConstraint && shouldCancel {
+				continue
+			}
 			if !canBlockIfCanceled(blockState, shouldCancel) {
 				continue
 			}
diff --git a/pkg/sql/schema_changer.go b/pkg/sql/schema_changer.go
index c40ecf1fe05d..4ac5d2e3bbd1 100644
--- a/pkg/sql/schema_changer.go
+++ b/pkg/sql/schema_changer.go
@@ -547,9 +547,11 @@ func (sc *SchemaChanger) handlePermanentSchemaChangeError(
 	ctx context.Context, err error, evalCtx *extendedEvalContext,
 ) error {
 	if rollbackErr := sc.rollbackSchemaChange(ctx, err); rollbackErr != nil {
-		// Note: the "err" object is captured by rollbackSchemaChange(), so
-		// it does not simply disappear.
-		return errors.Wrap(rollbackErr, "while rolling back schema change")
+		// From now on, the original error will be a secondary error of the
+		// returned error, so we'll record the original error now.
+		secondary := errors.Wrap(err, "original error when rolling back mutations")
+		sqltelemetry.RecordError(ctx, secondary, &sc.settings.SV)
+		return errors.WithSecondaryError(rollbackErr, secondary)
 	}
 
 	// TODO (lucy): This is almost the same as in exec(), maybe refactor.
@@ -634,20 +636,7 @@ func (sc *SchemaChanger) initJobRunningStatus(ctx context.Context) error {
 func (sc *SchemaChanger) rollbackSchemaChange(ctx context.Context, err error) error {
 	log.Warningf(ctx, "reversing schema change %d due to irrecoverable error: %s", *sc.job.ID(), err)
 	if errReverse := sc.maybeReverseMutations(ctx, err); errReverse != nil {
-		// Although the backfill did hit an integrity constraint violation
-		// and made a decision to reverse the mutations,
-		// maybeReverseMutations() failed. If exec() is called again the entire
-		// schema change will be retried.
-
-		// Note: we capture the original error as "secondary" to ensure it
-		// does not fully disappear.
-		// However, since it is not in the main causal chain any more,
-		// it will become invisible to further telemetry. So before
-		// we relegate it to a secondary, go through the recording motions.
- // This ensures that any important error gets reported to Sentry, etc. - secondary := errors.Wrap(err, "original error when reversing mutations") - sqltelemetry.RecordError(ctx, secondary, &sc.settings.SV) - return errors.WithSecondaryError(errReverse, secondary) + return err } if fn := sc.testingKnobs.RunAfterMutationReversal; fn != nil { @@ -658,20 +647,7 @@ func (sc *SchemaChanger) rollbackSchemaChange(ctx context.Context, err error) er // After this point the schema change has been reversed and any retry // of the schema change will act upon the reversed schema change. - if errPurge := sc.runStateMachineAndBackfill(ctx); errPurge != nil { - // Don't return this error because we do want the caller to know - // that an integrity constraint was violated with the original - // schema change. The reversed schema change will be - // retried via the async schema change manager. - - // Since the errors are going to disappear, do the recording - // motions on them. This ensures that any assertion failure or - // other important error underneath gets recorded properly. - log.Warningf(ctx, "error purging mutation: %+v\nwhile handling error: %+v", errPurge, err) - sqltelemetry.RecordError(ctx, err, &sc.settings.SV) - sqltelemetry.RecordError(ctx, errPurge, &sc.settings.SV) - } - return nil + return sc.runStateMachineAndBackfill(ctx) } // RunStateMachineBeforeBackfill moves the state machine forward @@ -1722,12 +1698,36 @@ func (r schemaChangeResumer) OnFailOrCancel(ctx context.Context, phs interface{} // We check for this case so that we can just return the error without // wrapping it in a retry error. return rollbackErr - default: - // Always retry when we get any other error. Otherwise we risk leaving the - // in-progress schema change state on the table descriptor indefinitely. - // Note that, in theory, this could mean retrying the job forever even for - // an error we can't recover from, if there's a bug. 
+ case !isPermanentSchemaChangeError(rollbackErr): + // Check if the error is on a whitelist of errors we should retry on, and + // have the job registry retry. return jobs.NewRetryJobError(rollbackErr.Error()) + default: + // All other errors lead to a failed job. + // + // TODO (lucy): We have a problem where some schema change rollbacks will + // never succeed because the backfiller can't handle rolling back schema + // changes that involve dropping a column; see #46541. (This is probably + // not the only bug that could cause rollbacks to fail.) For historical + // context: This was the case in 19.2 and probably earlier versions as + // well, and in those earlier versions, the old async schema changer would + // keep retrying the rollback and failing in the background because the + // mutation would still be left on the table descriptor. In the present + // schema change job, we return an error immediately and put the job in a + // terminal state instead of retrying indefinitely, basically to make the + // behavior similar to 19.2: If the rollback fails, we end up returning + // immediately (instead of retrying and blocking indefinitely), and the + // table descriptor is left in a bad state with some mutations that we + // can't clean up. + // + // Ultimately, this is untenable, and we should figure out some better way + // of dealing with failed rollbacks. Part of the solution is just making + // rollbacks (especially of dropped columns) more robust, but part of it + // will likely involve some sort of medium-term solution for cleaning up + // mutations that we can't make any progress on (see #47456). In the long + // term we'll hopefully be rethinking what it even means to "roll back" a + // (transactional) schema change. 
+ return rollbackErr } } diff --git a/pkg/sql/schema_changer_test.go b/pkg/sql/schema_changer_test.go index f8c9da30703a..ab7f79b4cf0b 100644 --- a/pkg/sql/schema_changer_test.go +++ b/pkg/sql/schema_changer_test.go @@ -5803,3 +5803,113 @@ ALTER TABLE t.public.test DROP COLUMN v; {"1", "2"}, }, rows) } + +// TestRetriableErrorDuringRollback tests that a retriable error while rolling +// back a schema change causes the rollback to retry and succeed. +func TestRetriableErrorDuringRollback(t *testing.T) { + defer leaktest.AfterTest(t)() + defer setTestJobsAdoptInterval()() + ctx := context.Background() + + onFailOrCancelStarted := false + injectedError := false + params, _ := tests.CreateTestServerParams() + params.Knobs = base.TestingKnobs{ + SQLSchemaChanger: &sql.SchemaChangerTestingKnobs{ + RunBeforeOnFailOrCancel: func(_ int64) error { + onFailOrCancelStarted = true + return nil + }, + RunBeforeBackfill: func() error { + // The first time through the backfiller in OnFailOrCancel, return a + // retriable error. + if !onFailOrCancelStarted || injectedError { + return nil + } + injectedError = true + // Return an artificial context canceled error. + return context.Canceled + }, + }, + } + s, sqlDB, kvDB := serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + + // Disable strict GC TTL enforcement because we're going to shove a zero-value + // TTL into the system with addImmediateGCZoneConfig. + defer disableGCTTLStrictEnforcement(t, sqlDB)() + + _, err := sqlDB.Exec(` +CREATE DATABASE t; +CREATE TABLE t.test (k INT PRIMARY KEY, v INT8); +INSERT INTO t.test VALUES (1, 2), (2, 2); +`) + require.NoError(t, err) + tableDesc := sqlbase.GetTableDescriptor(kvDB, "t", "test") + // Add a zone config for the table. + _, err = addImmediateGCZoneConfig(sqlDB, tableDesc.ID) + require.NoError(t, err) + + // Try to create a unique index which won't be valid and will need a rollback. 
+ _, err = sqlDB.Exec(` +CREATE UNIQUE INDEX i ON t.test(v); +`) + require.Error(t, err) + require.Regexp(t, "violates unique constraint", err.Error()) + // Verify that the index was cleaned up. + testutils.SucceedsSoon(t, func() error { + return checkTableKeyCountExact(ctx, kvDB, 2) + }) +} + +// TestPermanentErrorDuringRollback tests that a permanent error while rolling +// back a schema change causes the job to fail, and that the appropriate error +// is displayed in the jobs table. +func TestPermanentErrorDuringRollback(t *testing.T) { + defer leaktest.AfterTest(t)() + defer setTestJobsAdoptInterval()() + ctx := context.Background() + + onFailOrCancelStarted := false + injectedError := false + params, _ := tests.CreateTestServerParams() + params.Knobs = base.TestingKnobs{ + SQLSchemaChanger: &sql.SchemaChangerTestingKnobs{ + RunBeforeOnFailOrCancel: func(_ int64) error { + onFailOrCancelStarted = true + return nil + }, + RunBeforeBackfill: func() error { + // The first time through the backfiller in OnFailOrCancel, return a + // permanent error. + if !onFailOrCancelStarted || injectedError { + return nil + } + injectedError = true + // Any error not on the whitelist of retriable errors is considered permanent. + return errors.New("permanent error") + }, + }, + } + s, sqlDB, _ := serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + + _, err := sqlDB.Exec(` +CREATE DATABASE t; +CREATE TABLE t.test (k INT PRIMARY KEY, v INT8); +INSERT INTO t.test VALUES (1, 2), (2, 2); +`) + require.NoError(t, err) + + // Try to create a unique index which won't be valid and will need a rollback. 
+ _, err = sqlDB.Exec(` +CREATE UNIQUE INDEX i ON t.test(v); +`) + require.Error(t, err) + require.Regexp(t, "violates unique constraint", err.Error()) + + var jobErr string + row := sqlDB.QueryRow("SELECT error FROM [SHOW JOBS] WHERE job_type = 'SCHEMA CHANGE'") + require.NoError(t, row.Scan(&jobErr)) + require.Regexp(t, "cannot be reverted, manual cleanup may be required", jobErr) +} diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go index 13a35ea53e6e..bb4b73fd97b3 100644 --- a/pkg/testutils/lint/lint_test.go +++ b/pkg/testutils/lint/lint_test.go @@ -1393,6 +1393,8 @@ func TestLint(t *testing.T) { stream.GrepNot(`pkg/sql/opt/optgen/exprgen/custom_funcs.go:.* func .* is unused`), // Using deprecated method to COPY. stream.GrepNot(`pkg/cli/nodelocal.go:.* stmt.Exec is deprecated: .*`), + // Geospatial code is in-progress. + stream.GrepNot("pkg/geo/geopb/types.go:.* type WKB is unused .*"), ), func(s string) { t.Errorf("\n%s", s) }); err != nil {