-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
2,460 additions
and
944 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
name: Test | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
pull_request: | ||
|
||
jobs: | ||
lint: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: golangci/golangci-lint-action@v3 | ||
with: | ||
version: latest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
name: Test | ||
|
||
on: | ||
schedule: | ||
- cron: "0 0 * * 1" | ||
push: | ||
branches: | ||
- main | ||
pull_request: | ||
|
||
jobs: | ||
test: | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: read | ||
id-token: write | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: actions/setup-go@v3 | ||
with: | ||
go-version-file: go.mod | ||
cache: true | ||
- uses: google-github-actions/auth@v1 | ||
with: | ||
workload_identity_provider: projects/1050537848145/locations/global/workloadIdentityPools/pool-github-actions/providers/provider-github-actions | ||
service_account: [email protected] | ||
- run: go test -v ./... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,70 +1,76 @@ | ||
# spream [](https://pkg.go.dev/github.com/toga4/spream) | ||
# spream | ||
|
||
Tracking Spanner Change Streams for Go | ||
[](https://github.com/toga4/spream/actions/workflows/test.yaml) | ||
[](https://pkg.go.dev/github.com/toga4/spream) | ||
|
||
This library is an implementation of reading change streams of Google Spanner in Go. | ||
Cloud Spanner Change Streams Subscriber for Go | ||
|
||
## Usage | ||
### Synopsis | ||
|
||
This library is a Go implementation for subscribing to the records of a Google Cloud Spanner change stream. | ||
It is heavily inspired by the SpannerIO connector of the [Apache Beam SDK](https://github.com/apache/beam) and is compatible with the PartitionMetadata data model. | ||
|
||
### Motivation | ||
|
||
To read change streams, Google Cloud offers the [Dataflow connector](https://cloud.google.com/spanner/docs/change-streams/use-dataflow) as a scalable and reliable solution, but in some cases the abstraction and capabilities of Dataflow pipelines can be too much (or simply too expensive). | ||
For more flexibility, you can use the change stream API directly, but doing so is somewhat complex. | ||
This library aims to make reading change streams more flexible and easier to use. | ||
|
||
## Example Usage | ||
|
||
```go | ||
package main | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"errors" | ||
"fmt" | ||
"log" | ||
"time" | ||
"io" | ||
"os" | ||
"os/signal" | ||
"sync" | ||
|
||
"cloud.google.com/go/spanner" | ||
"github.com/toga4/spream" | ||
"github.com/toga4/spream/partitionstorage" | ||
) | ||
|
||
func main() { | ||
ctx := context.Background() | ||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, os.Kill) | ||
defer stop() | ||
|
||
database := fmt.Sprintf("projects/%s/instances/%s/databases/%s", "foo-project", "foo-instance", "foo-database") | ||
changeStreamName := "FooStream" | ||
|
||
spannerClient, err := spanner.NewClient(ctx, database) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer spannerClient.Close() | ||
|
||
c := spream.NewController( | ||
spannerClient, | ||
changeStreamName, | ||
changeSink, | ||
spream.WithWatermarker(watermarker), | ||
spream.WithOnPartitionClosed(onPartitionClosed), | ||
) | ||
|
||
partition := spream.Partition{ | ||
PartitionToken: spream.RootPartition, | ||
StartTimestamp: time.Now().Add(-time.Hour), | ||
} | ||
if err := c.StartWithPartitions(ctx, partition); err != nil { | ||
partitionMetadataTableName := "PartitionMetadata_FooStream" | ||
partitionStorage := partitionstorage.NewSpanner(spannerClient, partitionMetadataTableName) | ||
if err := partitionStorage.CreateTableIfNotExists(ctx); err != nil { | ||
panic(err) | ||
} | ||
} | ||
|
||
func changeSink(ctx context.Context, change *spream.Change) error { | ||
b, err := json.MarshalIndent(change, "", " ") | ||
if err != nil { | ||
return err | ||
changeStreamName := "FooStream" | ||
subscriber := spream.NewSubscriber(spannerClient, changeStreamName, partitionStorage) | ||
|
||
fmt.Fprintf(os.Stderr, "Reading the stream...\n") | ||
logger := &Logger{out: os.Stdout} | ||
if err := subscriber.Subscribe(ctx, logger); err != nil && !errors.Is(ctx.Err(), context.Canceled) { | ||
panic(err) | ||
} | ||
log.Printf("changed: %s", b) | ||
return nil | ||
} | ||
|
||
func watermarker(ctx context.Context, partitionToken string, timestamp time.Time) error { | ||
log.Printf("watermark: %v : %s", partitionToken, timestamp) | ||
return nil | ||
type Logger struct { | ||
out io.Writer | ||
mu sync.Mutex | ||
} | ||
|
||
func onPartitionClosed(ctx context.Context, partitionToken string) error { | ||
log.Printf("partition closed: %v", partitionToken) | ||
return nil | ||
func (l *Logger) Consume(change *spream.DataChangeRecord) error { | ||
l.mu.Lock() | ||
defer l.mu.Unlock() | ||
return json.NewEncoder(l.out).Encode(change) | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
package spream | ||
|
||
import ( | ||
"time" | ||
|
||
"cloud.google.com/go/spanner" | ||
) | ||
|
||
// changeRecord groups the three kinds of record arrays that are decoded
// together: data change records, heartbeat records and child partitions
// records. The spanner tags name the source fields (data_change_record,
// heartbeat_record, child_partitions_record); the json tags mirror them.
type changeRecord struct { | ||
DataChangeRecords []*dataChangeRecord `spanner:"data_change_record" json:"data_change_record"` | ||
HeartbeatRecords []*HeartbeatRecord `spanner:"heartbeat_record" json:"heartbeat_record"` | ||
ChildPartitionsRecords []*ChildPartitionsRecord `spanner:"child_partitions_record" json:"child_partitions_record"` | ||
} | ||
|
||
// dataChangeRecord is the Spanner-facing form of a data change record: every
// field carries a spanner tag naming the source field it is decoded from.
// ColumnTypes and Mods still contain spanner.NullJSON payloads here;
// decodeToNonSpannerType converts the record into the exported
// DataChangeRecord, which uses plain Go types.
type dataChangeRecord struct { | ||
CommitTimestamp time.Time `spanner:"commit_timestamp" json:"commit_timestamp"` | ||
RecordSequence string `spanner:"record_sequence" json:"record_sequence"` | ||
ServerTransactionID string `spanner:"server_transaction_id" json:"server_transaction_id"` | ||
IsLastRecordInTransactionInPartition bool `spanner:"is_last_record_in_transaction_in_partition" json:"is_last_record_in_transaction_in_partition"` | ||
TableName string `spanner:"table_name" json:"table_name"` | ||
ColumnTypes []*columnType `spanner:"column_types" json:"column_types"` | ||
Mods []*mod `spanner:"mods" json:"mods"` | ||
ModType string `spanner:"mod_type" json:"mod_type"` | ||
ValueCaptureType string `spanner:"value_capture_type" json:"value_capture_type"` | ||
NumberOfRecordsInTransaction int64 `spanner:"number_of_records_in_transaction" json:"number_of_records_in_transaction"` | ||
NumberOfPartitionsInTransaction int64 `spanner:"number_of_partitions_in_transaction" json:"number_of_partitions_in_transaction"` | ||
TransactionTag string `spanner:"transaction_tag" json:"transaction_tag"` | ||
IsSystemTransaction bool `spanner:"is_system_transaction" json:"is_system_transaction"` | ||
} | ||
|
||
// columnType is the Spanner-facing form of a column's metadata. Type is kept
// as raw spanner.NullJSON at this stage; decodeColumnTypeJSONToType turns it
// into the exported Type when the record is converted.
type columnType struct { | ||
Name string `spanner:"name" json:"name"` | ||
Type spanner.NullJSON `spanner:"type" json:"type"` | ||
IsPrimaryKey bool `spanner:"is_primary_key" json:"is_primary_key"` | ||
OrdinalPosition int64 `spanner:"ordinal_position" json:"ordinal_position"` | ||
} | ||
|
||
// mod is the Spanner-facing form of a single modification. Keys, NewValues
// and OldValues are raw spanner.NullJSON values; decodeNullJSONToMap converts
// each of them into a map when the exported Mod is built.
type mod struct { | ||
Keys spanner.NullJSON `spanner:"keys" json:"keys"` | ||
NewValues spanner.NullJSON `spanner:"new_values" json:"new_values"` | ||
OldValues spanner.NullJSON `spanner:"old_values" json:"old_values"` | ||
} | ||
|
||
// DataChangeRecord is the change set of the table.
//
// It is the exported counterpart of dataChangeRecord and is produced by
// decodeToNonSpannerType: scalar fields are copied as-is, while ColumnTypes,
// Mods and ModType use the exported ColumnType, Mod and ModType types instead
// of Spanner-specific ones. Only json tags are declared on this type.
type DataChangeRecord struct { | ||
CommitTimestamp time.Time `json:"commit_timestamp"` | ||
RecordSequence string `json:"record_sequence"` | ||
ServerTransactionID string `json:"server_transaction_id"` | ||
IsLastRecordInTransactionInPartition bool `json:"is_last_record_in_transaction_in_partition"` | ||
TableName string `json:"table_name"` | ||
ColumnTypes []*ColumnType `json:"column_types"` | ||
Mods []*Mod `json:"mods"` | ||
ModType ModType `json:"mod_type"` | ||
ValueCaptureType string `json:"value_capture_type"` | ||
NumberOfRecordsInTransaction int64 `json:"number_of_records_in_transaction"` | ||
NumberOfPartitionsInTransaction int64 `json:"number_of_partitions_in_transaction"` | ||
TransactionTag string `json:"transaction_tag"` | ||
IsSystemTransaction bool `json:"is_system_transaction"` | ||
} | ||
|
||
// ColumnType is the metadata of the column.
//
// It is the exported counterpart of columnType, with Type already decoded
// from JSON. IsPrimaryKey is left out of the JSON encoding when it is false
// (omitempty); the other fields are always emitted.
type ColumnType struct { | ||
Name string `json:"name"` | ||
Type Type `json:"type"` | ||
IsPrimaryKey bool `json:"is_primary_key,omitempty"` | ||
OrdinalPosition int64 `json:"ordinal_position"` | ||
} | ||
|
||
// Type is the type of the column.
//
// Code holds the column's type code. ArrayElementType is filled from the
// "array_element_type" entry of the column type JSON when that entry is
// present (see decodeColumnTypeJSONToType) and is omitted from the JSON
// encoding when empty.
type Type struct { | ||
Code TypeCode `json:"code"` | ||
ArrayElementType TypeCode `json:"array_element_type,omitempty"` | ||
} | ||
|
||
// TypeCode is the string code of a column type, as read from the "code"
// entry of the column type JSON by decodeColumnTypeJSONToType.
type TypeCode string | ||
|
||
// Known TypeCode values. TypeCode_NONE is the empty string, i.e. the zero
// value of TypeCode.
const ( | ||
TypeCode_NONE TypeCode = "" | ||
TypeCode_BOOL TypeCode = "BOOL" | ||
TypeCode_INT64 TypeCode = "INT64" | ||
TypeCode_FLOAT64 TypeCode = "FLOAT64" | ||
TypeCode_TIMESTAMP TypeCode = "TIMESTAMP" | ||
TypeCode_DATE TypeCode = "DATE" | ||
TypeCode_STRING TypeCode = "STRING" | ||
TypeCode_BYTES TypeCode = "BYTES" | ||
TypeCode_NUMERIC TypeCode = "NUMERIC" | ||
TypeCode_JSON TypeCode = "JSON" | ||
TypeCode_ARRAY TypeCode = "ARRAY" | ||
) | ||
|
||
// Mod contains the keys and the values of the changed records.
//
// Each map is produced by decodeNullJSONToMap and is nil when the
// corresponding JSON value was NULL; empty (including nil) maps are omitted
// from the JSON encoding.
type Mod struct { | ||
Keys map[string]interface{} `json:"keys,omitempty"` | ||
NewValues map[string]interface{} `json:"new_values,omitempty"` | ||
OldValues map[string]interface{} `json:"old_values,omitempty"` | ||
} | ||
|
||
// ModType identifies the kind of modification reported by a
// DataChangeRecord.
type ModType string

// Known ModType values.
//
// The constants are declared with the ModType type (not as untyped string
// constants) so that they have the same type as the DataChangeRecord.ModType
// field and follow the same pattern as the TypeCode constants.
const (
	ModType_INSERT ModType = "INSERT"
	ModType_UPDATE ModType = "UPDATE"
	ModType_DELETE ModType = "DELETE"
)
|
||
// HeartbeatRecord is an element of the heartbeat_record array of a change
// record. It carries a single timestamp.
// NOTE(review): presumably reports stream progress up to Timestamp when no
// data changes occurred — confirm against the Spanner change streams docs.
type HeartbeatRecord struct { | ||
Timestamp time.Time `spanner:"timestamp" json:"timestamp"` | ||
} | ||
|
||
// ChildPartitionsRecord is an element of the child_partitions_record array of
// a change record. It lists the child partitions together with a start
// timestamp and a record sequence.
// NOTE(review): presumably StartTimestamp is where reading of the listed
// child partitions should begin — confirm against the Spanner docs.
type ChildPartitionsRecord struct { | ||
StartTimestamp time.Time `spanner:"start_timestamp" json:"start_timestamp"` | ||
RecordSequence string `spanner:"record_sequence" json:"record_sequence"` | ||
ChildPartitions []*ChildPartition `spanner:"child_partitions" json:"child_partitions"` | ||
} | ||
|
||
// ChildPartition describes one entry of ChildPartitionsRecord.ChildPartitions:
// the partition's own token and the tokens of its parent partitions.
type ChildPartition struct { | ||
Token string `spanner:"token" json:"token"` | ||
ParentPartitionTokens []string `spanner:"parent_partition_tokens" json:"parent_partition_tokens"` | ||
} | ||
|
||
func (r *dataChangeRecord) decodeToNonSpannerType() *DataChangeRecord { | ||
columnTypes := []*ColumnType{} | ||
for _, t := range r.ColumnTypes { | ||
columnTypes = append(columnTypes, &ColumnType{ | ||
Name: t.Name, | ||
Type: decodeColumnTypeJSONToType(t.Type), | ||
IsPrimaryKey: t.IsPrimaryKey, | ||
OrdinalPosition: t.OrdinalPosition, | ||
}) | ||
} | ||
|
||
mods := make([]*Mod, 0, len(r.Mods)) | ||
for _, m := range r.Mods { | ||
mods = append(mods, &Mod{ | ||
Keys: decodeNullJSONToMap(m.Keys), | ||
NewValues: decodeNullJSONToMap(m.NewValues), | ||
OldValues: decodeNullJSONToMap(m.OldValues), | ||
}) | ||
} | ||
|
||
return &DataChangeRecord{ | ||
CommitTimestamp: r.CommitTimestamp, | ||
RecordSequence: r.RecordSequence, | ||
ServerTransactionID: r.ServerTransactionID, | ||
IsLastRecordInTransactionInPartition: r.IsLastRecordInTransactionInPartition, | ||
TableName: r.TableName, | ||
ColumnTypes: columnTypes, | ||
Mods: mods, | ||
ModType: ModType(r.ModType), | ||
ValueCaptureType: r.ValueCaptureType, | ||
NumberOfRecordsInTransaction: r.NumberOfRecordsInTransaction, | ||
NumberOfPartitionsInTransaction: r.NumberOfPartitionsInTransaction, | ||
TransactionTag: r.TransactionTag, | ||
IsSystemTransaction: r.IsSystemTransaction, | ||
} | ||
} | ||
|
||
func decodeColumnTypeJSONToType(columnType spanner.NullJSON) Type { | ||
m := columnType.Value.(map[string]interface{}) | ||
code := TypeCode(m["code"].(string)) | ||
|
||
if aet, ok := m["array_element_type"].(map[string]interface{}); ok { | ||
arrayElementType := TypeCode(aet["code"].(string)) | ||
return Type{ | ||
Code: code, | ||
ArrayElementType: arrayElementType, | ||
} | ||
} | ||
|
||
return Type{Code: code} | ||
} | ||
|
||
func decodeNullJSONToMap(j spanner.NullJSON) map[string]interface{} { | ||
if j.IsNull() { | ||
return nil | ||
} | ||
return j.Value.(map[string]interface{}) | ||
} |
Oops, something went wrong.