Skip to content

Commit

Permalink
Rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
toga4 committed Jan 7, 2023
1 parent 085d52d commit e38c7fb
Show file tree
Hide file tree
Showing 23 changed files with 2,460 additions and 944 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Runs golangci-lint on every push to main and on every pull request.
# Named "Lint" so it can be told apart from the separate "Test" workflow.
name: Lint

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: golangci/golangci-lint-action@v3
        with:
          version: latest
27 changes: 27 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Runs the Go test suite on a cron schedule, on pushes to main and on pull requests.
name: Test

on:
schedule:
# minute 0, hour 0, any day of month, any month, day-of-week 1 (Monday).
- cron: "0 0 * * 1"
push:
branches:
- main
pull_request:

jobs:
test:
runs-on: ubuntu-latest
permissions:
contents: read
# presumably required so the google-github-actions/auth step below can obtain an OIDC token — confirm
id-token: write
steps:
- uses: actions/checkout@v3
- uses: actions/setup-go@v3
with:
# Take the Go version from go.mod instead of pinning it here.
go-version-file: go.mod
cache: true
# Authenticates to Google Cloud through the workload identity provider configured below.
- uses: google-github-actions/auth@v1
with:
workload_identity_provider: projects/1050537848145/locations/global/workloadIdentityPools/pool-github-actions/providers/provider-github-actions
# NOTE(review): this value looks like an obfuscated e-mail placeholder — confirm the real service account address.
service_account: [email protected]
- run: go test -v ./...
76 changes: 41 additions & 35 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,70 +1,76 @@
# spream [![Go Reference](https://pkg.go.dev/badge/github.com/toga4/spream.svg)](https://pkg.go.dev/github.com/toga4/spream)
# spream

Tracking Spanner Change Streams for Go
[![Test](https://github.com/toga4/spream/actions/workflows/test.yaml/badge.svg)](https://github.com/toga4/spream/actions/workflows/test.yaml)
[![Go Reference](https://pkg.go.dev/badge/github.com/toga4/spream.svg)](https://pkg.go.dev/github.com/toga4/spream)

This library is an implementation of reading change streams of Google Spanner in Go.
Cloud Spanner Change Streams Subscriber for Go

## Usage
### Synopsis

This library is a Go implementation of a subscriber for the change stream records of Google Cloud Spanner.
It is heavily inspired by the SpannerIO connector of the [Apache Beam SDK](https://github.com/apache/beam) and is compatible with the PartitionMetadata data model.

### Motivation

To read change streams, Google Cloud offers the [Dataflow connector](https://cloud.google.com/spanner/docs/change-streams/use-dataflow) as a scalable and reliable solution, but in some cases the abstraction and capabilities of Dataflow pipelines can be too much (or simply too expensive).
For more flexibility, you can use the change stream API directly, but doing so is somewhat complex.
This library aims to make reading change streams more flexible and easier to use.

## Example Usage

```go
package main

import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"time"
"io"
"os"
"os/signal"
"sync"

"cloud.google.com/go/spanner"
"github.com/toga4/spream"
"github.com/toga4/spream/partitionstorage"
)

func main() {
ctx := context.Background()
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, os.Kill)
defer stop()

database := fmt.Sprintf("projects/%s/instances/%s/databases/%s", "foo-project", "foo-instance", "foo-database")
changeStreamName := "FooStream"

spannerClient, err := spanner.NewClient(ctx, database)
if err != nil {
panic(err)
}
defer spannerClient.Close()

c := spream.NewController(
spannerClient,
changeStreamName,
changeSink,
spream.WithWatermarker(watermarker),
spream.WithOnPartitionClosed(onPartitionClosed),
)

partition := spream.Partition{
PartitionToken: spream.RootPartition,
StartTimestamp: time.Now().Add(-time.Hour),
}
if err := c.StartWithPartitions(ctx, partition); err != nil {
partitionMetadataTableName := "PartitionMetadata_FooStream"
partitionStorage := partitionstorage.NewSpanner(spannerClient, partitionMetadataTableName)
if err := partitionStorage.CreateTableIfNotExists(ctx); err != nil {
panic(err)
}
}

func changeSink(ctx context.Context, change *spream.Change) error {
b, err := json.MarshalIndent(change, "", " ")
if err != nil {
return err
changeStreamName := "FooStream"
subscriber := spream.NewSubscriber(spannerClient, changeStreamName, partitionStorage)

fmt.Fprintf(os.Stderr, "Reading the stream...\n")
logger := &Logger{out: os.Stdout}
if err := subscriber.Subscribe(ctx, logger); err != nil && !errors.Is(ctx.Err(), context.Canceled) {
panic(err)
}
log.Printf("changed: %s", b)
return nil
}

func watermarker(ctx context.Context, partitionToken string, timestamp time.Time) error {
log.Printf("watermark: %v : %s", partitionToken, timestamp)
return nil
type Logger struct {
out io.Writer
mu sync.Mutex
}

func onPartitionClosed(ctx context.Context, partitionToken string) error {
log.Printf("partition closed: %v", partitionToken)
return nil
func (l *Logger) Consume(change *spream.DataChangeRecord) error {
l.mu.Lock()
defer l.mu.Unlock()
return json.NewEncoder(l.out).Encode(change)
}
```
178 changes: 178 additions & 0 deletions change_record.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
package spream

import (
"time"

"cloud.google.com/go/spanner"
)

// changeRecord is the spanner-decoded container for the three record kinds
// carried in the data_change_record, heartbeat_record and
// child_partitions_record columns. Each kind is held as a slice.
type changeRecord struct {
DataChangeRecords []*dataChangeRecord `spanner:"data_change_record" json:"data_change_record"`
HeartbeatRecords []*HeartbeatRecord `spanner:"heartbeat_record" json:"heartbeat_record"`
ChildPartitionsRecords []*ChildPartitionsRecord `spanner:"child_partitions_record" json:"child_partitions_record"`
}

// dataChangeRecord is the spanner-decoded form of a data change record.
// Its ColumnTypes and Mods elements still hold spanner.NullJSON values;
// decodeToNonSpannerType converts it into the exported DataChangeRecord.
type dataChangeRecord struct {
CommitTimestamp time.Time `spanner:"commit_timestamp" json:"commit_timestamp"`
RecordSequence string `spanner:"record_sequence" json:"record_sequence"`
ServerTransactionID string `spanner:"server_transaction_id" json:"server_transaction_id"`
IsLastRecordInTransactionInPartition bool `spanner:"is_last_record_in_transaction_in_partition" json:"is_last_record_in_transaction_in_partition"`
TableName string `spanner:"table_name" json:"table_name"`
ColumnTypes []*columnType `spanner:"column_types" json:"column_types"`
Mods []*mod `spanner:"mods" json:"mods"`
// ModType is kept as a plain string here; it is converted to ModType when decoded.
ModType string `spanner:"mod_type" json:"mod_type"`
ValueCaptureType string `spanner:"value_capture_type" json:"value_capture_type"`
NumberOfRecordsInTransaction int64 `spanner:"number_of_records_in_transaction" json:"number_of_records_in_transaction"`
NumberOfPartitionsInTransaction int64 `spanner:"number_of_partitions_in_transaction" json:"number_of_partitions_in_transaction"`
TransactionTag string `spanner:"transaction_tag" json:"transaction_tag"`
IsSystemTransaction bool `spanner:"is_system_transaction" json:"is_system_transaction"`
}

// columnType is the spanner-decoded metadata of one column.
type columnType struct {
Name string `spanner:"name" json:"name"`
// Type is a JSON descriptor; decodeColumnTypeJSONToType reads its "code"
// and optional "array_element_type" entries.
Type spanner.NullJSON `spanner:"type" json:"type"`
IsPrimaryKey bool `spanner:"is_primary_key" json:"is_primary_key"`
OrdinalPosition int64 `spanner:"ordinal_position" json:"ordinal_position"`
}

// mod is the spanner-decoded form of one modification. Each field is a
// nullable JSON value that decodeNullJSONToMap turns into a map (nil when NULL).
type mod struct {
Keys spanner.NullJSON `spanner:"keys" json:"keys"`
NewValues spanner.NullJSON `spanner:"new_values" json:"new_values"`
OldValues spanner.NullJSON `spanner:"old_values" json:"old_values"`
}

// DataChangeRecord is the change set of the table.
//
// It mirrors the unexported dataChangeRecord field for field, but uses plain
// Go types instead of spanner.NullJSON: ColumnTypes holds decoded ColumnType
// values, Mods holds decoded Mod values and ModType is the typed ModType.
type DataChangeRecord struct {
CommitTimestamp time.Time `json:"commit_timestamp"`
RecordSequence string `json:"record_sequence"`
ServerTransactionID string `json:"server_transaction_id"`
IsLastRecordInTransactionInPartition bool `json:"is_last_record_in_transaction_in_partition"`
TableName string `json:"table_name"`
ColumnTypes []*ColumnType `json:"column_types"`
Mods []*Mod `json:"mods"`
ModType ModType `json:"mod_type"`
ValueCaptureType string `json:"value_capture_type"`
NumberOfRecordsInTransaction int64 `json:"number_of_records_in_transaction"`
NumberOfPartitionsInTransaction int64 `json:"number_of_partitions_in_transaction"`
TransactionTag string `json:"transaction_tag"`
IsSystemTransaction bool `json:"is_system_transaction"`
}

// ColumnType is the metadata of the column.
type ColumnType struct {
Name string `json:"name"`
Type Type `json:"type"`
// IsPrimaryKey is left out of the JSON encoding when false (omitempty).
IsPrimaryKey bool `json:"is_primary_key,omitempty"`
OrdinalPosition int64 `json:"ordinal_position"`
}

// Type is the type of the column.
type Type struct {
// Code is the type code of the column itself.
Code TypeCode `json:"code"`
// ArrayElementType is filled in only when the type descriptor carries an
// "array_element_type" entry; it is left out of the JSON encoding when empty.
ArrayElementType TypeCode `json:"array_element_type,omitempty"`
}

// TypeCode is the string code of a column type, as found in the "code" entry
// of a column's JSON type descriptor.
type TypeCode string

// Known TypeCode values. TypeCode_NONE is the empty string, i.e. the zero
// value of TypeCode.
const (
TypeCode_NONE TypeCode = ""
TypeCode_BOOL TypeCode = "BOOL"
TypeCode_INT64 TypeCode = "INT64"
TypeCode_FLOAT64 TypeCode = "FLOAT64"
TypeCode_TIMESTAMP TypeCode = "TIMESTAMP"
TypeCode_DATE TypeCode = "DATE"
TypeCode_STRING TypeCode = "STRING"
TypeCode_BYTES TypeCode = "BYTES"
TypeCode_NUMERIC TypeCode = "NUMERIC"
TypeCode_JSON TypeCode = "JSON"
TypeCode_ARRAY TypeCode = "ARRAY"
)

// Mod contains the keys and the values of the changed records.
//
// Each map is nil when the corresponding JSON value was NULL, and a nil or
// empty map is left out of the JSON encoding (omitempty).
type Mod struct {
Keys map[string]interface{} `json:"keys,omitempty"`
NewValues map[string]interface{} `json:"new_values,omitempty"`
OldValues map[string]interface{} `json:"old_values,omitempty"`
}

// ModType is the kind of modification reported in DataChangeRecord.ModType.
type ModType string

// Known ModType values.
//
// Each constant is declared with the ModType type (rather than as an untyped
// string constant) so that it keeps that type when stored in an interface,
// printed with %T or matched in a type switch.
const (
	ModType_INSERT ModType = "INSERT"
	ModType_UPDATE ModType = "UPDATE"
	ModType_DELETE ModType = "DELETE"
)

// HeartbeatRecord is one element of the heartbeat_record column; it carries
// only a timestamp.
type HeartbeatRecord struct {
Timestamp time.Time `spanner:"timestamp" json:"timestamp"`
}

// ChildPartitionsRecord is one element of the child_partitions_record column:
// a start timestamp, a record sequence and the list of child partitions.
type ChildPartitionsRecord struct {
StartTimestamp time.Time `spanner:"start_timestamp" json:"start_timestamp"`
RecordSequence string `spanner:"record_sequence" json:"record_sequence"`
ChildPartitions []*ChildPartition `spanner:"child_partitions" json:"child_partitions"`
}

// ChildPartition pairs the token of a child partition with the tokens of its
// parent partitions.
type ChildPartition struct {
Token string `spanner:"token" json:"token"`
ParentPartitionTokens []string `spanner:"parent_partition_tokens" json:"parent_partition_tokens"`
}

// decodeToNonSpannerType converts the spanner-decoded record into the exported
// DataChangeRecord, replacing every spanner.NullJSON value with plain Go
// values.
//
// Scalar fields are copied unchanged and ModType is converted from string to
// ModType. Each column type descriptor is decoded with
// decodeColumnTypeJSONToType and each mod's keys/new values/old values with
// decodeNullJSONToMap. ColumnTypes and Mods in the result are always non-nil
// slices, so they encode to JSON as [] rather than null when the record has
// none.
func (r *dataChangeRecord) decodeToNonSpannerType() *DataChangeRecord {
	// Both outputs have exactly one element per input element, so size them
	// up front instead of growing them while appending.
	columnTypes := make([]*ColumnType, 0, len(r.ColumnTypes))
	for _, t := range r.ColumnTypes {
		columnTypes = append(columnTypes, &ColumnType{
			Name:            t.Name,
			Type:            decodeColumnTypeJSONToType(t.Type),
			IsPrimaryKey:    t.IsPrimaryKey,
			OrdinalPosition: t.OrdinalPosition,
		})
	}

	mods := make([]*Mod, 0, len(r.Mods))
	for _, m := range r.Mods {
		mods = append(mods, &Mod{
			Keys:      decodeNullJSONToMap(m.Keys),
			NewValues: decodeNullJSONToMap(m.NewValues),
			OldValues: decodeNullJSONToMap(m.OldValues),
		})
	}

	return &DataChangeRecord{
		CommitTimestamp:                      r.CommitTimestamp,
		RecordSequence:                       r.RecordSequence,
		ServerTransactionID:                  r.ServerTransactionID,
		IsLastRecordInTransactionInPartition: r.IsLastRecordInTransactionInPartition,
		TableName:                            r.TableName,
		ColumnTypes:                          columnTypes,
		Mods:                                 mods,
		ModType:                              ModType(r.ModType),
		ValueCaptureType:                     r.ValueCaptureType,
		NumberOfRecordsInTransaction:         r.NumberOfRecordsInTransaction,
		NumberOfPartitionsInTransaction:      r.NumberOfPartitionsInTransaction,
		TransactionTag:                       r.TransactionTag,
		IsSystemTransaction:                  r.IsSystemTransaction,
	}
}

// decodeColumnTypeJSONToType extracts the type code and, when present, the
// array element type code from the JSON type descriptor of a column.
//
// The descriptor is read as an object with a string "code" entry and an
// optional "array_element_type" object that has its own string "code" entry,
// e.g. {"code":"ARRAY","array_element_type":{"code":"STRING"}}.
//
// Any part that is missing, NULL or not of the expected JSON type yields the
// empty TypeCode (TypeCode_NONE) for that part. The function never panics, so
// an unexpected descriptor cannot crash the caller.
func decodeColumnTypeJSONToType(columnType spanner.NullJSON) Type {
	m, ok := columnType.Value.(map[string]interface{})
	if !ok {
		// NULL or non-object descriptor: nothing can be decoded.
		return Type{}
	}

	// A failed assertion leaves the string empty, which is TypeCode_NONE.
	code, _ := m["code"].(string)
	t := Type{Code: TypeCode(code)}

	if aet, ok := m["array_element_type"].(map[string]interface{}); ok {
		elemCode, _ := aet["code"].(string)
		t.ArrayElementType = TypeCode(elemCode)
	}

	return t
}

// decodeNullJSONToMap unwraps the JSON object held in a spanner.NullJSON.
//
// A NULL value becomes a nil map. A non-NULL value must hold a JSON object
// (map[string]interface{}); any other dynamic type makes the type assertion
// panic.
func decodeNullJSONToMap(j spanner.NullJSON) map[string]interface{} {
	var decoded map[string]interface{}
	if !j.IsNull() {
		decoded = j.Value.(map[string]interface{})
	}
	return decoded
}
49 changes: 0 additions & 49 deletions config.go

This file was deleted.

Loading

0 comments on commit e38c7fb

Please sign in to comment.