Skip to content

Commit

Permalink
detect DateTime format till success
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzhichang committed May 19, 2021
1 parent 6f7124f commit 2549fe7
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 10 deletions.
2 changes: 1 addition & 1 deletion docs/dev/introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Refers to [design](./design.md) for how it works.
- [x] UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64
- [x] Float32, Float64
- [x] String, FixedString, LowCardinality(String)
- [x] Date, DateTime, DateTime64. Automatically detect [these date formats](https://github.com/housepower/clickhouse_sinker/blob/master/parser/parser.go).
- [x] Date, DateTime, DateTime64. Assuming that all values of a field of kafka message has the same layout, and layouts of each field are unrelated. Automatically detect the layout from [these date layouts](https://github.com/housepower/clickhouse_sinker/blob/master/parser/parser.go) till the first successful detection and reuse that layout forever.
- [x] Array(T), where T is one of above basic types
- [x] Nullable(T), where T is one of above basic types
- [x] [ElasticDateTime](https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html) => Int64 (2019-12-16T12:10:30Z => 1576498230)
Expand Down
3 changes: 0 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,6 @@ github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/segmentio/kafka-go v0.4.8 h1:LO36H2tb7RcCRjsYzT/qf7xE+vRBXgddZDD82e1eiWY=
github.com/segmentio/kafka-go v0.4.8/go.mod h1:Inh7PqOsxmfgasV8InZYKVXWsdjcCq2d9tFV75GLbuM=
github.com/segmentio/kafka-go v0.4.16 h1:9dt78ehM9qzAkekA60D6A96RlqDzC3hnYYa8y5Szd+U=
github.com/segmentio/kafka-go v0.4.16/go.mod h1:19+Eg7KwrNKy/PFhiIthEPkO8k+ac7/ZYXwYM9Df10w=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
Expand All @@ -349,7 +347,6 @@ github.com/streadway/amqp v0.0.0-20200108173154-1c71cc93ed71 h1:2MR0pKUzlP3SGgj5
github.com/streadway/amqp v0.0.0-20200108173154-1c71cc93ed71/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
Expand Down
4 changes: 2 additions & 2 deletions parser/fastjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (c *FastjsonMetric) GetDateTime(key string, nullable bool) (val interface{}
val = UnixFloat(f)
case fastjson.TypeString:
var b []byte
if b, err = v.StringBytes(); err != nil {
if b, err = v.StringBytes(); err != nil || len(b) == 0 {
val = Epoch
return
}
Expand Down Expand Up @@ -189,7 +189,7 @@ func (c *FastjsonMetric) GetArray(key string, typ int) (val interface{}) {
t = UnixFloat(f)
}
case fastjson.TypeString:
if b, err := e.StringBytes(); err != nil {
if b, err := e.StringBytes(); err != nil || len(b) == 0 {
t = Epoch
} else {
t = c.pp.ParseDateTime(key, string(b))
Expand Down
11 changes: 7 additions & 4 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,27 +130,30 @@ func (pp *Pool) Put(p Parser) {
pp.pool.Put(p)
}

// Detect date format for each key at the first message.
// Assuming that all values of a field of kafka message has the same layout, and layouts of each field are unrelated.
// Automatically detect the layout from till the first successful detection and reuse that layout forever.
// Return time in UTC.
// Return Epoch if parsing fail.
func (pp *Pool) ParseDateTime(key string, val string) (t time.Time) {
var err error
var layout string
var lay interface{}
var ok bool
if val == "" {
t = Epoch
return
}
if lay, ok = pp.knownLayouts.Load(key); !ok {
t, layout = parseInLocation(val, pp.timeZone)
if layout != "" {
pp.knownLayouts.Store(key, layout)
return
}
pp.knownLayouts.Store(key, nil)
}
if lay == nil {
if layout, ok = lay.(string); !ok {
t = Epoch
return
}
layout, _ = lay.(string)
if t, err = time.ParseInLocation(layout, val, pp.timeZone); err != nil {
t = Epoch
return
Expand Down
3 changes: 3 additions & 0 deletions task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,9 @@ func (service *Service) put(msg model.InputMessage) {
if taskCfg.DynamicSchema.Enable {
foundNewKeys = metric.GetNewKeys(&service.knownKeys, &service.newKeys)
}
// Dumping message and result
//util.Logger.Debug("parsed kafka message", zap.Int("partition", msg.Partition), zap.Int64("offset", msg.Offset),
// zap.String("message value", string(msg.Value)), zap.String("row(spew)", spew.Sdump(row)))
}
// WARNNING: metric.GetXXX may depend on p. Don't call them after p been freed.
service.pp.Put(p)
Expand Down

0 comments on commit 2549fe7

Please sign in to comment.