[7.13](backport #420) Propagate checkin status to agent record #442

Closed
wants to merge 1 commit into from
21 changes: 21 additions & 0 deletions cmd/fleet/handleCheckin.go
@@ -164,7 +164,20 @@ func (ct *CheckinT) _handleCheckin(w http.ResponseWriter, r *http.Request, id st
		return err
	}

	log.Debug().
		Str("agentId", id).
		Str("reqId", reqId).
		Str("status", req.Status).
		Str("seqNo", seqno.String()).
		RawJSON("meta", rawMeta).
		Uint64("bodyCount", readCounter.Count()).
		Msg("checkin start long poll")

	// Subscribe to actions dispatcher
	aSub := ct.ad.Subscribe(agent.Id, seqno)
	defer ct.ad.Unsubscribe(aSub)
	actCh := aSub.Ch()
@@ -185,7 +198,11 @@ func (ct *CheckinT) _handleCheckin(w http.ResponseWriter, r *http.Request, id st
	defer longPoll.Stop()

	// Initial update on checkin, and any user fields that might have changed
	ct.bc.CheckIn(agent.Id, req.Status, rawMeta, seqno)

	// Initial fetch for pending actions
	var (
@@ -222,7 +239,11 @@ func (ct *CheckinT) _handleCheckin(w http.ResponseWriter, r *http.Request, id st
			log.Trace().Msg("fire long poll")
			break LOOP
		case <-tick.C:
			ct.bc.CheckIn(agent.Id, req.Status, nil, nil)
		}
	}
}
1 change: 1 addition & 0 deletions cmd/fleet/schema.go
@@ -76,6 +76,7 @@ type EnrollResponse struct {
}

type CheckinRequest struct {
	Status    string          `json:"status"`
	AckToken  string          `json:"ack_token,omitempty"`
	Events    []Event         `json:"events"`
	LocalMeta json.RawMessage `json:"local_metadata"`
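For illustration, a checkin request body carrying the new field might look like the following (all values are hypothetical; only the status field is added by this change):

    {
      "status": "online",
      "ack_token": "example-ack-token",
      "events": [],
      "local_metadata": {
        "host": { "hostname": "agent-01" }
      }
    }
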
238 changes: 238 additions & 0 deletions internal/pkg/checkin/bulk.go
@@ -0,0 +1,238 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package checkin

import (
"context"
"encoding/json"
"sync"
"time"

"github.com/elastic/fleet-server/v7/internal/pkg/bulk"
"github.com/elastic/fleet-server/v7/internal/pkg/dl"
"github.com/elastic/fleet-server/v7/internal/pkg/sqn"

"github.com/rs/zerolog/log"
)

const defaultFlushInterval = 10 * time.Second

type optionsT struct {
	flushInterval time.Duration
}

type Opt func(*optionsT)

func WithFlushInterval(d time.Duration) Opt {
	return func(opt *optionsT) {
		opt.flushInterval = d
	}
}

type extraT struct {
	meta  []byte
	seqNo sqn.SeqNo
}

// Minimize the size of this structure.
// There will be 10's of thousands of items
// in the map at any point.
type pendingT struct {
	ts     string
	status string
	extra  *extraT
}

type Bulk struct {
	opts    optionsT
	bulker  bulk.Bulk
	mut     sync.Mutex
	pending map[string]pendingT

	ts   string
	unix int64
}

func NewBulk(bulker bulk.Bulk, opts ...Opt) *Bulk {
	parsedOpts := parseOpts(opts...)

	return &Bulk{
		opts:    parsedOpts,
		bulker:  bulker,
		pending: make(map[string]pendingT),
	}
}

func parseOpts(opts ...Opt) optionsT {

	outOpts := optionsT{
		flushInterval: defaultFlushInterval,
	}

	for _, f := range opts {
		f(&outOpts)
	}

	return outOpts
}

// Generate and cache timestamp on seconds change.
// Avoid thousands of formats of an identical string.
func (bc *Bulk) timestamp() string {

	// WARNING: Expects mutex locked.
	now := time.Now()
	if now.Unix() != bc.unix {
		bc.unix = now.Unix()
		bc.ts = now.UTC().Format(time.RFC3339)
	}

	return bc.ts
}

// WARNING: Bulk will take ownership of the meta slice,
// so do not use it after passing it in.
func (bc *Bulk) CheckIn(id string, status string, meta []byte, seqno sqn.SeqNo) error {

	// Separate out the extra data to minimize
	// the memory footprint of the 90% case of just
	// updating the timestamp.
	var extra *extraT
	if meta != nil || seqno.IsSet() {
		extra = &extraT{
			meta:  meta,
			seqNo: seqno,
		}
	}

	bc.mut.Lock()

	bc.pending[id] = pendingT{
		ts:     bc.timestamp(),
		status: status,
		extra:  extra,
	}

	bc.mut.Unlock()
	return nil
}
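
// Illustrative caller-side note (not part of this change): because CheckIn
// takes ownership of meta, the slice must not be modified or reused after
// the call returns, since flush() may still be holding it. For example
// (names assumed for illustration):
//
//	raw, _ := json.Marshal(localMeta) // hypothetical metadata payload
//	bc.CheckIn(agent.Id, req.Status, raw, seqno)
//	// do not touch raw past this point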

func (bc *Bulk) Run(ctx context.Context) error {

	tick := time.NewTicker(bc.opts.flushInterval)
	defer tick.Stop()

	var err error
LOOP:
	for {
		select {
		case <-tick.C:
			if err = bc.flush(ctx); err != nil {
				log.Error().Err(err).Msg("Eat bulk checkin error; Keep on truckin'")
				err = nil
			}

		case <-ctx.Done():
			err = ctx.Err()
			break LOOP
		}
	}

	return err
}

func (bc *Bulk) flush(ctx context.Context) error {
	start := time.Now()

	bc.mut.Lock()
	pending := bc.pending
	bc.pending = make(map[string]pendingT, len(pending))
	bc.mut.Unlock()

	if len(pending) == 0 {
		return nil
	}

	updates := make([]bulk.MultiOp, 0, len(pending))

	simpleCache := make(map[pendingT][]byte)

	nowTimestamp := start.UTC().Format(time.RFC3339)

	var err error
	var needRefresh bool
	for id, pendingData := range pending {

		// In the simple case, there are no fields and no seqNo.
		// When that is true, we can reuse an already generated
		// JSON body containing just the timestamp updates.
		var body []byte
		if pendingData.extra == nil {

			var ok bool
			body, ok = simpleCache[pendingData]
			if !ok {
				fields := bulk.UpdateFields{
					dl.FieldLastCheckin:       pendingData.ts,
					dl.FieldUpdatedAt:         nowTimestamp,
					dl.FieldLastCheckinStatus: pendingData.status,
				}
				if body, err = fields.Marshal(); err != nil {
					return err
				}
				simpleCache[pendingData] = body
			}
		} else {

			fields := bulk.UpdateFields{
				dl.FieldLastCheckin:       pendingData.ts,     // Set the checkin timestamp
				dl.FieldUpdatedAt:         nowTimestamp,       // Set "updated_at" to the current timestamp
				dl.FieldLastCheckinStatus: pendingData.status, // Set the pending status
			}

			// Update local metadata if provided
			if pendingData.extra.meta != nil {
				// Surprise: The json encoder compacts this raw JSON during
				// the encode process, so there may be unexpected memory overhead:
				// https://github.com/golang/go/blob/go1.16.3/src/encoding/json/encode.go#L499
				fields[dl.FieldLocalMetadata] = json.RawMessage(pendingData.extra.meta)
			}

			// If seqNo changed, set the field appropriately
			if pendingData.extra.seqNo.IsSet() {
				fields[dl.FieldActionSeqNo] = pendingData.extra.seqNo

				// Only refresh if seqNo changed; dropping metadata is not important.
				needRefresh = true
			}

			if body, err = fields.Marshal(); err != nil {
				return err
			}
		}

		updates = append(updates, bulk.MultiOp{
			Id:    id,
			Body:  body,
			Index: dl.FleetAgents,
		})
	}

	var opts []bulk.Opt
	if needRefresh {
		opts = append(opts, bulk.WithRefresh())
	}

	_, err = bc.bulker.MUpdate(ctx, updates, opts...)

	log.Trace().
		Err(err).
		Dur("rtt", time.Since(start)).
		Int("cnt", len(updates)).
		Bool("refresh", needRefresh).
		Msg("Flush updates")

	return err
}
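
As a rough sketch, the component above would be wired up at server startup along these lines (the bulker value, context plumbing, and surrounding names are assumptions for illustration, not taken from this diff):

	// Construct the bulk checkin handler, overriding the 10s default
	// flush interval, and run its flush loop until shutdown.
	bc := checkin.NewBulk(bulker, checkin.WithFlushInterval(30*time.Second))

	go func() {
		// Run blocks until ctx is cancelled; per-flush errors are logged
		// and swallowed inside Run itself, so only the terminal error
		// (normally ctx.Err()) surfaces here.
		if err := bc.Run(ctx); err != nil {
			log.Error().Err(err).Msg("bulk checkin loop exited")
		}
	}()

	// Each agent checkin then coalesces into the pending map and is
	// written to the agents index (dl.FleetAgents) on the next flush tick:
	_ = bc.CheckIn(agent.Id, req.Status, rawMeta, seqno)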