-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix partition key computation for aggregation #158
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ import ( | |
"crypto/md5" | ||
"fmt" | ||
|
||
"github.com/aws/amazon-kinesis-streams-for-fluent-bit/util" | ||
"github.com/aws/aws-sdk-go/aws" | ||
"github.com/aws/aws-sdk-go/service/kinesis" | ||
"github.com/sirupsen/logrus" | ||
|
@@ -30,52 +31,80 @@ type Aggregator struct { | |
records []*Record | ||
aggSize int // Size of both records, and partitionKeys in bytes | ||
maxAggRecordSize int | ||
stringGen *util.RandomStringGenerator | ||
} | ||
|
||
// NewAggregator create a new aggregator | ||
func NewAggregator() *Aggregator { | ||
func NewAggregator(stringGen *util.RandomStringGenerator) *Aggregator { | ||
|
||
return &Aggregator{ | ||
partitionKeys: make(map[string]uint64, 0), | ||
records: make([]*Record, 0), | ||
maxAggRecordSize: defaultMaxAggRecordSize, | ||
aggSize: initialAggRecordSize, | ||
stringGen: stringGen, | ||
} | ||
} | ||
|
||
// AddRecord to the aggregate buffer. | ||
// Will return a kinesis PutRecordsRequest once buffer is full, or if the data exceeds the aggregate limit. | ||
func (a *Aggregator) AddRecord(partitionKey string, data []byte) (entry *kinesis.PutRecordsRequestEntry, err error) { | ||
func (a *Aggregator) AddRecord(partitionKey string, hasPartitionKey bool, data []byte) (entry *kinesis.PutRecordsRequestEntry, err error) { | ||
|
||
partitionKeySize := len([]byte(partitionKey)) | ||
if partitionKeySize < 1 { | ||
return nil, fmt.Errorf("Invalid partition key provided") | ||
if hasPartitionKey { | ||
partitionKeySize := len([]byte(partitionKey)) | ||
if partitionKeySize < 1 { | ||
return nil, fmt.Errorf("Invalid partition key provided") | ||
} | ||
} | ||
|
||
dataSize := len(data) | ||
|
||
// If this is a very large record, then don't aggregate it. | ||
if dataSize >= a.maxAggRecordSize { | ||
if !hasPartitionKey { | ||
partitionKey = a.stringGen.RandomString() | ||
} | ||
return &kinesis.PutRecordsRequestEntry{ | ||
Data: data, | ||
PartitionKey: aws.String(partitionKey), | ||
}, nil | ||
} | ||
|
||
if !hasPartitionKey { | ||
if len(a.partitionKeys) > 0 { | ||
// Take any partition key from the map, as long as one exists | ||
for k, _ := range a.partitionKeys { | ||
partitionKey = k | ||
break | ||
} | ||
} else { | ||
partitionKey = a.stringGen.RandomString() | ||
} | ||
} | ||
|
||
// Check if we need to add a new partition key, and if we do how much space it will take | ||
pKeyIdx, pKeyAddedSize := a.checkPartitionKey(partitionKey) | ||
|
||
// data field size is proto size of data + data field number size | ||
// partition key field size is varint of index size + field number size | ||
recordSize := protowire.SizeBytes(dataSize) + fieldNumberSize + protowire.SizeVarint(pKeyIdx) + fieldNumberSize | ||
// Total size is proto size of data + field number of parent proto | ||
addedSize := protowire.SizeBytes(recordSize) + fieldNumberSize | ||
dataFieldSize := protowire.SizeBytes(dataSize) + fieldNumberSize | ||
pkeyFieldSize := protowire.SizeVarint(pKeyIdx) + fieldNumberSize | ||
// Total size is byte size of data + pkey field + field number of parent proto | ||
|
||
if a.getSize()+addedSize+pKeyAddedSize >= maximumRecordSize { | ||
// Aggregate records, and return | ||
if a.getSize()+protowire.SizeBytes(dataFieldSize+pkeyFieldSize)+fieldNumberSize+pKeyAddedSize >= maximumRecordSize { | ||
// Aggregate records, and return if error | ||
entry, err = a.AggregateRecords() | ||
if err != nil { | ||
return entry, err | ||
} | ||
|
||
if !hasPartitionKey { | ||
// choose a new partition key if needed now that we've aggregated the previous records | ||
partitionKey = a.stringGen.RandomString() | ||
} | ||
hossain-rayhan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Recompute field size, since it changed | ||
pKeyIdx, _ = a.checkPartitionKey(partitionKey) | ||
pkeyFieldSize = protowire.SizeVarint(pKeyIdx) + fieldNumberSize | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also fixes a sizing bug: when we switch records we may be off on the size slightly (it may shrink if the previous |
||
} | ||
|
||
// Add new record, and update aggSize | ||
|
@@ -86,7 +115,7 @@ func (a *Aggregator) AddRecord(partitionKey string, data []byte) (entry *kinesis | |
PartitionKeyIndex: &partitionKeyIndex, | ||
}) | ||
|
||
a.aggSize += addedSize | ||
a.aggSize += protowire.SizeBytes(dataFieldSize+pkeyFieldSize) + fieldNumberSize | ||
|
||
return entry, err | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm open to suggestions for how to do this better, not sure how to get keys out of a map properly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could I ask a question about what "first" means here? Does it mean the first element/key that was put in the map? If so, I am not sure you could get it from the map through iteration, since map iteration order is not guaranteed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I generally just want any key that is in the map; basically we choose an arbitrary key for the record, but AFAICT this key does not matter, since the real partition key is just the first key — so this is just a way to save space. I'll update the comment.