Skip to content

Commit

Permalink
fix: fix string field containing invalid UTF-8 (#37104)
Browse files Browse the repository at this point in the history
issue: #37083
We used a vector of string_view to hold the data temporarily, but the underlying
string data is released once the record batch is destroyed.
Change it to a vector of string to avoid memory corruption.

---------

Signed-off-by: sunby <[email protected]>
  • Loading branch information
sunby authored Oct 25, 2024
1 parent 0dbf948 commit bf956a3
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions internal/core/src/common/ChunkWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "common/ChunkWriter.h"
#include <cstdint>
#include <memory>
#include <string_view>
#include <vector>
#include "arrow/array/array_binary.h"
#include "arrow/array/array_primitive.h"
Expand All @@ -29,14 +28,14 @@ namespace milvus {
void
StringChunkWriter::write(std::shared_ptr<arrow::RecordBatchReader> data) {
auto size = 0;
std::vector<std::string_view> strs;
std::vector<std::string> strs;
std::vector<std::pair<const uint8_t*, int64_t>> null_bitmaps;
for (auto batch : *data) {
auto data = batch.ValueOrDie()->column(0);
auto array = std::dynamic_pointer_cast<arrow::StringArray>(data);
for (int i = 0; i < array->length(); i++) {
auto str = array->GetView(i);
strs.push_back(str);
strs.emplace_back(str);
size += str.size();
}
auto null_bitmap_n = (data->length() + 7) / 8;
Expand Down Expand Up @@ -250,14 +249,14 @@ void
SparseFloatVectorChunkWriter::write(
std::shared_ptr<arrow::RecordBatchReader> data) {
auto size = 0;
std::vector<std::string_view> strs;
std::vector<std::string> strs;
std::vector<std::pair<const uint8_t*, int64_t>> null_bitmaps;
for (auto batch : *data) {
auto data = batch.ValueOrDie()->column(0);
auto array = std::dynamic_pointer_cast<arrow::BinaryArray>(data);
for (int i = 0; i < array->length(); i++) {
auto str = array->GetView(i);
strs.push_back(str);
strs.emplace_back(str);
size += str.size();
}
auto null_bitmap_n = (data->length() + 7) / 8;
Expand Down

0 comments on commit bf956a3

Please sign in to comment.