Skip to content

Commit

Permalink
feat: support mmap for marisa trie (#29613)
Browse files Browse the repository at this point in the history
This adds mmap support for loading the marisa trie index.
related #21866

Signed-off-by: yah01 <[email protected]>
  • Loading branch information
yah01 authored Jan 11, 2024
1 parent d642993 commit 031243f
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
6 changes: 6 additions & 0 deletions internal/core/src/common/File.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include <string>
#include "common/EasyAssert.h"
#include "common/Types.h"
#include "fmt/core.h"
#include <fcntl.h>
#include <unistd.h>
Expand Down Expand Up @@ -51,6 +52,11 @@ class File {
return write(fd_, buf, size);
}

// Repositions the file offset of the descriptor held by this File.
//
// `offset` and `whence` are forwarded unchanged to lseek(2); `whence` is
// one of the SEEK_* constants accepted by that call (e.g. SEEK_SET).
// Returns whatever lseek reports: per POSIX, the resulting offset measured
// from the start of the file, or -1 on failure (with errno set).
offset_t
Seek(offset_t offset, int whence) {
    const auto position = lseek(fd_, offset, whence);
    return position;
}

void
Close() {
close(fd_);
Expand Down
34 changes: 20 additions & 14 deletions internal/core/src/index/StringIndexMarisa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,16 @@
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <cstring>
#include <memory>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <unistd.h>

#include "common/File.h"
#include "common/Types.h"
#include "common/EasyAssert.h"
#include "common/Exception.h"
Expand Down Expand Up @@ -249,28 +254,29 @@ StringIndexMarisa::LoadWithoutAssemble(const BinarySet& set,
const Config& config) {
auto uuid = boost::uuids::random_generator()();
auto uuid_string = boost::uuids::to_string(uuid);
auto file = std::string("/tmp/") + uuid_string;
auto file_name = std::string("/tmp/") + uuid_string;

auto index = set.GetByName(MARISA_TRIE_INDEX);
auto len = index->size;

auto fd = open(
file.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IXUSR);
lseek(fd, 0, SEEK_SET);

auto status = write(fd, index->data.get(), len);
if (status != len) {
close(fd);
remove(file.c_str());
auto file = File::Open(file_name, O_RDWR | O_CREAT | O_EXCL);
auto written = file.Write(index->data.get(), len);
if (written != len) {
file.Close();
remove(file_name.c_str());
throw SegcoreError(
ErrorCode::UnistdError,
"write index to fd error, errorCode is " + std::to_string(status));
fmt::format("write index to fd error: {}", strerror(errno)));
}

lseek(fd, 0, SEEK_SET);
trie_.read(fd);
close(fd);
remove(file.c_str());
file.Seek(0, SEEK_SET);
if (config.contains(kEnableMmap)) {
trie_.mmap(file_name.c_str());
} else {
trie_.read(file.Descriptor());
}
// make sure the file would be removed after we unmap & close it
unlink(file_name.c_str());

auto str_ids = set.GetByName(MARISA_STR_IDS);
auto str_ids_len = str_ids->size;
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_intergration_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ beginTime=`date +%s`

for d in $(go list ./tests/integration/...); do
echo "$d"
go test -race -tags dynamic -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" -timeout=20m
go test -race -tags dynamic -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" -timeout=30m
if [ -f profile.out ]; then
grep -v kafka profile.out | grep -v planparserv2/generated | grep -v mocks | sed '1d' >> ${FILE_COVERAGE_INFO}
rm profile.out
Expand Down

0 comments on commit 031243f

Please sign in to comment.