Skip to content

Commit

Permalink
Improve roaring UDF (#13120)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsjant authored Jan 9, 2025
1 parent 2ec6638 commit 217e49d
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 0 deletions.
144 changes: 144 additions & 0 deletions ydb/library/yql/udfs/common/roaring/roaring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <contrib/libs/croaring/include/roaring/memory.h>
#include <contrib/libs/croaring/include/roaring/roaring.h>

#include <util/generic/array_ref.h>
#include <util/generic/vector.h>
#include <util/string/builder.h>
#include <util/system/yassert.h>
Expand All @@ -30,6 +31,11 @@ namespace {
{
}

// Wraps an already constructed bitmap by storing the pointer in Roaring.
// The destructor of this class passes Roaring to roaring_bitmap_free, so
// after construction the wrapper is the one that releases `bitmap`.
TRoaringWrapper(roaring_bitmap_t* bitmap)
: Roaring(bitmap)
{
}

// Releases the wrapped bitmap by handing Roaring to roaring_bitmap_free.
~TRoaringWrapper() {
roaring_bitmap_free(Roaring);
}
Expand Down Expand Up @@ -105,6 +111,47 @@ namespace {
}
};

// UDF "AndNotWithBinary": subtracts a serialized bitmap from a bitmap resource.
// The first argument is modified in place and returned as the result.
class TRoaringAndNotWithBinary: public TBoxedValue {
public:
    TRoaringAndNotWithBinary() {
    }

    // Name under which the function is looked up in the module.
    static TStringRef Name() {
        return TStringRef::Of("AndNotWithBinary");
    }

private:
    // args[0] - bitmap resource that is updated in place.
    // args[1] - string that DeserializePortable turns into a temporary bitmap.
    TUnboxedValue Run(const IValueBuilder* valueBuilder,
                      const TUnboxedValuePod* args) const override {
        Y_UNUSED(valueBuilder);

        // The deserialized bitmap exists only for the duration of this call.
        auto serialized = args[1].AsStringRef();
        auto subtrahend = DeserializePortable(serialized);

        auto minuend = GetBitmapFromArg(args[0]);
        roaring_bitmap_andnot_inplace(minuend, subtrahend);

        // The temporary is not wrapped into a resource, so free it by hand.
        roaring_bitmap_free(subtrahend);

        return args[0];
    }
};

// UDF "AndNot": removes from the first bitmap every value that is present in
// the second bitmap. The first argument is modified in place and returned.
class TRoaringAndNot: public TBoxedValue {
public:
    TRoaringAndNot() {
    }

    // Name under which the function is looked up in the module.
    static TStringRef Name() {
        return TStringRef::Of("AndNot");
    }

private:
    // args[0] - bitmap resource that is updated in place and returned.
    // args[1] - bitmap resource whose values are removed from args[0].
    TUnboxedValue Run(const IValueBuilder* valueBuilder,
                      const TUnboxedValuePod* args) const override {
        Y_UNUSED(valueBuilder);
        auto* minuend = GetBitmapFromArg(args[0]);
        const auto* subtrahend = GetBitmapFromArg(args[1]);

        if (minuend == subtrahend) {
            // roaring_bitmap_andnot_inplace requires its two arguments to be
            // different bitmaps. When the same resource is passed twice the
            // difference is the empty set, so just empty the bitmap.
            roaring_bitmap_clear(minuend);
        } else {
            roaring_bitmap_andnot_inplace(minuend, subtrahend);
        }

        return args[0];
    }
};

class TRoaringOr: public TBoxedValue {
public:
TRoaringOr() {
Expand Down Expand Up @@ -223,6 +270,46 @@ namespace {
TSourcePosition Pos_;
};

// UDF "FromUint32List": builds a new bitmap resource from a list of ui32 values.
class TRoaringFromUint32List: public TBoxedValue {
public:
    // pos - source position that prefixes the message passed to UdfTerminate.
    TRoaringFromUint32List(TSourcePosition pos)
        : Pos_(pos)
    {
    }

    // Name under which the function is looked up in the module.
    static TStringRef Name() {
        return TStringRef::Of("FromUint32List");
    }

private:
    // args[0] - list whose elements are read with Get<ui32>().
    // Returns a TRoaringWrapper around the freshly filled bitmap. Any
    // std::exception raised while filling it frees the bitmap and is reported
    // through UdfTerminate together with the source position.
    TUnboxedValue Run(const IValueBuilder* valueBuilder,
                      const TUnboxedValuePod* args) const override {
        Y_UNUSED(valueBuilder);
        auto* bitmap = roaring_bitmap_create();
        if (!bitmap) {
            // roaring_bitmap_create returns NULL when the allocation fails;
            // stop here instead of passing a null bitmap to roaring_bitmap_add.
            UdfTerminate((TStringBuilder() << Pos_ << " " << "Can't allocate bitmap").data());
        }
        try {
            const auto vector = args[0];
            const auto* elements = vector.GetElements();
            if (elements) {
                // The list exposes its elements as an array: walk it directly.
                for (auto& value : TArrayRef{elements, vector.GetListLength()}) {
                    roaring_bitmap_add(bitmap, value.Get<ui32>());
                }
            } else {
                // No direct element access: fall back to the list iterator.
                TUnboxedValue value;
                const auto it = vector.GetListIterator();
                while (it.Next(value)) {
                    roaring_bitmap_add(bitmap, value.Get<ui32>());
                }
            }

            // From here on the wrapper's destructor frees the bitmap.
            return TUnboxedValuePod(new TRoaringWrapper(bitmap));
        } catch (const std::exception& e) {
            // The wrapper was not created, so the bitmap has to be freed here.
            roaring_bitmap_free(bitmap);
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }
    TSourcePosition Pos_;
};

class TRoaringSerialize: public TBoxedValue {
public:
TRoaringSerialize() {
Expand Down Expand Up @@ -266,6 +353,25 @@ namespace {
}
};

// UDF "RunOptimize": calls roaring_bitmap_run_optimize on the bitmap behind the
// first argument and returns that same argument.
class TRoaringRunOptimize: public TBoxedValue {
public:
    TRoaringRunOptimize() {
    }

    // Name under which the function is looked up in the module.
    static TStringRef Name() {
        return TStringRef::Of("RunOptimize");
    }

private:
    // args[0] - bitmap resource; it is modified in place and returned.
    TUnboxedValue Run(const IValueBuilder* valueBuilder,
                      const TUnboxedValuePod* args) const override {
        Y_UNUSED(valueBuilder);

        const TUnboxedValuePod& resource = args[0];
        // The result of roaring_bitmap_run_optimize is not used.
        roaring_bitmap_run_optimize(GetBitmapFromArg(resource));

        return resource;
    }
};

class TRoaringModule: public IUdfModule {
public:
TRoaringModule() {
Expand All @@ -282,6 +388,7 @@ namespace {
void GetAllFunctions(IFunctionsSink& sink) const final {
sink.Add(TRoaringSerialize::Name());
sink.Add(TRoaringDeserialize::Name());
sink.Add(TRoaringFromUint32List::Name());

sink.Add(TRoaringCardinality::Name());

Expand All @@ -292,6 +399,11 @@ namespace {

sink.Add(TRoaringAndWithBinary::Name());
sink.Add(TRoaringAnd::Name());

sink.Add(TRoaringAndNotWithBinary::Name());
sink.Add(TRoaringAndNot::Name());

sink.Add(TRoaringRunOptimize::Name());
}

void CleanupOnTerminate() const final {
Expand All @@ -312,6 +424,12 @@ namespace {
if (!typesOnly) {
builder.Implementation(new TRoaringDeserialize(builder.GetSourcePosition()));
}
} else if (TRoaringFromUint32List::Name() == name) {
builder.Returns<TResource<RoaringResourceName>>().Args()->Add<TListType<ui32>>();

if (!typesOnly) {
builder.Implementation(new TRoaringFromUint32List(builder.GetSourcePosition()));
}
} else if (TRoaringSerialize::Name() == name) {
builder.Returns(builder.SimpleType<char*>())
.Args()
Expand Down Expand Up @@ -372,6 +490,32 @@ namespace {
if (!typesOnly) {
builder.Implementation(new TRoaringAnd());
}
} else if (TRoaringAndNotWithBinary::Name() == name) {
builder.Returns<TResource<RoaringResourceName>>()
.Args()
->Add<TAutoMap<TResource<RoaringResourceName>>>()
.Add<TAutoMap<char*>>();

if (!typesOnly) {
builder.Implementation(new TRoaringAndNotWithBinary());
}
} else if (TRoaringAndNot::Name() == name) {
builder.Returns<TResource<RoaringResourceName>>()
.Args()
->Add<TAutoMap<TResource<RoaringResourceName>>>()
.Add<TAutoMap<TResource<RoaringResourceName>>>();

if (!typesOnly) {
builder.Implementation(new TRoaringAndNot());
}
} else if (TRoaringRunOptimize::Name() == name) {
builder.Returns<TResource<RoaringResourceName>>()
.Args()
->Add<TAutoMap<TResource<RoaringResourceName>>>();

if (!typesOnly) {
builder.Implementation(new TRoaringRunOptimize());
}
} else {
TStringBuilder sb;
sb << "Unknown function: " << name.Data();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,10 @@
{
"uri": "file://test.test_union_/results.txt"
}
],
"test.test[run_optimize]": [
{
"uri": "file://test.test_run_optimize_/results.txt"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,109 @@
]
}
]
};
{
"Write" = [
{
"Type" = [
"ListType";
[
"StructType";
[
[
"AndNotList";
[
"OptionalType";
[
"ListType";
[
"DataType";
"Uint32"
]
]
]
]
]
]
];
"Data" = [
[
[
[
"2"
]
]
]
]
}
]
};
{
"Write" = [
{
"Type" = [
"ListType";
[
"StructType";
[
[
"AndNotWithBinaryList";
[
"OptionalType";
[
"ListType";
[
"DataType";
"Uint32"
]
]
]
]
]
]
];
"Data" = [
[
[
[
"3"
]
]
]
]
}
]
};
{
"Write" = [
{
"Type" = [
"ListType";
[
"StructType";
[
[
"AndNotWithBinaryListEmpty";
[
"OptionalType";
[
"ListType";
[
"DataType";
"Uint32"
]
]
]
]
]
]
];
"Data" = [
[
#
]
]
}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[
{
"Write" = [
{
"Type" = [
"ListType";
[
"StructType";
[
[
"RunOptimizeList";
[
"ListType";
[
"DataType";
"Uint32"
]
]
]
]
]
];
"Data" = [
[
[
"10";
"42";
"567"
]
]
]
}
]
}
]
Loading

0 comments on commit 217e49d

Please sign in to comment.