Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support inet_aton function #51883

Merged
merged 6 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions be/src/common/format_ip.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <cstring>

// Number of bytes in a binary IPv4 address, i.e. the number of octets in "A.B.C.D".
constexpr size_t IPV4_BINARY_LENGTH = 4;
constexpr int IPV4_MAX_OCTET_VALUE = 255; // max value of octet
// Number of bits each octet occupies in the packed result.
constexpr size_t IPV4_OCTET_BITS = 8;
// Radix used when accumulating the digits of an octet.
constexpr size_t DECIMAL_BASE = 10;

namespace starrocks {

// Returns true iff `c` is one of the ASCII digits '0'..'9'.
inline bool is_numeric_ascii(char c) {
    // This is faster than
    // return UInt8(UInt8(c) - UInt8('0')) < UInt8(10);
    // on Intel CPUs when compiled by gcc 8.
    return (c >= '0' && c <= '9');
}

// Parses a dotted-decimal IPv4 address ("A.B.C.D") from a length-delimited buffer.
//
// @param pos          start of the text. It does not have to be NUL-terminated; only
//                     pos[0 .. str_len) is ever read.
// @param str_len      number of bytes available at `pos`.
// @param dst          receives (A << 24) | (B << 16) | (C << 8) | D on success and is left
//                     untouched on failure.
// @param first_octet  when >= 0 it is used as the already-known first octet A and the text
//                     is expected to hold only the remaining three octets ("B.C.D").
//                     Values above 255 make the call fail; negative values mean "not given".
// @return true if the text is a well-formed address, false otherwise.
//
// Accepted input:
//  - every octet consists of 1 to 4 decimal digits whose value is <= 255 (a fourth digit is
//    therefore only possible together with leading zeros, e.g. "0001");
//  - octets are separated by exactly one '.';
//  - any bytes following the last octet must all be '\0';
//  - the total length must not exceed 15 bytes.
inline bool parse_ipv4(const char* pos, size_t str_len, int64_t& dst, int64_t first_octet = -1) {
    if (pos == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
        return false;
    }

    int64_t result = 0;
    // Bit position of the octet that is parsed next; starts at the most significant octet.
    int offset = static_cast<int>((IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS);
    size_t octets_to_parse = IPV4_BINARY_LENGTH;

    if (first_octet >= 0) {
        result |= first_octet << offset;
        offset -= static_cast<int>(IPV4_OCTET_BITS);
        --octets_to_parse;
    }

    // Shortest valid text: one digit per octet plus the separators between them.
    constexpr size_t max_text_length = 15;
    const size_t min_text_length = 2 * octets_to_parse - 1;
    if (str_len < min_text_length || str_len > max_text_length) {
        return false;
    }

    size_t i = 0;
    while (true) {
        int64_t value = 0;
        size_t len = 0;

        // Consume at most four digits; a run that is too long or too large is rejected below.
        while (i < str_len && is_numeric_ascii(pos[i]) && len <= 3) {
            value = value * static_cast<int64_t>(DECIMAL_BASE) + (pos[i] - '0');
            ++len;
            ++i;
        }

        if (len == 0 || value > IPV4_MAX_OCTET_VALUE) {
            return false;
        }

        result |= value << offset;

        if (offset == 0) {
            // The least significant octet has been stored: the address is complete.
            break;
        }

        // More octets are required, so a separator must follow and it must lie inside the
        // buffer. Checking the bound first avoids reading past the end of the input and
        // rejects truncated addresses such as "192.168.1" or "11.2.3.".
        if (i >= str_len || pos[i] != '.') {
            return false;
        }
        ++i;
        offset -= static_cast<int>(IPV4_OCTET_BITS);
    }

    // Only NUL padding may follow the last octet.
    for (; i < str_len; ++i) {
        if (pos[i] != '\0') {
            return false;
        }
    }

    dst = result;
    return true;
}

} // namespace starrocks
1 change: 1 addition & 0 deletions be/src/exprs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ set(EXPR_FILES
hyperloglog_functions.cpp
in_const_predicate.cpp
info_func.cpp
inet_aton.cpp
in_predicate.cpp
is_null_predicate.cpp
json_functions.cpp
Expand Down
48 changes: 48 additions & 0 deletions be/src/exprs/inet_aton.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "common/format_ip.h"
#include "exprs/string_functions.h"

namespace starrocks {

// File-local convenience wrapper around parse_ipv4() that parses a complete
// dotted-decimal address (no pre-parsed first octet).
//
// @param pos           start of the text; forwarded unchanged to parse_ipv4().
// @param str_len       number of bytes available at `pos`.
// @param result_value  receives the parsed address when the call succeeds.
// @return whatever parse_ipv4() returns for the given input.
//
// The out-parameter is declared as int64_t so that it has exactly the type
// parse_ipv4() expects and the caller passes, instead of relying on a separate
// project-level alias being the same underlying type.
static inline bool try_parse_ipv4(const char* pos, size_t str_len, int64_t& result_value) {
    return parse_ipv4(pos, str_len, result_value);
}

// Vectorized implementation of inet_aton(VARCHAR) -> BIGINT.
//
// For every row of columns[0] the string is handed to try_parse_ipv4(); the parsed
// value is appended on success, and NULL is appended when the input row is NULL or
// the text cannot be parsed.
StatusOr<ColumnPtr> StringFunctions::inet_aton(FunctionContext* context, const Columns& columns) {
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    auto str_viewer = ColumnViewer<TYPE_VARCHAR>(columns[0]);
    auto size = columns[0]->size();

    ColumnBuilder<TYPE_BIGINT> result(size);
    // Index with size_t so the comparison against `size` does not mix signed and unsigned.
    for (size_t row = 0; row < size; ++row) {
        if (str_viewer.is_null(row)) {
            result.append_null();
            continue;
        }

        auto str_value = str_viewer.value(row);
        // Initialized defensively; it is only read when parsing succeeded.
        int64_t parsed_result = 0;
        if (try_parse_ipv4(str_value.get_data(), str_value.get_size(), parsed_result)) {
            result.append(parsed_result);
        } else {
            result.append_null();
        }
    }
    return result.build(ColumnHelper::is_all_const(columns));
}

} // namespace starrocks
7 changes: 7 additions & 0 deletions be/src/exprs/string_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,13 @@ class StringFunctions {
*/
DEFINE_VECTORIZED_FN(get_char);

/**
* Convert an IPv4 address given as a dotted-decimal string ("A.B.C.D") to its numeric value.
* A row of the result is NULL when the input row is NULL or is not a valid IPv4 address.
*
* @param: [string_value]
* @paramType: [BinaryColumn]
* @return: BigIntColumn
*/
DEFINE_VECTORIZED_FN(inet_aton);

/**
* Return the index of the first occurrence of substring
*
Expand Down
39 changes: 39 additions & 0 deletions be/test/exprs/string_fn_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1533,6 +1533,45 @@ PARALLEL_TEST(VecStringFunctionsTest, charTest) {
ASSERT_EQ("~", v->get_data()[5].to_string());
}

// inet_aton must yield NULL for every input that is not a well-formed IPv4 address.
PARALLEL_TEST(VecStringFunctionsTest, inetAtonInvalidIPv4Test) {
    std::unique_ptr<FunctionContext> ctx(FunctionContext::create_test_context());

    // Out-of-range octets, non-numeric text, too many octets, too few octets, empty string.
    static const char* const kMalformedInputs[] = {
            "999.999.999.999", "abc.def.ghi.jkl", "192.168.1.1.1", "192.168.1", "",
    };
    constexpr size_t kNumInputs = sizeof(kMalformedInputs) / sizeof(kMalformedInputs[0]);

    auto input_column = BinaryColumn::create();
    for (const char* text : kMalformedInputs) {
        input_column->append(text);
    }

    Columns columns;
    columns.emplace_back(input_column);

    auto result = StringFunctions::inet_aton(ctx.get(), columns).value();

    for (size_t row = 0; row < kNumInputs; ++row) {
        ASSERT_TRUE(result->is_null(row));
    }
}

// inet_aton must return the numeric value of well-formed addresses, including both extremes.
PARALLEL_TEST(VecStringFunctionsTest, inetAtonValidIPv4Test) {
    std::unique_ptr<FunctionContext> ctx(FunctionContext::create_test_context());

    struct Case {
        const char* text;
        int64_t expected;
    };
    const Case cases[] = {
            {"192.168.1.1", 3232235777},
            {"0.0.0.0", 0},
            {"255.255.255.255", 4294967295},
    };

    auto input_column = BinaryColumn::create();
    for (const Case& c : cases) {
        input_column->append(c.text);
    }

    Columns columns;
    columns.emplace_back(input_column);

    auto result = StringFunctions::inet_aton(ctx.get(), columns).value();
    auto res_column = ColumnHelper::cast_to<TYPE_BIGINT>(result);

    size_t row = 0;
    for (const Case& c : cases) {
        ASSERT_EQ(c.expected, res_column->get_data()[row]);
        ++row;
    }
}

PARALLEL_TEST(VecStringFunctionsTest, instrTest) {
std::unique_ptr<FunctionContext> ctx(FunctionContext::create_test_context());
Columns columns;
Expand Down
39 changes: 39 additions & 0 deletions docs/en/sql-reference/sql-functions/string-functions/inet_aton.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
displayed_sidebar: docs
---

# inet_aton



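Takes a string containing an IPv4 address in dotted-decimal format `A.B.C.D` and returns a BIGINT representing that address in network byte order (big endian), that is, `A × 256³ + B × 256² + C × 256 + D`. Returns NULL if the input is NULL or is not a valid IPv4 address.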

## Syntax

```Haskell
BIGINT inet_aton(VARCHAR ipv4)
```

## Examples

```Plain Text
mysql> select inet_aton('192.168.1.1');
+--------------------------------------+
| inet_aton('192.168.1.1') |
+--------------------------------------+
| 3232235777 |
+--------------------------------------+

mysql> select stringIp, inet_aton(stringIp) from ipv4;
+-----------------+----------------------------+
|stringIp | inet_aton(stringIp) |
+-----------------+----------------------------+
| 0.0.0.0 | 0 |
| 255.255.255.255 | 4294967295 |
| invalid | NULL |
+-----------------+----------------------------+
```

## keyword

INET_ATON
44 changes: 44 additions & 0 deletions docs/zh/sql-reference/sql-functions/string-functions/inet_aton.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
displayed_sidebar: docs
---

# inet_aton




获取包含 IPv4 地址的字符串,格式为 A.B.C.D。返回一个 BIGINT 数字,表示按网络字节序(大端)排列的对应 IPv4 地址,即 `A × 256³ + B × 256² + C × 256 + D`。如果输入为 NULL 或不是合法的 IPv4 地址,则返回 NULL。

## 语法

```Haskell
BIGINT inet_aton(VARCHAR ipv4)
```

## 参数说明

`ipv4`: 待转换的 IPv4 地址字符串,支持的数据类型为 VARCHAR。

## 返回值说明

返回值的数据类型为 BIGINT。

## 示例

```Plain Text
mysql> select inet_aton('192.168.1.1');
+--------------------------------------+
| inet_aton('192.168.1.1') |
+--------------------------------------+
| 3232235777 |
+--------------------------------------+

mysql> select stringIp, inet_aton(stringIp) from ipv4;
+-----------------+----------------------------+
|stringIp | inet_aton(stringIp) |
+-----------------+----------------------------+
| 0.0.0.0 | 0 |
| 255.255.255.255 | 4294967295 |
| invalid | NULL |
+-----------------+----------------------------+
```
1 change: 1 addition & 0 deletions gensrc/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@
[30120, 'length', True, False, 'INT', ['VARCHAR'], 'StringFunctions::length'],
[30130, 'char_length', True, False, 'INT', ['VARCHAR'], 'StringFunctions::utf8_length'],
[30131, 'character_length', True, False, 'INT', ['VARCHAR'], 'StringFunctions::utf8_length'],
[30132, 'inet_aton', True, False, 'BIGINT', ['VARCHAR'], 'StringFunctions::inet_aton'],

[30140, 'lower', True, False, 'VARCHAR', ['VARCHAR'], 'StringFunctions::lower'],
[30141, 'lcase', True, False, 'VARCHAR', ['VARCHAR'], 'StringFunctions::lower'],
Expand Down
Loading