Skip to content

Commit

Permalink
Merge pull request duckdb#9736 from carlopi/format_bytes
Browse files Browse the repository at this point in the history
format_bytes rework, moving from decimal multipliers to binary ones
  • Loading branch information
Mytherin authored Nov 24, 2023
2 parents 0e01450 + baa0a32 commit 18f614f
Show file tree
Hide file tree
Showing 15 changed files with 147 additions and 68 deletions.
44 changes: 21 additions & 23 deletions src/common/string_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,31 +170,29 @@ string StringUtil::Join(const set<string> &input, const string &separator) {
return result;
}

string StringUtil::BytesToHumanReadableString(idx_t bytes) {
string StringUtil::BytesToHumanReadableString(idx_t bytes, idx_t multiplier) {
D_ASSERT(multiplier == 1000 || multiplier == 1024);
string db_size;
auto kilobytes = bytes / 1000;
auto megabytes = kilobytes / 1000;
kilobytes -= megabytes * 1000;
auto gigabytes = megabytes / 1000;
megabytes -= gigabytes * 1000;
auto terabytes = gigabytes / 1000;
gigabytes -= terabytes * 1000;
auto petabytes = terabytes / 1000;
terabytes -= petabytes * 1000;
if (petabytes > 0) {
return to_string(petabytes) + "." + to_string(terabytes / 100) + "PB";
}
if (terabytes > 0) {
return to_string(terabytes) + "." + to_string(gigabytes / 100) + "TB";
} else if (gigabytes > 0) {
return to_string(gigabytes) + "." + to_string(megabytes / 100) + "GB";
} else if (megabytes > 0) {
return to_string(megabytes) + "." + to_string(kilobytes / 100) + "MB";
} else if (kilobytes > 0) {
return to_string(kilobytes) + "KB";
} else {
return to_string(bytes) + (bytes == 1 ? " byte" : " bytes");
idx_t array[6] = {};
const char *unit[2][6] = {{"bytes", "KiB", "MiB", "GiB", "TiB", "PiB"}, {"bytes", "kB", "MB", "GB", "TB", "PB"}};

const int sel = (multiplier == 1000);

array[0] = bytes;
for (idx_t i = 1; i < 6; i++) {
array[i] = array[i - 1] / multiplier;
array[i - 1] %= multiplier;
}

for (idx_t i = 5; i >= 1; i--) {
if (array[i]) {
// Map 0 -> 0 and (multiplier-1) -> 9
idx_t fractional_part = (array[i - 1] * 10) / multiplier;
return to_string(array[i]) + "." + to_string(fractional_part) + " " + unit[sel][i];
}
}

return to_string(array[0]) + (bytes == 1 ? " byte" : " bytes");
}

string StringUtil::Upper(const string &str) {
Expand Down
3 changes: 2 additions & 1 deletion src/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ static StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION(ListFlattenFun),
DUCKDB_SCALAR_FUNCTION_SET(FloorFun),
DUCKDB_SCALAR_FUNCTION(FormatFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FormatreadabledecimalsizeFun),
DUCKDB_SCALAR_FUNCTION(FormatreadabledecimalsizeFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FormatreadablesizeFun),
DUCKDB_SCALAR_FUNCTION(FormatBytesFun),
DUCKDB_SCALAR_FUNCTION(FromBase64Fun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FromBinaryFun),
Expand Down
9 changes: 7 additions & 2 deletions src/core_functions/scalar/string/format_bytes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace duckdb {

template <int64_t MULTIPLIER>
static void FormatBytesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
UnaryExecutor::Execute<int64_t, string_t>(args.data[0], result, args.size(), [&](int64_t bytes) {
bool is_negative = bytes < 0;
Expand All @@ -18,12 +19,16 @@ static void FormatBytesFunction(DataChunk &args, ExpressionState &state, Vector
unsigned_bytes = idx_t(bytes);
}
return StringVector::AddString(result, (is_negative ? "-" : "") +
StringUtil::BytesToHumanReadableString(unsigned_bytes));
StringUtil::BytesToHumanReadableString(unsigned_bytes, MULTIPLIER));
});
}

ScalarFunction FormatBytesFun::GetFunction() {
return ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction);
return ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction<1024>);
}

//! Builds the scalar function registered as formatReadableDecimalSize: takes a BIGINT,
//! returns a VARCHAR, and is executed by the 1000-based instantiation of FormatBytesFunction.
ScalarFunction FormatreadabledecimalsizeFun::GetFunction() {
	ScalarFunction decimal_size_function({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction<1000>);
	return decimal_size_function;
}

} // namespace duckdb
11 changes: 9 additions & 2 deletions src/core_functions/scalar/string/functions.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,17 @@
{
"name": "format_bytes",
"parameters": "bytes",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 15.6 KiB)",
"example": "format_bytes(1000 * 16)",
"type": "scalar_function",
"aliases": ["formatReadableDecimalSize"]
"aliases": ["formatReadableSize"]
},
{
"name": "formatReadableDecimalSize",
"parameters": "bytes",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 16.0 kB)",
"example": "formatReadableDecimalSize(1000 * 16)",
"type": "scalar_function"
},
{
"name": "hamming",
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/common/string_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class StringUtil {
}

//! Return a string that formats the given number of bytes
DUCKDB_API static string BytesToHumanReadableString(idx_t bytes);
DUCKDB_API static string BytesToHumanReadableString(idx_t bytes, idx_t multiplier = 1024);

//! Convert a string to uppercase
DUCKDB_API static string Upper(const string &str);
Expand Down
13 changes: 11 additions & 2 deletions src/include/duckdb/core_functions/scalar/string_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,25 @@ struct FormatFun {
struct FormatBytesFun {
static constexpr const char *Name = "format_bytes";
static constexpr const char *Parameters = "bytes";
static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)";
static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 15.6 KiB)";
static constexpr const char *Example = "format_bytes(1000 * 16)";

static ScalarFunction GetFunction();
};

struct FormatreadabledecimalsizeFun {
struct FormatreadablesizeFun {
using ALIAS = FormatBytesFun;

static constexpr const char *Name = "formatReadableSize";
};

//! Catalog metadata for the formatReadableDecimalSize scalar function, which formats a byte
//! count using decimal (1000-based) units: bytes, kB, MB, GB, TB, PB.
//! NOTE(review): functions.json carries an entry with the same strings; keep the two in sync.
struct FormatreadabledecimalsizeFun {
	static constexpr const char *Name = "formatReadableDecimalSize";
	static constexpr const char *Parameters = "bytes";
	// The 1000-based unit table spells the kilo prefix "kB", so 16000 renders as "16.0 kB"
	static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16.0 kB)";
	// The example must call this function; format_bytes is 1024-based and yields a different result
	static constexpr const char *Example = "formatReadableDecimalSize(1000 * 16)";

	static ScalarFunction GetFunction();
};

struct HammingFun {
Expand Down
11 changes: 10 additions & 1 deletion src/main/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,17 @@ idx_t DBConfig::ParseMemoryLimit(const string &arg) {
multiplier = 1000LL * 1000LL * 1000LL;
} else if (unit == "terabyte" || unit == "terabytes" || unit == "tb" || unit == "t") {
multiplier = 1000LL * 1000LL * 1000LL * 1000LL;
} else if (unit == "kib") {
multiplier = 1024LL;
} else if (unit == "mib") {
multiplier = 1024LL * 1024LL;
} else if (unit == "gib") {
multiplier = 1024LL * 1024LL * 1024LL;
} else if (unit == "tib") {
multiplier = 1024LL * 1024LL * 1024LL * 1024LL;
} else {
throw ParserException("Unknown unit for memory_limit: %s (expected: b, mb, gb or tb)", unit);
throw ParserException("Unknown unit for memory_limit: %s (expected: KB, MB, GB, TB for 1000^i units or KiB, "
"MiB, GiB, TiB for 1024^i unites)");
}
return (idx_t)multiplier * limit;
}
Expand Down
10 changes: 5 additions & 5 deletions test/api/test_reset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void RequireValueEqual(ConfigurationOption *op, const Value &left, const Value &
OptionValueSet &GetValueForOption(const string &name) {
static unordered_map<string, OptionValueSet> value_map = {
{"threads", {Value::BIGINT(42), Value::BIGINT(42)}},
{"checkpoint_threshold", {"4.2GB"}},
{"checkpoint_threshold", {"4.0 GiB"}},
{"debug_checkpoint_abort", {{"none", "before_truncate", "before_header", "after_free_list_write"}}},
{"default_collation", {"nocase"}},
{"default_order", {"desc"}},
Expand Down Expand Up @@ -82,8 +82,8 @@ OptionValueSet &GetValueForOption(const string &name) {
{"extension_directory", {"test"}},
{"immediate_transaction_mode", {true}},
{"max_expression_depth", {50}},
{"max_memory", {"4.2GB"}},
{"memory_limit", {"4.2GB"}},
{"max_memory", {"4.0 GiB"}},
{"memory_limit", {"4.0 GiB"}},
{"ordered_aggregate_threshold", {Value::UBIGINT(idx_t(1) << 12)}},
{"null_order", {"nulls_first"}},
{"perfect_ht_threshold", {0}},
Expand All @@ -96,11 +96,11 @@ OptionValueSet &GetValueForOption(const string &name) {
{"enable_progress_bar_print", {false}},
{"progress_bar_time", {0}},
{"temp_directory", {"tmp"}},
{"wal_autocheckpoint", {"4.2GB"}},
{"wal_autocheckpoint", {"4.0 GiB"}},
{"worker_threads", {42}},
{"enable_http_metadata_cache", {true}},
{"force_bitpacking_mode", {"constant"}},
{"allocator_flush_threshold", {"4.2GB"}},
{"allocator_flush_threshold", {"4.0 GiB"}},
{"arrow_large_buffer_size", {true}}};
// Every option that's not excluded has to be part of this map
if (!value_map.count(name)) {
Expand Down
86 changes: 58 additions & 28 deletions test/sql/function/string/format_bytes.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,74 +11,74 @@ SELECT format_bytes(0);
0 bytes

query I
SELECT format_bytes(999);
SELECT format_bytes(1);
----
999 bytes
1 byte

query I
SELECT format_bytes(1000);
SELECT format_bytes(1023);
----
1KB
1023 bytes

query I
SELECT pg_size_pretty(1000);
SELECT format_bytes(1024);
----
1KB
1.0 KiB

query I
SELECT formatReadableDecimalSize(1000);
SELECT pg_size_pretty(1024);
----
1KB
1.0 KiB

query I
SELECT format_bytes(1000*1000-1);
SELECT format_bytes(1024*1024-1);
----
999KB
1023.9 KiB

query I
SELECT format_bytes(1000*1000);
SELECT format_bytes(1024*1024);
----
1.0MB
1.0 MiB

query I
SELECT format_bytes(1000*1000 + 555555);
SELECT format_bytes(1024*1024 + 555555);
----
1.5MB
1.5 MiB

query I
SELECT format_bytes(1000*1000*1000-1);
SELECT format_bytes(1024*1024*1024-1);
----
999.9MB
1023.9 MiB

query I
SELECT format_bytes(1000*1000*1000);
SELECT format_bytes(1e9::BIGINT);
----
1.0GB
953.6 MiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000-1);
SELECT format_bytes(pow(1024,3)::BIGINT);
----
999.9GB
1.0 GiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000);
SELECT format_bytes(pow(1024.0,4)::BIGINT);
----
1.0TB
1.0 TiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000*1000-1);
SELECT format_bytes((pow(1024.0,4) - 1)::BIGINT);
----
999.9TB
1023.9 GiB

query I
SELECT format_bytes(1000::BIGINT*1000*1000*1000*1000);
SELECT format_bytes(1e15::BIGINT);
----
1.0PB
909.4 TiB

query I
SELECT format_bytes(9223372036854775807);
----
9223.3PB
8191.9 PiB

query I
SELECT format_bytes(NULL);
Expand All @@ -98,4 +98,34 @@ SELECT format_bytes(-1);
query I
SELECT format_bytes(-9223372036854775808);
----
-9223.3PB
-8192.0 PiB

query I
SELECT formatReadableDecimalSize(500);
----
500 bytes

query I
SELECT formatReadableSize(500);
----
500 bytes

query I
SELECT formatReadableDecimalSize(500*1000);
----
500.0 kB

query I
SELECT formatReadableSize(500*1000);
----
488.2 KiB

query I
SELECT formatReadableDecimalSize(500*1000*1000);
----
500.0 MB

query I
SELECT formatReadableSize(500*1000*1000);
----
476.8 MiB
4 changes: 4 additions & 0 deletions test/sql/index/art/memory/test_art_linear.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::INT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::INT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::INT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;

Expand Down
6 changes: 5 additions & 1 deletion test/sql/index/art/memory/test_art_non_linear.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::INT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::INT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::INT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;

Expand Down Expand Up @@ -114,4 +118,4 @@ SELECT mem_to_bytes(memory_usage) < 4000000 FROM pragma_database_size();
true

statement ok
DROP TABLE art;
DROP TABLE art;
6 changes: 5 additions & 1 deletion test/sql/index/art/memory/test_art_varchar.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::BIGINT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::BIGINT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::BIGINT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0::BIGINT
ELSE x::BIGINT END;

Expand Down Expand Up @@ -73,4 +77,4 @@ SELECT mem_to_bytes(current.memory_usage) > base.usage AND
mem_to_bytes(current.memory_usage) < 4 * base.usage
FROM base, pragma_database_size() current;
----
1
1
4 changes: 4 additions & 0 deletions test/sql/index/art/vacuum/test_art_vacuum_integers.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'MB') THEN REPLACE(x, 'MB', '')::INT * 1000 * 1000
WHEN CONTAINS(x, 'GB') THEN REPLACE(x, 'GB', '')::INT * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'TB') THEN REPLACE(x, 'TB', '')::INT * 1000 * 1000 * 1000 * 1000
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;

Expand Down
Loading

0 comments on commit 18f614f

Please sign in to comment.