Skip to content

Commit

Permalink
Merge branch 'main' into string_to_int
Browse files Browse the repository at this point in the history
  • Loading branch information
nickgerrets committed Nov 24, 2023
2 parents 0bd96ac + 18f614f commit f80bf4a
Show file tree
Hide file tree
Showing 22 changed files with 423 additions and 196 deletions.
18 changes: 17 additions & 1 deletion .github/config/distribution_matrix.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,21 @@
"vcpkg_triplet": "x64-windows-static-md"
}
]
},
"wasm": {
"include": [
{
"duckdb_arch": "wasm_mvp",
"vcpkg_triplet": "wasm32-emscripten"
},
{
"duckdb_arch": "wasm_eh",
"vcpkg_triplet": "wasm32-emscripten"
},
{
"duckdb_arch": "wasm_threads",
"vcpkg_triplet": "wasm32-emscripten"
}
]
}
}
}
65 changes: 61 additions & 4 deletions .github/workflows/_extension_distribution.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ jobs:
linux_matrix: ${{ steps.set-matrix-linux.outputs.linux_matrix }}
windows_matrix: ${{ steps.set-matrix-windows.outputs.windows_matrix }}
osx_matrix: ${{ steps.set-matrix-osx.outputs.osx_matrix }}
wasm_matrix: ${{ steps.set-matrix-wasm.outputs.wasm_matrix }}
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -67,9 +68,12 @@ jobs:
- id: parse-matrices
run: |
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os linux --output linux_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os osx --output osx_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os windows --output windows_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os linux --output linux_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os osx --output osx_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os windows --output windows_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
# In v0.9.2 wasm was not part of the available triplets, backporting it here
grep wasm ./duckdb/.github/config/distribution_matrix.json || echo '{"wasm":{"include":[{"duckdb_arch":"wasm_mvp","vcpkg_triplet":"wasm32-emscripten"},{"duckdb_arch":"wasm_eh","vcpkg_triplet":"wasm32-emscripten"},{"duckdb_arch":"wasm_threads","vcpkg_triplet":"wasm32-emscripten"}]}}' > ./duckdb/.github/config/distribution_matrix.json
python3 ${{ inputs.matrix_parse_script }} --input ./duckdb/.github/config/distribution_matrix.json --select_os wasm --output wasm_matrix.json --exclude "${{ inputs.exclude_archs }}" --pretty
- id: set-matrix-linux
run: |
Expand All @@ -89,6 +93,12 @@ jobs:
echo windows_matrix=$windows_matrix >> $GITHUB_OUTPUT
echo `cat $GITHUB_OUTPUT`
- id: set-matrix-wasm
run: |
wasm_matrix="`cat wasm_matrix.json`"
echo wasm_matrix=$wasm_matrix >> $GITHUB_OUTPUT
echo `cat $GITHUB_OUTPUT`
linux:
name: Linux
runs-on: ubuntu-latest
Expand Down Expand Up @@ -290,4 +300,51 @@ jobs:
with:
name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
path: |
build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension
build/release/extension/${{ inputs.extension_name }}/${{ inputs.extension_name }}.duckdb_extension
wasm:
name: DuckDB-Wasm
runs-on: ubuntu-latest
needs: generate_matrix
if: ${{ needs.generate_matrix.outputs.wasm_matrix != '{}' && needs.generate_matrix.outputs.wasm_matrix != '' }}
strategy:
matrix: ${{fromJson(needs.generate_matrix.outputs.wasm_matrix)}}
env:
VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
GEN: Ninja

steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: 'true'

- name: Checkout DuckDB to version
run: |
cd duckdb
git checkout ${{ inputs.duckdb_version }}
- uses: mymindstorm/setup-emsdk@v13
with:
version: 'latest'

- name: Setup vcpkg
uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}

- name: Setup Ccache
uses: hendrikmuhs/ccache-action@main
with:
key: ${{ github.job }}-${{ matrix.duckdb_arch }}

- name: Build Wasm module
run: |
make ${{ matrix.duckdb_arch }}
- uses: actions/upload-artifact@v3
with:
name: ${{ inputs.extension_name }}-${{ inputs.duckdb_version }}-extension-${{matrix.duckdb_arch}}${{inputs.artifact_postfix}}
path: |
build/${{ matrix.duckdb_arch }}/${{ inputs.extension_name }}.duckdb_extension.wasm
7 changes: 7 additions & 0 deletions scripts/modify_distribution_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import json
import sys
import logging

# Define command-line arguments
parser = argparse.ArgumentParser(description="Filter a JSON file based on excluded duckdb_arch values and select an OS")
Expand Down Expand Up @@ -41,10 +42,16 @@ def filter_entries(data, arch_values):

# Select an OS if specified
if select_os:
found = False
for os in filtered_data.keys():
if os == select_os:
filtered_data = filtered_data[os]
found = True
break
if found == False:
logging.warning('A selection OS was provided but not found')
filtered_data = []

# When deploy_matrix is specified, we only output a single merged include list with all the duckdb_archs
elif args.deploy_matrix:
deploy_archs = []
Expand Down
44 changes: 21 additions & 23 deletions src/common/string_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,31 +170,29 @@ string StringUtil::Join(const set<string> &input, const string &separator) {
return result;
}

string StringUtil::BytesToHumanReadableString(idx_t bytes) {
string StringUtil::BytesToHumanReadableString(idx_t bytes, idx_t multiplier) {
D_ASSERT(multiplier == 1000 || multiplier == 1024);
string db_size;
auto kilobytes = bytes / 1000;
auto megabytes = kilobytes / 1000;
kilobytes -= megabytes * 1000;
auto gigabytes = megabytes / 1000;
megabytes -= gigabytes * 1000;
auto terabytes = gigabytes / 1000;
gigabytes -= terabytes * 1000;
auto petabytes = terabytes / 1000;
terabytes -= petabytes * 1000;
if (petabytes > 0) {
return to_string(petabytes) + "." + to_string(terabytes / 100) + "PB";
}
if (terabytes > 0) {
return to_string(terabytes) + "." + to_string(gigabytes / 100) + "TB";
} else if (gigabytes > 0) {
return to_string(gigabytes) + "." + to_string(megabytes / 100) + "GB";
} else if (megabytes > 0) {
return to_string(megabytes) + "." + to_string(kilobytes / 100) + "MB";
} else if (kilobytes > 0) {
return to_string(kilobytes) + "KB";
} else {
return to_string(bytes) + (bytes == 1 ? " byte" : " bytes");
idx_t array[6] = {};
const char *unit[2][6] = {{"bytes", "KiB", "MiB", "GiB", "TiB", "PiB"}, {"bytes", "kB", "MB", "GB", "TB", "PB"}};

const int sel = (multiplier == 1000);

array[0] = bytes;
for (idx_t i = 1; i < 6; i++) {
array[i] = array[i - 1] / multiplier;
array[i - 1] %= multiplier;
}

for (idx_t i = 5; i >= 1; i--) {
if (array[i]) {
// Map 0 -> 0 and (multiplier-1) -> 9
idx_t fractional_part = (array[i - 1] * 10) / multiplier;
return to_string(array[i]) + "." + to_string(fractional_part) + " " + unit[sel][i];
}
}

return to_string(array[0]) + (bytes == 1 ? " byte" : " bytes");
}

string StringUtil::Upper(const string &str) {
Expand Down
3 changes: 2 additions & 1 deletion src/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ static StaticFunctionDefinition internal_functions[] = {
DUCKDB_SCALAR_FUNCTION(ListFlattenFun),
DUCKDB_SCALAR_FUNCTION_SET(FloorFun),
DUCKDB_SCALAR_FUNCTION(FormatFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FormatreadabledecimalsizeFun),
DUCKDB_SCALAR_FUNCTION(FormatreadabledecimalsizeFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FormatreadablesizeFun),
DUCKDB_SCALAR_FUNCTION(FormatBytesFun),
DUCKDB_SCALAR_FUNCTION(FromBase64Fun),
DUCKDB_SCALAR_FUNCTION_ALIAS(FromBinaryFun),
Expand Down
9 changes: 7 additions & 2 deletions src/core_functions/scalar/string/format_bytes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace duckdb {

template <int64_t MULTIPLIER>
static void FormatBytesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
UnaryExecutor::Execute<int64_t, string_t>(args.data[0], result, args.size(), [&](int64_t bytes) {
bool is_negative = bytes < 0;
Expand All @@ -18,12 +19,16 @@ static void FormatBytesFunction(DataChunk &args, ExpressionState &state, Vector
unsigned_bytes = idx_t(bytes);
}
return StringVector::AddString(result, (is_negative ? "-" : "") +
StringUtil::BytesToHumanReadableString(unsigned_bytes));
StringUtil::BytesToHumanReadableString(unsigned_bytes, MULTIPLIER));
});
}

ScalarFunction FormatBytesFun::GetFunction() {
return ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction);
return ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction<1024>);
}

//! Builds the scalar function registered as formatReadableDecimalSize:
//! takes a BIGINT, returns a VARCHAR, and is backed by FormatBytesFunction instantiated with MULTIPLIER = 1000
ScalarFunction FormatreadabledecimalsizeFun::GetFunction() {
	ScalarFunction decimal_size_function({LogicalType::BIGINT}, LogicalType::VARCHAR, FormatBytesFunction<1000>);
	return decimal_size_function;
}

} // namespace duckdb
11 changes: 9 additions & 2 deletions src/core_functions/scalar/string/functions.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,17 @@
{
"name": "format_bytes",
"parameters": "bytes",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 15.6 KiB)",
"example": "format_bytes(1000 * 16)",
"type": "scalar_function",
"aliases": ["formatReadableDecimalSize"]
"aliases": ["formatReadableSize"]
},
{
"name": "formatReadableDecimalSize",
"parameters": "bytes",
"description": "Converts bytes to a human-readable presentation (e.g. 16000 -> 16.0 kB)",
"example": "formatReadableDecimalSize(1000 * 16)",
"type": "scalar_function"
},
{
"name": "hamming",
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/common/string_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class StringUtil {
}

//! Return a string that formats the give number of bytes
DUCKDB_API static string BytesToHumanReadableString(idx_t bytes);
DUCKDB_API static string BytesToHumanReadableString(idx_t bytes, idx_t multiplier = 1024);

//! Convert a string to uppercase
DUCKDB_API static string Upper(const string &str);
Expand Down
13 changes: 11 additions & 2 deletions src/include/duckdb/core_functions/scalar/string_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,25 @@ struct FormatFun {
struct FormatBytesFun {
static constexpr const char *Name = "format_bytes";
static constexpr const char *Parameters = "bytes";
static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)";
static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 15.6 KiB)";
static constexpr const char *Example = "format_bytes(1000 * 16)";

static ScalarFunction GetFunction();
};

struct FormatreadabledecimalsizeFun {
//! Alias entry that exposes FormatBytesFun under the additional name "formatReadableSize"
struct FormatreadablesizeFun {
//! The function definition this entry is an alias of
using ALIAS = FormatBytesFun;

//! Name under which the alias is exposed
static constexpr const char *Name = "formatReadableSize";
};

//! Metadata for the formatReadableDecimalSize scalar function, the decimal (multiplier 1000) counterpart of
//! format_bytes
struct FormatreadabledecimalsizeFun {
	static constexpr const char *Name = "formatReadableDecimalSize";
	static constexpr const char *Parameters = "bytes";
	// The decimal unit table in StringUtil::BytesToHumanReadableString spells the first unit "kB",
	// so 16000 is rendered as "16.0 kB" (not "16.0 KB")
	static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16.0 kB)";
	// The example has to call this function; format_bytes(1000 * 16) would yield the binary form "15.6 KiB"
	static constexpr const char *Example = "formatReadableDecimalSize(1000 * 16)";

	//! Returns the ScalarFunction implementing formatReadableDecimalSize
	static ScalarFunction GetFunction();
};

struct HammingFun {
Expand Down
11 changes: 10 additions & 1 deletion src/main/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,17 @@ idx_t DBConfig::ParseMemoryLimit(const string &arg) {
multiplier = 1000LL * 1000LL * 1000LL;
} else if (unit == "terabyte" || unit == "terabytes" || unit == "tb" || unit == "t") {
multiplier = 1000LL * 1000LL * 1000LL * 1000LL;
} else if (unit == "kib") {
multiplier = 1024LL;
} else if (unit == "mib") {
multiplier = 1024LL * 1024LL;
} else if (unit == "gib") {
multiplier = 1024LL * 1024LL * 1024LL;
} else if (unit == "tib") {
multiplier = 1024LL * 1024LL * 1024LL * 1024LL;
} else {
throw ParserException("Unknown unit for memory_limit: %s (expected: b, mb, gb or tb)", unit);
throw ParserException("Unknown unit for memory_limit: %s (expected: KB, MB, GB, TB for 1000^i units or KiB, "
"MiB, GiB, TiB for 1024^i unites)");
}
return (idx_t)multiplier * limit;
}
Expand Down
2 changes: 1 addition & 1 deletion src/storage/storage_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ struct StorageVersionInfo {
idx_t storage_version;
};

static StorageVersionInfo storage_version_info[] = {{"v0.9.0 or v0.9.1", 64},
static StorageVersionInfo storage_version_info[] = {{"v0.9.0, v0.9.1 or v0.9.2", 64},
{"v0.8.0 or v0.8.1", 51},
{"v0.7.0 or v0.7.1", 43},
{"v0.6.0 or v0.6.1", 39},
Expand Down
10 changes: 5 additions & 5 deletions test/api/test_reset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void RequireValueEqual(ConfigurationOption *op, const Value &left, const Value &
OptionValueSet &GetValueForOption(const string &name) {
static unordered_map<string, OptionValueSet> value_map = {
{"threads", {Value::BIGINT(42), Value::BIGINT(42)}},
{"checkpoint_threshold", {"4.2GB"}},
{"checkpoint_threshold", {"4.0 GiB"}},
{"debug_checkpoint_abort", {{"none", "before_truncate", "before_header", "after_free_list_write"}}},
{"default_collation", {"nocase"}},
{"default_order", {"desc"}},
Expand Down Expand Up @@ -82,8 +82,8 @@ OptionValueSet &GetValueForOption(const string &name) {
{"extension_directory", {"test"}},
{"immediate_transaction_mode", {true}},
{"max_expression_depth", {50}},
{"max_memory", {"4.2GB"}},
{"memory_limit", {"4.2GB"}},
{"max_memory", {"4.0 GiB"}},
{"memory_limit", {"4.0 GiB"}},
{"ordered_aggregate_threshold", {Value::UBIGINT(idx_t(1) << 12)}},
{"null_order", {"nulls_first"}},
{"perfect_ht_threshold", {0}},
Expand All @@ -96,11 +96,11 @@ OptionValueSet &GetValueForOption(const string &name) {
{"enable_progress_bar_print", {false}},
{"progress_bar_time", {0}},
{"temp_directory", {"tmp"}},
{"wal_autocheckpoint", {"4.2GB"}},
{"wal_autocheckpoint", {"4.0 GiB"}},
{"worker_threads", {42}},
{"enable_http_metadata_cache", {true}},
{"force_bitpacking_mode", {"constant"}},
{"allocator_flush_threshold", {"4.2GB"}},
{"allocator_flush_threshold", {"4.0 GiB"}},
{"arrow_large_buffer_size", {true}}};
// Every option that's not excluded has to be part of this map
if (!value_map.count(name)) {
Expand Down
Loading

0 comments on commit f80bf4a

Please sign in to comment.