Commit 9e47561: Merge branch 'main' into string_to_int

nickgerrets committed Nov 24, 2023
2 parents 18c9296 + 173a690
Showing 95 changed files with 15,351 additions and 14,084 deletions.
2 changes: 1 addition & 1 deletion .github/actions/build_extensions/action.yml
@@ -93,7 +93,7 @@ runs:
if: inputs.vcpkg_build == 1
uses: lukka/[email protected]
with:
-        vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+        vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

- name: Set vcpkg env variables
if: inputs.vcpkg_build == 1
2 changes: 1 addition & 1 deletion .github/actions/manylinux_2014_setup/action.yml
@@ -96,7 +96,7 @@ runs:
if: ${{ inputs.vcpkg == 1 }}
uses: lukka/[email protected]
with:
-        vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+        vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

- name: Install OpenSSL
if: ${{ inputs.openssl == 1 }}
2 changes: 1 addition & 1 deletion .github/actions/ubuntu_18_setup/action.yml
@@ -84,7 +84,7 @@ runs:
if: ${{ inputs.vcpkg == 1 }}
uses: lukka/[email protected]
with:
-        vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+        vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

- name: Setup Ccache
if: ${{ inputs.ccache == 1 }}
16 changes: 8 additions & 8 deletions .github/config/extensions.csv
@@ -8,11 +8,11 @@ parquet,,,
tpcds,,,
tpch,,,
visualizer,,,
-sqlite_scanner,https://github.com/duckdblabs/sqlite_scanner,3443b2999ae1e68a108568fd32145705237a5760,
-postgres_scanner,https://github.com/duckdblabs/postgres_scanner,844f46536b5d5f9e65b57b7ff92f4ce3346e2829,
-substrait,https://github.com/duckdblabs/substrait,5d621b1d7d16fe86f8b1930870c8e6bf05bcb92a,no-windows
-arrow,https://github.com/duckdblabs/arrow,1b5b9649d28cd7f79496fb3f2e4dd7b03bf90ac5,no-windows
-aws,https://github.com/duckdblabs/duckdb_aws,348ae2625de86ab760f80a43eb76e4441cd01354,
-azure,https://github.com/duckdblabs/azure,1fe568d3eb3c8842118e395ba8031e2a8566daed,
-spatial,https://github.com/duckdblabs/duckdb_spatial.git,36e5a126976ac3b66716893360ef7e6295707082,
-iceberg,https://github.com/duckdblabs/duckdb_iceberg.git,51ba9564859698c29db4165f17143a2f6af2bb18,
+sqlite_scanner,https://github.com/duckdb/sqlite_scanner,3443b2999ae1e68a108568fd32145705237a5760,
+postgres_scanner,https://github.com/duckdb/postgres_scanner,844f46536b5d5f9e65b57b7ff92f4ce3346e2829,
+substrait,https://github.com/duckdb/substrait,5d621b1d7d16fe86f8b1930870c8e6bf05bcb92a,no-windows
+arrow,https://github.com/duckdb/arrow,1b5b9649d28cd7f79496fb3f2e4dd7b03bf90ac5,no-windows
+aws,https://github.com/duckdb/duckdb_aws,348ae2625de86ab760f80a43eb76e4441cd01354,
+azure,https://github.com/duckdb/duckdb_azure,1fe568d3eb3c8842118e395ba8031e2a8566daed,
+spatial,https://github.com/duckdb/duckdb_spatial,36e5a126976ac3b66716893360ef7e6295707082,
+iceberg,https://github.com/duckdb/duckdb_iceberg,51ba9564859698c29db4165f17143a2f6af2bb18,
2 changes: 1 addition & 1 deletion .github/patches/extensions/README.md
@@ -7,7 +7,7 @@ lets say our extension config looks like this:
```shell
duckdb_extension_load(spatial
DONT_LINK
-    GIT_URL https://github.com/duckdblabs/duckdb_spatial.git
+    GIT_URL https://github.com/duckdb/duckdb_spatial
GIT_TAG f577b9441793f9170403e489f5d3587e023a945f
APPLY_PATCHES
)
2 changes: 1 addition & 1 deletion .github/workflows/ExtensionRebuild.yml
@@ -7,7 +7,7 @@ on:
required: true
type: string
extension_repo:
-        description: 'Extension git repo (e.g. https://github.com/duckdblabs/postgres_scanner)'
+        description: 'Extension git repo (e.g. https://github.com/duckdb/postgres_scanner)'
required: true
type: string
extension_ref:
2 changes: 1 addition & 1 deletion .github/workflows/ExtensionTrigger.yml
@@ -12,4 +12,4 @@ jobs:

- name: Trigger Substrait Extension
run: |
-          curl -XPOST -u "${{secrets.PAT_USERNAME}}:${{secrets.PAT_TOKEN}}" -H "Accept: application/vnd.github.everest-preview+json" -H "Content-Type: application/json" https://api.github.com/repos/duckdblabs/substrait/dispatches --data '{"event_type": "build_application"}'
+          curl -XPOST -u "${{secrets.PAT_USERNAME}}:${{secrets.PAT_TOKEN}}" -H "Accept: application/vnd.github.everest-preview+json" -H "Content-Type: application/json" https://api.github.com/repos/duckdb/substrait/dispatches --data '{"event_type": "build_application"}'
4 changes: 2 additions & 2 deletions .github/workflows/OSX.yml
@@ -209,8 +209,8 @@ jobs:
with:
python-version: '3.7'

-      - name: Install Ninja
-        run: brew install ninja
+      - name: Install Ninja and Pkg-config
+        run: brew install pkg-config ninja

- name: Setup Ccache
uses: hendrikmuhs/ccache-action@main
2 changes: 1 addition & 1 deletion .github/workflows/Wasm.yml
@@ -38,7 +38,7 @@ jobs:
- name: Setup vcpkg
uses: lukka/[email protected]
with:
-          vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+          vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

- name: Setup Ccache
uses: hendrikmuhs/ccache-action@main
8 changes: 7 additions & 1 deletion .github/workflows/_extension_distribution.yml
@@ -34,7 +34,7 @@ on:
vcpkg_commit:
required: false
type: string
default: "501db0f17ef6df184fcdbfbe0f87cde2313b6ab1"
default: "a42af01b72c28a8e1d7b48107b33e4f286a55ef6"
# Override the default script producing the matrices. Allows specifying custom matrices.
matrix_parse_script:
required: false
@@ -247,6 +247,12 @@ jobs:
BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }}

steps:
+      - name: Keep \n line endings
+        shell: bash
+        run: |
+          git config --global core.autocrlf false
+          git config --global core.eol lf
       - uses: actions/checkout@v3
with:
fetch-depth: 0
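(A note on the new step above: forcing `core.autocrlf false` and `core.eol lf` before `actions/checkout` presumably keeps checked-out sources and patch files byte-identical across platforms; git on Windows runners would otherwise rewrite line endings to CRLF at checkout time, which can break patch application and content hashes.)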
Binary file added data/parquet-testing/bigdecimal.parquet
2 changes: 1 addition & 1 deletion extension/README.md
@@ -123,7 +123,7 @@ cmake build directory and build it from there:
```cmake
duckdb_extension_load(postgres_scanner
(DONT_LINK)
-    GIT_URL https://github.com/duckdblabs/postgres_scanner
+    GIT_URL https://github.com/duckdb/postgres_scanner
GIT_TAG cd043b49cdc9e0d3752535b8333c9433e1007a48
)
```
2 changes: 2 additions & 0 deletions extension/httpfs/include/s3fs.hpp
@@ -112,6 +112,8 @@ class S3FileHandle : public HTTPFileHandle {
throw NotImplementedException("Cannot open an HTTP file for appending");
}
}
+	~S3FileHandle() override;

S3AuthParams auth_params;
const S3ConfigParams config_params;

4 changes: 4 additions & 0 deletions extension/httpfs/s3fs.cpp
@@ -231,6 +231,10 @@ S3AuthParams S3AuthParams::ReadFrom(FileOpener *opener, FileOpenerInfo &info) {
endpoint, url_style, use_ssl, s3_url_compatibility_mode};
}

+S3FileHandle::~S3FileHandle() {
+	Close();
+}

S3ConfigParams S3ConfigParams::ReadFrom(FileOpener *opener) {
uint64_t uploader_max_filesize;
uint64_t max_parts_per_file;
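A note on why the destructor added above calls `Close()` explicitly: in C++, virtual dispatch is disabled inside constructors and destructors, so a `Close()` call made from a base-class destructor would never reach `S3FileHandle`'s override. Defining `~S3FileHandle()` to call `Close()` runs the S3-specific teardown while the derived object is still intact. A minimal sketch of the language rule, with illustrative names rather than DuckDB's actual class layout:

```cpp
#include <cstdio>

struct BaseHandle {
	virtual void Close() { std::puts("base close"); }
	virtual ~BaseHandle() {
		// The derived part is already destroyed when this runs, so the call
		// resolves to BaseHandle::Close, not to an override.
		Close();
	}
};

struct S3Handle : BaseHandle {
	void Close() override { std::puts("finalize S3 upload, then close"); }
	// Run the derived Close() while the derived object is still alive,
	// the same pattern as ~S3FileHandle() { Close(); } in the diff above.
	~S3Handle() override { Close(); }
};

int main() {
	S3Handle h;
	// Destruction prints "finalize S3 upload, then close" (from ~S3Handle)
	// and then "base close" (from ~BaseHandle).
}
```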
2 changes: 1 addition & 1 deletion extension/icu/icu_extension.cpp
@@ -238,7 +238,7 @@ void IcuExtension::Load(DuckDB &ddb) {
}
collation = StringUtil::Lower(collation);

-		CreateCollationInfo info(collation, GetICUFunction(collation), false, true);
+		CreateCollationInfo info(collation, GetICUFunction(collation), false, false);
ExtensionUtil::RegisterCollation(db, info);
}
ScalarFunction sort_key("icu_sort_key", {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR,
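For context on the flipped flag: the final argument of `CreateCollationInfo` is, as far as DuckDB's headers suggest, `not_required_for_equality` (with `combinable` just before it). Passing `false` now marks ICU collations as required for equality comparisons too, not only for ordering, so strings that differ under the collation no longer compare equal through a raw binary comparison.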
42 changes: 38 additions & 4 deletions extension/parquet/column_reader.cpp
@@ -1137,8 +1137,8 @@ struct DecimalParquetValueConversion {
byte_len = plain_data.read<uint32_t>();
}
plain_data.available(byte_len);
-		auto res =
-		    ParquetDecimalUtils::ReadDecimalValue<DUCKDB_PHYSICAL_TYPE>(const_data_ptr_cast(plain_data.ptr), byte_len);
+		auto res = ParquetDecimalUtils::ReadDecimalValue<DUCKDB_PHYSICAL_TYPE>(const_data_ptr_cast(plain_data.ptr),
+		                                                                       byte_len, reader.Schema());

plain_data.inc(byte_len);
return res;
@@ -1192,11 +1192,39 @@ static unique_ptr<ColumnReader> CreateDecimalReaderInternal(ParquetReader &reade
case PhysicalType::INT128:
return make_uniq<DecimalColumnReader<hugeint_t, FIXED_LENGTH>>(reader, type_p, schema_p, file_idx_p, max_define,
max_repeat);
+	case PhysicalType::DOUBLE:
+		return make_uniq<DecimalColumnReader<double, FIXED_LENGTH>>(reader, type_p, schema_p, file_idx_p, max_define,
+		                                                            max_repeat);
default:
throw InternalException("Unrecognized type for Decimal");
}
}

+template <>
+double ParquetDecimalUtils::ReadDecimalValue(const_data_ptr_t pointer, idx_t size,
+                                             const duckdb_parquet::format::SchemaElement &schema_ele) {
+	double res = 0;
+	bool positive = (*pointer & 0x80) == 0;
+	for (idx_t i = 0; i < size; i += 8) {
+		auto byte_size = MinValue<idx_t>(sizeof(uint64_t), size - i);
+		uint64_t input = 0;
+		auto res_ptr = reinterpret_cast<uint8_t *>(&input);
+		for (idx_t k = 0; k < byte_size; k++) {
+			auto byte = pointer[i + k];
+			res_ptr[sizeof(uint64_t) - k - 1] = positive ? byte : byte ^ 0xFF;
+		}
+		res *= double(NumericLimits<uint64_t>::Maximum()) + 1;
+		res += input;
+	}
+	if (!positive) {
+		res += 1;
+		res /= pow(10, schema_ele.scale);
+		return -res;
+	}
+	res /= pow(10, schema_ele.scale);
+	return res;
+}

unique_ptr<ColumnReader> ParquetDecimalUtils::CreateReader(ParquetReader &reader, const LogicalType &type_p,
const SchemaElement &schema_p, idx_t file_idx_p,
idx_t max_define, idx_t max_repeat) {
@@ -1372,8 +1400,14 @@ unique_ptr<ColumnReader> ColumnReader::CreateReader(ParquetReader &reader, const
return make_uniq<TemplatedColumnReader<float, TemplatedParquetValueConversion<float>>>(
reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
case LogicalTypeId::DOUBLE:
-		return make_uniq<TemplatedColumnReader<double, TemplatedParquetValueConversion<double>>>(
-		    reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
+		switch (schema_p.type) {
+		case Type::BYTE_ARRAY:
+		case Type::FIXED_LEN_BYTE_ARRAY:
+			return ParquetDecimalUtils::CreateReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
+		default:
+			return make_uniq<TemplatedColumnReader<double, TemplatedParquetValueConversion<double>>>(
+			    reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
+		}
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ:
switch (schema_p.type) {
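The new `ReadDecimalValue<double>` specialization above walks the big-endian two's-complement bytes of an oversized Parquet decimal 64 bits at a time, accumulating into a `double`, and only applies the sign and the `10^scale` divisor at the end. A self-contained sketch of the same conversion, simplified to buffers whose size is a whole multiple of 8 bytes (the helper name and sample values are illustrative, not part of the diff):

```cpp
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Big-endian two's-complement bytes -> double, mirroring the specialization
// above for sizes that are a multiple of 8 bytes.
static double DecimalBytesToDouble(const uint8_t *ptr, size_t size, int scale) {
	bool positive = (ptr[0] & 0x80) == 0;
	double res = 0;
	for (size_t i = 0; i < size; i += 8) {
		uint64_t word = 0;
		for (size_t k = 0; k < 8; k++) {
			uint8_t byte = ptr[i + k];
			// Negative inputs are bit-flipped, so we accumulate (-value - 1)
			// and undo the one's complement after the loop.
			word = (word << 8) | uint64_t(positive ? byte : byte ^ 0xFF);
		}
		res = res * 18446744073709551616.0 /* 2^64 */ + double(word);
	}
	if (!positive) {
		res += 1; // one's complement + 1 = two's-complement magnitude
		res = -res;
	}
	return res / std::pow(10.0, scale);
}

int main() {
	// Unscaled integer 12345 with scale 2, i.e. 123.45
	uint8_t pos[8] = {0, 0, 0, 0, 0, 0, 0x30, 0x39};
	// Two's complement of -12345, i.e. -123.45
	uint8_t neg[8] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xCF, 0xC7};
	std::printf("%.2f\n", DecimalBytesToDouble(pos, 8, 2)); // 123.45
	std::printf("%.2f\n", DecimalBytesToDouble(neg, 8, 2)); // -123.45
}
```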
7 changes: 6 additions & 1 deletion extension/parquet/include/parquet_decimal_utils.hpp
@@ -16,7 +16,8 @@ namespace duckdb {
class ParquetDecimalUtils {
public:
template <class PHYSICAL_TYPE>
-	static PHYSICAL_TYPE ReadDecimalValue(const_data_ptr_t pointer, idx_t size) {
+	static PHYSICAL_TYPE ReadDecimalValue(const_data_ptr_t pointer, idx_t size,
+	                                      const duckdb_parquet::format::SchemaElement &schema_ele) {
D_ASSERT(size <= sizeof(PHYSICAL_TYPE));
PHYSICAL_TYPE res = 0;

@@ -40,4 +41,8 @@ class ParquetDecimalUtils {
idx_t max_repeat);
};

+template <>
+double ParquetDecimalUtils::ReadDecimalValue(const_data_ptr_t pointer, idx_t size,
+                                             const duckdb_parquet::format::SchemaElement &schema_ele);

} // namespace duckdb
3 changes: 3 additions & 0 deletions extension/parquet/parquet_reader.cpp
@@ -203,6 +203,9 @@ LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele, bool bi
if (!s_ele.__isset.precision || !s_ele.__isset.scale) {
throw IOException("DECIMAL requires a length and scale specifier!");
}
+		if (s_ele.precision > DecimalType::MaxWidth()) {
+			return LogicalType::DOUBLE;
+		}
switch (s_ele.type) {
case Type::BYTE_ARRAY:
case Type::FIXED_LEN_BYTE_ARRAY:
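This is the schema-level half of the change: DuckDB's `DECIMAL` tops out at width 38 (the `hugeint_t` limit), while Parquet permits wider decimals in byte-array storage, so precisions beyond `DecimalType::MaxWidth()` now map to a lossy `DOUBLE` instead of failing to load; the reader path added in `column_reader.cpp` above handles the actual conversion.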
25 changes: 17 additions & 8 deletions extension/parquet/parquet_statistics.cpp
@@ -92,6 +92,14 @@ Value ParquetStatisticsUtils::ConvertValue(const LogicalType &type,
return Value::FLOAT(val);
}
case LogicalTypeId::DOUBLE: {
+		switch (schema_ele.type) {
+		case Type::FIXED_LEN_BYTE_ARRAY:
+		case Type::BYTE_ARRAY:
+			// decimals cast to double
+			return Value::DOUBLE(ParquetDecimalUtils::ReadDecimalValue<double>(stats_data, stats.size(), schema_ele));
+		default:
+			break;
+		}
if (stats.size() != sizeof(double)) {
throw InternalException("Incorrect stats size for type DOUBLE");
}
@@ -124,17 +132,18 @@ Value ParquetStatisticsUtils::ConvertValue(const LogicalType &type,
}
switch (type.InternalType()) {
case PhysicalType::INT16:
-			return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue<int16_t>(stats_data, stats.size()), width,
-			                      scale);
+			return Value::DECIMAL(
+			    ParquetDecimalUtils::ReadDecimalValue<int16_t>(stats_data, stats.size(), schema_ele), width, scale);
case PhysicalType::INT32:
-			return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue<int32_t>(stats_data, stats.size()), width,
-			                      scale);
+			return Value::DECIMAL(
+			    ParquetDecimalUtils::ReadDecimalValue<int32_t>(stats_data, stats.size(), schema_ele), width, scale);
case PhysicalType::INT64:
-			return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue<int64_t>(stats_data, stats.size()), width,
-			                      scale);
+			return Value::DECIMAL(
+			    ParquetDecimalUtils::ReadDecimalValue<int64_t>(stats_data, stats.size(), schema_ele), width, scale);
case PhysicalType::INT128:
-			return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue<hugeint_t>(stats_data, stats.size()), width,
-			                      scale);
+			return Value::DECIMAL(
+			    ParquetDecimalUtils::ReadDecimalValue<hugeint_t>(stats_data, stats.size(), schema_ele), width,
+			    scale);
default:
throw InternalException("Unsupported internal type for decimal");
}
78 changes: 78 additions & 0 deletions scripts/extension-upload-from-nightly.sh
@@ -0,0 +1,78 @@
#!/bin/bash

# This script deploys the extension binaries that are currently deployed to the nightly bucket to the main bucket

# WARNING: don't use this script if you don't know exactly what you're doing. To deploy a binary:
# - Run the script with ./extension-upload-from-nightly.sh <extension_name> <duckdb_version> (<nightly_commit>)
# - CHECK the output of the dry run thoroughly
# - If successful, set the I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL env variable to the correct value
# - run the script again now deploying for real
# - check the output
# - unset the I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL env var

if [ -z "$1" ] || [ -z "$2" ]; then
echo "Usage: ./extension-upload-from-nightly.sh <extension_name> <duckdb_version> (<nightly_commit>)"
exit 1
fi

if [ -z "$3" ]; then
BASE_NIGHTLY_DIR="$2"
else
BASE_NIGHTLY_DIR="$1/$3/$2"
fi

# CONFIG
FROM_BUCKET=duckdb-extensions-nightly
TO_BUCKET=duckdb-extensions
CLOUDFRONT_DISTRIBUTION_ID=E2Z28NDMI4PVXP

### COPY THE FILES
REAL_RUN="aws s3 cp s3://$FROM_BUCKET/$BASE_NIGHTLY_DIR s3://$TO_BUCKET/$2 --recursive --exclude '*' --include '*/$1.duckdb_extension.gz' --acl public-read"
DRY_RUN="$REAL_RUN --dryrun"

if [ "$I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL" == "yessir" ]; then
echo "DEPLOYING"
echo "> FROM: $FROM_BUCKET"
echo "> TO : $TO_BUCKET"
echo "> AWS CLI deploy: "
eval "$REAL_RUN"
else
echo "DEPLOYING (DRY RUN)"
echo "> FROM: $FROM_BUCKET"
echo "> TO : $TO_BUCKET"
echo "> AWS CLI Dry run: "
eval "$DRY_RUN"
fi

echo ""

### INVALIDATE THE CLOUDFRONT CACHE
# For double checking we are invalidating the correct domain
CLOUDFRONT_ORIGINS=`aws cloudfront get-distribution --id $CLOUDFRONT_DISTRIBUTION_ID --query 'Distribution.DistributionConfig.Origins.Items[*].DomainName' --output text`

# Parse the dry run output
output=$(eval "$DRY_RUN")
s3_paths=()
while IFS= read -r line; do
if [[ $line == *"copy:"* ]]; then
s3_path=$(echo $line | grep -o 's3://[^ ]*' | awk 'NR%2==0' | awk -F "s3://$TO_BUCKET" '{print $2}' | cut -d' ' -f1)
s3_paths+=("$s3_path")
fi
done <<< "$output"

if [ "$I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL" == "yessir" ]; then
echo "INVALIDATION"
echo "> Total files: ${#s3_paths[@]}"
echo "> Domain: $CLOUDFRONT_ORIGINS"
for path in "${s3_paths[@]}"; do
aws cloudfront create-invalidation --distribution-id "$CLOUDFRONT_DISTRIBUTION_ID" --paths "$path"
done
else
echo "INVALIDATION (DRY RUN)"
echo "> Total files: ${#s3_paths[@]}"
echo "> Domain: $CLOUDFRONT_ORIGINS"
echo "> Paths:"
for path in "${s3_paths[@]}"; do
echo " $path"
done
fi
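As a usage illustration (the extension name and version here are hypothetical): running `./extension-upload-from-nightly.sh spatial v0.9.2` prints the dry run only; after verifying its output, exporting `I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL=yessir` and rerunning the same command performs the copy and invalidates the copied paths in CloudFront.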
2 changes: 1 addition & 1 deletion scripts/merge_vcpkg_deps.py
@@ -55,7 +55,7 @@ def prefix_overlay_port(overlay_port):

data = {
"description": f"Auto-generated vcpkg.json for combined DuckDB extension build",
"builtin-baseline": "501db0f17ef6df184fcdbfbe0f87cde2313b6ab1",
"builtin-baseline": "a42af01b72c28a8e1d7b48107b33e4f286a55ef6",
"dependencies": final_deduplicated_deps,
"overrides": [{"name": "openssl", "version": "3.0.8"}],
}
2 changes: 1 addition & 1 deletion scripts/setup_manylinux2014.sh
@@ -40,7 +40,7 @@ install_deps() {
(
cd $VCPKG_TARGET_DIR ;
git clone https://github.com/Microsoft/vcpkg.git ;
-    git checkout 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1 ;
+    git checkout a42af01b72c28a8e1d7b48107b33e4f286a55ef6 ;
cd vcpkg ;
./bootstrap-vcpkg.sh
)