Skip to content

Commit

Permalink
Merge remote-tracking branch 'up/main' into top-n-window
Browse files Browse the repository at this point in the history
Signed-off-by: coldWater <[email protected]>
  • Loading branch information
forsaken628 committed Nov 12, 2024
2 parents 6ac6761 + e35bacc commit 0d52a67
Show file tree
Hide file tree
Showing 658 changed files with 12,883 additions and 62,394 deletions.
348 changes: 183 additions & 165 deletions Cargo.lock

Large diffs are not rendered by default.

25 changes: 11 additions & 14 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ members = [
"src/common/storage",
"src/common/vector",
"src/common/license",
"src/common/parquet2",
"src/query/ast",
"src/query/codegen",
"src/query/config",
Expand Down Expand Up @@ -140,7 +139,6 @@ databend-common-meta-store = { path = "src/meta/store" }
databend-common-meta-types = { path = "src/meta/types" }
databend-common-metrics = { path = "src/common/metrics" }
databend-common-openai = { path = "src/common/openai" }
databend-common-parquet2 = { path = "src/common/parquet2" }
databend-common-pipeline-core = { path = "src/query/pipeline/core" }
databend-common-pipeline-sinks = { path = "src/query/pipeline/sinks" }
databend-common-pipeline-sources = { path = "src/query/pipeline/sources" }
Expand Down Expand Up @@ -196,12 +194,6 @@ databend-storages-common-session = { path = "src/query/storages/common/session"
databend-storages-common-stage = { path = "src/query/storages/common/stage" }
databend-storages-common-table-meta = { path = "src/query/storages/common/table_meta" }

# Specific dependencies
parquet2 = { package = "databend-common-parquet2", path = "src/common/parquet2", default-features = false, features = [
"serde_types",
"async",
] }

# Crates.io dependencies
ahash = "0.8"
aho-corasick = { version = "1.0.1" } #
Expand All @@ -214,14 +206,13 @@ arrow-buffer = { version = "53" }
arrow-cast = { version = "53", features = ["prettyprint"] }
arrow-data = { version = "53" }
arrow-flight = { version = "53", features = ["flight-sql-experimental", "tls"] }
arrow-format = { version = "0.8.1", features = ["flight-data", "flight-service", "ipc"] }
arrow-ipc = { version = "53" }
arrow-ord = { version = "53" }
arrow-schema = { version = "53", features = ["serde"] }
arrow-select = { version = "53" }
arrow-udf-js = "0.5.0"
arrow-udf-python = "0.4.0"
arrow-udf-wasm = "0.4.0"
arrow-udf-js = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" }
arrow-udf-python = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" }
arrow-udf-wasm = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" }
async-backtrace = "0.2"
async-channel = "1.7.1"
async-compression = { git = "https://github.com/datafuse-extras/async-compression", rev = "dc81082", features = [
Expand Down Expand Up @@ -260,6 +251,7 @@ cidr = { version = "0.2.2" }
clap = { version = "4.4.2", features = ["derive"] }
comfy-table = "7"
convert_case = "0.6.0"
cookie = "0.18.1"
crc32fast = "1.3.2"
criterion = "0.5"
cron = "0.12.0"
Expand Down Expand Up @@ -289,6 +281,7 @@ flagset = "0.4"
flatbuffers = "24" # Must use the same version with arrow-ipc
flate2 = "1"
foreign_vec = "0.1.0"
fs_extra = "1.3.0"
futures = "0.3.24"
futures-async-stream = { version = "0.2.7" }
futures-util = "0.3.24"
Expand All @@ -297,6 +290,7 @@ geo-types = "0.7.13"
geohash = "0.13.0"
geos = { version = "9.0.0", features = ["static", "geo", "geo-types"] }
geozero = { version = "0.14.0", features = ["default", "with-wkb", "with-geos", "with-geojson"] }
gimli = "0.31.0"
globiter = "0.1"
goldenfile = "1.4"
h3o = "0.4.0"
Expand Down Expand Up @@ -352,6 +346,7 @@ num-bigint = "0.4.6"
num-derive = "0.3.3"
num-traits = "0.2.19"
num_cpus = "1.13.1"
object = "0.36.5"
object_store_opendal = "0.48.1"
once_cell = "1.15.0"
openai_api_rust = "0.1"
Expand Down Expand Up @@ -408,6 +403,7 @@ prost = { version = "0.13" }
prost-build = { version = "0.13" }
prqlc = "0.11.3"
quanta = "0.11.1"
raft-log = { version = "0.2.2" }
rand = { version = "0.8.5", features = ["small_rng"] }
rayon = "1.9.0"
recursive = "0.1.1"
Expand Down Expand Up @@ -511,6 +507,7 @@ nom-rule = "0.4"
pratt = "0.4.0"
pretty = "0.11.3"
rspack-codespan-reporting = "0.11"
rustc-demangle = "0.1"
strsim = "0.10"
strum_macros = "0.24"
vergen = { version = "8.3.1", default-features = false, features = ["build", "cargo", "git", "gix", "rustc"] }
Expand Down Expand Up @@ -594,6 +591,7 @@ gimli = { opt-level = 3 }
miniz_oxide = { opt-level = 3 }
object = { opt-level = 3 }
rustc-demangle = { opt-level = 3 }
databend-common-exception = { opt-level = 3 }

[profile.test]
opt-level = 0
Expand All @@ -605,7 +603,6 @@ overflow-checks = true
rpath = false

[patch.crates-io]
arrow-format = { git = "https://github.com/Xuanwo/arrow-format", rev = "5502823a" }
async-backtrace = { git = "https://github.com/datafuse-extras/async-backtrace.git", rev = "dea4553" }
async-recursion = { git = "https://github.com/datafuse-extras/async-recursion.git", rev = "a353334" }
backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "72265be", features = [
Expand All @@ -617,7 +614,7 @@ deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "3038c145" }
ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" }
jsonb = { git = "https://github.com/databendlabs/jsonb", rev = "ada713c" }
openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" }
openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.6" }
openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.7" }
orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "dfb1ede" }
recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "6af35a1" }
sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" }
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@

- **Data Simplification**: Streamlines data ingestion, no external ETL needed. 👉 [Data Loading](https://docs.databend.com/guides/load-data/).

- **Real-Time CDC**: Supports real-time incremental data updates to keep data current and accurate. 👉 [Stream](https://docs.databend.com/guides/load-data/continuous-data-pipelines/stream).

- **Format Flexibility**: Supports multiple data formats and types, including JSON, CSV, Parquet, GEO, and more.

- **ACID Transactions**: Ensures data integrity with atomic, consistent, isolated, and durable operations.
Expand Down
2 changes: 0 additions & 2 deletions docker/it-hive/hive-docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: "3"

services:
namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
Expand Down
4 changes: 1 addition & 3 deletions docker/it-iceberg-rest/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
# specific language governing permissions and limitations
# under the License.

version: '3.8'

services:
rest:
image: tabulario/iceberg-rest:0.10.0
Expand Down Expand Up @@ -47,7 +45,7 @@ services:
expose:
- 9001
- 9000
command: [ "server", "/data", "--console-address", ":9001" ]
command: ["server", "/data", "--console-address", ":9001"]
ports:
- "9000:9000"

Expand Down
63 changes: 2 additions & 61 deletions src/common/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,87 +11,33 @@ doctest = false
test = true

[features]
default = ["arrow-default", "parquet-default"]
default = ["arrow-default"]

arrow = ["arrow-buffer", "arrow-schema", "arrow-data", "arrow-array"]
io_flight = ["io_ipc", "arrow-format/flight-data"]
io_ipc = []
io_ipc_compression = []

# base64 + io_ipc because arrow schemas are stored as base64-encoded ipc format.
io_parquet = ["io_ipc", "base64", "streaming-iterator", "fallible-streaming-iterator"]
io_parquet_async = ["futures", "io_parquet", "parquet2/async"]

io_parquet_compression = [
"io_parquet_zstd",
"io_parquet_gzip",
"io_parquet_snappy",
"io_parquet_lz4",
"io_parquet_brotli",
]

# sample testing of generated arrow data
io_parquet_sample_test = ["io_parquet_async"]

# compression backends
io_parquet_brotli = ["parquet2/brotli"]
io_parquet_gzip = ["parquet2/gzip"]
io_parquet_lz4 = ["parquet2/lz4"]
io_parquet_snappy = ["parquet2/snappy"]
io_parquet_zstd = ["parquet2/zstd"]

# parquet bloom filter functions
io_parquet_bloom_filter = ["parquet2/bloom_filter"]

compute = [
"compute_aggregate",
"compute_cast",
"compute_concatenate",
"compute_merge_sort",
"compute_sort",
"compute_take",
]
compute_aggregate = []
compute_cast = ["lexical-core", "compute_take"]
compute_concatenate = []
compute_merge_sort = ["itertools", "compute_sort"]
compute_sort = ["compute_take"]
compute_take = []

serde_types = ["serde", "serde_derive"]
simd = []

arrow-default = [
"arrow",
"io_ipc",
"io_ipc_compression",
"io_flight",
"io_parquet_async",
"io_parquet_compression",
"io_parquet",
"compute",
"serde_types",
"simd",
]

parquet-default = [
"parquet2/lz4",
"parquet2/zstd",
"parquet2/snappy",
# this feature can't be built in musl
# "parquet2/gzip_zlib_ng",
"parquet2/brotli",
]

[dependencies]
ahash = { workspace = true }
arrow-array = { workspace = true, optional = true }
arrow-buffer = { workspace = true, optional = true }
arrow-data = { workspace = true, optional = true }
arrow-format = { workspace = true }
arrow-schema = { workspace = true, optional = true }
async-stream = { workspace = true, optional = true }
base64 = { workspace = true, optional = true }
bitpacking = { workspace = true }
bytemuck = { workspace = true }
byteorder = { workspace = true }
Expand All @@ -101,28 +47,23 @@ chrono-tz = { workspace = true, optional = true }
dyn-clone = { workspace = true }
either = { workspace = true }
ethnum = { workspace = true }
fallible-streaming-iterator = { workspace = true, optional = true }
foreign_vec = { workspace = true }
futures = { workspace = true, optional = true }
hashbrown_v0_14 = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true, optional = true }
lexical-core = { workspace = true, optional = true }
log = { workspace = true }
lz4 = { workspace = true }
num = { workspace = true, features = ["std"] }
num-traits = { workspace = true }
opendal = { workspace = true }
ordered-float = { workspace = true }
parquet2 = { workspace = true }
rand = { workspace = true }
ringbuffer = { workspace = true }
roaring = { workspace = true }
serde = { workspace = true, features = ["rc"], optional = true }
serde_derive = { workspace = true, optional = true }
serde_json = { workspace = true }
simdutf8 = { workspace = true }
snap = { workspace = true }
streaming-iterator = { workspace = true, optional = true }
zstd = { workspace = true }

[dev-dependencies]
Expand Down
80 changes: 0 additions & 80 deletions src/common/arrow/src/arrow/array/binary/ffi.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/common/arrow/src/arrow/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use crate::arrow::offset::Offsets;
use crate::arrow::offset::OffsetsBuffer;
use crate::arrow::trusted_len::TrustedLen;

mod ffi;
pub(super) mod fmt;
mod iterator;
pub use iterator::*;
Expand Down
Loading

0 comments on commit 0d52a67

Please sign in to comment.