Skip to content

Commit

Permalink
put version guessing behind a flag
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Nov 18, 2024
1 parent 15d0cc3 commit 6ed670b
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 5 deletions.
8 changes: 7 additions & 1 deletion src/common/iceberg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ string GenerateMetaDataUrl(FileSystem &fs, const string &meta_path, string &tabl
}


string IcebergSnapshot::GetMetaDataPath(const string &path, FileSystem &fs, string metadata_compression_codec, string table_version = DEFAULT_TABLE_VERSION, string version_format = DEFAULT_TABLE_VERSION_FORMAT) {
string IcebergSnapshot::GetMetaDataPath(ClientContext &context, const string &path, FileSystem &fs, string metadata_compression_codec, string table_version = DEFAULT_TABLE_VERSION, string version_format = DEFAULT_TABLE_VERSION_FORMAT) {
string version_hint;
string meta_path = fs.JoinPath(path, "metadata");
if (StringUtil::EndsWith(path, ".json")) {
Expand All @@ -210,6 +210,12 @@ string IcebergSnapshot::GetMetaDataPath(const string &path, FileSystem &fs, stri
version_hint = GetTableVersionFromHint(meta_path, fs, DEFAULT_VERSION_HINT_FILE);
return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format);
} else {
Value result;
(void)context.TryGetCurrentSetting("unsafe_enable_version_guessing", result);
if (!result.GetValue<bool>()) {
throw InvalidInputException("No version was provided and no version-hint could be found, globbing the filesystem to locate the latest version is disabled by default as this is considered unsafe and could result in reading uncommitted data. To enable this use 'SET unsafe_enable_version_guessing = true;'");
}

// We need to guess from file paths
return GuessTableVersion(meta_path, fs, table_version, metadata_compression_codec, version_format);
}
Expand Down
8 changes: 8 additions & 0 deletions src/iceberg_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ static void LoadInternal(DatabaseInstance &instance) {
}

void IcebergExtension::Load(DuckDB &db) {
auto &config = DBConfig::GetConfig(*db.instance);

config.AddExtensionOption(
"unsafe_enable_version_guessing",
"Enable globbing the filesystem (if possible) to find the latest version metadata. This could result in reading an uncommitted version.",
LogicalType::BOOLEAN,
Value::BOOLEAN(false)
);
LoadInternal(*db.instance);
}
std::string IcebergExtension::Name() {
Expand Down
2 changes: 1 addition & 1 deletion src/iceberg_functions/iceberg_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ static unique_ptr<FunctionData> IcebergMetaDataBind(ClientContext &context, Tabl
}
}

auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(iceberg_path, fs, metadata_compression_codec, table_version, version_name_format);
auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(context, iceberg_path, fs, metadata_compression_codec, table_version, version_name_format);
IcebergSnapshot snapshot_to_scan;
if (input.inputs.size() > 1) {
if (input.inputs[1].type() == LogicalType::UBIGINT) {
Expand Down
2 changes: 1 addition & 1 deletion src/iceberg_functions/iceberg_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ static unique_ptr<TableRef> IcebergScanBindReplace(ClientContext &context, Table
version_name_format = StringValue::Get(kv.second);
}
}
auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(iceberg_path, fs, metadata_compression_codec, table_version, version_name_format);
auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(context, iceberg_path, fs, metadata_compression_codec, table_version, version_name_format);
IcebergSnapshot snapshot_to_scan;
if (input.inputs.size() > 1) {
if (input.inputs[1].type() == LogicalType::UBIGINT) {
Expand Down
2 changes: 1 addition & 1 deletion src/iceberg_functions/iceberg_snapshots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct IcebergSnapshotGlobalTableFunctionState : public GlobalTableFunctionState
FileSystem &fs = FileSystem::GetFileSystem(context);

auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(
bind_data.filename, fs, bind_data.metadata_compression_codec, bind_data.table_version, bind_data.version_name_format);
context, bind_data.filename, fs, bind_data.metadata_compression_codec, bind_data.table_version, bind_data.version_name_format);
global_state->metadata_file = IcebergSnapshot::ReadMetaData(iceberg_meta_path, fs, bind_data.metadata_compression_codec);
global_state->metadata_doc =
yyjson_read(global_state->metadata_file.c_str(), global_state->metadata_file.size(), 0);
Expand Down
2 changes: 1 addition & 1 deletion src/include/iceberg_metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class IcebergSnapshot {

static IcebergSnapshot ParseSnapShot(yyjson_val *snapshot, idx_t iceberg_format_version, idx_t schema_id,
vector<yyjson_val *> &schemas, string metadata_compression_codec, bool skip_schema_inference);
static string GetMetaDataPath(const string &path, FileSystem &fs, string metadata_compression_codec, string table_version, string version_format);
static string GetMetaDataPath(ClientContext &context, const string &path, FileSystem &fs, string metadata_compression_codec, string table_version, string version_format);
static string ReadMetaData(const string &path, FileSystem &fs, string metadata_compression_codec);
static yyjson_val *GetSnapshots(const string &path, FileSystem &fs, string GetSnapshotByTimestamp);
static unique_ptr<SnapshotParseInfo> GetParseInfo(yyjson_doc &metadata_json);
Expand Down
8 changes: 8 additions & 0 deletions test/sql/iceberg_metadata.test
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_nonexistent');
----
IO Error: Cannot open "data/iceberg/lineitem_iceberg_nonexistent": Metadata directory does not exist

statement error
SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE);
----
<REGEX>:.*SET unsafe_enable_version_guessing.*

statement ok
SET unsafe_enable_version_guessing = true;

query IIIIIIII
SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE);
----
Expand Down

0 comments on commit 6ed670b

Please sign in to comment.