Skip to content

Commit

Permalink
修复metadata cache失效问题 (#20)
Browse files Browse the repository at this point in the history
* fix metadata cache issue

* only update cache when partition info is initialized
  • Loading branch information
taiyang-li authored Aug 25, 2022
1 parent 59e3a84 commit c0e3b46
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 16 deletions.
44 changes: 30 additions & 14 deletions src/Storages/Hive/HiveCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,23 @@ bool HiveMetastoreClient::PartitionInfo::haveSameParameters(const Apache::Hadoop
return (it == partition.parameters.cend() && oit == other.parameters.cend());
}

void HiveMetastoreClient::PartitionInfo::initialize(const std::vector<FileInfo> & files_)
{
files = files_;
initialized = true;
initialized_time = time(nullptr);
}

bool HiveMetastoreClient::PartitionInfo::isValid() const
{
return initialized && time(nullptr) < 300 + initialized_time;
}

bool HiveMetastoreClient::PartitionInfo::isInitialized() const
{
return initialized;
}

std::vector<Apache::Hadoop::Hive::Partition> HiveMetastoreClient::HiveTableMetadata::getPartitions() const
{
std::vector<Apache::Hadoop::Hive::Partition> result;
Expand All @@ -137,7 +154,7 @@ std::vector<HiveMetastoreClient::FileInfo> HiveMetastoreClient::HiveTableMetadat
if (it == partition_infos.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid location {}", location);

if (it->second.initialized)
if (it->second.isValid())
{
LOG_DEBUG(log, "Get {} files under directory {} from cache", it->second.files.size(), location);
return it->second.files;
Expand All @@ -162,10 +179,9 @@ std::vector<HiveMetastoreClient::FileInfo> HiveMetastoreClient::HiveTableMetadat
if (it == partition_infos.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid location {}", location);

it->second.files = result;
it->second.initialized = true;
it->second.initialize(result);
}
LOG_DEBUG(log, "Get {} files under directory {}", result.size(), location);
LOG_DEBUG(log, "Get {} files under directory {} from HDFS", result.size(), location);
return result;
}

Expand All @@ -186,40 +202,40 @@ void HiveMetastoreClient::HiveTableMetadata::updateIfNeeded(const std::vector<Ap
for (const auto & partition : partitions)
{
auto it = old_partiton_infos.find(partition.sd.location);
if (it == old_partiton_infos.end() || !it->second.haveSameParameters(partition) || !it->second.initialized)
if (it == old_partiton_infos.end() || !it->second.haveSameParameters(partition) || !it->second.isValid())
{
new_partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
continue;
}
else
{
new_partition_infos.emplace(partition.sd.location, std::move(it->second));
}
}
partition_infos.swap(new_partition_infos);
last_update_time = time(nullptr);
LOG_DEBUG(log, "Finish update metadata for table {}.{}", db_name, table_name);
}

bool HiveMetastoreClient::HiveTableMetadata::shouldUpdate(const std::vector<Apache::Hadoop::Hive::Partition> & partitions)
{
const auto & old_partiton_infos = partition_infos;
if (old_partiton_infos.size() != partitions.size())
const auto & old_partition_infos = partition_infos;
if (old_partition_infos.size() != partitions.size())
return true;

for (const auto & partition : partitions)
{
auto it = old_partiton_infos.find(partition.sd.location);
if (it == old_partiton_infos.end())
auto it = old_partition_infos.find(partition.sd.location);
if (it == old_partition_infos.end())
return true;

const auto & old_partition_info = it->second;
if (!old_partition_info.haveSameParameters(partition))
return true;

/// Cache is out-of-date
if (old_partition_info.isInitialized() && !old_partition_info.isValid())
return true;
}

auto now = time(nullptr);
return now - last_update_time >= 300;
return false;
}


Expand Down
10 changes: 8 additions & 2 deletions src/Storages/Hive/HiveCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,17 @@ class HiveMetastoreClient
Apache::Hadoop::Hive::Partition partition;
std::vector<FileInfo> files;
bool initialized = false; /// If true, files are initialized.
time_t initialized_time{0};

explicit PartitionInfo(const Apache::Hadoop::Hive::Partition & partition_): partition(partition_) {}
explicit PartitionInfo(const Apache::Hadoop::Hive::Partition & partition_) : partition(partition_) { }
PartitionInfo(PartitionInfo &&) = default;

bool haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const;

void initialize(const std::vector<FileInfo> & files);

bool isValid() const;
bool isInitialized() const;
};

class HiveTableMetadata;
Expand Down Expand Up @@ -111,7 +117,7 @@ class HiveMetastoreClient
/// Mutex to protect partition_infos.
mutable std::mutex mutex;
std::map<String, PartitionInfo> partition_infos;
time_t last_update_time{0};
// time_t last_update_time{0};

const bool empty_partition_keys;
const HiveFilesCachePtr hive_files_cache;
Expand Down

0 comments on commit c0e3b46

Please sign in to comment.