Skip to content

Commit

Permalink
Feat: P3 refactor index scan and test cases (#637)
Browse files Browse the repository at this point in the history
* Feat: P3 refactor index scan and test cases
  • Loading branch information
ChaosZhai authored Oct 27, 2023
1 parent c4d4818 commit 865905b
Show file tree
Hide file tree
Showing 25 changed files with 303 additions and 361 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ set(P3_FILES
"src/optimizer/optimizer_custom_rules.cpp"
"src/optimizer/sort_limit_as_topn.cpp"
"src/optimizer/optimizer_internal.cpp"
"src/optimizer/seqscan_as_indexscan.cpp"
"src/common/bustub_ddl.cpp"
"src/include/execution/plans/index_scan_plan.h"
"src/include/execution/plans/topn_per_group_plan.h"
Expand Down
2 changes: 1 addition & 1 deletion src/common/bustub_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void BustubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
std::unique_lock<std::shared_mutex> l(catalog_lock_);
auto info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
IntegerHashFunctionType{});
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
l.unlock();

if (info == nullptr) {
Expand Down
9 changes: 9 additions & 0 deletions src/common/util/string_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ auto StringUtil::Contains(const std::string &haystack, const std::string &needle
return (haystack.find(needle) != std::string::npos);
}

/**
 * Checks whether `needle` can be found in `haystack` when the search starts at
 * the first occurrence of `keyword`.
 *
 * @param keyword  marker that must be present in `haystack`
 * @param haystack text to search in
 * @param needle   text to look for, starting from the position of `keyword`
 * @return true if `keyword` is present and `needle` is found from that position on;
 *         false if `keyword` is missing or `needle` is not found
 */
auto StringUtil::ContainsAfter(const std::string &keyword, const std::string &haystack, const std::string &needle)
    -> bool {
  const auto keyword_pos = haystack.find(keyword);
  // Without the keyword there is no "after" region to search.
  return keyword_pos != std::string::npos && haystack.find(needle, keyword_pos) != std::string::npos;
}

void StringUtil::RTrim(std::string *str) {
// remove trailing ' ', \f, \n, \r, \t, \v
str->erase(std::find_if(str->rbegin(), str->rend(), [](int ch) { return std::isspace(ch) == 0; }).base(), str->end());
Expand Down
30 changes: 27 additions & 3 deletions src/execution/mock_scan_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,25 @@ static const char *ta_list_2022[] = {"amstqq", "durovo", "joyceliaoo",
static const char *ta_list_2023[] = {"abigalekim", "arvinwu168", "christopherlim98", "David-Lyons", "fanyuex2",
"Mayank-Baranwal", "skyzh", "yarkhinephyo", "yliang412"};

// GitHub ids served as the first column of `__mock_table_tas_2023_fall`.
// The number of entries here determines the row count of that table, and
// entry i is emitted together with ta_oh_2023_fall[i].
static const char *ta_list_2023_fall[] = {"skyzh", "yliang412", "fernandolis10", "wiam8",
"anurag-23", "Mayank-Baranwal", "abigalekim", "ChaosZhai",
"aoleizhou", "averyqi115", "kswim8"};

static const char *ta_oh_2022[] = {"Tuesday", "Wednesday", "Monday", "Wednesday", "Thursday", "Friday",
"Wednesday", "Randomly", "Tuesday", "Monday", "Tuesday"};

static const char *ta_oh_2023[] = {"Friday", "Thursday", "Tuesday", "Monday", "Tuesday",
"Tuesday", "Randomly", "Wednesday", "Thursday"};

// Office-hour values served as the second column of `__mock_table_tas_2023_fall`.
// Indexed with the same cursor as ta_list_2023_fall, whose length is used as the
// table size, so this array must have at least as many entries as that list.
static const char *ta_oh_2023_fall[] = {"Randomly", "Tuesday", "Wednesday", "Tuesday", "Thursday", "Tuesday",
"Friday", "Yesterday", "Friday", "Friday", "Never"};

static const char *course_on_date[] = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"};

const char *mock_table_list[] = {"__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_tas_2022",
"__mock_table_tas_2023", "__mock_agg_input_small", "__mock_agg_input_big",
"__mock_table_schedule_2022", "__mock_table_schedule_2023", "__mock_table_123",
"__mock_graph",
"__mock_table_tas_2023", "__mock_table_tas_2023_fall", "__mock_agg_input_small",
"__mock_agg_input_big", "__mock_table_schedule_2022", "__mock_table_schedule_2023",
"__mock_table_123", "__mock_graph",
// For leaderboard Q1
"__mock_t1",
// For leaderboard Q2
Expand Down Expand Up @@ -71,6 +78,10 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema {
return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}};
}

if (table == "__mock_table_tas_2023_fall") {
return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}};
}

if (table == "__mock_table_schedule_2022") {
return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}};
}
Expand Down Expand Up @@ -139,6 +150,10 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t {
return sizeof(ta_list_2023) / sizeof(ta_list_2023[0]);
}

if (table == "__mock_table_tas_2023_fall") {
return sizeof(ta_list_2023_fall) / sizeof(ta_list_2023_fall[0]);
}

if (table == "__mock_table_schedule_2022") {
return sizeof(course_on_date) / sizeof(course_on_date[0]);
}
Expand Down Expand Up @@ -256,6 +271,15 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
};
}

if (table == "__mock_table_tas_2023_fall") {
return [plan](size_t cursor) {
std::vector<Value> values{};
values.push_back(ValueFactory::GetVarcharValue(ta_list_2023_fall[cursor]));
values.push_back(ValueFactory::GetVarcharValue(ta_oh_2023_fall[cursor]));
return Tuple{values, &plan->OutputSchema()};
};
}

if (table == "__mock_table_schedule_2022") {
return [plan](size_t cursor) {
std::vector<Value> values{};
Expand Down
16 changes: 14 additions & 2 deletions src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ using table_oid_t = uint32_t;
using column_oid_t = uint32_t;
using index_oid_t = uint32_t;

/** Selects which index implementation the catalog builds for an index. */
enum class IndexType {
  /** Index backed by a B+ tree. */
  BPlusTreeIndex,
  /** Index backed by an extendible hash table. */
  HashTableIndex
};

/**
* The TableInfo class maintains metadata about a table.
*/
Expand Down Expand Up @@ -94,6 +96,8 @@ struct IndexInfo {
const size_t key_size_;
/** Is primary key index? */
bool is_primary_key_;
/** The index type */
[[maybe_unused]] IndexType index_type_{IndexType::BPlusTreeIndex};
};

/**
Expand Down Expand Up @@ -206,7 +210,8 @@ class Catalog {
template <class KeyType, class ValueType, class KeyComparator>
auto CreateIndex(Transaction *txn, const std::string &index_name, const std::string &table_name, const Schema &schema,
const Schema &key_schema, const std::vector<uint32_t> &key_attrs, std::size_t keysize,
HashFunction<KeyType> hash_function, bool is_primary_key = false) -> IndexInfo * {
HashFunction<KeyType> hash_function, bool is_primary_key = false,
IndexType index_type = IndexType::HashTableIndex) -> IndexInfo * {
// Reject the creation request for nonexistent table
if (table_names_.find(table_name) == table_names_.end()) {
return NULL_INDEX_INFO;
Expand All @@ -231,7 +236,14 @@ class Catalog {
// just the key, value, and comparator types

// Construct the index implementation selected by index_type (extendible hash table or B+ tree)
auto index = std::make_unique<BPlusTreeIndex<KeyType, ValueType, KeyComparator>>(std::move(meta), bpm_);
std::unique_ptr<Index> index;
if (index_type == IndexType::HashTableIndex) {
index = std::make_unique<ExtendibleHashTableIndex<KeyType, ValueType, KeyComparator>>(std::move(meta), bpm_,
hash_function);
} else {
BUSTUB_ASSERT(index_type == IndexType::BPlusTreeIndex, "Unsupported Index Type");
index = std::make_unique<BPlusTreeIndex<KeyType, ValueType, KeyComparator>>(std::move(meta), bpm_);
}

// Populate the index with all tuples in table heap
auto *table_meta = GetTable(table_name);
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/util/string_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class StringUtil {
/** @return true if haystack contains needle, false otherwise */
static auto Contains(const std::string &haystack, const std::string &needle) -> bool;

/** @return true if keyword occurs in haystack and needle is found starting from keyword's first occurrence, false otherwise */
static auto ContainsAfter(const std::string &keyword, const std::string &haystack, const std::string &needle) -> bool;

/** @return true if target string starts with given prefix, false otherwise */
static auto StartsWith(const std::string &str, const std::string &prefix) -> bool;

Expand Down
6 changes: 6 additions & 0 deletions src/include/optimizer/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ class Optimizer {
*/
auto OptimizeOrderByAsIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef;

/**
* @brief optimize seq scan as index scan if there's an index on a table
* @note Fall 2023 only: uses the hash index and supports only point lookups
*/
auto OptimizeSeqScanAsIndexScan(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef;

/** @brief check if the index can be matched */
auto MatchIndex(const std::string &table_name, uint32_t index_key_idx)
-> std::optional<std::tuple<index_oid_t, std::string>>;
Expand Down
15 changes: 8 additions & 7 deletions src/include/storage/index/b_plus_tree_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,14 @@ class BPlusTreeIndex : public Index {

/** We only support index table with one integer key for now in BusTub. Hardcode everything here. */

constexpr static const auto TWO_INTEGER_SIZE = 8;
using IntegerKeyType = GenericKey<TWO_INTEGER_SIZE>;
using IntegerValueType = RID;
using IntegerComparatorType = GenericComparator<TWO_INTEGER_SIZE>;
using BPlusTreeIndexForTwoIntegerColumn = BPlusTreeIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>;
constexpr static const auto TWO_INTEGER_SIZE_B_TREE = 8;
using IntegerKeyType_BTree = GenericKey<TWO_INTEGER_SIZE_B_TREE>;
using IntegerValueType_BTree = RID;
using IntegerComparatorType_BTree = GenericComparator<TWO_INTEGER_SIZE_B_TREE>;
using BPlusTreeIndexForTwoIntegerColumn =
BPlusTreeIndex<IntegerKeyType_BTree, IntegerValueType_BTree, IntegerComparatorType_BTree>;
using BPlusTreeIndexIteratorForTwoIntegerColumn =
IndexIterator<IntegerKeyType, IntegerValueType, IntegerComparatorType>;
using IntegerHashFunctionType = HashFunction<IntegerKeyType>;
IndexIterator<IntegerKeyType_BTree, IntegerValueType_BTree, IntegerComparatorType_BTree>;
using IntegerHashFunctionType = HashFunction<IntegerKeyType_BTree>;

} // namespace bustub
7 changes: 7 additions & 0 deletions src/include/storage/index/extendible_hash_table_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,11 @@ class ExtendibleHashTableIndex : public Index {
DiskExtendibleHashTable<KeyType, ValueType, KeyComparator> container_;
};

/** Hard-coded aliases for a hash table index using an 8-byte generic key and RID values. */

// Key size in bytes passed to GenericKey/GenericComparator below.
// NOTE(review): the name suggests two 4-byte integers — confirm against callers.
constexpr static const auto TWO_INTEGER_SIZE = 8;
// Fixed-size generic key of TWO_INTEGER_SIZE bytes.
using IntegerKeyType = GenericKey<TWO_INTEGER_SIZE>;
// Values stored in the index are RIDs.
using IntegerValueType = RID;
// Comparator matching IntegerKeyType.
using IntegerComparatorType = GenericComparator<TWO_INTEGER_SIZE>;
// Extendible hash table index instantiated with the key/value/comparator types above.
using HashTableIndexForTwoIntegerColumn =
ExtendibleHashTableIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>;

} // namespace bustub
3 changes: 2 additions & 1 deletion src/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ add_library(
optimizer_custom_rules.cpp
optimizer_internal.cpp
order_by_index_scan.cpp
sort_limit_as_topn.cpp)
sort_limit_as_topn.cpp
seqscan_as_indexscan.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:bustub_optimizer>
Expand Down
2 changes: 2 additions & 0 deletions src/optimizer/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ auto Optimizer::Optimize(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef
p = OptimizeMergeFilterNLJ(p);
p = OptimizeOrderByAsIndexScan(p);
p = OptimizeSortLimitAsTopN(p);
p = OptimizeMergeFilterScan(p);
p = OptimizeSeqScanAsIndexScan(p);
return p;
}
// By default, use user-defined rules.
Expand Down
11 changes: 11 additions & 0 deletions src/optimizer/seqscan_as_indexscan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "optimizer/optimizer.h"

namespace bustub {

/**
 * Optimizer rule meant to rewrite a sequential scan with a predicate into an index scan.
 * This is currently a placeholder: the rule is not implemented and the input plan is
 * returned unchanged.
 *
 * @param plan the plan to optimize
 * @return the same plan that was passed in
 */
auto Optimizer::OptimizeSeqScanAsIndexScan(const bustub::AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef {
// TODO(student): implement seq scan with predicate -> index scan optimizer rule
// The Filter Predicate Pushdown has been enabled for you in optimizer.cpp when forcing starter rule
// Until the rule is implemented, hand the plan back untouched.
return plan;
}

} // namespace bustub
22 changes: 12 additions & 10 deletions test/sql/p3.00-primer.slt
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
query rowsort
select github_id from __mock_table_tas_2023;
select github_id, office_hour from __mock_table_tas_2023_fall;
----
abigalekim
arvinwu168
christopherlim98
David-Lyons
fanyuex2
Mayank-Baranwal
skyzh
yarkhinephyo
yliang412
skyzh Randomly
yliang412 Tuesday
fernandolis10 Wednesday
wiam8 Tuesday
anurag-23 Thursday
Mayank-Baranwal Tuesday
abigalekim Friday
ChaosZhai Yesterday
aoleizhou Friday
averyqi115 Friday
kswim8 Never
Loading

0 comments on commit 865905b

Please sign in to comment.