From 922b9afe34e5f5590445449ca871e25f67703ac4 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 22 Aug 2024 16:57:29 +0200 Subject: [PATCH 01/38] add RandomConfigs class with default values and applied to the GenerateStatement() --- src/include/statement_generator.hpp | 3 +++ src/statement_generator.cpp | 31 +++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index dfc5e22..3016899 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -40,6 +40,7 @@ class StatementGenerator { friend class ExpressionDepthChecker; friend class AggregateChecker; friend class WindowChecker; + friend class RandomsConfig; public: StatementGenerator(ClientContext &context); @@ -55,6 +56,8 @@ class StatementGenerator { bool RandomPercentage(idx_t percentage); bool verification_enabled = false; idx_t RandomValue(idx_t max); + + string GetRandomAttachedDataBase(); unique_ptr GenerateStatement(StatementType type); // came from private diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index fe85e48..369f84d 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -92,24 +92,43 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon return result; } +class RandomsConfig { +public: + idx_t select_percentage = 60; + idx_t attach_percentage = 40; + idx_t attach_use_percentage = 50; + idx_t detach_percentage = 60; + idx_t set_percentage = 30; + idx_t delete_percentage = 40; + + RandomsConfig(){ + + }; + + void getConfigFromFile() { + // read file and update default values + } +}; + unique_ptr StatementGenerator::GenerateStatement() { - if (RandomPercentage(80)) { + RandomsConfig config = RandomsConfig(); + if (RandomPercentage(config.select_percentage)) { return GenerateStatement(StatementType::SELECT_STATEMENT); } - if (RandomPercentage(40)) { - if (RandomPercentage(50)) { + if 
(RandomPercentage(config.attach_percentage)) { + if (RandomPercentage(config.attach_use_percentage)) { // We call this directly so we have a higher chance to fuzz persistent databases return GenerateAttachUse(); } return GenerateStatement(StatementType::ATTACH_STATEMENT); } - if (RandomPercentage(60)) { + if (RandomPercentage(config.detach_percentage)) { return GenerateStatement(StatementType::DETACH_STATEMENT); } - if (RandomPercentage(30)) { + if (RandomPercentage(config.set_percentage)) { return GenerateStatement(StatementType::SET_STATEMENT); } - if (RandomPercentage(40)) { //20 + if (RandomPercentage(config.delete_percentage)) { //20 return GenerateStatement(StatementType::DELETE_STATEMENT); } return GenerateStatement(StatementType::CREATE_STATEMENT); From 88949bc5df8b309c37c8f7397d4c1c5ff111dd25 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 3 Sep 2024 15:22:38 +0200 Subject: [PATCH 02/38] add a class for random numbers configuraion --- src/include/randoms_config.hpp | 36 ++++++++++++++++++++++++++++++++++ src/randoms_config.cpp | 0 2 files changed, 36 insertions(+) create mode 100644 src/include/randoms_config.hpp create mode 100644 src/randoms_config.cpp diff --git a/src/include/randoms_config.hpp b/src/include/randoms_config.hpp new file mode 100644 index 0000000..7fd7ea9 --- /dev/null +++ b/src/include/randoms_config.hpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// randoms_config.hpp +// +// +//===----------------------------------------------------------------------===// + +#include "duckdb.hpp" + +class RandomsConfig { +public: + + idx_t select_percentage = 60; + idx_t attach_percentage = 40; + idx_t attach_use_percentage = 50; + idx_t detach_percentage = 60; + idx_t set_percentage = 30; + idx_t delete_percentage = 40; + + RandomsConfig() { + + }; + ~RandomsConfig(); + + // read file and update default values + void getConfigFromFile(duckdb_string_t file_path) { + 
+ } + + // set default values + void GetDefaultConfig() { + + } + +}; \ No newline at end of file diff --git a/src/randoms_config.cpp b/src/randoms_config.cpp new file mode 100644 index 0000000..e69de29 From 1632858cf724b0e26a00a74328cb36353e9b5c27 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 3 Sep 2024 15:23:55 +0200 Subject: [PATCH 03/38] change statement generator to work with the RandomsConfig --- src/include/statement_generator.hpp | 3 ++- src/statement_generator.cpp | 18 ------------------ 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 3016899..bfe8eb2 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -11,6 +11,7 @@ #include "duckdb.hpp" #include "duckdb/parser/parsed_data/detach_info.hpp" #include "duckdb/parser/query_node.hpp" +#include "src/include/randoms_config.hpp" #define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir" @@ -40,7 +41,7 @@ class StatementGenerator { friend class ExpressionDepthChecker; friend class AggregateChecker; friend class WindowChecker; - friend class RandomsConfig; + RandomsConfig random_config; public: StatementGenerator(ClientContext &context); diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 369f84d..371cecd 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -92,24 +92,6 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon return result; } -class RandomsConfig { -public: - idx_t select_percentage = 60; - idx_t attach_percentage = 40; - idx_t attach_use_percentage = 50; - idx_t detach_percentage = 60; - idx_t set_percentage = 30; - idx_t delete_percentage = 40; - - RandomsConfig(){ - - }; - - void getConfigFromFile() { - // read file and update default values - } -}; - unique_ptr StatementGenerator::GenerateStatement() { RandomsConfig config = RandomsConfig(); if 
(RandomPercentage(config.select_percentage)) { From aaac78c28031adc60371c7dad330bb7e0e2b4ca1 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 4 Sep 2024 14:32:20 +0200 Subject: [PATCH 04/38] rename randoms config --- ...doms_config.hpp => random_nums_config.hpp} | 19 +++++++++++++------ src/include/statement_generator.hpp | 4 ++-- src/random_nums_config.cpp | 12 ++++++++++++ src/randoms_config.cpp | 0 src/statement_generator.cpp | 2 +- 5 files changed, 28 insertions(+), 9 deletions(-) rename src/include/{randoms_config.hpp => random_nums_config.hpp} (78%) create mode 100644 src/random_nums_config.cpp delete mode 100644 src/randoms_config.cpp diff --git a/src/include/randoms_config.hpp b/src/include/random_nums_config.hpp similarity index 78% rename from src/include/randoms_config.hpp rename to src/include/random_nums_config.hpp index 7fd7ea9..a92ea11 100644 --- a/src/include/randoms_config.hpp +++ b/src/include/random_nums_config.hpp @@ -1,15 +1,19 @@ //===----------------------------------------------------------------------===// // DuckDB // -// randoms_config.hpp +// random_nums_config.hpp // // //===----------------------------------------------------------------------===// +#pragma once + #include "duckdb.hpp" -class RandomsConfig { -public: +namespace duckdb { + +class RandomNumsConfig { +private: idx_t select_percentage = 60; idx_t attach_percentage = 40; @@ -18,10 +22,11 @@ class RandomsConfig { idx_t set_percentage = 30; idx_t delete_percentage = 40; - RandomsConfig() { +public: + RandomNumsConfig() { }; - ~RandomsConfig(); + ~RandomNumsConfig(); // read file and update default values void getConfigFromFile(duckdb_string_t file_path) { @@ -33,4 +38,6 @@ class RandomsConfig { } -}; \ No newline at end of file +}; + +} // namespace duckdb \ No newline at end of file diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index bfe8eb2..ebd6e72 100644 --- a/src/include/statement_generator.hpp +++ 
b/src/include/statement_generator.hpp @@ -11,7 +11,7 @@ #include "duckdb.hpp" #include "duckdb/parser/parsed_data/detach_info.hpp" #include "duckdb/parser/query_node.hpp" -#include "src/include/randoms_config.hpp" +#include "src/include/random_nums_config.hpp" #define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir" @@ -41,7 +41,7 @@ class StatementGenerator { friend class ExpressionDepthChecker; friend class AggregateChecker; friend class WindowChecker; - RandomsConfig random_config; + RandomNumsConfig random_nums_config; public: StatementGenerator(ClientContext &context); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp new file mode 100644 index 0000000..9a8bb01 --- /dev/null +++ b/src/random_nums_config.cpp @@ -0,0 +1,12 @@ +#include "random_nums_config.hpp" + +namespace duckdb { + void getConfigFromFile(duckdb_string_t file_path) { + + } + + // set default values + void GetDefaultConfig() { + + } +} \ No newline at end of file diff --git a/src/randoms_config.cpp b/src/randoms_config.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 371cecd..ff24181 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -93,7 +93,7 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon } unique_ptr StatementGenerator::GenerateStatement() { - RandomsConfig config = RandomsConfig(); + RandomNumsConfig config = RandomNumsConfig(); if (RandomPercentage(config.select_percentage)) { return GenerateStatement(StatementType::SELECT_STATEMENT); } From c1506c5c2cf2edeb0e3bec07e95f1e67a7a9c9ee Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 4 Sep 2024 17:00:11 +0200 Subject: [PATCH 05/38] mention random_nums_config files in the CMakes --- CMakeLists.txt | 1 + src/CMakeLists.txt | 2 +- src/include/random_nums_config.hpp | 29 +++++++++++------------------ src/include/statement_generator.hpp | 2 -- src/random_nums_config.cpp | 26 
++++++++++++++++++++------ src/statement_generator.cpp | 1 + 6 files changed, 34 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f55d4e6..ec9e9b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ add_subdirectory(src/third_party/sqlsmith) set(EXTENSION_SOURCES src/sqlsmith_extension.cpp src/statement_generator.cpp src/statement_simplifier.cpp + src/random_nums_config.cpp src/fuzzyduck.cpp ${EXTENSION_OBJECT_FILES}) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 309e2e6..78b2ada 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,7 @@ include_directories(third_party/sqlsmith/include) add_subdirectory(third_party) set(SQLSMITH_SOURCES - sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp + sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp random_nums_config.cpp fuzzyduck.cpp ${SQLSMITH_OBJECT_FILES}) build_static_extension(sqlsmith ${SQLSMITH_SOURCES}) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index a92ea11..c1dd338 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -13,30 +13,23 @@ namespace duckdb { class RandomNumsConfig { -private: - - idx_t select_percentage = 60; - idx_t attach_percentage = 40; - idx_t attach_use_percentage = 50; - idx_t detach_percentage = 60; - idx_t set_percentage = 30; - idx_t delete_percentage = 40; - public: - RandomNumsConfig() { - }; + idx_t select_percentage; + idx_t attach_percentage; + idx_t attach_use_percentage; + idx_t detach_percentage; + idx_t set_percentage; + idx_t delete_percentage; + + RandomNumsConfig(); ~RandomNumsConfig(); + // is it better to return an object initialised with fetched/default params? 
// read file and update default values - void getConfigFromFile(duckdb_string_t file_path) { - - } - + void GetConfigFromFile(duckdb_string_t file_path); // set default values - void GetDefaultConfig() { - - } + void GetDefaultConfig(); }; diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index ebd6e72..5695bfd 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -11,7 +11,6 @@ #include "duckdb.hpp" #include "duckdb/parser/parsed_data/detach_info.hpp" #include "duckdb/parser/query_node.hpp" -#include "src/include/random_nums_config.hpp" #define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir" @@ -41,7 +40,6 @@ class StatementGenerator { friend class ExpressionDepthChecker; friend class AggregateChecker; friend class WindowChecker; - RandomNumsConfig random_nums_config; public: StatementGenerator(ClientContext &context); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 9a8bb01..224a8e9 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -1,12 +1,26 @@ #include "random_nums_config.hpp" namespace duckdb { - void getConfigFromFile(duckdb_string_t file_path) { + +RandomNumsConfig::RandomNumsConfig() { + +} - } +RandomNumsConfig::~RandomNumsConfig() { +} - // set default values - void GetDefaultConfig() { - - } + +void RandomNumsConfig::GetConfigFromFile(duckdb_string_t file_path) { + +} + +// set default values +void RandomNumsConfig::GetDefaultConfig() { + idx_t select_percentage = 60; + idx_t attach_percentage = 40; + idx_t attach_use_percentage = 50; + idx_t detach_percentage = 60; + idx_t set_percentage = 30; + idx_t delete_percentage = 40; +} } \ No newline at end of file diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index ff24181..5743e7c 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -25,6 +25,7 @@ #include "duckdb/parser/statement/set_statement.hpp" #include 
"duckdb/parser/statement/update_statement.hpp" #include "duckdb/parser/tableref/list.hpp" +#include "random_nums_config.hpp" namespace duckdb { From f602008040cec20506f72af1fdf89317ced8d0db Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 11 Sep 2024 17:08:15 +0200 Subject: [PATCH 06/38] a json string gets parsed --- src/include/random_nums_config.hpp | 13 ++-- src/random_nums_config.cpp | 117 +++++++++++++++++++++++++---- src/statement_generator.cpp | 4 +- 3 files changed, 115 insertions(+), 19 deletions(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index c1dd338..b52eab9 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -23,14 +23,17 @@ class RandomNumsConfig { idx_t delete_percentage; RandomNumsConfig(); + RandomNumsConfig(string &file_path); ~RandomNumsConfig(); - // is it better to return an object initialised with fetched/default params? - // read file and update default values - void GetConfigFromFile(duckdb_string_t file_path); // set default values void GetDefaultConfig(); - + // is it better to return an object initialised with fetched/default params? 
+ // read file and update default values + void GetConfigFromFile(string &file_path); }; -} // namespace duckdb \ No newline at end of file +} // namespace duckdb + + +// duckdb/common/serializer/read_stream.hpp \ No newline at end of file diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 224a8e9..35ce419 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -1,26 +1,117 @@ -#include "random_nums_config.hpp" +#include "include/random_nums_config.hpp" +#include "duckdb/common/string_util.hpp" +#include "/Users/zuleykhapavlichenkova/Desktop/duckdb_sqlsmith/duckdb/third_party/yyjson/include/yyjson.hpp" +#include "duckdb/common/local_file_system.hpp" + +#include +#include namespace duckdb { - + +using namespace duckdb_yyjson; + RandomNumsConfig::RandomNumsConfig() { - + GetDefaultConfig(); +} + +RandomNumsConfig::RandomNumsConfig(string &file_path) { + GetDefaultConfig(); + GetConfigFromFile(file_path); } RandomNumsConfig::~RandomNumsConfig() { } +void RandomNumsConfig::GetDefaultConfig() { + select_percentage = 60; + attach_percentage = 40; + attach_use_percentage = 50; + detach_percentage = 60; + set_percentage = 30; + delete_percentage = 40; +} -void RandomNumsConfig::GetConfigFromFile(duckdb_string_t file_path) { +void RandomNumsConfig::GetConfigFromFile(string &file_path) { + // select_percentage = csv_parser(file_path, "select_percentage"); + // attach_percentage = csv_parser(file_path, "attach_percentage"); + // attach_use_percentage =csv_parser(file_path, "attach_use_percentage"); + // detach_percentage =csv_parser(file_path, "detach_percentage"); + // set_percentage = csv_parser(file_path, "set_percentage"); + // delete_percentage = csv_parser(file_path, "delete_percentage"); -} + // open and read file into a string & + string result; + std::ifstream open_file(file_path); + if (!open_file.is_open()) { + std::cerr << "Could not open the file!" 
<< std::endl; + } + std::string line; + while (std::getline(open_file, line)) { + if (open_file.fail()) { + std::cerr << "Error reading file!" << std::endl; + } + result.append(line); + } + unordered_map json; + + try { + json = StringUtil::ParseJSONMap(result); + } catch (std::exception &ex) { + throw IOException("Couldn't parse JSON file with percentages config."); + } + + open_file.close(); + + std::cout << json[0] << std::endl; + // LocalFileSystem fs; + // string open_file; + // try { + // auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ); + // fs.Read(handle, buffer, n_bites, location); + + + + // } catch (std::exception &ex) { + // throw IOException("Failed to open config file with provided path '%s.", file_path); + // } + + //read + + + + // check the JSON format + // if (open_file[0] != '{') { + // // throw an error that this is not a JSON + // throw IOException("Not JSON file provided as the config."); + // } + // if (!strstr(open_file, "percentage_types")) { + // throw IOException("percentage_types are not provided in the config file."); + // } + + // yyjson_doc *doc = yyjson_read(open_file, strlen(open_file), 0); + // if (!doc) { + // throw IOException("Failed to read config file with provided path '%s.", file_path); + // } + + // yyjson_val *root = yyjson_doc_get_root(doc); + // if (!yyjson_is_obj(root)) { + // throw IOException("Not JSON object provided as the config."); + // yyjson_doc_free(doc); + // } + + // parse json to map + // unordered_map json; + + // try { + // json = StringUtil::ParseJSONMap(open_file); + // } catch (std::exception &ex) { + // throw IOException("Couldn't parse JSON file with percentages config."); + // } -// set default values -void RandomNumsConfig::GetDefaultConfig() { - idx_t select_percentage = 60; - idx_t attach_percentage = 40; - idx_t attach_use_percentage = 50; - idx_t detach_percentage = 60; - idx_t set_percentage = 30; - idx_t delete_percentage = 40; } + + // test_yyjson/config.json + + + } \ No 
newline at end of file diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 5743e7c..5a9a095 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -94,7 +94,9 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon } unique_ptr StatementGenerator::GenerateStatement() { - RandomNumsConfig config = RandomNumsConfig(); + + string file_path = "test/config_copy.json"; + RandomNumsConfig config = RandomNumsConfig(file_path); if (RandomPercentage(config.select_percentage)) { return GenerateStatement(StatementType::SELECT_STATEMENT); } From e08843bd7c8f5053b76e64c91f45c4713cdb4787 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 12 Sep 2024 10:28:26 +0200 Subject: [PATCH 07/38] parseJSONMap and initialise RandomNumsConfig with the custom values from the config file --- src/random_nums_config.cpp | 69 +++++--------------------------------- 1 file changed, 8 insertions(+), 61 deletions(-) diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 35ce419..24fa1e5 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -1,6 +1,5 @@ #include "include/random_nums_config.hpp" #include "duckdb/common/string_util.hpp" -#include "/Users/zuleykhapavlichenkova/Desktop/duckdb_sqlsmith/duckdb/third_party/yyjson/include/yyjson.hpp" #include "duckdb/common/local_file_system.hpp" #include @@ -32,13 +31,6 @@ void RandomNumsConfig::GetDefaultConfig() { } void RandomNumsConfig::GetConfigFromFile(string &file_path) { - // select_percentage = csv_parser(file_path, "select_percentage"); - // attach_percentage = csv_parser(file_path, "attach_percentage"); - // attach_use_percentage =csv_parser(file_path, "attach_use_percentage"); - // detach_percentage =csv_parser(file_path, "detach_percentage"); - // set_percentage = csv_parser(file_path, "set_percentage"); - // delete_percentage = csv_parser(file_path, "delete_percentage"); - // open and read file into a string & string result; 
std::ifstream open_file(file_path); @@ -52,66 +44,21 @@ void RandomNumsConfig::GetConfigFromFile(string &file_path) { } result.append(line); } - unordered_map json; + unordered_map json; try { json = StringUtil::ParseJSONMap(result); } catch (std::exception &ex) { throw IOException("Couldn't parse JSON file with percentages config."); } - open_file.close(); - - std::cout << json[0] << std::endl; - // LocalFileSystem fs; - // string open_file; - // try { - // auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ); - // fs.Read(handle, buffer, n_bites, location); - - - - // } catch (std::exception &ex) { - // throw IOException("Failed to open config file with provided path '%s.", file_path); - // } - - //read - - - // check the JSON format - // if (open_file[0] != '{') { - // // throw an error that this is not a JSON - // throw IOException("Not JSON file provided as the config."); - // } - // if (!strstr(open_file, "percentage_types")) { - // throw IOException("percentage_types are not provided in the config file."); - // } - - // yyjson_doc *doc = yyjson_read(open_file, strlen(open_file), 0); - // if (!doc) { - // throw IOException("Failed to read config file with provided path '%s.", file_path); - // } - - // yyjson_val *root = yyjson_doc_get_root(doc); - // if (!yyjson_is_obj(root)) { - // throw IOException("Not JSON object provided as the config."); - // yyjson_doc_free(doc); - // } - - // parse json to map - // unordered_map json; - - // try { - // json = StringUtil::ParseJSONMap(open_file); - // } catch (std::exception &ex) { - // throw IOException("Couldn't parse JSON file with percentages config."); - // } + select_percentage = stoi(json.find("select_percentage")->second); + attach_percentage = stoi(json.find("attach_percentage")->second); + attach_use_percentage = stoi(json.find("attach_use_percentage")->second); + detach_percentage = stoi(json.find("detach_percentage")->second); + set_percentage = stoi(json.find("set_percentage")->second); + 
delete_percentage = stoi(json.find("delete_percentage")->second); } - - // test_yyjson/config.json - - - -} \ No newline at end of file +} // namespace duckdb \ No newline at end of file From fa89b250f6c9c1512a4e7bd0353b031c934b947d Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 12 Sep 2024 12:01:43 +0200 Subject: [PATCH 08/38] fix paths to the reusable workflows, since they were moved to the .workflow directory of the source repo from the reusable_workflows directory --- .github/workflows/test-fuzzer-ci-still-works.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-fuzzer-ci-still-works.yml b/.github/workflows/test-fuzzer-ci-still-works.yml index 3660000..f67ad99 100644 --- a/.github/workflows/test-fuzzer-ci-still-works.yml +++ b/.github/workflows/test-fuzzer-ci-still-works.yml @@ -10,7 +10,7 @@ on: jobs: build-duckdb: name: Build DuckDB - uses: duckdblabs/duckdb-fuzzer-ci/.github/workflows/reusable_workflows/build_fuzzer.yml@main + uses: duckdblabs/duckdb-fuzzer-ci/.github/workflows/build_fuzzer.yml@main with: git_url: ${{ github.actor }} git_tag: ${{ github.ref_name }} @@ -31,7 +31,7 @@ jobs: fuzzer: sqlsmith - enable_verification: true fuzzer: duckfuzz_functions - uses: duckdblabs/duckdb-fuzzer-ci/.github/workflows/reusable_workflows/fuzz_duckdb.yml@main + uses: duckdblabs/duckdb-fuzzer-ci/.github/workflows/fuzz_duckdb.yml@main with: fuzzer: ${{ matrix.fuzzer }} data: ${{ matrix.data }} From 11c5910ec873388b0b26d70047606e282c564b15 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 13 Sep 2024 10:05:26 +0200 Subject: [PATCH 09/38] add randoms_config_handle to fuzzyduck --- src/fuzzyduck.cpp | 9 ++++++++- src/include/fuzzyduck.hpp | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index c7ee8ec..1241088 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -22,8 +22,8 @@ void FuzzyDuck::BeginFuzzing() { if (max_queries == 0) { 
throw BinderException("Provide a max_queries argument greater than 0"); } + auto &fs = FileSystem::GetFileSystem(context); if (!complete_log.empty()) { - auto &fs = FileSystem::GetFileSystem(context); TryRemoveFile(complete_log); complete_log_handle = fs.OpenFile(complete_log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW); @@ -31,12 +31,19 @@ void FuzzyDuck::BeginFuzzing() { if (enable_verification) { RunQuery("PRAGMA enable_verification"); } + // if (!randoms_config_filepath.empty()) { + // randoms_config_handle = fs.OpenFile(randoms_config_filepath, FileFlags::FILE_FLAGS_READ); + + // } } void FuzzyDuck::EndFuzzing() { if (complete_log_handle) { complete_log_handle->Close(); } + if (randoms_config_handle) { + randoms_config_handle->Close(); + } } void FuzzyDuck::Fuzz() { diff --git a/src/include/fuzzyduck.hpp b/src/include/fuzzyduck.hpp index e6d5363..e54fb25 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -27,6 +27,7 @@ class FuzzyDuck { bool verbose_output = false; bool enable_verification = false; idx_t timeout = 30; + string randoms_config_filepath; public: void Fuzz(); @@ -50,6 +51,7 @@ class FuzzyDuck { private: unique_ptr complete_log_handle; + unique_ptr randoms_config_handle; }; } // namespace duckdb From 566aff392fd29eaea8090a4d3e38db0ecfc42c3c Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 13 Sep 2024 10:13:25 +0200 Subject: [PATCH 10/38] add config file --- config.json | 8 ++++++++ src/statement_generator.cpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 config.json diff --git a/config.json b/config.json new file mode 100644 index 0000000..75daaaa --- /dev/null +++ b/config.json @@ -0,0 +1,8 @@ +{ + "select_percentage": "70", + "attach_percentage": "20", + "attach_use_percentage": "80", + "detach_percentage": "15", + "set_percentage": "5", + "delete_percentage": "25" +} \ No newline at end of file diff --git a/src/statement_generator.cpp 
b/src/statement_generator.cpp index 5a9a095..2fd694a 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -95,7 +95,7 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon unique_ptr StatementGenerator::GenerateStatement() { - string file_path = "test/config_copy.json"; + string file_path = "config.json"; RandomNumsConfig config = RandomNumsConfig(file_path); if (RandomPercentage(config.select_percentage)) { return GenerateStatement(StatementType::SELECT_STATEMENT); From 87d6a036d5728991c5466ca123859c6786525c16 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 13 Sep 2024 16:49:59 +0200 Subject: [PATCH 11/38] FileSystem::ReadFile uses reads the file with all the new line symbols and that doesn't make ParseJSONMap parse the string with the new lines --- src/fuzzyduck.cpp | 26 ++++++++++++++++++++------ src/include/fuzzyduck.hpp | 2 ++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index 1241088..bf8325a 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -31,10 +31,24 @@ void FuzzyDuck::BeginFuzzing() { if (enable_verification) { RunQuery("PRAGMA enable_verification"); } - // if (!randoms_config_filepath.empty()) { - // randoms_config_handle = fs.OpenFile(randoms_config_filepath, FileFlags::FILE_FLAGS_READ); - - // } + if (!randoms_config_filepath.empty()) { + randoms_config_handle = fs.OpenFile(randoms_config_filepath, FileFlags::FILE_FLAGS_READ); + // read config and initialize the RandomNumsConfig here + // string file_path = "config.json"; + string json_string; + if (randoms_config_handle) { + auto file_size = fs.GetFileSize(*randoms_config_handle); + char buffer[1024]; + auto bytes_read = fs.Read(*randoms_config_handle, buffer, 1024); + if (bytes_read < file_size) { + throw CatalogException("Cannot read the file \"%s\"", randoms_config_filepath); + } + json_string = buffer; + config = RandomNumsConfig(json_string); + + } + + } } void 
FuzzyDuck::EndFuzzing() { @@ -85,12 +99,12 @@ string FuzzyDuck::GenerateQuery() { LogTask("Generating Multi-Statement query of " + to_string(number_of_statements) + " statements with seed " + to_string(seed)); for (idx_t i = 0; i < number_of_statements; i++) { - statement += generator.GenerateStatement()->ToString() + "; "; + statement += generator.GenerateStatement(config)->ToString() + "; "; } } else { // normal statement LogTask("Generating Single-Statement query with seed " + to_string(seed)); - statement = generator.GenerateStatement()->ToString(); + statement = generator.GenerateStatement(config)->ToString(); } return statement; } diff --git a/src/include/fuzzyduck.hpp b/src/include/fuzzyduck.hpp index e54fb25..062d59e 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -10,6 +10,7 @@ #include "duckdb.hpp" #include "duckdb/parser/query_node.hpp" +#include "random_nums_config.hpp" namespace duckdb { struct FileHandle; @@ -28,6 +29,7 @@ class FuzzyDuck { bool enable_verification = false; idx_t timeout = 30; string randoms_config_filepath; + RandomNumsConfig config = RandomNumsConfig(); public: void Fuzz(); From de5cadad4a0604522dfcfd16c4122212b1e315b9 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 13 Sep 2024 16:51:06 +0200 Subject: [PATCH 12/38] pass config file path as randoms_config_filepath parameter --- src/include/random_nums_config.hpp | 4 +-- src/include/statement_generator.hpp | 3 ++- src/random_nums_config.cpp | 38 ++++++++++++++--------------- src/sqlsmith_extension.cpp | 3 +++ src/statement_generator.cpp | 7 +++--- 5 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index b52eab9..048b622 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -23,14 +23,14 @@ class RandomNumsConfig { idx_t delete_percentage; RandomNumsConfig(); - RandomNumsConfig(string &file_path); + RandomNumsConfig(string 
&json_string); ~RandomNumsConfig(); // set default values void GetDefaultConfig(); // is it better to return an object initialised with fetched/default params? // read file and update default values - void GetConfigFromFile(string &file_path); + void GetConfigFromFile(string &json_string); }; } // namespace duckdb diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 5695bfd..8362c1f 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -11,6 +11,7 @@ #include "duckdb.hpp" #include "duckdb/parser/parsed_data/detach_info.hpp" #include "duckdb/parser/query_node.hpp" +#include "random_nums_config.hpp" #define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir" @@ -47,7 +48,7 @@ class StatementGenerator { ~StatementGenerator(); public: - unique_ptr GenerateStatement(); + unique_ptr GenerateStatement(RandomNumsConfig config); vector GenerateAllFunctionCalls(); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 24fa1e5..8b7714c 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -1,4 +1,5 @@ #include "include/random_nums_config.hpp" + #include "duckdb/common/string_util.hpp" #include "duckdb/common/local_file_system.hpp" @@ -13,9 +14,9 @@ RandomNumsConfig::RandomNumsConfig() { GetDefaultConfig(); } -RandomNumsConfig::RandomNumsConfig(string &file_path) { +RandomNumsConfig::RandomNumsConfig(string &json_string) { GetDefaultConfig(); - GetConfigFromFile(file_path); + GetConfigFromFile(json_string); } RandomNumsConfig::~RandomNumsConfig() { @@ -30,29 +31,28 @@ void RandomNumsConfig::GetDefaultConfig() { delete_percentage = 40; } -void RandomNumsConfig::GetConfigFromFile(string &file_path) { +void RandomNumsConfig::GetConfigFromFile(string &json_string) { // open and read file into a string & - string result; - std::ifstream open_file(file_path); - if (!open_file.is_open()) { - std::cerr << "Could not open the file!" 
<< std::endl; - } - std::string line; - while (std::getline(open_file, line)) { - if (open_file.fail()) { - std::cerr << "Error reading file!" << std::endl; - } - result.append(line); - } + // string result; + // std::ifstream open_file(file_path); + // if (!open_file.is_open()) { + // std::cerr << "Could not open the file!" << std::endl; + // } + // std::string line; + // while (std::getline(open_file, line)) { + // if (open_file.fail()) { + // std::cerr << "Error reading file!" << std::endl; + // } + // result.append(line); + // } unordered_map json; try { - json = StringUtil::ParseJSONMap(result); + json = StringUtil::ParseJSONMap(json_string); } catch (std::exception &ex) { - throw IOException("Couldn't parse JSON file with percentages config."); + throw IOException("Couldn't parse JSON string containing percentages config."); } - open_file.close(); - + select_percentage = stoi(json.find("select_percentage")->second); attach_percentage = stoi(json.find("attach_percentage")->second); attach_use_percentage = stoi(json.find("attach_use_percentage")->second); diff --git a/src/sqlsmith_extension.cpp b/src/sqlsmith_extension.cpp index f995e73..d694a32 100644 --- a/src/sqlsmith_extension.cpp +++ b/src/sqlsmith_extension.cpp @@ -143,6 +143,8 @@ static duckdb::unique_ptr FuzzyDuckBind(ClientContext &context, Ta result->fuzzer.verbose_output = BooleanValue::Get(kv.second); } else if (kv.first == "enable_verification") { result->fuzzer.enable_verification = BooleanValue::Get(kv.second); + } else if (kv.first == "randoms_config_filepath") { + result->fuzzer.randoms_config_filepath = StringValue::Get(kv.second); } } return_types.emplace_back(LogicalType::BOOLEAN); @@ -191,6 +193,7 @@ void SqlsmithExtension::Load(DuckDB &db) { fuzzy_duck_fun.named_parameters["complete_log"] = LogicalType::VARCHAR; fuzzy_duck_fun.named_parameters["verbose_output"] = LogicalType::BOOLEAN; fuzzy_duck_fun.named_parameters["enable_verification"] = LogicalType::BOOLEAN; + 
fuzzy_duck_fun.named_parameters["randoms_config_filepath"] = LogicalType::VARCHAR; ExtensionUtil::RegisterFunction(db_instance, fuzzy_duck_fun); TableFunction fuzz_all_functions("fuzz_all_functions", {}, FuzzAllFunctions, FuzzyDuckBind); diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 2fd694a..4874a7e 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -25,7 +25,6 @@ #include "duckdb/parser/statement/set_statement.hpp" #include "duckdb/parser/statement/update_statement.hpp" #include "duckdb/parser/tableref/list.hpp" -#include "random_nums_config.hpp" namespace duckdb { @@ -93,10 +92,10 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon return result; } -unique_ptr StatementGenerator::GenerateStatement() { +unique_ptr StatementGenerator::GenerateStatement(RandomNumsConfig config) { - string file_path = "config.json"; - RandomNumsConfig config = RandomNumsConfig(file_path); + // string file_path = "config.json"; + // RandomNumsConfig config = RandomNumsConfig(file_path); if (RandomPercentage(config.select_percentage)) { return GenerateStatement(StatementType::SELECT_STATEMENT); } From ce877c4a61a0580dd59453cbbbe172bae4f674ff Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 24 Sep 2024 14:35:10 +0200 Subject: [PATCH 13/38] turn the RandomNumsConfig into an unordered_map --- src/fuzzyduck.cpp | 17 +---- src/include/fuzzyduck.hpp | 4 +- src/include/random_nums_config.hpp | 45 +++++-------- src/include/statement_generator.hpp | 2 +- src/random_nums_config.cpp | 100 +++++++++++++--------------- src/statement_generator.cpp | 14 ++-- 6 files changed, 79 insertions(+), 103 deletions(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index bf8325a..0e56d1e 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -33,21 +33,8 @@ void FuzzyDuck::BeginFuzzing() { } if (!randoms_config_filepath.empty()) { randoms_config_handle = fs.OpenFile(randoms_config_filepath,
FileFlags::FILE_FLAGS_READ); - // read config and initialize the RandomNumsConfig here - // string file_path = "config.json"; - string json_string; - if (randoms_config_handle) { - auto file_size = fs.GetFileSize(*randoms_config_handle); - char buffer[1024]; - auto bytes_read = fs.Read(*randoms_config_handle, buffer, 1024); - if (bytes_read < file_size) { - throw CatalogException("Cannot read the file \"%s\"", randoms_config_filepath); - } - json_string = buffer; - config = RandomNumsConfig(json_string); - - } - + const char *file_path = "config.json"; + auto config = GetConfigFromFile(file_path); } } diff --git a/src/include/fuzzyduck.hpp b/src/include/fuzzyduck.hpp index 062d59e..816cc90 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -13,6 +13,8 @@ #include "random_nums_config.hpp" namespace duckdb { +using namespace std; + struct FileHandle; class FuzzyDuck { @@ -29,7 +31,7 @@ class FuzzyDuck { bool enable_verification = false; idx_t timeout = 30; string randoms_config_filepath; - RandomNumsConfig config = RandomNumsConfig(); + unordered_map config; public: void Fuzz(); diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 048b622..163cf52 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -1,39 +1,30 @@ -//===----------------------------------------------------------------------===// +// ===----------------------------------------------------------------------===// // DuckDB -// + // random_nums_config.hpp -// -// -//===----------------------------------------------------------------------===// + + +// ===----------------------------------------------------------------------===// #pragma once #include "duckdb.hpp" +#include "percentages_enum.hpp" +#include "yyjson.hpp" namespace duckdb { -class RandomNumsConfig { -public: - - idx_t select_percentage; - idx_t attach_percentage; - idx_t attach_use_percentage; - idx_t detach_percentage; - idx_t set_percentage; 
- idx_t delete_percentage; - - RandomNumsConfig(); - RandomNumsConfig(string &json_string); - ~RandomNumsConfig(); - - // set default values - void GetDefaultConfig(); - // is it better to return an object initialised with fetched/default params? - // read file and update default values - void GetConfigFromFile(string &json_string); +enum class RandomNumsConfig : idx_t { + SELECT = 0, + ATTACH = 1, + ATTACH_USE = 2, + DETACH = 3, + SET = 4, + DELETE = 5, }; -} // namespace duckdb - +unordered_map GetDefaultConfig(); +unordered_map GetConfigFromFile(const char *json_string); +string PercentagesEnumTypeToString(RandomNumsConfig type); -// duckdb/common/serializer/read_stream.hpp \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 8362c1f..4ac4e93 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -48,7 +48,7 @@ class StatementGenerator { ~StatementGenerator(); public: - unique_ptr GenerateStatement(RandomNumsConfig config); + unique_ptr GenerateStatement(unordered_map config); vector GenerateAllFunctionCalls(); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 8b7714c..e305abf 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -1,64 +1,60 @@ #include "include/random_nums_config.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/local_file_system.hpp" - -#include -#include +#include +#include +#include namespace duckdb { using namespace duckdb_yyjson; -RandomNumsConfig::RandomNumsConfig() { - GetDefaultConfig(); -} - -RandomNumsConfig::RandomNumsConfig(string &json_string) { - GetDefaultConfig(); - GetConfigFromFile(json_string); -} - -RandomNumsConfig::~RandomNumsConfig() { +unordered_map GetDefaultConfig() { + unordered_map default_config = { + { RandomNumsConfig::ATTACH, 40 }, + { RandomNumsConfig::ATTACH_USE, 50 }, + { 
RandomNumsConfig::DELETE, 40 }, + { RandomNumsConfig::DETACH, 60 }, + { RandomNumsConfig::SELECT, 60 }, + { RandomNumsConfig::SET, 30 } + }; + return default_config; } -void RandomNumsConfig::GetDefaultConfig() { - select_percentage = 60; - attach_percentage = 40; - attach_use_percentage = 50; - detach_percentage = 60; - set_percentage = 30; - delete_percentage = 40; -} - -void RandomNumsConfig::GetConfigFromFile(string &json_string) { - // open and read file into a string & - // string result; - // std::ifstream open_file(file_path); - // if (!open_file.is_open()) { - // std::cerr << "Could not open the file!" << std::endl; - // } - // std::string line; - // while (std::getline(open_file, line)) { - // if (open_file.fail()) { - // std::cerr << "Error reading file!" << std::endl; - // } - // result.append(line); - // } - - unordered_map json; - try { - json = StringUtil::ParseJSONMap(json_string); - } catch (std::exception &ex) { - throw IOException("Couldn't parse JSON string containing percentages config."); +unordered_map StringToRandomNumsConfig = { + { "attach_percentage", RandomNumsConfig::ATTACH }, + { "attach_use_percentage", RandomNumsConfig::ATTACH_USE }, + { "delete_percentage", RandomNumsConfig::DELETE }, + { "detach_percentage", RandomNumsConfig::DETACH }, + { "select_percentage", RandomNumsConfig::SELECT }, + { "set_percentage", RandomNumsConfig::SET } +}; + + +unordered_map GetConfigFromFile(const char *json_string) { + unordered_map config_from_file; + auto doc = yyjson_read_file(json_string, YYJSON_READ_NOFLAG, NULL, NULL); + if (doc) { + yyjson_val *obj = yyjson_doc_get_root(doc); + yyjson_obj_iter iter; + yyjson_obj_iter_init(obj, &iter); + yyjson_val *key, *val; + while ((key = yyjson_obj_iter_next(&iter))) { + const char* k = yyjson_get_str(key); + val = yyjson_obj_iter_get_val(key); + auto it = StringToRandomNumsConfig.find(k); + if (it != StringToRandomNumsConfig.end()) { + RandomNumsConfig perc_type = it->second; + idx_t perc_value = 
yyjson_get_int(val); + config_from_file[perc_type] = perc_value; + } + } + } else { + throw IOException("Couldn't read JSON with percentages config."); + yyjson_doc_free(doc); + return GetDefaultConfig(); } - - select_percentage = stoi(json.find("select_percentage")->second); - attach_percentage = stoi(json.find("attach_percentage")->second); - attach_use_percentage = stoi(json.find("attach_use_percentage")->second); - detach_percentage = stoi(json.find("detach_percentage")->second); - set_percentage = stoi(json.find("set_percentage")->second); - delete_percentage = stoi(json.find("delete_percentage")->second); - + // Free the doc + yyjson_doc_free(doc); + return config_from_file; } } // namespace duckdb \ No newline at end of file diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 4874a7e..f0bedc2 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -92,27 +92,27 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon return result; } -unique_ptr StatementGenerator::GenerateStatement(RandomNumsConfig config) { +unique_ptr StatementGenerator::GenerateStatement(unordered_map config) { // string file_path = "config.json"; // RandomNumsConfig config = RandomNumsConfig(file_path); - if (RandomPercentage(config.select_percentage)) { + if (RandomPercentage(config[RandomNumsConfig::SELECT])) { return GenerateStatement(StatementType::SELECT_STATEMENT); } - if (RandomPercentage(config.attach_percentage)) { - if (RandomPercentage(config.attach_use_percentage)) { + if (RandomPercentage(config[RandomNumsConfig::ATTACH])) { + if (RandomPercentage(config[RandomNumsConfig::ATTACH_USE])) { // We call this directly so we have a higher chance to fuzz persistent databases return GenerateAttachUse(); } return GenerateStatement(StatementType::ATTACH_STATEMENT); } - if (RandomPercentage(config.detach_percentage)) { + if (RandomPercentage(config[RandomNumsConfig::DETACH])) { return 
GenerateStatement(StatementType::DETACH_STATEMENT); } - if (RandomPercentage(config.set_percentage)) { + if (RandomPercentage(config[RandomNumsConfig::SET])) { return GenerateStatement(StatementType::SET_STATEMENT); } - if (RandomPercentage(config.delete_percentage)) { //20 + if (RandomPercentage(config[RandomNumsConfig::DELETE])) { //20 return GenerateStatement(StatementType::DELETE_STATEMENT); } return GenerateStatement(StatementType::CREATE_STATEMENT); From 38757a5c92bffd64151975644607ec7b12bc960f Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 24 Sep 2024 16:12:01 +0200 Subject: [PATCH 14/38] rename to RandomPercentagesEnum --- src/include/fuzzyduck.hpp | 2 +- src/include/random_nums_config.hpp | 9 +++---- src/random_nums_config.cpp | 40 +++++++++++++++--------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/include/fuzzyduck.hpp b/src/include/fuzzyduck.hpp index 816cc90..d66aa5c 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -31,7 +31,7 @@ class FuzzyDuck { bool enable_verification = false; idx_t timeout = 30; string randoms_config_filepath; - unordered_map config; + unordered_map config; public: void Fuzz(); diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 163cf52..c4fd547 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -9,12 +9,11 @@ #pragma once #include "duckdb.hpp" -#include "percentages_enum.hpp" #include "yyjson.hpp" namespace duckdb { -enum class RandomNumsConfig : idx_t { +enum class RandomPercentagesEnum : idx_t { SELECT = 0, ATTACH = 1, ATTACH_USE = 2, @@ -23,8 +22,8 @@ enum class RandomNumsConfig : idx_t { DELETE = 5, }; -unordered_map GetDefaultConfig(); -unordered_map GetConfigFromFile(const char *json_string); -string PercentagesEnumTypeToString(RandomNumsConfig type); +unordered_map GetDefaultConfig(); +unordered_map GetConfigFromFile(const char *json_string); +string 
RandomPercentagesEnumToString(RandomPercentagesEnum type); } // namespace duckdb \ No newline at end of file diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index e305abf..f4f07e6 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -8,30 +8,30 @@ namespace duckdb { using namespace duckdb_yyjson; -unordered_map GetDefaultConfig() { - unordered_map default_config = { - { RandomNumsConfig::ATTACH, 40 }, - { RandomNumsConfig::ATTACH_USE, 50 }, - { RandomNumsConfig::DELETE, 40 }, - { RandomNumsConfig::DETACH, 60 }, - { RandomNumsConfig::SELECT, 60 }, - { RandomNumsConfig::SET, 30 } +unordered_map GetDefaultConfig() { + unordered_map default_config = { + { RandomPercentagesEnum::ATTACH, 40 }, + { RandomPercentagesEnum::ATTACH_USE, 50 }, + { RandomPercentagesEnum::DELETE, 40 }, + { RandomPercentagesEnum::DETACH, 60 }, + { RandomPercentagesEnum::SELECT, 60 }, + { RandomPercentagesEnum::SET, 30 } }; return default_config; } -unordered_map StringToRandomNumsConfig = { - { "attach_percentage", RandomNumsConfig::ATTACH }, - { "attach_use_percentage", RandomNumsConfig::ATTACH_USE }, - { "delete_percentage", RandomNumsConfig::DELETE }, - { "detach_percentage", RandomNumsConfig::DETACH }, - { "select_percentage", RandomNumsConfig::SELECT }, - { "set_percentage", RandomNumsConfig::SET } +unordered_map StringToRandomPercentagesEnum = { + { "attach_percentage", RandomPercentagesEnum::ATTACH }, + { "attach_use_percentage", RandomPercentagesEnum::ATTACH_USE }, + { "delete_percentage", RandomPercentagesEnum::DELETE }, + { "detach_percentage", RandomPercentagesEnum::DETACH }, + { "select_percentage", RandomPercentagesEnum::SELECT }, + { "set_percentage", RandomPercentagesEnum::SET } }; -unordered_map GetConfigFromFile(const char *json_string) { - unordered_map config_from_file; +unordered_map GetConfigFromFile(const char *json_string) { + unordered_map config_from_file; auto doc = yyjson_read_file(json_string, YYJSON_READ_NOFLAG, NULL, NULL); 
if (doc) { yyjson_val *obj = yyjson_doc_get_root(doc); @@ -41,9 +41,9 @@ unordered_map GetConfigFromFile(const char *json_string while ((key = yyjson_obj_iter_next(&iter))) { const char* k = yyjson_get_str(key); val = yyjson_obj_iter_get_val(key); - auto it = StringToRandomNumsConfig.find(k); - if (it != StringToRandomNumsConfig.end()) { - RandomNumsConfig perc_type = it->second; + auto it = StringToRandomPercentagesEnum.find(k); + if (it != StringToRandomPercentagesEnum.end()) { + RandomPercentagesEnum perc_type = it->second; idx_t perc_value = yyjson_get_int(val); config_from_file[perc_type] = perc_value; } From 26ba5ece51f30c73e46d77adc7837e5bda25968d Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 24 Sep 2024 16:32:11 +0200 Subject: [PATCH 15/38] fixed missing values --- src/include/statement_generator.hpp | 2 +- src/random_nums_config.cpp | 4 ++-- src/statement_generator.cpp | 17 +++++++---------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 4ac4e93..438dab8 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -48,7 +48,7 @@ class StatementGenerator { ~StatementGenerator(); public: - unique_ptr GenerateStatement(unordered_map config); + unique_ptr GenerateStatement(unordered_map config); vector GenerateAllFunctionCalls(); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index f4f07e6..6918c74 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -44,8 +44,8 @@ unordered_map GetConfigFromFile(const char *json_s auto it = StringToRandomPercentagesEnum.find(k); if (it != StringToRandomPercentagesEnum.end()) { RandomPercentagesEnum perc_type = it->second; - idx_t perc_value = yyjson_get_int(val); - config_from_file[perc_type] = perc_value; + auto perc_value = yyjson_get_str(val); + config_from_file[perc_type] = std::stoi(perc_value); } } } else { diff --git 
a/src/statement_generator.cpp b/src/statement_generator.cpp index f0bedc2..78d5169 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -92,27 +92,24 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon return result; } -unique_ptr StatementGenerator::GenerateStatement(unordered_map config) { - - // string file_path = "config.json"; - // RandomNumsConfig config = RandomNumsConfig(file_path); - if (RandomPercentage(config[RandomNumsConfig::SELECT])) { +unique_ptr StatementGenerator::GenerateStatement(unordered_map config) { + if (RandomPercentage(config[RandomPercentagesEnum::SELECT])) { return GenerateStatement(StatementType::SELECT_STATEMENT); } - if (RandomPercentage(config[RandomNumsConfig::ATTACH])) { - if (RandomPercentage(config[RandomNumsConfig::ATTACH_USE])) { + if (RandomPercentage(config[RandomPercentagesEnum::ATTACH])) { + if (RandomPercentage(config[RandomPercentagesEnum::ATTACH_USE])) { // We call this directly so we have a higher chance to fuzz persistent databases return GenerateAttachUse(); } return GenerateStatement(StatementType::ATTACH_STATEMENT); } - if (RandomPercentage(config[RandomNumsConfig::DETACH])) { + if (RandomPercentage(config[RandomPercentagesEnum::DETACH])) { return GenerateStatement(StatementType::DETACH_STATEMENT); } - if (RandomPercentage(config[RandomNumsConfig::SET])) { + if (RandomPercentage(config[RandomPercentagesEnum::SET])) { return GenerateStatement(StatementType::SET_STATEMENT); } - if (RandomPercentage(config[RandomNumsConfig::DELETE])) { //20 + if (RandomPercentage(config[RandomPercentagesEnum::DELETE])) { //20 return GenerateStatement(StatementType::DELETE_STATEMENT); } return GenerateStatement(StatementType::CREATE_STATEMENT); From 7275d9a9e41bb580afcb4cf7aba9167ce0eca3d8 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Tue, 24 Sep 2024 17:09:48 +0200 Subject: [PATCH 16/38] GetDefaultConfig() instead of throwing an exception when the file with the wrong json structure 
or contains an empty {} --- src/fuzzyduck.cpp | 3 +++ src/random_nums_config.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index 0e56d1e..3b95575 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -35,6 +35,9 @@ void FuzzyDuck::BeginFuzzing() { randoms_config_handle = fs.OpenFile(randoms_config_filepath, FileFlags::FILE_FLAGS_READ); const char *file_path = "config.json"; auto config = GetConfigFromFile(file_path); + if (!config.size()) { + config = GetDefaultConfig(); + } } } diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 6918c74..e603b08 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -49,7 +49,7 @@ unordered_map GetConfigFromFile(const char *json_s } } } else { - throw IOException("Couldn't read JSON with percentages config."); + // Couldn't read JSON with percentages config yyjson_doc_free(doc); return GetDefaultConfig(); } From ad51bddba0de4608bd2fa0ae6b7718316d008243 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 25 Sep 2024 10:46:47 +0200 Subject: [PATCH 17/38] do not declare a variable for config again --- src/fuzzyduck.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index 3b95575..e95662b 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -33,8 +33,7 @@ void FuzzyDuck::BeginFuzzing() { } if (!randoms_config_filepath.empty()) { randoms_config_handle = fs.OpenFile(randoms_config_filepath, FileFlags::FILE_FLAGS_READ); - const char *file_path = "config.json"; - auto config = GetConfigFromFile(file_path); + config = GetConfigFromFile(randoms_config_filepath.c_str()); if (!config.size()) { config = GetDefaultConfig(); } From 1430522768b71ab061733c8bf9d0ea9ff524d362 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 25 Sep 2024 11:19:50 +0200 Subject: [PATCH 18/38] remove file handling, because it's being handled by yyjson_read_file --- 
src/fuzzyduck.cpp | 6 ++---- src/include/fuzzyduck.hpp | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index e95662b..a385bcf 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -32,11 +32,12 @@ void FuzzyDuck::BeginFuzzing() { RunQuery("PRAGMA enable_verification"); } if (!randoms_config_filepath.empty()) { - randoms_config_handle = fs.OpenFile(randoms_config_filepath, FileFlags::FILE_FLAGS_READ); config = GetConfigFromFile(randoms_config_filepath.c_str()); if (!config.size()) { config = GetDefaultConfig(); } + } else { + config = GetDefaultConfig(); } } @@ -44,9 +45,6 @@ void FuzzyDuck::EndFuzzing() { if (complete_log_handle) { complete_log_handle->Close(); } - if (randoms_config_handle) { - randoms_config_handle->Close(); - } } void FuzzyDuck::Fuzz() { diff --git a/src/include/fuzzyduck.hpp b/src/include/fuzzyduck.hpp index d66aa5c..70c6279 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -13,7 +13,6 @@ #include "random_nums_config.hpp" namespace duckdb { -using namespace std; struct FileHandle; From 3d395fd2a2207e17f51d3599df993a494c7b2677 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Wed, 25 Sep 2024 12:56:36 +0200 Subject: [PATCH 19/38] add randoms_config_filepath to scripts/run_fuzzer.py --- scripts/run_fuzzer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/run_fuzzer.py b/scripts/run_fuzzer.py index 02373fa..4d4e950 100644 --- a/scripts/run_fuzzer.py +++ b/scripts/run_fuzzer.py @@ -34,6 +34,8 @@ perform_checks = False elif param.startswith('--enable_verification'): verification = param.replace('--enable_verification=', '').lower() == 'true' + elif param.startswith('--randoms_config_filepath'): + randoms_config_filepath = param.replace('--randoms_config_filepath=', '') elif param.startswith('--shell='): shell = param.replace('--shell=', '') elif param.startswith('--seed='): @@ -76,7 +78,8 @@ def run_fuzzer_script(fuzzer): 
if fuzzer == 'sqlsmith': return "call sqlsmith(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" elif fuzzer == 'duckfuzz': - return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', enable_verification='${ENABLE_VERIFICATION}');" + return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', \ + enable_verification='${ENABLE_VERIFICATION}', randoms_config_filepath='${RANDOMS_CONFIG_FILEPATH});" elif fuzzer == 'duckfuzz_functions': return "call fuzz_all_functions(seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" else: @@ -132,6 +135,7 @@ def run_shell_command(cmd): .replace('${COMPLETE_LOG_FILE}', complete_log_file) .replace('${SEED}', str(seed)) .replace('${ENABLE_VERIFICATION}', str(verification)) + .replace('${RANDOMS_CONFIG_FILEPATH}', randoms_config_filepath) ) print(load_script) From 4930d43b3fd2dc39902ac23c6eead9cb5f587956 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 26 Sep 2024 14:52:12 +0200 Subject: [PATCH 20/38] naive way to parse nested config.json to a map --- src/include/random_nums_config.hpp | 23 +++++++++ src/random_nums_config.cpp | 78 +++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index c4fd547..e1504d8 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -20,6 +20,29 @@ enum class RandomPercentagesEnum : idx_t { DETACH = 3, SET = 4, DELETE = 5, + + // ---------------------------------- + // Generate Select Percentages Types + // ---------------------------------- + SELECT_NODE = 6, + SELECT_NODE_IS_DISTINCT = 7, + SELECT_NODE_FROM_TABLE = 8, + SELECT_NODE_WHERE = 9, + SELECT_NODE_HAVING = 
10, + SELECT_NODE_GROUPS = 11, + SELECT_NODE_GROUP_BY = 12, + SELECT_NODE_QUALIFY = 13, + SELECT_NODE_AGGREGATE = 14, + SELECT_NODE_SAMPLE = 15, + SELECT_NODE_SAMPLE_IS_PERC = 16, + SELECT_NODE_SAMPLE_SIZE = 17, + RESULT_MODIFIERS = 18, + LIMIT_PERCENT_MODIFIER = 19, + LIMIT_PERCENT_MODIFIER_LIMIT = 20, + LIMIT_PERCENT_MODIFIER_OFFSET = 21, + LIMIT_MODIFIER_LIMIT = 22, + LIMIT_MODIFIER_OFFSET = 23 + }; unordered_map GetDefaultConfig(); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index e603b08..58cb450 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -26,7 +26,32 @@ unordered_map StringToRandomPercentagesEnum = { { "delete_percentage", RandomPercentagesEnum::DELETE }, { "detach_percentage", RandomPercentagesEnum::DETACH }, { "select_percentage", RandomPercentagesEnum::SELECT }, - { "set_percentage", RandomPercentagesEnum::SET } + { "select_node_perc", RandomPercentagesEnum::SELECT_NODE }, + { "select_node_is_distinct_perc", RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT }, + { "select_node_from_table_perc", RandomPercentagesEnum::SELECT_NODE_FROM_TABLE }, + { "select_node_where_perc", RandomPercentagesEnum::SELECT_NODE_WHERE }, + { "select_node_having_perc", RandomPercentagesEnum::SELECT_NODE_HAVING }, + { "select_node_groups_perc", RandomPercentagesEnum::SELECT_NODE_GROUPS }, + { "select_node_group_by_perc", RandomPercentagesEnum::SELECT_NODE_GROUP_BY }, + { "select_node_qualify_perc", RandomPercentagesEnum::SELECT_NODE_QUALIFY }, + { "select_node_aggregate_perc", RandomPercentagesEnum::SELECT_NODE_AGGREGATE }, + { "select_node_sample_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE }, + { "select_node_sample_is_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC }, + { "select_node_sample_size", RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE }, + { "result_modifiers", RandomPercentagesEnum::RESULT_MODIFIERS }, + { "limit_percent_modifier", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER }, + { 
"limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT }, + { "limit_percent_modifier_offset", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET }, + { "limit_modifier_limit", RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT }, + { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET } +}; + +enum Statements { + select = 0, + attach, + delete_st, + set, + }; @@ -37,15 +62,54 @@ unordered_map GetConfigFromFile(const char *json_s yyjson_val *obj = yyjson_doc_get_root(doc); yyjson_obj_iter iter; yyjson_obj_iter_init(obj, &iter); + size_t idx, max; yyjson_val *key, *val; - while ((key = yyjson_obj_iter_next(&iter))) { - const char* k = yyjson_get_str(key); - val = yyjson_obj_iter_get_val(key); - auto it = StringToRandomPercentagesEnum.find(k); + yyjson_obj_foreach(obj, idx, max, key, val) { + const char* root_key = yyjson_get_str(key); + auto it = StringToRandomPercentagesEnum.find(root_key); // "select" or "attach" if (it != StringToRandomPercentagesEnum.end()) { - RandomPercentagesEnum perc_type = it->second; + RandomPercentagesEnum perc_type = it->second; // SELECT auto perc_value = yyjson_get_str(val); - config_from_file[perc_type] = std::stoi(perc_value); + config_from_file[perc_type] = std::stoi(perc_value); // { SELECT: 90 } + } + if (yyjson_is_obj(val)) { + size_t node_idx, node_max; + yyjson_val *node_key, *node_val; + yyjson_obj_foreach(val, node_idx, node_max, node_key, node_val) { + const char* node_root_key = yyjson_get_str(node_key); // sub roots are "select_node" and "select_node_sample" + auto node_it = StringToRandomPercentagesEnum.find(node_root_key); + if (node_it != StringToRandomPercentagesEnum.end()) { + RandomPercentagesEnum node_perc_type = node_it->second; + auto node_perc_value = yyjson_get_str(node_val); + config_from_file[node_perc_type] = std::stoi(node_perc_value); + } + if (yyjson_is_obj(node_val)) { + size_t sub_idx, sub_max; + yyjson_val *sub_key, *sub_val; + yyjson_obj_foreach(node_val, 
sub_idx, sub_max, sub_key, sub_val) { + const char* sub_root_key = yyjson_get_str(sub_key); + auto sub_it = StringToRandomPercentagesEnum.find(sub_root_key); + if (sub_it != StringToRandomPercentagesEnum.end()) { + RandomPercentagesEnum sub_perc_type = sub_it->second; + auto sub_perc_value = yyjson_get_str(sub_val); + config_from_file[sub_perc_type] = std::stoi(sub_perc_value); + } + if (yyjson_is_obj(sub_val)) { + size_t subnode_idx, subnode_max; + yyjson_val *subnode_key, *subnode_val; + yyjson_obj_foreach(sub_val, subnode_idx, subnode_max, subnode_key, subnode_val) { + const char* subnode_root_key = yyjson_get_str(subnode_key); + auto subnode_it = StringToRandomPercentagesEnum.find(subnode_root_key); + if (subnode_it != StringToRandomPercentagesEnum.end()) { + RandomPercentagesEnum subnode_perc_type = subnode_it->second; + auto subnode_perc_value = yyjson_get_str(subnode_val); + config_from_file[subnode_perc_type] = std::stoi(subnode_perc_value); + } + } + } + } + } + } } } } else { From fb32a9d7d88dca28ada104c8ae32a41404539138 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 26 Sep 2024 14:55:26 +0200 Subject: [PATCH 21/38] config_nested.json file --- config_nested.json | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 config_nested.json diff --git a/config_nested.json b/config_nested.json new file mode 100644 index 0000000..9a67877 --- /dev/null +++ b/config_nested.json @@ -0,0 +1,30 @@ +{ + "attach_percentage": "20", + "select": { + "select_percentage": "1", + "select_node": { + "select_node_perc": "2", + "select_node_is_distinct_perc": "3", + "select_node_from_table_perc": "4", + "select_node_where_perc": "5", + "select_node_having_perc": "6", + "select_node_groups": { + "select_node_groups_perc": "7", + "select_node_group_by_perc": "8" + }, + "select_node_qualify_perc": "9", + "select_node_aggregate_perc": "10", + "select_node_sample": { + "select_node_sample_perc": "11", + "select_node_sample_is_perc": 
"12", + "select_node_sample_size": "13" + } + }, + "result_modifiers": "14", + "limit_percent_modifier": "15", + "limit_percent_modifier_limit": "16", + "limit_percent_modifier_offset": "17", + "limit_modifier_limit": "18", + "limit_modifier_offset": "19" + } +} \ No newline at end of file From 73b1cf10c5947f63dcd1b87330b468177a3984fc Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 26 Sep 2024 15:38:51 +0200 Subject: [PATCH 22/38] parseJson recursively --- src/random_nums_config.cpp | 80 +++++++++++++------------------------- 1 file changed, 26 insertions(+), 54 deletions(-) diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 58cb450..673e539 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -54,71 +54,43 @@ enum Statements { }; +void ParseJsonObj(yyjson_val *obj, unordered_map &config_from_file) { + yyjson_obj_iter iter; + yyjson_obj_iter_init(obj, &iter); + size_t idx, max; + yyjson_val *key, *val; + yyjson_obj_foreach(obj, idx, max, key, val) { + const char* root_key = yyjson_get_str(key); + auto it = StringToRandomPercentagesEnum.find(root_key); + if (it != StringToRandomPercentagesEnum.end()) { + RandomPercentagesEnum perc_type = it->second; + auto perc_value = yyjson_get_str(val); + if (perc_value) { + config_from_file[perc_type] = std::stoi(perc_value); + } + } + if (yyjson_is_obj(val)) { + ParseJsonObj(val, config_from_file); + } + } +} unordered_map GetConfigFromFile(const char *json_string) { + unordered_map config_from_file; auto doc = yyjson_read_file(json_string, YYJSON_READ_NOFLAG, NULL, NULL); if (doc) { - yyjson_val *obj = yyjson_doc_get_root(doc); - yyjson_obj_iter iter; - yyjson_obj_iter_init(obj, &iter); - size_t idx, max; - yyjson_val *key, *val; - yyjson_obj_foreach(obj, idx, max, key, val) { - const char* root_key = yyjson_get_str(key); - auto it = StringToRandomPercentagesEnum.find(root_key); // "select" or "attach" - if (it != StringToRandomPercentagesEnum.end()) { - 
RandomPercentagesEnum perc_type = it->second; // SELECT - auto perc_value = yyjson_get_str(val); - config_from_file[perc_type] = std::stoi(perc_value); // { SELECT: 90 } - } - if (yyjson_is_obj(val)) { - size_t node_idx, node_max; - yyjson_val *node_key, *node_val; - yyjson_obj_foreach(val, node_idx, node_max, node_key, node_val) { - const char* node_root_key = yyjson_get_str(node_key); // sub roots are "select_node" and "select_node_sample" - auto node_it = StringToRandomPercentagesEnum.find(node_root_key); - if (node_it != StringToRandomPercentagesEnum.end()) { - RandomPercentagesEnum node_perc_type = node_it->second; - auto node_perc_value = yyjson_get_str(node_val); - config_from_file[node_perc_type] = std::stoi(node_perc_value); - } - if (yyjson_is_obj(node_val)) { - size_t sub_idx, sub_max; - yyjson_val *sub_key, *sub_val; - yyjson_obj_foreach(node_val, sub_idx, sub_max, sub_key, sub_val) { - const char* sub_root_key = yyjson_get_str(sub_key); - auto sub_it = StringToRandomPercentagesEnum.find(sub_root_key); - if (sub_it != StringToRandomPercentagesEnum.end()) { - RandomPercentagesEnum sub_perc_type = sub_it->second; - auto sub_perc_value = yyjson_get_str(sub_val); - config_from_file[sub_perc_type] = std::stoi(sub_perc_value); - } - if (yyjson_is_obj(sub_val)) { - size_t subnode_idx, subnode_max; - yyjson_val *subnode_key, *subnode_val; - yyjson_obj_foreach(sub_val, subnode_idx, subnode_max, subnode_key, subnode_val) { - const char* subnode_root_key = yyjson_get_str(subnode_key); - auto subnode_it = StringToRandomPercentagesEnum.find(subnode_root_key); - if (subnode_it != StringToRandomPercentagesEnum.end()) { - RandomPercentagesEnum subnode_perc_type = subnode_it->second; - auto subnode_perc_value = yyjson_get_str(subnode_val); - config_from_file[subnode_perc_type] = std::stoi(subnode_perc_value); - } - } - } - } - } - } - } + yyjson_val *root = yyjson_doc_get_root(doc); + if (yyjson_is_obj(root)) { + ParseJsonObj(root, config_from_file); } + // Free the doc 
+ yyjson_doc_free(doc); } else { // Couldn't read JSON with percentages config yyjson_doc_free(doc); return GetDefaultConfig(); } - // Free the doc - yyjson_doc_free(doc); return config_from_file; } } // namespace duckdb \ No newline at end of file From 58d4908c28df8ea9422a8ce4252c5d16a1ecdf35 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 26 Sep 2024 15:49:57 +0200 Subject: [PATCH 23/38] a test file --- test/sql/call_fuzzyduck.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/sql/call_fuzzyduck.test b/test/sql/call_fuzzyduck.test index 3964217..1eabbf9 100644 --- a/test/sql/call_fuzzyduck.test +++ b/test/sql/call_fuzzyduck.test @@ -5,7 +5,4 @@ require sqlsmith statement ok -call fuzzyduck(max_queries=2, verbose_output=1, log='sqlsmith.log', complete_log='sqlsmith.complete.log', enable_verification=True); - -statement ok -call fuzzyduck(max_queries=2, verbose_output=1, log='__TEST_DIR__/logs.txt', complete_log='__TEST_DIR__/clog.txt', enable_verification=false); +call fuzzyduck(max_queries=2, verbose_output=1, log='sqlsmith.log', complete_log='sqlsmith.complete.log', enable_verification=True, randoms_config_filepath='config_nested.json'); \ No newline at end of file From 2b9ae4dbf9d0d438c517b197f5a5b0d60e8ebbba Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 26 Sep 2024 15:58:49 +0200 Subject: [PATCH 24/38] clean up --- src/random_nums_config.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 673e539..a11a26f 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -46,14 +46,6 @@ unordered_map StringToRandomPercentagesEnum = { { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET } }; -enum Statements { - select = 0, - attach, - delete_st, - set, - -}; - void ParseJsonObj(yyjson_val *obj, unordered_map &config_from_file) { yyjson_obj_iter iter; yyjson_obj_iter_init(obj, &iter); @@ -84,7 +76,6 
@@ unordered_map GetConfigFromFile(const char *json_s if (yyjson_is_obj(root)) { ParseJsonObj(root, config_from_file); } - // Free the doc yyjson_doc_free(doc); } else { // Couldn't read JSON with percentages config From f8aea673e97c4ef2c95a0c67914bfb0d0b0e6c6d Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 26 Sep 2024 17:36:48 +0200 Subject: [PATCH 25/38] set values of missing in the config file statement types to 0 --- src/include/random_nums_config.hpp | 4 +++- src/random_nums_config.cpp | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index e1504d8..9678fc6 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -41,7 +41,9 @@ enum class RandomPercentagesEnum : idx_t { LIMIT_PERCENT_MODIFIER_LIMIT = 20, LIMIT_PERCENT_MODIFIER_OFFSET = 21, LIMIT_MODIFIER_LIMIT = 22, - LIMIT_MODIFIER_OFFSET = 23 + LIMIT_MODIFIER_OFFSET = 23, + + COUNT }; diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index a11a26f..c6c217a 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -67,6 +67,8 @@ void ParseJsonObj(yyjson_val *obj, unordered_map & } } + + unordered_map GetConfigFromFile(const char *json_string) { unordered_map config_from_file; @@ -82,6 +84,13 @@ unordered_map GetConfigFromFile(const char *json_s yyjson_doc_free(doc); return GetDefaultConfig(); } + // set values of missing statement types to 0 + for (idx_t i = 0; i < static_cast(RandomPercentagesEnum::COUNT); ++i) { + RandomPercentagesEnum statement_type = static_cast(i); + if (config_from_file.find(statement_type) == config_from_file.end()) { + config_from_file[statement_type] = 0; + } + } return config_from_file; } } // namespace duckdb \ No newline at end of file From 5e5e7adb3133ea03b5cee8b22f9c635e5f8c7eb2 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 27 Sep 2024 14:07:21 +0200 Subject: [PATCH 26/38] 
add missing quote --- scripts/run_fuzzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_fuzzer.py b/scripts/run_fuzzer.py index 4d4e950..f6e6cf5 100644 --- a/scripts/run_fuzzer.py +++ b/scripts/run_fuzzer.py @@ -79,7 +79,7 @@ def run_fuzzer_script(fuzzer): return "call sqlsmith(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" elif fuzzer == 'duckfuzz': return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', \ - enable_verification='${ENABLE_VERIFICATION}', randoms_config_filepath='${RANDOMS_CONFIG_FILEPATH});" + enable_verification='${ENABLE_VERIFICATION}', randoms_config_filepath='${RANDOMS_CONFIG_FILEPATH}');" elif fuzzer == 'duckfuzz_functions': return "call fuzz_all_functions(seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" else: From 0ddd49c5596bf5f40f40bef2b781d5c108061ef8 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 27 Sep 2024 16:59:07 +0200 Subject: [PATCH 27/38] update the config file with almost all randoms --- config_nested.json | 69 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/config_nested.json b/config_nested.json index 9a67877..b0a51de 100644 --- a/config_nested.json +++ b/config_nested.json @@ -1,5 +1,18 @@ { - "attach_percentage": "20", + "attach": { + "attach_percentage": "20", + "attach_use_percentage": "21", + "attach_read_only": "30" + }, + "detach": { + "detach_percentage": "22", + "detach_random_name": "29" + }, + "set": { + "set_percentage": "23", + "set_attached_db": "24" + }, + "delete_percentage": "25", "select": { "select_percentage": "1", "select_node": { @@ -26,5 +39,59 @@ "limit_percent_modifier_offset": "17", "limit_modifier_limit": "18", "limit_modifier_offset": "19" + }, + "create": { + "create_percentage": 
"26", + "create_generate_select": "27", + "create_num_cols": "28" + }, + "table_ref": { + "table_ref_base_table_ref_perc": "31", + "table_ref_expression_list_ref": "32", + "table_ref_join_ref": "33" + }, + "join_ref": { + "join_ref_cross": "34", + "join_ref_asof": "35", + "join_ref_natural": "36", + "join_ref_positional": "37", + "join_ref_general_expression": "38" + }, + "expression": { + "expression_column_ref": "39", + "expression_constant": "40", + "expression_subquery": "41" + }, + "constant_value": { + "constant_value_bigint": "42", + "constant_value_to_string": "43" + }, + "function": { + "function_aggregate": { + "function_aggregate_window_function": "44", + "function_aggregate_order_by": "45", + "function_aggregate_distinct": "46" + } + }, + "window_function": { + "window_function_expression": "47", + "window_function_random_expression": "48", + "window_function_ignore_nulls": "49" + }, + "star": { + "star_relation_name": "50", + "star_column_name": "51", + "star_column_name_with_expression": "52", + "star_columns": { + "star_columns_true": "53", + "star_columns_true_lambda": "54" + } + }, + "position_reference": "55", + "relational_name": { + "relational_name_choose_current": "56" + }, + "column_names": { + "column_names_choose_current": "57" } } \ No newline at end of file From 7ada4b348f8ec945197ee563ecad2cdabb836e07 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 27 Sep 2024 16:59:36 +0200 Subject: [PATCH 28/38] update src/include/random_nums_config.hpp to align with the config --- src/include/random_nums_config.hpp | 127 +++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 25 deletions(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 9678fc6..2db321c 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -13,36 +13,113 @@ namespace duckdb { -enum class RandomPercentagesEnum : idx_t { - SELECT = 0, - ATTACH = 1, - ATTACH_USE = 2, +enum class 
RandomPercentagesEnum : idx_t { + // ---------------------------------- + // Generate Attach Percentages + // ---------------------------------- + ATTACH = 0, + ATTACH_USE = 1, + ATTACH_READ_ONLY = 2, + // ---------------------------------- + // Generate Detach Percentages + // ---------------------------------- DETACH = 3, - SET = 4, - DELETE = 5, + DETACH_RANDOM_NAME = 4, + // ---------------------------------- + // Generate Set Percentages + // ---------------------------------- + SET = 5, + SET_ATTACHED_DB = 6, + // ---------------------------------- + // Generate Select Percentages + // ---------------------------------- + DELETE = 7, // ---------------------------------- - // Generate Select Percentages Types + // Generate Select Percentages + // ---------------------------------- + SELECT = 8, + SELECT_NODE = 9, + SELECT_NODE_IS_DISTINCT = 10, + SELECT_NODE_FROM_TABLE = 11, + SELECT_NODE_WHERE = 12, + SELECT_NODE_HAVING = 13, + SELECT_NODE_GROUPS = 14, + SELECT_NODE_GROUP_BY = 15, + SELECT_NODE_QUALIFY = 16, + SELECT_NODE_AGGREGATE = 17, + SELECT_NODE_SAMPLE = 18, + SELECT_NODE_SAMPLE_IS_PERC = 19, + SELECT_NODE_SAMPLE_SIZE = 20, + RESULT_MODIFIERS = 21, + LIMIT_PERCENT_MODIFIER = 22, + LIMIT_PERCENT_MODIFIER_LIMIT = 23, + LIMIT_PERCENT_MODIFIER_OFFSET = 24, + LIMIT_MODIFIER_LIMIT = 25, + LIMIT_MODIFIER_OFFSET = 26, + + // ---------------------------------- + // Generate Create Percentages // ---------------------------------- - SELECT_NODE = 6, - SELECT_NODE_IS_DISTINCT = 7, - SELECT_NODE_FROM_TABLE = 8, - SELECT_NODE_WHERE = 9, - SELECT_NODE_HAVING = 10, - SELECT_NODE_GROUPS = 11, - SELECT_NODE_GROUP_BY = 12, - SELECT_NODE_QUALIFY = 13, - SELECT_NODE_AGGREGATE = 14, - SELECT_NODE_SAMPLE = 15, - SELECT_NODE_SAMPLE_IS_PERC = 16, - SELECT_NODE_SAMPLE_SIZE = 17, - RESULT_MODIFIERS = 18, - LIMIT_PERCENT_MODIFIER = 19, - LIMIT_PERCENT_MODIFIER_LIMIT = 20, - LIMIT_PERCENT_MODIFIER_OFFSET = 21, - LIMIT_MODIFIER_LIMIT = 22, - LIMIT_MODIFIER_OFFSET = 23, + CREATE = 
27, + CREATE_GENERATE_SELECT = 28, + CREATE_NUM_COLS = 29, + // ----------------------------------- + // Generate Table Ref Percentages + // ----------------------------------- + TABLE_REF_BASE_TABLE_REF_PERC = 30, + TABLE_REF_EXPRESSION_LIST_REF = 31, + TABLE_REF_JOIN_REF = 32, + + // ----------------------------------- + // Generate Join Ref Percentages + // ----------------------------------- + JOIN_REF_CROSS = 33, + JOIN_REF_ASOF = 34, + JOIN_REF_NATURAL = 35, + JOIN_REF_POSITIONAL = 36, + JOIN_REF_GENERAL_EXPRESSION = 37, + + // ----------------------------------- + // Generate Expression Percentages + // ----------------------------------- + EXPRESSION_COLUMN_REF = 38, + EXPRESSION_CONSTANT = 39, + EXPRESSION_SUBQUERY = 40, + + // ----------------------------------- + // Generate Constant Value Percentages + // ----------------------------------- + CONSTANT_VALUE_BIGINT = 41, + CONSTANT_VALUE_TO_STRING = 42, + + // ----------------------------------- + // Generate Function Percentages + // ----------------------------------- + FUNCTION_AGGREGATE_WINDOW_FUNCTION = 43, + FUNCTION_AGGREGATE_ORDER_BY = 44, + FUNCTION_AGGREGATE_DISTINCT = 45, + // ----------------------------------- + // Generate Window Function Percentages + // ----------------------------------- + WINDOW_FUNCTION_EXPRESSION = 46, + WINDOW_FUNCTION_RANDOM_EXPRESSION = 47, + WINDOW_FUNCTION_IGNORE_NULLS = 48, + + // ----------------------------------- + // Generate Star Percentages + // ----------------------------------- + STAR_RELATION_NAME = 49, + STAR_COLUMN_NAME = 50, + STAR_COLUMN_NAME_WITH_EXPRESSION = 51, + STAR_COLUMNS_TRUE = 52, + STAR_COLUMNS_TRUE_LAMBDA = 53, + + POSITION_REFERENCE = 54, + RELATIONAL_NAME_CHOOSE_CURRENT = 55, + COLUMN_NAMES_CHOOSE_CURRENT = 56, + COUNT }; From 4cfe8c44bff099b49b59c8e47dd08fb0604592e9 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 27 Sep 2024 17:00:07 +0200 Subject: [PATCH 29/38] update StringToRandomPercentagesEnum in the 
src/random_nums_config.cpp --- src/random_nums_config.cpp | 39 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index c6c217a..1ebf259 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -23,8 +23,12 @@ unordered_map GetDefaultConfig() { unordered_map StringToRandomPercentagesEnum = { { "attach_percentage", RandomPercentagesEnum::ATTACH }, { "attach_use_percentage", RandomPercentagesEnum::ATTACH_USE }, - { "delete_percentage", RandomPercentagesEnum::DELETE }, + { "attach_read_only", RandomPercentagesEnum::ATTACH_READ_ONLY }, { "detach_percentage", RandomPercentagesEnum::DETACH }, + { "detach_random_name", RandomPercentagesEnum::DETACH_RANDOM_NAME }, + { "set_percentage", RandomPercentagesEnum::SET }, + { "set_attached_db", RandomPercentagesEnum::SET_ATTACHED_DB }, + { "delete_percentage", RandomPercentagesEnum::DELETE }, { "select_percentage", RandomPercentagesEnum::SELECT }, { "select_node_perc", RandomPercentagesEnum::SELECT_NODE }, { "select_node_is_distinct_perc", RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT }, @@ -43,7 +47,38 @@ unordered_map StringToRandomPercentagesEnum = { { "limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT }, { "limit_percent_modifier_offset", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET }, { "limit_modifier_limit", RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT }, - { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET } + { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET }, + { "create_percentage", RandomPercentagesEnum::CREATE }, + { "create_generate_select", RandomPercentagesEnum::CREATE_GENERATE_SELECT }, + { "create_num_cols", RandomPercentagesEnum::CREATE_NUM_COLS }, + { "table_ref_base_table_ref_perc", RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC }, + { "table_ref_expression_list_ref", 
RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF }, + { "table_ref_join_ref", RandomPercentagesEnum::TABLE_REF_JOIN_REF }, + { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS }, + { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS }, + { "join_ref_asof", RandomPercentagesEnum::JOIN_REF_ASOF }, + { "join_ref_natural", RandomPercentagesEnum::JOIN_REF_NATURAL }, + { "join_ref_positional", RandomPercentagesEnum::JOIN_REF_POSITIONAL }, + { "join_ref_general_expression", RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION }, + { "expression_column_ref", RandomPercentagesEnum::EXPRESSION_COLUMN_REF }, + { "expression_constant", RandomPercentagesEnum::EXPRESSION_CONSTANT }, + { "expression_subquery", RandomPercentagesEnum::EXPRESSION_SUBQUERY }, + { "constant_value_bigint", RandomPercentagesEnum::CONSTANT_VALUE_BIGINT }, + { "constant_value_to_string", RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING }, + { "function_aggregate_window_function", RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION }, + { "function_aggregate_order_by", RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY }, + { "function_aggregate_distinct", RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT }, + { "window_function_expression", RandomPercentagesEnum::WINDOW_FUNCTION_EXPRESSION }, + { "window_function_random_expression", RandomPercentagesEnum::WINDOW_FUNCTION_RANDOM_EXPRESSION }, + { "window_function_ignore_nulls", RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS }, + { "star_relation_name", RandomPercentagesEnum::STAR_RELATION_NAME }, + { "star_column_name", RandomPercentagesEnum::STAR_COLUMN_NAME }, + { "star_column_name_with_expression", RandomPercentagesEnum::STAR_COLUMN_NAME_WITH_EXPRESSION }, + { "star_columns_true", RandomPercentagesEnum::STAR_COLUMNS_TRUE }, + { "star_columns_true_lambda", RandomPercentagesEnum::STAR_COLUMNS_TRUE_LAMBDA }, + { "position_reference", RandomPercentagesEnum::POSITION_REFERENCE }, + { "relational_name_choose_current", 
RandomPercentagesEnum::RELATIONAL_NAME_CHOOSE_CURRENT }, + { "column_names_choose_current", RandomPercentagesEnum::COLUMN_NAMES_CHOOSE_CURRENT } }; void ParseJsonObj(yyjson_val *obj, unordered_map &config_from_file) { From ea50779c7c53e237f149e207cb4d2a432d8b91d9 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Mon, 30 Sep 2024 17:41:39 +0200 Subject: [PATCH 30/38] config_nested.json with all the default values --- config_nested.json | 108 +++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/config_nested.json b/config_nested.json index b0a51de..8e24fb7 100644 --- a/config_nested.json +++ b/config_nested.json @@ -1,97 +1,99 @@ { "attach": { "attach_percentage": "20", - "attach_use_percentage": "21", + "attach_use_percentage": "80", "attach_read_only": "30" }, "detach": { - "detach_percentage": "22", - "detach_random_name": "29" + "detach_percentage": "15", + "detach_random_name": "20" }, "set": { - "set_percentage": "23", - "set_attached_db": "24" + "set_percentage": "5", + "set_attached_db": "90" }, "delete_percentage": "25", "select": { - "select_percentage": "1", + "select_percentage": "70", "select_node": { - "select_node_perc": "2", - "select_node_is_distinct_perc": "3", - "select_node_from_table_perc": "4", - "select_node_where_perc": "5", - "select_node_having_perc": "6", + "select_node_perc": "70", + "select_node_is_distinct_perc": "30", + "select_node_from_table_perc": "95", + "select_node_where_perc": "60", + "select_node_having_perc": "25", "select_node_groups": { - "select_node_groups_perc": "7", - "select_node_group_by_perc": "8" + "select_node_groups_perc": "30", + "select_node_group_by_perc": "70" }, - "select_node_qualify_perc": "9", + "select_node_qualify_perc": "10", "select_node_aggregate_perc": "10", "select_node_sample": { - "select_node_sample_perc": "11", - "select_node_sample_is_perc": "12", - "select_node_sample_size": "13" + "select_node_sample_perc": "10", + 
"select_node_sample_is_perc": "50", + "select_node_sample_size": "100" } }, - "result_modifiers": "14", - "limit_percent_modifier": "15", - "limit_percent_modifier_limit": "16", - "limit_percent_modifier_offset": "17", - "limit_modifier_limit": "18", - "limit_modifier_offset": "19" + "result_modifiers": "5", + "limit_percent_modifier": "50", + "limit_percent_modifier_limit": "30", + "limit_percent_modifier_offset": "30", + "limit_modifier_limit": "30", + "limit_modifier_offset": "30" }, "create": { - "create_percentage": "26", - "create_generate_select": "27", - "create_num_cols": "28" + "create_percentage": "0", + "create_generate_select": "50", + "create_num_cols": "1000" }, "table_ref": { - "table_ref_base_table_ref_perc": "31", - "table_ref_expression_list_ref": "32", - "table_ref_join_ref": "33" + "table_ref_base_table_ref_perc": "60", + "table_ref_expression_list_ref": "20", + "table_ref_join_ref": "40" }, "join_ref": { - "join_ref_cross": "34", - "join_ref_asof": "35", - "join_ref_natural": "36", - "join_ref_positional": "37", - "join_ref_general_expression": "38" + "join_ref_cross": "10", + "join_ref_asof": "10", + "join_ref_natural": "10", + "join_ref_positional": "10", + "join_ref_general_expression": "70" }, "expression": { - "expression_column_ref": "39", - "expression_constant": "40", - "expression_subquery": "41" + "expression_column_ref": "50", + "expression_constant": "30", + "expression_subquery": "3" }, "constant_value": { - "constant_value_bigint": "42", - "constant_value_to_string": "43" + "constant_value_bigint": "50", + "constant_value_to_string": "30" }, "function": { "function_aggregate": { - "function_aggregate_window_function": "44", - "function_aggregate_order_by": "45", - "function_aggregate_distinct": "46" + "function_aggregate_window_function": "10", + "function_aggregate_order_by": "10", + "function_aggregate_random_expression": "10", + "function_aggregate_distinct": "10" } }, "window_function": { - "window_function_expression": "47", 
- "window_function_random_expression": "48", - "window_function_ignore_nulls": "49" + "window_function_partitions": "50", + "window_function_orders": "30", + "window_function_ignore_nulls": "30", + "window_function_result_offset": "30", + "window_function_result_default": "30" }, "star": { - "star_relation_name": "50", - "star_column_name": "51", - "star_column_name_with_expression": "52", + "star_relation_name": "10", + "star_column_name_exclude_list": "20", + "star_column_name": "20", "star_columns": { - "star_columns_true": "53", - "star_columns_true_lambda": "54" + "star_columns_true": "50", + "star_columns_true_lambda": "50" } }, - "position_reference": "55", "relational_name": { - "relational_name_choose_current": "56" + "relational_name_choose_current": "80" }, "column_names": { - "column_names_choose_current": "57" + "column_names_choose_current": "80" } } \ No newline at end of file From c3c7725ba06bb56917d32181aecbf5494d6179b4 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Mon, 30 Sep 2024 17:42:16 +0200 Subject: [PATCH 31/38] src/include/random_nums_config.hpp corrected names --- src/include/random_nums_config.hpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 2db321c..529db87 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -99,26 +99,28 @@ enum class RandomPercentagesEnum : idx_t { FUNCTION_AGGREGATE_WINDOW_FUNCTION = 43, FUNCTION_AGGREGATE_ORDER_BY = 44, FUNCTION_AGGREGATE_DISTINCT = 45, + FUNCTION_AGGREGATE_RANDOM_EXPRESSION = 46, // ----------------------------------- // Generate Window Function Percentages // ----------------------------------- - WINDOW_FUNCTION_EXPRESSION = 46, - WINDOW_FUNCTION_RANDOM_EXPRESSION = 47, - WINDOW_FUNCTION_IGNORE_NULLS = 48, + WINDOW_FUNCTION_PARTITIONS = 47, + WINDOW_FUNCTION_ORDERS = 48, + WINDOW_FUNCTION_IGNORE_NULLS = 49, + 
WINDOW_FUNCTION_RESULT_OFFSET = 50, + WINDOW_FUNCTION_RESULT_DEFAULT = 51, // ----------------------------------- // Generate Star Percentages // ----------------------------------- - STAR_RELATION_NAME = 49, - STAR_COLUMN_NAME = 50, - STAR_COLUMN_NAME_WITH_EXPRESSION = 51, - STAR_COLUMNS_TRUE = 52, - STAR_COLUMNS_TRUE_LAMBDA = 53, + STAR_RELATION_NAME = 52, + STAR_COLUMN_NAME = 53, + STAR_COLUMN_NAME_EXCLUDE_LIST = 54, + STAR_COLUMNS_TRUE = 55, + STAR_COLUMNS_TRUE_LAMBDA = 56, - POSITION_REFERENCE = 54, - RELATIONAL_NAME_CHOOSE_CURRENT = 55, - COLUMN_NAMES_CHOOSE_CURRENT = 56, + RELATIONAL_NAME_CHOOSE_CURRENT = 57, + COLUMN_NAMES_CHOOSE_CURRENT = 58, COUNT From ebe03d50638976212427f481af99ea62498e7c81 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Mon, 30 Sep 2024 17:42:49 +0200 Subject: [PATCH 32/38] pass config to the statement generator --- src/fuzzyduck.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index a385bcf..5ec0ff1 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -78,6 +78,7 @@ string FuzzyDuck::GenerateQuery() { // generate statement StatementGenerator generator(context); generator.verification_enabled = enable_verification; + generator.config = config; // accumulate statement(s) auto statement = string(""); if (generator.RandomPercentage(10)) { @@ -86,12 +87,12 @@ string FuzzyDuck::GenerateQuery() { LogTask("Generating Multi-Statement query of " + to_string(number_of_statements) + " statements with seed " + to_string(seed)); for (idx_t i = 0; i < number_of_statements; i++) { - statement += generator.GenerateStatement(config)->ToString() + "; "; + statement += generator.GenerateStatement()->ToString() + "; "; } } else { // normal statement LogTask("Generating Single-Statement query with seed " + to_string(seed)); - statement = generator.GenerateStatement(config)->ToString(); + statement = generator.GenerateStatement()->ToString(); } return statement; } From 
5b8f6526b1268056fbe0959984f38cf381e6eec7 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Mon, 30 Sep 2024 17:43:20 +0200 Subject: [PATCH 33/38] rename some values --- src/random_nums_config.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 1ebf259..3a3c6f8 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -67,16 +67,18 @@ unordered_map StringToRandomPercentagesEnum = { { "constant_value_to_string", RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING }, { "function_aggregate_window_function", RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION }, { "function_aggregate_order_by", RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY }, + { "function_aggregate_random_expression", RandomPercentagesEnum::FUNCTION_AGGREGATE_RANDOM_EXPRESSION }, { "function_aggregate_distinct", RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT }, - { "window_function_expression", RandomPercentagesEnum::WINDOW_FUNCTION_EXPRESSION }, - { "window_function_random_expression", RandomPercentagesEnum::WINDOW_FUNCTION_RANDOM_EXPRESSION }, + { "window_function_partitions", RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS }, + { "window_function_orders", RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS }, { "window_function_ignore_nulls", RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS }, + { "window_function_result_offset", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET }, + { "window_function_result_default", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT }, { "star_relation_name", RandomPercentagesEnum::STAR_RELATION_NAME }, + { "star_column_name_exclude_list", RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST }, { "star_column_name", RandomPercentagesEnum::STAR_COLUMN_NAME }, - { "star_column_name_with_expression", RandomPercentagesEnum::STAR_COLUMN_NAME_WITH_EXPRESSION }, { "star_columns_true", RandomPercentagesEnum::STAR_COLUMNS_TRUE }, { 
"star_columns_true_lambda", RandomPercentagesEnum::STAR_COLUMNS_TRUE_LAMBDA }, - { "position_reference", RandomPercentagesEnum::POSITION_REFERENCE }, { "relational_name_choose_current", RandomPercentagesEnum::RELATIONAL_NAME_CHOOSE_CURRENT }, { "column_names_choose_current", RandomPercentagesEnum::COLUMN_NAMES_CHOOSE_CURRENT } }; From 628f2d87333c0efe2c9f417153d7734652da1fae Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Mon, 30 Sep 2024 17:47:48 +0200 Subject: [PATCH 34/38] remove duplicating line --- src/random_nums_config.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 3a3c6f8..14da820 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -55,7 +55,6 @@ unordered_map StringToRandomPercentagesEnum = { { "table_ref_expression_list_ref", RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF }, { "table_ref_join_ref", RandomPercentagesEnum::TABLE_REF_JOIN_REF }, { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS }, - { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS }, { "join_ref_asof", RandomPercentagesEnum::JOIN_REF_ASOF }, { "join_ref_natural", RandomPercentagesEnum::JOIN_REF_NATURAL }, { "join_ref_positional", RandomPercentagesEnum::JOIN_REF_POSITIONAL }, From 1a1ff8aa4435eb9cd017b18105363648da9a013e Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 3 Oct 2024 15:15:04 +0200 Subject: [PATCH 35/38] make RandomNumsConfig class --- src/fuzzyduck.cpp | 7 +- src/include/fuzzyduck.hpp | 1 + src/include/random_nums_config.hpp | 19 ++- src/include/statement_generator.hpp | 3 +- src/random_nums_config.cpp | 235 +++++++++++++++------------- src/statement_generator.cpp | 46 +++--- 6 files changed, 165 insertions(+), 146 deletions(-) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index 5ec0ff1..2580969 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -32,12 +32,9 @@ void FuzzyDuck::BeginFuzzing() { RunQuery("PRAGMA 
enable_verification"); } if (!randoms_config_filepath.empty()) { - config = GetConfigFromFile(randoms_config_filepath.c_str()); - if (!config.size()) { - config = GetDefaultConfig(); - } + config = RandomNumsConfig().GetConfigFromFile(randoms_config_filepath.c_str()); } else { - config = GetDefaultConfig(); + config = RandomNumsConfig().GetDefaultConfig(); } } diff --git a/src/include/fuzzyduck.hpp b/src/include/fuzzyduck.hpp index 70c6279..a0ee2cb 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -30,6 +30,7 @@ class FuzzyDuck { bool enable_verification = false; idx_t timeout = 30; string randoms_config_filepath; + // RandomNumsConfig config; unordered_map config; public: diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 529db87..9282288 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -126,8 +126,19 @@ enum class RandomPercentagesEnum : idx_t { }; -unordered_map GetDefaultConfig(); -unordered_map GetConfigFromFile(const char *json_string); -string RandomPercentagesEnumToString(RandomPercentagesEnum type); +class RandomNumsConfig { +public: + RandomNumsConfig(); + RandomNumsConfig(const char *config_file_path); + ~RandomNumsConfig(); -} // namespace duckdb \ No newline at end of file + RandomPercentagesEnum percentages_selector; + // unordered_map config; + + unordered_map GetDefaultConfig(); + unordered_map GetConfigFromFile(const char *json_string); + string RandomPercentagesEnumToString(RandomPercentagesEnum type); + +}; + +}// namespace duckdb \ No newline at end of file diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 438dab8..c6d7dce 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -41,6 +41,7 @@ class StatementGenerator { friend class ExpressionDepthChecker; friend class AggregateChecker; friend class WindowChecker; + unordered_map config; public: 
StatementGenerator(ClientContext &context); @@ -48,7 +49,7 @@ class StatementGenerator { ~StatementGenerator(); public: - unique_ptr GenerateStatement(unordered_map config); + unique_ptr GenerateStatement(); vector GenerateAllFunctionCalls(); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 14da820..fd7828e 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -8,125 +8,134 @@ namespace duckdb { using namespace duckdb_yyjson; -unordered_map GetDefaultConfig() { - unordered_map default_config = { - { RandomPercentagesEnum::ATTACH, 40 }, - { RandomPercentagesEnum::ATTACH_USE, 50 }, - { RandomPercentagesEnum::DELETE, 40 }, - { RandomPercentagesEnum::DETACH, 60 }, - { RandomPercentagesEnum::SELECT, 60 }, - { RandomPercentagesEnum::SET, 30 } - }; - return default_config; -} - unordered_map StringToRandomPercentagesEnum = { - { "attach_percentage", RandomPercentagesEnum::ATTACH }, - { "attach_use_percentage", RandomPercentagesEnum::ATTACH_USE }, - { "attach_read_only", RandomPercentagesEnum::ATTACH_READ_ONLY }, - { "detach_percentage", RandomPercentagesEnum::DETACH }, - { "detach_random_name", RandomPercentagesEnum::DETACH_RANDOM_NAME }, - { "set_percentage", RandomPercentagesEnum::SET }, - { "set_attached_db", RandomPercentagesEnum::SET_ATTACHED_DB }, - { "delete_percentage", RandomPercentagesEnum::DELETE }, - { "select_percentage", RandomPercentagesEnum::SELECT }, - { "select_node_perc", RandomPercentagesEnum::SELECT_NODE }, - { "select_node_is_distinct_perc", RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT }, - { "select_node_from_table_perc", RandomPercentagesEnum::SELECT_NODE_FROM_TABLE }, - { "select_node_where_perc", RandomPercentagesEnum::SELECT_NODE_WHERE }, - { "select_node_having_perc", RandomPercentagesEnum::SELECT_NODE_HAVING }, - { "select_node_groups_perc", RandomPercentagesEnum::SELECT_NODE_GROUPS }, - { "select_node_group_by_perc", RandomPercentagesEnum::SELECT_NODE_GROUP_BY }, - { 
"select_node_qualify_perc", RandomPercentagesEnum::SELECT_NODE_QUALIFY }, - { "select_node_aggregate_perc", RandomPercentagesEnum::SELECT_NODE_AGGREGATE }, - { "select_node_sample_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE }, - { "select_node_sample_is_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC }, - { "select_node_sample_size", RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE }, - { "result_modifiers", RandomPercentagesEnum::RESULT_MODIFIERS }, - { "limit_percent_modifier", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER }, - { "limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT }, - { "limit_percent_modifier_offset", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET }, - { "limit_modifier_limit", RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT }, - { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET }, - { "create_percentage", RandomPercentagesEnum::CREATE }, - { "create_generate_select", RandomPercentagesEnum::CREATE_GENERATE_SELECT }, - { "create_num_cols", RandomPercentagesEnum::CREATE_NUM_COLS }, - { "table_ref_base_table_ref_perc", RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC }, - { "table_ref_expression_list_ref", RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF }, - { "table_ref_join_ref", RandomPercentagesEnum::TABLE_REF_JOIN_REF }, - { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS }, - { "join_ref_asof", RandomPercentagesEnum::JOIN_REF_ASOF }, - { "join_ref_natural", RandomPercentagesEnum::JOIN_REF_NATURAL }, - { "join_ref_positional", RandomPercentagesEnum::JOIN_REF_POSITIONAL }, - { "join_ref_general_expression", RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION }, - { "expression_column_ref", RandomPercentagesEnum::EXPRESSION_COLUMN_REF }, - { "expression_constant", RandomPercentagesEnum::EXPRESSION_CONSTANT }, - { "expression_subquery", RandomPercentagesEnum::EXPRESSION_SUBQUERY }, - { "constant_value_bigint", RandomPercentagesEnum::CONSTANT_VALUE_BIGINT }, - { 
"constant_value_to_string", RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING }, - { "function_aggregate_window_function", RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION }, - { "function_aggregate_order_by", RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY }, - { "function_aggregate_random_expression", RandomPercentagesEnum::FUNCTION_AGGREGATE_RANDOM_EXPRESSION }, - { "function_aggregate_distinct", RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT }, - { "window_function_partitions", RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS }, - { "window_function_orders", RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS }, - { "window_function_ignore_nulls", RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS }, - { "window_function_result_offset", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET }, - { "window_function_result_default", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT }, - { "star_relation_name", RandomPercentagesEnum::STAR_RELATION_NAME }, - { "star_column_name_exclude_list", RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST }, - { "star_column_name", RandomPercentagesEnum::STAR_COLUMN_NAME }, - { "star_columns_true", RandomPercentagesEnum::STAR_COLUMNS_TRUE }, - { "star_columns_true_lambda", RandomPercentagesEnum::STAR_COLUMNS_TRUE_LAMBDA }, - { "relational_name_choose_current", RandomPercentagesEnum::RELATIONAL_NAME_CHOOSE_CURRENT }, - { "column_names_choose_current", RandomPercentagesEnum::COLUMN_NAMES_CHOOSE_CURRENT } + { "attach_percentage", RandomPercentagesEnum::ATTACH }, + { "attach_use_percentage", RandomPercentagesEnum::ATTACH_USE }, + { "attach_read_only", RandomPercentagesEnum::ATTACH_READ_ONLY }, + { "detach_percentage", RandomPercentagesEnum::DETACH }, + { "detach_random_name", RandomPercentagesEnum::DETACH_RANDOM_NAME }, + { "set_percentage", RandomPercentagesEnum::SET }, + { "set_attached_db", RandomPercentagesEnum::SET_ATTACHED_DB }, + { "delete_percentage", RandomPercentagesEnum::DELETE }, + { "select_percentage", 
RandomPercentagesEnum::SELECT }, + { "select_node_perc", RandomPercentagesEnum::SELECT_NODE }, + { "select_node_is_distinct_perc", RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT }, + { "select_node_from_table_perc", RandomPercentagesEnum::SELECT_NODE_FROM_TABLE }, + { "select_node_where_perc", RandomPercentagesEnum::SELECT_NODE_WHERE }, + { "select_node_having_perc", RandomPercentagesEnum::SELECT_NODE_HAVING }, + { "select_node_groups_perc", RandomPercentagesEnum::SELECT_NODE_GROUPS }, + { "select_node_group_by_perc", RandomPercentagesEnum::SELECT_NODE_GROUP_BY }, + { "select_node_qualify_perc", RandomPercentagesEnum::SELECT_NODE_QUALIFY }, + { "select_node_aggregate_perc", RandomPercentagesEnum::SELECT_NODE_AGGREGATE }, + { "select_node_sample_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE }, + { "select_node_sample_is_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC }, + { "select_node_sample_size", RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE }, + { "result_modifiers", RandomPercentagesEnum::RESULT_MODIFIERS }, + { "limit_percent_modifier", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER }, + { "limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT }, + { "limit_percent_modifier_offset", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET }, + { "limit_modifier_limit", RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT }, + { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET }, + { "create_percentage", RandomPercentagesEnum::CREATE }, + { "create_generate_select", RandomPercentagesEnum::CREATE_GENERATE_SELECT }, + { "create_num_cols", RandomPercentagesEnum::CREATE_NUM_COLS }, + { "table_ref_base_table_ref_perc", RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC }, + { "table_ref_expression_list_ref", RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF }, + { "table_ref_join_ref", RandomPercentagesEnum::TABLE_REF_JOIN_REF }, + { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS }, + { "join_ref_asof", 
RandomPercentagesEnum::JOIN_REF_ASOF }, + { "join_ref_natural", RandomPercentagesEnum::JOIN_REF_NATURAL }, + { "join_ref_positional", RandomPercentagesEnum::JOIN_REF_POSITIONAL }, + { "join_ref_general_expression", RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION }, + { "expression_column_ref", RandomPercentagesEnum::EXPRESSION_COLUMN_REF }, + { "expression_constant", RandomPercentagesEnum::EXPRESSION_CONSTANT }, + { "expression_subquery", RandomPercentagesEnum::EXPRESSION_SUBQUERY }, + { "constant_value_bigint", RandomPercentagesEnum::CONSTANT_VALUE_BIGINT }, + { "constant_value_to_string", RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING }, + { "function_aggregate_window_function", RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION }, + { "function_aggregate_order_by", RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY }, + { "function_aggregate_random_expression", RandomPercentagesEnum::FUNCTION_AGGREGATE_RANDOM_EXPRESSION }, + { "function_aggregate_distinct", RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT }, + { "window_function_partitions", RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS }, + { "window_function_orders", RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS }, + { "window_function_ignore_nulls", RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS }, + { "window_function_result_offset", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET }, + { "window_function_result_default", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT }, + { "star_relation_name", RandomPercentagesEnum::STAR_RELATION_NAME }, + { "star_column_name_exclude_list", RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST }, + { "star_column_name", RandomPercentagesEnum::STAR_COLUMN_NAME }, + { "star_columns_true", RandomPercentagesEnum::STAR_COLUMNS_TRUE }, + { "star_columns_true_lambda", RandomPercentagesEnum::STAR_COLUMNS_TRUE_LAMBDA }, + { "relational_name_choose_current", RandomPercentagesEnum::RELATIONAL_NAME_CHOOSE_CURRENT }, + { "column_names_choose_current", 
RandomPercentagesEnum::COLUMN_NAMES_CHOOSE_CURRENT } }; -void ParseJsonObj(yyjson_val *obj, unordered_map &config_from_file) { - yyjson_obj_iter iter; - yyjson_obj_iter_init(obj, &iter); - size_t idx, max; - yyjson_val *key, *val; - yyjson_obj_foreach(obj, idx, max, key, val) { - const char* root_key = yyjson_get_str(key); - auto it = StringToRandomPercentagesEnum.find(root_key); - if (it != StringToRandomPercentagesEnum.end()) { - RandomPercentagesEnum perc_type = it->second; - auto perc_value = yyjson_get_str(val); - if (perc_value) { - config_from_file[perc_type] = std::stoi(perc_value); - } - } - if (yyjson_is_obj(val)) { - ParseJsonObj(val, config_from_file); - } - } +RandomNumsConfig::RandomNumsConfig() { + GetDefaultConfig(); } +RandomNumsConfig::RandomNumsConfig(const char *config_file_path) { + GetConfigFromFile(config_file_path); +} +RandomNumsConfig::~RandomNumsConfig() { +} -unordered_map GetConfigFromFile(const char *json_string) { - - unordered_map config_from_file; - auto doc = yyjson_read_file(json_string, YYJSON_READ_NOFLAG, NULL, NULL); - if (doc) { - yyjson_val *root = yyjson_doc_get_root(doc); - if (yyjson_is_obj(root)) { - ParseJsonObj(root, config_from_file); - } - yyjson_doc_free(doc); - } else { - // Couldn't read JSON with percentages config - yyjson_doc_free(doc); - return GetDefaultConfig(); - } - // set values of missing statement types to 0 - for (idx_t i = 0; i < static_cast(RandomPercentagesEnum::COUNT); ++i) { - RandomPercentagesEnum statement_type = static_cast(i); - if (config_from_file.find(statement_type) == config_from_file.end()) { - config_from_file[statement_type] = 0; - } - } - return config_from_file; +unordered_map RandomNumsConfig::GetDefaultConfig() { + unordered_map default_config = { + { RandomPercentagesEnum::ATTACH, 40 }, + { RandomPercentagesEnum::ATTACH_USE, 50 }, + { RandomPercentagesEnum::DELETE, 40 }, + { RandomPercentagesEnum::DETACH, 60 }, + { RandomPercentagesEnum::SELECT, 60 }, + { 
RandomPercentagesEnum::SET, 30 } + }; + return default_config; } + +void ParseJsonObj(yyjson_val *obj, unordered_map &config_from_file) { + yyjson_obj_iter iter; + yyjson_obj_iter_init(obj, &iter); + size_t idx, max; + yyjson_val *key, *val; + yyjson_obj_foreach(obj, idx, max, key, val) { + const char* root_key = yyjson_get_str(key); + auto it = StringToRandomPercentagesEnum.find(root_key); + if (it != StringToRandomPercentagesEnum.end()) { + RandomPercentagesEnum perc_type = it->second; + auto perc_value = yyjson_get_str(val); + if (perc_value) { + config_from_file[perc_type] = std::stoi(perc_value); + } + } + if (yyjson_is_obj(val)) { + ParseJsonObj(val, config_from_file); + } + } +} + +unordered_map RandomNumsConfig::GetConfigFromFile(const char *json_string) { + + unordered_map config_from_file; + auto doc = yyjson_read_file(json_string, YYJSON_READ_NOFLAG, NULL, NULL); + if (doc) { + yyjson_val *root = yyjson_doc_get_root(doc); + if (yyjson_is_obj(root)) { + ParseJsonObj(root, config_from_file); + } + yyjson_doc_free(doc); + } else { + // Couldn't read JSON with percentages config + yyjson_doc_free(doc); + return GetDefaultConfig(); + } + // set values of missing statement types to 0 + for (idx_t i = 0; i < static_cast(RandomPercentagesEnum::COUNT); ++i) { + RandomPercentagesEnum statement_type = static_cast(i); + if (config_from_file.find(statement_type) == config_from_file.end()) { + config_from_file[statement_type] = 0; + } + } + return config_from_file; + } } // namespace duckdb \ No newline at end of file diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 78d5169..f14b0c4 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -42,7 +42,7 @@ StatementGenerator::StatementGenerator(ClientContext &context) : context(context } StatementGenerator::StatementGenerator(StatementGenerator &parent_p) - : verification_enabled(parent_p.verification_enabled), context(parent_p.context), parent(&parent_p), + : 
config(parent_p.config), verification_enabled(parent_p.verification_enabled), context(parent_p.context), parent(&parent_p), generator_context(parent_p.generator_context), depth(parent_p.depth + 1) { if (depth > MAX_DEPTH) { throw InternalException("depth too high"); @@ -92,7 +92,7 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon return result; } -unique_ptr StatementGenerator::GenerateStatement(unordered_map config) { +unique_ptr StatementGenerator::GenerateStatement() { if (RandomPercentage(config[RandomPercentagesEnum::SELECT])) { return GenerateStatement(StatementType::SELECT_STATEMENT); } @@ -165,7 +165,7 @@ unique_ptr StatementGenerator::GenerateDetach() { // generate USE statement unique_ptr StatementGenerator::GenerateSet() { auto name_expr = make_uniq(GenerateDataBaseName()); - if (RandomPercentage(90)) { + if (RandomPercentage(config[RandomPercentagesEnum::SET])) { // 90 auto name = GetRandomAttachedDataBase(); name_expr = make_uniq(Value(name)); } @@ -206,7 +206,7 @@ unique_ptr StatementGenerator::GenerateDelete() { unique_ptr StatementGenerator::GenerateDetachInfo() { auto info = make_uniq(); - if (RandomPercentage(20)) { + if (RandomPercentage(config[RandomPercentagesEnum::DETACH_RANDOM_NAME])) { //20 info->name = "RANDOM_NAME_" + RandomString(15); } else { info->name = GetRandomAttachedDataBase(); @@ -312,23 +312,23 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) { unique_ptr StatementGenerator::GenerateQueryNode() { unique_ptr result; bool is_distinct = false; - if (RandomPercentage(70)) { + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE])) { // 70 // select node auto select_node = make_uniq(); // generate CTEs GenerateCTEs(*select_node); - is_distinct = RandomPercentage(30); - if (RandomPercentage(95)) { + is_distinct = RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT]); // 30 + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_FROM_TABLE])) { // 95 select_node->from_table 
= GenerateTableRef(); } select_node->select_list = GenerateChildren(1, 10); - select_node->where_clause = RandomExpression(60); - select_node->having = RandomExpression(25); - if (RandomPercentage(30)) { + select_node->where_clause = RandomExpression(config[RandomPercentagesEnum::SELECT_NODE_WHERE]); // 60 + select_node->having = RandomExpression(config[RandomPercentagesEnum::SELECT_NODE_HAVING]); // 25 + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_GROUPS])) { // 30 select_node->groups.group_expressions = GenerateChildren(1, 5); auto group_count = select_node->groups.group_expressions.size(); - if (RandomPercentage(70)) { + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_GROUP_BY])) { // 70 // single GROUP BY GroupingSet set; for (idx_t i = 0; i < group_count; i++) { @@ -352,18 +352,18 @@ unique_ptr StatementGenerator::GenerateQueryNode() { } } } - select_node->qualify = RandomExpression(10); + select_node->qualify = RandomExpression(config[RandomPercentagesEnum::SELECT_NODE_QUALIFY]); // 10 select_node->aggregate_handling = - RandomPercentage(10) ? AggregateHandling::FORCE_AGGREGATES : AggregateHandling::STANDARD_HANDLING; - if (RandomPercentage(10)) { + RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_AGGREGATE]) ? 
AggregateHandling::FORCE_AGGREGATES : AggregateHandling::STANDARD_HANDLING; // 10 + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_SAMPLE])) { auto sample = make_uniq(); - sample->is_percentage = RandomPercentage(50); + sample->is_percentage = RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC]); //50 if (sample->is_percentage) { - sample->sample_size = Value::BIGINT(RandomValue(100)); + sample->sample_size = Value::BIGINT(RandomValue(config[RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE])); // 100 - maybe we can replace the whole ->is_percentage thing with the sample size defined in the config.json } else { sample->sample_size = Value::BIGINT(RandomValue(99999)); } - sample->method = Choose( + sample->method = Choose( // maybe here is also go like sample->method = (config[RandomPercentagesEnum::SELECT_NODE_SAMPLE_METHOD])? {SampleMethod::BERNOULLI_SAMPLE, SampleMethod::RESERVOIR_SAMPLE, SampleMethod::SYSTEM_SAMPLE}); select_node->sample = std::move(sample); } @@ -396,14 +396,14 @@ unique_ptr StatementGenerator::GenerateQueryNode() { if (verification_enabled) { result->modifiers.push_back(GenerateOrderByAll()); } else if (!verification_enabled) { - if (RandomPercentage(5)) { + if (RandomPercentage(config[RandomPercentagesEnum::RESULT_MODIFIERS])) { // 5 result->modifiers.push_back(GenerateOrderBy()); } - if (RandomPercentage(50)) { + if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER])) { // 50 auto limit_percent_modifier = make_uniq(); - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT])) { // 30 limit_percent_modifier->limit = GenerateExpression(); - } else if (RandomPercentage(30)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET])) { // 30 limit_percent_modifier->offset = GenerateExpression(); } else { limit_percent_modifier->limit = GenerateExpression(); @@ -412,9 +412,9 @@ unique_ptr 
StatementGenerator::GenerateQueryNode() { result->modifiers.push_back(std::move(limit_percent_modifier)); } else { auto limit_modifier = make_uniq(); - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT])) { // 30 limit_modifier->limit = GenerateExpression(); - } else if (RandomPercentage(30)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET])) { // 30 limit_modifier->offset = GenerateExpression(); } else { limit_modifier->limit = GenerateExpression(); From ef48353b279a2f84c3a41f2cc787659a04280c79 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Thu, 10 Oct 2024 21:21:56 +0200 Subject: [PATCH 36/38] limit count of generated set operations --- src/include/random_nums_config.hpp | 77 +++++++++++++++-------------- src/include/statement_generator.hpp | 1 + src/random_nums_config.cpp | 1 + src/statement_generator.cpp | 45 ++++++++++------- 4 files changed, 68 insertions(+), 56 deletions(-) diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 9282288..f243ae1 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -51,76 +51,77 @@ enum class RandomPercentagesEnum : idx_t { SELECT_NODE_SAMPLE = 18, SELECT_NODE_SAMPLE_IS_PERC = 19, SELECT_NODE_SAMPLE_SIZE = 20, - RESULT_MODIFIERS = 21, - LIMIT_PERCENT_MODIFIER = 22, - LIMIT_PERCENT_MODIFIER_LIMIT = 23, - LIMIT_PERCENT_MODIFIER_OFFSET = 24, - LIMIT_MODIFIER_LIMIT = 25, - LIMIT_MODIFIER_OFFSET = 26, + SETOP_COUNT_LIMIT = 21, + RESULT_MODIFIERS = 22, + LIMIT_PERCENT_MODIFIER = 23, + LIMIT_PERCENT_MODIFIER_LIMIT = 24, + LIMIT_PERCENT_MODIFIER_OFFSET = 25, + LIMIT_MODIFIER_LIMIT = 26, + LIMIT_MODIFIER_OFFSET = 27, // ---------------------------------- // Generate Create Percentages // ---------------------------------- - CREATE = 27, - CREATE_GENERATE_SELECT = 28, - CREATE_NUM_COLS = 29, + CREATE = 28, + CREATE_GENERATE_SELECT = 29, + CREATE_NUM_COLS = 30, // 
----------------------------------- // Generate Table Ref Percentages // ----------------------------------- - TABLE_REF_BASE_TABLE_REF_PERC = 30, - TABLE_REF_EXPRESSION_LIST_REF = 31, - TABLE_REF_JOIN_REF = 32, + TABLE_REF_BASE_TABLE_REF_PERC = 31, + TABLE_REF_EXPRESSION_LIST_REF = 32, + TABLE_REF_JOIN_REF = 33, // ----------------------------------- // Generate Join Ref Percentages // ----------------------------------- - JOIN_REF_CROSS = 33, - JOIN_REF_ASOF = 34, - JOIN_REF_NATURAL = 35, - JOIN_REF_POSITIONAL = 36, - JOIN_REF_GENERAL_EXPRESSION = 37, + JOIN_REF_CROSS = 34, + JOIN_REF_ASOF = 35, + JOIN_REF_NATURAL = 36, + JOIN_REF_POSITIONAL = 37, + JOIN_REF_GENERAL_EXPRESSION = 38, // ----------------------------------- // Generate Expression Percentages // ----------------------------------- - EXPRESSION_COLUMN_REF = 38, - EXPRESSION_CONSTANT = 39, - EXPRESSION_SUBQUERY = 40, + EXPRESSION_COLUMN_REF = 39, + EXPRESSION_CONSTANT = 40, + EXPRESSION_SUBQUERY = 41, // ----------------------------------- // Generate Constant Value Percentages // ----------------------------------- - CONSTANT_VALUE_BIGINT = 41, - CONSTANT_VALUE_TO_STRING = 42, + CONSTANT_VALUE_BIGINT = 42, + CONSTANT_VALUE_TO_STRING = 43, // ----------------------------------- // Generate Function Percentages // ----------------------------------- - FUNCTION_AGGREGATE_WINDOW_FUNCTION = 43, - FUNCTION_AGGREGATE_ORDER_BY = 44, - FUNCTION_AGGREGATE_DISTINCT = 45, - FUNCTION_AGGREGATE_RANDOM_EXPRESSION = 46, + FUNCTION_AGGREGATE_WINDOW_FUNCTION = 44, + FUNCTION_AGGREGATE_ORDER_BY = 45, + FUNCTION_AGGREGATE_DISTINCT = 46, + FUNCTION_AGGREGATE_RANDOM_EXPRESSION = 47, // ----------------------------------- // Generate Window Function Percentages // ----------------------------------- - WINDOW_FUNCTION_PARTITIONS = 47, - WINDOW_FUNCTION_ORDERS = 48, - WINDOW_FUNCTION_IGNORE_NULLS = 49, - WINDOW_FUNCTION_RESULT_OFFSET = 50, - WINDOW_FUNCTION_RESULT_DEFAULT = 51, + WINDOW_FUNCTION_PARTITIONS = 48, + 
WINDOW_FUNCTION_ORDERS = 49, + WINDOW_FUNCTION_IGNORE_NULLS = 50, + WINDOW_FUNCTION_RESULT_OFFSET = 51, + WINDOW_FUNCTION_RESULT_DEFAULT = 52, // ----------------------------------- // Generate Star Percentages // ----------------------------------- - STAR_RELATION_NAME = 52, - STAR_COLUMN_NAME = 53, - STAR_COLUMN_NAME_EXCLUDE_LIST = 54, - STAR_COLUMNS_TRUE = 55, - STAR_COLUMNS_TRUE_LAMBDA = 56, + STAR_RELATION_NAME = 53, + STAR_COLUMN_NAME = 54, + STAR_COLUMN_NAME_EXCLUDE_LIST = 55, + STAR_COLUMNS_TRUE = 56, + STAR_COLUMNS_TRUE_LAMBDA = 57, - RELATIONAL_NAME_CHOOSE_CURRENT = 57, - COLUMN_NAMES_CHOOSE_CURRENT = 58, + RELATIONAL_NAME_CHOOSE_CURRENT = 58, + COLUMN_NAMES_CHOOSE_CURRENT = 59, COUNT diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index c6d7dce..741c013 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -42,6 +42,7 @@ class StatementGenerator { friend class AggregateChecker; friend class WindowChecker; unordered_map config; + idx_t setop_limit = 0; public: StatementGenerator(ClientContext &context); diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index fd7828e..8fa1ee1 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -30,6 +30,7 @@ unordered_map StringToRandomPercentagesEnum = { { "select_node_sample_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE }, { "select_node_sample_is_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC }, { "select_node_sample_size", RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE }, + { "setop_count_limit", RandomPercentagesEnum::SETOP_COUNT_LIMIT }, { "result_modifiers", RandomPercentagesEnum::RESULT_MODIFIERS }, { "limit_percent_modifier", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER }, { "limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT }, diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index f14b0c4..d2f64db 100644 --- 
a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -312,6 +312,7 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) { unique_ptr StatementGenerator::GenerateQueryNode() { unique_ptr result; bool is_distinct = false; + idx_t config_setop = config[RandomPercentagesEnum::SETOP_COUNT_LIMIT]; if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE])) { // 70 // select node auto select_node = make_uniq(); @@ -369,25 +370,33 @@ unique_ptr StatementGenerator::GenerateQueryNode() { } result = std::move(select_node); } else { - auto setop = make_uniq(); - GenerateCTEs(*setop); - setop->setop_type = Choose({SetOperationType::EXCEPT, SetOperationType::INTERSECT, - SetOperationType::UNION, SetOperationType::UNION_BY_NAME}); - setop->left = GenerateQueryNode(); - setop->right = GenerateQueryNode(); - switch (setop->setop_type) { - case SetOperationType::EXCEPT: - case SetOperationType::INTERSECT: - is_distinct = true; - break; - case SetOperationType::UNION: - case SetOperationType::UNION_BY_NAME: - is_distinct = RandomBoolean(); - break; - default: - throw InternalException("Unsupported set operation type"); + // limit the count of generated setops + while (setop_limit < config_setop) { + setop_limit++; + auto setop = make_uniq(); + GenerateCTEs(*setop); + //should be possible to choose: from the complete enum, from only one enum value defined in config file, from 2-3 enum values + setop->setop_type = Choose({SetOperationType::EXCEPT, SetOperationType::INTERSECT, + SetOperationType::UNION, SetOperationType::UNION_BY_NAME}); + setop->left = GenerateQueryNode(); + setop->right = GenerateQueryNode(); + switch (setop->setop_type) { + case SetOperationType::EXCEPT: + case SetOperationType::INTERSECT: + is_distinct = true; + break; + case SetOperationType::UNION: + case SetOperationType::UNION_BY_NAME: + is_distinct = RandomBoolean(); + break; + default: + throw InternalException("Unsupported set operation type"); + } + if (setop_limit == config_setop) { 
+ result = std::move(setop); + } } - result = std::move(setop); + result = std::move(make_uniq()); } if (is_distinct) { From 3cd1bfb75f280a90f482720fbf0d44f965297e49 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Fri, 11 Oct 2024 11:18:22 +0200 Subject: [PATCH 37/38] set percentage to setop --- config_nested.json | 3 +- src/include/random_nums_config.hpp | 2 +- src/include/statement_generator.hpp | 1 - src/random_nums_config.cpp | 2 +- src/statement_generator.cpp | 47 +++++++++++++---------------- 5 files changed, 25 insertions(+), 30 deletions(-) diff --git a/config_nested.json b/config_nested.json index 8e24fb7..bf2a2c0 100644 --- a/config_nested.json +++ b/config_nested.json @@ -16,7 +16,7 @@ "select": { "select_percentage": "70", "select_node": { - "select_node_perc": "70", + "select_node_perc": "5", "select_node_is_distinct_perc": "30", "select_node_from_table_perc": "95", "select_node_where_perc": "60", @@ -33,6 +33,7 @@ "select_node_sample_size": "100" } }, + "setop": "40", "result_modifiers": "5", "limit_percent_modifier": "50", "limit_percent_modifier_limit": "30", diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index f243ae1..251a4ac 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -51,7 +51,7 @@ enum class RandomPercentagesEnum : idx_t { SELECT_NODE_SAMPLE = 18, SELECT_NODE_SAMPLE_IS_PERC = 19, SELECT_NODE_SAMPLE_SIZE = 20, - SETOP_COUNT_LIMIT = 21, + SETOP = 21, RESULT_MODIFIERS = 22, LIMIT_PERCENT_MODIFIER = 23, LIMIT_PERCENT_MODIFIER_LIMIT = 24, diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 741c013..c6d7dce 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -42,7 +42,6 @@ class StatementGenerator { friend class AggregateChecker; friend class WindowChecker; unordered_map config; - idx_t setop_limit = 0; public: StatementGenerator(ClientContext &context); diff --git 
a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 8fa1ee1..60344d5 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -30,7 +30,7 @@ unordered_map StringToRandomPercentagesEnum = { { "select_node_sample_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE }, { "select_node_sample_is_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC }, { "select_node_sample_size", RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE }, - { "setop_count_limit", RandomPercentagesEnum::SETOP_COUNT_LIMIT }, + { "setop", RandomPercentagesEnum::SETOP }, { "result_modifiers", RandomPercentagesEnum::RESULT_MODIFIERS }, { "limit_percent_modifier", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER }, { "limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT }, diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index d2f64db..aac0eb5 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -312,7 +312,6 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) { unique_ptr StatementGenerator::GenerateQueryNode() { unique_ptr result; bool is_distinct = false; - idx_t config_setop = config[RandomPercentagesEnum::SETOP_COUNT_LIMIT]; if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE])) { // 70 // select node auto select_node = make_uniq(); @@ -369,33 +368,29 @@ unique_ptr StatementGenerator::GenerateQueryNode() { select_node->sample = std::move(sample); } result = std::move(select_node); - } else { + } else if (RandomPercentage(config[RandomPercentagesEnum::SETOP])) { // limit the count of generated setops - while (setop_limit < config_setop) { - setop_limit++; - auto setop = make_uniq(); - GenerateCTEs(*setop); - //should be possible to choose: from the complete enum, from only one enum value defined in config file, from 2-3 enum values - setop->setop_type = Choose({SetOperationType::EXCEPT, SetOperationType::INTERSECT, - SetOperationType::UNION, SetOperationType::UNION_BY_NAME}); - 
setop->left = GenerateQueryNode(); - setop->right = GenerateQueryNode(); - switch (setop->setop_type) { - case SetOperationType::EXCEPT: - case SetOperationType::INTERSECT: - is_distinct = true; - break; - case SetOperationType::UNION: - case SetOperationType::UNION_BY_NAME: - is_distinct = RandomBoolean(); - break; - default: - throw InternalException("Unsupported set operation type"); - } - if (setop_limit == config_setop) { - result = std::move(setop); - } + auto setop = make_uniq(); + GenerateCTEs(*setop); + //should be possible to choose: from the complete enum, from only one enum value defined in config file, from 2-3 enum values + setop->setop_type = Choose({SetOperationType::EXCEPT, SetOperationType::INTERSECT, + SetOperationType::UNION, SetOperationType::UNION_BY_NAME}); + setop->left = GenerateQueryNode(); + setop->right = GenerateQueryNode(); + switch (setop->setop_type) { + case SetOperationType::EXCEPT: + case SetOperationType::INTERSECT: + is_distinct = true; + break; + case SetOperationType::UNION: + case SetOperationType::UNION_BY_NAME: + is_distinct = RandomBoolean(); + break; + default: + throw InternalException("Unsupported set operation type"); } + result = std::move(setop); + } else { result = std::move(make_uniq()); } From db0645b145d8e0b1a798fcdabe2cbdf2aa4d2aa8 Mon Sep 17 00:00:00 2001 From: Zuleykha Pavlichenkova Date: Sun, 27 Oct 2024 22:51:30 +0100 Subject: [PATCH 38/38] add more values from the config file --- config_nested.json | 3 ++ src/include/random_nums_config.hpp | 3 ++ src/random_nums_config.cpp | 3 ++ src/statement_generator.cpp | 52 +++++++++++++++--------------- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/config_nested.json b/config_nested.json index bf2a2c0..fa4c6cd 100644 --- a/config_nested.json +++ b/config_nested.json @@ -70,6 +70,7 @@ "function": { "function_aggregate": { "function_aggregate_window_function": "10", + "function_aggregate_filter": "10", "function_aggregate_order_by": "10", 
"function_aggregate_random_expression": "10", "function_aggregate_distinct": "10" @@ -79,12 +80,14 @@ "window_function_partitions": "50", "window_function_orders": "30", "window_function_ignore_nulls": "30", + "window_function_filter_expr": "30", "window_function_result_offset": "30", "window_function_result_default": "30" }, "star": { "star_relation_name": "10", "star_column_name_exclude_list": "20", + "star_column_name_replace_list": "20", "star_column_name": "20", "star_columns": { "star_columns_true": "50", diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp index 251a4ac..1ad456d 100644 --- a/src/include/random_nums_config.hpp +++ b/src/include/random_nums_config.hpp @@ -99,6 +99,7 @@ enum class RandomPercentagesEnum : idx_t { // ----------------------------------- FUNCTION_AGGREGATE_WINDOW_FUNCTION = 44, FUNCTION_AGGREGATE_ORDER_BY = 45, + FUNCTION_AGGREGATE_FILTER = 60, FUNCTION_AGGREGATE_DISTINCT = 46, FUNCTION_AGGREGATE_RANDOM_EXPRESSION = 47, @@ -107,6 +108,7 @@ enum class RandomPercentagesEnum : idx_t { // ----------------------------------- WINDOW_FUNCTION_PARTITIONS = 48, WINDOW_FUNCTION_ORDERS = 49, + WINDOW_FUNCTION_FILTER_EXPRESSION = 61, WINDOW_FUNCTION_IGNORE_NULLS = 50, WINDOW_FUNCTION_RESULT_OFFSET = 51, WINDOW_FUNCTION_RESULT_DEFAULT = 52, @@ -117,6 +119,7 @@ enum class RandomPercentagesEnum : idx_t { STAR_RELATION_NAME = 53, STAR_COLUMN_NAME = 54, STAR_COLUMN_NAME_EXCLUDE_LIST = 55, + STAR_COLUMN_NAME_REPLACE_LIST = 62, STAR_COLUMNS_TRUE = 56, STAR_COLUMNS_TRUE_LAMBDA = 57, diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp index 60344d5..8a96e42 100644 --- a/src/random_nums_config.cpp +++ b/src/random_nums_config.cpp @@ -55,15 +55,18 @@ unordered_map StringToRandomPercentagesEnum = { { "constant_value_to_string", RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING }, { "function_aggregate_window_function", RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION }, { 
"function_aggregate_order_by", RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY }, + { "function_aggregate_filter", RandomPercentagesEnum::FUNCTION_AGGREGATE_FILTER }, { "function_aggregate_random_expression", RandomPercentagesEnum::FUNCTION_AGGREGATE_RANDOM_EXPRESSION }, { "function_aggregate_distinct", RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT }, { "window_function_partitions", RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS }, + { "window_function_filter_expr", RandomPercentagesEnum::WINDOW_FUNCTION_FILTER_EXPRESSION }, { "window_function_orders", RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS }, { "window_function_ignore_nulls", RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS }, { "window_function_result_offset", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET }, { "window_function_result_default", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT }, { "star_relation_name", RandomPercentagesEnum::STAR_RELATION_NAME }, { "star_column_name_exclude_list", RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST }, + { "star_column_name_replace_list", RandomPercentagesEnum::STAR_COLUMN_NAME_REPLACE_LIST }, { "star_column_name", RandomPercentagesEnum::STAR_COLUMN_NAME }, { "star_columns_true", RandomPercentagesEnum::STAR_COLUMNS_TRUE }, { "star_columns_true_lambda", RandomPercentagesEnum::STAR_COLUMNS_TRUE_LAMBDA }, diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index aac0eb5..8075195 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -434,13 +434,13 @@ unique_ptr StatementGenerator::GenerateQueryNode() { // Table Ref //===--------------------------------------------------------------------===// unique_ptr StatementGenerator::GenerateTableRef() { - if (RandomPercentage(60)) { + if (RandomPercentage(config[RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC])) { return GenerateBaseTableRef(); } - if (RandomPercentage(20)) { + if 
(RandomPercentage(config[RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF])) { return GenerateExpressionListRef(); } - if (RandomPercentage(40)) { + if (RandomPercentage(config[RandomPercentagesEnum::TABLE_REF_JOIN_REF])) { return GenerateJoinRef(); } switch (RandomValue(3)) { @@ -500,13 +500,13 @@ unique_ptr StatementGenerator::GenerateExpressionListRef() { unique_ptr StatementGenerator::GenerateJoinRef() { JoinRefType join_ref; - if (RandomPercentage(10)) { + if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_CROSS])) { join_ref = JoinRefType::CROSS; - } else if (RandomPercentage(10)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_ASOF])) { join_ref = JoinRefType::ASOF; - } else if (RandomPercentage(10)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_NATURAL])) { join_ref = JoinRefType::NATURAL; - } else if (RandomPercentage(10)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_POSITIONAL])) { join_ref = JoinRefType::POSITIONAL; } else { join_ref = JoinRefType::REGULAR; @@ -515,7 +515,7 @@ unique_ptr StatementGenerator::GenerateJoinRef() { join->left = GenerateTableRef(); join->right = GenerateTableRef(); if (join_ref != JoinRefType::CROSS && join_ref != JoinRefType::NATURAL) { - if (RandomPercentage(70)) { + if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION])) { join->condition = GenerateExpression(); } else { while (true) { @@ -658,13 +658,13 @@ class ExpressionDepthChecker { unique_ptr StatementGenerator::GenerateExpression() { ExpressionDepthChecker checker(*this); - if (RandomPercentage(50) || RandomPercentage(expression_depth + depth * 5)) { + if (RandomPercentage(config[RandomPercentagesEnum::EXPRESSION_COLUMN_REF]) || RandomPercentage(expression_depth + depth * 5)) { return GenerateColumnRef(); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::EXPRESSION_CONSTANT])) { return GenerateConstant(); } - if 
(RandomPercentage(3)) { + if (RandomPercentage(config[RandomPercentagesEnum::EXPRESSION_SUBQUERY])) { return GenerateSubquery(); } switch (RandomValue(9)) { @@ -692,10 +692,10 @@ unique_ptr StatementGenerator::GenerateExpression() { } Value StatementGenerator::GenerateConstantValue() { - if (RandomPercentage(50)) { + if (RandomPercentage(config[RandomPercentagesEnum::CONSTANT_VALUE_BIGINT])) { return Value::BIGINT(RandomValue(9999)); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING])) { return Value(UUID::ToString(UUID::GenerateRandomUUID(RandomEngine::Get(context)))); } auto &val = Choose(generator_context->test_types); @@ -781,7 +781,7 @@ unique_ptr StatementGenerator::GenerateFunction() { if (actual_function.varargs.id() != LogicalTypeId::INVALID) { max_parameters += 5; } - if (RandomPercentage(10) && !in_window) { + if (RandomPercentage(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION]) && !in_window) { return GenerateWindowFunction(&actual_function); } if (in_aggregate) { @@ -789,12 +789,12 @@ unique_ptr StatementGenerator::GenerateFunction() { return GenerateColumnRef(); } checker = make_uniq(*this); - filter = RandomExpression(10); - if (RandomPercentage(10)) { + filter = RandomExpression(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_FILTER]); + if (RandomPercentage(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY])) { // generate order by order_bys = GenerateOrderBy(); } - if (RandomPercentage(10)) { + if (RandomPercentage(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT])) { distinct = true; } break; @@ -973,15 +973,15 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional WindowChecker checker(*this); auto result = make_uniq(type, INVALID_CATALOG, INVALID_SCHEMA, std::move(name)); result->children = GenerateChildren(min_parameters, max_parameters); - while
(RandomPercentage(config[RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS])) { result->partitions.push_back(GenerateExpression()); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS])) { result->orders = std::move(GenerateOrderBy()->orders); } if (function) { - result->filter_expr = RandomExpression(30); - if (RandomPercentage(30)) { + result->filter_expr = RandomExpression(config[RandomPercentagesEnum::WINDOW_FUNCTION_FILTER_EXPRESSION]); + if (RandomPercentage(config[RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS])) { result->ignore_nulls = true; } } @@ -1018,8 +1018,8 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional switch (type) { case ExpressionType::WINDOW_LEAD: case ExpressionType::WINDOW_LAG: - result->offset_expr = RandomExpression(30); - result->default_expr = RandomExpression(30); + result->offset_expr = RandomExpression(config[RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET]); + result->default_expr = RandomExpression(config[RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT]); break; default: break; @@ -1054,19 +1054,19 @@ unique_ptr StatementGenerator::GenerateConjunction() { unique_ptr StatementGenerator::GenerateStar() { auto result = make_uniq(); if (!current_relation_names.empty()) { - if (RandomPercentage(10)) { + if (RandomPercentage(config[RandomPercentagesEnum::STAR_RELATION_NAME])) { result->relation_name = GenerateRelationName(); } } if (!verification_enabled) { - while (RandomPercentage(20)) { + while (RandomPercentage(config[RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST])) { auto column_name = GenerateColumnName(); if (column_name.empty()) { break; } result->exclude_list.insert(column_name); } - while (RandomPercentage(20)) { + while (RandomPercentage(config[RandomPercentagesEnum::STAR_COLUMN_NAME_REPLACE_LIST])) { auto column_name = GenerateColumnName(); if (column_name.empty()) { break;