diff --git a/data_export.py b/data_export.py index 79abe2a5a..6063f979a 100644 --- a/data_export.py +++ b/data_export.py @@ -96,18 +96,22 @@ def cleaned_run_metric(run_metrics): dataset = DATASETS[dataset_name]() runbook_paths = [None] if track == 'streaming': - runbook_paths = ['neurips23/runbooks/streaming/simple_runbook.yaml', - 'neurips23/runbooks/streaming/simple_replace_runbook.yaml', - 'neurips23/runbooks/streaming/random_replace_runbook.yaml', - 'neurips23/runbooks/streaming/clustered_replace_runbook.yaml', - 'neurips23/runbooks/streaming/clustered_runbook.yaml', - 'neurips23/runbooks/streaming/clustered_runbook.yaml', - 'neurips23/runbooks/streaming/delete_runbook.yaml', - 'neurips23/runbooks/streaming/final_runbook.yaml', - 'neurips23/runbooks/streaming/msturing-10M_slidingwindow_runbook.yaml', - 'neurips23/runbooks/streaming/wikipedia-35M_expirationtime_runbook.yaml', - 'neurips23/runbooks/streaming/wikipedia-35M_expiration_time_replace_runbook.yaml', - 'neurips23/runbooks/streaming/msmarco-100M_expirationtime_runbook.yaml'] + runbook_paths = ['neurips23/runbooks/simple_runbook.yaml', + 'neurips23/runbooks/simple_replace_runbook.yaml', + 'neurips23/runbooks/random_replace_runbook.yaml', + 'neurips23/runbooks/clustered_replace_runbook.yaml', + 'neurips23/runbooks/clustered_runbook.yaml', + 'neurips23/runbooks/clustered_runbook.yaml', + 'neurips23/runbooks/delete_runbook.yaml', + 'neurips23/runbooks/final_runbook.yaml', + 'neurips23/runbooks/msturing-10M_slidingwindow_runbook.yaml', + 'neurips23/runbooks/wikipedia-35M_expirationtime_runbook.yaml', + 'neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml', + 'neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml', + 'neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml', + 'neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml', + 'neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml', + 'neurips23/runbooks/msmarco-100M_expirationtime_runbook.yaml'] for runbook_path in runbook_paths: print("Looking for runbook ", runbook_path) results = load_all_results(dataset_name, neurips23track=track, runbook_path=runbook_path) diff --git a/neurips23/runbooks/gen_expiration_time_runbook.py b/neurips23/runbooks/gen_expiration_time_runbook.py index bb5996bc2..23eb50014 100644 --- a/neurips23/runbooks/gen_expiration_time_runbook.py +++ b/neurips23/runbooks/gen_expiration_time_runbook.py @@ -124,7 +124,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra dataset_name = 'wikipedia-1M' dataset_size = 1000000 max_t = 100 -gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml/" +gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml" gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url) ratios = (0, 4, 18) @@ -134,7 +134,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra dataset_name = 'wikipedia-35M' dataset_size = 8000000 #only use a prefix of the dataset max_t = 80 -gt_url = None +gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml" gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False) ratios = (0, 4, 18) @@ -144,7 +144,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra dataset_name = 'wikipedia-1M' dataset_size = 1000000 max_t = 100 -gt_url = None +gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml" gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False) ratios = (3, 8, 18) @@ -154,7 +154,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra dataset_name = 'wikipedia-35M' dataset_size = 35000000 max_t = 350 -gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None) +gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml" +gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url) ratios = (1, 8, 18) timesteps = (0, 100, 20) @@ -163,7 +164,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra dataset_name = 'wikipedia-1M' dataset_size = 1000000 max_t = 100 -gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None) +gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml" +gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url) ratios = (0, 6, 25) timesteps = (0, 200, 50) diff --git a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml index b12159b07..322e43264 100644 --- a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml +++ b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml @@ -1208,3 +1208,4 @@ wikipedia-1M: 316: operation: search max_pts: 293233 + gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml" diff --git a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml index d036dc57d..a26fbe274 100644 --- a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml +++ b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml @@ -1068,3 +1068,4 @@ wikipedia-1M: 278: operation: search max_pts: 698369 + gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml" diff --git a/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml b/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml index 0875cf409..d40f4b42e 100644 --- a/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml +++ b/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml @@ -840,4 +840,4 @@ wikipedia-1M: 260: operation: search max_pts: 410000 - gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml/ + gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml diff --git a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml index 499c6fab3..a9b3aa49e 100644 --- a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml +++ b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml @@ -4436,3 +4436,4 @@ wikipedia-35M: 1150: operation: search max_pts: 6682767 + gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml" diff --git a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml index f4fdcdd1e..ad5a38be0 100644 --- a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml +++ b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml @@ -852,3 +852,4 @@ wikipedia-35M: 222: operation: search max_pts: 5548955 + gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml" diff --git a/neurips23/streaming/diskann/config.yaml b/neurips23/streaming/diskann/config.yaml index 5f7d765b2..a0d2b9ab6 100644 --- a/neurips23/streaming/diskann/config.yaml +++ b/neurips23/streaming/diskann/config.yaml @@ -48,6 +48,32 @@ msturing-1M: query-args: | [{"Ls":300, "T":16}, {"Ls":100, "T":16}] +wikipedia-1M: + diskann: + docker-tag: neurips23-streaming-diskann + module: neurips23.streaming.diskann.diskann-str + constructor: diskann + base-args: ["@metric"] + run-groups: + base: + args: | + [{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}] + query-args: | + [ + {"Ls":100, "T":32}] +wikipedia-35M: + diskann: + docker-tag: neurips23-streaming-diskann + module: neurips23.streaming.diskann.diskann-str + constructor: diskann + base-args: ["@metric"] + run-groups: + base: + args: | + [{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}] + query-args: | + [ + {"Ls":100, "T":32}] msspacev-10M: diskann: docker-tag: neurips23-streaming-diskann