Skip to content

Commit

Permalink
Added gt_url option to the runbook generator
Browse files Browse the repository at this point in the history
  • Loading branch information
magdalendobson committed Oct 14, 2024
1 parent 37db26e commit 1c94d8a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
14 changes: 9 additions & 5 deletions neurips23/streaming/runbooks/gen_expiration_time_runbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
seed: seed given to random generator
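do_replace: whether or not to include replace in the runbook
gt_url: optional URL (presumably of the ground truth); when not None it is stored under the dataset's "gt_url" key in the runbook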
'''
def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ratios, timesteps, seed = 0, do_replace = False):
def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ratios, timesteps, seed = 0, do_replace = False, gt_url = None):
random.seed(seed)
data = {dataset_name: {}}

Expand Down Expand Up @@ -102,6 +102,9 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra

data[dataset_name]["max_pts"]=max_num_points

if gt_url is not None:
data[dataset_name]["gt_url"] = gt_url

with open(runbook_filename, 'w') as outfile:
yaml.dump(data, outfile, default_flow_style=False)

Expand All @@ -112,7 +115,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-35M'
dataset_size = 35000000
max_t = 350
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False)
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expirationtime_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url)

ratios = (0, 4, 18)
timesteps = (0, 100, 20)
Expand All @@ -121,7 +125,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-1M'
dataset_size = 1000000
max_t = 100
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False)
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, None)

ratios = (0, 4, 18)
timesteps = (0, 100, 20)
Expand All @@ -130,7 +134,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-35M'
dataset_size = 35000000
max_t = 350
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True)
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)

ratios = (0, 6, 25)
timesteps = (0, 200, 50)
Expand All @@ -139,5 +143,5 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'msmarco-100M'
dataset_size = 101070374
max_t = 1000
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False)
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, None)

Original file line number Diff line number Diff line change
Expand Up @@ -3304,3 +3304,4 @@ wikipedia-35M:
1001:
operation: search
max_pts: 5200000
gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expirationtime_runbook.yaml

0 comments on commit 1c94d8a

Please sign in to comment.