
Commit

change policies
blinkbear committed Dec 3, 2024
1 parent b4f21d8 commit 61572e5
Showing 7 changed files with 329 additions and 366 deletions.
16 changes: 8 additions & 8 deletions benchmarks/1_serving_benchmark.sh
@@ -25,10 +25,10 @@ result_dir="${pwd}/result"
# scheduler_policy=(infer)
# swap_policies=(partial)
declare -a scheduler_swap_policies
-# scheduler_swap_policies[0]="tfittradeoff partial"
+scheduler_swap_policies[0]="tfittradeoff partial"
scheduler_swap_policies[1]="fcfs full"
scheduler_swap_policies[2]="las full"
-scheduler_swap_policies[3]="tfittradeoff full"
+# scheduler_swap_policies[3]="tfittradeoff full"
scheduler_swap_policies[4]="sjf full"
scheduler_swap_policies[5]="srjf full"
# scheduler_swap_policies[3]="sjmlfq full"
@@ -37,19 +37,19 @@ scheduler_swap_policies[5]="srjf full"
# scheduler_swap_policies[5]="sjmlfq full"

preemption_mode="swap"
-gpu_memory_utilization=0.9 # 0.5, 0.7, 0.9
+gpu_memory_utilization=0.7 # 0.5, 0.7, 0.9
max_num_seqs=384
swap_space=64
max_tokens=2048
iter_theshold=15
max_serving_time=86400 # 86400
-request_duration=300 # 1
+request_duration=120 # 1
num_shared_blocks=0

-request_rates[0]=0.5
+# request_rates[0]=0.5
request_rates[1]=1.0
-request_rates[2]=2.0
-request_rates[3]=5.0
+# request_rates[2]=2.0
+# request_rates[3]=5.0
# request_rates[4]=10.0
# request_rates[4]=10.0
# request_rates[5]=20.0
@@ -81,7 +81,7 @@ for waiting_iter in "${waiting_iter_base[@]}"; do
for i in {0..0}; do
taskset -c 30-49 python3 benchmark_serving.py --execution-counter $COUNTER --dataset-path $dataset_path \
--dataset-name $dataset_name --request-rate $request_rate \
---num-prompts 500 --request-duration $request_duration --sharegpt-output-len 2000 --model $model_name --scheduler-policy $policy \
+--num-prompts 3000 --request-duration $request_duration --sharegpt-output-len 2000 --model $model_name --scheduler-policy $policy \
--save-result --result-dir $result_dir \
--metadata swap_space=$swap_space preemption_mode=$preemption_mode \
scheduler_policy=$policy gpu_memory_utilization=$gpu_memory_utilization \
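For orientation, here is a minimal sketch (Python, illustrative only) of the benchmark configuration this commit leaves active in 1_serving_benchmark.sh: the tfittradeoff/partial pair is re-enabled, tfittradeoff/full is commented out, gpu_memory_utilization drops from 0.9 to 0.7, request_duration from 300 to 120, --num-prompts rises from 500 to 3000, and only the 1.0 request rate stays uncommented. It assumes the script sweeps every uncommented policy/rate pair, which the visible hunk only partially shows.

# Illustrative only: the policy/rate sweep that appears to remain active
# after this commit (assumes the script loops over all uncommented entries).
scheduler_swap_policies = [
    ("tfittradeoff", "partial"),  # re-enabled in this commit
    ("fcfs", "full"),
    ("las", "full"),
    ("sjf", "full"),
    ("srjf", "full"),
]
request_rates = [1.0]  # 0.5, 2.0 and 5.0 are now commented out

for scheduler, swap in scheduler_swap_policies:
    for rate in request_rates:
        print(f"scheduler={scheduler} swap={swap} request_rate={rate} "
              f"gpu_memory_utilization=0.7 request_duration=120 num_prompts=3000")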
3 changes: 2 additions & 1 deletion benchmarks/benchmark_serving.py
@@ -110,10 +110,11 @@ def sample_sharegpt_requests(
# if prompt_len < 0:
# # Prune too short sequences.
# continue
-if prompt_len + output_len > 2048:
+if output_len < 10 or prompt_len > 128:
# Prune too long sequences.
continue
filtered_dataset.append((prompt, prompt_len, output_len))
print(f"Number of requests: {len(filtered_dataset)}")
import collections
prompt_len_list=collections.Counter(prompt_len_list)
return filtered_dataset
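The revised filter in sample_sharegpt_requests no longer caps the combined prompt and output length at 2048 tokens; it now drops any sample whose output is shorter than 10 tokens or whose prompt is longer than 128 tokens. Below is a self-contained sketch of that predicate; the sample tuples are hypothetical and only illustrate which cases are kept.

# Hypothetical illustration of the new filtering rule: drop samples whose
# output is shorter than 10 tokens or whose prompt exceeds 128 tokens
# (previously: drop when prompt_len + output_len > 2048).
samples = [
    ("short prompt, long answer", 64, 512),  # kept
    ("long prompt", 300, 256),               # dropped: prompt_len > 128
    ("tiny answer", 50, 4),                  # dropped: output_len < 10
]

filtered_dataset = [
    (prompt, prompt_len, output_len)
    for prompt, prompt_len, output_len in samples
    if not (output_len < 10 or prompt_len > 128)
]
print(f"Number of requests: {len(filtered_dataset)}")  # -> 1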
(The remaining 5 changed files are not shown.)
