diff --git a/copilot_proxy/utils/codegen.py b/copilot_proxy/utils/codegen.py index 10d8670..003a655 100644 --- a/copilot_proxy/utils/codegen.py +++ b/copilot_proxy/utils/codegen.py @@ -164,7 +164,7 @@ def generate(self, data): # Calculate the beam index with the highest log prob in constant time. one_beam = False - print(f"one_beam is: {one_beam}") + # print(f"one_beam is: {one_beam}") lp_data = result.as_numpy("output_log_probs") lp_sums = np.zeros((lp_data.shape[0], lp_data.shape[1])) lp_result = np.zeros((lp_data.shape[0], lp_data.shape[2])) # Pick the best one from each beam diff --git a/docker-compose.yaml b/docker-compose.yaml index 19c2462..bb01e06 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,7 @@ version: '3.3' services: triton: - image: triton_with_ft_boyuanchen:22.06 + image: triton_with_ft_3296:22.06 command: bash -c "CUDA_VISIBLE_DEVICES=${GPUS} mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model" shm_size: '2gb' volumes: