From 1701bc3ef03d60169ced356ab65fba9188f47917 Mon Sep 17 00:00:00 2001
From: Kevin-Yang
Date: Sat, 26 Oct 2024 16:32:02 +0900
Subject: [PATCH] remove unnecessary print

Signed-off-by: Kevin-Yang
---
 vllm/attention/backends/flash_attn.py | 9 ---------
 vllm/worker/embedding_model_runner.py | 1 -
 2 files changed, 10 deletions(-)

diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
index 2a8eaa8314b04..ffa05e80623ac 100644
--- a/vllm/attention/backends/flash_attn.py
+++ b/vllm/attention/backends/flash_attn.py
@@ -349,11 +349,6 @@ def _add_seq_group(
                 else:
                     block_table = block_tables[seq_id][
                         -curr_sliding_window_block:]
-
-            print(f"prefix cache hit: {prefix_cache_hit}")
-            print(f"chunked prefill enabled: {chunked_prefill_enabled}")
-            print(f"prompt: {is_prompt}")
-            print(f"block table: {block_table}")
             self.block_tables.append(block_table)
 
             # Compute slot mapping.
@@ -405,7 +400,6 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             for inter_data in self.input_builder.inter_data_list
         ])
         for inter_data in self.input_builder.inter_data_list:
-            print(f"inter_data: {inter_data}")
             self._add_seq_group(inter_data,
                                 self.input_builder.chunked_prefill_enabled,
                                 prefix_cache_hit)
@@ -431,9 +425,6 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             block_tables = self._get_graph_runner_block_tables(
                 num_seqs, self.block_tables)
         else:
-            print(f"block tables: {self.block_tables}")
-            # if self.block_tables[0] is None:
-            #     self.block_tables = [list() for _ in range(num_seqs)]
             block_tables = make_tensor_with_pad(
                 self.block_tables,
                 pad=0,
diff --git a/vllm/worker/embedding_model_runner.py b/vllm/worker/embedding_model_runner.py
index a8d078daaf0d3..a7f5b2d4fdd1f 100644
--- a/vllm/worker/embedding_model_runner.py
+++ b/vllm/worker/embedding_model_runner.py
@@ -173,7 +173,6 @@ def prepare_model_input(
         finished_requests_ids: Optional[List[str]] = None
     ) -> ModelInputForGPUWithPoolingMetadata:
         assert seq_group_metadata_list is not None
-        print(f"seq_group_metadata_list: {seq_group_metadata_list}")
         model_input = self._prepare_model_input_tensors(
             seq_group_metadata_list, finished_requests_ids)
         # Prepare PoolingMetadata.