Skip to content

Commit

Permalink
[SW-194177] - Integrate new vllm-PA algo with HQT
Browse files Browse the repository at this point in the history
Change-Id: I94c9679f0aff7c2f9a86a802da825bfd6d0772ad
  • Loading branch information
nirda7 committed Jul 25, 2024
1 parent 5e3a679 commit 3572617
Showing 1 changed file with 3 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -530,12 +530,10 @@ def forward_measure(self, input, cache, block_indices, block_offset):
         measure_output((output_cache), self._mod_extra_config.outputs)
         return output_cache

-    def fetch_from_cache(self, cache, blocks, permutations):
+    def fetch_from_cache(self, cache, blocks):
         quant_cache = self.quant_input(cache)
-        output_cache = self.orig_fetch_from_cache(quant_cache, blocks, permutations)
-        for i in range(len(output_cache)):
-            output_cache[i]=self.quant_output(output_cache[i])
-        return output_cache
+        output_cache = self.orig_fetch_from_cache(quant_cache, blocks)
+        return self.quant_output(output_cache)


 class PatchedConv2d(nn.Conv2d):
Expand Down

0 comments on commit 3572617

Please sign in to comment.