fix(server): call torch.cuda.empty_cache() when all requests have stopped

huggingface · Jul 15, 2023 · a2cf1bd
1 parent c58a0c1
commit a2cf1bd
Showing 1 changed file with 1 addition and 0 deletions.
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
@@ -991,6 +991,7 @@ def generate_token(
 
         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None