Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
Add empty tensor initialization to LazyCompressedParameter (#53)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexm-redhat authored Feb 23, 2024
1 parent 31ecb4d commit 9eb83fe
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 8 deletions.
33 changes: 26 additions & 7 deletions vllm/model_executor/layers/parameters/lazy_compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class LazyCompressedParameter(torch.Tensor):
@staticmethod
def __new__(cls,
uncompressed_data: torch.Tensor,
is_empty: bool = False,
storage_format_cls: Type[
CompressedStorageFormat] = SparseBitmaskStorageFormat,
compress_transposed: bool = False):
Expand All @@ -30,12 +31,16 @@ def __new__(cls,
cls,
size=uncompressed_data.shape,
dtype=uncompressed_data.dtype,
device=uncompressed_data.device,
requires_grad=False)
self._is_param = True

self.storage_format_cls = storage_format_cls
self.compressed_data = None
self.uncompressed_data = uncompressed_data
self.compress_transposed = compress_transposed
self._is_param = True
self.compressed_data = None

self.is_empty = is_empty
self.uncompressed_data = None if self.is_empty else uncompressed_data

return self

Expand All @@ -45,7 +50,10 @@ def has_compressed_data(self) -> bool:

@property
def has_uncompressed_data(self) -> bool:
return (self.uncompressed_data is not None)
if self.is_empty:
raise ValueError(
"has_uncompressed_data() was called with empty data")
return self.uncompressed_data is not None

@classmethod
def __torch_dispatch__(cls, func, types, args, kwargs):
Expand All @@ -56,16 +64,27 @@ def unwrap(e):
if isinstance(e, LazyCompressedParameter):
assert ret_storage_format_cls is None or ret_storage_format_cls == e.storage_format_cls
ret_storage_format_cls = e.storage_format_cls
return e.uncompressed_data if isinstance(
e, LazyCompressedParameter) else e

if e.is_empty:
e.is_empty = False
e.uncompressed_data = torch.empty(size=e.size(),
dtype=e.dtype,
device=e.device)

return e.uncompressed_data
else:
return e

rs = func(*tree_map(unwrap, args), **tree_map(unwrap, kwargs))

def wrap(e):
if isinstance(e,
torch.Tensor) and ret_storage_format_cls is not None:
return LazyCompressedParameter(
e, storage_format_cls=ret_storage_format_cls)
e,
# Here, "e" is the output of "func" so it is real data and we store it
is_empty=False,
storage_format_cls=ret_storage_format_cls)
return e

rs = tree_map(wrap, rs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ def create_weights(self, input_size_per_partition: int,
weight = LazyCompressedParameter(
torch.empty((output_size_per_partition, input_size_per_partition),
dtype=params_dtype),
                # For create_weights(..), we initialize an empty tensor to
                # save GPU memory. When the parameter is loaded from
                # disk, it will be copied into this tensor.
is_empty=True,
storage_format_cls=self.storage_format_cls,
# if we don't support F.linear or something analogous,
# If we don't support F.linear or something analogous,
# transpose when we compress so we can use a basic matmul
compress_transposed=not supports_linear)

Expand Down

0 comments on commit 9eb83fe

Please sign in to comment.