Skip to content

Commit

Permalink
- Add abstract impl for FloatToHFP8Quantized (#2983)
Browse files Browse the repository at this point in the history
Summary:
X-link: facebookresearch/FBGEMM#78

Pull Request resolved: #2983

As title

Reviewed By: sryap

Differential Revision: D61216517

fbshipit-source-id: 13fb4cbe65ab235dbaf99778b424044e42e75bc5
  • Loading branch information
flaviotruzzi authored and facebook-github-bot committed Aug 14, 2024
1 parent 3070f88 commit 01775eb
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 14 deletions.
10 changes: 10 additions & 0 deletions fbgemm_gpu/fbgemm_gpu/sparse_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,12 @@ def histogram_binning_calibration_abstract(
return torch.empty_like(logit), torch.empty([logit.numel()], dtype=torch.int64)


def float_to_hfp8_quantized(
    input: Tensor, ebits: int, exponent_bias: int, max_pos: float
) -> Tensor:
    """Abstract (meta/fake-tensor) implementation for ``fbgemm::FloatToHFP8Quantized``.

    Only shape/dtype propagation is performed here — no real quantization
    happens, so the quantization parameters are accepted but unused.

    Args:
        input: Float tensor to be quantized.
        ebits: Number of exponent bits in the HFP8 format (unused here).
        exponent_bias: Exponent bias of the HFP8 format (unused here).
        max_pos: Maximum representable positive value (unused here).

    Returns:
        An uninitialized ``uint8`` tensor with the same shape as ``input``.
    """
    quantized = torch.empty_like(input, dtype=torch.uint8)
    return quantized


def _setup() -> None:
# pyre-ignore[16]
_setup.done = getattr(_setup, "done", False)
Expand Down Expand Up @@ -1092,6 +1098,10 @@ def impl_autograd(op_name, fn, setup_context: Optional[Callable] = None) -> None
"fbgemm::histogram_binning_calibration",
histogram_binning_calibration_abstract,
)
impl_abstract(
"fbgemm::FloatToHFP8Quantized",
float_to_hfp8_quantized,
)
_setup.done = True


Expand Down
8 changes: 8 additions & 0 deletions fbgemm_gpu/src/quantize_ops/quantize_ops_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,14 @@ at::Tensor _hfp8_to_float_cpu(
} // namespace fbgemm_gpu

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
#ifdef HAS_IMPL_ABSTRACT_PYSTUB
m.impl_abstract_pystub(
"fbgemm_gpu.sparse_ops",
"//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_py");
#endif

m.set_python_module("fbgemm_gpu.sparse_ops");

m.def("FloatToFused8BitRowwiseQuantized(Tensor t) -> Tensor");
m.def(
"FloatToFP8RowwiseQuantized(Tensor t, bool forward) -> Tensor",
Expand Down
15 changes: 1 addition & 14 deletions fbgemm_gpu/test/quantize/failures_dict_fast.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,7 @@
"status": "xfail"
}
},
"fbgemm::FloatToHFP8Quantized": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_cpu": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_gpu_no_cache": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_gpu_no_cache_fp8_2048": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FloatToHFP8Quantized": {},
"fbgemm::Fused8BitRowwiseQuantizedToFloat": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
"comment": "",
Expand Down

0 comments on commit 01775eb

Please sign in to comment.