Skip to content

Commit

Permalink
Enable TBE CPU bf16 output support on Mac and Windows platforms
Browse files Browse the repository at this point in the history
Summary: Remove the preprocessor guard `#if !defined(__APPLE__) && !defined(_WIN32)` to enable TBE CPU bf16 output support on macOS and Windows.

Differential Revision: D46806887

fbshipit-source-id: 66d4d5838167bc0190c7118e637f95d635462a91
  • Loading branch information
excelle08 authored and facebook-github-bot committed Jun 20, 2023
1 parent 8f1b877 commit 81ee62d
Showing 1 changed file with 0 additions and 16 deletions.
16 changes: 0 additions & 16 deletions src/EmbeddingSpMDMNBit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,25 +398,21 @@ GenEmbeddingSpMDMNBitLookup<
x86::Ymm mask_vreg; // mask for avx2
x86::Xmm mask2_vreg;
x86::Xmm mask_fp16_vreg;
#if !defined(__APPLE__) && !defined(_WIN32)
vec_reg_t ones_vreg;
#endif

// We need 2 vec registers for 1. scale 2. bias
--unroll_factor;
scale_vreg = vec_reg_t(unroll_factor);
--unroll_factor;
bias_vreg = vec_reg_t(unroll_factor);

#if !defined(__APPLE__) && !defined(_WIN32)
if (is_bf16_out) {
--unroll_factor;
ones_vreg = vec_reg_t(unroll_factor);
a->mov(scratchReg2_, 1 << 15);
a->vpinsrd(ones_vreg.xmm(), ones_vreg.xmm(), scratchReg2_, 0);
a->vpbroadcastd(ones_vreg, ones_vreg.xmm());
}
#endif

--unroll_factor;
src_vreg = vec_reg_t(unroll_factor);
Expand Down Expand Up @@ -883,19 +879,15 @@ GenEmbeddingSpMDMNBitLookup<
} else {
// 16-bit output
if (instSet == inst_set_t::avx2) {
#if !defined(__APPLE__) && !defined(_WIN32)
if (is_bf16_out) {
a->vpaddd(out_vreg, out_vreg, ones_vreg);
a->vpsrld(out_vreg, out_vreg, 16);
a->vpackusdw(out_vreg, out_vreg, out_vreg);
a->vpermq(out_vreg, out_vreg, 0xd8);
} else {
#endif
// round nearest with no exception
a->vcvtps2ph(out_vreg.xmm(), out_vreg, 8);
#if !defined(__APPLE__) && !defined(_WIN32)
}
#endif
if (remainder && vec_idx + v == num_vec_regs_per_block - 1) {
if (remainder > 1) {
a->vmaskmovps(dst_addr, mask_fp16_vreg, out_vreg.xmm());
Expand All @@ -918,31 +910,23 @@ GenEmbeddingSpMDMNBitLookup<
}
} else {
if (remainder && vec_idx + v == num_vec_regs_per_block - 1) {
#if !defined(__APPLE__) && !defined(_WIN32)
if (is_bf16_out) {
// bf16
a->k(x86::k(1)).vpaddd(out_vreg, out_vreg, ones_vreg);
a->k(x86::k(1)).vpsrld(out_vreg, out_vreg, 16);
a->k(x86::k(1)).vpmovdw(dst_addr, out_vreg);
} else {
#endif
a->k(x86::k(1)).vcvtps2ph(dst_addr, out_vreg, 8);
#if !defined(__APPLE__) && !defined(_WIN32)
}
#endif
} else {
#if !defined(__APPLE__) && !defined(_WIN32)
if (is_bf16_out) {
// bf16
a->vpaddd(out_vreg, out_vreg, ones_vreg);
a->vpsrld(out_vreg, out_vreg, 16);
a->vpmovdw(dst_addr, out_vreg);
} else {
#endif
a->vcvtps2ph(dst_addr, out_vreg, 8);
#if !defined(__APPLE__) && !defined(_WIN32)
}
#endif
}
}
}
Expand Down

0 comments on commit 81ee62d

Please sign in to comment.