Skip to content

Commit

Permalink
Add cache conflict miss support (backend) (pytorch#2596)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: pytorch#2596

Differential Revision: D55998215
  • Loading branch information
sryap authored and facebook-github-bot committed May 22, 2024
1 parent d7a5500 commit ee00d88
Show file tree
Hide file tree
Showing 23 changed files with 1,154 additions and 479 deletions.
49 changes: 41 additions & 8 deletions fbgemm_gpu/FbgemmGpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ set(GWD_OPTIMIZERS
set(DEFUSED_OPTIMIZERS
rowwise_adagrad)

# Optimizers with the SSD support
set(SSD_OPTIMIZERS
rowwise_adagrad)

set(WEIGHT_OPTIONS
weighted
unweighted_nobag
Expand Down Expand Up @@ -143,6 +147,7 @@ set(gen_gpu_kernel_source_files
"gen_embedding_forward_split_unweighted_codegen_cuda.cu"
"gen_embedding_backward_dense_indice_weights_codegen_cuda.cu"
"gen_embedding_backward_split_indice_weights_codegen_cuda.cu"
"gen_embedding_backward_ssd_indice_weights_codegen_cuda.cu"
"gen_embedding_forward_split_weighted_vbe_codegen_cuda.cu"
"gen_embedding_forward_split_unweighted_vbe_codegen_cuda.cu"
"gen_batch_index_select_dim0_forward_codegen_cuda.cu"
Expand All @@ -153,10 +158,13 @@ set(gen_gpu_kernel_source_files
"gen_batch_index_select_dim0_backward_kernel_warp.cu"
"gen_embedding_backward_split_grad_embedding_ops.cu"
"gen_embedding_backward_split_grad_index_select.cu"
"gen_embedding_backward_common_split_device_kernel.cuh"
"gen_embedding_backward_batch_index_select_split_device_kernel.cuh"
"gen_embedding_backward_split_common_device_kernel.cuh"
"gen_embedding_backward_split_batch_index_select_device_kernel.cuh"
"gen_embedding_forward_split_weighted_gwd_codegen_cuda.cu"
"gen_embedding_forward_split_unweighted_gwd_codegen_cuda.cu"
"gen_embedding_forward_ssd_weighted_codegen_cuda.cu"
"gen_embedding_forward_ssd_unweighted_codegen_cuda.cu"
"gen_embedding_forward_ssd_unweighted_nobag_kernel_small.cu"
)

if(NOT USE_ROCM)
Expand All @@ -179,7 +187,8 @@ foreach(wdesc ${WEIGHT_OPTIONS})
"gen_embedding_backward_dense_split_${wdesc}_kernel_cta.cu"
"gen_embedding_backward_dense_split_${wdesc}_kernel_warp.cu"
"gen_embedding_forward_split_${wdesc}_kernel.cu"
"gen_embedding_backward_${wdesc}_split_device_kernel.cuh")
"gen_embedding_forward_ssd_${wdesc}_kernel.cu"
"gen_embedding_backward_split_${wdesc}_device_kernel.cuh")

foreach(etype fp32 fp16 fp8 int8 int4 int2)
list(APPEND gen_gpu_kernel_source_files
Expand All @@ -191,7 +200,7 @@ endforeach()
foreach(wdesc weighted unweighted)
list(APPEND gen_gpu_kernel_source_files
"gen_embedding_forward_split_${wdesc}_vbe_kernel.cu"
"gen_embedding_backward_${wdesc}_vbe_split_device_kernel.cuh")
"gen_embedding_backward_split_${wdesc}_vbe_device_kernel.cuh")
endforeach()

# Generate GWD files
Expand All @@ -207,22 +216,31 @@ set(gen_cpu_source_files

set(gen_python_source_files
${CMAKE_BINARY_DIR}/__init__.py
${CMAKE_BINARY_DIR}/lookup_args.py)
${CMAKE_BINARY_DIR}/lookup_args.py
${CMAKE_BINARY_DIR}/lookup_args_ssd.py
)

# For each of the optimizers, generate the backward split variant by adding
# the Python, CPU-only, GPU host, and GPU kernel source files

# Generate the Python functions only if there is the backend support
# Generate the Python functions only if there is the backend support (for all
# optimizers)
foreach(optimizer
${COMMON_OPTIMIZERS}
${CPU_ONLY_OPTIMIZERS}
${GPU_ONLY_OPTIMIZERS})
list(APPEND gen_python_source_files
"${CMAKE_BINARY_DIR}/lookup_${optimizer}.py")
list(APPEND gen_python_source_files
"${CMAKE_BINARY_DIR}/lookup_${optimizer}.py"
"${CMAKE_BINARY_DIR}/lookup_${optimizer}_pt2.py")
endforeach()

# Generate the Python functions only if there is the backend support (for SSD
# optimizers)
foreach(optimizer ${SSD_OPTIMIZERS})
list(APPEND gen_python_source_files
"${CMAKE_BINARY_DIR}/lookup_${optimizer}_ssd.py")
endforeach()

# Generate the backend API for all optimizers to preserve the backward
# compatibility
list(APPEND gen_cpu_source_files
Expand Down Expand Up @@ -285,6 +303,21 @@ foreach(optimizer ${DEFUSED_OPTIMIZERS})
"${CMAKE_BINARY_DIR}/split_embedding_optimizer_${optimizer}.py")
endforeach()

foreach(optimizer ${SSD_OPTIMIZERS})
list(APPEND gen_gpu_kernel_source_files
"gen_embedding_optimizer_${optimizer}_ssd_device_kernel.cuh"
"gen_embedding_backward_ssd_${optimizer}.cpp"
)

foreach(wdesc weighted unweighted unweighted_nobag)
list(APPEND gen_gpu_kernel_source_files
"gen_embedding_backward_${optimizer}_ssd_${wdesc}_cuda.cu"
"gen_embedding_backward_${optimizer}_ssd_${wdesc}_kernel_cta.cu"
"gen_embedding_backward_${optimizer}_ssd_${wdesc}_kernel_warp.cu")
endforeach()

endforeach()

list(APPEND gen_defused_optim_py_files
${CMAKE_BINARY_DIR}/optimizer_args.py)

Expand Down
Loading

0 comments on commit ee00d88

Please sign in to comment.