Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use Abseil OStringStream in WebGPU EP string concat #22241

Merged
merged 9 commits into from
Sep 30, 2024
Merged
27 changes: 14 additions & 13 deletions onnxruntime/contrib_ops/webgpu/bert/fast_gelu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,23 @@ Status FastGeluProgram::GenerateShaderCode(ShaderHelper& shader) const {
const auto& x = shader.AddInput("x", ShaderUsage::UseUniform | ShaderUsage::UseValueTypeAlias);
const auto& y = shader.AddOutput("y", ShaderUsage::UseUniform);

std::string add_bias = "";
shader.AdditionalImplementation() << TanhImpl;
shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")
<< " var a = " << x.GetByOffset("global_idx") << ";\n";
if (Inputs().size() > 1) {
const auto& bias = shader.AddInput("bias", ShaderUsage::UseUniform | ShaderUsage::UseShapeAndStride);
add_bias = bias_components_ == 1 ? " let bias_offset = global_idx * 4;\n"
" a += x_value_t(" +
bias.GetByOffset("bias_offset % uniforms.bias_shape") + ", " +
bias.GetByOffset("(bias_offset + 1) % uniforms.bias_shape") + ", " +
bias.GetByOffset("(bias_offset + 2) % uniforms.bias_shape") + ", " +
bias.GetByOffset("(bias_offset + 3) % uniforms.bias_shape") + ");\n"
: " a += " + bias.GetByOffset("global_idx % uniforms.bias_shape") + ";\n";
if (bias_components_ == 1) {
shader.MainFunctionBody() << " let bias_offset = global_idx * 4;\n"
" a += x_value_t("
<< bias.GetByOffset("bias_offset % uniforms.bias_shape") << ", "
<< bias.GetByOffset("(bias_offset + 1) % uniforms.bias_shape") << ", "
<< bias.GetByOffset("(bias_offset + 2) % uniforms.bias_shape") << ", "
<< bias.GetByOffset("(bias_offset + 3) % uniforms.bias_shape") << ");\n";
} else {
shader.MainFunctionBody() << " a += " << bias.GetByOffset("global_idx % uniforms.bias_shape") + ";\n";
}
}
shader.AppendImplementation(TanhImpl);
shader.SetMainFunctionBody(shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size"),
" var a = ", x.GetByOffset("global_idx"), ";\n",
add_bias,
y.SetByOffset("global_idx", onnxruntime::webgpu::FastGeluExpr));
shader.MainFunctionBody() << y.SetByOffset("global_idx", onnxruntime::webgpu::FastGeluExpr);

return Status::OK();
}
Expand Down
49 changes: 17 additions & 32 deletions onnxruntime/contrib_ops/webgpu/bert/rotary_embedding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,23 @@ Status RotaryEmbeddingProgram::GenerateShaderCode(ShaderHelper& shader) const {
// TODO: remove output_indices.
const auto& output_indices = shader.AddIndices("output_indices", false);
const auto interleaved_str = interleaved_ ? "true" : "false";
shader.SetMainFunctionBody(
" let half_rotary_emb_dim = uniforms.cos_cache_shape[1];\n"
" let bsnh = global_idx / uniforms.global_stride % uniforms.global_shape;\n"
" let size = uniforms.global_shape[0] * uniforms.global_stride[0];\n",
" if (global_idx >= size) { return; }\n"
" if (bsnh[3] < half_rotary_emb_dim) {\n"
" let position_ids_idx = " +
position_ids.BroadcastedIndicesToOffset("bsnh.xy", output_indices) + ";\n" +
" let position_id = u32(" +
position_ids.GetByOffset("position_ids_idx") + ")" +
" + select(0, bsnh[1], position_ids_idx == 0);\n"
" let i = dot(bsnh, uniforms.input_output_stride) + select(0, bsnh[3], " +
interleaved_str +
");\n"
" let j = i + select(half_rotary_emb_dim, 1, " +
interleaved_str +
");\n"
" let re = " +
input.GetByOffset("i") + " * " + cos_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") + "-" +
input.GetByOffset("j") + " * " + sin_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") + ";\n" +
" " + output.SetByOffset("i", "re") + "\n" +
" let im = " + input.GetByOffset("i") + " * " +
sin_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") +
"+ " + input.GetByOffset("j") +
" * " + cos_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") +
";\n " + output.SetByOffset("j", "im") +
"\n"
" } else { \n"
" let k = dot(bsnh, uniforms.input_output_stride) + half_rotary_emb_dim;\n" +
" " + output.SetByOffset("k", input.GetByOffset("k")) +
"\n"
" }");
shader.MainFunctionBody() << " let half_rotary_emb_dim = uniforms.cos_cache_shape[1];\n"
" let bsnh = global_idx / uniforms.global_stride % uniforms.global_shape;\n"
" let size = uniforms.global_shape[0] * uniforms.global_stride[0];\n"
" if (global_idx >= size) { return; }\n"
" if (bsnh[3] < half_rotary_emb_dim) {\n"
<< " let position_ids_idx = " << position_ids.BroadcastedIndicesToOffset("bsnh.xy", output_indices) << ";\n"
<< " let position_id = u32(" << position_ids.GetByOffset("position_ids_idx") << ") + select(0, bsnh[1], position_ids_idx == 0);\n"
<< " let i = dot(bsnh, uniforms.input_output_stride) + select(0, bsnh[3], " << interleaved_str << ");\n"
<< " let j = i + select(half_rotary_emb_dim, 1, " << interleaved_str << ");\n"
<< " let re = " << input.GetByOffset("i") << " * " << cos_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") << " - " << input.GetByOffset("j") << " * " << sin_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") << ";\n"
<< " " << output.SetByOffset("i", "re") << "\n"
<< " let im = " << input.GetByOffset("i") << " * " << sin_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") << " + " << input.GetByOffset("j") + " * " << cos_cache.GetByIndices("vec2<u32>(position_id, bsnh[3])") << ";\n"
<< " " << output.SetByOffset("j", "im") << "\n"
<< " } else { \n"
" let k = dot(bsnh, uniforms.input_output_stride) + half_rotary_emb_dim;\n"
<< " " << output.SetByOffset("k", input.GetByOffset("k")) << "\n"
<< " }";

return Status::OK();
}
Expand Down
Loading
Loading