Skip to content

Commit

Permalink
PR#24893
Browse files Browse the repository at this point in the history
openvinotoolkit#24893

Signed-off-by: Chen, Peter <[email protected]>
  • Loading branch information
peterchen-intel committed Sep 25, 2024
1 parent c2d931a commit 5d2d0fc
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,17 @@ bool TuneParamsSelector::VerifyTuneParams(const fully_connected_params& params,
return false;

auto batch_size = params.is_shape_agnostic ? Align(output_b, tparams.tile_b) : output_b;
if (batch_size % (tparams.tile_b * tparams.dispatch_bsv) != 0)
return false;
// If the batch size is a prime number, tiled execution can still be applied to avoid poor performance.
if (batch_size % (tparams.tile_b * tparams.dispatch_bsv) != 0) {
if ((tparams.dispatch_bsv != 1) || batch_size == 1)
return false;
size_t tile = simd;
while (batch_size % tile != 0)
tile--;
if (tile > 1)
return false;
}

if (CeilDiv(output_f, tparams.tile_ofm * simd) % tparams.dispatch_fsv != 0)
return false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1095,6 +1095,59 @@ TEST(fully_connected_gpu, bf_tiled_with_pad) {
}
}

// Runs a fully-connected primitive that is forced onto the "fully_connected_gpu_bf_tiled"
// OCL implementation with an f16 input of batch size 17 (described below as "unaligned"),
// and checks every output element against dynamic_fully_connected_reference_calc.
TEST(fully_connected_gpu, bf_tiled_with_unaligned_batch) {
    tests::random_generator rg(GET_SUITE_NAME);
    auto& engine = get_test_engine();
    // Test parameters with unaligned batch size.
    const int batch_num = 17;
    const int feature_num = 1;
    const int input_x = 1;
    const int input_y = 64;
    const int output_y = input_y;
    // Expected number of output elements. Held as size_t so the comparisons below
    // (element count and loop bound) do not mix signed and unsigned operands.
    const size_t output_count = static_cast<size_t>(batch_num * feature_num * output_y);

    // Allocate memory
    auto input_mem =
        engine.allocate_memory({{batch_num, feature_num, input_y, input_x}, data_types::f16, format::bfyx});
    auto weights_mem = engine.allocate_memory({{output_y, input_y}, data_types::f16, format::bfyx});

    // Generate random input data and set values.
    // The input is drawn before the weights; keep this order so the generated values stay the same.
    auto input_data = rg.generate_random_4d<ov::float16>(batch_num, feature_num, input_y, input_x, -1, 1);
    auto weights_data = rg.generate_random_4d<ov::float16>(output_y, input_y, 1, 1, -1, 1);

    auto input_data_bfyx = flatten_4d(format::bfyx, input_data);
    auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data);

    // The primitive is created without a bias (empty bias id), so the reference uses an all-zero bias.
    std::vector<ov::float16> empty_bias(output_y, 0);
    set_values(input_mem, input_data_bfyx);
    set_values(weights_mem, weights_data_bfyx);
    auto reference_output = dynamic_fully_connected_reference_calc<ov::float16>(batch_num * feature_num,
                                                                                input_y,
                                                                                output_y,
                                                                                input_data_bfyx,
                                                                                weights_data_bfyx,
                                                                                empty_bias);
    topology topology(input_layout("input", input_mem->get_layout()),
                      data("weights", weights_mem),
                      fully_connected("fc_prim", input_info("input"), "weights", "", 3, 3));

    // Set data optimization to allow weights reordering to optimal format
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    // Force the bf_tiled kernel so this test exercises that implementation specifically.
    ov::intel_gpu::ImplementationDesc fc_impl_desc = {format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl};
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"fc_prim", fc_impl_desc}}));

    network network(engine, topology, config);
    network.set_input_data("input", input_mem);

    auto outputs = network.execute();
    auto output_mem = outputs.at("fc_prim").get_memory();
    cldnn::mem_lock<ov::float16> output_ptr(output_mem, get_test_stream());
    ASSERT_EQ(output_mem->count(), output_count);

    for (size_t i = 0; i < output_count; ++i) {
        ASSERT_FLOAT_EQ(reference_output[i], output_ptr[i]) << " i = " << i;
    }
}

TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34)
{
Expand Down

0 comments on commit 5d2d0fc

Please sign in to comment.