
Commit e3a81e1

[GPU] Canonicalize 3d shape for onednn conv/deconv post operations (#32391)
### Description of the issue (symptom, root cause, how it was resolved)
- The oneDNN 3D conv post-op mem_desc needs to be canonicalized to 4D when the conv output is blocked.

#### The code and line that caused this issue (if it is not changed directly)
- src/plugins/intel_gpu/src/graph/program_node.cpp

#### Reproduction step and snapshot (if applicable; do not attach for customer models)
- The reproduction step and model are attached in the ticket.

```
// Convert the IR: embedding_model.onnx -> FP32 -> INT8
$ ovc embedding_model.onnx --output_model model_FP32/embedding_model.xml --input "input[?,50,29]" --compress_to_fp16 False
$ python int8_quantization.py
// Run the test
$ python openvino_script.py --device GPU.1 --model ov_onnx_model/int8/model_INT8.xml --batch 1
```

#### Problematic graph
The issue does not depend on a specific graph pattern.

#### Checklist
- [ ] Is it a proper fix? (not a workaround)
- [x] Did you include a test case for this fix, if necessary?
- [x] Did you review existing tests that could be extended to cover this scenario? Which tests did you review? There was no existing test for this issue.

### Tickets:
- 174583
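Background on the root cause (illustrative, not part of the commit message): oneDNN binary post-ops require the post-op tensor to have the same rank as the destination, with size-1 dims broadcasting. When the conv destination uses a 4D channel-blocked layout such as `aBcd32b` (roughly the oneDNN analogue of `b_fs_yx_fsv32`), a rank-3 quantize parameter like `{1, 64, 1}` cannot be attached as-is and has to be expanded to rank 4. A minimal standalone sketch against the public oneDNN API; all dims here are example values:

```cpp
#include <oneapi/dnnl/dnnl.hpp>

int main() {
    using tag = dnnl::memory::format_tag;
    using dt  = dnnl::memory::data_type;

    // Conv destination in a 4D channel-blocked layout (roughly what
    // b_fs_yx_fsv32 maps to on the oneDNN side). Example dims only.
    dnnl::memory::desc dst({1, 64, 1, 48}, dt::u8, tag::aBcd32b);

    // Per-channel quantize scale, originally rank 3: {1, 64, 1}. To be used
    // as a binary post-op against a rank-4 dst it is canonicalized to
    // {1, 64, 1, 1}; the size-1 dims then broadcast.
    dnnl::memory::desc scale({1, 64, 1, 1}, dt::f32, tag::abcd);

    dnnl::post_ops ops;
    ops.append_binary(dnnl::algorithm::binary_mul, scale);

    dnnl::primitive_attr attr;
    attr.set_post_ops(ops);

    (void)dst;
    return 0;
}
```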
1 parent a46f046 commit e3a81e1

File tree: 2 files changed (+69, -3 lines)


src/plugins/intel_gpu/src/graph/program_node.cpp

Lines changed: 17 additions & 3 deletions
```diff
@@ -1679,6 +1679,17 @@ void program_node::create_onednn_primitive_attributes(
             // ********************************* Common case with output range usage ********************************* //
             const auto& q_param = desc.get_typed_fuse_params<QuantizeFuseParams>();
             if (q_param->_per_tensor_output_range && q_param->_out_lo < q_param->_out_hi) {
+                auto generate_onednn_memory_desc = [this](const cldnn::layout& lay) -> dnnl::memory::desc {
+                    if (this->is_type<gemm>() || this->is_type<fully_connected>()) {
+                        return onednn::layout_to_memory_desc(lay, onednn::get_default_data_format(lay));
+                    } else {
+                        auto mem_flag = cldnn::format::is_blocked(this->get_output_layout().format) ?
+                                            onednn::mem_flags::need_blocked : onednn::mem_flags::None;
+                        return onednn::layout_to_memory_desc(lay, dnnl::memory::format_tag::undef, mem_flag);
+                    }
+                };
+
+
                 // 1. pre-scale & pre-shift
                 {
                     if (q_param->_per_tensor_input_scale && q_param->_per_tensor_input_shift) {
@@ -1691,7 +1702,8 @@ void program_node::create_onednn_primitive_attributes(
                     } else {
                         auto in_scale = get_input_layout(dep_idx++);
                         resize_layout_for_fc(this, in_scale);
-                        dnnl::memory::desc in_scale_desc = onednn::layout_to_memory_desc(in_scale, onednn::get_default_data_format(in_scale));
+
+                        dnnl::memory::desc in_scale_desc = generate_onednn_memory_desc(in_scale);
                         post_ops.append_binary(dnnl::algorithm::binary_mul, in_scale_desc);
                         update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, onednn::get_default_data_format(in_scale), false,
                                                    in_scale_desc.get_dims(), in_scale_desc.get_data_type());
@@ -1704,7 +1716,8 @@ void program_node::create_onednn_primitive_attributes(
                     } else {
                         auto in_shift = get_input_layout(dep_idx++);
                         resize_layout_for_fc(this, in_shift);
-                        dnnl::memory::desc in_shift_desc = onednn::layout_to_memory_desc(in_shift, onednn::get_default_data_format(in_shift));
+
+                        dnnl::memory::desc in_shift_desc = generate_onednn_memory_desc(in_shift);
                         post_ops.append_binary(dnnl::algorithm::binary_add, in_shift_desc);
                         update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, onednn::get_default_data_format(in_shift), false,
                                                    in_shift_desc.get_dims(), in_shift_desc.get_data_type());
@@ -1737,7 +1750,8 @@ void program_node::create_onednn_primitive_attributes(
                     } else {
                         auto out_scale = get_input_layout(dep_idx++);
                         resize_layout_for_fc(this, out_scale);
-                        dnnl::memory::desc out_scale_desc = onednn::layout_to_memory_desc(out_scale, onednn::get_default_data_format(out_scale));
+
+                        dnnl::memory::desc out_scale_desc = generate_onednn_memory_desc(out_scale);
                         post_ops.append_binary(dnnl::algorithm::binary_mul, out_scale_desc);
                         update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, onednn::get_default_data_format(out_scale), false,
                                                    out_scale_desc.get_dims(), out_scale_desc.get_data_type());
```
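A note on the new lambda above: `gemm` and `fully_connected` keep the previous default-data-format behavior, while other nodes (conv/deconv) pass `onednn::mem_flags::need_blocked` whenever the node's output format is blocked, so the post-op layout yields a descriptor compatible with the blocked destination. A hypothetical sketch of the shape invariant this has to provide (this helper does not exist in the plugin; `canonicalize_post_op_desc` and the exact insertion point of the unit dim are illustrative):

```cpp
#include <oneapi/dnnl/dnnl.hpp>

// Hypothetical helper, not the plugin's layout_to_memory_desc: it only
// mirrors the invariant behind need_blocked, i.e. a rank-3 {N, C, W}
// post-op shape gains a unit spatial dim to become rank-4 {N, C, 1, W},
// matching the rank of a 4D blocked convolution destination.
static dnnl::memory::desc canonicalize_post_op_desc(dnnl::memory::dims dims,
                                                    dnnl::memory::data_type dt) {
    if (dims.size() == 3)
        dims.insert(dims.begin() + 2, 1);  // {N, C, W} -> {N, C, 1, W}
    // A plain row-major tag for the 4D shape; the real code derives the tag
    // from the cldnn layout and the blocked-format requirement instead.
    return dnnl::memory::desc(dims, dt, dnnl::memory::format_tag::abcd);
}
```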

src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp

Lines changed: 52 additions & 0 deletions
```diff
@@ -10040,6 +10040,58 @@ TEST(convolution_gpu_onednn, spatial_1d) {
     }
 }
 
+TEST(convolution_gpu_onednn, spatial_1d_quantize_post_ops_blocked_format) {
+    auto& engine = get_test_engine();
+    if (!engine.get_device_info().supports_immad)
+        return;
+
+    auto in_layout = layout{{ov::Dimension(), 29, 50}, data_types::f32, format::bfyx};
+
+    tests::random_generator rg(GET_SUITE_NAME);
+
+    layout input_mem_layout{ {1, 29, 50}, data_types::f32, format::bfyx };
+    auto input_mem_data = rg.generate_random_1d<float>(input_mem_layout.count(), -1, 1);
+    auto input_mem = engine.allocate_memory(input_mem_layout);
+    set_values(input_mem, input_mem_data);
+
+    layout weights_layout{ {64, 29, 3}, data_types::i8, format::bfyx };
+    auto weights_data = rg.generate_random_1d<int8_t>(weights_layout.count(), -125, 125);
+    auto weights_mem = engine.allocate_memory(weights_layout);
+    set_values(weights_mem, weights_data);
+
+    auto input_low = engine.allocate_memory({ {1, 64, 1}, data_types::f32, format::bfyx });
+    auto input_high = engine.allocate_memory({ {1, 64, 1}, data_types::f32, format::bfyx });
+    auto output_low = engine.allocate_memory({ {1, 1, 1}, data_types::f32, format::bfyx });
+    auto output_high = engine.allocate_memory({ {1, 1, 1}, data_types::f32, format::bfyx });
+    set_values(input_low, { 0.0f });
+    set_values(input_high, { 40.0f });
+    set_values(output_low, { 0.0f });
+    set_values(output_high, { 255.0f });
+
+    topology t(input_layout("input", in_layout),
+               data("weights", weights_mem),
+               reorder("reorder_fsv32", input_info("input"), format::b_fs_yx_fsv32, data_types::u8),
+               convolution("conv", input_info("reorder_fsv32"), "weights", "", "", "", "",
+                           0, ov::Strides{1}, ov::Strides{1}, ov::CoordinateDiff{0}, ov::CoordinateDiff{0}, false, data_types::f32),
+               data("in_lo", input_low),
+               data("in_hi", input_high),
+               data("out_lo", output_low),
+               data("out_hi", output_high),
+               quantize("quantize", input_info("conv"), input_info("in_lo"), input_info("in_hi"),
+                        input_info("out_lo"), input_info("out_hi"), 255, data_types::u8),
+               reorder("reorder", input_info("quantize"), format::bfyx, data_types::f32));
+
+    ExecutionConfig config = get_test_default_config(engine);
+    ov::intel_gpu::ImplementationDesc conv_impl_test_blocked = { format::b_fs_yx_fsv32, "", impl_types::onednn };
+    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl_test_blocked } }));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+    network network_test_blocked(engine, t, config);
+    network_test_blocked.set_input_data("input", input_mem);
+    network_test_blocked.execute();
+}
+
 struct convolution_gpu_onednn_both_shapes : public ::testing::TestWithParam<bool> {
 };
 
```
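The test pins the conv to the blocked oneDNN implementation via `force_implementations`, so the fused per-channel quantize exercises exactly the binary post-op path fixed above. To run just this case after a build (the binary name and path are the usual ones for the intel_gpu unit tests and may differ in your build configuration):

```
# path depends on the build configuration
$ ./bin/intel64/Release/ov_gpu_unit_tests \
    --gtest_filter=convolution_gpu_onednn.spatial_1d_quantize_post_ops_blocked_format
```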
