Skip to content

Commit 9b3e405

Browse files
authored
[GPU] Fix weights conversion of 1d grouped conv (#32568)
### Description of the issue (symptom, root cause, how it was resolved) - A static-shape int8 model with a 1D grouped convolution does not get the proper weights dimension extension, so group shape inference fails. - The fp16 model has no error because the weights of the 1D grouped conv are a constant, and the weights dimension extension happens properly in CreateConstantOp(), while the int8 model has a scaling (Multiply) between the constant and the weights input of the 1D grouped conv. - Added a case for 1D grouped conv when the weights layout is calculated. #### The code and line that caused this issue (if it is not changed directly) - src/plugins/intel_gpu/src/graph/convolution.cpp - src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp #### Reproduction step and snapshot (if applicable. Do not attach for customer model) - $ benchmark_app … - ... #### Problematic graph - <img width="1043" height="464" alt="image" src="https://github.com/user-attachments/assets/3f08faa0-92b3-4925-9255-9d7995b42a2e" /> #### Checklist - [x] Is it a proper fix? (not a workaround) - [x] Did you include test case for this fix, if necessary? - [x] Did you review existing test that can be extended to cover this scenario? Which test did you review? ### Tickets: - 175374
1 parent 57751e7 commit 9b3e405

File tree

5 files changed

+233
-3
lines changed

5 files changed

+233
-3
lines changed

src/plugins/intel_gpu/src/graph/convolution.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,14 @@ std::vector<layout> calc_output_layout_impl(convolution_node const& node, kernel
126126
auto& weights_shape = input_shapes[1];
127127
// WA for legacy flow, mostly for unit tests as sometimes grouped conv has non-grouped weights
128128
if (legacy_flow && input_shapes[1].size() == 4 && input_shapes[0].size() == 4) {
129-
weights_shape.insert(weights_shape.begin(), desc->groups);
130-
weights_shape[1] /= desc->groups;
129+
// Extend grouped 1d conv weights shape from 4d to 5d when conv input shape is canonicalized to 4d by allow_new_shape_infer=false
130+
if (desc->grouped_weights_shape && desc->groups > 1 && (static_cast<int64_t>(desc->groups) == input_shapes[1][0].get_length())) {
131+
// 1d convolution with groups, e.g. shape [g,oc,ic,x] -> [g,oc,ic,x,1]
132+
weights_shape.insert(weights_shape.end(), 1);
133+
} else {
134+
weights_shape.insert(weights_shape.begin(), desc->groups);
135+
weights_shape[1] /= desc->groups;
136+
}
131137
}
132138
output_shapes = ov::op::v1::shape_infer(&op, input_shapes, pads_begin, pads_end);
133139
} else {
@@ -207,6 +213,17 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
207213

208214
auto filter_inst = node.weights().get_output_layout().convert_to_weights_layout(argument->grouped_weights_shape);
209215

216+
// Extend grouped 1d conv weights shape from 4d to 5d when conv input shape is canonicalized to 4d by allow_new_shape_infer=false
217+
const bool needs_filter_extension = !network.get_program()->is_new_shape_infer() &&
218+
argument->grouped_weights_shape &&
219+
argument->groups > 1 &&
220+
filter_inst.get_rank() == 4 &&
221+
!format::is_grouped(filter_inst.format);
222+
223+
if (needs_filter_extension) {
224+
filter_inst = extend_weights_layout_to_5d(filter_inst);
225+
}
226+
210227
if (bias_term()) {
211228
auto bias_inst = node.bias().get_output_layout();
212229
CLDNN_ERROR_NOT_EQUAL(node.id(),

src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,27 @@ struct convolution_impl : typed_primitive_impl_ocl<convolution> {
8181
conv_params.deformable_groups = deformable_groups;
8282

8383
conv_params.groups = groups;
84+
conv_params.grouped_weights_shape = primitive->grouped_weights_shape;
8485

8586
auto deform_conv_dep_offset = primitive->deformable_mode ? 1 : 0;
8687
if (primitive->input.size() == 3)
8788
deform_conv_dep_offset++;
8889

89-
const auto& weights_layout = impl_param.input_layouts[1 + 0 + deform_conv_dep_offset]
90+
const size_t weights_input_idx = 1 + deform_conv_dep_offset;
91+
auto weights_layout = impl_param.input_layouts[weights_input_idx]
9092
.convert_to_weights_layout(primitive->grouped_weights_shape);
9193

94+
// Extend grouped 1d conv weights shape from 4d to 5d when conv input shape is canonicalized to 4d by allow_new_shape_infer=false
95+
const bool needs_weights_extension = !impl_param.get_program().is_new_shape_infer() &&
96+
groups > 1 &&
97+
weights_layout.get_rank() == 4 &&
98+
conv_params.grouped_weights_shape;
99+
100+
if (needs_weights_extension) {
101+
weights_layout = extend_weights_layout_to_5d(weights_layout);
102+
conv_params.weights = convert_weights_tensor(weights_layout, true);
103+
}
104+
92105
ov::CoordinateDiff pads_begin(primitive->padding_begin.begin(), primitive->padding_begin.end());
93106
ov::CoordinateDiff pads_end(primitive->padding_end.begin(), primitive->padding_end.end());
94107
const auto auto_pad = primitive->auto_pad;

src/plugins/intel_gpu/src/graph/include/convolution_inst.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,20 @@ class typed_primitive_inst<convolution> : public typed_primitive_inst_base<convo
195195

196196
using convolution_inst = typed_primitive_inst<convolution>;
197197

198+
// Helper that appends one trailing dimension of size 1 to a static weights layout, turning the 4D weights of a grouped 1D convolution into 5D.
199+
// Used when the conv input shape is canonicalized to 4D because allow_new_shape_infer=false.
// The input rank is not checked here; the callers visible in this change guard with get_rank() == 4.
// Asserts (OPENVINO_ASSERT) that the layout is not dynamic. Returns a new layout with the same data type.
200+
inline layout extend_weights_layout_to_5d(const layout& weights_layout) {
201+
OPENVINO_ASSERT(!weights_layout.is_dynamic());
202+
auto current_shape = weights_layout.get_shape();
203+
std::vector<size_t> new_shape(current_shape.begin(), current_shape.end());
204+
new_shape.push_back(1); // Append a trailing dimension of size 1, e.g. [g,oc,ic,x] -> [g,oc,ic,x,1]
205+
206+
ov::PartialShape new_pshape(new_shape);
207+
// Only the plain oiyx format is swapped for a default 5D format; any other format is kept unchanged.
// NOTE(review): the arguments are presumably (rank, is_weights, is_grouped) - confirm against format::get_default_format.
auto new_format = weights_layout.format == format::oiyx
208+
? format::get_default_format(5, true, true)
209+
: weights_layout.format;
210+
211+
return layout(new_pshape, weights_layout.data_type, new_format);
212+
}
213+
198214
} // namespace cldnn

src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ struct convolution_params : public weight_bias_zero_point_params {
3030
bool bilinear_interpolation_pad {false};
3131
bool deformable_mask_enabled {false};
3232
bool has_explicit_paddings {false};
33+
bool grouped_weights_shape {false};
3334
DataTensor intermediate_tensor;
3435

3536
std::string to_string() const override;
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
// Copyright (C) 2018-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
#include "common_test_utils/ov_tensor_utils.hpp"
5+
#include "common_test_utils/file_utils.hpp"
6+
#include "shared_test_classes/single_op/group_convolution.hpp"
7+
#include "common_test_utils/node_builders/group_convolution.hpp"
8+
9+
#include "openvino/op/parameter.hpp"
10+
#include "openvino/op/result.hpp"
11+
#include "openvino/op/multiply.hpp"
12+
#include "openvino/op/add.hpp"
13+
#include "openvino/op/convert.hpp"
14+
15+
namespace {
16+
using ov::test::InputShape;
17+
using ov::test::groupConvSpecificParams;
18+
19+
// Parameter tuple of GroupConvolutionLayerGPUTest; the element order must match the structured bindings in getTestCaseName() and SetUp().
typedef std::tuple<
20+
groupConvSpecificParams, // Kernel, stride, pad begin, pad end, dilation, output channels, group count, pad type
21+
ov::element::Type, // Net precision
22+
ov::element::Type, // Input precision
23+
ov::element::Type, // Output precision
24+
ov::element::Type, // Weights precision
25+
InputShape, // Input shape
26+
bool, // Weights scaling
27+
std::string // Device name
28+
> groupConvLayerTestParamsSet;
29+
30+
// Parameterized test fixture that builds a model with a single group convolution and one Result per convolution output.
// With weightsScaling=true the weights input is Constant -> Convert -> Multiply instead of a plain constant,
// which is the int8 graph pattern described in this commit.
class GroupConvolutionLayerGPUTest : public testing::WithParamInterface<groupConvLayerTestParamsSet>,
31+
virtual public ov::test::SubgraphBaseTest {
32+
public:
33+
// Builds the test case name by streaming every tuple parameter into a string.
static std::string getTestCaseName(const testing::TestParamInfo<groupConvLayerTestParamsSet>& obj) {
34+
const auto& [convParams, netType, inType, outType, weightsType, inputShape, weightsScaling, targetDevice] = obj.param;
35+
36+
const auto& [kernel, stride, padBegin, padEnd, dilation, convOutChannels, group, padType] = convParams;
37+
38+
std::ostringstream result;
39+
// IS = inputShape.first (partial shape), TS = each shape in inputShape.second.
result << "IS=";
40+
result << ov::test::utils::partialShape2str({inputShape.first}) << "_";
41+
result << "TS=(";
42+
for (const auto& shape : inputShape.second) {
43+
result << ov::test::utils::vec2str(shape) << "_";
44+
}
45+
result << ")_";
46+
result << "K" << ov::test::utils::vec2str(kernel) << "_";
47+
result << "S" << ov::test::utils::vec2str(stride) << "_";
48+
result << "PB" << ov::test::utils::vec2str(padBegin) << "_";
49+
result << "PE" << ov::test::utils::vec2str(padEnd) << "_";
50+
result << "D=" << ov::test::utils::vec2str(dilation) << "_";
51+
result << "O=" << convOutChannels << "_";
52+
result << "G=" << group << "_";
53+
result << "AP=" << padType << "_";
54+
result << "netPRC=" << netType << "_";
55+
result << "inPRC=" << inType << "_";
56+
result << "outPRC=" << outType << "_";
57+
result << "weightsPRC=" << weightsType << "_";
58+
result << "weightsScaling=" << weightsScaling << "_";
59+
result << "trgDev=" << targetDevice;
60+
61+
return result.str();
62+
}
63+
64+
protected:
65+
// Unpacks the test parameters, builds the model into `function` and sets the comparison thresholds.
void SetUp() override {
66+
const auto& [groupConvParams, netType, _inType, _outType, weightsType, inputShape, weightsScaling, _targetDevice] = this->GetParam();
67+
inType = _inType;
68+
outType = _outType;
69+
targetDevice = _targetDevice;
70+
71+
init_input_shapes({inputShape});
72+
73+
const auto& [_kernel, stride, padBegin, padEnd, dilation, convOutChannels, group, padType] = groupConvParams;
74+
auto kernel = _kernel;
75+
76+
ov::ParameterVector inputParams;
77+
for (auto&& shape : inputDynamicShapes)
78+
inputParams.push_back(std::make_shared<ov::op::v0::Parameter>(inType, shape));
79+
80+
std::shared_ptr<ov::Node> groupConvolutionNode;
81+
if (weightsScaling) {
82+
// Input channels per group: dimension 1 of the first target static shape divided by the group count.
size_t convInChannels = static_cast<size_t>(targetStaticShapes.front()[0][1] / group);
83+
// Weights shape is {group, convOutChannels, convInChannels, kernel dims...}; convOutChannels is used as a per-group value here.
ov::Shape filter_weights_shape = {group, convOutChannels, convInChannels};
84+
filter_weights_shape.insert(filter_weights_shape.end(), kernel.begin(), kernel.end());
85+
// NOTE(review): the scale shape is fixed at 4 dims, so it matches the weights rank only for a one-element (1D) kernel.
ov::Shape scaling_shape = {group, convOutChannels, 1, 1};
86+
// NOTE(review): InputGenerateData arguments are presumably (start_from, range, resolution, seed) - confirm against its declaration.
auto weights_tensor = ov::test::utils::create_and_fill_tensor(weightsType,
87+
filter_weights_shape, ov::test::utils::InputGenerateData(-127, 256, 256, 1));
88+
auto scaling_tensor = ov::test::utils::create_and_fill_tensor(netType, scaling_shape, ov::test::utils::InputGenerateData(0, 1, 8092, 1));
89+
// Weights path: Constant(weightsType) -> Convert(netType) -> Multiply(by the scale constant).
auto filter_weights_node = std::make_shared<ov::op::v0::Constant>(weights_tensor);
90+
auto convert_node = std::make_shared<ov::op::v0::Convert>(filter_weights_node, netType);
91+
auto scaling_node = std::make_shared<ov::op::v0::Constant>(scaling_tensor);
92+
auto multiply_node = std::make_shared<ov::op::v1::Multiply>(convert_node, scaling_node);
93+
groupConvolutionNode = ov::test::utils::make_group_convolution(inputParams.front(), multiply_node, netType, stride, padBegin,
94+
padEnd, dilation, padType);
95+
} else {
96+
// Without scaling, the builder overload that takes kernel, convOutChannels and group is used; no weights node is passed in.
groupConvolutionNode = ov::test::utils::make_group_convolution(inputParams.front(), netType, kernel, stride, padBegin,
97+
padEnd, dilation, padType, convOutChannels, group);
98+
}
99+
100+
101+
ov::ResultVector results;
102+
for (size_t i = 0; i < groupConvolutionNode->get_output_size(); i++)
103+
results.push_back(std::make_shared<ov::op::v0::Result>(groupConvolutionNode->output(i)));
104+
105+
function = std::make_shared<ov::Model>(results, inputParams, "GroupConvolution");
106+
107+
// f16 runs use looser absolute/relative thresholds than other net precisions.
if (netType == ov::element::f16) {
108+
abs_threshold = 0.1;
109+
rel_threshold = 0.1;
110+
} else {
111+
abs_threshold = 0.005;
112+
rel_threshold = 0.005;
113+
}
114+
}
115+
116+
// Clears `inputs` and creates one tensor per model input with start_from=-10, range=20, resolution=8092.
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
117+
inputs.clear();
118+
const auto& funcInputs = function->inputs();
119+
120+
for (size_t i = 0lu; i < funcInputs.size(); i++) {
121+
const auto& funcInput = funcInputs[i];
122+
ov::test::utils::InputGenerateData in_data;
123+
in_data.start_from = -10;
124+
in_data.resolution = 8092;
125+
in_data.range = 20u;
126+
127+
ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], in_data);
128+
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
129+
}
130+
}
131+
};
132+
133+
// Static 3D input for the basic 1D case. The first element is the model shape and the second is the list of target static shapes.
// Dimension 1 (32) is the value SetUp() divides by the group count to get the input channels per group.
const InputShape input_shapes_1d = {
134+
{10, 32, 3}, {{10, 32, 3}}
135+
};
136+
137+
// Static 3D input for the depthwise case, with 64 in dimension 1.
const InputShape input_shapes_depthwise = {
138+
{10, 64, 3}, {{10, 64, 3}}
139+
};
140+
141+
// Calls run() for each instantiated parameter set.
// NOTE(review): run() is presumably inherited from ov::test::SubgraphBaseTest - confirm.
TEST_P(GroupConvolutionLayerGPUTest, Inference) {
142+
run();
143+
}
144+
145+
// Check that a 3D input tensor for convolution (1D grouped convolution) is handled properly and that its output is correct compared with the OV runtime.
// With input channels 32 and group 32, SetUp() builds scaled i8 weights of shape {32, 4, 1, 3}.
146+
INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolutionLayerGPUTest_1D_basic,
147+
GroupConvolutionLayerGPUTest,
148+
::testing::Combine(::testing::Combine(::testing::Values(std::vector<size_t>{3}), // kernel
149+
::testing::Values(std::vector<size_t>{1}), // stride
150+
::testing::Values(std::vector<ptrdiff_t>{2}), // padBegin
151+
::testing::Values(std::vector<ptrdiff_t>{0}), // padEnd
152+
::testing::Values(std::vector<size_t>{1}), // dilation
153+
::testing::Values(4), // convOutChannels
154+
::testing::Values(32), // group
155+
::testing::Values(ov::op::PadType::EXPLICIT)), // padType
156+
::testing::Values(ov::element::f16), // Net precision
157+
::testing::Values(ov::element::f16), // Input precision
158+
::testing::Values(ov::element::dynamic), // Output precision
159+
::testing::Values(ov::element::i8), // Weights precision
160+
::testing::Values(input_shapes_1d), // Input shape
161+
::testing::Values(true), // Weights scaling
162+
::testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // Device name
163+
GroupConvolutionLayerGPUTest::getTestCaseName);
164+
165+
// Depthwise 1D case: with input channels 64, group 64 and convOutChannels 1, SetUp() builds scaled i8 weights of shape {64, 1, 1, 3}.
INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolutionLayerGPUTest_1D_depthwise,
166+
GroupConvolutionLayerGPUTest,
167+
::testing::Combine(::testing::Combine(::testing::Values(std::vector<size_t>{3}), // kernel
168+
::testing::Values(std::vector<size_t>{1}), // stride
169+
::testing::Values(std::vector<ptrdiff_t>{2}), // padBegin
170+
::testing::Values(std::vector<ptrdiff_t>{0}), // padEnd
171+
::testing::Values(std::vector<size_t>{1}), // dilation
172+
::testing::Values(1), // convOutChannels
173+
::testing::Values(64), // group
174+
::testing::Values(ov::op::PadType::EXPLICIT)), // padType
175+
::testing::Values(ov::element::f16), // Net precision
176+
::testing::Values(ov::element::f16), // Input precision
177+
::testing::Values(ov::element::dynamic), // Output precision
178+
::testing::Values(ov::element::i8), // Weights precision
179+
::testing::Values(input_shapes_depthwise), // Input shape
180+
::testing::Values(true), // Weights scaling
181+
::testing::Values<std::string>(ov::test::utils::DEVICE_GPU)), // Device name
182+
GroupConvolutionLayerGPUTest::getTestCaseName);
183+
} // namespace

0 commit comments

Comments
 (0)