[GPU] Fix weights conversion of 1d grouped conv (#32568)

wilson-seok · web-flow · commit 9b3e4055a052 · 2025-11-07T08:44:23.000Z
### Description of the issue (symptom, root cause, how it was resolved)
- A static-shape int8 model with a 1d grouped convolution does not get the proper weights dimension extension, so group shape inference fails.
- The fp16 model has no error because the weights of the 1d grouped conv are constant, so the weights dimension extension happens properly in CreateConstantOp(), while the int8 model has a scaling (multiply) between the constant and the weights input of the 1d grouped conv.
- Added a case for the 1d grouped conv where the weights layout is calculated.

#### The code and line that caused this issue (if it is not changed directly)
- src/plugins/intel_gpu/src/graph/convolution.cpp
- src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp

#### Reproduction step and snapshot (if applicable. Do not attach for customer model)
- $ benchmark_app …
- ...

#### Problematic graph
- <img width="1043" height="464" alt="image" src="https://github.com/user-attachments/assets/3f08faa0-92b3-4925-9255-9d7995b42a2e" />

#### Checklist
- [x] Is it a proper fix? (not a workaround)
- [x] Did you include a test case for this fix, if necessary?
- [x] Did you review existing tests that can be extended to cover this scenario? Which test did you review?

### Tickets:
- 175374
diff --git a/src/plugins/intel_gpu/src/graph/convolution.cpp b/src/plugins/intel_gpu/src/graph/convolution.cpp
@@ -126,8 +126,14 @@ std::vector<layout> calc_output_layout_impl(convolution_node const& node, kernel
         auto& weights_shape = input_shapes[1];
         // WA for legacy flow, mostly for unit tests as sometimes grouped conv has non-grouped weights
         if (legacy_flow && input_shapes[1].size() == 4 && input_shapes[0].size() == 4) {
-            weights_shape.insert(weights_shape.begin(), desc->groups);
-            weights_shape[1] /= desc->groups;
+            // Extend grouped 1d conv weights shape from 4d to 5d when conv input shape is canonicalized to 4d by allow_new_shape_infer=false
+            if (desc->grouped_weights_shape && desc->groups > 1 && (static_cast<int64_t>(desc->groups) == input_shapes[1][0].get_length())) {
+                // 1d convolution with groups, e.g. shape [g,oc,ic,x] -> [g,oc,ic,x,1]
+                weights_shape.insert(weights_shape.end(), 1);
+            } else {
+                weights_shape.insert(weights_shape.begin(), desc->groups);
+                weights_shape[1] /= desc->groups;
+            }
         }
         output_shapes = ov::op::v1::shape_infer(&op, input_shapes, pads_begin, pads_end);
     } else {
@@ -207,6 +213,17 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
 
     auto filter_inst = node.weights().get_output_layout().convert_to_weights_layout(argument->grouped_weights_shape);
 
+    // Extend grouped 1d conv weights shape from 4d to 5d when conv input shape is canonicalized to 4d by allow_new_shape_infer=false
+    const bool needs_filter_extension = !network.get_program()->is_new_shape_infer() &&
+                                        argument->grouped_weights_shape &&
+                                        argument->groups > 1 &&
+                                        filter_inst.get_rank() == 4 &&
+                                        !format::is_grouped(filter_inst.format);
+
+    if (needs_filter_extension) {
+        filter_inst = extend_weights_layout_to_5d(filter_inst);
+    }
+
     if (bias_term()) {
         auto bias_inst = node.bias().get_output_layout();
         CLDNN_ERROR_NOT_EQUAL(node.id(),
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp
@@ -81,14 +81,27 @@ struct convolution_impl : typed_primitive_impl_ocl<convolution> {
         conv_params.deformable_groups = deformable_groups;
 
         conv_params.groups = groups;
+        conv_params.grouped_weights_shape = primitive->grouped_weights_shape;
 
         auto deform_conv_dep_offset = primitive->deformable_mode ? 1 : 0;
         if (primitive->input.size() == 3)
             deform_conv_dep_offset++;
 
-        const auto& weights_layout = impl_param.input_layouts[1 + 0 + deform_conv_dep_offset]
+        const size_t weights_input_idx = 1 + deform_conv_dep_offset;
+        auto weights_layout = impl_param.input_layouts[weights_input_idx]
                                                .convert_to_weights_layout(primitive->grouped_weights_shape);
 
+        // Extend grouped 1d conv weights shape from 4d to 5d when conv input shape is canonicalized to 4d by allow_new_shape_infer=false
+        const bool needs_weights_extension = !impl_param.get_program().is_new_shape_infer() &&
+                                             groups > 1 &&
+                                             weights_layout.get_rank() == 4 &&
+                                             conv_params.grouped_weights_shape;
+
+        if (needs_weights_extension) {
+            weights_layout = extend_weights_layout_to_5d(weights_layout);
+            conv_params.weights = convert_weights_tensor(weights_layout, true);
+        }
+
         ov::CoordinateDiff pads_begin(primitive->padding_begin.begin(), primitive->padding_begin.end());
         ov::CoordinateDiff pads_end(primitive->padding_end.begin(), primitive->padding_end.end());
         const auto auto_pad = primitive->auto_pad;
diff --git a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h
@@ -195,4 +195,20 @@ class typed_primitive_inst<convolution> : public typed_primitive_inst_base<convo
 
 using convolution_inst = typed_primitive_inst<convolution>;
 
+// Helper function to extend a 4D weights layout to 5D for grouped 1D convolutions; asserts that the layout is static.
+// Used when conv input shape is canonicalized to 4D by allow_new_shape_infer=false. NOTE(review): rank 4 is not checked here; the callers in this change test get_rank() == 4.
+inline layout extend_weights_layout_to_5d(const layout& weights_layout) {
+    OPENVINO_ASSERT(!weights_layout.is_dynamic());
+    auto current_shape = weights_layout.get_shape();
+    std::vector<size_t> new_shape(current_shape.begin(), current_shape.end());
+    new_shape.push_back(1);  // Extend with dimension of size 1
+    // Only plain oiyx is switched to get_default_format(5, true, true); any other format is carried over unchanged (NOTE(review): confirm it suits a 5D shape).
+    ov::PartialShape new_pshape(new_shape);
+    auto new_format = weights_layout.format == format::oiyx
+                     ? format::get_default_format(5, true, true)
+                     : weights_layout.format;
+
+    return layout(new_pshape, weights_layout.data_type, new_format);
+}
+
 }  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_params.h
@@ -30,6 +30,7 @@ struct convolution_params : public weight_bias_zero_point_params {
     bool bilinear_interpolation_pad {false};
     bool deformable_mask_enabled {false};
     bool has_explicit_paddings {false};
+    bool grouped_weights_shape {false};
     DataTensor intermediate_tensor;
 
     std::string to_string() const override;
diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/group_convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/group_convolution.cpp
@@ -0,0 +1,183 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
+#include "shared_test_classes/single_op/group_convolution.hpp"
+#include "common_test_utils/node_builders/group_convolution.hpp"
+
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/result.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/convert.hpp"
+
+namespace {
+using ov::test::InputShape;
+using ov::test::groupConvSpecificParams;
+// Parameter tuple consumed by GroupConvolutionLayerGPUTest.
+typedef std::tuple<
+        groupConvSpecificParams,  // kernel, stride, padBegin, padEnd, dilation, convOutChannels, group, padType
+        ov::element::Type,     // Net precision
+        ov::element::Type,     // Input precision
+        ov::element::Type,     // Output precision
+        ov::element::Type,     // Weights precision
+        InputShape,            // Input shape
+        bool,                  // Weights scaling (weights go through Convert -> Multiply by a scale constant)
+        std::string            // Device name
+> groupConvLayerTestParamsSet;
+// Single-layer GPU test for GroupConvolution; parameters select precisions, the input shape and whether the weights are scaled by a Multiply.
+class GroupConvolutionLayerGPUTest : public testing::WithParamInterface<groupConvLayerTestParamsSet>,
+                                     virtual public ov::test::SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<groupConvLayerTestParamsSet>& obj) {
+        const auto& [convParams, netType, inType, outType, weightsType, inputShape, weightsScaling, targetDevice] = obj.param;
+
+        const auto& [kernel, stride, padBegin, padEnd, dilation, convOutChannels, group, padType] = convParams;
+        // Encode every parameter into the test case name.
+        std::ostringstream result;
+        result << "IS=";
+        result  << ov::test::utils::partialShape2str({inputShape.first}) << "_";
+        result << "TS=(";
+        for (const auto& shape : inputShape.second) {
+            result << ov::test::utils::vec2str(shape) << "_";
+        }
+        result << ")_";
+        result << "K" << ov::test::utils::vec2str(kernel) << "_";
+        result << "S" << ov::test::utils::vec2str(stride) << "_";
+        result << "PB" << ov::test::utils::vec2str(padBegin) << "_";
+        result << "PE" << ov::test::utils::vec2str(padEnd) << "_";
+        result << "D=" << ov::test::utils::vec2str(dilation) << "_";
+        result << "O=" << convOutChannels << "_";
+        result << "G=" << group << "_";
+        result << "AP=" << padType << "_";
+        result << "netPRC=" << netType << "_";
+        result << "inPRC=" << inType << "_";
+        result << "outPRC=" << outType << "_";
+        result << "weightsPRC=" << weightsType << "_";
+        result << "weightsScaling=" << weightsScaling << "_";
+        result << "trgDev=" << targetDevice;
+
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        const auto& [groupConvParams, netType, _inType, _outType, weightsType, inputShape, weightsScaling, _targetDevice] = this->GetParam();
+        inType = _inType;
+        outType = _outType;
+        targetDevice = _targetDevice;
+
+        init_input_shapes({inputShape});
+
+        const auto& [_kernel, stride, padBegin, padEnd, dilation, convOutChannels, group, padType] = groupConvParams;
+        auto kernel = _kernel;
+        // One Parameter per input shape, created with the input precision.
+        ov::ParameterVector inputParams;
+        for (auto&& shape : inputDynamicShapes)
+            inputParams.push_back(std::make_shared<ov::op::v0::Parameter>(inType, shape));
+        // With weightsScaling the weights input is Constant -> Convert -> Multiply(scale); NOTE(review): scaling_shape is rank 4, i.e. sized for 1D kernels only.
+        std::shared_ptr<ov::Node> groupConvolutionNode;
+        if (weightsScaling) {
+            size_t convInChannels = static_cast<size_t>(targetStaticShapes.front()[0][1] / group);
+            ov::Shape filter_weights_shape = {group, convOutChannels, convInChannels};
+            filter_weights_shape.insert(filter_weights_shape.end(), kernel.begin(), kernel.end());
+            ov::Shape scaling_shape = {group, convOutChannels, 1, 1};
+            auto weights_tensor = ov::test::utils::create_and_fill_tensor(weightsType,
+                 filter_weights_shape, ov::test::utils::InputGenerateData(-127, 256, 256, 1));
+            auto scaling_tensor = ov::test::utils::create_and_fill_tensor(netType, scaling_shape, ov::test::utils::InputGenerateData(0, 1, 8092, 1));
+            auto filter_weights_node = std::make_shared<ov::op::v0::Constant>(weights_tensor);
+            auto convert_node = std::make_shared<ov::op::v0::Convert>(filter_weights_node, netType);
+            auto scaling_node = std::make_shared<ov::op::v0::Constant>(scaling_tensor);
+            auto multiply_node = std::make_shared<ov::op::v1::Multiply>(convert_node, scaling_node);
+            groupConvolutionNode = ov::test::utils::make_group_convolution(inputParams.front(), multiply_node, netType, stride, padBegin,
+                                                                           padEnd, dilation, padType);
+        } else {
+            groupConvolutionNode = ov::test::utils::make_group_convolution(inputParams.front(), netType, kernel, stride, padBegin,
+                                                                           padEnd, dilation, padType, convOutChannels, group);
+        }
+
+        // Wrap every output of the group convolution in a Result node.
+        ov::ResultVector results;
+        for (size_t i = 0; i < groupConvolutionNode->get_output_size(); i++)
+                results.push_back(std::make_shared<ov::op::v0::Result>(groupConvolutionNode->output(i)));
+
+        function = std::make_shared<ov::Model>(results, inputParams, "GroupConvolution");
+        // Comparison thresholds: 0.1 for f16 network precision, 0.005 otherwise.
+        if (netType == ov::element::f16) {
+            abs_threshold = 0.1;
+            rel_threshold = 0.1;
+        } else {
+            abs_threshold = 0.005;
+            rel_threshold = 0.005;
+        }
+    }
+    // Generates one tensor per model input from the given static shapes (start_from = -10, range = 20, resolution = 8092).
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
+        inputs.clear();
+        const auto& funcInputs = function->inputs();
+
+        for (size_t i = 0lu; i < funcInputs.size(); i++) {
+            const auto& funcInput = funcInputs[i];
+            ov::test::utils::InputGenerateData in_data;
+            in_data.start_from = -10;
+            in_data.resolution = 8092;
+            in_data.range = 20u;
+
+            ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], in_data);
+            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
+        }
+    }
+};
+// Input for the basic 1D case: shape {10, 32, 3} with one identical target shape (index 1 is divided by the group count in SetUp).
+const InputShape input_shapes_1d = {
+        {10, 32, 3}, {{10, 32, 3}}
+    };
+// Input for the depthwise 1D case: shape {10, 64, 3} with one identical target shape.
+const InputShape input_shapes_depthwise = {
+        {10, 64, 3}, {{10, 64, 3}}
+    };
+// Each instantiated parameter set is executed through run().
+TEST_P(GroupConvolutionLayerGPUTest, Inference) {
+    run();
+}
+
+// Checks that a 3D input tensor for convolution (32 groups, scaled i8 weights) is handled properly and that its output is correct compared with ov runtime.
+INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolutionLayerGPUTest_1D_basic,
+                         GroupConvolutionLayerGPUTest,
+                         ::testing::Combine(::testing::Combine(::testing::Values(std::vector<size_t>{3}),
+                                                               ::testing::Values(std::vector<size_t>{1}),
+                                                               ::testing::Values(std::vector<ptrdiff_t>{2}),
+                                                               ::testing::Values(std::vector<ptrdiff_t>{0}),
+                                                               ::testing::Values(std::vector<size_t>{1}),
+                                                               ::testing::Values(4),
+                                                               ::testing::Values(32),
+                                                               ::testing::Values(ov::op::PadType::EXPLICIT)),
+                                            ::testing::Values(ov::element::f16),
+                                            ::testing::Values(ov::element::f16),
+                                            ::testing::Values(ov::element::dynamic),
+                                            ::testing::Values(ov::element::i8),
+                                            ::testing::Values(input_shapes_1d),
+                                            ::testing::Values(true),
+                                            ::testing::Values<std::string>(ov::test::utils::DEVICE_GPU)),
+                         GroupConvolutionLayerGPUTest::getTestCaseName);
+// Depthwise variant: 64 groups over the 64-channel input with convOutChannels = 1, again with scaled i8 weights.
+INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolutionLayerGPUTest_1D_depthwise,
+                         GroupConvolutionLayerGPUTest,
+                         ::testing::Combine(::testing::Combine(::testing::Values(std::vector<size_t>{3}),
+                                                               ::testing::Values(std::vector<size_t>{1}),
+                                                               ::testing::Values(std::vector<ptrdiff_t>{2}),
+                                                               ::testing::Values(std::vector<ptrdiff_t>{0}),
+                                                               ::testing::Values(std::vector<size_t>{1}),
+                                                               ::testing::Values(1),
+                                                               ::testing::Values(64),
+                                                               ::testing::Values(ov::op::PadType::EXPLICIT)),
+                                            ::testing::Values(ov::element::f16),
+                                            ::testing::Values(ov::element::f16),
+                                            ::testing::Values(ov::element::dynamic),
+                                            ::testing::Values(ov::element::i8),
+                                            ::testing::Values(input_shapes_depthwise),
+                                            ::testing::Values(true),
+                                            ::testing::Values<std::string>(ov::test::utils::DEVICE_GPU)),
+                         GroupConvolutionLayerGPUTest::getTestCaseName);
+}  // namespace