
Commit e5f22f7

[CPU] Get rid of 'withBias' flag in FullyConnected node
Not necessary anymore, since the bias input is always present and is either empty or not.
1 parent a051636 commit e5f22f7
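The whole change reduces to one idiom: derive bias presence from the bias memory descriptor at the point of use instead of caching it in a separate 'withBias' flag. Below is a minimal, self-contained C++ sketch of that pattern using mock stand-ins for the OpenVINO types (the MemoryDesc, the ARG_* ids, and the memory map are simplified here, not the real classes):

#include <cstddef>
#include <iostream>
#include <map>

// Mock descriptor: an empty descriptor models "no bias was provided".
struct MemoryDesc {
    std::size_t size = 0;
    bool empty() const { return size == 0; }
};

enum Arg { ARG_SRC, ARG_WEI, ARG_BIAS };

int main() {
    std::map<Arg, MemoryDesc> memory{{ARG_SRC, {64}}, {ARG_WEI, {128}}, {ARG_BIAS, {}}};

    // Before: a 'withBias' flag was set once and carried alongside the
    // attributes, risking drift from the actual memory arguments.
    // After: a single source of truth, queried where it is needed.
    const bool hasBias = !memory.at(ARG_BIAS).empty();
    std::cout << (hasBias ? "bias present\n" : "bias empty\n");
}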

10 files changed: +32 additions, -39 deletions

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp

Lines changed: 3 additions & 2 deletions

@@ -759,12 +759,14 @@ DnnlShapeAgnosticDataPtr DnnlConvolutionPrimitive::createShapeAgnosticData(const
     OPENVINO_ASSERT(!cacheWeightsWithUndefData,
                     "dnnl convolution weights caching for dynamic shapes is not implemented");
 
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+
     ConvAttrs attrs{{1},
                     {0},
                     {0},
                     {0},
                     AutoPaddingType::None,
-                    fcAttrs.withBias,
+                    hasBias,
                     fcAttrs.weightsNonTransposed,
                     false,
                     false,
@@ -880,7 +882,6 @@ DnnlMemoryDescPtr DnnlConvolutionPrimitive::makeTransposedWeightDescriptor(const
                                                                            const DnnlMemoryDescPtr& dstDesc,
                                                                            const ConvAttrs& attrs) {
     FCAttrs fcAttrs{};
-    fcAttrs.withBias = attrs.withBias;
     fcAttrs.weightsNonTransposed = attrs.weightsNonTransposed;
 
     return DnnlFCPrimitive::makeTransposedWeightDescriptor(srcDesc, dstDesc, fcAttrs);

src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp

Lines changed: 0 additions & 3 deletions

@@ -14,9 +14,6 @@ namespace ov::intel_cpu {
 
 // @todo require explicit initialization of all the attributes?
 struct FCAttrs {
-    // @todo probably we don't want with bias flag, since this information is already
-    // a part of src memory descs
-    bool withBias = false;
     bool weightsNonTransposed = false;
     bool sparseWeights = false;
     uint64_t dynamicQuantizationGroupSize = 0;

src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp

Lines changed: 3 additions & 4 deletions

@@ -278,8 +278,9 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
                 const std::shared_ptr<DnnlShapeAgnosticData>& shareAgnosticData) const {
 
                 const bool fcSemantic = true;
+                const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
                 ConvAttrs convAttrs{{1}, {0}, {0}, {0},
-                                    AutoPaddingType::None, attrs.withBias, attrs.weightsNonTransposed,
+                                    AutoPaddingType::None, hasBias, attrs.weightsNonTransposed,
                                     false, false, fcSemantic, false, ZeroPointsType::None, {}, attrs.postOps};
 
                 auto primitive =
@@ -366,9 +367,7 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
             VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS);
             VERIFY(all_of(f32, srcType(config), dstType(config)), UNSUPPORTED_SRC_PRECISIONS);
             VERIFY(any_of(weiType(config), f32, i8, i4), UNSUPPORTED_WEI_PRECISIONS);
-            if (config.attrs.withBias) {
-                VERIFY(biaType(config) == f32, UNSUPPORTED_SRC_PRECISIONS);
-            }
+            VERIFY(implication(hasBias(config), biaType(config) == f32), UNSUPPORTED_SRC_PRECISIONS);
             VERIFY(weiRank(config) == 2U, UNSUPPORTED_WEI_RANK);
             VERIFY(MatMulKleidiAIExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
 
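The rewritten VERIFY line folds the old two-line conditional into a logical implication: "if a bias is present, it must be f32". A standalone sketch of that helper (the name implication matches the call in the diff; this definition is illustrative, assuming the usual !a || b semantics):

#include <cassert>

// "cause implies effect": vacuously true when there is no cause.
constexpr bool implication(bool cause, bool effect) {
    return !cause || effect;
}

int main() {
    assert(implication(false, false));   // no bias: bias precision is irrelevant
    assert(implication(false, true));
    assert(implication(true, true));     // bias present and f32: accepted
    assert(!implication(true, false));   // bias present but not f32: rejected
}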

src/plugins/intel_cpu/src/nodes/executors/implementation_utils.hpp

Lines changed: 5 additions & 0 deletions

@@ -33,6 +33,11 @@ ov::element::Type memoryDescType(const Config& config) {
     return config.descs.at(idx)->getPrecision();
 }
 
+template <typename Config>
+bool hasBias(const Config& config) {
+    return !config.descs.at(ARG_BIAS)->empty();
+}
+
 template <typename Config>
 ov::element::Type srcType(const Config& config) {
     return memoryDescType<Config, ARG_SRC>(config);
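A hypothetical usage sketch of the new hasBias helper with a mocked-up config type (the real FCConfig stores per-argument descriptor pointers in descs; the mock below only reproduces the shape of that API):

#include <iostream>
#include <map>
#include <memory>

struct MemoryDesc {
    bool is_empty = true;
    bool empty() const { return is_empty; }
};

constexpr int ARG_BIAS = 3;  // placeholder id; the real one lives in memory_arguments.hpp

struct MockConfig {
    std::map<int, std::shared_ptr<MemoryDesc>> descs;
};

// Same shape as the helper added above.
template <typename Config>
bool hasBias(const Config& config) {
    return !config.descs.at(ARG_BIAS)->empty();
}

int main() {
    MockConfig config;
    config.descs[ARG_BIAS] = std::make_shared<MemoryDesc>(MemoryDesc{false});
    std::cout << std::boolalpha << hasBias(config) << '\n';  // prints: true
}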

src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.cpp

Lines changed: 10 additions & 16 deletions

@@ -27,6 +27,7 @@
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
 #include "nodes/executors/memory_arguments.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/core/parallel.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "utils/cpu_utils.hpp"
@@ -66,23 +67,23 @@ bool MatMulKleidiAIExecutor::supports(const FCConfig& config) {
 
 MatMulKleidiAIExecutor::MatMulKleidiAIExecutor(const FCAttrs& attrs,
                                                const MemoryArgs& memory,
-                                               const ExecutorContext::CPtr& context)
-    : m_attrs(attrs),
-      m_memoryArgs(memory) {
+                                               const ExecutorContext::CPtr& context) {
     auto srcMem = memory.at(ARG_SRC);
     auto weiMem = memory.at(ARG_WEI);
     auto weiDims = weiMem->getDesc().getShape().getDims();
     auto N = weiDims[0];
     auto K = weiDims[1];
 
-    bool hasBias = memory.at(ARG_BIAS)->getDataAs<float>() != nullptr;
-    if (!hasBias) {
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+
+    if (hasBias) {
+        biasMem = memory.at(ARG_BIAS);
+    } else {
         auto biasDesc = std::make_shared<CpuBlockedMemoryDesc>(f32, Shape({N}));
         biasMem = std::make_shared<Memory>(context->getEngine(), biasDesc);
         biasMem->nullify();
-    } else {
-        biasMem = memory.at(ARG_BIAS);
     }
+
     if (memory.at(ARG_SRC)->getPrecision() != memory.at(ARG_WEI)->getPrecision()) {
         aclfcAttrs.isConvertedWeights = true;
     }
@@ -384,15 +385,8 @@ void MatMulKleidiAIExecutor::execute(const MemoryArgs& memory) {
     }
 }
 
-void MatMulKleidiAIExecutor::moveMemToNumaNode(int numaNodeID) {
-    if (curNumaNode == numaNodeID) {
-        return;
-    }
-    curNumaNode = numaNodeID;
-    mbind_move(packedWeights, numaNodeID);
-    if (m_attrs.withBias) {
-        mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID);
-    }
+void MatMulKleidiAIExecutor::moveMemToNumaNode([[maybe_unused]] int numaNodeID) {
+    OPENVINO_THROW_NOT_IMPLEMENTED("'moveMemToNumaNode' is not implemented by the executor");
 }
 
 }  // namespace ov::intel_cpu
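The constructor change above also inverts the branch: a real bias is taken when the descriptor is non-empty, otherwise a zero-filled f32 bias of shape {N} is synthesized so the kernel is always fed a valid bias buffer. A mock sketch of that fallback (Memory and nullify() are simplified stand-ins with assumed semantics, not the real classes):

#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

struct Memory {
    std::vector<float> data;
    explicit Memory(std::size_t n) : data(n) {}
    void nullify() { data.assign(data.size(), 0.0f); }  // zero the buffer
};

std::shared_ptr<Memory> selectBias(const std::shared_ptr<Memory>& provided, std::size_t N) {
    if (provided && !provided->data.empty()) {
        return provided;                          // use the network's own bias
    }
    auto zeroBias = std::make_shared<Memory>(N);  // synthesize a neutral bias
    zeroBias->nullify();
    return zeroBias;
}

int main() {
    auto bias = selectBias(nullptr, 8);
    std::cout << "bias size: " << bias->data.size() << '\n';  // 8, all zeros
}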

src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.hpp

Lines changed: 0 additions & 3 deletions

@@ -109,8 +109,6 @@ class MatMulKleidiAIExecutor : public Executor {
         kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm,
         kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm};
 
-    const FCAttrs& m_attrs;
-    const MemoryArgs& m_memoryArgs;
     DnnlScratchPadPtr scratchPad;
     ACLFCAttrs aclfcAttrs;
     MemoryPtr biasMem;
@@ -127,7 +125,6 @@
     size_t BLOCK_SIZE_M_LOWP;
     size_t packedlhs_block_in_bytes = 0UL;
     bool INT4_IMPL;
-    int curNumaNode = -1;
     bool useDynamicQuant = false;
 };

src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp

Lines changed: 2 additions & 3 deletions

@@ -103,8 +103,7 @@ bool MlasGemmExecutor::supports(const FCConfig& config) {
 }
 
 MlasGemmExecutor::MlasGemmExecutor(const FCAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context)
-    : m_attrs(attrs),
-      m_memoryArgs(memory),
+    : m_memoryArgs(memory),
       packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context, !attrs.weightsNonTransposed)),
 
       N(batchDim(memory.at(ARG_WEI)->getStaticDims())),
@@ -151,7 +150,7 @@ void MlasGemmExecutor::moveMemToNumaNode(int numaNodeID) {
     }
     curNumaNode = numaNodeID;
     mbind_move(packedWeights, numaNodeID);
-    if (m_attrs.withBias) {
+    if (!m_memoryArgs.at(ARG_BIAS)->getDesc().empty()) {
         mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID);
     }
 }
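Unlike the KleidiAI executor, MlasGemmExecutor keeps its NUMA logic, but now derives bias presence from the stored memory arguments at the time of the move rather than from a flag captured at construction. A sketch of that guard with mock types (mbind_move here is a stand-in for the real NUMA binding helper):

#include <iostream>
#include <map>

struct MemoryDesc {
    bool is_empty = true;
    bool empty() const { return is_empty; }
};

struct Memory {
    MemoryDesc desc;
    const MemoryDesc& getDesc() const { return desc; }
};

enum Arg { ARG_SRC, ARG_WEI, ARG_BIAS };

void mbind_move(const Memory&, int node) {  // stand-in for the real NUMA helper
    std::cout << "moved to NUMA node " << node << '\n';
}

void moveBiasIfPresent(const std::map<Arg, Memory>& memoryArgs, int numaNodeID) {
    if (!memoryArgs.at(ARG_BIAS).getDesc().empty()) {  // presence checked on demand
        mbind_move(memoryArgs.at(ARG_BIAS), numaNodeID);
    }
}

int main() {
    std::map<Arg, Memory> args{{ARG_BIAS, Memory{MemoryDesc{false}}}};
    moveBiasIfPresent(args, 1);
}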

src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.hpp

Lines changed: 0 additions & 1 deletion

@@ -32,7 +32,6 @@ class MlasGemmExecutor : public Executor {
     void moveMemToNumaNode(int numaNodeID) override;
 
 private:
-    const FCAttrs& m_attrs;
     const MemoryArgs& m_memoryArgs;
     const MemoryCPtr packedWeights;
     int64_t M = 0, N, K;

src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp

Lines changed: 7 additions & 3 deletions

@@ -19,6 +19,7 @@
 #include "nodes/common/cpu_memcpy.h"
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
+#include "nodes/executors/implementation_utils.hpp"
 #include "nodes/executors/memory_arguments.hpp"
 #include "nodes/executors/shl/shl_utils.hpp"
 #include "openvino/core/except.hpp"
@@ -83,7 +84,7 @@ bool ShlFCExecutor::supports(const FCConfig& config) {
         return false;
     }
 
-    if (config.attrs.withBias) {
+    if (hasBias(config)) {
         const auto& biaDesc = config.descs.at(ARG_BIAS);
         if (biaDesc->getPrecision() != ov::element::f32) {
             DEBUG_LOG("ShlFCExecutor: supports only f32 bias");
@@ -104,7 +105,9 @@ bool ShlFCExecutor::supports(const FCConfig& config) {
     return true;
 }
 
-ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context)
+ShlFCExecutor::ShlFCExecutor([[maybe_unused]] const FCAttrs& attrs,
+                             const MemoryArgs& memory,
+                             const ExecutorContext::CPtr& context)
     : packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context)) {
     const auto& srcDesc = memory.at(ARG_SRC)->getDescPtr();
     const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr();
@@ -121,7 +124,8 @@ ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, con
                          weiDesc->getShape().getStaticDims());
     dst = ShlTensor(sess, precisionToShlDataType(dstDesc->getPrecision()), getShlDataLayoutByMemoryDesc(dstDesc));
 
-    if (attrs.withBias) {
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+    if (hasBias) {
         const auto& biasDesc = memory.at(ARG_BIAS)->getDescPtr();
         bias = ShlTensor(sess,
                          precisionToShlDataType(biasDesc->getPrecision()),

src/plugins/intel_cpu/src/nodes/fullyconnected.cpp

Lines changed: 2 additions & 4 deletions

@@ -561,8 +561,6 @@
 }
 
 void FullyConnected::initSupportedPrimitiveDescriptors() {
-    attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::dynamic;
-
     attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
                                                         getOriginalInputPrecisionAtPort(DATA),
                                                         context->getConfig().fcSparseWeiDecompressionRate);
@@ -642,8 +640,8 @@ void FullyConnected::needSplitMemoryForTensorParallel() {
                       : split_horizontal(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size);
     memory[ARG_WEI] = tp_cfg.cached_splited_weight;
     // bias
-    if (attrs.withBias) {
-        auto bias = getSrcMemoryAtPort(BIAS);
+    const auto& bias = getSrcMemoryAtPort(BIAS);
+    if (!bias->getDesc().empty()) {
         auto select_bias = split_horizontal(context->getEngine(), bias, 0, tp_cfg.w_rank, tp_cfg.w_size);
         tp_cfg.cached_splited_bias = std::move(select_bias);
     } else {
