-
Notifications
You must be signed in to change notification settings - Fork 238
Open
Description
注意到代码中不带掩膜的快速卷积,带掩膜的可以类似操作吗,要如何修改
//From ImageShop
// 4個有符號的32位的數據相加的和。
inline int _mm_hsum_epi32 (__m128i V) // V3 V2 V1 V0
{
// 實測這個速度要快些,_mm_extract_epi32最慢。
__m128i T = _mm_add_epi32 (V, _mm_srli_si128 (V, 8)); // V3+V1 V2+V0 V1 V0
T = _mm_add_epi32 (T, _mm_srli_si128 (T, 4)); // V3+V1+V2+V0 V2+V0+V1 V1+V0 V0
return _mm_cvtsi128_si32 (T); // 提取低位
}
// 基於SSE的字節數據的乘法。
// <param name="Kernel">需要卷積的核矩陣。 </param>
// <param name="Conv">卷積矩陣。 </param>
// <param name="Length">矩陣所有元素的長度。 </param>
inline int IM_Conv_SIMD (unsigned char* pCharKernel, unsigned char *pCharConv, int iLength)
{
const int iBlockSize = 16, Block = iLength / iBlockSize;
__m128i SumV = _mm_setzero_si128 ();
__m128i Zero = _mm_setzero_si128 ();
for (int Y = 0; Y < Block * iBlockSize; Y += iBlockSize)
{
__m128i SrcK = _mm_loadu_si128 ((__m128i*)(pCharKernel + Y));
__m128i SrcC = _mm_loadu_si128 ((__m128i*)(pCharConv + Y));
__m128i SrcK_L = _mm_unpacklo_epi8 (SrcK, Zero);
__m128i SrcK_H = _mm_unpackhi_epi8 (SrcK, Zero);
__m128i SrcC_L = _mm_unpacklo_epi8 (SrcC, Zero);
__m128i SrcC_H = _mm_unpackhi_epi8 (SrcC, Zero);
__m128i SumT = _mm_add_epi32 (_mm_madd_epi16 (SrcK_L, SrcC_L), _mm_madd_epi16 (SrcK_H, SrcC_H));
SumV = _mm_add_epi32 (SumV, SumT);
}
int Sum = _mm_hsum_epi32 (SumV);
for (int Y = Block * iBlockSize; Y < iLength; Y++)
{
Sum += pCharKernel[Y] * pCharConv[Y];
}
return Sum;
}
//#define ORG
//From ImageShop
matResult.create (matSrc.rows - pTemplData->vecPyramid[iLayer].rows + 1,
matSrc.cols - pTemplData->vecPyramid[iLayer].cols + 1, CV_32FC1);
matResult.setTo (0);
cv::Mat& matTemplate = pTemplData->vecPyramid[iLayer];
int t_r_end = matTemplate.rows, t_r = 0;
for (int r = 0; r < matResult.rows; r++)
{
float* r_matResult = matResult.ptr<float> (r);
uchar* r_source = matSrc.ptr<uchar> (r);
uchar* r_template, *r_sub_source;
for (int c = 0; c < matResult.cols; ++c, ++r_matResult, ++r_source)
{
r_template = matTemplate.ptr<uchar> ();
r_sub_source = r_source;
for (t_r = 0; t_r < t_r_end; ++t_r, r_sub_source += matSrc.cols, r_template += matTemplate.cols)
{
*r_matResult = *r_matResult + IM_Conv_SIMD (r_template, r_sub_source, matTemplate.cols);
}
}
}
//From ImageShop
Metadata
Metadata
Assignees
Labels
No labels