Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
aa8c918
Modified single, multi channel audio and generated QA
HazarathKumarM Sep 4, 2025
a353c5c
Added QA support for other kernels
HazarathKumarM Sep 16, 2025
f76df76
Updated bin files for QA
HazarathKumarM Sep 16, 2025
9e79dba
Updated and modified slice bin file for QA
HazarathKumarM Sep 16, 2025
41be482
Resolved review comments and made modifications to the parameters
HazarathKumarM Sep 17, 2025
6b013c5
Merge pull request #489 from HazarathKumarM/audio_qa
r-abishek Sep 18, 2025
55c1d70
Merge branch 'develop' into ar/audio_support_qa_samples
kiritigowda Sep 22, 2025
8493196
Merge branch 'develop' into ar/audio_support_qa_samples
kiritigowda Sep 23, 2025
f204c17
Modified parameters, updated the names and output docs image
HazarathKumarM Sep 23, 2025
b97acd3
Merge branch 'develop' into ar/audio_support_qa_samples
kiritigowda Sep 23, 2025
8524127
Merge branch 'develop' into ar/audio_support_qa_samples
kiritigowda Sep 26, 2025
144d02a
Resolved review comments and modified non silent region detection to …
HazarathKumarM Sep 30, 2025
c1a69c7
Modified and used oBufferSize as base directly to the resampleBuffer …
HazarathKumarM Sep 30, 2025
c3cc332
Updated compare function for non silent region
HazarathKumarM Sep 30, 2025
881fcda
Merge pull request #498 from HazarathKumarM/audio_qa
r-abishek Oct 2, 2025
53d9900
Merge branch 'develop' into ar/audio_support_qa_samples
r-abishek Oct 2, 2025
a7ab6c9
Merge branch 'develop' into ar/audio_support_qa_samples
kiritigowda Oct 2, 2025
79d444c
Resolved review comments
HazarathKumarM Oct 7, 2025
1da1b76
Merge pull request #502 from HazarathKumarM/audio_qa
r-abishek Oct 8, 2025
be98209
Merge branch 'develop' into ar/audio_support_qa_samples
r-abishek Oct 8, 2025
a28ebf8
Merge branch 'develop' into ar/audio_support_qa_samples
kiritigowda Oct 10, 2025
7603629
Updated comments for mel filter bank
HazarathKumarM Oct 13, 2025
81b53c5
Updated comments for resample
HazarathKumarM Oct 16, 2025
c8cc526
Merge pull request #506 from HazarathKumarM/audio_qa
r-abishek Oct 16, 2025
1cd4531
Merge branch 'develop' into ar/audio_support_qa_samples
r-abishek Oct 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified docs/data/spectrogramOutput.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 11 additions & 9 deletions utilities/test_suite/HIP/Tensor_audio_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ int main(int argc, char **argv)
}

// compute maximum possible buffer size of resample
Rpp64u resampleMaxBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride * 1.15;
Rpp64u resampleMaxBufferSize = static_cast<Rpp64u>(oBufferSize * RESAMPLE_BUFFER_SCALE_FACTOR);
if (testCase == RESAMPLE)
oBufferSize = resampleMaxBufferSize;

// compute maximum possible buffer size of spectrogram
Rpp64u spectrogramMaxBufferSize = 257 * 3754 * dstDescPtr->n;
Rpp64u spectrogramMaxBufferSize = SPECTROGRAM_MAX_HEIGHT * SPECTROGRAM_MAX_WIDTH * dstDescPtr->n;
if (testCase == SPECTROGRAM)
oBufferSize = spectrogramMaxBufferSize;

Expand Down Expand Up @@ -300,11 +300,13 @@ int main(int argc, char **argv)
{
testCaseName = "resample";

Rpp32u sampleRate = 16000;
Rpp32f upsampleRatio = 1.15f;
maxDstWidth = 0;
for(int i = 0, j = 0; i < batchSize; i++, j += 2)
{
inRateTensor[i] = 16000;
outRateTensor[i] = 16000 * 1.15f;
inRateTensor[i] = sampleRate;
outRateTensor[i] = sampleRate * upsampleRatio;
Rpp32f scaleRatio = outRateTensor[i] / inRateTensor[i];
srcDimsTensor[j] = srcLengthTensor[i];
srcDimsTensor[j + 1] = channelsTensor[i];
Expand Down Expand Up @@ -348,11 +350,11 @@ int main(int argc, char **argv)
Rpp32s numFilter = 80;
bool normalize = true;
srcDimsTensor[0] = 257;
srcDimsTensor[1] = 225;
srcDimsTensor[1] = 3170;
srcDimsTensor[2] = 257;
srcDimsTensor[3] = 211;
srcDimsTensor[3] = 552;
srcDimsTensor[4] = 257;
srcDimsTensor[5] = 214;
srcDimsTensor[5] = 1131;

init_mel_filter_bank(&inputf32, &outputf32, srcDescPtr, dstDescPtr, dstDims, offsetInBytes, numFilter, batchSize, srcDimsTensor, scriptPath, testType);

Expand All @@ -375,7 +377,7 @@ int main(int argc, char **argv)
if (missingFuncFlag == 1)
{
cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n";
return -1;
return RPP_ERROR_NOT_IMPLEMENTED;
}

wallTime = endWallTime - startWallTime;
Expand All @@ -395,7 +397,7 @@ int main(int argc, char **argv)
if (testCase != NON_SILENT_REGION_DETECTION)
verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath, "HIP");
else
verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);
verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, scriptPath, dst);

/* Dump the outputs to csv files for debugging
Runs only if
Expand Down
18 changes: 10 additions & 8 deletions utilities/test_suite/HOST/Tensor_audio_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ int main(int argc, char **argv)
descriptorPtr3D->offsetInBytes = 0;
descriptorPtr3D->dataType = RpptDataType::F32;
descriptorPtr3D->dims[0] = batchSize;
descriptorPtr3D->dims[1] = maxSrcWidth;
descriptorPtr3D->dims[1] = (maxSrcWidth + 7) & ~7; // Round the width up to the next multiple of 8 so the generic descriptor's dimension stays consistent with the typed descriptor and prevents errors.
descriptorPtr3D->strides[0] = descriptorPtr3D->dims[1];
}

Expand All @@ -138,12 +138,12 @@ int main(int argc, char **argv)
}

// compute maximum possible buffer size of resample
Rpp64u resampleMaxBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride * 1.15;
Rpp64u resampleMaxBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride * RESAMPLE_BUFFER_SCALE_FACTOR;
if (testCase == RESAMPLE)
oBufferSize = resampleMaxBufferSize;

// compute maximum possible buffer size of spectrogram
Rpp64u spectrogramMaxBufferSize = 257 * 3754 * dstDescPtr->n;
Rpp64u spectrogramMaxBufferSize = SPECTROGRAM_MAX_HEIGHT * SPECTROGRAM_MAX_WIDTH * dstDescPtr->n;
if (testCase == SPECTROGRAM)
oBufferSize = spectrogramMaxBufferSize;

Expand Down Expand Up @@ -320,11 +320,13 @@ int main(int argc, char **argv)
Rpp32f outRateTensor[batchSize];
Rpp32s srcDimsTensor[batchSize * 2];

Rpp32u sampleRate = 16000;
Rpp32f upsampleRatio = 1.15f;
maxDstWidth = 0;
for(int i = 0, j = 0; i < batchSize; i++, j += 2)
{
inRateTensor[i] = 16000;
outRateTensor[i] = 16000 * 1.15f;
inRateTensor[i] = sampleRate;
outRateTensor[i] = sampleRate * upsampleRatio;
Rpp32f scaleRatio = outRateTensor[i] / inRateTensor[i];
srcDimsTensor[j] = srcLengthTensor[i];
srcDimsTensor[j + 1] = channelsTensor[i];
Expand Down Expand Up @@ -364,7 +366,7 @@ int main(int argc, char **argv)
Rpp32s numFilter = 80;
bool normalize = true;
// (height, width) for each tensor in a batch for given QA inputs.
Rpp32s srcDimsTensor[] = {257, 225, 257, 211, 257, 214};
Rpp32s srcDimsTensor[] = {257, 3170, 257, 552, 257, 1131};

init_mel_filter_bank(&inputf32, &outputf32, srcDescPtr, dstDescPtr, dstDims, offsetInBytes, numFilter, batchSize, srcDimsTensor, scriptPath, testType);

Expand All @@ -384,7 +386,7 @@ int main(int argc, char **argv)
if (missingFuncFlag == 1)
{
cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n";
return -1;
return RPP_ERROR_NOT_IMPLEMENTED;
}

wallTime = endWallTime - startWallTime;
Expand All @@ -397,7 +399,7 @@ int main(int argc, char **argv)
if (testType == 0)
{
if (testCase == NON_SILENT_REGION_DETECTION)
verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);
verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, scriptPath, dst);
else
verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath, "HOST");

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/slice/slice.bin
Binary file not shown.
Binary file not shown.
2,146 changes: 1,916 additions & 230 deletions utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/to_decibels/to_decibels.bin

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
42 changes: 20 additions & 22 deletions utilities/test_suite/rpp_test_suite_audio.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ SOFTWARE.
#include <sndfile.h>
using namespace std;

#define MEL_FILTER_BANK_MAX_HEIGHT 257 // Maximum height for mel filter bank set to 257 to ensure compatibility with test configuration
#define MEL_FILTER_BANK_MAX_HEIGHT 257 // Maximum height for mel filter bank set to 257 to ensure compatibility with test configuration
#define RESAMPLE_BUFFER_SCALE_FACTOR 1.15 // Scale factor to allocate a safe maximum buffer size for resampling, allowing for upsampling
#define SPECTROGRAM_MAX_HEIGHT 257 // Maximum height for spectrogram set to 257 to ensure compatibility with test configuration, calculated as (nfft / 2) + 1 for a standard nfft of 512
#define SPECTROGRAM_MAX_WIDTH 3170 // Maximum width for a spectrogram, pre-calculated based on the longest audio file in the test dataset

std::map<int, string> audioAugmentationMap =
{
Expand All @@ -60,14 +63,6 @@ enum Augmentation {
MEL_FILTER_BANK = 7
};

// Golden outputs for Non Silent Region Detection
std::map<string, std::vector<int>> NonSilentRegionReferenceOutputs =
{
{"sample1", {0, 35840}},
{"sample2", {0, 33680}},
{"sample3", {0, 34160}}
};

// Cutoff values for audio kernels listed for HOST backend followed by HIP
static const std::map<string, std::vector<double>> audioCutOff =
{
Expand Down Expand Up @@ -251,7 +246,6 @@ void replicate_src_dims_to_fill_batch(Rpp32s *srcDimsTensor, int numSamples, int
// Compares output with reference outputs and validates QA
void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dstDims, string testCase, string dst, string scriptPath, string backend)
{
fstream refFile;
int fileMatch = 0;

// read data from golden outputs
Expand Down Expand Up @@ -343,24 +337,26 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst
}

// Compares output with reference outputs and validates QA for non silent region
void verify_non_silent_region_detection(int *detectedIndex, int *detectionLength, string testCase, int bs, vector<string> audioNames, string dst)
void verify_non_silent_region_detection(int *detectedIndex, int *detectionLength, string testCase, int bs, string scriptPath, string dst)
{
int fileMatch = 0;
// read data from golden outputs
string outFile = scriptPath + "/../REFERENCE_OUTPUTS_AUDIO/" + testCase + "/" + testCase + ".bin";
std::fstream fin(outFile, std::ios::in | std::ios::binary);
if(!fin.is_open())
{
cout << "\nUnable to get the reference outputs for the file specified!" << endl;
return;
}
Rpp32s *refOutput = (Rpp32s *)malloc(bs * 2 * sizeof(Rpp32s));
fin.read(reinterpret_cast<char*>(refOutput), bs * 2 * sizeof(Rpp32s));

for (int i = 0; i < bs; i++)
{
string currentFileName = audioNames[i];
size_t lastIndex = currentFileName.find_last_of(".");
currentFileName = currentFileName.substr(0, lastIndex); // Remove extension from file name
std::vector<int> referenceOutput = NonSilentRegionReferenceOutputs[currentFileName];
if(referenceOutput.empty())
{
cout << "\nUnable to get the reference outputs for the file specified!" << endl;
break;
}
Rpp32s outBegin = detectedIndex[i];
Rpp32s outLength = detectionLength[i];
Rpp32s refBegin = referenceOutput[0];
Rpp32s refLength = referenceOutput[1];
Rpp32s refBegin = refOutput[i * 2];
Rpp32s refLength = refOutput[i * 2 + 1];

if ((outBegin == refBegin) && (outLength == refLength))
fileMatch += 1;
Expand All @@ -385,6 +381,8 @@ void verify_non_silent_region_detection(int *detectedIndex, int *detectionLength
qaResults << status << std::endl;
qaResults.close();
}

free(refOutput);
}

inline Rpp32f sinc(Rpp32f x)
Expand Down