Issue with fixed-point FFT using the dsp.FFT function

10 Ansichten (letzte 30 Tage)
Matteo MONTEVERDI
Matteo MONTEVERDI am 20 Sep. 2024
Beantwortet: Nithin am 4 Okt. 2024
Hello everyone, I’m new to MATLAB and MATLAB Coder. I’m trying to generate C code for an STM32F446RE microcontroller with MATLAB Coder so that I can use it in STM32CubeIDE. The code I’m trying to generate computes an FFT of fixed-point data. I’m using the dsp.FFT System object with a 16-bit word length because it supports fixed-point input.
This is my MATLAB script
% Read the input samples from the text file.
s = readmatrix("FFT_data.txt");
% Convert the samples to signed fixed-point with a 16-bit word length
% (no fraction length is passed to fi here).
c = fi(s,1,16);
% Run the fixed-point data through the fourier function defined below.
q = fourier(c);
This is my MATLAB function
function [Y] = fourier(X)
%FOURIER Magnitude of the normalized 1024-point FFT of X.
%   Y = FOURIER(X) passes X through a dsp.FFT System object whose FFT
%   length is taken from the FFTLength property (1024) with normalization
%   enabled, and returns the absolute value of the transform.
fftObj = dsp.FFT( ...
    "FFTLengthSource", "Property", ...
    "Normalize", true, ...
    "FFTLength", 1024);
spectrum = fftObj(X);
Y = abs(spectrum);
end
The program works on my microcontroller, but it’s a little too slow. I think the problem is that the C code generated by MATLAB uses 64-bit multiword functions even though the data is only 16 bits wide in MATLAB.
/*
 * Post-processing step of a fixed-point FFT: rebuilds, in place in y, the
 * length-nRows complex result from a half-length complex FFT (see the comment
 * at the top of the body).
 *
 * y            - complex 16-bit buffer; read and overwritten in place
 * nChans       - channel count; only used to form yIdx = nRows * (nChans - 1),
 *                so one block of nRows elements is touched per call
 * nRows        - row count; N2 = nRows >> 1 and N4 = N2 >> 1 are derived from it
 * twiddleTable - 16-bit twiddle values; read-only
 * twiddleStep  - stride used when indexing twiddleTable
 *
 * NOTE(review): every intermediate sum goes through the int64m_T multiword
 * helpers (sLong2MultiWord, MultiWordSignedWrap, MultiWordAdd, MultiWordSub,
 * sMultiWordShr, MultiWord2sLong), which are defined elsewhere. The comments
 * below assume MultiWordAdd/MultiWordSub combine their first two arguments
 * into the third and that sMultiWordShr is a signed right shift - TODO confirm
 * against the helper definitions.
 */
static void c_MWDSPCG_FFT_DblLen_Nrm_YCs16n(cint16_T y[], int nChans, int nRows,
const short twiddleTable[],
int twiddleStep)
{
int64m_T r;
int64m_T r1;
int64m_T r10;
int64m_T r11;
int64m_T r12;
int64m_T r13;
int64m_T r14;
int64m_T r15;
int64m_T r16;
int64m_T r2;
int64m_T r3;
int64m_T r4;
int64m_T r5;
int64m_T r6;
int64m_T r7;
int64m_T r8;
int64m_T r9;
int N2;
int N4;
int i;
int ix;
int tempOut0Re_tmp;
int yIdx;
short tempOut0Im;
short tempOut0Re;
/* In-place "double-length" data recovery
Table-based mem-optimized twiddle computation
Used to recover linear-ordered length-N point complex FFT result
from a linear-ordered complex length-N/2 point FFT, performed
on N interleaved real values.
*/
N2 = nRows >> 1;
N4 = N2 >> 1;
/* Base offset into y used by every access below. */
yIdx = nRows * (nChans - 1);
/* Element N4: halve it, store the halved value at N2 + N4, and write the
   halved value with negated imaginary part back to N4. */
if (nRows > 2) {
tempOut0Re_tmp = N4 + yIdx;
tempOut0Re = (short)(y[tempOut0Re_tmp].re >> 1);
tempOut0Im = (short)(y[tempOut0Re_tmp].im >> 1);
i = (N2 + N4) + yIdx;
y[i].re = tempOut0Re;
y[i].im = tempOut0Im;
y[tempOut0Re_tmp].re = tempOut0Re;
y[tempOut0Re_tmp].im = (short)-tempOut0Im;
}
/* Element N2: real part from (re << 15) - (im << 15) of element 0, shifted
   right by 1 and then by 15; imaginary part set to 0. */
if (nRows > 1) {
sLong2MultiWord(y[yIdx].re << 15, (unsigned int *)&r.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r.chunks[0U], 21U,
(unsigned int *)&r2.chunks[0U]);
sLong2MultiWord(y[yIdx].im << 15, (unsigned int *)&r1.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r1.chunks[0U], 21U,
(unsigned int *)&r.chunks[0U]);
MultiWordSub((unsigned int *)&r2.chunks[0U], (unsigned int *)&r.chunks[0U],
(unsigned int *)&r4.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r4.chunks[0U], 21U,
(unsigned int *)&r5.chunks[0U]);
sMultiWordShr((unsigned int *)&r5.chunks[0U], 1U,
(unsigned int *)&r6.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r6.chunks[0U], 21U,
(unsigned int *)&r7.chunks[0U]);
sMultiWordShr((unsigned int *)&r7.chunks[0U], 15U,
(unsigned int *)&r8.chunks[0U]);
i = N2 + yIdx;
y[i].re = (short)MultiWord2sLong((unsigned int *)&r8.chunks[0U]);
y[i].im = 0;
}
/* Element 0 (runs unconditionally): same sequence as above with an addition
   in place of the subtraction; imaginary part set to 0. */
sLong2MultiWord(y[yIdx].re << 15, (unsigned int *)&r1.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r1.chunks[0U], 21U,
(unsigned int *)&r.chunks[0U]);
sLong2MultiWord(y[yIdx].im << 15, (unsigned int *)&r3.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r3.chunks[0U], 21U,
(unsigned int *)&r1.chunks[0U]);
MultiWordAdd((unsigned int *)&r.chunks[0U], (unsigned int *)&r1.chunks[0U],
(unsigned int *)&r2.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r2.chunks[0U], 21U,
(unsigned int *)&r4.chunks[0U]);
sMultiWordShr((unsigned int *)&r4.chunks[0U], 1U,
(unsigned int *)&r5.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r5.chunks[0U], 21U,
(unsigned int *)&r6.chunks[0U]);
sMultiWordShr((unsigned int *)&r6.chunks[0U], 15U,
(unsigned int *)&r7.chunks[0U]);
y[yIdx].re = (short)MultiWord2sLong((unsigned int *)&r7.chunks[0U]);
y[yIdx].im = 0;
/* From here on tempOut0Re_tmp is reused as the running twiddle-table index. */
tempOut0Re_tmp = twiddleStep;
/* Elements 1 .. N4-1: each pass writes elements ix, N2 - ix, N2 + ix and
   nRows - ix. */
for (ix = 1; ix < N4; ix++) {
int i1;
int i2;
int i3;
int i4;
int i5;
int i6;
short cTemp_im;
short cTemp_re;
/* i addresses element ix; i2 (set below) addresses element N2 - ix. */
i = ix + yIdx;
/* tempOut0Re: (y[i].re << 15) + (y[i2].re << 15), shifted right by 2 and
   then by 15. */
i1 = y[i].re << 15;
sLong2MultiWord(i1, (unsigned int *)&r3.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r3.chunks[0U], 21U,
(unsigned int *)&r1.chunks[0U]);
i2 = (N2 - ix) + yIdx;
i3 = y[i2].re << 15;
sLong2MultiWord(i3, (unsigned int *)&r9.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r9.chunks[0U], 21U,
(unsigned int *)&r3.chunks[0U]);
MultiWordAdd((unsigned int *)&r1.chunks[0U], (unsigned int *)&r3.chunks[0U],
(unsigned int *)&r.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r.chunks[0U], 21U,
(unsigned int *)&r2.chunks[0U]);
sMultiWordShr((unsigned int *)&r2.chunks[0U], 2U,
(unsigned int *)&r4.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r4.chunks[0U], 21U,
(unsigned int *)&r5.chunks[0U]);
sMultiWordShr((unsigned int *)&r5.chunks[0U], 15U,
(unsigned int *)&r6.chunks[0U]);
tempOut0Re = (short)MultiWord2sLong((unsigned int *)&r6.chunks[0U]);
/* tempOut0Im: (y[i].im << 15) - (y[i2].im << 15), shifted right by 2 and
   then by 15. */
i4 = y[i].im << 15;
sLong2MultiWord(i4, (unsigned int *)&r9.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r9.chunks[0U], 21U,
(unsigned int *)&r3.chunks[0U]);
i5 = y[i2].im << 15;
sLong2MultiWord(i5, (unsigned int *)&r10.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r10.chunks[0U], 21U,
(unsigned int *)&r9.chunks[0U]);
MultiWordSub((unsigned int *)&r3.chunks[0U], (unsigned int *)&r9.chunks[0U],
(unsigned int *)&r1.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r1.chunks[0U], 21U,
(unsigned int *)&r.chunks[0U]);
sMultiWordShr((unsigned int *)&r.chunks[0U], 2U,
(unsigned int *)&r2.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r2.chunks[0U], 21U,
(unsigned int *)&r4.chunks[0U]);
sMultiWordShr((unsigned int *)&r4.chunks[0U], 15U,
(unsigned int *)&r5.chunks[0U]);
tempOut0Im = (short)MultiWord2sLong((unsigned int *)&r5.chunks[0U]);
/* y[i].re (temporary): sum of the two shifted imaginary parts i4 and i5,
   shifted right by 2 and then by 15. */
sLong2MultiWord(i4, (unsigned int *)&r10.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r10.chunks[0U], 21U,
(unsigned int *)&r9.chunks[0U]);
sLong2MultiWord(i5, (unsigned int *)&r11.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r11.chunks[0U], 21U,
(unsigned int *)&r10.chunks[0U]);
MultiWordAdd((unsigned int *)&r9.chunks[0U],
(unsigned int *)&r10.chunks[0U],
(unsigned int *)&r3.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r3.chunks[0U], 21U,
(unsigned int *)&r1.chunks[0U]);
sMultiWordShr((unsigned int *)&r1.chunks[0U], 2U,
(unsigned int *)&r.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r.chunks[0U], 21U,
(unsigned int *)&r2.chunks[0U]);
sMultiWordShr((unsigned int *)&r2.chunks[0U], 15U,
(unsigned int *)&r4.chunks[0U]);
y[i].re = (short)MultiWord2sLong((unsigned int *)&r4.chunks[0U]);
/* y[i].im (temporary): shifted real part i3 (element N2 - ix) minus shifted
   real part i1 (element ix), shifted right by 2 and then by 15. */
sLong2MultiWord(i3, (unsigned int *)&r11.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r11.chunks[0U], 21U,
(unsigned int *)&r10.chunks[0U]);
sLong2MultiWord(i1, (unsigned int *)&r12.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r12.chunks[0U], 21U,
(unsigned int *)&r11.chunks[0U]);
MultiWordSub((unsigned int *)&r10.chunks[0U],
(unsigned int *)&r11.chunks[0U],
(unsigned int *)&r9.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r9.chunks[0U], 21U,
(unsigned int *)&r3.chunks[0U]);
sMultiWordShr((unsigned int *)&r3.chunks[0U], 2U,
(unsigned int *)&r1.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r1.chunks[0U], 21U,
(unsigned int *)&r.chunks[0U]);
sMultiWordShr((unsigned int *)&r.chunks[0U], 15U,
(unsigned int *)&r2.chunks[0U]);
y[i].im = (short)MultiWord2sLong((unsigned int *)&r2.chunks[0U]);
/* Complex multiply of the temporary y[i] by the twiddle pair (i1, i5) read
   from twiddleTable:
   cTemp_re = (i1 * y[i].re - i5 * y[i].im) shifted right by 15,
   cTemp_im = (i1 * y[i].im + i5 * y[i].re) shifted right by 15. */
i1 = twiddleTable[tempOut0Re_tmp + N4 * twiddleStep];
i3 = y[i].re;
sLong2MultiWord(i1 * i3, (unsigned int *)&r11.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r11.chunks[0U], 22U,
(unsigned int *)&r10.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r10.chunks[0U], 21U,
(unsigned int *)&r9.chunks[0U]);
i4 = y[i].im;
i5 = twiddleTable[tempOut0Re_tmp + N2 * twiddleStep];
sLong2MultiWord(i5 * i4, (unsigned int *)&r12.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r12.chunks[0U], 22U,
(unsigned int *)&r11.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r11.chunks[0U], 21U,
(unsigned int *)&r10.chunks[0U]);
MultiWordSub((unsigned int *)&r9.chunks[0U],
(unsigned int *)&r10.chunks[0U],
(unsigned int *)&r3.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r3.chunks[0U], 21U,
(unsigned int *)&r1.chunks[0U]);
sMultiWordShr((unsigned int *)&r1.chunks[0U], 15U,
(unsigned int *)&r.chunks[0U]);
cTemp_re = (short)MultiWord2sLong((unsigned int *)&r.chunks[0U]);
sLong2MultiWord(i1 * i4, (unsigned int *)&r12.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r12.chunks[0U], 22U,
(unsigned int *)&r11.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r11.chunks[0U], 21U,
(unsigned int *)&r10.chunks[0U]);
sLong2MultiWord(i5 * i3, (unsigned int *)&r13.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r13.chunks[0U], 22U,
(unsigned int *)&r12.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r12.chunks[0U], 21U,
(unsigned int *)&r11.chunks[0U]);
MultiWordAdd((unsigned int *)&r10.chunks[0U],
(unsigned int *)&r11.chunks[0U],
(unsigned int *)&r9.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r9.chunks[0U], 21U,
(unsigned int *)&r3.chunks[0U]);
sMultiWordShr((unsigned int *)&r3.chunks[0U], 15U,
(unsigned int *)&r1.chunks[0U]);
cTemp_im = (short)MultiWord2sLong((unsigned int *)&r1.chunks[0U]);
/* Final y[i] = tempOut0 + cTemp (re and im separately); the same value with
   negated imaginary part is copied to element nRows - ix. */
i1 = tempOut0Re << 15;
sLong2MultiWord(i1, (unsigned int *)&r12.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r12.chunks[0U], 21U,
(unsigned int *)&r11.chunks[0U]);
i3 = cTemp_re << 15;
sLong2MultiWord(i3, (unsigned int *)&r13.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r13.chunks[0U], 21U,
(unsigned int *)&r12.chunks[0U]);
MultiWordAdd((unsigned int *)&r11.chunks[0U],
(unsigned int *)&r12.chunks[0U],
(unsigned int *)&r10.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r10.chunks[0U], 21U,
(unsigned int *)&r9.chunks[0U]);
sMultiWordShr((unsigned int *)&r9.chunks[0U], 15U,
(unsigned int *)&r3.chunks[0U]);
y[i].re = (short)MultiWord2sLong((unsigned int *)&r3.chunks[0U]);
i4 = tempOut0Im << 15;
sLong2MultiWord(i4, (unsigned int *)&r13.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r13.chunks[0U], 21U,
(unsigned int *)&r12.chunks[0U]);
i5 = cTemp_im << 15;
sLong2MultiWord(i5, (unsigned int *)&r14.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r14.chunks[0U], 21U,
(unsigned int *)&r13.chunks[0U]);
MultiWordAdd((unsigned int *)&r12.chunks[0U],
(unsigned int *)&r13.chunks[0U],
(unsigned int *)&r11.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r11.chunks[0U], 21U,
(unsigned int *)&r10.chunks[0U]);
sMultiWordShr((unsigned int *)&r10.chunks[0U], 15U,
(unsigned int *)&r9.chunks[0U]);
y[i].im = (short)MultiWord2sLong((unsigned int *)&r9.chunks[0U]);
i6 = (nRows - ix) + yIdx;
y[i6].re = y[i].re;
y[i6].im = (short)-y[i].im;
/* Element N2 + ix = tempOut0 - cTemp (re and im separately); the same value
   with negated imaginary part is copied to element N2 - ix (i2). */
sLong2MultiWord(i1, (unsigned int *)&r14.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r14.chunks[0U], 21U,
(unsigned int *)&r13.chunks[0U]);
sLong2MultiWord(i3, (unsigned int *)&r15.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r15.chunks[0U], 21U,
(unsigned int *)&r14.chunks[0U]);
MultiWordSub((unsigned int *)&r13.chunks[0U],
(unsigned int *)&r14.chunks[0U],
(unsigned int *)&r12.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r12.chunks[0U], 21U,
(unsigned int *)&r11.chunks[0U]);
sMultiWordShr((unsigned int *)&r11.chunks[0U], 15U,
(unsigned int *)&r10.chunks[0U]);
i = (N2 + ix) + yIdx;
y[i].re = (short)MultiWord2sLong((unsigned int *)&r10.chunks[0U]);
sLong2MultiWord(i4, (unsigned int *)&r15.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r15.chunks[0U], 21U,
(unsigned int *)&r14.chunks[0U]);
sLong2MultiWord(i5, (unsigned int *)&r16.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r16.chunks[0U], 21U,
(unsigned int *)&r15.chunks[0U]);
MultiWordSub((unsigned int *)&r14.chunks[0U],
(unsigned int *)&r15.chunks[0U],
(unsigned int *)&r13.chunks[0U]);
MultiWordSignedWrap((unsigned int *)&r13.chunks[0U], 21U,
(unsigned int *)&r12.chunks[0U]);
sMultiWordShr((unsigned int *)&r12.chunks[0U], 15U,
(unsigned int *)&r11.chunks[0U]);
y[i].im = (short)MultiWord2sLong((unsigned int *)&r11.chunks[0U]);
y[i2].re = y[i].re;
y[i2].im = (short)-y[i].im;
/* Advance the twiddle-table index for the next pass. */
tempOut0Re_tmp += twiddleStep;
}
}
Is there a way to generate code that operates directly on 16-bit data and does not use the multiword functions?

Antworten (1)

Nithin
Nithin am 4 Okt. 2024
Hello Matteo,
One way to reduce the execution time is to use the CMSIS-DSP code replacement library (CRL) for real-valued FFTs. The code replacements are applied automatically when certain criteria are met. See this page for more details: https://in.mathworks.com/help/ecoder/armcortexm/ug/supported-matlab-functions-with-cmsis-library-for-armcortexm.html
I noticed that you are using a fixed-point vector as the FFT input. The code replacement only occurs for 'single' input. Also, make sure that the Code replacement library setting in the MATLAB Coder configuration is set to ARM Cortex-M.

Produkte


Version

R2023b

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by