I'm trying to do gaussian filtering of image in frequency domain (FFT) w/ IPP.
For image of size MxN and gauss kernel UxV, the steps:
1. zero-pad kernel to (M+U-1)x(N+V-1)
2. Fwd FFT kernel
3. zero-pad img to (M+U-1)x(N+V-1)
4. Fwd FFT img
5. multiply the 2 FFTs in float-complex format
4. Inv FFT the multiplied result
5. Trim edges
My resulting image looks a little strange. The best result I have is when I border extend both kernel and img but keep both left border width and top border height to 0.
Can the experts have a look to see what I'm doing wrong? NOTE: The test code below purposely uses a small kernel. Real code would be using very large kernels.
void foo(Ipp8u* img, int lineStep, int width, int height) { IppStatus status; // assume kernel is 5x5 with sigma = 1 int kernelStep; Ipp32f* kernel = ippiMalloc_32f_C1(5, 5, &kernelStep); IppiSize kernelRoi = { 5, 5 }; kernel[0] = 0.0030f; kernel[1] = 0.0133f; kernel[2] = 0.0219f; kernel[3] = 0.0133f; kernel[4] = 0.0030f; kernel[8] = 0.0133f; kernel[9] = 0.0596f; kernel[10] = 0.0983f; kernel[11] = 0.0596f; kernel[12] = 0.0133f; kernel[16] = 0.0219f; kernel[17] = 0.0983f; kernel[18] = 0.1621f; kernel[19] = 0.0983f; kernel[20] = 0.0219f; kernel[24] = 0.0133f; kernel[25] = 0.0596f; kernel[26] = 0.0983f; kernel[27] = 0.0596f; kernel[28] = 0.0133f; kernel[32] = 0.0030f; kernel[33] = 0.0133f; kernel[34] = 0.0219f; kernel[35] = 0.0133f; kernel[36] = 0.0030f; // pad kernel IppiSize padRoi = { width + 4, height + 4 }; // 2d fft padded kernel IppiDFTSpec_C_32fc* spec; status = ippiDFTInitAlloc_C_32fc(&spec, padRoi, IPP_FFT_DIV_INV_BY_N, ippAlgHintFast); // extend border by 4 pixels on each dimension int extKernelStep; Ipp32f* extKernel = ippiMalloc_32f_C1(width + 4, height + 4, &extKernelStep); status = ippiCopyConstBorder_32f_C1R(kernel, kernelStep, kernelRoi, extKernel, extKernelStep, padRoi, 0, 0, 0.f); int fftKernelStep; Ipp32fc* fftKernel = ippiMalloc_32fc_C1(width + 4, height + 4, &fftKernelStep); // copy over, then Fwd FFT it int kernelCorrectedStep = extKernelStep/4; int kernelFcStep = fftKernelStep/8; for(int h = 0; h < height + 4; ++h) { Ipp32f* currentKernelRow = extKernel + (h * kernelCorrectedStep); Ipp32fc* currentFFTKernelRow = fftKernel + (h * kernelFcStep); for(int w = 0; w < width + 4; ++w) { Ipp32f* currentKernel = currentKernelRow + w; Ipp32fc* currentFFTKernel = currentFFTKernelRow + w; (*currentFFTKernel).re = *currentKernel; (*currentFFTKernel).im = 0.f; } } // FWD kernel status = ippiDFTFwd_CToC_32fc_C1IR(fftKernel, fftKernelStep, spec, NULL); // convert img to 32f IppiSize imgRoi = { width, height }; int imgFStep; Ipp32f* imgF = ippiMalloc_32f_C1(width, height, &imgFStep); status = ippiConvert_8u32f_C1R(img, lineStep, imgF, imgFStep, imgRoi); // extend int extImgStep; Ipp32f* extImg = ippiMalloc_32f_C1(width + 4, height + 4, &extImgStep); //status = ippiCopyReplicateBorder_32f_C1R(imgF, imgFStep, imgRoi, extImg, extImgStep, padRoi, 0, 0); status = ippiCopyConstBorder_32f_C1R(imgF, imgFStep, imgRoi, extImg, extImgStep, padRoi, 0, 0, 0.f); int fftImgStep; Ipp32fc* fftImg = ippiMalloc_32fc_C1(width + 4, height + 4, &fftImgStep); int imgCorrectedStep = extImgStep/4; int imgFcStep = fftImgStep/8; for(int h = 0; h < height + 4; ++h) { Ipp32f* currentImgRow = extImg + (h * imgCorrectedStep); Ipp32fc* currentFFTImgRow = fftImg + (h * imgFcStep); for(int w = 0; w < width + 4; ++w) { Ipp32f* currentImg = currentImgRow + w; Ipp32fc* currentFFTImg = currentFFTImgRow + w; (*currentFFTImg).re = *currentImg; (*currentFFTImg).im = 0.f; } } // FWD img status = ippiDFTFwd_CToC_32fc_C1IR(fftImg, fftImgStep, spec, NULL); // now multiply both status = ippiMul_32fc_C1IR(fftKernel, fftKernelStep, fftImg, fftImgStep, padRoi); // inv FFT status = ippiDFTInv_CToC_32fc_C1IR(fftImg, fftImgStep, spec, NULL); // copy back out to extImg int fftCStep = fftImgStep/8; int cImgStep = extImgStep/4; for(int h = 0; h < height + 4; ++h) { Ipp32fc* currentFFTRow = fftImg + (h * fftCStep); Ipp32f* currentRow = extImg + (h * cImgStep); for(int w = 0; w < width + 4; ++w) { Ipp32fc* currentFFT = currentFFTRow + w; Ipp32f* currentImg = currentRow + w; *currentImg = (*currentFFT).re; } } Ipp32f* startPt = extImg + (2 * cImgStep) + 2; // looks better? //Ipp32f* startPt = extImg; // shifted??? status = ippiConvert_32f8u_C1R(startPt, extImgStep, img, lineStep, imgRoi, ippRndNear); }