in modules/core/src/dxt.cpp [2468:2899]
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
#ifdef HAVE_CLAMDFFT
CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
_dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
ocl_dft_amdfft(_src0, _dst, flags))
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
ocl_dft(_src0, _dst, flags, nonzero_rows))
#endif
static DFTFunc dft_tbl[6] =
{
(DFTFunc)DFT_32f,
(DFTFunc)RealDFT_32f,
(DFTFunc)CCSIDFT_32f,
(DFTFunc)DFT_64f,
(DFTFunc)RealDFT_64f,
(DFTFunc)CCSIDFT_64f
};
AutoBuffer<uchar> buf;
Mat src0 = _src0.getMat(), src = src0;
int prev_len = 0, stage = 0;
bool inv = (flags & DFT_INVERSE) != 0;
int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
int type = src.type(), depth = src.depth();
int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
int factors[34];
bool inplace_transform = false;
#ifdef USE_IPP_DFT
AutoBuffer<uchar> ippbuf;
int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
#endif
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
_dst.create( src.size(), depth );
else
_dst.create( src.size(), type );
Mat dst = _dst.getMat();
#if defined USE_IPP_DFT
CV_IPP_CHECK()
{
if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0)
{
if ((flags & DFT_ROWS) == 0)
{
if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
{
if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
{
if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
}
else
{
if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
{
ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
return;
}
setIppErrorStatus();
}
if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
{
ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
{
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
return;
}
setIppErrorStatus();
}
}
}
}
#endif
if( !real_transform )
elem_size = complex_elem_size;
if( src.cols == 1 && nonzero_rows > 0 )
CV_Error( CV_StsNotImplemented,
"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
// determine, which transform to do first - row-wise
// (stage 0) or column-wise (stage 1) transform
if( !(flags & DFT_ROWS) && src.rows > 1 &&
((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
(src.cols > 1 && inv && real_transform)) )
stage = 1;
for(;;)
{
double scale = 1;
uchar* wave = 0;
int* itab = 0;
uchar* ptr;
int i, len, count, sz = 0;
int use_buf = 0, odd_real = 0;
DFTFunc dft_func;
if( stage == 0 ) // row-wise transform
{
len = !inv ? src.cols : dst.cols;
count = src.rows;
if( len == 1 && !(flags & DFT_ROWS) )
{
len = !inv ? src.rows : dst.rows;
count = 1;
}
odd_real = real_transform && (len & 1);
}
else
{
len = dst.rows;
count = !inv ? src0.cols : dst.cols;
sz = 2*len*complex_elem_size;
}
void *spec = 0;
#ifdef USE_IPP_DFT
if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
{
int specsize=0, initsize=0, worksize=0;
IppDFTGetSizeFunc getSizeFunc = 0;
IppDFTInitFunc initFunc = 0;
if( real_transform && stage == 0 )
{
if( depth == CV_32F )
{
getSizeFunc = ippsDFTGetSize_R_32f;
initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
}
else
{
getSizeFunc = ippsDFTGetSize_R_64f;
initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
}
}
else
{
if( depth == CV_32F )
{
getSizeFunc = ippsDFTGetSize_C_32fc;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
}
else
{
getSizeFunc = ippsDFTGetSize_C_64fc;
initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
}
}
if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
{
ippbuf.allocate(specsize + initsize + 64);
spec = alignPtr(&ippbuf[0], 32);
uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
spec = 0;
sz += worksize;
}
else
setIppErrorStatus();
}
else
#endif
{
if( len != prev_len )
nf = DFTFactorize( len, factors );
inplace_transform = factors[0] == factors[nf-1];
sz += len*(complex_elem_size + sizeof(int));
i = nf > 1 && (factors[0] & 1) == 0;
if( (factors[i] & 1) != 0 && factors[i] > 5 )
sz += (factors[i]+1)*complex_elem_size;
if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) ||
(stage == 1 && !inplace_transform) )
{
use_buf = 1;
sz += len*complex_elem_size;
}
}
ptr = (uchar*)buf;
buf.allocate( sz + 32 );
if( ptr != (uchar*)buf )
prev_len = 0; // because we release the buffer,
// force recalculation of
// twiddle factors and permutation table
ptr = (uchar*)buf;
if( !spec )
{
wave = ptr;
ptr += len*complex_elem_size;
itab = (int*)ptr;
ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
if( len != prev_len || (!inplace_transform && inv && real_transform))
DFTInit( len, nf, factors, itab, complex_elem_size,
wave, stage == 0 && inv && real_transform );
// otherwise reuse the tables calculated on the previous stage
}
if( stage == 0 )
{
uchar* tmp_buf = 0;
int dptr_offset = 0;
int dst_full_len = len*elem_size;
int _flags = (int)inv + (src.channels() != dst.channels() ?
DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
if( use_buf )
{
tmp_buf = ptr;
ptr += len*complex_elem_size;
if( odd_real && !inv && len > 1 &&
!(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
dptr_offset = elem_size;
}
if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) )
dst_full_len += (len & 1) ? elem_size : complex_elem_size;
dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];
if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) )
stage = 1;
else if( flags & CV_DXT_SCALE )
scale = 1./(len * (flags & DFT_ROWS ? 1 : count));
if( nonzero_rows <= 0 || nonzero_rows > count )
nonzero_rows = count;
for( i = 0; i < nonzero_rows; i++ )
{
const uchar* sptr = src.ptr(i);
uchar* dptr0 = dst.ptr(i);
uchar* dptr = dptr0;
if( tmp_buf )
dptr = tmp_buf;
dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
if( dptr != dptr0 )
memcpy( dptr0, dptr + dptr_offset, dst_full_len );
}
for( ; i < count; i++ )
{
uchar* dptr0 = dst.ptr(i);
memset( dptr0, 0, dst_full_len );
}
if( stage != 1 )
{
if( !inv && real_transform && dst.channels() == 2 )
complementComplexOutput(dst, nonzero_rows, 1);
break;
}
src = dst;
}
else
{
int a = 0, b = count;
uchar *buf0, *buf1, *dbuf0, *dbuf1;
const uchar* sptr0 = src.ptr();
uchar* dptr0 = dst.ptr();
buf0 = ptr;
ptr += len*complex_elem_size;
buf1 = ptr;
ptr += len*complex_elem_size;
dbuf0 = buf0, dbuf1 = buf1;
if( use_buf )
{
dbuf1 = ptr;
dbuf0 = buf1;
ptr += len*complex_elem_size;
}
dft_func = dft_tbl[(depth == CV_64F)*3];
if( real_transform && inv && src.cols > 1 )
stage = 0;
else if( flags & CV_DXT_SCALE )
scale = 1./(len * count);
if( real_transform )
{
int even;
a = 1;
even = (count & 1) == 0;
b = (count+1)/2;
if( !inv )
{
memset( buf0, 0, len*complex_elem_size );
CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size );
sptr0 += dst.channels()*elem_size;
if( even )
{
memset( buf1, 0, len*complex_elem_size );
CopyColumn( sptr0 + (count-2)*elem_size, src.step,
buf1, complex_elem_size, len, elem_size );
}
}
else if( src.channels() == 1 )
{
CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size );
ExpandCCS( buf0, len, elem_size );
if( even )
{
CopyColumn( sptr0 + (count-1)*elem_size, src.step,
buf1, elem_size, len, elem_size );
ExpandCCS( buf1, len, elem_size );
}
sptr0 += elem_size;
}
else
{
CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
if( even )
{
CopyColumn( sptr0 + b*complex_elem_size, src.step,
buf1, complex_elem_size, len, complex_elem_size );
}
sptr0 += complex_elem_size;
}
if( even )
dft_func( buf1, dbuf1, len, nf, factors, itab,
wave, len, spec, ptr, inv, scale );
dft_func( buf0, dbuf0, len, nf, factors, itab,
wave, len, spec, ptr, inv, scale );
if( dst.channels() == 1 )
{
if( !inv )
{
// copy the half of output vector to the first/last column.
// before doing that, defgragment the vector
memcpy( dbuf0 + elem_size, dbuf0, elem_size );
CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
dst.step, len, elem_size );
if( even )
{
memcpy( dbuf1 + elem_size, dbuf1, elem_size );
CopyColumn( dbuf1 + elem_size, elem_size,
dptr0 + (count-1)*elem_size,
dst.step, len, elem_size );
}
dptr0 += elem_size;
}
else
{
// copy the real part of the complex vector to the first/last column
CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size );
if( even )
CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
dst.step, len, elem_size );
dptr0 += elem_size;
}
}
else
{
assert( !inv );
CopyColumn( dbuf0, complex_elem_size, dptr0,
dst.step, len, complex_elem_size );
if( even )
CopyColumn( dbuf1, complex_elem_size,
dptr0 + b*complex_elem_size,
dst.step, len, complex_elem_size );
dptr0 += complex_elem_size;
}
}
for( i = a; i < b; i += 2 )
{
if( i+1 < b )
{
CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size );
dft_func( buf1, dbuf1, len, nf, factors, itab,
wave, len, spec, ptr, inv, scale );
}
else
CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
dft_func( buf0, dbuf0, len, nf, factors, itab,
wave, len, spec, ptr, inv, scale );
if( i+1 < b )
CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size );
else
CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size );
sptr0 += 2*complex_elem_size;
dptr0 += 2*complex_elem_size;
}
if( stage != 0 )
{
if( !inv && real_transform && dst.channels() == 2 && len > 1 )
complementComplexOutput(dst, len, 2);
break;
}
src = dst;
}
}
}