void cv::dft()

in modules/core/src/dxt.cpp [2468:2899]


void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
#ifdef HAVE_CLAMDFFT
    CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
            _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
               ocl_dft_amdfft(_src0, _dst, flags))
#endif

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
               ocl_dft(_src0, _dst, flags, nonzero_rows))
#endif

    static DFTFunc dft_tbl[6] =
    {
        (DFTFunc)DFT_32f,
        (DFTFunc)RealDFT_32f,
        (DFTFunc)CCSIDFT_32f,
        (DFTFunc)DFT_64f,
        (DFTFunc)RealDFT_64f,
        (DFTFunc)CCSIDFT_64f
    };
    AutoBuffer<uchar> buf;
    Mat src0 = _src0.getMat(), src = src0;
    int prev_len = 0, stage = 0;
    bool inv = (flags & DFT_INVERSE) != 0;
    int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
    int type = src.type(), depth = src.depth();
    int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
    int factors[34];
    bool inplace_transform = false;
#ifdef USE_IPP_DFT
    AutoBuffer<uchar> ippbuf;
    int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
#endif

    CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );

    if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
        _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
    else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
        _dst.create( src.size(), depth );
    else
        _dst.create( src.size(), type );

    Mat dst = _dst.getMat();

#if defined USE_IPP_DFT
    CV_IPP_CHECK()
    {
        if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0)
        {
            if ((flags & DFT_ROWS) == 0)
            {
                if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
                {
                    if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return;
                    }
                    setIppErrorStatus();
                }
                if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
                {
                    if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
            else
            {
                if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
                {
                    ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
                    if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
                {
                    ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
                    if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
        }
    }
#endif

    if( !real_transform )
        elem_size = complex_elem_size;

    if( src.cols == 1 && nonzero_rows > 0 )
        CV_Error( CV_StsNotImplemented,
        "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
        "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );

    // determine, which transform to do first - row-wise
    // (stage 0) or column-wise (stage 1) transform
    if( !(flags & DFT_ROWS) && src.rows > 1 &&
        ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
         (src.cols > 1 && inv && real_transform)) )
        stage = 1;

    for(;;)
    {
        double scale = 1;
        uchar* wave = 0;
        int* itab = 0;
        uchar* ptr;
        int i, len, count, sz = 0;
        int use_buf = 0, odd_real = 0;
        DFTFunc dft_func;

        if( stage == 0 ) // row-wise transform
        {
            len = !inv ? src.cols : dst.cols;
            count = src.rows;
            if( len == 1 && !(flags & DFT_ROWS) )
            {
                len = !inv ? src.rows : dst.rows;
                count = 1;
            }
            odd_real = real_transform && (len & 1);
        }
        else
        {
            len = dst.rows;
            count = !inv ? src0.cols : dst.cols;
            sz = 2*len*complex_elem_size;
        }

        void *spec = 0;
#ifdef USE_IPP_DFT
        if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
        {
            int specsize=0, initsize=0, worksize=0;
            IppDFTGetSizeFunc getSizeFunc = 0;
            IppDFTInitFunc initFunc = 0;

            if( real_transform && stage == 0 )
            {
                if( depth == CV_32F )
                {
                    getSizeFunc = ippsDFTGetSize_R_32f;
                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
                }
                else
                {
                    getSizeFunc = ippsDFTGetSize_R_64f;
                    initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
                }
            }
            else
            {
                if( depth == CV_32F )
                {
                    getSizeFunc = ippsDFTGetSize_C_32fc;
                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
                }
                else
                {
                    getSizeFunc = ippsDFTGetSize_C_64fc;
                    initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
                }
            }
            if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
            {
                ippbuf.allocate(specsize + initsize + 64);
                spec = alignPtr(&ippbuf[0], 32);
                uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
                if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
                    spec = 0;
                sz += worksize;
            }
            else
                setIppErrorStatus();
        }
        else
#endif
        {
            if( len != prev_len )
                nf = DFTFactorize( len, factors );

            inplace_transform = factors[0] == factors[nf-1];
            sz += len*(complex_elem_size + sizeof(int));
            i = nf > 1 && (factors[0] & 1) == 0;
            if( (factors[i] & 1) != 0 && factors[i] > 5 )
                sz += (factors[i]+1)*complex_elem_size;

            if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) ||
                (stage == 1 && !inplace_transform) )
            {
                use_buf = 1;
                sz += len*complex_elem_size;
            }
        }

        ptr = (uchar*)buf;
        buf.allocate( sz + 32 );
        if( ptr != (uchar*)buf )
            prev_len = 0; // because we release the buffer,
                          // force recalculation of
                          // twiddle factors and permutation table
        ptr = (uchar*)buf;
        if( !spec )
        {
            wave = ptr;
            ptr += len*complex_elem_size;
            itab = (int*)ptr;
            ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );

            if( len != prev_len || (!inplace_transform && inv && real_transform))
                DFTInit( len, nf, factors, itab, complex_elem_size,
                            wave, stage == 0 && inv && real_transform );
            // otherwise reuse the tables calculated on the previous stage
        }

        if( stage == 0 )
        {
            uchar* tmp_buf = 0;
            int dptr_offset = 0;
            int dst_full_len = len*elem_size;
            int _flags = (int)inv + (src.channels() != dst.channels() ?
                         DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
            if( use_buf )
            {
                tmp_buf = ptr;
                ptr += len*complex_elem_size;
                if( odd_real && !inv && len > 1 &&
                    !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
                    dptr_offset = elem_size;
            }

            if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) )
                dst_full_len += (len & 1) ? elem_size : complex_elem_size;

            dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];

            if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) )
                stage = 1;
            else if( flags & CV_DXT_SCALE )
                scale = 1./(len * (flags & DFT_ROWS ? 1 : count));

            if( nonzero_rows <= 0 || nonzero_rows > count )
                nonzero_rows = count;

            for( i = 0; i < nonzero_rows; i++ )
            {
                const uchar* sptr = src.ptr(i);
                uchar* dptr0 = dst.ptr(i);
                uchar* dptr = dptr0;

                if( tmp_buf )
                    dptr = tmp_buf;

                dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
                if( dptr != dptr0 )
                    memcpy( dptr0, dptr + dptr_offset, dst_full_len );
            }

            for( ; i < count; i++ )
            {
                uchar* dptr0 = dst.ptr(i);
                memset( dptr0, 0, dst_full_len );
            }

            if( stage != 1 )
            {
                if( !inv && real_transform && dst.channels() == 2 )
                    complementComplexOutput(dst, nonzero_rows, 1);
                break;
            }
            src = dst;
        }
        else
        {
            int a = 0, b = count;
            uchar *buf0, *buf1, *dbuf0, *dbuf1;
            const uchar* sptr0 = src.ptr();
            uchar* dptr0 = dst.ptr();
            buf0 = ptr;
            ptr += len*complex_elem_size;
            buf1 = ptr;
            ptr += len*complex_elem_size;
            dbuf0 = buf0, dbuf1 = buf1;

            if( use_buf )
            {
                dbuf1 = ptr;
                dbuf0 = buf1;
                ptr += len*complex_elem_size;
            }

            dft_func = dft_tbl[(depth == CV_64F)*3];

            if( real_transform && inv && src.cols > 1 )
                stage = 0;
            else if( flags & CV_DXT_SCALE )
                scale = 1./(len * count);

            if( real_transform )
            {
                int even;
                a = 1;
                even = (count & 1) == 0;
                b = (count+1)/2;
                if( !inv )
                {
                    memset( buf0, 0, len*complex_elem_size );
                    CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size );
                    sptr0 += dst.channels()*elem_size;
                    if( even )
                    {
                        memset( buf1, 0, len*complex_elem_size );
                        CopyColumn( sptr0 + (count-2)*elem_size, src.step,
                                    buf1, complex_elem_size, len, elem_size );
                    }
                }
                else if( src.channels() == 1 )
                {
                    CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size );
                    ExpandCCS( buf0, len, elem_size );
                    if( even )
                    {
                        CopyColumn( sptr0 + (count-1)*elem_size, src.step,
                                    buf1, elem_size, len, elem_size );
                        ExpandCCS( buf1, len, elem_size );
                    }
                    sptr0 += elem_size;
                }
                else
                {
                    CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
                    if( even )
                    {
                        CopyColumn( sptr0 + b*complex_elem_size, src.step,
                                       buf1, complex_elem_size, len, complex_elem_size );
                    }
                    sptr0 += complex_elem_size;
                }

                if( even )
                    dft_func( buf1, dbuf1, len, nf, factors, itab,
                              wave, len, spec, ptr, inv, scale );
                dft_func( buf0, dbuf0, len, nf, factors, itab,
                          wave, len, spec, ptr, inv, scale );

                if( dst.channels() == 1 )
                {
                    if( !inv )
                    {
                        // copy the half of output vector to the first/last column.
                        // before doing that, defgragment the vector
                        memcpy( dbuf0 + elem_size, dbuf0, elem_size );
                        CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
                                       dst.step, len, elem_size );
                        if( even )
                        {
                            memcpy( dbuf1 + elem_size, dbuf1, elem_size );
                            CopyColumn( dbuf1 + elem_size, elem_size,
                                           dptr0 + (count-1)*elem_size,
                                           dst.step, len, elem_size );
                        }
                        dptr0 += elem_size;
                    }
                    else
                    {
                        // copy the real part of the complex vector to the first/last column
                        CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size );
                        if( even )
                            CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
                                           dst.step, len, elem_size );
                        dptr0 += elem_size;
                    }
                }
                else
                {
                    assert( !inv );
                    CopyColumn( dbuf0, complex_elem_size, dptr0,
                                   dst.step, len, complex_elem_size );
                    if( even )
                        CopyColumn( dbuf1, complex_elem_size,
                                       dptr0 + b*complex_elem_size,
                                       dst.step, len, complex_elem_size );
                    dptr0 += complex_elem_size;
                }
            }

            for( i = a; i < b; i += 2 )
            {
                if( i+1 < b )
                {
                    CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size );
                    dft_func( buf1, dbuf1, len, nf, factors, itab,
                              wave, len, spec, ptr, inv, scale );
                }
                else
                    CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );

                dft_func( buf0, dbuf0, len, nf, factors, itab,
                          wave, len, spec, ptr, inv, scale );

                if( i+1 < b )
                    CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size );
                else
                    CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size );
                sptr0 += 2*complex_elem_size;
                dptr0 += 2*complex_elem_size;
            }

            if( stage != 0 )
            {
                if( !inv && real_transform && dst.channels() == 2 && len > 1 )
                    complementComplexOutput(dst, len, 2);
                break;
            }
            src = dst;
        }
    }
}