in modules/core/src/arithm.cpp [1926:2203]
// Generic driver for element-wise binary arithmetic ("array op array",
// "array op scalar" or "scalar op array"), with an optional mask.
//
// The per-depth kernel is taken from `tab` and applied either directly to the
// whole arrays (fast path) or block by block through scratch buffers, converting
// the inputs to an intermediate "working" type (wtype) and the result to the
// destination type (dtype) where those differ.
//
// Parameters:
//   _src1, _src2 - the two operands; one of them may be a scalar (as decided by
//                  checkScalar()).
//   _dst         - output array; (re)created with the size of the array operand.
//   _mask        - optional mask; when non-empty it must be CV_8UC1 or CV_8SC1 and
//                  have the same size as the array operand.
//   dtype        - requested output type; only its depth is used. A negative value
//                  means "take it from _dst if _dst has a fixed type, otherwise from
//                  the first (array) operand".
//   tab          - table of kernels indexed by depth.
//   muldiv       - selects the working-type rule: when true the working depth is at
//                  least CV_32F and a scalar operand is treated as CV_64F.
//   usrdata      - opaque pointer forwarded unchanged to the kernel and to
//                  ocl_arithm_op().
//   oclop        - operation id forwarded to ocl_arithm_op(); OCL_OP_SUB and
//                  OCL_OP_DIV_SCALE are replaced by their "R" variants when the
//                  operands are swapped.
//
// Errors:
//   CV_StsUnmatchedSizes - operands differ in shape/channels and neither is a scalar.
//   CV_StsBadArg         - operand types differ, no scalar is involved, and the
//                          output type is neither given nor fixed by _dst.
//   CV_Assert failures   - scalar operand is not CV_64F with height 1 or 4, or the
//                          mask has a wrong type/size.
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false,
void* usrdata=0, int oclop=-1 )
{
// Operands are accessed through pointers so that they can be swapped later on.
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
int kind1 = psrc1->kind(), kind2 = psrc2->kind();
bool haveMask = !_mask.empty();
bool reallocate = false;
int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
// For operands with more than 2 dimensions a default-constructed Size is used.
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
#ifdef HAVE_OPENCL
// The OpenCL path is only considered for UMat destinations and operands with at most 2 dimensions.
bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
#endif
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
// Fast path: same size, same type, at most 2 dimensions, no mask, destination type
// compatible with the input type, and the operands are either both scalars or both
// non-scalars. No conversion or blocking is needed in that case.
if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
!haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
(_dst.fixedType() && _dst.type() == type1)) &&
((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
{
_dst.createSameSize(*psrc1, type1);
// NOTE(review): CV_OCL_RUN is defined elsewhere; presumably it returns from this
// function when the OpenCL implementation succeeds and falls through otherwise - confirm.
CV_OCL_RUN(use_opencl,
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
(!usrdata ? type1 : std::max(depth1, CV_32F)),
usrdata, oclop, false))
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
Size sz = getContinuousSize(src1, src2, dst, src1.channels());
// Call the kernel for the input depth directly on the source and destination data.
tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz, usrdata);
return;
}
// haveScalar: one operand is a scalar (after the block below it is always src2).
// swapped12:  the operands were exchanged to achieve that.
bool haveScalar = false, swapped12 = false;
// Operands that differ in dimensionality, size or channel count, or a MATX operand
// of size 1x4 / 1x1, are only acceptable if one of the operands is a scalar.
if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
(kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) ||
(kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) )
{
if( checkScalar(*psrc1, type2, kind1, kind2) )
{
// src1 is a scalar; swap it with src2
swap(psrc1, psrc2);
swap(sz1, sz2);
swap(type1, type2);
swap(depth1, depth2);
swap(cn, cn2);
swap(dims1, dims2);
swapped12 = true;
// The OpenCL operation id is switched to the reversed-operand variant to match the swap.
if( oclop == OCL_OP_SUB )
oclop = OCL_OP_RSUB;
if ( oclop == OCL_OP_DIV_SCALE )
oclop = OCL_OP_RDIV_SCALE;
}
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
CV_Error( CV_StsUnmatchedSizes,
"The operation is neither 'array op array' "
"(where arrays have the same size and the same number of channels), "
"nor 'array op scalar', nor 'scalar op array'" );
haveScalar = true;
// From here on src2 is the scalar: it must be CV_64F with 1 or 4 rows.
CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
if (!muldiv)
{
Mat sc = psrc2->getMat();
// NOTE(review): actualScalarDepth is defined elsewhere; presumably it returns the
// narrowest depth able to represent the first cn scalar values - confirm.
depth2 = actualScalarDepth(sc.ptr<double>(), cn);
// Treat a CV_64F scalar as CV_32F when the array depth is below CV_32S or is CV_32F.
if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
depth2 = CV_32F;
}
else
depth2 = CV_64F;
}
// Resolve the output type when the caller did not specify one.
if( dtype < 0 )
{
if( _dst.fixedType() )
dtype = _dst.type();
else
{
if( !haveScalar && type1 != type2 )
CV_Error(CV_StsBadArg,
"When the input arrays in add/subtract/multiply/divide functions have different types, "
"the output array type must be explicitly specified");
dtype = type1;
}
}
// Only the depth of the requested type is kept; the channel count is re-attached below.
dtype = CV_MAT_DEPTH(dtype);
// Choose the working depth in which the kernel is evaluated.
if( depth1 == depth2 && dtype == depth1 )
wtype = dtype;
else if( !muldiv )
{
wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
wtype = std::max(wtype, dtype);
// when the result of addition should be converted to an integer type,
// and just one of the input arrays is floating-point, it makes sense to convert that input to integer type before the operation,
// instead of converting the other input to floating-point and then converting the operation result back to integers.
if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) )
wtype = CV_32S;
}
else
{
// For muldiv the working depth is never below CV_32F.
wtype = std::max(depth1, std::max(depth2, CV_32F));
wtype = std::max(wtype, dtype);
}
// Both the destination and the working type use the channel count of the array operand.
dtype = CV_MAKETYPE(dtype, cn);
wtype = CV_MAKETYPE(wtype, cn);
if( haveMask )
{
int mtype = _mask.type();
CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
// Must be evaluated before createSameSize() below: records whether the destination
// currently has a different size or type than the one about to be requested.
reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
}
_dst.createSameSize(*psrc1, dtype);
// With a mask, a destination whose size/type had to change is zero-filled first.
if( reallocate )
_dst.setTo(0.);
CV_OCL_RUN(use_opencl,
ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
usrdata, oclop, haveScalar))
// Conversion routines; a null pointer means "no conversion needed".
// cvtsrc2 reuses cvtsrc1 when both inputs have the same type.
BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
// Element sizes in bytes; the types include the channel count, so these are per-pixel sizes.
size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
// Upper bound on elements per block, derived from BLOCK_SIZE and the working element size (rounded up).
size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
BinaryFunc copymask = getCopyMaskFunc(dsz);
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();
// A single scratch allocation is carved into up to four regions:
//   buf1    - src1 converted to wtype
//   buf2    - src2 converted to wtype, or the unrolled scalar
//   wbuf    - kernel result in wtype
//   maskbuf - result converted to dtype, awaiting the masked copy
AutoBuffer<uchar> _buf;
uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
// Bytes of scratch space required per element, summed over the regions in use.
size_t bufesz = (cvtsrc1 ? wsz : 0) +
(cvtsrc2 || haveScalar ? wsz : 0) +
(cvtdst ? wsz : 0) +
(haveMask ? dsz : 0);
// Kernel for the working depth.
BinaryFunc func = tab[CV_MAT_DEPTH(wtype)];
if( !haveScalar )
{
// array op array
const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
uchar* ptrs[4];
NAryMatIterator it(arrays, ptrs);
// Blocking is only needed when a scratch buffer is involved; otherwise a plane is
// processed in a single kernel call.
size_t total = it.size, blocksize = total;
if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
blocksize = std::min(blocksize, blocksize0);
// 64 extra bytes leave room for the 16-byte alignment applied to each region below.
_buf.allocate(bufesz*blocksize + 64);
buf = _buf;
if( cvtsrc1 )
buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
if( cvtsrc2 )
buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
wbuf = maskbuf = buf;
if( cvtdst )
buf = alignPtr(buf + blocksize*wsz, 16);
if( haveMask )
maskbuf = buf;
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
for( size_t j = 0; j < total; j += blocksize )
{
// bsz: elements in this block; bszn: the same block as a single row of bsz*cn values.
int bsz = (int)MIN(total - j, blocksize);
Size bszn(bsz*cn, 1);
const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
uchar* dptr = ptrs[2];
if( cvtsrc1 )
{
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
sptr1 = buf1;
}
// When both inputs refer to the same data, reuse the (possibly converted) first
// operand instead of converting the second one again.
if( ptrs[0] == ptrs[1] )
sptr2 = sptr1;
else if( cvtsrc2 )
{
cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
sptr2 = buf2;
}
// No mask and no output conversion: the kernel writes straight into dst.
if( !haveMask && !cvtdst )
func( sptr1, 1, sptr2, 1, dptr, 1, bszn, usrdata );
else
{
// NOTE(review): the destination step passed here is 0, whereas the scalar branch
// passes 1; with a one-row Size the step is presumably never used - confirm.
func( sptr1, 1, sptr2, 1, wbuf, 0, bszn, usrdata );
if( !haveMask )
cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
else if( !cvtdst )
{
copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
// The mask is single-channel 8-bit (asserted above), so it advances one byte per element.
ptrs[3] += bsz;
}
else
{
// Convert to dtype into maskbuf first, then copy only the masked elements to dst.
cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
ptrs[3] += bsz;
}
}
// Advance the source and destination pointers by the bytes consumed in this block.
ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
}
}
}
else
{
// array op scalar (src2 is the scalar; it does not take part in the iteration)
const Mat* arrays[] = { &src1, &dst, &mask, 0 };
uchar* ptrs[3];
NAryMatIterator it(arrays, ptrs);
// The scalar always needs a block-sized buffer, so the block size is always capped.
size_t total = it.size, blocksize = std::min(total, blocksize0);
_buf.allocate(bufesz*blocksize + 64);
buf = _buf;
if( cvtsrc1 )
buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
wbuf = maskbuf = buf;
if( cvtdst )
buf = alignPtr(buf + blocksize*wsz, 16);
if( haveMask )
maskbuf = buf;
// Fill buf2 once, before the loops; it is reused unchanged as the scalar operand for every block.
// NOTE(review): convertAndUnrollScalar is defined elsewhere; presumably it converts the
// scalar to wtype and replicates it blocksize times - confirm.
convertAndUnrollScalar( src2, wtype, buf2, blocksize);
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
for( size_t j = 0; j < total; j += blocksize )
{
int bsz = (int)MIN(total - j, blocksize);
Size bszn(bsz*cn, 1);
const uchar *sptr1 = ptrs[0];
const uchar* sptr2 = buf2;
uchar* dptr = ptrs[1];
if( cvtsrc1 )
{
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
sptr1 = buf1;
}
// Undo the earlier operand swap so the kernel receives the operands in the caller's order.
if( swapped12 )
std::swap(sptr1, sptr2);
if( !haveMask && !cvtdst )
func( sptr1, 1, sptr2, 1, dptr, 1, bszn, usrdata );
else
{
func( sptr1, 1, sptr2, 1, wbuf, 1, bszn, usrdata );
if( !haveMask )
cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
else if( !cvtdst )
{
copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
// One mask byte per element (mask type asserted above).
ptrs[2] += bsz;
}
else
{
// Convert to dtype into maskbuf first, then copy only the masked elements to dst.
cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
ptrs[2] += bsz;
}
}
// Advance the array source and destination pointers; the scalar buffer stays put.
ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
}
}
}
}