imagecore/image/kernel.cpp

/* * MIT License * * Copyright (c) 2017 Twitter * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "imagecore/imagecore.h" #include "imagecore/utils/mathutils.h" #include "imagecore/utils/securemath.h" #include "imagecore/image/kernel.h" // From GPU Gems 3. float mitchell_netravali(float x, float B, float C) { float ax = fabs(x); if (ax < 1) { return ((12 - 9 * B - 6 * C) * ax * ax * ax + (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6; } else if ((ax >= 1) && (ax < 2)) { return ((-B - 6 * C) * ax * ax * ax + (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) * ax + (8 * B + 24 * C)) / 6; } else { return 0; } } // From Numerical Recipes in C: The Art of Scientific Computing // Zeroth order modified bessel function of the first kind. float bessel_i0(float x) { float ax,ans; double y; if ((ax=fabs(x)) < 3.75) { y=x/3.75; y*=y; ans=1.0+y*(3.5156229+y*(3.0899424+y*(1.2067492 +y*(0.2659732+y*(0.360768e-1+y*0.45813e-2))))); } else { y=3.75/ax; ans=(exp(ax)/sqrt(ax))*(0.39894228+y*(0.1328592e-1 +y*(0.225319e-2+y*(-0.157565e-2+y*(0.916281e-2 +y*(-0.2057706e-1+y*(0.2635537e-1+y*(-0.1647633e-1 +y*0.392377e-2)))))))); } return ans; } const float KaiserAlpha = 7.0f; const float KaiserInvI0Alpha = 1.0f / bessel_i0(KaiserAlpha); float kaiser(const float x, const float w) { double xSq = x * x; if( xSq < 1.0 ) { return KaiserInvI0Alpha * bessel_i0(KaiserAlpha * sqrt(1.0 - xSq)); } return 0.0f; } float sinc(const float x) { if( x == 0.0f ) { return 1.0f; } double xpi = x * M_PI; // Implementations of sinf() can vary by several ULP, enough to end up giving different // 16.16 fixed point kernel coefficients. Use sin() instead, which should be more consistent // across systems throughout the range of precision we require. return (float)(sin(xpi) / xpi); } float lanczos(const float x, const float invSize) { return sinc(x) * sinc(x * invSize); } #define SHARPEN_COS 0 #define SHARPEN_X4 0 #define SHARPEN_MUL 1 float lanczos_sharpen(const float x, float weight) { #if SHARPEN_COS weight *= (1.4f+(cosf(1.4f*x*x+M_PI)*0.4f)); #elif SHARPEN_X4 weight = sinc(x) * sinc(powf(x/3.0f, 4.0f)); #elif SHARPEN_MUL // Not continuous. if (weight < 0) { weight *= 1.30f; } #endif return weight; } float lanczos_sharper(const float x, const float invSize) { float weight = lanczos(x, invSize); return lanczos_sharpen(x, weight); } float lanczos_fixed(const float x, const float invSize) { double px = fmax(M_PI * x, 0.000001); float f = sin(px) * sin(px * 0.5) / (px * px); return f; } float lanczos_fixed_sharper(const float x, const float invSize) { float weight = lanczos_fixed(x, invSize); return lanczos_sharpen(x, weight); } float mitchell(const float x, const float invSize) { return mitchell_netravali(x, 0.33333f, 0.33333f); } typedef float (*FilterFunction)(const float, const float); FilterFunction s_FilterFunctionsAdaptive[kFilterType_MAX] = { lanczos, lanczos_sharper, mitchell, kaiser }; FilterFunction s_FilterFunctionsFixed[kFilterType_MAX] = { lanczos_fixed, lanczos_fixed_sharper, mitchell, kaiser }; FilterKernel::FilterKernel() { m_InSampleOffset = 0; m_OutSampleOffset = 0; m_SampleRatio = 1.0f; m_Table = NULL; m_TableBilinear = NULL; m_TableFixedPoint = NULL; m_TableFixedPoint4 = NULL; m_TableSize = 0; m_KernelSize = 0; m_MaxSamples = 0; } void FilterKernel::generateFixedPoint(EFilterType type) { unsigned int tableSize = SafeUMul(m_TableSize, m_KernelSize); if(type == kFilterType_Linear) { m_TableBilinear = new uint8_t[tableSize]; // Convert to fixed point. for( unsigned int i = 0; i < tableSize; i++ ) { m_TableBilinear[i] = (int)(m_Table[i] * 255.0f); } } else { m_TableFixedPoint = new int32_t[tableSize]; m_TableFixedPoint4 = new int32_t[SafeUMul(tableSize, 4U)]; // Convert to fixed point. // This is duplicated 4 times to save a few SSE instructions. for( unsigned int i = 0; i < tableSize; i++ ) { int value = (int)(m_Table[i] * 65536.0f); m_TableFixedPoint[i] = value; m_TableFixedPoint4[i * 4 + 0] = value; m_TableFixedPoint4[i * 4 + 1] = value; m_TableFixedPoint4[i * 4 + 2] = value; m_TableFixedPoint4[i * 4 + 3] = value; } } } void FilterKernel::setSampleOffset(unsigned int inSampleOffset, unsigned int outSampleOffset) { SECURE_ASSERT(m_OutSampleOffset < m_TableSize); m_InSampleOffset = inSampleOffset; m_OutSampleOffset = outSampleOffset; } FilterKernel::~FilterKernel() { delete[] m_Table; delete[] m_TableBilinear; delete[] m_TableFixedPoint; delete[] m_TableFixedPoint4; } // Uses a dynamic number of taps per sample, based on on the filter window size and the scaling factor. FilterKernelAdaptive::FilterKernelAdaptive(EFilterType type, unsigned int kernelSize, unsigned int inSize, unsigned int outSize) : FilterKernel() { float ratio = (float)outSize / (float)inSize; float invRatio = (float)inSize / (float)outSize; m_SampleRatio = invRatio; unsigned int tableAllocSize = SafeUMul(outSize, kernelSize); m_Table = new float[tableAllocSize]; m_TableSize = outSize; m_KernelSize = kernelSize; float maxWindowSize = (kernelSize * 0.5) - 0.00001f; float windowWidth = (fmin(maxWindowSize, kernelSize * 0.25f * invRatio)); float invFilterScale = 1.0f / (kernelSize * 0.25f); m_WindowWidth = windowWidth; FilterFunction filterFunction = s_FilterFunctionsAdaptive[type]; int maxSamples = 0; if (type == kFilterType_Linear) { for( unsigned int i = 0; i < outSize; i++ ) { float sample = (i + 0.5f) * invRatio; float w0 = sample - floorf(sample); m_Table[i * kernelSize + 0] = 1.0f - w0; m_Table[i * kernelSize + 1] = w0; } m_WindowWidth = 0.5f; maxSamples = 2; } else { for( unsigned int i = 0; i < outSize; i++ ) { float sample = (i + 0.5f) * invRatio - 0.00001f; int start = sample - windowWidth + 0.5f; int end = sample + windowWidth + 0.5f; int numSamples = end - start; maxSamples = max(maxSamples, numSamples); float sum = 0.0f; for( int k = 0; k < numSamples; k++ ) { float samplePos = ratio * ((float)(start + k) - sample + 0.5f); float weight = filterFunction(samplePos, invFilterScale); m_Table[i * kernelSize + k] = weight; sum += weight; } for( unsigned int k = numSamples; k < kernelSize; k++ ) { m_Table[i * kernelSize + k] = 0.0f; } float invSum = 1.0f / sum; for (int k = 0; k < numSamples; k++) { m_Table[i * kernelSize + k] *= invSum; } } } m_MaxSamples = maxSamples; generateFixedPoint(type); } // Always takes 4 fixed samples, regardless of the scaling factor. FilterKernelFixed::FilterKernelFixed(EFilterType type, unsigned int inSize, unsigned int outSize) : FilterKernel() { m_Table = new float[outSize * 4]; m_TableSize = outSize; m_KernelSize = 4; m_WindowWidth = 4; FilterFunction filterFunction = s_FilterFunctionsFixed[type]; m_SampleRatio = (float)inSize / (float)outSize; for( unsigned int i = 0; i < outSize; i++ ) { float inP = fmaxf(0.0f, ((float)(i + 0.5f) * m_SampleRatio) - 0.5f); float frP = inP - floorf(inP); float a = filterFunction(frP + 1.0f, 4); float b = filterFunction(frP, 4); float c = filterFunction(1.0f - frP, 4); float d = filterFunction(2.0f - frP, 4); float s = 1.0f / (a + b + c + d); m_Table[i * 4 + 0] = a * s; m_Table[i * 4 + 1] = b * s; m_Table[i * 4 + 2] = c * s; m_Table[i * 4 + 3] = d * s; } generateFixedPoint(type); }

imagecore/image/kernel.cpp (216 lines of code) (raw):