in src/dxapi/native/tickdb/data_writer.cpp [73:144]
/**
 * Writes a wide-character string as a length-prefixed UTF-8 field.
 *
 * A 16-bit header is reserved first (written as 0), the UTF-8 bytes follow,
 * and the header is then patched through _storeBE<uint16> with the number of
 * encoded bytes. setNotNull() is called on every successful path, including
 * the empty string (whose header stays 0).
 *
 * Input handling:
 *  - values below 0x10000 are treated as UTF-16 code units; a High surrogate
 *    (D800..DBFF) must be immediately followed by a Low surrogate (DC00..DFFF)
 *    and the pair is combined into one code point;
 *  - values in 0x10000..0x10FFFF (only possible when wchar_t is wider than
 *    16 bits) are treated as complete code points and encoded as 4 bytes.
 *
 * @param data   source characters; may be NULL only when length is 0
 * @param length number of wchar_t elements in data; must be < 0xFFFF
 *
 * Raises an error through THROW_DBGLOG when:
 *  - length >= 0xFFFF, or the encoded size turns out to be >= 0xFFFF;
 *  - a Low surrogate appears without a preceding High surrogate;
 *  - a High surrogate is the last element or is not followed by a Low one;
 *  - an element is greater than 0x10FFFF (not a valid Unicode code point).
 * In these cases bytes already emitted are not rolled back by this function.
 */
void DataWriter::writeUTF8(const wchar_t * data, uintptr length)
{
    assert(length == 0 || NULL != data);
    if (length >= 0xFFFF) {
        THROW_DBGLOG("putUTF8(wchar_t *): UTF8 string length is too big: %llu", (ulonglong)length);
    }

    // Reserve the length header; the real value is stored once the encoded size is known.
    // NOTE(review): 'header' is a raw pointer into the output buffer - this assumes
    // putByte()/putUInt16() never relocate the buffer; confirm against their implementation.
    byte * header = dataPtr;
    putUInt16(0);

    if (0 == length) {
        setNotNull();
        return;
    }

    // Iterate with a negative index that counts up to 0: 'data' points one past the end.
    data += length;
    intptr i = -(intptr)length;
    unsigned c, cc;

    // TODO: Possible perf. improvement: estimate max. length and preallocate bytes
    do {
        c = data[i];

        // 1 byte: U+0000..U+007F
        if (c < 0x80) {
            putByte((byte)c);
            continue;
        }

        // 2 bytes: U+0080..U+07FF
        if (c < 0x800) {
            putByte((byte)(0xC0 | ((c >> 6) & 0x1F)));
            putByte((byte)(0x80 | (c & 0x3F)));
            continue;
        }

        if (c < 0x10000) {
            // 3 bytes: U+0800..U+FFFF, excluding the surrogate range D800..DFFF
            if (0xD800 != (c & 0xF800)) {
                putByte((byte)(0xE0 | ((c >> 12) & 0x0F)));
                putByte((byte)(0x80 | ((c >> 6) & 0x3F)));
                putByte((byte)(0x80 | (c & 0x3F)));
                continue;
            }

            // c is a surrogate. Bit 0x400 set means DC00..DFFF, i.e. a Low surrogate
            // that was not preceded by a High one.
            if (0x400 & c) {
                THROW_DBGLOG("putUTF8(wchar *): UTF-16 Low surrogate encountered before High");
            }

            if (0 == ++i) {
                THROW_DBGLOG("putUTF8(wchar *): String ends on UTF-16 High surrogate");
            }

            cc = data[i];
            // Mask all bits above the low 10 so that wide values such as 0x1DC00
            // are not mistaken for a Low surrogate when wchar_t is wider than 16 bits.
            if (0xDC00 != (cc & ~0x3FFU)) {
                THROW_DBGLOG("putUTF8(wchar *): UTF-16 High surrogate is not followed by Low surrogate");
            }

            // Combine the pair: 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00),
            // with the constant terms folded together (unsigned arithmetic wraps as intended).
            c = (c << 10) + (0x10000U - (0xD800 << 10) - 0xDC00) + cc;
        }
        else if (c > 0x10FFFF) {
            // Also catches negative wchar_t values, which become huge after conversion to unsigned.
            THROW_DBGLOG("putUTF8(wchar *): Invalid Unicode code point: 0x%X", c);
        }

        // 4 bytes: code point in U+10000..U+10FFFF
        putByte((byte)(0xF0 | ((c >> 18) & 0x07)));
        putByte((byte)(0x80 | ((c >> 12) & 0x3F)));
        putByte((byte)(0x80 | ((c >> 6) & 0x3F)));
        putByte((byte)(0x80 | (c & 0x3F)));
    } while (0 != ++i);

    // Number of UTF-8 bytes written after the 2-byte header.
    length = (dataPtr - header - 2);
    if (length >= 0xFFFF) {
        THROW_DBGLOG("putUTF8(wchar *): after conversion to UTF8, length >= 0xFFFF: %llu", (ulonglong)length);
    }

    _storeBE<uint16>(header, (uint16)length);
    setNotNull();
}