in ch-commons-charset/src/main/java/com/cloudhopper/commons/charset/ModifiedUTF8Charset.java [276:339]
/**
 * Decodes a slice of a byte array into chars, handling the one-, two- and
 * three-byte encoded forms (0xxxxxxx, 110xxxxx 10xxxxxx and
 * 1110xxxx 10xxxxxx 10xxxxxx).
 *
 * Bytes are read from {@code byteBuffer[byteOffset]} up to (but excluding)
 * {@code byteBuffer[byteOffset + byteLength]} and the decoded chars are
 * written to {@code charBuffer} beginning at {@code charOffset}. This method
 * performs no capacity check on {@code charBuffer}; it simply indexes into it.
 *
 * @param byteBuffer the encoded input bytes
 * @param byteOffset index of the first byte to decode
 * @param byteLength number of bytes to decode
 * @param charBuffer destination for the decoded chars
 * @param charOffset index in {@code charBuffer} of the first char written
 * @return the number of chars written to {@code charBuffer}
 * @throws IllegalArgumentException if a multi-byte sequence is cut off by the
 *         end of the range, a continuation byte is not of the form 10xxxxxx,
 *         or a lead byte is of the form 10xxxxxx or 1111xxxx
 */
static public int decodeToCharArray(byte[] byteBuffer, int byteOffset, int byteLength, char[] charBuffer, int charOffset) {
    final int end = byteOffset + byteLength;
    int in = byteOffset;
    int out = charOffset;

    // Fast path: a leading run of 7-bit bytes (non-negative as signed bytes)
    // maps one-to-one onto chars.
    while (in < end && byteBuffer[in] >= 0) {
        charBuffer[out++] = (char) byteBuffer[in++];
    }

    // General path: classify each lead byte by its high bits.
    while (in < end) {
        final int lead = byteBuffer[in] & 0xFF;

        if (lead < 0x80) {
            // 0xxxxxxx - single byte
            charBuffer[out++] = (char) lead;
            in++;
        } else if ((lead & 0xE0) == 0xC0) {
            // 110xxxxx 10xxxxxx - two bytes
            in += 2;
            if (in > end) {
                throw new IllegalArgumentException("malformed input: partial character at end");
            }
            final int second = byteBuffer[in - 1];
            if ((second & 0xC0) != 0x80) {
                throw new IllegalArgumentException("malformed input around byte " + in);
            }
            charBuffer[out++] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
        } else if ((lead & 0xF0) == 0xE0) {
            // 1110xxxx 10xxxxxx 10xxxxxx - three bytes
            in += 3;
            if (in > end) {
                throw new IllegalArgumentException("malformed input: partial character at end");
            }
            final int second = byteBuffer[in - 2];
            final int third = byteBuffer[in - 1];
            final boolean continuationsOk = ((second & 0xC0) == 0x80) && ((third & 0xC0) == 0x80);
            if (!continuationsOk) {
                throw new IllegalArgumentException("malformed input around byte " + (in - 1));
            }
            charBuffer[out++] = (char) (((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F));
        } else {
            // 10xxxxxx is a continuation byte and cannot start a character;
            // 1111xxxx would start a longer sequence, which is not accepted here.
            throw new IllegalArgumentException("malformed input around byte " + in);
        }
    }

    return out - charOffset;
}