in csharp/src/HufDecompress.cs [266:358]
/// <summary>
/// Decodes a Huffman-compressed payload that is stored as four separate bit streams,
/// using an already-built decoding table.
/// </summary>
/// <remarks>
/// Input layout read by this method: a 6-byte jump table holding three 16-bit
/// little-endian stream lengths, followed by the four streams back to back. The length
/// of the fourth stream is whatever remains of <paramref name="cSrcSize"/>.
/// The output is split into four consecutive segments of <c>(dstSize + 3) / 4</c> bytes;
/// the last segment ends at <c>dst + dstSize</c>.
/// </remarks>
/// <param name="dst">Start of the output buffer.</param>
/// <param name="dstSize">Number of bytes to produce into <paramref name="dst"/>.</param>
/// <param name="cSrc">Start of the compressed input (jump table + four streams).</param>
/// <param name="cSrcSize">Size in bytes of the compressed input; must be at least 10.</param>
/// <param name="DTable">Decoding table; its descriptor supplies the table log and the
/// decoding cells start one element after the header.</param>
/// <returns>
/// <paramref name="dstSize"/> on success; otherwise an error code, either
/// <c>ERROR(Error.corruption_detected)</c> or the code reported by <c>InitDStream</c>.
/// </returns>
public static size_t HUF_decompress4X2_usingDTable_internal_body(void* dst, size_t dstSize, void* cSrc, size_t cSrcSize, HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(Error.corruption_detected); /* strict minimum : jump table + 1 byte per stream */
    {
        BYTE* istart = (BYTE*)cSrc;
        BYTE* ostart = (BYTE*)dst;
        BYTE* oend = ostart + dstSize;
        void* dtPtr = DTable + 1; /* decoding cells follow the table header */
        HUF_DEltX2* dt = (HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1 = new BIT_DStream_t();
        BIT_DStream_t bitD2 = new BIT_DStream_t();
        BIT_DStream_t bitD3 = new BIT_DStream_t();
        BIT_DStream_t bitD4 = new BIT_DStream_t();

        /* jump table : three explicit lengths, the fourth is implied by cSrcSize */
        size_t length1 = MEM_readLE16(istart);
        size_t length2 = MEM_readLE16(istart + 2);
        size_t length3 = MEM_readLE16(istart + 4);
        size_t length4 = cSrcSize - (length1 + length2 + length3 + 6);
        BYTE* istart1 = istart + 6; /* jumpTable */
        BYTE* istart2 = istart1 + length1;
        BYTE* istart3 = istart2 + length2;
        BYTE* istart4 = istart3 + length3;

        /* output is cut into 4 segments; segmentSize is rounded up, the last segment takes the rest */
        size_t segmentSize = (dstSize + 3) / 4;
        BYTE* opStart2 = ostart + segmentSize;
        BYTE* opStart3 = opStart2 + segmentSize;
        BYTE* opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc dtd = GetDTableDesc(DTable);
        U32 dtLog = dtd.tableLog;

        /* a declared length sum larger than cSrcSize makes the subtraction above wrap to a huge value
         * NOTE(review): relies on size_t being an unsigned type - confirm against its alias */
        if (length4 > cSrcSize) return ERROR(Error.corruption_detected); /* overflow */

        /* Because segmentSize is rounded up, 3 * segmentSize can exceed dstSize for small outputs
         * (dstSize of 1, 2 or 5). The finishing calls below would then receive end pointers
         * (opStart3 / opStart4) that lie past oend, so reject such input before decoding anything. */
        if (opStart4 > oend) return ERROR(Error.corruption_detected); /* overflow */

        { size_t errcod = InitDStream(bitD1, istart1, length1); if (IsError(errcod)) return errcod; }
        { size_t errcod = InitDStream(bitD2, istart2, length2); if (IsError(errcod)) return errcod; }
        { size_t errcod = InitDStream(bitD3, istart3, length3); if (IsError(errcod)) return errcod; }
        { size_t errcod = InitDStream(bitD4, istart4, length4); if (IsError(errcod)) return errcod; }

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        U32 endSignal = (U32)ReloadDStream(bitD1) | (U32)ReloadDStream(bitD2) | (U32)ReloadDStream(bitD3) | (U32)ReloadDStream(bitD4);
        while ((endSignal == (U32)BIT_DStream_status.BIT_DStream_unfinished) && (op4 < (oend - 3)))
        {
            HUF_DECODE_SYMBOLX2_2(ref op1, bitD1, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op2, bitD2, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op3, bitD3, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op4, bitD4, dt, dtLog);
            HUF_DECODE_SYMBOLX2_1(ref op1, bitD1, dt, dtLog);
            HUF_DECODE_SYMBOLX2_1(ref op2, bitD2, dt, dtLog);
            HUF_DECODE_SYMBOLX2_1(ref op3, bitD3, dt, dtLog);
            HUF_DECODE_SYMBOLX2_1(ref op4, bitD4, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op1, bitD1, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op2, bitD2, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op3, bitD3, dt, dtLog);
            HUF_DECODE_SYMBOLX2_2(ref op4, bitD4, dt, dtLog);
            HUF_DECODE_SYMBOLX2_0(ref op1, bitD1, dt, dtLog);
            HUF_DECODE_SYMBOLX2_0(ref op2, bitD2, dt, dtLog);
            HUF_DECODE_SYMBOLX2_0(ref op3, bitD3, dt, dtLog);
            HUF_DECODE_SYMBOLX2_0(ref op4, bitD4, dt, dtLog);
            /* the reload statuses are not folded back into endSignal : the loop is bounded by the
             * op4 test alone, and a stream that ran dry is caught by the end-of-stream check below */
            ReloadDStream(bitD1);
            ReloadDStream(bitD2);
            ReloadDStream(bitD3);
            ReloadDStream(bitD4);
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(Error.corruption_detected);
        if (op2 > opStart3) return ERROR(Error.corruption_detected);
        if (op3 > opStart4) return ERROR(Error.corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one : each stream is decoded up to the start of the next segment */
        HUF_decodeStreamX2(op1, bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, bitD4, oend, dt, dtLog);

        /* check : every stream must report end-of-stream, otherwise the input is corrupt */
        {
            U32 endCheck = EndOfDStream(bitD1) & EndOfDStream(bitD2) & EndOfDStream(bitD3) & EndOfDStream(bitD4);
            if (endCheck == 0) return ERROR(Error.corruption_detected);
        }

        /* decoded size */
        return dstSize;
    }
}