in packages/miew/src/io/parsers/readCIF.js [23:256]
export default function readCIF(source) {
let i = 0;
let j = 0;
const n = source.length;
let code = NaN;
let newline = true;
let line = 1;
let column = 1;
let begin;
let state = 0; // 0 - start, 1 - block, 2 - item, 3 - loop, 4 - values, 5 - value
const result = {};
let block = {};
let keys = [];
let keysCount = 0;
let key = '';
let values = [];
let valuesCount = 0;
let value;
function _parseValue() {
let val;
if ((code === 46 || code === 63) && (i + 1 >= n || _isWhitespace(source.charCodeAt(i + 1)))) { // '.' or '?' .....
// it's a missing value
++column;
++i;
return undefined;
}
if (newline && code === 59) { // ';' ......................................................................
// parse multi-line string
j = i;
let lines = 0;
do {
j = _inlineIndexOf(10, source, j + 1); // '\n'
if (j === -1) {
throw new ParsingError('Unterminated text block found', line, column);
}
++lines;
} while ((j + 1 < n && source.charCodeAt(j + 1) !== code) || j + 1 >= n);
val = source.substring(i + 1, j).replace(/\r/g, '');
i = j + 2;
line += lines;
column = 1;
newline = false;
return val;
}
if (code === 39 || code === 34) { // ''' or '"' ...........................................................
// parse quoted string
j = i;
do {
j = _inlineIndexOf(code, source, j + 1);
if (j === -1) {
throw new ParsingError('Unterminated quoted string found', line, column);
}
} while (j + 1 < n && !_isWhitespace(source.charCodeAt(j + 1)));
val = source.substring(i + 1, j);
column += j - i + 1;
i = j + 1;
return val;
} // ......................................................................................................
// parse until the first whitespace
j = i;
while (j < n && !_isWhitespace(source.charCodeAt(j))) {
++j;
}
val = source.substring(i, j);
column += j - i;
i = j;
// try to convert to a number
const num = Number(val);
if (!Number.isNaN(num)) {
return num;
}
// or leave as an unquoted string
return val;
}
function _storeKey(tag) {
keys[keysCount++] = tag;
}
function _storeValue(val) {
const keyIndex = valuesCount % keysCount;
values[keyIndex].push(val);
++valuesCount;
return val;
}
while (i <= n) {
code = source.charCodeAt(i); // 'NaN' in place of '<eof>'
if (code === 13) { // '\r' .......................................................................................
// just ignore
} else if (code === 10) { // '\n' ................................................................................
// take note of new lines
newline = true;
++line;
column = 1;
} else {
// process inline characters
if (code === 32 || code === 9) { // ' ' or '\t' ................................................................
// just ignore
} else if (code === 35) { // '#' ...............................................................................
// skip the comment until before the end of the line
i = _inlineIndexOf(10, source, i + 1); // '\n'
if (i === -1) {
break;
} else {
continue; // don't forget to process the new line
}
} else if (state === 0) { // start =============================================================================
if ((code === 68 || code === 100) && source.substr(i + 1, 4).toLowerCase() === 'ata_') { // 'data_' ..........
j = i + 5;
begin = j;
while (j < n && !_isWhitespace(source.charCodeAt(j))) {
++j;
}
column += j - i;
i = j;
if (begin < i) {
// add new data block
result[source.substring(begin, i)] = block = {};
state = 1; // block
continue; // don't forget to process the whitespace
} else {
throw new ParsingError('Data block name missing', line, column);
}
} else if (Number.isNaN(code)) { // <eof> ....................................................................
break;
} else { // ..................................................................................................
throw new ParsingError(`Unexpected character in state ${state}`, line, column);
}
} else if (state === 1) { // block =============================================================================
if ((code === 68 || code === 100) && source.substr(i + 1, 4).toLowerCase() === 'ata_') { // 'data_' ..........
state = 0; // start
continue; // parse again in a different state
} else if (code === 95) { // '_' .............................................................................
j = i + 1;
begin = j;
while (j < n && !_isWhitespace(source.charCodeAt(j))) {
++j;
}
column += j - i;
i = j;
if (begin < i) {
// start new item
key = source.substring(begin, i);
state = 2; // item
continue; // don't forget to process the whitespace
} else {
throw new ParsingError('Tag name missing', line, column);
}
} else if ((code === 76 || code === 108) && source.substr(i + 1, 4).toLowerCase() === 'oop_') { // 'loop_' ...
i += 5;
column += 5;
if (i < n && !_isWhitespace(source.charCodeAt(i))) {
throw new ParsingError(`Unexpected character in state ${state}`, line, column);
} else {
// start new loop
keys = [];
keysCount = 0;
values = [];
valuesCount = 0;
state = 3; // loop
continue; // don't forget to process the whitespace
}
} else if (Number.isNaN(code)) { // <eof> ....................................................................
break;
} else { // ..................................................................................................
throw new ParsingError(`Unexpected character in state ${state}`, line, column);
}
} else if (state === 2) { // item ==============================================================================
if (Number.isNaN(code)) {
break;
}
value = _parseValue();
_.set(block, key, value);
state = 1; // block
continue;
} else if (state === 3) { // loop ==============================================================================
if (code === 95) { // '_' ....................................................................................
j = i + 1;
begin = j;
while (j < n && !_isWhitespace(source.charCodeAt(j))) {
++j;
}
column += j - i;
i = j;
if (begin < i) {
// add new key
_storeKey(source.substring(begin, i));
continue; // don't forget to process the whitespace
} else {
throw new ParsingError('Tag name missing', line, column);
}
} else { // ..................................................................................................
if (keysCount > 0) {
for (let keyIndex = 0; keyIndex < keysCount; ++keyIndex) {
value = [];
values[keyIndex] = value;
_.set(block, keys[keyIndex], value);
}
state = 4;
continue; // parse again in a different state
}
throw new ParsingError('Data tags are missing inside a loop', line, column);
}
} else if (state === 4) { // values ============================================================================
if ((code === 68 || code === 100) && source.substr(i + 1, 4).toLowerCase() === 'ata_') { // 'data_' ..........
state = 0; // start
} else if (code === 95) { // '_' .............................................................................
state = 1; // block
} else if ((code === 76 || code === 108) && source.substr(i + 1, 4).toLowerCase() === 'oop_') { // 'loop_' ...
state = 1; // block
} else if (Number.isNaN(code)) { // <eof> ....................................................................
state = 0;
} else { // ..................................................................................................
_storeValue(_parseValue());
}
continue; // parse again in a different state
} else { // ====================================================================================================
throw new ParsingError(`Unexpected internal state ${state}`, line, column);
}
newline = false;
++column;
}
++i;
}
if (state === 2) { // item
throw new ParsingError(`Unexpected end of file in state ${state}`, line, column);
}
return result;
}