packages/ketcher-core/src/domain/serializers/smi/smiles.js (604 lines of code) (raw):
/****************************************************************************
* Copyright 2021 EPAM Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
import { Atom, Bond, Pile, SGroup } from 'domain/entities';
import { KetcherLogger } from 'utilities';
import CisTrans from './cis_trans';
import Dfs from './dfs';
import Stereocenters from './stereocenters';
export function Smiles() {
this.smiles = '';
this.writtenAtoms = [];
this.writtenComponents = 0;
this.ignoreErrors = false;
}
Smiles._Atom = function (hCount) {
// eslint-disable-line no-underscore-dangle
this.neighbours = []; // Array of integer pairs {a, b}
this.aromatic = false; // has aromatic bond
this.lowercase = false; // aromatic and has to be written lowercase
this.chirality = 0; // 0 means no chirality, 1 means CCW pyramid, 2 means CW pyramid
this.branch_cnt = 0; // runs from 0 to (branches - 1)
this.paren_written = false;
this.h_count = hCount;
this.parent = -1;
};
// NB: only loops of length up to 6 are included here
Smiles.prototype.isBondInRing = function (bid) {
return this.inLoop[bid];
};
Smiles.prototype.saveMolecule = function (struct, ignoreErrors) {
// eslint-disable-line max-statements
let i, j, k;
if (!ignoreErrors) this.ignoreErrors = ignoreErrors;
// [RB]: KETCHER-498 (Incorrect smile-string for multiple Sgroup)
// TODO the fix is temporary, still need to implement error handling/reporting
// BEGIN
struct = struct.clone(
undefined,
undefined,
!struct.hasRxnArrow(), // make it drop multiple reactions
undefined,
undefined,
undefined,
);
struct.initHalfBonds();
struct.initNeighbors();
struct.sortNeighbors();
struct.setImplicitHydrogen();
struct.sgroups.forEach((sg) => {
if (sg.type === 'MUL') {
try {
SGroup.prepareMulForSaving(sg, struct);
} catch (error) {
KetcherLogger.error('smiles.js::Smiles.prototype.saveMolecule', error);
throw Error('Bad s-group (' + error.message + ')');
}
}
// 'SMILES data format doesn\'t support s-groups'
});
// END
this.atoms = new Array(struct.atoms.size);
struct.atoms.forEach((atom, aid) => {
this.atoms[aid] = new Smiles._Atom(atom.implicitH); // eslint-disable-line no-underscore-dangle
});
// From the SMILES specification:
// Please note that only atoms on the following list
// can be considered aromatic: C, N, O, P, S, As, Se, and * (wildcard).
const allowedLowercase = ['B', 'C', 'N', 'O', 'P', 'S', 'Se', 'As'];
// Detect atoms that have aromatic bonds and count neighbours
struct.bonds.forEach((bond, bid) => {
if (bond.type === Bond.PATTERN.TYPE.AROMATIC) {
this.atoms[bond.begin].aromatic = true;
if (allowedLowercase.indexOf(struct.atoms.get(bond.begin).label) !== -1) {
this.atoms[bond.begin].lowercase = true;
}
this.atoms[bond.end].aromatic = true;
if (allowedLowercase.indexOf(struct.atoms.get(bond.end).label) !== -1) {
this.atoms[bond.end].lowercase = true;
}
}
this.atoms[bond.begin].neighbours.push({ aid: bond.end, bid });
this.atoms[bond.end].neighbours.push({ aid: bond.begin, bid });
});
this.inLoop = (function () {
struct.prepareLoopStructure();
let bondsInLoops = new Pile();
struct.loops.forEach((loop) => {
if (loop.hbs.length <= 6) {
const hbids = loop.hbs.map((hbid) => struct.halfBonds.get(hbid).bid);
bondsInLoops = bondsInLoops.union(new Pile(hbids));
}
});
const inLoop = {};
bondsInLoops.forEach((bid) => {
inLoop[bid] = 1;
});
return inLoop;
})();
this.touchedCistransbonds = 0;
this.markCisTrans(struct);
const components = struct.getComponents();
const componentsAll = components.reactants.concat(components.products);
const walk = new Dfs(
struct,
this.atoms,
componentsAll,
components.reactants.length,
);
walk.walk();
this.atoms.forEach((atom) => {
atom.neighbours = [];
});
// fill up neighbor lists for the stereocenters calculation
for (i = 0; i < walk.v_seq.length; i++) {
var seqEl = walk.v_seq[i];
var vIdx = seqEl.idx;
var eIdx = seqEl.parent_edge;
var vPrevIdx = seqEl.parent_vertex;
if (eIdx >= 0) {
const atom = this.atoms[vIdx];
var openingCycles = walk.numOpeningCycles(eIdx);
for (j = 0; j < openingCycles; j++) {
this.atoms[vPrevIdx].neighbours.push({ aid: -1, bid: -1 });
}
if (walk.edgeClosingCycle(eIdx)) {
for (k = 0; k < atom.neighbours.length; k++) {
if (atom.neighbours[k].aid === -1) {
// eslint-disable-line max-depth
atom.neighbours[k].aid = vPrevIdx;
atom.neighbours[k].bid = eIdx;
break;
}
}
if (k === atom.neighbours.length) {
throw new Error('internal: can not put closing bond to its place');
}
} else {
atom.neighbours.push({ aid: vPrevIdx, bid: eIdx });
atom.parent = vPrevIdx;
}
this.atoms[vPrevIdx].neighbours.push({ aid: vIdx, bid: eIdx });
}
}
try {
// detect chiral configurations
const stereocenters = new Stereocenters(
struct,
function (idx) {
return this.atoms[idx].neighbours;
},
this,
);
stereocenters.buildFromBonds(this.ignoreErrors);
stereocenters.each((sc, atomIdx) => {
// eslint-disable-line max-statements
// if (sc.type < MoleculeStereocenters::ATOM_AND)
// continue;
let implicitHIdx = -1;
if (sc.pyramid[3] === -1) implicitHIdx = 3;
/*
else for (j = 0; j < 4; j++)
if (ignored_vertices[pyramid[j]])
{
implicit_h_idx = j;
break;
}
*/
const pyramidMapping = [];
let counter = 0;
const atom = this.atoms[atomIdx];
if (atom.parent !== -1) {
for (k = 0; k < 4; k++) {
if (sc.pyramid[k] === atom.parent) {
pyramidMapping[counter++] = k;
break;
}
}
}
if (implicitHIdx !== -1) pyramidMapping[counter++] = implicitHIdx;
for (j = 0; j !== atom.neighbours.length; j++) {
if (atom.neighbours[j].aid === atom.parent) continue; // eslint-disable-line no-continue
for (k = 0; k < 4; k++) {
if (atom.neighbours[j].aid === sc.pyramid[k]) {
if (counter >= 4) throw new Error('internal: pyramid overflow');
pyramidMapping[counter++] = k;
break;
}
}
}
if (counter === 4) {
// move the 'from' atom to the end
counter = pyramidMapping[0];
pyramidMapping[0] = pyramidMapping[1];
pyramidMapping[1] = pyramidMapping[2];
pyramidMapping[2] = pyramidMapping[3];
pyramidMapping[3] = counter;
} else if (counter !== 3) {
throw new Error('cannot calculate chirality');
}
if (Stereocenters.isPyramidMappingRigid(pyramidMapping)) {
this.atoms[atomIdx].chirality = 1;
} else this.atoms[atomIdx].chirality = 2;
});
} catch (e) {
KetcherLogger.error('smiles.js::Smiles.prototype.saveMolecule', e);
// TODO: add error handler call
}
// write the SMILES itself
// cycle_numbers[i] == -1 means that the number is available
// cycle_numbers[i] == n means that the number is used by vertex n
const cycleNumbers = [];
cycleNumbers.push(0); // never used
let firstComponent = true;
for (i = 0; i < walk.v_seq.length; i++) {
seqEl = walk.v_seq[i];
vIdx = seqEl.idx;
eIdx = seqEl.parent_edge;
vPrevIdx = seqEl.parent_vertex;
let writeAtom = true;
if (vPrevIdx >= 0) {
if (walk.numBranches(vPrevIdx) > 1) {
if (
this.atoms[vPrevIdx].branch_cnt > 0 &&
this.atoms[vPrevIdx].paren_written
) {
this.smiles += ')';
}
}
openingCycles = walk.numOpeningCycles(eIdx);
for (j = 0; j < openingCycles; j++) {
for (k = 1; k < cycleNumbers.length; k++) {
if (cycleNumbers[k] === -1) {
// eslint-disable-line max-depth
break;
}
}
if (k === cycleNumbers.length) cycleNumbers.push(vPrevIdx);
else cycleNumbers[k] = vPrevIdx;
this.writeCycleNumber(k);
}
if (vPrevIdx >= 0) {
const branches = walk.numBranches(vPrevIdx);
if (branches > 1 && this.atoms[vPrevIdx].branch_cnt < branches - 1) {
if (walk.edgeClosingCycle(eIdx)) {
// eslint-disable-line max-depth
this.atoms[vPrevIdx].paren_written = false;
} else {
this.smiles += '(';
this.atoms[vPrevIdx].paren_written = true;
}
}
this.atoms[vPrevIdx].branch_cnt++;
if (this.atoms[vPrevIdx].branch_cnt > branches) {
throw new Error('unexpected branch');
}
}
const bond = struct.bonds.get(eIdx);
let dir = 0;
if (bond.type === Bond.PATTERN.TYPE.SINGLE) {
dir = this.calcBondDirection(struct, eIdx, vPrevIdx);
}
if (
(dir === 1 && vIdx === bond.end) ||
(dir === 2 && vIdx === bond.begin)
) {
this.smiles += '/';
} else if (
(dir === 2 && vIdx === bond.end) ||
(dir === 1 && vIdx === bond.begin)
) {
this.smiles += '\\';
} else if (bond.type === Bond.PATTERN.TYPE.ANY) {
this.smiles += '~';
} else if (bond.type === Bond.PATTERN.TYPE.DOUBLE) {
this.smiles += '=';
} else if (bond.type === Bond.PATTERN.TYPE.TRIPLE) {
this.smiles += '#';
} else if (bond.type === Bond.PATTERN.TYPE.SINGLE_OR_AROMATIC) {
this.smiles += '-,:';
} else if (bond.type === Bond.PATTERN.TYPE.DOUBLE_OR_AROMATIC) {
this.smiles += '=,:';
} else if (bond.type === Bond.PATTERN.TYPE.SINGLE_OR_DOUBLE) {
this.smiles += '-,=';
} else if (
bond.type === Bond.PATTERN.TYPE.AROMATIC &&
(!this.atoms[bond.begin].lowercase ||
!this.atoms[bond.end].lowercase ||
!this.isBondInRing(eIdx))
) {
this.smiles += ':';
}
// TODO: Check if this : is needed
else if (
bond.type === Bond.PATTERN.TYPE.SINGLE &&
this.atoms[bond.begin].aromatic &&
this.atoms[bond.end].aromatic
) {
this.smiles += '-';
}
if (walk.edgeClosingCycle(eIdx)) {
for (j = 1; j < cycleNumbers.length; j++) {
if (cycleNumbers[j] === vIdx) break;
}
if (j === cycleNumbers.length)
throw new Error('cycle number not found');
this.writeCycleNumber(j);
cycleNumbers[j] = -1;
writeAtom = false;
}
} else {
if (!firstComponent) {
this.smiles +=
this.writtenComponents === walk.nComponentsInReactants &&
walk.nReactants !== 0
? '>>'
: '.'; // when walk.nReactants === 0 - not reaction
}
firstComponent = false;
this.writtenComponents++;
}
if (writeAtom) {
this.writeAtom(
struct,
vIdx,
this.atoms[vIdx].aromatic,
this.atoms[vIdx].lowercase,
this.atoms[vIdx].chirality,
);
this.writtenAtoms.push(seqEl.idx);
}
}
this.comma = false;
// this._writeStereogroups(mol, atoms);
this.writeRadicals(struct);
// this._writePseudoAtoms(mol);
// this._writeHighlighting();
if (this.comma) this.smiles += '|';
return this.smiles;
};
Smiles.prototype.writeCycleNumber = function (n) {
if (n > 0 && n < 10) this.smiles += n;
else if (n >= 10 && n < 100) this.smiles += '%' + n;
else if (n >= 100 && n < 1000) this.smiles += '%%' + n;
else throw new Error('bad cycle number: ' + n);
};
Smiles.prototype.writeAtom = function (
mol,
idx,
aromatic,
lowercase,
chirality,
) {
// eslint-disable-line max-params, max-statements
const atom = mol.atoms.get(idx);
let needBrackets = false;
let hydro = -1;
let aam = 0;
/*
if (mol.haveQueryAtoms())
{
query_atom = &mol.getQueryAtom(idx);
if (query_atom->type == QUERY_ATOM_RGROUP)
{
if (mol.getRGroups()->isRGroupAtom(idx))
{
const Array<int> &rg = mol.getRGroups()->getSiteRGroups(idx);
if (rg.length != 1)
throw Error("rgroup count %d", rg.length);
_output.printf("[&%d]", rg[0] + 1);
}
else
_output.printf("[&%d]", 1);
return;
}
}
*/
if (atom.label === 'A') {
this.smiles += '*';
return;
}
if (atom.label === 'R' || atom.label === 'R#') {
this.smiles += '[*]';
return;
}
// KETCHER-598 (Ketcher does not save AAM into reaction SMILES)
// BEGIN
// if (this.atom_atom_mapping)
// aam = atom_atom_mapping[idx];
aam = atom.aam;
// END
if (
atom.label !== 'C' &&
atom.label !== 'P' &&
atom.label !== 'N' &&
atom.label !== 'S' &&
atom.label !== 'O' &&
atom.label !== 'Cl' &&
atom.label !== 'F' &&
atom.label !== 'Br' &&
atom.label !== 'B' &&
atom.label !== 'I'
) {
needBrackets = true;
}
if (
atom.explicitValence >= 0 ||
atom.radical !== 0 ||
chirality > 0 ||
(aromatic && atom.label !== 'C' && atom.label !== 'O') ||
(aromatic &&
atom.label === 'C' &&
this.atoms[idx].neighbours.length < 3 &&
this.atoms[idx].h_count === 0)
) {
hydro = this.atoms[idx].h_count;
}
let label = atom.label;
if (atom.atomList && !atom.atomList.notList) {
label = atom.atomList.label();
needBrackets = false; // atom list label already has brackets
} else if (atom.isPseudo() || (atom.atomList && atom.atomList.notList)) {
label = '*';
needBrackets = false;
} else if (
chirality ||
(atom.charge !== 0 && atom.charge !== null) ||
atom.isotope > 0 ||
hydro >= 0 ||
aam > 0
) {
needBrackets = true;
}
if (needBrackets) {
if (hydro === -1) hydro = this.atoms[idx].h_count;
this.smiles += '[';
}
if (atom.isotope > 0) this.smiles += atom.isotope;
if (lowercase) this.smiles += label.toLowerCase();
else this.smiles += label;
if (chirality > 0) {
if (chirality === 1) this.smiles += '@';
// chirality == 2
else this.smiles += '@@';
if (atom.implicitH > 1) {
throw new Error(atom.implicitH + ' implicit H near stereocenter');
}
}
if (atom.label !== 'H') {
if (hydro > 1 || (hydro === 0 && !needBrackets)) this.smiles += 'H' + hydro;
else if (hydro === 1) this.smiles += 'H';
}
if (atom.charge > 1) this.smiles += '+' + atom.charge;
else if (atom.charge < -1) this.smiles += atom.charge;
else if (atom.charge === 1) this.smiles += '+';
else if (atom.charge === -1) this.smiles += '-';
if (aam > 0) this.smiles += ':' + aam;
if (needBrackets) this.smiles += ']';
/*
if (mol.getRGroupFragment() != 0)
{
for (i = 0; i < 2; i++)
{
int j;
for (j = 0; mol.getRGroupFragment()->getAttachmentPoint(i, j) != -1; j++)
if (idx == mol.getRGroupFragment()->getAttachmentPoint(i, j))
{
_output.printf("([*])");
break;
}
if (mol.getRGroupFragment()->getAttachmentPoint(i, j) != -1)
break;
}
}
*/
};
Smiles.prototype.markCisTrans = function (mol) {
this.cis_trans = new CisTrans(
mol,
function (idx) {
return this.atoms[idx].neighbours;
},
this,
);
this.cis_trans.build();
this.dbonds = new Array(mol.bonds.size);
mol.bonds.forEach((bond, bid) => {
this.dbonds[bid] = {
ctbond_beg: -1,
ctbond_end: -1,
saved: 0,
};
});
this.cis_trans.each((ct, bid) => {
const bond = mol.bonds.get(bid);
if (ct.parity !== 0 && !this.isBondInRing(bid)) {
const neiBeg = this.atoms[bond.begin].neighbours;
const neiEnd = this.atoms[bond.end].neighbours;
let aromFailBeg = true;
let aromFailEnd = true;
neiBeg.forEach((nei) => {
if (
nei.bid !== bid &&
mol.bonds.get(nei.bid).type === Bond.PATTERN.TYPE.SINGLE
) {
aromFailBeg = false;
}
});
neiEnd.forEach((nei) => {
if (
nei.bid !== bid &&
mol.bonds.get(nei.bid).type === Bond.PATTERN.TYPE.SINGLE
) {
aromFailEnd = false;
}
});
if (aromFailBeg || aromFailEnd) return;
neiBeg.forEach((nei) => {
if (nei.bid === bid) return;
if (mol.bonds.get(nei.bid).begin === bond.begin) {
this.dbonds[nei.bid].ctbond_beg = bid;
} else this.dbonds[nei.bid].ctbond_end = bid;
});
neiEnd.forEach((nei) => {
if (nei.bid === bid) return;
if (mol.bonds.get(nei.bid).begin === bond.end) {
this.dbonds[nei.bid].ctbond_beg = bid;
} else this.dbonds[nei.bid].ctbond_end = bid;
});
}
});
};
Smiles.prototype.updateSideBonds = function (mol, bondIdx) {
// eslint-disable-line max-statements
const bond = mol.bonds.get(bondIdx);
const subst = this.cis_trans.getSubstituents(bondIdx);
const parity = this.cis_trans.getParity(bondIdx);
const sidebonds = [-1, -1, -1, -1];
sidebonds[0] = mol.findBondId(subst[0], bond.begin);
if (subst[1] !== -1) sidebonds[1] = mol.findBondId(subst[1], bond.begin);
sidebonds[2] = mol.findBondId(subst[2], bond.end);
if (subst[3] !== -1) sidebonds[3] = mol.findBondId(subst[3], bond.end);
let n1 = 0;
let n2 = 0;
let n3 = 0;
let n4 = 0;
if (this.dbonds[sidebonds[0]].saved !== 0) {
if (
(this.dbonds[sidebonds[0]].saved === 1 &&
mol.bonds.get(sidebonds[0]).begin === bond.begin) ||
(this.dbonds[sidebonds[0]].saved === 2 &&
mol.bonds.get(sidebonds[0]).end === bond.begin)
) {
n1++;
} else n2++;
}
if (sidebonds[1] !== -1 && this.dbonds[sidebonds[1]].saved !== 0) {
if (
(this.dbonds[sidebonds[1]].saved === 2 &&
mol.bonds.get(sidebonds[1]).begin === bond.begin) ||
(this.dbonds[sidebonds[1]].saved === 1 &&
mol.bonds.get(sidebonds[1]).end === bond.begin)
) {
n1++;
} else n2++;
}
if (this.dbonds[sidebonds[2]].saved !== 0) {
if (
(this.dbonds[sidebonds[2]].saved === 1 &&
mol.bonds.get(sidebonds[2]).begin === bond.end) ||
(this.dbonds[sidebonds[2]].saved === 2 &&
mol.bonds.get(sidebonds[2]).end === bond.end)
) {
n3++;
} else n4++;
}
if (sidebonds[3] !== -1 && this.dbonds[sidebonds[3]].saved !== 0) {
if (
(this.dbonds[sidebonds[3]].saved === 2 &&
mol.bonds.get(sidebonds[3]).begin === bond.end) ||
(this.dbonds[sidebonds[3]].saved === 1 &&
mol.bonds.get(sidebonds[3]).end === bond.end)
) {
n3++;
} else n4++;
}
if (parity === CisTrans.PARITY.CIS) {
n1 += n3;
n2 += n4;
} else {
n1 += n4;
n2 += n3;
}
if (n1 > 0 && n2 > 0) throw new Error('incompatible cis-trans configuration');
if (n1 === 0 && n2 === 0) return false;
if (n1 > 0) {
this.dbonds[sidebonds[0]].saved =
mol.bonds.get(sidebonds[0]).begin === bond.begin ? 1 : 2;
if (sidebonds[1] !== -1) {
this.dbonds[sidebonds[1]].saved =
mol.bonds.get(sidebonds[1]).begin === bond.begin ? 2 : 1;
}
this.dbonds[sidebonds[2]].saved =
(mol.bonds.get(sidebonds[2]).begin === bond.end) ===
(parity === CisTrans.PARITY.CIS)
? 1
: 2;
if (sidebonds[3] !== -1) {
this.dbonds[sidebonds[3]].saved =
(mol.bonds.get(sidebonds[3]).begin === bond.end) ===
(parity === CisTrans.PARITY.CIS)
? 2
: 1;
}
}
if (n2 > 0) {
this.dbonds[sidebonds[0]].saved =
mol.bonds.get(sidebonds[0]).begin === bond.begin ? 2 : 1;
if (sidebonds[1] !== -1) {
this.dbonds[sidebonds[1]].saved =
mol.bonds.get(sidebonds[1]).begin === bond.begin ? 1 : 2;
}
this.dbonds[sidebonds[2]].saved =
(mol.bonds.get(sidebonds[2]).begin === bond.end) ===
(parity === CisTrans.PARITY.CIS)
? 2
: 1;
if (sidebonds[3] !== -1) {
this.dbonds[sidebonds[3]].saved =
(mol.bonds.get(sidebonds[3]).begin === bond.end) ===
(parity === CisTrans.PARITY.CIS)
? 1
: 2;
}
}
return true;
};
Smiles.prototype.calcBondDirection = function (mol, idx, vprev) {
let ntouched;
if (
this.dbonds[idx].ctbond_beg === -1 &&
this.dbonds[idx].ctbond_end === -1
) {
return 0;
}
if (mol.bonds.get(idx).type !== Bond.PATTERN.TYPE.SINGLE) {
throw new Error('internal: directed bond type ' + mol.bonds.get(idx).type);
}
while (true) {
// eslint-disable-line no-constant-condition
ntouched = 0;
this.cis_trans.each((ct, bid) => {
if (ct.parity !== 0 && !this.isBondInRing(bid)) {
if (this.updateSideBonds(mol, bid)) ntouched++;
}
});
if (ntouched === this.touchedCistransbonds) break;
this.touchedCistransbonds = ntouched;
}
if (this.dbonds[idx].saved === 0) {
if (vprev === mol.bonds.get(idx).begin) this.dbonds[idx].saved = 1;
else this.dbonds[idx].saved = 2;
}
return this.dbonds[idx].saved;
};
Smiles.prototype.writeRadicals = function (mol) {
// eslint-disable-line max-statements
const marked = new Array(this.writtenAtoms.length);
let i, j;
for (i = 0; i < this.writtenAtoms.length; i++) {
if (marked[i]) continue; // eslint-disable-line no-continue
const radical = mol.atoms.get(this.writtenAtoms[i]).radical;
if (radical === 0) continue; // eslint-disable-line no-continue
if (this.comma) {
this.smiles += ',';
} else {
this.smiles += ' |';
this.comma = true;
}
if (radical === Atom.PATTERN.RADICAL.SINGLET) this.smiles += '^3:';
else if (radical === Atom.PATTERN.RADICAL.DOUPLET) this.smiles += '^1:';
// RADICAL_TRIPLET
else this.smiles += '^4:';
this.smiles += i;
for (j = i + 1; j < this.writtenAtoms.length; j++) {
if (mol.atoms.get(this.writtenAtoms[j]).radical === radical) {
marked[j] = true;
this.smiles += ',' + j;
}
}
}
};