in core/indigo-core/molecule/src/molfile_saver.cpp [1200:1912]
void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query)
{
_handleCIP(mol);
QueryMolecule* qmol = nullptr;
if (query)
qmol = (QueryMolecule*)(&mol);
int i;
QS_DEF(Array<int[2]>, radicals);
QS_DEF(Array<int>, charges);
QS_DEF(Array<int>, isotopes);
QS_DEF(Array<int>, pseudoatoms);
QS_DEF(Array<int>, atom_lists);
QS_DEF(Array<int>, unsaturated);
QS_DEF(Array<int[2]>, substitution_count);
QS_DEF(Array<int[2]>, ring_bonds);
QS_DEF(Array<int>, aliases);
std::list<int> implicit_sgroups_indexes;
_atom_mapping.clear_resize(mol.vertexEnd());
_bond_mapping.clear_resize(mol.edgeEnd());
radicals.clear();
charges.clear();
isotopes.clear();
pseudoatoms.clear();
atom_lists.clear();
unsaturated.clear();
substitution_count.clear();
ring_bonds.clear();
aliases.clear();
int iw = 1;
for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i), iw++)
{
char label[3] = {' ', ' ', ' '};
int valence = 0, radical = 0, charge = 0, stereo_parity = 0, hydrogens_count = 0, stereo_care = 0, aam = 0, irflag = 0, ecflag = 0;
int atom_number = mol.getAtomNumber(i);
int atom_isotope = mol.getAtomIsotope(i);
int atom_charge = mol.getAtomCharge(i);
int atom_radical = 0;
_atom_mapping[i] = iw;
if (!mol.isRSite(i) && !mol.isPseudoAtom(i))
atom_radical = mol.getAtomRadical_NoThrow(i, 0);
if (mol.isAlias(i))
{
aliases.push(i);
}
if (mol.isRSite(i))
{
label[0] = 'R';
label[1] = '#';
}
else if (mol.isPseudoAtom(i))
{
const char* pseudo = mol.getPseudoAtom(i);
if (strlen(pseudo) <= (size_t)3)
memcpy(label, pseudo, std::min(strlen(pseudo), (size_t)3));
else
{
label[0] = 'A';
pseudoatoms.push(i);
}
}
else if (mol.isTemplateAtom(i))
{
throw Error("internal: template atom is not supported in V2000 format");
}
else if (atom_number == -1)
{
if (qmol == 0)
throw Error("internal: atom number = -1, but qmol == 0");
std::vector<std::unique_ptr<QueryMolecule::Atom>> list;
std::map<int, std::unique_ptr<QueryMolecule::Atom>> properties;
int query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, i, list, properties);
if (query_atom_type == QueryMolecule::QUERY_ATOM_A)
label[0] = 'A';
else if (query_atom_type == QueryMolecule::QUERY_ATOM_Q)
label[0] = 'Q';
else if (query_atom_type == QueryMolecule::QUERY_ATOM_X)
label[0] = 'X';
else if (query_atom_type == QueryMolecule::QUERY_ATOM_M)
label[0] = 'M';
else if (query_atom_type == QueryMolecule::QUERY_ATOM_AH)
{
label[0] = 'A';
label[1] = 'H';
}
else if (query_atom_type == QueryMolecule::QUERY_ATOM_QH)
{
label[0] = 'Q';
label[1] = 'H';
}
else if (query_atom_type == QueryMolecule::QUERY_ATOM_XH)
{
label[0] = 'X';
label[1] = 'H';
}
else if (query_atom_type == QueryMolecule::QUERY_ATOM_MH)
{
label[0] = 'M';
label[1] = 'H';
}
else if (query_atom_type == QueryMolecule::QUERY_ATOM_LIST || query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST)
{
label[0] = 'L';
atom_lists.push(i);
}
else
label[0] = 'A';
// throw Error("error saving atom #%d: unsupported query atom", i);
}
else if (atom_number == ELEM_H && atom_isotope == DEUTERIUM)
label[0] = 'D';
else if (atom_number == ELEM_H && atom_isotope == TRITIUM)
label[0] = 'T';
else
{
const char* str = Element::toString(atom_number);
label[0] = str[0];
if (str[1] != 0)
{
label[1] = str[1];
if (str[2] != 0)
{
label[2] = str[2];
}
}
if (atom_isotope > 0)
isotopes.push(i);
}
aam = mol.reaction_atom_mapping[i];
irflag = mol.reaction_atom_inversion[i];
ecflag = mol.reaction_atom_exact_change[i];
int explicit_valence = -1;
if (!mol.isRSite(i) && !mol.isPseudoAtom(i))
explicit_valence = mol.getExplicitValence(i);
if (explicit_valence > 0 && explicit_valence <= 14)
valence = explicit_valence;
if (explicit_valence == 0)
valence = 15;
if (atom_charge != CHARGE_UNKNOWN && atom_charge >= -15 && atom_charge <= 15)
charge = atom_charge;
if (charge != 0)
charges.push(i);
if (atom_radical >= 0 && atom_radical <= 3)
radical = atom_radical;
if (radical != 0)
{
int* r = radicals.push();
r[0] = i;
r[1] = radical;
}
if (qmol != 0)
{
int unsat;
if (qmol->getAtom(i).sureValue(QueryMolecule::ATOM_UNSATURATION, unsat))
unsaturated.push(i);
int rbc;
if (MoleculeSavers::getRingBondCountFlagValue(*qmol, i, rbc))
{
int* r = ring_bonds.push();
r[0] = i;
r[1] = rbc > MAX_RING_BOND_COUNT ? MAX_RING_BOND_COUNT : rbc;
}
int subst;
if (MoleculeSavers::getSubstitutionCountFlagValue(*qmol, i, subst))
{
int* s = substitution_count.push();
s[0] = i;
s[1] = subst;
}
}
hydrogens_count = MoleculeSavers::getHCount(mol, i, atom_number, atom_charge);
if (qmol != 0)
{
if (hydrogens_count == -1)
hydrogens_count = 0;
else
// molfile stores h+1
hydrogens_count++;
}
else if (add_implicit_h && Molecule::shouldWriteHCount(mol.asMolecule(), i) && hydrogens_count > 0)
{
int sg_idx;
sg_idx = mol.sgroups.addSGroup(SGroup::SG_TYPE_DAT);
implicit_sgroups_indexes.push_front(sg_idx);
DataSGroup& sgroup = static_cast<DataSGroup&>(mol.sgroups.getSGroup(sg_idx));
sgroup.setMrv_implicit(i, hydrogens_count);
hydrogens_count = 0;
}
else
{
hydrogens_count = 0;
}
stereo_parity = _getStereocenterParity(mol, i);
Vec3f pos = mol.getAtomXyz(i);
if (fabs(pos.x) < 1e-5f)
pos.x = 0;
if (fabs(pos.y) < 1e-5f)
pos.y = 0;
if (fabs(pos.z) < 1e-5f)
pos.z = 0;
output.printfCR("%10.4f%10.4f%10.4f %c%c%c%2d"
"%3d%3d%3d%3d%3d"
"%3d%3d%3d%3d%3d%3d",
pos.x, pos.y, pos.z, label[0], label[1], label[2], 0, 0, stereo_parity, hydrogens_count, stereo_care, valence, 0, 0, 0, aam, irflag,
ecflag);
}
iw = 1;
for (i = mol.edgeBegin(); i < mol.edgeEnd(); i = mol.edgeNext(i))
{
const Edge& edge = mol.getEdge(i);
int bond_order = mol.getBondOrder(i);
int indigo_topology = -1;
if (bond_order < 0 && qmol != 0)
{
int qb = QueryMolecule::getQueryBondType(qmol->getBond(i));
if (qb == _BOND_SINGLE_OR_DOUBLE || qb == _BOND_SINGLE_OR_AROMATIC || qb == _BOND_DOUBLE_OR_AROMATIC || qb == _BOND_ANY)
bond_order = qb;
}
if (bond_order < 0)
{
Array<char> buf;
qmol->getBondDescription(i, buf);
throw Error("unrepresentable query bond: %s", buf.ptr());
}
int stereo = 0;
int reacting_center = 0;
int direction = mol.getBondDirection(i);
switch (direction)
{
case BOND_UP:
stereo = 1;
break;
case BOND_DOWN:
stereo = 6;
break;
case BOND_EITHER:
stereo = 4;
break;
case 0:
if (mol.cis_trans.isIgnored(i))
stereo = 3;
break;
}
if (stereo == 3)
{
// discard it if we have a neighbor "either" bond
if (_hasNeighborEitherBond(mol, i))
stereo = 0;
}
if (qmol != 0 && indigo_topology == -1)
qmol->getBond(i).sureValue(QueryMolecule::BOND_TOPOLOGY, indigo_topology);
int topology = 0;
if (indigo_topology == TOPOLOGY_RING)
topology = 1;
else if (indigo_topology == TOPOLOGY_CHAIN)
topology = 2;
reacting_center = mol.reaction_bond_reacting_center[i];
output.printfCR("%3d%3d%3d%3d%3d%3d%3d", _atom_mapping[edge.beg], _atom_mapping[edge.end], bond_order, stereo, 0, topology, reacting_center);
_bond_mapping[i] = iw++;
}
if (charges.size() > 0)
{
int j = 0;
while (j < charges.size())
{
output.printf("M CHG%3d", std::min(charges.size(), j + 8) - j);
for (i = j; i < std::min(charges.size(), j + 8); i++)
output.printf(" %3d %3d", _atom_mapping[charges[i]], mol.getAtomCharge(charges[i]));
output.writeCR();
j += 8;
}
}
if (radicals.size() > 0)
{
int j = 0;
while (j < radicals.size())
{
output.printf("M RAD%3d", std::min(radicals.size(), j + 8) - j);
for (i = j; i < std::min(radicals.size(), j + 8); i++)
output.printf(" %3d %3d", _atom_mapping[radicals[i][0]], radicals[i][1]);
output.writeCR();
j += 8;
}
}
if (isotopes.size() > 0)
{
int j = 0;
while (j < isotopes.size())
{
output.printf("M ISO%3d", std::min(isotopes.size(), j + 8) - j);
for (i = j; i < std::min(isotopes.size(), j + 8); i++)
output.printf(" %3d %3d", _atom_mapping[isotopes[i]], mol.getAtomIsotope(isotopes[i]));
output.writeCR();
j += 8;
}
}
if (unsaturated.size() > 0)
{
int j = 0;
while (j < unsaturated.size())
{
output.printf("M UNS%3d", std::min(unsaturated.size(), j + 8) - j);
for (i = j; i < std::min(unsaturated.size(), j + 8); i++)
output.printf(" %3d %3d", _atom_mapping[unsaturated[i]], 1);
output.writeCR();
j += 8;
}
}
if (substitution_count.size() > 0)
{
int j = 0;
while (j < substitution_count.size())
{
output.printf("M SUB%3d", std::min(substitution_count.size(), j + 8) - j);
for (i = j; i < std::min(substitution_count.size(), j + 8); i++)
output.printf(" %3d %3d", _atom_mapping[substitution_count[i][0]], substitution_count[i][1]);
output.writeCR();
j += 8;
}
}
if (ring_bonds.size() > 0)
{
int j = 0;
while (j < ring_bonds.size())
{
output.printf("M RBC%3d", std::min(ring_bonds.size(), j + 8) - j);
for (i = j; i < std::min(ring_bonds.size(), j + 8); i++)
output.printf(" %3d %3d", _atom_mapping[ring_bonds[i][0]], ring_bonds[i][1]);
output.writeCR();
j += 8;
}
}
for (i = 0; i < atom_lists.size(); i++)
{
int atom_idx = atom_lists[i];
std::vector<std::unique_ptr<QueryMolecule::Atom>> list;
std::map<int, std::unique_ptr<QueryMolecule::Atom>> properties;
int query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, atom_idx, list, properties);
if (query_atom_type != QueryMolecule::QUERY_ATOM_LIST && query_atom_type != QueryMolecule::QUERY_ATOM_NOTLIST)
throw Error("internal: atom list not recognized");
if (list.size() < 1)
throw Error("internal: atom list size is zero");
output.printf("M ALS %3d%3d %c ", _atom_mapping[atom_idx], list.size(), query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST ? 'T' : 'F');
for (auto& qatom : list)
{
if (qatom->type == QueryMolecule::ATOM_NUMBER)
{
char c1 = ' ', c2 = ' ';
const char* str = Element::toString(qatom->value_max);
c1 = str[0];
if (str[1] != 0)
c2 = str[1];
output.printf("%c%c ", c1, c2);
}
else if (qatom->type == QueryMolecule::ATOM_PSEUDO)
{
const char* str = qatom->alias.ptr();
constexpr int SYMBOL_WIDTH = 4;
if (strlen(str) > 4)
{
for (int k = 0; k < SYMBOL_WIDTH; k++)
output.writeChar(str[k]);
}
else
{
output.writeString(str);
for (auto k = strlen(str); k < SYMBOL_WIDTH; k++)
output.writeChar(' ');
}
}
}
output.writeCR();
}
for (i = 0; i < pseudoatoms.size(); i++)
{
output.printfCR("A %3d", _atom_mapping[pseudoatoms[i]]);
output.writeString(mol.getPseudoAtom(pseudoatoms[i]));
output.writeCR();
}
for (i = 0; i < aliases.size(); i++)
{
output.printfCR("A %3d", _atom_mapping[aliases[i]]);
output.writeString(mol.getAlias(aliases[i]));
output.writeCR();
}
if (qmol && add_mrv_sma)
{
for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i))
{
std::string mrv_sma = QueryMolecule::getMolMrvSmaExtension(*qmol, i);
if (mrv_sma.length() > 0)
{
output.printfCR("M MRV SMA %3u [%s]", i + 1, mrv_sma.c_str());
}
}
}
QS_DEF(Array<int>, sgroup_ids);
QS_DEF(Array<int>, child_ids);
QS_DEF(Array<int>, parent_ids);
sgroup_ids.clear();
child_ids.clear();
parent_ids.clear();
for (i = mol.sgroups.begin(); i != mol.sgroups.end(); i = mol.sgroups.next(i))
{
/*SGroup& sgroup =*/std::ignore = mol.sgroups.getSGroup(i);
sgroup_ids.push(i);
}
QS_DEF(Array<int>, sgs_sorted);
_checkSGroupIndices(mol, sgs_sorted);
if (sgroup_ids.size() > 0)
{
int j;
for (j = 0; j < sgroup_ids.size(); j += 8)
{
output.printf("M STY%3d", std::min(sgroup_ids.size(), j + 8) - j);
for (i = j; i < std::min(sgroup_ids.size(), j + 8); i++)
{
SGroup* sgroup = &mol.sgroups.getSGroup(sgroup_ids[i]);
output.printf(" %3d %s", sgroup->original_group, SGroup::typeToString(sgroup->sgroup_type));
}
output.writeCR();
}
for (j = 0; j < sgroup_ids.size(); j++)
{
SGroup* sgroup = &mol.sgroups.getSGroup(sgroup_ids[j]);
if (sgroup->parent_group > 0)
{
child_ids.push(sgroup->original_group);
parent_ids.push(sgroup->parent_group);
}
}
for (j = 0; j < child_ids.size(); j += 8)
{
output.printf("M SPL%3d", std::min(child_ids.size(), j + 8) - j);
for (i = j; i < std::min(child_ids.size(), j + 8); i++)
{
output.printf(" %3d %3d", child_ids[i], parent_ids[i]);
}
output.writeCR();
}
for (j = 0; j < sgroup_ids.size(); j += 8)
{
output.printf("M SLB%3d", std::min(sgroup_ids.size(), j + 8) - j);
for (i = j; i < std::min(sgroup_ids.size(), j + 8); i++)
{
SGroup* sgroup = &mol.sgroups.getSGroup(sgroup_ids[i]);
output.printf(" %3d %3d", sgroup->original_group, sgroup->original_group);
}
output.writeCR();
}
int sru_count = mol.sgroups.getSGroupCount(SGroup::SG_TYPE_SRU);
for (j = 0; j < sru_count; j += 8)
{
output.printf("M SCN%3d", std::min(sru_count, j + 8) - j);
for (i = j; i < std::min(sru_count, j + 8); i++)
{
RepeatingUnit* ru = (RepeatingUnit*)&mol.sgroups.getSGroup(i, SGroup::SG_TYPE_SRU);
output.printf(" %3d ", ru->original_group);
if (ru->connectivity == SGroup::HEAD_TO_HEAD)
output.printf("HH ");
else if (ru->connectivity == SGroup::HEAD_TO_TAIL)
output.printf("HT ");
else
output.printf("EU ");
}
output.writeCR();
}
for (i = mol.sgroups.begin(); i != mol.sgroups.end(); i = mol.sgroups.next(i))
{
SGroup& sgroup = mol.sgroups.getSGroup(i);
for (j = 0; j < sgroup.atoms.size(); j += 8)
{
int k;
output.printf("M SAL %3d%3d", sgroup.original_group, std::min(sgroup.atoms.size(), j + 8) - j);
for (k = j; k < std::min(sgroup.atoms.size(), j + 8); k++)
output.printf(" %3d", _atom_mapping[sgroup.atoms[k]]);
output.writeCR();
}
for (j = 0; j < sgroup.bonds.size(); j += 8)
{
int k;
output.printf("M SBL %3d%3d", sgroup.original_group, std::min(sgroup.bonds.size(), j + 8) - j);
for (k = j; k < std::min(sgroup.bonds.size(), j + 8); k++)
output.printf(" %3d", _bond_mapping[sgroup.bonds[k]]);
output.writeCR();
}
if (sgroup.sgroup_type == SGroup::SG_TYPE_SUP)
{
Superatom& superatom = (Superatom&)sgroup;
if (superatom.subscript.size() > 1)
{
if (superatom.subscript.find(' ') > -1)
output.printfCR("M SMT %3d \"%s\"", superatom.original_group, superatom.subscript.ptr());
else
output.printfCR("M SMT %3d %s", superatom.original_group, superatom.subscript.ptr());
}
if (superatom.sa_class.size() > 1)
output.printfCR("M SCL %3d %s", superatom.original_group, superatom.sa_class.ptr());
if (superatom.bond_connections.size() > 0)
{
for (j = 0; j < superatom.bond_connections.size(); j++)
{
output.printfCR("M SBV %3d %3d %9.4f %9.4f", superatom.original_group, _bond_mapping[superatom.bond_connections[j].bond_idx],
superatom.bond_connections[j].bond_dir.x, superatom.bond_connections[j].bond_dir.y);
}
}
if (superatom.contracted == DisplayOption::Expanded)
{
output.printfCR("M SDS EXP 1 %3d", superatom.original_group);
}
if (superatom.attachment_points.size() > 0)
{
bool next_line = true;
int nrem = superatom.attachment_points.size();
int k = 0;
for (j = superatom.attachment_points.begin(); j < superatom.attachment_points.end(); j = superatom.attachment_points.next(j))
{
if (next_line)
{
output.printf("M SAP %3d%3d", superatom.original_group, std::min(nrem, 6));
next_line = false;
}
int leave_idx = 0;
if (superatom.attachment_points[j].lvidx > -1)
leave_idx = _atom_mapping[superatom.attachment_points[j].lvidx];
const int KApIdNumPlaces = 2;
std::string apid_str = superatom.attachment_points[j].apid.ptr();
if (apid_str.size() < KApIdNumPlaces)
apid_str = std::string(KApIdNumPlaces - apid_str.size(), ' ') + apid_str;
output.printf(" %3d %3d %.*s", _atom_mapping[superatom.attachment_points[j].aidx], leave_idx, KApIdNumPlaces, apid_str.c_str());
k++;
nrem--;
if ((k == 6) || (nrem == 0))
{
output.writeCR();
next_line = true;
k = 0;
}
}
}
}
else if (sgroup.sgroup_type == SGroup::SG_TYPE_SRU)
{
RepeatingUnit& sru = (RepeatingUnit&)sgroup;
if (sru.subscript.size() > 1)
{
if (sru.subscript.find(' ') > -1)
output.printfCR("M SMT %3d \"%s\"", sru.original_group, sru.subscript.ptr());
else
output.printfCR("M SMT %3d %s", sru.original_group, sru.subscript.ptr());
}
}
else if (sgroup.sgroup_type == SGroup::SG_TYPE_DAT)
{
DataSGroup& datasgroup = (DataSGroup&)sgroup;
output.printf("M SDT %3d ", datasgroup.original_group);
_writeFormattedString(output, datasgroup.name, 30);
_writeFormattedString(output, datasgroup.type, 2);
_writeFormattedString(output, datasgroup.description, 20);
_writeFormattedString(output, datasgroup.querycode, 2);
_writeFormattedString(output, datasgroup.queryoper, 15);
output.writeCR();
output.printf("M SDD %3d ", datasgroup.original_group);
_writeDataSGroupDisplay(datasgroup, output);
output.writeCR();
int k = datasgroup.data.size();
if (k > 0 && datasgroup.data.top() == 0)
k--; // Exclude terminating zero
char* ptr = datasgroup.data.ptr();
while (k > 0)
{
for (j = 0; j < 69 && j < k; j++)
if (ptr[j] == '\n')
break;
// Print ptr[0..i]
output.writeString("M ");
if (j != 69 || j == k)
output.writeString("SED ");
else
output.writeString("SCD ");
output.printf("%3d ", datasgroup.original_group);
output.write(ptr, j);
if (ptr[j] == '\n')
j++;
ptr += j;
k -= j;
output.writeCR();
}
}
else if (sgroup.sgroup_type == SGroup::SG_TYPE_MUL)
{
MultipleGroup& mg = (MultipleGroup&)sgroup;
for (j = 0; j < mg.parent_atoms.size(); j += 8)
{
int k;
output.printf("M SPA %3d%3d", mg.original_group, std::min(mg.parent_atoms.size(), j + 8) - j);
for (k = j; k < std::min(mg.parent_atoms.size(), j + 8); k++)
output.printf(" %3d", _atom_mapping[mg.parent_atoms[k]]);
output.writeCR();
}
output.printf("M SMT %3d %d\n", mg.original_group, mg.multiplier);
}
for (j = 0; j < sgroup.brackets.size(); j++)
{
output.printf("M SDI %3d 4 %9.4f %9.4f %9.4f %9.4f\n", sgroup.original_group, sgroup.brackets[j][0].x, sgroup.brackets[j][0].y,
sgroup.brackets[j][1].x, sgroup.brackets[j][1].y);
}
if (sgroup.brackets.size() > 0 && sgroup.brk_style > 0)
{
output.printf("M SBT 1 %3d %3d\n", sgroup.original_group, sgroup.brk_style);
}
if (sgroup.sgroup_subtype > 0)
{
if (sgroup.sgroup_subtype == SGroup::SG_SUBTYPE_ALT)
output.printf("M SST 1 %3d ALT\n", sgroup.original_group);
else if (sgroup.sgroup_subtype == SGroup::SG_SUBTYPE_RAN)
output.printf("M SST 1 %3d RAN\n", sgroup.original_group);
else if (sgroup.sgroup_subtype == SGroup::SG_SUBTYPE_BLO)
output.printf("M SST 1 %3d BLO\n", sgroup.original_group);
}
}
}
_removeImplicitSGroups(mol, implicit_sgroups_indexes);
}