in core/indigo-core/molecule/src/molfile_saver.cpp [446:1076]
/**
 * Writes one V3000 connection table for `mol` to `output`.
 *
 * Sections are emitted in this order: CTAB header and COUNTS line, ATOM block, BOND block,
 * optional COLLECTION block, optional SGROUP block, "END CTAB", then one block per non-empty
 * R-group and, when template groups exist, a TEMPLATE block.
 *
 * Side effects visible in this function:
 *  - `_atom_mapping` / `_bond_mapping` are rebuilt (molecule index -> 1-based index in the file);
 *  - when `add_implicit_h` is set and `query` is false, temporary data S-groups carrying the implicit
 *    hydrogen count are added to `mol` before writing and removed after the SGROUP block is written.
 *
 * @param output destination stream
 * @param mol    molecule to save; modified temporarily as described above
 * @param query  when true, `mol` is treated as a QueryMolecule and query-only attributes are written
 *
 * @throws Error if an atom has no representable label or a bond order cannot be represented.
 *
 * NOTE(review): if an Error is thrown before the SGROUP block is finished, the temporary implicit-H
 * S-groups stay in `mol`, because they are removed only at the end of that block.
 */
void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query)
{
    _handleCIP(mol);
    if (mol.tgroups.getTGroupCount())
        _handleMonomers(mol);

    QueryMolecule* qmol = nullptr;

    if (query)
        qmol = static_cast<QueryMolecule*>(&mol);

    int i;
    int iw = 1;
    QS_DEF(Array<char>, buf);
    std::list<int> implicit_sgroups_indexes;

    _atom_mapping.clear_resize(mol.vertexEnd());
    _bond_mapping.clear_resize(mol.edgeEnd());

    // Atoms are numbered 1..N in iteration order; every later section refers to these numbers.
    for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i), iw++)
        _atom_mapping[i] = iw;

    // Add the temporary implicit-hydrogen S-groups BEFORE the COUNTS line is written,
    // so that countSGroups() below already includes them.
    if (add_implicit_h && qmol == nullptr)
    {
        for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i))
        {
            int atom_number = mol.getAtomNumber(i);
            int charge = mol.getAtomCharge(i);
            int hcount = MoleculeSavers::getHCount(mol, i, atom_number, charge);
            if (Molecule::shouldWriteHCount(mol.asMolecule(), i) && hcount > 0)
            {
                int sg_idx = mol.sgroups.addSGroup(SGroup::SG_TYPE_DAT);
                implicit_sgroups_indexes.push_front(sg_idx);
                DataSGroup& sgroup = static_cast<DataSGroup&>(mol.sgroups.getSGroup(sg_idx));
                sgroup.setMrv_implicit(i, hcount);
            }
        }
    }

    output.writeStringCR("M V30 BEGIN CTAB");
    output.printfCR("M V30 COUNTS %d %d %d 0 0", mol.vertexCount(), mol.edgeCount(), mol.countSGroups());
    output.writeStringCR("M V30 BEGIN ATOM");

    // ---------------------------------------------------------------- ATOM block
    std::stringstream coords;
    for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i))
    {
        int atom_number = mol.getAtomNumber(i);
        int isotope = mol.getAtomIsotope(i);
        ArrayOutput out(buf);

        out.printf("%d ", _atom_mapping[i]);
        std::vector<std::unique_ptr<QueryMolecule::Atom>> list;
        std::map<int, std::unique_ptr<QueryMolecule::Atom>> properties;
        int query_atom_type;

        // Atom label. D and T are written as their own symbols, so the isotope is cleared
        // to keep MASS= from being written for them as well.
        if (atom_number == ELEM_H && isotope == DEUTERIUM)
        {
            out.writeChar('D');
            isotope = 0;
        }
        else if (atom_number == ELEM_H && isotope == TRITIUM)
        {
            out.writeChar('T');
            isotope = 0;
        }
        else if (mol.isPseudoAtom(i))
            out.writeString(mol.getPseudoAtom(i));
        else if (mol.isTemplateAtom(i))
            out.writeString(mol.getTemplateAtom(i));
        else if (mol.isRSite(i))
            out.writeString("R#");
        else if (atom_number > 0)
        {
            _writeAtomLabel(out, atom_number);
        }
        else if (qmol != nullptr && (query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, i, list, properties)) != -1)
        {
            if (query_atom_type == QueryMolecule::QUERY_ATOM_A)
                out.writeChar('A');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_Q)
                out.writeChar('Q');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_X)
                out.writeChar('X');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_M)
                out.writeChar('M');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_AH)
                out.writeString("AH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_QH)
                out.writeString("QH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_XH)
                out.writeString("XH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_MH)
                out.writeString("MH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_LIST || query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST)
            {
                // Atom list: "[A,B,...]" or "NOT[A,B,...]"
                if (query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST)
                    out.writeString("NOT");

                out.writeChar('[');

                bool not_first = false;
                for (auto& qatom : list)
                {
                    if (not_first)
                        out.writeChar(',');
                    else
                        not_first = true;

                    if (qatom->type == QueryMolecule::ATOM_NUMBER)
                        _writeAtomLabel(out, qatom->value_max);
                    else if (qatom->type == QueryMolecule::ATOM_PSEUDO)
                        out.writeString(qatom->alias.ptr());
                }
                out.writeChar(']');
            }
        }
        else if (atom_number == -1)
            out.writeChar('A');
        else
            throw Error("molfile 3000: can not save atom %d because of unsupported "
                        "query feature",
                        i);

        int aam = 0, ecflag = 0, irflag = 0;

        aam = mol.reaction_atom_mapping[i];
        irflag = mol.reaction_atom_inversion[i];
        ecflag = mol.reaction_atom_exact_change[i];

        Vec3f& xyz = mol.getAtomXyz(i);
        int charge = mol.getAtomCharge(i);
        int radical = 0;
        int valence = mol.getExplicitValence(i);
        int stereo_parity = _getStereocenterParity(mol, i);

        // The radical is queried only for ordinary atoms (not R-sites, pseudo atoms or template atoms).
        if (!mol.isRSite(i) && !mol.isPseudoAtom(i) && !mol.isTemplateAtom(i))
            radical = mol.getAtomRadical_NoThrow(i, 0);

        /*
         * Trailing zeros workaround
         */
        convert_xyz_to_string(xyz, coords);

        out.printf(" %s %d", coords.str().c_str(), aam);

        // Query molecules write every known charge (including 0); ordinary molecules only non-zero ones.
        if ((mol.isQueryMolecule() && charge != CHARGE_UNKNOWN) || (!mol.isQueryMolecule() && charge != 0))
            out.printf(" CHG=%d", charge);

        if (qmol != nullptr)
        {
            // A hydrogen count of zero is written as HCOUNT=-1; a negative result writes nothing.
            int hcount = MoleculeSavers::getHCount(mol, i, atom_number, charge);

            if (hcount > 0)
                out.printf(" HCOUNT=%d", hcount);
            else if (hcount == 0)
                out.printf(" HCOUNT=-1");
        }
        if (radical > 0)
            out.printf(" RAD=%d", radical);
        if (stereo_parity > 0)
            out.printf(" CFG=%d", stereo_parity);
        if (isotope > 0)
            out.printf(" MASS=%d", isotope);
        // An explicit valence of zero is written as VAL=-1; a negative value writes nothing.
        if (valence > 0)
            out.printf(" VAL=%d", valence);
        if (valence == 0)
            out.printf(" VAL=-1");
        if (irflag > 0)
            out.printf(" INVRET=%d", irflag);
        if (ecflag > 0)
            out.printf(" EXACHG=%d", ecflag);

        if (mol.isRSite(i))
        {
            int k;

            QS_DEF(Array<int>, rg_list);
            mol.getAllowedRGroups(i, rg_list);

            if (rg_list.size() > 0)
            {
                out.printf(" RGROUPS=(%d", rg_list.size());
                for (k = 0; k < rg_list.size(); k++)
                    out.printf(" %d", rg_list[k]);
                out.writeChar(')');

                // ATTCHORD is written only when _checkAttPointOrder reports false for this R-site.
                if (!_checkAttPointOrder(mol, i))
                {
                    const Vertex& vertex = mol.getVertex(i);

                    out.printf(" ATTCHORD=(%d", vertex.degree() * 2);
                    for (k = 0; k < vertex.degree(); k++)
                        out.printf(" %d %d", _atom_mapping[mol.getRSiteAttachmentPointByOrder(i, k)], k + 1);

                    out.writeChar(')');
                }
            }
        }

        if (mol.isTemplateAtom(i))
        {
            std::string tclass;
            if (mol.getTemplateAtomClass(i) != 0 && strlen(mol.getTemplateAtomClass(i)) > 0)
            {
                tclass = mol.getTemplateAtomClass(i);
                // convert CHEM to LINKER for BIOVIA
                out.printf(" CLASS=%s", tclass == kMonomerClassCHEM ? kMonomerClassLINKER : tclass.c_str());
            }

            if (mol.getTemplateAtomSeqid(i) != -1 && tclass != kMonomerClassCHEM) // No SEQID for chem
                out.printf(" SEQID=%d", mol.getTemplateAtomSeqid(i));

            // if (mol.getTemplateAtomSeqName(i) && strlen(mol.getTemplateAtomSeqName(i)))
            //    out.printf(" SEQNAME=%s", mol.getTemplateAtomSeqName(i));

            if (mol.template_attachment_points.size() > 0)
            {
                int ap_count = mol.getTemplateAtomAttachmentPointsCount(i);
                if (ap_count)
                {
                    out.printf(" ATTCHORD=(%d", ap_count * 2);
                    // Write only the attachment points that belong to this template atom.
                    for (int j = mol.template_attachment_points.begin(); j != mol.template_attachment_points.end(); j = mol.template_attachment_points.next(j))
                    {
                        BaseMolecule::TemplateAttPoint& ap = mol.template_attachment_points.at(j);
                        if (ap.ap_occur_idx == i)
                        {
                            out.printf(" %d %s", _atom_mapping[ap.ap_aidx], ap.ap_id.ptr());
                        }
                    }
                    out.printf(")");
                }
            }
        }

        if (mol.attachmentPointCount() > 0)
        {
            // Bit (idx - 1) of val is set when this atom is listed under attachment point idx.
            int val = 0;

            for (int idx = 1; idx <= mol.attachmentPointCount(); idx++)
            {
                for (int j = 0; mol.getAttachmentPoint(idx, j) != -1; j++)
                    if (mol.getAttachmentPoint(idx, j) == i)
                    {
                        val |= 1 << (idx - 1);
                        break;
                    }
            }

            // The combination of the first two attachment points (val == 3) is written as -1.
            if (val > 0)
                out.printf(" ATTCHPT=%d", val == 3 ? -1 : val);
        }

        if (qmol != nullptr)
        {
            int unsat;
            if (qmol->getAtom(i).sureValue(QueryMolecule::ATOM_UNSATURATION, unsat))
                out.printf(" UNSAT=1");
            int subst;
            if (MoleculeSavers::getSubstitutionCountFlagValue(*qmol, i, subst))
                out.printf(" SUBST=%d", subst);
            int rbc;
            if (MoleculeSavers::getRingBondCountFlagValue(*qmol, i, rbc))
                out.printf(" RBCNT=%d", rbc > MAX_RING_BOND_COUNT ? MAX_RING_BOND_COUNT : rbc);
        }

        _writeMultiString(output, buf.ptr(), buf.size());
    }

    output.writeStringCR("M V30 END ATOM");
    output.writeStringCR("M V30 BEGIN BOND");

    // ---------------------------------------------------------------- BOND block
    iw = 1;

    for (i = mol.edgeBegin(); i < mol.edgeEnd(); i = mol.edgeNext(i), iw++)
    {
        const Edge& edge = mol.getEdge(i);
        int bond_order = mol.getBondOrder(i);
        ArrayOutput out(buf);

        _bond_mapping[i] = iw;

        // A negative order on a query bond may still map onto one of the query bond types of the format.
        if (bond_order < 0 && qmol != nullptr)
        {
            int qb = QueryMolecule::getQueryBondType(qmol->getBond(i));

            if (qb == _BOND_SINGLE_OR_DOUBLE || qb == _BOND_SINGLE_OR_AROMATIC || qb == _BOND_DOUBLE_OR_AROMATIC || qb == _BOND_ANY)
                bond_order = qb;
        }

        if (bond_order < 0)
            throw Error("unrepresentable query bond");

        // Zero-order bonds are saved as coordination bonds, or as hydrogen bonds when either end is H.
        if (bond_order == BOND_ZERO)
        {
            bond_order = _BOND_COORDINATION;
            if ((mol.getAtomNumber(edge.beg) == ELEM_H) || (mol.getAtomNumber(edge.end) == ELEM_H))
                bond_order = _BOND_HYDROGEN;
        }

        out.printf("%d %d %d %d", iw, bond_order, _atom_mapping[edge.beg], _atom_mapping[edge.end]);

        int direction = mol.getBondDirection(i);

        switch (direction)
        {
        case BOND_UP:
            out.printf(" CFG=1");
            break;
        case BOND_EITHER:
            out.printf(" CFG=2");
            break;
        case BOND_DOWN:
            out.printf(" CFG=3");
            break;
        case 0:
            // No direction: CFG=2 is written when cis-trans is ignored for this bond
            // and _hasNeighborEitherBond reports false.
            if (mol.cis_trans.isIgnored(i))
                if (!_hasNeighborEitherBond(mol, i))
                    out.printf(" CFG=2");
            break;
        }

        int reacting_center = 0;
        reacting_center = mol.reaction_bond_reacting_center[i];

        if (reacting_center != 0)
            out.printf(" RXCTR=%d", reacting_center);

        int indigo_topology = -1;

        if (qmol != nullptr)
            qmol->getBond(i).sureValue(QueryMolecule::BOND_TOPOLOGY, indigo_topology);

        // TOPO: 1 = ring, 2 = chain; nothing is written otherwise.
        int topology = 0;
        if (indigo_topology == TOPOLOGY_RING)
            topology = 1;
        else if (indigo_topology == TOPOLOGY_CHAIN)
            topology = 2;

        if (topology != 0)
            out.printf(" TOPO=%d", topology);

        _writeMultiString(output, buf.ptr(), buf.size());
    }

    output.writeStringCR("M V30 END BOND");

    // ---------------------------------------------------------------- COLLECTION block
    // NOTE(review): custom collections are written only inside this block, i.e. only when the
    // molecule also has stereocenters or highlighting — confirm this is intended.
    MoleculeStereocenters& stereocenters = mol.stereocenters;

    if (stereocenters.begin() != stereocenters.end() || mol.hasHighlighting())
    {
        output.writeStringCR("M V30 BEGIN COLLECTION");

        // processed[j] marks atoms already emitted as part of an earlier atom's stereo group.
        QS_DEF(Array<int>, processed);

        processed.clear_resize(mol.vertexEnd());
        processed.zerofill();

        for (i = mol.vertexBegin(); i != mol.vertexEnd(); i = mol.vertexNext(i))
        {
            if (processed[i])
                continue;

            ArrayOutput out(buf);
            int j, type = stereocenters.getType(i);

            if (type == MoleculeStereocenters::ATOM_ABS)
                out.writeString("MDLV30/STEABS ATOMS=(");
            else if (type == MoleculeStereocenters::ATOM_OR)
                out.printf("MDLV30/STEREL%d ATOMS=(", stereocenters.getGroup(i));
            else if (type == MoleculeStereocenters::ATOM_AND)
                out.printf("MDLV30/STERAC%d ATOMS=(", stereocenters.getGroup(i));
            else
                continue;

            QS_DEF(Array<int>, list);

            list.clear();
            list.push(i);

            // Gather the later atoms that sameGroup() pairs with this one, so each group is written once.
            for (j = mol.vertexNext(i); j < mol.vertexEnd(); j = mol.vertexNext(j))
                if (stereocenters.sameGroup(i, j))
                {
                    list.push(j);
                    processed[j] = 1;
                }

            out.printf("%d", list.size());
            for (j = 0; j < list.size(); j++)
                out.printf(" %d", _atom_mapping[list[j]]);
            out.writeChar(')');

            _writeMultiString(output, buf.ptr(), buf.size());
        }

        if (mol.hasHighlighting())
        {
            if (mol.countHighlightedBonds() > 0)
            {
                ArrayOutput out(buf);

                out.printf("MDLV30/HILITE BONDS=(%d", mol.countHighlightedBonds());

                for (i = mol.edgeBegin(); i != mol.edgeEnd(); i = mol.edgeNext(i))
                    if (mol.isBondHighlighted(i))
                        out.printf(" %d", _bond_mapping[i]);
                out.writeChar(')');

                _writeMultiString(output, buf.ptr(), buf.size());
            }
            if (mol.countHighlightedAtoms() > 0)
            {
                ArrayOutput out(buf);

                out.printf("MDLV30/HILITE ATOMS=(%d", mol.countHighlightedAtoms());
                for (i = mol.vertexBegin(); i != mol.vertexEnd(); i = mol.vertexNext(i))
                    if (mol.isAtomHighlighted(i))
                        out.printf(" %d", _atom_mapping[i]);
                out.writeChar(')');

                _writeMultiString(output, buf.ptr(), buf.size());
            }
        }
        if (mol.custom_collections.size() > 0)
        {
            for (i = mol.custom_collections.begin(); i != mol.custom_collections.end(); i = mol.custom_collections.next(i))
            {
                ArrayOutput out(buf);
                out.printf("%s", mol.custom_collections.at(i));
                _writeMultiString(output, buf.ptr(), buf.size());
            }
        }
        output.writeStringCR("M V30 END COLLECTION");
    }

    // ---------------------------------------------------------------- SGROUP block
    QS_DEF(Array<int>, sgs_sorted);
    _checkSGroupIndices(mol, sgs_sorted);

    if (mol.countSGroups() > 0)
    {
        MoleculeSGroups* sgroups = &mol.sgroups;
        int idx = 1;

        output.writeStringCR("M V30 BEGIN SGROUP");
        for (i = 0; i < sgs_sorted.size(); i++)
        {
            ArrayOutput out(buf);
            int sg_idx = sgs_sorted[i];
            SGroup& sgroup = sgroups->getSGroup(sg_idx);
            _writeGenericSGroup3000(sgroup, idx++, out);

            // Type-specific attributes are appended to the generic part. Generic groups and
            // any type not listed here carry nothing beyond the generic part.
            if (sgroup.sgroup_type == SGroup::SG_TYPE_SUP)
            {
                Superatom& sup = static_cast<Superatom&>(sgroup);
                if (sup.bond_connections.size() > 0)
                {
                    for (int j = 0; j < sup.bond_connections.size(); j++)
                    {
                        out.printf(" CSTATE=(4 %d %f %f %f)", _bond_mapping[sup.bond_connections[j].bond_idx], sup.bond_connections[j].bond_dir.x,
                                   sup.bond_connections[j].bond_dir.y, 0.f);
                    }
                }
                if (sup.subscript.size() > 1)
                {
                    // Labels containing a space must be quoted.
                    if (sup.subscript.find(' ') > -1)
                        out.printf(" LABEL=\"%s\"", sup.subscript.ptr());
                    else
                        out.printf(" LABEL=%s", sup.subscript.ptr());
                }
                // convert CHEM to LINKER for BIOVIA
                if (sup.sa_class.size() > 1)
                    out.printf(" CLASS=%s", sup.sa_class.ptr() == std::string(kMonomerClassCHEM) ? kMonomerClassLINKER : sup.sa_class.ptr());
                if (sup.contracted == DisplayOption::Expanded)
                    out.printf(" ESTATE=E");
                if (sup.attachment_points.size() > 0)
                {
                    for (int j = sup.attachment_points.begin(); j < sup.attachment_points.end(); j = sup.attachment_points.next(j))
                    {
                        // A leaving atom index of 0 is written when the attachment point has none.
                        int leave_idx = 0;
                        if (sup.attachment_points[j].lvidx > -1)
                            leave_idx = _atom_mapping[sup.attachment_points[j].lvidx];

                        out.printf(" SAP=(3 %d %d %s)", _atom_mapping[sup.attachment_points[j].aidx], leave_idx, sup.attachment_points[j].apid.ptr());
                    }
                }
                if (sup.seqid > 0)
                    out.printf(" SEQID=%d", sup.seqid);

                if (sup.sa_natreplace.size() > 1)
                    out.printf(" NATREPLACE=%s", sup.sa_natreplace.ptr());
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_DAT)
            {
                DataSGroup& dsg = static_cast<DataSGroup&>(sgroup);

                // Writes `key` followed by `value`, quoting the value when it contains a space.
                // Null or empty values are skipped entirely.
                auto write_optional_field = [&out](const char* key, const char* value) {
                    if (value == nullptr || strlen(value) == 0)
                        return;
                    out.writeString(key);
                    const bool space_found = (strchr(value, ' ') != nullptr);
                    if (space_found)
                        out.writeString("\"");
                    out.writeString(value);
                    if (space_found)
                        out.writeString("\"");
                };

                write_optional_field(" FIELDNAME=", dsg.name.ptr());
                write_optional_field(" FIELDINFO=", dsg.description.ptr());
                write_optional_field(" QUERYTYPE=", dsg.querycode.ptr());
                write_optional_field(" QUERYOP=", dsg.queryoper.ptr());

                out.printf(" FIELDDISP=\"");
                _writeDataSGroupDisplay(dsg, out);
                out.printf("\"");

                if (dsg.data.size() > 0 && dsg.data[0] != 0)
                {
                    // Split field data by new lines: one FIELDDATA entry per line.
                    int len = dsg.data.size();
                    const char* data = dsg.data.ptr();
                    while (len > 0)
                    {
                        // The last byte is normally the terminating NUL and is not part of the text.
                        // If the buffer is not NUL-terminated the last byte is ordinary content and
                        // has to be scanned too; otherwise the loop would stop making progress.
                        const int limit = (data[len - 1] == 0) ? len - 1 : len;

                        int j;
                        for (j = 0; j < limit; j++)
                            if (data[j] == '\n')
                                break;

                        out.printf(" FIELDDATA=\"%.*s\"", j, data);

                        // Skip the line break itself (never read past the end of the buffer).
                        if (j < len && data[j] == '\n')
                            j++;

                        data += j;
                        len -= j;

                        // Stop at the end of the buffer or at the terminator.
                        if (len <= 0 || *data == 0)
                            break;
                    }
                }
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_SRU)
            {
                RepeatingUnit& ru = static_cast<RepeatingUnit&>(sgroup);
                if (ru.connectivity == SGroup::HEAD_TO_HEAD)
                    out.printf(" CONNECT=HH");
                else if (ru.connectivity == SGroup::HEAD_TO_TAIL)
                    out.printf(" CONNECT=HT");
                else
                    out.printf(" CONNECT=EU");
                if (ru.subscript.size() > 1)
                {
                    // Labels containing a space must be quoted.
                    if (ru.subscript.find(' ') > -1)
                        out.printf(" LABEL=\"%s\"", ru.subscript.ptr());
                    else
                        out.printf(" LABEL=%s", ru.subscript.ptr());
                }
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_MUL)
            {
                MultipleGroup& mg = static_cast<MultipleGroup&>(sgroup);
                if (mg.parent_atoms.size() > 0)
                {
                    out.printf(" PATOMS=(%d", mg.parent_atoms.size());
                    int j;
                    for (j = 0; j < mg.parent_atoms.size(); j++)
                        out.printf(" %d", _atom_mapping[mg.parent_atoms[j]]);
                    out.printf(")");
                }
                out.printf(" MULT=%d", mg.multiplier);
            }

            _writeMultiString(output, buf.ptr(), buf.size());
        }
        output.writeStringCR("M V30 END SGROUP");

        // Drop the temporary implicit-hydrogen S-groups added before the COUNTS line.
        _removeImplicitSGroups(mol, implicit_sgroups_indexes);
    }

    output.writeStringCR("M V30 END CTAB");

    // ---------------------------------------------------------------- R-groups and templates
    int n_rgroups = mol.rgroups.getRGroupCount();
    for (i = 1; i <= n_rgroups; i++)
        if (mol.rgroups.getRGroup(i).fragments.size() > 0)
            _writeRGroup(output, mol, i);

    int n_tgroups = mol.tgroups.getTGroupCount();
    if (n_tgroups > 0)
    {
        output.writeStringCR("M V30 BEGIN TEMPLATE");
        for (i = mol.tgroups.begin(); i != mol.tgroups.end(); i = mol.tgroups.next(i))
        {
            _writeTGroup(output, mol, i);
        }
        output.writeStringCR("M V30 END TEMPLATE");
    }
}