layer3/MoleculeExporter.cpp (1,194 lines of code) (raw):
/*
* Molecular file formats export
*
* (c) 2016 Schrodinger, Inc.
*/
#include <vector>
#include <map>
#include <algorithm>
#include <cstdarg>
#include <clocale>
#include <memory>
#ifndef _PYMOL_NO_MSGPACKC
#include <mmtf.hpp>
#include <mmtf/export_helpers.hpp>
#endif
#include "os_std.h"
#include "MoleculeExporter.h"
#include "Selector.h"
#include "SelectorDef.h"
#include "Executive.h"
#include "Vector.h"
#include "ObjectMolecule.h"
#include "CoordSet.h"
#include "Mol2Typing.h"
#include "MmodTyping.h"
#include "Color.h"
#include "Util.h"
#include "Lex.h"
#include "P.h"
#include "PConv.h"
#include "CifDataValueFormatter.h"
#include "MaeExportHelpers.h"
#ifdef _PYMOL_IP_PROPERTIES
#include "Property.h"
#endif
/*
* Get the capitalized element symbol. Assume that ai->elem is either all
* caps (e.g. loaded from PDB) or capitalized.
*/
class ElemCanonicalizer {
ElemName m_buffer;
public:
const char * operator() (const AtomInfoType * ai) {
const char * elem = ai->elem;
if (ai->protons < 1 || !elem[0] || !elem[1] || islower(elem[1]))
return elem;
m_buffer[0] = elem[0];
UtilNCopyToLower(m_buffer + 1, elem + 1, sizeof(ElemName) - 1);
return m_buffer;
}
};
/*
* "sprintf" into VLA string buffer. Will grow (and reallocate) the VLA if
* needed.
*
* Returns the number of characters written (excluding the null byte).
*/
static
int VLAprintf(pymol::vla<char>& vla, int offset, const char * format, ...) {
int n, size = vla.size() - offset;
va_list ap;
va_start(ap, format);
n = vsnprintf(vla + offset, std::max(0, size), format, ap);
va_end(ap);
#ifdef WIN32
// Windows API: If len > count, then count characters are stored in buffer,
// no null-terminator is appended, and a negative value is returned.
if (n < 0) {
va_start(ap, format);
n = _vscprintf(format, ap);
va_end(ap);
}
#endif
// C99 standard: a return value of size or more means that the output was
// truncated (glibc since 2.1; not on Windows)
if (n >= size) {
vla.check(offset + n);
va_start(ap, format);
vsprintf(vla + offset, format, ap);
va_end(ap);
}
return n;
}
// for "multisave" behavior
enum {
cMolExportGlobal = 0,
cMolExportByObject = 1,
cMolExportByCoordSet = 2,
};
// for re-indexed bonds
struct BondRef {
BondType * ref;
int id1, id2;
};
// for re-indexed atoms
struct AtomRef {
AtomInfoType * ref;
float coord[3];
int id;
};
/*
* Abstract base class for exporting molecular selections
*/
struct MoleculeExporter {
pymol::vla<char> m_buffer; // out
protected:
int m_offset;
CoordSet * m_last_cs;
ObjectMolecule * m_last_obj;
int m_last_state;
PyMOLGlobals * G;
SeleCoordIterator m_iter;
bool m_retain_ids;
int m_id;
struct matrix_t {
double storage[16];
const double * ptr;
} m_mat_ref, // inverse of reference object transformation
m_mat_full, // for ANISOU
m_mat_move; // for coordinates (TTT)
// atom coordinate
private:
float m_coord_tmp[3];
protected:
const float *m_coord;
// how to restart models
int m_multi;
std::vector<BondRef> m_bonds;
std::vector<int> m_tmpids;
public:
// quasi constructor (easier to inherit than a real constructor)
virtual void init(PyMOLGlobals * G_) {
G = G_;
m_buffer.resize(1280);
m_buffer[0] = '\0';
m_mat_ref.ptr = NULL;
m_offset = 0;
m_last_cs = NULL;
m_last_obj = NULL;
m_last_state = -1;
m_retain_ids = false;
m_id = 0;
setMulti(getMultiDefault());
}
// destructor
virtual ~MoleculeExporter() {
}
/*
* Do the export (e.g. populate "m_buffer" with file contents)
*/
void execute(int sele, int state);
/*
* how to handle selections which span multiple objects
*/
void setMulti(int multi) {
if (multi != -1)
m_multi = multi;
}
private:
/*
* Reset the "index" fields in the selector table
*/
void resetTmpIDs() {
m_tmpids.resize(m_iter.obj->NAtom, 0);
std::fill(m_tmpids.begin(), m_tmpids.end(), 0);
}
protected:
/*
* Get the current atom's running index (1-based). Most formats use
* this index to reference atoms from the bonds table. If "retain_ids"
* is true, then this is AtomInfoType::id.
*/
int getTmpID() {
return m_tmpids[m_iter.getAtm()];
}
/*
* Get the running index for the given atom.
*/
int getTmpID(int atm) {
return m_tmpids[atm];
}
/*
* Get the current state title if not empty, or the object name otherwise.
*/
const char * getTitleOrName() {
if (!m_iter.cs)
return "untitled";
return m_iter.cs->Name[0] ? m_iter.cs->Name : m_iter.obj->Obj.Name;
}
public:
/*
* Set the reference coordinate frame to the inverse of the given
* object's transformation matrix.
*/
void setRefObject(const char * ref_object, int ref_state);
private:
/*
* Update a transformation matrix for the current state
*/
void updateMatrix(matrix_t& matrix, bool history);
/*
* Populates the "m_bonds" vector
*/
void populateBondRefs();
protected:
// functions to be implemented by derived classes
virtual int getMultiDefault() const = 0;
virtual bool isExcludedBond(int atm1, int atm2);
virtual bool isExcludedBond(const BondType * bond);
virtual void writeAtom() = 0;
virtual void writeBonds() = 0;
virtual void beginObject();
virtual void beginCoordSet();
virtual void endObject();
virtual void endCoordSet();
virtual void beginMolecule() {}
virtual void beginFile() {}
};
/*
* Return true if this bond should not be exported
*/
bool MoleculeExporter::isExcludedBond(int atm1, int atm2) {
return false;
}
bool MoleculeExporter::isExcludedBond(const BondType * bond) {
return isExcludedBond(bond->index[0], bond->index[1]);
}
void MoleculeExporter::beginObject() {
if (m_multi != cMolExportByCoordSet) {
resetTmpIDs();
}
if (m_multi == cMolExportByObject) {
beginMolecule();
}
}
void MoleculeExporter::beginCoordSet() {
if (m_multi == cMolExportByCoordSet) {
resetTmpIDs();
beginMolecule();
}
}
void MoleculeExporter::endObject() {
if (m_multi != cMolExportByCoordSet) {
populateBondRefs();
}
if (m_multi == cMolExportByObject) {
writeBonds();
m_id = 0;
}
}
void MoleculeExporter::endCoordSet() {
if (m_multi == cMolExportByCoordSet) {
populateBondRefs();
writeBonds();
m_id = 0;
}
}
void MoleculeExporter::execute(int sele, int state) {
m_iter.init(G, sele, state);
m_iter.setPerObject(m_multi != cMolExportGlobal);
beginFile();
while (m_iter.next()) {
if (m_last_cs != m_iter.cs) {
if (m_last_cs) {
endCoordSet();
} else if (m_multi == cMolExportGlobal) {
beginMolecule();
}
if (m_last_obj != m_iter.obj) {
if (m_last_obj)
endObject();
beginObject();
m_last_obj = m_iter.obj;
}
// update transformation matrices
updateMatrix(m_mat_full, true);
updateMatrix(m_mat_move, false);
beginCoordSet();
m_last_cs = m_iter.cs;
}
// for bonds
if (!m_tmpids[m_iter.getAtm()]) {
m_id = m_retain_ids ? m_iter.getAtomInfo()->id : (m_id + 1);
m_tmpids[m_iter.getAtm()] = m_id;
}
// atom coordinate
m_coord = m_iter.getCoord();
if (m_mat_move.ptr) {
transform44d3f(m_mat_move.ptr, m_coord, m_coord_tmp);
m_coord = m_coord_tmp;
}
writeAtom();
}
if (m_last_cs)
endCoordSet();
if (m_last_obj)
endObject();
else if (m_multi == cMolExportGlobal)
// empty selection
beginMolecule();
if (m_multi == cMolExportGlobal) {
writeBonds();
}
m_buffer.resize(m_offset);
}
void MoleculeExporter::setRefObject(const char * ref_object, int ref_state) {
double matrix[16];
m_mat_ref.ptr = NULL;
if (!ref_object || !ref_object[0])
return;
auto base = ExecutiveFindObjectByName(G, ref_object);
if (!base)
return;
if(ref_state < 0) {
ref_state = ObjectGetCurrentState(base, true);
}
if(ObjectGetTotalMatrix(base, ref_state, true, matrix)) {
invert_special44d44d(matrix, m_mat_ref.storage);
m_mat_ref.ptr = m_mat_ref.storage;
}
}
void MoleculeExporter::updateMatrix(matrix_t& matrix, bool history) {
const auto& ref = m_mat_ref.ptr;
if (ObjectGetTotalMatrix(reinterpret_cast<CObject*>(m_iter.obj),
m_iter.state, history, matrix.storage)) {
if (ref) {
left_multiply44d44d(ref, matrix.storage);
}
matrix.ptr = matrix.storage;
} else {
matrix.ptr = ref;
}
}
void MoleculeExporter::populateBondRefs() {
auto& obj = m_last_obj;
int id1, id2;
for (auto bond = obj->Bond, bond_end = obj->Bond + obj->NBond;
bond != bond_end; ++bond) {
auto atm1 = bond->index[0];
auto atm2 = bond->index[1];
if (!(id1 = getTmpID(atm1)) ||
!(id2 = getTmpID(atm2)))
continue;
if (isExcludedBond(bond))
continue;
if (id1 > id2)
std::swap(id1, id2);
// emit bond
m_bonds.emplace_back(BondRef { bond, id1, id2 });
}
}
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterPDB : public MoleculeExporter {
bool m_conect_all;
bool m_conect_nodup;
bool m_mdl_written;
PDBInfoRec m_pdb_info;
// quasi constructor
void init(PyMOLGlobals * G_) {
MoleculeExporter::init(G_);
UtilZeroMem((void *) &m_pdb_info, sizeof(PDBInfoRec));
m_conect_all = false;
m_mdl_written = false;
m_conect_nodup = SettingGetGlobal_b(G, cSetting_pdb_conect_nodup);
m_retain_ids = SettingGetGlobal_b(G, cSetting_pdb_retain_ids);
}
int getMultiDefault() const {
// single entry format by default, but we also support writing multiple
// entries by writing a HEADER record for every object (1) or state (2)
return cMolExportGlobal;
}
void writeENDMDL() {
if (m_mdl_written) {
m_offset += VLAprintf(m_buffer, m_offset, "ENDMDL\n");
m_mdl_written = false;
}
}
void writeEND() {
if (!SettingGetGlobal_b(G, cSetting_pdb_no_end_record)) {
m_offset += VLAprintf(m_buffer, m_offset, "END\n");
}
}
void writeAtom() {
CoordSetAtomToPDBStrVLA(G, &m_buffer, &m_offset, m_iter.getAtomInfo(),
m_coord, getTmpID() - 1, &m_pdb_info, m_mat_full.ptr);
}
void writeBonds() {
writeENDMDL();
std::map<int, std::vector<int>> conect;
for (auto bond_it = m_bonds.begin(); bond_it != m_bonds.end(); ++bond_it) {
auto& bond = *bond_it;
int order = m_conect_nodup ? 1 : bond.ref->order;
for (int i = 0; i < 2; ++i) {
for (int d = 0; d < order; ++d) {
conect[bond.id1].push_back(bond.id2);
}
std::swap(bond.id1, bond.id2);
}
}
m_bonds.clear();
for (auto rec_it = conect.begin(); rec_it != conect.end(); ++rec_it) {
const auto& rec = *rec_it;
for (int i = 0, i_end = rec.second.size(); i != i_end;) {
m_offset += VLAprintf(m_buffer, m_offset, "CONECT%5d", rec.first);
// up to 4 bonds per record
for (int j = std::min(i + 4, i_end); i != j; ++i) {
m_offset += VLAprintf(m_buffer, m_offset, "%5d", rec.second[i]);
}
m_offset += VLAprintf(m_buffer, m_offset, "\n");
}
}
writeEND();
}
void writeCryst1() {
const auto& sym = m_iter.cs->Symmetry ? m_iter.cs->Symmetry : m_iter.obj->Symmetry;
if (sym && sym->Crystal) {
const auto& dim = sym->Crystal->Dim;
const auto& angle = sym->Crystal->Angle;
m_offset += VLAprintf(m_buffer, m_offset,
"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
dim[0], dim[1], dim[2], angle[0], angle[1], angle[2],
sym->SpaceGroup, sym->PDBZValue);
}
}
void beginObject() {
MoleculeExporter::beginObject();
m_conect_all = SettingGet_b(G, m_iter.obj->Obj.Setting, NULL, cSetting_pdb_conect_all);
if (m_multi == cMolExportByObject) {
m_offset += VLAprintf(m_buffer, m_offset, "HEADER %.40s\n", m_iter.obj->Obj.Name);
writeCryst1();
}
}
void beginCoordSet() {
MoleculeExporter::beginCoordSet();
if (m_multi == cMolExportByCoordSet) {
m_offset += VLAprintf(m_buffer, m_offset, "HEADER %.40s\n", getTitleOrName());
writeCryst1();
}
if (m_iter.isMultistate()
&& (m_iter.isPerObject() || m_iter.state != m_last_state)) {
m_offset += VLAprintf(m_buffer, m_offset, "MODEL %4d\n", m_iter.state + 1);
m_last_state = m_iter.state;
m_mdl_written = true;
}
}
void endCoordSet() {
MoleculeExporter::endCoordSet();
if (m_iter.isPerObject() || m_iter.state != m_last_state) {
writeENDMDL();
}
}
bool isExcludedBond(int atm1, int atm2) {
const auto& atInfo = m_last_obj->AtomInfo;
return !(m_conect_all || atInfo[atm1].hetatm || atInfo[atm2].hetatm);
}
};
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterPQR: public MoleculeExporterPDB {
// quasi constructor
void init(PyMOLGlobals * G_) {
MoleculeExporterPDB::init(G_);
m_pdb_info.variant = PDB_VARIANT_PQR;
m_pdb_info.pqr_workarounds = SettingGetGlobal_b(G, cSetting_pqr_workarounds);
}
bool isExcludedBond(int atm1, int atm2) {
// no bonds for PQR format
return true;
}
};
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterCIF : public MoleculeExporter {
const char * m_molecule_name;
CifDataValueFormatter cifrepr;
// quasi constructor
void init(PyMOLGlobals * G_) {
MoleculeExporter::init(G_);
// The formatter uses a circular buffer of the given size. The size
// must be at least equal to the maximum number of invocations of `cifrepr`
// in any member function (count two for each call with type `char`).
cifrepr.m_buf.resize(10);
m_retain_ids = SettingGetGlobal_b(G, cSetting_pdb_retain_ids);
m_molecule_name = "multi";
m_offset += VLAprintf(m_buffer, m_offset,
"# generated by PyMOL " _PyMOL_VERSION "\n");
}
int getMultiDefault() const {
return cMolExportByObject;
}
void writeCellSymmetry() {
const auto& sym = m_iter.cs->Symmetry ? m_iter.cs->Symmetry : m_iter.obj->Symmetry;
if (sym && sym->Crystal) {
const auto& dim = sym->Crystal->Dim;
const auto& angle = sym->Crystal->Angle;
m_offset += VLAprintf(m_buffer, m_offset, "#\n"
"_cell.entry_id %s\n"
"_cell.length_a %.3f\n"
"_cell.length_b %.3f\n"
"_cell.length_c %.3f\n"
"_cell.angle_alpha %.2f\n"
"_cell.angle_beta %.2f\n"
"_cell.angle_gamma %.2f\n"
"_symmetry.entry_id %s\n"
"_symmetry.space_group_name_H-M %s\n",
cifrepr(m_molecule_name),
dim[0], dim[1], dim[2], angle[0], angle[1], angle[2],
cifrepr(m_molecule_name),
cifrepr(sym->SpaceGroup));
}
}
void beginMolecule() {
MoleculeExporter::beginMolecule();
switch (m_multi) {
case cMolExportByObject: m_molecule_name = m_iter.obj->Obj.Name; break;
case cMolExportByCoordSet: m_molecule_name = getTitleOrName(); break;
}
// data header
m_offset += VLAprintf(m_buffer, m_offset, "#\n"
"data_%s\n_entry.id %s\n", m_molecule_name,
cifrepr(m_molecule_name));
// symmetry
writeCellSymmetry();
// atom table header
m_offset += VLAprintf(m_buffer, m_offset, "#\n"
"loop_\n"
"_atom_site.group_PDB\n"
"_atom_site.id\n"
"_atom_site.type_symbol\n"
"_atom_site.label_atom_id\n"
"_atom_site.label_alt_id\n"
"_atom_site.label_comp_id\n"
"_atom_site.label_asym_id\n"
"_atom_site.label_entity_id\n"
"_atom_site.label_seq_id\n"
"_atom_site.pdbx_PDB_ins_code\n"
"_atom_site.Cartn_x\n"
"_atom_site.Cartn_y\n"
"_atom_site.Cartn_z\n"
"_atom_site.occupancy\n"
"_atom_site.B_iso_or_equiv\n"
"_atom_site.pdbx_formal_charge\n"
"_atom_site.auth_asym_id\n"
"_atom_site.pdbx_PDB_model_num\n");
}
void writeAtom() {
const AtomInfoType * ai = m_iter.getAtomInfo();
const char * entity_id = NULL;
#ifdef _PYMOL_IP_PROPERTIES
char entity_id_buf[16];
if (ai->prop_id) {
entity_id = PropertyGetAsString(G, ai->prop_id, "entity_id", entity_id_buf);
}
#endif
if (!entity_id) {
entity_id = LexStr(G, ai->custom);
}
m_offset += VLAprintf(m_buffer, m_offset,
"%-6s %-3d %s %-3s " // type .. name
"%s %-3s %s %s " // alt .. entity_id
"%d %s %6.3f %6.3f %6.3f " // resv .. z
"%4.2f %6.2f %d %s %d\n", // q .. state
ai->hetatm ? "HETATM" : "ATOM",
getTmpID(),
cifrepr(ai->elem),
cifrepr(LexStr(G, ai->name)),
cifrepr(ai->alt),
cifrepr(LexStr(G, ai->resn)),
cifrepr(LexStr(G, ai->segi)),
cifrepr(entity_id),
ai->resv,
cifrepr(ai->inscode, "?"),
m_coord[0], m_coord[1], m_coord[2],
ai->q, ai->b, ai->formalCharge,
cifrepr(LexStr(G, ai->chain)),
m_iter.state + 1);
}
void writeBonds() {
// TODO
// Bond categories:
// 1) within residue -> _chem_comp_bond
// 2) polymer links -> implicit
// 3) others -> _struct_conn
m_bonds.clear();
}
bool isExcludedBond(int atm1, int atm2) {
// TODO
// polymer links
return true;
}
};
// ---------------------------------------------------------------------------------- //
/*
* Experimental PyMOL style annotated mmCIF. Exports colors, representation,
* and secondary structure.
*/
struct MoleculeExporterPMCIF : public MoleculeExporterCIF {
void beginMolecule() {
MoleculeExporterCIF::beginMolecule();
m_offset += VLAprintf(m_buffer, m_offset, "#\n"
"_atom_site.pymol_color\n"
"_atom_site.pymol_reps\n"
"_atom_site.pymol_ss\n");
}
void writeAtom() {
MoleculeExporterCIF::writeAtom();
const AtomInfoType * ai = m_iter.getAtomInfo();
m_offset += VLAprintf(m_buffer, m_offset,
"%d %d %s\n",
ai->color,
ai->visRep,
cifrepr(ai->ssType));
}
void writeBonds() {
if (m_bonds.empty())
return;
m_offset += VLAprintf(m_buffer, m_offset, "#\n"
"loop_\n"
"_pymol_bond.atom_site_id_1\n"
"_pymol_bond.atom_site_id_2\n"
"_pymol_bond.order\n");
for (auto bond_it = m_bonds.begin(); bond_it != m_bonds.end(); ++bond_it) {
const auto& bond = *bond_it;
m_offset += VLAprintf(m_buffer, m_offset, "%d %d %d\n",
bond.id1, bond.id2, bond.ref->order);
}
m_bonds.clear();
}
bool isExcludedBond(int atm1, int atm2) {
return false;
}
};
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterMOL : public MoleculeExporter {
int m_chiral_flag;
std::vector<AtomRef> m_atoms;
ElemCanonicalizer elemGetter;
int getMultiDefault() const {
// single entry format
return cMolExportGlobal;
}
void writeAtom() {
const auto ai = m_iter.getAtomInfo();
if (ai->stereo)
m_chiral_flag = 1;
m_atoms.emplace_back(AtomRef { ai, { m_coord[0], m_coord[1], m_coord[2] }, getTmpID() });
}
void writeCTabV3000() {
m_offset += VLAprintf(m_buffer, m_offset,
" 0 0 0 0 0 0 0 0 0 0999 V3000\n"
"M V30 BEGIN CTAB\n"
"M V30 COUNTS %d %d 0 0 %d\n"
"M V30 BEGIN ATOM\n",
m_atoms.size(), m_bonds.size(), m_chiral_flag);
// write atoms
for (auto atom_it = m_atoms.begin(); atom_it != m_atoms.end(); ++atom_it) {
const auto& atom = *atom_it;
auto ai = atom.ref;
m_offset += VLAprintf(m_buffer, m_offset, "M V30 %d %s %.4f %.4f %.4f 0",
atom.id, elemGetter(ai), atom.coord[0], atom.coord[1], atom.coord[2]);
if (ai->formalCharge)
m_offset += VLAprintf(m_buffer, m_offset, " CHG=%d", ai->formalCharge);
if (ai->stereo)
m_offset += VLAprintf(m_buffer, m_offset, " CFG=%d", ai->stereo);
m_offset += VLAprintf(m_buffer, m_offset, "\n");
}
m_atoms.clear();
m_offset += VLAprintf(m_buffer, m_offset,
"M V30 END ATOM\n"
"M V30 BEGIN BOND\n");
// write bonds
int n_bonds = 0;
for (auto bond_it = m_bonds.begin(); bond_it != m_bonds.end(); ++bond_it) {
const auto& bond = *bond_it;
m_offset += VLAprintf(m_buffer, m_offset, "M V30 %d %d %d %d\n",
++n_bonds, bond.ref->order, bond.id1, bond.id2);
}
m_bonds.clear();
// end
m_offset += VLAprintf(m_buffer, m_offset,
"M V30 END BOND\n"
"M V30 END CTAB\n"
"M END\n");
}
void writeCTabV2000() {
// counts line
m_offset += VLAprintf(m_buffer, m_offset,
"%3d%3d 0 0%3d 0 0 0 0 0999 V2000\n",
(int) m_atoms.size(), (int) m_bonds.size(), m_chiral_flag);
// write atoms
for (auto atom_it = m_atoms.begin(); atom_it != m_atoms.end(); ++atom_it) {
const auto& atom = *atom_it;
auto ai = atom.ref;
int chg = ai->formalCharge;
m_offset += VLAprintf(m_buffer, m_offset,
"%10.4f%10.4f%10.4f %-3s 0 %1d %1d 0 0 0 0 0 0 0 0 0\n",
atom.coord[0], atom.coord[1], atom.coord[2],
elemGetter(ai), chg ? (4 - chg) : 0, (int) ai->stereo);
}
m_atoms.clear();
// write bonds
for (auto bond_it = m_bonds.begin(); bond_it != m_bonds.end(); ++bond_it) {
const auto& bond = *bond_it;
m_offset += VLAprintf(m_buffer, m_offset, "%3d%3d%3d%3d 0 0 0\n",
bond.id1, bond.id2, bond.ref->order, (int) bond.ref->stereo);
}
m_bonds.clear();
// end
m_offset += VLAprintf(m_buffer, m_offset, "M END\n");
}
void writeBonds() {
if (m_atoms.size() > 999 || m_bonds.size() > 999) {
PRINTFB(G, FB_ObjectMolecule, FB_Warnings)
" Warning: MOL/SDF 999 atom/bond limit reached, using V3000\n" ENDFB(G);
writeCTabV3000();
} else {
writeCTabV2000();
}
}
void beginMolecule() {
MoleculeExporter::beginMolecule();
m_offset += VLAprintf(m_buffer, m_offset,
"%s\n PyMOL%3.3s 3D 0\n\n",
getTitleOrName(), _PyMOL_VERSION);
m_chiral_flag = 0;
}
bool isExcludedBond(const BondType * bond) {
// MOL format doesn't know zero order bonds. Writing them as order "0"
// will produce a nonstandard file which may be rejected by other
// applications (e.g. RDKit).
if (bond->order == 0) {
return !SettingGetGlobal_b(G, cSetting_sdf_write_zero_order_bonds);
}
return false;
}
};
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterSDF : public MoleculeExporterMOL {
int getMultiDefault() const {
// multi-entry format
return cMolExportByCoordSet;
}
void writeProperties() {
#ifdef _PYMOL_IP_PROPERTIES
#endif
}
void writeBonds() {
MoleculeExporterMOL::writeBonds();
writeProperties();
m_offset += VLAprintf(m_buffer, m_offset, "$$$$\n");
}
};
// ---------------------------------------------------------------------------------- //
struct MOL2_SubSt {
AtomInfoType * ai;
int root_id;
const char * resn;
};
static const char MOL2_bondTypes[][3] = {
"nc", "1", "2", "3", "ar"
};
struct MoleculeExporterMOL2 : public MoleculeExporter {
int m_n_atoms; // atom count
int m_counts_offset; // offset for deferred counts writing
std::vector<MOL2_SubSt> m_substs; // substructures
int getMultiDefault() const {
// multi-entry format
return cMolExportByCoordSet;
}
void beginFile() {
m_offset += VLAprintf(m_buffer, m_offset,
"# created with PyMOL " _PyMOL_VERSION "\n");
}
void beginMolecule() {
MoleculeExporter::beginMolecule();
// RTI MOLECULE
m_offset += VLAprintf(m_buffer, m_offset, "@<TRIPOS>MOLECULE\n"
"%s\n", getTitleOrName());
// defer until number of substructures known
m_counts_offset = m_offset;
m_offset += VLAprintf(m_buffer, m_offset,
"X X X \n" // deferred
"SMALL\n"
"USER_CHARGES\n"
"@<TRIPOS>ATOM\n");
m_n_atoms = 0;
}
void beginObject() {
MoleculeExporter::beginObject();
ObjectMoleculeVerifyChemistry(m_iter.obj, m_iter.state);
}
void writeAtom() {
const auto ai = m_iter.getAtomInfo();
if (m_substs.empty() ||
!AtomInfoSameResidue(G, ai, m_substs.back().ai)) {
m_substs.emplace_back(MOL2_SubSt { ai, getTmpID(),
ai->resn ? LexStr(G, ai->resn) : "UNK" });
}
// RTI ATOM
// atom_id atom_name x y z atom_type [subst_id
// [subst_name [charge [status_bit]]]]
m_offset += VLAprintf(m_buffer, m_offset,
"%d\t%4s\t%.3f\t%.3f\t%.3f\t%2s\t%d\t%s%d%.1s\t%.3f\t%s\n",
getTmpID(),
ai->name ? LexStr(G, ai->name) : ai->elem[0] ? ai->elem : "X",
m_coord[0], m_coord[1], m_coord[2],
getMOL2Type(m_iter.obj, m_iter.getAtm()),
m_substs.size(),
m_substs.back().resn, ai->resv, &ai->inscode, // subst_name
ai->partialCharge,
(ai->flags & cAtomFlag_solvent) ? "WATER" : "");
++m_n_atoms;
}
void writeBonds() {
// atom count
m_counts_offset += sprintf(m_buffer + m_counts_offset, "%d %d %d",
m_n_atoms, (int) m_bonds.size(), (int) m_substs.size());
m_buffer[m_counts_offset] = ' '; // overwrite terminator
// RTI BOND
// bond_id origin_atom_id target_atom_id bond_type [status_bits]
m_offset += VLAprintf(m_buffer, m_offset, "@<TRIPOS>BOND\n");
int bond_id = 0;
for (auto bond_it = m_bonds.begin(); bond_it != m_bonds.end(); ++bond_it) {
const auto& bond = *bond_it;
m_offset += VLAprintf(m_buffer, m_offset, "%d %d %d %s\n",
++bond_id,
bond.id1,
bond.id2,
MOL2_bondTypes[bond.ref->order % 5]);
}
m_bonds.clear();
// RTI SUBSTRUCTURE
// subst_id subst_name root_atom [subst_type [dict_type
// [chain [sub_type [inter_bonds [status [comment]]]]]]]
m_offset += VLAprintf(m_buffer, m_offset, "@<TRIPOS>SUBSTRUCTURE\n");
int subst_id = 0;
for (auto subst_it = m_substs.begin(); subst_it != m_substs.end(); ++subst_it) {
const auto& subst = *subst_it;
const auto& ai = subst.ai;
m_offset += VLAprintf(m_buffer, m_offset, "%d\t%s%d%.1s\t%d\t%s\t1 %s\t%s\n",
++subst_id,
subst.resn, ai->resv, &ai->inscode, // subst_name
subst.root_id, // root_atom
(ai->flags & cAtomFlag_polymer) ? "RESIDUE" : "GROUP",
ai->chain ? LexStr(G, ai->chain) : ai->segi ? LexStr(G, ai->segi) : "****",
subst.resn);
}
m_substs.clear();
}
};
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterMAE : public MoleculeExporter {
int m_n_atoms;
int m_n_atoms_offset;
int m_n_arom_bonds;
std::map<int, const AtomInfoType *> m_atoms;
// quasi constructor
void init(PyMOLGlobals * G_) {
MoleculeExporter::init(G_);
m_n_arom_bonds = 0;
}
int getMultiDefault() const {
// multi-entry format
return cMolExportByCoordSet;
}
void beginFile() {
m_offset += VLAprintf(m_buffer, m_offset,
"{ s_m_m2io_version ::: 2.0.0 }\n"
"# created with PyMOL " _PyMOL_VERSION " #\n");
}
void beginMolecule() {
MoleculeExporter::beginMolecule();
std::string groupid = MaeExportGetSubGroupId(G, reinterpret_cast<CObject*>(m_iter.obj));
m_offset += VLAprintf(m_buffer, m_offset,
"\nf_m_ct {\n"
"s_m_subgroupid\n"
"s_m_title\n"
":::\n"
"\"%s\"\n"
"\"%s\"\n",
groupid.c_str(),
getTitleOrName());
// defer until number of atoms known
m_n_atoms_offset = m_offset;
m_offset += VLAprintf(m_buffer, m_offset,
"m_atom[X] {\n" // place holder
"# First column is atom index #\n"
"i_m_mmod_type\n"
"r_m_x_coord\n"
"r_m_y_coord\n"
"r_m_z_coord\n"
"i_m_residue_number\n"
"s_m_insertion_code\n"
"s_m_chain_name\n"
"s_m_pdb_residue_name\n"
"s_m_pdb_atom_name\n"
"i_m_atomic_number\n"
"i_m_formal_charge\n"
"s_m_color_rgb\n"
"i_m_secondary_structure\n"
"r_m_pdb_occupancy\n"
"i_pdb_PDB_serial\n"
"i_m_visibility\n"
"i_m_representation\n"
"i_m_ribbon_style\n"
"i_m_ribbon_color\n"
"s_m_ribbon_color_rgb\n"
"s_m_label_format\n"
"i_m_label_color\n"
"s_m_label_user_text\n"
":::\n");
m_n_atoms = 0;
}
void beginObject() {
MoleculeExporter::beginObject();
ObjectMoleculeVerifyChemistry(m_iter.obj, m_iter.state);
}
void writeAtom() {
const auto ai = m_iter.getAtomInfo();
const float * rgb = ColorGet(G, ai->color);
char inscode[3] = { ai->inscode, 0 };
if (!inscode[0]) {
strcpy(inscode, "<>");
}
ResName resn = "";
AtomName name = "X";
if (ai->resn)
AtomInfoGetAlignedPDBResidueName(G, ai, resn);
if (ai->name)
AtomInfoGetAlignedPDBAtomName(G, ai, resn, name);
m_offset += VLAprintf(m_buffer, m_offset,
"%d %d %.3f %.3f %.3f %d %s %s \"%-4s\" \"%-4s\" %d %d %02X%02X%02X %d %.2f %d\n",
getTmpID(),
getMacroModelAtomType(ai),
m_coord[0], m_coord[1], m_coord[2],
ai->resv,
inscode,
ai->chain ? LexStr(G, ai->chain) : "\" \"",
resn,
name,
ai->protons,
ai->formalCharge,
int(rgb[0] * 255),
int(rgb[1] * 255),
int(rgb[2] * 255),
ai->ssType[0] == 'H' ? 1 : ai->ssType[0] == 'S' ? 2 : 0,
ai->q,
ai->id);
char ribbon_color_rgb[7] = "<>";
MaeExportGetRibbonColor(G, m_iter, ribbon_color_rgb);
std::string label_user_text = MaeExportGetLabelUserText(G, ai);
m_offset += VLAprintf(m_buffer, m_offset,
"%d %d %d %d %s \"%s\" 2 \"%s\"\n",
(ai->visRep & ~(cRepCartoonBit | cRepRibbonBit)) ? 1 : 0,
MaeExportGetAtomStyle(G, m_iter),
MaeExportGetRibbonStyle(ai),
ribbon_color_rgb[0] == '<' ? 3 /* calphaatom */ : 0 /* constant */,
ribbon_color_rgb,
label_user_text.empty() ? "" : "%UT",
label_user_text.c_str());
m_atoms[getTmpID()] = ai;
++m_n_atoms;
}
void writeBonds() {
// atom count
m_n_atoms_offset += sprintf(m_buffer + m_n_atoms_offset, "m_atom[%d]", m_n_atoms);
m_buffer[m_n_atoms_offset] = ' '; // overwrite terminator
if (!m_bonds.empty()) {
// table with zero rows not allowed
m_offset += VLAprintf(m_buffer, m_offset,
":::\n"
"}\n" // end atoms table
"m_bond[%d] {\n"
"# First column is bond index #\n"
"i_m_from\n"
"i_m_to\n"
"i_m_order\n"
"i_m_from_rep\n"
"i_m_to_rep\n"
":::\n", (int) m_bonds.size());
int b = 0;
for (auto bond_it = m_bonds.begin(); bond_it != m_bonds.end(); ++bond_it) {
const auto& bond = *bond_it;
int order = bond.ref->order;
if (order > 3) {
++m_n_arom_bonds;
order = 1; // aromatic bonds not supported
}
m_offset += VLAprintf(m_buffer, m_offset, "%d %d %d %d\n", ++b,
bond.id1,
bond.id2,
order);
int style = MaeExportGetBondStyle(m_atoms[bond.id1], m_atoms[bond.id2]);
m_offset += VLAprintf(m_buffer, m_offset, "%d %d\n", style, style);
}
m_bonds.clear();
}
// end molecule
m_offset += VLAprintf(m_buffer, m_offset,
":::\n"
"}\n" // end bonds (or atoms) table
"}\n");
if (m_n_arom_bonds > 0) {
PRINTFB(G, FB_ObjectMolecule, FB_Warnings)
" Warning: aromatic bonds not supported by MAE format, "
"exporting as single bonds\n" ENDFB(G);
m_n_arom_bonds = 0;
}
}
};
// ---------------------------------------------------------------------------------- //
struct MoleculeExporterXYZ : public MoleculeExporter {
int m_n_atoms;
int m_n_atoms_offset;
int getMultiDefault() const {
// multi-entry format
return cMolExportByCoordSet;
}
void beginMolecule() {
MoleculeExporter::beginMolecule();
// defer until number of atoms known
m_n_atoms = 0;
m_n_atoms_offset = m_offset;
m_offset += VLAprintf(m_buffer, m_offset,
"X \n" // natoms (deferred)
"%s\n", // comment
getTitleOrName());
}
void writeAtom() {
const auto ai = m_iter.getAtomInfo();
m_offset += VLAprintf(m_buffer, m_offset,
"%s %f %f %f\n", ai->elem, m_coord[0], m_coord[1], m_coord[2]);
++m_n_atoms;
}
void writeBonds() {
// atom count
m_n_atoms_offset += sprintf(m_buffer + m_n_atoms_offset, "%d", m_n_atoms);
m_buffer[m_n_atoms_offset] = ' '; // overwrite terminator
}
bool isExcludedBond(int atm1, int atm2) {
// no bonds for XYZ format
return true;
}
};
// ---------------------------------------------------------------------------------- //
#ifndef _PYMOL_NO_MSGPACKC
class MoleculeExporterMMTF : public MoleculeExporter {
mmtf::StructureData m_raw;
mmtf::GroupType * m_residue = nullptr;
const AtomInfoType * m_last_ai = nullptr;
ElemCanonicalizer m_elemGetter;
std::vector<int32_t> colorList;
std::vector<int32_t> repsList;
public:
int getMultiDefault() const {
return cMolExportGlobal;
}
void beginCoordSet() {
m_raw.chainsPerModel.emplace_back(0);
m_last_ai = nullptr;
}
void writeAtom() {
const AtomInfoType * ai = m_iter.getAtomInfo();
m_raw.xCoordList.emplace_back(m_coord[0]);
m_raw.yCoordList.emplace_back(m_coord[1]);
m_raw.zCoordList.emplace_back(m_coord[2]);
// PyMOL customs
colorList.emplace_back(ai->color);
repsList.emplace_back(ai->visRep);
bool is_same_residue = false;
bool is_same_chain = AtomInfoSameChainP(G, ai, m_last_ai);
if (!is_same_chain) {
m_raw.chainsPerModel.back() += 1;
m_raw.groupsPerChain.emplace_back(0); // increment with every group
m_raw.chainIdList.emplace_back(LexStr(G, ai->segi));
m_raw.chainNameList.emplace_back(LexStr(G, ai->chain));
} else {
is_same_residue = AtomInfoSameResidueP(G, ai, m_last_ai);
}
if (!is_same_residue) {
m_raw.groupsPerChain.back() += 1;
m_raw.groupTypeList.emplace_back(m_raw.groupList.size());
m_raw.groupIdList.emplace_back(ai->resv);
m_raw.insCodeList.emplace_back(ai->inscode);
m_raw.secStructList.emplace_back(
ai->ssType[0] == 'H' ? 2 :
ai->ssType[0] == 'S' ? 3 : -1);
m_raw.groupList.emplace_back();
m_residue = &m_raw.groupList.back();
m_residue->groupName = LexStr(G, ai->resn);
}
m_residue->formalChargeList.emplace_back(ai->formalCharge);
m_residue->atomNameList.emplace_back(LexStr(G, ai->name));
m_residue->elementList.emplace_back(m_elemGetter(ai));
m_last_ai = ai;
}
void writeBonds() {
m_raw.numAtoms = m_raw.xCoordList.size();
m_raw.numGroups = m_raw.groupIdList.size();
m_raw.numChains = m_raw.chainIdList.size();
m_raw.numModels = m_raw.chainsPerModel.size();
mmtf::BondAdder bondadder(m_raw);
for (const auto &bond : m_bonds) {
bondadder(bond.id1 - 1, bond.id2 - 1, bond.ref->order);
}
mmtf::compressGroupList(m_raw);
packMsgpack();
}
void packMsgpack() {
msgpack::zone _zone;
auto data_map = mmtf::encodeToMap(m_raw, _zone);
data_map["pymolColorList"] = msgpack::object(colorList, _zone);
data_map["pymolRepsList"] = msgpack::object(repsList, _zone);
std::stringstream stream;
msgpack::pack(stream, data_map);
auto buffer = stream.str();
auto bufferSize = buffer.size();
m_buffer.resize(bufferSize);
std::memcpy(m_buffer, buffer.data(), bufferSize);
m_offset = bufferSize;
}
};
#endif
/*========================================================================*/
/*
* Export the given selection to a molecular file format.
*
* Return the file contents as a VLA (must be VLAFree'd by caller) or
* NULL if the format is not known.
*
* format: pdb, sdf, ...
* selection: atom selection expression
* state: object state (-1 for all, -2/-3 for current)
* ref_object: name of a reference object which defines the frame of
* reference for exported coordinates
* ref_state: reference object state
* multi: defines how to handle selections which span multiple objects
* -1: use format-specific default
* 0: one global "molecule" (default for PDB)
* 1: molecules per objects
* 2: molecules per states (default for sdf, mol2)
*/
pymol::vla<char> MoleculeExporterGetStr(PyMOLGlobals * G,
const char *format,
const char *selection,
int state,
const char *ref_object,
int ref_state,
int multi,
bool quiet)
{
SelectorTmp tmpsele1(G, selection);
int sele = tmpsele1.getIndex();
if (sele < 0)
return nullptr;
std::unique_ptr<MoleculeExporter> exporter;
if (ref_state < -1)
ref_state = state;
// do "effective" current states
if (state == -2)
state = -3;
if (strcmp(format, "pdb") == 0) {
exporter.reset(new MoleculeExporterPDB);
} else if (strcmp(format, "pmcif") == 0) {
exporter.reset(new MoleculeExporterPMCIF);
} else if (strcmp(format, "cif") == 0) {
exporter.reset(new MoleculeExporterCIF);
} else if (strcmp(format, "sdf") == 0) {
exporter.reset(new MoleculeExporterSDF);
} else if (strcmp(format, "pqr") == 0) {
exporter.reset(new MoleculeExporterPQR);
} else if (strcmp(format, "mol2") == 0) {
exporter.reset(new MoleculeExporterMOL2);
} else if (strcmp(format, "mol") == 0) {
exporter.reset(new MoleculeExporterMOL);
} else if (strcmp(format, "xyz") == 0) {
exporter.reset(new MoleculeExporterXYZ);
} else if (strcmp(format, "mae") == 0) {
exporter.reset(new MoleculeExporterMAE);
} else if (strcmp(format, "mmtf") == 0) {
#ifndef _PYMOL_NO_MSGPACKC
exporter.reset(new MoleculeExporterMMTF);
#else
PRINTFB(G, FB_ObjectMolecule, FB_Errors)
" Error: This build has no fast MMTF support.\n" ENDFB(G);
return nullptr;
#endif
} else {
PRINTFB(G, FB_ObjectMolecule, FB_Errors)
" Error: unknown format: '%s'\n", format ENDFB(G);
return nullptr;
}
// Ensure "." decimal point in printf. It's possible to change this from
// Python, so don't rely on a persistent global value.
std::setlocale(LC_NUMERIC, "C");
exporter->init(G);
exporter->setMulti(multi);
exporter->setRefObject(ref_object, ref_state);
exporter->execute(sele, state);
return std::move(exporter->m_buffer);
}
/*========================================================================*/
#ifndef _PYMOL_NOPY
/*
* See MoleculeExporterGetPyBonds
*/
struct MoleculeExporterPyBonds : public MoleculeExporter {
PyObject *m_bond_list; // out
protected:
int getMultiDefault() const {
// single-entry format
return cMolExportGlobal;
}
void writeAtom() {}
void writeBonds() {
size_t nBond = m_bonds.size();
m_bond_list = PyList_New(nBond);
for (size_t b = 0; b < nBond; ++b) {
const auto& bond = m_bonds[b];
PyList_SetItem(m_bond_list, b, Py_BuildValue("iii",
bond.id1 - 1, bond.id2 - 1, bond.ref->order));
}
m_bonds.clear();
}
};
/*========================================================================*/
/*
* Get all bonds within the given selection.
*
* Returns a list of (atm1, atm2, order) tuples, where atm1 and atm2 are
* the 0-based atom indices within the selection.
*
* Return value: New reference
*/
PyObject *MoleculeExporterGetPyBonds(PyMOLGlobals * G,
const char *selection, int state)
{
SelectorTmp tmpsele1(G, selection);
int sele = tmpsele1.getIndex();
if (sele < 0)
return NULL;
int unblock = PAutoBlock(G);
MoleculeExporterPyBonds exporter;
exporter.init(G);
exporter.execute(sele, state);
if (PyErr_Occurred())
PyErr_Print();
PAutoUnblock(G, unblock);
return exporter.m_bond_list;
}
/*========================================================================*/
/*
* Creates a `chempy.models.Indexed` instance from a selection.
*
* Changes from the old implementation (SelectorGetChemPyModel):
* - drops support for `cs->Spheroid`
*
*/
struct MoleculeExporterChemPy : public MoleculeExporter {
PyObject *m_model; // out
protected:
int m_n_cs; // number of coordinate sets
float m_ref_tmp[3];
PyObject *m_atom_list;
public:
// quasi constructor
void init(PyMOLGlobals * G_) {
MoleculeExporter::init(G_);
m_model = NULL;
m_n_cs = 0;
m_atom_list = NULL;
}
protected:
int getMultiDefault() const {
// single-entry format
return cMolExportGlobal;
}
void beginMolecule() {
MoleculeExporter::beginMolecule();
m_model = PYOBJECT_CALLMETHOD(P_models, "Indexed", "");
if (m_model) {
m_atom_list = PyList_New(0);
PyObject_SetAttrString(m_model, "atom", m_atom_list);
Py_DECREF(m_atom_list);
}
}
void beginCoordSet() {
MoleculeExporter::beginCoordSet();
++m_n_cs;
}
const float * getRefPtr() {
RefPosType * ref_pos = m_iter.cs->RefPos;
const float * ref_ptr = NULL;
if (ref_pos) {
ref_pos += m_iter.getIdx();
if (ref_pos->specified) {
ref_ptr = ref_pos->coord;
if (m_mat_move.ptr) {
transform44d3f(m_mat_move.ptr, ref_ptr, m_ref_tmp);
ref_ptr = m_ref_tmp;
}
}
}
return ref_ptr;
}
void writeAtom() {
PyObject *atom = CoordSetAtomToChemPyAtom(G,
m_iter.getAtomInfo(), m_coord, getRefPtr(),
m_iter.getAtm(), m_mat_full.ptr);
if (atom) {
PyList_Append(m_atom_list, atom);
Py_DECREF(atom);
}
}
void writeProperties() {
if (!m_last_cs)
return;
// only support properties if export doesn't span multiple coordsets
if (m_n_cs != 1)
return;
// title
if (m_last_cs->Name[0]) {
PyObject *molecule = PyObject_GetAttrString(m_model, "molecule");
if (molecule) {
PyObject_SetAttrString(molecule, "title", PyString_FromString(m_last_cs->Name));
Py_DECREF(molecule);
}
}
// properties
#ifdef _PYMOL_IP_PROPERTIES
#endif
}
void writeBonds() {
if (!m_model)
return;
bool error = false;
size_t nBond = m_bonds.size();
PyObject *bond_list = PyList_New(nBond);
for (size_t b = 0; b < nBond; ++b) {
PyObject *bnd = PYOBJECT_CALLMETHOD(P_chempy, "Bond", "");
if (!bnd) {
error = true;
break;
}
const auto& bond = m_bonds[b];
int index[] = { bond.id1 - 1, bond.id2 - 1 };
PConvInt2ToPyObjAttr(bnd, "index", index);
PConvIntToPyObjAttr(bnd, "order", bond.ref->order);
PConvIntToPyObjAttr(bnd, "id", bond.ref->id);
PConvIntToPyObjAttr(bnd, "stereo", bond.ref->stereo);
PyList_SetItem(bond_list, b, bnd); /* steals bnd reference */
}
if (!error) {
PyObject_SetAttrString(m_model, "bond", bond_list);
}
Py_DECREF(bond_list);
m_bonds.clear();
writeProperties();
}
};
/*========================================================================*/
PyObject *ExecutiveSeleToChemPyModel(PyMOLGlobals * G,
const char *s1, int state,
const char *ref_object, int ref_state)
{
if (state == -1)
state = 0; // no multi-state support
if (ref_state < -1)
ref_state = state;
int sele = SelectorIndexByName(G, s1);
if (sele < 0)
return NULL;
int unblock = PAutoBlock(G);
MoleculeExporterChemPy exporter;
exporter.init(G);
exporter.setRefObject(ref_object, ref_state);
exporter.execute(sele, state);
if (PyErr_Occurred())
PyErr_Print();
PAutoUnblock(G, unblock);
return exporter.m_model;
}
#endif
// vi:sw=2