utils/indigo-cano/main.c (167 lines of code) (raw):

/**************************************************************************** * Copyright (C) from 2009 to Present EPAM Systems. * * This file is part of Indigo toolkit. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ // // This is a command line utility for producing canonical SMILES // or layered code for molecules in MOL or SDF format // #include <stdio.h> #include <stdlib.h> #include <string.h> #include "indigo.h" void onError(const char* message, void* context) { fflush(stdout); fprintf(stderr, "%s\n", message); fflush(stderr); exit(-1); } void usage() { printf("Usage:\n" " indigo-cano filename.{mol,smi,cml,sdf,sdf.gz,rdf,rdf.gz} [parameters]\n" " indigo-cano - SMILES [parameters]\n" "Parameters:\n" " -smiles Output canonical SMILES (default)\n" " -layered Output canonical layered code\n" " -id <string> ID field in SDF file\n" " -no-arom Do not aromatize molecules\n" " -no-tetrahedral Ignore tetrahedral stereocenters\n" " -no-cistrans Ignore cis-trans bonds information\n" "Examples:\n" " indigo-cano infile.sdf\n" " indigo-cano infile.sdf.gz -id molregno > results.txt\n" " indigo-cano infile.smi -layered -no-cistrans\n" " indigo-cano - 'NC1C=CC(O)=CC=1'\n"); } int processMolecule(int mol, int smiles, int no_arom, int no_cistrans, int no_tetra) { if (no_cistrans) if (indigoClearCisTrans(mol) < 0) return -1; if (no_tetra) if (!indigoClearStereocenters(mol)) return -1; if (smiles && !no_arom) if (indigoAromatize(mol) < 0) return -1; if (smiles) { const char* res = indigoCanonicalSmiles(mol); if (res == 0) return -1; printf("%s\n", res); } else { const char* res = indigoLayeredCode(mol); if (res == 0) return -1; printf("%s\n", res); } return 1; } int main(int argc, char* argv[]) { int smiles = 1; int no_cistrans = 0; int no_tetra = 0; int no_arom = 0; int i = 2; const char* idfield = 0; const char* filename = 0; const char* ext = 0; if (argc < 2) { usage(); return -1; } if (strcmp(argv[1], "-") != 0) filename = argv[1]; else if (argc >= 3 && strcmp(argv[1], "-") == 0) i = 3; else { usage(); return -1; } while (i < argc) { if (strcmp(argv[i], "-smiles") == 0) smiles = 1; else if (strcmp(argv[i], "-layered") == 0) smiles = 0; else if (strcmp(argv[i], "-no-cistrans") == 0) no_cistrans = 1; else if (strcmp(argv[i], "-no-arom") == 0) no_arom = 1; else if (strcmp(argv[i], "-no-tetrahedral") == 0) no_tetra = 1; else if (strcmp(argv[i], "-id") == 0) { if (++i >= argc) { fprintf(stderr, "expecting an identifier after -id\n"); return -1; } idfield = argv[i]; } else { fprintf(stderr, "unknown parameter: %s\n", argv[i]); return -1; } i++; } qword session = indigoAllocSessionId(); indigoSetErrorHandler(onError, 0); if (filename == 0) { int mol = indigoLoadMoleculeFromString(argv[2]); processMolecule(mol, smiles, no_arom, no_cistrans, no_tetra); indigoFree(mol); indigoReleaseSessionId(session); return 0; } if (strlen(filename) > 4 && filename[strlen(filename) - 4] == '.') ext = filename + strlen(filename) - 3; else if (strlen(filename) > 7 && filename[strlen(filename) - 7] == '.') ext = filename + strlen(filename) - 6; else { fprintf(stderr, "input file format not recognized\n"); indigoReleaseSessionId(session); return -1; } if (strcmp(ext, "mol") == 0) { int mol = indigoLoadMoleculeFromFile(filename); processMolecule(mol, smiles, no_arom, no_cistrans, no_tetra); indigoFree(mol); indigoReleaseSessionId(session); return 0; } else if (strcmp(ext, "cml") == 0 || strcmp(ext, "sdf") == 0 || strcmp(ext, "sdf.gz") == 0 || strcmp(ext, "rdf") == 0 || strcmp(ext, "rdf.gz") == 0 || strcmp(ext, "smi") == 0 || strcmp(ext, "smi.gz") == 0) { int item, iter; if ((strstr(ext, "cml") != NULL)) iter = indigoIterateCMLFile(filename); else if (strstr(ext, "sdf") != NULL) iter = indigoIterateSDFile(filename); else if (strstr(ext, "rdf") != NULL) iter = indigoIterateRDFile(filename); else iter = indigoIterateSmilesFile(filename); while ((item = indigoNext(iter))) { indigoSetErrorHandler(0, 0); if (processMolecule(item, smiles, no_arom, no_cistrans, no_tetra) == -1) printf("%s\n", indigoGetLastError()); indigoSetErrorHandler(onError, 0); indigoFree(item); } indigoFree(iter); } else { fprintf(stderr, "input file format not recognized\n"); indigoReleaseSessionId(session); return -1; } indigoReleaseSessionId(session); return 0; }