utils/indigo-deco/main.cpp (335 lines of code) (raw):

/**************************************************************************** * Copyright (C) from 2009 to Present EPAM Systems. * * This file is part of Indigo toolkit. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ #include <cstdio> #include <cstdlib> #include <cstring> #include <filesystem> #include <iostream> #include <regex> #include "indigo.h" bool matchesPattern(const std::string& filename, const std::string& pattern) { // Convert wildcard pattern to regex std::string regexPattern = std::regex_replace(pattern, std::regex("\\."), "\\."); regexPattern = std::regex_replace(regexPattern, std::regex("\\*"), ".*"); regexPattern = std::regex_replace(regexPattern, std::regex("\\?"), "."); std::regex re(regexPattern); return std::regex_match(filename, re); } void onError(const char* message, void* context) { fflush(stdout); fprintf(stderr, "%s\n", message); fflush(stderr); exit(-1); } void _replaceSlashes(char* str) { while (*str != 0) { if (*str == '\\') *str = '/'; str++; } } void _handleInputFile(const char* path, int structures) { if ((strlen(path) > 4 && strcmp(path + strlen(path) - 4, ".sdf") == 0) || (strlen(path) > 7 && strcmp(path + strlen(path) - 7, ".sdf.gz") == 0)) { int item, iter = indigoIterateSDFile(path); while ((item = indigoNext(iter))) { indigoArrayAdd(structures, item); indigoFree(item); } indigoFree(iter); } else if ((strlen(path) > 4 && strcmp(path + strlen(path) - 4, ".rdf") == 0) || (strlen(path) > 7 && strcmp(path + strlen(path) - 7, ".rdf.gz") == 0)) { int item, iter = indigoIterateRDFile(path); while ((item = indigoNext(iter))) { indigoArrayAdd(structures, item); indigoFree(item); } indigoFree(iter); } else if ((strlen(path) > 4 && strcmp(path + strlen(path) - 4, ".smi") == 0) || (strlen(path) > 7 && strcmp(path + strlen(path) - 7, ".smi.gz") == 0)) { int item, iter = indigoIterateSmilesFile(path); while ((item = indigoNext(iter))) { indigoArrayAdd(structures, item); indigoFree(item); } indigoFree(iter); } else if ((strlen(path) > 4 && strcmp(path + strlen(path) - 4, ".cml") == 0)) { int item, iter = indigoIterateCMLFile(path); while ((item = indigoNext(iter))) { indigoArrayAdd(structures, item); indigoFree(item); } indigoFree(iter); } else { int item = indigoLoadMoleculeFromFile(path); indigoArrayAdd(structures, item); indigoFree(item); } } void _printHelpMessage() { printf("Usage:\n indigo-deco files [options]\n" "Perfoms molecule scaffold detection and R-group deconvolution\n" "Accepted formats are: Molfile, SDFile, RDFile, SMILES, CML\n" "Options:\n" "-h print this help message\n" "-a calculate approximate scaffold (default is exact)\n" "-s <file> write maximum found scaffold to molfile\n" "-S <file> write all found scaffolds to SD-file\n" "-l <file> do not calculate scaffold, but load it from file\n" "-sr <file> write scaffold with R-sites to a file\n" "-o <file> write resulting highlighted molecules to file\n" "-r <file> write resulting molecules with separated r-groups to file\n" "-na no aromatic consideration\n" "-- marks end of options\n" "\nExamples:\n\n" "indigo-deco *.mol -o hl.sdf -s scaf.sdf\n" " read molecules from molfiles in the current directory\n" " save maximum found scaffold to scaf.mol\n" " save highlighted molecules to hl.sdf\n" "indigo-deco structure.mol many.sdf -s scaf.mol -S allscafs.sdf -r rg.sdf \n" " read one molecule from structure.mol and multiple molecules from many.sdf\n" " save molecules with r-rgoups to rg.sdf\n" " save all found scaffolds to allscafs.sdf\n" "indigo-deco *.smi -d readyscaf.mol -o hl.sdf\n" " read multiple molecules from every SMILES file in the current directory\n" " read scaffold from readyscaf.mol\n" " save highlighted molecules to hl.sdf\n"); } int main(int argc, const char** argv) { int i; int done_with_options = 0; int approximate = 0; int scaffold = 0; int aromatic = 1; const char* outfile_hl = 0; const char* outfile_rg = 0; const char* outfile_maxscaf = 0; const char* outfile_allscafs = 0; const char* outfile_scaf_r = 0; int deco = 0; int structures = 0; const qword session = indigoAllocSessionId(); indigoSetErrorHandler(onError, 0); printf("R-Group deconvolution utility, powered by Indigo API version %s\n", indigoVersion()); structures = indigoCreateArray(); indigoSetOptionBool("treat-x-as-pseudoatom", 1); indigoSetOptionBool("ignore-stereochemistry-errors", 1); for (i = 1; i < argc; i++) { if (!done_with_options && argv[i][0] == '-') { if (strcmp(argv[i], "--") == 0) done_with_options = 1; else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-?") == 0 || strcmp(argv[i], "/?") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0) { _printHelpMessage(); return 0; } else if (strcmp(argv[i], "-a") == 0) approximate = 1; else if (strcmp(argv[i], "-l") == 0) { if (++i == argc) { fprintf(stderr, "expecting filename after -l\n"); return -1; } scaffold = indigoLoadMoleculeFromFile(argv[i]); } else if (strcmp(argv[i], "-o") == 0) { if (++i == argc) { fprintf(stderr, "expecting filename after -o\n"); return -1; } outfile_hl = argv[i]; } else if (strcmp(argv[i], "-r") == 0) { if (++i == argc) { fprintf(stderr, "expecting filename after -r\n"); return -1; } outfile_rg = argv[i]; } else if (strcmp(argv[i], "-s") == 0) { if (++i == argc) { fprintf(stderr, "expecting filename after -s\n"); return -1; } outfile_maxscaf = argv[i]; } else if (strcmp(argv[i], "-sr") == 0) { if (++i == argc) { fprintf(stderr, "expecting filename after -sr\n"); return -1; } outfile_scaf_r = argv[i]; } else if (strcmp(argv[i], "-S") == 0) { if (++i == argc) { fprintf(stderr, "expecting filename after -S\n"); return -1; } outfile_allscafs = argv[i]; } else if (strcmp(argv[i], "-na") == 0) aromatic = 0; else { fprintf(stderr, "Unknown option: %s", argv[i]); _printHelpMessage(); return -1; } } else { char dirname[1024]; char errbuf[1024]; const char* pattern = 0; int k; for (k = (int)strlen(argv[i]) - 1; k >= 0; k--) if (argv[i][k] == '/' || argv[i][k] == '\\') break; if (k == -1) strncpy(dirname, ".", sizeof(dirname)); else if (k == 0) { dirname[0] = argv[i][0]; dirname[1] = 0; } else if (k == strlen(argv[i]) - 1) { fprintf(stderr, "can not handle filenames ending with a slash\n"); return -1; } else if (k > sizeof(dirname) - 1) { fprintf(stderr, "filename too long\n"); return -1; } else { memcpy(dirname, argv[i], k); dirname[k] = 0; } _replaceSlashes(dirname); pattern = argv[i] + k + 1; for (const auto& path : std::filesystem::directory_iterator(dirname)) { const char* current_filename = path.path().filename().string().c_str(); if (!matchesPattern(current_filename, pattern)) { continue; } _handleInputFile(path.path().string().c_str(), structures); } } } if (indigoCount(structures) < 1) { fprintf(stderr, "no input structures\n"); _printHelpMessage(); return -1; } printf("got %d input structures\n", indigoCount(structures)); indigoSetOptionBool("deconvolution-aromatization", aromatic); if (scaffold == 0) { printf("calculating scaffold... "); fflush(stdout); if (approximate) scaffold = indigoExtractCommonScaffold(structures, "approximate"); else scaffold = indigoExtractCommonScaffold(structures, "exact"); printf("done\n"); fflush(stdout); } if (outfile_maxscaf != 0) { printf("saving the scaffold to %s\n", outfile_maxscaf); indigoSaveMolfileToFile(scaffold, outfile_maxscaf); } if (outfile_allscafs != 0) { int output = indigoWriteFile(outfile_allscafs); int allscafs = indigoAllScaffolds(scaffold); int item, iter = indigoIterateArray(allscafs); printf("saving all obtained scaffolds (%d total) to %s\n", indigoCount(allscafs), outfile_allscafs); while ((item = indigoNext(iter))) { indigoSdfAppend(output, item); indigoFree(item); } indigoFree(iter); indigoFree(output); } if (outfile_hl == 0 && outfile_rg == 0 && outfile_scaf_r == 0) { printf("none of -o, -r, -sr specified, nothing left to do\n"); return 0; } printf("decomposing the structures... "); fflush(stdout); deco = indigoDecomposeMolecules(scaffold, structures); printf("done\n"); fflush(stdout); if (outfile_scaf_r != 0) { int sr = indigoDecomposedMoleculeScaffold(deco); indigoLayout(sr); printf("saving the scaffold with R-sites to %s\n", outfile_scaf_r); indigoSaveMolfileToFile(sr, outfile_scaf_r); } if (outfile_hl != 0) { int output = indigoWriteFile(outfile_hl); int item, iter = indigoIterateDecomposedMolecules(deco); printf("saving the highlighted structures to %s\n", outfile_hl); while ((item = indigoNext(iter))) { indigoSdfAppend(output, indigoDecomposedMoleculeHighlighted(item)); indigoFree(item); } indigoFree(iter); indigoFree(output); } if (outfile_rg != 0) { int output = indigoWriteFile(outfile_rg); int item, iter = indigoIterateDecomposedMolecules(deco); printf("saving the structures with R-groups to %s\n", outfile_rg); while ((item = indigoNext(iter))) { indigoSdfAppend(output, indigoDecomposedMoleculeWithRGroups(item)); indigoFree(item); } indigoFree(iter); indigoFree(output); } indigoReleaseSessionId(session); return 0; };