layer0/Word.cpp (890 lines of code) (raw):
/*
A* -------------------------------------------------------------------
B* This file contains source code for the PyMOL computer program
C* Copyright (c) Schrodinger, LLC.
D* -------------------------------------------------------------------
E* It is unlawful to modify or remove this copyright notice.
F* -------------------------------------------------------------------
G* Please see the accompanying LICENSE file for further information.
H* -------------------------------------------------------------------
I* Additional authors of this source file include:
-*
-*
-*
Z* -------------------------------------------------------------------
*/
#include"os_python.h"
#include"os_predef.h"
#include"os_std.h"
#include"Base.h"
#include"OOMac.h"
#include"Word.h"
#include"Parse.h"
#include"PyMOLObject.h"
#include"MemoryDebug.h"
/* Module state stored in G->Word (allocated by WordInit, released by
 * WordFree).  It carries no real data at present; the single member is a
 * placeholder. */
struct _CWord {
int no_state_at_present;
};
/*
 * One node of a compiled match expression (built by WordMatcherNew).
 * Literal text is not stored in the node; literal1/literal2 are offsets
 * into the owning matcher's charVLA, where offset 0 means "no literal".
 */
typedef struct {
  int match_mode;               /* cMatchLiteral, cMatchWildcard or a range mode */
  int continued;                /* non-zero: the following node extends this pattern */
  int literal1;                 /* charVLA offset of the first literal (range lower bound) */
  int literal2;                 /* charVLA offset of the second literal (range upper bound) */
  int numeric1;                 /* integer parsed from literal1; meaningful in numeric modes when has1 is set */
  int numeric2;                 /* integer parsed from literal2; meaningful in numeric modes when has2 is set */
  int has1;                     /* first bound usable: literal present (alpha) or parsed (numeric) */
  int has2;                     /* second bound usable: literal present (alpha) or parsed (numeric) */
} MatchNode;
/* A compiled match expression, produced by WordMatcherNew and released by
 * WordMatcherFree. */
struct _CWordMatcher {
PyMOLGlobals *G; /* context the matcher was created with; passed to WordCompare */
MatchNode *node; /* VLA of match nodes */
int n_node; /* number of nodes in use */
char *charVLA; /* VLA holding the literal text that nodes reference by offset */
int n_char; /* number of bytes of charVLA in use */
int ignore_case; /* copied from the options; non-zero = case-insensitive matching */
};
/* Values for MatchNode.match_mode.  The two range modes alias the
 * cWordMatchOption*Ranges constants (defined elsewhere), which lets
 * WordMatcherNew store option->range_mode directly into a node.
 * NOTE(review): those constants must differ from 0 and 3 for the switch
 * statements in this file to be valid -- presumably 1 and 2; confirm in Word.h. */
#define cMatchLiteral 0
#define cMatchNumericRange cWordMatchOptionNumericRanges
#define cMatchAlphaRange cWordMatchOptionAlphaRanges
#define cMatchWildcard 3
#ifndef _PYMOL_INLINE
/*
 * Three-way comparison of two NUL-terminated strings.
 *
 * G        not referenced by this function
 * p, q     strings to compare
 * ignCase  non-zero: characters that differ are compared after tolower()
 *
 * Returns -1 when p orders before q, 1 when p orders after q, 0 when the
 * strings are equal.  All things equal, the shorter string is smaller.
 */
int WordCompare(PyMOLGlobals * G, const char *p, const char *q, int ignCase)
{
  char cp, cq;

  while((cp = *p) && (cq = *q)) {
    p++;
    q++;
    if(cp != cq) {
      if(ignCase) {
        /* <ctype.h> functions are only defined for values representable as
           unsigned char (or EOF); a negative plain char must be converted */
        cp = (char) tolower((unsigned char) cp);
        cq = (char) tolower((unsigned char) cq);
      }
      if(cp < cq)
        return -1;
      if(cp > cq)
        return 1;
    }
  }

  /* one string is a prefix of the other (or they are equal) */
  if((!*p) && (*q))
    return -1;
  if((*p) && (!*q))
    return 1;
  return 0;
}
#endif
/*
 * Preset for integer identifiers: numeric ranges, lists with '+' allowed,
 * hyphen ranges allowed, case ignored, and no wildcard character.
 */
void WordMatchOptionsConfigInteger(CWordMatchOptions * I)
{
  /* list syntax */
  I->lists = true;
  I->allow_plus = true;
  I->space_lists = false;

  /* range syntax */
  I->range_mode = cWordMatchOptionNumericRanges;
  I->allow_hyphen = true;

  /* character handling */
  I->wildcard = 0;              /* numbers take no wildcard */
  I->ignore_case = true;
}
/*
 * Preset for alphabetic identifiers: alpha ranges and lists, with neither
 * '-' nor '+' enabled as operators and no space-separated lists.
 *
 * wildcard     wildcard character to recognise
 * ignore_case  non-zero for case-insensitive matching
 */
void WordMatchOptionsConfigAlpha(CWordMatchOptions * I, char wildcard, int ignore_case)
{
  /* caller-supplied settings */
  I->wildcard = wildcard;
  I->ignore_case = ignore_case;

  /* list syntax */
  I->lists = true;
  I->allow_plus = false;
  I->space_lists = false;

  /* range syntax */
  I->range_mode = cWordMatchOptionAlphaRanges;
  I->allow_hyphen = false;
}
/*
 * Same preset as the plain alphabetic configuration, except that '+' is
 * expected to be used as a list separator and is therefore enabled.
 *
 * wildcard     wildcard character to recognise
 * ignore_case  non-zero for case-insensitive matching
 */
void WordMatchOptionsConfigAlphaList(CWordMatchOptions * I, char wildcard,
                                     int ignore_case)
{
  /* caller-supplied settings */
  I->wildcard = wildcard;
  I->ignore_case = ignore_case;

  /* list syntax: '+' separates list entries here */
  I->lists = true;
  I->allow_plus = true;
  I->space_lists = false;

  /* range syntax */
  I->range_mode = cWordMatchOptionAlphaRanges;
  I->allow_hyphen = false;
}
/*
 * Preset for identifiers that mix text and numbers: numeric ranges, lists
 * with '+' allowed and hyphen ranges allowed.
 *
 * wildcard     wildcard character to recognise
 * ignore_case  non-zero for case-insensitive matching
 */
void WordMatchOptionsConfigMixed(CWordMatchOptions * I, char wildcard, int ignore_case)
{
  /* caller-supplied settings */
  I->wildcard = wildcard;
  I->ignore_case = ignore_case;

  /* list syntax */
  I->lists = true;
  I->allow_plus = true;
  I->space_lists = false;

  /* range syntax */
  I->range_mode = cWordMatchOptionNumericRanges;
  I->allow_hyphen = true;
}
/*
 * Preset for lists of names: alpha ranges, lists in which a space also
 * separates entries, and neither '-' nor '+' enabled as operators.
 *
 * wildcard     wildcard character to recognise
 * ignore_case  non-zero for case-insensitive matching
 */
void WordMatchOptionsConfigNameList(CWordMatchOptions * I, char wildcard, int ignore_case)
{
  /* caller-supplied settings */
  I->wildcard = wildcard;
  I->ignore_case = ignore_case;

  /* list syntax: spaces separate names */
  I->lists = true;
  I->allow_plus = false;
  I->space_lists = true;

  /* range syntax */
  I->range_mode = cWordMatchOptionAlphaRanges;
  I->allow_hyphen = false;
}
/*
 * Compile the match expression `st` into a CWordMatcher.
 *
 * G       context; passed to OOCalloc and stored in the matcher
 * st      expression to compile; NULL yields NULL
 * option  syntax switches (lists, ranges, wildcard character, case handling)
 * force   non-zero: build a matcher even if `st` contains no operators
 *
 * Returns NULL when `st` is NULL, or when `force` is zero and the pre-scan
 * finds no backslash, enabled operator or wildcard in `st`.  Otherwise
 * returns a new matcher that owns two VLAs: `node` (the match nodes) and
 * `charVLA` (the literal text, which nodes reference by offset).
 * WordMatcherFree releases both VLAs and the matcher itself.
 */
CWordMatcher *WordMatcherNew(PyMOLGlobals * G, const char *st, CWordMatchOptions * option,
int force)
{
CWordMatcher *result = NULL;
int needed = force;
char wildcard = option->wildcard;
if(wildcard == 32)
wildcard = 0; /* space as wildcard means no wildcard */
if(!st)
return NULL;
{ /* first determine if we need to incur the overhead of the matcher */
int escape = false;
const char *p = st;
/* the scan stops at the first character that makes a matcher necessary */
while((*p) && (!needed)) {
if(!escape) {
switch (*p) {
case '\\':
/* any backslash requires the matcher (and so ends this scan) */
escape = true;
needed = true;
break;
case '+':
if((option->lists) && (option->allow_plus))
needed = true;
break;
case ',': /* list operators */
if(option->lists)
needed = true;
break;
case '-': /* range operators */
/* note: only allow_hyphen is tested here; the builder below also
   requires range_mode before treating '-' as an operator */
if(option->allow_hyphen)
needed = true;
break;
case ':':
if(option->range_mode)
needed = true;
break;
case ' ':
if(option->space_lists)
needed = true;
break;
default:
if(*p == wildcard)
needed = true;
break;
}
} else
escape = false;
p++;
}
}
if(needed) { /* if so, then convert the expression into a match tree */
int n_char = 0;
int n_node = 0;
/* OOCalloc (macro defined elsewhere) provides the local `I` used below */
OOCalloc(G, CWordMatcher);
I->charVLA = VLACalloc(char, 10); /* auto_zeroing... */
I->node = VLACalloc(MatchNode, 10);
I->ignore_case = option->ignore_case;
I->G = G;
/* build up the matcher structure... */
{
const char *p = st;
char c, *q;
int escape = false; /* previous character was an unescaped backslash */
int token_active = false; /* characters are being appended to a literal */
int node_active = false; /* cur_node belongs to the element being parsed */
int char_handled = false; /* current character was consumed as an operator */
int cur_node = 0; /* index of the node being filled */
int expectation = 1; /* node count expected once the current list element is done */
while(1) {
c = *p;
char_handled = false;
if(!escape) {
switch (c) {
case '\\':
/* the next character is taken literally, bypassing this switch */
escape = true;
char_handled = true;
break;
case 0:
/* end of input: with lists enabled the terminator just closes the
   current element; otherwise it falls through to the literal-copy
   code below like an ordinary character */
if(option->lists) {
char_handled = true;
node_active = false;
token_active = false;
}
break;
case '+': /* list operator */
if(option->lists && option->allow_plus) {
/* if no node was added since `expectation` was last set, the element
   just closed was empty: append a node left in its zero state (relies
   on VLA auto-zeroing); otherwise expect one more node for the next
   element */
if(n_node < expectation) { /* create empty node */
VLACheck(I->node, MatchNode, n_node);
n_node++;
} else {
expectation = n_node + 1;
}
char_handled = true;
node_active = false;
token_active = false;
}
break;
case ',': /* list operator */
if(option->lists) {
/* same element-closing logic as for '+' */
if(n_node < expectation) { /* create empty node */
VLACheck(I->node, MatchNode, n_node);
n_node++;
} else {
expectation = n_node + 1;
}
char_handled = true;
node_active = false;
token_active = false;
}
break;
case ' ': /* space list */
if(option->space_lists) {
/* same element-closing logic as for '+' */
if(n_node < expectation) { /* create empty node */
VLACheck(I->node, MatchNode, n_node);
n_node++;
} else {
expectation = n_node + 1;
}
char_handled = true;
node_active = false;
token_active = false;
}
break;
case '-': /* range operators */
if(option->allow_hyphen && option->range_mode) {
/* a range with no lower bound starts a fresh node */
if(!node_active) {
cur_node = n_node;
VLACheck(I->node, MatchNode, cur_node);
node_active = true;
n_node++;
}
/* turn the current node into a range node; ending the token makes
   the next literal character start literal2 */
I->node[cur_node].match_mode = option->range_mode;
token_active = false;
char_handled = true;
}
break;
case ':':
if(option->range_mode) {
/* identical handling to '-' above, but not gated on allow_hyphen */
if(!node_active) {
cur_node = n_node;
VLACheck(I->node, MatchNode, cur_node);
node_active = true;
n_node++;
}
I->node[cur_node].match_mode = option->range_mode;
token_active = false;
char_handled = true;
}
break;
default:
if(c == wildcard) {
/* chain the node in progress (if any) to the new wildcard node */
if(node_active) {
I->node[cur_node].continued = true;
}
VLACheck(I->node, MatchNode, n_node);
cur_node = n_node;
I->node[cur_node].match_mode = cMatchWildcard;
n_node++;
node_active = true;
token_active = false;
char_handled = true;
}
break;
}
} else
escape = false;
/* anything not consumed as an operator is literal text */
if(!char_handled) {
if(!token_active) {
/* skip one byte before each new token: it keeps the previous token
   terminated (the VLA is zero-filled) and guarantees that no literal
   starts at offset 0, which is reserved to mean "no literal" */
n_char++;
VLACheck(I->charVLA, char, n_char);
token_active = true;
if((!node_active) || (I->node[cur_node].match_mode == cMatchWildcard)) {
if(node_active) /* must be extending after a wildcard */
I->node[cur_node].continued = true;
else
node_active = true;
VLACheck(I->node, MatchNode, n_node);
cur_node = n_node;
I->node[cur_node].literal1 = n_char; /* the first literal */
n_node++;
} else {
I->node[cur_node].literal2 = n_char; /* must be the second literal */
}
}
/* copy character into auto-terminated string */
VLACheck(I->charVLA, char, n_char + 1);
q = I->charVLA + n_char;
(*q++) = c;
n_char++;
}
if(c)
p++;
else
break;
}
/* the final list element was empty */
if(n_node < expectation) { /* create empty node */
VLACheck(I->node, MatchNode, n_node);
n_node++;
}
}
{
/* second pass: derive numeric1/numeric2 and the has1/has2 flags */
int a;
int tmp;
MatchNode *node = I->node;
for(a = 0; a < n_node; a++) {
switch (node->match_mode) {
case cMatchLiteral:
/* in numeric mode a plain literal is also parsed as an integer
   (integer_match compares against numeric1) */
if(option->range_mode == cWordMatchOptionNumericRanges) {
if(node->literal1) {
if(sscanf(I->charVLA + node->literal1, "%d", &tmp) == 1) {
node->numeric1 = tmp;
node->has1 = true;
}
}
}
break;
case cMatchAlphaRange:
/* a bound exists whenever its literal was recorded */
if(node->literal1)
node->has1 = true;
if(node->literal2)
node->has2 = true;
break;
case cMatchNumericRange:
/* a bound exists only if its literal parses as an integer */
if(node->literal1) {
if(sscanf(I->charVLA + node->literal1, "%d", &tmp) == 1) {
node->numeric1 = tmp;
node->has1 = true;
}
}
if(node->literal2) {
if(sscanf(I->charVLA + node->literal2, "%d", &tmp) == 1) {
node->numeric2 = tmp;
node->has2 = true;
}
}
break;
}
node++;
}
}
I->n_char = n_char;
I->n_node = n_node;
/* WordMatcherDump(I); */
result = I;
}
return result;
}
/*
 * Test `text` against the pattern that starts at `cur_node`, following
 * `continued` links into the nodes that come after it.
 *
 * I          matcher owning the node and character arrays
 * cur_node   first node of the pattern to try
 * text       NUL-terminated candidate string
 * value_ptr  when non-NULL, numeric-range nodes compare against *value_ptr
 *            instead of parsing an integer out of `text`
 *
 * Returns non-zero on a match and zero otherwise.
 */
static int recursive_match(CWordMatcher * I, MatchNode * cur_node, const char *text,
                           int *value_ptr)
{
  int ignore_case = I->ignore_case;

  switch (cur_node->match_mode) {
  case cMatchLiteral:
    {
      const char *q = I->charVLA + cur_node->literal1;
      const char *p = text;
      while((*p) && (*q)) {
        if(*p != *q) {
          if(!ignore_case)
            return false;
          /* <ctype.h> functions are only defined for values representable
             as unsigned char (or EOF) */
          if(tolower((unsigned char) *p) != tolower((unsigned char) *q))
            return false;
        }
        p++;
        q++;
      }
      if(!*q) {
        /* the whole literal was consumed */
        if(cur_node->continued)
          return recursive_match(I, cur_node + 1, p, value_ptr);
        if(!*p)
          return true;
      }
    }
    break;
  case cMatchWildcard:
    {
      const char *p = text;
      if(!cur_node->continued)
        return true;            /* trailing wildcard accepts anything */
      /* try the remainder of the pattern at every non-empty suffix */
      while(*p) {
        if(recursive_match(I, cur_node + 1, p, value_ptr))
          return 1;
        p++;
      }
    }
    break;
  case cMatchAlphaRange:
    {
      const char *l1 = I->charVLA + cur_node->literal1;
      const char *l2 = I->charVLA + cur_node->literal2;
      /* a missing bound leaves that side of the range open */
      if(((!cur_node->has1) ||
          (WordCompare(I->G, l1, text, ignore_case) <= 0)) &&
         ((!cur_node->has2) || (WordCompare(I->G, l2, text, ignore_case) >= 0)))
        return true;
      else
        return false;
    }
    break;
  case cMatchNumericRange:
    {
      int value;
      if(value_ptr)
        value = *value_ptr;
      else if(sscanf(text, "%d", &value) != 1)
        break;                  /* text holds no integer: no match */
      if(((!cur_node->has1) ||
          (cur_node->numeric1 <= value)) &&
         ((!cur_node->has2) || (cur_node->numeric2 >= value)))
        return true;
    }
    break;
  }
  return false;
}
/*
 * Match `text` against every pattern held by the matcher.
 * Numeric-range nodes parse their value from `text` (no integer is
 * supplied).  Returns true as soon as one pattern matches, else false.
 */
int WordMatcherMatchAlpha(CWordMatcher * I, const char *text)
{
  MatchNode *node = I->node;
  int remaining;

  for(remaining = I->n_node; remaining > 0; remaining--) {
    if(recursive_match(I, node, text, NULL))
      return true;
    /* step over the rest of this pattern's chain, then onto the next one */
    for(; node->continued; node++)
      remaining--;
    node++;
  }
  return false;
}
/*
 * Match against every pattern held by the matcher, using `text` for
 * literal, wildcard and alpha-range nodes and `value` for numeric-range
 * nodes.  Returns true as soon as one pattern matches, else false.
 */
int WordMatcherMatchMixed(CWordMatcher * I, const char *text, int value)
{
  MatchNode *node = I->node;
  int remaining;

  for(remaining = I->n_node; remaining > 0; remaining--) {
    if(recursive_match(I, node, text, &value))
      return true;
    /* step over the rest of this pattern's chain, then onto the next one */
    for(; node->continued; node++)
      remaining--;
    node++;
  }
  return false;
}
/*
 * Test an integer against a single node.
 *   literal node:        matches when numeric1 was parsed and equals `value`
 *   numeric-range node:  matches when `value` lies within the bounds that
 *                        are present (an absent bound is open)
 *   any other node:      never matches
 * `I` is not referenced.
 */
static int integer_match(CWordMatcher * I, MatchNode * cur_node, int value)
{
  int mode = cur_node->match_mode;

  if(mode == cMatchLiteral) {
    if(cur_node->has1 && (cur_node->numeric1 == value))
      return true;
    return false;
  }
  if(mode == cMatchNumericRange) {
    if(cur_node->has1 && (cur_node->numeric1 > value))
      return false;             /* below the lower bound */
    if(cur_node->has2 && (cur_node->numeric2 < value))
      return false;             /* above the upper bound */
    return true;
  }
  return false;
}
/*
 * Match an integer against every pattern held by the matcher.  Only the
 * first node of each pattern chain is tested.  Returns true as soon as one
 * of them accepts `value`, else false.
 */
int WordMatcherMatchInteger(CWordMatcher * I, int value)
{
  MatchNode *node = I->node;
  int remaining;

  for(remaining = I->n_node; remaining > 0; remaining--) {
    if(integer_match(I, node, value))
      return true;
    /* step over the rest of this pattern's chain, then onto the next one */
    for(; node->continued; node++)
      remaining--;
    node++;
  }
  return false;
}
/*
 * Release a matcher: its node array, its character array, then the
 * matcher itself.  A NULL matcher is accepted and ignored.
 */
void WordMatcherFree(CWordMatcher * I)
{
  if(!I)
    return;
  VLAFreeP(I->node);
  VLAFreeP(I->charVLA);
  OOFreeP(I);
}
/*
 * Split `st` into words and return them as a new CWordList.
 *
 * A word is a maximal run of characters whose plain-char value is greater
 * than 32; everything else separates words.
 * NOTE(review): where plain char is signed, bytes >= 0x80 compare below 32
 * and therefore act as separators -- confirm this is intended.
 *
 * The list owns one buffer holding all words back to back, each
 * NUL-terminated (I->word), and an array of pointers to the start of each
 * word (I->start).  Release it with WordListFreeP.
 *
 * A NULL `st` yields an empty list.  If either buffer cannot be allocated
 * the list is returned with n_word left at 0.
 */
CWordList *WordListNew(PyMOLGlobals * G, const char *st)
{
  int n_word = 0;
  int len = 0;
  const char *p;
  OOCalloc(G, CWordList);

  if(I && st) {
    /* pass 1: count the words and the bytes needed to store them */
    p = st;
    while(*p) {
      if(*p > 32) {
        n_word++;
        while((*p) > 32) {
          len++;
          p++;
        }
        len++;                  /* room for the terminator */
      } else
        p++;
    }

    /* allocate the storage we'll need to hold the words */
    I->word = Alloc(char, len);
    I->start = Alloc(char *, n_word);

    /* pass 2: copy the words and record where each one starts */
    if(I->word && I->start) {
      char *q = I->word;
      char **q_ptr = I->start;
      p = st;
      while(*p) {
        if(*p > 32) {
          *(q_ptr++) = q;
          while((*p) > 32)
            *(q++) = *(p++);
          *(q++) = 0;
        } else
          p++;
      }
      I->n_word = n_word;
    }
  }
  return I;
}
/*
 * Release a word list: the word buffer, the start-pointer array and the
 * list itself.  A NULL list is accepted and ignored.
 */
void WordListFreeP(CWordList * I)
{
  if(!I)
    return;
  FreeP(I->word);
  FreeP(I->start);
  FreeP(I);
}
/*
 * Print the word count and every word of the list to stdout, each line
 * tagged with `prefix`.  Does nothing for a NULL list.
 */
void WordListDump(CWordList * I, const char *prefix)
{
  int idx = 0;

  if(!I)
    return;
  printf(" %s: n_word %d\n", prefix, I->n_word);
  while(idx < I->n_word) {
    printf(" %s: word %d=[%s]\n", prefix, idx, I->start[idx]);
    idx++;
  }
}
/*
 * Step through the words of a list.
 *
 * hidden  iteration cursor owned by the caller (index of the next word)
 * ptr     receives a pointer to the next word
 *
 * Returns true after storing a word in *ptr and advancing *hidden, and
 * false once the cursor has reached n_word.  A negative cursor returns
 * true without touching *ptr or *hidden.  `G` is not referenced.
 */
int WordListIterate(PyMOLGlobals * G, CWordList * I, const char **ptr, int *hidden)
{
  int cursor = *hidden;

  if(cursor < 0)
    return true;
  if(cursor >= I->n_word)
    return false;
  *ptr = I->start[cursor];
  *hidden = cursor + 1;
  return true;
}
/*
 * Find the first word of the list for which WordMatch(word, name) is
 * non-zero; each list word is used as the pattern side.
 *
 * Returns that word's index, or -1 when nothing matches or the list is NULL.
 */
int WordListMatch(PyMOLGlobals * G, CWordList * I, const char *name, int ignore_case)
{
  int idx;

  if(!I)
    return -1;
  for(idx = 0; idx < I->n_word; idx++)
    if(WordMatch(G, I->start[idx], name, ignore_case))
      return idx;
  return -1;
}
/*
 * Allocate the zero-filled module state and attach it to G->Word.
 * Returns 1 on success and 0 if the allocation failed.
 */
int WordInit(PyMOLGlobals * G)
{
  G->Word = Calloc(CWord, 1);
  return G->Word ? 1 : 0;
}
/* Release the module state attached to G->Word (allocated by WordInit). */
void WordFree(PyMOLGlobals * G)
{
FreeP(G->Word);
}
/*
 * Rewrite `p` in place, turning each '+' into ',' unless the '+' is the
 * last character or is directly followed by ',' or another '+'.
 * `G` is not referenced.
 */
void WordPrimeCommaMatch(PyMOLGlobals * G, char *p)
{                               /* this should not be done here... */
  for(; *p; p++) {
    char next;
    if(*p != '+')
      continue;
    next = p[1];                /* safe: *p is not the terminator */
    if(next && (next != ',') && (next != '+'))
      *p = ',';
  }
}
/*
 * Whole-string equality test.
 *
 * ignCase  non-zero: characters are compared case-insensitively
 *
 * Returns 1 when p and q are equal over their full length, 0 otherwise.
 * `G` is not referenced.
 */
int WordMatchExact(PyMOLGlobals * G, const char *p, const char *q, int ignCase)
{
  for(; (*p) && (*q); p++, q++) {
    if(*p == *q)
      continue;
    if(!ignCase)
      return 0;
    /* <ctype.h> functions are only defined for values representable as
       unsigned char (or EOF) */
    if(tolower((unsigned char) *p) != tolower((unsigned char) *q))
      return 0;
  }
  /* equal only if both strings ended together */
  return (*p == *q) ? 1 : 0;
}
/*
 * Prefix match without wildcards: p matches when it is a prefix of q.
 *
 * ignCase  non-zero: characters are compared case-insensitively
 *
 * Returns:
 *   0         no match (a character differs, or p is longer than q)
 *   positive  p is a proper prefix of q; the value is 1 + strlen(p)
 *   negative  p equals q; the value is -(1 + strlen(p))
 * `G` is not referenced.
 */
int WordMatchNoWild(PyMOLGlobals * G, const char *p, const char *q, int ignCase)
{
  int i = 1;

  while((*p) && (*q)) {
    /* <ctype.h> functions are only defined for values representable as
       unsigned char (or EOF) */
    if((*p != *q) &&
       ((!ignCase) ||
        (tolower((unsigned char) *p) != tolower((unsigned char) *q)))) {
      i = 0;
      break;
    }
    i++;
    p++;
    q++;
  }
  if((*p) && (!*q))             /* p is longer than q */
    i = 0;
  if(i && ((!*p) && (!*q)))     /* exact match gives negative value */
    i = -i;
  return (i);
}
/*
 * Prefix match of pattern p against q, honouring a wildcard ('*') in p.
 * Matching stops at the first '*' in p that does not line up with a
 * literal '*' in q; whatever follows it in p is not examined.
 *
 * ignCase  non-zero: characters are compared case-insensitively
 *
 * Returns:
 *   0         no match
 *   positive  p is a proper prefix of q; the value is 1 + characters matched
 *   negative  exact or wildcard match; magnitude is 1 + characters matched
 * `G` is not referenced.
 */
int WordMatch(PyMOLGlobals * G, const char *p, const char *q, int ignCase)
{
  const char WILDCARD = '*';
  int i = 1;

  while((*p) && (*q)) {
    if(*p != *q) {
      if(*p == WILDCARD) {
        i = -i;
        break;
      }
      /* <ctype.h> functions are only defined for values representable as
         unsigned char (or EOF) */
      if((!ignCase) ||
         (tolower((unsigned char) *p) != tolower((unsigned char) *q))) {
        i = 0;
        break;
      }
    }
    i++;
    p++;
    q++;
  }
  if((!*q) && (*p == WILDCARD)) /* q is used up right at the wildcard */
    i = -i;
  if(*p != WILDCARD) {
    if((*p) && (!*q))           /* p is longer than q */
      i = 0;
  }
  if(i && ((!*p) && (!*q)))     /* exact match gives negative value */
    i = -i;
  return (i);
}
/*
 * Match q against a comma-separated list of patterns in pp; each pattern
 * may use the '*' wildcard.
 *
 * Elements are tried left to right with the same scoring as WordMatch:
 * 0 = no match, positive = the element is a proper prefix of q, negative =
 * exact or wildcard match.  The scan stops at the first element scoring
 * negative.  An empty pp is tried once as a single empty element, and a
 * trailing comma adds one final empty element.
 *
 * Returns the negative score of the first exact/wildcard match, otherwise
 * the largest positive score seen, otherwise 0.  `G` is not referenced.
 */
int WordMatchComma(PyMOLGlobals * G, const char *pp, const char *qq, int ignCase)
{
  const char WILDCARD = '*';
  const char *p = pp;
  int best_i = 0;
  int blank = (!*p);            /* an empty list is still tried once */
  int trailing_comma = 0;

  while(((*p) || (blank)) && (best_i >= 0)) {
    const char *q = qq;         /* every element is matched from the start of q */
    char pc, qc;
    int i = 1;
    blank = 0;

    while((pc = (*p)) && (qc = (*q))) {
      if(pc == ',')
        break;
      if(pc != qc) {
        if(pc == WILDCARD) {
          i = -i;
          break;
        }
        /* <ctype.h> functions are only defined for values representable
           as unsigned char (or EOF) */
        if((!ignCase) ||
           (tolower((unsigned char) pc) != tolower((unsigned char) qc))) {
          i = 0;
          break;
        }
      }
      p++;
      q++;
      i++;
    }
    if((!*q) && ((*p == WILDCARD) || (*p == ',')))
      i = -i;
    if((*p != WILDCARD) && (*p != ','))
      if((*p) && (!*q))         /* element is longer than q */
        i = 0;
    if(i && ((!*p) && (!*q)))   /*exact match */
      i = -i;

    /* best_i is >= 0 here, as guaranteed by the loop condition */
    if(i < 0)
      best_i = i;
    else if(i > best_i)
      best_i = i;

    if(best_i >= 0) {
      /* skip whatever is left of this element */
      while((*p) && (*p != ','))
        p++;
      if(*p == ',') {           /* handle special case, trailing comma */
        if(*(p + 1))
          p++;
        else if(!trailing_comma)
          trailing_comma = 1;   /* stay on the comma for one more pass */
        else
          p++;
      }
    }
  }
  return (best_i);
}
/*
 * Match q against a comma-separated list of words in p; no wildcards.
 *
 * Elements are tried left to right: 0 = no match, positive = the element
 * is a proper prefix of q, negative = the element equals q.  The scan
 * stops at the first element scoring negative.  An empty p is tried once
 * as a single empty element, and a trailing comma adds one final empty
 * element.
 *
 * Returns the negative score of the first exact match, otherwise the
 * largest positive score seen, otherwise 0.  `G` is not referenced.
 */
int WordMatchCommaExact(PyMOLGlobals * G, const char *p, const char *q, int ignCase)
{
  const char *q_start = q;
  int best_i = 0;
  int blank = (!*p);            /* an empty list is still tried once */
  int trailing_comma = 0;

  while(((*p) || (blank)) && (best_i >= 0)) {
    int i = 1;
    blank = 0;
    q = q_start;                /* every element is matched from the start of q */

    while((*p) && (*q)) {
      if(*p == ',')
        break;
      /* <ctype.h> functions are only defined for values representable as
         unsigned char (or EOF) */
      if((*p != *q) &&
         ((!ignCase) ||
          (tolower((unsigned char) *p) != tolower((unsigned char) *q)))) {
        i = 0;
        break;
      }
      i++;
      p++;
      q++;
    }
    if((!*q) && (*p == ','))
      i = -i;
    if(*p != ',')
      if((*p) && (!*q))         /* element is longer than q */
        i = 0;
    if(i && ((!*p) && (!*q)))   /*exact match */
      i = -i;

    /* best_i is >= 0 here, as guaranteed by the loop condition */
    if(i < 0)
      best_i = i;
    else if(i > best_i)
      best_i = i;

    if(best_i >= 0) {
      /* skip whatever is left of this element */
      while((*p) && (*p != ','))
        p++;
      if(*p == ',') {           /* handle special case, trailing comma */
        if(*(p + 1))
          p++;
        else if(!trailing_comma)
          trailing_comma = 1;   /* stay on the comma for one more pass */
        else
          p++;
      }
    }
  }
  return (best_i);
}
/*
 * Match an integer against the comma-separated pattern list `p` by
 * formatting `number` in decimal and passing it to WordMatchComma with
 * case-insensitive comparison.  Returns WordMatchComma's result.
 */
int WordMatchCommaInt(PyMOLGlobals * G, const char *p, int number)
{
  WordType buffer;
  /* bounded write: never exceeds the buffer whatever its size */
  snprintf(buffer, sizeof(buffer), "%d", number);
  return (WordMatchComma(G, p, buffer, 1));
}
/*
 * Look `word` up in `list`, an array of WordType entries ended by an entry
 * whose first character is NUL.  `word` is the pattern side of WordMatch,
 * so it may be an abbreviation or contain a wildcard.
 *
 * minMatch  a partial match counts only if its WordMatch score exceeds
 *           minMatch; an exact or wildcard match always counts
 * ignCase   non-zero for case-insensitive comparison
 *
 * Returns the index of the best entry, or -1 if none qualifies.  Among
 * partial matches the first one with the highest score wins; an exact or
 * wildcard match replaces whatever was selected before it.
 */
int WordIndex(PyMOLGlobals * G, WordType * list, const char *word, int minMatch, int ignCase)
{
  int best_score = -1;
  int best_index = -1;
  int c;

  for(c = 0; list[c][0]; c++) {
    int i = WordMatch(G, word, list[c], ignCase);
    if(i > 0) {
      if(best_score < i) {
        best_score = i;
        best_index = c;
      }
    } else if(i < 0) {
      /* exact match always matches: lift its score above the threshold.
         The comparison must be <=, because a score equal to minMatch would
         fail the strict "> minMatch" test below. */
      if((-i) <= minMatch)
        best_score = minMatch + 1;
      else
        best_score = (-i);
      best_index = c;
    }
  }
  return (best_score > minMatch) ? best_index : -1;
}
/*
 * Look `word` up in a keyword table ended by an entry whose word is empty,
 * using WordMatchNoWild (no wildcards; `word` may be an abbreviation).
 *
 * minMatch  a partial match counts only if its score is at least minMatch;
 *           an exact match always counts
 * ignCase   non-zero for case-insensitive comparison
 * exact     set to true if any entry matched exactly, else false
 *
 * Returns the `value` of the selected entry, or 0 if none qualifies.
 */
int WordKey(PyMOLGlobals * G, WordKeyValue * list, const char *word, int minMatch, int ignCase,
            int *exact)
{
  int best_score = -1;
  int best_value = -1;
  int idx;

  *exact = false;
  for(idx = 0; list[idx].word[0]; idx++) {
    int score = WordMatchNoWild(G, word, list[idx].word, ignCase);
    if(score > 0) {
      /* partial match: keep the first one with the highest score */
      if(score > best_score) {
        best_score = score;
        best_value = list[idx].value;
      }
    } else if(score < 0) {
      *exact = true;
      /* exact match always matches */
      best_score = ((-score) <= minMatch) ? (minMatch + 1) : (-score);
      best_value = list[idx].value;
    }
  }
  if(best_score >= minMatch)
    return best_value;
  return 0;
}