in archive/preprocess/clospan.py [0:0]
def cloSpan(self, s, Ds, Ids, parent):
    """Register sequence ``s`` in the lattice, then grow it recursively.

    The entries stored in ``self.hash`` under the key ``Ids`` are popped and
    compared with ``s`` through ``self._checkSeqContainment``:

    * result ``0`` or ``1`` -- ``s`` was already discovered, or a discovered
      sequence contains it.  ``parent`` is added to each such entry, the
      popped entries are stored back unchanged and the method returns
      without growing ``s``.
    * result ``-1`` -- the discovered sequence is contained in ``s``.  A new
      ``CloSeq`` is created for ``s`` and consumes each such entry.
    * anything else -- the two sequences do not contain each other; the
      entry is kept alongside the new ``CloSeq``.

    Afterwards the item found at position ``Ds[i]`` of every still-active
    text is counted, and the method recurses once per item that passes
    ``self._checkMinSupport``.

    Parameters:
        s: the current sequence (copied with ``list(s)`` before extension).
        Ds: one position per entry of ``self.texts``; a negative value or a
            value past the end of ``texts[i][0]`` means that text is no
            longer scanned.
        Ids: key under which ``s`` is looked up and stored in ``self.hash``.
            The recursive call passes ``fdist.freq(a)`` here, so this is
            presumably the support of ``s`` -- confirm against the caller.
        parent: lattice node handed to ``addParent`` / ``CloSeq``.

    Raises:
        RuntimeError: if the stored entries include both a sequence that is
            contained in ``s`` and one that contains (or equals) ``s``.
    """
    texts = self.texts

    ## check if s is a sup/sub-sequence of discovered sequences
    known = self.hash.pop(Ids)
    unrelated = []
    contained_in_s = []
    covering_s = []
    # containment result -> destination list; any other result is "unrelated"
    bucket_for = {0: covering_s, 1: covering_s, -1: contained_in_s}
    for node in (known or ()):
        relation = self._checkSeqContainment(node.s, s)
        bucket_for.get(relation, unrelated).append(node)

    if contained_in_s and covering_s:
        current = self.printSeq(s)
        sup_dump = str([self.printSeq(node.s) for node in contained_in_s])
        sub_dump = str([self.printSeq(node.s) for node in covering_s])
        raise RuntimeError(
            "Conflicting sequences found in Lattice. Current sequence: "
            + current + ", Lsup: " + sup_dump + ", Lsub: " + sub_dump
        )

    # s is already covered: only record the extra parent on every covering
    # node and restore the popped entries untouched
    if covering_s:
        for node in covering_s:
            node.addParent(parent)
        self.hash.replace(Ids, known)
        return

    # s is new: its node consumes every stored sequence it contains
    seq = CloSeq(s, Ids, parent)
    for node in contained_in_s:
        seq.consumeSeq(node)
    unrelated.append(seq)
    self.hash.replace(Ids, unrelated)

    ## grow s: scan DB for next freq items and their supports
    fdist = FDist()
    for idx, text in enumerate(texts):
        pos = Ds[idx]
        if 0 <= pos < len(text[0]):
            # text[1] is forwarded as the second argument of FDist.add
            # (presumably a weight/count for this text -- TODO confirm)
            fdist.add(text[0][pos], text[1])

    for a in fdist.items():
        # NOTE(review): stopping at the first item below minimum support
        # assumes fdist.items() yields items by decreasing frequency -- confirm
        if not self._checkMinSupport(fdist.freq(a)):
            break
        ## update supporting DB: advance texts whose next item is `a`,
        ## deactivate (-1) all the others
        next_positions = [
            -1
            if (Ds[idx] < 0 or Ds[idx] >= len(text[0]) or text[0][Ds[idx]] != a)
            else Ds[idx] + 1
            for idx, text in enumerate(texts)
        ]
        extended = list(s) + [a]
        self.cloSpan(extended, next_positions, fdist.freq(a), seq)