def jig2tsv()

in archive/preprocess/jig2tsv.py [0:0]


def jig2tsv(jigFile, tsvFile):
	"""Write every sentence found in an XML file's ``docText`` elements to a TSV file.

	The XML at ``jigFile`` is parsed and each direct child of the root is
	treated as one document. The text of the document's ``docText`` child is
	reduced to ASCII (non-ASCII characters are dropped), double quotes are
	replaced with single quotes, newlines are removed and the result is
	lower-cased. The cleaned text is split with ``sent_tokenize`` and every
	non-empty sentence becomes one output row of three tab-separated columns:
	an empty string, the sentence, and the literal ``'1'``.

	Documents that have no ``docText`` child, or whose ``docText`` is empty,
	contribute no rows.

	Args:
		jigFile: Path (or file object) of the XML input, passed to ``ET.parse``.
		tsvFile: Path of the TSV file to create or overwrite.
	"""
	root = ET.parse(jigFile).getroot()
	# The context manager guarantees the output file is closed even if
	# tokenizing or writing raises part-way through.
	with open(tsvFile, 'w') as wf:
		wtr = csv.writer(wf, delimiter='\t', doublequote=False, escapechar='\\', quoting=csv.QUOTE_MINIMAL)
		for document in root:
			# findtext yields None for a missing element and '' for an empty
			# one; neither has any sentence to emit.
			rawText = document.findtext('docText')
			if not rawText:
				continue
			# Encode/decode round-trip strips non-ASCII characters while
			# keeping the value a text string, so the str-based replace calls
			# below work on both Python 2 and Python 3.
			asciiText = rawText.encode('ascii', 'ignore').decode('ascii')
			docText = asciiText.replace('"', '\'').replace('\n', '').lower()
			# NOTE(review): sent_tokenize is presumably NLTK's sentence
			# splitter, imported elsewhere in this file -- confirm.
			for sentence in sent_tokenize(docText):
				if sentence:
					wtr.writerow(['', sentence, '1'])