spec/segmentation/dictionary_break_spec.rb (31 lines of code) (raw):

# encoding: UTF-8 # Copyright 2012 Twitter, Inc # http://www.apache.org/licenses/LICENSE-2.0 require 'spec_helper' describe TwitterCldr::Segmentation::BreakIterator do base_path = File.join( TwitterCldr::RESOURCES_DIR, *%w(shared segments tests dictionary_tests) ) test_files_pattern = File.join(base_path, '*.yml') test_files = Dir.glob(test_files_pattern) test_files.reject! { |f| f.end_with?('combined.yml') } test_files.each do |test_file| test_data = YAML.load_file(test_file) locale = test_data[:locale].to_sym text = test_data[:text] expected_segments = test_data[:segments] locale_name = if locale == :my 'Burmese' else (locale.localize.as_language_code || locale).split(',').first end it "correctly segments text in #{locale_name} by word" do iterator = described_class.new(locale) actual_segments = iterator.each_word(text).map { |word, *| word } expect(actual_segments).to eq(expected_segments) end end it 'correctly segments a combined text sample' do test_data = YAML.load_file(File.join(base_path, 'combined.yml')) iterator = described_class.new(test_data[:locale]) actual_segments = iterator.each_word(test_data[:text]).map { |word, *| word } expect(actual_segments).to eq(test_data[:segments]) end end