spec/collation/collator_spec.rb (268 lines of code) (raw):
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
require 'spec_helper'
describe TwitterCldr::Collation::Collator do
# Empty trie that the stubbed loaders hand back in the examples below.
let(:trie) { TwitterCldr::Utils::Trie.new }

# Tries are cached in class-level instance variables (see the clear_* helpers
# at the bottom of this group), so reset them around every example and once
# more when the whole group is done.
before { clear_tries_cache }
after(:all) { clear_tries_cache }
# .default_trie obtains its trie from TrieLoader, caches it and locks it.
describe '.default_trie' do
  before do
    clear_default_trie_cache
    # A message expectation (not a plain stub): the loader must be hit exactly once.
    expect(TwitterCldr::Collation::TrieLoader).to(
      receive(:load_default_trie).and_return(trie)
    )
  end

  it 'returns default fractional collation elements trie' do
    loaded = described_class.default_trie
    expect(loaded).to eq(trie)
  end

  it 'loads the trie only once' do
    first_call = described_class.default_trie
    second_call = described_class.default_trie
    # Identity check: both calls must return the very same object.
    expect(first_call).to equal(second_call)
  end

  it 'locks the trie' do
    loaded = described_class.default_trie
    expect(loaded).to be_locked
  end
end
# .tailored_trie(locale) obtains a locale-specific trie from TrieLoader (which
# is given the locale and the default trie), caches it and locks it.
describe '.tailored_trie' do
  let(:locale) { :ru }

  before(:each) do
    clear_tailored_tries_cache
    # Keep the default trie out of the picture: it is only passed through to the loader.
    allow(described_class).to receive(:default_trie).and_return(trie)
    # A message expectation (not a plain stub): the loader must be hit exactly once.
    expect(TwitterCldr::Collation::TrieLoader).to(
      receive(:load_tailored_trie)
        .with(locale, described_class.default_trie)
        .and_return(trie)
    )
  end

  # The description used to say "default", a copy-paste from the .default_trie
  # group; this example is about the tailored trie.
  it 'returns tailored fractional collation elements trie' do
    expect(described_class.tailored_trie(locale)).to eq(trie)
  end

  it 'loads the trie only once' do
    expect(described_class.tailored_trie(locale).object_id).to eq(described_class.tailored_trie(locale).object_id)
  end

  it 'locks the trie' do
    expect(described_class.tailored_trie(locale)).to be_locked
  end
end
# Checks how the constructor stores (and converts) the locale it is given.
describe '#initialize' do
  before do
    # Replace .new so that each instance has #load_trie stubbed *before*
    # #initialize runs; the constructor then never touches a real trie.
    allow(described_class).to receive(:new) do |*ctor_args|
      instance = described_class.allocate
      allow(instance).to receive(:load_trie).and_return(trie)
      instance.send(:initialize, *ctor_args)
      instance
    end
  end

  context 'without locale' do
    it 'initializes default collator' do
      default_collator = described_class.new
      expect(default_collator.locale).to be_nil
    end
  end

  context 'with locale' do
    it 'initialized tailored collator with provided locale' do
      tailored_collator = described_class.new(:ru)
      expect(tailored_collator.locale).to eq(:ru)
    end

    it 'converts locale' do
      # :no is expected to come back as :nb.
      converted_collator = described_class.new(:no)
      expect(converted_collator.locale).to eq(:nb)
    end
  end
end
# Collation elements can be requested for a String or for an Array of Integer
# code points; both inputs must produce the same elements.
describe '#get_collation_elements' do
  let(:collator) { described_class.new }
  let(:string) { 'abc' }
  let(:code_points) { [0x61, 0x62, 0x63] } # the code points of 'abc', as Integers
  let(:collation_elements) { [[39, 5, 5], [41, 5, 5], [43, 5, 5]] }

  before :each do
    # Every CodePoint instance answers 0 from #combining_class_for.
    allow_any_instance_of(TwitterCldr::Shared::CodePoint).to(
      receive(:combining_class_for).and_return(0)
    )
  end

  it 'returns collation elements for a string' do
    expect(collator.get_collation_elements(string)).to eq(collation_elements)
  end

  # The description used to claim the code points were "represented as hex
  # strings", but the fixture above is an array of Integers.
  it 'returns collation elements for an array of integer code points' do
    expect(collator.get_collation_elements(code_points)).to eq(collation_elements)
  end

  context('with an invalid string') do
    let(:string) { "\u0450\u0D80" }

    it 'raises a specific error if passed invalid unicode characters' do
      expect { collator.get_collation_elements(string) }.to(
        raise_error(TwitterCldr::Collation::UnexpectedCodePointError)
      )
    end
  end
end
# #get_sort_key turns the collation elements of its input into a sort key via
# SortKeyBuilder, passing along the case-first and maximum-level options.
describe '#get_sort_key' do
  let(:collator) { described_class.new }
  let(:string) { 'abc' }
  let(:code_points) { [0x61, 0x62, 0x63] } # the code points of 'abc', as Integers
  let(:collation_elements) { [[39, 5, 5], [41, 5, 5], [43, 5, 5]] }
  let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }

  context 'with a loaded trie' do
    before(:each) { expect(TwitterCldr::Collation::TrieLoader).to receive(:load_default_trie).and_return(trie) }

    describe 'calculating sort key' do
      before(:each) do
        # Without a tailored locale both options reach the builder as nil.
        expect(TwitterCldr::Collation::SortKeyBuilder).to(
          receive(:build)
            .with(collation_elements, case_first: nil, maximum_level: nil)
            .and_return(sort_key)
        )
      end

      it 'calculates sort key for a string' do
        expect(collator).to receive(:get_collation_elements).with(string).and_return(collation_elements)
        expect(collator.get_sort_key(string)).to eq(sort_key)
      end

      # The description used to claim the code points were "represented as hex
      # strings", but the fixture above is an array of Integers.
      it 'calculates sort key for an array of integer code points' do
        expect(collator).to receive(:get_collation_elements).with(code_points).and_return(collation_elements)
        expect(collator.get_sort_key(code_points)).to eq(sort_key)
      end
    end

    describe 'uses tailoring options' do
      let(:case_first) { :upper }
      let(:locale) { :uk }
      let(:maximum_level) { 2 }
      # Tailored collator; built lazily, i.e. only after the expectations below are in place.
      let(:collator) { described_class.new(locale) }

      # Setup shared by both examples (previously duplicated in each of them).
      before(:each) do
        expect(TwitterCldr::Collation::TrieLoader).to(
          receive(:load_tailored_trie).with(locale, trie).and_return(TwitterCldr::Utils::Trie.new)
        )
        # The tailoring data is where the collator's case_first option comes from.
        expect(TwitterCldr::Collation::TrieBuilder).to(
          receive(:tailoring_data).with(locale).and_return(collator_options: { case_first: case_first })
        )
        expect(collator).to receive(:get_collation_elements).with(code_points).and_return(collation_elements)
      end

      it 'passes case-first sort option to sort key builder' do
        expect(TwitterCldr::Collation::SortKeyBuilder).to(
          receive(:build)
            .with(collation_elements, case_first: case_first, maximum_level: nil)
            .and_return(sort_key)
        )
        expect(collator.get_sort_key(code_points)).to eq(sort_key)
      end

      it 'passes maximum_level option to sort key builder' do
        expect(TwitterCldr::Collation::SortKeyBuilder).to(
          receive(:build)
            .with(collation_elements, case_first: case_first, maximum_level: maximum_level)
            .and_return(sort_key)
        )
        expect(collator.get_sort_key(code_points, maximum_level: maximum_level)).to eq(sort_key)
      end
    end
  end

  context('with an invalid string') do
    let(:string) { "\u0450\u0D80" }

    it 'raises a specific error if passed invalid unicode characters' do
      expect { collator.get_sort_key(string) }.to(
        raise_error(TwitterCldr::Collation::UnexpectedCodePointError)
      )
    end
  end
end
# #compare orders two strings by their sort keys and short-circuits on equal input.
describe '#compare' do
  let(:collator) { described_class.new }
  let(:sort_key) { [1, 3, 8, 9] }
  let(:another_sort_key) { [6, 8, 9, 2] }

  # No real trie is needed here: sort keys are stubbed per example.
  before { allow(described_class).to receive(:default_trie).and_return(trie) }

  it 'compares strings by sort keys' do
    { 'foo' => sort_key, 'bar' => another_sort_key }.each do |text, key|
      stub_sort_key(collator, text, key)
    end

    expect(collator.compare('foo', 'bar')).to eq(-1)
    expect(collator.compare('bar', 'foo')).to eq(1)
  end

  it 'returns 0 without computing sort keys if the strings are equal' do
    expect(collator).not_to receive(:get_sort_key)
    expect(collator.compare('foo', 'foo')).to eq(0)
  end
end
# #sort and #sort! order strings by their (mocked) sort keys.
describe 'sorting' do
  let(:collator) { described_class.new }
  # Pairs of [string, sort key]; the keys are what drives the expected order.
  let(:sort_keys) do
    [
      ['aaa', [1, 2, 3]],
      ['abc', [1, 3, 4]],
      ['bca', [2, 5, 9]]
    ]
  end
  let(:array) { ['bca', 'aaa', 'abc'] }
  let(:sorted) { ['aaa', 'abc', 'bca'] }

  before do
    allow(described_class).to receive(:default_trie).and_return(trie)
    # Message expectations: the sort key of every string must be requested.
    sort_keys.each do |text, key|
      mock_sort_key(collator, text, key)
    end
  end

  describe '#sort' do
    it 'sorts strings by sort keys' do
      result = collator.sort(array)
      expect(result).to eq(sorted)
    end

    it 'does not change the original array' do
      expect { collator.sort(array) }.not_to change { array }
    end
  end

  describe '#sort!' do
    it 'sorts strings array by sort keys in-place ' do
      collator.sort!(array)
      expect(array).to eq(sorted)
    end
  end
end
# Exercises tailoring with tries built from the two small stub tables defined
# at the bottom of this group, instead of stubbing collation elements directly.
# The stubs are declared with `let` after the examples; that works because
# `let` values are resolved lazily when first referenced.
describe 'tailoring support' do
before(:each) do
# Serve the parsed YAML stub as the tailoring resource of the fake locale.
allow(TwitterCldr).to(
receive(:get_resource)
.with(:collation, :tailoring, locale)
.and_return(YAML.load(tailoring_resource_stub))
)
# Opening the fractional UCA table yields the stub string to the caller's block.
expect(File).to(
receive(:open)
.with(TwitterCldr::Collation::TrieBuilder::FRACTIONAL_UCA_SHORT_PATH, 'r')
.and_yield(fractional_uca_short_stub)
)
# Route TrieLoader's loading calls to TrieBuilder, which is expected to build
# the tries from the stubbed inputs above (TrieBuilder itself is not shown here).
expect(TwitterCldr::Collation::TrieLoader).to receive(:load_default_trie) { TwitterCldr::Collation::TrieBuilder.load_default_trie }
expect(TwitterCldr::Collation::TrieLoader).to receive(:load_tailored_trie) { |*args| TwitterCldr::Collation::TrieBuilder.load_tailored_trie(*args) }
# Turn normalization into an identity function: code points pass through unchanged.
allow(TwitterCldr::Normalization).to receive(:normalize_code_points) { |code_points| code_points }
end
let(:locale) { :some_locale }
let(:default_collator) { described_class.new }
let(:tailored_collator) { described_class.new(locale) }
describe 'tailoring rules support' do
# For 0x490 and 0x491 the default collator is expected to return the elements
# from the fractional table stub, the tailored one those from the tailoring stub.
it 'tailored collation elements are used' do
expect(default_collator.get_collation_elements([0x490])).to eq([[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]])
expect(tailored_collator.get_collation_elements([0x490])).to eq([[0x5C1B, 5, 0x86]])
expect(default_collator.get_collation_elements([0x491])).to eq([[0x5C1A, 5, 9], [0, 0xDBB9, 9]])
expect(tailored_collator.get_collation_elements([0x491])).to eq([[0x5C1B, 5, 5]])
end
# The 0491 0306 contraction exists only in the fractional table stub; both
# collators are expected to return it.
it 'original contractions for tailored elements are applied' do
expect(default_collator.get_collation_elements([0x491, 0x306])).to eq([[0x5C, 0xDB, 9]])
expect(tailored_collator.get_collation_elements([0x491, 0x306])).to eq([[0x5C, 0xDB, 9]])
end
end
describe 'contractions suppressing support' do
# 041A is listed under :suppressed_contractions, so the tailored collator is
# expected to return the separate elements of 041A and 0301 rather than the
# 041A 0301 contraction that the default collator uses.
it 'suppressed contractions are ignored' do
expect(default_collator.get_collation_elements([0x41A, 0x301])).to eq([[0x5CCC, 5, 0x8F]])
expect(tailored_collator.get_collation_elements([0x41A, 0x301])).to eq([[0x5C6C, 5, 0x8F], [0, 0x8D, 5]])
end
# 0415 is not in the suppressed list, so its contraction with 0306 is
# expected from both collators.
it 'non-suppressed contractions are used' do
expect(default_collator.get_collation_elements([0x415, 0x306])).to eq([[0x5C36, 5, 0x8F]])
expect(tailored_collator.get_collation_elements([0x415, 0x306])).to eq([[0x5C36, 5, 0x8F]])
end
end
# Stub of the fractional UCA table. Everything between the heredoc markers is
# data consumed at runtime, including the lines that start with '#'.
let(:fractional_uca_short_stub) do
<<END
# collation elements from default fractional collation elements table
0301; [, 8D, 05]
0306; [, 91, 05]
041A; [5C 6C, 05, 8F] # К
0413; [5C 1A, 05, 8F] # Г
0415; [5C 34, 05, 8F] # Е
# tailored (in UK locale) with "Г < ґ <<< Ґ"
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
# contraction for a tailored collation element
0491 0306; [5C, DB, 09] # ґ̆
# contractions suppressed in tailoring (for RU locale)
041A 0301; [5C CC, 05, 8F] # Ќ
0413 0301; [5C 30, 05, 8F] # Ѓ
# contractions non-suppressed in tailoring
0415 0306; [5C 36, 05, 8F] # Ӗ
END
end
# Stub of the YAML tailoring resource for the fake locale: tailored elements
# for 0491/0490 plus the characters whose contractions are suppressed.
let(:tailoring_resource_stub) do
<<END
---
:tailored_table: ! '0491; [5C1B, 5, 5]
0490; [5C1B, 5, 86]'
:suppressed_contractions: ГК
...
END
end
end
# Sets a message expectation on +collator+: #get_sort_key must be called with
# +string+ (an unmet expectation fails the example) and answers +sort_key+.
def mock_sort_key(collator, string, sort_key)
  sort_key_message = receive(:get_sort_key).with(string).and_return(sort_key)
  expect(collator).to(sort_key_message)
end
# Stubs #get_sort_key on +collator+ to answer +sort_key+ when called with
# +string+; unlike mock_sort_key, the call is allowed but not required.
def stub_sort_key(collator, string, sort_key)
  sort_key_message = receive(:get_sort_key).with(string).and_return(sort_key)
  allow(collator).to(sort_key_message)
end
# Resets both class-level trie caches of the described class.
def clear_tries_cache
  clear_default_trie_cache
  clear_tailored_tries_cache
end

# Sets the described class's @default_trie instance variable to nil.
def clear_default_trie_cache
  described_class.instance_eval { @default_trie = nil }
end

# Sets the described class's @tailored_tries_cache instance variable to nil.
def clear_tailored_tries_cache
  described_class.instance_eval { @tailored_tries_cache = nil }
end
end