spec/collation/trie_builder_spec.rb (126 lines of code) (raw):

# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'spec_helper'

# Specs for TwitterCldr::Collation::TrieBuilder.
#
# The examples never read the real fractional collation elements table:
# `mock_default_table` (defined at the bottom of this group) sets an
# expectation on File.open that yields the `fractional_uca_short_stub` string
# defined by each nested group.
#
# The stubs are plain `<<END` heredocs, so both their bodies and their
# terminators must stay at column 0.
describe TwitterCldr::Collation::TrieBuilder do

  describe '.load_default_trie' do
    let(:trie) { described_class.load_default_trie }

    before(:each) { mock_default_table }

    it 'returns a Trie' do
      expect(trie).to be_instance_of(TwitterCldr::Utils::Trie)
    end

    it 'adds every collation element from the fractional collation elements table to the trie' do
      collation_elements_table.each do |code_points, collation_elements|
        expect(trie.get(code_points)).to eq(collation_elements)
      end
    end

    # Shortened fractional UCA table. Besides collation element entries it
    # carries comment, header and bracketed option lines, none of which appear
    # in `collation_elements_table` below.
    let(:fractional_uca_short_stub) do
<<END
# Fractional UCA Table, generated from standard UCA
# 2012-01-03, 21:52:55 GMT [MD]
# VERSION: UCA=6.1.0, UCD=6.1.0
# For a description of the format and usage, see CollationAuxiliary.html

[UCA version = 6.1.0]

0000; [,,]
030C; [, 97, 05]
215E; [20, 05, 3B][0D 75 2C, 05, 3B][22, 05, 3D]
FC63; [, D3 A9, 33][, D5 11, 33]

0E40 0E01; [72 0A, 05, 05][72 7E, 05, 3D]
0E40 0E02; [72 0C, 05, 05][72 7E, 05, 3D]

# HOMELESS COLLATION ELEMENTS
FDD0 0063; [, 97, 3D]
FDD0 0064; [, A7, 09]

# SPECIAL MAX/MIN COLLATION ELEMENTS

FFFE; [02, 02, 02] # Special LOWEST primary, for merge/interleaving
FFFF; [EF FE, 05, 05] # Special HIGHEST primary, for ranges

# Top Byte => Reordering Tokens
[top_byte 00 TERMINATOR ] # [0] TERMINATOR=1
[top_byte 01 LEVEL-SEPARATOR ] # [0] LEVEL-SEPARATOR=1
[top_byte 02 FIELD-SEPARATOR ] # [0] FIELD-SEPARATOR=1
[top_byte 03 SPACE ] # [9] SPACE=1 Cc=6 Zl=1 Zp=1 Zs=1

# VALUES BASED ON UCA
[first tertiary ignorable [,,]] # CONSTRUCTED
[last tertiary ignorable [,,]] # CONSTRUCTED
# Warning: Case bits are masked in the following
[first tertiary in secondary non-ignorable [X, X, 05]] # U+0332 COMBINING LOW LINE
[last tertiary in secondary non-ignorable [X, X, 3D]] # U+2A74 DOUBLE COLON EQUAL
END
    end

    # Expected trie content as [code_points, collation_elements] pairs. The
    # comment above each pair quotes the stub line it corresponds to; the hex
    # bytes of every weight are written here as a single decimal integer
    # (e.g. "0D 75 2C" is 0x0D752C == 881964).
    let(:collation_elements_table) do
      [
        # 0000; [,,]
        [[0], [[0, 0, 0]]],

        # 030C; [, 97, 05]
        [[780], [[0, 151, 5]]],

        # 215E; [20, 05, 3B][0D 75 2C, 05, 3B][22, 05, 3D]
        [[8542], [[32, 5, 59], [881964, 5, 59], [34, 5, 61]]],

        # FC63; [, D3 A9, 33][, D5 11, 33]
        [[64611], [[0, 54185, 51], [0, 54545, 51]]],

        # 0E40 0E01; [72 0A, 05, 05][72 7E, 05, 3D]
        [[3648, 3585], [[29194, 5, 5], [29310, 5, 61]]],

        # 0E40 0E02; [72 0C, 05, 05][72 7E, 05, 3D]
        [[3648, 3586], [[29196, 5, 5], [29310, 5, 61]]],

        # FDD0 0063; [, 97, 3D]
        [[64976, 99], [[0, 151, 61]]],

        # FDD0 0064; [, A7, 09]
        [[64976, 100], [[0, 167, 9]]],

        # FFFE; [02, 02, 02]
        [[65534], [[2, 2, 2]]],

        # FFFF; [EF FE, 05, 05]
        [[65535], [[61438, 5, 5]]]
      ]
    end
  end

  # YAML document used as canned tailoring data by the groups below.
  #
  # NOTE(review): the blank line inside the quoted :tailored_table scalar is
  # what makes YAML load the two entries as separate lines (a single line
  # break inside a quoted scalar folds into a space). The
  # 'tailors elements in the trie' example expects both 0491 and 0490 to be
  # tailored, so the entries are kept on separate lines here - confirm the
  # exact spacing against the real tailoring resources.
  let(:tailoring_resource_stub) do
<<END
---
:collator_options:
  :case_first: upper
:tailored_table: ! '0491; [5C1B, 5, 5]

  0490; [5C1B, 5, 86]'
:suppressed_contractions: ГК
END
  end

  let(:tailoring_data) { YAML.load(tailoring_resource_stub) }

  describe '.load_tailored_trie' do
    let(:locale)        { :xxx }
    let(:fallback)      { described_class.load_default_trie }
    let(:tailored_trie) { described_class.load_tailored_trie(locale, fallback) }

    before :each do
      mock_default_table
      # Hand the canned tailoring data to the builder for this locale.
      expect(described_class).to receive(:tailoring_data).with(locale).and_return(tailoring_data)
    end

    it 'returns a TrieWithFallback' do
      expect(tailored_trie).to be_instance_of(TwitterCldr::Collation::TrieWithFallback)
    end

    it 'tailors elements in the trie' do
      expect(fallback.get([0x0491])).to eq([[0x5C1A, 5, 9], [0, 0xDBB9, 9]])
      expect(fallback.get([0x0490])).to eq([[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]])

      expect(tailored_trie.get([0x0491])).to eq([[0x5C1B, 5, 5]])
      expect(tailored_trie.get([0x0490])).to eq([[0x5C1B, 5, 0x86]])
    end

    it 'makes contractions available in the tailored trie' do
      expect(tailored_trie.get([0x491, 0x306])).to eq([[0x5C, 0xDB, 9]])
      expect(tailored_trie.get([0x415, 0x306])).to eq([[0x5C36, 5, 0x8F]])
    end

    it 'suppresses required contractions' do
      # The fallback matches both code points of each contraction (prefix size 2) ...
      expect(fallback.find_prefix([0x41A, 0x301]).first(2)).to eq([[[0x5CCC, 5, 0x8F]], 2])
      expect(fallback.find_prefix([0x413, 0x301]).first(2)).to eq([[[0x5C30, 5, 0x8F]], 2])

      # ... while the tailored trie only matches the first code point (prefix size 1).
      expect(tailored_trie.find_prefix([0x41A, 0x301]).first(2)).to eq([[[0x5C6C, 5, 0x8F]], 1])
      expect(tailored_trie.find_prefix([0x413, 0x301]).first(2)).to eq([[[0x5C1A, 5, 0x8F]], 1])
    end

    it 'do not copy other collation elements from the fallback' do
      # each_slice(1) wraps every code point in its own one-element array.
      [0x301, 0x306, 0x41A, 0x413, 0x415].each_slice(1) do |code_points|
        expect(tailored_trie.get(code_points)).not_to be_nil
        # `equal` checks object identity: the very same object must come back
        # from both tries.
        expect(tailored_trie.get(code_points)).to equal(fallback.get(code_points))
      end
    end

    let(:fractional_uca_short_stub) do
<<END
# collation elements from default fractional collation elements table
0301; [, 8D, 05]
0306; [, 91, 05]
041A; [5C 6C, 05, 8F] # К
0413; [5C 1A, 05, 8F] # Г
0415; [5C 34, 05, 8F] # Е

# tailored (in UK locale) with "Г < ґ <<< Ґ"
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ

# contraction for a tailored collation element
0491 0306; [5C, DB, 09] # ґ̆

# contractions suppressed in tailoring (for RU locale)
041A 0301; [5C CC, 05, 8F] # Ќ
0413 0301; [5C 30, 05, 8F] # Ѓ

# contractions non-suppressed in tailoring
0415 0306; [5C 36, 05, 8F] # Ӗ
END
    end
  end

  describe '.tailoring_data' do
    let(:locale) { :fu }

    it 'loads tailoring data' do
      expect(TwitterCldr).to receive(:get_resource).with(:collation, :tailoring, locale).and_return(tailoring_data)
      expect(described_class.tailoring_data(locale)).to eq(tailoring_data)
    end
  end

  # Expects File.open to be called with the fractional UCA table path in read
  # mode and yields the current group's `fractional_uca_short_stub` string to
  # the caller's block in place of a file handle.
  def mock_default_table
    expect(File).to(
      receive(:open)
        .with(described_class::FRACTIONAL_UCA_SHORT_PATH, 'r')
        .and_yield(fractional_uca_short_stub)
    )
  end

end