lib/twitter_cldr/resources/collation_tries_importer.rb (71 lines of code) (raw):
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
require 'fileutils'
module TwitterCldr
module Resources
class CollationTriesImporter < Importer
# Name of the UCA auxiliary archive that contains the fractional UCA table.
AUXILIARY_ZIP_FILE = 'CollationAuxiliary.zip'.freeze

# File name of the fractional UCA table; used both for the archive entry
# and for the copy written to the output directory.
FRACTIONAL_UCA_FILE = 'FractionalUCA_SHORT.txt'.freeze

# Comment header prepended to the generated copy of FRACTIONAL_UCA_FILE.
# It lists exactly the two kinds of modification the importer applies:
# the unfolding rules below and the commenting-out of special final values.
FRACTIONAL_UCA_SHORT_CUSTOMIZATION_HEADER = <<~END.freeze
# Two custom changes are made in this file:
#
# * Lines like '006C | 00B7; [, DB A9, 05]' are unfolded into the full
# form - '006C 00B7; [3D, 05, 05][, DB A9, 05]' (in this example,
# [3D, 05, 05] is a collation element for 0x006C). Note: This might
# break if tailoring is applied to 0x006C because with this rule
# unfolded collation element for '006C 00B7' won't be automatically
# updated when a collation element for 0x006C is changed.
#
# * SPECIAL FINAL VALUES are commented, because they are unnecessary for
# the current implementation and are breaking some UCA tests.
END

# Literal line replacements applied to the fractional UCA table:
# original (prefix-context) line => unfolded line.
# Frozen so the shared constant cannot be modified by accident.
FRACTIONAL_UCA_SHORT_CUSTOMIZATIONS = {
  '006C | 00B7; [, DB A9, 05]' => '006C 00B7; [3D, 05, 05][, DB A9, 05]',
  '006C | 0387; [, DB A9, 05]' => '006C 0387; [3D, 05, 05][, DB A9, 05]',
  '004C | 00B7; [, DB A9, 05]' => '004C 00B7; [3D, 05, 8F][, DB A9, 05]',
  '004C | 0387; [, DB A9, 05]' => '004C 0387; [3D, 05, 8F][, DB A9, 05]',
}.freeze
# Importer DSL declarations. The macros (requirement, locales, ruby_engine)
# are not defined in this file; they presumably come from Importer.

# CLDR requirement, declared with '21' (presumably the CLDR version - confirm).
requirement :cldr, '21'
# UCA requirement. AUXILIARY_ZIP_FILE is the file later resolved through
# requirements[:uca].source_path_for in copy_fractional_uca.
requirement :uca, '6.1.0', [AUXILIARY_ZIP_FILE]
# Declares a dependency on TailoringImporter (presumably it must run before
# this importer - confirm against Importer).
requirement :dependency, [TailoringImporter]
# Presumably the source of params[:locales], which execute iterates over.
locales TwitterCldr.supported_locales
# NOTE(review): looks like this restricts the importer to JRuby - confirm.
ruby_engine :jruby
private
# Runs the whole import: copies the customized fractional UCA table,
# rewrites the default trie dump, then rewrites one tailored trie dump
# for every locale listed under params[:locales].
def execute
  copy_fractional_uca
  update_default_trie_dump

  params.fetch(:locales).each { |locale| update_tailoring_trie_dump(locale) }
end
# Extracts FRACTIONAL_UCA_FILE from the UCA auxiliary zip archive, applies
# the local customizations and writes the result to
# resources/collation/<FRACTIONAL_UCA_FILE> (relative to the working
# directory).
#
# The archive entry is read and transformed *before* the destination file
# is opened for writing, so a failure while reading the archive does not
# leave a truncated output file behind.
def copy_fractional_uca
  zip_path = requirements[:uca].source_path_for(AUXILIARY_ZIP_FILE)
  entry_name = File.join('CollationAuxiliary', FRACTIONAL_UCA_FILE)

  output_dir = File.join('resources', 'collation')
  FileUtils.mkdir_p(output_dir)
  output_path = File.join(output_dir, FRACTIONAL_UCA_FILE)

  Zip::File.open(zip_path) do |zip|
    customized = customize_fractional_uca(zip.read(entry_name))
    File.write(output_path, customized)
  end
end

# Returns a customized copy of the fractional UCA table text +contents+
# (the argument itself is not modified):
#
# * every key of FRACTIONAL_UCA_SHORT_CUSTOMIZATIONS is replaced by its value;
# * every line ending with the "Special final value" marker is commented
#   out by prefixing it with '#';
# * FRACTIONAL_UCA_SHORT_CUSTOMIZATION_HEADER is prepended.
#
# Lines are split and re-joined on "\n", so the returned text carries no
# trailing newline.
def customize_fractional_uca(contents)
  unfolded = FRACTIONAL_UCA_SHORT_CUSTOMIZATIONS.reduce(contents) do |text, (existing, replacement)|
    text.gsub(existing, replacement)
  end

  lines = unfolded.split("\n").map do |line|
    line.end_with?('# Special final value for reordering token') ? "##{line}" : line
  end

  FRACTIONAL_UCA_SHORT_CUSTOMIZATION_HEADER + lines.join("\n")
end
# Saves the dump of the default trie under the locale key
# TrieLoader::DEFAULT_TRIE_LOCALE.
def update_default_trie_dump
  default_locale = TwitterCldr::Collation::TrieLoader::DEFAULT_TRIE_LOCALE

  save_trie_dump(default_locale, default_trie)
end
# Builds the trie tailored for +locale+ on top of the default trie and
# saves its dump under that locale.
def update_tailoring_trie_dump(locale)
  tailored_trie = TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, default_trie)

  save_trie_dump(locale, tailored_trie)
end
# Serializes +trie+ with Marshal and stores it at the dump path that
# TrieLoader reports for +locale+, creating parent directories as needed.
#
# The dump is written in binary mode: Marshal output is raw bytes, and a
# text-mode write may translate line endings on some platforms, which
# would corrupt the dump.
def save_trie_dump(locale, trie)
  path = TwitterCldr::Collation::TrieLoader.dump_path(locale)
  FileUtils.mkdir_p(File.dirname(path))
  File.binwrite(path, Marshal.dump(trie))
end
# Returns the default trie, loading it through TrieBuilder on first use
# and reusing the cached instance on later calls.
def default_trie
  return @default_trie if @default_trie

  @default_trie = TwitterCldr::Collation::TrieBuilder.load_default_trie
end
end
end
end