spec/tokenizers/calendars/date_tokenizer_spec.rb (33 lines of code) (raw):
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
require 'spec_helper'
describe TwitterCldr::Tokenizers::DateTokenizer do
describe "#tokens" do
it "should tokenize plaintext segments correctly (i.e. Spanish)" do
data_reader = TwitterCldr::DataReaders::DateDataReader.new(:es, type: :full)
got = data_reader.tokenizer.tokenize(data_reader.pattern)
expected = [
{ value: "EEEE", type: :pattern },
{ value: ", ", type: :plaintext },
{ value: "d", type: :pattern },
{ value: " 'de' ", type: :plaintext },
{ value: "MMMM", type: :pattern },
{ value: " 'de' ", type: :plaintext },
{ value: "y", type: :pattern }
]
check_token_list(got, expected)
end
it "should tokenize patterns with non-latin characters correctly (i.e. Japanese)" do
data_reader = TwitterCldr::DataReaders::DateDataReader.new(:ja, type: :full)
got = data_reader.tokenizer.tokenize(data_reader.pattern)
expected = [
{ value: "y", type: :pattern },
{ value: "年", type: :plaintext },
{ value: "M", type: :pattern },
{ value: "月", type: :plaintext },
{ value: "d", type: :pattern },
{ value: "日", type: :plaintext },
{ value: "EEEE", type: :pattern }
]
check_token_list(got, expected)
end
end
end