lib/twitter_cldr/js/mustache/implementation/shared/bidi.coffee (342 lines of code) (raw):
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
class TwitterCldr.Bidi
MAX_DEPTH = 62
@bidi_classes = `{{{bidi_classes}}}`
constructor: (options = {}) ->
@string_arr = options.string_arr || options.types
@types = options.types || []
@levels = []
@runs = []
@direction = options.direction
@default_direction = options.default_direction || "LTR"
@length = @types.length
this.run_bidi()
@bidi_class_for: (code_point) ->
for bidi_class, ranges of @bidi_classes
for range_offset, range_list of ranges
for range in range_list
start = range
end = start + parseInt(range_offset)
if (code_point >= start) && (code_point <= end)
return bidi_class
null
@from_string: (str, options = {}) ->
string_arr = TwitterCldr.Utilities.unpack_string(str)
options.types ||= this.compute_types(string_arr)
options.string_arr ||= string_arr
new TwitterCldr.Bidi(options)
@from_type_array: (types, options = {}) ->
options.types ||= types
new TwitterCldr.Bidi(options)
@compute_types: (arr) ->
TwitterCldr.Bidi.bidi_class_for(code_point) for code_point in arr
toString: ->
TwitterCldr.Utilities.pack_array(@string_arr)
reorder_visually: ->
throw "No string given!" unless @string_arr
# Do this explicitly so we can also find the maximum depth at the
# same time.
max = 0
lowest_odd = MAX_DEPTH + 1
for level in @levels
max = TwitterCldr.Utilities.max([level, max])
lowest_odd = TwitterCldr.Utilities.min([lowest_odd, level]) unless TwitterCldr.Utilities.is_even(level)
# Reverse the runs starting with the deepest.
for depth in [max...0]
start = 0
while start < @levels.length
# Find the start of a run >= DEPTH.
start += 1 while start < @levels.length && @levels[start] < depth
break if start == @levels.length
# Find the end of the run.
finish = start + 1
finish += 1 while finish < @levels.length && @levels[finish] >= depth
# Reverse this run.
for i in [0...((finish - start) / 2)]
tmpb = @levels[finish - i - 1]
@levels[finish - i - 1] = @levels[start + i]
@levels[start + i] = tmpb
tmpo = @string_arr[finish - i - 1]
@string_arr[finish - i - 1] = @string_arr[start + i]
@string_arr[start + i] = tmpo
# Handle the next run.
start = finish + 1
this
compute_paragraph_embedding_level: ->
# First check to see if the user supplied a directionality override.
if ["LTR", "RTL"].indexOf(@direction) > -1
if @direction == "LTR" then 0 else 1
else
# This implements rules P2 and P3.
# (Note that we don't need P1, as the user supplies
# a paragraph.)
for type in @types
return 0 if type == "L"
return 1 if type == "R"
if @default_direction == "LTR" then 0 else 1
compute_explicit_levels: ->
current_embedding = @base_embedding
# The directional override is a Character directionality
# constant. -1 means there is no override.
directional_override = -1
# The stack of pushed embeddings, and the stack pointer.
# Note that because the direction is inherent in the depth,
# and because we have a bit left over in a byte, we can encode
# the override, if any, directly in this value on the stack.
embedding_stack = []
@formatter_indices ||= []
sp = 0
for i in [0...@length]
is_ltr = false
is_special = true
is_ltr = @types[i] == "LRE" || @types[i] == "LRO"
switch @types[i]
when "RLE", "RLO", "LRE", "LRO"
new_embedding = if is_ltr
# Least greater even.
((current_embedding & ~1) + 2)
else
# Least greater odd.
((current_embedding + 1) | 1)
# FIXME: we don't properly handle invalid pushes.
if new_embedding < MAX_DEPTH
# The new level is valid. Push the old value.
# See above for a comment on the encoding here.
current_embedding |= -0x80 if (directional_override != -1)
embedding_stack[sp] = current_embedding
current_embedding = new_embedding
sp += 1
directional_override = if @types[i] == "LRO"
"L"
else if @types[i] == "RLO"
"R"
else
-1
when "PDF"
# FIXME: we don't properly handle a pop with a corresponding
# invalid push.
# If sp === 0, we saw a pop without a push. Just ignore it.
if sp > 0
sp -= 1
new_embedding = embedding_stack[sp]
current_embedding = new_embedding & 0x7f
directional_override = if new_embedding < 0
(new_embedding & 1) == 0 ? "L" : "R"
else
-1
else
is_special = false
@levels[i] = current_embedding
if is_special
# Mark this character for removal.
@formatter_indices.push(i)
else if directional_override != -1
@types[i] = directional_override
# Remove the formatting codes and update both the arrays
# and 'length'. It would be more efficient not to remove
# these codes, but it is also more complicated. Also, the
# Unicode algorithm reference does not properly describe
# how this is to be done -- from what I can tell, their suggestions
# in this area will not yield the correct results.
output = 0
input = 0
size = @formatter_indices.length
for i in [0..size]
next_fmt = if i == size
@length
else
@formatter_indices[i]
len = next_fmt - input
# Non-formatter codes are from 'input' to 'next_fmt'.
TwitterCldr.Utilities.arraycopy(@levels, input, @levels, output, len)
TwitterCldr.Utilities.arraycopy(@types, input, @types, output, len)
output += len
input = next_fmt + 1
@length -= @formatter_indices.length
compute_runs: ->
run_count = 0
current_embedding = @base_embedding
for i in [0...@length]
if @levels[i] != current_embedding
current_embedding = @levels[i]
run_count += 1
# This may be called multiple times. If so, and if
# the number of runs has not changed, then don't bother
# allocating a new array.
where = 0
last_run_start = 0
current_embedding = @base_embedding
for i in [0...@length]
if @levels[i] != current_embedding
@runs[where] = last_run_start
where += 1
last_run_start = i
current_embedding = @levels[i]
@runs[where] = last_run_start
resolve_weak_types: ->
run_count = @runs.length
previous_level = @base_embedding
for run_idx in [0...run_count]
start = this.get_run_start(run_idx)
finish = this.get_run_limit(run_idx)
level = this.get_run_level(run_idx) || 0
# These are the names used in the Bidi algorithm.
sor = if TwitterCldr.Utilities.is_even(TwitterCldr.Utilities.max([previous_level, level])) then "L" else "R"
next_level = if run_idx == (run_count - 1)
@base_embedding
else
this.get_run_level(run_idx + 1) || 0
eor = if TwitterCldr.Utilities.is_even(TwitterCldr.Utilities.max([level, next_level])) then "L" else "R"
prev_type = sor
prev_strong_type = sor
for i in [start...finish]
next_type = if i == (finish - 1) then eor else @types[i + 1]
# Rule W1: change NSM to the prevailing direction.
if @types[i] == "NSM"
@types[i] = prev_type
else
prev_type = @types[i]
# Rule W2: change EN to AN in some cases.
if @types[i] == "EN"
if prev_strong_type == "AL"
@types[i] = "AN"
else if @types[i] == "L" || @types[i] == "R" || @types[i] == "AL"
prev_strong_type = @types[i]
# Rule W3: change AL to R.
if @types[i] == "AL"
@types[i] = "R"
# Rule W4: handle separators between two numbers.
if prev_type == "EN" && next_type == "EN"
if @types[i] == "ES" || @types[i] == "CS"
@types[i] = nextType
else if prev_type == "AN" && next_type == "AN" && @types[i] == "CS"
@types[i] = next_type
# Rule W5: change a sequence of european terminators to
# european numbers, if they are adjacent to european numbers.
# We also include BN characters in this.
if @types[i] == "ET" || @types[i] == "BN"
if prev_type == "EN"
@types[i] = prev_type
else
# Look ahead to see if there is an EN terminating this
# sequence of ETs.
j = i + 1
while j < finish && @types[j] == "ET" || @types[j] == "BN"
j += 1
if j < finish && @types[j] == "EN"
# Change them all to EN now.
for k in [i...j]
@types[k] = "EN"
# Rule W6: separators and terminators change to ON.
# Again we include BN.
if @types[i] == "ET" || @types[i] == "CS" || @types[i] == "BN"
@types[i] = "ON"
# Rule W7: change european number types.
if prev_strong_type == "L" && @types[i] == "EN"
@types[i] = prev_strong_type
previous_level = level
return
get_run_count: ->
@runs.length
get_run_level: (which) ->
@levels[@runs[which]]
get_run_limit: (which) ->
if which == (@runs.length - 1)
@length
else
@runs[which + 1]
get_run_start: (which) ->
@runs[which]
resolve_implicit_levels: ->
# This implements rules I1 and I2.
for i in [0...@length]
if (@levels[i] & 1) == 0
if @types[i] == "R"
@levels[i] += 1
else if @types[i] == "AN" || @types[i] == "EN"
@levels[i] += 2
else
if @types[i] == "L" || @types[i] == "AN" || @types[i] == "EN"
@levels[i] += 1
return
resolve_neutral_types: ->
# This implements rules N1 and N2.
run_count = this.get_run_count()
previous_level = @base_embedding
for run in [0...run_count]
start = this.get_run_start(run)
finish = this.get_run_limit(run)
level = this.get_run_level(run)
continue unless level?
embedding_direction = if TwitterCldr.Utilities.is_even(level) then "L" else "R"
# These are the names used in the Bidi algorithm.
sor = if TwitterCldr.Utilities.is_even(TwitterCldr.Utilities.max([previous_level, level])) then "L" else "R"
next_level = if run == (run_count - 1)
@base_embedding
else
this.get_run_level(run + 1)
eor = if TwitterCldr.Utilities.is_even(TwitterCldr.Utilities.max([level, next_level])) then "L" else "R"
prev_strong = sor
neutral_start = -1
for i in [start..finish]
new_strong = -1
this_type = if i == finish then eor else @types[i]
switch this_type
when "L"
new_strong = "L"
when "R", "AN", "EN"
new_strong = "R"
when "BN", "ON", "S", "B", "WS"
neutral_start = i if neutral_start == -1
# If we see a strong character, update all the neutrals.
if new_strong != -1
if neutral_start != -1
override = if prev_strong == new_strong then prev_strong else embedding_direction
for j in [neutral_start...i]
@types[j] = override
prev_strong = new_strong
neutral_start = -1
previous_level = level
return
reinsert_formatting_codes: ->
if @formatter_indices? && @formatter_indices.length > 0
input = @length
output = @levels.length
# Process from the end as we are copying the array over itself here.
for index in [(@formatter_indices.length - 1)..0]
next_fmt = @formatter_indices[index]
# nextFmt points to a location in the original array. So,
# nextFmt+1 is the target of our copying. output is the location
# to which we last copied, thus we can derive the length of the
# copy from it.
len = output - next_fmt - 1
output = next_fmt
input -= len
# Note that we no longer need 'types' at this point, so we
# only edit 'levels'.
if next_fmt + 1 < @levels.length
TwitterCldr.Utilities.arraycopy(@levels, input, @levels, next_fmt + 1, len)
# Now set the level at the reinsertion point.
right_level = if output == @levels.length - 1
@base_embedding
else
if @levels[output + 1]? then @levels[output + 1] else 0
left_level = if input == 0
@base_embedding
else
if @levels[input]? then @levels[input] else 0
@levels[output] = TwitterCldr.Utilities.max([left_level, right_level])
@length = @levels.length
run_bidi: ->
@base_embedding = this.compute_paragraph_embedding_level()
this.compute_explicit_levels()
this.compute_runs()
this.resolve_weak_types()
this.resolve_neutral_types()
this.resolve_implicit_levels()
this.reinsert_formatting_codes()
# After resolving the implicit levels, the number
# of runs may have changed.
this.compute_runs()
return