From cdd7288978ebf0253fb3f0c622a8bc882ddfef91 Mon Sep 17 00:00:00 2001 From: tomoya ishida Date: Sun, 15 Dec 2024 20:50:39 +0900 Subject: [PATCH] Refactor Reline::Unicode ed_ vi_ em_ methods (#720) * Refactor Reline::Unicode vi_ ed_ em_ methods * Make Reline::Unicode's vi_ ed_ em_ methods encoding-safe --- lib/reline/unicode.rb | 446 ++++++++---------------------------- test/reline/test_unicode.rb | 61 +++-- 2 files changed, 145 insertions(+), 362 deletions(-) diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb index f8eb365069..ab7708a5fe 100644 --- a/lib/reline/unicode.rb +++ b/lib/reline/unicode.rb @@ -262,375 +262,126 @@ def self.get_prev_mbchar_size(line, byte_pointer) end def self.em_forward_word(line, byte_pointer) - byte_size = 0 - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - byte_size += size - end - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size += size - end - byte_size + gcs = line.byteslice(byte_pointer..).grapheme_clusters + nonwords = gcs.take_while { |c| !word_character?(c) } + words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) } + nonwords.sum(&:bytesize) + words.sum(&:bytesize) end def self.em_forward_word_with_capitalization(line, byte_pointer) - byte_size = 0 - new_str = String.new - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - new_str += mbchar - byte_size += size - end - first = true - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line,
byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - if first - new_str += mbchar.upcase - first = false - else - new_str += mbchar.downcase - end - byte_size += size - end - [byte_size, new_str] + gcs = line.byteslice(byte_pointer..).grapheme_clusters + nonwords = gcs.take_while { |c| !word_character?(c) } + words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) } + [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize] end def self.em_backward_word(line, byte_pointer) - byte_size = 0 - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - byte_size += size - end - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size += size - end - byte_size + gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse + nonwords = gcs.take_while { |c| !word_character?(c) } + words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) } + nonwords.sum(&:bytesize) + words.sum(&:bytesize) end def self.em_big_backward_word(line, byte_pointer) - byte_size = 0 - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - break if mbchar =~ /\S/ - byte_size += size - end - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - break if mbchar =~ /\s/ - byte_size += size - end - byte_size + gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse + spaces = gcs.take_while { |c| 
space_character?(c) } + nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) } + spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize) end def self.ed_transpose_words(line, byte_pointer) - right_word_start = nil - size = get_next_mbchar_size(line, byte_pointer) - mbchar = line.byteslice(byte_pointer, size) - if size.zero? - # ' aaa bbb [cursor]' - byte_size = 0 - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - byte_size -= size - end - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size -= size - end - right_word_start = byte_pointer + byte_size - byte_size = 0 - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size += size - end - after_start = byte_pointer + byte_size - elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - # ' aaa bb[cursor]b' - byte_size = 0 - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size -= size - end - right_word_start = byte_pointer + byte_size - byte_size = 0 - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size += size - end - after_start = byte_pointer + byte_size - else - byte_size = 0 - while (line.bytesize - 1) > (byte_pointer + 
byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - byte_size += size - end - if (byte_pointer + byte_size) == (line.bytesize - 1) - # ' aaa bbb [cursor] ' - after_start = line.bytesize - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - byte_size -= size - end - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size -= size - end - right_word_start = byte_pointer + byte_size - else - # ' aaa [cursor] bbb ' - right_word_start = byte_pointer + byte_size - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size += size - end - after_start = byte_pointer + byte_size - end - end - byte_size = right_word_start - byte_pointer - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ - byte_size -= size - end - middle_start = byte_pointer + byte_size - byte_size = middle_start - byte_pointer - while 0 < (byte_pointer + byte_size) - size = get_prev_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size - size, size) - break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ - byte_size -= size + gcs = line.byteslice(0, byte_pointer).grapheme_clusters + pos = gcs.size + gcs += 
line.byteslice(byte_pointer..).grapheme_clusters + pos += 1 while pos < gcs.size && !word_character?(gcs[pos]) + if pos == gcs.size # 'aaa bbb [cursor] ' + pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1]) + second_word_end = gcs.size + else # 'aaa [cursor]bbb' + pos += 1 while pos < gcs.size && word_character?(gcs[pos]) + second_word_end = pos + end + pos -= 1 while pos > 0 && word_character?(gcs[pos - 1]) + second_word_start = pos + pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1]) + first_word_end = pos + pos -= 1 while pos > 0 && word_character?(gcs[pos - 1]) + first_word_start = pos + + [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx| + gcs.take(idx).sum(&:bytesize) end - left_word_start = byte_pointer + byte_size - [left_word_start, middle_start, right_word_start, after_start] end def self.vi_big_forward_word(line, byte_pointer) - byte_size = 0 - while (line.bytesize - 1) > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar =~ /\s/ - byte_size += size - end - while (line.bytesize - 1) > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar =~ /\S/ - byte_size += size - end - byte_size + gcs = line.byteslice(byte_pointer..).grapheme_clusters + nonspaces = gcs.take_while { |c| !space_character?(c) } + spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) } + nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize) end def self.vi_big_forward_end_word(line, byte_pointer) - if (line.bytesize - 1) > byte_pointer - size = get_next_mbchar_size(line, byte_pointer) - byte_size = size - else - return 0 - end - while (line.bytesize - 1) > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break 
if mbchar =~ /\S/ - byte_size += size - end - prev_byte_size = byte_size - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar =~ /\s/ - prev_byte_size = byte_size - byte_size += size - end - prev_byte_size + gcs = line.byteslice(byte_pointer..).grapheme_clusters + first = gcs.shift(1) + spaces = gcs.take_while { |c| space_character?(c) } + nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) } + matched = spaces + nonspaces + matched.pop + first.sum(&:bytesize) + matched.sum(&:bytesize) end def self.vi_big_backward_word(line, byte_pointer) - byte_size = 0 - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - break if mbchar =~ /\S/ - byte_size += size - end - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - break if mbchar =~ /\s/ - byte_size += size - end - byte_size + gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse + spaces = gcs.take_while { |c| space_character?(c) } + nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) } + spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize) end def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false) - if line.bytesize > byte_pointer - size = get_next_mbchar_size(line, byte_pointer) - mbchar = line.byteslice(byte_pointer, size) - if mbchar =~ /\w/ - started_by = :word - elsif mbchar =~ /\s/ - started_by = :space + gcs = line.byteslice(byte_pointer..).grapheme_clusters + return 0 if gcs.empty? 
+ + c = gcs.first + matched = + if word_character?(c) + gcs.take_while { |c| word_character?(c) } + elsif space_character?(c) + gcs.take_while { |c| space_character?(c) } else - started_by = :non_word_printable - end - byte_size = size - else - return 0 - end - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - case started_by - when :word - break if mbchar =~ /\W/ - when :space - break if mbchar =~ /\S/ - when :non_word_printable - break if mbchar =~ /\w|\s/ + gcs.take_while { |c| !word_character?(c) && !space_character?(c) } end - byte_size += size - end - return byte_size if drop_terminate_spaces - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - break if mbchar =~ /\S/ - byte_size += size - end - byte_size + + return matched.sum(&:bytesize) if drop_terminate_spaces + + spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) } + matched.sum(&:bytesize) + spaces.sum(&:bytesize) end def self.vi_forward_end_word(line, byte_pointer) - if (line.bytesize - 1) > byte_pointer - size = get_next_mbchar_size(line, byte_pointer) - mbchar = line.byteslice(byte_pointer, size) - if mbchar =~ /\w/ - started_by = :word - elsif mbchar =~ /\s/ - started_by = :space - else - started_by = :non_word_printable - end - byte_size = size - else - return 0 - end - if (line.bytesize - 1) > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - if mbchar =~ /\w/ - second = :word - elsif mbchar =~ /\s/ - second = :space - else - second = :non_word_printable - end - second_byte_size = size - else - return byte_size - end - if second == :space - byte_size += second_byte_size - while (line.bytesize - 1) > (byte_pointer + byte_size) - size = 
get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - if mbchar =~ /\S/ - if mbchar =~ /\w/ - started_by = :word - else - started_by = :non_word_printable - end - break - end - byte_size += size - end - else - case [started_by, second] - when [:word, :non_word_printable], [:non_word_printable, :word] - started_by = second - else - byte_size += second_byte_size - started_by = second - end - end - prev_byte_size = byte_size - while line.bytesize > (byte_pointer + byte_size) - size = get_next_mbchar_size(line, byte_pointer + byte_size) - mbchar = line.byteslice(byte_pointer + byte_size, size) - case started_by - when :word - break if mbchar =~ /\W/ - when :non_word_printable - break if mbchar =~ /[\w\s]/ - end - prev_byte_size = byte_size - byte_size += size - end - prev_byte_size + gcs = line.byteslice(byte_pointer..).grapheme_clusters + return 0 if gcs.empty? + return gcs.first.bytesize if gcs.size == 1 + + start = gcs.shift + skips = [start] + if space_character?(start) || space_character?(gcs.first) + spaces = gcs.take_while { |c| space_character?(c) } + skips += spaces + gcs.shift(spaces.size) + end + start_with_word = word_character?(gcs.first) + matched = gcs.take_while { |c| start_with_word ? 
word_character?(c) : !word_character?(c) && !space_character?(c) } + matched.pop + skips.sum(&:bytesize) + matched.sum(&:bytesize) end def self.vi_backward_word(line, byte_pointer) - byte_size = 0 - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - if mbchar =~ /\S/ - if mbchar =~ /\w/ - started_by = :word - else - started_by = :non_word_printable - end - break - end - byte_size += size - end - while 0 < (byte_pointer - byte_size) - size = get_prev_mbchar_size(line, byte_pointer - byte_size) - mbchar = line.byteslice(byte_pointer - byte_size - size, size) - case started_by - when :word - break if mbchar =~ /\W/ - when :non_word_printable - break if mbchar =~ /[\w\s]/ - end - byte_size += size - end - byte_size + gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse + spaces = gcs.take_while { |c| space_character?(c) } + gcs.shift(spaces.size) + start_with_word = word_character?(gcs.first) + matched = gcs.take_while { |c| start_with_word ? 
word_character?(c) : !word_character?(c) && !space_character?(c) } + spaces.sum(&:bytesize) + matched.sum(&:bytesize) end def self.common_prefix(list, ignore_case: false) @@ -647,15 +398,18 @@ def self.common_prefix(list, ignore_case: false) end def self.vi_first_print(line) - byte_size = 0 - while (line.bytesize - 1) > byte_size - size = get_next_mbchar_size(line, byte_size) - mbchar = line.byteslice(byte_size, size) - if mbchar =~ /\S/ - break - end - byte_size += size - end - byte_size + gcs = line.grapheme_clusters + spaces = gcs.take_while { |c| space_character?(c) } + spaces.sum(&:bytesize) + end + + def self.word_character?(s) + s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s + rescue Encoding::UndefinedConversionError + false + end + + def self.space_character?(s) + s.match?(/\s/) if s end end diff --git a/test/reline/test_unicode.rb b/test/reline/test_unicode.rb index 07ed8c6230..0778306c32 100644 --- a/test/reline/test_unicode.rb +++ b/test/reline/test_unicode.rb @@ -147,6 +147,7 @@ def test_encoding_conversion def test_em_forward_word assert_equal(12, Reline::Unicode.em_forward_word('abc---fooあbar-baz', 3)) + assert_equal(11, Reline::Unicode.em_forward_word('abc---fooあbar-baz'.encode('sjis'), 3)) assert_equal(3, Reline::Unicode.em_forward_word('abcfoo', 3)) assert_equal(3, Reline::Unicode.em_forward_word('abc---', 3)) assert_equal(0, Reline::Unicode.em_forward_word('abc', 3)) @@ -154,6 +155,7 @@ def test_em_forward_word def test_em_forward_word_with_capitalization assert_equal([12, '---Fooあbar'], Reline::Unicode.em_forward_word_with_capitalization('abc---foOあBar-baz', 3)) + assert_equal([11, '---Fooあbar'.encode('sjis')], Reline::Unicode.em_forward_word_with_capitalization('abc---foOあBar-baz'.encode('sjis'), 3)) assert_equal([3, 'Foo'], Reline::Unicode.em_forward_word_with_capitalization('abcfOo', 3)) assert_equal([3, '---'], Reline::Unicode.em_forward_word_with_capitalization('abc---', 3)) assert_equal([0, ''], 
Reline::Unicode.em_forward_word_with_capitalization('abc', 3)) @@ -162,6 +164,7 @@ def test_em_forward_word_with_capitalization def test_em_backward_word assert_equal(12, Reline::Unicode.em_backward_word('abc foo-barあbaz--- xyz', 20)) + assert_equal(11, Reline::Unicode.em_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 19)) assert_equal(2, Reline::Unicode.em_backward_word(' ', 2)) assert_equal(2, Reline::Unicode.em_backward_word('ab', 2)) assert_equal(0, Reline::Unicode.em_backward_word('ab', 0)) @@ -169,6 +172,7 @@ def test_em_backward_word def test_em_big_backward_word assert_equal(16, Reline::Unicode.em_big_backward_word('abc foo-barあbaz--- xyz', 20)) + assert_equal(15, Reline::Unicode.em_big_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 19)) assert_equal(2, Reline::Unicode.em_big_backward_word(' ', 2)) assert_equal(2, Reline::Unicode.em_big_backward_word('ab', 2)) assert_equal(0, Reline::Unicode.em_big_backward_word('ab', 0)) @@ -184,22 +188,22 @@ def test_ed_transpose_words assert_equal([3, 5, 6, 8], Reline::Unicode.ed_transpose_words('aa bb cc ', 7)) assert_equal([3, 5, 6, 10], Reline::Unicode.ed_transpose_words('aa bb cc ', 8)) assert_equal([3, 5, 6, 10], Reline::Unicode.ed_transpose_words('aa bb cc ', 9)) - word1 = 'fooあ' - word2 = 'barあbaz' - left = 'aaa -' - middle = '- -' - right = '- bbb' - expected = [left.bytesize, (left + word1).bytesize, (left + word1 + middle).bytesize, (left + word1 + middle + word2).bytesize] - assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize)) - assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize)) - assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize + word2.bytesize - 1)) + ['sjis', 'utf-8'].each do |encoding| + texts = ['fooあ', 'barあbaz', 'aaa -', 
'- -', '- bbb'] + word1, word2, left, middle, right = texts.map { |text| text.encode(encoding) } + expected = [left.bytesize, (left + word1).bytesize, (left + word1 + middle).bytesize, (left + word1 + middle + word2).bytesize] + assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize)) + assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize)) + assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize + word2.bytesize - 1)) + end end def test_vi_big_forward_word assert_equal(18, Reline::Unicode.vi_big_forward_word('abc---fooあbar-baz xyz', 3)) assert_equal(8, Reline::Unicode.vi_big_forward_word('abcfooあ --', 3)) + assert_equal(7, Reline::Unicode.vi_big_forward_word('abcfooあ --'.encode('sjis'), 3)) assert_equal(6, Reline::Unicode.vi_big_forward_word('abcfooあ', 3)) - assert_equal(2, Reline::Unicode.vi_big_forward_word('abc- ', 3)) # maybe inconsistent + assert_equal(3, Reline::Unicode.vi_big_forward_word('abc- ', 3)) assert_equal(0, Reline::Unicode.vi_big_forward_word('abc', 3)) end @@ -211,6 +215,7 @@ def test_vi_big_forward_end_word assert_equal(1, Reline::Unicode.vi_big_forward_end_word('aa b', 0)) assert_equal(3, Reline::Unicode.vi_big_forward_end_word(' aa b', 0)) assert_equal(15, Reline::Unicode.vi_big_forward_end_word('abc---fooあbar-baz xyz', 3)) + assert_equal(14, Reline::Unicode.vi_big_forward_end_word('abc---fooあbar-baz xyz'.encode('sjis'), 3)) assert_equal(3, Reline::Unicode.vi_big_forward_end_word('abcfooあ --', 3)) assert_equal(3, Reline::Unicode.vi_big_forward_end_word('abcfooあ', 3)) assert_equal(2, Reline::Unicode.vi_big_forward_end_word('abc- ', 3)) @@ -219,6 +224,7 @@ def test_vi_big_forward_end_word def test_vi_big_backward_word assert_equal(16, Reline::Unicode.vi_big_backward_word('abc foo-barあbaz--- 
xyz', 20)) + assert_equal(15, Reline::Unicode.vi_big_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 19)) assert_equal(2, Reline::Unicode.vi_big_backward_word(' ', 2)) assert_equal(2, Reline::Unicode.vi_big_backward_word('ab', 2)) assert_equal(0, Reline::Unicode.vi_big_backward_word('ab', 0)) @@ -226,23 +232,28 @@ def test_vi_big_backward_word def test_vi_forward_word assert_equal(3, Reline::Unicode.vi_forward_word('abc---fooあbar-baz', 3)) - assert_equal(3, Reline::Unicode.vi_forward_word('abc---fooあbar-baz', 6)) # maybe bug - assert_equal(3, Reline::Unicode.vi_forward_word('abcfooあ', 3)) # maybe bug + assert_equal(9, Reline::Unicode.vi_forward_word('abc---fooあbar-baz', 6)) + assert_equal(8, Reline::Unicode.vi_forward_word('abc---fooあbar-baz'.encode('sjis'), 6)) + assert_equal(6, Reline::Unicode.vi_forward_word('abcfooあ', 3)) assert_equal(3, Reline::Unicode.vi_forward_word('abc---', 3)) assert_equal(0, Reline::Unicode.vi_forward_word('abc', 3)) + assert_equal(2, Reline::Unicode.vi_forward_word('abc def', 1, true)) + assert_equal(5, Reline::Unicode.vi_forward_word('abc def', 1, false)) end def test_vi_forward_end_word assert_equal(2, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz', 3)) - assert_equal(2, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz', 6)) # maybe bug - assert_equal(2, Reline::Unicode.vi_forward_end_word('abcfooあ', 3)) # maybe bug + assert_equal(8, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz', 6)) + assert_equal(7, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz'.encode('sjis'), 6)) + assert_equal(3, Reline::Unicode.vi_forward_end_word('abcfooあ', 3)) assert_equal(2, Reline::Unicode.vi_forward_end_word('abc---', 3)) assert_equal(0, Reline::Unicode.vi_forward_end_word('abc', 3)) end def test_vi_backward_word assert_equal(3, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz', 20)) - assert_equal(3, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz', 17)) # maybe bug + assert_equal(9, 
Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz', 17)) + assert_equal(8, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 16)) assert_equal(2, Reline::Unicode.vi_backward_word(' ', 2)) assert_equal(2, Reline::Unicode.vi_backward_word('ab', 2)) assert_equal(0, Reline::Unicode.vi_backward_word('ab', 0)) @@ -250,8 +261,26 @@ def test_vi_backward_word def test_vi_first_print assert_equal(3, Reline::Unicode.vi_first_print(' abcdefg')) - assert_equal(2, Reline::Unicode.vi_first_print(' ')) # maybe inconsistent + assert_equal(3, Reline::Unicode.vi_first_print(' ')) assert_equal(0, Reline::Unicode.vi_first_print('abc')) + assert_equal(0, Reline::Unicode.vi_first_print('あ')) + assert_equal(0, Reline::Unicode.vi_first_print('あ'.encode('sjis'))) assert_equal(0, Reline::Unicode.vi_first_print('')) end + + def test_character_type + assert(Reline::Unicode.word_character?('a')) + assert(Reline::Unicode.word_character?('あ')) + assert(Reline::Unicode.word_character?('あ'.encode('sjis'))) + refute(Reline::Unicode.word_character?(33345.chr('sjis'))) + refute(Reline::Unicode.word_character?('-')) + refute(Reline::Unicode.word_character?(nil)) + + assert(Reline::Unicode.space_character?(' ')) + refute(Reline::Unicode.space_character?('あ')) + refute(Reline::Unicode.space_character?('あ'.encode('sjis'))) + refute(Reline::Unicode.space_character?(33345.chr('sjis'))) + refute(Reline::Unicode.space_character?('-')) + refute(Reline::Unicode.space_character?(nil)) + end end