-
-
Notifications
You must be signed in to change notification settings - Fork 477
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(transformer): JSX source calculate correct column when Unicode chars
- Loading branch information
1 parent
66ed8ed
commit 326d58e
Showing
2 changed files
with
88 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,91 @@ | ||
use ropey::Rope; | ||
|
||
/// Get line and column from offset and source text | ||
/// Get line and column from offset and source text. | ||
/// | ||
/// Line number starts at 1. | ||
/// Column number is in UTF-16 characters, and starts at 1. | ||
/// | ||
/// This matches Babel's output. | ||
pub fn get_line_column(offset: u32, source_text: &str) -> (usize, usize) { | ||
let offset = offset as usize; | ||
let rope = Rope::from_str(source_text); | ||
let line = rope.byte_to_line(offset); | ||
let first_char_of_line = rope.line_to_char(line); | ||
// Original offset is byte, but Rope uses char offset | ||
let offset = rope.byte_to_char(offset); | ||
let column = offset - first_char_of_line; | ||
// line and column is zero-indexed, but we want 1-indexed | ||
(line + 1, column + 1) | ||
// Get line number and byte offset of start of line | ||
let line_index = rope.byte_to_line(offset); | ||
let line_offset = rope.line_to_byte(line_index); | ||
// Get column number | ||
let column_index = source_text[line_offset..offset].encode_utf16().count(); | ||
// line and column are zero-indexed, but we want 1-indexed | ||
(line_index + 1, column_index + 1) | ||
} | ||
|
||
/// Offset 0 in an empty source is reported as line 1, column 1.
#[test]
fn empty_file() {
    let position = get_line_column(0, "");
    assert_eq!(position, (1, 1));
}
|
||
/// Offset 0 of a multi-line source is reported as line 1, column 1.
#[test]
fn first_line_start() {
    let position = get_line_column(0, "foo\nbar\n");
    assert_eq!(position, (1, 1));
}
|
||
/// An offset inside the first line yields column = offset + 1 for ASCII text.
#[test]
fn first_line_middle() {
    let position = get_line_column(5, "blahblahblah\noops\n");
    assert_eq!(position, (1, 6));
}
|
||
/// The first byte of the third line is reported as line 3, column 1.
#[test]
fn later_line_start() {
    let position = get_line_column(8, "foo\nbar\nblahblahblah");
    assert_eq!(position, (3, 1));
}
|
||
/// An offset inside the third line is reported relative to that line's start.
#[test]
fn later_line_middle() {
    let position = get_line_column(12, "foo\nbar\nblahblahblah");
    assert_eq!(position, (3, 5));
}
|
||
/// A character that is 2 bytes in UTF-8 but 1 UTF-16 unit advances the column by 1.
#[test]
fn after_2_byte_unicode() {
    let ch = "£";
    assert_eq!(ch.len(), 2);
    assert_eq!(utf16_len(ch), 1);

    let position = get_line_column(4, "£abc");
    assert_eq!(position, (1, 4));
}
|
||
/// A character that is 3 bytes in UTF-8 but 1 UTF-16 unit advances the column by 1.
#[test]
fn after_3_byte_unicode() {
    let ch = "अ";
    assert_eq!(ch.len(), 3);
    assert_eq!(utf16_len(ch), 1);

    let position = get_line_column(5, "अabc");
    assert_eq!(position, (1, 4));
}
|
||
/// A character that is 4 bytes in UTF-8 and 2 UTF-16 units advances the column by 2.
#[test]
fn after_4_byte_unicode() {
    let ch = "🍄";
    assert_eq!(ch.len(), 4);
    assert_eq!(utf16_len(ch), 2);

    let position = get_line_column(6, "🍄abc");
    assert_eq!(position, (1, 5));
}
|
||
/// A 2-byte character on the previous line must not affect the column on the next line.
#[test]
fn after_2_byte_unicode_on_previous_line() {
    let ch = "£";
    assert_eq!(ch.len(), 2);
    assert_eq!(utf16_len(ch), 1);

    let position = get_line_column(4, "£\nabc");
    assert_eq!(position, (2, 2));
}
|
||
/// A 3-byte character on the previous line must not affect the column on the next line.
#[test]
fn after_3_byte_unicode_on_previous_line() {
    let ch = "अ";
    assert_eq!(ch.len(), 3);
    assert_eq!(utf16_len(ch), 1);

    let position = get_line_column(5, "अ\nabc");
    assert_eq!(position, (2, 2));
}
|
||
/// A 4-byte character on the previous line must not affect the column on the next line.
#[test]
fn after_4_byte_unicode_on_previous_line() {
    let ch = "🍄";
    assert_eq!(ch.len(), 4);
    assert_eq!(utf16_len(ch), 2);

    let position = get_line_column(6, "🍄\nabc");
    assert_eq!(position, (2, 2));
}
|
||
/// Test helper: number of UTF-16 code units required to encode `s`.
#[cfg(test)]
fn utf16_len(s: &str) -> usize {
    // Each `char` is 1 or 2 UTF-16 units; summing them equals the length of
    // the UTF-16 encoding of the whole string.
    s.chars().map(char::len_utf16).sum()
}