Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement full Unicode 16.0.0 extended grapheme breaking. #719

Merged
merged 17 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions pkgs/characters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ using a [`CharacterRange`][CharacterRange].

Based on Unicode <!-- unicode-version -->version 16.0.0<!-- /unicode-version -->.

This package is not script-aware, and does not currently support the rule for
Indic Conjunct Breaks introduced in Unicode 15.1.0
([GB9c](https://www.unicode.org/reports/tr29/tr29-43.html#GB9c)).

## Unicode characters and representations

There is no such thing as plain text.
Expand Down
4 changes: 0 additions & 4 deletions pkgs/characters/analysis_options.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
include: package:dart_flutter_team_lints/analysis_options.yaml

analyzer:
errors:
prefer_single_quotes: ignore
32 changes: 16 additions & 16 deletions pkgs/characters/benchmark/benchmark.dart
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

// Benchmark of efficiency of grapheme cluster operations.

import "package:characters/characters.dart";
import 'package:characters/characters.dart';

import "../test/src/text_samples.dart";
import '../test/src/text_samples.dart';

double bench(int Function() action, int ms) {
var elapsed = 0;
Expand Down Expand Up @@ -49,12 +49,12 @@ int reverseStrings() {
var revHangul = reverse(hangul);
var rev2Hangul = reverse(revHangul);
if (hangul != rev2Hangul || hangul == revHangul) {
throw AssertionError("Bad reverse");
throw AssertionError('Bad reverse');
}
var revGenesis = reverse(genesis);
var rev2Genesis = reverse(revGenesis);
if (genesis != rev2Genesis || genesis == revGenesis) {
throw AssertionError("Bad reverse");
throw AssertionError('Bad reverse');
}

return (hangul.length + genesis.length) * 2;
Expand All @@ -63,16 +63,16 @@ int reverseStrings() {
int replaceStrings() {
var count = 0;
{
const language = "한글";
const language = '한글';
assert(language.length == 6);
var chars = Characters(hangul);
var replaced =
chars.replaceAll(Characters(language), Characters("Hangul!"));
chars.replaceAll(Characters(language), Characters('Hangul!'));
count += replaced.string.length - hangul.length;
}
{
var chars = Characters(genesis);
var replaced = chars.replaceAll(Characters("And"), Characters("Also"));
var replaced = chars.replaceAll(Characters('And'), Characters('Also'));
count += replaced.string.length - genesis.length;
}
return count;
Expand Down Expand Up @@ -111,27 +111,27 @@ void main(List<String> args) {

for (var i = 0; i < count; i++) {
var performance = bench(iterateIndicesOnly, 2000);
print("Index Iteration: ${toDigits(performance)} gc/ms");
print('Index Iteration: ${toDigits(performance)} gc/ms');
if (performance > bestIterateIndices) bestIterateIndices = performance;

performance = bench(iterateStrings, 2000);
print("String Iteration: ${toDigits(performance)} cu/ms");
print('String Iteration: ${toDigits(performance)} cu/ms');
if (performance > bestIterateStrings) bestIterateStrings = performance;

performance = bench(reverseStrings, 2000);
print("String Reversing: ${toDigits(performance)} cu/ms");
print('String Reversing: ${toDigits(performance)} cu/ms');
if (performance > bestReverseStrings) bestReverseStrings = performance;

performance = bench(replaceStrings, 2000);
print("String Replacing: ${toDigits(performance)} changes/ms");
print('String Replacing: ${toDigits(performance)} changes/ms');
if (performance > bestReplaceStrings) bestReplaceStrings = performance;
}

if (count > 1) {
print("Best: ");
print("Index Iteration: ${toDigits(bestIterateIndices)} gc/ms");
print("String Iteration: ${toDigits(bestIterateStrings)} cu/ms");
print("String Reversing: ${toDigits(bestReverseStrings)} cu/ms");
print("String Replacing: ${toDigits(bestReplaceStrings)} changes/ms");
print('Best: ');
print('Index Iteration: ${toDigits(bestIterateIndices)} gc/ms');
print('String Iteration: ${toDigits(bestIterateStrings)} cu/ms');
print('String Reversing: ${toDigits(bestReverseStrings)} cu/ms');
print('String Replacing: ${toDigits(bestReplaceStrings)} changes/ms');
}
}
4 changes: 2 additions & 2 deletions pkgs/characters/lib/characters.dart
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
/// String operations based on characters (Unicode grapheme clusters).
library;

export "src/characters.dart";
export "src/extensions.dart";
export 'src/characters.dart';
export 'src/extensions.dart';
16 changes: 8 additions & 8 deletions pkgs/characters/lib/src/characters.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import "characters_impl.dart";
import 'characters_impl.dart';

/// The characters of a string.
///
Expand All @@ -21,7 +21,7 @@ import "characters_impl.dart";
/// in different ways.
abstract class Characters implements Iterable<String> {
/// An empty [Characters] containing no characters.
static const Characters empty = StringCharacters("");
static const Characters empty = StringCharacters('');

/// Creates a [Characters] allowing iteration of
/// the characters of [string].
Expand Down Expand Up @@ -260,9 +260,9 @@ abstract class Characters implements Iterable<String> {
/// Any further occurrences will be included in the last part.
/// Example:
/// ```dart
/// var c = "abracadabra".characters;
/// var parts = c.split("a".characters, 4).toList();
/// print(parts); // Prints ["", "br", "c", "dabra"]
/// var c = 'abracadabra'.characters;
/// var parts = c.split('a'.characters, 4).toList();
/// print(parts); // Prints ['', 'br', 'c', 'dabra']
/// ```
/// If there are fewer than `maxParts - 1` occurrences of [pattern],
/// then the characters are split at all occurrences.
Expand Down Expand Up @@ -790,9 +790,9 @@ abstract class CharacterRange implements Iterator<String> {
///
/// Example:
/// ```dart
/// var c = "abracadabra".characters.dropFirst().dropLast();
/// // c is "bracadabr".
/// var parts = c.split("a".characters, 3).toList();
/// var c = 'abracadabra'.characters.dropFirst().dropLast();
/// // c is 'bracadabr'.
/// var parts = c.split('a'.characters, 3).toList();
/// print(parts); // [br, c, dabr]
/// ```
/// If there are fewer than `maxParts - 1` occurrences of [pattern],
Expand Down
54 changes: 27 additions & 27 deletions pkgs/characters/lib/src/characters_impl.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import "characters.dart";
import "grapheme_clusters/breaks.dart";
import "grapheme_clusters/constants.dart";
import 'characters.dart';
import 'grapheme_clusters/breaks.dart';
import 'grapheme_clusters/constants.dart';
import 'grapheme_clusters/table.dart';

/// The grapheme clusters of a string.
Expand All @@ -28,23 +28,23 @@ final class StringCharacters extends Iterable<String> implements Characters {

@override
String get first => string.isEmpty
? throw StateError("No element")
? throw StateError('No element')
: string.substring(
0, Breaks(string, 0, string.length, stateSoTNoBreak).nextBreak());

@override
String get last => string.isEmpty
? throw StateError("No element")
? throw StateError('No element')
: string.substring(
BackBreaks(string, string.length, 0, stateEoTNoBreak).nextBreak());

@override
String get single {
if (string.isEmpty) throw StateError("No element");
if (string.isEmpty) throw StateError('No element');
var firstEnd =
Breaks(string, 0, string.length, stateSoTNoBreak).nextBreak();
if (firstEnd == string.length) return string;
throw StateError("Too many elements");
throw StateError('Too many elements');
}

@override
Expand Down Expand Up @@ -74,9 +74,9 @@ final class StringCharacters extends Iterable<String> implements Characters {
}

@override
String join([String separator = ""]) {
if (separator == "") return string;
return _explodeReplace(string, 0, string.length, separator, "");
String join([String separator = '']) {
if (separator == '') return string;
return _explodeReplace(string, 0, string.length, separator, '');
}

@override
Expand All @@ -91,12 +91,12 @@ final class StringCharacters extends Iterable<String> implements Characters {
cursor = next;
}
if (orElse != null) return orElse();
throw StateError("No element");
throw StateError('No element');
}

@override
String elementAt(int index) {
RangeError.checkNotNegative(index, "index");
RangeError.checkNotNegative(index, 'index');
var count = 0;
if (string.isNotEmpty) {
var breaks = Breaks(string, 0, string.length, stateSoTNoBreak);
Expand All @@ -108,7 +108,7 @@ final class StringCharacters extends Iterable<String> implements Characters {
start = end;
}
}
throw RangeError.index(index, this, "index", null, count);
throw RangeError.index(index, this, 'index', null, count);
}

@override
Expand Down Expand Up @@ -209,7 +209,7 @@ final class StringCharacters extends Iterable<String> implements Characters {

@override
Characters skip(int count) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
return _skip(count);
}

Expand All @@ -221,7 +221,7 @@ final class StringCharacters extends Iterable<String> implements Characters {

@override
Characters take(int count) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
return _take(count);
}

Expand All @@ -233,9 +233,9 @@ final class StringCharacters extends Iterable<String> implements Characters {

@override
Characters getRange(int start, [int? end]) {
RangeError.checkNotNegative(start, "start");
RangeError.checkNotNegative(start, 'start');
if (end == null) return _skip(start);
if (end < start) throw RangeError.range(end, start, null, "end");
if (end < start) throw RangeError.range(end, start, null, 'end');
if (end == start) return Characters.empty;
if (start == 0) return _take(end);
if (string.isEmpty) return this;
Expand All @@ -254,10 +254,10 @@ final class StringCharacters extends Iterable<String> implements Characters {
while (position > 0) {
position--;
start = breaks.nextBreak();
if (start < 0) throw StateError("No element");
if (start < 0) throw StateError('No element');
}
var end = breaks.nextBreak();
if (end < 0) throw StateError("No element");
if (end < 0) throw StateError('No element');
if (start == 0 && end == string.length) return this;
return StringCharacters(string.substring(start, end));
}
Expand Down Expand Up @@ -311,7 +311,7 @@ final class StringCharacters extends Iterable<String> implements Characters {

@override
Characters skipLast(int count) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
if (count == 0) return this;
if (string.isNotEmpty) {
var breaks = BackBreaks(string, string.length, 0, stateEoTNoBreak);
Expand Down Expand Up @@ -351,7 +351,7 @@ final class StringCharacters extends Iterable<String> implements Characters {

@override
Characters takeLast(int count) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
if (count == 0) return Characters.empty;
if (string.isNotEmpty) {
var breaks = BackBreaks(string, string.length, 0, stateEoTNoBreak);
Expand Down Expand Up @@ -446,7 +446,7 @@ class StringCharacterRange implements CharacterRange {
factory StringCharacterRange.at(String string, int startIndex,
[int? endIndex]) {
RangeError.checkValidRange(
startIndex, endIndex, string.length, "startIndex", "endIndex");
startIndex, endIndex, string.length, 'startIndex', 'endIndex');
return _expandRange(string, startIndex, endIndex ?? startIndex);
}

Expand Down Expand Up @@ -501,7 +501,7 @@ class StringCharacterRange implements CharacterRange {
}
}
state = move(state, category);
if (state & stateNoBreak == 0 && --count == 0) {
if (state & maskBreak != flagNoBreak && --count == 0) {
_move(newStart, index);
return true;
}
Expand All @@ -513,7 +513,7 @@ class StringCharacterRange implements CharacterRange {
_move(newStart, _end);
return true;
} else {
throw RangeError.range(count, 0, null, "count");
throw RangeError.range(count, 0, null, 'count');
}
}

Expand All @@ -530,7 +530,7 @@ class StringCharacterRange implements CharacterRange {
bool moveBack([int count = 1]) => _retractStart(count, _start);

bool _retractStart(int count, int newEnd) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
var breaks = _backBreaksFromStart();
var start = _start;
while (count > 0) {
Expand Down Expand Up @@ -578,7 +578,7 @@ class StringCharacterRange implements CharacterRange {

@override
bool dropFirst([int count = 1]) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
if (_start == _end) return count == 0;
var breaks = Breaks(_string, _start, _end, stateSoTNoBreak);
while (count > 0) {
Expand Down Expand Up @@ -636,7 +636,7 @@ class StringCharacterRange implements CharacterRange {

@override
bool dropLast([int count = 1]) {
RangeError.checkNotNegative(count, "count");
RangeError.checkNotNegative(count, 'count');
var breaks = BackBreaks(_string, _end, _start, stateEoTNoBreak);
while (count > 0) {
var nextBreak = breaks.nextBreak();
Expand Down
Loading
Loading