Filename | /usr/local/lib/perl5/5.32/mach/Unicode/Normalize.pm |
Statements | Executed 1035 statements in 21.9ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
345 | 1 | 1 | 20.2ms | 20.2ms | NFC (xsub) | Unicode::Normalize::
345 | 1 | 1 | 1.85ms | 22.0ms | normalize | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@12 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@13 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@14 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@15 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@17 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@3 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | BEGIN@60 | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | CORE:unpack (opcode) | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | FCD | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | NFC_partial | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | NFD_partial | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | NFKC_partial | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | NFKD_partial | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | check | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | normalize_partial | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | pack_U | Unicode::Normalize::
0 | 0 | 0 | 0s | 0s | unpack_U | Unicode::Normalize::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package Unicode::Normalize; | ||||
2 | |||||
3 | BEGIN { | ||||
4 | unless ('A' eq pack('U', 0x41)) { | ||||
5 | die "Unicode::Normalize cannot stringify a Unicode code point\n"; | ||||
6 | } | ||||
7 | unless (0x41 == unpack('U', 'A')) { | ||||
8 | die "Unicode::Normalize cannot get Unicode code point\n"; | ||||
9 | } | ||||
10 | } | ||||
11 | |||||
12 | use 5.006; | ||||
13 | use strict; | ||||
14 | use warnings; | ||||
15 | use Carp; | ||||
16 | |||||
17 | no warnings 'utf8'; | ||||
18 | |||||
19 | our $VERSION = '1.27'; | ||||
20 | our $PACKAGE = __PACKAGE__; | ||||
21 | |||||
22 | our @EXPORT = qw( NFC NFD NFKC NFKD ); | ||||
23 | our @EXPORT_OK = qw( | ||||
24 | normalize decompose reorder compose | ||||
25 | checkNFD checkNFKD checkNFC checkNFKC check | ||||
26 | getCanon getCompat getComposite getCombinClass | ||||
27 | isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex | ||||
28 | isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE | ||||
29 | FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter | ||||
30 | normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial | ||||
31 | ); | ||||
32 | our %EXPORT_TAGS = ( | ||||
33 | all => [ @EXPORT, @EXPORT_OK ], | ||||
34 | normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ], | ||||
35 | check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ], | ||||
36 | fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ], | ||||
37 | ); | ||||
38 | |||||
39 | ## | ||||
40 | ## utilities for tests | ||||
41 | ## | ||||
42 | |||||
43 | sub pack_U { | ||||
44 | return pack('U*', @_); | ||||
45 | } | ||||
46 | |||||
47 | sub unpack_U { | ||||
48 | |||||
49 | # The empty pack returns an empty UTF-8 string, so the effect is to force | ||||
50 | # the shifted parameter into being UTF-8. This allows this to work on | ||||
51 | # Perl 5.6, where there is no utf8::upgrade(). | ||||
52 | return unpack('U*', shift(@_).pack('U*')); | ||||
53 | } | ||||
54 | |||||
55 | require Exporter; | ||||
56 | |||||
57 | ##### The above part is common to XS and PP ##### | ||||
58 | |||||
59 | our @ISA = qw(Exporter); | ||||
60 | use XSLoader (); | ||||
61 | XSLoader::load( 'Unicode::Normalize', $VERSION ); | ||||
62 | |||||
63 | ##### The below part is common to XS and PP ##### | ||||
64 | |||||
65 | ## | ||||
66 | ## normalize | ||||
67 | ## | ||||
68 | |||||
69 | sub FCD ($) { | ||||
70 | my $str = shift; | ||||
71 | return checkFCD($str) ? $str : NFD($str); | ||||
72 | } | ||||
73 | |||||
74 | our %formNorm = ( | ||||
75 | NFC => \&NFC, C => \&NFC, | ||||
76 | NFD => \&NFD, D => \&NFD, | ||||
77 | NFKC => \&NFKC, KC => \&NFKC, | ||||
78 | NFKD => \&NFKD, KD => \&NFKD, | ||||
79 | FCD => \&FCD, FCC => \&FCC, | ||||
80 | ); | ||||
81 | |||||
82 | sub normalize($$) | ||||
83 | # spent 22.0ms (1.85+20.2) within Unicode::Normalize::normalize which was called 345 times, avg 64µs/call:
# 345 times (1.85ms+20.2ms) by Sympa::Tools::Text::canonic_text at line 137 of /usr/local/libexec/sympa/Sympa/Tools/Text.pm, avg 64µs/call | ||||
84 | 345 | 158µs | my $form = shift; | ||
85 | 345 | 124µs | my $str = shift; | ||
86 | 345 | 21.6ms | 345 | 20.2ms | if (exists $formNorm{$form}) { # spent 20.2ms making 345 calls to Unicode::Normalize::NFC, avg 58µs/call |
87 | return $formNorm{$form}->($str); | ||||
88 | } | ||||
89 | croak($PACKAGE."::normalize: invalid form name: $form"); | ||||
90 | } | ||||
91 | |||||
92 | ## | ||||
93 | ## partial | ||||
94 | ## | ||||
95 | |||||
96 | sub normalize_partial ($$) { | ||||
97 | if (exists $formNorm{$_[0]}) { | ||||
98 | my $n = normalize($_[0], $_[1]); | ||||
99 | my($p, $u) = splitOnLastStarter($n); | ||||
100 | $_[1] = $u; | ||||
101 | return $p; | ||||
102 | } | ||||
103 | croak($PACKAGE."::normalize_partial: invalid form name: $_[0]"); | ||||
104 | } | ||||
105 | |||||
106 | sub NFD_partial ($) { return normalize_partial('NFD', $_[0]) } | ||||
107 | sub NFC_partial ($) { return normalize_partial('NFC', $_[0]) } | ||||
108 | sub NFKD_partial($) { return normalize_partial('NFKD',$_[0]) } | ||||
109 | sub NFKC_partial($) { return normalize_partial('NFKC',$_[0]) } | ||||
110 | |||||
111 | ## | ||||
112 | ## check | ||||
113 | ## | ||||
114 | |||||
115 | our %formCheck = ( | ||||
116 | NFC => \&checkNFC, C => \&checkNFC, | ||||
117 | NFD => \&checkNFD, D => \&checkNFD, | ||||
118 | NFKC => \&checkNFKC, KC => \&checkNFKC, | ||||
119 | NFKD => \&checkNFKD, KD => \&checkNFKD, | ||||
120 | FCD => \&checkFCD, FCC => \&checkFCC, | ||||
121 | ); | ||||
122 | |||||
123 | sub check($$) | ||||
124 | { | ||||
125 | my $form = shift; | ||||
126 | my $str = shift; | ||||
127 | if (exists $formCheck{$form}) { | ||||
128 | return $formCheck{$form}->($str); | ||||
129 | } | ||||
130 | croak($PACKAGE."::check: invalid form name: $form"); | ||||
131 | } | ||||
132 | |||||
133 | 1; | ||||
134 | __END__ | ||||
# spent 20.2ms within Unicode::Normalize::NFC which was called 345 times, avg 58µs/call:
# 345 times (20.2ms+0s) by Unicode::Normalize::normalize at line 86, avg 58µs/call |