Filename | /usr/local/lib/perl5/site_perl/Locale/Recode/_Conversions.pm |
Statements | Executed 78 statements in 106µs |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
26 | 1 | 1 | 82µs | 82µs | resolveAlias | Locale::Recode::_Conversions::
0 | 0 | 0 | 0s | 0s | BEGIN@23 | Locale::Recode::_Conversions::
0 | 0 | 0 | 0s | 0s | BEGIN@24 | Locale::Recode::_Conversions::
0 | 0 | 0 | 0s | 0s | BEGIN@26 | Locale::Recode::_Conversions::
0 | 0 | 0 | 0s | 0s | findPath | Locale::Recode::_Conversions::
0 | 0 | 0 | 0s | 0s | isSupported | Locale::Recode::_Conversions::
0 | 0 | 0 | 0s | 0s | listSupported | Locale::Recode::_Conversions::
Line | Statements |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | #! /bin/false | ||||
2 | # vim: set autoindent shiftwidth=4 tabstop=4: | ||||
3 | |||||
4 | # List of internally known conversions. | ||||
5 | # Copyright (C) 2002-2017 Guido Flohr <guido.flohr@cantanea.com>, | ||||
6 | # all rights reserved. | ||||
7 | |||||
8 | # This program is free software: you can redistribute it and/or modify | ||||
9 | # it under the terms of the GNU General Public License as published by | ||||
10 | # the Free Software Foundation; either version 3 of the License, or | ||||
11 | # (at your option) any later version. | ||||
12 | |||||
13 | # This program is distributed in the hope that it will be useful, | ||||
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
16 | # GNU General Public License for more details. | ||||
17 | |||||
18 | # You should have received a copy of the GNU General Public License | ||||
19 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
20 | |||||
21 | package Locale::Recode::_Conversions; | ||||
22 | |||||
23 | use strict; | ||||
24 | use integer; | ||||
25 | |||||
26 | use vars qw ($conversions $optional_conversions); | ||||
27 | |||||
28 | # Encodings that are always available, keyed by canonical upper-case name; the value is what isSupported() returns for that encoding. | ||||
29 | $conversions = { | ||||
30 | 'ASMO_449' => 'ASMO_449', | ||||
31 | 'ATARI-ST-EURO' => 'ATARI_ST_EURO', | ||||
32 | 'ATARI-ST' => 'ATARI_ST', | ||||
33 | 'CP10007' => 'CP10007', | ||||
34 | 'CSN_369103' => 'CSN_369103', | ||||
35 | 'CWI' => 'CWI', | ||||
36 | 'DEC-MCS' => 'DEC_MCS', | ||||
37 | 'EBCDIC-AT-DE-A' => 'EBCDIC_AT_DE_A', | ||||
38 | 'EBCDIC-AT-DE' => 'EBCDIC_AT_DE', | ||||
39 | 'EBCDIC-CA-FR' => 'EBCDIC_CA_FR', | ||||
40 | 'EBCDIC-DK-NO-A' => 'EBCDIC_DK_NO_A', | ||||
41 | 'EBCDIC-DK-NO' => 'EBCDIC_DK_NO', | ||||
42 | 'EBCDIC-ES-A' => 'EBCDIC_ES_A', | ||||
43 | 'EBCDIC-ES-S' => 'EBCDIC_ES_S', | ||||
44 | 'EBCDIC-ES' => 'EBCDIC_ES', | ||||
45 | 'EBCDIC-FI-SE-A' => 'EBCDIC_FI_SE_A', | ||||
46 | 'EBCDIC-FI-SE' => 'EBCDIC_FI_SE', | ||||
47 | 'EBCDIC-FR' => 'EBCDIC_FR', | ||||
48 | 'EBCDIC-IS-FRISS' => 'EBCDIC_IS_FRISS', | ||||
49 | 'EBCDIC-IT' => 'EBCDIC_IT', | ||||
50 | 'EBCDIC-PT' => 'EBCDIC_PT', | ||||
51 | 'EBCDIC-UK' => 'EBCDIC_UK', | ||||
52 | 'EBCDIC-US' => 'EBCDIC_US', | ||||
53 | 'ECMA-CYRILLIC' => 'ECMA_CYRILLIC', | ||||
54 | 'GEORGIAN-ACADEMY' => 'GEORGIAN_ACADEMY', | ||||
55 | 'GEORGIAN-PS' => 'GEORGIAN_PS', | ||||
56 | 'GOST_19768-74' => 'GOST_19768_74', | ||||
57 | 'GREEK-CCITT' => 'GREEK_CCITT', | ||||
58 | 'GREEK7-OLD' => 'GREEK7_OLD', | ||||
59 | 'GREEK7' => 'GREEK7', | ||||
60 | 'HP-ROMAN8' => 'HP_ROMAN8', | ||||
61 | 'IBM037' => 'IBM037', | ||||
62 | 'IBM038' => 'IBM038', | ||||
63 | 'IBM1004' => 'IBM1004', | ||||
64 | 'IBM1026' => 'IBM1026', | ||||
65 | 'IBM1047' => 'IBM1047', | ||||
66 | 'IBM256' => 'IBM256', | ||||
67 | 'IBM273' => 'IBM273', | ||||
68 | 'IBM274' => 'IBM274', | ||||
69 | 'IBM275' => 'IBM275', | ||||
70 | 'IBM277' => 'IBM277', | ||||
71 | 'IBM278' => 'IBM278', | ||||
72 | 'IBM280' => 'IBM280', | ||||
73 | 'IBM281' => 'IBM281', | ||||
74 | 'IBM284' => 'IBM284', | ||||
75 | 'IBM285' => 'IBM285', | ||||
76 | 'IBM290' => 'IBM290', | ||||
77 | 'IBM297' => 'IBM297', | ||||
78 | 'IBM420' => 'IBM420', | ||||
79 | 'IBM423' => 'IBM423', | ||||
80 | 'IBM424' => 'IBM424', | ||||
81 | 'IBM437' => 'IBM437', | ||||
82 | 'IBM500' => 'IBM500', | ||||
83 | 'IBM850' => 'IBM850', | ||||
84 | 'IBM851' => 'IBM851', | ||||
85 | 'IBM852' => 'IBM852', | ||||
86 | 'IBM855' => 'IBM855', | ||||
87 | 'IBM857' => 'IBM857', | ||||
88 | 'IBM860' => 'IBM860', | ||||
89 | 'IBM861' => 'IBM861', | ||||
90 | 'IBM862' => 'IBM862', | ||||
91 | 'IBM863' => 'IBM863', | ||||
92 | 'IBM864' => 'IBM864', | ||||
93 | 'IBM865' => 'IBM865', | ||||
94 | 'IBM866' => 'IBM866', | ||||
95 | 'IBM868' => 'IBM868', | ||||
96 | 'IBM869' => 'IBM869', | ||||
97 | 'IBM870' => 'IBM870', | ||||
98 | 'IBM871' => 'IBM871', | ||||
99 | 'IBM874' => 'IBM874', | ||||
100 | 'IBM875' => 'IBM875', | ||||
101 | 'IBM880' => 'IBM880', | ||||
102 | 'IBM891' => 'IBM891', | ||||
103 | 'IBM903' => 'IBM903', | ||||
104 | 'IBM904' => 'IBM904', | ||||
105 | 'IBM905' => 'IBM905', | ||||
106 | 'IBM918' => 'IBM918', | ||||
107 | 'IEC_P27-1' => 'IEC_P27_1', | ||||
108 | 'INIS-8' => 'INIS_8', | ||||
109 | 'INIS-CYRILLIC' => 'INIS_CYRILLIC', | ||||
110 | 'INIS' => 'INIS', | ||||
111 | 'ISO-8859-1' => 'ISO_8859_1', | ||||
112 | 'ISO-8859-10' => 'ISO_8859_10', | ||||
113 | 'ISO-8859-11' => 'ISO_8859_11', | ||||
114 | 'ISO-8859-13' => 'ISO_8859_13', | ||||
115 | 'ISO-8859-14' => 'ISO_8859_14', | ||||
116 | 'ISO-8859-15' => 'ISO_8859_15', | ||||
117 | 'ISO-8859-16' => 'ISO_8859_16', | ||||
118 | 'ISO-8859-2' => 'ISO_8859_2', | ||||
119 | 'ISO-8859-3' => 'ISO_8859_3', | ||||
120 | 'ISO-8859-4' => 'ISO_8859_4', | ||||
121 | 'ISO-8859-5' => 'ISO_8859_5', | ||||
122 | 'ISO-8859-6' => 'ISO_8859_6', | ||||
123 | 'ISO-8859-7' => 'ISO_8859_7', | ||||
124 | 'ISO-8859-8' => 'ISO_8859_8', | ||||
125 | 'ISO-8859-9' => 'ISO_8859_9', | ||||
126 | 'ISO_10367-BOX' => 'ISO_10367_BOX', | ||||
127 | 'ISO_2033-1983' => 'ISO_2033_1983', | ||||
128 | 'ISO_5427-EXT' => 'ISO_5427_EXT', | ||||
129 | 'ISO_5427' => 'ISO_5427', | ||||
130 | 'ISO_5428' => 'ISO_5428', | ||||
131 | 'KOI-8' => 'KOI_8', | ||||
132 | 'KOI8-R' => 'KOI8_R', | ||||
133 | 'KOI8-RU' => 'KOI8_RU', | ||||
134 | 'KOI8-T' => 'KOI8_T', | ||||
135 | 'KOI8-U' => 'KOI8_U', | ||||
136 | 'LATIN-GREEK-1' => 'LATIN_GREEK_1', | ||||
137 | 'LATIN-GREEK' => 'LATIN_GREEK', | ||||
138 | 'MACINTOSH' => 'MACINTOSH', | ||||
139 | 'MACARABIC' => 'MACARABIC', | ||||
140 | 'MACCYRILLIC' => 'MACCYRILLIC', | ||||
141 | 'MACCROATIAN' => 'MACCROATIAN', | ||||
142 | 'MACGREEK' => 'MACGREEK', | ||||
143 | 'MACHEBREW' => 'MACHEBREW', | ||||
144 | 'MACICELAND' => 'MACICELAND', | ||||
145 | 'MACROMANIA' => 'MACROMANIA', | ||||
146 | 'MACTHAI' => 'MACTHAI', | ||||
147 | 'MACTURKISH' => 'MACTURKISH', | ||||
148 | 'MACUKRAINE' => 'MACUKRAINE', | ||||
149 | 'MAC-IS' => 'MAC_IS', | ||||
150 | 'MAC-SAMI' => 'MAC_SAMI', | ||||
151 | 'MAC-UK' => 'MAC_UK', | ||||
152 | 'NATS-DANO' => 'NATS_DANO', | ||||
153 | 'NATS-SEFI' => 'NATS_SEFI', | ||||
154 | 'NEXTSTEP' => 'NEXTSTEP', | ||||
155 | 'TIS-620' => 'TIS_620', | ||||
156 | 'UTF-8' => 'UTF_8', | ||||
157 | 'VISCII' => 'VISCII', | ||||
158 | 'WIN-SAMI-2' => 'SAMI_WS2', | ||||
159 | 'WINDOWS-1250' => 'CP1250', | ||||
160 | 'WINDOWS-1251' => 'CP1251', | ||||
161 | 'WINDOWS-1252' => 'CP1252', | ||||
162 | 'WINDOWS-1253' => 'CP1253', | ||||
163 | 'WINDOWS-1254' => 'CP1254', | ||||
164 | 'WINDOWS-1256' => 'CP1256', | ||||
165 | 'WINDOWS-1257' => 'CP1257', | ||||
166 | 'US-ASCII' => 'US_ASCII', | ||||
167 | }; | ||||
168 | |||||
169 | # These encodings may be available via Encode(3pm). Only the keys are used: isSupported() probes Encode, records usable names in $conversions as '_Encode', and deletes the probed key from this table. | ||||
170 | $optional_conversions = { | ||||
171 | 'BIG5' => undef, | ||||
172 | 'BIG5-HKSCS' => undef, | ||||
173 | 'CN-GB' => undef, | ||||
174 | 'CN-GB-ISOIR165' => undef, | ||||
175 | 'CP1006' => undef, | ||||
176 | 'CP1026' => undef, | ||||
177 | 'CP1047' => undef, | ||||
178 | 'CP1361' => undef, | ||||
179 | 'CP949' => undef, | ||||
180 | 'CP37' => undef, | ||||
181 | 'CP424' => undef, | ||||
182 | 'CP500' => undef, | ||||
183 | 'CP737' => undef, | ||||
184 | 'CP775' => undef, | ||||
185 | 'CP856' => undef, | ||||
186 | 'CP874' => undef, | ||||
187 | 'CP875' => undef, | ||||
188 | 'CP932' => undef, | ||||
189 | 'CP936' => undef, | ||||
190 | 'CP950' => undef, | ||||
191 | 'EUC-JP' => undef, | ||||
192 | 'EUC-KR' => undef, | ||||
193 | 'EUC-TW' => undef, | ||||
194 | # mapping from 0xef to 0xff missing. | ||||
195 | # 'HP-ROMAN8' => undef, | ||||
196 | 'GB18030' => undef, | ||||
197 | 'HZ' => undef, | ||||
198 | 'IBM437' => undef, | ||||
199 | 'IBM850' => undef, | ||||
200 | 'IBM852' => undef, | ||||
201 | 'IBM855' => undef, | ||||
202 | 'IBM857' => undef, | ||||
203 | 'IBM860' => undef, | ||||
204 | 'IBM861' => undef, | ||||
205 | 'IBM862' => undef, | ||||
206 | 'IBM863' => undef, | ||||
207 | 'IBM864' => undef, | ||||
208 | 'IBM865' => undef, | ||||
209 | 'IBM866' => undef, | ||||
210 | 'IBM869' => undef, | ||||
211 | 'ISO-10646-UCS-2' => undef, | ||||
212 | 'ISO-10646-UCS-4' => undef, | ||||
213 | 'ISO-2022-JP' => undef, | ||||
214 | 'ISO-2022-JP-1' => undef, | ||||
215 | 'ISO-2022-KR' => undef, | ||||
216 | 'ISO-8859-1' => undef, | ||||
217 | 'ISO-8859-10' => undef, | ||||
218 | # This is broken in some versions of Encode. | ||||
219 | # 'ISO-8859-11' => undef, | ||||
220 | 'ISO-8859-13' => undef, | ||||
221 | 'ISO-8859-14' => undef, | ||||
222 | 'ISO-8859-15' => undef, | ||||
223 | # Errors at 0xa5 and 0xab. | ||||
224 | # 'ISO-8859-16' => undef, | ||||
225 | 'ISO-8859-2' => undef, | ||||
226 | 'ISO-8859-3' => undef, | ||||
227 | 'ISO-8859-4' => undef, | ||||
228 | 'ISO-8859-5' => undef, | ||||
229 | # Uses arabic digits in ascii range?! | ||||
230 | # 'ISO-8859-6' => undef, | ||||
231 | # 0xa1 and 0xa2 are incorrectly encoded. | ||||
232 | # 'ISO-8859-7' => undef, | ||||
233 | # 0xfd and 0xfe are missing. | ||||
234 | # 'ISO-8859-8' => undef, | ||||
235 | 'ISO-8859-9' => undef, | ||||
236 | 'ISO-IR-149' => undef, | ||||
237 | 'KOI8-R' => undef, | ||||
238 | # 0x95 is BULLET, not BULLET OPERATOR. | ||||
239 | # 'KOI8-U' => undef, | ||||
240 | # Seems to be messed up in certain Encode versions. | ||||
241 | # 'MACINTOSH' => undef, | ||||
242 | # TODO: Check other Mac encodings for correctness. | ||||
243 | # Nextstep is completely broken in my version of Encode. | ||||
244 | # 'NEXTSTEP' => undef, | ||||
245 | 'SHIFT_JIS' => undef, | ||||
246 | 'UCS-2BE' => undef, | ||||
247 | 'UCS-2LE' => undef, | ||||
248 | 'UCS-4BE' => undef, | ||||
249 | 'UCS-4LE' => undef, | ||||
250 | 'US-ASCII' => undef, | ||||
251 | 'UTF-16' => undef, | ||||
252 | 'UTF-16BE' => undef, | ||||
253 | 'UTF-16LE' => undef, | ||||
254 | 'UTF-32' => undef, | ||||
255 | 'UTF-32BE' => undef, | ||||
256 | 'UTF-32LE' => undef, | ||||
257 | 'UTF-8' => undef, | ||||
258 | # 0x86 is missing, 0xa6 is incorrectly encoded. | ||||
259 | # 'VISCII' => undef, | ||||
260 | 'WINDOWS-1250' => undef, | ||||
261 | 'WINDOWS-1251' => undef, | ||||
262 | 'WINDOWS-1252' => undef, | ||||
263 | 'WINDOWS-1253' => undef, | ||||
264 | 'WINDOWS-1254' => undef, | ||||
265 | 'WINDOWS-1255' => undef, | ||||
266 | 'WINDOWS-1256' => undef, | ||||
267 | 'WINDOWS-1257' => undef, | ||||
268 | 'WINDOWS-1258' => undef, | ||||
269 | }; | ||||
270 | |||||
# Cache for the "is Encode loadable?" probe done by isSupported():
# undefined until the first probe, defined (true or false) afterwards.
my $has_encode;

# resolveAlias ($class, $encoding)
#
# Map an encoding name or alias to its canonical name.  The lookup is
# case-insensitive.  The class argument is ignored.
#
# Returns the upper-cased name when it is a key of $conversions or
# $optional_conversions, otherwise whatever the alias table in
# Locale::Recode::_Aliases holds for it.  Returns nothing (undef in scalar
# context, the empty list in list context) for an unknown, undefined or
# empty name.
sub resolveAlias
{
    my (undef, $encoding) = @_;

    # Same guard as isSupported(): there is nothing to resolve, so do not
    # run uc() on undef and do not load the alias table for a hopeless
    # lookup.
    return unless defined $encoding && length $encoding;

    $encoding = uc $encoding;

    # Canonical names resolve to themselves.
    return $encoding if exists $conversions->{$encoding};
    return $encoding if exists $optional_conversions->{$encoding};

    # The alias table is only loaded once a name misses both tables.
    require Locale::Recode::_Aliases;

    my $resolved = Locale::Recode::_Aliases::ALIASES()->{$encoding};

    return $resolved if $resolved;

    return;
}
290 | |||||
# isSupported ($class, $encoding)
#
# Check whether an encoding (canonical name or alias, any case) can be
# converted.
#
# Returns the value stored for the encoding in $conversions, which is
# '_Encode' for encodings handled by Encode.  Returns nothing when the
# name is undefined, empty, unknown or not usable.
#
# Side effects: the first call that hits an optional encoding tries to
# load Encode and, on success, registers two extra Encode aliases.  Every
# optional encoding probed while Encode is available is removed from
# $optional_conversions and, if Encode can handle it, recorded in
# $conversions.
sub isSupported
{
    my ($class, $encoding) = @_;

    return unless defined $encoding && length $encoding;

    $encoding = uc $encoding;
    my $mimename = $class->resolveAlias ($encoding);

    return unless $mimename;

    # Determine the correct module.
    if (exists $optional_conversions->{$mimename}) {
        unless (defined $has_encode) {
            # Block eval: no need to compile a string just to trap a
            # failing require.
            $has_encode = eval { require Encode; 1 } ? 1 : 0;

            if ($has_encode) {
                require Encode::Alias;

                # Add missing real names.
                Encode::Alias::define_alias (MS_KANJI => 'ShiftJIS');
                Encode::Alias::define_alias ('CN-GB' => 'EUC-CN');
            }
        }

        if ($has_encode) {
            # Now check whether Encode really supports that encoding.
            # The name is passed as data instead of being spliced into
            # evaluated source, so odd characters in it cannot change
            # what gets executed.
            my $usable = eval { Encode::encode ($mimename, 'x'); 1 };

            $conversions->{$mimename} = '_Encode' if $usable;

            # Probed once, never again, whatever the outcome.
            delete $optional_conversions->{$mimename};
        }
    }

    return $conversions->{$mimename} if exists $conversions->{$mimename};

    return;
}
332 | |||||
# listSupported ($class)
#
# Probe every encoding still listed in $optional_conversions through
# isSupported(), then return the keys of $conversions (their number in
# scalar context).  The order of the returned names is unspecified.
sub listSupported
{
    my ($class) = @_;

    # Take the key list up front; isSupported() may delete entries from
    # the table while we walk over it.
    my @candidates = keys %$optional_conversions;

    foreach my $candidate (@candidates) {
        $class->isSupported ($candidate);
    }

    return keys %$conversions;
}
344 | |||||
# findPath ($class, $from, $to)
#
# Find a conversion path between two encodings.  Either end may be the
# pseudo encoding 'INTERNAL' (matched case-insensitively); everything else
# goes through resolveAlias().
#
# Returns nothing if an end cannot be resolved or is not supported, an
# empty array reference if both ends are the same encoding, and otherwise
# a reference to a list of [ module, from, to ] steps: one step when a
# single module can do the job, two steps via 'INTERNAL' when not.
sub findPath
{
    my ($class, $from, $to) = @_;

    my $canonical = sub {
        my ($name) = @_;

        return 'INTERNAL' if 'INTERNAL' eq uc $name;
        return scalar $class->resolveAlias ($name);
    };

    $from = $canonical->($from);
    $to = $canonical->($to);

    return unless $from && $to;

    return [] if $from eq $to;

    my $from_module = $class->isSupported ($from);
    my $to_module = $class->isSupported ($to);

    # An 'INTERNAL' end has no module of its own and borrows the one from
    # the opposite end; any other unsupported end is fatal.
    unless ($from_module) {
        return unless 'INTERNAL' eq $from;

        $from_module = $to_module;
        return unless $from_module;
    }

    unless ($to_module) {
        return unless 'INTERNAL' eq $to;

        $to_module = $from_module;
        return unless $to_module;
    }

    # One step is enough when both ends share a module or when the target
    # is 'INTERNAL' or 'UTF-8'.
    return [[ $from_module, $from, $to ]]
        if $from_module eq $to_module
            || $to eq 'INTERNAL'
            || $to eq 'UTF-8';

    return [[ $to_module, $from, $to ]] if $from eq 'INTERNAL';

    # Different modules on both ends: go via 'INTERNAL'.
    return [[ $from_module, $from, 'INTERNAL' ],
            [ $to_module, 'INTERNAL', $to ]];
}
387 | |||||
388 | # TODO: check for | ||||
389 | # 7bit-jis | ||||
390 | # AdobeStandardEncoding | ||||
391 | # AdobeSymbol | ||||
392 | # AdobeZdingbat | ||||
393 | # ascii-ctrl | ||||
394 | # big5ext | ||||
395 | # big5plus | ||||
396 | # cccii | ||||
397 | # cns11643-1 | ||||
398 | # cns11643-2 | ||||
399 | # cns11643-3 | ||||
400 | # cns11643-4 | ||||
401 | # cns11643-5 | ||||
402 | # cns11643-6 | ||||
403 | # cns11643-7 | ||||
404 | # cns11643-f | ||||
405 | # dingbats | ||||
406 | # gb12345-raw | ||||
407 | # gb2312-raw | ||||
408 | # gsm0338 | ||||
409 | # jis0201-raw | ||||
410 | # jis0208-raw | ||||
411 | # jis0212-raw | ||||
412 | # koi8-f | ||||
413 | # MIME-B | ||||
414 | # MIME-Header | ||||
415 | # MIME-Q | ||||
416 | # posix-bc | ||||
417 | # symbol | ||||
418 | # unisys | ||||
419 | |||||
420 | 1; | ||||
421 | |||||
422 | __END__ |