← Index
NYTProf Performance Profile   « line view »
For /usr/local/libexec/sympa/task_manager-debug.pl
  Run on Tue Jun 1 22:32:51 2021
Reported on Tue Jun 1 22:35:07 2021

Filename/usr/local/libexec/sympa/Sympa/Tools/Text.pm
StatementsExecuted 5865 statements in 30.5ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
345116.15ms6.15msSympa::Tools::Text::::CORE:readlineSympa::Tools::Text::CORE:readline (opcode)
345116.02ms53.4msSympa::Tools::Text::::slurpSympa::Tools::Text::slurp
345115.21ms35.1msSympa::Tools::Text::::canonic_textSympa::Tools::Text::canonic_text
345115.20ms5.20msSympa::Tools::Text::::CORE:openSympa::Tools::Text::CORE:open (opcode)
345111.70ms1.70msSympa::Tools::Text::::CORE:substSympa::Tools::Text::CORE:subst (opcode)
34511907µs907µsSympa::Tools::Text::::CORE:closeSympa::Tools::Text::CORE:close (opcode)
0000s0sSympa::Tools::Text::::BEGIN@132Sympa::Tools::Text::BEGIN@132
0000s0sSympa::Tools::Text::::BEGIN@30Sympa::Tools::Text::BEGIN@30
0000s0sSympa::Tools::Text::::BEGIN@31Sympa::Tools::Text::BEGIN@31
0000s0sSympa::Tools::Text::::BEGIN@32Sympa::Tools::Text::BEGIN@32
0000s0sSympa::Tools::Text::::BEGIN@33Sympa::Tools::Text::BEGIN@33
0000s0sSympa::Tools::Text::::BEGIN@34Sympa::Tools::Text::BEGIN@34
0000s0sSympa::Tools::Text::::BEGIN@35Sympa::Tools::Text::BEGIN@35
0000s0sSympa::Tools::Text::::BEGIN@36Sympa::Tools::Text::BEGIN@36
0000s0sSympa::Tools::Text::::BEGIN@37Sympa::Tools::Text::BEGIN@37
0000s0sSympa::Tools::Text::::BEGIN@38Sympa::Tools::Text::BEGIN@38
0000s0sSympa::Tools::Text::::BEGIN@39Sympa::Tools::Text::BEGIN@39
0000s0sSympa::Tools::Text::::BEGIN@40Sympa::Tools::Text::BEGIN@40
0000s0sSympa::Tools::Text::::BEGIN@41Sympa::Tools::Text::BEGIN@41
0000s0sSympa::Tools::Text::::BEGIN@42Sympa::Tools::Text::BEGIN@42
0000s0sSympa::Tools::Text::::BEGIN@44Sympa::Tools::Text::BEGIN@44
0000s0sSympa::Tools::Text::::BEGIN@45Sympa::Tools::Text::BEGIN@45
0000s0sSympa::Tools::Text::::CORE:matchSympa::Tools::Text::CORE:match (opcode)
0000s0sSympa::Tools::Text::::CORE:regcompSympa::Tools::Text::CORE:regcomp (opcode)
0000s0sSympa::Tools::Text::::__ANON__Sympa::Tools::Text::__ANON__ (xsub)
0000s0sSympa::Tools::Text::::__ANON__[:178]Sympa::Tools::Text::__ANON__[:178]
0000s0sSympa::Tools::Text::::__ANON__[:188]Sympa::Tools::Text::__ANON__[:188]
0000s0sSympa::Tools::Text::::_gc_lengthSympa::Tools::Text::_gc_length
0000s0sSympa::Tools::Text::::_url_query_stringSympa::Tools::Text::_url_query_string
0000s0sSympa::Tools::Text::::addrencodeSympa::Tools::Text::addrencode
0000s0sSympa::Tools::Text::::canonic_emailSympa::Tools::Text::canonic_email
0000s0sSympa::Tools::Text::::canonic_message_idSympa::Tools::Text::canonic_message_id
0000s0sSympa::Tools::Text::::clipSympa::Tools::Text::clip
0000s0sSympa::Tools::Text::::decode_filesystem_safeSympa::Tools::Text::decode_filesystem_safe
0000s0sSympa::Tools::Text::::decode_htmlSympa::Tools::Text::decode_html
0000s0sSympa::Tools::Text::::encode_filesystem_safeSympa::Tools::Text::encode_filesystem_safe
0000s0sSympa::Tools::Text::::encode_htmlSympa::Tools::Text::encode_html
0000s0sSympa::Tools::Text::::encode_uriSympa::Tools::Text::encode_uri
0000s0sSympa::Tools::Text::::escape_charsSympa::Tools::Text::escape_chars
0000s0sSympa::Tools::Text::::foldcaseSympa::Tools::Text::foldcase
0000s0sSympa::Tools::Text::::guessed_to_utf8Sympa::Tools::Text::guessed_to_utf8
0000s0sSympa::Tools::Text::::mailtourlSympa::Tools::Text::mailtourl
0000s0sSympa::Tools::Text::::padSympa::Tools::Text::pad
0000s0sSympa::Tools::Text::::qdecode_filenameSympa::Tools::Text::qdecode_filename
0000s0sSympa::Tools::Text::::qencode_filenameSympa::Tools::Text::qencode_filename
0000s0sSympa::Tools::Text::::unescape_charsSympa::Tools::Text::unescape_chars
0000s0sSympa::Tools::Text::::valid_emailSympa::Tools::Text::valid_email
0000s0sSympa::Tools::Text::::weburlSympa::Tools::Text::weburl
0000s0sSympa::Tools::Text::::wrap_textSympa::Tools::Text::wrap_text
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# -*- indent-tabs-mode: nil; -*-
2# vim:ft=perl:et:sw=4
3# $Id$
4
5# Sympa - SYsteme de Multi-Postage Automatique
6#
7# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
8# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
9# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
10# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
11# Copyright 2018, 2020 The Sympa Community. See the AUTHORS.md
12# file at the top-level directory of this distribution and at
13# <https://github.com/sympa-community/sympa.git>.
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27
28package Sympa::Tools::Text;
29
30use strict;
31use warnings;
32use feature qw(fc);
33use Encode qw();
34use English qw(-no_match_vars);
35use Encode::MIME::Header; # 'MIME-Q' encoding.
36use HTML::Entities qw();
37use MIME::EncWords;
38use Text::LineFold;
39use Unicode::GCString;
40use URI::Escape qw();
41BEGIN { eval 'use Unicode::Normalize qw()'; }
# spent 0s executing statements in string eval
42BEGIN { eval 'use Unicode::UTF8 qw()'; }
# spent 0s executing statements in string eval
43
44use Sympa::Language;
45use Sympa::Regexps;
46
47# Old name: tools::addrencode().
48sub addrencode {
49 my $addr = shift;
50 my $phrase = (shift || '');
51 my $charset = (shift || 'utf8');
52 my $comment = (shift || '');
53
54 return undef unless $addr =~ /\S/;
55
56 if ($phrase =~ /[^\s\x21-\x7E]/) {
57 $phrase = MIME::EncWords::encode_mimewords(
58 Encode::decode('utf8', $phrase),
59 'Encoding' => 'A',
60 'Charset' => $charset,
61 'Replacement' => 'FALLBACK',
62 'Field' => 'Resent-Sender', # almost longest
63 'Minimal' => 'DISPNAME', # needs MIME::EncWords >= 1.012.
64 );
65 } elsif ($phrase =~ /\S/) {
66 $phrase =~ s/([\\\"])/\\$1/g;
67 $phrase = '"' . $phrase . '"';
68 }
69 if ($comment =~ /[^\s\x21-\x27\x2A-\x5B\x5D-\x7E]/) {
70 $comment = MIME::EncWords::encode_mimewords(
71 Encode::decode('utf8', $comment),
72 'Encoding' => 'A',
73 'Charset' => $charset,
74 'Replacement' => 'FALLBACK',
75 'Minimal' => 'DISPNAME',
76 );
77 } elsif ($comment =~ /\S/) {
78 $comment =~ s/([\\\"])/\\$1/g;
79 }
80
81 return
82 ($phrase =~ /\S/ ? "$phrase " : '')
83 . ($comment =~ /\S/ ? "($comment) " : '')
84 . "<$addr>";
85}
86
87# Old names: tools::clean_email(), tools::get_canonical_email().
88sub canonic_email {
89 my $email = shift;
90
91 return undef unless defined $email;
92
93 # Remove leading and trailing white spaces.
94 $email =~ s/\A\s+//;
95 $email =~ s/\s+\z//;
96
97 # Lower-case.
98 $email =~ tr/A-Z/a-z/;
99
100 return (length $email) ? $email : undef;
101}
102
103# Old name: tools::clean_msg_id().
104sub canonic_message_id {
105 my $msg_id = shift;
106
107 return $msg_id unless defined $msg_id;
108
109 chomp $msg_id;
110
111 if ($msg_id =~ /\<(.+)\>/) {
112 $msg_id = $1;
113 }
114
115 return $msg_id;
116}
117
118
# spent 35.1ms (5.21+29.9) within Sympa::Tools::Text::canonic_text which was called 345 times, avg 102µs/call: # 345 times (5.21ms+29.9ms) by Sympa::Tools::Text::slurp at line 160, avg 102µs/call
sub canonic_text {
119345134µs my $text = shift;
120
121345134µs return undef unless defined $text;
122
123 # Normalize text. See also discussion on
124 # https://listes.renater.fr/sympa/arc/sympa-developpers/2018-03/thrd1.html
125 #
126 # N.B.: Corresponding modules are optional by now, and should be
127 # mandatory in the future.
12834573µs my $utext;
1293451.36ms345338µs if (Encode::is_utf8($text)) {
# spent 338µs making 345 calls to Encode::is_utf8, avg 981ns/call
130 $utext = $text;
131 } elsif ($Unicode::UTF8::VERSION) {
132 no warnings 'utf8';
133 $utext = Unicode::UTF8::decode_utf8($text);
134 } else {
1353455.14ms10359.29ms $utext = Encode::decode_utf8($text);
# spent 5.27ms making 345 calls to Encode::decode_utf8, avg 15µs/call # spent 3.12ms making 345 calls to Encode::utf8::decode, avg 9µs/call # spent 900µs making 345 calls to Encode::Encoding::renewed, avg 3µs/call
136 }
137345998µs34522.0ms if ($Unicode::Normalize::VERSION) {
# spent 22.0ms making 345 calls to Unicode::Normalize::normalize, avg 64µs/call
138 $utext = Unicode::Normalize::normalize('NFC', $utext);
139 }
140
141 # Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
142 # and EIMS:
1433452.25ms3451.70ms $utext =~ s/\r\n|\r/\n/g;
# spent 1.70ms making 345 calls to Sympa::Tools::Text::CORE:subst, avg 5µs/call
144
145345626µs345133µs if (Encode::is_utf8($text)) {
# spent 133µs making 345 calls to Encode::is_utf8, avg 386ns/call
146 return $utext;
147 } else {
1483451.54ms345424µs return Encode::encode_utf8($utext);
# spent 424µs making 345 calls to Encode::encode_utf8, avg 1µs/call
149 }
150}
151
152
# spent 53.4ms (6.02+47.3) within Sympa::Tools::Text::slurp which was called 345 times, avg 155µs/call: # 345 times (6.02ms+47.3ms) by Sympa::Robot::load_topics at line 210 of /usr/local/libexec/sympa/Sympa/Robot.pm, avg 155µs/call
sub slurp {
153345138µs my $path = shift;
154
15534589µs my $ifh;
1563456.85ms3455.20ms return undef unless open $ifh, '<', $path;
# spent 5.20ms making 345 calls to Sympa::Tools::Text::CORE:open, avg 15µs/call
15710358.08ms3456.15ms my $text = do { local $RS; <$ifh> };
# spent 6.15ms making 345 calls to Sympa::Tools::Text::CORE:readline, avg 18µs/call
1583451.51ms345907µs close $ifh;
# spent 907µs making 345 calls to Sympa::Tools::Text::CORE:close, avg 3µs/call
159
1603451.57ms34535.1ms return canonic_text($text);
# spent 35.1ms making 345 calls to Sympa::Tools::Text::canonic_text, avg 102µs/call
161}
162
163sub wrap_text {
164 my $text = shift;
165 my $init = shift;
166 my $subs = shift;
167 my $cols = shift;
168
169 $init //= '';
170 $subs //= '';
171 $cols //= 78;
172 return $text unless $cols;
173
174 my $email_re = Sympa::Regexps::email();
175 my $linefold = Text::LineFold->new(
176 Language => Sympa::Language->instance->get_lang,
177 Prep => 'NONBREAKURI',
178 prep => [$email_re, sub { shift; @_ }],
179 ColumnsMax => $cols,
180 Format => sub {
181 shift;
182 my $event = shift;
183 my $str = shift;
184 if ($event =~ /^eo/) { return "\n"; }
185 if ($event =~ /^so[tp]/) { return $init . $str; }
186 if ($event eq 'sol') { return $subs . $str; }
187 undef;
188 },
189 );
190
191 my $t = Encode::is_utf8($text) ? $text : Encode::decode_utf8($text);
192
193 my $ret = '';
194 while (1000 < length $t) {
195 my $s = substr $t, 0, 1000;
196 $ret .= $linefold->break_partial($s);
197 $t = substr $t, 1000;
198 }
199 $ret .= $linefold->break_partial($t) if length $t;
200 $ret .= $linefold->break_partial(undef);
201
202 return Encode::is_utf8($text) ? $ret : Encode::encode_utf8($ret);
203}
204
205sub decode_filesystem_safe {
206 my $str = shift;
207 return '' unless defined $str and length $str;
208
209 $str = Encode::encode_utf8($str) if Encode::is_utf8($str);
210 # On case-insensitive filesystem "_XX" along with "_xx" should be decoded.
211 $str =~ s/_([0-9A-Fa-f]{2})/chr hex "0x$1"/eg;
212 return $str;
213}
214
215sub decode_html {
216 my $str = shift;
217
218 Encode::encode_utf8(
219 HTML::Entities::decode_entities(Encode::decode_utf8($str)));
220}
221
222sub encode_filesystem_safe {
223 my $str = shift;
224 return '' unless defined $str and length $str;
225
226 $str = Encode::encode_utf8($str) if Encode::is_utf8($str);
227 $str =~ s/([^-+.0-9\@A-Za-z])/sprintf '_%02x', ord $1/eg;
228 return $str;
229}
230
231sub encode_html {
232 my $str = shift;
233 my $additional_unsafe = shift || '';
234
235 HTML::Entities::encode_entities($str, '<>&"' . $additional_unsafe);
236}
237
238sub encode_uri {
239 my $str = shift;
240 my %options = @_;
241
242 # Note: URI-1.35 (URI::Escape 3.28) or later is required.
243 return Encode::encode_utf8(
244 URI::Escape::uri_escape_utf8(
245 Encode::decode_utf8($str),
246 '^-A-Za-z0-9._~' . (exists $options{omit} ? $options{omit} : '')
247 )
248 );
249}
250
251# Old name: tools::escape_chars().
252sub escape_chars {
253 my $s = shift;
254 my $except = shift; ## Exceptions
255 my $ord_except = ord $except if defined $except;
256
257 ## Escape chars
258 ## !"#$%&'()+,:;<=>?[] AND accented chars
259 ## escape % first
260 foreach my $i (
261 0x25,
262 0x20 .. 0x24,
263 0x26 .. 0x2c,
264 0x3a .. 0x3f,
265 0x5b, 0x5d,
266 0x80 .. 0x9f,
267 0xa0 .. 0xff
268 ) {
269 next if defined $ord_except and $i == $ord_except;
270 my $hex_i = sprintf "%lx", $i;
271 $s =~ s/\x$hex_i/%$hex_i/g;
272 }
273 ## Special traetment for '/'
274 $s =~ s/\//%a5/g unless defined $except and $except eq '/';
275
276 return $s;
277}
278
279# Old name: tt2::escape_url().
280# DEPRECATED. Use Sympa::Tools::Text::escape_uri() or
281# Sympa::Tools::Text::mailtourl().
282#sub escape_url;
283
284sub foldcase {
285 my $str = shift;
286
287 return '' unless defined $str and length $str;
288 return Encode::encode_utf8(fc(Encode::decode_utf8($str)));
289}
290
291my %legacy_charsets = (
292 'ar' => [qw(iso-8859-6)],
293 'bs' => [qw(iso-8859-2)],
294 'cs' => [qw(iso-8859-2)],
295 'eo' => [qw(iso-8859-3)],
296 'et' => [qw(iso-8859-4)],
297 'he' => [qw(iso-8859-8)],
298 'hr' => [qw(iso-8859-2)],
299 'hu' => [qw(iso-8859-2)],
300 'ja' => [qw(euc-jp cp932 MacJapanese)],
301 'kl' => [qw(iso-8859-4)],
302 'ko' => [qw(cp949)],
303 'lt' => [qw(iso-8859-4)],
304 'lv' => [qw(iso-8859-4)],
305 'mt' => [qw(iso-8859-3)],
306 'pl' => [qw(iso-8859-2)],
307 'ro' => [qw(iso-8859-2)],
308 'ru' => [qw(koi8-r cp1251)], # cp866? MacCyrillic?
309 'sk' => [qw(iso-8859-2)],
310 'sl' => [qw(iso-8859-2)],
311 'th' => [qw(iso-8859-11 cp874 MacThai)],
312 'tr' => [qw(iso-8859-9)],
313 'uk' => [qw(koi8-u)], # MacUkrainian?
314 'zh-CN' => [qw(euc-cn)],
315 'zh-TW' => [qw(big5-eten)],
316);
317
318sub guessed_to_utf8 {
319 my $text = shift;
320 my @langs = @_;
321
322 return Encode::encode_utf8($text) if Encode::is_utf8($text);
323 return $text
324 unless defined $text
325 and length $text
326 and $text =~ /[^\x00-\x7F]/;
327
328 my $utf8;
329 if ($Unicode::UTF8::VERSION) {
330 $utf8 =
331 eval { Unicode::UTF8::decode_utf8($text, Encode::FB_CROAK()) };
332 }
333 unless (defined $utf8) {
334 foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
335 $utf8 =
336 eval { Encode::decode($charset, $text, Encode::FB_CROAK()) };
337 last if defined $utf8;
338 }
339 }
340 unless (defined $utf8) {
341 $utf8 = Encode::decode('iso-8859-1', $text);
342 }
343
344 # Apply NFC: e.g. for modified-NFD by Mac OS X.
345 $utf8 = Unicode::Normalize::normalize('NFC', $utf8)
346 if $Unicode::Normalize::VERSION;
347
348 return Encode::encode_utf8($utf8);
349}
350
351sub mailtourl {
352 my $text = shift;
353 my %options = @_;
354
355 my $dtext =
356 (not defined $text) ? ''
357 : $options{decode_html} ? Sympa::Tools::Text::decode_html($text)
358 : $text;
359 $dtext =~ s/\A\s+//;
360 $dtext =~ s/\s+\z//;
361 $dtext =~ s/(?:\r\n|\r|\n)(?=[ \t])//g;
362 $dtext =~ s/\r\n|\r|\n/ /g;
363
364 # The ``@'' in email address should not be encoded because some MUAs
365 # aren't able to decode ``%40'' in e-mail address of mailto: URL.
366 # Contrary, ``@'' in query component should be encoded because some
367 # MUAs take it for a delimiter to separate URL from the rest.
368 my ($format, $utext, $qsep);
369 if ($dtext =~ /[()<>\[\]:;,\"\s]/) {
370 # Use "to" header if source text includes any of RFC 5322
371 # "specials", minus ``@'' and ``\'', plus whitespaces.
372 $format = 'mailto:?to=%s%s';
373 $utext = Sympa::Tools::Text::encode_uri($dtext);
374 $qsep = '&';
375 } else {
376 $format = 'mailto:%s%s';
377 $utext = Sympa::Tools::Text::encode_uri($dtext, omit => '@');
378 $qsep = '?';
379 }
380 my $qstring = _url_query_string(
381 $options{query},
382 decode_html => $options{decode_html},
383 leadchar => $qsep,
384 sepchar => '&',
385 trim_values => 1,
386 );
387
388 return sprintf $format, $utext, $qstring;
389}
390
391sub _url_query_string {
392 my $query = shift;
393 my %options = @_;
394
395 unless (ref $query eq 'HASH' and %$query) {
396 return '';
397 } else {
398 my $decode_html = $options{decode_html};
399 my $trim_values = $options{trim_values};
400 return ($options{leadchar} || '?') . join(
401 ($options{sepchar} || ';'),
402 map {
403 my ($dkey, $dval) = map {
404 (not defined $_) ? ''
405 : $decode_html ? Sympa::Tools::Text::decode_html($_)
406 : $_;
407 } ($_, $query->{$_});
408 if ($trim_values and lc $dkey ne 'body') {
409 $dval =~ s/\A\s+//;
410 $dval =~ s/\s+\z//;
411 $dval =~ s/(?:\r\n|\r|\n)(?=[ \t])//g;
412 $dval =~ s/\r\n|\r|\n/ /g;
413 }
414
415 sprintf '%s=%s',
416 Sympa::Tools::Text::encode_uri($dkey),
417 Sympa::Tools::Text::encode_uri($dval);
418 } sort keys %$query
419 );
420 }
421}
422
423sub pad {
424 my $str = shift;
425 my $width = shift;
426
427 return $str unless $width and defined $str;
428
429 my $ustr = Encode::is_utf8($str) ? $str : Encode::decode_utf8($str);
430 my $cols = Unicode::GCString->new($ustr)->columns;
431
432 unless ($cols < abs $width) {
433 return $str;
434 } elsif ($width < 0) {
435 return $str . (' ' x (-$width - $cols));
436 } else {
437 return (' ' x ($width - $cols)) . $str;
438 }
439}
440
441# Old name: tools::qdecode_filename().
442sub qdecode_filename {
443 my $filename = shift;
444
445 ## We don't use MIME::Words here because it does not encode properly
446 ## Unicode
447 ## Check if string is already Q-encoded first
448 #if ($filename =~ /\=\?UTF-8\?/) {
449 $filename = Encode::encode_utf8(Encode::decode('MIME-Q', $filename));
450 #}
451
452 return $filename;
453}
454
455# Old name: tools::qencode_filename().
456sub qencode_filename {
457 my $filename = shift;
458
459 ## We don't use MIME::Words here because it does not encode properly
460 ## Unicode
461 ## Check if string is already Q-encoded first
462 ## Also check if the string contains 8bit chars
463 unless ($filename =~ /\=\?UTF-8\?/
464 || $filename =~ /^[\x00-\x7f]*$/) {
465
466 ## Don't encode elements such as .desc. or .url or .moderate
467 ## or .extension
468 my $part = $filename;
469 my ($leading, $trailing);
470 $leading = $1 if ($part =~ s/^(\.desc\.)//); ## leading .desc
471 $trailing = $1 if ($part =~ s/((\.\w+)+)$//); ## trailing .xx
472
473 my $encoded_part = MIME::EncWords::encode_mimewords(
474 $part,
475 Charset => 'utf8',
476 Encoding => 'q',
477 MaxLineLen => 1000,
478 Minimal => 'NO'
479 );
480
481 $filename = $leading . $encoded_part . $trailing;
482 }
483
484 return $filename;
485}
486
487sub clip {
488 my $string = shift;
489 return undef unless @_;
490 my $length = shift;
491
492 my ($gcstr, $blen);
493 if (ref $string eq 'Unicode::GCString') {
494 $gcstr = $string;
495 $blen = length Encode::encode_utf8($string->as_string);
496 } elsif (Encode::is_utf8($string)) {
497 $gcstr = Unicode::GCString->new($string);
498 $blen = length Encode::encode_utf8($string);
499 } else {
500 $gcstr = Unicode::GCString->new(Encode::decode_utf8($string));
501 $blen = length $string;
502 }
503
504 $length += $blen if $length < 0;
505 return '' if $length < 0; # out of range
506 return $string if $blen <= $length;
507
508 my $result = $gcstr->substr(0, _gc_length($gcstr, $length));
509
510 if (ref $string eq 'Unicode::GCString') {
511 return $result;
512 } elsif (Encode::is_utf8($string)) {
513 return $result->as_string;
514 } else {
515 return Encode::encode_utf8($result->as_string);
516 }
517}
518
519sub _gc_length {
520 my $gcstr = shift;
521 my $length = shift;
522
523 return 0 unless $gcstr->length;
524 return 0 unless $length;
525
526 my ($shorter, $longer) = (0, $gcstr->length);
527 while ($shorter < $longer) {
528 my $cur = ($shorter + $longer + 1) >> 1;
529 my $elen =
530 length Encode::encode_utf8($gcstr->substr(0, $cur)->as_string);
531 if ($elen <= $length) {
532 $shorter = $cur;
533 } else {
534 $longer = $cur - 1;
535 }
536 }
537
538 return $shorter;
539}
540
541# Old name: tools::unescape_chars().
542sub unescape_chars {
543 my $s = shift;
544
545 $s =~ s/%a5/\//g; ## Special traetment for '/'
546 foreach my $i (0x20 .. 0x2c, 0x3a .. 0x3f, 0x5b, 0x5d, 0x80 .. 0x9f,
547 0xa0 .. 0xff) {
548 my $hex_i = sprintf "%lx", $i;
549 my $hex_s = sprintf "%c", $i;
550 $s =~ s/%$hex_i/$hex_s/g;
551 }
552
553 return $s;
554}
555
556# Old name: tools::valid_email().
557sub valid_email {
558 my $email = shift;
559
560 my $email_re = Sympa::Regexps::email();
561 return undef unless $email =~ /^${email_re}$/;
562
563 # Forbidden characters.
564 return undef if $email =~ /[\|\$\*\?\!]/;
565
566 return 1;
567}
568
569sub weburl {
570 my $base = shift;
571 my $paths = shift;
572 my %options = @_;
573
574 my @paths = map {
575 Sympa::Tools::Text::encode_uri(
576 (not defined $_) ? ''
577 : $options{decode_html} ? Sympa::Tools::Text::decode_html($_)
578 : $_
579 );
580 } @{$paths || []};
581
582 my $qstring = _url_query_string(
583 $options{query},
584 decode_html => $options{decode_html},
585 sepchar => '&',
586 );
587
588 my $fstring;
589 my $fragment = $options{fragment};
590 if (defined $fragment) {
591 $fstring = '#'
592 . Sympa::Tools::Text::encode_uri(
593 $options{decode_html}
594 ? Sympa::Tools::Text::decode_html($fragment)
595 : $fragment
596 );
597 } else {
598 $fstring = '';
599 }
600
601 return sprintf '%s%s%s', join('/', grep { defined $_ } ($base, @paths)),
602 $qstring, $fstring;
603}
604
6051;
606__END__
 
# spent 907µs within Sympa::Tools::Text::CORE:close which was called 345 times, avg 3µs/call: # 345 times (907µs+0s) by Sympa::Tools::Text::slurp at line 158, avg 3µs/call
sub Sympa::Tools::Text::CORE:close; # opcode
# spent 5.20ms within Sympa::Tools::Text::CORE:open which was called 345 times, avg 15µs/call: # 345 times (5.20ms+0s) by Sympa::Tools::Text::slurp at line 156, avg 15µs/call
sub Sympa::Tools::Text::CORE:open; # opcode
# spent 6.15ms within Sympa::Tools::Text::CORE:readline which was called 345 times, avg 18µs/call: # 345 times (6.15ms+0s) by Sympa::Tools::Text::slurp at line 157, avg 18µs/call
sub Sympa::Tools::Text::CORE:readline; # opcode
# spent 1.70ms within Sympa::Tools::Text::CORE:subst which was called 345 times, avg 5µs/call: # 345 times (1.70ms+0s) by Sympa::Tools::Text::canonic_text at line 143, avg 5µs/call
sub Sympa::Tools::Text::CORE:subst; # opcode