← Index
NYTProf Performance Profile   « line view »
For /usr/local/libexec/sympa/task_manager-debug.pl
  Run on Tue Jun 1 22:32:51 2021
Reported on Tue Jun 1 22:35:08 2021

Filename/usr/local/lib/perl5/site_perl/mach/5.32/Text/LineFold.pm
StatementsExecuted 0 statements in 0s
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
0000s0sText::LineFold::::BEGIN@33Text::LineFold::BEGIN@33
0000s0sText::LineFold::::BEGIN@34Text::LineFold::BEGIN@34
0000s0sText::LineFold::::BEGIN@37Text::LineFold::BEGIN@37
0000s0sText::LineFold::::BEGIN@43Text::LineFold::BEGIN@43
0000s0sText::LineFold::::BEGIN@44Text::LineFold::BEGIN@44
0000s0sText::LineFold::::BEGIN@45Text::LineFold::BEGIN@45
0000s0sText::LineFold::::BEGIN@46Text::LineFold::BEGIN@46
0000s0sText::LineFold::::CORE:qrText::LineFold::CORE:qr (opcode)
0000s0sText::LineFold::::__ANON__[:123]Text::LineFold::__ANON__[:123]
0000s0sText::LineFold::::__ANON__[:127]Text::LineFold::__ANON__[:127]
0000s0sText::LineFold::::__ANON__[:289]Text::LineFold::__ANON__[:289]
0000s0sText::LineFold::::__ANON__[:393]Text::LineFold::__ANON__[:393]
0000s0sText::LineFold::::__ANON__[:88]Text::LineFold::__ANON__[:88]
0000s0sText::LineFold::::configText::LineFold::config
0000s0sText::LineFold::::foldText::LineFold::fold
0000s0sText::LineFold::::newText::LineFold::new
0000s0sText::LineFold::::unfoldText::LineFold::unfold
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1#-*- perl -*-
2
3package Text::LineFold;
4require 5.008;
5
6=encoding utf-8
7
8=head1 NAME
9
10Text::LineFold - Line Folding for Plain Text
11
12=head1 SYNOPSIS
13
14 use Text::LineFold;
15 $lf = Text::LineFold->new();
16
17 # Fold lines
18 $folded = $lf->fold($string, 'PLAIN');
19 $indented = $lf->fold(' ' x 8, ' ' x 4, $string);
20
21 # Unfold lines
22 $unfolded = $lf->unfold($string, 'FIXED');
23
24=head1 DESCRIPTION
25
26Text::LineFold folds or unfolds lines of plain text.
27As it mainly focuses on plain text e-mail messages,
28RFC 3676 flowed format is also supported.
29
30=cut
31
32### Pragmas:
33use strict;
34use vars qw($VERSION @EXPORT_OK @ISA $Config);
35
36### Exporting:
37use Exporter;
38
39### Inheritance:
40our @ISA = qw(Exporter Unicode::LineBreak);
41
42### Other modules:
43use Carp qw(croak carp);
44use Encode qw(is_utf8);
45use MIME::Charset;
46use Unicode::LineBreak qw(:all);
47
48### Globals
49
50### The package Version
51our $VERSION = '2018.012';
52
53### Public Configuration Attributes
54our $Config = {
55 ### %{$Unicode::LineBreak::Config},
56 Charset => 'UTF-8',
57 Language => 'XX',
58 OutputCharset => undef,
59 TabSize => 8,
60};
61
62### Privates
63
64my %FORMAT_FUNCS = (
65 'FIXED' => sub {
66 my $self = shift;
67 my $action = shift;
68 my $str = shift;
69 if ($action =~ /^so[tp]/) {
70 $self->{_} = {};
71 $self->{_}->{'ColMax'} = $self->config('ColMax');
72 $self->config('ColMax' => 0) if $str =~ /^>/;
73 } elsif ($action eq "") {
74 $self->{_}->{line} = $str;
75 } elsif ($action eq "eol") {
76 return $self->config('Newline');
77 } elsif ($action =~ /^eo/) {
78 if (length $self->{_}->{line} and $self->config('ColMax')) {
79 $str = $self->config('Newline').$self->config('Newline');
80 } else {
81 $str = $self->config('Newline');
82 }
83 $self->config('ColMax' => $self->{_}->{'ColMax'});
84 delete $self->{_};
85 return $str;
86 }
87 undef;
88 },
89 'FLOWED' => sub { # RFC 3676
90 my $self = shift;
91 my $action = shift;
92 my $str = shift;
93 if ($action eq 'sol') {
94 if ($self->{_}->{prefix}) {
95 return $self->{_}->{prefix}.' '.$str;
96 }
97 } elsif ($action =~ /^so/) {
98 $self->{_} = {};
99 if ($str =~ /^(>+)/) {
100 $self->{_}->{prefix} = $1;
101 } else {
102 $self->{_}->{prefix} = '';
103 }
104 } elsif ($action eq "") {
105 if ($str =~ /^(?: |From )/
106 or $str =~ /^>/ and !length $self->{_}->{prefix}) {
107 return $self->{_}->{line} = ' ' . $str;
108 }
109 $self->{_}->{line} = $str;
110 } elsif ($action eq 'eol') {
111 $str = ' ' if length $str;
112 return $str.' '.$self->config('Newline');
113 } elsif ($action =~ /^eo/) {
114 if (length $self->{_}->{line} and !length $self->{_}->{prefix}) {
115 $str = ' '.$self->config('Newline').$self->config('Newline');
116 } else {
117 $str = $self->config('Newline');
118 }
119 delete $self->{_};
120 return $str;
121 }
122 undef;
123 },
124 'PLAIN' => sub {
125 return $_[0]->config('Newline') if $_[1] =~ /^eo/;
126 undef;
127 },
128);
129
130=head2 Public Interface
131
132=over 4
133
134=item new ([KEY => VALUE, ...])
135
136I<Constructor>.
137About KEY => VALUE pairs see config method.
138
139=back
140
141=cut
142
143sub new {
144 my $class = shift;
145 my $self = bless __PACKAGE__->SUPER::new(), $class;
146 $self->config(@_);
147 $self;
148}
149
150=over 4
151
152=item $self->config (KEY)
153
154=item $self->config ([KEY => VAL, ...])
155
156I<Instance method>.
157Get or update configuration. Following KEY => VALUE pairs may be specified.
158
159=over 4
160
161=item Charset => CHARSET
162
163Character set that is used to encode string.
164It may be string or L<MIME::Charset> object.
165Default is C<"UTF-8">.
166
167=item Language => LANGUAGE
168
169Along with Charset option, this may be used to define language/region
170context.
171Default is C<"XX">.
172See also L<Unicode::LineBreak/Context> option.
173
174=item Newline => STRING
175
176String to be used for newline sequence.
177Default is C<"\n">.
178
179=item OutputCharset => CHARSET
180
181Character set that is used to encode result of fold()/unfold().
182It may be string or L<MIME::Charset> object.
183If a special value C<"_UNICODE_"> is specified, result will be Unicode string.
184Default is the value of Charset option.
185
186=item TabSize => NUMBER
187
188Column width of tab stops.
189When 0 is specified, tab stops are ignored.
190Default is 8.
191
192=item BreakIndent
193
194=item CharMax
195
196=item ColMax
197
198=item ColMin
199
200=item ComplexBreaking
201
202=item EAWidth
203
204=item HangulAsAL
205
206=item LBClass
207
208=item LegacyCM
209
210=item Prep
211
212=item Urgent
213
214See L<Unicode::LineBreak/Options>.
215
216=back
217
218=back
219
220=cut
221
222sub config {
223 my $self = shift;
224 my @opts = qw{Charset Language OutputCharset TabSize};
225 my %opts = map { (uc $_ => $_) } @opts;
226 my $newline = undef;
227
228 # Get config.
229 if (scalar @_ == 1) {
230 if ($opts{uc $_[0]}) {
231 return $self->{$opts{uc $_[0]}};
232 }
233 return $self->SUPER::config($_[0]);
234 }
235
236 # Set config.
237 my @o = ();
238 while (scalar @_) {
239 my $k = shift;
240 my $v = shift;
241 if ($opts{uc $k}) {
242 $self->{$opts{uc $k}} = $v;
243 } elsif (uc $k eq uc 'Newline') {
244 $newline = $v;
245 } else {
246 push @o, $k => $v;
247 }
248 }
249 $self->SUPER::config(@o) if scalar @o;
250
251 # Character set and language assumed.
252 if (ref $self->{Charset} eq 'MIME::Charset') {
253 $self->{_charset} = $self->{Charset};
254 } else {
255 $self->{Charset} ||= $Config->{Charset};
256 $self->{_charset} = MIME::Charset->new($self->{Charset});
257 }
258 $self->{Charset} = $self->{_charset}->as_string;
259 my $ocharset = uc($self->{OutputCharset} || $self->{Charset});
260 $ocharset = MIME::Charset->new($ocharset)
261 unless ref $ocharset eq 'MIME::Charset' or $ocharset eq '_UNICODE_';
262 unless ($ocharset eq '_UNICODE_') {
263 $self->{_charset}->encoder($ocharset);
264 $self->{OutputCharset} = $ocharset->as_string;
265 }
266 $self->{Language} = uc($self->{Language} || $Config->{Language});
267
268 ## Context
269 $self->SUPER::config(Context =>
270 context(Charset => $self->{Charset},
271 Language => $self->{Language}));
272
273 ## Set sizing method.
274 $self->SUPER::config(Sizing => sub {
275 my ($self, $cols, $pre, $spc, $str) = @_;
276
277 my $tabsize = $self->{TabSize};
278 my $spcstr = $spc.$str;
279 $spcstr->pos(0);
280 while (!$spcstr->eos and $spcstr->item->lbc == LB_SP) {
281 my $c = $spcstr->next;
282 if ($c eq "\t") {
283 $cols += $tabsize - $cols % $tabsize if $tabsize;
284 } else {
285 $cols += $c->columns;
286 }
287 }
288 return $cols + $spcstr->substr($spcstr->pos)->columns;
289 });
290
291 ## Classify horizontal tab as line breaking class SP.
292 $self->SUPER::config(LBClass => [ord("\t") => LB_SP]);
293 ## Tab size
294 if (defined $self->{TabSize}) {
295 croak "Invalid TabSize option" unless $self->{TabSize} =~ /^\d+$/;
296 $self->{TabSize} += 0;
297 } else {
298 $self->{TabSize} = $Config->{TabSize};
299 }
300
301 ## Newline
302 if (defined $newline) {
303 $newline = $self->{_charset}->decode($newline)
304 unless is_utf8($newline);
305 $self->SUPER::config(Newline => $newline);
306 }
307}
308
309=over 4
310
311=item $self->fold (STRING, [METHOD])
312
313=item $self->fold (INITIAL_TAB, SUBSEQUENT_TAB, STRING, ...)
314
315I<Instance method>.
316fold() folds lines of string STRING and returns it.
317Surplus SPACEs and horizontal tabs at end of line are removed,
318newline sequences are replaced by that specified by Newline option
319and newline is appended at end of text if it does not exist.
320Horizontal tabs are treated as tab stops according to TabSize option.
321
322By the first style, following options may be specified for METHOD argument.
323
324=over 4
325
326=item C<"FIXED">
327
328Lines preceded by C<"E<gt>"> won't be folded.
329Paragraphs are separated by empty line.
330
331=item C<"FLOWED">
332
333C<"Format=Flowed; DelSp=Yes"> formatting defined by RFC 3676.
334
335=item C<"PLAIN">
336
337Default method. All lines are folded.
338
339=back
340
341Second style is similar to L<Text::Wrap/wrap()>.
342All lines are folded.
343INITIAL_TAB is inserted at beginning of paragraphs and SUBSEQUENT_TAB
344at beginning of other broken lines.
345
346=back
347
348=cut
349
350# Special breaking characters: VT, FF, NEL, LS, PS
351my $special_break = qr/([\x{000B}\x{000C}\x{0085}\x{2028}\x{2029}])/os;
352
353sub fold {
354 my $self = shift;
355 my $str;
356
357 if (2 < scalar @_) {
358 my $initial_tab = shift || '';
359 $initial_tab = $self->{_charset}->decode($initial_tab)
360 unless is_utf8($initial_tab);
361 my $subsequent_tab = shift || '';
362 $subsequent_tab = $self->{_charset}->decode($subsequent_tab)
363 unless is_utf8($subsequent_tab);
364 my @str = @_;
365
366 ## Decode and concat strings.
367 $str = shift @str;
368 $str = $self->{_charset}->decode($str) unless is_utf8($str);
369 foreach my $s (@str) {
370 next unless defined $s and length $s;
371
372 $s = $self->{_charset}->decode($s) unless is_utf8($s);
373 unless (length $str) {
374 $str = $s;
375 } elsif ($str =~ /(\s|$special_break)$/ or
376 $s =~ /^(\s|$special_break)/) {
377 $str .= $s;
378 } else {
379 $str .= ' ' if $self->breakingRule($str, $s) == INDIRECT;
380 $str .= $s;
381 }
382 }
383
384 ## Set format method.
385 $self->SUPER::config(Format => sub {
386 my $self = shift;
387 my $event = shift;
388 my $str = shift;
389 if ($event =~ /^eo/) { return $self->config('Newline'); }
390 if ($event =~ /^so[tp]/) { return $initial_tab.$str; }
391 if ($event eq 'sol') { return $subsequent_tab.$str; }
392 undef;
393 });
394 } else {
395 $str = shift;
396 my $method = uc(shift || '');
397 return '' unless defined $str and length $str;
398
399 ## Decode string.
400 $str = $self->{_charset}->decode($str) unless is_utf8($str);
401
402 ## Set format method.
403 $self->SUPER::config(Format => $FORMAT_FUNCS{$method} ||
404 $FORMAT_FUNCS{'PLAIN'});
405 }
406
407 ## Do folding.
408 my $result = '';
409 foreach my $s (split $special_break, $str) {
410 if ($s =~ $special_break) {
411 $result .= $s;
412 } else {
413 $result .= $self->break($str);
414 }
415 }
416
417 ## Encode result.
418 if ($self->{OutputCharset} eq '_UNICODE_') {
419 return $result;
420 } else {
421 return $self->{_charset}->encode($result);
422 }
423}
424
425=over 4
426
427=item $self->unfold (STRING, METHOD)
428
429Conjunct folded paragraphs of string STRING and returns it.
430
431Following options may be specified for METHOD argument.
432
433=over 4
434
435=item C<"FIXED">
436
437Default method.
438Lines preceded by C<"E<gt>"> won't be conjuncted.
439Treat empty line as paragraph separator.
440
441=item C<"FLOWED">
442
443Unfold C<"Format=Flowed; DelSp=Yes"> formatting defined by RFC 3676.
444
445=item C<"FLOWEDSP">
446
447Unfold C<"Format=Flowed; DelSp=No"> formatting defined by RFC 3676.
448
449=begin comment
450
451=item C<"OBSFLOWED">
452
453Unfold C<"Format=Flowed> formatting defined by (obsoleted) RFC 2646
454as well as possible.
455
456=end comment
457
458=back
459
460=back
461
462=cut
463
464sub unfold {
465 my $self = shift;
466 my $str = shift;
467 return '' unless defined $str and length $str;
468
469 ## Get format method.
470 my $method = uc(shift || 'FIXED');
471 $method = 'FIXED' unless $method =~ /^(?:FIXED|FLOWED|FLOWEDSP|OBSFLOWED)$/;
472 my $delsp = $method eq 'FLOWED';
473
474 ## Decode string and canonizalize newline.
475 $str = $self->{_charset}->decode($str) unless is_utf8($str);
476 $str =~ s/\r\n|\r/\n/g;
477
478 ## Do unfolding.
479 my $result = '';
480 foreach my $s (split $special_break, $str) {
481 if ($s eq '') {
482 next;
483 } elsif ($s =~ $special_break) {
484 $result .= $s;
485 next;
486 } elsif ($method eq 'FIXED') {
487 pos($s) = 0;
488 while ($s !~ /\G\z/cg) {
489 if ($s =~ /\G\n/cg) {
490 $result .= $self->config('Newline');
491 } elsif ($s =~ /\G(.+)\n\n/cg) {
492 $result .= $1.$self->config('Newline');
493 } elsif ($s =~ /\G(>.*)\n/cg) {
494 $result .= $1.$self->config('Newline');
495 } elsif ($s =~ /\G(.+)\n(?=>)/cg) {
496 $result .= $1.$self->config('Newline');
497 } elsif ($s =~ /\G(.+?)( *)\n(?=(.+))/cg) {
498 my ($l, $s, $n) = ($1, $2, $3);
499 $result .= $l;
500 if ($n =~ /^ /) {
501 $result .= $self->config('Newline');
502 } elsif (length $s) {
503 $result .= $s;
504 } elsif (length $l) {
505 $result .= ' '
506 if $self->breakingRule($l, $n) == INDIRECT;
507 }
508 } elsif ($s =~ /\G(.+)\n/cg) {
509 $result .= $1.$self->config('Newline');
510 } elsif ($s =~ /\G(.+)/cg) {
511 $result .= $1.$self->config('Newline');
512 last;
513 }
514 }
515 } elsif ($method eq 'FLOWED' or $method eq 'FLOWEDSP' or
516 $method eq 'OBSFLOWED') {
517 my $prefix = undef;
518 pos($s) = 0;
519 while ($s !~ /\G\z/cg) {
520 if ($s =~ /\G(>+) ?(.*?)( ?)\n/cg) {
521 my ($p, $l, $s) = ($1, $2, $3);
522 unless (defined $prefix) {
523 $result .= $p.' '.$l;
524 } elsif ($p ne $prefix) {
525 $result .= $self->config('Newline');
526 $result .= $p.' '.$l;
527 } else {
528 $result .= $l;
529 }
530 unless (length $s) {
531 $result .= $self->config('Newline');
532 $prefix = undef;
533 } else {
534 $prefix = $p;
535 $result .= $s unless $delsp;
536 }
537 } elsif ($s =~ /\G ?(.*?)( ?)\n/cg) {
538 my ($l, $s) = ($1, $2);
539 unless (defined $prefix) {
540 $result .= $l;
541 } elsif ('' ne $prefix) {
542 $result .= $self->config('Newline');
543 $result .= $l;
544 } else {
545 $result .= $l;
546 }
547 unless (length $s) {
548 $result .= $self->config('Newline');
549 $prefix = undef;
550 } else {
551 $result .= $s unless $delsp;
552 $prefix = '';
553 }
554 } elsif ($s =~ /\G ?(.*)/cg) {
555 $result .= $1.$self->config('Newline');
556 last;
557 }
558 }
559 }
560 }
561 ## Encode result.
562 if ($self->{OutputCharset} eq '_UNICODE_') {
563 return $result;
564 } else {
565 return $self->{_charset}->encode($result);
566 }
567}
568
569=head1 BUGS
570
571Please report bugs or buggy behaviors to developer.
572
573CPAN Request Tracker:
574L<http://rt.cpan.org/Public/Dist/Display.html?Name=Unicode-LineBreak>.
575
576=head1 VERSION
577
578Consult $VERSION variable.
579
580=head1 SEE ALSO
581
582L<Unicode::LineBreak>, L<Text::Wrap>.
583
584=head1 AUTHOR
585
586Copyright (C) 2009-2012 Hatuka*nezumi - IKEDA Soji <hatuka(at)nezumi.nu>.
587
588This program is free software; you can redistribute it and/or modify it
589under the same terms as Perl itself.
590
591=cut
592
5931;