← Index
NYTProf Performance Profile   « line view »
For /usr/local/libexec/sympa/task_manager-debug.pl
  Run on Tue Jun 1 22:32:51 2021
Reported on Tue Jun 1 22:35:12 2021

Filename/usr/local/lib/perl5/site_perl/HTML/Element.pm
StatementsExecuted 0 statements in 0s
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
0000s0sHTML::Element::::ABORTHTML::Element::ABORT
0000s0sHTML::Element::::BEGIN@10HTML::Element::BEGIN@10
0000s0sHTML::Element::::BEGIN@11HTML::Element::BEGIN@11
0000s0sHTML::Element::::BEGIN@12HTML::Element::BEGIN@12
0000s0sHTML::Element::::BEGIN@13HTML::Element::BEGIN@13
0000s0sHTML::Element::::BEGIN@20HTML::Element::BEGIN@20
0000s0sHTML::Element::::BEGIN@2547HTML::Element::BEGIN@2547
0000s0sHTML::Element::::BEGIN@36HTML::Element::BEGIN@36
0000s0sHTML::Element::::BEGIN@43HTML::Element::BEGIN@43
0000s0sHTML::Element::::BEGIN@5HTML::Element::BEGIN@5
0000s0sHTML::Element::::BEGIN@6HTML::Element::BEGIN@6
0000s0sHTML::Element::::CORE:qrHTML::Element::CORE:qr (opcode)
0000s0sHTML::Element::::CORE:regcompHTML::Element::CORE:regcomp (opcode)
0000s0sHTML::Element::::OKHTML::Element::OK
0000s0sHTML::Element::::PRUNEHTML::Element::PRUNE
0000s0sHTML::Element::::PRUNE_SOFTLYHTML::Element::PRUNE_SOFTLY
0000s0sHTML::Element::::PRUNE_UPHTML::Element::PRUNE_UP
0000s0sHTML::Element::::Use_Weak_RefsHTML::Element::Use_Weak_Refs
0000s0sHTML::Element::::VersionHTML::Element::Version
0000s0sHTML::Element::::__ANON__[:1031]HTML::Element::__ANON__[:1031]
0000s0sHTML::Element::::__ANON__[:1081]HTML::Element::__ANON__[:1081]
0000s0sHTML::Element::::__ANON__[:1181]HTML::Element::__ANON__[:1181]
0000s0sHTML::Element::::__ANON__[:1285]HTML::Element::__ANON__[:1285]
0000s0sHTML::Element::::__ANON__[:1859]HTML::Element::__ANON__[:1859]
0000s0sHTML::Element::::__ANON__[:1874]HTML::Element::__ANON__[:1874]
0000s0sHTML::Element::::__ANON__[:1937]HTML::Element::__ANON__[:1937]
0000s0sHTML::Element::::__ANON__[:2205]HTML::Element::__ANON__[:2205]
0000s0sHTML::Element::::__ANON__[:2248]HTML::Element::__ANON__[:2248]
0000s0sHTML::Element::::__ANON__[:2438]HTML::Element::__ANON__[:2438]
0000s0sHTML::Element::::__ANON__[:37]HTML::Element::__ANON__[:37]
0000s0sHTML::Element::::_asserts_failHTML::Element::_asserts_fail
0000s0sHTML::Element::::_empty_element_mapHTML::Element::_empty_element_map
0000s0sHTML::Element::::_fold_case_LCHTML::Element::_fold_case_LC
0000s0sHTML::Element::::_fold_case_NOTHTML::Element::_fold_case_NOT
0000s0sHTML::Element::::_gensymHTML::Element::_gensym
0000s0sHTML::Element::::_i2lHTML::Element::_i2l
0000s0sHTML::Element::::_int2LATINHTML::Element::_int2LATIN
0000s0sHTML::Element::::_int2ROMANHTML::Element::_int2ROMAN
0000s0sHTML::Element::::_int2intHTML::Element::_int2int
0000s0sHTML::Element::::_int2latinHTML::Element::_int2latin
0000s0sHTML::Element::::_int2romanHTML::Element::_int2roman
0000s0sHTML::Element::::_valid_nameHTML::Element::_valid_name
0000s0sHTML::Element::::_xml_escapeHTML::Element::_xml_escape
0000s0sHTML::Element::::addressHTML::Element::address
0000s0sHTML::Element::::all_attrHTML::Element::all_attr
0000s0sHTML::Element::::all_attr_namesHTML::Element::all_attr_names
0000s0sHTML::Element::::all_external_attrHTML::Element::all_external_attr
0000s0sHTML::Element::::all_external_attr_namesHTML::Element::all_external_attr_names
0000s0sHTML::Element::::as_HTMLHTML::Element::as_HTML
0000s0sHTML::Element::::as_Lisp_formHTML::Element::as_Lisp_form
0000s0sHTML::Element::::as_XMLHTML::Element::as_XML
0000s0sHTML::Element::::as_textHTML::Element::as_text
0000s0sHTML::Element::::as_text_trimmedHTML::Element::as_text_trimmed
0000s0sHTML::Element::::as_trimmed_textHTML::Element::as_trimmed_text
0000s0sHTML::Element::::attrHTML::Element::attr
0000s0sHTML::Element::::attr_get_iHTML::Element::attr_get_i
0000s0sHTML::Element::::cloneHTML::Element::clone
0000s0sHTML::Element::::clone_listHTML::Element::clone_list
0000s0sHTML::Element::::contentHTML::Element::content
0000s0sHTML::Element::::content_array_refHTML::Element::content_array_ref
0000s0sHTML::Element::::content_listHTML::Element::content_list
0000s0sHTML::Element::::content_refs_listHTML::Element::content_refs_list
0000s0sHTML::Element::::deleteHTML::Element::delete
0000s0sHTML::Element::::delete_contentHTML::Element::delete_content
0000s0sHTML::Element::::delete_ignorable_whitespaceHTML::Element::delete_ignorable_whitespace
0000s0sHTML::Element::::deobjectify_textHTML::Element::deobjectify_text
0000s0sHTML::Element::::depthHTML::Element::depth
0000s0sHTML::Element::::descendantsHTML::Element::descendants
0000s0sHTML::Element::::descendentsHTML::Element::descendents
0000s0sHTML::Element::::destroyHTML::Element::destroy
0000s0sHTML::Element::::destroy_contentHTML::Element::destroy_content
0000s0sHTML::Element::::detachHTML::Element::detach
0000s0sHTML::Element::::detach_contentHTML::Element::detach_content
0000s0sHTML::Element::::dumpHTML::Element::dump
0000s0sHTML::Element::::element_classHTML::Element::element_class
0000s0sHTML::Element::::endtagHTML::Element::endtag
0000s0sHTML::Element::::endtag_XMLHTML::Element::endtag_XML
0000s0sHTML::Element::::extract_linksHTML::Element::extract_links
0000s0sHTML::Element::::findHTML::Element::find
0000s0sHTML::Element::::find_by_attributeHTML::Element::find_by_attribute
0000s0sHTML::Element::::find_by_tag_nameHTML::Element::find_by_tag_name
0000s0sHTML::Element::::formatHTML::Element::format
0000s0sHTML::Element::::has_insane_linkageHTML::Element::has_insane_linkage
0000s0sHTML::Element::::idHTML::Element::id
0000s0sHTML::Element::::idfHTML::Element::idf
0000s0sHTML::Element::::implicitHTML::Element::implicit
0000s0sHTML::Element::::importHTML::Element::import
0000s0sHTML::Element::::insert_elementHTML::Element::insert_element
0000s0sHTML::Element::::is_emptyHTML::Element::is_empty
0000s0sHTML::Element::::is_insideHTML::Element::is_inside
0000s0sHTML::Element::::leftHTML::Element::left
0000s0sHTML::Element::::lineageHTML::Element::lineage
0000s0sHTML::Element::::lineage_tag_namesHTML::Element::lineage_tag_names
0000s0sHTML::Element::::look_downHTML::Element::look_down
0000s0sHTML::Element::::look_upHTML::Element::look_up
0000s0sHTML::Element::::newHTML::Element::new
0000s0sHTML::Element::::new_from_lolHTML::Element::new_from_lol
0000s0sHTML::Element::::normalize_contentHTML::Element::normalize_content
0000s0sHTML::Element::::number_listsHTML::Element::number_lists
0000s0sHTML::Element::::objectify_textHTML::Element::objectify_text
0000s0sHTML::Element::::parentHTML::Element::parent
0000s0sHTML::Element::::pindexHTML::Element::pindex
0000s0sHTML::Element::::posHTML::Element::pos
0000s0sHTML::Element::::postinsertHTML::Element::postinsert
0000s0sHTML::Element::::preinsertHTML::Element::preinsert
0000s0sHTML::Element::::push_contentHTML::Element::push_content
0000s0sHTML::Element::::replace_withHTML::Element::replace_with
0000s0sHTML::Element::::replace_with_contentHTML::Element::replace_with_content
0000s0sHTML::Element::::rightHTML::Element::right
0000s0sHTML::Element::::rootHTML::Element::root
0000s0sHTML::Element::::same_asHTML::Element::same_as
0000s0sHTML::Element::::simplify_presHTML::Element::simplify_pres
0000s0sHTML::Element::::splice_contentHTML::Element::splice_content
0000s0sHTML::Element::::starttagHTML::Element::starttag
0000s0sHTML::Element::::starttag_XMLHTML::Element::starttag_XML
0000s0sHTML::Element::::tagHTML::Element::tag
0000s0sHTML::Element::::tagname_mapHTML::Element::tagname_map
0000s0sHTML::Element::::traverseHTML::Element::traverse
0000s0sHTML::Element::::unshift_contentHTML::Element::unshift_content
0000s0smain::::__ANON__[:2742] main::__ANON__[:2742]
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package HTML::Element;
2
3# ABSTRACT: Class for objects that represent HTML elements
4
5use strict;
6use warnings;
7
8our $VERSION = '5.07'; # VERSION from OurPkgVersion
9
10use Carp ();
11use HTML::Entities ();
12use HTML::Tagset ();
13use integer; # vroom vroom!
14
15# This controls encoding entities on output.
16# When set entities won't be re-encoded.
17# Defaulting off because parser defaults to unencoding entities
18our $encoded_content = 0;
19
20use vars qw($html_uc $Debug $ID_COUNTER $VERSION %list_type_to_sub);
21
22# Set up support for weak references, if possible:
23my $using_weaken;
24
25#=head1 CLASS METHODS
26
27
28sub Use_Weak_Refs {
29 my $self_or_class = shift;
30
31 if (@_) { # set
32 $using_weaken = !! shift; # Normalize boolean value
33 Carp::croak("The installed Scalar::Util lacks support for weak references")
34 if $using_weaken and not defined &Scalar::Util::weaken;
35
36 no warnings 'redefine';
37 *_weaken = $using_weaken ? \&Scalar::Util::weaken : sub ($) {};
38 } # end if setting value
39
40 return $using_weaken;
41} # end Use_Weak_Refs
42
43BEGIN {
44 # Attempt to import weaken from Scalar::Util, but don't complain
45 # if we can't. Also, rename it to _weaken.
46 require Scalar::Util;
47
48 __PACKAGE__->Use_Weak_Refs(defined &Scalar::Util::weaken);
49}
50
51sub import {
52 my $class = shift;
53
54 for (@_) {
55 if (/^-(no_?)?weak$/) {
56 $class->Use_Weak_Refs(not $1);
57 } else {
58 Carp::croak("$_ is not exported by the $class module");
59 }
60 }
61} # end import
62
63
64$Debug = 0 unless defined $Debug;
65
66#=head1 SUBROUTINES
67
68
69sub Version {
70 Carp::carp("Deprecated subroutine HTML::Element::Version called");
71 $VERSION;
72}
73
74my $nillio = [];
75
76*HTML::Element::emptyElement = \%HTML::Tagset::emptyElement; # legacy
77*HTML::Element::optionalEndTag = \%HTML::Tagset::optionalEndTag; # legacy
78*HTML::Element::linkElements = \%HTML::Tagset::linkElements; # legacy
79*HTML::Element::boolean_attr = \%HTML::Tagset::boolean_attr; # legacy
80*HTML::Element::canTighten = \%HTML::Tagset::canTighten; # legacy
81
82# Constants for signalling back to the traverser:
83my $travsignal_package = __PACKAGE__ . '::_travsignal';
84my ( $ABORT, $PRUNE, $PRUNE_SOFTLY, $OK, $PRUNE_UP )
85 = map { my $x = $_; bless \$x, $travsignal_package; }
86 qw(
87 ABORT PRUNE PRUNE_SOFTLY OK PRUNE_UP
88);
89
90
91## Comments from Father Chrysostomos RT #58880
92## The sole purpose for empty parentheses after a sub name is to make it
93## parse as a 0-ary (nihilary?) function. I.e., ABORT+1 should parse as
94## ABORT()+1, not ABORT(+1). The parentheses also tell perl that it can
95### be inlined.
96##Deparse is really useful for demonstrating this:
97##$ perl -MO=Deparse,-p -e 'sub ABORT {7} print ABORT+8'
98# Vs
99# perl -MO=Deparse,-p -e 'sub ABORT() {7} print ABORT+8'
100#
101# With the parentheses, it not only makes it parse as a term.
102# It even resolves the constant at compile-time, making the code run faster.
103
104## no critic
105sub ABORT () {$ABORT}
106sub PRUNE () {$PRUNE}
107sub PRUNE_SOFTLY () {$PRUNE_SOFTLY}
108sub OK () {$OK}
109sub PRUNE_UP () {$PRUNE_UP}
110## use critic
111
112$html_uc = 0;
113
114# set to 1 if you want tag and attribute names from starttag and endtag
115# to be uc'd
116
117# regexs for XML names
118# http://www.w3.org/TR/2006/REC-xml11-20060816/NT-NameStartChar
119my $START_CHAR
120 = qr/(?:\:|[A-Z]|_|[a-z]|[\x{C0}-\x{D6}]|[\x{D8}-\x{F6}]|[\x{F8}-\x{2FF}]|[\x{370}-\x{37D}]|[\x{37F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])/;
121
122# http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameChar
123my $NAME_CHAR
124 = qr/(?:$START_CHAR|-|\.|[0-9]|\x{B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])/;
125
126# Elements that does not have corresponding end tags (i.e. are empty)
127
128#==========================================================================
129
130#=head1 BASIC METHODS
131
132
133#
134# An HTML::Element is represented by blessed hash reference, much like
135# Tree::DAG_Node objects. Key-names not starting with '_' are reserved
136# for the SGML attributes of the element.
137# The following special keys are used:
138#
139# '_tag': The tag name (i.e., the generic identifier)
140# '_parent': A reference to the HTML::Element above (when forming a tree)
141# '_pos': The current position (a reference to a HTML::Element) is
142# where inserts will be placed (look at the insert_element
143# method) If not set, the implicit value is the object itself.
144# '_content': A ref to an array of nodes under this.
145# It might not be set.
146#
147# Example: <img src="gisle.jpg" alt="Gisle's photo"> is represented like this:
148#
149# bless {
150# _tag => 'img',
151# src => 'gisle.jpg',
152# alt => "Gisle's photo",
153# }, 'HTML::Element';
154#
155
156sub new {
157 my $class = shift;
158 $class = ref($class) || $class;
159
160 my $tag = shift;
161 Carp::croak("No tagname") unless defined $tag and length $tag;
162 Carp::croak "\"$tag\" isn't a good tag name!"
163 if $tag =~ m/[<>\/\x00-\x20]/; # minimal sanity, certainly!
164 my $self = bless { _tag => scalar( $class->_fold_case($tag) ) }, $class;
165 my ( $attr, $val );
166 while ( ( $attr, $val ) = splice( @_, 0, 2 ) ) {
167## RT #42209 why does this default to the attribute name and not remain unset or the empty string?
168 $val = $attr unless defined $val;
169 $self->{ $class->_fold_case($attr) } = $val;
170 }
171 if ( $tag eq 'html' ) {
172 $self->{'_pos'} = undef;
173 }
174 _weaken($self->{'_parent'}) if $self->{'_parent'};
175 return $self;
176}
177
178
179sub attr {
180 my $self = shift;
181 my $attr = scalar( $self->_fold_case(shift) );
182 if (@_) { # set
183 if ( defined $_[0] ) {
184 my $old = $self->{$attr};
185 $self->{$attr} = $_[0];
186 return $old;
187 }
188 else { # delete, actually
189 return delete $self->{$attr};
190 }
191 }
192 else { # get
193 return $self->{$attr};
194 }
195}
196
197
198sub tag {
199 my $self = shift;
200 if (@_) { # set
201 $self->{'_tag'} = $self->_fold_case( $_[0] );
202 }
203 else { # get
204 $self->{'_tag'};
205 }
206}
207
208
209sub parent {
210 my $self = shift;
211 if (@_) { # set
212 Carp::croak "an element can't be made its own parent"
213 if defined $_[0] and ref $_[0] and $self eq $_[0]; # sanity
214 _weaken($self->{'_parent'} = $_[0]);
215 }
216 else {
217 $self->{'_parent'}; # get
218 }
219}
220
221
222sub content_list {
223 return wantarray
224 ? @{ shift->{'_content'} || return () }
225 : scalar @{ shift->{'_content'} || return 0 };
226}
227
228
229# a read-only method! can't say $h->content( [] )!
230sub content {
231 return shift->{'_content'};
232}
233
234
235sub content_array_ref {
236 return shift->{'_content'} ||= [];
237}
238
239
240sub content_refs_list {
241 return \( @{ shift->{'_content'} || return () } );
242}
243
244
245sub implicit {
246 return shift->attr( '_implicit', @_ );
247}
248
249
250sub pos {
251 my $self = shift;
252 my $pos = $self->{'_pos'};
253 if (@_) { # set
254 my $parm = shift;
255 if ( defined $parm and $parm ne $self ) {
256 $self->{'_pos'} = $parm; # means that element
257 }
258 else {
259 $self->{'_pos'} = undef; # means $self
260 }
261 }
262 return $pos if defined($pos);
263 return $self;
264}
265
266
267sub all_attr {
268 return %{ $_[0] };
269
270 # Yes, trivial. But no other way for the user to do the same
271 # without breaking encapsulation.
272 # And if our object representation changes, this method's behavior
273 # should stay the same.
274}
275
276sub all_attr_names {
277 return keys %{ $_[0] };
278}
279
280
281sub all_external_attr {
282 my $self = $_[0];
283 return map( ( length($_) && substr( $_, 0, 1 ) eq '_' )
284 ? ()
285 : ( $_, $self->{$_} ),
286 keys %$self );
287}
288
289sub all_external_attr_names {
290 return grep !( length($_) && substr( $_, 0, 1 ) eq '_' ), keys %{ $_[0] };
291}
292
293
294sub id {
295 if ( @_ == 1 ) {
296 return $_[0]{'id'};
297 }
298 elsif ( @_ == 2 ) {
299 if ( defined $_[1] ) {
300 return $_[0]{'id'} = $_[1];
301 }
302 else {
303 return delete $_[0]{'id'};
304 }
305 }
306 else {
307 Carp::croak '$node->id can\'t take ' . scalar(@_) . ' parameters!';
308 }
309}
310
311
312sub _gensym {
313 unless ( defined $ID_COUNTER ) {
314
315 # start it out...
316 $ID_COUNTER = sprintf( '%04x', rand(0x1000) );
317 $ID_COUNTER =~ tr<0-9a-f><J-NP-Z>; # yes, skip letter "oh"
318 $ID_COUNTER .= '00000';
319 }
320 ++$ID_COUNTER;
321}
322
323sub idf {
324 my $nparms = scalar @_;
325
326 if ( $nparms == 1 ) {
327 my $x;
328 if ( defined( $x = $_[0]{'id'} ) and length $x ) {
329 return $x;
330 }
331 else {
332 return $_[0]{'id'} = _gensym();
333 }
334 }
335 if ( $nparms == 2 ) {
336 if ( defined $_[1] ) {
337 return $_[0]{'id'} = $_[1];
338 }
339 else {
340 return delete $_[0]{'id'};
341 }
342 }
343 Carp::croak '$node->idf can\'t take ' . scalar(@_) . ' parameters!';
344}
345
346
347sub push_content {
348 my $self = shift;
349 return $self unless @_;
350
351 my $content = ( $self->{'_content'} ||= [] );
352 for (@_) {
353 if ( ref($_) eq 'ARRAY' ) {
354
355 # magically call new_from_lol
356 push @$content, $self->new_from_lol($_);
357 _weaken($content->[-1]->{'_parent'} = $self);
358 }
359 elsif ( ref($_) ) { # insert an element
360 $_->detach if $_->{'_parent'};
361 _weaken($_->{'_parent'} = $self);
362 push( @$content, $_ );
363 }
364 else { # insert text segment
365 if ( @$content && !ref $content->[-1] ) {
366
367 # last content element is also text segment -- append
368 $content->[-1] .= $_;
369 }
370 else {
371 push( @$content, $_ );
372 }
373 }
374 }
375 return $self;
376}
377
378
379sub unshift_content {
380 my $self = shift;
381 return $self unless @_;
382
383 my $content = ( $self->{'_content'} ||= [] );
384 for ( reverse @_ ) { # so they get added in the order specified
385 if ( ref($_) eq 'ARRAY' ) {
386
387 # magically call new_from_lol
388 unshift @$content, $self->new_from_lol($_);
389 _weaken($content->[0]->{'_parent'} = $self);
390 }
391 elsif ( ref $_ ) { # insert an element
392 $_->detach if $_->{'_parent'};
393 _weaken($_->{'_parent'} = $self);
394 unshift( @$content, $_ );
395 }
396 else { # insert text segment
397 if ( @$content && !ref $content->[0] ) {
398
399 # last content element is also text segment -- prepend
400 $content->[0] = $_ . $content->[0];
401 }
402 else {
403 unshift( @$content, $_ );
404 }
405 }
406 }
407 return $self;
408}
409
410# Cf. splice ARRAY,OFFSET,LENGTH,LIST
411
412
413sub splice_content {
414 my ( $self, $offset, $length, @to_add ) = @_;
415 Carp::croak "splice_content requires at least one argument"
416 if @_ < 2; # at least $h->splice_content($offset);
417
418 my $content = ( $self->{'_content'} ||= [] );
419
420 # prep the list
421
422 my @out;
423 if ( @_ > 2 ) { # self, offset, length, ...
424 foreach my $n (@to_add) {
425 if ( ref($n) eq 'ARRAY' ) {
426 $n = $self->new_from_lol($n);
427 _weaken($n->{'_parent'} = $self);
428 }
429 elsif ( ref($n) ) {
430 $n->detach;
431 _weaken($n->{'_parent'} = $self);
432 }
433 }
434 @out = splice @$content, $offset, $length, @to_add;
435 }
436 else { # self, offset
437 @out = splice @$content, $offset;
438 }
439 foreach my $n (@out) {
440 $n->{'_parent'} = undef if ref $n;
441 }
442 return @out;
443}
444
445
446sub detach {
447 my $self = $_[0];
448 return undef unless ( my $parent = $self->{'_parent'} );
449 $self->{'_parent'} = undef;
450 my $cohort = $parent->{'_content'} || return $parent;
451 @$cohort = grep { not( ref($_) and $_ eq $self ) } @$cohort;
452
453 # filter $self out, if parent has any evident content
454
455 return $parent;
456}
457
458
459sub detach_content {
460 my $c = $_[0]->{'_content'} || return (); # in case of no content
461 for (@$c) {
462 $_->{'_parent'} = undef if ref $_;
463 }
464 return splice @$c;
465}
466
467
468sub replace_with {
469 my ( $self, @replacers ) = @_;
470 Carp::croak "the target node has no parent"
471 unless my ($parent) = $self->{'_parent'};
472
473 my $parent_content = $parent->{'_content'};
474 Carp::croak "the target node's parent has no content!?"
475 unless $parent_content and @$parent_content;
476
477 my $replacers_contains_self;
478 for (@replacers) {
479 if ( !ref $_ ) {
480
481 # noop
482 }
483 elsif ( $_ eq $self ) {
484
485 # noop, but check that it's there just once.
486 Carp::croak "Replacement list contains several copies of target!"
487 if $replacers_contains_self++;
488 }
489 elsif ( $_ eq $parent ) {
490 Carp::croak "Can't replace an item with its parent!";
491 }
492 elsif ( ref($_) eq 'ARRAY' ) {
493 $_ = $self->new_from_lol($_);
494 _weaken($_->{'_parent'} = $parent);
495 }
496 else {
497 $_->detach;
498 _weaken($_->{'_parent'} = $parent);
499
500 # each of these are necessary
501 }
502 } # for @replacers
503 @$parent_content = map { ( ref($_) and $_ eq $self ) ? @replacers : $_ }
504 @$parent_content;
505
506 $self->{'_parent'} = undef unless $replacers_contains_self;
507
508 # if replacers does contain self, then the parent attribute is fine as-is
509
510 return $self;
511}
512
513
514sub preinsert {
515 my $self = shift;
516 return $self unless @_;
517 return $self->replace_with( @_, $self );
518}
519
520
521sub postinsert {
522 my $self = shift;
523 return $self unless @_;
524 return $self->replace_with( $self, @_ );
525}
526
527
528sub replace_with_content {
529 my $self = $_[0];
530 Carp::croak "the target node has no parent"
531 unless my ($parent) = $self->{'_parent'};
532
533 my $parent_content = $parent->{'_content'};
534 Carp::croak "the target node's parent has no content!?"
535 unless $parent_content and @$parent_content;
536
537 my $content_r = $self->{'_content'} || [];
538 @$parent_content = map { ( ref($_) and $_ eq $self ) ? @$content_r : $_ }
539 @$parent_content;
540
541 $self->{'_parent'} = undef; # detach $self from its parent
542
543 # Update parentage link, removing from $self's content list
544 for ( splice @$content_r ) { _weaken($_->{'_parent'} = $parent) if ref $_ }
545
546 return $self; # note: doesn't destroy it.
547}
548
549
550sub delete_content {
551 for (
552 splice @{
553 delete( $_[0]->{'_content'} )
554
555 # Deleting it here (while holding its value, for the moment)
556 # will keep calls to detach() from trying to uselessly filter
557 # the list (as they won't be able to see it once it's been
558 # deleted)
559 || return ( $_[0] ) # in case of no content
560 },
561 0
562
563 # the splice is so we can null the array too, just in case
564 # something somewhere holds a ref to it
565 )
566 {
567 $_->delete if ref $_;
568 }
569 $_[0];
570}
571
572
573# two handy aliases
574sub destroy { shift->delete(@_) }
575sub destroy_content { shift->delete_content(@_) }
576
577sub delete {
578 my $self = $_[0];
579 $self->delete_content # recurse down
580 if $self->{'_content'} && @{ $self->{'_content'} };
581
582 $self->detach if $self->{'_parent'} and $self->{'_parent'}{'_content'};
583
584 # not the typical case
585
586 %$self = (); # null out the whole object on the way out
587 return;
588}
589
590
591sub clone {
592
593 #print "Cloning $_[0]\n";
594 my $it = shift;
595 Carp::croak "clone() can be called only as an object method"
596 unless ref $it;
597 Carp::croak "clone() takes no arguments" if @_;
598
599 my $new = bless {%$it}, ref($it); # COPY!!! HOOBOY!
600 delete @$new{ '_content', '_parent', '_pos', '_head', '_body' };
601
602 # clone any contents
603 if ( $it->{'_content'} and @{ $it->{'_content'} } ) {
604 $new->{'_content'}
605 = [ ref($it)->clone_list( @{ $it->{'_content'} } ) ];
606 for ( @{ $new->{'_content'} } ) {
607 _weaken($_->{'_parent'} = $new) if ref $_;
608 }
609 }
610
611 return $new;
612}
613
614
615sub clone_list {
616 Carp::croak "clone_list can be called only as a class method"
617 if ref shift @_;
618
619 # all that does is get me here
620 return map {
621 ref($_)
622 ? $_->clone # copy by method
623 : $_ # copy by evaluation
624 } @_;
625}
626
627
628sub normalize_content {
629 my $start = $_[0];
630 my $c;
631 return
632 unless $c = $start->{'_content'} and ref $c and @$c; # nothing to do
633 # TODO: if we start having text elements, deal with catenating those too?
634 my @stretches = (undef); # start with a barrier
635
636 # I suppose this could be rewritten to treat stretches as it goes, instead
637 # of at the end. But feh.
638
639 # Scan:
640 for ( my $i = 0; $i < @$c; ++$i ) {
641 if ( defined $c->[$i] and ref $c->[$i] ) { # not a text segment
642 if ( $stretches[0] ) {
643
644 # put in a barrier
645 if ( $stretches[0][1] == 1 ) {
646
647 #print "Nixing stretch at ", $i-1, "\n";
648 undef $stretches[0]; # nix the previous one-node "stretch"
649 }
650 else {
651
652 #print "End of stretch at ", $i-1, "\n";
653 unshift @stretches, undef;
654 }
655 }
656
657 # else no need for a barrier
658 }
659 else { # text segment
660 $c->[$i] = '' unless defined $c->[$i];
661 if ( $stretches[0] ) {
662 ++$stretches[0][1]; # increase length
663 }
664 else {
665
666 #print "New stretch at $i\n";
667 unshift @stretches, [ $i, 1 ]; # start and length
668 }
669 }
670 }
671
672 # Now combine. Note that @stretches is in reverse order, so the indexes
673 # still make sense as we work our way thru (i.e., backwards thru $c).
674 foreach my $s (@stretches) {
675 if ( $s and $s->[1] > 1 ) {
676
677 #print "Stretch at ", $s->[0], " for ", $s->[1], "\n";
678 $c->[ $s->[0] ]
679 .= join( '', splice( @$c, $s->[0] + 1, $s->[1] - 1 ) )
680
681 # append the subsequent ones onto the first one.
682 }
683 }
684 return;
685}
686
687
688sub delete_ignorable_whitespace {
689
690 # This doesn't delete all sorts of whitespace that won't actually
691 # be used in rendering, tho -- that's up to the rendering application.
692 # For example:
693 # <input type='text' name='foo'>
694 # [some whitespace]
695 # <input type='text' name='bar'>
696 # The WS between the two elements /will/ get used by the renderer.
697 # But here:
698 # <input type='hidden' name='foo' value='1'>
699 # [some whitespace]
700 # <input type='text' name='bar' value='2'>
701 # the WS between them won't be rendered in any way, presumably.
702
703 #my $Debug = 4;
704 die "delete_ignorable_whitespace can be called only as an object method"
705 unless ref $_[0];
706
707 print "About to tighten up...\n" if $Debug > 2;
708 my (@to_do) = ( $_[0] ); # Start off.
709 my ( $i, $sibs, $ptag, $this ); # scratch for the loop...
710 while (@to_do) {
711 if ( ( $ptag = ( $this = shift @to_do )->{'_tag'} ) eq 'pre'
712 or $ptag eq 'textarea'
713 or $HTML::Tagset::isCDATA_Parent{$ptag} )
714 {
715
716 # block the traversal under those
717 print "Blocking traversal under $ptag\n" if $Debug;
718 next;
719 }
720 next unless ( $sibs = $this->{'_content'} and @$sibs );
721 for ( $i = $#$sibs; $i >= 0; --$i ) { # work backwards thru the list
722 if ( ref $sibs->[$i] ) {
723 unshift @to_do, $sibs->[$i];
724
725 # yes, this happens in pre order -- we're going backwards
726 # thru this sibling list. I doubt it actually matters, tho.
727 next;
728 }
729 next if $sibs->[$i] =~ m<[^\n\r\f\t ]>s; # it's /all/ whitespace
730
731 print "Under $ptag whose canTighten ",
732 "value is ", 0 + $HTML::Element::canTighten{$ptag}, ".\n"
733 if $Debug > 3;
734
735 # It's all whitespace...
736
737 if ( $i == 0 ) {
738 if ( @$sibs == 1 ) { # I'm an only child
739 next unless $HTML::Element::canTighten{$ptag}; # parent
740 }
741 else { # I'm leftmost of many
742 # if either my parent or sib are eligible, I'm good.
743 next
744 unless $HTML::Element::canTighten{$ptag} # parent
745 or (ref $sibs->[1]
746 and $HTML::Element::canTighten{ $sibs->[1]
747 {'_tag'} } # right sib
748 );
749 }
750 }
751 elsif ( $i == $#$sibs ) { # I'm rightmost of many
752 # if either my parent or sib are eligible, I'm good.
753 next
754 unless $HTML::Element::canTighten{$ptag} # parent
755 or (ref $sibs->[ $i - 1 ]
756 and $HTML::Element::canTighten{ $sibs->[ $i - 1 ]
757 {'_tag'} } # left sib
758 );
759 }
760 else { # I'm the piggy in the middle
761 # My parent doesn't matter -- it all depends on my sibs
762 next
763 unless ref $sibs->[ $i - 1 ]
764 or ref $sibs->[ $i + 1 ];
765
766 # if NEITHER sib is a node, quit
767
768 next if
769 ref $sibs->[ $i - 1 ]
770
- -
773 and ref $sibs->[ $i + 1 ]
774 and !$HTML::Element::canTighten{ $sibs->[ $i - 1 ]
775 {'_tag'} } # left sib
776 and !$HTML::Element::canTighten{ $sibs->[ $i + 1 ]
777 {'_tag'} } # right sib
778 ;
779 }
780
781 # Unknown tags aren't in canTighten and so AREN'T subject to tightening
782
783 print " delendum: child $i of $ptag\n" if $Debug > 3;
784 splice @$sibs, $i, 1;
785 }
786
787 # end of the loop-over-children
788 }
789
790 # end of the while loop.
791
792 return;
793}
794
795
796sub insert_element {
797 my ( $self, $tag, $implicit ) = @_;
798 return $self->pos() unless $tag; # noop if nothing to insert
799
800 my $e;
801 if ( ref $tag ) {
802 $e = $tag;
803 $tag = $e->tag;
804 }
805 else { # just a tag name -- so make the element
806 $e = $self->element_class->new($tag);
807 ++( $self->{'_element_count'} ) if exists $self->{'_element_count'};
808
809 # undocumented. see TreeBuilder.
810 }
811
812 $e->{'_implicit'} = 1 if $implicit;
813
814 my $pos = $self->{'_pos'};
815 $pos = $self unless defined $pos;
816
817 $pos->push_content($e);
818
819 $self->{'_pos'} = $pos = $e
820 unless $self->_empty_element_map->{$tag} || $e->{'_empty_element'};
821
822 $pos;
823}
824
825#==========================================================================
826# Some things to override in XML::Element
827
828sub _empty_element_map {
829 \%HTML::Element::emptyElement;
830}
831
832sub _fold_case_LC {
833 if (wantarray) {
834 shift;
835 map lc($_), @_;
836 }
837 else {
838 return lc( $_[1] );
839 }
840}
841
842sub _fold_case_NOT {
843 if (wantarray) {
844 shift;
845 @_;
846 }
847 else {
848 return $_[1];
849 }
850}
851
852*_fold_case = \&_fold_case_LC;
853
854#==========================================================================
855
856#=head1 DUMPING METHODS
857
858
859sub dump {
860 my ( $self, $fh, $depth ) = @_;
861 $fh = *STDOUT{IO} unless defined $fh;
862 $depth = 0 unless defined $depth;
863 print $fh " " x $depth, $self->starttag, " \@", $self->address,
864 $self->{'_implicit'} ? " (IMPLICIT)\n" : "\n";
865 for ( @{ $self->{'_content'} } ) {
866 if ( ref $_ ) { # element
867 $_->dump( $fh, $depth + 1 ); # recurse
868 }
869 else { # text node
870 print $fh " " x ( $depth + 1 );
871 if ( length($_) > 65 or m<[\x00-\x1F]> ) {
872
873 # it needs prettyin' up somehow or other
874 my $x
875 = ( length($_) <= 65 )
876 ? $_
877 : ( substr( $_, 0, 65 ) . '...' );
878 $x =~ s<([\x00-\x1F])>
879 <'\\x'.(unpack("H2",$1))>eg;
880 print $fh qq{"$x"\n};
881 }
882 else {
883 print $fh qq{"$_"\n};
884 }
885 }
886 }
887}
888
889
890sub as_HTML {
891 my ( $self, $entities, $indent, $omissible_map ) = @_;
892
893 #my $indent_on = defined($indent) && length($indent);
894 my @html = ();
895
896 $omissible_map ||= \%HTML::Element::optionalEndTag;
897 my $empty_element_map = $self->_empty_element_map;
898
899 my $last_tag_tightenable = 0;
900 my $this_tag_tightenable = 0;
901 my $nonindentable_ancestors = 0; # count of nonindentible tags over us.
902
903 my ( $tag, $node, $start, $depth ); # per-iteration scratch
904
905 if ( defined($indent) && length($indent) ) {
906 $self->traverse(
907 sub {
908 ( $node, $start, $depth ) = @_;
909 if ( ref $node ) { # it's an element
910
911 # detect bogus classes. RT #35948, #61673
912 $node->can('starttag')
913 or Carp::confess( "Object of class "
914 . ref($node)
915 . " cannot be processed by HTML::Element" );
916
917 $tag = $node->{'_tag'};
918
919 if ($start) { # on the way in
920 if (( $this_tag_tightenable
921 = $HTML::Element::canTighten{$tag}
922 )
923 and !$nonindentable_ancestors
924 and $last_tag_tightenable
925 )
926 {
927 push
928 @html,
929 "\n",
930 $indent x $depth,
931 $node->starttag($entities),
932 ;
933 }
934 else {
935 push( @html, $node->starttag($entities) );
936 }
937 $last_tag_tightenable = $this_tag_tightenable;
938
939 ++$nonindentable_ancestors
940 if $tag eq 'pre' or $tag eq 'textarea'
941 or $HTML::Tagset::isCDATA_Parent{$tag};
942
943 }
944 elsif (
945 not( $empty_element_map->{$tag}
946 or $omissible_map->{$tag} )
947 )
948 {
949
950 # on the way out
951 if ( $tag eq 'pre' or $tag eq 'textarea'
952 or $HTML::Tagset::isCDATA_Parent{$tag} )
953 {
954 --$nonindentable_ancestors;
955 $last_tag_tightenable
956 = $HTML::Element::canTighten{$tag};
957 push @html, $node->endtag;
958
959 }
960 else { # general case
961 if (( $this_tag_tightenable
962 = $HTML::Element::canTighten{$tag}
963 )
964 and !$nonindentable_ancestors
965 and $last_tag_tightenable
966 )
967 {
968 push
969 @html,
970 "\n",
971 $indent x $depth,
972 $node->endtag,
973 ;
974 }
975 else {
976 push @html, $node->endtag;
977 }
978 $last_tag_tightenable = $this_tag_tightenable;
979
980 #print "$tag tightenable: $this_tag_tightenable\n";
981 }
982 }
983 }
984 else { # it's a text segment
985
986 $last_tag_tightenable = 0; # I guess this is right
987 HTML::Entities::encode_entities( $node, $entities )
988
989 # That does magic things if $entities is undef.
990 unless (
991 ( defined($entities) && !length($entities) )
992
993 # If there's no entity to encode, don't call it
994 || $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }
995
996 # To keep from amp-escaping children of script et al.
997 # That doesn't deal with descendants; but then, CDATA
998 # parents shouldn't /have/ descendants other than a
999 # text children (or comments?)
1000 || $encoded_content
1001 );
1002 if ($nonindentable_ancestors) {
1003 push @html, $node; # say no go
1004 }
1005 else {
1006 if ($last_tag_tightenable) {
1007 $node =~ s<[\n\r\f\t ]+>< >s;
1008
1009 #$node =~ s< $><>s;
1010 $node =~ s<^ ><>s;
1011 push
1012 @html,
1013 "\n",
1014 $indent x $depth,
1015 $node,
1016
1017 #Text::Wrap::wrap($indent x $depth, $indent x $depth, "\n" . $node)
1018 ;
1019 }
1020 else {
1021 push
1022 @html,
1023 $node,
1024
1025 #Text::Wrap::wrap('', $indent x $depth, $node)
1026 ;
1027 }
1028 }
1029 }
1030 1; # keep traversing
1031 }
1032 ); # End of parms to traverse()
1033 }
1034 else { # no indenting -- much simpler code
1035 $self->traverse(
1036 sub {
1037 ( $node, $start ) = @_;
1038 if ( ref $node ) {
1039
1040 # detect bogus classes. RT #35948
1041 $node->isa( $self->element_class )
1042 or Carp::confess( "Object of class "
1043 . ref($node)
1044 . " cannot be processed by HTML::Element" );
1045
1046 $tag = $node->{'_tag'};
1047 if ($start) { # on the way in
1048 push( @html, $node->starttag($entities) );
1049 }
1050 elsif (
1051 not( $empty_element_map->{$tag}
1052 or $omissible_map->{$tag} )
1053 )
1054 {
1055
1056 # on the way out
1057 push( @html, $node->endtag );
1058 }
1059 }
1060 else {
1061
1062 # simple text content
1063 HTML::Entities::encode_entities( $node, $entities )
1064
1065 # That does magic things if $entities is undef.
1066 unless (
1067 ( defined($entities) && !length($entities) )
1068
1069 # If there's no entity to encode, don't call it
1070 || $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }
1071
1072 # To keep from amp-escaping children of script et al.
1073 # That doesn't deal with descendants; but then, CDATA
1074 # parents shouldn't /have/ descendants other than a
1075 # text children (or comments?)
1076 || $encoded_content
1077 );
1078 push( @html, $node );
1079 }
1080 1; # keep traversing
1081 }
1082 ); # End of parms to traverse()
1083 }
1084
1085 if ( $self->{_store_declarations} && defined $self->{_decl} ) {
1086 unshift @html, sprintf "<!%s>\n", $self->{_decl}->{text};
1087 }
1088
1089 return join( '', @html );
1090}
1091
1092
1093sub as_text {
1094
1095 # Yet another iteratively implemented traverser
1096 my ( $this, %options ) = @_;
1097 my $skip_dels = $options{'skip_dels'} || 0;
1098 my (@pile) = ($this);
1099 my $tag;
1100 my $text = '';
1101 while (@pile) {
1102 if ( !defined( $pile[0] ) ) { # undef!
1103 # no-op
1104 }
1105 elsif ( !ref( $pile[0] ) ) { # text bit! save it!
1106 $text .= shift @pile;
1107 }
1108 else { # it's a ref -- traverse under it
1109 unshift @pile, @{ $this->{'_content'} || $nillio }
1110 unless ( $tag = ( $this = shift @pile )->{'_tag'} ) eq 'style'
1111 or $tag eq 'script'
1112 or ( $skip_dels and $tag eq 'del' );
1113 }
1114 }
1115 return $text;
1116}
1117
1118# extra_chars added for RT #26436
1119sub as_trimmed_text {
1120 my ( $this, %options ) = @_;
1121 my $text = $this->as_text(%options);
1122 my $extra_chars = defined $options{'extra_chars'}
1123 ? $options{'extra_chars'} : '';
1124
1125 $text =~ s/[\n\r\f\t$extra_chars ]+$//s;
1126 $text =~ s/^[\n\r\f\t$extra_chars ]+//s;
1127 $text =~ s/[\n\r\f\t$extra_chars ]+/ /g;
1128 return $text;
1129}
1130
1131sub as_text_trimmed { shift->as_trimmed_text(@_) } # alias, because I forget
1132
1133
1134# TODO: make it wrap, if not indent?
1135
1136sub as_XML {
1137
1138 # based an as_HTML
1139 my ($self) = @_;
1140
1141 #my $indent_on = defined($indent) && length($indent);
1142 my @xml = ();
1143 my $empty_element_map = $self->_empty_element_map;
1144
1145 my ( $tag, $node, $start ); # per-iteration scratch
1146 $self->traverse(
1147 sub {
1148 ( $node, $start ) = @_;
1149 if ( ref $node ) { # it's an element
1150 $tag = $node->{'_tag'};
1151 if ($start) { # on the way in
1152
1153 foreach my $attr ( $node->all_attr_names() ) {
1154 Carp::croak(
1155 "$tag has an invalid attribute name '$attr'")
1156 unless ( $attr eq '/' || $self->_valid_name($attr) );
1157 }
1158
1159 if ( $empty_element_map->{$tag}
1160 and !@{ $node->{'_content'} || $nillio } )
1161 {
1162 push( @xml, $node->starttag_XML( undef, 1 ) );
1163 }
1164 else {
1165 push( @xml, $node->starttag_XML(undef) );
1166 }
1167 }
1168 else { # on the way out
1169 unless ( $empty_element_map->{$tag}
1170 and !@{ $node->{'_content'} || $nillio } )
1171 {
1172 push( @xml, $node->endtag_XML() );
1173 } # otherwise it will have been an <... /> tag.
1174 }
1175 }
1176 else { # it's just text
1177 _xml_escape($node);
1178 push( @xml, $node );
1179 }
1180 1; # keep traversing
1181 }
1182 );
1183
1184 join( '', @xml, "\n" );
1185}
1186
1187sub _xml_escape {
1188
1189# DESTRUCTIVE (a.k.a. "in-place")
1190# Five required escapes: http://www.w3.org/TR/2006/REC-xml11-20060816/#syntax
1191# We allow & if it's part of a valid escape already: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-references
1192 foreach my $x (@_) {
1193
1194 # In strings with no encoded entities all & should be encoded.
1195 if ($encoded_content) {
1196 $x
1197 =~ s/&(?! # An ampersand that isn't followed by...
1198 (\#\d+; | # A hash mark, digits and semicolon, or
1199 \#x[\da-f]+; | # A hash mark, "x", hex digits and semicolon, or
1200 $START_CHAR$NAME_CHAR+; ) # A valid unicode entity name and semicolon
1201 )/&amp;/gx; # Needs to be escaped to amp
1202 }
1203 else {
1204 $x =~ s/&/&amp;/g;
1205 }
1206
1207 # simple character escapes
1208 $x =~ s/</&lt;/g;
1209 $x =~ s/>/&gt;/g;
1210 $x =~ s/"/&quot;/g;
1211 $x =~ s/'/&apos;/g;
1212 }
1213 return;
1214}
1215
1216
1217# NOTES:
1218#
1219# It's been suggested that attribute names be made :-keywords:
1220# (:_tag "img" :border 0 :src "pie.png" :usemap "#main.map")
1221# However, it seems that Scheme has no such data type as :-keywords.
1222# So, for the moment at least, I tend toward simplicity, uniformity,
1223# and universality, where everything a string or a list.
1224
1225sub as_Lisp_form {
1226 my @out;
1227
1228 my $sub;
1229 my $depth = 0;
1230 my ( @list, $val );
1231 $sub = sub { # Recursor
1232 my $self = $_[0];
1233 @list = ( '_tag', $self->{'_tag'} );
1234 @list = () unless defined $list[-1]; # unlikely
1235
1236 for ( sort keys %$self ) { # predictable ordering
1237 next
1238 if $_ eq '_content'
1239 or $_ eq '_tag'
1240 or $_ eq '_parent'
1241 or $_ eq '/';
1242
1243 # Leave the other private attributes, I guess.
1244 push @list, $_, $val
1245 if defined( $val = $self->{$_} ); # and !ref $val;
1246 }
1247
1248 for (@list) {
1249
1250 # octal-escape it
1251 s<([^\x20\x21\x23\x27-\x5B\x5D-\x7E])>
1252 <sprintf('\\%03o',ord($1))>eg;
1253 $_ = qq{"$_"};
1254 }
1255 push @out, ( ' ' x $depth ) . '(' . join ' ', splice @list;
1256 if ( @{ $self->{'_content'} || $nillio } ) {
1257 $out[-1] .= " \"_content\" (\n";
1258 ++$depth;
1259 foreach my $c ( @{ $self->{'_content'} } ) {
1260 if ( ref($c) ) {
1261
1262 # an element -- recurse
1263 $sub->($c);
1264 }
1265 else {
1266
1267 # a text segment -- stick it in and octal-escape it
1268 push @out, $c;
1269 $out[-1] =~ s<([^\x20\x21\x23\x27-\x5B\x5D-\x7E])>
1270 <sprintf('\\%03o',ord($1))>eg;
1271
1272 # And quote and indent it.
1273 $out[-1] .= "\"\n";
1274 $out[-1] = ( ' ' x $depth ) . '"' . $out[-1];
1275 }
1276 }
1277 --$depth;
1278 substr( $out[-1], -1 )
1279 = "))\n"; # end of _content and of the element
1280 }
1281 else {
1282 $out[-1] .= ")\n";
1283 }
1284 return;
1285 };
1286
1287 $sub->( $_[0] );
1288 undef $sub;
1289 return join '', @out;
1290}
1291
1292
1293sub format {
1294 my ( $self, $formatter ) = @_;
1295 unless ( defined $formatter ) {
1296 # RECOMMEND PREREQ: HTML::FormatText
1297 require HTML::FormatText;
1298 $formatter = HTML::FormatText->new();
1299 }
1300 $formatter->format($self);
1301}
1302
1303
1304sub starttag {
1305 my ( $self, $entities ) = @_;
1306
1307 my $name = $self->{'_tag'};
1308
1309 return $self->{'text'} if $name eq '~literal';
1310 return "<!" . $self->{'text'} . ">" if $name eq '~declaration';
1311 return "<?" . $self->{'text'} . ">" if $name eq '~pi';
1312
1313 if ( $name eq '~comment' ) {
1314 if ( ref( $self->{'text'} || '' ) eq 'ARRAY' ) {
1315
1316 # Does this ever get used? And is this right?
1317 return
1318 "<!"
1319 . join( ' ', map( "--$_--", @{ $self->{'text'} } ) ) . ">";
1320 }
1321 else {
1322 return "<!--" . $self->{'text'} . "-->";
1323 }
1324 }
1325
1326 my $tag = $html_uc ? "<\U$name" : "<\L$name";
1327 my $val;
1328 for ( sort keys %$self ) { # predictable ordering
1329 next if !length $_ or m/^_/s or $_ eq '/';
1330 $val = $self->{$_};
1331 next if !defined $val; # or ref $val;
1332 if ($_ eq $val && # if attribute is boolean, for this element
1333 exists( $HTML::Element::boolean_attr{$name} )
1334 && (ref( $HTML::Element::boolean_attr{$name} )
1335 ? $HTML::Element::boolean_attr{$name}{$_}
1336 : $HTML::Element::boolean_attr{$name} eq $_
1337 )
1338 )
1339 {
1340 $tag .= $html_uc ? " \U$_" : " \L$_";
1341 }
1342 else { # non-boolean attribute
1343
1344 if ( ref $val eq 'HTML::Element'
1345 and $val->{_tag} eq '~literal' )
1346 {
1347 $val = $val->{text};
1348 }
1349 else {
1350 HTML::Entities::encode_entities( $val, $entities )
1351 unless (
1352 defined($entities) && !length($entities)
1353 || $encoded_content
1354
1355 );
1356 }
1357
1358 $val = qq{"$val"};
1359 $tag .= $html_uc ? qq{ \U$_\E=$val} : qq{ \L$_\E=$val};
1360 }
1361 } # for keys
1362 if ( scalar $self->content_list == 0
1363 && $self->_empty_element_map->{ $self->tag } )
1364 {
1365 return $tag . " />";
1366 }
1367 else {
1368 return $tag . ">";
1369 }
1370}
1371
1372
1373sub starttag_XML {
1374 my ($self) = @_;
1375
1376 # and a third parameter to signal emptiness?
1377
1378 my $name = $self->{'_tag'};
1379
1380 return $self->{'text'} if $name eq '~literal';
1381 return '<!' . $self->{'text'} . '>' if $name eq '~declaration';
1382 return "<?" . $self->{'text'} . "?>" if $name eq '~pi';
1383
1384 if ( $name eq '~comment' ) {
1385 if ( ref( $self->{'text'} || '' ) eq 'ARRAY' ) {
1386
1387 # Does this ever get used? And is this right?
1388 $name = join( ' ', @{ $self->{'text'} } );
1389 }
1390 else {
1391 $name = $self->{'text'};
1392 }
1393 $name =~ s/--/-&#45;/g; # can't have double --'s in XML comments
1394 return "<!--$name-->";
1395 }
1396
1397 my $tag = "<$name";
1398 my $val;
1399 for ( sort keys %$self ) { # predictable ordering
1400 next if !length $_ or m/^_/s or $_ eq '/';
1401
1402 # Hm -- what to do if val is undef?
1403 # I suppose that shouldn't ever happen.
1404 next if !defined( $val = $self->{$_} ); # or ref $val;
1405 _xml_escape($val);
1406 $tag .= qq{ $_="$val"};
1407 }
1408 @_ == 3 ? "$tag />" : "$tag>";
1409}
1410
1411
1412sub endtag {
1413 $html_uc ? "</\U$_[0]->{'_tag'}>" : "</\L$_[0]->{'_tag'}>";
1414}
1415
1416sub endtag_XML {
1417 "</$_[0]->{'_tag'}>";
1418}
1419
1420#==========================================================================
1421# This, ladies and germs, is an iterative implementation of a
1422# recursive algorithm. DON'T TRY THIS AT HOME.
1423# Basically, the algorithm says:
1424#
1425# To traverse:
1426# 1: pre-order visit this node
1427# 2: traverse any children of this node
1428# 3: post-order visit this node, unless it's a text segment,
1429# or a prototypically empty node (like "br", etc.)
1430# Add to that the consideration of the callbacks' return values,
1431# so you can block visitation of the children, or siblings, or
1432# abort the whole excursion, etc.
1433#
1434# So, why all this hassle with making the code iterative?
1435# It makes for real speed, because it eliminates the whole
1436# hassle of Perl having to allocate scratch space for each
1437# instance of the recursive sub. Since the algorithm
1438# is basically simple (and not all recursive ones are!) and
1439# has few necessary lexicals (basically just the current node's
1440# content list, and the current position in it), it was relatively
1441# straightforward to store that information not as the frame
1442# of a sub, but as a stack, i.e., a simple Perl array (well, two
1443# of them, actually: one for content-listrefs, one for indexes of
1444# current position in each of those).
1445
1446my $NIL = [];
1447
1448sub traverse {
1449 my ( $start, $callback, $ignore_text ) = @_;
1450
1451 Carp::croak "traverse can be called only as an object method"
1452 unless ref $start;
1453
1454 Carp::croak('must provide a callback for traverse()!')
1455 unless defined $callback and ref $callback;
1456
1457 # Elementary type-checking:
1458 my ( $c_pre, $c_post );
1459 if ( UNIVERSAL::isa( $callback, 'CODE' ) ) {
1460 $c_pre = $c_post = $callback;
1461 }
1462 elsif ( UNIVERSAL::isa( $callback, 'ARRAY' ) ) {
1463 ( $c_pre, $c_post ) = @$callback;
1464 Carp::croak(
1465 "pre-order callback \"$c_pre\" is true but not a coderef!")
1466 if $c_pre and not UNIVERSAL::isa( $c_pre, 'CODE' );
1467 Carp::croak(
1468 "pre-order callback \"$c_post\" is true but not a coderef!")
1469 if $c_post and not UNIVERSAL::isa( $c_post, 'CODE' );
1470 return $start unless $c_pre or $c_post;
1471
1472 # otherwise there'd be nothing to actually do!
1473 }
1474 else {
1475 Carp::croak("$callback is not a known kind of reference")
1476 unless ref($callback);
1477 }
1478
1479 my $empty_element_map = $start->_empty_element_map;
1480
1481 my (@C) = [$start]; # a stack containing lists of children
1482 my (@I) = (-1); # initial value must be -1 for each list
1483 # a stack of indexes to current position in corresponding lists in @C
1484 # In each of these, 0 is the active point
1485
1486 # scratch:
1487 my ($rv, # return value of callback
1488 $this, # current node
1489 $content_r, # child list of $this
1490 );
1491
1492 # THE BIG LOOP
1493 while (@C) {
1494
1495 # Move to next item in this frame
1496 if ( !defined( $I[0] ) or ++$I[0] >= @{ $C[0] } ) {
1497
1498 # We either went off the end of this list, or aborted the list
1499 # So call the post-order callback:
1500 if ( $c_post
1501 and defined $I[0]
1502 and @C > 1
1503
1504 # to keep the next line from autovivifying
1505 and defined( $this = $C[1][ $I[1] ] ) # sanity, and
1506 # suppress callbacks on exiting the fictional top frame
1507 and ref($this) # sanity
1508 and not(
1509 $this->{'_empty_element'}
1510 || ( $empty_element_map->{ $this->{'_tag'} || '' }
1511 && !@{ $this->{'_content'} } ) # RT #49932
1512 ) # things that don't get post-order callbacks
1513 )
1514 {
1515 shift @I;
1516 shift @C;
1517
1518 #print "Post! at depth", scalar(@I), "\n";
1519 $rv = $c_post->(
1520
1521 #map $_, # copy to avoid any messiness
1522 $this, # 0: this
1523 0, # 1: startflag (0 for post-order call)
1524 @I - 1, # 2: depth
1525 );
1526
1527 if ( defined($rv) and ref($rv) eq $travsignal_package ) {
1528 $rv = $$rv; #deref
1529 if ( $rv eq 'ABORT' ) {
1530 last; # end of this excursion!
1531 }
1532 elsif ( $rv eq 'PRUNE' ) {
1533
1534 # NOOP on post!!
1535 }
1536 elsif ( $rv eq 'PRUNE_SOFTLY' ) {
1537
1538 # NOOP on post!!
1539 }
1540 elsif ( $rv eq 'OK' ) {
1541
1542 # noop
1543 }
1544 elsif ( $rv eq 'PRUNE_UP' ) {
1545 $I[0] = undef;
1546 }
1547 else {
1548 die "Unknown travsignal $rv\n";
1549
1550 # should never happen
1551 }
1552 }
1553 }
1554 else {
1555 shift @I;
1556 shift @C;
1557 }
1558 next;
1559 }
1560
1561 $this = $C[0][ $I[0] ];
1562
1563 if ($c_pre) {
1564 if ( defined $this and ref $this ) { # element
1565 $rv = $c_pre->(
1566
1567 #map $_, # copy to avoid any messiness
1568 $this, # 0: this
1569 1, # 1: startflag (1 for pre-order call)
1570 @I - 1, # 2: depth
1571 );
1572 }
1573 else { # text segment
1574 next if $ignore_text;
1575 $rv = $c_pre->(
1576
1577 #map $_, # copy to avoid any messiness
1578 $this, # 0: this
1579 1, # 1: startflag (1 for pre-order call)
1580 @I - 1, # 2: depth
1581 $C[1][ $I[1] ], # 3: parent
1582 # And there will always be a $C[1], since
1583 # we can't start traversing at a text node
1584 $I[0] # 4: index of self in parent's content list
1585 );
1586 }
1587 if ( not $rv ) { # returned false. Same as PRUNE.
1588 next; # prune
1589 }
1590 elsif ( ref($rv) eq $travsignal_package ) {
1591 $rv = $$rv; # deref
1592 if ( $rv eq 'ABORT' ) {
1593 last; # end of this excursion!
1594 }
1595 elsif ( $rv eq 'PRUNE' ) {
1596 next;
1597 }
1598 elsif ( $rv eq 'PRUNE_SOFTLY' ) {
1599 if (ref($this)
1600 and not( $this->{'_empty_element'}
1601 || $empty_element_map->{ $this->{'_tag'} || '' } )
1602 )
1603 {
1604
1605 # push a dummy empty content list just to trigger a post callback
1606 unshift @I, -1;
1607 unshift @C, $NIL;
1608 }
1609 next;
1610 }
1611 elsif ( $rv eq 'OK' ) {
1612
1613 # noop
1614 }
1615 elsif ( $rv eq 'PRUNE_UP' ) {
1616 $I[0] = undef;
1617 next;
1618
1619 # equivalent of last'ing out of the current child list.
1620
1621 # Used to have PRUNE_UP_SOFTLY and ABORT_SOFTLY here, but the code
1622 # for these was seriously upsetting, served no particularly clear
1623 # purpose, and could not, I think, be easily implemented with a
1624 # recursive routine. All bad things!
1625 }
1626 else {
1627 die "Unknown travsignal $rv\n";
1628
1629 # should never happen
1630 }
1631 }
1632
1633 # else fall thru to meaning same as \'OK'.
1634 }
1635
1636 # end of pre-order calling
1637
1638 # Now queue up content list for the current element...
1639 if (ref $this
1640 and not( # ...except for those which...
1641 not( $content_r = $this->{'_content'} and @$content_r )
1642
1643 # ...have empty content lists...
1644 and $this->{'_empty_element'}
1645 || $empty_element_map->{ $this->{'_tag'} || '' }
1646
1647 # ...and that don't get post-order callbacks
1648 )
1649 )
1650 {
1651 unshift @I, -1;
1652 unshift @C, $content_r || $NIL;
1653
1654 #print $this->{'_tag'}, " ($this) adds content_r ", $C[0], "\n";
1655 }
1656 }
1657 return $start;
1658}
1659
1660
1661sub is_inside {
1662 my $self = shift;
1663 return 0 unless @_; # if no items specified, I guess this is right.
1664
1665 my $current = $self;
1666 # the loop starts by looking at the given element
1667
1668 if (scalar @_ == 1) {
1669 while ( defined $current and ref $current ) {
1670 return 1 if $current eq $_[0] || $current->{'_tag'} eq $_[0];
1671 $current = $current->{'_parent'};
1672 }
1673 return 0;
1674 } else {
1675 my %elements = map { $_ => 1 } @_;
1676 while ( defined $current and ref $current ) {
1677 return 1 if $elements{$current} || $elements{ $current->{'_tag'} };
1678 $current = $current->{'_parent'};
1679 }
1680 }
1681 return 0;
1682}
1683
1684
1685sub is_empty {
1686 my $self = shift;
1687 !$self->{'_content'} || !@{ $self->{'_content'} };
1688}
1689
1690
1691sub pindex {
1692 my $self = shift;
1693
1694 my $parent = $self->{'_parent'} || return undef;
1695 my $pc = $parent->{'_content'} || return undef;
1696 for ( my $i = 0; $i < @$pc; ++$i ) {
1697 return $i if ref $pc->[$i] and $pc->[$i] eq $self;
1698 }
1699 return undef; # we shouldn't ever get here
1700}
1701
1702#--------------------------------------------------------------------------
1703
1704
1705sub left {
1706 Carp::croak "left() is supposed to be an object method"
1707 unless ref $_[0];
1708 my $pc = ( $_[0]->{'_parent'} || return )->{'_content'}
1709 || die "parent is childless?";
1710
1711 die "parent is childless" unless @$pc;
1712 return if @$pc == 1; # I'm an only child
1713
1714 if (wantarray) {
1715 my @out;
1716 foreach my $j (@$pc) {
1717 return @out if ref $j and $j eq $_[0];
1718 push @out, $j;
1719 }
1720 }
1721 else {
1722 for ( my $i = 0; $i < @$pc; ++$i ) {
1723 return $i ? $pc->[ $i - 1 ] : undef
1724 if ref $pc->[$i] and $pc->[$i] eq $_[0];
1725 }
1726 }
1727
1728 die "I'm not in my parent's content list?";
1729 return;
1730}
1731
1732
1733sub right {
1734 Carp::croak "right() is supposed to be an object method"
1735 unless ref $_[0];
1736 my $pc = ( $_[0]->{'_parent'} || return )->{'_content'}
1737 || die "parent is childless?";
1738
1739 die "parent is childless" unless @$pc;
1740 return if @$pc == 1; # I'm an only child
1741
1742 if (wantarray) {
1743 my ( @out, $seen );
1744 foreach my $j (@$pc) {
1745 if ($seen) {
1746 push @out, $j;
1747 }
1748 else {
1749 $seen = 1 if ref $j and $j eq $_[0];
1750 }
1751 }
1752 die "I'm not in my parent's content list?" unless $seen;
1753 return @out;
1754 }
1755 else {
1756 for ( my $i = 0; $i < @$pc; ++$i ) {
1757 return +( $i == $#$pc ) ? undef : $pc->[ $i + 1 ]
1758 if ref $pc->[$i] and $pc->[$i] eq $_[0];
1759 }
1760 die "I'm not in my parent's content list?";
1761 return;
1762 }
1763}
1764
1765#--------------------------------------------------------------------------
1766
1767
1768sub address {
1769 if ( @_ == 1 ) { # report-address form
1770 return join(
1771 '.',
1772 reverse( # so it starts at the top
1773 map( $_->pindex() || '0', # so that root's undef -> '0'
1774 $_[0], # self and...
1775 $_[0]->lineage )
1776 )
1777 );
1778 }
1779 else { # get-node-at-address
1780 my @stack = split( /\./, $_[1] );
1781 my $here;
1782
1783 if ( @stack and !length $stack[0] ) { # relative addressing
1784 $here = $_[0];
1785 shift @stack;
1786 }
1787 else { # absolute addressing
1788 return undef unless 0 == shift @stack; # pop the initial 0-for-root
1789 $here = $_[0]->root;
1790 }
1791
1792 while (@stack) {
1793 return undef
1794 unless $here->{'_content'}
1795 and @{ $here->{'_content'} } > $stack[0];
1796
1797 # make sure the index isn't too high
1798 $here = $here->{'_content'}[ shift @stack ];
1799 return undef if @stack and not ref $here;
1800
1801 # we hit a text node when we expected a non-terminal element node
1802 }
1803
1804 return $here;
1805 }
1806}
1807
1808
1809sub depth {
1810 my $here = $_[0];
1811 my $depth = 0;
1812 while ( defined( $here = $here->{'_parent'} ) and ref($here) ) {
1813 ++$depth;
1814 }
1815 return $depth;
1816}
1817
1818
1819sub root {
1820 my $here = my $root = shift;
1821 while ( defined( $here = $here->{'_parent'} ) and ref($here) ) {
1822 $root = $here;
1823 }
1824 return $root;
1825}
1826
1827
1828sub lineage {
1829 my $here = shift;
1830 my @lineage;
1831 while ( defined( $here = $here->{'_parent'} ) and ref($here) ) {
1832 push @lineage, $here;
1833 }
1834 return @lineage;
1835}
1836
1837
1838sub lineage_tag_names {
1839 my $here = my $start = shift;
1840 my @lineage_names;
1841 while ( defined( $here = $here->{'_parent'} ) and ref($here) ) {
1842 push @lineage_names, $here->{'_tag'};
1843 }
1844 return @lineage_names;
1845}
1846
1847
1848sub descendents { shift->descendants(@_) }
1849
1850sub descendants {
1851 my $start = shift;
1852 if (wantarray) {
1853 my @descendants;
1854 $start->traverse(
1855 [ # pre-order sub only
1856 sub {
1857 push( @descendants, $_[0] );
1858 return 1;
1859 },
1860 undef # no post
1861 ],
1862 1, # ignore text
1863 );
1864 shift @descendants; # so $self doesn't appear in the list
1865 return @descendants;
1866 }
1867 else { # just returns a scalar
1868 my $descendants = -1; # to offset $self being counted
1869 $start->traverse(
1870 [ # pre-order sub only
1871 sub {
1872 ++$descendants;
1873 return 1;
1874 },
1875 undef # no post
1876 ],
1877 1, # ignore text
1878 );
1879 return $descendants;
1880 }
1881}
1882
1883
1884sub find { shift->find_by_tag_name(@_) }
1885
1886# yup, a handy alias
1887
1888sub find_by_tag_name {
1889 my (@pile) = shift(@_); # start out the to-do stack for the traverser
1890 Carp::croak "find_by_tag_name can be called only as an object method"
1891 unless ref $pile[0];
1892 return () unless @_;
1893 my (@tags) = $pile[0]->_fold_case(@_);
1894 my ( @matching, $this, $this_tag );
1895 while (@pile) {
1896 $this_tag = ( $this = shift @pile )->{'_tag'};
1897 foreach my $t (@tags) {
1898 if ( $t eq $this_tag ) {
1899 if (wantarray) {
1900 push @matching, $this;
1901 last;
1902 }
1903 else {
1904 return $this;
1905 }
1906 }
1907 }
1908 unshift @pile, grep ref($_), @{ $this->{'_content'} || next };
1909 }
1910 return @matching if wantarray;
1911 return;
1912}
1913
1914
1915sub find_by_attribute {
1916
1917 # We could limit this to non-internal attributes, but hey.
1918 my ( $self, $attribute, $value ) = @_;
1919 Carp::croak "Attribute must be a defined value!"
1920 unless defined $attribute;
1921 $attribute = $self->_fold_case($attribute);
1922
1923 my @matching;
1924 my $wantarray = wantarray;
1925 my $quit;
1926 $self->traverse(
1927 [ # pre-order only
1928 sub {
1929 if ( exists $_[0]{$attribute}
1930 and $_[0]{$attribute} eq $value )
1931 {
1932 push @matching, $_[0];
1933 return HTML::Element::ABORT
1934 unless $wantarray; # only take the first
1935 }
1936 1; # keep traversing
1937 },
1938 undef # no post
1939 ],
1940 1, # yes, ignore text nodes.
1941 );
1942
1943 if ($wantarray) {
1944 return @matching;
1945 }
1946 else {
1947 return $matching[0];
1948 }
1949}
1950
1951#--------------------------------------------------------------------------
1952
1953
1954sub look_down {
1955 ref( $_[0] ) or Carp::croak "look_down works only as an object method";
1956
1957 my @criteria;
1958 for ( my $i = 1; $i < @_; ) {
1959 Carp::croak "Can't use undef as an attribute name"
1960 unless defined $_[$i];
1961 if ( ref $_[$i] ) {
1962 Carp::croak "A " . ref( $_[$i] ) . " value is not a criterion"
1963 unless ref $_[$i] eq 'CODE';
1964 push @criteria, $_[ $i++ ];
1965 }
1966 else {
1967 Carp::croak "param list to look_down ends in a key!" if $i == $#_;
1968 push @criteria, [
1969 scalar( $_[0]->_fold_case( $_[$i] ) ),
1970 defined( $_[ $i + 1 ] )
1971 ? ( ( ref $_[ $i + 1 ] ? $_[ $i + 1 ] : lc( $_[ $i + 1 ] ) ),
1972 ref( $_[ $i + 1 ] )
1973 )
1974
1975 # yes, leave that LC!
1976 : undef
1977 ];
1978 $i += 2;
1979 }
1980 }
1981 Carp::croak "No criteria?" unless @criteria;
1982
1983 my (@pile) = ( $_[0] );
1984 my ( @matching, $val, $this );
1985Node:
1986 while ( defined( $this = shift @pile ) ) {
1987
1988 # Yet another traverser implemented with merely iterative code.
1989 foreach my $c (@criteria) {
1990 if ( ref($c) eq 'CODE' ) {
1991 next Node unless $c->($this); # jump to the continue block
1992 }
1993 else { # it's an attr-value pair
1994 next Node # jump to the continue block
1995 if # two values are unequal if:
1996 ( defined( $val = $this->{ $c->[0] } ) )
1997 ? ( !defined $c->[ 1
1998 ] # actual is def, critval is undef => fail
1999 # allow regex matching
2000 # allow regex matching
2001 or (
2002 $c->[2] eq 'Regexp'
2003 ? $val !~ $c->[1]
2004 : ( ref $val ne $c->[2]
2005
2006 # have unequal ref values => fail
2007 or lc($val) ne lc( $c->[1] )
2008
2009 # have unequal lc string values => fail
2010 )
2011 )
2012 )
2013 : ( defined $c->[1]
2014 ) # actual is undef, critval is def => fail
2015 }
2016 }
2017
2018 # We make it this far only if all the criteria passed.
2019 return $this unless wantarray;
2020 push @matching, $this;
2021 }
2022 continue {
2023 unshift @pile, grep ref($_), @{ $this->{'_content'} || $nillio };
2024 }
2025 return @matching if wantarray;
2026 return;
2027}
2028
2029
2030sub look_up {
2031 ref( $_[0] ) or Carp::croak "look_up works only as an object method";
2032
2033 my @criteria;
2034 for ( my $i = 1; $i < @_; ) {
2035 Carp::croak "Can't use undef as an attribute name"
2036 unless defined $_[$i];
2037 if ( ref $_[$i] ) {
2038 Carp::croak "A " . ref( $_[$i] ) . " value is not a criterion"
2039 unless ref $_[$i] eq 'CODE';
2040 push @criteria, $_[ $i++ ];
2041 }
2042 else {
2043 Carp::croak "param list to look_up ends in a key!" if $i == $#_;
2044 push @criteria, [
2045 scalar( $_[0]->_fold_case( $_[$i] ) ),
2046 defined( $_[ $i + 1 ] )
2047 ? ( ( ref $_[ $i + 1 ] ? $_[ $i + 1 ] : lc( $_[ $i + 1 ] ) ),
2048 ref( $_[ $i + 1 ] )
2049 )
2050 : undef # Yes, leave that LC!
2051 ];
2052 $i += 2;
2053 }
2054 }
2055 Carp::croak "No criteria?" unless @criteria;
2056
2057 my ( @matching, $val );
2058 my $this = $_[0];
2059Node:
2060 while (1) {
2061
2062 # You'll notice that the code here is almost the same as for look_down.
2063 foreach my $c (@criteria) {
2064 if ( ref($c) eq 'CODE' ) {
2065 next Node unless $c->($this); # jump to the continue block
2066 }
2067 else { # it's an attr-value pair
2068 next Node # jump to the continue block
2069 if # two values are unequal if:
2070 ( defined( $val = $this->{ $c->[0] } ) )
2071 ? ( !defined $c->[ 1
2072 ] # actual is def, critval is undef => fail
2073 or (
2074 $c->[2] eq 'Regexp'
2075 ? $val !~ $c->[1]
2076 : ( ref $val ne $c->[2]
2077
2078 # have unequal ref values => fail
2079 or lc($val) ne $c->[1]
2080
2081 # have unequal lc string values => fail
2082 )
2083 )
2084 )
2085 : ( defined $c->[1]
2086 ) # actual is undef, critval is def => fail
2087 }
2088 }
2089
2090 # We make it this far only if all the criteria passed.
2091 return $this unless wantarray;
2092 push @matching, $this;
2093 }
2094 continue {
2095 last unless defined( $this = $this->{'_parent'} ) and ref $this;
2096 }
2097
2098 return @matching if wantarray;
2099 return;
2100}
2101
2102#--------------------------------------------------------------------------
2103
2104
2105sub attr_get_i {
2106 if ( @_ > 2 ) {
2107 my $self = shift;
2108 Carp::croak "No attribute names can be undef!"
2109 if grep !defined($_), @_;
2110 my @attributes = $self->_fold_case(@_);
2111 if (wantarray) {
2112 my @out;
2113 foreach my $x ( $self, $self->lineage ) {
2114 push @out,
2115 map { exists( $x->{$_} ) ? $x->{$_} : () } @attributes;
2116 }
2117 return @out;
2118 }
2119 else {
2120 foreach my $x ( $self, $self->lineage ) {
2121 foreach my $attribute (@attributes) {
2122 return $x->{$attribute}
2123 if exists $x->{$attribute}; # found
2124 }
2125 }
2126 return; # never found
2127 }
2128 }
2129 else {
2130
2131 # Single-attribute search. Simpler, most common, so optimize
2132 # for the most common case
2133 Carp::croak "Attribute name must be a defined value!"
2134 unless defined $_[1];
2135 my $self = $_[0];
2136 my $attribute = $self->_fold_case( $_[1] );
2137 if (wantarray) { # list context
2138 return
2139 map { exists( $_->{$attribute} ) ? $_->{$attribute} : () }
2140 $self, $self->lineage;
2141 }
2142 else { # scalar context
2143 foreach my $x ( $self, $self->lineage ) {
2144 return $x->{$attribute} if exists $x->{$attribute}; # found
2145 }
2146 return; # never found
2147 }
2148 }
2149}
2150
2151
2152sub tagname_map {
2153 my (@pile) = $_[0]; # start out the to-do stack for the traverser
2154 Carp::croak "find_by_tag_name can be called only as an object method"
2155 unless ref $pile[0];
2156 my ( %map, $this_tag, $this );
2157 while (@pile) {
2158 $this_tag = ''
2159 unless defined( $this_tag = ( $this = shift @pile )->{'_tag'} )
2160 ; # dance around the strange case of having an undef tagname.
2161 push @{ $map{$this_tag} ||= [] }, $this; # add to map
2162 unshift @pile, grep ref($_),
2163 @{ $this->{'_content'} || next }; # traverse
2164 }
2165 return \%map;
2166}
2167
2168
2169sub extract_links {
2170 my $start = shift;
2171
2172 my %wantType;
2173 @wantType{ $start->_fold_case(@_) } = (1) x @_; # if there were any
2174 my $wantType = scalar(@_);
2175
2176 my @links;
2177
2178 # TODO: add xml:link?
2179
2180 my ( $link_attrs, $tag, $self, $val ); # scratch for each iteration
2181 $start->traverse(
2182 [ sub { # pre-order call only
2183 $self = $_[0];
2184
2185 $tag = $self->{'_tag'};
2186 return 1
2187 if $wantType && !$wantType{$tag}; # if we're selective
2188
2189 if (defined(
2190 $link_attrs = $HTML::Element::linkElements{$tag}
2191 )
2192 )
2193 {
2194
2195 # If this is a tag that has any link attributes,
2196 # look over possibly present link attributes,
2197 # saving the value, if found.
2198 for ( ref($link_attrs) ? @$link_attrs : $link_attrs ) {
2199 if ( defined( $val = $self->attr($_) ) ) {
2200 push( @links, [ $val, $self, $_, $tag ] );
2201 }
2202 }
2203 }
2204 1; # return true, so we keep recursing
2205 },
2206 undef
2207 ],
2208 1, # ignore text nodes
2209 );
2210 \@links;
2211}
2212
2213
2214sub simplify_pres {
2215 my $pre = 0;
2216
2217 my $sub;
2218 my $line;
2219 $sub = sub {
2220 ++$pre if $_[0]->{'_tag'} eq 'pre';
2221 foreach my $it ( @{ $_[0]->{'_content'} || return } ) {
2222 if ( ref $it ) {
2223 $sub->($it); # recurse!
2224 }
2225 elsif ($pre) {
2226
2227 #$it =~ s/(?:(?:\cm\cj*)|(?:\cj))/\n/g;
2228
2229 $it = join "\n", map {
2230 ;
2231 $line = $_;
2232 while (
2233 $line
2234 =~ s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
2235
2236 # Sort of adapted from Text::Tabs -- yes, it's hardwired-in that
2237 # tabs are at every EIGHTH column.
2238 )
2239 {
2240 }
2241 $line;
2242 }
2243 split /(?:(?:\cm\cj*)|(?:\cj))/, $it, -1;
2244 }
2245 }
2246 --$pre if $_[0]->{'_tag'} eq 'pre';
2247 return;
2248 };
2249 $sub->( $_[0] );
2250
2251 undef $sub;
2252 return;
2253}
2254
2255
2256sub same_as {
2257 die 'same_as() takes only one argument: $h->same_as($i)' unless @_ == 2;
2258 my ( $h, $i ) = @_[ 0, 1 ];
2259 die "same_as() can be called only as an object method" unless ref $h;
2260
2261 return 0 unless defined $i and ref $i;
2262
2263 # An element can't be same_as anything but another element!
2264 # They needn't be of the same class, tho.
2265
2266 return 1 if $h eq $i;
2267
2268 # special (if rare) case: anything is the same as... itself!
2269
2270 # assumes that no content lists in/under $h or $i contain subsequent
2271 # text segments, like: ['foo', ' bar']
2272
2273 # compare attributes now.
2274 #print "Comparing tags of $h and $i...\n";
2275
2276 return 0 unless $h->{'_tag'} eq $i->{'_tag'};
2277
2278 # only significant attribute whose name starts with "_"
2279
2280 #print "Comparing attributes of $h and $i...\n";
2281 # Compare attributes, but only the real ones.
2282 {
2283
2284 # Bear in mind that the average element has very few attributes,
2285 # and that element names are rather short.
2286 # (Values are a different story.)
2287
2288 # XXX I would think that /^[^_]/ would be faster, at least easier to read.
2289 my @keys_h
2290 = sort grep { length $_ and substr( $_, 0, 1 ) ne '_' } keys %$h;
2291 my @keys_i
2292 = sort grep { length $_ and substr( $_, 0, 1 ) ne '_' } keys %$i;
2293
2294 return 0 unless @keys_h == @keys_i;
2295
2296 # different number of real attributes? they're different.
2297 for ( my $x = 0; $x < @keys_h; ++$x ) {
2298 return 0
2299 unless $keys_h[$x] eq $keys_i[$x] and # same key name
2300 $h->{ $keys_h[$x] } eq $i->{ $keys_h[$x] }; # same value
2301 # Should this test for definedness on values?
2302 # People shouldn't be putting undef in attribute values, I think.
2303 }
2304 }
2305
2306 #print "Comparing children of $h and $i...\n";
2307 my $hcl = $h->{'_content'} || [];
2308 my $icl = $i->{'_content'} || [];
2309
2310 return 0 unless @$hcl == @$icl;
2311
2312 # different numbers of children? they're different.
2313
2314 if (@$hcl) {
2315
2316 # compare each of the children:
2317 for ( my $x = 0; $x < @$hcl; ++$x ) {
2318 if ( ref $hcl->[$x] ) {
2319 return 0 unless ref( $icl->[$x] );
2320
2321 # an element can't be the same as a text segment
2322 # Both elements:
2323 return 0 unless $hcl->[$x]->same_as( $icl->[$x] ); # RECURSE!
2324 }
2325 else {
2326 return 0 if ref( $icl->[$x] );
2327
2328 # a text segment can't be the same as an element
2329 # Both text segments:
2330 return 0 unless $hcl->[$x] eq $icl->[$x];
2331 }
2332 }
2333 }
2334
2335 return 1; # passed all the tests!
2336}
2337
2338
2339sub new_from_lol {
2340 my $class = shift;
2341 $class = ref($class) || $class;
2342
2343 # calling as an object method is just the same as ref($h)->new_from_lol(...)
2344 my $lol = $_[1];
2345
2346 my @ancestor_lols;
2347
2348 # So we can make sure there's no cyclicities in this lol.
2349 # That would be perverse, but one never knows.
2350 my ( $sub, $k, $v, $node ); # last three are scratch values
2351 $sub = sub {
2352
2353 #print "Building for $_[0]\n";
2354 my $lol = $_[0];
2355 return unless @$lol;
2356 my ( @attributes, @children );
2357 Carp::croak "Cyclicity detected in source LOL tree, around $lol?!?"
2358 if grep( $_ eq $lol, @ancestor_lols );
2359 push @ancestor_lols, $lol;
2360
2361 my $tag_name = 'null';
2362
2363 # Recursion in in here:
2364 for ( my $i = 0; $i < @$lol; ++$i ) { # Iterate over children
2365 if ( ref( $lol->[$i] ) eq 'ARRAY' )
2366 { # subtree: most common thing in loltree
2367 push @children, $sub->( $lol->[$i] );
2368 }
2369 elsif ( !ref( $lol->[$i] ) ) {
2370 if ( $i == 0 ) { # name
2371 $tag_name = $lol->[$i];
2372 Carp::croak "\"$tag_name\" isn't a good tag name!"
2373 if $tag_name =~ m/[<>\/\x00-\x20]/
2374 ; # minimal sanity, certainly!
2375 }
2376 else { # text segment child
2377 push @children, $lol->[$i];
2378 }
2379 }
2380 elsif ( ref( $lol->[$i] ) eq 'HASH' ) { # attribute hashref
2381 keys %{ $lol->[$i] }; # reset the each-counter, just in case
2382 while ( ( $k, $v ) = each %{ $lol->[$i] } ) {
2383 push @attributes, $class->_fold_case($k), $v
2384 if defined $v
2385 and $k ne '_name'
2386 and $k ne '_content'
2387 and $k ne '_parent';
2388
2389 # enforce /some/ sanity!
2390 }
2391 }
2392 elsif ( UNIVERSAL::isa( $lol->[$i], __PACKAGE__ ) ) {
2393 if ( $lol->[$i]->{'_parent'} ) { # if claimed
2394 #print "About to clone ", $lol->[$i], "\n";
2395 push @children, $lol->[$i]->clone();
2396 }
2397 else {
2398 push @children, $lol->[$i]; # if unclaimed...
2399 #print "Claiming ", $lol->[$i], "\n";
2400 $lol->[$i]->{'_parent'} = 1; # claim it NOW
2401 # This WILL be replaced by the correct value once we actually
2402 # construct the parent, just after the end of this loop...
2403 }
2404 }
2405 else {
2406 Carp::croak "new_from_lol doesn't handle references of type "
2407 . ref( $lol->[$i] );
2408 }
2409 }
2410
2411 pop @ancestor_lols;
2412 $node = $class->new($tag_name);
2413
2414 #print "Children: @children\n";
2415
2416 if ( $class eq __PACKAGE__ ) { # Special-case it, for speed:
2417 %$node = ( %$node, @attributes ) if @attributes;
2418
2419 #print join(' ', $node, ' ' , map("<$_>", %$node), "\n");
2420 if (@children) {
2421 $node->{'_content'} = \@children;
2422 foreach my $c (@children) {
2423 _weaken($c->{'_parent'} = $node)
2424 if ref $c;
2425 }
2426 }
2427 }
2428 else { # Do it the clean way...
2429 #print "Done neatly\n";
2430 while (@attributes) { $node->attr( splice @attributes, 0, 2 ) }
2431 $node->push_content(
2432 map { _weaken($_->{'_parent'} = $node) if ref $_; $_ }
2433 @children )
2434 if @children;
2435 }
2436
2437 return $node;
2438 };
2439
2440 # End of sub definition.
2441
2442 if (wantarray) {
2443 my (@nodes) = map { ; ( ref($_) eq 'ARRAY' ) ? $sub->($_) : $_ } @_;
2444 # Let text bits pass thru, I guess. This makes this act more like
2445 # unshift_content et al. Undocumented.
2446
2447 undef $sub;
2448 # so it won't be in its own frame, so its refcount can hit 0
2449
2450 return @nodes;
2451 }
2452 else {
2453 Carp::croak "new_from_lol in scalar context needs exactly one lol"
2454 unless @_ == 1;
2455 return $_[0] unless ref( $_[0] ) eq 'ARRAY';
2456 # used to be a fatal error. still undocumented tho.
2457
2458 $node = $sub->( $_[0] );
2459 undef $sub;
2460 # so it won't be in its own frame, so its refcount can hit 0
2461
2462 return $node;
2463 }
2464}
2465
2466
2467sub objectify_text {
2468 my (@stack) = ( $_[0] );
2469
2470 my ($this);
2471 while (@stack) {
2472 foreach my $c ( @{ ( $this = shift @stack )->{'_content'} } ) {
2473 if ( ref($c) ) {
2474 unshift @stack, $c; # visit it later.
2475 }
2476 else {
2477 $c = $this->element_class->new(
2478 '~text',
2479 'text' => $c,
2480 '_parent' => $this
2481 );
2482 }
2483 }
2484 }
2485 return;
2486}
2487
2488sub deobjectify_text {
2489 my (@stack) = ( $_[0] );
2490 my ($old_node);
2491
2492 if ( $_[0]{'_tag'} eq '~text' ) { # special case
2493 # Puts the $old_node variable to a different purpose
2494 if ( $_[0]{'_parent'} ) {
2495 $_[0]->replace_with( $old_node = delete $_[0]{'text'} )->delete;
2496 }
2497 else { # well, that's that, then!
2498 $old_node = delete $_[0]{'text'};
2499 }
2500
2501 if ( ref( $_[0] ) eq __PACKAGE__ ) { # common case
2502 %{ $_[0] } = (); # poof!
2503 }
2504 else {
2505
2506 # play nice:
2507 delete $_[0]{'_parent'};
2508 $_[0]->delete;
2509 }
2510 return '' unless defined $old_node; # sanity!
2511 return $old_node;
2512 }
2513
2514 while (@stack) {
2515 foreach my $c ( @{ ( shift @stack )->{'_content'} } ) {
2516 if ( ref($c) ) {
2517 if ( $c->{'_tag'} eq '~text' ) {
2518 $c = ( $old_node = $c )->{'text'};
2519 if ( ref($old_node) eq __PACKAGE__ ) { # common case
2520 %$old_node = (); # poof!
2521 }
2522 else {
2523
2524 # play nice:
2525 delete $old_node->{'_parent'};
2526 $old_node->delete;
2527 }
2528 }
2529 else {
2530 unshift @stack, $c; # visit it later.
2531 }
2532 }
2533 }
2534 }
2535
2536 return undef;
2537}
2538
2539
2540{
2541
2542 # The next three subs are basically copied from Number::Latin,
2543 # based on a one-liner by Abigail. Yes, I could simply require that
2544 # module, and a Roman numeral module too, but really, HTML-Tree already
2545 # has enough dependecies as it is; and anyhow, I don't need the functions
2546 # that do latin2int or roman2int.
2547 no integer;
2548
2549 sub _int2latin {
2550 return unless defined $_[0];
2551 return '0' if $_[0] < 1 and $_[0] > -1;
2552 return '-' . _i2l( abs int $_[0] )
2553 if $_[0] <= -1; # tolerate negatives
2554 return _i2l( int $_[0] );
2555 }
2556
2557 sub _int2LATIN {
2558
2559 # just the above plus uc
2560 return unless defined $_[0];
2561 return '0' if $_[0] < 1 and $_[0] > -1;
2562 return '-' . uc( _i2l( abs int $_[0] ) )
2563 if $_[0] <= -1; # tolerate negs
2564 return uc( _i2l( int $_[0] ) );
2565 }
2566
2567 my @alpha = ( 'a' .. 'z' );
2568
2569 sub _i2l { # the real work
2570 my $int = $_[0] || return "";
2571 _i2l( int( ( $int - 1 ) / 26 ) )
2572 . $alpha[ $int % 26 - 1 ]; # yes, recursive
2573 # Yes, 26 => is (26 % 26 - 1), which is -1 => Z!
2574 }
2575}
2576
2577{
2578
2579 # And now, some much less impressive Roman numerals code:
2580
2581 my (@i) = ( '', qw(I II III IV V VI VII VIII IX) );
2582 my (@x) = ( '', qw(X XX XXX XL L LX LXX LXXX XC) );
2583 my (@c) = ( '', qw(C CC CCC CD D DC DCC DCCC CM) );
2584 my (@m) = ( '', qw(M MM MMM) );
2585
2586 sub _int2ROMAN {
2587 my ( $i, $pref );
2588 return '0'
2589 if 0 == ( $i = int( $_[0] || 0 ) ); # zero is a special case
2590 return $i + 0 if $i <= -4000 or $i >= 4000;
2591
2592 # Because over 3999 would require non-ASCII chars, like D-with-)-inside
2593 if ( $i < 0 ) { # grumble grumble tolerate negatives grumble
2594 $pref = '-';
2595 $i = abs($i);
2596 }
2597 else {
2598 $pref = ''; # normal case
2599 }
2600
2601 my ( $x, $c, $m ) = ( 0, 0, 0 );
2602 if ( $i >= 10 ) {
2603 $x = $i / 10;
2604 $i %= 10;
2605 if ( $x >= 10 ) {
2606 $c = $x / 10;
2607 $x %= 10;
2608 if ( $c >= 10 ) { $m = $c / 10; $c %= 10; }
2609 }
2610 }
2611
2612 #print "m$m c$c x$x i$i\n";
2613
2614 return join( '', $pref, $m[$m], $c[$c], $x[$x], $i[$i] );
2615 }
2616
2617 sub _int2roman { lc( _int2ROMAN( $_[0] ) ) }
2618}
2619
2620sub _int2int { $_[0] } # dummy
2621
2622%list_type_to_sub = (
2623 'I' => \&_int2ROMAN,
2624 'i' => \&_int2roman,
2625 'A' => \&_int2LATIN,
2626 'a' => \&_int2latin,
2627 '1' => \&_int2int,
2628);
2629
2630sub number_lists {
2631 my (@stack) = ( $_[0] );
2632 my ( $this, $tag, $counter, $numberer ); # scratch
2633 while (@stack) { # yup, pre-order-traverser idiom
2634 if ( ( $tag = ( $this = shift @stack )->{'_tag'} ) eq 'ol' ) {
2635
2636 # Prep some things:
2637 $counter
2638 = ( ( $this->{'start'} || '' ) =~ m<^\s*(\d{1,7})\s*$>s )
2639 ? $1
2640 : 1;
2641 $numberer = $list_type_to_sub{ $this->{'type'} || '' }
2642 || $list_type_to_sub{'1'};
2643
2644 # Immeditately iterate over all children
2645 foreach my $c ( @{ $this->{'_content'} || next } ) {
2646 next unless ref $c;
2647 unshift @stack, $c;
2648 if ( $c->{'_tag'} eq 'li' ) {
2649 $counter = $1
2650 if (
2651 ( $c->{'value'} || '' ) =~ m<^\s*(\d{1,7})\s*$>s );
2652 $c->{'_bullet'} = $numberer->($counter) . '.';
2653 ++$counter;
2654 }
2655 }
2656
2657 }
2658 elsif ( $tag eq 'ul' or $tag eq 'dir' or $tag eq 'menu' ) {
2659
2660 # Immeditately iterate over all children
2661 foreach my $c ( @{ $this->{'_content'} || next } ) {
2662 next unless ref $c;
2663 unshift @stack, $c;
2664 $c->{'_bullet'} = '*' if $c->{'_tag'} eq 'li';
2665 }
2666
2667 }
2668 else {
2669 foreach my $c ( @{ $this->{'_content'} || next } ) {
2670 unshift @stack, $c if ref $c;
2671 }
2672 }
2673 }
2674 return;
2675}
2676
2677
2678sub has_insane_linkage {
2679 my @pile = ( $_[0] );
2680 my ( $c, $i, $p, $this ); # scratch
2681
2682 # Another iterative traverser; this time much simpler because
2683 # only in pre-order:
2684 my %parent_of = ( $_[0], 'TOP-OF-SCAN' );
2685 while (@pile) {
2686 $this = shift @pile;
2687 $c = $this->{'_content'} || next;
2688 return ( $this, "_content attribute is true but nonref." )
2689 unless ref($c) eq 'ARRAY';
2690 next unless @$c;
2691 for ( $i = 0; $i < @$c; ++$i ) {
2692 return ( $this, "Child $i is undef" )
2693 unless defined $c->[$i];
2694 if ( ref( $c->[$i] ) ) {
2695 return ( $c->[$i], "appears in its own content list" )
2696 if $c->[$i] eq $this;
2697 return ( $c->[$i],
2698 "appears twice in the tree: once under $this, once under $parent_of{$c->[$i]}"
2699 ) if exists $parent_of{ $c->[$i] };
2700 $parent_of{ $c->[$i] } = '' . $this;
2701
2702 # might as well just use the stringification of it.
2703
2704 return ( $c->[$i],
2705 "_parent attribute is wrong (not defined)" )
2706 unless defined( $p = $c->[$i]{'_parent'} );
2707 return ( $c->[$i], "_parent attribute is wrong (nonref)" )
2708 unless ref($p);
2709 return ( $c->[$i],
2710 "_parent attribute is wrong (is $p; should be $this)" )
2711 unless $p eq $this;
2712 }
2713 }
2714 unshift @pile, grep ref($_), @$c;
2715
2716 # queue up more things on the pile stack
2717 }
2718 return; #okay
2719}
2720
2721sub _asserts_fail { # to be run on trusted documents only
2722 my (@pile) = ( $_[0] );
2723 my ( @errors, $this, $id, $assert, $parent, $rv );
2724 while (@pile) {
2725 $this = shift @pile;
2726 if ( defined( $assert = $this->{'assert'} ) ) {
2727 $id = ( $this->{'id'} ||= $this->address )
2728 ; # don't use '0' as an ID, okay?
2729 unless ( ref($assert) ) {
2730
2731 package main;
2732## no critic
2733 $assert = $this->{'assert'} = (
2734 $assert =~ m/\bsub\b/
2735 ? eval($assert)
2736 : eval("sub { $assert\n}")
2737 );
2738## use critic
2739 if ($@) {
2740 push @errors,
2741 [ $this, "assertion at $id broke in eval: $@" ];
2742 $assert = $this->{'assert'} = sub { };
2743 }
2744 }
2745 $parent = $this->{'_parent'};
2746 $rv = undef;
2747 eval {
2748 $rv = $assert->(
2749 $this, $this->{'_tag'}, $this->{'_id'}, # 0,1,2
2750 $parent
2751 ? ( $parent, $parent->{'_tag'}, $parent->{'id'} )
2752 : () # 3,4,5
2753 );
2754 };
2755 if ($@) {
2756 push @errors, [ $this, "assertion at $id died: $@" ];
2757 }
2758 elsif ( !$rv ) {
2759 push @errors, [ $this, "assertion at $id failed" ];
2760 }
2761
2762 # else OK
2763 }
2764 push @pile, grep ref($_), @{ $this->{'_content'} || next };
2765 }
2766 return @errors;
2767}
2768
2769## _valid_name
2770# validate XML style attribute names
2771# http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name
2772
2773sub _valid_name {
2774 my $self = shift;
2775 my $attr = shift
2776 or Carp::croak("sub valid_name requires an attribute name");
2777
2778 return (0) unless ( $attr =~ /^$START_CHAR$NAME_CHAR+$/ );
2779
2780 return (1);
2781}
2782
2783
2784sub element_class {
2785 $_[0]->{_element_class} || __PACKAGE__;
2786}
2787
27881;
2789
2790
27911;
2792
2793__END__