← Index
NYTProf Performance Profile   « line view »
For /usr/local/libexec/sympa/task_manager-debug.pl
  Run on Tue Jun 1 22:32:51 2021
Reported on Tue Jun 1 22:35:13 2021

Filename/usr/local/libexec/sympa/Sympa/HTMLSanitizer.pm
StatementsExecuted 0 statements in 0s
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
0000s0sSympa::HTMLSanitizer::::BEGIN@30Sympa::HTMLSanitizer::BEGIN@30
0000s0sSympa::HTMLSanitizer::::BEGIN@31Sympa::HTMLSanitizer::BEGIN@31
0000s0sSympa::HTMLSanitizer::::BEGIN@32Sympa::HTMLSanitizer::BEGIN@32
0000s0sSympa::HTMLSanitizer::::BEGIN@34Sympa::HTMLSanitizer::BEGIN@34
0000s0sSympa::HTMLSanitizer::::BEGIN@35Sympa::HTMLSanitizer::BEGIN@35
0000s0sSympa::HTMLSanitizer::::BEGIN@37Sympa::HTMLSanitizer::BEGIN@37
0000s0sSympa::HTMLSanitizer::::BEGIN@38Sympa::HTMLSanitizer::BEGIN@38
0000s0sSympa::HTMLSanitizer::::BEGIN@39Sympa::HTMLSanitizer::BEGIN@39
0000s0sSympa::HTMLSanitizer::::__ANON__Sympa::HTMLSanitizer::__ANON__ (xsub)
0000s0sSympa::HTMLSanitizer::::newSympa::HTMLSanitizer::new
0000s0sSympa::HTMLSanitizer::::sanitize_htmlSympa::HTMLSanitizer::sanitize_html
0000s0sSympa::HTMLSanitizer::::sanitize_html_fileSympa::HTMLSanitizer::sanitize_html_file
0000s0sSympa::HTMLSanitizer::::sanitize_varSympa::HTMLSanitizer::sanitize_var
0000s0sSympa::HTMLSanitizer::::validate_href_attributeSympa::HTMLSanitizer::validate_href_attribute
0000s0sSympa::HTMLSanitizer::::validate_src_attributeSympa::HTMLSanitizer::validate_src_attribute
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# -*- indent-tabs-mode: nil; -*-
2# vim:ft=perl:et:sw=4
3# $Id$
4
5# Sympa - SYsteme de Multi-Postage Automatique
6#
7# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
8# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
9# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
10# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
11# Copyright 2017 The Sympa Community. See the AUTHORS.md file at the top-level
12# directory of this distribution and at
13# <https://github.com/sympa-community/sympa.git>.
14#
15# This program is free software; you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation; either version 2 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27
28package Sympa::HTMLSanitizer;
29
30use strict;
31use warnings;
32use base qw(HTML::StripScripts::Parser);
33
34use Scalar::Util qw();
35use URI;
36
37use Sympa;
38use Conf;
39use Sympa::Tools::Text;
40
41# Returns a specialized HTML::StripScripts::Parser object built with the
42# parameters provided as arguments.
43sub new {
44 my $class = shift;
45 my $robot_id = shift || '*';
46
47 my $self = $class->SUPER::new(
48 { Context => 'Document',
49 AllowSrc => 1,
50 AllowHref => 1,
51 AllowRelURL => 1,
52 EscapeFiltered => 0,
53 }
54 );
55
56 my @allowed_origins = (
57 Sympa::get_url($robot_id),
58 split /\s*,\s*/,
59 (Conf::get_robot_conf($robot_id, 'allowed_external_origin') || '')
60 );
61 $self->{_shsAllowedOriginRe} = '\A(?:' . join(
62 '|',
63 map {
64 my $uri;
65 unless (defined $_ and length $_) {
66 ;
67 } elsif (m{\A[-+\w]+:}) {
68 $uri = URI->new($_)->canonical;
69 } elsif ($_ =~ m{\A//}) {
70 $uri = URI->new('http:' . $_)->canonical;
71 } else {
72 $uri = URI->new('http://' . $_)->canonical;
73 }
74
75 if ($uri
76 and ($uri->scheme eq 'http' or $uri->scheme eq 'https')) {
77 my $regexp = $uri->authority;
78 # Escape metacharacters except wildcard '*'.
79 $regexp =~
80 s/([^\s\w\x80-\xFF])/($1 eq '*') ? '.*' : "\\$1"/eg;
81
82 ($regexp);
83 } else {
84 ();
85 }
86 } @allowed_origins
87 ) . ')\z';
88
89 return $self;
90}
91
92# Overridden method.
93sub validate_src_attribute {
94 my $self = shift;
95 my $text = shift;
96
97 my $uri = URI->new($text)->canonical;
98 # Allow only cid URLs, local URLs and links with the same origin, i.e.
99 # URLs with the same host etc.
100 return $text if $uri->scheme and $uri->scheme eq 'cid';
101 return $text unless $uri->can('authority') and $uri->authority;
102 return $text if $uri->authority =~ $self->{_shsAllowedOriginRe};
103
104 return undef;
105}
106
107# Overridden method.
108sub validate_href_attribute {
109 goto &validate_src_attribute; # "&" required.
110}
111
112# This method is specific to this subclass.
113sub sanitize_html {
114 my $self = shift;
115 my $string = shift;
116
117 return $self->filter_html($string);
118}
119
120# This method is specific to this subclass.
121sub sanitize_html_file {
122 my $self = shift;
123 my $file = shift;
124
125 $self->parse_file($file);
126 return $self->filtered_document;
127}
128
129## Sanitize all values in the hashref or arrayref $var, starting from $level
130sub sanitize_var {
131 my $self = shift;
132 my $var = shift;
133 my %parameters = @_;
134
135 unless (defined $var) {
136 return undef;
137 }
138 unless (defined $parameters{'htmlAllowedParam'}
139 && $parameters{'htmlToFilter'}) {
140 die sprintf 'Missing var *** %s *** %s *** to ignore',
141 $parameters{'htmlAllowedParam'},
142 $parameters{'htmlToFilter'};
143 }
144 my $level = $parameters{'level'};
145 $level |= 0;
146
147 if (ref $var) {
148 if (ref $var eq 'ARRAY') {
149 foreach my $index (0 .. $#{$var}) {
150 if ( (ref($var->[$index]) eq 'ARRAY')
151 || (ref($var->[$index]) eq 'HASH')) {
152 $self->sanitize_var(
153 $var->[$index],
154 'level' => $level + 1,
155 'htmlAllowedParam' => $parameters{'htmlAllowedParam'},
156 'htmlToFilter' => $parameters{'htmlToFilter'},
157 );
158 } elsif (defined $var->[$index]) {
159 # preserve numeric flags.
160 $var->[$index] =
161 Sympa::Tools::Text::encode_html($var->[$index])
162 unless Scalar::Util::looks_like_number(
163 $var->[$index]);
164 }
165 }
166 } elsif (ref $var eq 'HASH') {
167 foreach my $key (keys %{$var}) {
168 if ( (ref($var->{$key}) eq 'ARRAY')
169 || (ref($var->{$key}) eq 'HASH')) {
170 $self->sanitize_var(
171 $var->{$key},
172 'level' => $level + 1,
173 'htmlAllowedParam' => $parameters{'htmlAllowedParam'},
174 'htmlToFilter' => $parameters{'htmlToFilter'},
175 );
176 } elsif (defined $var->{$key}) {
177 unless ($parameters{'htmlAllowedParam'}{$key}
178 or $parameters{'htmlToFilter'}{$key}) {
179 # preserve numeric flags.
180 $var->{$key} =
181 Sympa::Tools::Text::encode_html($var->{$key})
182 unless Scalar::Util::looks_like_number(
183 $var->{$key});
184 }
185 if ($parameters{'htmlToFilter'}{$key}) {
186 $var->{$key} = $self->sanitize_html($var->{$key});
187 }
188 }
189
190 }
191 }
192 } else {
193 die 'Variable is neither a hash nor an array';
194 }
195 return 1;
196}
197
1981;
199__END__