Skip to content

Commit

Permalink
Merge pull request #840 from dverdin/urlize2 by dverdin
Browse files Browse the repository at this point in the history
Urlize mode bug fixes
  • Loading branch information
ikedas authored Feb 12, 2020
2 parents 1fa6e70 + c3c27da commit a176097
Show file tree
Hide file tree
Showing 13 changed files with 845 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=soap --with-feature=ldap .
- cpanm --notest --quiet Unicode::CaseFold
- cpanm --notest --quiet DBD::SQLite

before_script:
- coverage-setup
Expand All @@ -40,6 +41,7 @@ script:
- cd src; make; cd ..
- make check-local TEST_FILES='xt/perltidy.t' || true
- make check-local TEST_FILES='t/compile_executables.t t/compile_modules.t t/Language.t t/compile_scenarios.t t/parse_templates.t t/pod-syntax.t'
- make check-local TEST_FILES='t/Message.t'

after_success:
- coverage-report
Expand Down
61 changes: 45 additions & 16 deletions src/lib/Sympa/Message.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2091,7 +2091,7 @@ sub _urlize_parts {

## Only multipart/mixed messages are modified.
my $eff_type = $entity->effective_type || 'text/plain';
unless ($eff_type eq 'multipart/mixed') {
unless ($eff_type eq 'multipart/mixed' or $eff_type eq 'multipart/alternative' or $eff_type eq 'multipart/related') {
return undef;
}

Expand All @@ -2101,39 +2101,59 @@ sub _urlize_parts {
return undef;
}

## Clean up Message-ID
my $dir1 = Sympa::Tools::Text::escape_chars($message_id);
## Clean up Message-ID and preventing double percent encoding.
my $dir1 = Sympa::Tools::Text::encode_filesystem_safe($message_id);
#XXX$dir1 = '/' . $dir1;
unless (mkdir "$expl/$dir1", 0775) {
$log->syslog('err', 'Unable to create urlized directory %s/%s',
$expl, $dir1);
return 0;
}
return _urlize_sub_parts($entity, $list, $message_id, $dir1, 0);
}

sub _urlize_sub_parts {
my $entity = shift;
my $list = shift;
my $message_id = shift;
my $directory = shift;
my $i = shift;
my @parts = ();
my $i = 0;
use Data::Dumper;
my $parent_eff_type = $entity->effective_type();
foreach my $part ($entity->parts) {
my $p = _urlize_one_part($part->dup, $list, $dir1, $i);
if (defined $p) {
my $eff_type = $part->effective_type || 'text/plain';
if ($eff_type eq 'multipart/mixed') {
$i++;
my $p = _urlize_sub_parts($part->dup, $list, $message_id, $directory, $i);
push @parts, $p;
} elsif (($eff_type eq 'multipart/alternative' or $eff_type eq 'multipart/related') and $i < 2) {
$i++;
my $p = _urlize_sub_parts($part->dup, $list, $message_id, $directory, $i);
push @parts, $p;
} else {
push @parts, $part;
my $p = _urlize_one_part($part->dup, $list, $directory, $i, $parent_eff_type);
if (defined $p) {
push @parts, $p;
$i++;
} else {
push @parts, $part;
}
}
}
if ($i) {
## Replace message parts
$entity->parts(\@parts);
}


$entity->parts(\@parts);
return $entity;
}

sub _urlize_one_part {
my $entity = shift;
my $list = shift;
my $dir = shift;
my $i = shift;
my $entity = shift;
my $list = shift;
my $dir = shift;
my $i = shift;
my $parent_eff_type = shift;

return undef unless ($parent_eff_type eq 'multipart/mixed');

my $expl = $list->{'dir'} . '/urlized';
my $listname = $list->{'name'};
Expand All @@ -2148,6 +2168,15 @@ sub _urlize_one_part {
$filename = Encode::encode_utf8($filename)
if Encode::is_utf8($filename);
} else {
my $content_disposition = lc($entity->head->mime_attr('Content-Disposition') // '');
if ($entity->effective_type =~ m{\Atext}
&& (!$content_disposition
|| $content_disposition eq 'attachment'
)
&& $entity->head->mime_attr('content-type.charset')
){
return undef;
}
my $fileExt = Conf::get_mime_type($entity->effective_type || '')
|| 'bin';
$filename = sprintf 'msg.%d.%s', $i, $fileExt;
Expand Down
243 changes: 243 additions & 0 deletions t/Message.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#!/usr/bin/perl
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id: tools_data.t 8606 2013-02-06 08:44:02Z rousse $

use strict;
use warnings;
use Data::Dumper;
use English;
use File::Path qw(make_path rmtree);
use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);

use FindBin qw($Bin);
use lib "$Bin/../src/lib";
use lib 't/stub';

use Test::More;

BEGIN {
use_ok('Sympa::Message');
}

my $tmp_dir = 't/tmp';
my $db_dir = $tmp_dir.'/db';
my $home_dir = $tmp_dir.'/list_data';
my $etc_dir = $tmp_dir.'/etc';
my $test_list_name = 'test';

%Conf::Conf = (
domain => 'lists.example.com', # mandatory
listmaster => 'dude@example.com', # mandatory
lang => 'en-US',
sender_headers => 'From',
tmpdir => $tmp_dir,
db_type => 'SQLite',
db_name => $db_dir.'/message-test-db.sqlite',
update_db_field_types => 'auto',
home => $home_dir,
etc => $etc_dir,
cache_list_config => '',
supported_lang => 'en-US',
filesystem_encoding => 'utf-8',
urlize_min_size => 0,
);

if (-d $tmp_dir) {
rmtree($tmp_dir);
}
make_path($tmp_dir);
make_path($db_dir);
make_path($home_dir);
dircopy('t/data/list_data/', $home_dir);
make_path($etc_dir);

my $log = Sympa::Log->instance;
$log->{log_to_stderr} = 'err';

if (-f $Conf::Conf{db_name}) {
unlink $Conf::Conf{db_name};
}

open my $fileHandle, ">", "$Conf::Conf{db_name}" or die "Can't create '$Conf::Conf{db_name}'\n";
close $fileHandle;

my $sdm = Sympa::DatabaseManager->instance;
Sympa::DatabaseManager::probe_db();

my $list = Sympa::List->new($test_list_name, '*');
$list->_update_list_db;
my $root_url = '/attach/test/';

my @to_urlize = (
{
test_case => 'simple',
filename => 't/samples/urlize-simple.eml',
attachments =>
[
{
name => 'attachment.pdf',
escaped_name => 'attachment.pdf',
},
],
dirname => 'simple@example.com',
escaped_dirname => 'simple%40example.com',
},
{
test_case => 'simple with several attachments',
filename => 't/samples/urlize-simple-mutiple-attachments.eml',
attachments =>
[
{
name => 'attachment.pdf',
escaped_name => 'attachment.pdf',
},
{
name => 'text.txt',
escaped_name => 'text.txt',
},
{
name => 'image.png',
escaped_name => 'image.png',
},
],
dirname => 'simple@example.com',
escaped_dirname => 'simple%40example.com',
},
{
test_case => 'encoding',
filename => 't/samples/urlize-encoding.eml',
attachments =>
[
{
name => 'ございます.pdf',
escaped_name => '%25e3%2581%2594%25e3%2581%259',
},
],
dirname => 'globuz_24_3c_3e_25@example.com',
escaped_dirname => 'globuz_24_3c_3e_25%40example.com',
},
{
test_case => 'nested in multipart/mixed message',
filename => 't/samples/urlize-nested-mixed.eml',
attachments =>
[
{
name => 'Würzburg.txt',
escaped_name => 'W%25c3%25bcrzburg.txt',
},
],
dirname => '3_24@domain.tld',
escaped_dirname => '3_24%40domain.tld',
},
{
test_case => 'nested in multipart/alternative message',
filename => 't/samples/urlize-nested-alternative.eml',
attachments =>
[
{
name => 'globuz.pdf',
escaped_name => 'globuz.pdf',
},
],
dirname => '4_24@domain.tld',
escaped_dirname => '4_24%40domain.tld',
},
{
test_case => 'Deep nested message',
filename => 't/samples/urlize-deep-nested-mixed.eml',
attachments =>
[
{
name => 'Würzburg.txt',
escaped_name => 'W%25c3%25bcrzburg.txt',
},
{
name => 'msg.3.bin',
escaped_name => 'msg.3.bin',
},
],
dirname => 'deep-nested@domain.tld',
escaped_dirname => 'deep-nested%40domain.tld',
},
{
test_case => 'Related/alternative nested message',
filename => 't/samples/urlize-nested-alternative-and-related.eml',
attachments =>
[
{
name => 'document.pdf',
escaped_name => 'document.pdf',
},
],
dirname => 'alt-nested@domain.tld',
escaped_dirname => 'alt-nested%40domain.tld',
},
);



foreach my $test_file (@to_urlize) {
my $to_urlize_file = $test_file->{filename};
my $lock_fh = Sympa::LockedFile->new($to_urlize_file, -1, '+<');
my $to_urlize_string = do { local $RS; <$lock_fh> };
my $to_urlize = Sympa::Message->new($to_urlize_string);

my $parser = MIME::Parser->new;
$parser->extract_nested_messages(0);
$parser->extract_uuencode(1);
$parser->output_to_core(1);
$parser->tmp_dir($Conf::Conf{'tmpdir'});

my $msg_string = $to_urlize->as_string;
$msg_string =~ s/\AReturn-Path: (.*?)\n(?![ \t])//s;
my $entity = $parser->parse_data($msg_string);

my $new_entity = Sympa::Message::_urlize_parts($entity, $list, $to_urlize->{'message_id'});

### Preparation done. Actual testing starts here.

my $urlized_directory;
opendir my $dh, $home_dir.'/'.$test_list_name.'/urlized/';
foreach my $file (readdir $dh) {
next if $file =~ m{\A\.+\Z};
$urlized_directory = $file; last;
}
closedir $dh;

is($urlized_directory, $test_file->{dirname}, 'Test case: '.$test_file->{test_case}.' - Directory where urlized parts are stored correctly escaped.');

ok(! -f $home_dir.'/'.$test_list_name.'/urlized/'.$urlized_directory.'/msg.0.bin', 'Test case: '.$test_file->{test_case}.' - The text of the message has not been converted to binary attachment.') ;

ok(! -f $home_dir.'/'.$test_list_name.'/urlized/'.$urlized_directory.'/msg.0.txt', 'Test case: '.$test_file->{test_case}.' - The text of the message has not been converted to text attachment.') ;

my @expected_files;
foreach my $file (@{$test_file->{attachments}}) {
ok( -f "$home_dir/$test_list_name/urlized/$urlized_directory/$file->{name}", 'Test case: '.$test_file->{test_case}.' - The attachment '.$file->{name}.' has been stored on the filesystem.') ;
if (-f "$home_dir/$test_list_name/urlized/$urlized_directory/$file->{name}") {
push @expected_files, $file->{name};
}
my $found_url_to_attachment = 0;
foreach my $line (split '\n', $new_entity->as_string()) {
my $line_to_match = $root_url.$test_file->{escaped_dirname}.'/'.$file->{escaped_name};
if ($line =~ m{$line_to_match}) {
$found_url_to_attachment = 1;
last;
}
}

is( $found_url_to_attachment, 1, 'Test case: '.$test_file->{test_case}.' - The attachment '.$file->{name}.' stored on the filesystem has an URL to retrieve it in the new message.');
}
my @found_files;
opendir my $dh2, "$home_dir/$test_list_name/urlized/$urlized_directory/";
foreach my $file (readdir $dh2) {
next if $file =~ m{\A\.+\Z};
push @found_files, $file;
}
closedir $dh2;
my $total_expected_files = $#expected_files+1;
is($#found_files, $#expected_files, 'Test case: '.$test_file->{test_case}.' - Found the urlized attachments (total: '.$total_expected_files.') and only them.');
rmtree $home_dir.'/'.$test_list_name.'/urlized/'.$urlized_directory;
}
rmtree $tmp_dir;
done_testing();
Loading

0 comments on commit a176097

Please sign in to comment.