diff --git a/benchmarks/body-parser.pl b/benchmarks/body-parser.pl new file mode 100644 index 000000000..01bcfb19c --- /dev/null +++ b/benchmarks/body-parser.pl @@ -0,0 +1,66 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; +use 5.010000; +use autodie; + +use Plack::Request; +use Plack::BodyParser::UrlEncoded; +use Plack::BodyParser::JSON; +use Plack::BodyParser::MultiPart; +use Plack::BodyParser::OctetStream; + +use Benchmark ':all'; + +my $content = 'xxx=hogehoge&yyy=aaaaaaaaaaaaaaaaaaaaa'; + +my $body_parser = sub { + open my $input, '<', \$content; + my $req = Plack::Request->new( + +{ + 'psgi.input' => $input, + CONTENT_LENGTH => length($content), + CONTENT_TYPE => 'application/x-www-form-urlencoded', + }, + parse_request_body => \&parse_request_body, + ); + $req->body_parameters; +}; +my $orig = sub { + open my $input, '<', \$content; + my $req = Plack::Request->new( + +{ + 'psgi.input' => $input, + CONTENT_LENGTH => length($content), + CONTENT_TYPE => 'application/x-www-form-urlencoded', + }, + ); + $req->body_parameters; +}; +use Data::Dumper; warn Dumper($orig->()); +use Data::Dumper; warn Dumper($body_parser->()); + +cmpthese( + -1, { + orig => $orig, + body_parser => $body_parser, + }, +); + +sub parse_request_body { + my $req = shift; + my $content_type = $req->content_type; + + my $parser = + $content_type =~ m{\Aapplication/json} + ? Plack::BodyParser::JSON->new() + : $content_type =~ m{\Aapplication/x-www-form-urlencoded} + ? Plack::BodyParser::UrlEncoded->new() + : $content_type =~ m{\Amultipart/form-data} + ? Plack::BodyParser::MultiPart->new($req->env) + : Plack::BodyParser::OctetStream->new() + ; + Plack::BodyParser->parse($req->env, $parser); +} + diff --git a/cpanfile b/cpanfile index 9dca1adad..1e44710d2 100644 --- a/cpanfile +++ b/cpanfile @@ -4,7 +4,6 @@ requires 'Devel::StackTrace', '1.23'; requires 'Devel::StackTrace::AsHTML', '0.11'; requires 'File::ShareDir', '1.00'; requires 'Filesys::Notify::Simple'; -requires 'HTTP::Body', '1.06'; requires 'HTTP::Message', '5.814'; requires 'Hash::MultiValue', '0.05'; requires 'Pod::Usage', '1.36'; @@ -15,6 +14,9 @@ requires 'URI', '1.59'; requires 'parent'; requires 'Apache::LogFormat::Compiler', '0.12'; requires 'HTTP::Tiny', 0.034; +requires 'URL::Encode'; +requires 'HTTP::MultiPartParser'; +requires 'JSON', 2; on test => sub { requires 'Test::More', '0.88'; diff --git a/lib/Plack/BodyParser.pm b/lib/Plack/BodyParser.pm new file mode 100644 index 000000000..05ac4b2fa --- /dev/null +++ b/lib/Plack/BodyParser.pm @@ -0,0 +1,91 @@ +package Plack::BodyParser; +use strict; +use warnings; +use utf8; +use 5.008_001; +use Hash::MultiValue; +use Stream::Buffered; + +use Plack::BodyParser::OctetStream; + +sub new { + my $class = shift; + bless { handlers => [] }, $class; +} + +sub register { + my ($self, $content_type, $klass, $opts) = @_; + push @{$self->{handlers}}, [$content_type, $klass, $opts]; +} + +sub get_parser { + my ($self, $env) = @_; + + if (defined $env->{CONTENT_TYPE}) { + for my $handler (@{$self->{handlers}}) { + if (index($env->{CONTENT_TYPE}, $handler->[0]) == 0) { + return $handler->[1]->new($env, $handler->[2]); + } + } + } + return Plack::BodyParser::OctetStream->new(); +} + +sub parse { + my ($self, $env) = @_; + + my $parser = $self->get_parser($env); + + my $ct = $env->{CONTENT_TYPE}; + my $cl = $env->{CONTENT_LENGTH}; + if (!$ct && !$cl) { + # No Content-Type nor Content-Length -> GET/HEAD + $env->{'plack.request.body'} = Hash::MultiValue->new; + $env->{'plack.request.upload'} = Hash::MultiValue->new; + return; + } + + my $input = $env->{'psgi.input'}; + + my $buffer; + if ($env->{'psgix.input.buffered'}) { + # Just in case if input is read by middleware/apps beforehand + $input->seek(0, 0); + } else { + $buffer = Stream::Buffered->new($cl); + } + + my $spin = 0; + while ($cl) { + $input->read(my $chunk, $cl < 8192 ? $cl : 8192); + my $read = length $chunk; + $cl -= $read; + $parser->add($chunk); + $buffer->print($chunk) if $buffer; + + if ($read == 0 && $spin++ > 2000) { + Carp::croak "Bad Content-Length: maybe client disconnect? ($cl bytes remaining)"; + } + } + + if ($buffer) { + $env->{'psgix.input.buffered'} = 1; + $env->{'psgi.input'} = $buffer->rewind; + } else { + $input->seek(0, 0); + } + + ($env->{'plack.request.body'}, $env->{'plack.request.upload'}) + = $parser->finalize(); + + return 1; +} + + +1; +__END__ + +=head1 NAME + +Plack::BodyParser - HTTP request body parser + diff --git a/lib/Plack/BodyParser/JSON.pm b/lib/Plack/BodyParser/JSON.pm new file mode 100644 index 000000000..b0d9076db --- /dev/null +++ b/lib/Plack/BodyParser/JSON.pm @@ -0,0 +1,39 @@ +package Plack::BodyParser::JSON; +use strict; +use warnings; +use utf8; +use JSON (); +use Encode qw(encode_utf8); +use Hash::MultiValue; + +sub new { + my $class = shift; + bless {buffer => ''}, $class; +} + +sub add { + my $self = shift; + $self->{buffer} .= $_[0] if defined $_[0]; +} + +sub finalize { + my $self = shift; + + my $p = JSON::decode_json($self->{buffer}); + my $params = Hash::MultiValue->new(); + if (ref $p eq 'HASH') { + while (my ($k, $v) = each %$p) { + if (ref $v eq 'ARRAY') { + for (@$v) { + $params->add(encode_utf8($k), encode_utf8($_)); + } + } else { + $params->add(encode_utf8($k), encode_utf8($v)); + } + } + } + return ($params, Hash::MultiValue->new()); +} + +1; + diff --git a/lib/Plack/BodyParser/MultiPart.pm b/lib/Plack/BodyParser/MultiPart.pm new file mode 100644 index 000000000..0515adab8 --- /dev/null +++ b/lib/Plack/BodyParser/MultiPart.pm @@ -0,0 +1,133 @@ +package Plack::BodyParser::MultiPart; +use strict; +use warnings; +use utf8; +use 5.010_001; +use HTTP::MultiPartParser; +use HTTP::Headers::Util qw[split_header_words]; +use File::Temp; +use Hash::MultiValue; +use Carp (); +use Plack::Request::Upload; +use HTTP::Headers; + +sub new { + my ($class, $env, $opts) = @_; + + my $self = bless { }, $class; + + my $uploads = Hash::MultiValue->new(); + my $params = Hash::MultiValue->new(); + + unless (defined $env->{CONTENT_TYPE}) { + Carp::croak("Missing CONTENT_TYPE in PSGI env"); + } + unless ( $env->{CONTENT_TYPE} =~ /boundary=\"?([^\";]+)\"?/ ) { + Carp::croak("Invalid boundary in content_type: $env->{CONTENT_TYPE}"); + } + my $boundary = $1; + + my $part; + my $parser = HTTP::MultiPartParser->new( + boundary => $boundary, + on_header => sub { + my ($headers) = @_; + + my $disposition; + foreach (@$headers) { + if (/\A Content-Disposition: [\x09\x20]* (.*)/xi) { + $disposition = $1; + last; + } + } + + (defined $disposition) + or die q/Content-Disposition header is missing in part/; + + my ($p) = split_header_words($disposition); + + ($p->[0] eq 'form-data') + or die q/Disposition type is not form-data/; + + my ($name, $filename); + for(my $i = 2; $i < @$p; $i += 2) { + if ($p->[$i] eq 'name') { $name = $p->[$i + 1] } + elsif ($p->[$i] eq 'filename') { $filename = $p->[$i + 1] } + } + + (defined $name) + or die q/Parameter 'name' is missing from Content-Disposition header/; + + $part = { + name => $name, + headers => $headers, + }; + + if (defined $filename) { + $part->{filename} = $filename; + + if (length $filename) { + my $fh = File::Temp->new(UNLINK => 1); + $part->{fh} = $fh; + $part->{tempname} = $fh->filename; + + # Save temporary files to $env. + # Temporary files will remove after the request. + push @{$env->{'plack.bodyparser.multipart.filehandles'}}, $part->{fh}; + } + } + }, + on_body => sub { + my ($chunk, $final) = @_; + + my $fh = $part->{fh}; + + if ($fh) { + print $fh $chunk + or die qq/Could not write to file handle: '$!'/; + if ($final) { + seek($fh, 0, SEEK_SET) + or die qq/Could not rewind file handle: '$!'/; + + my $headers = HTTP::Headers->new( + map { split(/\s*:\s*/, $_, 2) } + @{$part->{headers}} + ); + $uploads->add($part->{name}, Plack::Request::Upload->new( + headers => $headers, + size => -s $part->{fh}, + filename => $part->{filename}, + tempname => $part->{tempname}, + )); + } + } else { + $part->{data} .= $chunk; + if ($final) { + $params->add($part->{name}, $part->{data}); + } + } + }, + $opts->{on_error} ? (on_error => $opts->{on_error}) : (), + ); + + $self->{parser} = $parser; + $self->{params} = $params; + $self->{uploads} = $uploads; + + return $self; +} + +sub add { + my $self = shift; + $self->{parser}->parse($_[0]) if defined $_[0]; +} + +sub finalize { + my $self = shift; + $self->{parser}->finish(); + + return ($self->{params}, $self->{uploads}); +} + +1; + diff --git a/lib/Plack/BodyParser/OctetStream.pm b/lib/Plack/BodyParser/OctetStream.pm new file mode 100644 index 000000000..d35b4dc3a --- /dev/null +++ b/lib/Plack/BodyParser/OctetStream.pm @@ -0,0 +1,22 @@ +package Plack::BodyParser::OctetStream; +use strict; +use warnings; +use utf8; +use 5.008_001; + +sub new { + my $class = shift; + bless {}, $class; +} + +sub add { } + +sub finalize { + return ( + Hash::MultiValue->new(), + Hash::MultiValue->new() + ); +} + +1; + diff --git a/lib/Plack/BodyParser/UrlEncoded.pm b/lib/Plack/BodyParser/UrlEncoded.pm new file mode 100644 index 000000000..3e48d011e --- /dev/null +++ b/lib/Plack/BodyParser/UrlEncoded.pm @@ -0,0 +1,55 @@ +package Plack::BodyParser::UrlEncoded; +use strict; +use warnings; +use utf8; +use 5.010_001; +use URL::Encode; +use Hash::MultiValue; + +sub new { + my $class = shift; + bless { buffer => '' }, $class; +} + +sub add { + my $self = shift; + if (defined $_[0]) { + $self->{buffer} .= $_[0]; + } +} + +sub finalize { + my $self = shift; + + my $p = URL::Encode::url_params_flat($self->{buffer}); + return (Hash::MultiValue->new(@$p), Hash::MultiValue->new()); +} + +1; +__END__ + +=head1 NAME + +Plack::BodyParser::UrlEncoded - application/x-www-form-urlencoded + +=head1 SYNOPSIS + + use Plack::Request; + use Plack::BodyParser; + use Plack::BodyParser::UrlEncoded; + + my $req = Plack::Request->new( + $env, + parse_request_body => sub { + my $self = shift; + if ($self->env->{CONTENT_TYPE} =~ m{\Aapplication/x-www-form-urlencoded}) { + my $parser = Plack::BodyParser::UrlEncoded->new(); + Plack::BodyParser->parse($self->env, $parser); + } + } + ); + +=head1 DESCRIPTION + +This is a HTTP body parser class for application/x-www-form-urlencoded. + diff --git a/lib/Plack/Request.pm b/lib/Plack/Request.pm index 275e44020..48a01cf21 100644 --- a/lib/Plack/Request.pm +++ b/lib/Plack/Request.pm @@ -7,19 +7,47 @@ our $VERSION = '1.0029'; use HTTP::Headers; use Carp (); use Hash::MultiValue; -use HTTP::Body; use Plack::Request::Upload; +use Plack::BodyParser; +use Plack::BodyParser::UrlEncoded; +use Plack::BodyParser::MultiPart; use Stream::Buffered; use URI; use URI::Escape (); sub new { - my($class, $env) = @_; + my($class, $env, %opts) = @_; Carp::croak(q{$env is required}) unless defined $env && ref($env) eq 'HASH'; - bless { env => $env }, $class; + bless { + env => $env, + ($opts{request_body_parser} ? (request_body_parser => $opts{request_body_parser}) : ()), + }, $class; +} + +sub request_body_parser { + my $self = shift; + unless (exists $self->{request_body_parser}) { + $self->{request_body_parser} = $self->_build_request_body_parser(); + } + return $self->{request_body_parser}; +} + +sub _build_request_body_parser { + my $self = shift; + + my $parser = Plack::BodyParser->new(); + $parser->register( + 'application/x-www-form-urlencoded', + 'Plack::BodyParser::UrlEncoded' + ); + $parser->register( + 'multipart/form-data', + 'Plack::BodyParser::MultiPart' + ); + $parser; } sub env { $_[0]->{env} } @@ -245,74 +273,7 @@ sub new_response { sub _parse_request_body { my $self = shift; - - my $ct = $self->env->{CONTENT_TYPE}; - my $cl = $self->env->{CONTENT_LENGTH}; - if (!$ct && !$cl) { - # No Content-Type nor Content-Length -> GET/HEAD - $self->env->{'plack.request.body'} = Hash::MultiValue->new; - $self->env->{'plack.request.upload'} = Hash::MultiValue->new; - return; - } - - my $body = HTTP::Body->new($ct, $cl); - - # HTTP::Body will create temporary files in case there was an - # upload. Those temporary files can be cleaned up by telling - # HTTP::Body to do so. It will run the cleanup when the request - # env is destroyed. That the object will not go out of scope by - # the end of this sub we will store a reference here. - $self->env->{'plack.request.http.body'} = $body; - $body->cleanup(1); - - my $input = $self->input; - - my $buffer; - if ($self->env->{'psgix.input.buffered'}) { - # Just in case if input is read by middleware/apps beforehand - $input->seek(0, 0); - } else { - $buffer = Stream::Buffered->new($cl); - } - - my $spin = 0; - while ($cl) { - $input->read(my $chunk, $cl < 8192 ? $cl : 8192); - my $read = length $chunk; - $cl -= $read; - $body->add($chunk); - $buffer->print($chunk) if $buffer; - - if ($read == 0 && $spin++ > 2000) { - Carp::croak "Bad Content-Length: maybe client disconnect? ($cl bytes remaining)"; - } - } - - if ($buffer) { - $self->env->{'psgix.input.buffered'} = 1; - $self->env->{'psgi.input'} = $buffer->rewind; - } else { - $input->seek(0, 0); - } - - $self->env->{'plack.request.body'} = Hash::MultiValue->from_mixed($body->param); - - my @uploads = Hash::MultiValue->from_mixed($body->upload)->flatten; - my @obj; - while (my($k, $v) = splice @uploads, 0, 2) { - push @obj, $k, $self->_make_upload($v); - } - - $self->env->{'plack.request.upload'} = Hash::MultiValue->new(@obj); - - 1; -} - -sub _make_upload { - my($self, $upload) = @_; - my %copy = %$upload; - $copy{headers} = HTTP::Headers->new(%{$upload->{headers}}); - Plack::Request::Upload->new(%copy); + return $self->request_body_parser->parse($self->env); } 1; diff --git a/t/Plack-BodyParser/json.t b/t/Plack-BodyParser/json.t new file mode 100644 index 000000000..440cf652a --- /dev/null +++ b/t/Plack-BodyParser/json.t @@ -0,0 +1,25 @@ +use strict; +use warnings; +use Test::More; + +use Hash::MultiValue; +use Plack::BodyParser::JSON; + +my $parser = Plack::BodyParser::JSON->new(); +$parser->add('{'); +$parser->add('"hoge":["fuga","hige"],'); +$parser->add('"\u306b\u307b\u3093\u3054":"\u65e5\u672c\u8a9e",'); +$parser->add('"moge":"muga"'); +$parser->add('}'); + +my ($params, $uploads) = $parser->finalize(); +is_deeply $params->as_hashref_multi, + +{ + 'hoge' => [ 'fuga', 'hige' ], + 'moge' => ['muga'], + 'にほんご' => ['日本語'], + }; +is_deeply [$uploads->keys], []; + +done_testing; + diff --git a/t/Plack-BodyParser/multipart.t b/t/Plack-BodyParser/multipart.t new file mode 100644 index 000000000..30b5f436d --- /dev/null +++ b/t/Plack-BodyParser/multipart.t @@ -0,0 +1,99 @@ +use strict; +use warnings; +use utf8; +use Test::More; +use Plack::BodyParser::MultiPart; + +my $content = qq{------BOUNDARY +Content-Disposition: form-data; name="hoge" +Content-Type: text/plain + +fuga +------BOUNDARY +Content-Disposition: form-data; name="hoge" +Content-Type: text/plain + +hige +------BOUNDARY +Content-Disposition: form-data; name="nobuko" +Content-Type: text/plain + +iwaki +------BOUNDARY +Content-Disposition: form-data; name="test_upload_file"; filename="yappo.txt" +Content-Type: text/plain + +SHOGUN +------BOUNDARY +Content-Disposition: form-data; name="test_upload_file"; filename="yappo2.txt" +Content-Type: text/plain + +SHOGUN2 +------BOUNDARY +Content-Disposition: form-data; name="test_upload_file3"; filename="yappo3.txt" +Content-Type: text/plain + +SHOGUN3 +------BOUNDARY +Content-Disposition: form-data; name="test_upload_file4"; filename="yappo4.txt" +Content-Type: text/plain + +SHOGUN4 +------BOUNDARY +Content-Disposition: form-data; name="test_upload_file4"; filename="yappo5.txt" +Content-Type: text/plain + +SHOGUN4 +------BOUNDARY +Content-Disposition: form-data; name="test_upload_file6"; filename="yappo6.txt" +Foo: bar + baz +X: Y:Z +Content-Type: text/plain + +SHOGUN6 +------BOUNDARY-- +}; +$content =~ s/\r\n/\n/g; +$content =~ s/\n/\r\n/g; + +my $env = { + CONTENT_LENGTH => length($content), + CONTENT_TYPE => 'multipart/form-data; boundary=----BOUNDARY', +}; + +# read from file. +my $parser = Plack::BodyParser::MultiPart->new($env); +$parser->add($_) for split //, $content; +my ($params, $uploads) = $parser->finalize(); + +is_deeply $params->as_hashref_multi, { + hoge => ['fuga', 'hige'], + nobuko => ['iwaki'], +}; +my @test_upload_file = $uploads->get_all('test_upload_file'); +is 0+@test_upload_file, 2; +is slurp($test_upload_file[0]), 'SHOGUN'; +is slurp($test_upload_file[1]), 'SHOGUN2'; + +{ + my $test_upload_file3 = $uploads->{'test_upload_file3'}; + is slurp($test_upload_file3), 'SHOGUN3'; + + my @test_upload_file6 = $uploads->{'test_upload_file6'}; + is slurp($test_upload_file6[0]), 'SHOGUN6'; + isa_ok $test_upload_file6[0]->headers, 'HTTP::Headers'; + is $test_upload_file6[0]->headers->header('Content-Type'), 'text/plain'; + is $test_upload_file6[0]->content_type, 'text/plain'; + is $test_upload_file6[0]->headers->header('X'), 'Y:Z'; + is $test_upload_file6[0]->headers->header('Foo'), 'bar baz'; +} + +done_testing; + +sub slurp { + my $up = shift; + open my $fh, "<", $up->path or die "$!"; + scalar do { local $/; <$fh> }; +} + diff --git a/t/Plack-BodyParser/url_encoded.t b/t/Plack-BodyParser/url_encoded.t new file mode 100644 index 000000000..d61f5552b --- /dev/null +++ b/t/Plack-BodyParser/url_encoded.t @@ -0,0 +1,15 @@ +use strict; +use warnings; +use utf8; +use Test::More 0.96; +use Hash::MultiValue; +use Plack::BodyParser::UrlEncoded; + +my $parser = Plack::BodyParser::UrlEncoded->new(); +$parser->add('oo=xx%20yy'); +my ($params, $uploads) = $parser->finalize(); +is_deeply $params, Hash::MultiValue->new('oo' => 'xx yy'); +is_deeply $uploads, Hash::MultiValue->new(); + +done_testing; + diff --git a/t/Plack-Request/request_body_parser.t b/t/Plack-Request/request_body_parser.t new file mode 100644 index 000000000..1fe2c09ab --- /dev/null +++ b/t/Plack-Request/request_body_parser.t @@ -0,0 +1,86 @@ +use strict; +use warnings; +use utf8; +use Test::More; + +use Plack::Request; +use Plack::BodyParser::UrlEncoded; +use Plack::BodyParser::JSON; +use Plack::BodyParser::MultiPart; +use Plack::BodyParser::OctetStream; + +subtest 'JSON' => sub { + is_deeply make_request('application/json', '{"hoge":"fuga"}')->parameters()->as_hashref_multi, { + hoge => ['fuga'], + }; +}; + +subtest 'UrlEncoded' => sub { + is_deeply make_request('application/x-www-form-urlencoded', 'xxx=yyy')->parameters()->as_hashref_multi, { + xxx => ['yyy'], + }; +}; + +subtest 'MultiPart' => sub { + my $content = <<'...'; +--BOUNDARY +Content-Disposition: form-data; name="xxx" +Content-Type: text/plain + +yyy +--BOUNDARY +Content-Disposition: form-data; name="yappo"; filename="osawa.txt" +Content-Type: text/plain + +SHOGUN +--BOUNDARY-- +... + $content =~ s/\r\n/\n/g; + $content =~ s/\n/\r\n/g; + + my $req = make_request('multipart/form-data; boundary=BOUNDARY', $content); + is_deeply $req->parameters()->as_hashref_multi, { + xxx => ['yyy'], + }; + + is slurp($req->upload('yappo')), 'SHOGUN'; + is $req->upload('yappo')->filename, 'osawa.txt'; + isa_ok $req->upload('yappo')->headers, 'HTTP::Headers'; +}; + +subtest 'OctetStream' => sub { + my $content = 'hogehoge'; + my $req = make_request('application/octet-stream', $content); + is $req->content, 'hogehoge'; + is 0+($req->parameters->keys), 0; + is 0+($req->uploads->keys), 0; +}; + +done_testing; + +sub make_request { + my ($content_type, $content) = @_; + + my $parser = Plack::BodyParser->new(); + $parser->register('application/json', 'Plack::BodyParser::JSON'); + $parser->register('application/x-www-form-urlencoded', 'Plack::BodyParser::UrlEncoded'); + $parser->register('multipart/form-data', 'Plack::BodyParser::MultiPart'); + + open my $input, '<', \$content; + my $req = Plack::Request->new( + +{ + 'psgi.input' => $input, + CONTENT_TYPE => $content_type, + CONTENT_LENGTH => length($content), + }, + request_body_parser => $parser, + ); + return $req; +} + +sub slurp { + my $up = shift; + open my $fh, "<", $up->path or die "$!"; + scalar do { local $/; <$fh> }; +} +