Skip to content

Commit

Permalink
packetbeat: Add support for HTTP encodings (#7915)
Browse files Browse the repository at this point in the history
This patch adds support for decoding HTTP bodies based on the
`Content-Encoding` and `Transfer-Encoding` headers. Supported decoders
are `gzip` and `deflate`.

Multiple encoders are supported, for example:
```
Content-Encoding: gzip
Transfer-Encoding: chunked
```

Or the rarely used but allowed by the standard:

```
Transfer-Encoding: deflate, gzip
```

The difference between `Transfer-Encoding` and `Content-Encoding` is not
relevant to packetbeat, so both are treated the same, with the exception
that `chunked` can only appear as the last or only element of
`Transfer-Encoding`.

To avoid decompression bombs, the value specified in `http.max_message_size`
(default 10MB) is honored when decoding.

A new configuration option, `decode_body` (default true), has been added
to the http protocol, allowing this feature to be disabled.
  • Loading branch information
adriansr authored and andrewkroh committed Aug 13, 2018
1 parent bcb0531 commit 03f7e87
Show file tree
Hide file tree
Showing 9 changed files with 436 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ https://github.com/elastic/beats/compare/v6.4.0...master[Check the HEAD diff]
*Packetbeat*

- Added DHCP protocol support. {pull}7647[7647]
- Add support to decode HTTP bodies compressed with `gzip` and `deflate`. {pull}7915[7915]

*Winlogbeat*

Expand Down
4 changes: 4 additions & 0 deletions packetbeat/_meta/beat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ packetbeat.protocols:
# response payload.
#include_response_body_for: []

# Whether the body of a request or response must be decoded when a
# content-encoding or transfer-encoding has been applied.
#decode_body: true

# If the Cookie or Set-Cookie headers are sent, this option controls whether
# they are split into individual values.
#split_cookie: false
Expand Down
9 changes: 8 additions & 1 deletion packetbeat/docs/packetbeat-options.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,14 @@ packetbeat.protocols:
include_body_for: ["text/html"]
------------------------------------------------------------------------------


===== `decode_body`

A boolean flag that controls decoding of the HTTP payload. It interprets the
`Content-Encoding` and `Transfer-Encoding` headers and decompresses the entity
body. Supported encodings are `gzip` and `deflate`. This option is only
applicable when the HTTP payload is exported, that is, when
one of the `include_*_body_for` options is specified or a POST request
contains url-encoded parameters.

===== `split_cookie`

Expand Down
4 changes: 4 additions & 0 deletions packetbeat/packetbeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ packetbeat.protocols:
# response payload.
#include_response_body_for: []

# Whether the body of a request or response must be decoded when a
# content-encoding or transfer-encoding has been applied.
#decode_body: true

# If the Cookie or Set-Cookie headers are sent, this option controls whether
# they are split into individual values.
#split_cookie: false
Expand Down
2 changes: 2 additions & 0 deletions packetbeat/protos/http/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type httpConfig struct {
HideKeywords []string `config:"hide_keywords"`
RedactAuthorization bool `config:"redact_authorization"`
MaxMessageSize int `config:"max_message_size"`
DecodeBody bool `config:"decode_body"`
}

var (
Expand All @@ -43,5 +44,6 @@ var (
TransactionTimeout: protos.DefaultTransactionExpiration,
},
MaxMessageSize: tcp.TCPMaxDataInStream,
DecodeBody: true,
}
)
110 changes: 110 additions & 0 deletions packetbeat/protos/http/decode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package http

import (
	"bytes"
	"compress/flate"
	"compress/gzip"
	"io"
	"io/ioutil"

	"github.com/pkg/errors"
)

var (
	// decoders maps each supported Content-Encoding/Transfer-Encoding
	// token to a factory that wraps a reader with the matching decoder.
	decoders = map[string]func(io.Reader) (io.ReadCloser, error){
		"gzip":      decodeGZIP,
		"x-gzip":    decodeGZIP,
		"deflate":   decodeDeflate,
		"x-deflate": decodeDeflate,

		// Not really expected, withdrawn by RFC
		"identity": decodeIdentity,

		// Couldn't find an implementation of `compress` nor a server/library
		// that supports it. Seems long dead.
		// "compress": nil,
		// "x-compress": nil,
	}

	// ErrNoDecoder is returned when an unknown content-encoding is used.
	ErrNoDecoder = errors.New("decoder not found")

	// ErrSizeLimited is returned when the decoded body had to be truncated
	// because it exceeded the configured maximum message size.
	ErrSizeLimited = errors.New("body truncated due to size limitation")
)

// decodeHTTPBody decodes data compressed with the given encoding format,
// returning at most maxSize bytes of decoded output. It returns
// ErrNoDecoder when the format has no registered decoder.
func decodeHTTPBody(data []byte, format string, maxSize int) ([]byte, error) {
	newDecoder, ok := decoders[format]
	if !ok {
		return nil, ErrNoDecoder
	}
	body, err := newDecoder(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	defer body.Close()
	return readMax(body, maxSize)
}

func decodeGZIP(reader io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(reader)
}

func decodeDeflate(reader io.Reader) (io.ReadCloser, error) {
return flate.NewReader(reader), nil
}

// closeDecorator adapts an io.Reader into an io.ReadCloser by adding a
// no-op Close method.
type closeDecorator struct {
	io.Reader
}

// Close implements io.Closer. It does nothing and always returns nil,
// as the wrapped reader owns no resources of its own.
func (closeDecorator) Close() error {
	return nil
}

func decodeIdentity(reader io.Reader) (io.ReadCloser, error) {
return closeDecorator{reader}, nil
}

// readMax reads from reader until EOF or until maxSize bytes have been
// accumulated, whichever comes first. When the limit is reached, the
// truncated data is returned together with ErrSizeLimited; on a clean
// EOF the error is nil. The result may be shorter than its capacity.
func readMax(reader io.Reader, maxSize int) (result []byte, err error) {
	// Grow the buffer in chunks of at least minSize bytes to amortize
	// allocations across many small reads.
	const minSize = 512
	for used := 0; ; {
		if len(result)-used < minSize {
			// Grow by half the current length, but never by less than
			// minSize.
			grow := len(result) >> 1
			if grow < minSize {
				grow = minSize
			}
			result = append(result, make([]byte, grow)...)
		}
		// NOTE: `:=` deliberately shadows the named return err; the
		// explicit return below passes the shadowed value out.
		n, err := reader.Read(result[used:])
		if n > 0 {
			used += n
			if used > maxSize {
				// Truncate to the limit and report it to the caller,
				// which may still use the partial data.
				used = maxSize
				err = ErrSizeLimited
			}
		}
		if err != nil {
			// io.EOF marks a normal end of stream, not a failure.
			if err == io.EOF {
				err = nil
			}
			return result[:used], err
		}
	}
}
40 changes: 40 additions & 0 deletions packetbeat/protos/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
"strings"
"time"

"github.com/pkg/errors"

"github.com/elastic/beats/libbeat/beat"
"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/logp"
Expand Down Expand Up @@ -84,6 +86,7 @@ type httpPlugin struct {
hideKeywords []string
redactAuthorization bool
maxMessageSize int
mustDecodeBody bool

parserConfig parserConfig

Expand Down Expand Up @@ -139,6 +142,8 @@ func (http *httpPlugin) setFromConfig(config *httpConfig) {
http.splitCookie = config.SplitCookie
http.parserConfig.realIPHeader = strings.ToLower(config.RealIPHeader)
http.transactionTimeout = config.TransactionTimeout
http.mustDecodeBody = config.DecodeBody

for _, list := range [][]string{config.IncludeBodyFor, config.IncludeRequestBodyFor} {
http.parserConfig.includeRequestBodyFor = append(http.parserConfig.includeRequestBodyFor, list...)
}
Expand Down Expand Up @@ -500,6 +505,8 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event {
var timestamp time.Time

if requ != nil {
// Body must be decoded before extractParameters
http.decodeBody(requ)
path, params, err := http.extractParameters(requ)
if err != nil {
logp.Warn("Fail to parse HTTP parameters: %v", err)
Expand Down Expand Up @@ -533,6 +540,7 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event {
}

if resp != nil {
http.decodeBody(resp)
httpDetails["response"] = common.MapStr{
"code": resp.statusCode,
"phrase": resp.statusPhrase,
Expand Down Expand Up @@ -625,6 +633,38 @@ func (http *httpPlugin) setBody(result common.MapStr, m *message) {
}
}

// decodeBody decodes the captured message body in place when body
// capture is active, decoding is enabled, and one or more encodings
// were seen in the headers. On failure a note is attached to the
// message and the body may contain partial data.
func (http *httpPlugin) decodeBody(m *message) {
	if !m.saveBody || len(m.body) == 0 {
		return
	}
	if !http.mustDecodeBody || len(m.encodings) == 0 {
		return
	}
	decoded, err := decodeBody(m.body, m.encodings, http.maxMessageSize)
	if err != nil {
		// Body can contain partial data
		m.notes = append(m.notes, err.Error())
	}
	m.body = decoded
}

// decodeBody undoes the given list of encodings to recover the original
// entity body. Encodings were applied by the sender in list order, so
// they are reversed here from last to first. Decoded output is capped
// at maxSize bytes per decoding step.
func decodeBody(body []byte, encodings []string, maxSize int) ([]byte, error) {
	if isDebug {
		debugf("decoding body with encodings=%v", encodings)
	}
	for i := len(encodings) - 1; i >= 0; i-- {
		enc := encodings[i]
		decoded, err := decodeHTTPBody(body, enc, maxSize)
		if err != nil {
			wrapped := errors.Wrapf(err, "unable to decode body using %s encoding", enc)
			if i != 0 {
				// Do not output a partial body unless failure occurs on
				// the last decoder.
				return nil, wrapped
			}
			return decoded, wrapped
		}
		body = decoded
	}
	return body, nil
}

func splitCookiesHeader(headerVal string) map[string]string {
cookies := map[string]string{}

Expand Down
43 changes: 35 additions & 8 deletions packetbeat/protos/http/http_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ type message struct {
realIP common.NetString

// Http Headers
contentLength int
contentType common.NetString
transferEncoding common.NetString
isChunked bool
headers map[string]common.NetString
size uint64
contentLength int
contentType common.NetString
encodings []string
isChunked bool
headers map[string]common.NetString
size uint64

rawHeaders []byte

Expand Down Expand Up @@ -94,7 +94,7 @@ type parserConfig struct {
}

var (
transferEncodingChunked = []byte("chunked")
transferEncodingChunked = "chunked"

constCRLF = []byte("\r\n")

Expand All @@ -105,6 +105,7 @@ var (
nameContentLength = []byte("content-length")
nameContentType = []byte("content-type")
nameTransferEncoding = []byte("transfer-encoding")
nameContentEncoding = []byte("content-encoding")
nameConnection = []byte("connection")
)

Expand Down Expand Up @@ -366,7 +367,24 @@ func (parser *parser) parseHeader(m *message, data []byte) (bool, bool, int) {
} else if bytes.Equal(headerName, nameContentType) {
m.contentType = headerVal
} else if bytes.Equal(headerName, nameTransferEncoding) {
m.isChunked = bytes.Equal(common.NetString(headerVal), transferEncodingChunked)
encodings := parseCommaSeparatedList(headerVal)
// 'chunked' can only appear at the end
if n := len(encodings); n > 0 && encodings[n-1] == transferEncodingChunked {
m.isChunked = true
encodings = encodings[:n-1]
}
if len(encodings) > 0 {
// Append at the end of encodings. If a content-encoding
// header is also present, it was applied by sender before
// transfer-encoding.
m.encodings = append(m.encodings, encodings...)
}

} else if bytes.Equal(headerName, nameContentEncoding) {
encodings := parseCommaSeparatedList(headerVal)
// Append at the beginning of m.encodings, as Content-Encoding
// is supposed to be applied before Transfer-Encoding.
m.encodings = append(encodings, m.encodings...)
} else if bytes.Equal(headerName, nameConnection) {
m.connection = headerVal
}
Expand Down Expand Up @@ -402,6 +420,15 @@ func (parser *parser) parseHeader(m *message, data []byte) (bool, bool, int) {
return true, false, len(data)
}

// parseCommaSeparatedList splits a header value into its comma-separated
// elements, trimming surrounding whitespace and lower-casing each
// element. RFC 7230 allows optional whitespace (OWS = SP / HTAB) around
// list elements, so TrimSpace is used instead of trimming spaces only.
func parseCommaSeparatedList(s common.NetString) (list []string) {
	values := bytes.Split(s, []byte(","))
	list = make([]string, len(values))
	for idx := range values {
		list[idx] = string(bytes.ToLower(bytes.TrimSpace(values[idx])))
	}
	return list
}

func (*parser) parseBody(s *stream, m *message) (ok, complete bool) {
nbytes := len(s.data)
if !m.hasContentLength && (bytes.Equal(m.connection, constClose) ||
Expand Down
Loading

0 comments on commit 03f7e87

Please sign in to comment.