From 03f7e873b2df8ed71e40570976bc36eb9a0fda7c Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Mon, 13 Aug 2018 23:29:10 +0200 Subject: [PATCH] packetbeat: Add support for HTTP encodings (#7915) This patch adds support for decoding HTTP bodies based on the `Content-Encoding` and `Transfer-Encoding` headers. Supported decoders are `gzip` and `deflate`. Multiple encoders are supported, for example: ``` Content-Encoding: gzip Transfer-Encoding: chunked ``` Or the rarely used but allowed by the standard: ``` Transfer-Encoding: deflate, gzip ``` The difference between `Transfer-Encoding` and `Content-Encoding` is not relevant to packetbeat, so both are treated the same, with the exception that `chunked` can only appear as the last or only element of `Transfer-Encoding`. To avoid decompression bombs, the value specified in `http.max_message_size` (default 10MB) is honored when decoding. A new configuration option, `decode_body` (default true), has been added to the http protocol, allowing to disable this feature. --- CHANGELOG.asciidoc | 1 + packetbeat/_meta/beat.reference.yml | 4 + packetbeat/docs/packetbeat-options.asciidoc | 9 +- packetbeat/packetbeat.reference.yml | 4 + packetbeat/protos/http/config.go | 2 + packetbeat/protos/http/decode.go | 110 ++++++++++ packetbeat/protos/http/http.go | 40 ++++ packetbeat/protos/http/http_parser.go | 43 +++- packetbeat/protos/http/http_test.go | 232 ++++++++++++++++++++ 9 files changed, 436 insertions(+), 9 deletions(-) create mode 100644 packetbeat/protos/http/decode.go diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 723255eeca2..16373fddd81 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -95,6 +95,7 @@ https://github.com/elastic/beats/compare/v6.4.0...master[Check the HEAD diff] *Packetbeat* - Added DHCP protocol support. {pull}7647[7647] +- Add support to decode HTTP bodies compressed with `gzip` and `deflate`. 
{pull}7915[7915] *Winlogbeat* diff --git a/packetbeat/_meta/beat.reference.yml b/packetbeat/_meta/beat.reference.yml index f91827b33ba..f70cbfdf1fb 100644 --- a/packetbeat/_meta/beat.reference.yml +++ b/packetbeat/_meta/beat.reference.yml @@ -195,6 +195,10 @@ packetbeat.protocols: # response payload. #include_response_body_for: [] + # Whether the body of a request must be decoded when a content-encoding + # or transfer-encoding has been applied. + #decode_body: true + # If the Cookie or Set-Cookie headers are sent, this option controls whether # they are split into individual values. #split_cookie: false diff --git a/packetbeat/docs/packetbeat-options.asciidoc b/packetbeat/docs/packetbeat-options.asciidoc index 071bbed5ae1..7180412add3 100644 --- a/packetbeat/docs/packetbeat-options.asciidoc +++ b/packetbeat/docs/packetbeat-options.asciidoc @@ -681,7 +681,14 @@ packetbeat.protocols: include_body_for: ["text/html"] ------------------------------------------------------------------------------ - +===== `decode_body` + +A boolean flag that controls decoding of HTTP payload. It interprets the +`Content-Encoding` and `Transfer-Encoding` headers and uncompresses the entity +body. Supported encodings are `gzip` and `deflate`. This option is only +applicable in the cases where the HTTP payload is exported, that is, when +one of the `include_*_body_for` options is specified or a POST request +contains url-encoded parameters. ===== `split_cookie` diff --git a/packetbeat/packetbeat.reference.yml b/packetbeat/packetbeat.reference.yml index 3fc9799783d..bafc5400a59 100644 --- a/packetbeat/packetbeat.reference.yml +++ b/packetbeat/packetbeat.reference.yml @@ -195,6 +195,10 @@ packetbeat.protocols: # response payload. #include_response_body_for: [] + # Whether the body of a request must be decoded when a content-encoding + # or transfer-encoding has been applied. 
+ #decode_body: true + # If the Cookie or Set-Cookie headers are sent, this option controls whether # they are split into individual values. #split_cookie: false diff --git a/packetbeat/protos/http/config.go b/packetbeat/protos/http/config.go index d3ef57f4647..2197dbad088 100644 --- a/packetbeat/protos/http/config.go +++ b/packetbeat/protos/http/config.go @@ -35,6 +35,7 @@ type httpConfig struct { HideKeywords []string `config:"hide_keywords"` RedactAuthorization bool `config:"redact_authorization"` MaxMessageSize int `config:"max_message_size"` + DecodeBody bool `config:"decode_body"` } var ( @@ -43,5 +44,6 @@ var ( TransactionTimeout: protos.DefaultTransactionExpiration, }, MaxMessageSize: tcp.TCPMaxDataInStream, + DecodeBody: true, } ) diff --git a/packetbeat/protos/http/decode.go b/packetbeat/protos/http/decode.go new file mode 100644 index 00000000000..34301955e9b --- /dev/null +++ b/packetbeat/protos/http/decode.go @@ -0,0 +1,110 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package http
+
+import (
+	"bytes"
+	"compress/flate"
+	"compress/gzip"
+	"io"
+
+	"github.com/pkg/errors"
+)
+
+var (
+	decoders = map[string]func(io.Reader) (io.ReadCloser, error){
+		"gzip":      decodeGZIP,
+		"x-gzip":    decodeGZIP,
+		"deflate":   decodeDeflate,
+		"x-deflate": decodeDeflate,
+
+		// Not really expected, withdrawn by RFC
+		"identity": decodeIdentity,
+
+		// Couldn't find an implementation of `compress` nor a server/library
+		// that supports it. Seems long dead.
+		// "compress": nil,
+		// "x-compress": nil,
+	}
+
+	// ErrNoDecoder is returned when an unknown content-encoding is used.
+	ErrNoDecoder = errors.New("decoder not found")
+
+	// ErrSizeLimited is returned when the decoded body was truncated because it exceeded the maximum allowed size.
+	ErrSizeLimited = errors.New("body truncated due to size limitation")
+)
+
+func decodeHTTPBody(data []byte, format string, maxSize int) ([]byte, error) {
+	decoder, found := decoders[format]
+	if !found {
+		return nil, ErrNoDecoder
+	}
+	reader, err := decoder(bytes.NewReader(data))
+	if err != nil {
+		return nil, err
+	}
+	defer reader.Close()
+	return readMax(reader, maxSize)
+}
+
+func decodeGZIP(reader io.Reader) (io.ReadCloser, error) {
+	return gzip.NewReader(reader)
+}
+
+func decodeDeflate(reader io.Reader) (io.ReadCloser, error) {
+	return flate.NewReader(reader), nil
+}
+
+type closeDecorator struct {
+	io.Reader
+}
+
+func (closeDecorator) Close() error {
+	return nil
+}
+
+func decodeIdentity(reader io.Reader) (io.ReadCloser, error) {
+	return closeDecorator{reader}, nil
+}
+
+func readMax(reader io.Reader, maxSize int) (result []byte, err error) {
+	const minSize = 512
+	for used := 0; ; {
+		if len(result)-used < minSize {
+			grow := len(result) >> 1
+			if grow < minSize {
+				grow = minSize
+			}
+			result = append(result, make([]byte, grow)...)
+ } + n, err := reader.Read(result[used:]) + if n > 0 { + used += n + if used > maxSize { + used = maxSize + err = ErrSizeLimited + } + } + if err != nil { + if err == io.EOF { + err = nil + } + return result[:used], err + } + } +} diff --git a/packetbeat/protos/http/http.go b/packetbeat/protos/http/http.go index 7415fea3988..d3c75b35a16 100644 --- a/packetbeat/protos/http/http.go +++ b/packetbeat/protos/http/http.go @@ -24,6 +24,8 @@ import ( "strings" "time" + "github.com/pkg/errors" + "github.com/elastic/beats/libbeat/beat" "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/libbeat/logp" @@ -84,6 +86,7 @@ type httpPlugin struct { hideKeywords []string redactAuthorization bool maxMessageSize int + mustDecodeBody bool parserConfig parserConfig @@ -139,6 +142,8 @@ func (http *httpPlugin) setFromConfig(config *httpConfig) { http.splitCookie = config.SplitCookie http.parserConfig.realIPHeader = strings.ToLower(config.RealIPHeader) http.transactionTimeout = config.TransactionTimeout + http.mustDecodeBody = config.DecodeBody + for _, list := range [][]string{config.IncludeBodyFor, config.IncludeRequestBodyFor} { http.parserConfig.includeRequestBodyFor = append(http.parserConfig.includeRequestBodyFor, list...) 
} @@ -500,6 +505,8 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event { var timestamp time.Time if requ != nil { + // Body must be decoded before extractParameters + http.decodeBody(requ) path, params, err := http.extractParameters(requ) if err != nil { logp.Warn("Fail to parse HTTP parameters: %v", err) @@ -533,6 +540,7 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event { } if resp != nil { + http.decodeBody(resp) httpDetails["response"] = common.MapStr{ "code": resp.statusCode, "phrase": resp.statusPhrase, @@ -625,6 +633,38 @@ func (http *httpPlugin) setBody(result common.MapStr, m *message) { } } +func (http *httpPlugin) decodeBody(m *message) { + if m.saveBody && len(m.body) > 0 { + if http.mustDecodeBody && len(m.encodings) > 0 { + var err error + m.body, err = decodeBody(m.body, m.encodings, http.maxMessageSize) + if err != nil { + // Body can contain partial data + m.notes = append(m.notes, err.Error()) + } + } + } +} + +func decodeBody(body []byte, encodings []string, maxSize int) (result []byte, err error) { + if isDebug { + debugf("decoding body with encodings=%v", encodings) + } + for idx := len(encodings) - 1; idx >= 0; idx-- { + format := encodings[idx] + body, err = decodeHTTPBody(body, format, maxSize) + if err != nil { + // Do not output a partial body unless failure occurs on the + // last decoder. 
+ if idx != 0 { + body = nil + } + return body, errors.Wrapf(err, "unable to decode body using %s encoding", format) + } + } + return body, nil +} + func splitCookiesHeader(headerVal string) map[string]string { cookies := map[string]string{} diff --git a/packetbeat/protos/http/http_parser.go b/packetbeat/protos/http/http_parser.go index 92089cea4ca..510ef4cfc79 100644 --- a/packetbeat/protos/http/http_parser.go +++ b/packetbeat/protos/http/http_parser.go @@ -53,12 +53,12 @@ type message struct { realIP common.NetString // Http Headers - contentLength int - contentType common.NetString - transferEncoding common.NetString - isChunked bool - headers map[string]common.NetString - size uint64 + contentLength int + contentType common.NetString + encodings []string + isChunked bool + headers map[string]common.NetString + size uint64 rawHeaders []byte @@ -94,7 +94,7 @@ type parserConfig struct { } var ( - transferEncodingChunked = []byte("chunked") + transferEncodingChunked = "chunked" constCRLF = []byte("\r\n") @@ -105,6 +105,7 @@ var ( nameContentLength = []byte("content-length") nameContentType = []byte("content-type") nameTransferEncoding = []byte("transfer-encoding") + nameContentEncoding = []byte("content-encoding") nameConnection = []byte("connection") ) @@ -366,7 +367,24 @@ func (parser *parser) parseHeader(m *message, data []byte) (bool, bool, int) { } else if bytes.Equal(headerName, nameContentType) { m.contentType = headerVal } else if bytes.Equal(headerName, nameTransferEncoding) { - m.isChunked = bytes.Equal(common.NetString(headerVal), transferEncodingChunked) + encodings := parseCommaSeparatedList(headerVal) + // 'chunked' can only appear at the end + if n := len(encodings); n > 0 && encodings[n-1] == transferEncodingChunked { + m.isChunked = true + encodings = encodings[:n-1] + } + if len(encodings) > 0 { + // Append at the end of encodings. If a content-encoding + // header is also present, it was applied by sender before + // transfer-encoding. 
+ m.encodings = append(m.encodings, encodings...) + } + + } else if bytes.Equal(headerName, nameContentEncoding) { + encodings := parseCommaSeparatedList(headerVal) + // Append at the beginning of m.encodings, as Content-Encoding + // is supposed to be applied before Transfer-Encoding. + m.encodings = append(encodings, m.encodings...) } else if bytes.Equal(headerName, nameConnection) { m.connection = headerVal } @@ -402,6 +420,15 @@ func (parser *parser) parseHeader(m *message, data []byte) (bool, bool, int) { return true, false, len(data) } +func parseCommaSeparatedList(s common.NetString) (list []string) { + values := bytes.Split(s, []byte(",")) + list = make([]string, len(values)) + for idx := range values { + list[idx] = string(bytes.ToLower(bytes.Trim(values[idx], " "))) + } + return list +} + func (*parser) parseBody(s *stream, m *message) (ok, complete bool) { nbytes := len(s.data) if !m.hasContentLength && (bytes.Equal(m.connection, constClose) || diff --git a/packetbeat/protos/http/http_test.go b/packetbeat/protos/http/http_test.go index cd8330d6361..9318bfb4358 100644 --- a/packetbeat/protos/http/http_test.go +++ b/packetbeat/protos/http/http_test.go @@ -1423,6 +1423,238 @@ func TestHttp_includeBodies(t *testing.T) { } } +func TestHTTP_Encodings(t *testing.T) { + const req = "GET / HTTP/1.1\r\n" + + "Host: server\r\n" + + "\r\n" + const payload = "hola\n" + + deflateBody := string([]byte{0xcb, 0xc8, 0xcf, 0x49, 0xe4, 0x02, 0x00}) + + gzipBody := string([]byte{0x1f, 0x8b, 0x08, 0x00, 0x68, 0xc4, 0x6a, 0x5b, 0x00, 0x03}) + + deflateBody + + string([]byte{0x78, 0xad, 0xdb, 0xd1, 0x05, 0x00, 0x00, 0x00}) + + gzipDeflateBody := string([]byte{ + 0x1f, 0x8b, 0x08, 0x00, 0x65, 0xdb, 0x6a, 0x5b, 0x00, 0x03, 0x3b, 0x7d, + 0xe2, 0xbc, 0xe7, 0x13, 0x26, 0x06, 0x00, 0x95, 0xfa, 0x49, 0xbf, 0x07, + 0x00, 0x00, 0x00}) + + var store eventStore + http := httpModForTests(&store) + config := defaultConfig + config.IncludeResponseBodyFor = []string{""} + 
http.setFromConfig(&config) + + tcptuple := testCreateTCPTuple() + + for testNum, testData := range []struct{ resp, expectedBody, note string }{ + // Test case #0 + // A chunked request + { + resp: "HTTP/1.1 200 OK\r\n" + + "Transfer-Encoding: chunked\r\n" + + "\r\n" + + "4\r\n" + + "ABCD\r\n" + + "0\r\n", + expectedBody: "ABCD", + }, + // Test case #1 + // gzip Transfer-Encoding + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Transfer-Encoding: gzip\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(gzipBody), gzipBody), + expectedBody: payload, + }, + // Test case #2 + // gzip Content-Encoding, the difference with #1 is purely semantic + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Content-Encoding: gzip\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(gzipBody), gzipBody), + expectedBody: payload, + }, + // Test case #3 + // gzip Content-Encoding, chunked Transfer encoding. + // Should first de-chunk and then apply gzip + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Content-Encoding: gzip\r\n"+ + "Transfer-Encoding: chunked\r\n"+ + "\r\n"+ + "%x\r\n"+ + "%s\r\n"+ + "0\r\n", len(gzipBody), gzipBody), + expectedBody: payload, + }, + // Test case #4 + // gzip, chunked Transfer encoding. 
+ // Same as #3 + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Transfer-Encoding: gzip, chunked\r\n"+ + "\r\n"+ + "%x\r\n"+ + "%s\r\n"+ + "0\r\n", len(gzipBody), gzipBody), + expectedBody: payload, + }, + // Test case #5 + // Deflate transfer encoding + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Transfer-Encoding: deflate\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(deflateBody), deflateBody), + expectedBody: payload, + }, + // Test case #6 + // Deflate content encoding, x-gzip(=gzip) transfer encoding + // First gzip, then deflate + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Transfer-Encoding: x-gzip\r\n"+ + "Content-Encoding: deflate\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(gzipDeflateBody), gzipDeflateBody), + expectedBody: payload, + }, + // Test case #7 + // First deflate, then gzip + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Transfer-Encoding: x-deflate, gzip\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(gzipDeflateBody), gzipDeflateBody), + expectedBody: payload, + }, + // Test case #8 + // Same behavior as #7 + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Content-Encoding: deflate, gzip\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(gzipDeflateBody), gzipDeflateBody), + expectedBody: payload, + }, + // Test case #9 + // First de-chunk, then gzip, then deflate + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Content-Encoding: x-deflate, x-gzip\r\n"+ + "Transfer-Encoding: chunked\r\n"+ + "\r\n"+ + "%x\r\n"+ + "%s\r\n"+ + "0\r\n", len(gzipDeflateBody), gzipDeflateBody), + expectedBody: payload, + }, + // Test case #10 + // Same behavior as #9 + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Content-Encoding: deflate, identity\r\n"+ + "Transfer-Encoding: gzip, chunked\r\n"+ + "\r\n"+ + "%x\r\n"+ + "%s\r\n"+ + "0\r\n", len(gzipDeflateBody), gzipDeflateBody), + expectedBody: payload, + }, + // Test case #11 + // Unsupported encoding + { + resp: fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + 
"Content-Encoding: sdch\r\n"+ + "Transfer-Encoding: chunked\r\n"+ + "\r\n"+ + "%x\r\n"+ + "%s\r\n"+ + "0\r\n", len(gzipDeflateBody), gzipDeflateBody), + note: "unable to decode body using sdch encoding: decoder not found", + }, + } { + msg := fmt.Sprintf("test case #%d: %+v", testNum, testData) + packet := protos.Packet{Payload: []byte(req)} + private := protos.ProtocolData(&httpConnectionData{}) + private = http.Parse(&packet, tcptuple, 0, private) + + packet.Payload = []byte(testData.resp) + private = http.Parse(&packet, tcptuple, 1, private) + + http.ReceivedFin(tcptuple, 1, private) + + trans := expectTransaction(t, &store) + assert.NotNil(t, trans, msg) + body, err := trans.GetValue("http.response.body") + if err == nil { + assert.Equal(t, testData.expectedBody, body, msg) + } else { + if len(testData.expectedBody) == 0 && len(testData.note) > 0 { + note, err := trans.GetValue("notes") + if !assert.Nil(t, err, msg) { + t.Fatal(err) + } + assert.Equal(t, []string{testData.note}, note) + } else { + t.Fatal(err) + } + } + } +} + +func TestHTTP_Decoding_disabled(t *testing.T) { + const req = "GET / HTTP/1.1\r\n" + + "Host: server\r\n" + + "\r\n" + + deflateBody := string([]byte{0xcb, 0xc8, 0xcf, 0x49, 0xe4, 0x02, 0x00}) + + var store eventStore + http := httpModForTests(&store) + config := defaultConfig + config.IncludeResponseBodyFor = []string{""} + config.DecodeBody = false + + http.setFromConfig(&config) + + tcptuple := testCreateTCPTuple() + + resp := fmt.Sprintf("HTTP/1.1 200 OK\r\n"+ + "Transfer-Encoding: deflate\r\n"+ + "Content-Length: %d\r\n"+ + "\r\n"+ + "%s", len(deflateBody), deflateBody) + + packet := protos.Packet{Payload: []byte(req)} + private := protos.ProtocolData(&httpConnectionData{}) + private = http.Parse(&packet, tcptuple, 0, private) + + packet.Payload = []byte(resp) + private = http.Parse(&packet, tcptuple, 1, private) + + http.ReceivedFin(tcptuple, 1, private) + + trans := expectTransaction(t, &store) + assert.NotNil(t, trans) + body, 
err := trans.GetValue("http.response.body") + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, deflateBody, body) +} + func benchmarkHTTPMessage(b *testing.B, data []byte) { http := httpModForTests(nil) parser := newParser(&http.parserConfig)