From 5a3831ca8f9ba98007615d4aeaa233e7f76ef126 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Wed, 19 Jul 2017 13:04:39 +0300 Subject: [PATCH] Cherry-pick #4417 to 5.x: Nginx module: use first not private IP address as remote_ip (#4703) * Nginx module: use first not private IP address as remote_ip (#4417) A common customization to the nginx logs is to add the contents of the X-Forwarded-For header in front of the remote IPs. This typically results in a list of remote IPs. This adds a new field `remote_ip_list` which is an array, and uses a Painless script to automatically select the first non-private IP for the `remote_ip` field, which is the field on which GeoIP is applied. Fixes #4322. (cherry picked from commit a2c162f35846c5005c71d15359df9f8a9879a2f7) --- filebeat/docs/fields.asciidoc | 42 ++++++++++++++++++- filebeat/filebeat.template-es2x.json | 23 ++++++++++ filebeat/filebeat.template-es6x.json | 19 +++++++++ filebeat/filebeat.template.json | 19 +++++++++ .../module/apache2/access/_meta/fields.yml | 8 ++++ filebeat/module/nginx/access/_meta/fields.yml | 17 +++++++- .../module/nginx/access/ingest/default.json | 17 +++++++- 7 files changed, 141 insertions(+), 4 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 461655b7e42d..574e56868ebe 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -231,6 +231,22 @@ type: geo_point The longitude and latitude. +[float] +=== apache2.access.geoip.region_name + +type: keyword + +The region name. + + +[float] +=== apache2.access.geoip.city_name + +type: keyword + +The city name. + + [float] == error Fields @@ -765,12 +781,20 @@ Contains fields for the Nginx access logs. +[float] +=== nginx.access.remote_ip_list + +type: list + +An array of remote IP addresses. It is a list because it is common to include, besides the client IP address, IP addresses from headers like `X-Forwarded-For`. See also the `remote_ip` field. + + [float] === nginx.access.remote_ip type: keyword -Client IP address. +Client IP address. The first public IP address from the `remote_ip_list` array. If no public IP addresses are present, this field contains the first private IP address from the `remote_ip_list` array. [float] @@ -953,6 +977,22 @@ type: geo_point The longitude and latitude. +[float] +=== nginx.access.geoip.region_name + +type: keyword + +The region name. + + +[float] +=== nginx.access.geoip.city_name + +type: keyword + +The city name. + + [float] == error Fields diff --git a/filebeat/filebeat.template-es2x.json b/filebeat/filebeat.template-es2x.json index e3cd05e39442..bbc3dd67500f 100644 --- a/filebeat/filebeat.template-es2x.json +++ b/filebeat/filebeat.template-es2x.json @@ -46,6 +46,11 @@ }, "geoip": { "properties": { + "city_name": { + "ignore_above": 1024, + "index": "not_analyzed", + "type": "string" + }, "continent_name": { "ignore_above": 1024, "index": "not_analyzed", @@ -58,6 +63,11 @@ }, "location": { "type": "geo_point" + }, + "region_name": { + "ignore_above": 1024, + "index": "not_analyzed", + "type": "string" } } }, @@ -458,6 +468,11 @@ }, "geoip": { "properties": { + "city_name": { + "ignore_above": 1024, + "index": "not_analyzed", + "type": "string" + }, "continent_name": { "ignore_above": 1024, "index": "not_analyzed", @@ -470,6 +485,11 @@ }, "location": { "type": "geo_point" + }, + "region_name": { + "ignore_above": 1024, + "index": "not_analyzed", + "type": "string" } } }, @@ -493,6 +513,9 @@ "index": "not_analyzed", "type": "string" }, + "remote_ip_list": { + "properties": {} + }, "response_code": { "type": "long" }, diff --git a/filebeat/filebeat.template-es6x.json b/filebeat/filebeat.template-es6x.json index 672526ac9785..aa16fd612e9b 100644 --- a/filebeat/filebeat.template-es6x.json +++ b/filebeat/filebeat.template-es6x.json @@ -37,6 +37,10 @@ }, "geoip": { "properties": { + "city_name": { + "ignore_above": 1024, + "type": "keyword" + }, "continent_name": { "ignore_above": 1024, "type": "keyword" @@ -47,6 +51,10 @@ }, "location": { "type": "geo_point" + }, + "region_name": { + "ignore_above": 1024, + "type": "keyword" } } }, @@ -386,6 +394,10 @@ }, "geoip": { "properties": { + "city_name": { + "ignore_above": 1024, + "type": "keyword" + }, "continent_name": { "ignore_above": 1024, "type": "keyword" @@ -396,6 +408,10 @@ }, "location": { "type": "geo_point" + }, + "region_name": { + "ignore_above": 1024, + "type": "keyword" } } }, @@ -415,6 +431,9 @@ "ignore_above": 1024, "type": "keyword" }, + "remote_ip_list": { + "properties": {} + }, "response_code": { "type": "long" }, diff --git a/filebeat/filebeat.template.json b/filebeat/filebeat.template.json index fca67c5f4497..2cc5af97cef3 100644 --- a/filebeat/filebeat.template.json +++ b/filebeat/filebeat.template.json @@ -40,6 +40,10 @@ }, "geoip": { "properties": { + "city_name": { + "ignore_above": 1024, + "type": "keyword" + }, "continent_name": { "ignore_above": 1024, "type": "keyword" @@ -50,6 +54,10 @@ }, "location": { "type": "geo_point" + }, + "region_name": { + "ignore_above": 1024, + "type": "keyword" } } }, @@ -389,6 +397,10 @@ }, "geoip": { "properties": { + "city_name": { + "ignore_above": 1024, + "type": "keyword" + }, "continent_name": { "ignore_above": 1024, "type": "keyword" @@ -399,6 +411,10 @@ }, "location": { "type": "geo_point" + }, + "region_name": { + "ignore_above": 1024, + "type": "keyword" } } }, @@ -418,6 +434,9 @@ "ignore_above": 1024, "type": "keyword" }, + "remote_ip_list": { + "properties": {} + }, "response_code": { "type": "long" }, diff --git a/filebeat/module/apache2/access/_meta/fields.yml b/filebeat/module/apache2/access/_meta/fields.yml index 97fabdc5cab3..be09717198ce 100644 --- a/filebeat/module/apache2/access/_meta/fields.yml +++ b/filebeat/module/apache2/access/_meta/fields.yml @@ -104,4 +104,12 @@ type: geo_point description: > The longitude and latitude. + - name: region_name + type: keyword + description: > + The region name. + - name: city_name + type: keyword + description: > + The city name. diff --git a/filebeat/module/nginx/access/_meta/fields.yml b/filebeat/module/nginx/access/_meta/fields.yml index 0b5f1eb275ed..6c694b0abcfe 100644 --- a/filebeat/module/nginx/access/_meta/fields.yml +++ b/filebeat/module/nginx/access/_meta/fields.yml @@ -3,10 +3,17 @@ description: > Contains fields for the Nginx access logs. fields: + - name: remote_ip_list + type: list + description: > + An array of remote IP addresses. It is a list because it is common to include, besides the client + IP address, IP addresses from headers like `X-Forwarded-For`. See also the `remote_ip` field. - name: remote_ip type: keyword description: > - Client IP address. + Client IP address. The first public IP address from the `remote_ip_list` array. If no public IP + addresses are present, this field contains the first private IP address from the `remote_ip_list` + array. - name: user_name type: keyword description: > @@ -104,4 +111,12 @@ type: geo_point description: > The longitude and latitude. + - name: region_name + type: keyword + description: > + The region name. + - name: city_name + type: keyword + description: > + The city name. diff --git a/filebeat/module/nginx/access/ingest/default.json b/filebeat/module/nginx/access/ingest/default.json index cf0441d5126c..c1ddbda2cdbf 100644 --- a/filebeat/module/nginx/access/ingest/default.json +++ b/filebeat/module/nginx/access/ingest/default.json @@ -4,11 +4,24 @@ "grok": { "field": "message", "patterns":[ - "%{IPORHOST:nginx.access.remote_ip} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\"" + "\"?%{IP_LIST:nginx.access.remote_ip_list} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\"" ], + "pattern_definitions": { + "IP_LIST": "%{IP}(\"?,?\\s*%{IP})*" + }, "ignore_missing": true } - },{ + }, { + "split": { + "field": "nginx.access.remote_ip_list", + "separator": "\"?,?\\s+" + } + }, { + "script": { + "lang": "painless", + "inline": "boolean isPrivate(def ip) { try { StringTokenizer tok = new StringTokenizer(ip, '.'); int firstByte = Integer.parseInt(tok.nextToken()); int secondByte = Integer.parseInt(tok.nextToken()); if (firstByte == 10) { return true; } if (firstByte == 192 && secondByte == 168) { return true; } if (firstByte == 172 && secondByte >= 16 && secondByte <= 31) { return true; } if (firstByte == 127) { return true; } return false; } catch (Exception e) { return false; } } def found = false; for (def item : ctx.nginx.access.remote_ip_list) { if (!isPrivate(item)) { ctx.nginx.access.remote_ip = item; found = true; break; } } if (!found) { ctx.nginx.access.remote_ip = ctx.nginx.access.remote_ip_list[0]; }" + } + }, { "remove":{ "field": "message" }