diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 2830c8fc459..ee38f711a07 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -36,6 +36,7 @@ https://github.com/elastic/beats/compare/v6.3.0...6.3[Check the HEAD diff] *Filebeat* - Comply with PostgreSQL database name format {pull}7198[7198] +- Optimize PostgreSQL ingest pipeline to use anchored regexp and merge multiple regexp into a single expression. {pull}7269[7269] *Heartbeat* diff --git a/filebeat/module/postgresql/log/ingest/pipeline.json b/filebeat/module/postgresql/log/ingest/pipeline.json index 1d1904b1e38..c9c33c0bb7e 100644 --- a/filebeat/module/postgresql/log/ingest/pipeline.json +++ b/filebeat/module/postgresql/log/ingest/pipeline.json @@ -6,16 +6,11 @@ "field": "message", "ignore_missing": true, "patterns": [ - "%{LOCALDATETIME:postgresql.log.timestamp} %{WORD:postgresql.log.timezone} \\[%{NUMBER:postgresql.log.thread_id}\\] %{USERNAME:postgresql.log.user}@%{POSTGRESQL_DB_NAME:postgresql.log.database} %{WORD:postgresql.log.level}: duration: %{NUMBER:postgresql.log.duration} ms statement: %{MULTILINEQUERY:postgresql.log.query}", - "%{LOCALDATETIME:postgresql.log.timestamp} %{WORD:postgresql.log.timezone} \\[%{NUMBER:postgresql.log.thread_id}\\] \\[%{USERNAME:postgresql.log.user}\\]@\\[%{POSTGRESQL_DB_NAME:postgresql.log.database}\\] %{WORD:postgresql.log.level}: duration: %{NUMBER:postgresql.log.duration} ms statement: %{MULTILINEQUERY:postgresql.log.query}", - "%{LOCALDATETIME:postgresql.log.timestamp} %{WORD:postgresql.log.timezone} \\[%{NUMBER:postgresql.log.thread_id}\\] %{USERNAME:postgresql.log.user}@%{POSTGRESQL_DB_NAME:postgresql.log.database} %{WORD:postgresql.log.level}: ?%{GREEDYDATA:postgresql.log.message}", - "%{LOCALDATETIME:postgresql.log.timestamp} %{WORD:postgresql.log.timezone} \\[%{NUMBER:postgresql.log.thread_id}\\] \\[%{USERNAME:postgresql.log.user}\\]@\\[%{POSTGRESQL_DB_NAME:postgresql.log.database}\\] %{WORD:postgresql.log.level}: ?%{GREEDYDATA:postgresql.log.message}", - "%{LOCALDATETIME:postgresql.log.timestamp} %{WORD:postgresql.log.timezone} \\[%{NUMBER:postgresql.log.thread_id}\\] %{WORD:postgresql.log.level}: ?%{GREEDYDATA:postgresql.log.message}" + "^%{LOCALDATETIME:postgresql.log.timestamp} %{WORD:postgresql.log.timezone} \\[%{NUMBER:postgresql.log.thread_id}\\] ((\\[%{USERNAME:postgresql.log.user}\\]@\\[%{POSTGRESQL_DB_NAME:postgresql.log.database}\\]|%{USERNAME:postgresql.log.user}@%{POSTGRESQL_DB_NAME:postgresql.log.database}) )?%{WORD:postgresql.log.level}: (duration: %{NUMBER:postgresql.log.duration} ms statement: %{GREEDYDATA:postgresql.log.query}|%{GREEDYDATA:postgresql.log.message})" ], "pattern_definitions": { "LOCALDATETIME": "[-0-9]+ %{TIME}", - "GREEDYDATA": ".*", - "MULTILINEQUERY" : "(.|\n|\t)*?;$", + "GREEDYDATA": "(.|\n|\t)*", "POSTGRESQL_DB_NAME": "[a-zA-Z0-9_]+[a-zA-Z0-9_\\$]*" } } diff --git a/filebeat/tests/system/test_modules.py b/filebeat/tests/system/test_modules.py index 05882d26786..ce1161a91ee 100644 --- a/filebeat/tests/system/test_modules.py +++ b/filebeat/tests/system/test_modules.py @@ -91,7 +91,7 @@ def _test_expected_events(self, module, test_file, res, objects): break assert found, "The following expected object was not found:\n {}\nSearched in: \n{}".format( - ev["_source"][module], objects) + pretty_json(ev["_source"][module]), pretty_json(objects)) def run_on_file(self, module, fileset, test_file, cfgfile): print("Testing {}/{} on {}".format(module, fileset, test_file)) @@ -108,7 +108,8 @@ def run_on_file(self, module, fileset, test_file, cfgfile): "-c", cfgfile, "-modules={}".format(module), "-M", "{module}.*.enabled=false".format(module=module), - "-M", "{module}.{fileset}.enabled=true".format(module=module, fileset=fileset), + "-M", "{module}.{fileset}.enabled=true".format( + module=module, fileset=fileset), "-M", "{module}.{fileset}.var.paths=[{test_file}]".format( module=module, fileset=fileset, test_file=test_file), "-M", "*.*.input.close_eof=true", @@ -138,7 +139,8 @@ def run_on_file(self, module, fileset, test_file, cfgfile): assert obj["fileset"]["module"] == module, "expected fileset.module={} but got {}".format( module, obj["fileset"]["module"]) - assert "error" not in obj, "not error expected but got: {}".format(obj) + assert "error" not in obj, "not error expected but got: {}".format( + obj) if (module == "auditd" and fileset == "log") \ or (module == "osquery" and fileset == "result"): @@ -229,13 +231,16 @@ def _run_ml_test(self, setup_flag, modules_flag): # Clean any previous state for df in self.es.transport.perform_request("GET", "/_xpack/ml/datafeeds/")["datafeeds"]: if df["datafeed_id"] == 'filebeat-nginx-access-response_code': - self.es.transport.perform_request("DELETE", "/_xpack/ml/datafeeds/" + df["datafeed_id"]) + self.es.transport.perform_request( + "DELETE", "/_xpack/ml/datafeeds/" + df["datafeed_id"]) for df in self.es.transport.perform_request("GET", "/_xpack/ml/anomaly_detectors/")["jobs"]: if df["job_id"] == 'datafeed-filebeat-nginx-access-response_code': - self.es.transport.perform_request("DELETE", "/_xpack/ml/anomaly_detectors/" + df["job_id"]) + self.es.transport.perform_request( + "DELETE", "/_xpack/ml/anomaly_detectors/" + df["job_id"]) - shutil.rmtree(os.path.join(self.working_dir, "modules.d"), ignore_errors=True) + shutil.rmtree(os.path.join(self.working_dir, + "modules.d"), ignore_errors=True) # generate a minimal configuration cfgfile = os.path.join(self.working_dir, "filebeat.yml") @@ -302,3 +307,7 @@ def _run_ml_test(self, setup_flag, modules_flag): max_timeout=30) beat.kill() + + +def pretty_json(obj): + return json.dumps(obj, indent=2, separators=(',', ': '))