Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: handle JSON character encoding for hec (Umlauts) #112

Merged
merged 4 commits into from
Nov 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ jobs:
test-splunk:
name: test-splunk
runs-on: ubuntu-latest
continue-on-error: true
needs:
- meta
strategy:
Expand Down
2 changes: 1 addition & 1 deletion solnlib/modular_input/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _to_hec(self, event_field):
if hasattr(self, "_fields"):
event["fields"] = self._fields

return json.dumps(event)
return json.dumps(event, ensure_ascii=False)

@classmethod
def format_events(cls, events: List, event_field: str = "event") -> List:
Expand Down
2 changes: 1 addition & 1 deletion solnlib/modular_input/event_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def write_events(
try:
self._rest_client.post(
self.HTTP_EVENT_COLLECTOR_ENDPOINT,
body=event,
body=event.encode("utf-8"),
headers=self.headers,
)
except binding.HTTPError as e:
Expand Down
49 changes: 49 additions & 0 deletions tests/integration/_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#
# Copyright 2021 Splunk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os.path as op
import sys
import time

sys.path.insert(0, op.dirname(op.dirname(op.abspath(__file__))))
import context
from splunklib import client
from splunklib import results as splunklib_results


def search(session_key, query):
    """Run a Splunk search job to completion and return its result rows.

    Args:
        session_key: Session token passed to ``client.connect``.
        query: Search string handed to ``service.jobs.create``.

    Returns:
        A list containing every ``dict`` item yielded by the JSON results
        reader; items of any other type are skipped.
    """
    poll_interval = 0.5

    service = client.connect(host=context.host, token=session_key)
    job = service.jobs.create(query)
    while True:
        # Pause between readiness polls rather than spinning in a tight loop.
        while not job.is_ready():
            time.sleep(poll_interval)
        # The job reports completion as the string "1".
        if job["isDone"] == "1":
            break
        time.sleep(poll_interval)

    json_results_reader = splunklib_results.JSONResultsReader(
        job.results(output_mode="json")
    )
    # Keep only dict items from the reader.
    return [item for item in json_results_reader if isinstance(item, dict)]
35 changes: 34 additions & 1 deletion tests/integration/test_hec_event_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os.path as op
import sys
import time

sys.path.insert(0, op.dirname(op.dirname(op.abspath(__file__))))
import context
from _search import search

from solnlib.modular_input import event_writer as hew

Expand All @@ -36,3 +37,35 @@ def test_hec_event_writer():
m2[i] = "test2 data %s" % i
e2 = ew.create_event(m2, index="main", host="testing", sourcetype="hec")
ew.write_events([e1, e2])


def test_hec_event_writes_with_non_utf_8():
    """Check that umlauts written through the HEC writer are stored literally.

    Writes a single event whose fields contain German umlauts, searches for
    it by the test name, and asserts that the raw event holds the literal
    characters and not their ``\\uXXXX`` JSON escape sequences.
    """
    # To test scenario listed in https://github.com/splunk/addonfactory-solutions-library-python/pull/112.
    test_name = "test_hec_event_writes_with_non_utf_8"
    session_key = context.get_session_key()
    writer = hew.HECEventWriter("test", session_key)

    payload = {
        "test_name": test_name,
        "field_a": "Üü_Öö_Ää_some_text",
        "field_b": "some_text_Üü_Öö_Ää",
    }
    event = writer.create_event(
        [payload],
        index="main",
        host="testing",
        sourcetype="hec",
    )
    writer.write_events([event])
    # Presumably gives the event time to become searchable -- TODO confirm.
    time.sleep(2)

    search_query = f"search index=main sourcetype=hec {test_name}"
    search_results = search(session_key, search_query)

    assert len(search_results) == 1
    _raw_event = search_results[0]["_raw"]

    expected_literals = (
        "Üü_Öö_Ää_some_text",
        "some_text_Üü_Öö_Ää",
    )
    forbidden_escapes = (
        "\\u00dc\\u00fc_\\u00d6\\u00f6_\\u00c4\\u00e4_some_text",
        "some_text_\\u00dc\\u00fc_\\u00d6\\u00f6_\\u00c4\\u00e4",
    )
    for literal in expected_literals:
        assert literal in _raw_event
    for escaped in forbidden_escapes:
        assert escaped not in _raw_event
3 changes: 2 additions & 1 deletion tests/unit/test_modular_input_event_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def mock_post(
self, path_segment, owner=None, app=None, sharing=None, headers=None, **query
):
event_strings = [
json.dumps(json.loads(e), sort_keys=True) for e in query["body"].split("\n")
json.dumps(json.loads(e), sort_keys=True)
for e in query["body"].decode("utf-8").split("\n")
]

assert (
Expand Down