Skip to content

Commit

Permalink
Filter out null join keys
Browse files Browse the repository at this point in the history
  • Loading branch information
astefan committed Sep 22, 2021
1 parent fec1a94 commit 63d96c3
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,30 @@ query = '''
process where substring(command_line, 5) regex (".*?net[1]? localgroup.*?", ".*? myappserver.py .*?")
'''


# Sequence joined on a single per-event key (parent_process_path).
# The first event filter only matches events whose parent_process_path is null,
# so the only candidate join key is null. Null join keys are filtered out,
# hence no sequence is expected to match.
[[queries]]
name = "sequenceOnOneNullKey"
query = '''
sequence
[process where parent_process_path == null] by parent_process_path
[any where true] by parent_process_path
'''
expected_event_ids = []

# Sequence joined on two keys: ppid (declared on the sequence) plus
# parent_process_path (declared on each event).
# The first event filter forces parent_process_path to be null, so the join
# would have to happen on a null key; no sequence is expected to match.
[[queries]]
name = "sequenceOnTwoNullKeys"
query = '''
sequence by ppid
[process where parent_process_path == null] by parent_process_path
[any where true] by parent_process_path
'''
expected_event_ids = []

# Same scenario with both join keys (ppid, parent_process_path) declared once
# on the sequence instead of on each event.
# The first event filter forces parent_process_path to be null, so the join
# would have to happen on a null key; no sequence is expected to match.
[[queries]]
name = "sequenceOnImplicitNullKeys"
query = '''
sequence by ppid, parent_process_path
[process where parent_process_path == null]
[any where true]
'''
expected_event_ids = []
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ public BoxedQueryRequest(QueryRequest original, String timestamp, List<String> k
timestampRange = rangeQuery(timestamp).timeZone("UTC").format("epoch_millis");
keys = keyNames;
RuntimeUtils.addFilter(timestampRange, searchSource);
// do not join on null values
if (keyNames.isEmpty() == false) {
BoolQueryBuilder nullValuesFilter = boolQuery();
for (int keyIndex = 0; keyIndex < keyNames.size(); keyIndex++) {
// add an "exists" query for each join key to filter out any non-existent values
nullValuesFilter.must(existsQuery(keyNames.get(keyIndex)));
}
RuntimeUtils.addFilter(nullValuesFilter, searchSource);
}
}

@Override
Expand Down Expand Up @@ -112,18 +121,9 @@ public BoxedQueryRequest keys(List<List<Object>> values) {
// iterate on all possible values for a given key
newFilters = new ArrayList<>(values.size());
for (int keyIndex = 0; keyIndex < keys.size(); keyIndex++) {

boolean hasNullValue = false;
Set<Object> keyValues = new HashSet<>(BoxedQueryRequest.MAX_TERMS);
// check the given keys but make sure to double check for
// null as it translates to a different query (missing/not exists)
for (List<Object> value : values) {
Object keyValue = value.get(keyIndex);
if (keyValue == null) {
hasNullValue = true;
} else {
keyValues.add(keyValue);
}
keyValues.add(value.get(keyIndex));
}

// too many unique terms, don't filter on the keys
Expand All @@ -141,21 +141,6 @@ public BoxedQueryRequest keys(List<List<Object>> values) {
} else if (keyValues.size() > 1) {
query = termsQuery(key, keyValues);
}

// if null values are present
// make an OR call - either terms or null/missing values
if (hasNullValue) {
BoolQueryBuilder isMissing = boolQuery().mustNot(existsQuery(key));
if (query != null) {
query = boolQuery()
// terms query
.should(query)
// is missing
.should(isMissing);
} else {
query = isMissing;
}
}
newFilters.add(query);
}
}
Expand Down

0 comments on commit 63d96c3

Please sign in to comment.