elastic · vitaliidm · Nov 18, 2022 · Nov 14, 2022 · Nov 15, 2022 · Nov 15, 2022
diff --git a/...ns/security_solution/server/lib/detection_engine/rule_types/new_terms/README.md b/...ns/security_solution/server/lib/detection_engine/rule_types/new_terms/README.md
@@ -27,4 +27,3 @@ The new terms rule type reuses the singleSearchAfter function which implements t
 ## Limitations and future enhancements
 
 - Value list exceptions are not supported at the moment. Commit ead04ce removes an experimental method I tried for evaluating value list exceptions.
-- Runtime field supports only 100 emitted values. So for large arrays or combination of values greater than 100, results may not be exhaustive. This applies only to new terms with multiple fields
diff --git a/..._solution/server/lib/detection_engine/rule_types/new_terms/create_new_terms_alert_type.ts b/..._solution/server/lib/detection_engine/rule_types/new_terms/create_new_terms_alert_type.ts
@@ -34,6 +34,7 @@ import {
   getNewTermsRuntimeMappings,
   getAggregationField,
   decodeMatchedValues,
+  createFieldValuesMap,
 } from './utils';
 import {
   addToSearchAfterReturn,
@@ -193,6 +194,7 @@ export const createNewTermsAlertType = (
         }
         const bucketsForField = searchResultWithAggs.aggregations.new_terms.buckets;
         const includeValues = transformBucketsToValues(params.newTermsFields, bucketsForField);
+        const fieldsValuesMap = createFieldValuesMap(params.newTermsFields, bucketsForField);
         // PHASE 2: Take the page of results from Phase 1 and determine if each term exists in the history window.
         // The aggregation filters out buckets for terms that exist prior to `tuple.from`, so the buckets in the
         // response correspond to each new term.
@@ -209,7 +211,7 @@ export const createNewTermsAlertType = (
           }),
           runtimeMappings: {
             ...runtimeMappings,
-            ...getNewTermsRuntimeMappings(params.newTermsFields),
+            ...getNewTermsRuntimeMappings(params.newTermsFields, fieldsValuesMap),
           },
           searchAfterSortIds: undefined,
           index: inputIndex,
@@ -255,7 +257,7 @@ export const createNewTermsAlertType = (
             }),
             runtimeMappings: {
               ...runtimeMappings,
-              ...getNewTermsRuntimeMappings(params.newTermsFields),
+              ...getNewTermsRuntimeMappings(params.newTermsFields, fieldsValuesMap),
             },
             searchAfterSortIds: undefined,
             index: inputIndex,

diff --git a/.../plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.test.ts b/.../plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.test.ts
@@ -12,6 +12,7 @@ import {
   getAggregationField,
   decodeMatchedValues,
   getNewTermsRuntimeMappings,
+  createFieldValuesMap,
   AGG_FIELD_NAME,
 } from './utils';
 
@@ -209,3 +210,137 @@ describe('new terms utils', () => {
     });
   });
 });
+
+// Unit tests for createFieldValuesMap: the single-field short circuit and how
+// string, null, number and boolean bucket values end up in the lookup map.
+describe('createFieldValuesMap', () => {
+  it('should return undefined if new terms fields has only one field', () => {
+    expect(
+      createFieldValuesMap(
+        ['host.name'],
+        [
+          {
+            key: {
+              'host.name': 'host-0',
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'host.name': 'host-1',
+            },
+            doc_count: 3,
+          },
+        ]
+      )
+    ).toBeUndefined();
+  });
+
+  it('should return values map if new terms fields has more than one field', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'source.ip'],
+        [
+          {
+            key: {
+              'source.host': 'host-0',
+              'source.ip': '127.0.0.1',
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.ip': '127.0.0.1',
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-0': true,
+        'host-1': true,
+      },
+      // the same ip appears in both buckets but is stored once
+      'source.ip': {
+        '127.0.0.1': true,
+      },
+    });
+  });
+
+  it('should not put value in map if it is null', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'source.ip'],
+        [
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.ip': null,
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-1': true,
+      },
+      'source.ip': {},
+    });
+  });
+
+  it('should put number value in map as a string key', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'source.id'],
+        [
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.id': 100,
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-1': true,
+      },
+      'source.id': {
+        '100': true,
+      },
+    });
+  });
+
+  it('should put boolean values in map as string keys', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'user.enabled'],
+        [
+          {
+            key: {
+              'source.host': 'host-1',
+              'user.enabled': true,
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'source.host': 'host-1',
+              'user.enabled': false,
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-1': true,
+      },
+      'user.enabled': {
+        true: true,
+        false: true,
+      },
+    });
+  });
+});
diff --git a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.ts b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.ts
@@ -80,8 +80,43 @@ export const transformBucketsToValues = (
     );
 };
 
+/**
+ * Transforms the new terms fields and the values found for them in composite
+ * buckets into a lookup object:
+ * { [new_terms_field]: { [value1]: true, [value2]: true } }
+ * Keying by value gives constant time complexity when checking whether a value is
+ * present in new terms. The map is passed to the Painless script used in the runtime field.
+ *
+ * @param newTermsFields fields configured for the new terms rule
+ * @param buckets composite aggregation buckets; each bucket `key` maps a field name to its value
+ * @returns undefined when there is at most one field, otherwise an object with one entry per
+ * field. null/undefined values are skipped; non-string values are stored under their string form.
+ */
+export const createFieldValuesMap = (
+  newTermsFields: string[],
+  buckets: estypes.AggregationsCompositeBucket[]
+) => {
+  // same threshold as getNewTermsRuntimeMappings, which builds no runtime field for <= 1 fields
+  if (newTermsFields.length <= 1) {
+    return undefined;
+  }
+
+  // every configured field gets an entry, even when no bucket carries a value for it
+  const valuesMap: Record<string, Record<string, boolean>> = {};
+  newTermsFields.forEach((field) => {
+    valuesMap[field] = {};
+  });
+
+  buckets.forEach(({ key: bucketKey }) => {
+    Object.entries(bucketKey).forEach(([field, value]) => {
+      // ignore keys that are not new terms fields instead of throwing on a missing entry;
+      // the own-property check also keeps inherited names such as 'constructor' out
+      if (!Object.prototype.hasOwnProperty.call(valuesMap, field) || value == null) {
+        return;
+      }
+      valuesMap[field][String(value)] = true;
+    });
+  });
+
+  return valuesMap;
+};
+
+
 export const getNewTermsRuntimeMappings = (
-  newTermsFields: string[]
+  newTermsFields: string[],
+  values?: Record<string, Record<string, boolean>>
 ): undefined | { [AGG_FIELD_NAME]: estypes.MappingRuntimeField } => {
   // if new terms include only one field we don't use runtime mappings and don't stitch field buckets together
   if (newTermsFields.length <= 1) {
@@ -92,7 +127,7 @@ export const getNewTermsRuntimeMappings = (
     [AGG_FIELD_NAME]: {
       type: 'keyword',
       script: {
-        params: { fields: newTermsFields },
+        params: { fields: newTermsFields, values },
         source: `
           def stack = new Stack();
           // ES has limit in 100 values for runtime field, after this query will fail
@@ -110,9 +145,14 @@ export const getNewTermsRuntimeMappings = (
                 emit(line);
                 emitLimit = emitLimit - 1;
               } else {
-                for (field in doc[params['fields'][index]]) {
+                def fieldName = params['fields'][index];
+                for (field in doc[fieldName]) {
+                    def fieldStr = String.valueOf(field);
+                    if (!params['values'][fieldName].containsKey(fieldStr)) {
+                      continue;
+                    }
                     def delimiter = index === 0 ? '' : '${DELIMITER}';
-                    def nextLine = line + delimiter + String.valueOf(field).encodeBase64();
+                    def nextLine = line + delimiter + fieldStr.encodeBase64();
 
                     stack.add([index + 1, nextLine])
                 }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -27,4 +27,3 @@ The new terms rule type reuses the singleSearchAfter function which implements t
		## Limitations and future enhancements

		- Value list exceptions are not supported at the moment. Commit ead04ce removes an experimental method I tried for evaluating value list exceptions.
		- Runtime field supports only 100 emitted values. So for large arrays or combination of values greater than 100, results may not be exhaustive. This applies only to new terms with multiple fields
Review comment — marshallmain (Contributor), Nov 16, 2022: This limitation could still be hit, right? It's just less likely now.
Reply — vitaliidm (Contributor, Author), Nov 17, 2022 (edited): Updated README with recent findings.