From c4123eed54590417b11f8cbab31b7a07156a564d Mon Sep 17 00:00:00 2001 From: Justin Tay <49700559+justin-tay@users.noreply.github.com> Date: Tue, 16 Apr 2024 06:30:22 +0800 Subject: [PATCH] Add options to control caching of schemas --- README.md | 23 +- .../networknt/schema/DynamicRefValidator.java | 19 +- .../com/networknt/schema/JsonNodePath.java | 4 +- .../networknt/schema/JsonSchemaFactory.java | 29 +- .../schema/RecursiveRefValidator.java | 19 +- .../com/networknt/schema/RefValidator.java | 33 +- .../schema/SchemaValidatorsConfig.java | 55 + .../schema/JsonSchemaPreloadTest.java | 42 + src/test/resources/issues/1016/schema.json | 11059 ++++++++++++++++ 9 files changed, 11247 insertions(+), 36 deletions(-) create mode 100644 src/test/java/com/networknt/schema/JsonSchemaPreloadTest.java create mode 100644 src/test/resources/issues/1016/schema.json diff --git a/README.md b/README.md index d9a8c8336..c1d7ce1a6 100644 --- a/README.md +++ b/README.md @@ -461,16 +461,19 @@ The following is sample output from the Hierarchical format. ### Schema Validators Configuration -| Name | Description | Default Value -|--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------- -| `pathType` | The path type to use for reporting the instance location and evaluation path. Set to `PathType.JSON_POINTER` to use JSON Pointer. | `PathType.DEFAULT` -| `ecma262Validator` | Whether to use the ECMA 262 `joni` library to validate the `pattern` keyword. This requires the dependency to be manually added to the project or a `ClassNotFoundException` will be thrown. | `false` -| `executionContextCustomizer` | This can be used to customize the `ExecutionContext` generated by the `JsonSchema` for each validation run. 
| `null` -| `schemaIdValidator` | This is used to customize how the `$id` values are validated. Note that the default implementation allows non-empty fragments where no base IRI is specified and also allows non-absolute IRI `$id` values in the root schema. | `JsonSchemaIdValidator.DEFAULT` -| `messageSource` | This is used to retrieve the locale specific messages. | `DefaultMessageSource.getInstance()` -| `locale` | The locale to use for generating messages in the `ValidationMessage`. | `Locale.getDefault()` -| `failFast` | Whether to return failure immediately when an assertion is generated. | `false` -| `formatAssertionsEnabled` | The default is to generate format assertions from Draft 4 to Draft 7 and to only generate annotations from Draft 2019-09. Setting to `true` or `false` will override the default behavior. | `null` +| Name | Description | Default Value +|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------- +| `pathType` | The path type to use for reporting the instance location and evaluation path. Set to `PathType.JSON_POINTER` to use JSON Pointer. | `PathType.DEFAULT` +| `ecma262Validator` | Whether to use the ECMA 262 `joni` library to validate the `pattern` keyword. This requires the dependency to be manually added to the project or a `ClassNotFoundException` will be thrown. | `false` +| `executionContextCustomizer` | This can be used to customize the `ExecutionContext` generated by the `JsonSchema` for each validation run. | `null` +| `schemaIdValidator` | This is used to customize how the `$id` values are validated. Note that the default implementation allows non-empty fragments where no base IRI is specified and also allows non-absolute IRI `$id` values in the root schema. 
| `JsonSchemaIdValidator.DEFAULT` +| `messageSource` | This is used to retrieve the locale specific messages. | `DefaultMessageSource.getInstance()` +| `preloadJsonSchema` | Whether the schema will be preloaded before processing any input. This will use memory but the execution of the validation will be faster. | `true` +| `preloadJsonSchemaRefMaxNestingDepth` | The max depth of the evaluation path to preload when preloading refs. | `40` +| `cacheRefs` | Whether the schemas loaded from refs will be cached and reused for subsequent runs. Setting this to `false` will affect performance but may be necessary to prevent high memory usage for the cache if multiple nested applicators like `anyOf`, `oneOf` and `allOf` are used. | `true` +| `locale` | The locale to use for generating messages in the `ValidationMessage`. | `Locale.getDefault()` +| `failFast` | Whether to return failure immediately when an assertion is generated. | `false` +| `formatAssertionsEnabled` | The default is to generate format assertions from Draft 4 to Draft 7 and to only generate annotations from Draft 2019-09. Setting to `true` or `false` will override the default behavior. | `null` ## Performance Considerations diff --git a/src/main/java/com/networknt/schema/DynamicRefValidator.java b/src/main/java/com/networknt/schema/DynamicRefValidator.java index 7d68d2344..ea4da483f 100644 --- a/src/main/java/com/networknt/schema/DynamicRefValidator.java +++ b/src/main/java/com/networknt/schema/DynamicRefValidator.java @@ -20,7 +20,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import java.util.Collections; +import java.util.Set; +import java.util.function.Supplier; /** * {@link JsonValidator} that resolves $dynamicRef. 
@@ -39,7 +41,7 @@ public DynamicRefValidator(SchemaLocation schemaLocation, JsonNodePath evaluatio static JsonSchemaRef getRefSchema(JsonSchema parentSchema, ValidationContext validationContext, String refValue, JsonNodePath evaluationPath) { String ref = resolve(parentSchema, refValue); - return new JsonSchemaRef(new CachedSupplier<>(() -> { + return new JsonSchemaRef(getSupplier(() -> { JsonSchema refSchema = validationContext.getDynamicAnchors().get(ref); if (refSchema == null) { // This is a $dynamicRef without a matching $dynamicAnchor // A $dynamicRef without a matching $dynamicAnchor in the same schema resource @@ -73,9 +75,13 @@ static JsonSchemaRef getRefSchema(JsonSchema parentSchema, ValidationContext val refSchema = refSchema.fromRef(parentSchema, evaluationPath); } return refSchema; - })); + }, validationContext.getConfig().isCacheRefs())); } - + + static Supplier getSupplier(Supplier supplier, boolean cache) { + return cache ? new CachedSupplier<>(supplier) : supplier; + } + private static String resolve(JsonSchema parentSchema, String refValue) { // $ref prevents a sibling $id from changing the base uri JsonSchema base = parentSchema; @@ -153,14 +159,17 @@ public void preloadJsonSchema() { SchemaLocation schemaLocation = jsonSchema.getSchemaLocation(); JsonSchema check = jsonSchema; boolean circularDependency = false; + int depth = 0; while (check.getEvaluationParentSchema() != null) { + depth++; check = check.getEvaluationParentSchema(); if (check.getSchemaLocation().equals(schemaLocation)) { circularDependency = true; break; } } - if (!circularDependency) { + if (this.validationContext.getConfig().isCacheRefs() && !circularDependency + && depth < this.validationContext.getConfig().getPreloadJsonSchemaRefMaxNestingDepth()) { jsonSchema.initializeValidators(); } } diff --git a/src/main/java/com/networknt/schema/JsonNodePath.java b/src/main/java/com/networknt/schema/JsonNodePath.java index 55001859b..3307a89a4 100644 --- 
a/src/main/java/com/networknt/schema/JsonNodePath.java +++ b/src/main/java/com/networknt/schema/JsonNodePath.java @@ -227,8 +227,8 @@ public boolean equals(Object obj) { if (getClass() != obj.getClass()) return false; JsonNodePath other = (JsonNodePath) obj; - return Objects.equals(parent, other.parent) && Objects.equals(pathSegment, other.pathSegment) - && pathSegmentIndex == other.pathSegmentIndex && type == other.type; + return Objects.equals(pathSegment, other.pathSegment) && pathSegmentIndex == other.pathSegmentIndex + && type == other.type && Objects.equals(parent, other.parent); } @Override diff --git a/src/main/java/com/networknt/schema/JsonSchemaFactory.java b/src/main/java/com/networknt/schema/JsonSchemaFactory.java index 685821db3..3a537cad9 100644 --- a/src/main/java/com/networknt/schema/JsonSchemaFactory.java +++ b/src/main/java/com/networknt/schema/JsonSchemaFactory.java @@ -287,6 +287,17 @@ protected JsonSchema newJsonSchema(final SchemaLocation schemaUri, final JsonNod final ValidationContext validationContext = createValidationContext(schemaNode, config); JsonSchema jsonSchema = doCreate(validationContext, getSchemaLocation(schemaUri), new JsonNodePath(validationContext.getConfig().getPathType()), schemaNode, null, false); + preload(jsonSchema, config); + return jsonSchema; + } + + /** + * Preloads the json schema if the configuration option is set. 
+ * + * @param jsonSchema the schema to preload + * @param config containing the configuration option + */ + private void preload(JsonSchema jsonSchema, SchemaValidatorsConfig config) { if (config.isPreloadJsonSchema()) { try { /* @@ -302,7 +313,6 @@ protected JsonSchema newJsonSchema(final SchemaLocation schemaUri, final JsonNod */ } } - return jsonSchema; } public JsonSchema create(ValidationContext validationContext, SchemaLocation schemaLocation, JsonNodePath evaluationPath, JsonNode schemaNode, JsonSchema parentSchema) { @@ -471,7 +481,7 @@ public JsonSchema getSchema(final InputStream schemaStream, final SchemaValidato public JsonSchema getSchema(final InputStream schemaStream) { return getSchema(schemaStream, createSchemaValidatorsConfig()); } - + /** * Gets the schema. * @@ -480,6 +490,19 @@ public JsonSchema getSchema(final InputStream schemaStream) { * @return the schema */ public JsonSchema getSchema(final SchemaLocation schemaUri, final SchemaValidatorsConfig config) { + JsonSchema schema = loadSchema(schemaUri, config); + preload(schema, config); + return schema; + } + + /** + * Loads the schema. + * + * @param schemaUri the absolute IRI of the schema which can map to the retrieval IRI. + * @param config the config + * @return the schema + */ + protected JsonSchema loadSchema(final SchemaLocation schemaUri, final SchemaValidatorsConfig config) { if (enableSchemaCache) { // ConcurrentHashMap computeIfAbsent does not allow calls that result in a // recursive update to the map. @@ -500,7 +523,7 @@ public JsonSchema getSchema(final SchemaLocation schemaUri, final SchemaValidato } return getMappedSchema(schemaUri, config); } - + protected ObjectMapper getYamlMapper() { return this.yamlMapper != null ? 
this.yamlMapper : YamlMapperFactory.getInstance(); } diff --git a/src/main/java/com/networknt/schema/RecursiveRefValidator.java b/src/main/java/com/networknt/schema/RecursiveRefValidator.java index 8cd0de742..73a72ce9c 100644 --- a/src/main/java/com/networknt/schema/RecursiveRefValidator.java +++ b/src/main/java/com/networknt/schema/RecursiveRefValidator.java @@ -20,7 +20,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import java.util.Collections; +import java.util.Set; +import java.util.function.Supplier; /** * {@link JsonValidator} that resolves $recursiveRef. @@ -47,11 +49,15 @@ public RecursiveRefValidator(SchemaLocation schemaLocation, JsonNodePath evaluat static JsonSchemaRef getRefSchema(JsonSchema parentSchema, ValidationContext validationContext, String refValue, JsonNodePath evaluationPath) { - return new JsonSchemaRef(new CachedSupplier<>(() -> { + return new JsonSchemaRef(getSupplier(() -> { return getSchema(parentSchema, validationContext, refValue, evaluationPath); - })); + }, validationContext.getConfig().isCacheRefs())); } - + + static Supplier getSupplier(Supplier supplier, boolean cache) { + return cache ? 
new CachedSupplier<>(supplier) : supplier; + } + static JsonSchema getSchema(JsonSchema parentSchema, ValidationContext validationContext, String refValue, JsonNodePath evaluationPath) { JsonSchema refSchema = parentSchema.findSchemaResourceRoot(); // Get the document @@ -150,14 +156,17 @@ public void preloadJsonSchema() { SchemaLocation schemaLocation = jsonSchema.getSchemaLocation(); JsonSchema check = jsonSchema; boolean circularDependency = false; + int depth = 0; while (check.getEvaluationParentSchema() != null) { + depth++; check = check.getEvaluationParentSchema(); if (check.getSchemaLocation().equals(schemaLocation)) { circularDependency = true; break; } } - if (!circularDependency) { + if (this.validationContext.getConfig().isCacheRefs() && !circularDependency + && depth < this.validationContext.getConfig().getPreloadJsonSchemaRefMaxNestingDepth()) { jsonSchema.initializeValidators(); } } diff --git a/src/main/java/com/networknt/schema/RefValidator.java b/src/main/java/com/networknt/schema/RefValidator.java index e6b803e76..f7df836ae 100644 --- a/src/main/java/com/networknt/schema/RefValidator.java +++ b/src/main/java/com/networknt/schema/RefValidator.java @@ -20,7 +20,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import java.util.Collections; +import java.util.Set; +import java.util.function.Supplier; /** * {@link JsonValidator} that resolves $ref. @@ -58,10 +60,10 @@ static JsonSchemaRef getRefSchema(JsonSchema parentSchema, ValidationContext val String schemaUriFinal = resolve(parentSchema, refUri); SchemaLocation schemaLocation = SchemaLocation.of(schemaUriFinal); // This should retrieve schemas regardless of the protocol that is in the uri. 
- return new JsonSchemaRef(new CachedSupplier<>(() -> { + return new JsonSchemaRef(getSupplier(() -> { JsonSchema schemaResource = validationContext.getSchemaResources().get(schemaUriFinal); if (schemaResource == null) { - schemaResource = validationContext.getJsonSchemaFactory().getSchema(schemaLocation, validationContext.getConfig()); + schemaResource = validationContext.getJsonSchemaFactory().loadSchema(schemaLocation, validationContext.getConfig()); if (schemaResource != null) { copySchemaResources(validationContext, schemaResource); } @@ -89,12 +91,12 @@ static JsonSchemaRef getRefSchema(JsonSchema parentSchema, ValidationContext val } return schemaResource.fromRef(parentSchema, evaluationPath); } - })); + }, validationContext.getConfig().isCacheRefs())); } else if (SchemaLocation.Fragment.isAnchorFragment(refValue)) { String absoluteIri = resolve(parentSchema, refValue); // Schema resource needs to update the parent and evaluation path - return new JsonSchemaRef(new CachedSupplier<>(() -> { + return new JsonSchemaRef(getSupplier(() -> { JsonSchema schemaResource = validationContext.getSchemaResources().get(absoluteIri); if (schemaResource == null) { schemaResource = validationContext.getDynamicAnchors().get(absoluteIri); @@ -106,15 +108,21 @@ static JsonSchemaRef getRefSchema(JsonSchema parentSchema, ValidationContext val return null; } return schemaResource.fromRef(parentSchema, evaluationPath); - })); + }, validationContext.getConfig().isCacheRefs())); } if (refValue.equals(REF_CURRENT)) { - return new JsonSchemaRef(new CachedSupplier<>( - () -> parentSchema.findSchemaResourceRoot().fromRef(parentSchema, evaluationPath))); + return new JsonSchemaRef( + getSupplier(() -> parentSchema.findSchemaResourceRoot().fromRef(parentSchema, evaluationPath), + validationContext.getConfig().isCacheRefs())); } - return new JsonSchemaRef(new CachedSupplier<>( + return new JsonSchemaRef(getSupplier( () -> getJsonSchema(parentSchema, validationContext, refValue, 
refValueOriginal, evaluationPath) - .fromRef(parentSchema, evaluationPath))); + .fromRef(parentSchema, evaluationPath), + validationContext.getConfig().isCacheRefs())); + } + + static Supplier getSupplier(Supplier supplier, boolean cache) { + return cache ? new CachedSupplier<>(supplier) : supplier; } private static void copySchemaResources(ValidationContext validationContext, JsonSchema schemaResource) { @@ -235,14 +243,17 @@ public void preloadJsonSchema() { SchemaLocation schemaLocation = jsonSchema.getSchemaLocation(); JsonSchema check = jsonSchema; boolean circularDependency = false; + int depth = 0; while (check.getEvaluationParentSchema() != null) { + depth++; check = check.getEvaluationParentSchema(); if (check.getSchemaLocation().equals(schemaLocation)) { circularDependency = true; break; } } - if (!circularDependency) { + if (this.validationContext.getConfig().isCacheRefs() && !circularDependency + && depth < this.validationContext.getConfig().getPreloadJsonSchemaRefMaxNestingDepth()) { jsonSchema.initializeValidators(); } } diff --git a/src/main/java/com/networknt/schema/SchemaValidatorsConfig.java b/src/main/java/com/networknt/schema/SchemaValidatorsConfig.java index 538aa8001..1240da3ec 100644 --- a/src/main/java/com/networknt/schema/SchemaValidatorsConfig.java +++ b/src/main/java/com/networknt/schema/SchemaValidatorsConfig.java @@ -33,6 +33,8 @@ import java.util.Objects; public class SchemaValidatorsConfig { + public static final int DEFAULT_PRELOAD_JSON_SCHEMA_REF_MAX_NESTING_DEPTH = 40; + /** * Used to validate the acceptable $id values. */ @@ -126,6 +128,16 @@ public class SchemaValidatorsConfig { */ private boolean preloadJsonSchema = true; + /** + * Controls the max depth of the evaluation path to preload when preloading refs. + */ + private int preloadJsonSchemaRefMaxNestingDepth = DEFAULT_PRELOAD_JSON_SCHEMA_REF_MAX_NESTING_DEPTH; + + /** + * Controls if schemas loaded from refs will be cached and reused for subsequent runs. 
+ */ + private boolean cacheRefs = true; + // This is just a constant for listening to all Keywords. public static final String ALL_KEYWORD_WALK_LISTENER_KEY = "com.networknt.AllKeywordWalkListener"; @@ -623,4 +635,47 @@ public boolean isPreloadJsonSchema() { public void setPreloadJsonSchema(boolean preloadJsonSchema) { this.preloadJsonSchema = preloadJsonSchema; } + + /** + * Gets the max depth of the evaluation path to preload when preloading refs. + * + * @return the max depth to preload + */ + public int getPreloadJsonSchemaRefMaxNestingDepth() { + return preloadJsonSchemaRefMaxNestingDepth; + } + + /** + * Sets the max depth of the evaluation path to preload when preloading refs. + * + * @param preloadJsonSchemaRefMaxNestingDepth the max depth to preload + */ + public void setPreloadJsonSchemaRefMaxNestingDepth(int preloadJsonSchemaRefMaxNestingDepth) { + this.preloadJsonSchemaRefMaxNestingDepth = preloadJsonSchemaRefMaxNestingDepth; + } + + /** + * Gets if schemas loaded from refs will be cached and reused for subsequent + * runs. + * + * @return true if schemas loaded from refs should be cached + */ + public boolean isCacheRefs() { + return cacheRefs; + } + + /** + * Sets if schemas loaded from refs will be cached and reused for subsequent + * runs. + *

+ * Note that setting this to false will affect performance as refs will need to + * be repeatedly resolved for each evaluation run. It may be needed to be set to + * false if there are multiple nested applicators like anyOf, oneOf and allOf as + * that will consume a lot of memory to cache all the permutations. + * + * @param cacheRefs true to cache + */ + public void setCacheRefs(boolean cacheRefs) { + this.cacheRefs = cacheRefs; + } } diff --git a/src/test/java/com/networknt/schema/JsonSchemaPreloadTest.java b/src/test/java/com/networknt/schema/JsonSchemaPreloadTest.java new file mode 100644 index 000000000..2fe9e3036 --- /dev/null +++ b/src/test/java/com/networknt/schema/JsonSchemaPreloadTest.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.networknt.schema; + +import org.junit.jupiter.api.Test; + +import com.networknt.schema.SpecVersion.VersionFlag; + +/** + * Test to control preloading of schemas. 
+ */ +public class JsonSchemaPreloadTest { + @Test + void cacheRefsFalse() { + JsonSchemaFactory factory = JsonSchemaFactory.getInstance(VersionFlag.V7); + SchemaValidatorsConfig config = new SchemaValidatorsConfig(); + config.setCacheRefs(false); + factory.getSchema(SchemaLocation.of("classpath:/issues/1016/schema.json"), config); + } + + @Test + void preloadSchemaRefMaxNestingDepth() { + JsonSchemaFactory factory = JsonSchemaFactory.getInstance(VersionFlag.V7); + SchemaValidatorsConfig config = new SchemaValidatorsConfig(); + config.setPreloadJsonSchemaRefMaxNestingDepth(20); + factory.getSchema(SchemaLocation.of("classpath:/issues/1016/schema.json"), config); + } +} diff --git a/src/test/resources/issues/1016/schema.json b/src/test/resources/issues/1016/schema.json new file mode 100644 index 000000000..0d8fb1afd --- /dev/null +++ b/src/test/resources/issues/1016/schema.json @@ -0,0 +1,11059 @@ +{ + "$schema" : "http://json-schema.org/draft-07/schema#", + "type" : "object", + "definitions" : { + "var" : { + "type" : "object", + "properties" : { + "@var" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "@var" ], + "defaultSnippets" : [ { + "description" : "Variable reference", + "body" : { + "@var" : "$0" + } + } ] + }, + "comment" : { + "oneOf" : [ { + "type" : "string" + }, { + "type" : "array", + "items" : { + "type" : "string" + } + } ] + }, + "group" : { + "oneOf" : [ { + "oneOf" : [ { + "type" : "string" + }, { + "$ref" : "#/definitions/var" + } ] + }, { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string" + } + }, { + "$ref" : "#/definitions/var" + } ] + } ] + }, + "threads" : { + "oneOf" : [ { + "type" : "string", + "pattern" : "^(\\d+|\\d+-\\d+|[aA][uU][tT][oO])$", + "defaultSnippets" : [ { + "description" : "Let Lingo4G automatically and dynamically adjust the number of threads for this task to minimize latency.", + "body" : "auto" + }, { + "description" : "Let Lingo4G automatically and dynamically 
adjust the number of threads for this task within the range you provide.", + "body" : "4-8" + }, { + "description" : "Use a fixed number of threads (exactly one).", + "body" : "1" + } ] + }, { + "type" : "integer", + "minimum" : 1 + } ] + }, + "weightAggregation" : { + "type" : "string", + "enum" : [ "MIN", "MAX", "COUNT", "SUM", "PRODUCT", "MEAN", "GEOMETRIC_MEAN" ] + }, + "sortOrder" : { + "type" : "string", + "enum" : [ "ASCENDING", "DESCENDING", "UNSPECIFIED" ] + }, + "fields:simple" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "fields:simple" + }, + "fields" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string", + "enum" : [ "fld_double", "fld_double_a", "fld_int", "fld_int_a", "fld_mv", "id", "summary", "summary$phrases", "title", "title$phrases" ] + }, + "examples" : [ [ "fld_double", "fld_double_a", "fld_int", "fld_int_a", "fld_mv", "id", "summary", "summary$phrases", "title", "title$phrases" ] ], + "default" : [ ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "examples" : [ { + "type" : "fields:simple", + "fields" : [ "fld_double", "fld_double_a", "fld_int", "fld_int_a", "fld_mv", "id", "summary", "summary$phrases", "title", "title$phrases" ] + } ], + "required" : [ "type", "fields" ], + "default" : { + "type" : "fields:simple", + "fields" : [ ] + } + }, + "fields" : { + "oneOf" : [ { + "$ref" : "#/definitions/fields:reference" + }, { + "$ref" : "#/definitions/fields:simple" + } ] + }, + "featureFields:simple" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureFields:simple" + }, + "fields" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string", + "enum" : [ "summary$phrases", "title$phrases" ] + }, + "examples" : [ [ "summary$phrases", "title$phrases" ] ], + "default" : [ ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : 
"#/definitions/comment" + } + }, + "additionalProperties" : false, + "examples" : [ { + "type" : "featureFields:simple", + "fields" : [ "summary$phrases", "title$phrases" ] + } ], + "required" : [ "type", "fields" ], + "default" : { + "type" : "featureFields:simple", + "fields" : [ ] + } + }, + "featureFields" : { + "oneOf" : [ { + "$ref" : "#/definitions/featureFields:reference" + }, { + "$ref" : "#/definitions/featureFields:simple" + } ] + }, + "contentField" : { + "type" : "object", + "properties" : { + "maxValues" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 3 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxValueLength" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 250 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "truncationMarker" : { + "oneOf" : [ { + "type" : "string", + "default" : "…" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "valueCount" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "highlighting" : { + "type" : "object", + "properties" : { + "enabled" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "startMarker" : { + "oneOf" : [ { + "type" : "string", + "default" : "⁌%s⁍" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "endMarker" : { + "oneOf" : [ { + "type" : "string", + "default" : "⁌\\%s⁍" + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "additionalProperties" : false, + "default" : { + "enabled" : true, + "startMarker" : "⁌%s⁍", + "endMarker" : "⁌\\%s⁍" + } + } + }, + "additionalProperties" : false, + "defaultSnippets" : [ { + "body" : { + "maxValues" : 3, + "maxValueLength" : 160 + } + }, { + "description" : "Field without query highlighting.", + "body" : { + "maxValues" : 3, + "maxValueLength" : 160, + "highlighting" : { + "enabled" : false + } + } + } ], + "default" : { + "maxValues" : 
3, + "maxValueLength" : 250, + "truncationMarker" : "…", + "valueCount" : false, + "highlighting" : { + "enabled" : true, + "startMarker" : "⁌%s⁍", + "endMarker" : "⁌\\%s⁍" + } + } + }, + "contentFields:simple" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "contentFields:simple" + }, + "fields" : { + "type" : "object", + "additionalProperties" : false, + "examples" : [ { + "fld_double" : { }, + "fld_double_a" : { }, + "fld_int" : { }, + "fld_int_a" : { }, + "fld_mv" : { }, + "id" : { }, + "summary" : { }, + "title" : { } + } ], + "properties" : { + "fld_double" : { + "$ref" : "#/definitions/contentField" + }, + "fld_double_a" : { + "$ref" : "#/definitions/contentField" + }, + "fld_int" : { + "$ref" : "#/definitions/contentField" + }, + "fld_int_a" : { + "$ref" : "#/definitions/contentField" + }, + "fld_mv" : { + "$ref" : "#/definitions/contentField" + }, + "id" : { + "$ref" : "#/definitions/contentField" + }, + "summary" : { + "$ref" : "#/definitions/contentField" + }, + "title" : { + "$ref" : "#/definitions/contentField" + } + }, + "default" : { } + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "examples" : [ { + "type" : "contentFields:simple", + "fields" : { + "fld_double" : { }, + "fld_double_a" : { }, + "fld_int" : { }, + "fld_int_a" : { }, + "fld_mv" : { }, + "id" : { }, + "summary" : { }, + "title" : { } + } + } ], + "required" : [ "type", "fields" ], + "default" : { + "type" : "contentFields:simple", + "fields" : { } + } + }, + "contentFields:grouped" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "contentFields:grouped" + }, + "groups" : { + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "fields" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string", + "enum" : [ "summary", "fld_double_a", "fld_mv", "fld_int", "fld_int_a", "id", "fld_double", "title" ] + } + }, { + "$ref" : "#/definitions/var" + } ] + }, + 
"config" : { + "$ref" : "#/definitions/contentField" + } + }, + "required" : [ "fields" ] + }, + "examples" : [ [ { + "fields" : [ "summary", "fld_double_a", "fld_mv", "fld_int", "fld_int_a", "id", "fld_double", "title" ], + "config" : { } + } ] ], + "default" : [ ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "examples" : [ { + "type" : "contentFields:grouped", + "groups" : [ ] + } ], + "required" : [ "type", "groups" ], + "default" : { + "type" : "contentFields:grouped", + "groups" : [ ] + } + }, + "contentFields:empty" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "contentFields:empty" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "default" : { + "type" : "contentFields:empty" + } + }, + "contentFields" : { + "oneOf" : [ { + "$ref" : "#/definitions/contentFields:reference" + }, { + "$ref" : "#/definitions/contentFields:simple" + }, { + "$ref" : "#/definitions/contentFields:grouped" + }, { + "$ref" : "#/definitions/contentFields:empty" + } ] + }, + "dictionary:glob" : { + "type" : "object", + "description" : "Word and wildcard-based label filtering dictionary.", + "properties" : { + "type" : { + "const" : "dictionary:glob" + }, + "entries" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string", + "defaultSnippets" : [ { + "description" : "Matches labels equal to 'word', case insensitive.", + "body" : "${1:word}" + }, { + "description" : "Matches labels equal to 'Word', case sensitive.", + "body" : "\"${1:Word}\"" + }, { + "description" : "Matches labels starting with or equal to 'including'.", + "body" : "${1:including} *" + }, { + "description" : "Matches labels ending with or equal to 'respectively'", + "body" : "* ${1:respectively}" + }, { + "description" : "Matches labels containing or equal to 'results'", + "body" : "* ${1:results} *" + } ] + }, + "defaultSnippets" : [ { + 
"description" : "Matches labels equal to 'project' and labels containing the word 'results'.", + "body" : [ "project", "* results *" ] + } ], + "default" : [ ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "entries" ], + "defaultSnippets" : [ { + "description" : "Simple dictionary with an empty list of rules.", + "body" : { + "type" : "dictionary:glob", + "entries" : "^[ $0]" + } + }, { + "description" : "Matches labels equal to 'project' and labels containing the word 'results'.", + "body" : { + "type" : "dictionary:glob", + "entries" : [ "project", "* results *" ] + } + } ], + "default" : { + "type" : "dictionary:glob", + "entries" : [ ] + } + }, + "dictionary:regex" : { + "type" : "object", + "description" : "Regular expression based label filtering dictionary.", + "properties" : { + "type" : { + "const" : "dictionary:regex" + }, + "entries" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string", + "defaultSnippets" : [ { + "description" : "Matches labels containing the word 'low' or 'high', case sensitive.", + "body" : ".*(low|high).*" + }, { + "description" : "Matches labels containing the word 'low' or 'high', case insensitive.", + "body" : "(i).*(low|high).*" + }, { + "description" : "Matches labels starting with one or more digits.", + "body" : "\\d+.*" + } ] + }, + "defaultSnippets" : [ { + "description" : "Regex dictionary with an empty list of rules.", + "body" : "^[ $0]" + }, { + "description" : "Matches labels containing the words 'low' or 'high'.", + "body" : [ ".*(low|high).*" ] + } ], + "default" : [ ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "entries" ], + "defaultSnippets" : [ { + "description" : "Regex dictionary with an empty list of rules.", + "body" : { + "type" : 
"dictionary:regex", + "entries" : "^[ $0]" + } + }, { + "description" : "Matches labels containing the words 'low' or 'high'.", + "body" : { + "type" : "dictionary:regex", + "entries" : [ ".*(low|high).*" ] + } + } ], + "default" : { + "type" : "dictionary:regex", + "entries" : [ ] + } + }, + "dictionary:project" : { + "type" : "object", + "description" : "A reference to a dictionary declared in the project descriptor.", + "properties" : { + "type" : { + "const" : "dictionary:project" + }, + "dictionary" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "defaultSimple", "defaultRegex", "test_dict_1" ], + "default" : null + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "dictionary" ], + "default" : { + "type" : "dictionary:project", + "dictionary" : null + } + }, + "dictionary:all" : { + "type" : "object", + "description" : "All dictionaries declared in the project descriptor, combined.", + "properties" : { + "type" : { + "const" : "dictionary:all" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "default" : { + "type" : "dictionary:all" + } + }, + "dictionary:queryTerms" : { + "type" : "object", + "description" : "A dictionary created dynamically from query terms (if they are available).", + "properties" : { + "type" : { + "const" : "dictionary:queryTerms" + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "query" ], + "default" : { + "type" : "dictionary:queryTerms", + "query" : null + } + }, + "dictionary" : { + "oneOf" : [ { + "$ref" : "#/definitions/dictionary:reference" + }, { + "$ref" : "#/definitions/dictionary:glob" + }, { + "$ref" : "#/definitions/dictionary:regex" + }, { + "$ref" : 
"#/definitions/dictionary:project" + }, { + "$ref" : "#/definitions/dictionary:all" + }, { + "$ref" : "#/definitions/dictionary:queryTerms" + } ] + }, + "labelFilter:composite" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:composite" + }, + "labelFilters" : { + "type" : "object", + "additionalProperties" : { + "$ref" : "#/definitions/labelFilter" + }, + "defaultSnippets" : [ { + "body" : { + "${1:filterName}" : "^$0" + } + } ], + "default" : { } + }, + "operator" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "OR", "AND" ], + "default" : "AND", + "description" : "The operator to use to combine label filters.\n\nAND will create a composite conjunction filter, OR will create a disjunction filter." + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labelFilters" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelFilter:composite", + "labelFilters" : { + "${1:filterName}" : "^$0" + }, + "operator" : "AND" + } + } ], + "default" : { + "type" : "labelFilter:composite", + "labelFilters" : { }, + "operator" : "AND" + } + }, + "labelFilter:complement" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:complement" + }, + "labelFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelFilter" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labelFilter" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelFilter:complement", + "filter" : "^$0" + } + } ], + "default" : { + "type" : "labelFilter:complement", + "labelFilter" : null + } + }, + "labelFilter:acceptAll" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:acceptAll" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : 
[ "type" ], + "defaultSnippets" : [ { + "description" : "Accepts all labels.", + "body" : { + "type" : "labelFilter:acceptAll" + } + } ], + "default" : { + "type" : "labelFilter:acceptAll" + } + }, + "labelFilter:acceptLabels" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:acceptLabels" + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labels" ], + "defaultSnippets" : [ { + "description" : "Accepts labels from the provided list.", + "body" : { + "type" : "labelFilter:acceptLabels", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "labelFilter:acceptLabels", + "labels" : { + "type" : "labels:reference", + "auto" : true + } + } + }, + "labelFilter:rejectLabels" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:rejectLabels" + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labels" ], + "defaultSnippets" : [ { + "description" : "Rejects labels from the provided list.", + "body" : { + "type" : "labelFilter:rejectLabels", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "labelFilter:rejectLabels", + "labels" : { + "type" : "labels:reference", + "auto" : true + } + } + }, + "labelFilter:autoStopLabels" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:autoStopLabels" + }, + "minCoverage" : { + "oneOf" : [ { + "type" : "number", + "defaultSnippets" : [ { + "label" : "1.0: removes only the most popular stop labels", + "description" : "Removes only the stop labels that appeared in 100% of the documents sampled during 
automatic stop label discovery. Use this setting to avoid removing meaningful labels at the cost of some stop labels passing through the filter.", + "body" : 1.0 + }, { + "label" : "0.8: removes popular stop labels (recommended value)", + "body" : 0.8 + }, { + "label" : "0.6: removes most auto-discovered stop labels", + "body" : 0.8 + }, { + "label" : "0.0: removes all auto-discovered stop labels", + "body" : 0.0 + } ], + "default" : 0.4, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "removalStrength" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.35, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "description" : "Removes the stop labels that appeared in at least 80% of the documents sampled during automatic stop label discovery.\n\nThis is the recommended configuration of this filter.", + "body" : { + "type" : "labelFilter:autoStopLabels", + "minCoverage" : 0.8 + } + }, { + "description" : "Removes only the stop labels that appeared in 100% of the documents sampled during automatic stop label discovery.\n\nUse this setting to avoid removing meaningful labels at the cost of some stop labels passing through the filter.", + "body" : { + "type" : "labelFilter:autoStopLabels", + "minCoverage" : 1.0 + } + } ], + "default" : { + "type" : "labelFilter:autoStopLabels", + "minCoverage" : 0.4, + "removalStrength" : 0.35 + } + }, + "labelFilter:tokenCount" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:tokenCount" + }, + "minTokens" : { + "oneOf" : [ { + "type" : "integer", + "default" : 3, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxTokens" : { + "oneOf" : [ { + "type" : "integer", + "default" : 8, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { 
+ "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "description" : "Keeps labels that are between 2 and 5 words long.", + "body" : { + "type" : "labelFilter:tokenCount", + "minTokens" : "^${1:2}", + "maxTokens" : "^${2:5}" + } + }, { + "description" : "Removes one-word labels.", + "body" : { + "type" : "labelFilter:tokenCount", + "minTokens" : 2 + } + }, { + "description" : "Removes labels longer than 4 words.", + "body" : { + "type" : "labelFilter:tokenCount", + "maxTokens" : 4 + } + } ], + "default" : { + "type" : "labelFilter:tokenCount", + "minTokens" : 3, + "maxTokens" : 8 + } + }, + "labelFilter:characterCount" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:characterCount" + }, + "minCharacters" : { + "oneOf" : [ { + "type" : "number", + "default" : 4, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minCharactersAveragePerToken" : { + "oneOf" : [ { + "type" : "number", + "default" : 2.9, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "description" : "Keeps labels that are 4 or more characters long, including spaces.", + "body" : { + "type" : "labelFilter:characterCount", + "minCharacters" : "^${1:4}" + } + }, { + "description" : "Keeps labels whose average word length is 2.8 and larger. 
You can use this filter to remove labels consisting of very short words.", + "body" : { + "type" : "labelFilter:characterCount", + "minCharactersAveragePerToken" : "^${1:2.8}" + } + } ], + "default" : { + "type" : "labelFilter:characterCount", + "minCharacters" : 4, + "minCharactersAveragePerToken" : 2.9 + } + }, + "labelFilter:surface" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:surface" + }, + "removeCapitalized" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "removeUppercase" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "removeAcronyms" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "description" : "Removes 'Capitalized', 'UPPERCASE' and 'AcRoNYM' labels.", + "body" : { + "type" : "labelFilter:surface", + "removeCapitalized" : "^${1:true}", + "removeUppercase" : "^${2:true}", + "removeAcronyms" : "^${3:true}" + } + } ], + "default" : { + "type" : "labelFilter:surface", + "removeCapitalized" : false, + "removeUppercase" : false, + "removeAcronyms" : false + } + }, + "labelFilter:dictionary" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:dictionary" + }, + "exclude" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/dictionary" + }, + "default" : [ ] + }, + "addAllProjectDictionaries" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "exclude" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelFilter:dictionary", + "exclude" : [ "^$0" ] + } + } ], + 
"default" : { + "type" : "labelFilter:dictionary", + "exclude" : [ ], + "addAllProjectDictionaries" : true + } + }, + "labelFilter:hasEmbedding" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:hasEmbedding" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelFilter:hasEmbedding" + } + } ], + "default" : { + "type" : "labelFilter:hasEmbedding" + } + }, + "labelFilter" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelFilter:reference" + }, { + "$ref" : "#/definitions/labelFilter:composite" + }, { + "$ref" : "#/definitions/labelFilter:complement" + }, { + "$ref" : "#/definitions/labelFilter:tokenCount" + }, { + "$ref" : "#/definitions/labelFilter:characterCount" + }, { + "$ref" : "#/definitions/labelFilter:surface" + }, { + "$ref" : "#/definitions/labelFilter:autoStopLabels" + }, { + "$ref" : "#/definitions/labelFilter:dictionary" + }, { + "$ref" : "#/definitions/labelFilter:hasEmbedding" + }, { + "$ref" : "#/definitions/labelFilter:acceptAll" + }, { + "$ref" : "#/definitions/labelFilter:acceptLabels" + }, { + "$ref" : "#/definitions/labelFilter:rejectLabels" + } ] + }, + "labelListFilter:acceptAll" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelListFilter:acceptAll" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelListFilter:acceptAll" + } + } ], + "default" : { + "type" : "labelListFilter:acceptAll" + } + }, + "labelListFilter:truncatedPhrases" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelListFilter:truncatedPhrases" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : 
"labelListFilter:truncatedPhrases" + } + } ], + "default" : { + "type" : "labelListFilter:truncatedPhrases" + } + }, + "labelListFilter" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelListFilter:reference" + }, { + "$ref" : "#/definitions/labelListFilter:acceptAll" + }, { + "$ref" : "#/definitions/labelListFilter:truncatedPhrases" + } ] + }, + "labelScorer:identity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelScorer:identity" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelScorer:identity" + } + } ], + "default" : { + "type" : "labelScorer:identity" + } + }, + "labelScorer:composite" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelScorer:composite" + }, + "scorers" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/labelScorer" + }, + "default" : [ ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "scorers" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelScorer:composite", + "scorers" : [ "^$0" ] + } + } ], + "default" : { + "type" : "labelScorer:composite", + "scorers" : [ ] + } + }, + "labelScorer:df" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelScorer:df" + }, + "scope" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "scope" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelScorer:df" + } + } ], + "default" : { + "type" : "labelScorer:df", + "scope" : { + 
"type" : "documents:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + } + } + }, + "labelScorer:idf" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelScorer:idf" + }, + "scope" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "scope" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelScorer:idf" + } + } ], + "default" : { + "type" : "labelScorer:idf", + "scope" : { + "type" : "documents:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + } + } + }, + "labelScorer:tf" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelScorer:tf" + }, + "scope" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "scope" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelScorer:tf" + } + } ], + "default" : { + "type" : "labelScorer:tf", + "scope" : { + "type" : "documents:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + } + } + }, + "labelScorer:probabilityRatio" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelScorer:probabilityRatio" + }, + "baseScope" : { + "allOf" : [ { + "$ref" : 
"#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "referenceScope" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:sample", + "limit" : 10000, + "query" : { + "type" : "query:all" + }, + "randomSeed" : 0, + "samplingRatio" : 1.0 + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "baseScope", "fields" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelScorer:probabilityRatio" + } + } ], + "default" : { + "type" : "labelScorer:probabilityRatio", + "baseScope" : { + "type" : "documents:reference", + "auto" : true + }, + "referenceScope" : { + "type" : "documents:sample", + "limit" : 10000, + "query" : { + "type" : "query:all" + }, + "randomSeed" : 0, + "samplingRatio" : 1.0 + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + } + } + }, + "labelScorer" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelScorer:identity" + }, { + "$ref" : "#/definitions/labelScorer:composite" + }, { + "$ref" : "#/definitions/labelScorer:df" + }, { + "$ref" : "#/definitions/labelScorer:idf" + }, { + "$ref" : "#/definitions/labelScorer:tf" + }, { + "$ref" : "#/definitions/labelScorer:probabilityRatio" + } ] + }, + "labelCollector:topFromFeatureFields" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelCollector:topFromFeatureFields", + "description" : "Collects labels occurring in the provided documents." 
+ }, + "labelFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelFilter" + }, { + "default" : { + "type" : "labelFilter:reference", + "auto" : true + } + } ] + }, + "labelListFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelListFilter" + }, { + "default" : { + "type" : "labelListFilter:truncatedPhrases" + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "minTf" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minTfMass" : { + "oneOf" : [ { + "type" : "number", + "default" : 1.0, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "tieResolution" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "TRUNCATE", "EXTEND", "REDUCE", "AUTO" ], + "default" : "AUTO" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "defaultSnippets" : [ { + "body" : { + "type" : "labelCollector:topFromFeatureFields" + } + } ], + "default" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + } + }, + "labelCollector" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelCollector:reference" + }, { + "$ref" : "#/definitions/labelCollector:topFromFeatureFields" + } ] + }, + "labelAggregator:topWeight" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelAggregator:topWeight" + }, + "labelCollector" : { + "allOf" : [ { + "$ref" : "#/definitions/labelCollector" + }, { + "default" : { + "type" : "labelCollector:topFromFeatureFields", + 
"labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + } + } ] + }, + "maxLabelsPerDocument" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minAbsoluteDf" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minRelativeDf" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.0, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxRelativeDf" : { + "oneOf" : [ { + "type" : "number", + "default" : 1.0, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minWeight" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.0, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "tieResolution" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "TRUNCATE", "EXTEND", "REDUCE", "AUTO" ], + "default" : "AUTO" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "outputWeightFormula" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "TF", "DF" ], + "default" : "TF" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelAggregator:topWeight" + } + } ], + "default" : { + "type" : "labelAggregator:topWeight", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" 
: true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxLabelsPerDocument" : 10, + "minAbsoluteDf" : 1, + "minRelativeDf" : 0.0, + "maxRelativeDf" : 1.0, + "minWeight" : 0.0, + "tieResolution" : "AUTO", + "outputWeightFormula" : "TF", + "threads" : "auto" + } + }, + "labelAggregator" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelAggregator:topWeight" + } ] + }, + "labelCount" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelCount:reference" + }, { + "$ref" : "#/definitions/labelCount:fixed" + }, { + "$ref" : "#/definitions/labelCount:unlimited" + }, { + "$ref" : "#/definitions/labelCount:progressive" + } ] + }, + "labelCount:fixed" : { + "type" : "object", + "description" : "An explicit, hard limit of the number of labels that is not document-count sensitive.", + "properties" : { + "type" : { + "const" : "labelCount:fixed" + }, + "value" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelCount:fixed", + "value" : "^$0" + } + } ], + "default" : { + "type" : "labelCount:fixed", + "value" : 10000 + } + }, + "labelCount:unlimited" : { + "type" : "object", + "description" : "No limit on the number of labels.", + "properties" : { + "type" : { + "const" : "labelCount:unlimited" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelCount:unlimited" + } + } ], + "default" : { + "type" : "labelCount:unlimited" + } + }, + "labelCount:progressive" : { + "type" : "object", + "description" : "A 
limit that scales with the number of documents, progressively.", + "properties" : { + "type" : { + "const" : "labelCount:progressive" + }, + "min" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "max" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : "unlimited" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "multiplier" : { + "oneOf" : [ { + "type" : "number", + "default" : 2.0, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "exponent" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.75, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelCount:progressive", + "multiplier" : "^$0" + } + } ], + "default" : { + "type" : "labelCount:progressive", + "max" : "unlimited", + "min" : 0, + "multiplier" : 2.0, + "exponent" : 0.75 + } + }, + "query:forFieldValues" : { + "type" : "object", + "description" : "A query that is defined by a set of documents with the provided field values.", + "properties" : { + "type" : { + "const" : "query:forFieldValues" + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/contentFields" + }, { + "default" : { + "type" : "contentFields:reference", + "auto" : true + } + } ] + }, + "values" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string" + }, + "description" : "An array of values to look for in the provided set of fields.", + "default" : [ ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields", "values" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:forFieldValues", + "fields" : { + "type" : "contentFields:simple", + "fields" : { } + 
}, + "values" : [ ] + } + } ], + "default" : { + "type" : "query:forFieldValues", + "fields" : { + "type" : "contentFields:reference", + "auto" : true + }, + "values" : [ ] + } + }, + "query:forLabels" : { + "type" : "object", + "description" : "A query that is defined by a set of labels.", + "properties" : { + "type" : { + "const" : "query:forLabels" + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "operator" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "OR", "AND" ], + "default" : "OR", + "description" : "The operator to use to combine labels.\n\nOR will select documents containing at least one of the provided labels. AND will select documents containing all of the provided labels." + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "minOrMatches" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1, + "exclusiveMinimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:forLabels", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "query:forLabels", + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "operator" : "OR", + "minOrMatches" : 1 + } + }, + "query:all" : { + "type" : "object", + "description" : "A query that matches all documents in the index.", + "properties" : { + "type" : { + "const" : "query:all" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:all" + } 
+ } ], + "default" : { + "type" : "query:all" + } + }, + "query:string" : { + "type" : "object", + "description" : "A query that is defined by a text query string and a query parser that converts it to a Lucene Query.", + "properties" : { + "type" : { + "const" : "query:string" + }, + "query" : { + "oneOf" : [ { + "type" : "string", + "description" : "The query string to use\n\nThe query string must be valid with respect to the provided query parser.", + "default" : "" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "queryParser" : { + "allOf" : [ { + "$ref" : "#/definitions/queryParser" + }, { + "default" : { + "type" : "queryParser:project", + "queryParserKey" : "" + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "query" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:string", + "query" : "$0" + } + } ], + "default" : { + "type" : "query:string", + "queryParser" : { + "type" : "queryParser:project", + "queryParserKey" : "" + }, + "query" : "" + } + }, + "query:filter" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "query:filter" + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "filter" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "query", "filter" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:filter", + "query" : "^$1", + "filter" : "^$0" + } + } ], + "default" : { + "type" : "query:filter", + "query" : null, + "filter" : null + } + }, + "query:complement" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "query:complement" + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + 
"additionalProperties" : false, + "required" : [ "type", "query" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:complement", + "query" : "^$0" + } + } ], + "default" : { + "type" : "query:complement", + "query" : null + } + }, + "query:composite" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "query:composite" + }, + "queries" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/query" + }, + "default" : [ ] + }, + "operator" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "OR", "AND" ], + "default" : "OR" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "queries" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:composite", + "queries" : [ "^$0" ] + } + } ], + "default" : { + "type" : "query:composite", + "queries" : [ ], + "operator" : "OR" + } + }, + "query:fromDocuments" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "query:fromDocuments" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "buildFromDocumentIds" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:fromDocuments", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "buildFromDocumentIds" : false + } + }, + "queryParser" : { + "oneOf" : [ { + "$ref" : "#/definitions/queryParser:reference" + }, { + "$ref" : "#/definitions/queryParser:project" + }, { + "$ref" : "#/definitions/queryParser:enhanced" + } ] + }, + "queryParser:project" : { + 
"type" : "object", + "description" : "A reference to a query parser declared in the project descriptor's queryParsers block.", + "properties" : { + "type" : { + "const" : "queryParser:project" + }, + "queryParserKey" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "enhanced", "standard", "", "complex" ], + "default" : "" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "default" : { + "type" : "queryParser:project", + "queryParserKey" : "" + } + }, + "queryParser:enhanced" : { + "type" : "object", + "description" : "Query parser parsing Lucene syntax with interval queries and other enhancements.", + "properties" : { + "type" : { + "const" : "queryParser:enhanced" + }, + "sanitizeSpaces" : { + "oneOf" : [ { + "type" : "string", + "default" : "(?U)[\\u001c-\\u001f\\p{IsWhite_Space}\\u2028\\u2029]+" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sanitizeNonSpaces" : { + "oneOf" : [ { + "type" : "string", + "default" : "(?U)[\\u200B-\\u200D]+" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "validateFields" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "defaultFields" : { + "oneOf" : [ { + "type" : "array", + "items" : { + "type" : "string", + "enum" : [ "summary", "fld_double_a", "fld_mv", "fld_int", "fld_int_a", "id", "fld_double", "title" ] + }, + "default" : [ ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "defaultOperator" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "OR", "AND" ], + "default" : "AND" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "defaultFields" ], + "default" : { + "type" : "queryParser:enhanced", + "sanitizeSpaces" : "(?U)[\\u001c-\\u001f\\p{IsWhite_Space}\\u2028\\u2029]+", + "sanitizeNonSpaces" : 
"(?U)[\\u200B-\\u200D]+", + "validateFields" : true, + "defaultFields" : [ ], + "defaultOperator" : "AND" + } + }, + "query" : { + "oneOf" : [ { + "$ref" : "#/definitions/query:reference" + }, { + "$ref" : "#/definitions/query:all" + }, { + "$ref" : "#/definitions/query:string" + }, { + "$ref" : "#/definitions/query:forLabels" + }, { + "$ref" : "#/definitions/query:forFieldValues" + }, { + "$ref" : "#/definitions/query:filter" + }, { + "$ref" : "#/definitions/query:complement" + }, { + "$ref" : "#/definitions/query:composite" + }, { + "$ref" : "#/definitions/query:fromDocuments" + } ] + }, + "limit" : { + "oneOf" : [ { + "const" : "unlimited" + }, { + "type" : "integer", + "minimum" : 0 + } ] + }, + "documents:byId" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:byId", + "description" : "Selects documents by the provided internal document identifiers." + }, + "documents" : { + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "id" : { + "oneOf" : [ { + "type" : "integer" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "weight" : { + "oneOf" : [ { + "type" : "number" + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "required" : [ "id" ], + "defaultSnippets" : [ { + "description" : "Document identifier and optional weight.", + "body" : { + "id" : "^$1", + "weight" : "^$0" + } + } ] + }, + "default" : [ ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:byId", + "documents" : [ "^$0" ] + } + } ], + "default" : { + "type" : "documents:byId", + "documents" : [ ] + } + }, + "documents:byQuery" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:byQuery", + "description" : "Selects documents using a Lucene syntax query string." 
+ }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "accurateHitCount" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "requireScores" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "query" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:byQuery", + "query" : "^$0" + } + }, { + "description" : "Selects documents using a Lucene syntax query string.", + "body" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "$1" + }, + "limit" : "^${2:1000}" + } + } ], + "default" : { + "type" : "documents:byQuery", + "limit" : 10000, + "query" : null, + "accurateHitCount" : false, + "requireScores" : true + } + }, + "documents:composite" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:composite" + }, + "selectors" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/documents" + }, + "default" : [ ] + }, + "operator" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "OR", "AND" ], + "default" : "OR" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "weightAggregation" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/weightAggregation" + }, { + "default" : "SUM" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : 
"#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "selectors" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:composite", + "selectors" : [ "^$0" ] + } + } ], + "default" : { + "type" : "documents:composite", + "selectors" : [ ], + "operator" : "OR", + "sortOrder" : "DESCENDING", + "weightAggregation" : "SUM" + } + }, + "documents:fromMatrixColumns" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:fromMatrixColumns" + }, + "matrixRows" : { + "allOf" : [ { + "$ref" : "#/definitions/matrixRows" + }, { + "default" : { + "type" : "matrixRows:reference", + "auto" : true + } + } ] + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "weightAggregation" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/weightAggregation" + }, { + "default" : "SUM" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "matrixRows" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:fromMatrixColumns", + "matrixRows" : "^$1", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "documents:fromMatrixColumns", + "limit" : 10000, + "matrixRows" : { + "type" : "matrixRows:reference", + "auto" : true + }, + 
"documents" : { + "type" : "documents:reference", + "auto" : true + }, + "sortOrder" : "DESCENDING", + "weightAggregation" : "SUM" + } + }, + "documents:fromClusterExemplars" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:fromClusterExemplars" + }, + "clusters" : { + "allOf" : [ { + "$ref" : "#/definitions/clusters" + }, { + "default" : { + "type" : "clusters:reference", + "auto" : true + } + } ] + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "clusters" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:fromClusterExemplars", + "clusters" : "^$1", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "documents:fromClusterExemplars", + "limit" : 10000, + "clusters" : { + "type" : "clusters:reference", + "auto" : true + }, + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "sortOrder" : "DESCENDING" + } + }, + "documents:fromClusterMembers" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:fromClusterMembers" + }, + "clusters" : { + "allOf" : [ { + "$ref" : "#/definitions/clusters" + }, { + "default" : { + "type" : "clusters:reference", + "auto" : true + } + } ] + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : 
"documents:reference", + "auto" : true + } + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "clusters" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:fromClusterMembers", + "clusters" : "^$2", + "documents" : "^$1" + } + } ], + "default" : { + "type" : "documents:fromClusterMembers", + "limit" : 10000, + "clusters" : { + "type" : "clusters:reference", + "auto" : true + }, + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "sortOrder" : "DESCENDING" + } + }, + "documents:sample" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:sample" + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "samplingRatio" : { + "oneOf" : [ { + "type" : "number", + "default" : 1.0, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "randomSeed" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "query" ], + "defaultSnippets" : [ { + "description" : "Takes a random sample of 
documents returned by a query. The limit property sets the maximum number of documents to select, regardless of the number of documents that match the query.", + "body" : { + "type" : "documents:sample", + "limit" : "^$1", + "query" : "^$0" + } + }, { + "description" : "Samples documents selected by a string query. The limit property sets the maximum number of documents to select, regardless of the number of documents that match the query.", + "body" : { + "type" : "documents:sample", + "query" : { + "type" : "query:string", + "query" : "$1" + }, + "limit" : "^$2" + } + } ], + "default" : { + "type" : "documents:sample", + "limit" : 10000, + "query" : null, + "randomSeed" : 0, + "samplingRatio" : 1.0 + } + }, + "documents:embeddingNearestNeighbors" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:embeddingNearestNeighbors" + }, + "vector" : { + "allOf" : [ { + "$ref" : "#/definitions/vector" + }, { + "default" : { + "type" : "vector:reference", + "auto" : true + } + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 100 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "filterQuery" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : { + "type" : "query:all" + } + } ] + }, + "searcher" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "AUTO", "APPROXIMATE", "COMPLETE" ], + "default" : "AUTO" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "failIfEmbeddingsNotAvailable" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "vector" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : "^$0" + } + } ], + "default" : { + 
"type" : "documents:embeddingNearestNeighbors", + "limit" : 100, + "vector" : { + "type" : "vector:reference", + "auto" : true + }, + "filterQuery" : { + "type" : "query:all" + }, + "failIfEmbeddingsNotAvailable" : true, + "searcher" : "AUTO" + } + }, + "documents:contrastScore" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:contrastScore" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "matrixRows" : { + "allOf" : [ { + "$ref" : "#/definitions/matrixRows" + }, { + "default" : { + "type" : "matrixRows:reference", + "auto" : true + } + } ] + }, + "documentTimestamps" : { + "allOf" : [ { + "$ref" : "#/definitions/values" + }, { + "default" : null + } ] + }, + "contextTimestamps" : { + "allOf" : [ { + "$ref" : "#/definitions/values" + }, { + "default" : null + } ] + }, + "forceSymmetricalContext" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 10000 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minSimilarDocuments" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "matrixRows", "documentTimestamps", "contextTimestamps" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:contrastScore", + "documents" : "^$3", + "matrixRows" : "^$2", + "documentTimestamps" : "^$1", 
+ "contextTimestamps" : "^$0" + } + } ], + "default" : { + "type" : "documents:contrastScore", + "limit" : 10000, + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "matrixRows" : { + "type" : "matrixRows:reference", + "auto" : true + }, + "documentTimestamps" : null, + "contextTimestamps" : null, + "forceSymmetricalContext" : true, + "minSimilarDocuments" : 0, + "sortOrder" : "DESCENDING" + } + }, + "documents:rwmd" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:rwmd" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "labelFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelFilter" + }, { + "default" : { + "type" : "labelFilter:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "failIfEmbeddingsNotAvailable" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:rwmd", + "documents" : "^$1", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "documents:rwmd", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + 
"auto" : true + }, + "failIfEmbeddingsNotAvailable" : true + } + }, + "documents:fromDocumentPairs" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:fromDocumentPairs" + }, + "documentPairs" : { + "allOf" : [ { + "$ref" : "#/definitions/documentPairs" + }, { + "default" : { + "type" : "documentPairs:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documentPairs" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:fromDocumentPairs", + "documentPairs" : "^$0" + } + } ], + "default" : { + "type" : "documents:fromDocumentPairs", + "documentPairs" : { + "type" : "documentPairs:reference", + "auto" : true + } + } + }, + "documents" : { + "oneOf" : [ { + "$ref" : "#/definitions/documents:reference" + }, { + "$ref" : "#/definitions/documents:byQuery" + }, { + "$ref" : "#/definitions/documents:byId" + }, { + "$ref" : "#/definitions/documents:embeddingNearestNeighbors" + }, { + "$ref" : "#/definitions/documents:composite" + }, { + "$ref" : "#/definitions/documents:fromMatrixColumns" + }, { + "$ref" : "#/definitions/documents:fromClusterExemplars" + }, { + "$ref" : "#/definitions/documents:fromClusterMembers" + }, { + "$ref" : "#/definitions/documents:sample" + }, { + "$ref" : "#/definitions/documents:contrastScore" + }, { + "$ref" : "#/definitions/documents:rwmd" + }, { + "$ref" : "#/definitions/documents:fromDocumentPairs" + } ] + }, + "labels:byPrefix" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:byPrefix", + "description" : "Retrieves labels starting with the provided prefix." 
+ }, + "labelFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelFilter" + }, { + "default" : { + "type" : "labelFilter:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "limit" : { + "oneOf" : [ { + "type" : "integer", + "default" : 30, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "prefix" : { + "oneOf" : [ { + "type" : "string", + "default" : "" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "prefix" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:byPrefix", + "prefix" : "$1", + "limit" : "^${2:20}" + } + } ], + "default" : { + "type" : "labels:byPrefix", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "prefix" : "", + "limit" : 30 + } + }, + "labels:direct" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:direct", + "description" : "Provides a constant list of labels." 
+ }, + "labels" : { + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "label" : { + "oneOf" : [ { + "type" : "string" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "weight" : { + "oneOf" : [ { + "type" : "number" + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "required" : [ "label" ], + "defaultSnippets" : [ { + "body" : { + "label" : "$0" + } + } ] + }, + "defaultSnippets" : [ { + "body" : [ "^$0" ] + } ], + "default" : [ ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:direct", + "labels" : [ "^$0" ] + } + } ], + "default" : { + "type" : "labels:direct", + "labels" : [ ] + } + }, + "labels:fromDocuments" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:fromDocuments", + "description" : "Collects labels occurring in the provided documents." 
+ }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "maxLabels" : { + "allOf" : [ { + "$ref" : "#/definitions/labelCount" + }, { + "default" : { + "type" : "labelCount:fixed", + "value" : 10000 + } + } ] + }, + "labelAggregator" : { + "allOf" : [ { + "$ref" : "#/definitions/labelAggregator" + }, { + "default" : { + "type" : "labelAggregator:topWeight", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxLabelsPerDocument" : 10, + "minAbsoluteDf" : 1, + "minRelativeDf" : 0.0, + "maxRelativeDf" : 1.0, + "minWeight" : 0.0, + "tieResolution" : "AUTO", + "outputWeightFormula" : "TF", + "threads" : "auto" + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:fromDocuments", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "maxLabels" : { + "type" : "labelCount:fixed", + "value" : 10000 + }, + "labelAggregator" : { + "type" : "labelAggregator:topWeight", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + 
"tieResolution" : "AUTO" + }, + "maxLabelsPerDocument" : 10, + "minAbsoluteDf" : 1, + "minRelativeDf" : 0.0, + "maxRelativeDf" : 1.0, + "minWeight" : 0.0, + "tieResolution" : "AUTO", + "outputWeightFormula" : "TF", + "threads" : "auto" + } + } + }, + "labels:fromText" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:fromText" + }, + "text" : { + "oneOf" : [ { + "type" : "string", + "default" : "" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "analyzer" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "english", "keyword", "whitespace", "literal" ], + "default" : "english" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "featureExtractor" : { + "oneOf" : [ { + "type" : "string", + "default" : "" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "labelFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelFilter" + }, { + "default" : { + "type" : "labelFilter:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "text" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:fromText", + "text" : "$0" + } + } ], + "default" : { + "type" : "labels:fromText", + "text" : "", + "analyzer" : "english", + "featureExtractor" : "", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + } + } + }, + "labels:scored" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:scored" + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "scorer" : { + "allOf" : [ { + "$ref" : "#/definitions/labelScorer" + }, { + "default" : { + "type" : "labelScorer:identity" + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + 
"type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "scorer", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:scored", + "scorer" : "^$0" + } + } ], + "default" : { + "type" : "labels:scored", + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "scorer" : { + "type" : "labelScorer:identity" + } + } + }, + "labels:embeddingNearestNeighbors" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:embeddingNearestNeighbors" + }, + "vector" : { + "allOf" : [ { + "$ref" : "#/definitions/vector" + }, { + "default" : { + "type" : "vector:reference", + "auto" : true + } + } ] + }, + "labelFilter" : { + "allOf" : [ { + "$ref" : "#/definitions/labelFilter" + }, { + "default" : { + "type" : "labelFilter:acceptAll" + } + } ] + }, + "limit" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "failIfEmbeddingsNotAvailable" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "vector" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : "^$0" + } + } ], + "default" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:reference", + "auto" : true + }, + "labelFilter" : { + "type" : "labelFilter:acceptAll" + }, + "limit" : 10, + "failIfEmbeddingsNotAvailable" : true + } + }, + "labels:composite" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:composite" + }, + "sources" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/labels" + }, + "default" : [ ] + }, + "operator" : { + "oneOf" : [ { + "type" : 
"string", + "enum" : [ "OR", "AND" ], + "default" : "OR" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "weightAggregation" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/weightAggregation" + }, { + "default" : "SUM" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "examples" : [ { + "type" : "labels:composite", + "sources" : [ ] + } ], + "required" : [ "type", "sources" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:composite", + "sources" : [ "^$0" ] + } + } ], + "default" : { + "type" : "labels:composite", + "sources" : [ ], + "operator" : "OR", + "sortOrder" : "DESCENDING", + "weightAggregation" : "SUM" + } + }, + "labels" : { + "oneOf" : [ { + "$ref" : "#/definitions/labels:reference" + }, { + "$ref" : "#/definitions/labels:byPrefix" + }, { + "$ref" : "#/definitions/labels:direct" + }, { + "$ref" : "#/definitions/labels:fromDocuments" + }, { + "$ref" : "#/definitions/labels:fromText" + }, { + "$ref" : "#/definitions/labels:scored" + }, { + "$ref" : "#/definitions/labels:embeddingNearestNeighbors" + }, { + "$ref" : "#/definitions/labels:composite" + } ] + }, + "matrix:direct" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:direct" + }, + "matrix" : { + "type" : "object", + "properties" : { + "columns" : { + "oneOf" : [ { + "type" : "integer", + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "indices" : { + "type" : "array", + "items" : { + "type" : "array", + "items" : { + "type" : "integer" + }, + "defaultSnippets" : [ { + "body" : [ "^$0" ] + } ] + }, + "defaultSnippets" : [ { + "body" : [ "^$0" ] + } ] + }, + "values" : { + "type" : "array", + "items" : { + "type" : 
"array", + "items" : { + "type" : "number" + }, + "defaultSnippets" : [ { + "body" : [ "^$0" ] + } ] + }, + "defaultSnippets" : [ { + "body" : [ "^$0" ] + } ] + } + }, + "additionalProperties" : false, + "required" : [ "columns", "indices", "values" ], + "defaultSnippets" : [ { + "body" : { + "columns" : "^$1", + "indices" : "^$2", + "values" : "^$3" + } + } ], + "default" : { + "columns" : 0, + "indices" : [ ], + "values" : [ ] + } + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrix" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:direct", + "matrix" : { + "columns" : 0, + "indices" : [ [ ] ], + "values" : [ [ ] ] + } + } + } ], + "default" : { + "type" : "matrix:direct", + "matrix" : { + "columns" : 0, + "indices" : [ ], + "values" : [ ] + } + } + }, + "matrix:fromMatrixRows" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:fromMatrixRows" + }, + "matrixRows" : { + "allOf" : [ { + "$ref" : "#/definitions/matrixRows" + }, { + "default" : { + "type" : "matrixRows:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrixRows" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:fromMatrixRows", + "matrixRows" : "^$1" + } + } ], + "default" : { + "type" : "matrix:fromMatrixRows", + "matrixRows" : { + "type" : "matrixRows:reference", + "auto" : true + } + } + }, + "matrix:cooccurrenceLabelSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:cooccurrenceLabelSimilarity" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + 
"type" : "labels:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "cooccurrenceWindowSize" : { + "oneOf" : [ { + "type" : "integer", + "default" : 32, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "similarityWeighting" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "COOCCURRENCES", "RR", "INCLUSION", "LOEVINGER", "BB", "DICE", "YULE", "OCHIAI", "INNER_PRODUCT", "COSINE", "PEARSON" ], + "default" : "INCLUSION" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "normalized" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:cooccurrenceLabelSimilarity", + "documents" : "^$1", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "matrix:cooccurrenceLabelSimilarity", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "cooccurrenceWindowSize" : 32, + "similarityWeighting" : "INCLUSION", + "normalized" : true, + "threads" : "auto" + } + }, + "matrix:keywordDocumentSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:keywordDocumentSimilarity" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : 
"documents:reference", + "auto" : true + } + } ] + }, + "labelCollector" : { + "allOf" : [ { + "$ref" : "#/definitions/labelCollector" + }, { + "default" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "maxNeighbors" : { + "oneOf" : [ { + "type" : "integer", + "default" : 8, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minQueryLabelsPerDocument" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxQueryLabelsPerDocument" : { + "oneOf" : [ { + "type" : "integer", + "default" : 4, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minQueryLabelsRequiredInSimilarDocument" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxDocumentsForSubIndex" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.3, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxInMemorySubIndexSize" : { + "oneOf" : [ { + "type" : "integer", + "default" : 8000000, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "normalized" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : 
"string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:keywordDocumentSimilarity", + "documents" : "^$1" + } + } ], + "default" : { + "type" : "matrix:keywordDocumentSimilarity", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxNeighbors" : 8, + "minQueryLabelsPerDocument" : 1, + "maxQueryLabelsPerDocument" : 4, + "minQueryLabelsRequiredInSimilarDocument" : 1, + "maxDocumentsForSubIndex" : 0.3, + "maxInMemorySubIndexSize" : 8000000, + "normalized" : true, + "threads" : "auto" + } + }, + "matrix:knn2dDistanceSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:knn2dDistanceSimilarity" + }, + "embedding2d" : { + "allOf" : [ { + "$ref" : "#/definitions/embedding2d" + }, { + "default" : { + "type" : "embedding2d:reference", + "auto" : true + } + } ] + }, + "maxNearestPoints" : { + "oneOf" : [ { + "type" : "integer", + "default" : 8, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "embedding2d" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:knn2dDistanceSimilarity", + "embedding2d" : "^$0" + } + } ], + "default" : { + "type" : "matrix:knn2dDistanceSimilarity", + 
"embedding2d" : { + "type" : "embedding2d:reference", + "auto" : true + }, + "maxNearestPoints" : 8 + } + }, + "matrix:knnVectorsSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:knnVectorsSimilarity" + }, + "vectors" : { + "allOf" : [ { + "$ref" : "#/definitions/vectors" + }, { + "default" : { + "type" : "vectors:reference", + "auto" : true + } + } ] + }, + "maxNeighbors" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "vectors" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : "^$0" + } + } ], + "default" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:reference", + "auto" : true + }, + "maxNeighbors" : 10, + "threads" : "auto" + } + }, + "matrix:elementWiseProduct" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:elementWiseProduct" + }, + "factorA" : { + "allOf" : [ { + "$ref" : "#/definitions/matrix" + }, { + "default" : null + } ] + }, + "factorB" : { + "allOf" : [ { + "$ref" : "#/definitions/matrix" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "factorA", "factorB" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:elementWiseProduct", + "factorA" : "^$1", + "factorB" : "^$0" + } + } ], + "default" : { + "type" : "matrix:elementWiseProduct", + "factorA" : null, + "factorB" : null + } + }, + 
"matrix:keywordLabelDocumentSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:keywordLabelDocumentSimilarity" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "maxSimilarDocumentsPerLabel" : { + "oneOf" : [ { + "type" : "integer", + "default" : 5, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "documents" : "^$1", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "maxSimilarDocumentsPerLabel" : 5, + "threads" : "auto" + } + }, + "matrix" : { + "oneOf" : [ { + "$ref" : "#/definitions/matrix:reference" + }, { + "$ref" : "#/definitions/matrix:direct" + }, { + "$ref" : "#/definitions/matrix:fromMatrixRows" + }, { + "$ref" : "#/definitions/matrix:cooccurrenceLabelSimilarity" + }, { + "$ref" : 
"#/definitions/matrix:keywordDocumentSimilarity" + }, { + "$ref" : "#/definitions/matrix:knn2dDistanceSimilarity" + }, { + "$ref" : "#/definitions/matrix:knnVectorsSimilarity" + }, { + "$ref" : "#/definitions/matrix:keywordLabelDocumentSimilarity" + }, { + "$ref" : "#/definitions/matrix:elementWiseProduct" + } ] + }, + "matrixRows:keywordDocumentSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrixRows:keywordDocumentSimilarity" + }, + "index" : { + "type" : "object", + "properties" : { + "columns" : { + "type" : "object", + "properties" : { + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : null + } ] + } + }, + "required" : [ "documents" ], + "additionalProperties" : false, + "default" : { + "documents" : null + } + }, + "rows" : { + "type" : "object", + "properties" : { + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : null + } ] + }, + "labelCollector" : { + "allOf" : [ { + "$ref" : "#/definitions/labelCollector" + }, { + "default" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + } + } ] + }, + "minQueryLabelsPerRowDocument" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxQueryLabelsPerRowDocument" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "required" : [ "documents" ], + "additionalProperties" : false, + "default" : { + 
"documents" : null, + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxQueryLabelsPerRowDocument" : 10, + "minQueryLabelsPerRowDocument" : 0, + "threads" : "auto" + } + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "maxColumnDocumentsForSubIndex" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.3, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxInMemorySubIndexSize" : { + "oneOf" : [ { + "type" : "integer", + "default" : 8000000, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "required" : [ "columns", "rows" ], + "additionalProperties" : false, + "default" : { + "rows" : { + "documents" : null, + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxQueryLabelsPerRowDocument" : 10, + "minQueryLabelsPerRowDocument" : 0, + "threads" : "auto" + }, + "columns" : { + "documents" : null + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "maxColumnDocumentsForSubIndex" : 0.3, + "maxInMemorySubIndexSize" : 8000000, + "threads" : "auto" + } + }, + 
"maxNeighbors" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minQueryLabelsRequiredInColumnDocument" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1, + "exclusiveMinimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "normalized" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "index" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrixRows:keywordDocumentSimilarity", + "index" : { + "rows" : { + "documents" : "^$1" + }, + "columns" : { + "documents" : "^$2" + } + } + } + } ], + "default" : { + "type" : "matrixRows:keywordDocumentSimilarity", + "index" : { + "rows" : { + "documents" : null, + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxQueryLabelsPerRowDocument" : 10, + "minQueryLabelsPerRowDocument" : 0, + "threads" : "auto" + }, + "columns" : { + "documents" : null + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "maxColumnDocumentsForSubIndex" : 0.3, + "maxInMemorySubIndexSize" : 8000000, + "threads" : "auto" + }, + "maxNeighbors" : 10, + "minQueryLabelsRequiredInColumnDocument" : 1, + "normalized" : false, + "threads" : "auto" + } + }, + "matrixRows:knnVectorsSimilarity" : { + "type" : "object", + "properties" : { + "type" : { + "const" : 
"matrixRows:knnVectorsSimilarity" + }, + "vectors" : { + "type" : "object", + "properties" : { + "rows" : { + "allOf" : [ { + "$ref" : "#/definitions/vectors" + }, { + "default" : { + "type" : "vectors:reference", + "auto" : true + } + } ] + }, + "columns" : { + "allOf" : [ { + "$ref" : "#/definitions/vectors" + }, { + "default" : { + "type" : "vectors:reference", + "auto" : true + } + } ] + } + }, + "required" : [ "rows", "columns" ], + "additionalProperties" : false, + "default" : { + "rows" : { + "type" : "vectors:reference", + "auto" : true + }, + "columns" : { + "type" : "vectors:reference", + "auto" : true + } + } + }, + "maxNeighbors" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "vectors" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrixRows:knnVectorsSimilarity", + "vectors" : { + "rows" : "^$1", + "columns" : "^$0" + } + } + } ], + "default" : { + "type" : "matrixRows:knnVectorsSimilarity", + "vectors" : { + "rows" : { + "type" : "vectors:reference", + "auto" : true + }, + "columns" : { + "type" : "vectors:reference", + "auto" : true + } + }, + "maxNeighbors" : 10, + "threads" : "auto" + } + }, + "matrixRows:fromMatrix" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrixRows:fromMatrix" + }, + "matrix" : { + "allOf" : [ { + "$ref" : "#/definitions/matrix" + }, { + "default" : { + "type" : "matrix:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrix" ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrixRows:fromMatrix", + "matrix" : "^$1" + } 
+ } ], + "default" : { + "type" : "matrixRows:fromMatrix", + "matrix" : { + "type" : "matrix:reference", + "auto" : true + } + } + }, + "matrixRows" : { + "oneOf" : [ { + "$ref" : "#/definitions/matrixRows:reference" + }, { + "$ref" : "#/definitions/matrixRows:keywordDocumentSimilarity" + }, { + "$ref" : "#/definitions/matrixRows:knnVectorsSimilarity" + }, { + "$ref" : "#/definitions/matrixRows:fromMatrix" + } ] + }, + "vector:labelEmbedding" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vector:labelEmbedding" + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "failIfEmbeddingsNotAvailable" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "vector:labelEmbedding", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "failIfEmbeddingsNotAvailable" : true + } + }, + "vector:documentEmbedding" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vector:documentEmbedding" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "failIfEmbeddingsNotAvailable" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "vector:documentEmbedding", + "documents" : "^$0" + } + } ], + 
"default" : { + "type" : "vector:documentEmbedding", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "failIfEmbeddingsNotAvailable" : true + } + }, + "vector:composite" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vector:composite" + }, + "vectors" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/vector" + }, + "default" : [ ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "vectors" ], + "defaultSnippets" : [ { + "body" : { + "type" : "vector:composite", + "vectors" : [ "^$0" ] + } + } ], + "default" : { + "type" : "vector:composite", + "vectors" : [ ] + } + }, + "vector:estimateFromContext" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vector:estimateFromContext" + }, + "contextVector" : { + "allOf" : [ { + "$ref" : "#/definitions/vector" + }, { + "default" : { + "type" : "vector:reference", + "auto" : true + } + } ] + }, + "failIfEmbeddingsNotAvailable" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "contextVector" ], + "defaultSnippets" : [ { + "body" : { + "type" : "vector:estimateFromContext", + "contextVector" : "^$0" + } + } ], + "default" : { + "type" : "vector:estimateFromContext", + "contextVector" : { + "type" : "vector:reference", + "auto" : true + }, + "failIfEmbeddingsNotAvailable" : true + } + }, + "vector" : { + "oneOf" : [ { + "$ref" : "#/definitions/vector:reference" + }, { + "$ref" : "#/definitions/vector:labelEmbedding" + }, { + "$ref" : "#/definitions/vector:documentEmbedding" + }, { + "$ref" : "#/definitions/vector:composite" + }, { + "$ref" : "#/definitions/vector:estimateFromContext" + } ] + }, + 
"vectors:precomputedDocumentEmbeddings" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vectors:precomputedDocumentEmbeddings" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "maxInMemoryKnnSubIndexSize" : { + "oneOf" : [ { + "type" : "number", + "default" : 50000000 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "vectors:precomputedDocumentEmbeddings", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "vectors:precomputedDocumentEmbeddings", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "maxInMemoryKnnSubIndexSize" : 50000000 + } + }, + "vectors:precomputedLabelEmbeddings" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vectors:precomputedLabelEmbeddings" + }, + "labels" : { + "allOf" : [ { + "$ref" : "#/definitions/labels" + }, { + "default" : { + "type" : "labels:reference", + "auto" : true + } + } ] + }, + "maxLabelsForSubIndex" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.05, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "labels" ], + "defaultSnippets" : [ { + "body" : { + "type" : "vectors:precomputedLabelEmbeddings", + "labels" : "^$0" + } + } ], + "default" : { + "type" : "vectors:precomputedLabelEmbeddings", + "labels" : { + "type" : "labels:reference", + "auto" : true + }, + "maxLabelsForSubIndex" : 0.05 + } + 
}, + "vectors" : { + "oneOf" : [ { + "$ref" : "#/definitions/vectors:reference" + }, { + "$ref" : "#/definitions/vectors:precomputedLabelEmbeddings" + }, { + "$ref" : "#/definitions/vectors:precomputedDocumentEmbeddings" + } ] + }, + "clusters:ap" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "clusters:ap", + "description" : "Performs clustering using the modified Affinity Propagation algorithm." + }, + "matrix" : { + "allOf" : [ { + "$ref" : "#/definitions/matrix" + }, { + "default" : { + "type" : "matrix:reference", + "auto" : true + } + } ] + }, + "inputPreference" : { + "oneOf" : [ { + "type" : "number", + "default" : -1000.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "softening" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.2, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "damping" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.9, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minPruningGain" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.3, + "minimum" : 0.0, + "maximum" : 1.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxIterations" : { + "oneOf" : [ { + "type" : "integer", + "default" : 2000, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "minSteadyIterations" : { + "oneOf" : [ { + "type" : "number", + "default" : 100, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrix" ], + "defaultSnippets" : [ { + "body" : { + "type" : "clusters:ap", + "matrix" : "^$0" + } + } ], + "default" : { + 
"type" : "clusters:ap", + "matrix" : { + "type" : "matrix:reference", + "auto" : true + }, + "inputPreference" : -1000.0, + "softening" : 0.2, + "damping" : 0.9, + "minPruningGain" : 0.3, + "maxIterations" : 2000, + "minSteadyIterations" : 100, + "threads" : "auto" + } + }, + "clusters:withRemappedDocuments" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "clusters:withRemappedDocuments" + }, + "clusters" : { + "allOf" : [ { + "$ref" : "#/definitions/clusters" + }, { + "default" : { + "type" : "clusters:reference", + "auto" : true + } + } ] + }, + "exemplarsFrom" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : null + } ] + }, + "exemplarsTo" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : null + } ] + }, + "membersFrom" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : null + } ] + }, + "membersTo" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "clusters", "exemplarsFrom", "exemplarsTo", "membersFrom", "membersTo" ], + "defaultSnippets" : [ { + "body" : { + "type" : "clusters:withRemappedDocuments", + "clusters" : "^$4", + "exemplarsFrom" : "^$3", + "exemplarsTo" : "^$2", + "membersFrom" : "^$1", + "membersTo" : "^$0" + } + } ], + "default" : { + "type" : "clusters:withRemappedDocuments", + "clusters" : { + "type" : "clusters:reference", + "auto" : true + }, + "membersFrom" : null, + "membersTo" : null, + "exemplarsFrom" : null, + "exemplarsTo" : null + } + }, + "clusters:fromMatrixColumns" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "clusters:fromMatrixColumns" + }, + "matrixRows" : { + "allOf" : [ { + "$ref" : "#/definitions/matrixRows" + }, { + "default" : { + "type" : 
"matrixRows:reference", + "auto" : true + } + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : 100 + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "sortOrder" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/sortOrder" + }, { + "default" : "DESCENDING" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "weightAggregation" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/weightAggregation" + }, { + "default" : "SUM" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrixRows" ], + "defaultSnippets" : [ { + "body" : { + "type" : "clusters:fromMatrixColumns", + "matrixRows" : "^$1" + } + } ], + "default" : { + "type" : "clusters:fromMatrixColumns", + "matrixRows" : { + "type" : "matrixRows:reference", + "auto" : true + }, + "limit" : 100, + "sortOrder" : "DESCENDING", + "weightAggregation" : "SUM" + } + }, + "clusters:byValues" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "clusters:byValues" + }, + "values" : { + "allOf" : [ { + "$ref" : "#/definitions/values" + }, { + "default" : { + "type" : "values:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "values" ], + "defaultSnippets" : [ { + "body" : { + "type" : "clusters:byValues", + "values" : "^$1" + } + } ], + "default" : { + "type" : "clusters:byValues", + "values" : { + "type" : "values:reference", + "auto" : true + } + } + }, + "clusters" : { + "oneOf" : [ { + "$ref" : "#/definitions/clusters:reference" + }, { + "$ref" : "#/definitions/clusters:ap" + }, { + 
"$ref" : "#/definitions/clusters:fromMatrixColumns" + }, { + "$ref" : "#/definitions/clusters:withRemappedDocuments" + }, { + "$ref" : "#/definitions/clusters:byValues" + } ] + }, + "labelClusters:documentClusterLabels" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelClusters:documentClusterLabels" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "clusters" : { + "allOf" : [ { + "$ref" : "#/definitions/clusters" + }, { + "default" : { + "type" : "clusters:reference", + "auto" : true + } + } ] + }, + "maxLabels" : { + "oneOf" : [ { + "type" : "integer", + "default" : 3, + "exclusiveMinimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "labelAggregator" : { + "allOf" : [ { + "$ref" : "#/definitions/labelAggregator" + }, { + "default" : { + "type" : "labelAggregator:topWeight", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxLabelsPerDocument" : 10, + "minAbsoluteDf" : 1, + "minRelativeDf" : 0.0, + "maxRelativeDf" : 1.0, + "minWeight" : 0.0, + "tieResolution" : "AUTO", + "outputWeightFormula" : "TF", + "threads" : "auto" + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "clusters" ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelClusters:documentClusterLabels", + "documents" : "^$0", + "clusters" : "^$1" + } + } ], + "default" : { + "type" : "labelClusters:documentClusterLabels", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + 
"clusters" : { + "type" : "clusters:reference", + "auto" : true + }, + "labelAggregator" : { + "type" : "labelAggregator:topWeight", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxLabelsPerDocument" : 10, + "minAbsoluteDf" : 1, + "minRelativeDf" : 0.0, + "maxRelativeDf" : 1.0, + "minWeight" : 0.0, + "tieResolution" : "AUTO", + "outputWeightFormula" : "TF", + "threads" : "auto" + }, + "maxLabels" : 3 + } + }, + "labelClusters" : { + "oneOf" : [ { + "$ref" : "#/definitions/labelClusters:reference" + }, { + "$ref" : "#/definitions/labelClusters:documentClusterLabels" + } ] + }, + "embedding2d:lv" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "embedding2d:lv" + }, + "matrix" : { + "allOf" : [ { + "$ref" : "#/definitions/matrix" + }, { + "default" : { + "type" : "matrix:reference", + "auto" : true + } + } ] + }, + "initial" : { + "oneOf" : [ { + "$ref" : "#/definitions/embedding2d" + }, { + "type" : "null" + } ], + "default" : null + }, + "maxIterations" : { + "oneOf" : [ { + "type" : "integer", + "default" : 300, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "negativeEdgeCount" : { + "oneOf" : [ { + "type" : "integer", + "default" : 5, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "negativeEdgeWeight" : { + "oneOf" : [ { + "type" : "number", + "default" : 2.0, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "negativeEdgeDenominator" : { + "oneOf" : [ { + "type" : "number", + "default" : 1.0, + "exclusiveMinimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "initializedLearningRate" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.02, + "minimum" 
: 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrix" ], + "defaultSnippets" : [ { + "body" : { + "type" : "embedding2d:lv", + "matrix" : "^$0" + } + } ], + "default" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:reference", + "auto" : true + }, + "initial" : null, + "maxIterations" : 300, + "negativeEdgeCount" : 5, + "negativeEdgeWeight" : 2.0, + "negativeEdgeDenominator" : 1.0, + "initializedLearningRate" : 0.02, + "threads" : "auto" + } + }, + "embedding2d:lvOverlay" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "embedding2d:lvOverlay" + }, + "matrix" : { + "allOf" : [ { + "$ref" : "#/definitions/matrix" + }, { + "default" : { + "type" : "matrix:reference", + "auto" : true + } + } ] + }, + "embedding2d" : { + "allOf" : [ { + "$ref" : "#/definitions/embedding2d" + }, { + "default" : { + "type" : "embedding2d:reference", + "auto" : true + } + } ] + }, + "initial" : { + "oneOf" : [ { + "$ref" : "#/definitions/embedding2d" + }, { + "type" : "null" + } ], + "default" : null + }, + "maxIterations" : { + "oneOf" : [ { + "type" : "integer", + "default" : 300, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "negativeEdgeCount" : { + "oneOf" : [ { + "type" : "integer", + "default" : 5, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "negativeEdgeWeight" : { + "oneOf" : [ { + "type" : "number", + "default" : 2.0, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "negativeEdgeDenominator" : { + "oneOf" : [ { + "type" : "number", + "default" : 1.0, + "exclusiveMinimum" : 0.0 + }, { + 
"$ref" : "#/definitions/var" + } ] + }, + "initializedLearningRate" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.02, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "matrix", "embedding2d" ], + "defaultSnippets" : [ { + "body" : { + "type" : "embedding2d:lvOverlay", + "matrix" : "^$1", + "embedding2d" : "^$0" + } + } ], + "default" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:reference", + "auto" : true + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "auto" : true + }, + "initial" : null, + "maxIterations" : 300, + "negativeEdgeCount" : 5, + "negativeEdgeWeight" : 2.0, + "negativeEdgeDenominator" : 1.0, + "initializedLearningRate" : 0.02, + "threads" : "auto" + } + }, + "embedding2d:transferred" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "embedding2d:transferred" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/entity" + }, { + "default" : null + } ] + }, + "embedding2d" : { + "allOf" : [ { + "$ref" : "#/definitions/embedding2d" + }, { + "default" : null + } ] + }, + "target" : { + "allOf" : [ { + "$ref" : "#/definitions/entity" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source", "embedding2d", "target" ], + "defaultSnippets" : [ { + "body" : { + "type" : "embedding2d:transferred", + "source" : "^$1", + "embedding2d" : "^$2", + "target" : "^$0" + } + } ], + "default" : { + "type" : "embedding2d:transferred", + "source" : 
null, + "target" : null, + "embedding2d" : null + } + }, + "embedding2d" : { + "oneOf" : [ { + "$ref" : "#/definitions/embedding2d:reference" + }, { + "$ref" : "#/definitions/embedding2d:lv" + }, { + "$ref" : "#/definitions/embedding2d:lvOverlay" + }, { + "$ref" : "#/definitions/embedding2d:transferred" + } ] + }, + "entity" : { + "oneOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "$ref" : "#/definitions/labels" + } ] + }, + "values" : { + "oneOf" : [ { + "$ref" : "#/definitions/values:reference" + }, { + "$ref" : "#/definitions/values:fromDocumentField" + } ] + }, + "values:fromDocumentField" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "values:fromDocumentField" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "fieldName" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "summary", "fld_double_a", "fld_mv", "fld_int", "fld_int_a", "id", "fld_double", "title" ], + "default" : null + }, { + "$ref" : "#/definitions/var" + } ] + }, + "threads" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/threads" + }, { + "default" : "auto" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "multipleValues" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "COLLECT_FIRST", "REQUIRE_EXACTLY_ONE", "COLLECT_ALL" ], + "default" : "REQUIRE_EXACTLY_ONE" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents", "fieldName" ], + "defaultSnippets" : [ { + "body" : { + "type" : "values:fromDocumentField", + "documents" : "^$1", + "fieldName" : "^$0" + } + } ], + "default" : { + "type" : "values:fromDocumentField", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + 
"fieldName" : null, + "multipleValues" : "REQUIRE_EXACTLY_ONE", + "threads" : "auto" + } + }, + "documentContent" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documentContent" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/contentFields" + }, { + "default" : { + "type" : "contentFields:reference", + "auto" : true + } + } ] + }, + "start" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : "unlimited" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "queries" : { + "type" : "object", + "additionalProperties" : { + "$ref" : "#/definitions/query" + }, + "default" : { } + }, + "mode" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "STREAMING", "EAGER" ], + "default" : "STREAMING" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documentContent", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "fields" : { + "type" : "contentFields:reference", + "auto" : true + }, + "queries" : { }, + "start" : 0, + "limit" : "unlimited", + "mode" : "STREAMING" + } + }, + "documentLabels" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documentLabels" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "labelCollector" : { + "allOf" : [ { + "$ref" : 
"#/definitions/labelCollector" + }, { + "default" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + } + } ] + }, + "maxLabels" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "start" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "limit" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : "unlimited" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "documentLabels", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "documentLabels", + "documents" : { + "type" : "documents:reference", + "auto" : true + }, + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:reference", + "auto" : true + }, + "labelListFilter" : { + "type" : "labelListFilter:truncatedPhrases" + }, + "fields" : { + "type" : "featureFields:reference", + "auto" : true + }, + "minTf" : 0, + "minTfMass" : 1.0, + "tieResolution" : "AUTO" + }, + "maxLabels" : 10, + "start" : 0, + "limit" : "unlimited" + } + }, + "stats:documents" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "stats:documents" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + 
}, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "stats:documents", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "stats:documents", + "documents" : { + "type" : "documents:reference", + "auto" : true + } + } + }, + "stats" : { + "oneOf" : [ { + "$ref" : "#/definitions/stats:documents" + } ] + }, + "documentOverlap" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documentOverlap" + }, + "documentPairs" : { + "allOf" : [ { + "$ref" : "#/definitions/documentPairs" + }, { + "default" : { + "type" : "documentPairs:reference", + "auto" : true + } + } ] + }, + "pairwiseSimilarity" : { + "allOf" : [ { + "$ref" : "#/definitions/pairwiseSimilarity" + }, { + "default" : { + "type" : "pairwiseSimilarity:documentOverlapRatio", + "fields" : { + "type" : "fields:reference", + "auto" : true + }, + "ngramWindow" : 6, + "crossFieldOverlaps" : true, + "allowedGapRatio" : 0.0, + "computeDifferences" : false + } + } ] + }, + "alignedFragments" : { + "type" : "object", + "properties" : { + "maxFragments" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : "unlimited" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "contextChars" : { + "oneOf" : [ { + "type" : "integer", + "default" : 160 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/contentFields" + }, { + "default" : { + "type" : "contentFields:empty" + } + } ] + } + }, + "additionalProperties" : false, + "default" : { + "maxFragments" : "unlimited", + "contextChars" : 160, + "fields" : { + "type" : "contentFields:empty" + } + } + }, + "fragmentsInFields" : { + "type" : "object", + "properties" : { + "contextChars" : { + "oneOf" : [ { + "type" : "integer", + "default" : 160 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : 
"#/definitions/contentFields" + }, { + "default" : { + "type" : "contentFields:empty" + } + } ] + } + }, + "additionalProperties" : false, + "default" : { + "fields" : { + "type" : "contentFields:empty" + }, + "contextChars" : 160 + } + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documentPairs", "pairwiseSimilarity" ], + "default" : { + "type" : "documentOverlap", + "documentPairs" : { + "type" : "documentPairs:reference", + "auto" : true + }, + "pairwiseSimilarity" : { + "type" : "pairwiseSimilarity:documentOverlapRatio", + "fields" : { + "type" : "fields:reference", + "auto" : true + }, + "ngramWindow" : 6, + "crossFieldOverlaps" : true, + "allowedGapRatio" : 0.0, + "computeDifferences" : false + }, + "alignedFragments" : { + "maxFragments" : "unlimited", + "contextChars" : 160, + "fields" : { + "type" : "contentFields:empty" + } + }, + "fragmentsInFields" : { + "fields" : { + "type" : "contentFields:empty" + }, + "contextChars" : 160 + } + } + }, + "documentPairs" : { + "oneOf" : [ { + "$ref" : "#/definitions/documentPairs:reference" + }, { + "$ref" : "#/definitions/documentPairs:duplicates" + }, { + "$ref" : "#/definitions/documentPairs:all" + } ] + }, + "documentPairs:all" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documentPairs:all" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "documentPairs:all", + "documents" : { + "type" : "documents:reference", + 
"auto" : true + } + } + }, + "documentPairs:duplicates" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documentPairs:duplicates" + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "documentPairFilter" : { + "type" : "object", + "properties" : { + "comment" : { + "$ref" : "#/definitions/comment" + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : { + "type" : "query:all" + } + } ] + }, + "countCondition" : { + "oneOf" : [ { + "type" : "string", + "enum" : [ "ZERO", "ONE", "ONE_OR_MORE", "TWO" ], + "default" : "ONE_OR_MORE" + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "additionalProperties" : false, + "required" : [ ], + "default" : { + "query" : { + "type" : "query:all" + }, + "countCondition" : "ONE_OR_MORE" + } + }, + "hashGrouping" : { + "type" : "object", + "properties" : { + "comment" : { + "$ref" : "#/definitions/comment" + }, + "features" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "pairing" : { + "type" : "object", + "properties" : { + "maxHashBitsDifferent" : { + "oneOf" : [ { + "type" : "integer", + "default" : 0, + "minimum" : 0, + "maximum" : 5 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxHashGroupSize" : { + "oneOf" : [ { + "type" : "integer", + "default" : 200, + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "additionalProperties" : false, + "default" : { + "maxHashBitsDifferent" : 0, + "maxHashGroupSize" : 200 + } + } + }, + "additionalProperties" : false, + "required" : [ "features" ], + "default" : { + "features" : { + "type" : "featureSource:reference", + "auto" : true + }, + "pairing" : { + "maxHashBitsDifferent" : 0, + "maxHashGroupSize" : 200 + } + } + }, + "validationFilters" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/validationCriteria" + }, + "default" : [ 
] + }, + "validation" : { + "allOf" : [ { + "$ref" : "#/definitions/validationCriteria" + }, { + "default" : { + "min" : 0.0, + "max" : 1.7976931348623157E308, + "debug" : false, + "pairwiseSimilarity" : { + "type" : "pairwiseSimilarity:featureIntersectionToUnionRatio", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + } + } ] + }, + "output" : { + "type" : "object", + "properties" : { + "explanations" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "diagnostics" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "limit" : { + "oneOf" : [ { + "type" : "integer", + "minimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "additionalProperties" : false, + "default" : { + "explanations" : false, + "diagnostics" : true + } + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + }, + "useCache" : { + "type" : "boolean", + "default" : true + } + }, + "additionalProperties" : false, + "required" : [ "type", "query", "hashGrouping", "validation" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "documentPairs:duplicates", + "query" : null, + "documentPairFilter" : { + "query" : { + "type" : "query:all" + }, + "countCondition" : "ONE_OR_MORE" + }, + "hashGrouping" : { + "features" : { + "type" : "featureSource:reference", + "auto" : true + }, + "pairing" : { + "maxHashBitsDifferent" : 0, + "maxHashGroupSize" : 200 + } + }, + "validation" : { + "min" : 0.0, + "max" : 1.7976931348623157E308, + "debug" : false, + "pairwiseSimilarity" : { + "type" : "pairwiseSimilarity:featureIntersectionToUnionRatio", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "validationFilters" : [ ], + "output" : { + "explanations" : false, + "diagnostics" : true + } + } + }, + "pairwiseSimilarity" : { + "oneOf" : [ { + "$ref" : 
"#/definitions/pairwiseSimilarity:reference" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:featureIntersectionMinRatio" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:featureIntersectionToUnionRatio" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:featureIntersectionSize" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:documentOverlapRatio" + } ] + }, + "pairwiseSimilarity:featureIntersectionToUnionRatio" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "pairwiseSimilarity:featureIntersectionToUnionRatio" + }, + "features" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "features" ], + "default" : { + "type" : "pairwiseSimilarity:featureIntersectionToUnionRatio", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "pairwiseSimilarity:featureIntersectionMinRatio" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "pairwiseSimilarity:featureIntersectionMinRatio" + }, + "features" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "features" ], + "default" : { + "type" : "pairwiseSimilarity:featureIntersectionMinRatio", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "pairwiseSimilarity:featureIntersectionSize" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "pairwiseSimilarity:featureIntersectionSize" + }, + "features" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : 
{ + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "features" ], + "default" : { + "type" : "pairwiseSimilarity:featureIntersectionSize", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "pairwiseSimilarity:documentOverlapRatio" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "pairwiseSimilarity:documentOverlapRatio" + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/fields" + }, { + "default" : { + "type" : "fields:reference", + "auto" : true + } + } ] + }, + "ngramWindow" : { + "oneOf" : [ { + "type" : "integer", + "default" : 6, + "exclusiveMinimum" : 0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "crossFieldOverlaps" : { + "oneOf" : [ { + "type" : "boolean", + "default" : true + }, { + "$ref" : "#/definitions/var" + } ] + }, + "allowedGapRatio" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "computeDifferences" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields" ], + "default" : { + "type" : "pairwiseSimilarity:documentOverlapRatio", + "fields" : { + "type" : "fields:reference", + "auto" : true + }, + "ngramWindow" : 6, + "crossFieldOverlaps" : true, + "allowedGapRatio" : 0.0, + "computeDifferences" : false + } + }, + "featureSource" : { + "oneOf" : [ { + "$ref" : "#/definitions/featureSource:reference" + }, { + "$ref" : "#/definitions/featureSource:sentences" + }, { + "$ref" : "#/definitions/featureSource:chunks" + }, { + "$ref" : "#/definitions/featureSource:values" + }, { + "$ref" : "#/definitions/featureSource:words" + }, { + "$ref" : "#/definitions/featureSource:labels" + }, { + "$ref" : "#/definitions/featureSource:flatten" + }, { + "$ref" : "#/definitions/featureSource:group" + }, { 
+ "$ref" : "#/definitions/featureSource:ngrams" + }, { + "$ref" : "#/definitions/featureSource:count" + }, { + "$ref" : "#/definitions/featureSource:unique" + }, { + "$ref" : "#/definitions/featureSource:minhash" + }, { + "$ref" : "#/definitions/featureSource:simhash" + } ] + }, + "featureSource:sentences" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:sentences" + }, + "minCharacters" : { + "oneOf" : [ { + "type" : "integer", + "default" : 40 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/fields" + }, { + "default" : { + "type" : "fields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:sentences", + "minCharacters" : 40, + "fields" : { + "type" : "fields:reference", + "auto" : true + } + } + }, + "featureSource:chunks" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:chunks" + }, + "minCharacters" : { + "oneOf" : [ { + "type" : "integer", + "default" : 80 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "modulo" : { + "oneOf" : [ { + "type" : "integer", + "default" : 5 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/fields" + }, { + "default" : { + "type" : "fields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:chunks", + "fields" : { + "type" : "fields:reference", + "auto" : true + }, + "modulo" : 5, + "minCharacters" : 80 + } + }, + "featureSource:values" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:values" + }, + "fields" : { + "allOf" : [ { + "$ref" : 
"#/definitions/fields" + }, { + "default" : { + "type" : "fields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:values", + "fields" : { + "type" : "fields:reference", + "auto" : true + } + } + }, + "featureSource:words" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:words" + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/fields" + }, { + "default" : { + "type" : "fields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:words", + "fields" : { + "type" : "fields:reference", + "auto" : true + } + } + }, + "featureSource:labels" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:labels" + }, + "minDocFrequency" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxDocFrequency" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : "unlimited" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fields" : { + "allOf" : [ { + "$ref" : "#/definitions/featureFields" + }, { + "default" : { + "type" : "featureFields:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "fields" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:labels", + "minDocFrequency" : 1, + "maxDocFrequency" : "unlimited", + "fields" : { + "type" : "featureFields:reference", + "auto" : true + } + } + }, + "featureSource:flatten" : { + "type" : "object", + "properties" : { + "type" : { + "const" : 
"featureSource:flatten" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:flatten", + "source" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "featureSource:group" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:group" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:group", + "source" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "featureSource:ngrams" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:ngrams" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "window" : { + "oneOf" : [ { + "type" : "integer", + "default" : 10 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source", "window" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:ngrams", + "source" : { + "type" : "featureSource:reference", + "auto" : true + }, + "window" : 10 + } + }, + "featureSource:count" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:count" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + 
"type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "minFeatureCount" : { + "oneOf" : [ { + "type" : "integer", + "default" : 1 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "maxFeatureCount" : { + "oneOf" : [ { + "allOf" : [ { + "$ref" : "#/definitions/limit" + }, { + "default" : "unlimited" + } ] + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:count", + "source" : { + "type" : "featureSource:reference", + "auto" : true + }, + "minFeatureCount" : 1, + "maxFeatureCount" : "unlimited" + } + }, + "featureSource:unique" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:unique" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:unique", + "source" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "featureSource:minhash" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:minhash" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "functionCount" : { + "oneOf" : [ { + "type" : "integer", + "default" : 128 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:minhash", + "source" : { + "type" : "featureSource:reference", + "auto" : true + }, + 
"functionCount" : 128 + } + }, + "featureSource:simhash" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:simhash" + }, + "source" : { + "allOf" : [ { + "$ref" : "#/definitions/featureSource" + }, { + "default" : { + "type" : "featureSource:reference", + "auto" : true + } + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "required" : [ "type", "source" ], + "defaultSnippets" : [ ], + "default" : { + "type" : "featureSource:simhash", + "source" : { + "type" : "featureSource:reference", + "auto" : true + } + } + }, + "validationCriteria" : { + "type" : "object", + "properties" : { + "comment" : { + "$ref" : "#/definitions/comment" + }, + "pairwiseSimilarity" : { + "allOf" : [ { + "$ref" : "#/definitions/pairwiseSimilarity" + }, { + "default" : { + "type" : "pairwiseSimilarity:featureIntersectionToUnionRatio", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + } ] + }, + "min" : { + "oneOf" : [ { + "type" : "number", + "default" : 0.0, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "max" : { + "oneOf" : [ { + "type" : "number", + "default" : 1.7976931348623157E308, + "minimum" : 0.0 + }, { + "$ref" : "#/definitions/var" + } ] + }, + "debug" : { + "oneOf" : [ { + "type" : "boolean", + "default" : false + }, { + "$ref" : "#/definitions/var" + } ] + } + }, + "additionalProperties" : false, + "required" : [ "pairwiseSimilarity", "min" ], + "default" : { + "min" : 0.0, + "max" : 1.7976931348623157E308, + "debug" : false, + "pairwiseSimilarity" : { + "type" : "pairwiseSimilarity:featureIntersectionToUnionRatio", + "features" : { + "type" : "featureSource:reference", + "auto" : true + } + } + } + }, + "taskSpec" : { + "type" : "object", + "properties" : { + "name" : { + "oneOf" : [ { + "type" : "string" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "durationMs" : { + "oneOf" : [ { + "type" : "integer" + }, { + "$ref" : 
"#/definitions/var" + } ] + }, + "progressAvailable" : { + "oneOf" : [ { + "type" : "boolean" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "fail" : { + "oneOf" : [ { + "type" : "boolean" + }, { + "$ref" : "#/definitions/var" + } ] + }, + "tasks" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/taskSpec" + } + } + }, + "additionalProperties" : false, + "required" : [ ], + "defaultSnippets" : [ { + "body" : { + "name" : "${1:Task}", + "durationMs" : "^${2:2000}" + } + } ] + }, + "debug:progress" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "debug:progress" + }, + "tasks" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/taskSpec" + }, + "default" : [ { + "name" : "Task", + "durationMs" : 2000, + "progressAvailable" : true, + "tasks" : [ ], + "fail" : false + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type" ], + "defaultSnippets" : [ { + "body" : { + "type" : "debug:progress", + "tasks" : [ "^$0" ] + } + } ], + "default" : { + "type" : "debug:progress", + "tasks" : [ { + "name" : "Task", + "durationMs" : 2000, + "progressAvailable" : true, + "tasks" : [ ], + "fail" : false + } ] + } + }, + "debug:explain" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "debug:explain" + }, + "documents" : { + "allOf" : [ { + "$ref" : "#/definitions/documents" + }, { + "default" : { + "type" : "documents:reference", + "auto" : true + } + } ] + }, + "query" : { + "allOf" : [ { + "$ref" : "#/definitions/query" + }, { + "default" : null + } ] + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "name" : { + "type" : "string" + } + }, + "additionalProperties" : false, + "required" : [ "type", "documents" ], + "defaultSnippets" : [ { + "body" : { + "type" : "debug:explain", + "documents" : "^$0" + } + } ], + "default" : { + "type" : "debug:explain", + "documents" : { + "type" : 
"documents:reference", + "auto" : true + }, + "query" : null + } + }, + "component" : { + "oneOf" : [ { + "$ref" : "#/definitions/query:all" + }, { + "$ref" : "#/definitions/query:string" + }, { + "$ref" : "#/definitions/query:forLabels" + }, { + "$ref" : "#/definitions/query:forFieldValues" + }, { + "$ref" : "#/definitions/query:filter" + }, { + "$ref" : "#/definitions/query:complement" + }, { + "$ref" : "#/definitions/query:composite" + }, { + "$ref" : "#/definitions/query:fromDocuments" + }, { + "$ref" : "#/definitions/queryParser:project" + }, { + "$ref" : "#/definitions/queryParser:enhanced" + }, { + "$ref" : "#/definitions/labelFilter:composite" + }, { + "$ref" : "#/definitions/labelFilter:complement" + }, { + "$ref" : "#/definitions/labelFilter:tokenCount" + }, { + "$ref" : "#/definitions/labelFilter:characterCount" + }, { + "$ref" : "#/definitions/labelFilter:surface" + }, { + "$ref" : "#/definitions/labelFilter:autoStopLabels" + }, { + "$ref" : "#/definitions/labelFilter:dictionary" + }, { + "$ref" : "#/definitions/labelFilter:hasEmbedding" + }, { + "$ref" : "#/definitions/labelFilter:acceptAll" + }, { + "$ref" : "#/definitions/labelFilter:acceptLabels" + }, { + "$ref" : "#/definitions/labelFilter:rejectLabels" + }, { + "$ref" : "#/definitions/labelListFilter:acceptAll" + }, { + "$ref" : "#/definitions/labelListFilter:truncatedPhrases" + }, { + "$ref" : "#/definitions/labelCount:fixed" + }, { + "$ref" : "#/definitions/labelCount:unlimited" + }, { + "$ref" : "#/definitions/labelCount:progressive" + }, { + "$ref" : "#/definitions/labelScorer:identity" + }, { + "$ref" : "#/definitions/labelScorer:composite" + }, { + "$ref" : "#/definitions/labelScorer:df" + }, { + "$ref" : "#/definitions/labelScorer:idf" + }, { + "$ref" : "#/definitions/labelScorer:tf" + }, { + "$ref" : "#/definitions/labelScorer:probabilityRatio" + }, { + "$ref" : "#/definitions/labelCollector:topFromFeatureFields" + }, { + "$ref" : "#/definitions/labelAggregator:topWeight" + }, { + "$ref" : 
"#/definitions/fields:simple" + }, { + "$ref" : "#/definitions/featureFields:simple" + }, { + "$ref" : "#/definitions/contentFields:simple" + }, { + "$ref" : "#/definitions/contentFields:grouped" + }, { + "$ref" : "#/definitions/contentFields:empty" + }, { + "$ref" : "#/definitions/featureSource:sentences" + }, { + "$ref" : "#/definitions/featureSource:chunks" + }, { + "$ref" : "#/definitions/featureSource:values" + }, { + "$ref" : "#/definitions/featureSource:words" + }, { + "$ref" : "#/definitions/featureSource:labels" + }, { + "$ref" : "#/definitions/featureSource:flatten" + }, { + "$ref" : "#/definitions/featureSource:group" + }, { + "$ref" : "#/definitions/featureSource:ngrams" + }, { + "$ref" : "#/definitions/featureSource:count" + }, { + "$ref" : "#/definitions/featureSource:unique" + }, { + "$ref" : "#/definitions/featureSource:minhash" + }, { + "$ref" : "#/definitions/featureSource:simhash" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:featureIntersectionMinRatio" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:featureIntersectionToUnionRatio" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:featureIntersectionSize" + }, { + "$ref" : "#/definitions/pairwiseSimilarity:documentOverlapRatio" + }, { + "$ref" : "#/definitions/matrixRows:keywordDocumentSimilarity" + }, { + "$ref" : "#/definitions/matrixRows:knnVectorsSimilarity" + }, { + "$ref" : "#/definitions/matrixRows:fromMatrix" + }, { + "$ref" : "#/definitions/dictionary:glob" + }, { + "$ref" : "#/definitions/dictionary:regex" + }, { + "$ref" : "#/definitions/dictionary:project" + }, { + "$ref" : "#/definitions/dictionary:all" + }, { + "$ref" : "#/definitions/dictionary:queryTerms" + } ] + }, + "stage" : { + "oneOf" : [ { + "$ref" : "#/definitions/documents:byQuery" + }, { + "$ref" : "#/definitions/documents:byId" + }, { + "$ref" : "#/definitions/documents:embeddingNearestNeighbors" + }, { + "$ref" : "#/definitions/documents:composite" + }, { + "$ref" : 
"#/definitions/documents:fromMatrixColumns" + }, { + "$ref" : "#/definitions/documents:fromClusterExemplars" + }, { + "$ref" : "#/definitions/documents:fromClusterMembers" + }, { + "$ref" : "#/definitions/documents:sample" + }, { + "$ref" : "#/definitions/documents:contrastScore" + }, { + "$ref" : "#/definitions/documents:rwmd" + }, { + "$ref" : "#/definitions/documents:fromDocumentPairs" + }, { + "$ref" : "#/definitions/documentPairs:duplicates" + }, { + "$ref" : "#/definitions/documentPairs:all" + }, { + "$ref" : "#/definitions/labels:byPrefix" + }, { + "$ref" : "#/definitions/labels:direct" + }, { + "$ref" : "#/definitions/labels:fromDocuments" + }, { + "$ref" : "#/definitions/labels:fromText" + }, { + "$ref" : "#/definitions/labels:scored" + }, { + "$ref" : "#/definitions/labels:embeddingNearestNeighbors" + }, { + "$ref" : "#/definitions/labels:composite" + }, { + "$ref" : "#/definitions/matrix:direct" + }, { + "$ref" : "#/definitions/matrix:fromMatrixRows" + }, { + "$ref" : "#/definitions/matrix:cooccurrenceLabelSimilarity" + }, { + "$ref" : "#/definitions/matrix:keywordDocumentSimilarity" + }, { + "$ref" : "#/definitions/matrix:knn2dDistanceSimilarity" + }, { + "$ref" : "#/definitions/matrix:knnVectorsSimilarity" + }, { + "$ref" : "#/definitions/matrix:keywordLabelDocumentSimilarity" + }, { + "$ref" : "#/definitions/matrix:elementWiseProduct" + }, { + "$ref" : "#/definitions/vector:labelEmbedding" + }, { + "$ref" : "#/definitions/vector:documentEmbedding" + }, { + "$ref" : "#/definitions/vector:composite" + }, { + "$ref" : "#/definitions/vector:estimateFromContext" + }, { + "$ref" : "#/definitions/clusters:ap" + }, { + "$ref" : "#/definitions/clusters:fromMatrixColumns" + }, { + "$ref" : "#/definitions/clusters:withRemappedDocuments" + }, { + "$ref" : "#/definitions/clusters:byValues" + }, { + "$ref" : "#/definitions/labelClusters:documentClusterLabels" + }, { + "$ref" : "#/definitions/embedding2d:lv" + }, { + "$ref" : "#/definitions/embedding2d:lvOverlay" + 
}, { + "$ref" : "#/definitions/embedding2d:transferred" + }, { + "$ref" : "#/definitions/vectors:precomputedLabelEmbeddings" + }, { + "$ref" : "#/definitions/vectors:precomputedDocumentEmbeddings" + }, { + "$ref" : "#/definitions/documentContent" + }, { + "$ref" : "#/definitions/documentLabels" + }, { + "$ref" : "#/definitions/values:fromDocumentField" + }, { + "$ref" : "#/definitions/documentOverlap" + }, { + "$ref" : "#/definitions/stats:documents" + }, { + "$ref" : "#/definitions/debug:explain" + }, { + "$ref" : "#/definitions/debug:progress" + } ] + }, + "variableValue" : { + "anyOf" : [ { + "type" : "integer" + }, { + "type" : "number" + }, { + "type" : "boolean" + }, { + "type" : "string" + }, { + "type" : "null" + }, { + "type" : "array", + "items" : { + "type" : "string" + } + } ] + }, + "variable" : { + "type" : "object", + "properties" : { + "value" : { + "$ref" : "#/definitions/variableValue" + }, + "name" : { + "type" : "string" + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "meta" : { + "type" : "object", + "properties" : { + "ui" : { + "type" : "string", + "enum" : [ "textarea", "tags" ] + } + } + } + }, + "required" : [ "value" ], + "additionalProperties" : false, + "defaultSnippets" : [ { + "description" : "Variable with documentation", + "body" : { + "name" : "${1:Variable name}", + "comment" : "${2:Variable description}", + "value" : "^$0" + } + }, { + "description" : "Variable without documentation", + "body" : { + "value" : "^$0" + } + } ] + }, + "variableGroup" : { + "type" : "object", + "properties" : { + "name" : { + "type" : "string" + }, + "comment" : { + "$ref" : "#/definitions/comment" + }, + "variables" : { + "$ref" : "#/definitions/variableContainer" + } + }, + "required" : [ "variables" ], + "additionalProperties" : false, + "defaultSnippets" : [ { + "description" : "Group of variables", + "body" : { + "name" : "${1:Group name}", + "comment" : "${2:Group description}", + "variables" : { + "${3:variable1}" : "^$0" + } + 
} + } ] + }, + "variableGroupOrVariable" : { + "oneOf" : [ { + "$ref" : "#/definitions/variable" + }, { + "$ref" : "#/definitions/variableGroup" + } ] + }, + "variableContainer" : { + "type" : "object", + "additionalProperties" : { + "$ref" : "#/definitions/variableGroupOrVariable" + } + }, + "labelFilter:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelFilter:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelFilter:reference", + "use" : "$0" + } + } ] + }, + "labelListFilter:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelListFilter:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelListFilter:reference", + "use" : "$0" + } + } ] + }, + "labelCollector:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelCollector:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelCollector:reference", + "use" : "$0" + } + } ] + }, + "query:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "query:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ 
{ + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "query:reference", + "use" : "$0" + } + } ] + }, + "fields:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "fields:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "fields:reference", + "use" : "$0" + } + } ] + }, + "featureFields:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureFields:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "featureFields:reference", + "use" : "$0" + } + } ] + }, + "contentFields:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "contentFields:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "contentFields:reference", + "use" : "$0" + } + } ] + }, + "matrixRows:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrixRows:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true 
+ } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrixRows:reference", + "use" : "$0" + } + } ] + }, + "featureSource:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "featureSource:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "featureSource:reference", + "use" : "$0" + } + } ] + }, + "pairwiseSimilarity:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "pairwiseSimilarity:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "pairwiseSimilarity:reference", + "use" : "$0" + } + } ] + }, + "dictionary:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "dictionary:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "dictionary:reference", + "use" : "$0" + } + } ] + }, + "labelCount:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelCount:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + 
"defaultSnippets" : [ { + "body" : { + "type" : "labelCount:reference", + "use" : "$0" + } + } ] + }, + "queryParser:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "queryParser:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "queryParser:reference", + "use" : "$0" + } + } ] + }, + "documents:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documents:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "documents:reference", + "use" : "$0" + } + } ] + }, + "labels:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labels:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "labels:reference", + "use" : "$0" + } + } ] + }, + "vector:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "vector:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "vector:reference", + "use" : "$0" + } + } ] + }, + 
"matrix:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "matrix:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "matrix:reference", + "use" : "$0" + } + } ] + }, + "clusters:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "clusters:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "clusters:reference", + "use" : "$0" + } + } ] + }, + "labelClusters:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "labelClusters:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "labelClusters:reference", + "use" : "$0" + } + } ] + }, + "embedding2d:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "embedding2d:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "embedding2d:reference", + "use" : "$0" + } + } ] + }, + "vectors:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : 
"vectors:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "vectors:reference", + "use" : "$0" + } + } ] + }, + "values:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "values:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "values:reference", + "use" : "$0" + } + } ] + }, + "documentPairs:reference" : { + "type" : "object", + "properties" : { + "type" : { + "const" : "documentPairs:reference" + }, + "use" : { + "type" : "string" + }, + "auto" : { + "type" : "boolean" + } + }, + "additionalProperties" : false, + "oneOf" : [ { + "required" : [ "use" ] + }, { + "properties" : { + "auto" : { + "const" : true + } + }, + "required" : [ "auto" ] + } ], + "defaultSnippets" : [ { + "body" : { + "type" : "documentPairs:reference", + "use" : "$0" + } + } ] + } + }, + "properties" : { + "variables" : { + "$ref" : "#/definitions/variableContainer" + }, + "components" : { + "type" : "object", + "additionalProperties" : { + "$ref" : "#/definitions/component" + }, + "defaultSnippets" : [ { + "body" : { } + } ] + }, + "stages" : { + "type" : "object", + "additionalProperties" : { + "$ref" : "#/definitions/stage" + } + }, + "output" : { + "type" : "object", + "properties" : { + "progress" : { + "type" : "boolean" + }, + "request" : { + "type" : "boolean" + }, + "stages" : { + "type" : "array", + "items" : { + "type" : "string" + }, + "uniqueItems" : true, + "defaultSnippets" : [ { + "body" : [ "^$0" ] + } ] + } + } + }, 
+ "useCache" : { + "type" : "boolean", + "default" : true + }, + "tags" : { + "description" : "One or more tags that describe this request. Used by the API sandbox to label requests.", + "type" : "array", + "items" : { + "type" : "string" + } + }, + "name" : { + "type" : "string" + }, + "comment" : { + "$ref" : "#/definitions/comment" + } + }, + "additionalProperties" : false, + "defaultSnippets" : [ { + "body" : { + "name" : "Documents by query", + "comment" : "Retrieves identifiers of documents that match the provided search query. This request is useful as a building block for larger requests, such as 'Documents by query with contents' or 'Labels from documents'.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000 + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Documents by query", + "description" : "Retrieves identifiers of documents that match the provided search query. This request is useful as a building block for larger requests, such as 'Documents by query with contents' or 'Labels from documents'." + }, { + "body" : { + "name" : "Documents by query statistics", + "comment" : "Computes the number of documents matching the provided search query. This request is useful to get the number of documents matching a query without downloading the, potentially large, list of document identifiers.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000, + "accurateHitCount" : true + }, + "documentStats" : { + "type" : "stats:documents" + } + }, + "output" : { + "stages" : [ "documentStats" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Documents by query statistics", + "description" : "Computes the number of documents matching the provided search query. 
This request is useful to get the number of documents matching a query without downloading the, potentially large, list of document identifiers." + }, { + "body" : { + "name" : "Documents by query with contents", + "comment" : "Retrieves identifiers and content of documents that match the provided query. Use this request for display purposes.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 10 + }, + "documentContent" : { + "type" : "documentContent" + } + }, + "output" : { + "stages" : [ "documentContent", "documents" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Documents by query with contents", + "description" : "Retrieves identifiers and content of documents that match the provided query. Use this request for display purposes." + }, { + "body" : { + "name" : "Documents sample by query", + "comment" : "Selects a uniform random sample of documents from the pool of all documents matching the provided query. This request is a useful building block when processing the whole pool of documents may be costly while processing a small sample the may also provide reasonable results.", + "stages" : { + "documents" : { + "type" : "documents:sample", + "limit" : 100, + "query" : { + "type" : "query:string", + "query" : "" + } + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Documents sample by query", + "description" : "Selects a uniform random sample of documents from the pool of all documents matching the provided query. This request is a useful building block when processing the whole pool of documents may be costly while processing a small sample the may also provide reasonable results." + }, { + "body" : { + "name" : "Documents with content and label occurrences", + "comment" : "Retrieves the contents and labels occurring in each document matching the query. Use this request to get labels occurring in individual documents for display purposes. 
To collect an aggregated list of labels from multiple documents, see the 'Labels from documents' request.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "photon" + }, + "limit" : 10 + }, + "documentContent" : { + "type" : "documentContent" + }, + "documentLabels" : { + "type" : "documentLabels" + } + }, + "output" : { + "stages" : [ "documents", "documentContent", "documentLabels" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Documents with content and label occurrences", + "description" : "Retrieves the contents and labels occurring in each document matching the query. Use this request to get labels occurring in individual documents for display purposes. To collect an aggregated list of labels from multiple documents, see the 'Labels from documents' request." + }, { + "body" : { + "name" : "Documents with content and filtered label occurrences", + "comment" : "Retrieves the contents and labels occurring in each document matching the query, narrowing down the list of labels. Use this request to get labels occurring in individual documents for display purposes. 
To collect an aggregated list of labels from multiple documents with label filtering applied, see the 'Labels from documents with filtering' request.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "photon" + }, + "limit" : 10 + }, + "labels" : { + "type" : "labels:fromDocuments" + }, + "documentContent" : { + "type" : "documentContent" + }, + "documentLabels" : { + "type" : "documentLabels", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:acceptLabels" + } + } + } + }, + "output" : { + "stages" : [ "documents", "labels", "documentContent", "documentLabels" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Documents with content and filtered label occurrences", + "description" : "Retrieves the contents and labels occurring in each document matching the query, narrowing down the list of labels. Use this request to get labels occurring in individual documents for display purposes. To collect an aggregated list of labels from multiple documents with label filtering applied, see the 'Labels from documents with filtering' request." + }, { + "body" : { + "name" : "Labels by prefix", + "comment" : "Retrieves labels starting with the provided prefix. This request is useful for building a label selection user interface with a prefix-based suggestion list.", + "stages" : { + "labels" : { + "type" : "labels:byPrefix", + "prefix" : "test", + "limit" : 20 + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels by prefix", + "description" : "Retrieves labels starting with the provided prefix. This request is useful for building a label selection user interface with a prefix-based suggestion list." 
+ }, { + "body" : { + "name" : "Labels by label embedding vector", + "comment" : "Retrieves labels that are semantically similar to the provided list of labels.", + "stages" : { + "queryLabel" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "similarLabels" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "use" : "queryLabel" + } + } + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels by label embedding vector", + "description" : "Retrieves labels that are semantically similar to the provided list of labels." + }, { + "body" : { + "name" : "Labels by label embedding vector with filtering", + "comment" : "Retrieves labels that are semantically similar to the provided list of labels while filtering out some of the results. The request finds labels similar to 'photon' but excludes from results all labels that actually contain the word 'photon'.", + "stages" : { + "queryLabel" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "similarLabels" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "use" : "queryLabel" + } + }, + "labelFilter" : { + "type" : "labelFilter:dictionary", + "exclude" : [ { + "type" : "dictionary:regex", + "entries" : [ ".*photon.*" ] + } ] + } + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels by label embedding vector with filtering", + "description" : "Retrieves labels that are semantically similar to the provided list of labels while filtering out some of the results. The request finds labels similar to 'photon' but excludes from results all labels that actually contain the word 'photon'." + }, { + "body" : { + "name" : "Labels from text", + "comment" : "Extracts labels occurring in the provided raw text. 
This request is a useful building block when processing text that is not present in the index. For example, this request can be a starting point for making a More-Like-This search where the seed document is an arbitrary piece of text, not present in the index.", + "stages" : { + "labels" : { + "type" : "labels:fromText", + "text" : "Text to extract labels from." + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels from text", + "description" : "Extracts labels occurring in the provided raw text. This request is a useful building block when processing text that is not present in the index. For example, this request can be a starting point for making a More-Like-This search where the seed document is an arbitrary piece of text, not present in the index." + }, { + "body" : { + "name" : "Labels from documents", + "comment" : "Retrieves labels that occur most frequently in the provided set of documents.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "photon" + }, + "limit" : 1000 + }, + "labels" : { + "type" : "labels:fromDocuments" + } + }, + "output" : { + "stages" : [ "labels" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels from documents", + "description" : "Retrieves labels that occur most frequently in the provided set of documents." 
+ }, { + "body" : { + "name" : "Labels from documents with filtering", + "comment" : "Retrieves labels that occur most frequently in the provided documents, with additionally filtering applied.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000 + }, + "labels" : { + "type" : "labels:fromDocuments", + "labelAggregator" : { + "type" : "labelAggregator:topWeight", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "labelFilter" : { + "type" : "labelFilter:composite", + "labelFilters" : { + "dictionary" : { + "type" : "labelFilter:dictionary", + "exclude" : [ { + "type" : "dictionary:glob", + "entries" : [ "* project *" ] + } ] + }, + "default" : { + "type" : "labelFilter:reference", + "use" : "labelFilter" + } + } + } + } + } + } + }, + "output" : { + "stages" : [ "labels" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels from documents with filtering", + "description" : "Retrieves labels that occur most frequently in the provided documents, with additionally filtering applied." + }, { + "body" : { + "name" : "Labels from text with filtering", + "comment" : "Extracts labels occurring in the provided raw text. This request is a useful building block when processing text that is not present in the index. For example, this request can be a starting point for making a More-Like-This search where the seed document is an arbitrary piece of text, not present in the index.", + "variables" : { + "inputText" : { + "name" : "Input text", + "comment" : "Text from which to extract labels.", + "value" : "Using the effective Lagrangian for the low energy photon−photon interaction the lowest order photon self energy at finite temperature and in non−equilibrium is calculated within the real time formalism. The Debye mass, the dispersion relation, the dielectric tensor, and the velocity of light following from the photon self energy are discussed. 
As an application we consider the interaction of photons with the cosmic microwave background radiation.", + "meta" : { + "ui" : "textarea" + } + }, + "minLabelWords" : { + "name" : "Min label words", + "comment" : "Remove labels with fewer words than specified.", + "value" : 2 + }, + "removeCapitalized" : { + "name" : "Remove capitalized", + "value" : false + }, + "removeUppercase" : { + "name" : "Remove uppercase", + "value" : false + }, + "removeAcronyms" : { + "name" : "Remove acronyms", + "value" : false + }, + "exclusionsDictionary" : { + "name" : "Exclusions", + "comment" : "Remove labels matching any of the exclusions dictionary entries.", + "value" : [ "* preliminary *" ] + } + }, + "stages" : { + "labels" : { + "type" : "labels:fromText", + "text" : { + "@var" : "inputText" + }, + "labelFilter" : { + "type" : "labelFilter:composite", + "labelFilters" : { + "stopLabels" : { + "type" : "labelFilter:autoStopLabels" + }, + "surface" : { + "type" : "labelFilter:surface", + "removeCapitalized" : { + "@var" : "removeCapitalized" + }, + "removeUppercase" : { + "@var" : "removeUppercase" + }, + "removeAcronyms" : { + "@var" : "removeAcronyms" + } + }, + "tokens" : { + "type" : "labelFilter:tokenCount", + "minTokens" : { + "@var" : "minLabelWords" + }, + "maxTokens" : 5 + }, + "filter" : { + "type" : "labelFilter:dictionary", + "addAllProjectDictionaries" : true, + "exclude" : [ { + "type" : "dictionary:glob", + "entries" : { + "@var" : "exclusionsDictionary" + } + } ] + } + } + } + } + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels from text with filtering", + "description" : "Extracts labels occurring in the provided raw text. This request is a useful building block when processing text that is not present in the index. For example, this request can be a starting point for making a More-Like-This search where the seed document is an arbitrary piece of text, not present in the index." 
+ }, { + "body" : { + "name" : "Labels from documents with tf-idf and probability ratio scores", + "comment" : "Computes TF-IDF and probability ratio scores for a list of labels collected from a set of documents. This request is useful to get additional statistics for a list of labels.", + "stages" : { + "documents" : { + "type" : "documents:sample", + "query" : { + "type" : "query:string", + "query" : "photon" + }, + "limit" : 10000 + }, + "labels" : { + "type" : "labels:fromDocuments" + }, + "tfidf" : { + "type" : "labels:scored", + "scorer" : { + "type" : "labelScorer:composite", + "scorers" : [ { + "type" : "labelScorer:tf" + }, { + "type" : "labelScorer:idf" + } ] + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "probabilityRatios" : { + "type" : "labels:scored", + "scorer" : { + "type" : "labelScorer:probabilityRatio" + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + } + }, + "output" : { + "stages" : [ "labels", "tfidf", "probabilityRatios" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Labels from documents with tf-idf and probability ratio scores", + "description" : "Computes TF-IDF and probability ratio scores for a list of labels collected from a set of documents. This request is useful to get additional statistics for a list of labels." + }, { + "body" : { + "name" : "Label frequency estimates for a list of labels", + "comment" : "Estimates the occurrence frequencies for a list of labels based on a sample of all documents in the index. Computing frequencies based on the whole index may be costly. 
An estimate is much faster to compute while retaining reasonable quality.", + "stages" : { + "scope" : { + "type" : "documents:sample", + "limit" : 10000, + "query" : { + "type" : "query:all" + } + }, + "labels" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + }, { + "label" : "electron" + }, { + "label" : "proton" + } ] + }, + "df" : { + "type" : "labels:scored", + "scorer" : { + "type" : "labelScorer:df" + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + } + }, + "output" : { + "stages" : [ "labels", "df" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Label frequency estimates for a list of labels", + "description" : "Estimates the occurrence frequencies for a list of labels based on a sample of all documents in the index. Computing frequencies based on the whole index may be costly. An estimate is much faster to compute while retaining reasonable quality." + }, { + "body" : { + "name" : "Label frequencies across a time series", + "comment" : "Computes occurrence frequencies for a list of labels across a set of time ranges. 
This request may be useful to create a chart showing the popularity of selected labels over time.", + "components" : { + "rangeQuery0" : { + "type" : "query:string", + "query" : "created:[2014-01-01 TO 2014-12-31]" + }, + "rangeQuery1" : { + "type" : "query:string", + "query" : "created:[2016-01-01 TO 2016-12-31]" + }, + "rangeQuery2" : { + "type" : "query:string", + "query" : "created:[2018-01-01 TO 2018-12-31]" + } + }, + "variables" : { + "limit" : { + "value" : 10000 + } + }, + "stages" : { + "labels" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + }, { + "label" : "electron" + }, { + "label" : "proton" + } ] + }, + "df0" : { + "type" : "labels:scored", + "scorer" : { + "type" : "labelScorer:df", + "scope" : { + "type" : "documents:sample", + "limit" : { + "@var" : "limit" + }, + "query" : { + "type" : "query:reference", + "use" : "rangeQuery0" + } + } + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "df1" : { + "type" : "labels:scored", + "scorer" : { + "type" : "labelScorer:df", + "scope" : { + "type" : "documents:sample", + "limit" : { + "@var" : "limit" + }, + "query" : { + "type" : "query:reference", + "use" : "rangeQuery1" + } + } + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "df2" : { + "type" : "labels:scored", + "scorer" : { + "type" : "labelScorer:df", + "scope" : { + "type" : "documents:sample", + "limit" : { + "@var" : "limit" + }, + "query" : { + "type" : "query:reference", + "use" : "rangeQuery2" + } + } + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + } + }, + "output" : { + "stages" : [ "labels", "df0", "df1", "df2" ] + }, + "tags" : [ "Documents and Labels" ] + }, + "label" : "Label frequencies across a time series", + "description" : "Computes occurrence frequencies for a list of labels across a set of time ranges. This request may be useful to create a chart showing the popularity of selected labels over time." 
+ }, { + "body" : { + "name" : "Document clusters by keyword similarity", + "comment" : "Clusters a set of documents based on the common labels the documents share. Attempts to describe the clusters by top-frequency labels from cluster's documents. Fetches the content of documents being clustered.", + "variables" : { + "documents" : { + "name" : "Documents to cluster", + "variables" : { + "query" : { + "name" : "Documents query", + "comment" : "Defines the set of documents to cluster.", + "value" : "clustering" + }, + "limit" : { + "name" : "Max documents", + "comment" : "The maximum number of documents matching the query to select for clustering.", + "value" : 1000 + } + } + }, + "clustering" : { + "name" : "Clustering", + "variables" : { + "clusterCreationPreference" : { + "name" : "Cluster creation preference", + "comment" : "How many clusters to create. The more negative the preference, the fewer clusters. The closer the preference to 0, the more clusters.", + "value" : -1000 + }, + "clusterLinkingPreference" : { + "name" : "Cluster linking preference", + "comment" : "How many links to create between clusters. Softening of 0 creates unlinked, flat structure of clusters. Softening of 1.0 creates a highly-linked structure of clusters.", + "value" : 0 + }, + "maxSimilarDocuments" : { + "name" : "Max similar documents", + "comment" : "How many similar documents to find for each document in the similarity matrix. 
The larger the number of similar documents, the larger and more general the clusters and the longer clustering time.", + "value" : 10 + }, + "maxClusterLabels" : { + "name" : "Max cluster labels", + "comment" : "How many labels to use to label each cluster.", + "value" : 3 + } + } + } + }, + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "query" + } + }, + "limit" : { + "@var" : "limit" + } + }, + "content" : { + "type" : "documentContent", + "limit" : { + "@var" : "limit" + } + }, + "clusters" : { + "type" : "clusters:ap", + "matrix" : { + "type" : "matrix:keywordDocumentSimilarity", + "maxNeighbors" : { + "@var" : "maxSimilarDocuments" + } + }, + "inputPreference" : { + "@var" : "clusterCreationPreference" + }, + "softening" : { + "@var" : "clusterLinkingPreference" + } + }, + "labelClusters" : { + "type" : "labelClusters:documentClusterLabels", + "maxLabels" : { + "@var" : "maxClusterLabels" + } + } + }, + "output" : { + "stages" : [ "documents", "content", "clusters", "labelClusters" ] + }, + "tags" : [ "Clustering" ] + }, + "label" : "Document clusters by keyword similarity", + "description" : "Clusters a set of documents based on the common labels the documents share. Attempts to describe the clusters by top-frequency labels from cluster's documents. Fetches the content of documents being clustered." + }, { + "body" : { + "name" : "Document clusters by embedding similarity", + "comment" : "Clusters a set of documents based on the document embedding vector similarity. Attempts to describe the clusters by top-frequency labels from cluster's documents. 
Fetches the content of documents being clustered.", + "variables" : { + "documents" : { + "name" : "Documents to cluster", + "variables" : { + "query" : { + "name" : "Documents query", + "comment" : "Defines the set of documents to cluster.", + "value" : "clustering" + }, + "limit" : { + "name" : "Max documents", + "comment" : "The maximum number of documents matching the query to select for clustering.", + "value" : 10000 + } + } + }, + "clustering" : { + "name" : "Clustering", + "variables" : { + "clusterCreationPreference" : { + "name" : "Cluster creation preference", + "comment" : "How many clusters to create. The more negative the preference, the fewer clusters. The closer the preference to 0, the more clusters.", + "value" : -1000 + }, + "clusterLinkingPreference" : { + "name" : "Cluster linking preference", + "comment" : "How many links to create between clusters. Softening of 0 creates unlinked, flat structure of clusters. Softening of 1.0 creates a highly-linked structure of clusters.", + "value" : 0 + }, + "maxSimilarDocuments" : { + "name" : "Max similar documents", + "comment" : "How many similar documents to find for each document in the similarity matrix. 
The larger the number of similar documents, the larger and more general the clusters and the longer clustering time.", + "value" : 10 + }, + "maxClusterLabels" : { + "name" : "Max cluster labels", + "comment" : "How many labels to use to label each cluster.", + "value" : 3 + } + } + } + }, + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "query" + } + }, + "limit" : { + "@var" : "limit" + } + }, + "content" : { + "type" : "documentContent", + "limit" : { + "@var" : "limit" + } + }, + "clusters" : { + "type" : "clusters:ap", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings" + }, + "maxNeighbors" : { + "@var" : "maxSimilarDocuments" + } + }, + "inputPreference" : { + "@var" : "clusterCreationPreference" + }, + "softening" : { + "@var" : "clusterLinkingPreference" + } + }, + "labelClusters" : { + "type" : "labelClusters:documentClusterLabels", + "maxLabels" : { + "@var" : "maxClusterLabels" + } + } + }, + "output" : { + "stages" : [ "documents", "content", "clusters", "labelClusters" ] + }, + "tags" : [ "Clustering" ] + }, + "label" : "Document clusters by embedding similarity", + "description" : "Clusters a set of documents based on the document embedding vector similarity. Attempts to describe the clusters by top-frequency labels from cluster's documents. Fetches the content of documents being clustered." 
+ }, { + "body" : { + "name" : "Label clusters by embedding similarity", + "comment" : "Groups semantically similar labels into clusters based on the embedding vector similarity.", + "stages" : { + "queryLabel" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "similarLabels" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "use" : "queryLabel" + } + }, + "limit" : 100 + }, + "labelClusters" : { + "type" : "clusters:ap", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedLabelEmbeddings", + "labels" : { + "type" : "labels:reference", + "use" : "similarLabels" + } + } + } + } + }, + "output" : { + "stages" : [ "similarLabels", "labelClusters" ] + }, + "tags" : [ "Clustering" ] + }, + "label" : "Label clusters by embedding similarity", + "description" : "Groups semantically similar labels into clusters based on the embedding vector similarity." 
+ }, { + "body" : { + "name" : "Label clusters by label co-occurrence similarity", + "comment" : "Groups labels into clusters based on how frequently they co-occur in documents in the index.", + "stages" : { + "queryLabel" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "similarLabels" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "use" : "queryLabel" + } + }, + "limit" : 100 + }, + "labelClusters" : { + "type" : "clusters:ap", + "matrix" : { + "type" : "matrix:cooccurrenceLabelSimilarity", + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:forLabels", + "labels" : { + "type" : "labels:reference", + "use" : "similarLabels" + } + } + }, + "labels" : { + "type" : "labels:reference", + "use" : "similarLabels" + } + } + } + }, + "output" : { + "stages" : [ "similarLabels", "labelClusters" ] + }, + "tags" : [ "Clustering" ] + }, + "label" : "Label clusters by label co-occurrence similarity", + "description" : "Groups labels into clusters based on how frequently they co-occur in documents in the index." + }, { + "body" : { + "name" : "Documents by document embedding vector", + "comment" : "Retrieves identifiers of documents that are most similar to the query document, based on multidimensional document embedding similarity. 
Use this request to find a list of documents that are semantically similar to one or more input documents.", + "stages" : { + "queryDocuments" : { + "type" : "documents:byId", + "documents" : [ { + "id" : 0 + } ] + }, + "similarDocuments" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:documentEmbedding", + "documents" : { + "type" : "documents:reference", + "use" : "queryDocuments" + } + }, + "limit" : 10 + } + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "Documents by document embedding vector", + "description" : "Retrieves identifiers of documents that are most similar to the query document, based on multidimensional document embedding similarity. Use this request to find a list of documents that are semantically similar to one or more input documents." + }, { + "body" : { + "name" : "Documents by document embedding vector with contents", + "comment" : "Retrieves the content of documents that are most similar to the query document, based on multidimensional document embedding similarity. 
Use this request to find a list of documents that are semantically similar to one or more input documents.", + "stages" : { + "queryDocuments" : { + "type" : "documents:byId", + "documents" : [ { + "id" : 0 + } ] + }, + "similarDocuments" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:documentEmbedding", + "documents" : { + "type" : "documents:reference", + "use" : "queryDocuments" + } + }, + "limit" : 10 + }, + "queryDocumentsContents" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "queryDocuments" + } + }, + "similarDocumentsContents" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "similarDocuments" + } + } + }, + "output" : { + "stages" : [ "queryDocumentsContents", "similarDocumentsContents", "queryDocuments", "similarDocuments" ] + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "Documents by document embedding vector with contents", + "description" : "Retrieves the content of documents that are most similar to the query document, based on multidimensional document embedding similarity. Use this request to find a list of documents that are semantically similar to one or more input documents." + }, { + "body" : { + "name" : "Documents by label embedding vector", + "comment" : "Retrieves identifiers of documents that are most similar to the provided labels, based on multidimensional embeddings of labels and documents. Use this request to find documents that are semantically similar to one or more labels. 
Compared to a traditional keyword search, the embedding similarity search may return documents that don't contain any of the input labels, but do contain a synonym of the input labels.", + "stages" : { + "queryLabels" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "documents" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding" + }, + "limit" : 10 + } + }, + "output" : { + "stages" : [ "documents" ] + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "Documents by label embedding vector", + "description" : "Retrieves identifiers of documents that are most similar to the provided labels, based on multidimensional embeddings of labels and documents. Use this request to find documents that are semantically similar to one or more labels. Compared to a traditional keyword search, the embedding similarity search may return documents that don't contain any of the input labels, but do contain a synonym of the input labels." + }, { + "body" : { + "name" : "Documents by label embedding vector with content", + "comment" : "Retrieves content of documents that are most similar to the provided labels, based on multidimensional embeddings of labels and documents. Use this request to find documents that are semantically similar to one or more labels. 
Compared to a traditional keyword search, the embedding similarity search may return documents that don't contain any of the input labels, but do contain a synonym of the input labels.", + "stages" : { + "queryLabels" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "documents" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding" + }, + "limit" : 10 + }, + "content" : { + "type" : "documentContent" + } + }, + "output" : { + "stages" : [ "content", "documents" ] + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "Documents by label embedding vector with content", + "description" : "Retrieves content of documents that are most similar to the provided labels, based on multidimensional embeddings of labels and documents. Use this request to find documents that are semantically similar to one or more labels. Compared to a traditional keyword search, the embedding similarity search may return documents that don't contain any of the input labels, but do contain a synonym of the input labels." + }, { + "body" : { + "name" : "More-Like-This document, keyword-based", + "comment" : "Finds documents that are similar to the provided 'seed' document. The keyword-based method extracts top-frequency labels from the seed document and retrieves other documents containing those labels.", + "variables" : { + "seedQuery" : { + "name" : "Seed query", + "comment" : "Defines the set of documents for which to find other similar documents. In most cases, the query should return one or a small number of documents. 
A by-document-id query is usually appropriate here.", + "value" : "id:1703.01028" + }, + "maxSeedDocuments" : { + "name" : "Max seed documents", + "comment" : "Maximum number of documents matching 'seedQuery' to use for the similarity search.", + "value" : 1 + }, + "maxSimilarDocuments" : { + "name" : "Max similar documents", + "comment" : "Maximum number of similar documents to find.", + "value" : 20 + } + }, + "stages" : { + "seedDocuments" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "seedQuery" + } + }, + "limit" : { + "@var" : "maxSeedDocuments" + } + }, + "seedLabels" : { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + }, + "maxLabels" : { + "type" : "labelCount:progressive", + "multiplier" : 1000 + }, + "labelAggregator" : { + "type" : "labelAggregator:topWeight", + "tieResolution" : "EXTEND", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "tieResolution" : "EXTEND" + } + } + }, + "mltDocuments" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:forLabels", + "labels" : { + "type" : "labels:reference", + "use" : "seedLabels" + } + }, + "limit" : { + "@var" : "maxSimilarDocuments" + } + }, + "seedDocumentsContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + }, + "queries" : { + "s" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + } + }, + "k" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "mltDocuments" + } + } + } + }, + "mltDocumentContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "mltDocuments" + }, + "queries" : { + "s" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + } + }, + 
"k" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "mltDocuments" + } + } + }, + "limit" : { + "@var" : "maxSimilarDocuments" + } + } + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "More-Like-This document, keyword-based", + "description" : "Finds documents that are similar to the provided 'seed' document. The keyword-based method extracts top-frequency labels from the seed document and retrieves other documents containing those labels." + }, { + "body" : { + "name" : "More-Like-This document, keyword-based with label and results filtering", + "comment" : "Finds documents that are similar to the provided 'seed' document. Compared to the simpler 'More-Like-This document, keyword-based' request, this one adds the possibility to exclude some of the seed labels and also filter the MLT document list.", + "variables" : { + "seedQuery" : { + "name" : "Seed query", + "comment" : "Defines the set of documents for which to find other similar documents. In most cases, the query should return one or a small number of documents. A by-document-id query is usually appropriate here.", + "value" : "id:1703.01028" + }, + "maxSeedDocuments" : { + "name" : "Max seed documents", + "comment" : "Maximum number of documents matching 'seedQuery' to use for the similarity search.", + "value" : 1 + }, + "maxSimilarDocuments" : { + "name" : "Max similar documents", + "comment" : "Maximum number of similar documents to find.", + "value" : 20 + }, + "similarDocumentsFilterQuery" : { + "name" : "Similar documents filter query", + "comment" : "Additional condition all similar documents must meet. Use the query to narrow down the results to, for example, documents from a specific category.", + "value" : "set:math*" + }, + "seedLabelExclusionDictionaryEntries" : { + "name" : "Seed label exclusion dictionary entries", + "comment" : "Exclusion dictionary entries to apply to filter labels collected from seed documents. 
Use the entries to remove unwanted seed labels.", + "value" : [ "* paradox *" ] + } + }, + "stages" : { + "seedDocuments" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "seedQuery" + } + }, + "limit" : { + "@var" : "maxSeedDocuments" + } + }, + "seedLabels" : { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + }, + "maxLabels" : { + "type" : "labelCount:progressive", + "multiplier" : 1000 + }, + "labelAggregator" : { + "type" : "labelAggregator:topWeight", + "tieResolution" : "EXTEND", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "tieResolution" : "EXTEND", + "labelFilter" : { + "type" : "labelFilter:composite", + "labelFilters" : { + "autoStopLabels" : { + "type" : "labelFilter:autoStopLabels" + }, + "dictionary" : { + "type" : "labelFilter:dictionary", + "exclude" : [ { + "type" : "dictionary:glob", + "entries" : { + "@var" : "seedLabelExclusionDictionaryEntries" + } + } ] + } + } + } + } + } + }, + "mltDocuments" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:filter", + "query" : { + "type" : "query:forLabels", + "labels" : { + "type" : "labels:reference", + "use" : "seedLabels" + } + }, + "filter" : { + "type" : "query:string", + "query" : { + "@var" : "similarDocumentsFilterQuery" + } + } + }, + "limit" : { + "@var" : "maxSimilarDocuments" + } + }, + "seedDocumentsContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + } + }, + "mltDocumentContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "mltDocuments" + }, + "limit" : { + "@var" : "maxSimilarDocuments" + } + } + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "More-Like-This document, keyword-based with label and results filtering", + "description" : "Finds documents that are similar to the provided 'seed' 
document. Compared to the simpler 'More-Like-This document, keyword-based' request, this one adds the possibility to exclude some of the seed labels and also filter the MLT document list." + }, { + "body" : { + "name" : "More-Like-This document, embedding-based", + "comment" : "Finds documents that are similar to the provided 'seed' document. The embedding-based method finds documents whose embedding vectors are similar to the vector of the seed document.", + "variables" : { + "seedQuery" : { + "name" : "Seed query", + "comment" : "Defines the set of documents for which to find other similar documents. In most cases, the query should return one or a small number of documents. A by-document-id query is usually appropriate here.", + "value" : "id:1703.01028" + }, + "maxSeedDocuments" : { + "name" : "Max seed documents", + "comment" : "Maximum number of documents matching 'seedQuery' to use for the similarity search.", + "value" : 1 + }, + "maxSimilarDocuments" : { + "name" : "Max similar documents", + "comment" : "Maximum number of similar documents to find.", + "value" : 20 + } + }, + "stages" : { + "seedDocuments" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "seedQuery" + } + }, + "limit" : { + "@var" : "maxSeedDocuments" + } + }, + "mltDocuments" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:documentEmbedding", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + } + }, + "limit" : { + "@var" : "maxSimilarDocuments" + } + }, + "seedDocumentsContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + } + }, + "mltDocumentContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "mltDocuments" + }, + "limit" : { + "@var" : "maxSimilarDocuments" + } + } + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "More-Like-This 
document, embedding-based", + "description" : "Finds documents that are similar to the provided 'seed' document. The embedding-based method finds documents whose embedding vectors are similar to the vector of the seed document." + }, { + "body" : { + "name" : "More-Like-This document, keyword- and embedding-based with content, label and results filtering", + "comment" : "Finds documents that are similar to the provided 'seed' document. This request combines results from the keyword- and embedding-based methods. Additionally, it allows filtering and providing additional seed labels as well as filtering of MLT results.", + "variables" : { + "seeds" : { + "name" : "Search seed definition", + "comment" : "Defines the 'seed' to use for the similar documents search.", + "variables" : { + "seedQuery" : { + "name" : "Seed query", + "comment" : "Defines the set of documents for which to find other similar documents. In most cases, the query should return one or a small number of documents. A by-document-id query is usually appropriate here.", + "value" : "id:1703.01028" + }, + "maxSeedDocuments" : { + "name" : "Max seed documents", + "comment" : "Maximum number of documents matching 'seedQuery' to use for the similarity search.", + "value" : 1 + }, + "seedLabelExclusionDictionaryEntries" : { + "name" : "Seed label exclusion dictionary entries", + "comment" : "Exclusion dictionary entries to apply to filter labels collected from seed documents. 
Use the entries to remove unwanted seed labels.", + "value" : [ "* paradox *" ] + } + } + }, + "results" : { + "name" : "Similar documents", + "comment" : "Configures the similarity search results", + "variables" : { + "maxKeywordSimilarDocuments" : { + "name" : "Max keyword-similar documents", + "comment" : "Maximum number of similar documents to find using the keyword method.", + "value" : 10 + }, + "maxEmbeddingSimilarDocuments" : { + "name" : "Max embedding-similar documents", + "comment" : "Maximum number of similar documents to find using the embedding method.", + "value" : 10 + }, + "similarDocumentsFilterQuery" : { + "name" : "Similar documents filter query", + "comment" : "Additional condition all similar documents must meet. Use the query to narrow down the results to, for example, documents from a specific category.", + "value" : "set:math*" + }, + "compositionOperator" : { + "name" : "Composition operator", + "comment" : "Determines how to combine the keyword- and embedding-based similar documents. Use 'OR' to take a union of the two lists. 
Use 'AND' to include only the documents returned by both methods.", + "value" : "OR" + } + } + } + }, + "components" : { + "filterQuery" : { + "type" : "query:string", + "query" : { + "@var" : "similarDocumentsFilterQuery" + } + }, + "labelFilter" : { + "type" : "labelFilter:composite", + "labelFilters" : { + "dictionary" : { + "type" : "labelFilter:dictionary", + "exclude" : [ { + "type" : "dictionary:glob", + "entries" : { + "@var" : "seedLabelExclusionDictionaryEntries" + } + } ] + }, + "autoStopLabels" : { + "type" : "labelFilter:autoStopLabels" + } + } + } + }, + "stages" : { + "seedDocuments" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "seedQuery" + } + }, + "limit" : { + "@var" : "maxSeedDocuments" + } + }, + "seedLabels" : { + "type" : "labels:composite", + "sources" : [ { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + }, + "maxLabels" : { + "type" : "labelCount:progressive", + "multiplier" : 1000 + }, + "labelAggregator" : { + "type" : "labelAggregator:topWeight", + "tieResolution" : "EXTEND", + "labelCollector" : { + "type" : "labelCollector:topFromFeatureFields", + "tieResolution" : "EXTEND", + "labelFilter" : { + "type" : "labelFilter:reference", + "use" : "labelFilter" + } + } + } + }, { + "type" : "labels:direct", + "labels" : [ { + "label" : "observable" + } ] + } ] + }, + "keywordMlt" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:filter", + "query" : { + "type" : "query:forLabels", + "labels" : { + "type" : "labels:reference", + "use" : "seedLabels" + } + }, + "filter" : { + "type" : "query:reference", + "use" : "filterQuery" + } + }, + "limit" : { + "@var" : "maxKeywordSimilarDocuments" + } + }, + "embeddingMlt" : { + "type" : "documents:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:composite", + "vectors" : [ { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : 
"labels:reference", + "use" : "seedLabels" + } + }, { + "type" : "vector:documentEmbedding", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + } + } ] + }, + "filterQuery" : { + "type" : "query:reference", + "use" : "filterQuery" + }, + "limit" : { + "@var" : "maxEmbeddingSimilarDocuments" + } + }, + "combinedRwmd" : { + "type" : "documents:composite", + "selectors" : [ { + "type" : "documents:reference", + "use" : "keywordMlt" + }, { + "type" : "documents:reference", + "use" : "embeddingMlt" + } ], + "operator" : { + "@var" : "compositionOperator" + }, + "weightAggregation" : "SUM" + }, + "seedContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "seedDocuments" + }, + "queries" : { + "k" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "keywordMlt" + } + }, + "e" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "mlt" + } + } + } + }, + "keywordMltContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "keywordMlt" + }, + "queries" : { + "k" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "keywordMlt" + } + } + }, + "limit" : { + "@var" : "maxKeywordSimilarDocuments" + } + }, + "embeddingMltContent" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:reference", + "use" : "embeddingMlt" + }, + "queries" : { + "e" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "mlt" + } + } + }, + "limit" : { + "@var" : "maxEmbeddingSimilarDocuments" + } + }, + "mlt" : { + "type" : "documents:rwmd", + "documents" : { + "type" : "documents:reference", + "use" : "combinedRwmd" + }, + "labels" : { + "type" : "labels:reference", + "use" : "seedLabels" + } + }, + "mltContent" : { + "type" : "documentContent", + "documents" : { + 
"type" : "documents:reference", + "use" : "mlt" + }, + "queries" : { + "k" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "keywordMlt" + } + }, + "e" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "combinedRwmd" + } + } + }, + "limit" : "unlimited" + } + }, + "output" : { + "stages" : [ "embeddingMlt", "embeddingMltContent", "keywordMlt", "keywordMltContent", "seedDocuments", "seedContent", "seedLabels", "mlt", "mltContent" ] + }, + "tags" : [ "Similar Documents (MLT)" ] + }, + "label" : "More-Like-This document, keyword- and embedding-based with content, label and results filtering", + "description" : "Finds documents that are similar to the provided 'seed' document. This request combines results from the keyword- and embedding-based methods. Additionally, it allows filtering and providing additional seed labels as well as filtering of MLT results." + }, { + "body" : { + "name" : "Document 2d embedding by keyword similarity", + "comment" : "Puts documents on a 2d map based on the common labels they share. Documents containing similar labels will be close to each other on the map, while documents not sharing any labels will be far apart on the map.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000 + }, + "documents2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:keywordDocumentSimilarity" + } + } + }, + "output" : { + "stages" : [ "documents", "documents2dEmbedding" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document 2d embedding by keyword similarity", + "description" : "Puts documents on a 2d map based on the common labels they share. Documents containing similar labels will be close to each other on the map, while documents not sharing any labels will be far apart on the map." 
+ }, { + "body" : { + "name" : "Document 2d embedding by document embedding similarity", + "comment" : "Puts documents on a 2d map based on the embedding vector similarity. Similar documents will be close to each other on the map, while dissimilar documents should be far apart on the map.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000 + }, + "documents2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings" + } + } + } + }, + "output" : { + "stages" : [ "documents", "documents2dEmbedding" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document 2d embedding by document embedding similarity", + "description" : "Puts documents on a 2d map based on the embedding vector similarity. Similar documents will be close to each other on the map, while dissimilar documents should be far apart on the map." + }, { + "body" : { + "name" : "Document 2d embedding with labels by keyword similarity", + "comment" : "Puts documents on a 2d map based on the common labels they share. 
Additionally, the request computes 2d coordinates of labels to describe specific areas of the map.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000 + }, + "labels" : { + "type" : "labels:fromDocuments" + }, + "documents2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:keywordDocumentSimilarity" + } + }, + "documents2dEmbeddingLabels" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "documents2dEmbedding" + } + } + }, + "output" : { + "stages" : [ "documents", "labels", "documents2dEmbedding", "documents2dEmbeddingLabels" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document 2d embedding with labels by keyword similarity", + "description" : "Puts documents on a 2d map based on the common labels they share. Additionally, the request computes 2d coordinates of labels to describe specific areas of the map." + }, { + "body" : { + "name" : "Document 2d embedding with labels by document embedding similarity", + "comment" : "Puts documents on a 2d map based on the embedding vector similarity. 
Additionally, the request computes 2d coordinates of labels to describe specific areas of the map.", + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : "" + }, + "limit" : 1000 + }, + "labels" : { + "type" : "labels:fromDocuments" + }, + "documents2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings" + } + } + }, + "documents2dEmbeddingLabels" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "documents2dEmbedding" + } + } + }, + "output" : { + "stages" : [ "documents", "labels", "documents2dEmbedding", "documents2dEmbeddingLabels" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document 2d embedding with labels by document embedding similarity", + "description" : "Puts documents on a 2d map based on the embedding vector similarity. Additionally, the request computes 2d coordinates of labels to describe specific areas of the map." + }, { + "body" : { + "name" : "Document 2d embedding with labels and clusters by keyword similarity", + "comment" : "Puts documents on a 2d map based on the keyword document similarity. 
Additionally, computes 2d coordinates of labels to describe specific areas of the map and clusters documents based on their 2d embedding distance.", + "variables" : { + "query" : { + "name" : "Query", + "comment" : "Selects documents to arrange into a 2d map.", + "value" : "\"dark matter\"" + }, + "maxDocuments" : { + "name" : "Max documents", + "comment" : "Maximum number of documents to include in the 2d map.", + "value" : 5000 + }, + "maxLabels" : { + "name" : "Max labels", + "comment" : "Maximum number of labels to put on the 2d map.", + "value" : 200 + } + }, + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "query" + } + }, + "limit" : { + "@var" : "maxDocuments" + } + }, + "labels" : { + "type" : "labels:fromDocuments", + "maxLabels" : { + "type" : "labelCount:fixed", + "value" : { + "@var" : "maxLabels" + } + } + }, + "documents2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:keywordDocumentSimilarity" + } + }, + "documents2dEmbeddingLabels" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "documents2dEmbedding" + } + }, + "docClusters" : { + "type" : "clusters:ap", + "matrix" : { + "type" : "matrix:knn2dDistanceSimilarity", + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "documents2dEmbedding" + }, + "maxNearestPoints" : 32 + }, + "softening" : 0.2, + "inputPreference" : -10000 + } + }, + "output" : { + "stages" : [ "documents", "labels", "documents2dEmbedding", "documents2dEmbeddingLabels", "docClusters" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document 2d embedding with labels and clusters by keyword similarity", + "description" : "Puts documents on a 2d map based on the keyword document similarity. 
Additionally, computes 2d coordinates of labels to describe specific areas of the map and clusters documents based on their 2d embedding distance." + }, { + "body" : { + "name" : "Document 2d embedding with labels and clusters by embedding similarity", + "comment" : "Puts documents on a 2d map based on the document embedding similarity. Additionally, computes 2d coordinates of labels to describe specific areas of the map and clusters documents based on their 2d embedding distance.", + "variables" : { + "query" : { + "name" : "Query", + "comment" : "Selects documents to arrange into a 2d map.", + "value" : "\"dark matter\"" + }, + "maxDocuments" : { + "name" : "Max documents", + "comment" : "Maximum number of documents to include in the 2d map.", + "value" : 5000 + }, + "maxLabels" : { + "name" : "Max labels", + "comment" : "Maximum number of labels to put on the 2d map.", + "value" : 200 + } + }, + "stages" : { + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "query" + } + }, + "limit" : { + "@var" : "maxDocuments" + } + }, + "labels" : { + "type" : "labels:fromDocuments", + "maxLabels" : { + "type" : "labelCount:fixed", + "value" : { + "@var" : "maxLabels" + } + } + }, + "documents2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings" + } + } + }, + "documents2dEmbeddingLabels" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "labels" : { + "type" : "labels:reference", + "use" : "labels" + } + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "documents2dEmbedding" + } + }, + "docClusters" : { + "type" : "clusters:ap", + "matrix" : { + "type" : "matrix:knn2dDistanceSimilarity", + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "documents2dEmbedding" + }, + "maxNearestPoints" : 32 + }, + 
"softening" : 0.2, + "inputPreference" : -10000 + } + }, + "output" : { + "stages" : [ "documents", "labels", "documents2dEmbedding", "documents2dEmbeddingLabels", "docClusters" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document 2d embedding with labels and clusters by embedding similarity", + "description" : "Puts documents on a 2d map based on the document embedding similarity. Additionally, computes 2d coordinates of labels to describe specific areas of the map and clusters documents based on their 2d embedding distance." + }, { + "body" : { + "name" : "Label 2d embedding by embedding similarity", + "comment" : "Puts labels on a 2d map based on their embedding vector similarity. Semantically similar labels will be close to each other on the map, dissimilar labels should be far apart on the map.", + "stages" : { + "queryLabel" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "similarLabels" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "use" : "queryLabel" + } + }, + "limit" : 100 + }, + "label2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedLabelEmbeddings", + "labels" : { + "type" : "labels:reference", + "use" : "similarLabels" + } + } + } + } + }, + "output" : { + "stages" : [ "label2dEmbedding", "similarLabels" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Label 2d embedding by embedding similarity", + "description" : "Puts labels on a 2d map based on their embedding vector similarity. Semantically similar labels will be close to each other on the map, dissimilar labels should be far apart on the map." + }, { + "body" : { + "name" : "Label 2d embedding by label co-occurrence similarity", + "comment" : "Puts labels on a 2d map based on how frequently they co-occur in the documents. 
Frequently co-occurring labels will be close to each other on the map.", + "stages" : { + "queryLabel" : { + "type" : "labels:direct", + "labels" : [ { + "label" : "photon" + } ] + }, + "similarLabels" : { + "type" : "labels:embeddingNearestNeighbors", + "vector" : { + "type" : "vector:labelEmbedding", + "labels" : { + "type" : "labels:reference", + "use" : "queryLabel" + } + }, + "limit" : 100 + }, + "label2dEmbedding" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:cooccurrenceLabelSimilarity", + "documents" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:forLabels", + "labels" : { + "type" : "labels:reference", + "use" : "similarLabels" + } + } + }, + "labels" : { + "type" : "labels:reference", + "use" : "similarLabels" + } + } + } + }, + "output" : { + "stages" : [ "label2dEmbedding", "similarLabels" ] + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Label 2d embedding by label co-occurrence similarity", + "description" : "Puts labels on a 2d map based on how frequently they co-occur in the documents. Frequently co-occurring labels will be close to each other on the map." + }, { + "body" : { + "name" : "Document embedding with labels time series", + "comment" : "Computes a series of 2d maps for documents from overlapping periods of time. 
You can use this request to show how the thematic structure of documents evolves over time.", + "components" : { + "baseQuery" : { + "type" : "query:string", + "query" : "photon" + }, + "rangeQuery0" : { + "type" : "query:string", + "query" : "created:[2015-01-01 TO 2017-01-01]" + }, + "rangeQuery1" : { + "type" : "query:string", + "query" : "created:[2016-01-01 TO 2018-01-01]" + }, + "rangeQuery2" : { + "type" : "query:string", + "query" : "created:[2017-01-01 TO 2019-01-01]" + }, + "baseAndRangesQuery" : { + "type" : "query:composite", + "operator" : "AND", + "queries" : [ { + "type" : "query:reference", + "use" : "baseQuery" + }, { + "type" : "query:composite", + "operator" : "OR", + "queries" : [ { + "type" : "query:reference", + "use" : "rangeQuery0" + }, { + "type" : "query:reference", + "use" : "rangeQuery1" + }, { + "type" : "query:reference", + "use" : "rangeQuery2" + } ] + } ] + } + }, + "stages" : { + "sample" : { + "type" : "documents:sample", + "limit" : 10000, + "query" : { + "type" : "query:reference", + "use" : "baseAndRangesQuery" + } + }, + "documents0" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:filter", + "query" : { + "type" : "query:reference", + "use" : "rangeQuery0" + }, + "filter" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "sample" + }, + "buildFromDocumentIds" : true + } + } + }, + "documents1" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:filter", + "query" : { + "type" : "query:reference", + "use" : "rangeQuery1" + }, + "filter" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "sample" + }, + "buildFromDocumentIds" : true + } + } + }, + "documents2" : { + "type" : "documents:byQuery", + "query" : { + "type" : "query:filter", + "query" : { + "type" : "query:reference", + "use" : "rangeQuery2" + }, + "filter" : { + "type" : "query:fromDocuments", + "documents" : { + "type" : 
"documents:reference", + "use" : "sample" + }, + "buildFromDocumentIds" : true + } + } + }, + "labels0" : { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "documents0" + } + }, + "labels1" : { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "documents1" + } + }, + "labels2" : { + "type" : "labels:fromDocuments", + "documents" : { + "type" : "documents:reference", + "use" : "documents2" + } + }, + "docEmbedding0" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings", + "documents" : { + "type" : "documents:reference", + "use" : "documents0" + } + } + } + }, + "labelEmbedding0" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "documents" : { + "type" : "documents:reference", + "use" : "documents0" + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels0" + } + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "docEmbedding0" + } + }, + "docEmbedding1" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings", + "documents" : { + "type" : "documents:reference", + "use" : "documents1" + } + } + }, + "initial" : { + "type" : "embedding2d:transferred", + "source" : { + "type" : "documents:reference", + "use" : "documents0" + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "docEmbedding0" + }, + "target" : { + "type" : "documents:reference", + "use" : "documents1" + } + } + }, + "labelEmbedding1" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "documents" : { + "type" : "documents:reference", + "use" : "documents1" + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels1" + } + }, + 
"embedding2d" : { + "type" : "embedding2d:reference", + "use" : "docEmbedding1" + }, + "initial" : { + "type" : "embedding2d:transferred", + "source" : { + "type" : "labels:reference", + "use" : "labels0" + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "labelEmbedding0" + }, + "target" : { + "type" : "labels:reference", + "use" : "labels1" + } + } + }, + "docEmbedding2" : { + "type" : "embedding2d:lv", + "matrix" : { + "type" : "matrix:knnVectorsSimilarity", + "vectors" : { + "type" : "vectors:precomputedDocumentEmbeddings", + "documents" : { + "type" : "documents:reference", + "use" : "documents2" + } + } + }, + "initial" : { + "type" : "embedding2d:transferred", + "source" : { + "type" : "documents:reference", + "use" : "documents1" + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "docEmbedding1" + }, + "target" : { + "type" : "documents:reference", + "use" : "documents2" + } + } + }, + "labelEmbedding2" : { + "type" : "embedding2d:lvOverlay", + "matrix" : { + "type" : "matrix:keywordLabelDocumentSimilarity", + "documents" : { + "type" : "documents:reference", + "use" : "documents2" + }, + "labels" : { + "type" : "labels:reference", + "use" : "labels2" + } + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "docEmbedding2" + }, + "initial" : { + "type" : "embedding2d:transferred", + "source" : { + "type" : "labels:reference", + "use" : "labels1" + }, + "embedding2d" : { + "type" : "embedding2d:reference", + "use" : "labelEmbedding1" + }, + "target" : { + "type" : "labels:reference", + "use" : "labels2" + } + } + } + }, + "tags" : [ "2D Embeddings" ] + }, + "label" : "Document embedding with labels time series", + "description" : "Computes a series of 2d maps for documents from overlapping periods of time. You can use this request to show how the thematic structure of documents evolves over time." 
+ }, { + "body" : { + "name" : "Pairs of similar but not identical documents", + "comment" : "Finds pairs of documents with similar but not identical content in the 'abstract' field. You can use requests of this type to identify potentially plagiarized content, where large parts of text are identical or nearly identical.", + "variables" : { + "scopeQuery" : { + "name" : "Duplicate search scope query", + "comment" : "Determines the set of documents to search for duplicated content.", + "value" : "*:*" + }, + "scopeMaxDocuments" : { + "name" : "Duplicate search scope size", + "comment" : "Determines how many of the in-scope documents to search for duplicated content.", + "value" : 1000000 + }, + "fieldsToCompare" : { + "name" : "Fields to compare", + "comment" : "Fields to check for duplicated content.", + "value" : [ "abstract" ] + } + }, + "components" : { + "fields" : { + "type" : "fields:simple", + "fields" : { + "@var" : "fieldsToCompare" + } + } + }, + "stages" : { + "duplicates" : { + "type" : "documentPairs:duplicates", + "query" : { + "type" : "query:string", + "query" : { + "@var" : "scopeQuery" + } + }, + "hashGrouping" : { + "pairing" : { + "maxHashBitsDifferent" : 3, + "maxHashGroupSize" : 500 + }, + "features" : { + "type" : "featureSource:simhash", + "source" : { + "type" : "featureSource:flatten", + "source" : { + "type" : "featureSource:words" + } + } + } + }, + "validation" : { + "pairwiseSimilarity" : { + "type" : "pairwiseSimilarity:featureIntersectionMinRatio", + "features" : { + "type" : "featureSource:count", + "minFeatureCount" : 20, + "source" : { + "type" : "featureSource:flatten", + "source" : { + "type" : "featureSource:words" + } + } + } + }, + "min" : 0.8, + "max" : 0.99 + }, + "output" : { + "explanations" : true, + "limit" : 10 + } + }, + "content" : { + "type" : "documentContent", + "documents" : { + "type" : "documents:fromDocumentPairs" + }, + "fields" : { + "type" : "contentFields:grouped", + "groups" : [ { + "fields" : { + "@var" 
: "fieldsToCompare" + }, + "config" : { + "maxValues" : 1, + "maxValueLength" : 160 + } + } ] + } + } + }, + "tags" : [ "Duplicate Detection" ] + }, + "label" : "Pairs of similar but not identical documents", + "description" : "Finds pairs of documents with similar but not identical content in the 'abstract' field. You can use requests of this type to identify potentially plagiarized content, where large parts of text are identical or nearly identical." + } ] +} \ No newline at end of file