Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Inner hit of multi vector with filter #1708

Closed
heemin32 opened this issue May 20, 2024 · 1 comment
Closed

[FEATURE] Inner hit of multi vector with filter #1708

heemin32 opened this issue May 20, 2024 · 1 comment

Comments

@heemin32
Copy link
Collaborator

heemin32 commented May 20, 2024

Similar to #1447, I want to see the inner hits of a multi-vector (nested) k-NN query when a filter is applied.

Create KNN index

PUT /my-knn-index-1
{
  "settings": {
    "index": {
      "knn": true,
      "knn.algo_param.ef_search": 100
    }
  },
  "mappings": {
    "properties": {
      "nested_field": {
        "type": "nested",
        "properties": {
          "my_vector": {
            "type": "knn_vector",
            "dimension": 3,
            "method": {
              "name": "hnsw",
              "space_type": "l2",
              "engine": "lucene",
              "parameters": {
                "ef_construction": 100,
                "m": 16
              }
            }
          }
        }
      }
    }
  }
}

Insert Data

PUT /_bulk?refresh=true
{ "index": { "_index": "my-knn-index-1", "_id": "1" } }
{"parking": false, "nested_field":[{"my_vector":[1,1,1], "color": "yellow"},{"my_vector":[2,2,2]},{"my_vector":[3,3,3]}]}
{ "index": { "_index": "my-knn-index-1", "_id": "2" } }
{"parking": true, "nested_field":[{"my_vector":[10,10,10], "color": "blue"},{"my_vector":[20,20,20], "color": "blue"},{"my_vector":[30,30,30]}]}
{ "index": { "_index": "my-knn-index-1", "_id": "3" } }
{"parking": true, "nested_field":[{"my_vector":[100,100,100], "color": "blue"},{"my_vector":[200,200,200]},{"my_vector":[300,300,300]}]}

Query

GET /my-knn-index-1/_search
{
  "query": {
    "nested": {
      "path": "nested_field",
      "query": {
        "knn": {
          "nested_field.my_vector": {
            "vector": [
              10,
              11,
              12
            ],
            "k": 1,
            "filter": {
              "term": {
                "parking": true
              }
            }
          }
        }
      },
      "inner_hits": {
        "name": "nested_hits"
      }
    }
  }
}

Response

It returns an empty result for the inner hits. The filter is applied to the nested field items when the inner hits are computed, even though it targets a field on the parent document. Therefore, every nested item is filtered out.

{
  "took": 20,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 0.16666667,
    "hits": [
      {
        "_index": "my-knn-index-1",
        "_id": "2",
        "_score": 0.16666667,
        "_source": {
          "parking": true,
          "nested_field": [
            {
              "my_vector": [
                10,
                10,
                10
              ],
              "color": "blue"
            },
            {
              "my_vector": [
                20,
                20,
                20
              ],
              "color": "blue"
            },
            {
              "my_vector": [
                30,
                30,
                30
              ]
            }
          ]
        },
        "inner_hits": {
          "nested_hits": {
            "hits": {
              "total": {
                "value": 0,
                "relation": "eq"
              },
              "max_score": null,
              "hits": []
            }
          }
        }
      }
    ]
  }
}

Query with filter on nested item

When the filter is applied only to a nested item attribute, it returns a non-empty result for the inner hits. The result is still not correct, though, due to #1447.

{
  "query": {
    "nested": {
      "path": "nested_field",
      "query": {
        "knn": {
          "nested_field.my_vector": {
            "vector": [
              10,
              11,
              12
            ],
            "k": 2,
            "filter": {
              "term": {
                "nested_field.color": "blue"
              }
            }
          }
        }
      },
      "inner_hits": {}
    }
  }
}

Response

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.16666667,
    "hits": [
      {
        "_index": "my-knn-index-1",
        "_id": "2",
        "_score": 0.16666667,
        "_source": {
          "parking": true,
          "nested_field": [
            {
              "my_vector": [
                10,
                10,
                10
              ],
              "color": "blue"
            },
            {
              "my_vector": [
                20,
                20,
                20
              ],
              "color": "blue"
            },
            {
              "my_vector": [
                30,
                30,
                30
              ]
            }
          ]
        },
        "inner_hits": {
          "nested_field": {
            "hits": {
              "total": {
                "value": 2,
                "relation": "eq"
              },
              "max_score": 0.16666667,
              "hits": [
                {
                  "_index": "my-knn-index-1",
                  "_id": "2",
                  "_nested": {
                    "field": "nested_field",
                    "offset": 0
                  },
                  "_score": 0.16666667,
                  "_source": {
                    "color": "blue",
                    "my_vector": [
                      10,
                      10,
                      10
                    ]
                  }
                },
                {
                  "_index": "my-knn-index-1",
                  "_id": "2",
                  "_nested": {
                    "field": "nested_field",
                    "offset": 1
                  },
                  "_score": 0.0040650405,
                  "_source": {
                    "color": "blue",
                    "my_vector": [
                      20,
                      20,
                      20
                    ]
                  }
                }
              ]
            }
          }
        }
      },
      {
        "_index": "my-knn-index-1",
        "_id": "3",
        "_score": 4.2076917E-5,
        "_source": {
          "parking": true,
          "nested_field": [
            {
              "my_vector": [
                100,
                100,
                100
              ],
              "color": "blue"
            },
            {
              "my_vector": [
                200,
                200,
                200
              ]
            },
            {
              "my_vector": [
                300,
                300,
                300
              ]
            }
          ]
        },
        "inner_hits": {
          "nested_field": {
            "hits": {
              "total": {
                "value": 0,
                "relation": "eq"
              },
              "max_score": null,
              "hits": []
            }
          }
        }
      }
    ]
  }
}
@heemin32
Copy link
Collaborator Author

Closing in favor of opensearch-project/OpenSearch#13903

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants