Use k-Nearest Neighbor (k-NN) Search in Amazon OpenSearch Service - 2/N - the YouTube Video (OpenSearch and Bulk Vector Search)

2024年08月10日


This article serves as a study note on the following YouTube video about OpenSearch and Bulk Vector Search.



[02:52]
Introducing the Multi-Search API

GET /_msearch
{"index": "my-index-00001"}
{"query": {"match": {"message": "this is a test"}}}
{"index": "my-index-00002"}
{"query": {"match_all": {}}}

[03:27]
Bulk KNN Search using Multi-Search API
GET _msearch
{ "index": "my-index2" }
{ "query": { "knn": { "my_vector6": { "vector": [2, 3, 5, 6], "k":2 } } } }
{ "index": "my-index2" }
{ "query": { "knn": { "my_vector5": { "vector": [2, 3], "k":2 } } } }

k-NN enables users to find the k-nearest neighbors to a query point in a vector index.
Customization options include the vector field, query vector and number of nearest neighbors.
The API response includes the results of each k-NN query.

[04:09] Demo
Create an index with two k-NN vector fields, my_vector5 (2 dimensions) and my_vector6 (4 dimensions), forming the foundation for vector search.
PUT my-index2
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "my_vector5": {
        "type": "knn_vector",
        "dimension": 2
      },
      "my_vector6": {
        "type": "knn_vector",
        "dimension": 4
      }
    }
  }
}
Output:
{
  "acknowledged": true,
  "shards_acknowledged": true,
  "index": "my-index2"
}

Put some data into OpenSearch.
POST _bulk
{ "index": { "_index": "my-index2", "_id": "1" } }
{ "my_vector5": [1.5, 2.5], "price": 12.2 }
{ "index": { "_index": "my-index2", "_id": "2" } }
{ "my_vector5": [2.5, 3.5], "price": 7.1 }
{ "index": { "_index": "my-index2", "_id": "3" } }
{ "my_vector5": [3.5, 4.5], "price": 12.9 }
{ "index": {"_index": "my-index2", "_id": "4" } }
{ "my_vector5": [5.5, 6.5], "price": 1.2 }
{ "index": { "_index": "my-index2", "_id": "5" } }
{ "my_vector5": [4.5, 5.5], "price": 3.7 }
{ "index": { "_index": "my-index2", "_id":"6" } }
{ "my_vector6": [1.5, 5.5, 4.5, 6.4], "price": 10.3 }
{ "index": { "_index": "my-index2", "_id": "7" } }
{ "my_vector6": [2.5, 3.5, 5.6, 6.7], "price": 5.5 }
{ "index": { "_index": "my-index2", "_id": "8" } }
{ "my_vector6": [4.5, 5.5, 6.7, 3.7], "price": 4.4 }
{ "index": {"_index": "my-index2", "_id":"9" } }
{ "my_vector6": [1.5, 5.5, 4.5, 6.4], "price": 8.9 }
Output:
{
  "took": 3788,
  "errors": false,
  "items": [
    {
      "index": {
        "_index": "my-index2",
        "_id": "1",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "2",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "3",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "4",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "5",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 1,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "6",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 1,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "7",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "8",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 1,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "my-index2",
        "_id": "9",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 2,
        "_primary_term": 1,
        "status": 201
      }
    }
  ]
}

After ingesting the sample data, in which each document holds a vector and a price, we can perform a search to find the prices associated with the nearest neighbors of a specific vector value, [2, 3, 5, 6].
GET my-index2/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector6": {
        "vector": [2, 3, 5, 6],
        "k": 2
      }
    }
  }
}
Output:
{
  "took": 4173,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 4,
      "relation": "eq"
    },
    "max_score": 0.42553198,
    "hits": [
      {
        "_index": "my-index2",
        "_id": "7",
        "_score": 0.42553198,
        "_source": {
          "my_vector6": [
            2.5,
            3.5,
            5.6,
            6.7
          ],
          "price": 5.5
        }
      },
      {
        "_index": "my-index2",
        "_id": "9",
        "_score": 0.12642226,
        "_source": {
          "my_vector6": [
            1.5,
            5.5,
            4.5,
            6.4
          ],
          "price": 8.9
        }
      }
    ]
  }
}
Notice that the two nearest neighbors returned have the prices 5.5 and 8.9.

Next, let's explore the potential of multiple queries in a single search operation.
OpenSearch's msearch feature enables the simultaneous execution of multiple queries by specifying the index name and search parameters for each query.
GET _msearch
{ "index": "my-index2" }
{ "query": { "knn": { "my_vector6": { "vector": [2, 3, 5, 6], "k": 2 } } } }
{ "index": "my-index2", "search_type": "dfs_query_then_fetch" }
{ "query": { "knn": { "my_vector5": { "vector": [2, 3], "k": 2 } } } }
Output:
{
  "took": 951,
  "responses": [
    {
      "took": 185,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": {
          "value": 4,
          "relation": "eq"
        },
        "max_score": 0.42553198,
        "hits": [
          {
            "_index": "my-index2",
            "_id": "7",
            "_score": 0.42553198,
            "_source": {
              "my_vector6": [
                2.5,
                3.5,
                5.6,
                6.7
              ],
              "price": 5.5
            }
          },
          {
            "_index": "my-index2",
            "_id": "9",
            "_score": 0.12642226,
            "_source": {
              "my_vector6": [
                1.5,
                5.5,
                4.5,
                6.4
              ],
              "price": 8.9
            }
          },
          {
            "_index": "my-index2",
            "_id": "6",
            "_score": 0.12642226,
            "_source": {
              "my_vector6": [
                1.5,
                5.5,
                4.5,
                6.4
              ],
              "price": 10.3
            }
          },
          {
            "_index": "my-index2",
            "_id": "8",
            "_score": 0.04612546,
            "_source": {
              "my_vector6": [
                4.5,
                5.5,
                6.7,
                3.7
              ],
              "price": 4.4
            }
          }
        ]
      },
      "status": 200
    },
    {
      "took": 588,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": {
          "value": 5,
          "relation": "eq"
        },
        "max_score": 0.6666667,
        "hits": [
          {
            "_index": "my-index2",
            "_id": "2",
            "_score": 0.6666667,
            "_source": {
              "my_vector5": [
                2.5,
                3.5
              ],
              "price": 7.1
            }
          },
          {
            "_index": "my-index2",
            "_id": "1",
            "_score": 0.6666667,
            "_source": {
              "my_vector5": [
                1.5,
                2.5
              ],
              "price": 12.2
            }
          },
          {
            "_index": "my-index2",
            "_id": "3",
            "_score": 0.18181819,
            "_source": {
              "my_vector5": [
                3.5,
                4.5
              ],
              "price": 12.9
            }
          },
          {
            "_index": "my-index2",
            "_id": "5",
            "_score": 0.074074075,
            "_source": {
              "my_vector5": [
                4.5,
                5.5
              ],
              "price": 3.7
            }
          },
          {
            "_index": "my-index2",
            "_id": "4",
            "_score": 0.039215688,
            "_source": {
              "my_vector5": [
                5.5,
                6.5
              ],
              "price": 1.2
            }
          }
        ]
      },
      "status": 200
    }
  ]
}
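It retrieves the prices associated with each query. Note that each response contains more than k = 2 hits: the requests do not set "size", and k is applied per shard (the index has 5 shards), so up to k neighbors from every shard are returned.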

Results for the first query:
...
        "hits": [
          {
            "_index": "my-index2",
            "_id": "7",
            "_score": 0.42553198,
            "_source": {
              "my_vector6": [
                2.5,
                3.5,
                5.6,
                6.7
              ],
              "price": 5.5
            }
          },
          {
            "_index": "my-index2",
            "_id": "9",
            "_score": 0.12642226,
            "_source": {
              "my_vector6": [
                1.5,
                5.5,
                4.5,
                6.4
              ],
              "price": 8.9
            }
          },
          {
            "_index": "my-index2",
            "_id": "6",
            "_score": 0.12642226,
            "_source": {
              "my_vector6": [
                1.5,
                5.5,
                4.5,
                6.4
              ],
              "price": 10.3
            }
          },
          {
            "_index": "my-index2",
            "_id": "8",
            "_score": 0.04612546,
            "_source": {
              "my_vector6": [
                4.5,
                5.5,
                6.7,
                3.7
              ],
              "price": 4.4
            }
          }
        ]
...
Results for the other query:
...
      "hits": {
        "total": {
          "value": 5,
          "relation": "eq"
        },
        "max_score": 0.6666667,
        "hits": [
          {
            "_index": "my-index2",
            "_id": "2",
            "_score": 0.6666667,
            "_source": {
              "my_vector5": [
                2.5,
                3.5
              ],
              "price": 7.1
            }
          },
          {
            "_index": "my-index2",
            "_id": "1",
            "_score": 0.6666667,
            "_source": {
              "my_vector5": [
                1.5,
                2.5
              ],
              "price": 12.2
            }
          },
          {
            "_index": "my-index2",
            "_id": "3",
            "_score": 0.18181819,
            "_source": {
              "my_vector5": [
                3.5,
                4.5
              ],
              "price": 12.9
            }
          },
          {
            "_index": "my-index2",
            "_id": "5",
            "_score": 0.074074075,
            "_source": {
              "my_vector5": [
                4.5,
                5.5
              ],
              "price": 3.7
            }
          },
          {
            "_index": "my-index2",
            "_id": "4",
            "_score": 0.039215688,
            "_source": {
              "my_vector5": [
                5.5,
                6.5
              ],
              "price": 1.2
            }
          }
        ]
...


Category: data Tags: public

Upvote


Downvote