From 3885b5c25178d2a88fc3b953d572b518ef0d1da6 Mon Sep 17 00:00:00 2001
From: Jimmy Lin <jimmylin@uwaterloo.ca>
Date: Mon, 8 Jul 2024 10:08:58 -0400
Subject: [PATCH] Refactor tolerance settings for MS MARCO dense vector
 regressions (#2541)

Continuation of #2538: refactor the tolerance values for HNSW indexes and calibrate them with respect to flat index scores.
---
 ...ssage.bge-base-en-v1.5.hnsw-int8.cached.md |  13 +-
 ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md |  11 +-
 ...19-passage.bge-base-en-v1.5.hnsw.cached.md |  11 +-
 ...dl19-passage.bge-base-en-v1.5.hnsw.onnx.md |  13 +-
 ...ere-embed-english-v3.0.hnsw-int8.cached.md |  13 +-
 ...e.cohere-embed-english-v3.0.hnsw.cached.md |  13 +-
 ...passage.cos-dpr-distil.hnsw-int8.cached.md |  13 +-
 ...9-passage.cos-dpr-distil.hnsw-int8.onnx.md |  13 +-
 ...dl19-passage.cos-dpr-distil.hnsw.cached.md |  13 +-
 ...s-dl19-passage.cos-dpr-distil.hnsw.onnx.md |  13 +-
 ...19-passage.openai-ada2.hnsw-int8.cached.md |  13 +-
 ...ns-dl19-passage.openai-ada2.hnsw.cached.md |  11 +-
 ...ssage.bge-base-en-v1.5.hnsw-int8.cached.md |  13 +-
 ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md |  13 +-
 ...20-passage.bge-base-en-v1.5.hnsw.cached.md |  13 +-
 ...dl20-passage.bge-base-en-v1.5.hnsw.onnx.md |  13 +-
 ...ere-embed-english-v3.0.hnsw-int8.cached.md |  13 +-
 ...e.cohere-embed-english-v3.0.hnsw.cached.md |  11 +-
 ...passage.cos-dpr-distil.hnsw-int8.cached.md |  13 +-
 ...0-passage.cos-dpr-distil.hnsw-int8.onnx.md |  13 +-
 ...dl20-passage.cos-dpr-distil.hnsw.cached.md |  13 +-
 ...s-dl20-passage.cos-dpr-distil.hnsw.onnx.md |  13 +-
 ...20-passage.openai-ada2.hnsw-int8.cached.md |  11 +-
 ...ns-dl20-passage.openai-ada2.hnsw.cached.md |   9 +-
 ...ssage.bge-base-en-v1.5.hnsw-int8.cached.md |  13 +-
 ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md |  13 +-
 ...v1-passage.bge-base-en-v1.5.hnsw.cached.md |  11 +-
 ...o-v1-passage.bge-base-en-v1.5.hnsw.onnx.md |  11 +-
 ...ere-embed-english-v3.0.hnsw-int8.cached.md |  13 +-
 ...e.cohere-embed-english-v3.0.hnsw.cached.md |  13 +-
 ...passage.cos-dpr-distil.hnsw-int8.cached.md |  13 +-
 ...1-passage.cos-dpr-distil.hnsw-int8.onnx.md |  13 +-
 ...o-v1-passage.cos-dpr-distil.hnsw.cached.md |  13 +-
 ...rco-v1-passage.cos-dpr-distil.hnsw.onnx.md |  13 +-
 ...v1-passage.openai-ada2.hnsw-int8.cached.md |  13 +-
 ...arco-v1-passage.openai-ada2.hnsw.cached.md |   9 +-
 src/main/python/run_regression.py             | 192 +++++++++++-------
 ...bge-base-en-v1.5.hnsw-int8.cached.template |   5 +-
 ...e.bge-base-en-v1.5.hnsw-int8.onnx.template |   5 +-
 ...sage.bge-base-en-v1.5.hnsw.cached.template |   5 +-
 ...assage.bge-base-en-v1.5.hnsw.onnx.template |   5 +-
 ...bed-english-v3.0.hnsw-int8.cached.template |   5 +-
 ...re-embed-english-v3.0.hnsw.cached.template |   5 +-
 ...e.cos-dpr-distil.hnsw-int8.cached.template |   5 +-
 ...age.cos-dpr-distil.hnsw-int8.onnx.template |   5 +-
 ...assage.cos-dpr-distil.hnsw.cached.template |   5 +-
 ...-passage.cos-dpr-distil.hnsw.onnx.template |   5 +-
 ...sage.openai-ada2.hnsw-int8.cached.template |   5 +-
 ...9-passage.openai-ada2.hnsw.cached.template |   5 +-
 ...bge-base-en-v1.5.hnsw-int8.cached.template |   5 +-
 ...e.bge-base-en-v1.5.hnsw-int8.onnx.template |   5 +-
 ...sage.bge-base-en-v1.5.hnsw.cached.template |   5 +-
 ...assage.bge-base-en-v1.5.hnsw.onnx.template |   5 +-
 ...bed-english-v3.0.hnsw-int8.cached.template |   5 +-
 ...re-embed-english-v3.0.hnsw.cached.template |   5 +-
 ...e.cos-dpr-distil.hnsw-int8.cached.template |   5 +-
 ...age.cos-dpr-distil.hnsw-int8.onnx.template |   5 +-
 ...assage.cos-dpr-distil.hnsw.cached.template |   5 +-
 ...-passage.cos-dpr-distil.hnsw.onnx.template |   5 +-
 ...sage.openai-ada2.hnsw-int8.cached.template |   5 +-
 ...0-passage.openai-ada2.hnsw.cached.template |   5 +-
 ...bge-base-en-v1.5.hnsw-int8.cached.template |   5 +-
 ...e.bge-base-en-v1.5.hnsw-int8.onnx.template |   5 +-
 ...sage.bge-base-en-v1.5.hnsw.cached.template |   5 +-
 ...assage.bge-base-en-v1.5.hnsw.onnx.template |   5 +-
 ...bed-english-v3.0.hnsw-int8.cached.template |   5 +-
 ...re-embed-english-v3.0.hnsw.cached.template |   5 +-
 ...e.cos-dpr-distil.hnsw-int8.cached.template |   5 +-
 ...age.cos-dpr-distil.hnsw-int8.onnx.template |   5 +-
 ...assage.cos-dpr-distil.hnsw.cached.template |   5 +-
 ...-passage.cos-dpr-distil.hnsw.onnx.template |   5 +-
 ...sage.openai-ada2.hnsw-int8.cached.template |   5 +-
 ...1-passage.openai-ada2.hnsw.cached.template |   5 +-
 ...age.bge-base-en-v1.5.hnsw-int8.cached.yaml |   8 +-
 ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml |   8 +-
 ...-passage.bge-base-en-v1.5.hnsw.cached.yaml |   8 +-
 ...19-passage.bge-base-en-v1.5.hnsw.onnx.yaml |   8 +-
 ...e-embed-english-v3.0.hnsw-int8.cached.yaml |   8 +-
 ...cohere-embed-english-v3.0.hnsw.cached.yaml |   8 +-
 ...ssage.cos-dpr-distil.hnsw-int8.cached.yaml |   8 +-
 ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml |   8 +-
 ...19-passage.cos-dpr-distil.hnsw.cached.yaml |   8 +-
 ...dl19-passage.cos-dpr-distil.hnsw.onnx.yaml |   8 +-
 ...-passage.openai-ada2.hnsw-int8.cached.yaml |   8 +-
 .../dl19-passage.openai-ada2.hnsw.cached.yaml |   8 +-
 ...age.bge-base-en-v1.5.hnsw-int8.cached.yaml |   8 +-
 ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml |   8 +-
 ...-passage.bge-base-en-v1.5.hnsw.cached.yaml |   8 +-
 ...20-passage.bge-base-en-v1.5.hnsw.onnx.yaml |   8 +-
 ...e-embed-english-v3.0.hnsw-int8.cached.yaml |   8 +-
 ...cohere-embed-english-v3.0.hnsw.cached.yaml |   8 +-
 ...ssage.cos-dpr-distil.hnsw-int8.cached.yaml |   8 +-
 ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml |   8 +-
 ...20-passage.cos-dpr-distil.hnsw.cached.yaml |   8 +-
 ...dl20-passage.cos-dpr-distil.hnsw.onnx.yaml |   8 +-
 ...-passage.openai-ada2.hnsw-int8.cached.yaml |   8 +-
 .../dl20-passage.openai-ada2.hnsw.cached.yaml |   8 +-
 ...age.bge-base-en-v1.5.hnsw-int8.cached.yaml |   8 +-
 ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml |   8 +-
 ...-passage.bge-base-en-v1.5.hnsw.cached.yaml |   8 +-
 ...v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml |   8 +-
 ...e-embed-english-v3.0.hnsw-int8.cached.yaml |   8 +-
 ...cohere-embed-english-v3.0.hnsw.cached.yaml |   8 +-
 ...ssage.cos-dpr-distil.hnsw-int8.cached.yaml |   8 +-
 ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml |   8 +-
 ...v1-passage.cos-dpr-distil.hnsw.cached.yaml |   8 +-
 ...o-v1-passage.cos-dpr-distil.hnsw.onnx.yaml |   8 +-
 ...-passage.openai-ada2.hnsw-int8.cached.yaml |   8 +-
 ...co-v1-passage.openai-ada2.hnsw.cached.yaml |   8 +-
 109 files changed, 616 insertions(+), 490 deletions(-)

diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index dfb916cdd2..be636c039b 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.443     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.444     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.708     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.706     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.614     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.843     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.847     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index b6b0689ea5..b93694b000 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -103,14 +103,15 @@ With the above commands, you should be able to reproduce the following results:
 |:-------------------------------------------------------------------------------------------------------------|-----------|
 | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.444     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.702     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.706     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.609     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.836     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.847     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md
index 742744f7e5..513212316f 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.442     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.444     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
 | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.706     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.616     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.842     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.847     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
index 77537a951d..b51406b4ea 100644
--- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.447     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.444     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.701     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.706     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.607     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.837     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.847     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
index 2c3985fd07..d8facc9287 100644
--- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
@@ -94,16 +94,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.487     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.488     |
 | **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.690     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.696     |
 | **R@100**                                                                                                    | **cohere-embed-english-v3.0**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.647     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.648     |
 | **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.850     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.863     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md
index b8c6825dd5..5d1df7ef3c 100644
--- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md
@@ -94,16 +94,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.486     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.488     |
 | **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.690     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.696     |
 | **R@100**                                                                                                    | **cohere-embed-english-v3.0**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.645     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.648     |
 | **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.851     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.863     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md
index 70a66a7981..b93e2554eb 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.458     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.466     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.725     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.605     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.805     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.820     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md
index 9aa8383ef9..cdd25400ad 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -103,16 +103,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.458     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.466     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.725     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.605     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.805     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.820     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md
index c4eb2dcb3b..dd9deb1995 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.458     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.466     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.725     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.605     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.805     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.820     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md
index 44b66615f0..0be142e4a7 100644
--- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.458     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.466     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.725     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.605     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.617     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.805     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.820     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md
index c241c421b3..f007aa8380 100644
--- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **OpenAI-ada2**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.478     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.479     |
 | **nDCG@10**                                                                                                  | **OpenAI-ada2**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.707     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.703     |
 | **R@100**                                                                                                    | **OpenAI-ada2**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.617     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.623     |
 | **R@1000**                                                                                                   | **OpenAI-ada2**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.853     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2019.html)                                                   | 0.863     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md
index 6101c5d241..61cef1094d 100644
--- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md
+++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md
@@ -101,14 +101,15 @@ With the above commands, you should be able to reproduce the following results:
 |:-------------------------------------------------------------------------------------------------------------|-----------|
 | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.479     |
 | **nDCG@10**                                                                                                  | **OpenAI-ada2**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.704     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.703     |
 | **R@100**                                                                                                    | **OpenAI-ada2**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.624     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.623     |
 | **R@1000**                                                                                                   | **OpenAI-ada2**|
-| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.857     |
+| [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.863     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index 4f964bbf15..5368960d8a 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.463     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.465     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.674     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.678     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.712     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.840     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.850     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index a4a0e59285..ce8df58074 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.462     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.465     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.677     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.678     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.711     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.848     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.850     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md
index 82ef436897..b5e4477cef 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.464     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.465     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.677     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.678     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.714     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.840     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.850     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md
index df2686e60a..a34df3fc40 100644
--- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.462     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.465     |
 | **nDCG@10**                                                                                                  | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.677     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.678     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.712     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.717     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.849     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.850     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
index a81cc2311a..5110a75e6d 100644
--- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
@@ -94,16 +94,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.505     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.507     |
 | **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.722     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.725     |
 | **R@100**                                                                                                    | **cohere-embed-english-v3.0**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.720     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.728     |
 | **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.858     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.868     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md
index aa28c99e1c..da47afbfae 100644
--- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md
@@ -94,16 +94,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.505     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.507     |
 | **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
 | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.725     |
 | **R@100**                                                                                                    | **cohere-embed-english-v3.0**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.724     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.728     |
 | **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.864     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.868     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, usually by no more than 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md
index da4b118033..8148593918 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.482     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.488     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.701     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.702     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.712     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.720     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.843     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.853     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md
index 1aca72eb64..2f9d5c0898 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -103,16 +103,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.482     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.488     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.701     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.702     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.712     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.720     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.843     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.853     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md
index 2e60059bab..c39fd6afdf 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.482     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.488     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.701     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.702     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.712     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.720     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.843     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.853     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md
index ce2bdd0834..095aa1728f 100644
--- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md
@@ -101,16 +101,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.482     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.488     |
 | **nDCG@10**                                                                                                  | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.701     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.702     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.712     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.720     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.843     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.853     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md
index 5f7f9c2a1a..b88f3ee759 100644
--- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md
@@ -103,14 +103,15 @@ With the above commands, you should be able to reproduce the following results:
 |:-------------------------------------------------------------------------------------------------------------|-----------|
 | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.477     |
 | **nDCG@10**                                                                                                  | **OpenAI-ada2**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.675     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.676     |
 | **R@100**                                                                                                    | **OpenAI-ada2**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.727     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.724     |
 | **R@1000**                                                                                                   | **OpenAI-ada2**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.866     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.871     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md
index f9f9b70fb5..8be06c47ef 100644
--- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md
+++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md
@@ -103,12 +103,13 @@ With the above commands, you should be able to reproduce the following results:
 | **nDCG@10**                                                                                                  | **OpenAI-ada2**|
 | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.676     |
 | **R@100**                                                                                                    | **OpenAI-ada2**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.723     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.724     |
 | **R@1000**                                                                                                   | **OpenAI-ada2**|
-| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.867     |
+| [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html)                                                   | 0.871     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
index 78e2f4b56c..db144b9dee 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md
@@ -97,16 +97,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.362     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.364     |
 | **RR@10**                                                                                                    | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.356     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.358     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.897     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.901     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.977     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.981     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
index ff8d251335..fb494b97b1 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md
@@ -97,16 +97,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.362     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.364     |
 | **RR@10**                                                                                                    | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.356     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.358     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.897     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.901     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.977     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.981     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
index 01f458764b..404c821997 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md
@@ -95,16 +95,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.363     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.364     |
 | **RR@10**                                                                                                    | **BGE-base-en-v1.5**|
 | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.358     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.897     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.901     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.977     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.981     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
index 080fe67cbe..9e026f78a6 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md
@@ -95,16 +95,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **BGE-base-en-v1.5**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.363     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.364     |
 | **RR@10**                                                                                                    | **BGE-base-en-v1.5**|
 | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.358     |
 | **R@100**                                                                                                    | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.897     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.901     |
 | **R@1000**                                                                                                   | **BGE-base-en-v1.5**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.977     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.981     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
index 5cf1a11852..641496596d 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md
@@ -95,16 +95,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.427     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.429     |
 | **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.371     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.372     |
 | **RR@10**                                                                                                    | **cohere-embed-english-v3.0**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.365     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.366     |
 | **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.974     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.979     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower, typically by up to 0.01 but occasionally by more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
index 98741acdd8..4e90bcbb05 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md
@@ -93,16 +93,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **nDCG@10**                                                                                                  | **cohere-embed-english-v3.0**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.428     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.429     |
 | **AP@1000**                                                                                                  | **cohere-embed-english-v3.0**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.371     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.372     |
 | **RR@10**                                                                                                    | **cohere-embed-english-v3.0**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.365     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.366     |
 | **R@1000**                                                                                                   | **cohere-embed-english-v3.0**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.974     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.979     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
index eca31bdbb9..b4befc6d38 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md
@@ -97,16 +97,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.393     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.394     |
 | **RR@10**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.388     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.390     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.903     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.908     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.974     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.980     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
index a72f7ec16e..30b090de99 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md
@@ -99,16 +99,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.393     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.394     |
 | **RR@10**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.388     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.390     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.903     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.908     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.974     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.980     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
index 4f908615ef..d793851efd 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md
@@ -95,16 +95,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.393     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.394     |
 | **RR@10**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.388     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.390     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.903     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.908     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.974     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.980     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
index 4aad9f8895..40335af5a9 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md
@@ -97,16 +97,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **cosDPR-distil**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.393     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.394     |
 | **RR@10**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.388     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.390     |
 | **R@100**                                                                                                    | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.903     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.908     |
 | **R@1000**                                                                                                   | **cosDPR-distil**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.974     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.980     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md
index a1b562ed84..271e5c99ca 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md
@@ -97,16 +97,17 @@ With the above commands, you should be able to reproduce the following results:
 
 | **AP@1000**                                                                                                  | **OpenAI-ada2**|
 |:-------------------------------------------------------------------------------------------------------------|-----------|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.343     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.350     |
 | **RR@10**                                                                                                    | **OpenAI-ada2**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.336     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.343     |
 | **R@100**                                                                                                    | **OpenAI-ada2**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.894     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.900     |
 | **R@1000**                                                                                                   | **OpenAI-ada2**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.983     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.986     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md
index 383c26e5a5..564fdc0447 100644
--- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md
+++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md
@@ -99,12 +99,13 @@ With the above commands, you should be able to reproduce the following results:
 | **RR@10**                                                                                                    | **OpenAI-ada2**|
 | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.343     |
 | **R@100**                                                                                                    | **OpenAI-ada2**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.898     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.900     |
 | **R@1000**                                                                                                   | **OpenAI-ada2**|
-| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.985     |
+| [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking)                                | 0.986     |
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](../../docs/reproducibility.md)
 
diff --git a/src/main/python/run_regression.py b/src/main/python/run_regression.py
index 72741f06ac..335f071eaf 100644
--- a/src/main/python/run_regression.py
+++ b/src/main/python/run_regression.py
@@ -263,6 +263,104 @@ def construct_convert_commands(yaml_data):
 
 beir_dataset_pattern = re.compile(r'BEIR \(v1.0.0\): (.*)$')
 
+msmarco_v1_flat_int8_onnx = defaultdict(lambda: 0.002)
+msmarco_v1_flat_int8_cached = defaultdict(lambda: 0.002)
+msmarco_v1_flat_int8_cached['openai-ada2-flat-int8-cached'] = 0.008
+msmarco_v1_flat_onnx = defaultdict(lambda: 0.0001)
+msmarco_v1_flat_cached = defaultdict(lambda: 1e-9)
+
+msmarco_v1_flat_tolerance = {
+    'flat-int8-onnx': msmarco_v1_flat_int8_onnx,
+    'flat-int8-cached': msmarco_v1_flat_int8_cached,
+    'flat-onnx': msmarco_v1_flat_onnx,
+    'flat-cached': msmarco_v1_flat_cached,
+}
+
+dl19_flat_int8_onnx = defaultdict(lambda: 0.002)
+dl19_flat_int8_onnx['bge-flat-int8-onnx'] = 0.008
+dl19_flat_int8_cached = defaultdict(lambda: 0.002)
+dl19_flat_int8_cached['bge-flat-int8-cached'] = 0.005
+dl19_flat_int8_cached['openai-ada2-flat-int8-cached'] = 0.008
+dl19_flat_onnx = defaultdict(lambda: 0.0001)
+dl19_flat_onnx['bge-flat-onnx'] = 0.008
+dl19_flat_cached = defaultdict(lambda: 1e-9)
+
+dl19_flat_tolerance = {
+    'flat-int8-onnx': dl19_flat_int8_onnx,
+    'flat-int8-cached': dl19_flat_int8_cached,
+    'flat-onnx': dl19_flat_onnx,
+    'flat-cached': dl19_flat_cached,
+}
+
+dl20_flat_int8_onnx = defaultdict(lambda: 0.002)
+dl20_flat_int8_onnx['bge-flat-int8-onnx'] = 0.004
+dl20_flat_int8_cached = defaultdict(lambda: 0.002)
+dl20_flat_int8_cached['bge-flat-int8-cached'] = 0.005
+dl20_flat_int8_cached['cos-dpr-distil-flat-int8-cached'] = 0.004
+dl20_flat_int8_cached['cohere-embed-english-v3.0-flat-int8-cached'] = 0.004
+dl20_flat_onnx = defaultdict(lambda: 0.0001)
+dl20_flat_onnx['bge-flat-onnx'] = 0.005
+dl20_flat_cached = defaultdict(lambda: 1e-9)
+
+dl20_flat_tolerance = {
+    'flat-int8-onnx': dl20_flat_int8_onnx,
+    'flat-int8-cached': dl20_flat_int8_cached,
+    'flat-onnx': dl20_flat_onnx,
+    'flat-cached': dl20_flat_cached,
+}
+
+msmarco_v1_hnsw_int8_onnx = defaultdict(lambda: 0.01)
+msmarco_v1_hnsw_int8_cached = defaultdict(lambda: 0.01)
+msmarco_v1_hnsw_onnx = defaultdict(lambda: 0.01)
+msmarco_v1_hnsw_onnx['cos-dpr-distil-hnsw-onnx'] = 0.015
+msmarco_v1_hnsw_cached = defaultdict(lambda: 0.01)
+msmarco_v1_hnsw_cached['cos-dpr-distil-hnsw-cached'] = 0.015
+
+msmarco_v1_hnsw_tolerance = {
+    'hnsw-int8-onnx': msmarco_v1_hnsw_int8_onnx,
+    'hnsw-int8-cached': msmarco_v1_hnsw_int8_cached,
+    'hnsw-onnx': msmarco_v1_hnsw_onnx,
+    'hnsw-cached': msmarco_v1_hnsw_cached,
+}
+
+dl19_hnsw_int8_onnx = defaultdict(lambda: 0.01)
+dl19_hnsw_int8_onnx['bge-hnsw-int8-onnx'] = 0.02
+dl19_hnsw_int8_onnx['cos-dpr-distil-hnsw-int8-onnx'] = 0.025
+dl19_hnsw_int8_cached = defaultdict(lambda: 0.01)
+dl19_hnsw_int8_cached['bge-hnsw-int8-cached'] = 0.015
+dl19_hnsw_int8_cached['cohere-embed-english-v3.0-hnsw-int8-cached'] = 0.015
+dl19_hnsw_int8_cached['cos-dpr-distil-hnsw-int8-cached'] = 0.025
+dl19_hnsw_int8_cached['openai-ada2-hnsw-int8-cached'] = 0.015
+dl19_hnsw_onnx = defaultdict(lambda: 0.015)
+dl19_hnsw_onnx['bge-hnsw-onnx'] = 0.02
+dl19_hnsw_cached = defaultdict(lambda: 0.01)
+dl19_hnsw_cached['cohere-embed-english-v3.0-hnsw-cached'] = 0.02
+dl19_hnsw_cached['cos-dpr-distil-hnsw-cached'] = 0.015
+
+dl19_hnsw_tolerance = {
+    'hnsw-int8-onnx': dl19_hnsw_int8_onnx,
+    'hnsw-int8-cached': dl19_hnsw_int8_cached,
+    'hnsw-onnx': dl19_hnsw_onnx,
+    'hnsw-cached': dl19_hnsw_cached,
+}
+
+dl20_hnsw_int8_onnx = defaultdict(lambda: 0.01)
+dl20_hnsw_int8_cached = defaultdict(lambda: 0.01)
+dl20_hnsw_int8_cached['bge-hnsw-int8-cached'] = 0.015
+dl20_hnsw_int8_cached['cohere-embed-english-v3.0-hnsw-int8-cached'] = 0.012
+dl20_hnsw_onnx = defaultdict(lambda: 0.015)
+dl20_hnsw_cached = defaultdict(lambda: 0.01)
+dl20_hnsw_cached['bge-hnsw-cached'] = 0.015
+dl20_hnsw_cached['cohere-embed-english-v3.0-hnsw-cached'] = 0.025
+dl20_hnsw_cached['cos-dpr-distil-hnsw-cached'] = 0.015
+
+dl20_hnsw_tolerance = {
+    'hnsw-int8-onnx': dl20_hnsw_int8_onnx,
+    'hnsw-int8-cached': dl20_hnsw_int8_cached,
+    'hnsw-onnx': dl20_hnsw_onnx,
+    'hnsw-cached': dl20_hnsw_cached,
+}
+
 
 def evaluate_and_verify(yaml_data, dry_run):
     fail_str = '\033[91m[FAIL]\033[0m '
@@ -295,85 +393,41 @@ def evaluate_and_verify(yaml_data, dry_run):
                 using_hnsw = True if 'type' in model and model['type'] == 'hnsw' else False
                 using_flat = True if 'type' in model and model['type'] == 'flat' else False
 
-                if using_flat and 'BEIR' in topic_set['name']:
-                    # Extract BEIR dataset
-                    match = beir_dataset_pattern.search(topic_set['name'])
-                    beir_dataset = match.group(1)
-
+                if using_flat:
                     # Extract model
                     match = flat_model_type_pattern.search(model['name'])
                     model_type = match.group(1)
 
-                    # Lookup tolerance
-                    tolerance_ok = beir_flat_tolerance[model_type][beir_dataset]
-                elif using_flat and 'MS MARCO Passage' in topic_set['name']:
-                    if model['name'].endswith('-flat-int8-onnx'):
-                        tolerance_ok = 0.002
-                    elif model['name'].endswith('-flat-int8-cached'):
-                        if model['name'] == 'openai-ada2-flat-int8-cached':
-                            tolerance_ok = 0.008
-                        else:
-                            tolerance_ok = 0.002
-                    elif model['name'].endswith('-flat-onnx'):
-                        tolerance_ok = 0.0001
-                    else:
-                        tolerance_ok = 1e-9
-                elif using_flat and 'DL19' in topic_set['name']:
-                    if model['name'].endswith('-flat-int8-onnx'):
-                        if model['name'] == 'bge-flat-int8-onnx':
-                            tolerance_ok = 0.007
-                        elif model['name'] == 'cos-dpr-distil-flat-int8-onnx':
-                            tolerance_ok = 0.004
-                        else:
-                            tolerance_ok = 0.002
-                    elif model['name'].endswith('-flat-int8-cached'):
-                        if model['name'] == 'openai-ada2-flat-int8-cached':
-                            tolerance_ok = 0.008
-                        else:
-                            tolerance_ok = 0.002
-                    elif model['name'].endswith('-flat-onnx'):
-                        if model['name'] == 'bge-flat-onnx':
-                            tolerance_ok = 0.008
-                        else:
-                            tolerance_ok = 0.0001
-                    else:
-                        tolerance_ok = 1e-9
-                elif using_flat and 'DL20' in topic_set['name']:
-                    if model['name'].endswith('-flat-int8-onnx'):
-                        if model['name'] == 'bge-flat-int8-onnx':
-                            tolerance_ok = 0.004
-                        elif model['name'] == 'cos-dpr-distil-flat-int8-onnx':
-                            tolerance_ok = 0.004
-                        else:
-                            tolerance_ok = 0.002
-                    elif model['name'].endswith('-flat-int8-cached'):
-                        if model['name'] == 'bge-flat-int8-cached':
-                            tolerance_ok = 0.005
-                        elif model['name'] == 'cos-dpr-distil-flat-int8-cached':
-                            tolerance_ok = 0.004
-                        else:
-                            tolerance_ok = 0.002
-                    elif model['name'].endswith('-flat-onnx'):
-                        if model['name'] == 'bge-flat-onnx':
-                            tolerance_ok = 0.005
-                        else:
-                            tolerance_ok = 0.0001
-                    else:
-                        tolerance_ok = 1e-9
-                else:
-                    tolerance_ok = 1e-9
+                    if 'BEIR' in topic_set['name']:
+                        # Extract BEIR dataset
+                        match = beir_dataset_pattern.search(topic_set['name'])
+                        beir_dataset = match.group(1)
 
-                if using_hnsw and 'BEIR' in topic_set['name']:
-                    # Extract BEIR dataset
-                    match = beir_dataset_pattern.search(topic_set['name'])
-                    beir_dataset = match.group(1)
+                        tolerance_ok = beir_flat_tolerance[model_type][beir_dataset]
+                    elif 'MS MARCO Passage' in topic_set['name']:
+                        tolerance_ok = msmarco_v1_flat_tolerance[model_type][model['name']]
+                    elif 'DL19' in topic_set['name']:
+                        tolerance_ok = dl19_flat_tolerance[model_type][model['name']]
+                    elif 'DL20' in topic_set['name']:
+                        tolerance_ok = dl20_flat_tolerance[model_type][model['name']]
 
+                if using_hnsw:
                     # Extract model
                     match = hnsw_model_type_pattern.search(model['name'])
                     model_type = match.group(1)
 
-                    # Lookup tolerance
-                    tolerance_ok = beir_hnsw_tolerance[model_type][beir_dataset]
+                    if 'BEIR' in topic_set['name']:
+                        # Extract BEIR dataset
+                        match = beir_dataset_pattern.search(topic_set['name'])
+                        beir_dataset = match.group(1)
+
+                        tolerance_ok = beir_hnsw_tolerance[model_type][beir_dataset]
+                    elif 'MS MARCO Passage' in topic_set['name']:
+                        tolerance_ok = msmarco_v1_hnsw_tolerance[model_type][model['name']]
+                    elif 'DL19' in topic_set['name']:
+                        tolerance_ok = dl19_hnsw_tolerance[model_type][model['name']]
+                    elif 'DL20' in topic_set['name']:
+                        tolerance_ok = dl20_hnsw_tolerance[model_type][model['name']]
 
                 if using_flat or using_hnsw:
                     result_str = (f'expected: {expected:.4f} actual: {actual:.4f} '
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index 684d6a68a8..63fdad31d4 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
index 07f64b2993..69afbfc033 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template
index 1684a43b43..d011773dd3 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template
index 5c7f62f36e..89c9d8ee76 100644
--- a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.onnx.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
index c3b3877000..565c13fea7 100644
--- a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
@@ -80,8 +80,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template
index 789f388206..a47174b5f5 100644
--- a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template
@@ -80,8 +80,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template
index 751379ca58..9406fe3822 100644
--- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template
index e66a46e480..ff348cdbc9 100644
--- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.template
@@ -89,8 +89,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template
index 681c0bcd6d..e387bf030e 100644
--- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template
index 65bc136073..c6d7c0e522 100644
--- a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.onnx.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template
index 7e71728323..991983ff4f 100644
--- a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template
index 0ffa09bb80..24ec176d91 100644
--- a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index ec86f6078b..a67e150823 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
index 5500041509..957322a5bf 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template
index 324a9c2a42..9197b856be 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template
index 7bf9c62844..0585024953 100644
--- a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.onnx.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
index 6afb3a9c6a..fcc92a8dc7 100644
--- a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
@@ -80,8 +80,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template
index ac85f4fae7..1bc8727a09 100644
--- a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template
@@ -80,8 +80,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template
index b9622009f8..4c9517f5c5 100644
--- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template
index 55826ab0be..28dc4082c8 100644
--- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.template
@@ -89,8 +89,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template
index fafa6ebc2d..fd89cd5bdf 100644
--- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template
index 42a2906370..582cc46099 100644
--- a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.onnx.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template
index 3633e0aa96..ad2259f779 100644
--- a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template
@@ -87,8 +87,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template
index c7ee57e2ff..361c8bb695 100644
--- a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ❗ Retrieval metrics here are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking).
 For computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`).
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
index af69251204..486b4bc69d 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template
@@ -83,8 +83,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
index 94075818a8..5dc942633e 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.template
@@ -83,8 +83,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
index e75f7be10e..aba6e3262f 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template
@@ -81,8 +81,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
index d8180d75c4..fb6a697224 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.template
@@ -81,8 +81,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
index c809996e13..924906b796 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template
@@ -81,8 +81,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template
index f159ced864..5f51caffa4 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template
@@ -79,8 +79,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template
index 5f85acc05b..89099d71fe 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template
@@ -83,8 +83,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template
index aa88ab7d67..b3737d5459 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.template
@@ -85,8 +85,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template
index affa039161..5173b9b09b 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template
@@ -81,8 +81,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template
index cad852f311..3defd04a81 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.template
@@ -83,8 +83,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With ONNX query encoding on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template
index cdf4ff2c98..b82567babb 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template
@@ -83,8 +83,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that both HNSW indexing and quantization are non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template
index 6b705c906e..a1827f765c 100644
--- a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template
+++ b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template
@@ -81,8 +81,9 @@ With the above commands, you should be able to reproduce the following results:
 
 ${effectiveness}
 
-Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run.
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](${yaml}).
+The above figures are from running brute-force search with cached queries on non-quantized **flat** indexes.
+With cached queries on non-quantized HNSW indexes, observed results are likely to differ; scores may be lower by up to 0.01, sometimes more.
+Note that HNSW indexing is non-deterministic (i.e., results may differ slightly between trials).
 
 ## Reproduction Log[*](${root_path}/docs/reproducibility.md)
 
diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
index af877e2c0b..eb8604232f 100644
--- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.443
+        - 0.4435
       nDCG@10:
-        - 0.708
+        - 0.7065
       R@100:
-        - 0.614
+        - 0.6171
       R@1000:
-        - 0.843
+        - 0.8472
diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
index 14d4e1438e..fb2b2c1cc2 100644
--- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
+++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15
     results:
       AP@1000:
-        - 0.444
+        - 0.4435
       nDCG@10:
-        - 0.702
+        - 0.7065
       R@100:
-        - 0.609
+        - 0.6171
       R@1000:
-        - 0.836
+        - 0.8472
diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml
index 4eeb1942e6..f47a2977e0 100644
--- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.442
+        - 0.4435
       nDCG@10:
-        - 0.706
+        - 0.7065
       R@100:
-        - 0.616
+        - 0.6171
       R@1000:
-        - 0.842
+        - 0.8472
diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml
index 73796aa95a..d5103d9d6b 100644
--- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml
+++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15
     results:
       AP@1000:
-        - 0.447
+        - 0.4435
       nDCG@10:
-        - 0.701
+        - 0.7065
       R@100:
-        - 0.607
+        - 0.6171
       R@1000:
-        - 0.837
+        - 0.8472
diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
index aa91abdb7f..50e305c663 100644
--- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.487
+        - 0.4884
       nDCG@10:
-        - 0.690
+        - 0.6956
       R@100:
-        - 0.647
+        - 0.6484
       R@1000:
-        - 0.850
\ No newline at end of file
+        - 0.8630
\ No newline at end of file
diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
index 99a9b9652c..029bc8cedb 100644
--- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.486
+        - 0.4884
       nDCG@10:
-        - 0.690
+        - 0.6956
       R@100:
-        - 0.645
+        - 0.6484
       R@1000:
-        - 0.851
\ No newline at end of file
+        - 0.8630
\ No newline at end of file
diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml
index bfb7bf2930..a6100f94a9 100644
--- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.458
+        - 0.4656
       nDCG@10:
-        - 0.717
+        - 0.7250
       R@100:
-        - 0.605
+        - 0.6173
       R@1000:
-        - 0.805
+        - 0.8201
diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
index a70061e7ae..2fc1199b0d 100644
--- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
+++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil
     results:
       AP@1000:
-        - 0.458
+        - 0.4656
       nDCG@10:
-        - 0.717
+        - 0.7250
       R@100:
-        - 0.605
+        - 0.6173
       R@1000:
-        - 0.805
+        - 0.8201
diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml
index 73576b9798..8c215bf0a2 100644
--- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.458
+        - 0.4656
       nDCG@10:
-        - 0.717
+        - 0.7250
       R@100:
-        - 0.605
+        - 0.6173
       R@1000:
-        - 0.805
+        - 0.8201
diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml
index 8455574323..65010dbabb 100644
--- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml
+++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil
     results:
       AP@1000:
-        - 0.458
+        - 0.4656
       nDCG@10:
-        - 0.717
+        - 0.7250
       R@100:
-        - 0.605
+        - 0.6173
       R@1000:
-        - 0.805
+        - 0.8201
diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml
index a0b05f6cc5..ff8a1c5d26 100644
--- a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.478
+        - 0.4788
       nDCG@10:
-        - 0.707
+        - 0.7035
       R@100:
-        - 0.617
+        - 0.6235
       R@1000:
-        - 0.853
+        - 0.8629
diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml
index 21ac8f5c9d..8aeade94f9 100644
--- a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.479
+        - 0.4788
       nDCG@10:
-        - 0.704
+        - 0.7035
       R@100:
-        - 0.624
+        - 0.6235
       R@1000:
-        - 0.857
+        - 0.8629
diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
index e092a742bb..65f7cbfc88 100644
--- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.463
+        - 0.4650
       nDCG@10:
-        - 0.674
+        - 0.6780
       R@100:
-        - 0.712
+        - 0.7169
       R@1000:
-        - 0.840
+        - 0.8503
diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
index 78a5495329..cb04b68b38 100644
--- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
+++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15
     results:
       AP@1000:
-        - 0.462
+        - 0.4650
       nDCG@10:
-        - 0.677
+        - 0.6780
       R@100:
-        - 0.711
+        - 0.7169
       R@1000:
-        - 0.848
+        - 0.8503
diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml
index f08b12e116..af7c099584 100644
--- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.464
+        - 0.4650
       nDCG@10:
-        - 0.677
+        - 0.6780
       R@100:
-        - 0.714
+        - 0.7169
       R@1000:
-        - 0.840
+        - 0.8503
diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml
index 526e313788..bf5892e910 100644
--- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml
+++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15
     results:
       AP@1000:
-        - 0.462
+        - 0.4650
       nDCG@10:
-        - 0.677
+        - 0.6780
       R@100:
-        - 0.712
+        - 0.7169
       R@1000:
-        - 0.849
+        - 0.8503
diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
index 5b3f8b4056..1fc331b7a2 100644
--- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.505
+        - 0.5067
       nDCG@10:
-        - 0.722
+        - 0.7245
       R@100:
-        - 0.720
+        - 0.7279
       R@1000:
-        - 0.858
\ No newline at end of file
+        - 0.8682
\ No newline at end of file
diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
index 9fda129765..5c4529fa7e 100644
--- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.505
+        - 0.5067
       nDCG@10:
-        - 0.725
+        - 0.7245
       R@100:
-        - 0.724
+        - 0.7279
       R@1000:
-        - 0.864
\ No newline at end of file
+        - 0.8682
\ No newline at end of file
diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml
index 5dce72b873..20873dfe04 100644
--- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.482
+        - 0.4876
       nDCG@10:
-        - 0.701
+        - 0.7025
       R@100:
-        - 0.712
+        - 0.7204
       R@1000:
-        - 0.843
+        - 0.8533
diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
index b20ad4affd..51049d8555 100644
--- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
+++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil
     results:
       AP@1000:
-        - 0.482
+        - 0.4876
       nDCG@10:
-        - 0.701
+        - 0.7025
       R@100:
-        - 0.712
+        - 0.7204
       R@1000:
-        - 0.843
+        - 0.8533
diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml
index ff5c51375f..b1ac3947a2 100644
--- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.482
+        - 0.4876
       nDCG@10:
-        - 0.701
+        - 0.7025
       R@100:
-        - 0.712
+        - 0.7204
       R@1000:
-        - 0.843
+        - 0.8533
diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml
index a7f57293c2..0e1c03d3c1 100644
--- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml
+++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil
     results:
       AP@1000:
-        - 0.482
+        - 0.4876
       nDCG@10:
-        - 0.701
+        - 0.7025
       R@100:
-        - 0.712
+        - 0.7204
       R@1000:
-        - 0.843
+        - 0.8533
diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml
index 787a2fb507..a76e57f1b5 100644
--- a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.477
+        - 0.4771
       nDCG@10:
-        - 0.675
+        - 0.6759
       R@100:
-        - 0.727
+        - 0.7237
       R@1000:
-        - 0.866
+        - 0.8705
diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml
index 13b1ac8a14..e0f50391ed 100644
--- a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml
+++ b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.477
+        - 0.4771
       nDCG@10:
-        - 0.676
+        - 0.6759
       R@100:
-        - 0.723
+        - 0.7237
       R@1000:
-        - 0.867
+        - 0.8705
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
index 0008962a82..fff856cbb5 100644
--- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.362
+        - 0.3641
       RR@10:
-        - 0.356
+        - 0.3583
       R@100:
-        - 0.897
+        - 0.9006
       R@1000:
-        - 0.977
+        - 0.9811
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
index 61a07240de..ae45b57f8a 100644
--- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15
     results:
       AP@1000:
-        - 0.362
+        - 0.3641
       RR@10:
-        - 0.356
+        - 0.3583
       R@100:
-        - 0.897
+        - 0.9006
       R@1000:
-        - 0.977
+        - 0.9811
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml
index 61e0188f74..da3bae3244 100644
--- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.363
+        - 0.3641
       RR@10:
-        - 0.358
+        - 0.3583
       R@100:
-        - 0.897
+        - 0.9006
       R@1000:
-        - 0.977
+        - 0.9811
diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml
index 996c2daafb..41af6b0138 100644
--- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15
     results:
       AP@1000:
-        - 0.363
+        - 0.3641
       RR@10:
-        - 0.358
+        - 0.3583
       R@100:
-        - 0.897
+        - 0.9006
       R@1000:
-        - 0.977
+        - 0.9811
diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
index 53fd269549..dc50962369 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       nDCG@10:
-        - 0.427
+        - 0.4287
       AP@1000:
-        - 0.371
+        - 0.3716
       RR@10:
-        - 0.365
+        - 0.3658
       R@1000:
-        - 0.974
+        - 0.9786
diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
index bd45211b5d..433c39ba24 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       nDCG@10:
-        - 0.428
+        - 0.4287
       AP@1000:
-        - 0.371
+        - 0.3716
       RR@10:
-        - 0.365
+        - 0.3658
       R@1000:
-        - 0.974
+        - 0.9786
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml
index 7b1dd87fb1..1aae195713 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.393
+        - 0.3942
       RR@10:
-        - 0.388
+        - 0.3896
       R@100:
-        - 0.903
+        - 0.9075
       R@1000:
-        - 0.974
+        - 0.9796
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
index b5347ae6e0..51f76139ea 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil
     results:
       AP@1000:
-        - 0.393
+        - 0.3942
       RR@10:
-        - 0.388
+        - 0.3896
       R@100:
-        - 0.903
+        - 0.9075
       R@1000:
-        - 0.974
+        - 0.9796
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml
index 5fa7e7c702..adb737abb1 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.393
+        - 0.3942
       RR@10:
-        - 0.388
+        - 0.3896
       R@100:
-        - 0.903
+        - 0.9075
       R@1000:
-        - 0.974
+        - 0.9796
diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml
index 0c79b27540..27b10e26b2 100644
--- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil
     results:
       AP@1000:
-        - 0.393
+        - 0.3942
       RR@10:
-        - 0.388
+        - 0.3896
       R@100:
-        - 0.903
+        - 0.9075
       R@1000:
-        - 0.974
+        - 0.9796
diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml
index 66b1bb8cde..8309dc34dc 100644
--- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.343
+        - 0.3505
       RR@10:
-        - 0.336
+        - 0.3434
       R@100:
-        - 0.894
+        - 0.8996
       R@1000:
-        - 0.983
+        - 0.9858
diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml
index 10cb1a48f3..388193244a 100644
--- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml
+++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml
@@ -56,10 +56,10 @@ models:
     params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000
     results:
       AP@1000:
-        - 0.350
+        - 0.3505
       RR@10:
-        - 0.343
+        - 0.3434
       R@100:
-        - 0.898
+        - 0.8996
       R@1000:
-        - 0.985
+        - 0.9858