Skip to content

Commit

Permalink
vector db test
Browse files Browse the repository at this point in the history
  • Loading branch information
shultseva committed Jun 17, 2024
1 parent a1fc789 commit f0c6491
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 36 deletions.
5 changes: 5 additions & 0 deletions java/drivers/driver-hazelcast4plus/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@
<version>0.6.10</version>
</dependency>

<dependency>
<groupId>org.jctools</groupId>
<artifactId>jctools-core</artifactId>
<version>4.0.3</version>
</dependency>

</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,24 @@ public class VectorCollectionPutDatasetTest extends HazelcastTest {

public String workingDirectory;

public String name;

// common parameters
public int loadFirst = Integer.MAX_VALUE;

public int putBatchSize = 10_000;

// graph parameters
public String metric = "COSINE";
public String metric;

public int maxDegree = 40;
public int maxDegree;

public int efConstruction = 50;
public int efConstruction;

// inner test parameters

private static final String collectionName = "performance-collection";

private static final TimeMetrics metrics = new TimeMetrics();
private VectorCollection<Integer, Integer> collection;

private final AtomicInteger counter = new AtomicInteger(0);

private DatasetReader reader;
Expand All @@ -57,7 +56,7 @@ public void setup() {

collection = VectorCollection.getCollection(
targetInstance,
new VectorCollectionConfig(collectionName)
new VectorCollectionConfig(name)
.addVectorIndexConfig(
new VectorIndexConfig()
.setMetric(Metric.valueOf(metric))
Expand All @@ -66,6 +65,7 @@ public void setup() {
.setEfConstruction(efConstruction)
)
);
logger.info("Use collection with name: {}", collection.getName());
}

@TimeStep(prob = 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.hazelcast.simulator.hz.HazelcastTest;
import com.hazelcast.simulator.test.BaseThreadState;
import com.hazelcast.simulator.test.annotations.AfterRun;
import com.hazelcast.simulator.test.annotations.Prepare;
import com.hazelcast.simulator.test.annotations.Setup;
import com.hazelcast.simulator.test.annotations.TimeStep;
import com.hazelcast.simulator.tests.vector.model.TestDataset;
Expand All @@ -27,8 +28,6 @@
import java.util.Queue;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;

import static java.util.concurrent.TimeUnit.MILLISECONDS;
Expand All @@ -47,17 +46,17 @@ public class VectorCollectionSearchDatasetTest extends HazelcastTest {
public int loadFirst = Integer.MAX_VALUE;

// graph parameters
public String metric = "COSINE";
public String metric;

public int maxDegree = 40;
public int maxDegree;

public int efConstruction = 50;
public int efConstruction;

public boolean normalize = false;

// search parameters

public int limit = 1;
public int limit;

// inner test parameters

Expand All @@ -67,6 +66,8 @@ public class VectorCollectionSearchDatasetTest extends HazelcastTest {

private VectorCollection<Integer, Integer> collection;

private DatasetReader reader;

private TestDataset testDataset;

private final Queue<TestSearchResult> searchResults = new ConcurrentLinkedQueue<>();
Expand All @@ -75,24 +76,12 @@ public class VectorCollectionSearchDatasetTest extends HazelcastTest {

private long indexBuildTime = 0;

private final ReentrantLock lock = new ReentrantLock();

private final CountDownLatch setupDone = new CountDownLatch(1);

@Setup
public void setup() {
if (!lock.tryLock()) {
try {
setupDone.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
return;
}
// only one thread performs the setup
scoreMetrics.setName(name);
DatasetReader reader = DatasetReader.create(datasetUrl, workingDirectory, normalize);
var size = Math.min(reader.getSize(), loadFirst);
reader = DatasetReader.create(datasetUrl, workingDirectory, normalize);

int dimension = reader.getDimension();
assert dimension == reader.getTestDatasetDimension() : "dataset dimension does not correspond to query vector dimension";
testDataset = reader.getTestDataset();
Expand All @@ -111,6 +100,11 @@ public void setup() {
.setEfConstruction(efConstruction)
)
);
}

@Prepare(global = true)
public void prepare() {
var size = Math.min(reader.getSize(), loadFirst);

var indexBuildTimeStart = System.currentTimeMillis();

Expand Down Expand Up @@ -154,8 +148,6 @@ public void setup() {
logger.info("Collection dimension: {}", reader.getDimension());
logger.info("Cleanup time (min): {}", MILLISECONDS.toMinutes(cleanupTimer));
logger.info("Index build time (min): {}", MILLISECONDS.toMinutes(indexBuildTime));

setupDone.countDown();
}

@TimeStep()
Expand All @@ -166,8 +158,11 @@ public void search(ThreadState state) {
return;
}
var vector = testDataset.getSearchVector(iteration);
SearchOptions options = new SearchOptionsBuilder().vector(vector).includePayload().includeVectors().limit(limit).build();
var result = collection.searchAsync(options).toCompletableFuture().join();
SearchOptions options = new SearchOptionsBuilder().includeValue().includeVectors().limit(limit).build();
var result = collection.searchAsync(
VectorValues.of(vector),
options
).toCompletableFuture().join();
searchResults.add(new TestSearchResult(iteration, vector, result));
}

Expand All @@ -181,7 +176,7 @@ public void afterRun() {
});

writeAllSearchResultsToFile("precision_" + name + ".out");
appendStatisticsToFile("statistics.out");
appendStatisticsToFile();
logger.info("Results for {}", name);
logger.info("Min score: {}", scoreMetrics.getMin());
logger.info("Max score: {}", scoreMetrics.getMax());
Expand All @@ -203,7 +198,7 @@ public int getAndIncrementIteration() {
}
}

/**
 * One executed search query together with the results returned for it.
 *
 * <p>Note: {@code searchVector} is an array, so the record's generated
 * {@code equals}/{@code hashCode} compare it by reference, not by content.
 *
 * @param index        position of the query; {@code search} passes the current iteration number
 * @param searchVector query vector that was sent to the collection
 * @param results      search results returned by the collection for {@code searchVector}
 */
public record TestSearchResult(int index, float[] searchVector, SearchResults results) {
public record TestSearchResult(int index, float[] searchVector, SearchResults<?, ?> results) {
}

private void writeAllSearchResultsToFile(String fileName) {
Expand Down Expand Up @@ -232,9 +227,9 @@ private void writeAllSearchResultsToFile(String fileName) {
}
}

private void appendStatisticsToFile(String fileName) {
private void appendStatisticsToFile() {
try {
FileWriter fileWriter = new FileWriter(fileName, true);
FileWriter fileWriter = new FileWriter("statistics.out", true);
PrintWriter printWriter = new PrintWriter(fileWriter);
List<String> values = List.of(
name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

public class VectorUtils {

public static void forEach(SearchResults searchResults, Consumer<SearchResult> consumer) {
public static void forEach(SearchResults<?, ?> searchResults, Consumer<SearchResult<?, ?>> consumer) {
var resultsIterator = searchResults.results();
while (resultsIterator.hasNext()) {
consumer.accept(resultsIterator.next());
Expand Down
188 changes: 188 additions & 0 deletions vector-search-simulator/vector-test/v_dbpedia_openai_1M_all.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# Simulator suite for the vector search tests on the dbpedia_openai_1M dataset.
# Runs once (repetitions: 1) for 60m with one client and one member, using the
# hazelcast-enterprise5 driver at 5.5.0-SNAPSHOT.
# NOTE(review): the license_key entry further down holds a literal key in plain
# text. Confirm this is intended; a committed key should normally be rotated
# and supplied from an environment variable or secret store instead.
- name: dbpedia_openai_1M
duration: 60m
repetitions: 1
clients: 1
members: 1
driver: hazelcast-enterprise5
version: maven=5.5.0-SNAPSHOT
client_args: >
-Xms30g
-Xmx30g
--add-modules jdk.incubator.vector
--enable-preview
--enable-native-access=ALL-UNNAMED
--add-modules java.se
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/java.lang=ALL-UNNAMED
--add-opens java.base/sun.nio.ch=ALL-UNNAMED
--add-opens java.management/sun.management=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
member_args: >
-Xms60g
-Xmx60g
--add-modules jdk.incubator.vector
--enable-preview
--enable-native-access=ALL-UNNAMED
--add-modules java.se
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/java.lang=ALL-UNNAMED
--add-opens java.base/sun.nio.ch=ALL-UNNAMED
--add-opens java.management/sun.management=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
loadgenerator_hosts: loadgenerators
node_hosts: nodes
verify_enabled: False
performance_monitor_interval_seconds: 1
warmup_seconds: 0
cooldown_seconds: 0
license_key: eyJ2ZXJzaW9uIjoiVjciLCJlbnZpcm9ubWVudFR5cGUiOiJQUk9EIiwidHJpYWwiOmZhbHNlLCJhbGxvd2VkTnVtYmVyT2ZOb2RlcyI6MTAsImNyZWF0aW9uRGF0ZSI6IjIwMjQtMDQtMDhUMjM6NTk6NTkuOTk5OTk5OTk5WiIsImV4cGlyeURhdGUiOiIyMDI0LTA4LTA4VDIzOjU5OjU5Ljk5OTk5OTk5OVoiLCJhbGxvd2VkTmF0aXZlTWVtb3J5U2l6ZSI6MjAwLCJhbGxvd2VkVGllcmVkU3RvcmVTaXplIjo1MDAsImFsbG93ZWRUcGNDb3JlcyI6MzYsImhhemVsY2FzdFZlcnNpb24iOjk5LCJvZW0iOmZhbHNlLCJncmFjZVBlcmlvZCI6MSwiZmVhdHVyZXMiOlsxLDIsMyw0LDUsNiw3LDgsOSwxMCwxMSwxMiwxMywxNCwxNSwxNiwxNywxOSwyMCwyMV19.MSAftNdoUABHjc6RakGUjCstsg975YwvXO-2S6qvIQtvfI--LCJW9gI22Zhk7HDjV3UduqITfM7JrKaQ_IKxDw==
parallel: False
# Test matrix: nine runs of VectorCollectionSearchDatasetTest over the same
# dataset, sweeping maxDegree in {16, 32, 64} against efConstruction in
# {128, 256, 512}. Every entry uses metric DOT, normalize false, limit 10 and
# threadCount 1. The entry name encodes the pair as
# vector_m_<maxDegree>_ef_<efConstruction>.
# NOTE(review): the commented-out "loadFirst: 10_" placeholder looks truncated;
# confirm the intended value before enabling it.
test:
# --- efConstruction = 128 ---
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_16_ef_128
threadCount: 1
# ratePerSecond: 5_000
# interval: 100us
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 16
efConstruction: 128
normalize: false
# search params
limit: 10
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_32_ef_128
threadCount: 1
# ratePerSecond: 5_000
# interval: 100us
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 32
efConstruction: 128
normalize: false
# search params
limit: 10
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_64_ef_128
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 64
efConstruction: 128
normalize: false
# search params
limit: 10
# --- efConstruction = 256 ---
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_16_ef_256
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 16
efConstruction: 256
normalize: false
# search params
limit: 10
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_32_ef_256
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 32
efConstruction: 256
normalize: false
# search params
limit: 10
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_64_ef_256
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 64
efConstruction: 256
normalize: false
# search params
limit: 10
# --- efConstruction = 512 ---
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_16_ef_512
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 16
efConstruction: 512
normalize: false
# search params
limit: 10
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_32_ef_512
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 32
efConstruction: 512
normalize: false
# search params
limit: 10
- class: com.hazelcast.simulator.tests.vector.VectorCollectionSearchDatasetTest
name: vector_m_64_ef_512
threadCount: 1
logRateMs: 60_000
searchProb: 1
# data parameters
datasetUrl: https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz
workingDirectory: /mnt/nvme1n1/workingDirectory/
# loadFirst: 10_
# index params
metric: DOT
maxDegree: 64
efConstruction: 512
normalize: false
# search params
limit: 10

0 comments on commit f0c6491

Please sign in to comment.