From 111d9b3853883104d8c13affb521af09d17d5fde Mon Sep 17 00:00:00 2001 From: Maxime Wiewiora <48218208+maximevw@users.noreply.github.com> Date: Sat, 28 Oct 2023 15:14:14 +0200 Subject: [PATCH] Use Cassandra 5.0 image for tests Activate vector type testing --- CHANGELOG.md | 3 ++ pom.xml | 6 ++-- .../jdbc/UsingCassandraContainerTest.java | 3 +- .../data/cassandra/jdbc/VectorsUnitTest.java | 25 +++++++++++---- src/test/resources/initEmbeddedCassandra.cql | 31 +++++++++++++++++-- 5 files changed, 56 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93f6f0b..d952f2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Changed - Deprecate the parameter `version` (CQL version) in JDBC URL because this one is purely informational and has no effect. This will be removed in the next release. +- Update Apache Commons IO to version 2.15.0. +- Update Jackson dependencies to version 2.15.3. +- Use Apache Cassandra® 5.0 image to run tests. 
- Replace references to "DataStax Java driver" by "Java Driver for Apache Cassandra®" following the transfer of the codebase to Apache Software Foundation (see: [IP clearance status](https://incubator.apache.org/ip-clearance/cassandra-java-driver.html) and diff --git a/pom.xml b/pom.xml index 9bd479c..ab59657 100644 --- a/pom.xml +++ b/pom.xml @@ -108,10 +108,10 @@ 9.3 2.9.3 4.4 - 2.14.0 + 2.15.0 3.13.0 4.17.0 - 2.15.2 + 2.15.3 2.2 5.10.0 @@ -119,7 +119,7 @@ 1.18.30 3.12.4 1.7.36 - 1.19.0 + 1.19.1 0.6.11 3.3.0 diff --git a/src/test/java/com/ing/data/cassandra/jdbc/UsingCassandraContainerTest.java b/src/test/java/com/ing/data/cassandra/jdbc/UsingCassandraContainerTest.java index b3cad8b..20dc31b 100644 --- a/src/test/java/com/ing/data/cassandra/jdbc/UsingCassandraContainerTest.java +++ b/src/test/java/com/ing/data/cassandra/jdbc/UsingCassandraContainerTest.java @@ -28,13 +28,14 @@ abstract class UsingCassandraContainerTest { // For the official Cassandra image, see here: https://hub.docker.com/_/cassandra - static final DockerImageName CASSANDRA_IMAGE = DockerImageName.parse("cassandra:4.1.3"); + static final DockerImageName CASSANDRA_IMAGE = DockerImageName.parse("cassandra:5.0"); static CassandraConnection sqlConnection = null; // Using @Container annotation restarts a new container for each test of the class, so as it takes ~20/30 sec. to // start a Cassandra container, we just want to have one container instance for all the tests of the class. 
See: // https://www.testcontainers.org/test_framework_integration/manual_lifecycle_control/#singleton-containers + @SuppressWarnings("resource") static final CassandraContainer<?> cassandraContainer = new CassandraContainer<>(CASSANDRA_IMAGE) .withEnv("CASSANDRA_DC", "datacenter1") .withEnv("CASSANDRA_CLUSTER_NAME", "embedded_test_cluster") diff --git a/src/test/java/com/ing/data/cassandra/jdbc/VectorsUnitTest.java b/src/test/java/com/ing/data/cassandra/jdbc/VectorsUnitTest.java index 152c971..91dde4a 100644 --- a/src/test/java/com/ing/data/cassandra/jdbc/VectorsUnitTest.java +++ b/src/test/java/com/ing/data/cassandra/jdbc/VectorsUnitTest.java @@ -14,6 +14,7 @@ package com.ing.data.cassandra.jdbc; import com.datastax.oss.driver.api.core.data.CqlVector; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -29,8 +30,6 @@ /** * Test CQL Vector data type */ -// FIXME: Implement vector testing when Cassandra 5.0 is available. 
-@Disabled class VectorsUnitTest extends UsingCassandraContainerTest { private static final String KEYSPACE = "test_keyspace_vect"; @@ -61,12 +60,26 @@ void givenVectorInsertStatement_whenExecute_insertExpectedValues() throws Except assertEquals(8, intsVector.get(2)); final CqlVector floatsVector = ((CassandraResultSet) resultSet).getVector(2); assertEquals(4, floatsVector.size()); - assertEquals(2.1, floatsVector.get(0)); - assertEquals(3.7, floatsVector.get(1)); - assertEquals(9.0, floatsVector.get(2)); - assertEquals(5.5, floatsVector.get(2)); + assertEquals(2.1f, floatsVector.get(0)); + assertEquals(3.7f, floatsVector.get(1)); + assertEquals(9.0f, floatsVector.get(2)); + assertEquals(5.5f, floatsVector.get(3)); statement.close(); } + @Test + @Disabled("ANN OF not supported in the version of Cassandra currently used by the test class (5.0.0-alpha1)") + void givenVectorTable_whenSimilaritySearch_shouldReturnResults() throws Exception { + final CassandraPreparedStatement prepStatement = sqlConnection.prepareStatement( + "SELECT product_id, product_vector," + + "similarity_dot_product(product_vector,[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) as similarity " + + "FROM pet_supply_vectors ORDER BY product_vector ANN OF [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] " + + "LIMIT 2"); + java.sql.ResultSet rs = prepStatement.executeQuery(); + Assertions.assertTrue(rs.next()); + Assertions.assertNotNull(rs.getObject("product_vector")); + Assertions.assertEquals(3.0d, rs.getDouble("similarity")); + } + } diff --git a/src/test/resources/initEmbeddedCassandra.cql b/src/test/resources/initEmbeddedCassandra.cql index 0a8d1a6..6594d95 100644 --- a/src/test/resources/initEmbeddedCassandra.cql +++ b/src/test/resources/initEmbeddedCassandra.cql @@ -222,10 +222,37 @@ varintValue: 4321 DROP KEYSPACE IF EXISTS test_keyspace_vect; CREATE KEYSPACE "test_keyspace_vect" WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; -/* FIXME: Uncomment this script part when 
Cassandra 5.0 is available. USE test_keyspace_vect; CREATE TABLE vectors_test ( keyValue int PRIMARY KEY, intsVector vector<int, 3>, floatsVector vector<float, 4>); -*/ + +CREATE TABLE IF NOT EXISTS pet_supply_vectors ( + product_id TEXT PRIMARY KEY, + product_name TEXT, + product_vector vector<float, 14> +); + +/* No similarity search without the SAI INDEX. */ +CREATE CUSTOM INDEX IF NOT EXISTS idx_vector +ON pet_supply_vectors(product_vector) +USING 'StorageAttachedIndex'; + +INSERT INTO pet_supply_vectors (product_id, product_name, product_vector) +VALUES ('pf1843','HealthyFresh - Chicken raw dog food',[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + +INSERT INTO pet_supply_vectors (product_id, product_name, product_vector) +VALUES ('pf1844','HealthyFresh - Beef raw dog food',[1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]); + +INSERT INTO pet_supply_vectors (product_id, product_name, product_vector) +VALUES ('pt0021','Dog Tennis Ball Toy',[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]); + +INSERT INTO pet_supply_vectors (product_id, product_name, product_vector) +VALUES ('pt0041','Dog Ring Chew Toy',[0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + +INSERT INTO pet_supply_vectors (product_id, product_name, product_vector) +VALUES ('pf7043','PupperSausage Bacon dog Treats',[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1]); + +INSERT INTO pet_supply_vectors (product_id, product_name, product_vector) +VALUES ('pf7044','PupperSausage Beef dog Treats',[0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0]);