Skip to content

Commit

Permalink
Use Cassandra 5.0 image for tests
Browse files Browse the repository at this point in the history
Activate vector type testing
  • Loading branch information
maximevw committed Oct 28, 2023
1 parent ab289ea commit 111d9b3
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 12 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Changed
- Deprecate the parameter `version` (CQL version) in JDBC URL because this one is purely informational and has no
effect. This will be removed in the next release.
- Update Apache Commons IO to version 2.15.0.
- Update Jackson dependencies to version 2.15.3.
- Use Apache Cassandra® 5.0 image to run tests.
- Replace references to "DataStax Java driver" by "Java Driver for Apache Cassandra®" following the transfer of the
codebase to Apache Software Foundation (see:
[IP clearance status](https://incubator.apache.org/ip-clearance/cassandra-java-driver.html) and
Expand Down
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,18 +108,18 @@
<checkstyle.version>9.3</checkstyle.version>
<caffeine.version>2.9.3</caffeine.version>
<commons-collections.version>4.4</commons-collections.version>
<commons-io.version>2.14.0</commons-io.version>
<commons-io.version>2.15.0</commons-io.version>
<commons-lang3.version>3.13.0</commons-lang3.version>
<java.driver.version>4.17.0</java.driver.version>
<jackson.version>2.15.2</jackson.version>
<jackson.version>2.15.3</jackson.version>
<!-- Versions for test dependencies -->
<hamcrest.version>2.2</hamcrest.version>
<junit5.version>5.10.0</junit5.version>
<junit-platform.version>1.10.0</junit-platform.version>
<lombok.version>1.18.30</lombok.version>
<mockito.version>3.12.4</mockito.version>
<slf4j.version>1.7.36</slf4j.version>
<testcontainers.version>1.19.0</testcontainers.version>
<testcontainers.version>1.19.1</testcontainers.version>
<astra-sdk.version>0.6.11</astra-sdk.version>
<!-- Versions for plugins -->
<maven-checkstyle-plugin.version>3.3.0</maven-checkstyle-plugin.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@
abstract class UsingCassandraContainerTest {

// For the official Cassandra image, see here: https://hub.docker.com/_/cassandra
static final DockerImageName CASSANDRA_IMAGE = DockerImageName.parse("cassandra:4.1.3");
static final DockerImageName CASSANDRA_IMAGE = DockerImageName.parse("cassandra:5.0");

static CassandraConnection sqlConnection = null;

// Using @Container annotation restarts a new container for each test of the class, so as it takes ~20/30 sec. to
// start a Cassandra container, we just want to have one container instance for all the tests of the class. See:
// https://www.testcontainers.org/test_framework_integration/manual_lifecycle_control/#singleton-containers
@SuppressWarnings("resource")
static final CassandraContainer<?> cassandraContainer = new CassandraContainer<>(CASSANDRA_IMAGE)
.withEnv("CASSANDRA_DC", "datacenter1")
.withEnv("CASSANDRA_CLUSTER_NAME", "embedded_test_cluster")
Expand Down
25 changes: 19 additions & 6 deletions src/test/java/com/ing/data/cassandra/jdbc/VectorsUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package com.ing.data.cassandra.jdbc;

import com.datastax.oss.driver.api.core.data.CqlVector;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
Expand All @@ -29,8 +30,6 @@
/**
* Test CQL Vector data type
*/
// FIXME: Implement vector testing when Cassandra 5.0 is available.
@Disabled
class VectorsUnitTest extends UsingCassandraContainerTest {

private static final String KEYSPACE = "test_keyspace_vect";
Expand Down Expand Up @@ -61,12 +60,26 @@ void givenVectorInsertStatement_whenExecute_insertExpectedValues() throws Except
assertEquals(8, intsVector.get(2));
final CqlVector<?> floatsVector = ((CassandraResultSet) resultSet).getVector(2);
assertEquals(4, floatsVector.size());
assertEquals(2.1, floatsVector.get(0));
assertEquals(3.7, floatsVector.get(1));
assertEquals(9.0, floatsVector.get(2));
assertEquals(5.5, floatsVector.get(2));
assertEquals(2.1f, floatsVector.get(0));
assertEquals(3.7f, floatsVector.get(1));
assertEquals(9.0f, floatsVector.get(2));
assertEquals(5.5f, floatsVector.get(3));

statement.close();
}

/**
 * Runs a similarity search on the {@code pet_supply_vectors} table and checks the first returned row.
 * <p>
 * The query orders the rows with {@code ANN OF} against a reference vector, limits the result to two rows and
 * exposes the {@code similarity_dot_product} score under the alias {@code similarity}. The test asserts that at
 * least one row is returned, that its {@code product_vector} column is not null and that its similarity score
 * equals {@code 3.0}.
 * </p>
 * <p>
 * The statement and the result set are opened in a try-with-resources block so that they are always closed, even
 * when an assertion fails.
 * </p>
 *
 * @throws Exception if the statement cannot be prepared or executed, or if reading the result set fails.
 */
@Test
@Disabled("ANN OF not supported in the version of Cassandra currently used by the test class (5.0.0-alpha1)")
void givenVectorTable_whenSimilaritySearch_shouldReturnResults() throws Exception {
    final String similarityQuery = "SELECT product_id, product_vector,"
        + "similarity_dot_product(product_vector,[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) as similarity "
        + "FROM pet_supply_vectors ORDER BY product_vector ANN OF [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0] "
        + "LIMIT 2";

    // Both resources are AutoCloseable (JDBC Statement / ResultSet): close them deterministically.
    try (CassandraPreparedStatement prepStatement = sqlConnection.prepareStatement(similarityQuery);
         java.sql.ResultSet rs = prepStatement.executeQuery()) {
        Assertions.assertTrue(rs.next());
        Assertions.assertNotNull(rs.getObject("product_vector"));
        Assertions.assertEquals(3.0d, rs.getDouble("similarity"));
    }
}

}
31 changes: 29 additions & 2 deletions src/test/resources/initEmbeddedCassandra.cql
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,37 @@ varintValue: 4321
DROP KEYSPACE IF EXISTS test_keyspace_vect;
CREATE KEYSPACE "test_keyspace_vect" WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

/* FIXME: Uncomment this script part when Cassandra 5.0 is available.
USE test_keyspace_vect;
CREATE TABLE vectors_test (
keyValue int PRIMARY KEY,
intsVector vector<int, 3>,
floatsVector vector<float, 4>);
*/

/* Sample table used for vector similarity search: each product carries a 14-dimension float vector. */
CREATE TABLE IF NOT EXISTS pet_supply_vectors (
product_id TEXT PRIMARY KEY,
product_name TEXT,
product_vector vector<float, 14>
);

/* No similarity search without the SAI index. */
CREATE CUSTOM INDEX IF NOT EXISTS idx_vector
ON pet_supply_vectors(product_vector)
USING 'StorageAttachedIndex';

/* Sample data: six products, each with its own 14-dimension vector. */
INSERT INTO pet_supply_vectors (product_id, product_name, product_vector)
VALUES ('pf1843','HealthyFresh - Chicken raw dog food',[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

INSERT INTO pet_supply_vectors (product_id, product_name, product_vector)
VALUES ('pf1844','HealthyFresh - Beef raw dog food',[1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]);

INSERT INTO pet_supply_vectors (product_id, product_name, product_vector)
VALUES ('pt0021','Dog Tennis Ball Toy',[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]);

INSERT INTO pet_supply_vectors (product_id, product_name, product_vector)
VALUES ('pt0041','Dog Ring Chew Toy',[0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]);

INSERT INTO pet_supply_vectors (product_id, product_name, product_vector)
VALUES ('pf7043','PupperSausage Bacon dog Treats',[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1]);

INSERT INTO pet_supply_vectors (product_id, product_name, product_vector)
VALUES ('pf7044','PupperSausage Beef dog Treats',[0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0]);

0 comments on commit 111d9b3

Please sign in to comment.