diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index de671b4..f9cb98c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -49,7 +49,7 @@ jobs:
       - name: Semantic Release
         run: |
           bun install @conveyal/maven-semantic-release semantic-release @semantic-release/git conventional-changelog-conventionalcommits
-          bun x semantic-release --prepare @conveyal/maven-semantic-release,@semantic-release/git --publish @semantic-release/github,@conveyal/maven-semantic-release --verify-conditions @semantic-release/github,@conveyal/maven-semantic-release,@semantic-release/git --verify-release @conveyal/maven-semantic-release
+          bun x semantic-release --prepare @conveyal/maven-semantic-release --publish @semantic-release/github,@conveyal/maven-semantic-release --verify-conditions @semantic-release/github,@conveyal/maven-semantic-release --verify-release @conveyal/maven-semantic-release
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GPG_KEY_NAME: ${{ secrets.GPG_KEY_NAME }}
diff --git a/.releaserc b/.releaserc
index 24223a5..3cd93e2 100644
--- a/.releaserc
+++ b/.releaserc
@@ -59,15 +59,6 @@
         ]
       }
     ],
-    "@semantic-release/release-notes-generator",
-    [
-      "@semantic-release/git",
-      {
-        "assets": [
-          "pom.xml"
-        ],
-        "message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}"
-      }
-    ]
+    "@semantic-release/release-notes-generator"
   ]
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index c439b34..a7534c8 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,8 @@ spark = SparkSession.builder.config(
 
 The connector supports ingesting multiple named/unnamed, dense/sparse vectors.
 
+_Click each to expand._
+
 <details>
 <summary>Unnamed/Default vector</summary>
 
@@ -194,7 +196,6 @@
 You can use the connector as a library in Databricks to ingest data into Qdrant.
 
 <img alt="Screenshot 2024-04-28 at 11 34 17 AM" src="…">
-
 ## Datatype support
 
 The appropriate Spark data types are mapped to the Qdrant payload based on the provided `schema`.
diff --git a/pom.xml b/pom.xml
index a613dad..77ccdbd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>io.qdrant</groupId>
   <artifactId>spark</artifactId>
-  <version>2.2.0</version>
+  <version>2.2.1</version>
   <name>qdrant-spark</name>
   <url>https://github.com/qdrant/qdrant-spark</url>
   <description>An Apache Spark connector for the Qdrant vector database</description>
diff --git a/src/main/java/io/qdrant/spark/QdrantVectorHandler.java b/src/main/java/io/qdrant/spark/QdrantVectorHandler.java
index e074751..e42e5d6 100644
--- a/src/main/java/io/qdrant/spark/QdrantVectorHandler.java
+++ b/src/main/java/io/qdrant/spark/QdrantVectorHandler.java
@@ -9,7 +9,6 @@ import io.qdrant.client.grpc.Points.Vectors;
 
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.types.StructType;
@@ -26,8 +25,7 @@ public static Vectors prepareVectors(
 
     // Maintaining support for the "embedding_field" and "vector_name" options
     if (!options.embeddingField.isEmpty()) {
-      int embeddingFieldIndex = schema.fieldIndex(options.embeddingField);
-      float[] embeddings = record.getArray(embeddingFieldIndex).toFloatArray();
+      float[] embeddings = extractFloatArray(record, schema, options.embeddingField);
       // 'options.vectorName' defaults to ""
       vectorsBuilder.mergeFrom(
           namedVectors(Collections.singletonMap(options.vectorName, vector(embeddings))));
@@ -42,9 +40,10 @@ private static Vectors prepareSparseVectors(
 
     for (int i = 0; i < options.sparseVectorNames.length; i++) {
       String name = options.sparseVectorNames[i];
-      List<Float> values = extractFloatArray(record, schema, options.sparseVectorValueFields[i]);
-      List<Integer> indices = extractIntArray(record, schema, options.sparseVectorIndexFields[i]);
-      sparseVectors.put(name, vector(values, indices));
+      float[] values = extractFloatArray(record, schema, options.sparseVectorValueFields[i]);
+      int[] indices = extractIntArray(record, schema, options.sparseVectorIndexFields[i]);
+
+      sparseVectors.put(name, vector(Floats.asList(values), Ints.asList(indices)));
     }
 
     return namedVectors(sparseVectors);
@@ -56,22 +55,21 @@ private static Vectors prepareDenseVectors(
 
     for (int i = 0; i < options.vectorNames.length; i++) {
       String name = options.vectorNames[i];
-      List<Float> values = extractFloatArray(record, schema, options.vectorFields[i]);
+      float[] values = extractFloatArray(record, schema, options.vectorFields[i]);
       denseVectors.put(name, vector(values));
     }
 
     return namedVectors(denseVectors);
   }
 
-  private static List<Float> extractFloatArray(
+  private static float[] extractFloatArray(
       InternalRow record, StructType schema, String fieldName) {
     int fieldIndex = schema.fieldIndex(fieldName);
-    return Floats.asList(record.getArray(fieldIndex).toFloatArray());
+    return record.getArray(fieldIndex).toFloatArray();
   }
 
-  private static List<Integer> extractIntArray(
-      InternalRow record, StructType schema, String fieldName) {
+  private static int[] extractIntArray(InternalRow record, StructType schema, String fieldName) {
     int fieldIndex = schema.fieldIndex(fieldName);
-    return Ints.asList(record.getArray(fieldIndex).toIntArray());
+    return record.getArray(fieldIndex).toIntArray();
  }
 }
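For context on the Guava calls introduced above: `Floats.asList` and `Ints.asList` return fixed-size `List` views backed by the primitive arrays, so after this refactor the helpers stay primitive (`float[]`/`int[]`) and boxing happens only at the one sparse-vector call that needs `List`s. A minimal, self-contained sketch of that behavior (the class name is hypothetical, not part of this patch):

```java
import com.google.common.primitives.Floats;
import com.google.common.primitives.Ints;
import java.util.List;

// Hypothetical demo, not part of the patch: illustrates the zero-copy
// primitive-to-boxed bridging that the refactored helpers rely on.
public class PrimitiveBridgeDemo {
  public static void main(String[] args) {
    float[] values = {0.25f, 0.75f};
    int[] indices = {3, 42};

    // Fixed-size List views backed by the arrays above; no elements are copied.
    List<Float> boxedValues = Floats.asList(values);
    List<Integer> boxedIndices = Ints.asList(indices);

    // Writes through the view are visible in the underlying array.
    boxedValues.set(0, 0.5f);
    System.out.println(values[0]);    // 0.5
    System.out.println(boxedIndices); // [3, 42]
  }
}
```

With the helpers returning arrays, the dense and default-embedding paths can pass `float[]` straight to `vector(...)` as the diff shows, and the `List` conversion is confined to the sparse-vector factory call.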