Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added tests for JsonVectorCollection #1563

Merged
merged 3 commits into from
Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions src/main/java/io/anserini/collection/JsonCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,6 @@ public void readNext() throws NoSuchElementException {
} else {
atEOF = true; // there is no more JSON object in the bufferedReader
}
} else if (node.isArray()) {
if (iter != null && iter.hasNext()) {
JsonNode json = iter.next();
bufferedRecord = (T) createNewDocument(node);
} else {
throw new NoSuchElementException("Reached end of JsonNode iterator");
}
} else {
LOG.error("Error: invalid JsonNode type");
throw new NoSuchElementException("Invalid JsonNode type");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Anserini: A Lucene toolkit for reproducible information retrieval research
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.anserini.collection;

import org.junit.Before;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;

// A file in a JsonVectorCollection can either be:
// (1) A single JSON object (i.e., a single document)
// (2) An array of JSON objects
// (3) JSON Lines (i.e., one JSON object per line)
//
// This is the test case for (2)
public class JsonVectorCollectionDocumentArrayTest extends JsonVectorCollectionTest {

  /**
   * Prepares the fixtures for the "array of JSON objects" case: the collection rooted at
   * {@code collection2}, its single segment file, and the per-document values that the
   * checks in the parent test class compare against.
   *
   * @throws Exception if the parent fixture setup fails
   */
  @Before
  public void setUp() throws Exception {
    super.setUp();

    // Collection-wide totals: one segment file holding two documents.
    totalSegments = 1;
    totalDocs = 2;

    collectionPath = Paths.get("src/test/resources/sample_docs/json_vector/collection2");
    collection = new JsonVectorCollection(collectionPath);

    // Register the only segment together with the number of documents expected in it.
    final Path arraySegment =
        Paths.get("src/test/resources/sample_docs/json_vector/collection2/segment1.json");
    segmentDocCounts.put(arraySegment, 2);
    segmentPaths.add(arraySegment);

    // Expected id/content pairs, keyed by document id.
    // NOTE(review): content looks like each vector feature repeated by its weight, with a
    // trailing space -- confirm against JsonVectorCollection.
    expected.put(
        "doc1",
        Map.of(
            "id", "doc1",
            "content", "f1 f2 f2 f3 f4 f4 f4 f4 f5 "));
    expected.put(
        "doc2",
        Map.of(
            "id", "doc2",
            "content",
            "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 "));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Anserini: A Lucene toolkit for reproducible information retrieval research
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.anserini.collection;

import org.junit.Before;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;

// A file in a JsonVectorCollection can either be:
// (1) A single JSON object (i.e., a single document)
// (2) An array of JSON objects
// (3) JSON Lines (i.e., one JSON object per line)
//
// This is the test case for (1)
public class JsonVectorCollectionDocumentObjectTest extends JsonVectorCollectionTest {

  /**
   * Prepares the fixtures for the "single JSON object per file" case: the collection rooted
   * at {@code collection1}, its two segment files (one document each), and the per-document
   * values that the checks in the parent test class compare against.
   *
   * @throws Exception if the parent fixture setup fails
   */
  @Before
  public void setUp() throws Exception {
    super.setUp();

    // Collection-wide totals: two segment files, one document in each.
    totalSegments = 2;
    totalDocs = 2;

    collectionPath = Paths.get("src/test/resources/sample_docs/json_vector/collection1");
    collection = new JsonVectorCollection(collectionPath);

    final Path firstDocFile =
        Paths.get("src/test/resources/sample_docs/json_vector/collection1/doc1.json");
    final Path secondDocFile =
        Paths.get("src/test/resources/sample_docs/json_vector/collection1/doc2.json");

    // Segments are registered in the same order as before (doc1, then doc2).
    segmentPaths.add(firstDocFile);
    segmentPaths.add(secondDocFile);
    segmentDocCounts.put(firstDocFile, 1);
    segmentDocCounts.put(secondDocFile, 1);

    // Expected id/content pairs, keyed by document id.
    // NOTE(review): content looks like each vector feature repeated by its weight, with a
    // trailing space -- confirm against JsonVectorCollection.
    expected.put(
        "doc1",
        Map.of(
            "id", "doc1",
            "content", "f1 f2 f2 f3 f4 f4 f4 f4 f5 "));
    expected.put(
        "doc2",
        Map.of(
            "id", "doc2",
            "content",
            "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 "));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@
import java.nio.file.Paths;
import java.util.Map;

// A file in a JsonCollection can either be:
// A file in a JsonVectorCollection can either be:
// (1) A single JSON object (i.e., a single document)
// (2) An array of JSON objects
// (3) JSON Lines (i.e., one JSON object per line)
//
// This is the test case for (3)
//
// Note that we're testing the multifield capability here and only here, since the codepath is shared.
public class JsonVectorCollectionLineObjectTest extends JsonVectorCollectionTest {

@Before
Expand All @@ -52,11 +50,4 @@ public void setUp() throws Exception {
expected.put("doc2", Map.of("id", "doc2",
"content", "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 "));
}

@Override
void checkDocument(SourceDocument doc, Map<String, String> expected) {
assertTrue(doc.indexable());
assertEquals(expected.get("id"), doc.id());
assertEquals(expected.get("content"), doc.contents());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ void checkDocument(SourceDocument doc, Map<String, String> expected) {
assertTrue(doc.indexable());
assertEquals(expected.get("id"), doc.id());
assertEquals(expected.get("content"), doc.contents());
assertEquals(expected.get("raw"), doc.raw());

// Checking raw is optional
if (expected.get("raw") != null) {
assertEquals(expected.get("raw"), doc.raw());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"id": "doc1",
"contents": "this is the contents 1.",
"vector": {"f1": 1, "f2": 2, "f3": 1, "f4": 4, "f5": 1}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"id": "doc2",
"contents": "this is the contents 2.",
"vector": {"f4": 3, "f5": 1, "f9": 2, "f22": 6, "f35": 8}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"id": "doc1",
"contents": "this is the contents 1.",
"vector": {"f1": 1, "f2": 2, "f3": 1, "f4": 4, "f5": 1}
},
{
"id": "doc2",
"contents": "this is the contents 2.",
"vector": {"f4": 3, "f5": 1, "f9": 2, "f22": 6, "f35": 8}
}
]