Skip to content

Commit

Permalink
Added tests for JsonVectorCollection (#1563)
Browse files Browse the repository at this point in the history
Along the way, I discovered a code path in JsonCollection that is never taken, and hence can be killed.
  • Loading branch information
lintool committed Jun 15, 2021
1 parent 5e8743f commit c86974f
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 18 deletions.
7 changes: 0 additions & 7 deletions src/main/java/io/anserini/collection/JsonCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,6 @@ public void readNext() throws NoSuchElementException {
} else {
atEOF = true; // there is no more JSON object in the bufferedReader
}
} else if (node.isArray()) {
if (iter != null && iter.hasNext()) {
JsonNode json = iter.next();
bufferedRecord = (T) createNewDocument(node);
} else {
throw new NoSuchElementException("Reached end of JsonNode iterator");
}
} else {
LOG.error("Error: invalid JsonNode type");
throw new NoSuchElementException("Invalid JsonNode type");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Anserini: A Lucene toolkit for reproducible information retrieval research
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.anserini.collection;

import org.junit.Before;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;

/**
 * Fixture for the "array of JSON objects" layout of a {@code JsonVectorCollection}.
 *
 * <p>A file in a JsonVectorCollection can take one of three shapes:
 * <ol>
 *   <li>a single JSON object (i.e., a single document);</li>
 *   <li>an array of JSON objects;</li>
 *   <li>JSON Lines (i.e., one JSON object per line).</li>
 * </ol>
 *
 * <p>This class covers shape (2): one segment file holding two documents.
 */
public class JsonVectorCollectionDocumentArrayTest extends JsonVectorCollectionTest {

  /**
   * Points the inherited fixture fields at {@code collection2} and registers the
   * segment, the document counts, and the expected id/content pair of each document.
   *
   * @throws Exception if the parent fixture setup fails
   */
  @Before
  public void setUp() throws Exception {
    super.setUp();

    // Collection under test.
    collectionPath = Paths.get("src/test/resources/sample_docs/json_vector/collection2");
    collection = new JsonVectorCollection(collectionPath);

    // Expected totals: a single segment containing both documents.
    totalSegments = 1;
    totalDocs = 2;

    final Path arraySegment =
        Paths.get("src/test/resources/sample_docs/json_vector/collection2/segment1.json");
    segmentDocCounts.put(arraySegment, 2);
    segmentPaths.add(arraySegment);

    // Expected fields per document id; each feature token is repeated once per unit
    // of its weight in the fixture's "vector" object.
    expected.put(
        "doc1",
        Map.of(
            "id", "doc1",
            "content", "f1 f2 f2 f3 f4 f4 f4 f4 f5 "));
    expected.put(
        "doc2",
        Map.of(
            "id", "doc2",
            "content", "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 "));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Anserini: A Lucene toolkit for reproducible information retrieval research
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.anserini.collection;

import org.junit.Before;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;

/**
 * Fixture for the "single JSON object per file" layout of a {@code JsonVectorCollection}.
 *
 * <p>A file in a JsonVectorCollection can take one of three shapes:
 * <ol>
 *   <li>a single JSON object (i.e., a single document);</li>
 *   <li>an array of JSON objects;</li>
 *   <li>JSON Lines (i.e., one JSON object per line).</li>
 * </ol>
 *
 * <p>This class covers shape (1): two segment files, each holding one document.
 */
public class JsonVectorCollectionDocumentObjectTest extends JsonVectorCollectionTest {

  /**
   * Points the inherited fixture fields at {@code collection1} and registers both
   * segments, the document counts, and the expected id/content pair of each document.
   *
   * @throws Exception if the parent fixture setup fails
   */
  @Before
  public void setUp() throws Exception {
    super.setUp();

    // Collection under test.
    collectionPath = Paths.get("src/test/resources/sample_docs/json_vector/collection1");
    collection = new JsonVectorCollection(collectionPath);

    // Expected totals: two segments with one document each.
    totalSegments = 2;
    totalDocs = 2;

    final Path firstDoc =
        Paths.get("src/test/resources/sample_docs/json_vector/collection1/doc1.json");
    final Path secondDoc =
        Paths.get("src/test/resources/sample_docs/json_vector/collection1/doc2.json");

    // Register the segments in the same order as before: doc1.json, then doc2.json.
    for (Path singleDocSegment : new Path[] {firstDoc, secondDoc}) {
      segmentPaths.add(singleDocSegment);
      segmentDocCounts.put(singleDocSegment, 1);
    }

    // Expected fields per document id; each feature token is repeated once per unit
    // of its weight in the fixture's "vector" object.
    expected.put(
        "doc1",
        Map.of(
            "id", "doc1",
            "content", "f1 f2 f2 f3 f4 f4 f4 f4 f5 "));
    expected.put(
        "doc2",
        Map.of(
            "id", "doc2",
            "content", "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 "));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@
import java.nio.file.Paths;
import java.util.Map;

// A file in a JsonCollection can either be:
// A file in a JsonVectorCollection can either be:
// (1) A single JSON object (i.e., a single document)
// (2) An array of JSON objects
// (3) JSON Lines (i.e., one JSON object per line)
//
// This is the test case for (3)
//
// Note that we're testing the multifield capability here and only here, since the codepath is shared.
public class JsonVectorCollectionLineObjectTest extends JsonVectorCollectionTest {

@Before
Expand All @@ -52,11 +50,4 @@ public void setUp() throws Exception {
expected.put("doc2", Map.of("id", "doc2",
"content", "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 "));
}

@Override
void checkDocument(SourceDocument doc, Map<String, String> expected) {
assertTrue(doc.indexable());
assertEquals(expected.get("id"), doc.id());
assertEquals(expected.get("content"), doc.contents());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ void checkDocument(SourceDocument doc, Map<String, String> expected) {
assertTrue(doc.indexable());
assertEquals(expected.get("id"), doc.id());
assertEquals(expected.get("content"), doc.contents());
assertEquals(expected.get("raw"), doc.raw());

// Checking raw is optional
if (expected.get("raw") != null) {
assertEquals(expected.get("raw"), doc.raw());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"id": "doc1",
"contents": "this is the contents 1.",
"vector": {"f1": 1, "f2": 2, "f3": 1, "f4": 4, "f5": 1}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"id": "doc2",
"contents": "this is the contents 2.",
"vector": {"f4": 3, "f5": 1, "f9": 2, "f22": 6, "f35": 8}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"id": "doc1",
"contents": "this is the contents 1.",
"vector": {"f1": 1, "f2": 2, "f3": 1, "f4": 4, "f5": 1}
},
{
"id": "doc2",
"contents": "this is the contents 2.",
"vector": {"f4": 3, "f5": 1, "f9": 2, "f22": 6, "f35": 8}
}
]

0 comments on commit c86974f

Please sign in to comment.