-
Notifications
You must be signed in to change notification settings - Fork 450
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added tests for JsonVectorCollection (#1563)
Along the way, I discovered a code path in JsonCollection that is never taken, and hence can be killed.
- Loading branch information
Showing
8 changed files
with
136 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
53 changes: 53 additions & 0 deletions
53
src/test/java/io/anserini/collection/JsonVectorCollectionDocumentArrayTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* Anserini: A Lucene toolkit for reproducible information retrieval research | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.anserini.collection; | ||
|
||
import org.junit.Before; | ||
|
||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.util.Map; | ||
|
||
// A file in a JsonVectorCollection can either be: | ||
// (1) A single JSON object (i.e., a single document) | ||
// (2) An array of JSON objects | ||
// (3) JSON Lines (i.e., one JSON object per line) | ||
// | ||
// This is the test case for (2) | ||
public class JsonVectorCollectionDocumentArrayTest extends JsonVectorCollectionTest { | ||
|
||
@Before | ||
public void setUp() throws Exception { | ||
super.setUp(); | ||
|
||
collectionPath = Paths.get("src/test/resources/sample_docs/json_vector/collection2"); | ||
collection = new JsonVectorCollection(collectionPath); | ||
|
||
Path segment1 = Paths.get("src/test/resources/sample_docs/json_vector/collection2/segment1.json"); | ||
|
||
segmentPaths.add(segment1); | ||
segmentDocCounts.put(segment1, 2); | ||
|
||
totalSegments = 1; | ||
totalDocs = 2; | ||
|
||
expected.put("doc1", Map.of("id", "doc1", | ||
"content", "f1 f2 f2 f3 f4 f4 f4 f4 f5 ")); | ||
expected.put("doc2", Map.of("id", "doc2", | ||
"content", "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 ")); | ||
} | ||
} |
55 changes: 55 additions & 0 deletions
55
src/test/java/io/anserini/collection/JsonVectorCollectionDocumentObjectTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* | ||
* Anserini: A Lucene toolkit for reproducible information retrieval research | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.anserini.collection; | ||
|
||
import org.junit.Before; | ||
|
||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.util.Map; | ||
|
||
// A file in a JsonVectorCollection can either be: | ||
// (1) A single JSON object (i.e., a single document) | ||
// (2) An array of JSON objects | ||
// (3) JSON Lines (i.e., one JSON object per line) | ||
// | ||
// This is the test case for (1) | ||
public class JsonVectorCollectionDocumentObjectTest extends JsonVectorCollectionTest { | ||
@Before | ||
public void setUp() throws Exception { | ||
super.setUp(); | ||
|
||
collectionPath = Paths.get("src/test/resources/sample_docs/json_vector/collection1"); | ||
collection = new JsonVectorCollection(collectionPath); | ||
|
||
Path segment1 = Paths.get("src/test/resources/sample_docs/json_vector/collection1/doc1.json"); | ||
Path segment2 = Paths.get("src/test/resources/sample_docs/json_vector/collection1/doc2.json"); | ||
|
||
segmentPaths.add(segment1); | ||
segmentDocCounts.put(segment1, 1); | ||
segmentPaths.add(segment2); | ||
segmentDocCounts.put(segment2, 1); | ||
|
||
totalSegments = 2; | ||
totalDocs = 2; | ||
|
||
expected.put("doc1", Map.of("id", "doc1", | ||
"content", "f1 f2 f2 f3 f4 f4 f4 f4 f5 ")); | ||
expected.put("doc2", Map.of("id", "doc2", | ||
"content", "f4 f4 f4 f5 f9 f9 f22 f22 f22 f22 f22 f22 f35 f35 f35 f35 f35 f35 f35 f35 ")); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
src/test/resources/sample_docs/json_vector/collection1/doc1.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"id": "doc1", | ||
"contents": "this is the contents 1.", | ||
"vector": {"f1": 1, "f2": 2, "f3": 1, "f4": 4, "f5": 1} | ||
} |
5 changes: 5 additions & 0 deletions
5
src/test/resources/sample_docs/json_vector/collection1/doc2.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"id": "doc2", | ||
"contents": "this is the contents 2.", | ||
"vector": {"f4": 3, "f5": 1, "f9": 2, "f22": 6, "f35": 8} | ||
} |
12 changes: 12 additions & 0 deletions
12
src/test/resources/sample_docs/json_vector/collection2/segment1.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[ | ||
{ | ||
"id": "doc1", | ||
"contents": "this is the contents 1.", | ||
"vector": {"f1": 1, "f2": 2, "f3": 1, "f4": 4, "f5": 1} | ||
}, | ||
{ | ||
"id": "doc2", | ||
"contents": "this is the contents 2.", | ||
"vector": {"f4": 3, "f5": 1, "f9": 2, "f22": 6, "f35": 8} | ||
} | ||
] |