Skip to content

Commit

Permalink
Exposes method in IndexReaderUtils to fetch raw document (#937)
Browse files Browse the repository at this point in the history
  • Loading branch information
zeynepakkalyoncu authored and lintool committed Jan 7, 2020
1 parent f310706 commit f63cd22
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/main/java/io/anserini/index/IndexReaderUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,25 @@ public static Map<String, Long> getDocumentVector(IndexReader reader, String doc
return docVector;
}

/**
 * Returns the raw document given its collection docid.
 *
 * <p>The collection docid is first resolved to a Lucene internal docid via
 * {@code convertDocidToLuceneDocid}. A resolved id that lies outside the reader's
 * {@code [0, maxDoc)} range is treated as "not found" instead of being handed to the reader.
 *
 * @param reader index reader
 * @param docid collection docid
 * @return the raw document given its collection docid, or <code>null</code> if not found
 *         or if reading the index fails with an {@link IOException}.
 */
public static String getRawDocument(IndexReader reader, String docid) {
  try {
    int luceneDocid = convertDocidToLuceneDocid(reader, docid);

    // An id outside [0, maxDoc) cannot name a document in this reader. Lucene rejects such
    // ids with an unchecked exception, which would bypass the documented null return.
    if (luceneDocid < 0 || luceneDocid >= reader.maxDoc()) {
      return null;
    }

    Document rawDoc = reader.document(luceneDocid);
    if (rawDoc == null) {
      return null;
    }
    return rawDoc.get(LuceneDocumentGenerator.FIELD_RAW);
  } catch (IOException e) {
    // Best-effort lookup: a read failure is reported to the caller as "not found".
    return null;
  }
}

/**
* Computes the BM25 weight of a term (prior to analysis) in a particular document.
* @param reader index reader
Expand Down
10 changes: 10 additions & 0 deletions src/test/java/io/anserini/index/IndexReaderUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,16 @@ public void testDocumentVector() throws Exception {
assertEquals(Long.valueOf(1), documentVector.get("test"));
}

/**
 * Verifies that {@code IndexReaderUtils.getRawDocument} returns the stored raw text
 * for each of the three documents in the test index.
 */
@Test
public void testRawDoc() throws Exception {
  // try-with-resources releases the reader and the directory even when an assertion fails.
  try (Directory dir = FSDirectory.open(tempDir1);
       IndexReader reader = DirectoryReader.open(dir)) {
    assertEquals("here is some text here is some more text", IndexReaderUtils.getRawDocument(reader, "doc1"));
    assertEquals("more texts", IndexReaderUtils.getRawDocument(reader, "doc2"));
    assertEquals("here is a test", IndexReaderUtils.getRawDocument(reader, "doc3"));
  }
}

@Test
public void testDocidConversion() throws Exception {
Directory dir = FSDirectory.open(tempDir1);
Expand Down

0 comments on commit f63cd22

Please sign in to comment.