Skip to content

Commit

Permalink
Add index statistics function (#1218)
Browse files Browse the repository at this point in the history
  • Loading branch information
PepijnBoers committed May 23, 2020
1 parent 67285c9 commit b541d1e
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
31 changes: 31 additions & 0 deletions src/main/java/io/anserini/index/IndexReaderUtils.java
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
Expand Down Expand Up @@ -768,4 +770,33 @@ public static String convertLuceneDocidToDocid(IndexReader reader, int docid) {
return null;
}
}

/**
 * Returns basic statistics for an index, computed over the {@code IndexArgs.CONTENTS} field,
 * together with a one-line summary of every field known to the index.
 *
 * <p>The returned map contains the following entries:
 * <ul>
 *   <li>{@code "documents"}: value of {@code reader.numDocs()}</li>
 *   <li>{@code "non_empty_documents"}: value of {@code reader.getDocCount(IndexArgs.CONTENTS)}</li>
 *   <li>{@code "unique_terms"}: value of {@code Terms.size()} for the contents field, or
 *       {@code 0} when the index has no terms for that field</li>
 *   <li>{@code "total_terms"}: value of {@code reader.getSumTotalTermFreq(IndexArgs.CONTENTS)}</li>
 *   <li>one entry per field name, whose value is a string describing the field's index options
 *       and whether it stores term vectors</li>
 * </ul>
 *
 * <p>NOTE(review): field-name entries share the key space with the statistics above, so a field
 * named e.g. {@code "documents"} would overwrite that statistic — confirm this cannot happen.
 *
 * @param reader index reader
 * @return index statistics as a map of statistic's name to statistic, or {@code null} if an
 *         {@link IOException} occurs while reading the index
 * @throws IOException declared in the signature; I/O errors raised inside this method are caught
 *         and reported by returning {@code null} instead
 */
public static Map<String, Object> getIndexStats(IndexReader reader) throws IOException {
  Map<String, Object> indexStats = new HashMap<>();
  try {
    // May be null when the contents field does not exist or has no terms.
    Terms terms = MultiTerms.getTerms(reader, IndexArgs.CONTENTS);

    indexStats.put("documents", reader.numDocs());
    indexStats.put("non_empty_documents", reader.getDocCount(IndexArgs.CONTENTS));
    // Guard against a missing field: report zero unique terms rather than throwing an NPE.
    // The value stays a Long in both branches so callers see a consistent type.
    indexStats.put("unique_terms", terms == null ? 0L : terms.size());
    indexStats.put("total_terms", reader.getSumTotalTermFreq(IndexArgs.CONTENTS));

    FieldInfos fieldInfos = FieldInfos.getMergedFieldInfos(reader);
    for (FieldInfo fi : fieldInfos) {
      indexStats.put(fi.name, "indexOption: " + fi.getIndexOptions() +
          ", hasVectors: " + fi.hasVectors());
    }
  } catch (IOException e) {
    // Eat any exceptions and just return null.
    return null;
  }
  return indexStats;
}
}
12 changes: 12 additions & 0 deletions src/test/java/io/anserini/index/IndexReaderUtilsTest.java
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -498,4 +498,16 @@ public void testComputeQueryDocumentScore() throws Exception {
reader.close();
dir.close();
}

// Checks the document count and the unique-term count reported by getIndexStats
// for the index stored under tempDir1.
@Test
public void testGetIndexStats() throws Exception {
  // try-with-resources closes the reader and then the directory even when an assertion fails,
  // so a failing test does not leak open index handles.
  try (Directory dir = FSDirectory.open(tempDir1);
       IndexReader reader = DirectoryReader.open(dir)) {
    assertEquals(3, IndexReaderUtils.getIndexStats(reader).get("documents"));
    assertEquals(Long.valueOf(6), IndexReaderUtils.getIndexStats(reader).get("unique_terms"));
  }
}
}

0 comments on commit b541d1e

Please sign in to comment.