Skip to content

Commit

Permalink
Fix MS MARCO V2.1 repo experiments on segmented doc collection (#2483)
Browse files: browse the repository at this point in the history
  • Loading branch information
lintool authored May 2, 2024
1 parent f6f0dd6 commit 3abc2a0
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
5 changes: 2 additions & 3 deletions src/main/java/io/anserini/reproduce/RunRepro.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,11 @@ public void run() throws StreamReadException, DatabindException, IOException, In
else {
System.out.println(String.format(" %7s: %.4f [OK]", metric, score));
}
- }
- else {
+ } else {
System.out.println("Evaluation command failed for metric: " + metric);
}
System.out.println("");
}
System.out.println("");
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/io/anserini/search/SearchCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,13 @@ public SearchCollection(Args args) throws IOException {
}
}

+ LOG.info("MaxPassage: " + args.selectMaxPassage);
+ if (args.selectMaxPassage) {
+ LOG.info("MaxPassage delimiter: " + args.selectMaxPassageDelimiter);
+ LOG.info("MaxPassage hits: " + args.selectMaxPassageHits);
+ }
+ LOG.info("Hits: " + args.hits);
+
// get collection class if available
if (args.collectionClass != null) {
try {
Expand All @@ -1035,6 +1042,7 @@ public SearchCollection(Args args) throws IOException {
} else {
this.collectionClass = null;
}
+ LOG.info("Collection class: " + this.collectionClass);

this.isRerank = args.rm3 || args.axiom || args.bm25prf || args.rocchio;
this.analyzer = getAnalyzer();
Expand Down
10 changes: 5 additions & 5 deletions src/main/resources/reproduce/msmarco-v2.1-doc.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
conditions:
- name: bm25
- display: "BM25 v2.1 (k1=0.9, b=0.4)"
- display_html: "BM25 v2.1 (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
+ display: "BM25 doc (k1=0.9, b=0.4)"
+ display_html: "BM25 doc (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
command: java -cp $fatjar io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc -topics $topics -output $output -hits 1000 -bm25
topics:
Expand Down Expand Up @@ -38,10 +38,10 @@ conditions:
R@100: 0.2604
R@1K: 0.5383
- name: bm25-segmented
- display: "BM25 v2.1 Segmented Corpus (k1=0.9, b=0.4)"
- display_html: "BM25 v2.1 Segmented Corpus (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
+ display: "BM25 segmented doc (k1=0.9, b=0.4)"
+ display_html: "BM25 segmented doc (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)"
display_row: ""
- command: java -cp $fatjar io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc-segmented -topics $topics -output $output -hits 1000 -bm25 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000
+ command: java -cp $fatjar io.anserini.search.SearchCollection -threads $threads -index msmarco-v2.1-doc-segmented -topics $topics -output $output -hits 10000 -bm25 -selectMaxPassage -selectMaxPassage.delimiter \# -selectMaxPassage.hits 1000
topics:
- topic_key: msmarco-v2-doc-dev
eval_key: msmarco-v2.1-doc.dev
Expand Down

0 comments on commit 3abc2a0

Please sign in to comment.